Add offline mosaic EXIF tagging (stitch --write-exif, tag_mosaic_exif CLI)

- spruce.exif: tag_mosaic_jpeg_for_scan_dir, resolve_machine_label_for_scan_dir; ProcessingSoftware for tile-stitched mosaics
- spruce.settings: load_config(require_credentials=False) for config without login
- scripts/tag_mosaic_exif.py and tests; stitch script --write-exif path
This commit is contained in:
2026-04-26 20:47:23 -04:00
parent 314b68322c
commit 08a29d124a
6 changed files with 424 additions and 5 deletions
+31
View File
@@ -26,6 +26,9 @@ if str(_REPO_ROOT) not in sys.path:
from PIL import Image, ImageChops
from spruce.exif import tag_mosaic_jpeg_for_scan_dir
from spruce.settings import DEFAULT_CONFIG, load_config
TILE_FILENAME_RE = re.compile(r"tile_r(\d+)_c(\d+)\.jpg$", re.IGNORECASE)
@@ -281,6 +284,20 @@ def main() -> None:
default=95,
help="JPEG quality for output (default: 95).",
)
parser.add_argument(
"--write-exif",
action="store_true",
help=(
"After saving, write mosaic EXIF using metadata.json and config "
"(implies override if write_exif is false in config). JPEG output only."
),
)
parser.add_argument(
"--config",
default=DEFAULT_CONFIG,
metavar="FILE",
help=f"YAML config for EXIF machine_metadata (default: {DEFAULT_CONFIG})",
)
parser.add_argument(
"--tile-gap",
type=int,
@@ -339,6 +356,20 @@ def main() -> None:
f"grid {nx}x{ny}, tile_gap={args.tile_gap})"
)
if args.write_exif:
if out.suffix.lower() not in (".jpg", ".jpeg"):
raise SystemExit("--write-exif requires a .jpg or .jpeg output path.")
cfg_path = Path(args.config).expanduser()
if not cfg_path.is_file():
raise SystemExit(f"Config not found: {cfg_path}")
config = load_config(str(cfg_path), require_credentials=False)
try:
ok = tag_mosaic_jpeg_for_scan_dir(scan_dir, out, config, force=True)
except ValueError as exc:
raise SystemExit(str(exc)) from exc
if not ok:
raise SystemExit("EXIF tagging failed (see log).")
if args.compare_mosaic:
compare_mosaics(canvas, scan_dir / "mosaic.jpg", fit=args.fit)
+112
View File
@@ -0,0 +1,112 @@
#!/usr/bin/env python3
"""
Write mosaic EXIF into a JPEG using metadata.json and config machine_metadata.
Usage:
python scripts/tag_mosaic_exif.py /path/to/scan_dir
python scripts/tag_mosaic_exif.py /path/to/scan_dir --jpeg mosaic.jpg --machine "BW1-6 [AMR-19]"
"""
from __future__ import annotations
import argparse
import logging
import sys
from pathlib import Path
_REPO_ROOT = Path(__file__).resolve().parent.parent
if str(_REPO_ROOT) not in sys.path:
sys.path.insert(0, str(_REPO_ROOT))
from spruce.exif import tag_mosaic_jpeg_for_scan_dir
from spruce.settings import DEFAULT_CONFIG, load_config
log = logging.getLogger(__name__)
def main() -> None:
    """Command-line entry point: write mosaic EXIF into one JPEG of a scan directory.

    Parses the command line, checks that the scan directory, the JPEG and the
    config file exist, loads the config with ``require_credentials=False`` and
    delegates the tagging to ``tag_mosaic_jpeg_for_scan_dir``.

    A relative ``--jpeg`` path is first resolved against the current working
    directory; when no such file exists it is looked up inside ``scan_dir``,
    so ``--jpeg mosaic.jpg`` (as in the module usage example) works from any
    directory.

    Exits through ``sys.exit`` with an error message when a path check fails,
    when the tagger raises ``ValueError``, or when the tagger returns a falsy
    result (tagging skipped or failed).
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)-8s %(message)s",
        datefmt="%H:%M:%S",
    )
    parser = argparse.ArgumentParser(
        description=(
            "Insert RootView-style mosaic EXIF (piexif, no re-encode) using "
            "scan_dir/metadata.json and machine_metadata from config."
        )
    )
    parser.add_argument(
        "scan_dir",
        type=Path,
        help="Directory containing metadata.json (…/<machine>/<date>/<scan_id>/)",
    )
    parser.add_argument(
        "--jpeg",
        type=Path,
        default=None,
        help=(
            "JPEG to tag (default: <scan_dir>/mosaic_reconstructed.jpg); "
            "a relative path not found in the current directory is looked up "
            "in scan_dir"
        ),
    )
    parser.add_argument(
        "--config",
        default=DEFAULT_CONFIG,
        metavar="FILE",
        help=f"YAML config (default: {DEFAULT_CONFIG})",
    )
    parser.add_argument(
        "--machine",
        metavar="LABEL",
        default=None,
        help='RootView machine label, e.g. "BW1-6 [AMR-19]" (skip archive slug inference)',
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="Tag even when write_exif is false in config",
    )
    parser.add_argument(
        "--processing-software",
        default=None,
        metavar="STR",
        help="Override ProcessingSoftware EXIF string",
    )
    args = parser.parse_args()

    scan_dir = args.scan_dir.expanduser().resolve()
    if not scan_dir.is_dir():
        sys.exit(f"Not a directory: {scan_dir}")

    if args.jpeg is None:
        jpeg = scan_dir / "mosaic_reconstructed.jpg"
    else:
        requested = args.jpeg.expanduser()
        jpeg = requested.resolve()
        # The cwd-relative interpretation wins when it exists (previous
        # behaviour); otherwise a relative name is tried inside scan_dir
        # instead of failing outright.
        if not jpeg.is_file() and not requested.is_absolute():
            in_scan_dir = (scan_dir / requested).resolve()
            if in_scan_dir.is_file():
                jpeg = in_scan_dir
    if not jpeg.is_file():
        sys.exit(f"JPEG not found: {jpeg}")

    cfg_path = Path(args.config).expanduser()
    if not cfg_path.is_file():
        sys.exit(f"Config not found: {cfg_path}")
    # Offline tagging needs no login, so credentials are not required here.
    config = load_config(str(cfg_path), require_credentials=False)

    try:
        ok = tag_mosaic_jpeg_for_scan_dir(
            scan_dir,
            jpeg,
            config,
            machine_label=args.machine,
            processing_software=args.processing_software,
            force=args.force,
        )
    except ValueError as exc:
        sys.exit(str(exc))
    if not ok:
        sys.exit("EXIF tagging was skipped or failed (see log).")
    log.info("EXIF written: %s", jpeg)
if __name__ == "__main__":
main()
+95
View File
@@ -2,18 +2,24 @@
Write EXIF metadata into downloaded mosaic JPEGs (piexif, no re-encode).
"""
import json
import logging
import re
from pathlib import Path
from typing import Any
import piexif
from spruce.paths import machine_dir_name
from piexif import ExifIFD, GPSIFD, ImageIFD
log = logging.getLogger(__name__)
USER_COMMENT_ASCII = b"ASCII\x00\x00\x00"
# ProcessingSoftware for mosaics stitched from tiles (distinct from server download path).
DEFAULT_PROCESSING_SOFTWARE_MOSAIC_FROM_TILES = "spruce-scraper/1.0 mosaic_from_tiles"
def _fmt_exif_datetime(scan_time: str) -> str:
"""`YYYY-MM-DD HH:MM:SS` -> `YYYY:MM:DD HH:MM:SS` for EXIF; empty on failure."""
@@ -252,3 +258,92 @@ def write_mosaic_exif(
log.warning("EXIF write failed for %s: %s", jpeg_path, exc)
return False
return True
def resolve_machine_label_for_scan_dir(
scan_dir: Path,
config: dict[str, Any],
machine_label: str | None,
) -> str:
"""
Return the RootView machine label for EXIF.
If ``machine_label`` is set, it is returned stripped. Otherwise the parent
of ``scan_dir`` is walked (archive slug under ``machine_dir_name``) and
matched uniquely against keys in ``config['machine_metadata']``.
"""
if machine_label is not None and str(machine_label).strip() != "":
return str(machine_label).strip()
slug = scan_dir.parent.parent.name
meta = config.get("machine_metadata") or {}
matches = [lbl for lbl in meta if machine_dir_name({"label": lbl}) == slug]
if len(matches) == 1:
return matches[0]
if not matches:
raise ValueError(
f"Could not map archive folder {slug!r} to a machine label. "
f"Add it under machine_metadata in config or pass machine_label explicitly."
)
raise ValueError(
f"Ambiguous archive folder {slug!r}: multiple machine_metadata keys "
f"match ({matches!r}). Pass machine_label explicitly."
)
def tag_mosaic_jpeg_for_scan_dir(
scan_dir: Path,
jpeg_path: Path,
config: dict[str, Any],
*,
machine_label: str | None = None,
processing_software: str | None = None,
force: bool = False,
) -> bool:
"""
Write the same mosaic EXIF as the scraper, using ``metadata.json`` in
``scan_dir`` and optional ``machine_metadata`` from ``config``.
Returns False if tagging was skipped (e.g. ``write_exif`` disabled and not
``force``). Raises ``ValueError`` for invalid paths or JPEG suffix.
``force=True`` tags even when ``config['write_exif']`` is false (e.g. stitch
script ``--write-exif``).
"""
scan_dir = scan_dir.resolve()
jpeg_path = jpeg_path.resolve()
suffix = jpeg_path.suffix.lower()
if suffix not in (".jpg", ".jpeg"):
raise ValueError(
f"EXIF tagging only supports JPEG files; got suffix {jpeg_path.suffix!r}: {jpeg_path}"
)
if not force and not config.get("write_exif", True):
log.warning(
"Skipping EXIF: write_exif is false in config (use force=True to override)."
)
return False
meta_path = scan_dir / "metadata.json"
if not meta_path.is_file():
raise ValueError(f"Missing metadata.json: {meta_path}")
with meta_path.open(encoding="utf-8") as fh:
scan_meta: dict[str, Any] = json.load(fh)
scan_id = int(scan_meta["scan_id"])
if scan_dir.name != str(scan_id):
raise ValueError(
f"scan_dir must be the scan id folder (got name {scan_dir.name!r}, "
f"metadata scan_id={scan_id}). Expected …/<date>/{scan_id}/"
)
label = resolve_machine_label_for_scan_dir(scan_dir, config, machine_label)
machine: dict[str, Any] = {"label": label, "version": ""}
mmeta = (config.get("machine_metadata") or {}).get(label)
proc = processing_software or DEFAULT_PROCESSING_SOFTWARE_MOSAIC_FROM_TILES
return write_mosaic_exif(
jpeg_path,
scan_meta,
machine,
scan_id,
mmeta,
processing_software=proc,
)
+12 -5
View File
@@ -99,13 +99,20 @@ def _clamp_workers(n: int) -> int:
# ---------------------------------------------------------------------------
def load_config(path: str) -> dict:
"""Load and validate config.yaml. Exits on missing required fields."""
def load_config(path: str, *, require_credentials: bool = True) -> dict:
"""Load and validate config.yaml. Exits on missing required fields.
With ``require_credentials=False`` (e.g. offline EXIF tagging), username and
password are not required; only fields needed for tagging are defaulted.
"""
with open(path) as fh:
cfg = yaml.safe_load(fh)
missing = [k for k in ("username", "password") if not cfg.get(k)]
if missing:
sys.exit(f"Config {path} is missing required fields: {missing}")
if cfg is None:
cfg = {}
if require_credentials:
missing = [k for k in ("username", "password") if not cfg.get(k)]
if missing:
sys.exit(f"Config {path} is missing required fields: {missing}")
cfg.setdefault("base_url", "http://205.149.147.131:8010/")
cfg.setdefault("image_base_url", "http://205.149.147.131:8011/")
cfg.setdefault("output_dir", "archives")
+164
View File
@@ -0,0 +1,164 @@
"""Tests for offline mosaic EXIF tagging (tag_mosaic_jpeg_for_scan_dir, resolver)."""
from __future__ import annotations
import json
import logging
import os
import shutil
import subprocess
import sys
from pathlib import Path
import piexif
import pytest
from piexif import ImageIFD
from spruce.exif import (
DEFAULT_PROCESSING_SOFTWARE_MOSAIC_FROM_TILES,
resolve_machine_label_for_scan_dir,
tag_mosaic_jpeg_for_scan_dir,
)
from spruce.paths import machine_dir_name
FIXTURES = Path(__file__).parent / "fixtures"
BLANK_JPEG = FIXTURES / "blank.jpg"
ROOT = Path(__file__).resolve().parents[1]
TAG_SCRIPT = ROOT / "scripts" / "tag_mosaic_exif.py"
def _scan_tree(tmp_path: Path, *, machine_label: str, scan_id: int) -> Path:
    """Build ``archives/<slug>/2024-01-01/<scan_id>/`` under ``tmp_path``.

    The slug comes from ``machine_dir_name`` for ``machine_label``. A minimal
    ``metadata.json`` carrying ``scan_id`` is written into the new folder,
    whose path is returned.
    """
    archive_slug = machine_dir_name({"label": machine_label})
    target = tmp_path.joinpath("archives", archive_slug, "2024-01-01", str(scan_id))
    target.mkdir(parents=True)

    payload = dict(
        scan_id=scan_id,
        name="Test scan",
        scan_time="2024-06-28 11:00:00",
        user="Tester",
        nx=2,
        ny=2,
        dx=1.0,
        dy=1.0,
        end_x=10.0,
        end_y=10.0,
    )
    target.joinpath("metadata.json").write_text(json.dumps(payload), encoding="utf-8")
    return target
def test_resolve_machine_label_from_slug_unique(tmp_path: Path) -> None:
    """The only machine_metadata key whose slug matches the archive folder is returned."""
    expected = "BW1-7 [AMR-18]"
    config = {"machine_metadata": {expected: {"plot_number": 7}}}
    scan_dir = _scan_tree(tmp_path, machine_label=expected, scan_id=42)

    resolved = resolve_machine_label_for_scan_dir(scan_dir, config, None)

    assert resolved == expected
def test_resolve_machine_label_explicit_overrides_slug(tmp_path: Path) -> None:
    """An explicit label is returned even when config has no matching key."""
    explicit = "Custom [Label]"
    config = {"machine_metadata": {"Other [X]": {}}}
    scan_dir = _scan_tree(tmp_path, machine_label="BW1-7 [AMR-18]", scan_id=1)

    resolved = resolve_machine_label_for_scan_dir(scan_dir, config, explicit)

    assert resolved == "Custom [Label]"
def test_resolve_machine_label_no_match_raises(tmp_path: Path) -> None:
    """With an empty machine_metadata and no explicit label, resolution fails."""
    empty_config = {"machine_metadata": {}}
    scan_dir = _scan_tree(tmp_path, machine_label="BW1-7 [AMR-18]", scan_id=1)

    with pytest.raises(ValueError, match="Could not map"):
        resolve_machine_label_for_scan_dir(scan_dir, empty_config, None)
def test_tag_mosaic_jpeg_for_scan_dir_writes_exif(tmp_path: Path) -> None:
    """Tagging a blank JPEG writes the default ProcessingSoftware and the scan comment."""
    machine = "BW1-7 [AMR-18]"
    scan_dir = _scan_tree(tmp_path, machine_label=machine, scan_id=99)
    target = tmp_path / "out.jpg"
    shutil.copy(BLANK_JPEG, target)
    config = {"write_exif": True, "machine_metadata": {machine: {}}}

    tagged = tag_mosaic_jpeg_for_scan_dir(scan_dir, target, config, force=True)
    assert tagged

    loaded = piexif.load(str(target))
    expected_software = DEFAULT_PROCESSING_SOFTWARE_MOSAIC_FROM_TILES.encode("ascii")
    assert loaded["0th"][ImageIFD.ProcessingSoftware] == expected_software
    user_comment = loaded["Exif"][piexif.ExifIFD.UserComment]
    assert b"SPRUCE scan 99" in user_comment
def test_tag_mosaic_skipped_when_write_exif_false_without_force(
    tmp_path: Path, caplog: pytest.LogCaptureFixture
) -> None:
    """``write_exif: False`` without ``force`` skips tagging and logs a warning."""
    machine = "BW1-7 [AMR-18]"
    scan_dir = _scan_tree(tmp_path, machine_label=machine, scan_id=1)
    target = tmp_path / "out.jpg"
    shutil.copy(BLANK_JPEG, target)
    config = {"write_exif": False, "machine_metadata": {machine: {}}}

    with caplog.at_level(logging.WARNING):
        tagged = tag_mosaic_jpeg_for_scan_dir(scan_dir, target, config, force=False)

    assert not tagged
    assert "Skipping EXIF" in caplog.text
def test_tag_mosaic_scan_dir_name_mismatch_raises(tmp_path: Path) -> None:
    """A scan folder whose name differs from the metadata scan_id is rejected."""
    machine = "BW1-7 [AMR-18]"
    original_dir = _scan_tree(tmp_path, machine_label=machine, scan_id=100)
    # Rename the folder so its name no longer equals the scan id in metadata.json.
    renamed_dir = original_dir.parent / "wrong_name"
    shutil.move(str(original_dir), str(renamed_dir))

    target = tmp_path / "out.jpg"
    shutil.copy(BLANK_JPEG, target)
    config = {"machine_metadata": {machine: {}}}

    with pytest.raises(ValueError, match="scan_dir must be the scan id folder"):
        tag_mosaic_jpeg_for_scan_dir(renamed_dir, target, config, force=True)
def test_tag_mosaic_non_jpeg_raises(tmp_path: Path) -> None:
    """A target file without a JPEG suffix is rejected with ValueError."""
    machine = "BW1-7 [AMR-18]"
    config = {"machine_metadata": {machine: {}}}
    scan_dir = _scan_tree(tmp_path, machine_label=machine, scan_id=1)
    not_a_jpeg = tmp_path / "out.png"
    not_a_jpeg.write_bytes(b"not really png")

    with pytest.raises(ValueError, match="only supports JPEG"):
        tag_mosaic_jpeg_for_scan_dir(scan_dir, not_a_jpeg, config, force=True)
def test_tag_mosaic_second_run_replaces_exif(tmp_path: Path) -> None:
    """Tagging the same JPEG twice leaves the ProcessingSoftware of the second run."""
    machine = "BW1-7 [AMR-18]"
    scan_dir = _scan_tree(tmp_path, machine_label=machine, scan_id=5)
    target = tmp_path / "out.jpg"
    shutil.copy(BLANK_JPEG, target)
    config = {"machine_metadata": {machine: {}}}

    first_ok = tag_mosaic_jpeg_for_scan_dir(scan_dir, target, config, force=True)
    assert first_ok
    second_ok = tag_mosaic_jpeg_for_scan_dir(
        scan_dir, target, config, force=True, processing_software="custom/2.0"
    )
    assert second_ok

    loaded = piexif.load(str(target))
    assert loaded["0th"][ImageIFD.ProcessingSoftware] == b"custom/2.0"
def test_tag_mosaic_exif_script_cli(tmp_path: Path) -> None:
    """Running the tag script on a scan dir tags the default mosaic_reconstructed.jpg."""
    machine = "BW1-6 [AMR-19]"
    scan_dir = _scan_tree(tmp_path, machine_label=machine, scan_id=156875)
    target = scan_dir / "mosaic_reconstructed.jpg"
    shutil.copy(BLANK_JPEG, target)

    # Minimal credential-free config; the label is written as a quoted YAML key.
    config_file = tmp_path / "mini.yaml"
    config_file.write_text(
        f"write_exif: true\nmachine_metadata:\n {machine!r}: {{}}\n",
        encoding="utf-8",
    )

    command = [
        sys.executable,
        str(TAG_SCRIPT),
        str(scan_dir),
        "--config",
        str(config_file),
    ]
    child_env = {**os.environ, "PYTHONPATH": str(ROOT)}
    completed = subprocess.run(
        command,
        cwd=str(ROOT),
        capture_output=True,
        text=True,
        env=child_env,
    )

    assert completed.returncode == 0, completed.stderr + completed.stdout
    loaded = piexif.load(str(target))
    assert b"SPRUCE scan 156875" in loaded["Exif"][piexif.ExifIFD.UserComment]
+10
View File
@@ -95,6 +95,16 @@ def test_load_config_missing_password_exits(tmp_path):
load_config(str(path))
def test_load_config_optional_credentials(tmp_path):
    """A config without username/password loads when credentials are not required."""
    config_file = tmp_path / "config.yaml"
    document = {"machine_metadata": {"A [B]": {"plot_number": 2}}}
    config_file.write_text(yaml.dump(document))

    loaded = load_config(str(config_file), require_credentials=False)

    assert loaded["machine_metadata"]["A [B]"]["plot_number"] == 2
    # write_exif is expected to be defaulted to True by load_config.
    assert loaded["write_exif"] is True
# ---------------------------------------------------------------------------
# CSV schemas (failure columns)
# ---------------------------------------------------------------------------