From 08a29d124a1e100bdafcd559cae83c7417fdc696 Mon Sep 17 00:00:00 2001 From: James Kolpack Date: Sun, 26 Apr 2026 20:47:23 -0400 Subject: [PATCH] Add offline mosaic EXIF tagging (stitch --write-exif, tag_mosaic_exif CLI) - spruce.exif: tag_mosaic_jpeg_for_scan_dir, resolve_machine_label_for_scan_dir; ProcessingSoftware for tile-stitched mosaics - spruce.settings: load_config(require_credentials=False) for config without login - scripts/tag_mosaic_exif.py and tests; stitch script --write-exif path --- scripts/stitch_mosaic_from_tiles.py | 31 ++++++ scripts/tag_mosaic_exif.py | 112 +++++++++++++++++++ spruce/exif.py | 95 ++++++++++++++++ spruce/settings.py | 17 ++- tests/test_mosaic_exif_tag.py | 164 ++++++++++++++++++++++++++++ tests/test_settings.py | 10 ++ 6 files changed, 424 insertions(+), 5 deletions(-) create mode 100644 scripts/tag_mosaic_exif.py create mode 100644 tests/test_mosaic_exif_tag.py diff --git a/scripts/stitch_mosaic_from_tiles.py b/scripts/stitch_mosaic_from_tiles.py index fcc2578..093af58 100644 --- a/scripts/stitch_mosaic_from_tiles.py +++ b/scripts/stitch_mosaic_from_tiles.py @@ -26,6 +26,9 @@ if str(_REPO_ROOT) not in sys.path: from PIL import Image, ImageChops +from spruce.exif import tag_mosaic_jpeg_for_scan_dir +from spruce.settings import DEFAULT_CONFIG, load_config + TILE_FILENAME_RE = re.compile(r"tile_r(\d+)_c(\d+)\.jpg$", re.IGNORECASE) @@ -281,6 +284,20 @@ def main() -> None: default=95, help="JPEG quality for output (default: 95).", ) + parser.add_argument( + "--write-exif", + action="store_true", + help=( + "After saving, write mosaic EXIF using metadata.json and config " + "(implies override if write_exif is false in config). JPEG output only." + ), + ) + parser.add_argument( + "--config", + default=DEFAULT_CONFIG, + metavar="FILE", + help=f"YAML config for EXIF machine_metadata (default: {DEFAULT_CONFIG})", + ) parser.add_argument( "--tile-gap", type=int, @@ -339,6 +356,20 @@ def main() -> None: f"grid {nx}x{ny}, tile_gap={args.tile_gap})" ) + if args.write_exif: + if out.suffix.lower() not in (".jpg", ".jpeg"): + raise SystemExit("--write-exif requires a .jpg or .jpeg output path.") + cfg_path = Path(args.config).expanduser() + if not cfg_path.is_file(): + raise SystemExit(f"Config not found: {cfg_path}") + config = load_config(str(cfg_path), require_credentials=False) + try: + ok = tag_mosaic_jpeg_for_scan_dir(scan_dir, out, config, force=True) + except ValueError as exc: + raise SystemExit(str(exc)) from exc + if not ok: + raise SystemExit("EXIF tagging failed (see log).") + if args.compare_mosaic: compare_mosaics(canvas, scan_dir / "mosaic.jpg", fit=args.fit) diff --git a/scripts/tag_mosaic_exif.py b/scripts/tag_mosaic_exif.py new file mode 100644 index 0000000..55fe0a7 --- /dev/null +++ b/scripts/tag_mosaic_exif.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python3 +""" +Write mosaic EXIF into a JPEG using metadata.json and config machine_metadata. + +Usage: + python scripts/tag_mosaic_exif.py /path/to/scan_dir + python scripts/tag_mosaic_exif.py /path/to/scan_dir --jpeg mosaic.jpg --machine "BW1-6 [AMR-19]" +""" + +from __future__ import annotations + +import argparse +import logging +import sys +from pathlib import Path + +_REPO_ROOT = Path(__file__).resolve().parent.parent +if str(_REPO_ROOT) not in sys.path: + sys.path.insert(0, str(_REPO_ROOT)) + +from spruce.exif import tag_mosaic_jpeg_for_scan_dir +from spruce.settings import DEFAULT_CONFIG, load_config + +log = logging.getLogger(__name__) + + +def main() -> None: + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)-8s %(message)s", + datefmt="%H:%M:%S", + ) + parser = argparse.ArgumentParser( + description=( + "Insert RootView-style mosaic EXIF (piexif, no re-encode) using " + "scan_dir/metadata.json and machine_metadata from config." + ) + ) + parser.add_argument( + "scan_dir", + type=Path, + help="Directory containing metadata.json (…////)", + ) + parser.add_argument( + "--jpeg", + type=Path, + default=None, + help="JPEG to tag (default: /mosaic_reconstructed.jpg)", + ) + parser.add_argument( + "--config", + default=DEFAULT_CONFIG, + metavar="FILE", + help=f"YAML config (default: {DEFAULT_CONFIG})", + ) + parser.add_argument( + "--machine", + metavar="LABEL", + default=None, + help='RootView machine label, e.g. "BW1-6 [AMR-19]" (skip archive slug inference)', + ) + parser.add_argument( + "--force", + action="store_true", + help="Tag even when write_exif is false in config", + ) + parser.add_argument( + "--processing-software", + default=None, + metavar="STR", + help="Override ProcessingSoftware EXIF string", + ) + args = parser.parse_args() + + scan_dir = args.scan_dir.expanduser().resolve() + if not scan_dir.is_dir(): + sys.exit(f"Not a directory: {scan_dir}") + + jpeg = args.jpeg + if jpeg is None: + jpeg = scan_dir / "mosaic_reconstructed.jpg" + else: + jpeg = jpeg.expanduser().resolve() + + if not jpeg.is_file(): + sys.exit(f"JPEG not found: {jpeg}") + + cfg_path = Path(args.config).expanduser() + if not cfg_path.is_file(): + sys.exit(f"Config not found: {cfg_path}") + + config = load_config(str(cfg_path), require_credentials=False) + + try: + ok = tag_mosaic_jpeg_for_scan_dir( + scan_dir, + jpeg, + config, + machine_label=args.machine, + processing_software=args.processing_software, + force=args.force, + ) + except ValueError as exc: + sys.exit(str(exc)) + + if not ok: + sys.exit("EXIF tagging was skipped or failed (see log).") + log.info("EXIF written: %s", jpeg) + + +if __name__ == "__main__": + main() diff --git a/spruce/exif.py b/spruce/exif.py index c94b158..de582bf 100644 --- a/spruce/exif.py +++ b/spruce/exif.py @@ -2,18 +2,24 @@ Write EXIF metadata into downloaded mosaic JPEGs (piexif, no re-encode). """ +import json import logging import re from pathlib import Path from typing import Any import piexif + +from spruce.paths import machine_dir_name from piexif import ExifIFD, GPSIFD, ImageIFD log = logging.getLogger(__name__) USER_COMMENT_ASCII = b"ASCII\x00\x00\x00" +# ProcessingSoftware for mosaics stitched from tiles (distinct from server download path). +DEFAULT_PROCESSING_SOFTWARE_MOSAIC_FROM_TILES = "spruce-scraper/1.0 mosaic_from_tiles" + def _fmt_exif_datetime(scan_time: str) -> str: """`YYYY-MM-DD HH:MM:SS` -> `YYYY:MM:DD HH:MM:SS` for EXIF; empty on failure.""" @@ -252,3 +258,92 @@ def write_mosaic_exif( log.warning("EXIF write failed for %s: %s", jpeg_path, exc) return False return True + + +def resolve_machine_label_for_scan_dir( + scan_dir: Path, + config: dict[str, Any], + machine_label: str | None, +) -> str: + """ + Return the RootView machine label for EXIF. + + If ``machine_label`` is set, it is returned stripped. Otherwise the parent + of ``scan_dir`` is walked (archive slug under ``machine_dir_name``) and + matched uniquely against keys in ``config['machine_metadata']``. + """ + if machine_label is not None and str(machine_label).strip() != "": + return str(machine_label).strip() + slug = scan_dir.parent.parent.name + meta = config.get("machine_metadata") or {} + matches = [lbl for lbl in meta if machine_dir_name({"label": lbl}) == slug] + if len(matches) == 1: + return matches[0] + if not matches: + raise ValueError( + f"Could not map archive folder {slug!r} to a machine label. " + f"Add it under machine_metadata in config or pass machine_label explicitly." + ) + raise ValueError( + f"Ambiguous archive folder {slug!r}: multiple machine_metadata keys " + f"match ({matches!r}). Pass machine_label explicitly." + ) + + +def tag_mosaic_jpeg_for_scan_dir( + scan_dir: Path, + jpeg_path: Path, + config: dict[str, Any], + *, + machine_label: str | None = None, + processing_software: str | None = None, + force: bool = False, +) -> bool: + """ + Write the same mosaic EXIF as the scraper, using ``metadata.json`` in + ``scan_dir`` and optional ``machine_metadata`` from ``config``. + + Returns False if tagging was skipped (e.g. ``write_exif`` disabled and not + ``force``). Raises ``ValueError`` for invalid paths or JPEG suffix. + + ``force=True`` tags even when ``config['write_exif']`` is false (e.g. stitch + script ``--write-exif``). + """ + scan_dir = scan_dir.resolve() + jpeg_path = jpeg_path.resolve() + suffix = jpeg_path.suffix.lower() + if suffix not in (".jpg", ".jpeg"): + raise ValueError( + f"EXIF tagging only supports JPEG files; got suffix {jpeg_path.suffix!r}: {jpeg_path}" + ) + + if not force and not config.get("write_exif", True): + log.warning( + "Skipping EXIF: write_exif is false in config (use force=True to override)." + ) + return False + + meta_path = scan_dir / "metadata.json" + if not meta_path.is_file(): + raise ValueError(f"Missing metadata.json: {meta_path}") + with meta_path.open(encoding="utf-8") as fh: + scan_meta: dict[str, Any] = json.load(fh) + scan_id = int(scan_meta["scan_id"]) + if scan_dir.name != str(scan_id): + raise ValueError( + f"scan_dir must be the scan id folder (got name {scan_dir.name!r}, " + f"metadata scan_id={scan_id}). Expected …//{scan_id}/" + ) + + label = resolve_machine_label_for_scan_dir(scan_dir, config, machine_label) + machine: dict[str, Any] = {"label": label, "version": ""} + mmeta = (config.get("machine_metadata") or {}).get(label) + proc = processing_software or DEFAULT_PROCESSING_SOFTWARE_MOSAIC_FROM_TILES + return write_mosaic_exif( + jpeg_path, + scan_meta, + machine, + scan_id, + mmeta, + processing_software=proc, + ) diff --git a/spruce/settings.py b/spruce/settings.py index 89b60c4..55a0e56 100644 --- a/spruce/settings.py +++ b/spruce/settings.py @@ -99,13 +99,20 @@ def _clamp_workers(n: int) -> int: # --------------------------------------------------------------------------- -def load_config(path: str) -> dict: - """Load and validate config.yaml. Exits on missing required fields.""" +def load_config(path: str, *, require_credentials: bool = True) -> dict: + """Load and validate config.yaml. Exits on missing required fields. + + With ``require_credentials=False`` (e.g. offline EXIF tagging), username and + password are not required; only fields needed for tagging are defaulted. + """ with open(path) as fh: cfg = yaml.safe_load(fh) - missing = [k for k in ("username", "password") if not cfg.get(k)] - if missing: - sys.exit(f"Config {path} is missing required fields: {missing}") + if cfg is None: + cfg = {} + if require_credentials: + missing = [k for k in ("username", "password") if not cfg.get(k)] + if missing: + sys.exit(f"Config {path} is missing required fields: {missing}") cfg.setdefault("base_url", "http://205.149.147.131:8010/") cfg.setdefault("image_base_url", "http://205.149.147.131:8011/") cfg.setdefault("output_dir", "archives") diff --git a/tests/test_mosaic_exif_tag.py b/tests/test_mosaic_exif_tag.py new file mode 100644 index 0000000..2dcbc85 --- /dev/null +++ b/tests/test_mosaic_exif_tag.py @@ -0,0 +1,164 @@ +"""Tests for offline mosaic EXIF tagging (tag_mosaic_jpeg_for_scan_dir, resolver).""" + +from __future__ import annotations + +import json +import logging +import os +import shutil +import subprocess +import sys +from pathlib import Path + +import piexif +import pytest +from piexif import ImageIFD + +from spruce.exif import ( + DEFAULT_PROCESSING_SOFTWARE_MOSAIC_FROM_TILES, + resolve_machine_label_for_scan_dir, + tag_mosaic_jpeg_for_scan_dir, +) +from spruce.paths import machine_dir_name + +FIXTURES = Path(__file__).parent / "fixtures" +BLANK_JPEG = FIXTURES / "blank.jpg" +ROOT = Path(__file__).resolve().parents[1] +TAG_SCRIPT = ROOT / "scripts" / "tag_mosaic_exif.py" + + +def _scan_tree(tmp_path: Path, *, machine_label: str, scan_id: int) -> Path: + slug = machine_dir_name({"label": machine_label}) + scan_dir = tmp_path / "archives" / slug / "2024-01-01" / str(scan_id) + scan_dir.mkdir(parents=True) + meta = { + "scan_id": scan_id, + "name": "Test scan", + "scan_time": "2024-06-28 11:00:00", + "user": "Tester", + "nx": 2, + "ny": 2, + "dx": 1.0, + "dy": 1.0, + "end_x": 10.0, + "end_y": 10.0, + } + (scan_dir / "metadata.json").write_text(json.dumps(meta), encoding="utf-8") + return scan_dir + + +def test_resolve_machine_label_from_slug_unique(tmp_path: Path) -> None: + label = "BW1-7 [AMR-18]" + scan_dir = _scan_tree(tmp_path, machine_label=label, scan_id=42) + cfg = {"machine_metadata": {label: {"plot_number": 7}}} + assert resolve_machine_label_for_scan_dir(scan_dir, cfg, None) == label + + +def test_resolve_machine_label_explicit_overrides_slug(tmp_path: Path) -> None: + scan_dir = _scan_tree(tmp_path, machine_label="BW1-7 [AMR-18]", scan_id=1) + cfg = {"machine_metadata": {"Other [X]": {}}} + assert ( + resolve_machine_label_for_scan_dir(scan_dir, cfg, "Custom [Label]") + == "Custom [Label]" + ) + + +def test_resolve_machine_label_no_match_raises(tmp_path: Path) -> None: + scan_dir = _scan_tree(tmp_path, machine_label="BW1-7 [AMR-18]", scan_id=1) + cfg = {"machine_metadata": {}} + with pytest.raises(ValueError, match="Could not map"): + resolve_machine_label_for_scan_dir(scan_dir, cfg, None) + + +def test_tag_mosaic_jpeg_for_scan_dir_writes_exif(tmp_path: Path) -> None: + label = "BW1-7 [AMR-18]" + scan_dir = _scan_tree(tmp_path, machine_label=label, scan_id=99) + jpeg = tmp_path / "out.jpg" + shutil.copy(BLANK_JPEG, jpeg) + cfg = {"write_exif": True, "machine_metadata": {label: {}}} + assert tag_mosaic_jpeg_for_scan_dir(scan_dir, jpeg, cfg, force=True) + exif = piexif.load(str(jpeg)) + assert ( + exif["0th"][ImageIFD.ProcessingSoftware] + == DEFAULT_PROCESSING_SOFTWARE_MOSAIC_FROM_TILES.encode("ascii") + ) + assert b"SPRUCE scan 99" in exif["Exif"][piexif.ExifIFD.UserComment] + + +def test_tag_mosaic_skipped_when_write_exif_false_without_force( + tmp_path: Path, caplog: pytest.LogCaptureFixture +) -> None: + label = "BW1-7 [AMR-18]" + scan_dir = _scan_tree(tmp_path, machine_label=label, scan_id=1) + jpeg = tmp_path / "out.jpg" + shutil.copy(BLANK_JPEG, jpeg) + cfg = {"write_exif": False, "machine_metadata": {label: {}}} + with caplog.at_level(logging.WARNING): + ok = tag_mosaic_jpeg_for_scan_dir(scan_dir, jpeg, cfg, force=False) + assert not ok + assert "Skipping EXIF" in caplog.text + + +def test_tag_mosaic_scan_dir_name_mismatch_raises(tmp_path: Path) -> None: + label = "BW1-7 [AMR-18]" + scan_dir = _scan_tree(tmp_path, machine_label=label, scan_id=100) + bad = scan_dir.parent / "wrong_name" + shutil.move(str(scan_dir), str(bad)) + jpeg = tmp_path / "out.jpg" + shutil.copy(BLANK_JPEG, jpeg) + cfg = {"machine_metadata": {label: {}}} + with pytest.raises(ValueError, match="scan_dir must be the scan id folder"): + tag_mosaic_jpeg_for_scan_dir(bad, jpeg, cfg, force=True) + + +def test_tag_mosaic_non_jpeg_raises(tmp_path: Path) -> None: + label = "BW1-7 [AMR-18]" + scan_dir = _scan_tree(tmp_path, machine_label=label, scan_id=1) + png = tmp_path / "out.png" + png.write_bytes(b"not really png") + cfg = {"machine_metadata": {label: {}}} + with pytest.raises(ValueError, match="only supports JPEG"): + tag_mosaic_jpeg_for_scan_dir(scan_dir, png, cfg, force=True) + + +def test_tag_mosaic_second_run_replaces_exif(tmp_path: Path) -> None: + label = "BW1-7 [AMR-18]" + scan_dir = _scan_tree(tmp_path, machine_label=label, scan_id=5) + jpeg = tmp_path / "out.jpg" + shutil.copy(BLANK_JPEG, jpeg) + cfg = {"machine_metadata": {label: {}}} + assert tag_mosaic_jpeg_for_scan_dir(scan_dir, jpeg, cfg, force=True) + assert tag_mosaic_jpeg_for_scan_dir( + scan_dir, jpeg, cfg, force=True, processing_software="custom/2.0" + ) + exif = piexif.load(str(jpeg)) + assert exif["0th"][ImageIFD.ProcessingSoftware] == b"custom/2.0" + + +def test_tag_mosaic_exif_script_cli(tmp_path: Path) -> None: + label = "BW1-6 [AMR-19]" + scan_dir = _scan_tree(tmp_path, machine_label=label, scan_id=156875) + jpeg = scan_dir / "mosaic_reconstructed.jpg" + shutil.copy(BLANK_JPEG, jpeg) + cfg = tmp_path / "mini.yaml" + cfg.write_text( + f"write_exif: true\nmachine_metadata:\n {label!r}: {{}}\n", + encoding="utf-8", + ) + env = {**os.environ, "PYTHONPATH": str(ROOT)} + r = subprocess.run( + [ + sys.executable, + str(TAG_SCRIPT), + str(scan_dir), + "--config", + str(cfg), + ], + cwd=str(ROOT), + capture_output=True, + text=True, + env=env, + ) + assert r.returncode == 0, r.stderr + r.stdout + exif = piexif.load(str(jpeg)) + assert b"SPRUCE scan 156875" in exif["Exif"][piexif.ExifIFD.UserComment] diff --git a/tests/test_settings.py b/tests/test_settings.py index b2c635c..4bc3416 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -95,6 +95,16 @@ def test_load_config_missing_password_exits(tmp_path): load_config(str(path)) +def test_load_config_optional_credentials(tmp_path): + path = tmp_path / "config.yaml" + path.write_text( + yaml.dump({"machine_metadata": {"A [B]": {"plot_number": 2}}}) + ) + cfg = load_config(str(path), require_credentials=False) + assert cfg["machine_metadata"]["A [B]"]["plot_number"] == 2 + assert cfg["write_exif"] is True + + # --------------------------------------------------------------------------- # CSV schemas (failure columns) # ---------------------------------------------------------------------------