Add offline mosaic EXIF tagging (stitch --write-exif, tag_mosaic_exif CLI)

- spruce.exif: tag_mosaic_jpeg_for_scan_dir, resolve_machine_label_for_scan_dir; ProcessingSoftware for tile-stitched mosaics
- spruce.settings: load_config(require_credentials=False) for config without login
- scripts/tag_mosaic_exif.py and tests; stitch script --write-exif path
This commit is contained in:
2026-04-26 20:47:23 -04:00
parent 314b68322c
commit 08a29d124a
6 changed files with 424 additions and 5 deletions
+95
View File
@@ -2,18 +2,24 @@
Write EXIF metadata into downloaded mosaic JPEGs (piexif, no re-encode).
"""
import json
import logging
import re
from pathlib import Path
from typing import Any
import piexif
from spruce.paths import machine_dir_name
from piexif import ExifIFD, GPSIFD, ImageIFD
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# 8-byte prefix: b"ASCII" followed by three NUL bytes. Presumably the EXIF
# UserComment character-code header; its use is outside this view, so confirm
# at the call site.
USER_COMMENT_ASCII = b"ASCII\x00\x00\x00"
# ProcessingSoftware for mosaics stitched from tiles (distinct from server download path).
DEFAULT_PROCESSING_SOFTWARE_MOSAIC_FROM_TILES = "spruce-scraper/1.0 mosaic_from_tiles"
def _fmt_exif_datetime(scan_time: str) -> str:
"""`YYYY-MM-DD HH:MM:SS` -> `YYYY:MM:DD HH:MM:SS` for EXIF; empty on failure."""
@@ -252,3 +258,92 @@ def write_mosaic_exif(
log.warning("EXIF write failed for %s: %s", jpeg_path, exc)
return False
return True
def resolve_machine_label_for_scan_dir(
scan_dir: Path,
config: dict[str, Any],
machine_label: str | None,
) -> str:
"""
Return the RootView machine label for EXIF.
If ``machine_label`` is set, it is returned stripped. Otherwise the parent
of ``scan_dir`` is walked (archive slug under ``machine_dir_name``) and
matched uniquely against keys in ``config['machine_metadata']``.
"""
if machine_label is not None and str(machine_label).strip() != "":
return str(machine_label).strip()
slug = scan_dir.parent.parent.name
meta = config.get("machine_metadata") or {}
matches = [lbl for lbl in meta if machine_dir_name({"label": lbl}) == slug]
if len(matches) == 1:
return matches[0]
if not matches:
raise ValueError(
f"Could not map archive folder {slug!r} to a machine label. "
f"Add it under machine_metadata in config or pass machine_label explicitly."
)
raise ValueError(
f"Ambiguous archive folder {slug!r}: multiple machine_metadata keys "
f"match ({matches!r}). Pass machine_label explicitly."
)
def tag_mosaic_jpeg_for_scan_dir(
scan_dir: Path,
jpeg_path: Path,
config: dict[str, Any],
*,
machine_label: str | None = None,
processing_software: str | None = None,
force: bool = False,
) -> bool:
"""
Write the same mosaic EXIF as the scraper, using ``metadata.json`` in
``scan_dir`` and optional ``machine_metadata`` from ``config``.
Returns False if tagging was skipped (e.g. ``write_exif`` disabled and not
``force``). Raises ``ValueError`` for invalid paths or JPEG suffix.
``force=True`` tags even when ``config['write_exif']`` is false (e.g. stitch
script ``--write-exif``).
"""
scan_dir = scan_dir.resolve()
jpeg_path = jpeg_path.resolve()
suffix = jpeg_path.suffix.lower()
if suffix not in (".jpg", ".jpeg"):
raise ValueError(
f"EXIF tagging only supports JPEG files; got suffix {jpeg_path.suffix!r}: {jpeg_path}"
)
if not force and not config.get("write_exif", True):
log.warning(
"Skipping EXIF: write_exif is false in config (use force=True to override)."
)
return False
meta_path = scan_dir / "metadata.json"
if not meta_path.is_file():
raise ValueError(f"Missing metadata.json: {meta_path}")
with meta_path.open(encoding="utf-8") as fh:
scan_meta: dict[str, Any] = json.load(fh)
scan_id = int(scan_meta["scan_id"])
if scan_dir.name != str(scan_id):
raise ValueError(
f"scan_dir must be the scan id folder (got name {scan_dir.name!r}, "
f"metadata scan_id={scan_id}). Expected …/<date>/{scan_id}/"
)
label = resolve_machine_label_for_scan_dir(scan_dir, config, machine_label)
machine: dict[str, Any] = {"label": label, "version": ""}
mmeta = (config.get("machine_metadata") or {}).get(label)
proc = processing_software or DEFAULT_PROCESSING_SOFTWARE_MOSAIC_FROM_TILES
return write_mosaic_exif(
jpeg_path,
scan_meta,
machine,
scan_id,
mmeta,
processing_software=proc,
)
+12 -5
View File
@@ -99,13 +99,20 @@ def _clamp_workers(n: int) -> int:
# ---------------------------------------------------------------------------
def load_config(path: str) -> dict:
"""Load and validate config.yaml. Exits on missing required fields."""
def load_config(path: str, *, require_credentials: bool = True) -> dict:
"""Load and validate config.yaml. Exits on missing required fields.
With ``require_credentials=False`` (e.g. offline EXIF tagging), username and
password are not required; only fields needed for tagging are defaulted.
"""
with open(path) as fh:
cfg = yaml.safe_load(fh)
missing = [k for k in ("username", "password") if not cfg.get(k)]
if missing:
sys.exit(f"Config {path} is missing required fields: {missing}")
if cfg is None:
cfg = {}
if require_credentials:
missing = [k for k in ("username", "password") if not cfg.get(k)]
if missing:
sys.exit(f"Config {path} is missing required fields: {missing}")
cfg.setdefault("base_url", "http://205.149.147.131:8010/")
cfg.setdefault("image_base_url", "http://205.149.147.131:8011/")
cfg.setdefault("output_dir", "archives")