Add offline mosaic EXIF tagging (stitch --write-exif, tag_mosaic_exif CLI)

- spruce.exif: tag_mosaic_jpeg_for_scan_dir, resolve_machine_label_for_scan_dir; ProcessingSoftware for tile-stitched mosaics
- spruce.settings: load_config(require_credentials=False) for config without login
- scripts/tag_mosaic_exif.py and tests; stitch script --write-exif path
This commit is contained in:
2026-04-26 20:47:23 -04:00
parent 314b68322c
commit 08a29d124a
6 changed files with 424 additions and 5 deletions
+31
View File
@@ -26,6 +26,9 @@ if str(_REPO_ROOT) not in sys.path:
from PIL import Image, ImageChops from PIL import Image, ImageChops
from spruce.exif import tag_mosaic_jpeg_for_scan_dir
from spruce.settings import DEFAULT_CONFIG, load_config
TILE_FILENAME_RE = re.compile(r"tile_r(\d+)_c(\d+)\.jpg$", re.IGNORECASE) TILE_FILENAME_RE = re.compile(r"tile_r(\d+)_c(\d+)\.jpg$", re.IGNORECASE)
@@ -281,6 +284,20 @@ def main() -> None:
default=95, default=95,
help="JPEG quality for output (default: 95).", help="JPEG quality for output (default: 95).",
) )
parser.add_argument(
"--write-exif",
action="store_true",
help=(
"After saving, write mosaic EXIF using metadata.json and config "
"(implies override if write_exif is false in config). JPEG output only."
),
)
parser.add_argument(
"--config",
default=DEFAULT_CONFIG,
metavar="FILE",
help=f"YAML config for EXIF machine_metadata (default: {DEFAULT_CONFIG})",
)
parser.add_argument( parser.add_argument(
"--tile-gap", "--tile-gap",
type=int, type=int,
@@ -339,6 +356,20 @@ def main() -> None:
f"grid {nx}x{ny}, tile_gap={args.tile_gap})" f"grid {nx}x{ny}, tile_gap={args.tile_gap})"
) )
if args.write_exif:
if out.suffix.lower() not in (".jpg", ".jpeg"):
raise SystemExit("--write-exif requires a .jpg or .jpeg output path.")
cfg_path = Path(args.config).expanduser()
if not cfg_path.is_file():
raise SystemExit(f"Config not found: {cfg_path}")
config = load_config(str(cfg_path), require_credentials=False)
try:
ok = tag_mosaic_jpeg_for_scan_dir(scan_dir, out, config, force=True)
except ValueError as exc:
raise SystemExit(str(exc)) from exc
if not ok:
raise SystemExit("EXIF tagging failed (see log).")
if args.compare_mosaic: if args.compare_mosaic:
compare_mosaics(canvas, scan_dir / "mosaic.jpg", fit=args.fit) compare_mosaics(canvas, scan_dir / "mosaic.jpg", fit=args.fit)
+112
View File
@@ -0,0 +1,112 @@
#!/usr/bin/env python3
"""
Write mosaic EXIF into a JPEG using metadata.json and config machine_metadata.
Usage:
python scripts/tag_mosaic_exif.py /path/to/scan_dir
python scripts/tag_mosaic_exif.py /path/to/scan_dir --jpeg mosaic.jpg --machine "BW1-6 [AMR-19]"
"""
from __future__ import annotations
import argparse
import logging
import sys
from pathlib import Path
_REPO_ROOT = Path(__file__).resolve().parent.parent
if str(_REPO_ROOT) not in sys.path:
sys.path.insert(0, str(_REPO_ROOT))
from spruce.exif import tag_mosaic_jpeg_for_scan_dir
from spruce.settings import DEFAULT_CONFIG, load_config
log = logging.getLogger(__name__)
def _build_arg_parser() -> argparse.ArgumentParser:
    """Assemble the command-line parser for the offline mosaic EXIF tagger."""
    cli = argparse.ArgumentParser(
        description=(
            "Insert RootView-style mosaic EXIF (piexif, no re-encode) using "
            "scan_dir/metadata.json and machine_metadata from config."
        )
    )
    cli.add_argument(
        "scan_dir",
        type=Path,
        help="Directory containing metadata.json (…/<machine>/<date>/<scan_id>/)",
    )
    cli.add_argument(
        "--jpeg",
        type=Path,
        default=None,
        help="JPEG to tag (default: <scan_dir>/mosaic_reconstructed.jpg)",
    )
    cli.add_argument(
        "--config",
        default=DEFAULT_CONFIG,
        metavar="FILE",
        help=f"YAML config (default: {DEFAULT_CONFIG})",
    )
    cli.add_argument(
        "--machine",
        metavar="LABEL",
        default=None,
        help='RootView machine label, e.g. "BW1-6 [AMR-19]" (skip archive slug inference)',
    )
    cli.add_argument(
        "--force",
        action="store_true",
        help="Tag even when write_exif is false in config",
    )
    cli.add_argument(
        "--processing-software",
        default=None,
        metavar="STR",
        help="Override ProcessingSoftware EXIF string",
    )
    return cli


def main() -> None:
    """
    Command-line entry point: tag one JPEG with mosaic EXIF.

    Validates the scan directory, the JPEG and the config file (in that
    order), loads the config without requiring credentials, then delegates to
    ``tag_mosaic_jpeg_for_scan_dir``. Any validation problem, a ``ValueError``
    from the tagger, or a falsy tagger result ends the process through
    ``sys.exit`` with a message.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)-8s %(message)s",
        datefmt="%H:%M:%S",
    )
    opts = _build_arg_parser().parse_args()

    scan_dir = opts.scan_dir.expanduser().resolve()
    if not scan_dir.is_dir():
        sys.exit(f"Not a directory: {scan_dir}")

    # An explicit --jpeg is resolved on its own; otherwise fall back to the
    # default file name inside the scan directory.
    if opts.jpeg is not None:
        jpeg = opts.jpeg.expanduser().resolve()
    else:
        jpeg = scan_dir / "mosaic_reconstructed.jpg"
    if not jpeg.is_file():
        sys.exit(f"JPEG not found: {jpeg}")

    cfg_path = Path(opts.config).expanduser()
    if not cfg_path.is_file():
        sys.exit(f"Config not found: {cfg_path}")
    config = load_config(str(cfg_path), require_credentials=False)

    tag_options = {
        "machine_label": opts.machine,
        "processing_software": opts.processing_software,
        "force": opts.force,
    }
    try:
        tagged = tag_mosaic_jpeg_for_scan_dir(scan_dir, jpeg, config, **tag_options)
    except ValueError as exc:
        sys.exit(str(exc))
    if not tagged:
        sys.exit("EXIF tagging was skipped or failed (see log).")
    log.info("EXIF written: %s", jpeg)
if __name__ == "__main__":
main()
+95
View File
@@ -2,18 +2,24 @@
Write EXIF metadata into downloaded mosaic JPEGs (piexif, no re-encode). Write EXIF metadata into downloaded mosaic JPEGs (piexif, no re-encode).
""" """
import json
import logging import logging
import re import re
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
import piexif import piexif
from spruce.paths import machine_dir_name
from piexif import ExifIFD, GPSIFD, ImageIFD from piexif import ExifIFD, GPSIFD, ImageIFD
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
USER_COMMENT_ASCII = b"ASCII\x00\x00\x00" USER_COMMENT_ASCII = b"ASCII\x00\x00\x00"
# ProcessingSoftware for mosaics stitched from tiles (distinct from server download path).
DEFAULT_PROCESSING_SOFTWARE_MOSAIC_FROM_TILES = "spruce-scraper/1.0 mosaic_from_tiles"
def _fmt_exif_datetime(scan_time: str) -> str: def _fmt_exif_datetime(scan_time: str) -> str:
"""`YYYY-MM-DD HH:MM:SS` -> `YYYY:MM:DD HH:MM:SS` for EXIF; empty on failure.""" """`YYYY-MM-DD HH:MM:SS` -> `YYYY:MM:DD HH:MM:SS` for EXIF; empty on failure."""
@@ -252,3 +258,92 @@ def write_mosaic_exif(
log.warning("EXIF write failed for %s: %s", jpeg_path, exc) log.warning("EXIF write failed for %s: %s", jpeg_path, exc)
return False return False
return True return True
def resolve_machine_label_for_scan_dir(
scan_dir: Path,
config: dict[str, Any],
machine_label: str | None,
) -> str:
"""
Return the RootView machine label for EXIF.
If ``machine_label`` is set, it is returned stripped. Otherwise the parent
of ``scan_dir`` is walked (archive slug under ``machine_dir_name``) and
matched uniquely against keys in ``config['machine_metadata']``.
"""
if machine_label is not None and str(machine_label).strip() != "":
return str(machine_label).strip()
slug = scan_dir.parent.parent.name
meta = config.get("machine_metadata") or {}
matches = [lbl for lbl in meta if machine_dir_name({"label": lbl}) == slug]
if len(matches) == 1:
return matches[0]
if not matches:
raise ValueError(
f"Could not map archive folder {slug!r} to a machine label. "
f"Add it under machine_metadata in config or pass machine_label explicitly."
)
raise ValueError(
f"Ambiguous archive folder {slug!r}: multiple machine_metadata keys "
f"match ({matches!r}). Pass machine_label explicitly."
)
def tag_mosaic_jpeg_for_scan_dir(
scan_dir: Path,
jpeg_path: Path,
config: dict[str, Any],
*,
machine_label: str | None = None,
processing_software: str | None = None,
force: bool = False,
) -> bool:
"""
Write the same mosaic EXIF as the scraper, using ``metadata.json`` in
``scan_dir`` and optional ``machine_metadata`` from ``config``.
Returns False if tagging was skipped (e.g. ``write_exif`` disabled and not
``force``). Raises ``ValueError`` for invalid paths or JPEG suffix.
``force=True`` tags even when ``config['write_exif']`` is false (e.g. stitch
script ``--write-exif``).
"""
scan_dir = scan_dir.resolve()
jpeg_path = jpeg_path.resolve()
suffix = jpeg_path.suffix.lower()
if suffix not in (".jpg", ".jpeg"):
raise ValueError(
f"EXIF tagging only supports JPEG files; got suffix {jpeg_path.suffix!r}: {jpeg_path}"
)
if not force and not config.get("write_exif", True):
log.warning(
"Skipping EXIF: write_exif is false in config (use force=True to override)."
)
return False
meta_path = scan_dir / "metadata.json"
if not meta_path.is_file():
raise ValueError(f"Missing metadata.json: {meta_path}")
with meta_path.open(encoding="utf-8") as fh:
scan_meta: dict[str, Any] = json.load(fh)
scan_id = int(scan_meta["scan_id"])
if scan_dir.name != str(scan_id):
raise ValueError(
f"scan_dir must be the scan id folder (got name {scan_dir.name!r}, "
f"metadata scan_id={scan_id}). Expected …/<date>/{scan_id}/"
)
label = resolve_machine_label_for_scan_dir(scan_dir, config, machine_label)
machine: dict[str, Any] = {"label": label, "version": ""}
mmeta = (config.get("machine_metadata") or {}).get(label)
proc = processing_software or DEFAULT_PROCESSING_SOFTWARE_MOSAIC_FROM_TILES
return write_mosaic_exif(
jpeg_path,
scan_meta,
machine,
scan_id,
mmeta,
processing_software=proc,
)
+12 -5
View File
@@ -99,13 +99,20 @@ def _clamp_workers(n: int) -> int:
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def load_config(path: str) -> dict: def load_config(path: str, *, require_credentials: bool = True) -> dict:
"""Load and validate config.yaml. Exits on missing required fields.""" """Load and validate config.yaml. Exits on missing required fields.
With ``require_credentials=False`` (e.g. offline EXIF tagging), username and
password are not required; only fields needed for tagging are defaulted.
"""
with open(path) as fh: with open(path) as fh:
cfg = yaml.safe_load(fh) cfg = yaml.safe_load(fh)
missing = [k for k in ("username", "password") if not cfg.get(k)] if cfg is None:
if missing: cfg = {}
sys.exit(f"Config {path} is missing required fields: {missing}") if require_credentials:
missing = [k for k in ("username", "password") if not cfg.get(k)]
if missing:
sys.exit(f"Config {path} is missing required fields: {missing}")
cfg.setdefault("base_url", "http://205.149.147.131:8010/") cfg.setdefault("base_url", "http://205.149.147.131:8010/")
cfg.setdefault("image_base_url", "http://205.149.147.131:8011/") cfg.setdefault("image_base_url", "http://205.149.147.131:8011/")
cfg.setdefault("output_dir", "archives") cfg.setdefault("output_dir", "archives")
+164
View File
@@ -0,0 +1,164 @@
"""Tests for offline mosaic EXIF tagging (tag_mosaic_jpeg_for_scan_dir, resolver)."""
from __future__ import annotations
import json
import logging
import os
import shutil
import subprocess
import sys
from pathlib import Path
import piexif
import pytest
from piexif import ImageIFD
from spruce.exif import (
DEFAULT_PROCESSING_SOFTWARE_MOSAIC_FROM_TILES,
resolve_machine_label_for_scan_dir,
tag_mosaic_jpeg_for_scan_dir,
)
from spruce.paths import machine_dir_name
FIXTURES = Path(__file__).parent / "fixtures"
BLANK_JPEG = FIXTURES / "blank.jpg"
ROOT = Path(__file__).resolve().parents[1]
TAG_SCRIPT = ROOT / "scripts" / "tag_mosaic_exif.py"
def _scan_tree(tmp_path: Path, *, machine_label: str, scan_id: int) -> Path:
    """Create ``archives/<slug>/2024-01-01/<scan_id>/`` with a metadata.json and return it."""
    machine_slug = machine_dir_name({"label": machine_label})
    target = tmp_path.joinpath("archives", machine_slug, "2024-01-01", str(scan_id))
    target.mkdir(parents=True)
    payload = dict(
        scan_id=scan_id,
        name="Test scan",
        scan_time="2024-06-28 11:00:00",
        user="Tester",
        nx=2,
        ny=2,
        dx=1.0,
        dy=1.0,
        end_x=10.0,
        end_y=10.0,
    )
    target.joinpath("metadata.json").write_text(json.dumps(payload), encoding="utf-8")
    return target
def test_resolve_machine_label_from_slug_unique(tmp_path: Path) -> None:
    """A single machine_metadata key whose slug matches the archive folder is returned."""
    expected = "BW1-7 [AMR-18]"
    tree = _scan_tree(tmp_path, machine_label=expected, scan_id=42)
    config = {"machine_metadata": {expected: {"plot_number": 7}}}
    resolved = resolve_machine_label_for_scan_dir(tree, config, None)
    assert resolved == expected
def test_resolve_machine_label_explicit_overrides_slug(tmp_path: Path) -> None:
    """An explicit label is returned even when the config has no matching key."""
    tree = _scan_tree(tmp_path, machine_label="BW1-7 [AMR-18]", scan_id=1)
    unrelated_config = {"machine_metadata": {"Other [X]": {}}}
    resolved = resolve_machine_label_for_scan_dir(
        tree, unrelated_config, "Custom [Label]"
    )
    assert resolved == "Custom [Label]"
def test_resolve_machine_label_no_match_raises(tmp_path: Path) -> None:
    """With an empty machine_metadata mapping the slug cannot be resolved."""
    tree = _scan_tree(tmp_path, machine_label="BW1-7 [AMR-18]", scan_id=1)
    empty_config = {"machine_metadata": {}}
    with pytest.raises(ValueError, match="Could not map"):
        resolve_machine_label_for_scan_dir(tree, empty_config, None)
def test_tag_mosaic_jpeg_for_scan_dir_writes_exif(tmp_path: Path) -> None:
    """Tagging writes the default ProcessingSoftware and a scan-id user comment."""
    machine = "BW1-7 [AMR-18]"
    tree = _scan_tree(tmp_path, machine_label=machine, scan_id=99)
    target = tmp_path / "out.jpg"
    shutil.copy(BLANK_JPEG, target)
    config = {"write_exif": True, "machine_metadata": {machine: {}}}

    assert tag_mosaic_jpeg_for_scan_dir(tree, target, config, force=True)

    tags = piexif.load(str(target))
    software = tags["0th"][ImageIFD.ProcessingSoftware]
    assert software == DEFAULT_PROCESSING_SOFTWARE_MOSAIC_FROM_TILES.encode("ascii")
    comment = tags["Exif"][piexif.ExifIFD.UserComment]
    assert b"SPRUCE scan 99" in comment
def test_tag_mosaic_skipped_when_write_exif_false_without_force(
    tmp_path: Path, caplog: pytest.LogCaptureFixture
) -> None:
    """With write_exif false and no force, tagging is skipped and a warning is logged."""
    machine = "BW1-7 [AMR-18]"
    tree = _scan_tree(tmp_path, machine_label=machine, scan_id=1)
    target = tmp_path / "out.jpg"
    shutil.copy(BLANK_JPEG, target)
    config = {"write_exif": False, "machine_metadata": {machine: {}}}

    with caplog.at_level(logging.WARNING):
        tagged = tag_mosaic_jpeg_for_scan_dir(tree, target, config, force=False)

    assert not tagged
    assert "Skipping EXIF" in caplog.text
def test_tag_mosaic_scan_dir_name_mismatch_raises(tmp_path: Path) -> None:
    """A scan folder whose name differs from metadata scan_id is rejected."""
    machine = "BW1-7 [AMR-18]"
    original_dir = _scan_tree(tmp_path, machine_label=machine, scan_id=100)
    renamed_dir = original_dir.parent / "wrong_name"
    shutil.move(str(original_dir), str(renamed_dir))
    target = tmp_path / "out.jpg"
    shutil.copy(BLANK_JPEG, target)
    config = {"machine_metadata": {machine: {}}}

    with pytest.raises(ValueError, match="scan_dir must be the scan id folder"):
        tag_mosaic_jpeg_for_scan_dir(renamed_dir, target, config, force=True)
def test_tag_mosaic_non_jpeg_raises(tmp_path: Path) -> None:
    """A target without a .jpg/.jpeg suffix is rejected before any tagging."""
    machine = "BW1-7 [AMR-18]"
    tree = _scan_tree(tmp_path, machine_label=machine, scan_id=1)
    not_a_jpeg = tmp_path / "out.png"
    not_a_jpeg.write_bytes(b"not really png")
    config = {"machine_metadata": {machine: {}}}

    with pytest.raises(ValueError, match="only supports JPEG"):
        tag_mosaic_jpeg_for_scan_dir(tree, not_a_jpeg, config, force=True)
def test_tag_mosaic_second_run_replaces_exif(tmp_path: Path) -> None:
    """Tagging the same file twice leaves the values from the second run."""
    machine = "BW1-7 [AMR-18]"
    tree = _scan_tree(tmp_path, machine_label=machine, scan_id=5)
    target = tmp_path / "out.jpg"
    shutil.copy(BLANK_JPEG, target)
    config = {"machine_metadata": {machine: {}}}

    first = tag_mosaic_jpeg_for_scan_dir(tree, target, config, force=True)
    assert first
    second = tag_mosaic_jpeg_for_scan_dir(
        tree, target, config, force=True, processing_software="custom/2.0"
    )
    assert second

    tags = piexif.load(str(target))
    assert tags["0th"][ImageIFD.ProcessingSoftware] == b"custom/2.0"
def test_tag_mosaic_exif_script_cli(tmp_path: Path) -> None:
    """Run scripts/tag_mosaic_exif.py as a subprocess and check the written EXIF."""
    label = "BW1-6 [AMR-19]"
    tree = _scan_tree(tmp_path, machine_label=label, scan_id=156875)
    target = tree / "mosaic_reconstructed.jpg"
    shutil.copy(BLANK_JPEG, target)

    config_file = tmp_path / "mini.yaml"
    config_file.write_text(
        f"write_exif: true\nmachine_metadata:\n {label!r}: {{}}\n",
        encoding="utf-8",
    )

    # The script is started from the repo root with the repo on PYTHONPATH.
    child_env = dict(os.environ, PYTHONPATH=str(ROOT))
    command = [
        sys.executable,
        str(TAG_SCRIPT),
        str(tree),
        "--config",
        str(config_file),
    ]
    completed = subprocess.run(
        command,
        cwd=str(ROOT),
        capture_output=True,
        text=True,
        env=child_env,
    )
    assert completed.returncode == 0, completed.stderr + completed.stdout

    tags = piexif.load(str(target))
    assert b"SPRUCE scan 156875" in tags["Exif"][piexif.ExifIFD.UserComment]
+10
View File
@@ -95,6 +95,16 @@ def test_load_config_missing_password_exits(tmp_path):
load_config(str(path)) load_config(str(path))
def test_load_config_optional_credentials(tmp_path):
    """A config without username/password loads when credentials are not required."""
    config_file = tmp_path / "config.yaml"
    document = {"machine_metadata": {"A [B]": {"plot_number": 2}}}
    config_file.write_text(yaml.dump(document))

    loaded = load_config(str(config_file), require_credentials=False)

    assert loaded["machine_metadata"]["A [B]"]["plot_number"] == 2
    assert loaded["write_exif"] is True
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# CSV schemas (failure columns) # CSV schemas (failure columns)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------