Add EXIF writing and machine metadata support

This commit is contained in:
2026-04-24 18:21:37 -04:00
parent f2193011ca
commit e8d3bf7180
11 changed files with 577 additions and 12 deletions
+15 -3
View File
@@ -297,13 +297,25 @@ def _print_summary(
log.info(sep)
log.info(row("Machines:", str(len(machines))))
log.info(
row("Scans fetched:", str(totals.scans_fetched),
row(
"Scans (metadata) fetched:",
str(totals.scans_fetched),
f"{totals.scans_skipped} already cached, "
f"{totals.scans_failed} failed"
if totals.scans_skipped or totals.scans_failed else "")
f"{totals.scans_failed} metadata failed"
if totals.scans_skipped or totals.scans_failed
else "",
)
)
if not metadata_only:
log.info(row("Mosaics downloaded:", str(totals.mosaics_downloaded)))
if totals.mosaics_failed:
log.info(
row(
"Mosaics failed:",
str(totals.mosaics_failed),
"0 bytes or HTTP error; see log above",
)
)
if not metadata_only and not mosaic_only:
log.info(row("Tiles downloaded:", str(totals.tiles_downloaded)))
if metadata_only:
+254
View File
@@ -0,0 +1,254 @@
"""
Write EXIF metadata into downloaded mosaic JPEGs (piexif, no re-encode).
"""
import logging
import re
from pathlib import Path
from typing import Any
import piexif
from piexif import ExifIFD, GPSIFD, ImageIFD
# Module-level logger named after this module.
log = logging.getLogger(__name__)
# 8-byte prefix ("ASCII" padded with three NUL bytes) that is prepended to the
# UserComment payload; it corresponds to the EXIF character-code header that
# marks the rest of the field as ASCII text.
USER_COMMENT_ASCII = b"ASCII\x00\x00\x00"
def _fmt_exif_datetime(scan_time: str) -> str:
"""`YYYY-MM-DD HH:MM:SS` -> `YYYY:MM:DD HH:MM:SS` for EXIF; empty on failure."""
m = re.match(
r"^(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2}):(\d{2})$", scan_time.strip()
)
if not m:
return ""
y, mo, d, h, mi, s = m.groups()
return f"{y}:{mo}:{d} {h}:{mi}:{s}"
def _fmt_dim(v: Any) -> str:
if v is None:
return "?"
if isinstance(v, float) and v == int(v):
return str(int(v))
return str(v)
def _fmt_machine_meta_scalar(v: Any) -> str:
if isinstance(v, float) and v == int(v):
return str(int(v))
if isinstance(v, float):
return format(v, "g")
return str(v).strip()
def _user_comment_treatment_suffix(machine_meta: dict[str, Any] | None) -> str:
if not machine_meta:
return ""
parts: list[str] = []
pn = machine_meta.get("plot_number")
if pn is not None and str(pn).strip() != "":
parts.append(f"plot_number {_fmt_machine_meta_scalar(pn)}")
if machine_meta.get("enclosure") is not None:
enc = machine_meta["enclosure"]
if isinstance(enc, bool):
parts.append("enclosure yes" if enc else "enclosure no")
else:
parts.append(f"enclosure {_fmt_machine_meta_scalar(enc)}")
if machine_meta.get("temp_treatment") is not None:
tt = machine_meta["temp_treatment"]
parts.append(f"temp_treatment {_fmt_machine_meta_scalar(tt)}")
if machine_meta.get("co2_treatment") is not None:
c = str(machine_meta["co2_treatment"]).strip().lower()
parts.append(f"co2_treatment {c}")
if not parts:
return ""
return " | " + " | ".join(parts)
def _build_user_comment(
    scan_meta: dict[str, Any],
    machine: dict[str, Any],
    scan_id: int,
    machine_meta: dict[str, Any] | None = None,
) -> bytes:
    """Build the EXIF UserComment payload for one scan.

    The text summarises the scan id, the machine label, and the grid geometry
    read from *scan_meta* (``nx``, ``ny``, ``dx``, ``dy``, ``end_x``,
    ``end_y``), followed by the treatment suffix derived from *machine_meta*.

    Returns:
        ``USER_COMMENT_ASCII`` followed by the text encoded as ASCII, with
        unencodable characters replaced.

    Raises:
        KeyError: if *machine* has no ``"label"`` entry.
    """
    # All six geometry values go through _fmt_dim so that a key that is
    # present but None renders as "?" (not "None") and whole floats lose
    # their ".0", the same way for counts as for distances.
    nx, ny, dx, dy, end_x, end_y = (
        _fmt_dim(scan_meta.get(key))
        for key in ("nx", "ny", "dx", "dy", "end_x", "end_y")
    )
    text = (
        f"SPRUCE scan {scan_id} | machine {machine['label']} | "
        f"grid {nx}x{ny} @ {dx}x{dy}mm over {end_x}x{end_y}mm | "
        f"see metadata.json"
    )
    text += _user_comment_treatment_suffix(machine_meta)
    return USER_COMMENT_ASCII + text.encode("ascii", errors="replace")
def _co2_keyword(co2: str) -> str:
c = (co2 or "").strip().lower()
if c == "ambient":
return "aCO2"
if c == "elevated":
return "eCO2"
return c or "co2"
def _enclosure_keyword(enclosure: Any) -> str | None:
if enclosure is None:
return None
if isinstance(enclosure, bool):
return "enclosed" if enclosure else "no enclosure"
s = str(enclosure).strip().lower()
if s in ("yes", "y", "true", "1"):
return "enclosed"
if s in ("no", "n", "false", "0"):
return "no enclosure"
return None
def _temp_treatment_keyword(temp: Any) -> str | None:
if temp is None or isinstance(temp, bool):
return None
if isinstance(temp, (int, float)):
w = float(temp)
s = f"{w:g}C" if w != int(w) else f"{int(w)}C"
return f"temp +{s}"
t = str(temp).strip()
if not t:
return None
try:
w = float(t)
s = f"{w:g}C" if w != int(w) else f"{int(w)}C"
return f"temp +{s}"
except (TypeError, ValueError):
return f"temp {t}"
def _build_xp_keywords(machine_meta: dict[str, Any] | None) -> bytes | None:
if not machine_meta:
return None
parts: list[str] = []
pn = machine_meta.get("plot_number")
if pn is not None and str(pn).strip() != "":
parts.append(f"plot {pn}")
enc = _enclosure_keyword(machine_meta.get("enclosure"))
if enc:
parts.append(enc)
if machine_meta.get("temp_treatment") is not None:
tk = _temp_treatment_keyword(machine_meta["temp_treatment"])
if tk:
parts.append(tk)
if machine_meta.get("co2_treatment") is not None:
parts.append(_co2_keyword(str(machine_meta["co2_treatment"])))
if not parts:
return None
text = "SPRUCE; " + "; ".join(parts)
return text.encode("utf-16le") + b"\x00\x00"
def _decimal_to_dms_rational(deg: float) -> list[tuple[tuple[int, int], ...]]:
abs_deg = abs(deg)
d = int(abs_deg)
t_min = (abs_deg - d) * 60.0
m = int(t_min)
s = (t_min - m) * 60.0
sec = round(s * 1_000_000)
return [(d, 1), (m, 1), (sec, 1_000_000)]
def _meta_text(value: Any) -> str:
    """Return *value* as stripped text; ``None`` and other falsy values give ``""``."""
    return str(value).strip() if value else ""


def _build_gps_ifd(
    machine_meta: dict[str, Any] | None, jpeg_path: Path
) -> dict[int, Any] | None:
    """Build the GPS IFD from machine metadata, or ``None`` if GPS is skipped.

    GPS is skipped when latitude or longitude is missing, unparsable, or out
    of range (the latter two are logged). An unusable elevation only drops
    the altitude tags.
    """
    if not machine_meta:
        return None
    lat_raw = machine_meta.get("latitude_wgs_84")
    lon_raw = machine_meta.get("longitude_wgs_84")
    if lat_raw is None or lon_raw is None:
        return None

    try:
        lat, lon = float(lat_raw), float(lon_raw)
        in_range = -90 <= lat <= 90 and -180 <= lon <= 180
    except (TypeError, ValueError):
        # Unparsable coordinates are treated like out-of-range ones: only the
        # GPS block is dropped, the remaining EXIF tags are still written.
        in_range = False
    if not in_range:
        log.warning("Invalid lat/lon for EXIF GPS, skipping GPS: %s", jpeg_path)
        return None

    gps: dict[int, Any] = {
        GPSIFD.GPSVersionID: (2, 0, 0, 0),
        GPSIFD.GPSLatitudeRef: b"N" if lat >= 0 else b"S",
        GPSIFD.GPSLatitude: _decimal_to_dms_rational(abs(lat)),
        GPSIFD.GPSLongitudeRef: b"E" if lon >= 0 else b"W",
        GPSIFD.GPSLongitude: _decimal_to_dms_rational(abs(lon)),
    }

    elevation = machine_meta.get("elevation_masl")
    if elevation is not None:
        try:
            alt = float(elevation)
            alt_abs = abs(alt)
            if alt_abs.is_integer():
                altitude = (int(alt_abs), 1)
            else:
                # Millimetre resolution for fractional elevations.
                altitude = (round(alt_abs * 1000), 1000)
        except (TypeError, ValueError, OverflowError):
            log.warning(
                "Invalid elevation for EXIF GPS, skipping altitude: %s", jpeg_path
            )
        else:
            # The ref value marks the sign of the elevation: 0 for >= 0, 1 below.
            gps[GPSIFD.GPSAltitudeRef] = 0 if alt >= 0 else 1
            gps[GPSIFD.GPSAltitude] = altitude
    return gps


def write_mosaic_exif(
    jpeg_path: Path,
    scan_meta: dict[str, Any],
    machine: dict[str, Any],
    scan_id: int,
    machine_meta: dict[str, Any] | None,
    processing_software: str = "spruce-scraper/1.0",
) -> bool:
    """Insert EXIF metadata into a mosaic JPEG.

    Args:
        jpeg_path: JPEG file that receives the EXIF block.
        scan_meta: Scan metadata; ``name``, ``user``, ``scan_time`` and the
            grid fields are read from it.
        machine: Machine record; ``label`` is required, ``version`` optional.
        scan_id: Scan identifier used in the description and user comment.
        machine_meta: Optional per-machine metadata (treatment fields,
            coordinates, elevation); ``None`` omits keywords and GPS.
        processing_software: Value for the ProcessingSoftware tag.

    Returns:
        ``True`` on success. On any failure a warning is logged and ``False``
        is returned; no exception propagates to the caller.
    """
    # Deliberate best-effort boundary: a failed EXIF write must not abort the
    # download that has already succeeded.
    try:
        description = f"{machine['label']} scan {scan_id}"
        # _meta_text tolerates non-string values (e.g. a numeric version),
        # which would otherwise fail on .strip().
        name = _meta_text(scan_meta.get("name"))
        if name:
            description = f"{description} ({name})"

        version = _meta_text(machine.get("version"))
        software = f"RootView {version}" if version else "RootView"

        zeroth: dict[int, Any] = {
            ImageIFD.ImageDescription: description.encode("utf-8", errors="replace"),
            ImageIFD.Make: b"RootView",
            ImageIFD.Model: str(machine.get("label", "")).encode(
                "utf-8", errors="replace"
            ),
            ImageIFD.Software: software.encode("utf-8", errors="replace"),
            ImageIFD.ProcessingSoftware: processing_software.encode(
                "utf-8", errors="replace"
            ),
        }

        artist = _meta_text(scan_meta.get("user"))
        if artist:
            zeroth[ImageIFD.Artist] = artist.encode("utf-8", errors="replace")

        exif_ifd: dict[int, Any] = {
            ExifIFD.UserComment: _build_user_comment(
                scan_meta, machine, scan_id, machine_meta
            ),
        }

        # The same timestamp feeds all three date tags; it is omitted
        # entirely when scan_time is missing or malformed.
        timestamp = _fmt_exif_datetime(_meta_text(scan_meta.get("scan_time")))
        if timestamp:
            timestamp_b = timestamp.encode("ascii")
            zeroth[ImageIFD.DateTime] = timestamp_b
            exif_ifd[ExifIFD.DateTimeOriginal] = timestamp_b
            exif_ifd[ExifIFD.DateTimeDigitized] = timestamp_b

        keywords = _build_xp_keywords(machine_meta)
        if keywords is not None:
            zeroth[ImageIFD.XPKeywords] = keywords

        exif_dict: dict[str, Any] = {"0th": zeroth, "Exif": exif_ifd}
        gps = _build_gps_ifd(machine_meta, jpeg_path)
        if gps is not None:
            exif_dict["GPS"] = gps

        piexif.insert(piexif.dump(exif_dict), str(jpeg_path))
    except Exception as exc:
        log.warning("EXIF write failed for %s: %s", jpeg_path, exc)
        return False
    return True
+29 -4
View File
@@ -14,11 +14,12 @@ from typing import Any
class RunStats:
"""Accumulated counters for one or more machines."""
scans_fetched: int = 0 # metadata fetched from server this run
scans_fetched: int = 0 # scan detail page fetched (metadata), not tiles/mosaics
scans_skipped: int = 0 # metadata.json already on disk; no HTTP request
scans_failed: int = 0 # fetch error or missing grid params
scans_failed: int = 0 # metadata fetch error or missing grid params
metadata_written: int = 0 # new metadata.json files created
mosaics_downloaded: int = 0
mosaics_failed: int = 0 # mosaic URL attempted but 0 bytes / HTTP error
tiles_downloaded: int = 0
def merge(self, other: "RunStats") -> None:
@@ -27,10 +28,12 @@ class RunStats:
self.scans_failed += other.scans_failed
self.metadata_written += other.metadata_written
self.mosaics_downloaded += other.mosaics_downloaded
self.mosaics_failed += other.mosaics_failed
self.tiles_downloaded += other.tiles_downloaded
from tqdm import tqdm
from spruce.exif import write_mosaic_exif
from spruce.paths import machine_dir_name, tile_dest, mosaic_dest, _extract_date
from spruce.progress import ProgressTracker, CsvWriter
from spruce.session import MachineSession
@@ -50,6 +53,7 @@ def _download_mosaic(
mosaic_path: Path,
progress: ProgressTracker,
machine: dict[str, Any],
config: dict[str, Any],
dry_run: bool,
) -> bool:
"""Download the scan mosaic if not already done. Returns True if downloaded."""
@@ -62,6 +66,13 @@ def _download_mosaic(
log.info("[%s] Downloading mosaic for scan %d", machine["label"], scan_id)
size = sess.download_file(url, mosaic_path)
if size:
if config.get("write_exif", True):
mmeta: dict[str, Any] | None = config.get("machine_metadata", {}).get(
machine["label"]
)
write_mosaic_exif(
mosaic_path, scan_meta, machine, scan_id, mmeta
)
progress.mark_done(url)
progress.save()
log.info(
@@ -283,10 +294,24 @@ def process_scan(
mosaic_just_downloaded = False
else:
mosaic_just_downloaded = _download_mosaic(
sess, scan_meta, scan_id, mosaic_path, progress, machine, dry_run
sess,
scan_meta,
scan_id,
mosaic_path,
progress,
machine,
config,
dry_run,
)
if mosaic_just_downloaded:
if not metadata_only and mosaic_just_downloaded:
stats.mosaics_downloaded += 1
elif (
not metadata_only
and not dry_run
and not mosaic_already_done
and not mosaic_just_downloaded
):
stats.mosaics_failed += 1
# Write scan-level CSV row only if this scan hasn't been recorded before.
if mosaic_already_done and not metadata_only:
+2
View File
@@ -105,5 +105,7 @@ def load_config(path: str) -> dict:
cfg.setdefault("timeout", 60)
cfg.setdefault("request_delay", 0.5)
cfg.setdefault("tile_scale", 1)
cfg.setdefault("write_exif", True)
cfg.setdefault("machine_metadata", {})
cfg["workers"] = _clamp_workers(cfg["workers"])
return cfg