#!/usr/bin/env python3
"""
Build preview MP4s for the top N ROIs per machine (default N=2, max-height 1080).

Reads archives/scans.csv once, groups on-disk mosaic rows by machine, then for each
machine picks the most frequent ROI extents and calls encode_movie().

Usage:
  python scripts/build_mosaic_movies_batch.py
  python scripts/build_mosaic_movies_batch.py --dry-run
  python scripts/build_mosaic_movies_batch.py --skip-existing
  python scripts/build_mosaic_movies_batch.py --machine "BW2-10 [AMR-22]"
  python scripts/build_mosaic_movies_batch.py --full-res   # no max-height cap
"""

from __future__ import annotations

import argparse
import sys
import time
from dataclasses import dataclass
from pathlib import Path

_SCRIPTS_DIR = Path(__file__).resolve().parent
# Import sibling module (run as python scripts/build_mosaic_movies_batch.py from repo root)
sys.path.insert(0, str(_SCRIPTS_DIR))
import build_mosaic_movie as bmm  # noqa: E402


def read_machine_labels(path: Path) -> list[str]:
    """Return the machine labels listed in *path*, one per line.

    Surrounding whitespace is stripped; blank lines and lines starting with
    ``#`` are ignored. Labels are returned in file order.
    """
    # "utf-8-sig" decodes plain UTF-8 as well, but also drops a leading BOM so
    # the first label of a BOM-prefixed file is not polluted with "\ufeff".
    with path.open(encoding="utf-8-sig") as fh:
        stripped = (line.strip() for line in fh)
        return [s for s in stripped if s and not s.startswith("#")]


@dataclass
class Job:
    """One planned encode: a single ROI of a single machine."""

    machine: str  # machine label, as used to key the rows loaded from scans.csv
    rank: int  # 1-based position of this ROI in bmm.pick_top_rois() output
    roi: tuple[float, float, float, float]  # unpacked as (sx, sy, ex, ey) in main()
    extent_count: int  # count returned by bmm.pick_top_rois() alongside the ROI
    rows: list[dict]  # machine rows for which bmm.extent_close(row, roi) holds
    output_path: Path  # path computed by bmm.default_output_path()


def collect_jobs(
    *,
    machines: list[str],
    by_machine: dict[str, list[dict]],
    archive: Path,
    top_rois: int,
    max_height: int | None,
    metadata_overlay: bool,
) -> list[Job]:
    """Plan one :class:`Job` per (machine, top ROI) pair.

    Machines with no rows in *by_machine* are skipped silently. For the others,
    ``bmm.pick_top_rois(rows, top_rois)`` selects the ROIs, and each job gets
    the subset of rows matching its ROI plus the default output path.
    """
    jobs: list[Job] = []
    for machine in machines:
        rows = by_machine.get(machine, [])
        if not rows:
            continue
        picks = bmm.pick_top_rois(rows, top_rois)
        for rank, (roi, extent_count) in enumerate(picks, start=1):
            jobs.append(
                Job(
                    machine=machine,
                    rank=rank,
                    roi=roi,
                    extent_count=extent_count,
                    rows=[r for r in rows if bmm.extent_close(r, roi)],
                    output_path=bmm.default_output_path(
                        archive,
                        machine,
                        roi,
                        max_height=max_height,
                        metadata_overlay=metadata_overlay,
                        rank=rank,
                    ),
                )
            )
    return jobs


def parse_args() -> argparse.Namespace:
    """Parse the command line.

    ``--full-res`` is folded into ``args.max_height`` (set to ``None``), so
    callers only need to look at ``max_height``. Exits with a usage error when
    ``--top-rois`` is below 1 or ``--fps`` is not a positive number.
    """
    p = argparse.ArgumentParser(
        description=__doc__,
        # Keep the docstring's line breaks so the "Usage:" examples stay readable
        # in --help instead of being re-wrapped into one paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    p.add_argument(
        "--machines-file",
        type=Path,
        default=_SCRIPTS_DIR / "machines.example.txt",
        help="One machine label per line (default: scripts/machines.example.txt next to this script)",
    )
    p.add_argument(
        "--machine",
        metavar="LABEL",
        help='If set, only this machine (overrides list to a single job set), e.g. "BW2-10 [AMR-22]"',
    )
    p.add_argument("--archive", type=Path, default=Path("archives"))
    p.add_argument("--scans-csv", type=Path, default=None)
    p.add_argument("--top-rois", type=int, default=2, help="How many top extents per machine (default: 2)")
    p.add_argument("--max-height", type=int, default=1080, help="Preview cap in px (default: 1080)")
    p.add_argument(
        "--full-res",
        action="store_true",
        help="Disable max-height cap (full mosaic resolution; can be huge)",
    )
    p.add_argument("--fps", type=float, default=10.0)
    p.add_argument("--dry-run", action="store_true")
    p.add_argument(
        "--skip-existing",
        action="store_true",
        help="Skip encode if output MP4 exists and is non-empty",
    )
    p.add_argument("--no-metadata-overlay", action="store_true")
    args = p.parse_args()

    # Reject values that can only produce a confusing failure later on.
    if args.top_rois < 1:
        p.error("--top-rois must be >= 1")
    if not args.fps > 0:  # written this way so NaN is rejected too
        p.error("--fps must be > 0")

    if args.full_res:
        args.max_height = None
    return args


def _resolve_machines(args: argparse.Namespace) -> list[str]:
    """Return the machine labels to process, de-duplicated in first-seen order."""
    if args.machine:
        return [args.machine.strip()]
    if not args.machines_file.is_file():
        sys.exit(f"Machines file not found: {args.machines_file}")
    machines = read_machine_labels(args.machines_file)
    if not machines:
        sys.exit(f"No machine labels in {args.machines_file}")
    # A label listed twice would otherwise encode the same output path twice.
    return list(dict.fromkeys(machines))


def _existing_output_size(path: Path) -> int:
    """Return the size in bytes of *path* if it is a regular file, else 0."""
    try:
        return path.stat().st_size if path.is_file() else 0
    except OSError:
        # The file vanished (or became unreadable) between the two calls.
        return 0


def _run_job(
    job: Job,
    roi_s: str,
    *,
    args: argparse.Namespace,
    archive: Path,
    max_height: int | None,
    metadata_overlay: bool,
) -> tuple[str, ...]:
    """Encode (or skip) one job, print its result line, return its summary row.

    The row layout is
    ``(machine, rank, roi, csv_rows, written, missing, path, status, mb, secs)``.
    """
    n_rows = str(len(job.rows))

    if args.skip_existing:
        size_bytes = _existing_output_size(job.output_path)
        if size_bytes > 0:
            sz = size_bytes / (1024 * 1024)
            print(f" SKIP (exists): {job.output_path}")
            return (
                job.machine,
                str(job.rank),
                roi_s,
                n_rows,
                "-",
                "-",
                str(job.output_path),
                "SKIP (exists)",
                f"{sz:.2f}",
                "-",
            )

    enc_t0 = time.perf_counter()
    res = bmm.encode_movie(
        machine=job.machine,
        roi=job.roi,
        rows=job.rows,
        archive=archive,
        max_height=max_height,
        metadata_overlay=metadata_overlay,
        fps=float(args.fps),
        output=None,
        dry_run=args.dry_run,
        rank=job.rank,
        quiet=True,
    )
    enc_elapsed = time.perf_counter() - enc_t0

    if not res.success:
        print(f" FAIL {res.skipped_reason or 'unknown'}")
    elif args.dry_run:
        print(f" dry-run OK -> {res.output_path} (missing files: {res.missing})")
    else:
        print(
            f" OK {res.written} frames "
            f"{(res.size_mb or 0):.2f} MB {enc_elapsed:.1f}s"
        )

    if res.success:
        # Only a successful dry run is reported as "dry-run"; a failure keeps
        # its FAIL status so the summary table does not hide it.
        status = "dry-run" if args.dry_run else "OK"
    elif res.skipped_reason:
        status = f"FAIL: {res.skipped_reason[:40]}"
    else:
        status = "FAIL"

    if args.dry_run:
        # Nothing is encoded in a dry run, so size/time/frame count are placeholders.
        mb, es, w = "-", "-", "0"
    else:
        mb = f"{res.size_mb:.2f}" if res.size_mb is not None else "-"
        es = f"{enc_elapsed:.1f}"
        w = str(res.written)

    return (
        job.machine,
        str(job.rank),
        roi_s,
        n_rows,
        w,
        str(res.missing),
        str(res.output_path or job.output_path),
        status,
        mb,
        es,
    )


def _print_summary(summary_rows: list[tuple[str, ...]]) -> None:
    """Print the final fixed-width table, one entry (plus its path) per job."""
    print()
    print("=" * 120)
    hdr = (
        f"{'machine':<26} {'rk':>2} {'ROI (mm)':<36} {'csv':>4} {'out':>5} "
        f"{'miss':>4} {'MB':>7} {'s':>6} {'status':<22}"
    )
    print(hdr)
    print("-" * 120)
    for row in summary_rows:
        m, rk, roi_s, csv_n, wrt, miss, path, status, mb, es = row
        print(
            f"{m:<26} {rk:>2} {roi_s:<36} {csv_n:>4} {wrt:>5} {miss:>4} "
            f"{mb:>7} {es:>6} {status:<22}"
        )
        print(f" -> {path}")
    print("=" * 120)


def main() -> None:
    """Entry point: plan the jobs, run each one, then print a summary table.

    Exits via ``sys.exit`` with a message when scans.csv or the machines file
    is missing, when the machines file has no labels, or when no job could be
    planned for the selected machines.
    """
    args = parse_args()
    archive: Path = args.archive
    scans_csv: Path = args.scans_csv or (archive / "scans.csv")
    if not scans_csv.is_file():
        sys.exit(f"scans.csv not found: {scans_csv}")

    machines = _resolve_machines(args)

    t_load0 = time.perf_counter()
    by_machine = bmm.load_on_disk_rows_by_machine(scans_csv)
    load_s = time.perf_counter() - t_load0

    max_height: int | None = args.max_height
    metadata_overlay = not args.no_metadata_overlay

    jobs = collect_jobs(
        machines=machines,
        by_machine=by_machine,
        archive=archive,
        top_rois=args.top_rois,
        max_height=max_height,
        metadata_overlay=metadata_overlay,
    )
    total = len(jobs)
    if total == 0:
        sys.exit("No jobs (no on-disk mosaics for selected machines).")

    print(f"Loaded scans.csv grouped by machine in {load_s:.2f}s ({total} job(s))")
    if max_height is not None:
        print(f"Max height: {max_height}px")
    else:
        print("Max height: (full resolution)")
    print(f"Metadata overlay: {'on' if metadata_overlay else 'off'}")
    print()

    summary_rows: list[tuple[str, ...]] = []
    for idx, job in enumerate(jobs, start=1):
        sx, sy, ex, ey = job.roi
        roi_s = f"{sx},{sy}..{ex},{ey}"
        print(
            f"[{idx}/{total}] {job.machine} rank={job.rank} ROI {roi_s} "
            f"({job.extent_count} CSV rows this extent, {len(job.rows)} deduped rows)"
        )
        summary_rows.append(
            _run_job(
                job,
                roi_s,
                args=args,
                archive=archive,
                max_height=max_height,
                metadata_overlay=metadata_overlay,
            )
        )

    _print_summary(summary_rows)


if __name__ == "__main__":
    main()