Files
poprhythm e122f6435a Initial commit
Add spruce scraper with CLI, session management, parsers, progress tracking,
recheck logic, and test suite. Includes example config and README.
2026-04-22 10:41:18 -04:00

63 lines
1.6 KiB
Python

"""
Pure path-helper functions — no network, no JSON, no progress state.
"""
import re
from pathlib import Path
from typing import Any
def machine_dir_name(machine: dict[str, Any]) -> str:
    """Sanitise a machine label for use as a single directory name.

    Every character other than word characters, ``-`` and ``.`` is replaced
    with ``_``; leading and trailing underscores are then stripped.

    Args:
        machine: Mapping that must contain a string ``"label"`` entry.

    Returns:
        The sanitised label, or ``"unknown"`` when the result would be empty
        or one of the special path components ``"."`` / ``".."``.

    Raises:
        KeyError: If ``machine`` has no ``"label"`` key.
    """
    name = re.sub(r"[^\w\-.]", "_", machine["label"]).strip("_")
    # Dots are allowed, so "." and ".." pass through the substitution
    # untouched; joined onto a path they would point at the parent tree
    # instead of a new sub-directory.  An empty name would silently drop the
    # machine level from the path altogether.
    if name in {"", ".", ".."}:
        return "unknown"
    return name
def _extract_date(dt_str: str) -> str:
"""Pull YYYY-MM-DD from a datetime string, fall back to 'unknown'."""
m = re.search(r"(\d{4}-\d{2}-\d{2})", dt_str)
return m.group(1) if m else "unknown"
def tile_dest(
    output_dir: Path,
    machine: dict[str, Any],
    scan_meta: dict[str, Any],
    tile: dict[str, Any],
) -> Path:
    """Build the local path where one tile image is stored.

    Layout::

        <output_dir>/<machine dir>/<scan date>/<scan id>/tiles/tile_r<row>_c<col>.jpg

    Args:
        output_dir: Root directory of the download tree.
        machine: Machine record; its label becomes the first sub-directory.
        scan_meta: Scan metadata.  ``scan_time`` supplies the date folder;
            ``ny`` / ``nx`` (default 1) set the zero-padding of the row and
            column numbers.
        tile: Tile record providing ``scan_id``, ``row_index`` and
            ``col_index``.

    Returns:
        Path of the tile file beneath ``output_dir``.
    """

    def _digits(count: Any) -> int:
        # Pad to the width of the largest index, count - 1 (presumably the
        # indices are zero-based — confirm against the data source).
        return len(str(count - 1)) if count > 1 else 1

    day = _extract_date(scan_meta.get("scan_time", ""))
    scan_ref = tile["scan_id"]

    row_digits = _digits(scan_meta.get("ny", 1))
    col_digits = _digits(scan_meta.get("nx", 1))

    row_part = format(tile["row_index"], f"0{row_digits}d")
    col_part = format(tile["col_index"], f"0{col_digits}d")
    tile_name = "tile_r" + row_part + "_c" + col_part + ".jpg"

    scan_root = output_dir / machine_dir_name(machine) / day / str(scan_ref)
    return scan_root / "tiles" / tile_name
def mosaic_dest(
    output_dir: Path,
    machine: dict[str, Any],
    scan_meta: dict[str, Any],
    scan_id: int,
) -> Path:
    """Build the local path where a scan's mosaic image is stored.

    Layout::

        <output_dir>/<machine dir>/<scan date>/<scan id>/mosaic.jpg

    Args:
        output_dir: Root directory of the download tree.
        machine: Machine record; its label becomes the first sub-directory.
        scan_meta: Scan metadata; ``scan_time`` supplies the date folder.
        scan_id: Identifier of the scan, used as the innermost folder name.

    Returns:
        Path of the mosaic file beneath ``output_dir``.
    """
    day = _extract_date(scan_meta.get("scan_time", ""))
    scan_root = output_dir / machine_dir_name(machine) / day / str(scan_id)
    return scan_root / "mosaic.jpg"