e122f6435a
Add spruce scraper with CLI, session management, parsers, progress tracking, recheck logic, and test suite. Includes example config and README.
63 lines
1.6 KiB
Python
63 lines
1.6 KiB
Python
"""
|
|
Pure path-helper functions — no network, no JSON, no progress state.
|
|
"""
|
|
|
|
import re
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
|
|
def machine_dir_name(machine: dict[str, Any]) -> str:
    """Sanitise a machine's label for use as a single directory name.

    Every character other than a word character, ``-`` or ``.`` is replaced
    with ``_``; leading and trailing underscores are then stripped.

    Args:
        machine: Machine record; only its ``"label"`` entry is read.

    Returns:
        The sanitised label, or ``"unknown"`` when the result would be
        empty, ``"."`` or ``".."``.

    Raises:
        KeyError: If ``machine`` has no ``"label"`` entry.
    """
    name = re.sub(r"[^\w\-.]", "_", machine["label"]).strip("_")
    # An empty component (and ".") is silently dropped when joined onto a
    # Path, and ".." climbs to the parent directory, so none of these can
    # serve as a per-machine directory name.
    if name in ("", ".", ".."):
        return "unknown"
    return name
|
|
|
|
|
|
def _extract_date(dt_str: str) -> str:
    """Pull the first ``YYYY-MM-DD`` substring out of a datetime string.

    Args:
        dt_str: Text expected to contain a date. Non-string values
            (for example ``None``) are tolerated and treated as having
            no date.

    Returns:
        The first ``YYYY-MM-DD`` match, or ``"unknown"`` when there is none.
    """
    # re.search raises TypeError on non-string input; honour the documented
    # fallback instead of crashing.
    if not isinstance(dt_str, str):
        return "unknown"
    m = re.search(r"(\d{4}-\d{2}-\d{2})", dt_str)
    return m.group(1) if m else "unknown"
|
|
|
|
|
|
def tile_dest(
    output_dir: Path,
    machine: dict[str, Any],
    scan_meta: dict[str, Any],
    tile: dict[str, Any],
) -> Path:
    """Build the local destination path of one tile image.

    The result has the shape
    ``<output_dir>/<machine dir>/<scan date>/<scan id>/tiles/tile_r<row>_c<col>.jpg``.
    The row and column indices are zero-padded to the number of digits of
    the largest index implied by ``scan_meta["ny"]`` and ``scan_meta["nx"]``
    (each defaulting to 1 when absent).

    Args:
        output_dir: Root directory under which everything is placed.
        machine: Machine record, passed to ``machine_dir_name``.
        scan_meta: Scan metadata; ``"scan_time"``, ``"ny"`` and ``"nx"``
            are read, all optionally.
        tile: Tile record; ``"scan_id"``, ``"row_index"`` and
            ``"col_index"`` are read.

    Returns:
        The path of the tile file. Nothing is created on disk.
    """

    def _digits(count: Any) -> int:
        # The largest index in a run of ``count`` cells is ``count - 1``;
        # a run of one cell (or fewer) still gets a single digit.
        if count > 1:
            return len(str(count - 1))
        return 1

    day = _extract_date(scan_meta.get("scan_time", ""))
    scan = tile["scan_id"]
    row_pad = _digits(scan_meta.get("ny", 1))
    col_pad = _digits(scan_meta.get("nx", 1))

    row_part = format(tile["row_index"], f"0{row_pad}d")
    col_part = format(tile["col_index"], f"0{col_pad}d")
    name = "tile_r" + row_part + "_c" + col_part + ".jpg"

    return output_dir.joinpath(
        machine_dir_name(machine),
        day,
        str(scan),
        "tiles",
        name,
    )
|
|
|
|
|
|
def mosaic_dest(
    output_dir: Path,
    machine: dict[str, Any],
    scan_meta: dict[str, Any],
    scan_id: int,
) -> Path:
    """Build the local destination path of a scan's mosaic image.

    The result has the shape
    ``<output_dir>/<machine dir>/<scan date>/<scan id>/mosaic.jpg``.

    Args:
        output_dir: Root directory under which everything is placed.
        machine: Machine record, passed to ``machine_dir_name``.
        scan_meta: Scan metadata; only the optional ``"scan_time"`` entry
            is read.
        scan_id: Identifier of the scan, used as a directory name.

    Returns:
        The path of the mosaic file. Nothing is created on disk.
    """
    day = _extract_date(scan_meta.get("scan_time", ""))
    return output_dir.joinpath(
        machine_dir_name(machine),
        day,
        str(scan_id),
        "mosaic.jpg",
    )
|