Initial commit
Add spruce scraper with CLI, session management, parsers, progress tracking, recheck logic, and test suite. Includes example config and README.
This commit is contained in:
@@ -0,0 +1,62 @@
|
||||
"""
|
||||
Pure path-helper functions — no network, no JSON, no progress state.
|
||||
"""
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
def machine_dir_name(machine: dict[str, Any]) -> str:
    """Sanitise machine label for use as a directory name.

    Every character that is not a word character, ``-`` or ``.`` is replaced
    with ``_``; leading and trailing underscores are then stripped.

    Args:
        machine: Mapping holding the machine's ``"label"`` string.

    Returns:
        The sanitised label, or ``"unknown"`` when the result would be empty
        or consist only of dots.

    Raises:
        KeyError: If ``machine`` has no ``"label"`` entry.
    """
    name = re.sub(r"[^\w\-.]", "_", machine["label"]).strip("_")
    # An empty segment would silently collapse this directory level, and
    # "." / ".." would point at the current / parent directory rather than
    # naming a new one, so neither is usable as a directory name.
    if not name.strip("."):
        return "unknown"
    return name
|
||||
|
||||
|
||||
def _extract_date(dt_str: str) -> str:
|
||||
"""Pull YYYY-MM-DD from a datetime string, fall back to 'unknown'."""
|
||||
m = re.search(r"(\d{4}-\d{2}-\d{2})", dt_str)
|
||||
return m.group(1) if m else "unknown"
|
||||
|
||||
|
||||
def tile_dest(
    output_dir: Path,
    machine: dict[str, Any],
    scan_meta: dict[str, Any],
    tile: dict[str, Any],
) -> Path:
    """Build the local path under which a single tile file is stored.

    Layout: ``<output_dir>/<machine dir>/<scan date>/<scan id>/tiles/
    tile_r<row>_c<col>.jpg``.

    Args:
        output_dir: Root directory of the local tree.
        machine: Machine record; passed to ``machine_dir_name``.
        scan_meta: Scan metadata; ``"scan_time"`` supplies the date segment,
            and ``"ny"`` / ``"nx"`` (each defaulting to 1) set the padding.
        tile: Tile record providing ``"scan_id"``, ``"row_index"`` and
            ``"col_index"``.

    Returns:
        The tile's destination path. Row and column indices are zero-padded
        to the number of digits in ``ny - 1`` and ``nx - 1`` respectively.
    """

    def _index_width(count):
        # The largest index for `count` entries is count - 1; always use at
        # least one digit.
        if count > 1:
            return len(str(count - 1))
        return 1

    day = _extract_date(scan_meta.get("scan_time", ""))
    scan_id = tile["scan_id"]
    row_pad = _index_width(scan_meta.get("ny", 1))
    col_pad = _index_width(scan_meta.get("nx", 1))
    name = f"tile_r{tile['row_index']:0{row_pad}d}_c{tile['col_index']:0{col_pad}d}.jpg"
    scan_root = output_dir / machine_dir_name(machine) / day / str(scan_id)
    return scan_root / "tiles" / name
|
||||
|
||||
|
||||
def mosaic_dest(
    output_dir: Path,
    machine: dict[str, Any],
    scan_meta: dict[str, Any],
    scan_id: int,
) -> Path:
    """Build the local path under which a scan's mosaic file is stored.

    Layout: ``<output_dir>/<machine dir>/<scan date>/<scan_id>/mosaic.jpg``.

    Args:
        output_dir: Root directory of the local tree.
        machine: Machine record; passed to ``machine_dir_name``.
        scan_meta: Scan metadata; ``"scan_time"`` supplies the date segment.
        scan_id: Identifier of the scan, used verbatim as a directory name.

    Returns:
        The mosaic's destination path.
    """
    day = _extract_date(scan_meta.get("scan_time", ""))
    return output_dir.joinpath(
        machine_dir_name(machine),
        day,
        str(scan_id),
        "mosaic.jpg",
    )
|
||||
Reference in New Issue
Block a user