Files
SPRUCE-scraper/tests/test_paths.py
poprhythm e122f6435a Initial commit
Add spruce scraper with CLI, session management, parsers, progress tracking,
recheck logic, and test suite. Includes example config and README.
2026-04-22 10:41:18 -04:00

103 lines
3.0 KiB
Python

"""Tests for spruce.paths — pure path helpers, no network."""
from pathlib import Path
import pytest
from spruce.paths import _extract_date, machine_dir_name, mosaic_dest, tile_dest
# Shared sample inputs handed to the path helpers under test.

# Machine record; the label deliberately contains brackets and a space.
MACHINE = {
    "label": "BW3-20 [AMR-26]",
    "machine_id": "26",
}

# Scan metadata: a timestamp plus the nx / ny values that the zero-padding
# test relates to column / row index width.
SCAN_META = {
    "scan_time": "2024-07-29 05:00",
    "nx": 103,
    "ny": 328,
}

# A single tile, identified by its scan id and row / column indices.
TILE = {
    "scan_id": 158374,
    "row_index": 0,
    "col_index": 2,
}
# ---------------------------------------------------------------------------
# machine_dir_name
# ---------------------------------------------------------------------------
def test_machine_dir_name_sanitises_brackets():
    """A bracketed label yields a name free of brackets and spaces."""
    dir_name = machine_dir_name({"label": "BW3-20 [AMR-26]"})
    # Only absence is checked; the replacement character is not pinned here.
    for forbidden in ("[", "]", " "):
        assert forbidden not in dir_name
def test_machine_dir_name_no_leading_trailing_underscores():
    """The sanitised name neither starts nor ends with an underscore."""
    dir_name = machine_dir_name({"label": "BW3-20 [AMR-26]"})
    assert not (dir_name.startswith("_") or dir_name.endswith("_"))
def test_machine_dir_name_stable():
    """Two calls with the same machine dict produce equal directory names."""
    first_call = machine_dir_name(MACHINE)
    second_call = machine_dir_name(MACHINE)
    assert first_call == second_call
# ---------------------------------------------------------------------------
# _extract_date
# ---------------------------------------------------------------------------
def test_extract_date_standard():
    """A 'date time' string is reduced to its date part."""
    timestamp = "2024-07-29 05:00:00"
    extracted = _extract_date(timestamp)
    assert extracted == "2024-07-29"
def test_extract_date_date_only():
    """A string that is already just a date is returned unchanged."""
    date_only = "2024-07-29"
    extracted = _extract_date(date_only)
    assert extracted == "2024-07-29"
def test_extract_date_no_date():
    """Text without a date yields the 'unknown' placeholder."""
    dateless_text = "no date here"
    extracted = _extract_date(dateless_text)
    assert extracted == "unknown"
def test_extract_date_empty():
    """An empty string yields the 'unknown' placeholder."""
    extracted = _extract_date("")
    assert extracted == "unknown"
# ---------------------------------------------------------------------------
# tile_dest
# ---------------------------------------------------------------------------
def test_tile_dest_structure(tmp_path):
    """The tile path has scan-id and 'tiles' components and a tile_r*.jpg name."""
    tile_path = tile_dest(tmp_path, MACHINE, SCAN_META, TILE)
    components = tile_path.parts

    # Membership in ``parts`` requires an exact path component, not a substring.
    assert str(TILE["scan_id"]) in components
    assert "tiles" in components

    # File name checks: JPEG extension and the row-prefixed tile stem.
    assert tile_path.suffix == ".jpg"
    assert "tile_r" in tile_path.name
def test_tile_dest_zero_padded(tmp_path):
    """The row index in the tile file name is zero-padded to three digits."""
    # With ny=328 the largest row index is 327, so rows need 3 digits;
    # likewise nx=103 implies 3 digits for columns.
    tile_path = tile_dest(tmp_path, MACHINE, SCAN_META, TILE)
    file_name = tile_path.name

    # row_index=0 must therefore render as "000".
    # NOTE(review): only the row padding is asserted; the column padding
    # described above is not checked by this test.
    assert "tile_r000_c" in file_name
def test_tile_dest_contains_date(tmp_path):
    """The date part of SCAN_META's scan_time appears in the tile path."""
    tile_path = tile_dest(tmp_path, MACHINE, SCAN_META, TILE)
    rendered = str(tile_path)
    assert "2024-07-29" in rendered
# ---------------------------------------------------------------------------
# mosaic_dest
# ---------------------------------------------------------------------------
def test_mosaic_dest_filename(tmp_path):
    """The mosaic destination's file name is exactly 'mosaic.jpg'."""
    scan_id = 158374
    mosaic_path = mosaic_dest(tmp_path, MACHINE, SCAN_META, scan_id)
    assert mosaic_path.name == "mosaic.jpg"
def test_mosaic_dest_contains_scan_id(tmp_path):
    """The scan id passed in appears somewhere in the mosaic path."""
    scan_id = 158374
    mosaic_path = mosaic_dest(tmp_path, MACHINE, SCAN_META, scan_id)
    rendered = str(mosaic_path)
    assert "158374" in rendered
def test_mosaic_dest_contains_date(tmp_path):
    """The date part of SCAN_META's scan_time appears in the mosaic path."""
    scan_id = 158374
    mosaic_path = mosaic_dest(tmp_path, MACHINE, SCAN_META, scan_id)
    rendered = str(mosaic_path)
    assert "2024-07-29" in rendered