"""
Tests for spruce.recheck, run against a synthetic archive tree under tmp_path.

The key improvement these tests verify: a single --recheck pass is enough.
Zero-byte tiles are deleted from disk AND their URLs are removed from
progress, without needing a second pass.
"""

from pathlib import Path

import pytest

from spruce.progress import ProgressTracker
from spruce.recheck import recheck_archive, recheck_tile_files

BASE_URL = "http://192.0.2.1:8010/index.php"

# Scan id shared by most of the synthetic tiles created below.
SCAN_ID = 158374


def _new_tracker(root: Path) -> ProgressTracker:
    """Build a ProgressTracker on the ``.progress.json`` file under *root*."""
    return ProgressTracker(root / ".progress.json")


def _tile_url(scan_id: int, x: float, y: float) -> str:
    """Return the tile-image URL for *scan_id* at coordinates (*x*, *y*)."""
    return f"{BASE_URL}?cmd=image&mode=image_scan&id={scan_id}&s=1&x={x}&y={y}"


def _make_tile(path: Path, size: int = 1024) -> None:
    """Create a tile file. size=0 simulates a zero-byte / corrupt download."""
    path.parent.mkdir(parents=True, exist_ok=True)
    payload = b"\xff" * size
    path.write_bytes(payload)


def _archive_tile_path(tmp_path: Path, scan_id: int, row: int, col: int) -> Path:
    """Return the on-disk path of tile (*row*, *col*) for *scan_id*."""
    return tmp_path.joinpath(
        "BW3-20__AMR-26_",
        "2024-07-29",
        str(scan_id),
        "tiles",
        f"tile_r{row:03d}_c{col:03d}.jpg",
    )


# ---------------------------------------------------------------------------
# recheck_tile_files
# ---------------------------------------------------------------------------


def test_recheck_tile_files_no_zero_bytes(tmp_path):
    """A non-empty tile is kept on disk and its URL stays marked done."""
    tracker = _new_tracker(tmp_path)
    tile_path = _archive_tile_path(tmp_path, SCAN_ID, 0, 0)
    _make_tile(tile_path, size=1024)
    tile_url = _tile_url(SCAN_ID, 0.0, 0.0)
    tracker.mark_done(tile_url)
    tracker.save()

    n_deleted = recheck_tile_files(tmp_path, tracker)

    assert n_deleted == 0
    assert tile_path.exists()
    assert tracker.is_done(tile_url)


def test_recheck_tile_files_deletes_zero_byte(tmp_path):
    """A zero-byte tile is counted as deleted and removed from disk."""
    tracker = _new_tracker(tmp_path)
    tile_path = _archive_tile_path(tmp_path, SCAN_ID, 0, 0)
    _make_tile(tile_path, size=0)
    tile_url = _tile_url(SCAN_ID, 0.0, 0.0)
    tracker.mark_done(tile_url)
    tracker.save()

    n_deleted = recheck_tile_files(tmp_path, tracker)

    assert n_deleted == 1
    assert not tile_path.exists()


def test_recheck_tile_files_single_pass_removes_url(tmp_path):
    """
    The two-run wart is fixed: after recheck_tile_files the URL is already
    removed from progress — no second pass required.
    """
    tracker = _new_tracker(tmp_path)
    tile_path = _archive_tile_path(tmp_path, SCAN_ID, 0, 0)
    _make_tile(tile_path, size=0)
    tile_url = _tile_url(SCAN_ID, 0.0, 0.0)
    tracker.mark_done(tile_url)
    tracker.save()

    recheck_tile_files(tmp_path, tracker)

    # Reload progress from disk to confirm the change was persisted
    reloaded = _new_tracker(tmp_path)
    assert not reloaded.is_done(tile_url)


def test_recheck_tile_files_healthy_tiles_untouched(tmp_path):
    """Only the zero-byte tile is deleted; the non-empty one stays on disk."""
    tracker = _new_tracker(tmp_path)
    healthy_tile = _archive_tile_path(tmp_path, SCAN_ID, 0, 0)
    empty_tile = _archive_tile_path(tmp_path, SCAN_ID, 0, 1)
    _make_tile(healthy_tile, size=512)
    _make_tile(empty_tile, size=0)
    healthy_url = _tile_url(SCAN_ID, 0.0, 0.0)
    empty_url = _tile_url(SCAN_ID, 3.01, 0.0)
    tracker.mark_done(healthy_url)
    tracker.mark_done(empty_url)
    tracker.save()

    n_deleted = recheck_tile_files(tmp_path, tracker)

    assert n_deleted == 1
    assert healthy_tile.exists()
    assert not empty_tile.exists()


# ---------------------------------------------------------------------------
# recheck_archive
# ---------------------------------------------------------------------------


def test_recheck_archive_empty_progress(tmp_path):
    """With nothing marked done, recheck_archive reports zero removals."""
    tracker = _new_tracker(tmp_path)

    n_removed = recheck_archive(tmp_path, tracker)

    assert n_removed == 0


def test_recheck_archive_healthy(tmp_path):
    """A done URL backed by a non-empty tile on disk is not removed."""
    tracker = _new_tracker(tmp_path)
    tile_path = _archive_tile_path(tmp_path, SCAN_ID, 0, 0)
    _make_tile(tile_path, size=1024)
    tracker.mark_done(_tile_url(SCAN_ID, 0.0, 0.0))
    tracker.save()

    n_removed = recheck_archive(tmp_path, tracker)

    assert n_removed == 0


def test_recheck_archive_removes_missing_scan(tmp_path):
    """A done URL with no files on disk is removed from progress."""
    tracker = _new_tracker(tmp_path)
    # Mark a URL done but create no files on disk
    tracker.mark_done(_tile_url(999999, 0.0, 0.0))
    tracker.save()

    n_removed = recheck_archive(tmp_path, tracker)

    assert n_removed == 1
    assert not tracker.is_done(_tile_url(999999, 0.0, 0.0))


def test_recheck_archive_skips_mosaic_urls(tmp_path):
    """A mosaic URL with a non-empty mosaic.jpg on disk stays marked done."""
    tracker = _new_tracker(tmp_path)
    mosaic_url = "http://192.0.2.1:8011/RootView_Database/158374/mosaic.jpg"
    tracker.mark_done(mosaic_url)
    tracker.save()
    # recheck verifies a non-zero mosaic exists under */*/<scan_id>/mosaic.jpg
    # NOTE(review): the path pattern in the original comment was garbled
    # ("*/*//mosaic.jpg"); the <scan_id> segment is presumed — confirm.
    mosaic_dir = tmp_path / "M" / "2020-01-01" / "158374"
    mosaic_dir.mkdir(parents=True)
    mosaic_file = mosaic_dir / "mosaic.jpg"
    mosaic_file.write_bytes(b"\xff\xd8\xff\xd9")  # minimal JPEG: SOI + EOI markers

    n_removed = recheck_archive(tmp_path, tracker)

    assert n_removed == 0
    assert tracker.is_done(mosaic_url)