"""Tests for spruce.progress — file I/O only, uses tmp_path."""

import csv
import json
from pathlib import Path

import pytest

from spruce.progress import CsvWriter, ProgressTracker


# ---------------------------------------------------------------------------
# ProgressTracker
# ---------------------------------------------------------------------------


def test_progress_mark_and_check(tmp_path):
    """A URL is reported as done only after ``mark_done`` is called for it."""
    p = ProgressTracker(tmp_path / ".progress.json")
    assert not p.is_done("http://example.com/a")
    p.mark_done("http://example.com/a")
    assert p.is_done("http://example.com/a")


def test_progress_roundtrip(tmp_path):
    """URLs marked done and saved are visible to a new tracker on the same path."""
    path = tmp_path / ".progress.json"
    p = ProgressTracker(path)
    p.mark_done("http://example.com/a")
    p.mark_done("http://example.com/b")
    p.save()

    p2 = ProgressTracker(path)
    assert p2.is_done("http://example.com/a")
    assert p2.is_done("http://example.com/b")
    assert not p2.is_done("http://example.com/c")


def test_progress_discard(tmp_path):
    """``discard`` removes a URL that was previously marked done."""
    p = ProgressTracker(tmp_path / ".progress.json")
    p.mark_done("http://example.com/x")
    assert p.is_done("http://example.com/x")
    p.discard("http://example.com/x")
    assert not p.is_done("http://example.com/x")


def test_progress_discard_nonexistent_is_noop(tmp_path):
    """Discarding a URL that was never marked neither raises nor changes state."""
    p = ProgressTracker(tmp_path / ".progress.json")
    p.discard("http://example.com/never")  # should not raise
    # Verify the "no-op" half of the contract, not just the absence of an error.
    assert not p.is_done("http://example.com/never")
    assert len(p) == 0


def test_progress_iter_urls(tmp_path):
    """``iter_urls`` yields exactly the URLs that were marked done."""
    p = ProgressTracker(tmp_path / ".progress.json")
    p.mark_done("http://example.com/1")
    p.mark_done("http://example.com/2")
    # Compared as a set: the test makes no claim about iteration order.
    assert set(p.iter_urls()) == {
        "http://example.com/1",
        "http://example.com/2",
    }


def test_progress_len(tmp_path):
    """``len`` follows the number of done URLs through marks and discards."""
    p = ProgressTracker(tmp_path / ".progress.json")
    assert len(p) == 0
    p.mark_done("http://example.com/1")
    assert len(p) == 1
    p.mark_done("http://example.com/2")
    assert len(p) == 2
    p.discard("http://example.com/1")
    assert len(p) == 1


def test_progress_save_creates_parent(tmp_path):
    """``save`` creates missing parent directories and writes JSON to the path."""
    path = tmp_path / "nested" / "dir" / ".progress.json"
    p = ProgressTracker(path)
    p.mark_done("http://example.com/z")
    p.save()

    assert path.exists()
    data = json.loads(path.read_text())
    assert "http://example.com/z" in data["completed_urls"]


def test_progress_corrupt_file_starts_fresh(tmp_path):
    """A progress file that is not valid JSON yields an empty tracker."""
    path = tmp_path / ".progress.json"
    path.write_text("not valid json")
    p = ProgressTracker(path)
    assert len(p) == 0  # starts fresh, no exception


# ---------------------------------------------------------------------------
# CsvWriter
# ---------------------------------------------------------------------------

FIELDS = ["a", "b", "c"]


def _read_rows(path: Path) -> list:
    """Return every data row of the CSV at *path* as a list of dicts.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the rows are read, rather than being left open until garbage collection
    (which emits ``ResourceWarning``). ``newline=""`` is what the ``csv``
    module documentation recommends for files handed to a reader.
    """
    with path.open(newline="") as fh:
        return list(csv.DictReader(fh))


def test_csv_writer_creates_header(tmp_path):
    """Opening and closing a writer with no rows leaves only the header line."""
    path = tmp_path / "out.csv"
    w = CsvWriter(path, FIELDS)
    w.close()

    assert _read_rows(path) == []
    header = path.read_text().splitlines()[0]
    assert header == "a,b,c"


def test_csv_writer_write_row(tmp_path):
    """A written row can be read back with its field values intact."""
    path = tmp_path / "out.csv"
    w = CsvWriter(path, FIELDS)
    w.write({"a": "1", "b": "2", "c": "3"})
    w.close()

    rows = _read_rows(path)
    assert len(rows) == 1
    assert rows[0]["a"] == "1"
    assert rows[0]["c"] == "3"


def test_csv_writer_missing_fields_fill_empty(tmp_path):
    """Fields absent from the written dict come back as empty strings."""
    path = tmp_path / "out.csv"
    w = CsvWriter(path, FIELDS)
    w.write({"a": "hello"})  # b and c missing
    w.close()

    rows = _read_rows(path)
    assert rows[0]["b"] == ""
    assert rows[0]["c"] == ""


def test_csv_writer_appends_on_second_open(tmp_path):
    """A second writer on the same path appends rather than overwriting."""
    path = tmp_path / "out.csv"
    w = CsvWriter(path, FIELDS)
    w.write({"a": "first"})
    w.close()

    w2 = CsvWriter(path, FIELDS)
    w2.write({"a": "second"})
    w2.close()

    # Exactly two data rows also means the header was not written twice:
    # a repeated header would be parsed by DictReader as an extra row.
    rows = _read_rows(path)
    assert len(rows) == 2
    assert rows[0]["a"] == "first"
    assert rows[1]["a"] == "second"