Initial commit

Add spruce scraper with CLI, session management, parsers, progress tracking,
recheck logic, and test suite. Includes example config and README.
This commit is contained in:
2026-04-22 10:41:18 -04:00
commit e122f6435a
23 changed files with 3789 additions and 0 deletions
+91
View File
@@ -0,0 +1,91 @@
"""Tests for spruce.settings — config loading and worker clamping."""
import logging
import pytest
import yaml
from spruce.settings import (
MAX_SAFE_WORKERS,
_clamp_workers,
load_config,
)
# ---------------------------------------------------------------------------
# _clamp_workers
# ---------------------------------------------------------------------------
def test_clamp_workers_below_limit():
    """Check that ``_clamp_workers`` returns a request of 2 workers unchanged."""
    requested = 2
    assert _clamp_workers(requested) == requested
def test_clamp_workers_at_limit():
    """Check that a request of exactly ``MAX_SAFE_WORKERS`` is returned as-is."""
    clamped = _clamp_workers(MAX_SAFE_WORKERS)
    assert clamped == MAX_SAFE_WORKERS
def test_clamp_workers_above_limit_caps(caplog):
    """Check that one worker over the limit is capped and a warning is logged.

    The call runs while ``caplog`` captures at WARNING level so that the
    expected message fragment can be looked up in the captured log text.
    """
    over_limit = MAX_SAFE_WORKERS + 1
    with caplog.at_level(logging.WARNING):
        clamped = _clamp_workers(over_limit)
    # The captured text stays available after the capture context closes.
    assert clamped == MAX_SAFE_WORKERS
    assert "exceeds the safe limit" in caplog.text
def test_clamp_workers_zero():
    """Check that a request of zero workers comes back as zero."""
    clamped = _clamp_workers(0)
    assert clamped == 0
# ---------------------------------------------------------------------------
# load_config
# ---------------------------------------------------------------------------
def _write_config(tmp_path, **overrides):
    """Write a YAML config file under *tmp_path* and return its path as a string.

    The file always starts from a fixed ``username``/``password`` pair; any
    keyword arguments are layered on top, replacing those entries or adding
    new keys. The file is named ``config.yaml``.
    """
    # Later entries win, so overrides replace the stock credentials if given.
    settings = {
        "username": "testuser",
        "password": "testpass",
        **overrides,
    }
    config_file = tmp_path / "config.yaml"
    config_file.write_text(yaml.dump(settings))
    return str(config_file)
def test_load_config_defaults(tmp_path):
    """Check the values ``load_config`` reports when only credentials are given."""
    loaded = load_config(_write_config(tmp_path))
    # Checked in this order so the first mismatch reported is the same key
    # a sequence of individual asserts would have stopped on.
    expected_defaults = {
        "base_url": "http://205.149.147.131:8010/",
        "workers": 2,
        "timeout": 60,
        "request_delay": 0.5,
        "output_dir": "archives",
    }
    for key, expected in expected_defaults.items():
        assert loaded[key] == expected
def test_load_config_overrides(tmp_path):
    """Check that ``workers`` and ``output_dir`` set in the file are returned."""
    config_file = _write_config(
        tmp_path,
        workers=3,
        output_dir="my_archives",
    )
    loaded = load_config(config_file)
    assert loaded["workers"] == 3
    assert loaded["output_dir"] == "my_archives"
def test_load_config_caps_workers(tmp_path, caplog):
    """Check that a configured worker count above the limit is capped on load.

    The config asks for ``MAX_SAFE_WORKERS + 2`` workers; the loaded value is
    expected to equal ``MAX_SAFE_WORKERS`` and a warning to be logged.
    """
    too_many = MAX_SAFE_WORKERS + 2
    config_file = _write_config(tmp_path, workers=too_many)
    with caplog.at_level(logging.WARNING):
        loaded = load_config(config_file)
    assert loaded["workers"] == MAX_SAFE_WORKERS
    assert "exceeds the safe limit" in caplog.text
def test_load_config_missing_username_exits(tmp_path):
    """Check that a config holding only ``password`` makes ``load_config`` exit."""
    config_file = tmp_path / "config.yaml"
    password_only = yaml.dump({"password": "x"})
    config_file.write_text(password_only)
    with pytest.raises(SystemExit):
        load_config(str(config_file))
def test_load_config_missing_password_exits(tmp_path):
    """Check that a config holding only ``username`` makes ``load_config`` exit."""
    config_file = tmp_path / "config.yaml"
    username_only = yaml.dump({"username": "x"})
    config_file.write_text(username_only)
    with pytest.raises(SystemExit):
        load_config(str(config_file))