Enhance CSV metadata with error tracking for mosaics and tiles
This commit is contained in:
@@ -0,0 +1,36 @@
|
||||
"""Tests for spruce.download_result classification."""
|
||||
|
||||
import requests
|
||||
|
||||
from spruce.download_result import (
|
||||
PERMANENT_MISSING,
|
||||
TRANSIENT,
|
||||
UNKNOWN,
|
||||
classify_http_error,
|
||||
error_code_str,
|
||||
)
|
||||
|
||||
|
||||
def test_classify_404_permanent():
    """Status 404 with no exception must classify as PERMANENT_MISSING."""
    verdict = classify_http_error(404, None)
    assert verdict == PERMANENT_MISSING
|
||||
|
||||
|
||||
def test_classify_410_permanent():
    """Status 410 with no exception must classify as PERMANENT_MISSING."""
    verdict = classify_http_error(410, None)
    assert verdict == PERMANENT_MISSING
|
||||
|
||||
|
||||
def test_classify_503_transient():
    """Status 503 with no exception must classify as TRANSIENT."""
    verdict = classify_http_error(503, None)
    assert verdict == TRANSIENT
|
||||
|
||||
|
||||
def test_classify_timeout_transient():
    """A requests.Timeout with no status code must classify as TRANSIENT."""
    timeout_exc = requests.Timeout()
    verdict = classify_http_error(None, timeout_exc)
    assert verdict == TRANSIENT
|
||||
|
||||
|
||||
def test_classify_unknown_4xx():
    """Status 403 with no exception must classify as UNKNOWN."""
    verdict = classify_http_error(403, None)
    assert verdict == UNKNOWN
|
||||
|
||||
|
||||
def test_error_code_str():
    """error_code_str renders None as an empty string and 404 as "404"."""
    expectations = ((None, ""), (404, "404"))
    for code, rendered in expectations:
        assert error_code_str(code) == rendered
|
||||
@@ -0,0 +1,85 @@
|
||||
"""Mosaic download outcomes for scans.csv (RunStats / MosaicAttempt)."""
|
||||
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from spruce.download_result import (
|
||||
DownloadResult,
|
||||
PERMANENT_MISSING,
|
||||
TRANSIENT,
|
||||
UNKNOWN,
|
||||
)
|
||||
from spruce.orchestrator import _download_mosaic
|
||||
|
||||
_MACHINE = {"label": "M", "option_value": "v", "machine_id": "1"}
|
||||
_CONFIG: dict = {
|
||||
"write_exif": False,
|
||||
"timeout": 10,
|
||||
"request_delay": 0.0,
|
||||
"machine_metadata": {},
|
||||
}
|
||||
|
||||
|
||||
def test_download_mosaic_404_permanent_class():
    """A mocked 404 / PERMANENT_MISSING download result surfaces as a failed outcome.

    Also checks that the progress tracker is never marked done for the scan.
    """
    mosaic_url = "http://img/RootView_Database/9/mosaic.jpg"
    failed_download = DownloadResult(0, 404, "404", PERMANENT_MISSING)

    sess = MagicMock()
    sess.configure_mock(
        **{
            "mosaic_url.return_value": mosaic_url,
            "download_file.return_value": failed_download,
        }
    )

    progress = MagicMock()
    progress.configure_mock(**{"is_done.return_value": False})

    target = Path("/tmp/mosaic_404.jpg")
    outcome = _download_mosaic(
        sess,
        {},
        9,
        target,
        progress,
        _MACHINE,
        _CONFIG,
        dry_run=False,
    )

    assert outcome.csv_status == "failed"
    assert outcome.error_class == PERMANENT_MISSING
    assert outcome.error_code == "404"
    progress.mark_done.assert_not_called()
|
||||
|
||||
|
||||
def test_download_mosaic_503_transient_class():
    """A mocked 503 / TRANSIENT download result is reported with class TRANSIENT and code "503"."""
    transient_failure = DownloadResult(0, 503, "err", TRANSIENT)

    sess = MagicMock()
    sess.configure_mock(
        **{
            "mosaic_url.return_value": "http://x/m.jpg",
            "download_file.return_value": transient_failure,
        }
    )

    progress = MagicMock()
    progress.configure_mock(**{"is_done.return_value": False})

    target = Path("/tmp/m.jpg")
    outcome = _download_mosaic(
        sess, {}, 1, target, progress, _MACHINE, _CONFIG, dry_run=False
    )

    assert outcome.error_class == TRANSIENT
    assert outcome.error_code == "503"
|
||||
|
||||
|
||||
def test_download_mosaic_empty_body_unknown():
    """A mocked 200 result with an empty body is reported as UNKNOWN with an error message.

    The session mock returns a DownloadResult built from
    ``(0, 200, "0 bytes in response body", UNKNOWN)``; the outcome must carry
    the UNKNOWN class and a non-empty error string.
    """
    sess = MagicMock()
    sess.mosaic_url.return_value = "http://x/m.jpg"
    sess.download_file.return_value = DownloadResult(
        0, 200, "0 bytes in response body", UNKNOWN
    )

    progress = MagicMock()
    progress.is_done.return_value = False

    out = _download_mosaic(
        sess,
        {},
        1,
        Path("/tmp/m.jpg"),
        progress,
        _MACHINE,
        _CONFIG,
        dry_run=False,
    )
    assert out.error_class == UNKNOWN
    # The previous form, `"bytes" in out.error or out.error`, parsed as
    # `("bytes" in out.error) or out.error`: the substring test never changed
    # the result, and a None error raised TypeError instead of failing cleanly.
    # Asserting truthiness directly keeps the same pass/fail contract.
    # NOTE(review): if the message is meant to be passed through unchanged,
    # tighten this to `"bytes" in (out.error or "")` - confirm against
    # _download_mosaic.
    assert out.error, "expected a non-empty error message for an empty body"
|
||||
@@ -0,0 +1,75 @@
|
||||
"""HTTP download result wiring in MachineSession."""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
from spruce.download_result import PERMANENT_MISSING, TRANSIENT
|
||||
from spruce.session import MachineSession
|
||||
|
||||
|
||||
def _minimal_config() -> dict:
|
||||
return {
|
||||
"base_url": "http://127.0.0.1:8010/",
|
||||
"image_base_url": "http://127.0.0.1:8011/",
|
||||
"username": "u",
|
||||
"password": "p",
|
||||
"timeout": 10,
|
||||
"request_delay": 0.0,
|
||||
"workers": 2,
|
||||
}
|
||||
|
||||
|
||||
def _machine() -> dict:
|
||||
return {
|
||||
"label": "T [AMR-0]",
|
||||
"option_value": "x",
|
||||
"machine_id": "0",
|
||||
}
|
||||
|
||||
|
||||
def test_download_file_http_404_result(tmp_path: Path):
    """download_file reports a 404 GET as a zero-size PERMANENT_MISSING result.

    The session's ``http.get`` is replaced with a stub whose response has
    status 404 and which calls ``raise_for_status()`` on it.
    """
    source_url = "http://example/mosaic.jpg"
    target = tmp_path / "f.bin"
    session = MachineSession(_machine(), _minimal_config())

    def always_404(*_args, **_kwargs):
        response = requests.Response()
        response.status_code = 404
        response.url = "http://example/mosaic.jpg"
        response.reason = "Not Found"
        response.raise_for_status()

    session.http.get = always_404  # type: ignore[method-assign]

    result = session.download_file(source_url, target, retries=1)

    assert result.size == 0
    assert result.status_code == 404
    assert result.error_class == PERMANENT_MISSING
    message = result.error or ""
    assert "404" in message
|
||||
|
||||
|
||||
def test_download_tile_row_on_failure(tmp_path: Path):
    """download_tile returns a row marked failed / "500" / TRANSIENT when the GET gives a 500.

    The session's ``http.get`` is replaced with a stub whose response has
    status 500 and which calls ``raise_for_status()`` on it.
    """
    target = tmp_path / "t.jpg"
    tile_spec = dict(
        scan_id=1,
        row_index=0,
        col_index=0,
        x_mm=0.0,
        y_mm=0.0,
        url="http://example/tile",
    )
    session = MachineSession(_machine(), _minimal_config())

    def always_500(*_args, **_kwargs):
        response = requests.Response()
        response.status_code = 500
        response.url = "http://example"
        response.reason = "Server Error"
        response.raise_for_status()

    session.http.get = always_500  # type: ignore[method-assign]

    csv_row = session.download_tile(tile_spec, target, dry_run=False)

    assert csv_row["status"] == "failed"
    assert csv_row["error_code"] == "500"
    assert csv_row["error_class"] == TRANSIENT
|
||||
@@ -7,6 +7,8 @@ import yaml
|
||||
|
||||
from spruce.settings import (
|
||||
MAX_SAFE_WORKERS,
|
||||
SCANS_CSV_FIELDS,
|
||||
TILES_CSV_FIELDS,
|
||||
_clamp_workers,
|
||||
load_config,
|
||||
)
|
||||
@@ -91,3 +93,24 @@ def test_load_config_missing_password_exits(tmp_path):
|
||||
path.write_text(yaml.dump({"username": "x"}))
|
||||
with pytest.raises(SystemExit):
|
||||
load_config(str(path))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CSV schemas (failure columns)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_scans_csv_fields_includes_mosaic_failure_columns():
    """SCANS_CSV_FIELDS must contain every mosaic status/error column."""
    required = (
        "mosaic_download_status",
        "mosaic_error",
        "mosaic_error_code",
        "mosaic_error_class",
    )
    absent = [column for column in required if column not in SCANS_CSV_FIELDS]
    assert not absent
|
||||
|
||||
|
||||
def test_tiles_csv_fields_includes_status_and_error_columns():
    """TILES_CSV_FIELDS must contain the status/error columns, with "status" before "downloaded_at"."""
    required = ("status", "error", "error_code", "error_class")
    absent = [column for column in required if column not in TILES_CSV_FIELDS]
    assert not absent

    status_position = TILES_CSV_FIELDS.index("status")
    downloaded_position = TILES_CSV_FIELDS.index("downloaded_at")
    assert status_position < downloaded_position
|
||||
|
||||
Reference in New Issue
Block a user