Enhance CSV metadata with error tracking for mosaics and tiles
This commit is contained in:
+46
-7
@@ -12,6 +12,13 @@ from typing import Any
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from spruce.download_result import (
|
||||
OK,
|
||||
UNKNOWN,
|
||||
DownloadResult,
|
||||
classify_http_error,
|
||||
error_code_str,
|
||||
)
|
||||
from spruce.parsers import parse_scan_row, parse_scan_view, _grid_values
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
@@ -205,8 +212,14 @@ class MachineSession:
|
||||
# Downloads
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def download_file(self, url: str, dest: Path, retries: int = 3) -> int:
|
||||
"""Stream-download url to dest with retries. Returns bytes written (0 on failure)."""
|
||||
def download_file(
|
||||
self, url: str, dest: Path, retries: int = 3
|
||||
) -> DownloadResult:
|
||||
"""
|
||||
Stream-download url to dest with retries.
|
||||
|
||||
Returns a DownloadResult (size in bytes, optional HTTP status code, final error message, error class).
|
||||
"""
|
||||
dest.parent.mkdir(parents=True, exist_ok=True)
|
||||
backoff = 5.0
|
||||
for attempt in range(1, retries + 1):
|
||||
@@ -221,8 +234,21 @@ class MachineSession:
|
||||
if chunk:
|
||||
fh.write(chunk)
|
||||
size += len(chunk)
|
||||
return size
|
||||
if size == 0:
|
||||
return DownloadResult(
|
||||
0,
|
||||
resp.status_code,
|
||||
"0 bytes in response body",
|
||||
UNKNOWN,
|
||||
)
|
||||
return DownloadResult(size, resp.status_code, None, OK)
|
||||
except Exception as exc:
|
||||
sc: int | None = None
|
||||
if (
|
||||
isinstance(exc, requests.HTTPError)
|
||||
and exc.response is not None
|
||||
):
|
||||
sc = exc.response.status_code
|
||||
if attempt < retries:
|
||||
log.debug(
|
||||
"Attempt %d/%d failed %s: %s — retrying in %.0fs",
|
||||
@@ -241,7 +267,9 @@ class MachineSession:
|
||||
url,
|
||||
exc,
|
||||
)
|
||||
return 0
|
||||
cl = classify_http_error(sc, exc)
|
||||
return DownloadResult(0, sc, str(exc), cl)
|
||||
return DownloadResult(0, None, "download_file: exhausted", UNKNOWN)
|
||||
|
||||
def download_tile(
|
||||
self, tile: dict[str, Any], dest: Path, dry_run: bool
|
||||
@@ -260,11 +288,22 @@ class MachineSession:
|
||||
"local_path": str(dest),
|
||||
"downloaded_at": "",
|
||||
"file_size_bytes": "",
|
||||
"status": "",
|
||||
"error": "",
|
||||
"error_code": "",
|
||||
"error_class": "",
|
||||
}
|
||||
if dry_run:
|
||||
row["status"] = "dry_run"
|
||||
return row
|
||||
size = self.download_file(tile["url"], dest)
|
||||
if size:
|
||||
res = self.download_file(tile["url"], dest)
|
||||
if res.ok:
|
||||
row["status"] = "downloaded"
|
||||
row["downloaded_at"] = datetime.now(timezone.utc).isoformat()
|
||||
row["file_size_bytes"] = size
|
||||
row["file_size_bytes"] = res.size
|
||||
else:
|
||||
row["status"] = "failed"
|
||||
row["error"] = res.error or ""
|
||||
row["error_code"] = error_code_str(res.status_code)
|
||||
row["error_class"] = res.error_class
|
||||
return row
|
||||
|
||||
Reference in New Issue
Block a user