64 lines
1.5 KiB
Python
64 lines
1.5 KiB
Python
"""
Structured HTTP download result and error classification.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
|
|
import requests
|
|
|
|
# Public labels used as ``error_class`` values.
PERMANENT_MISSING: str = "permanent_missing"
TRANSIENT: str = "transient"
UNKNOWN: str = "unknown"
# The empty string marks success, i.e. there is no error class to report.
OK: str = ""
|
|
|
|
|
|
@dataclass(frozen=True)
class DownloadResult:
    """Immutable outcome of a streaming download, reported after any retries."""

    # Field order is part of the positional constructor signature.
    size: int  # presumably the number of bytes received -- confirm with producer
    status_code: int | None  # HTTP status; None is allowed when there is none
    error: str | None  # error text; None means no error was recorded
    error_class: str  # classification label; presumably a module constant

    @property
    def ok(self) -> bool:
        """Return True only when size is positive and no error was recorded."""
        has_payload = self.size > 0
        return has_payload and self.error is None
|
|
|
|
|
|
def classify_http_error(
    status_code: int | None, exc: BaseException | None
) -> str:
    """Map a failed download to one of the module's error-class constants.

    The HTTP status is consulted first, then the exception:

    * 404/410 => ``PERMANENT_MISSING`` (likely gone forever).
    * 5xx => ``TRANSIENT`` (retry may help).
    * timeouts, connection failures, truncated chunked bodies and
      non-``requests`` ``OSError`` instances => ``TRANSIENT``.
    * anything else => ``UNKNOWN``.

    Args:
        status_code: HTTP status of the response, or None if there was none.
        exc: Exception raised by the download, or None.

    Returns:
        ``PERMANENT_MISSING``, ``TRANSIENT`` or ``UNKNOWN``.
    """
    if status_code in (404, 410):
        return PERMANENT_MISSING
    if status_code is not None and 500 <= status_code < 600:
        return TRANSIENT
    if exc is None:
        return UNKNOWN

    # requests.exceptions.RequestException derives from IOError (an alias of
    # OSError), so requests errors must be sorted out *before* the generic
    # OSError check; otherwise every requests failure (InvalidURL,
    # MissingSchema, TooManyRedirects, HTTPError for a 4xx, ...) would be
    # reported as transient and retried pointlessly.
    if isinstance(exc, requests.exceptions.RequestException):
        # ConnectTimeout and ReadTimeout are subclasses of Timeout, so the
        # base class covers them.
        transport_errors = (
            requests.exceptions.Timeout,
            requests.exceptions.ConnectionError,
            requests.exceptions.ChunkedEncodingError,
        )
        if isinstance(exc, transport_errors):
            return TRANSIENT
        return UNKNOWN

    # Non-requests OS-level errors (e.g. raw socket failures) stay transient.
    if isinstance(exc, OSError):
        return TRANSIENT
    return UNKNOWN
|
|
|
|
|
|
def error_code_str(status_code: int | None) -> str:
    """Render a status code as text, using an empty string for None."""
    return "" if status_code is None else str(status_code)
|