Add sample_random_scans script and first-page list-scans option

- scripts/sample_random_scans.sh: pick one random scan per machine (by default from the first list page only) and download its mosaic and/or tiles
- --list-scans-first-page-only: with --list-scans, fetch only the first scan-list page in a single HTTP request (up to 320 scans, no pagination)
- scripts/machines.example.txt: example machine list; the local machines.txt (copied from the example) is now listed in .gitignore
- README: document usage
This commit is contained in:
2026-04-26 20:56:52 -04:00
parent 08a29d124a
commit 4118e6e4f0
6 changed files with 236 additions and 7 deletions
+15 -2
View File
@@ -105,6 +105,14 @@ def parse_args() -> argparse.Namespace:
action="store_true",
help="Print all scans for --machine and exit",
)
p.add_argument(
"--list-scans-first-page-only",
action="store_true",
help=(
"With --list-scans: only fetch the first list page (up to 320 scans) "
"— one HTTP request, no pagination"
),
)
p.add_argument(
"--recheck",
action="store_true",
@@ -134,6 +142,9 @@ def main() -> None:
if args.verbose:
logging.getLogger().setLevel(logging.DEBUG)
if args.list_scans_first_page_only and not args.list_scans:
sys.exit("--list-scans-first-page-only requires --list-scans")
# --list-machines doesn't need credentials
if args.list_machines:
base_url = "http://205.149.147.131:8010/"
@@ -213,7 +224,8 @@ def main() -> None:
sess = MachineSession(machines[0], config)
if not sess.login():
sys.exit("Login failed.")
scans = sess.get_all_scans()
first_only = bool(args.list_scans_first_page_only)
scans = sess.get_all_scans(first_page_only=first_only)
print(f"{'ID':>8} {'Date':<22} {'Name':<40} {'Status'}")
print("-" * 85)
for sc in scans:
@@ -221,7 +233,8 @@ def main() -> None:
f"{sc['scan_id']:>8} {sc.get('scan_time', ''):<22} "
f"{sc.get('name', ''):<40} {sc.get('status', '')}"
)
print(f"\nTotal: {len(scans)} scans")
total_note = " (first page only — not full archive)" if first_only else ""
print(f"\nTotal: {len(scans)} scans{total_note}")
return
log.info(
+17 -5
View File
@@ -77,16 +77,28 @@ class MachineSession:
# Scan list (paginated)
# ------------------------------------------------------------------
def get_all_scans(self) -> list[dict[str, Any]]:
def get_all_scans(
self, first_page_only: bool = False
) -> list[dict[str, Any]]:
"""
Fetch the complete scan list across all pages.
Fetch the scan list from the RootView table.
Uses a large FilterCount (320) to minimise round-trips.
Falls back to repeated pages if the list is longer.
By default, walks all pages. With first_page_only=True, only the first
request is made (FilterCount 320) — enough for a random pick without
paginating a large history.
"""
page_size = 320
if first_page_only:
all_scans = self._fetch_scan_page(0, page_size)
log.info(
"[%s] First page only: %d scan(s) (not paginating).",
self.machine["label"],
len(all_scans),
)
return all_scans
all_scans: list[dict[str, Any]] = []
start = 0
page_size = 320
while True:
page_scans = self._fetch_scan_page(start, page_size)