commit e122f6435a9752fa3605947fdfb9c0bca55def13
Author: James Kolpack <james.kolpack@gmail.com>
Date:   Wed Apr 22 10:41:18 2026 -0400

    Initial commit
    
    Add spruce scraper with CLI, session management, parsers, progress tracking,
    recheck logic, and test suite. Includes example config and README.

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0022c39
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+config.yaml
+archives/
+__pycache__/
+*.pyc
+.DS_Store
+explore_dumps/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..3e6c1a5
--- /dev/null
+++ b/README.md
@@ -0,0 +1,185 @@
+# Spruce Minirhizotron Scraper
+
+A Python tool for archiving image data collected by minirhizotron cameras at the SPRUCE experiment site. It authenticates against the RootView web interface, enumerates every scan on each of the 12 camera machines, and downloads image tiles and mosaics into a structured local archive with full metadata.
+
+---
+
+## Background
+
+[Minirhizotron cameras](https://en.wikipedia.org/wiki/Minirhizotron) are inserted into clear tubes buried in the ground to image root systems non-destructively over time. This project archives data from the **SPRUCE** (Spruce and Peatland Responses Under Changing Environments) experiment, which monitors boreal peatland responses to warming and elevated CO₂.
+
+The 12 AMR camera machines (`BW1-4` through `BW3-21`) are managed by a **RootView** web application at `http://205.149.147.131:8010`. Each scan captures a grid of overlapping image tiles along a buried tube. The server also pre-renders a full stitched mosaic for each scan.
+
+---
+
+## Archive inventory (as of April 2026)
+
+| Machine | Scans | Scan type (sampled) |
+|---|---:|---|
+| BW1-4 [AMR-15] | 6,121 | Mixed (full-tube + partial) |
+| BW1-6 [AMR-19] | 18,198 | Full-tube (~33,784 tiles, ~1.7 GB each) |
+| BW1-7 [AMR-18] | 430 | Full-tube (~33,784 tiles, ~1.8 GB each) |
+| BW2-8 [AMR-25] | 8,191 | Partial (~400 tiles, ~10 MB each) |
+| BW2-10 [AMR-22] | 16,537 | Not yet sampled |
+| BW2-11 [AMR-23] | 26,763 | Not yet sampled |
+| BW2-13 [AMR-24] | 13,537 | Not yet sampled |
+| BW3-16 [AMR-16] | 7,325 | Not yet sampled |
+| BW3-17 [AMR-20] | 471 | Not yet sampled |
+| BW3-19 [AMR-21] | 15,186 | Not yet sampled |
+| BW3-20 [AMR-26] | 23,052 | Full-tube (~33,784 tiles, ~1.95 GB each) |
+| BW3-21 [AMR-17] | 10,115 | Not yet sampled |
+| **Total** | **145,926** | |
+
+### Storage estimates
+
+| What | Size | Notes |
+|---|---|---|
+| Mosaics only | ~2.4 TB | 145,926 × 16.6 MB per mosaic |
+| Full tiles (mixed scans) | ~160 TB | Assumes 40% full-tube, 60% partial |
+| Full tiles (worst case) | ~368 TB | If all scans are full-tube |
+
+A full-tube scan covers a 310 mm × 740 mm cylinder in 3.01 mm × 2.26 mm steps, producing a **103 × 328 = 33,784 tile grid**. Each tile is a JPEG averaging ~79 KB; tiles at the tube surface are larger, at ~137 KB.
+
+### Download speed
+
+Tile downloads are server-limited: the RootView PHP backend renders tiles on-demand, sustaining ~**0.67 tiles/sec** with 8 parallel workers regardless of local bandwidth. Mosaics are pre-rendered and download ~20× faster per MB.
+
+| Scenario | Estimated time |
+|---|---|
+| All mosaics (4 workers) | ~3 months |
+| Full tiles for one scan (8 workers) | ~14 hours |
+| All tiles, full-tube machines only | Years — not recommended |
+
+**Recommended approach:** archive mosaics first (`--mosaic-only`), then selectively download tiles for priority scans.
+
+---
+
+## Setup
+
+```bash
+# 1. Clone / download this repo
+cd spruce_scrapper
+
+# 2. Install dependencies (Python 3.10+)
+pip install -r requirements.txt
+
+# 3. Configure credentials
+cp config.example.yaml config.yaml
+# Edit config.yaml: set username and password
+```
+
+`config.yaml` is gitignored and never committed.
+
+---
+
+## Usage
+
+```bash
+# List all available machines (no login needed)
+python scraper.py --list-machines
+
+# List all scans for a machine
+python scraper.py --list-scans --machine "BW3-20 [AMR-26]"
+
+# Preview what would be downloaded (dry run)
+python scraper.py --machine "BW3-20 [AMR-26]" --dry-run
+
+# Download mosaics only for one machine
+python scraper.py --machine "BW3-20 [AMR-26]" --mosaic-only
+
+# Download mosaics for all machines
+python scraper.py --mosaic-only
+
+# Download all tiles for a specific scan
+python scraper.py --machine "BW3-20 [AMR-26]" --scan-id 158374 --workers 4
+
+# Resume an interrupted download: re-run the same command (completed files are skipped; run --recheck first, see "Resume and reliability")
+python scraper.py --machine "BW3-20 [AMR-26]" --scan-id 158374 --workers 4
+```
+
+### All options
+
+| Flag | Description |
+|---|---|
+| `--config FILE` | Config file path (default: `config.yaml`) |
+| `--machine LABEL` | Restrict to one machine, e.g. `"BW3-20 [AMR-26]"` |
+| `--scan-id ID` | Download only this scan (use with `--machine`) |
+| `--mosaic-only` | Download mosaics only; skip individual tiles |
+| `--dry-run` | Print what would be downloaded without saving |
+| `--workers N` | Parallel download threads (default: 2, hard cap: 4) |
+| `--recheck` | Scan archive for zero-byte/missing tiles and remove them from `.progress.json` so they re-download on next run |
+| `--list-machines` | Print all machines and exit |
+| `--list-scans` | Print all scans for `--machine` and exit |
+| `--verbose` / `-v` | Debug logging |
+
+---
+
+## Output layout
+
+```
+archives/
+├── .progress.json              # tracks completed URLs for resume support
+├── scans.csv                   # scan-level metadata for every processed scan
+├── tiles.csv                   # tile-level metadata for every downloaded tile
+│
+└── BW3-20__AMR-26/
+    └── 2024-07-29/
+        └── 158374/
+            ├── metadata.json   # full scan parameters (grid, timestamps, etc.)
+            ├── mosaic.jpg      # pre-stitched full image (~16 MB)
+            └── tiles/
+                ├── tile_r000_c000.jpg   # row 0, column 0
+                ├── tile_r000_c001.jpg
+                └── ...                 # 33,784 tiles total for a full-tube scan
+```
+
+Tile filenames encode position: `tile_r{row}_c{col}.jpg` where row increases with depth (Y in mm) and column increases along the tube circumference (X in mm).
+
+### Metadata files
+
+**`scans.csv`** columns: `machine`, `machine_id`, `scan_id`, `name`, `scan_time`, `start_x`, `start_y`, `end_x`, `end_y`, `dx`, `dy`, `nx`, `ny`, `total_tiles`, `scan_lines`, `scan_mode`, `start_datetime`, `end_datetime`, `status`, `user`, `disk_space_mb`, `mosaic_url`, `mosaic_local_path`, `mosaic_downloaded`
+
+**`tiles.csv`** columns: `machine`, `machine_id`, `scan_id`, `scan_time`, `row_index`, `col_index`, `x_mm`, `y_mm`, `url`, `local_path`, `downloaded_at`, `file_size_bytes`
+
+---
+
+## Site structure (RootView)
+
+The RootView interface runs on a standard PHP stack. Key endpoints discovered:
+
+| Endpoint | Description |
+|---|---|
+| `POST index.php` | Login (`RTLLogin=1`, `RTLNAME`, `RTLUSER`, `RTLPWD`) |
+| `POST index.php {cmd:scan, start:N, FilterCount:320}` | Paginated scan list |
+| `GET index.php?cmd=scan&mode=view&id=ID` | Scan detail (grid params, disk usage) |
+| `GET index.php?cmd=image&mode=image_scan&id=ID&s=1&x=X&y=Y` | Individual tile JPEG |
+| `GET http://<host>:8011/RootView_Database/ID/mosaic.jpg` | Pre-stitched mosaic |
+
+Grid coordinates (X, Y) are in millimetres, starting from `(start_x, start_y)` with step `(dx, dy)`.
+
+---
+
+## Resume and reliability
+
+- **Resumable**: `.progress.json` records every completed URL. Re-running the same command skips already-downloaded files.
+- **Retry logic**: each tile download retries up to 3 times with exponential backoff (5 s → 10 s → 20 s) before logging a warning and moving on.
+- **Worker cap**: the RootView server renders tiles on a single-threaded PHP process. Running more than 4 concurrent requests causes cascading read timeouts. The default is 2 workers; the scraper hard-caps at 4 and warns loudly if you try to exceed it.
+- **Crash recovery**: if a run is killed mid-flight, some in-progress tiles may have been written as zero-byte files without being marked complete. Run `--recheck` before resuming — it deletes zero-byte files on disk and removes their URLs from `.progress.json` so they are cleanly re-downloaded.
+
+```bash
+# After any interrupted run, always do this first:
+python scraper.py --recheck
+# Then resume normally:
+python scraper.py --machine "BW3-20 [AMR-26]" --scan-id 158374
+```
+
+---
+
+## Dependencies
+
+| Package | Purpose |
+|---|---|
+| `requests` | HTTP client |
+| `beautifulsoup4` + `lxml` | HTML parsing |
+| `pyyaml` | Config file |
+| `tqdm` | Progress bars |
diff --git a/config.example.yaml b/config.example.yaml
new file mode 100644
index 0000000..7df15f2
--- /dev/null
+++ b/config.example.yaml
@@ -0,0 +1,29 @@
+# RootView scraper configuration
+# Copy this to config.yaml and fill in your credentials.
+# config.yaml is gitignored — never commit it.
+
+base_url: "http://205.149.147.131:8010/"
+
+# Login credentials (same for all machines)
+username: "your_username_here"
+password: "your_password_here"
+
+# Local directory where archives will be written
+output_dir: "archives"
+
+# Number of parallel download threads.
+# WARNING: The RootView server is single-threaded and will time out under heavy
+# load. Measured safe limit is 2 workers. Values above 4 cause cascading
+# timeouts and lost tiles. Do not exceed 4.
+workers: 2
+
+# Request timeout in seconds
+timeout: 60
+
+# Delay between requests to a single machine (seconds, float ok)
+request_delay: 0.5
+
+# Optional: limit to specific machines by label (comment out to scrape all)
+# machines:
+#   - "BW1-4 [AMR-15]"
+#   - "BW1-6 [AMR-19]"
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..8093dd9
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,6 @@
+requests>=2.31.0
+beautifulsoup4>=4.12.0
+lxml>=5.0.0
+pyyaml>=6.0.1
+tqdm>=4.66.0
+pytest>=8.0
diff --git a/scraper.py b/scraper.py
new file mode 100644
index 0000000..8980515
--- /dev/null
+++ b/scraper.py
@@ -0,0 +1,5 @@
+"""Entry point — delegates entirely to spruce.cli."""
+from spruce.cli import main
+
+if __name__ == "__main__":
+    main()
diff --git a/spruce/__init__.py b/spruce/__init__.py
new file mode 100644
index 0000000..4981bb9
--- /dev/null
+++ b/spruce/__init__.py
@@ -0,0 +1 @@
+# spruce — minirhizotron archive library
diff --git a/spruce/cli.py b/spruce/cli.py
new file mode 100644
index 0000000..6370ea9
--- /dev/null
+++ b/spruce/cli.py
@@ -0,0 +1,259 @@
+"""
+Command-line interface for the spruce scraper.
+"""
+
+import argparse
+import logging
+import os
+import sys
+from pathlib import Path
+
+import yaml
+
+from spruce.orchestrator import scrape_machine
+from spruce.parsers import parse_machine_option
+from spruce.progress import ProgressTracker, CsvWriter
+from spruce.recheck import recheck_archive, recheck_tile_files
+from spruce.settings import (
+    DEFAULT_CONFIG,
+    MAX_SAFE_WORKERS,
+    PROGRESS_FILENAME,
+    SCANS_CSV_FIELDS,
+    SCANS_CSV_FILENAME,
+    TILES_CSV_FIELDS,
+    TILES_CSV_FILENAME,
+    _clamp_workers,
+    load_config,
+)
+from spruce.session import MachineSession
+
+import requests
+from bs4 import BeautifulSoup
+from urllib.parse import urljoin
+
+log = logging.getLogger(__name__)
+
+
+def discover_machines(base_url: str, timeout: int = 30) -> list[dict]:
+    """Return a descriptor dict for every machine offered on the login page.
+
+    Fetches ``index.php`` under *base_url* and decodes each ``<option>`` of the
+    ``RTLNAME`` ``<select>`` with ``parse_machine_option``.
+
+    Args:
+        base_url: Root URL of the RootView server.
+        timeout: Request timeout in seconds.
+
+    Returns:
+        One dict per decodable option, in page order; an empty list (with a
+        warning logged) when the selector is absent from the page.
+
+    Raises:
+        requests.RequestException: On connection failure or an HTTP error
+            status (via ``raise_for_status``).
+    """
+    resp = requests.get(urljoin(base_url, "index.php"), timeout=timeout)
+    resp.raise_for_status()
+    soup = BeautifulSoup(resp.text, "lxml")
+    select = soup.find("select", {"name": "RTLNAME"})
+    if select is None:
+        log.warning("Could not find machine selector on login page.")
+        return []
+    # An <option> without a value attribute (e.g. a placeholder entry) carries
+    # nothing to decode; skip it rather than raising KeyError on opt["value"].
+    return [
+        parse_machine_option(opt.get_text(strip=True), opt["value"])
+        for opt in select.find_all("option")
+        if opt.has_attr("value")
+    ]
+
+
+def parse_args() -> argparse.Namespace:
+    """Build the scraper's argument parser and parse the process arguments.
+
+    Returns:
+        Namespace with ``config``, ``machine``, ``scan_id``, ``mosaic_only``,
+        ``dry_run``, ``workers``, ``list_machines``, ``list_scans``,
+        ``recheck`` and ``verbose``.
+    """
+    parser = argparse.ArgumentParser(
+        description="Archive minirhizotron image tiles from RootView.",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+
+    recheck_help = (
+        "Scan the archive for zero-byte or missing tile files whose URLs "
+        "are marked complete in .progress.json, remove them from progress, "
+        "and report how many were re-queued. Run before resuming after a crash."
+    )
+
+    # (flag spellings, add_argument keyword options), listed in --help order.
+    option_table = [
+        (
+            ("--config",),
+            {
+                "default": DEFAULT_CONFIG,
+                "metavar": "FILE",
+                "help": f"YAML config file (default: {DEFAULT_CONFIG})",
+            },
+        ),
+        (
+            ("--machine",),
+            {
+                "metavar": "LABEL",
+                "help": 'Scrape only this machine label, e.g. "BW3-20 [AMR-26]"',
+            },
+        ),
+        (
+            ("--scan-id",),
+            {
+                "type": int,
+                "metavar": "ID",
+                "help": "Download only this specific scan ID (use with --machine)",
+            },
+        ),
+        (
+            ("--mosaic-only",),
+            {
+                "action": "store_true",
+                "help": "Download mosaics only; skip individual tiles",
+            },
+        ),
+        (
+            ("--dry-run",),
+            {
+                "action": "store_true",
+                "help": "Preview what would be downloaded without saving any files",
+            },
+        ),
+        (
+            ("--workers",),
+            {
+                "type": int,
+                "metavar": "N",
+                "help": "Override parallel download threads from config",
+            },
+        ),
+        (
+            ("--list-machines",),
+            {
+                "action": "store_true",
+                "help": "Print available machines and exit (no credentials needed)",
+            },
+        ),
+        (
+            ("--list-scans",),
+            {
+                "action": "store_true",
+                "help": "Print all scans for --machine and exit",
+            },
+        ),
+        (("--recheck",), {"action": "store_true", "help": recheck_help}),
+        (
+            ("--verbose", "-v"),
+            {"action": "store_true", "help": "Enable debug logging"},
+        ),
+    ]
+    for flags, options in option_table:
+        parser.add_argument(*flags, **options)
+
+    return parser.parse_args()
+
+
+def main() -> None:
+    """Run the scraper CLI: parse arguments, then list, recheck or download.
+
+    Modes are handled in this order, each returning as soon as it finishes:
+    ``--list-machines`` (works without a config file), ``--recheck``,
+    ``--list-scans``, and finally the download run over the selected machines.
+
+    Exits through ``sys.exit`` with a message when the config file is missing,
+    the machine list cannot be retrieved, no machine matches the filter,
+    ``--list-scans`` does not resolve to exactly one machine, or login fails
+    for ``--list-scans``.
+    """
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s  %(levelname)-8s  %(message)s",
+        datefmt="%H:%M:%S",
+    )
+    args = parse_args()
+
+    if args.verbose:
+        logging.getLogger().setLevel(logging.DEBUG)
+
+    # --list-machines doesn't need credentials
+    if args.list_machines:
+        base_url = "http://205.149.147.131:8010/"
+        timeout = 30
+        if os.path.exists(args.config):
+            # Binary mode leaves encoding detection to PyYAML, and the context
+            # manager closes the handle. An empty or non-mapping file keeps the
+            # built-in defaults instead of crashing on ``.get``.
+            with open(args.config, "rb") as fh:
+                cfg = yaml.safe_load(fh)
+            if isinstance(cfg, dict):
+                base_url = cfg.get("base_url", base_url)
+                timeout = cfg.get("timeout", timeout)
+        machines = discover_machines(base_url, timeout)
+        print(f"{'Label':<25}  {'ID':>4}  {'IP':<17}  {'Version'}")
+        print("-" * 62)
+        for m in machines:
+            print(
+                f"{m['label']:<25}  {m['machine_id']:>4}  {m['ip']:<17}  {m['version']}"
+            )
+        return
+
+    if not os.path.exists(args.config):
+        sys.exit(
+            f"Config file '{args.config}' not found.\n"
+            f"Copy config.example.yaml to {args.config} and fill in your credentials."
+        )
+
+    config = load_config(args.config)
+    if args.workers:
+        config["workers"] = _clamp_workers(args.workers)
+
+    output_dir = Path(config["output_dir"])
+
+    # --recheck: validate archive integrity and re-queue bad tiles
+    if args.recheck:
+        progress = ProgressTracker(output_dir / PROGRESS_FILENAME)
+        n_bad = recheck_tile_files(output_dir, progress)
+        n_requeued = recheck_archive(output_dir, progress)
+        if n_bad == 0 and n_requeued == 0:
+            log.info("Archive looks clean. No action needed.")
+        else:
+            log.info(
+                "Recheck complete: %d zero-byte file(s) deleted, "
+                "%d URL(s) re-queued for download.",
+                n_bad,
+                n_requeued,
+            )
+        return
+
+    # Build machine list
+    all_machines = discover_machines(config["base_url"], config["timeout"])
+    if not all_machines:
+        sys.exit("Could not retrieve machine list from server.")
+
+    # Apply --machine / config machines filter (the CLI flag wins over config)
+    filter_labels: list[str] | None = None
+    if args.machine:
+        filter_labels = [args.machine]
+    elif config.get("machines"):
+        filter_labels = list(config["machines"])
+
+    if filter_labels:
+        machines = [m for m in all_machines if m["label"] in filter_labels]
+        # Build the lookup set once rather than once per requested label.
+        found_labels = {m["label"] for m in machines}
+        not_found = [label for label in filter_labels if label not in found_labels]
+        if not_found:
+            log.warning("Unknown machine label(s): %s", not_found)
+    else:
+        machines = all_machines
+
+    if not machines:
+        sys.exit("No machines selected.")
+
+    # --list-scans: print and exit
+    if args.list_scans:
+        if len(machines) != 1:
+            sys.exit("--list-scans requires exactly one machine (use --machine).")
+        sess = MachineSession(machines[0], config)
+        if not sess.login():
+            sys.exit("Login failed.")
+        scans = sess.get_all_scans()
+        print(f"{'ID':>8}  {'Date':<22}  {'Name':<40}  {'Status'}")
+        print("-" * 85)
+        for sc in scans:
+            print(
+                f"{sc['scan_id']:>8}  {sc.get('scan_time', ''):<22}  "
+                f"{sc.get('name', ''):<40}  {sc.get('status', '')}"
+            )
+        print(f"\nTotal: {len(scans)} scans")
+        return
+
+    log.info(
+        "Scraping %d machine(s): %s",
+        len(machines),
+        ", ".join(m["label"] for m in machines),
+    )
+    if args.mosaic_only:
+        log.info("Mode: mosaics only (individual tiles skipped)")
+    if args.dry_run:
+        log.info("Mode: dry-run (no files will be written)")
+
+    # Shared progress and CSV writers
+    progress = ProgressTracker(output_dir / PROGRESS_FILENAME)
+    tiles_csv = CsvWriter(output_dir / TILES_CSV_FILENAME, TILES_CSV_FIELDS)
+    scans_csv = CsvWriter(output_dir / SCANS_CSV_FILENAME, SCANS_CSV_FIELDS)
+
+    total = 0
+    try:
+        for machine in machines:
+            count = scrape_machine(
+                machine=machine,
+                config=config,
+                output_dir=output_dir,
+                progress=progress,
+                tiles_csv=tiles_csv,
+                scans_csv=scans_csv,
+                dry_run=args.dry_run,
+                mosaic_only=args.mosaic_only,
+                scan_id_filter=args.scan_id,
+            )
+            total += count
+    finally:
+        # Runs on normal completion, on errors and on Ctrl-C alike.
+        tiles_csv.close()
+        scans_csv.close()
+        progress.save()
+
+    if args.dry_run:
+        log.info("Dry run complete.")
+    else:
+        log.info("Done. Total files downloaded: %d", total)
+        log.info("Tiles CSV : %s", output_dir / TILES_CSV_FILENAME)
+        log.info("Scans CSV : %s", output_dir / SCANS_CSV_FILENAME)
+        log.info("Progress  : %s", output_dir / PROGRESS_FILENAME)
diff --git a/spruce/orchestrator.py b/spruce/orchestrator.py
new file mode 100644
index 0000000..1525bf5
--- /dev/null
+++ b/spruce/orchestrator.py
@@ -0,0 +1,307 @@
+"""
+High-level scrape orchestration: drives the per-machine and per-scan loops.
+"""
+
+import json
+import logging
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
+from typing import Any
+
+from tqdm import tqdm
+
+from spruce.paths import machine_dir_name, tile_dest, mosaic_dest, _extract_date
+from spruce.progress import ProgressTracker, CsvWriter
+from spruce.session import MachineSession
+
+log = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Per-scan helpers
+# ---------------------------------------------------------------------------
+
+
+def _download_mosaic(
+    sess: MachineSession,
+    scan_meta: dict[str, Any],
+    scan_id: int,
+    mosaic_path: Path,
+    progress: ProgressTracker,
+    machine: dict[str, Any],
+    dry_run: bool,
+) -> bool:
+    """Fetch the mosaic for *scan_id* unless *progress* already records it.
+
+    Returns True only when this call downloaded the file. Returns False when
+    the mosaic URL is already marked done, when *dry_run* is set (the intended
+    transfer is only logged), or when ``sess.download_file`` reports a falsy
+    size. ``scan_meta`` is not read by this function.
+    """
+    mosaic_src = sess.mosaic_url(scan_id)
+
+    # Guard clauses: nothing to transfer in either of these cases.
+    if progress.is_done(mosaic_src):
+        return False
+    if dry_run:
+        log.info("[DRY-RUN] Mosaic: %s → %s", mosaic_src, mosaic_path)
+        return False
+
+    label = machine["label"]
+    log.info("[%s] Downloading mosaic for scan %d …", label, scan_id)
+    n_bytes = sess.download_file(mosaic_src, mosaic_path)
+    if not n_bytes:
+        return False
+
+    # Record and persist right away so the mosaic is skipped on the next run.
+    progress.mark_done(mosaic_src)
+    progress.save()
+    log.info("[%s] Mosaic saved: %s (%.1f MB)", label, mosaic_path, n_bytes / 1e6)
+    return True
+
+
+def _download_tiles_for_scan(
+    sess: MachineSession,
+    tiles: list[dict[str, Any]],
+    scan_meta: dict[str, Any],
+    scan_id: int,
+    output_dir: Path,
+    machine: dict[str, Any],
+    config: dict[str, Any],
+    progress: ProgressTracker,
+    tiles_csv: CsvWriter,
+    dry_run: bool,
+) -> int:
+    """Download every tile of a scan that *progress* does not mark as done.
+
+    Tiles are fetched on ``config["workers"]`` threads. Each result with a
+    truthy ``file_size_bytes`` is marked done and buffered; buffered rows are
+    written to *tiles_csv* and *progress* is saved in batches, and once more
+    when the loop ends, including when it ends through Ctrl-C or an exception.
+
+    Args:
+        sess: Logged-in session whose ``download_tile`` performs each fetch.
+        tiles: Tile dicts for the scan; each pending one gets a ``scan_time``
+            key added in place.
+        scan_meta: Scan metadata; supplies ``scan_time`` and the tile path.
+        scan_id: Scan identifier, used for logging and the progress bar.
+        output_dir: Archive root passed to ``tile_dest``.
+        machine: Machine descriptor (``label`` is used for logging).
+        config: Loaded configuration; ``workers`` sets the thread count.
+        progress: Tracker consulted for, and updated with, finished URLs.
+        tiles_csv: Writer that receives one row per downloaded tile.
+        dry_run: When true, log a preview of pending tiles and download nothing.
+
+    Returns:
+        Number of tiles downloaded by this call (0 for a dry run).
+    """
+    pending = [t for t in tiles if not progress.is_done(t["url"])]
+    log.info(
+        "[%s] Scan %d: %d tiles total, %d pending.",
+        machine["label"],
+        scan_id,
+        len(tiles),
+        len(pending),
+    )
+
+    if dry_run:
+        for t in pending[:5]:
+            log.info("[DRY-RUN] Tile: %s", t["url"])
+        if len(pending) > 5:
+            log.info("[DRY-RUN] … and %d more tiles.", len(pending) - 5)
+        return 0
+
+    # Attach scan_time for CSV rows
+    for t in pending:
+        t["scan_time"] = scan_meta.get("scan_time", "")
+
+    workers: int = config["workers"]
+    save_every = max(50, workers * 4)
+    downloaded = 0
+    batch: list[dict[str, Any]] = []
+
+    def flush() -> None:
+        """Write the buffered CSV rows, persist progress, empty the buffer."""
+        for row in batch:
+            tiles_csv.write(row)
+        progress.save()
+        batch.clear()
+
+    with ThreadPoolExecutor(max_workers=workers) as pool:
+        try:
+            futures = [
+                pool.submit(
+                    sess.download_tile,
+                    tile,
+                    tile_dest(output_dir, machine, scan_meta, tile),
+                    False,
+                )
+                for tile in pending
+            ]
+
+            with tqdm(
+                total=len(pending),
+                desc=f"{machine['label']} scan {scan_id}",
+                unit="tile",
+                leave=True,
+            ) as pbar:
+                for future in as_completed(futures):
+                    result = future.result()
+                    if result.get("file_size_bytes"):
+                        batch.append(result)
+                        progress.mark_done(result["url"])
+                        downloaded += 1
+                    pbar.update(1)
+
+                    if len(batch) >= save_every:
+                        flush()
+        except BaseException:
+            # Ctrl-C or a raising worker: drop the tiles still queued so that
+            # leaving the pool only waits for in-flight requests, not for the
+            # whole remaining scan. Re-raised unchanged for the caller.
+            pool.shutdown(wait=False, cancel_futures=True)
+            raise
+        finally:
+            # Always write the rows buffered so far. Otherwise an interruption
+            # would discard them while their URLs stay marked done in the
+            # tracker, and those tiles would never get a CSV row.
+            flush()
+
+    log.info(
+        "[%s] Scan %d complete: %d tiles downloaded.",
+        machine["label"],
+        scan_id,
+        downloaded,
+    )
+    return downloaded
+
+
+# ---------------------------------------------------------------------------
+# Per-scan driver
+# ---------------------------------------------------------------------------
+
+
+# Keys taken from the scan-list row when the detail page did not provide them.
+_LIST_FALLBACK_KEYS = (
+    "name",
+    "scan_time",
+    "start_datetime",
+    "end_datetime",
+    "status",
+    "user",
+    "scan_lines",
+    "scan_mode",
+)
+
+# scan_meta keys copied verbatim into a scans.csv row, in column order.
+_SCAN_META_CSV_KEYS = (
+    "name",
+    "scan_time",
+    "start_x",
+    "start_y",
+    "end_x",
+    "end_y",
+    "dx",
+    "dy",
+    "nx",
+    "ny",
+    "total_tiles",
+    "scan_lines",
+    "scan_mode",
+    "start_datetime",
+    "end_datetime",
+    "status",
+    "user",
+    "disk_space_mb",
+)
+
+
+def _scan_csv_row(
+    machine: dict[str, Any],
+    scan_id: int,
+    scan_meta: dict[str, Any],
+    mosaic_url: str,
+    mosaic_path: Path,
+    mosaic_downloaded: bool,
+) -> dict[str, Any]:
+    """Assemble the scans.csv row for one scan; absent metadata becomes ""."""
+    row: dict[str, Any] = {
+        "machine": machine["label"],
+        "machine_id": machine["machine_id"],
+        "scan_id": scan_id,
+    }
+    row.update((key, scan_meta.get(key, "")) for key in _SCAN_META_CSV_KEYS)
+    row["mosaic_url"] = mosaic_url
+    row["mosaic_local_path"] = str(mosaic_path)
+    row["mosaic_downloaded"] = mosaic_downloaded
+    return row
+
+
+def process_scan(
+    sess: MachineSession,
+    scan: dict[str, Any],
+    output_dir: Path,
+    machine: dict[str, Any],
+    config: dict[str, Any],
+    progress: ProgressTracker,
+    scans_csv: CsvWriter,
+    tiles_csv: CsvWriter,
+    dry_run: bool,
+    mosaic_only: bool,
+) -> int:
+    """Process one scan: fetch metadata, download mosaic and (optionally) tiles.
+
+    The scan is skipped (returning 0) when its metadata cannot be fetched or
+    lacks ``nx``/``ny``. In a dry run nothing is written by this function:
+    no directory, no ``metadata.json`` and no ``scans.csv`` row.
+
+    Args:
+        sess: Logged-in session for the machine.
+        scan: Scan-list entry; must contain ``scan_id``.
+        output_dir: Archive root directory.
+        machine: Machine descriptor (``label`` and ``machine_id`` are used).
+        config: Loaded configuration, passed on to the tile downloader.
+        progress: Tracker of completed URLs.
+        scans_csv: Writer for the scan-level row.
+        tiles_csv: Writer for tile-level rows.
+        dry_run: Preview only; write nothing.
+        mosaic_only: Stop after the mosaic and skip the tiles.
+
+    Returns:
+        Total files downloaded for this scan (mosaic plus tiles).
+    """
+    scan_id: int = scan["scan_id"]
+    log.info("[%s] Processing scan %d …", machine["label"], scan_id)
+
+    try:
+        scan_meta = sess.get_scan_metadata(scan_id)
+    except Exception as exc:
+        # Deliberately broad: one unreadable scan must not abort the machine.
+        log.error(
+            "[%s] Cannot fetch scan %d metadata: %s", machine["label"], scan_id, exc
+        )
+        return 0
+
+    if not scan_meta.get("nx") or not scan_meta.get("ny"):
+        log.warning(
+            "[%s] Scan %d: missing grid params, skipping.",
+            machine["label"],
+            scan_id,
+        )
+        return 0
+
+    # Merge list-level metadata into scan_meta (detail page takes precedence)
+    for key in _LIST_FALLBACK_KEYS:
+        scan_meta.setdefault(key, scan.get(key, ""))
+
+    # Save per-scan metadata.json (only if not already present)
+    scan_date = _extract_date(scan_meta.get("scan_time", ""))
+    scan_dir = output_dir / machine_dir_name(machine) / scan_date / str(scan_id)
+    if not dry_run:
+        scan_dir.mkdir(parents=True, exist_ok=True)
+        meta_file = scan_dir / "metadata.json"
+        if not meta_file.exists():
+            meta_file.write_text(
+                json.dumps(scan_meta, indent=2, default=str), encoding="utf-8"
+            )
+
+    # Mosaic
+    mosaic_path = mosaic_dest(output_dir, machine, scan_meta, scan_id)
+    mosaic_url = sess.mosaic_url(scan_id)
+    mosaic_downloaded = _download_mosaic(
+        sess, scan_meta, scan_id, mosaic_path, progress, machine, dry_run
+    )
+    total = 1 if mosaic_downloaded else 0
+
+    # Write the scan-level CSV row. Skipped in a dry run so that a preview
+    # leaves scans.csv untouched, matching the metadata.json guard above.
+    if not dry_run:
+        scans_csv.write(
+            _scan_csv_row(
+                machine, scan_id, scan_meta, mosaic_url, mosaic_path, mosaic_downloaded
+            )
+        )
+
+    if mosaic_only:
+        return total
+
+    # Tiles
+    tiles = sess.enumerate_tiles(scan_meta)
+    total += _download_tiles_for_scan(
+        sess,
+        tiles,
+        scan_meta,
+        scan_id,
+        output_dir,
+        machine,
+        config,
+        progress,
+        tiles_csv,
+        dry_run,
+    )
+    return total
+
+
+# ---------------------------------------------------------------------------
+# Per-machine driver
+# ---------------------------------------------------------------------------
+
+
+def scrape_machine(
+    machine: dict[str, Any],
+    config: dict[str, Any],
+    output_dir: Path,
+    progress: ProgressTracker,
+    tiles_csv: CsvWriter,
+    scans_csv: CsvWriter,
+    dry_run: bool,
+    mosaic_only: bool,
+    scan_id_filter: int | None,
+) -> int:
+    """Log in to one machine and process its scans.
+
+    With *scan_id_filter* set, only that scan is processed and the scan list
+    is not fetched; otherwise every scan from ``sess.get_all_scans()`` is
+    processed in the order returned.
+
+    Returns:
+        Total files downloaded for the machine; 0 when login fails or the
+        machine reports no scans.
+    """
+    session = MachineSession(machine, config)
+    logged_in = session.login()
+    if not logged_in:
+        return 0
+
+    scans: list[dict[str, Any]]
+    if scan_id_filter is None:
+        scans = session.get_all_scans()
+        if not scans:
+            log.warning("[%s] No scans found.", machine["label"])
+            return 0
+    else:
+        # Synthesize a minimal list entry for the requested scan.
+        scans = [{"scan_id": scan_id_filter, "status": "Completed"}]
+        log.info("[%s] Targeting scan ID %d.", machine["label"], scan_id_filter)
+
+    return sum(
+        process_scan(
+            sess=session,
+            scan=entry,
+            output_dir=output_dir,
+            machine=machine,
+            config=config,
+            progress=progress,
+            scans_csv=scans_csv,
+            tiles_csv=tiles_csv,
+            dry_run=dry_run,
+            mosaic_only=mosaic_only,
+        )
+        for entry in scans
+    )
diff --git a/spruce/parsers.py b/spruce/parsers.py
new file mode 100644
index 0000000..e2bcf81
--- /dev/null
+++ b/spruce/parsers.py
@@ -0,0 +1,213 @@
+"""
+Pure HTML / text parsing functions for the RootView web application.
+
+All functions are side-effect-free: string (or list[str]) in, dict/list out.
+No network access, no filesystem access.
+"""
+
+import math
+import re
+from typing import Any
+from urllib.parse import unquote
+
+from bs4 import BeautifulSoup
+
+
+# ---------------------------------------------------------------------------
+# Machine descriptor
+# ---------------------------------------------------------------------------
+
+
+def parse_machine_option(label: str, raw_value: str) -> dict[str, Any]:
+    """
+    Decode the pipe-delimited <option> value for a machine.
+
+    The URL-decoded value is split on "|" and the pieces are mapped, in
+    order, to name, ip, port1, machine_id, port2 and version. Pieces that
+    are absent become "". An empty name falls back to ``label``.
+
+    The undecoded ``raw_value`` is kept under "option_value".
+    """
+    field_names = ("name", "ip", "port1", "machine_id", "port2", "version")
+    pieces = unquote(raw_value).split("|")
+    # Pad so every expected field has a value even when the option is short.
+    pieces += [""] * (len(field_names) - len(pieces))
+
+    machine: dict[str, Any] = {"label": label, "option_value": raw_value}
+    machine.update(zip(field_names, pieces))
+    # str.split() always yields at least one element, so checking the list
+    # length can never trigger the label fallback; test the value instead.
+    machine["name"] = machine["name"] or label
+    return machine
+
+
+# ---------------------------------------------------------------------------
+# Scan list row
+# ---------------------------------------------------------------------------
+
+
+def parse_scan_row(cells: list[str]) -> dict[str, Any] | None:
+    """
+    Turn the cell texts of one scan-list table row into a scan dict.
+
+    Column order (as observed in the HTML):
+      ID, Name, Scan Time, Step Units, (X,Y)-(X,Y)-(DX,DY),
+      Dwell Time ms, Scan Lines, Scan Mode, Start Time, End Time,
+      Cancelled, User, Scan Status, Archived, [View link]
+
+    Every value except ``scan_id`` is the stripped cell text; columns the
+    row does not have are returned as "". Header rows, empty rows and rows
+    whose first cell is not a number yield None.
+    """
+    if not cells:
+        return None
+    id_text = cells[0].strip()
+    if not id_text.isdigit():
+        return None
+    try:
+        scan_id = int(id_text)
+    except ValueError:
+        # str.isdigit() accepts characters such as "²" that int() rejects.
+        return None
+
+    # Keys for cells[1:], in table-column order.
+    column_keys = (
+        "name",
+        "scan_time",
+        "step_units",
+        "coord_str",
+        "dwell_ms",
+        "scan_lines",
+        "scan_mode",
+        "start_datetime",
+        "end_datetime",
+        "cancelled",
+        "user",
+        "status",
+        "archived",
+    )
+    parsed: dict[str, Any] = {"scan_id": scan_id}
+    for position, key in enumerate(column_keys, start=1):
+        parsed[key] = cells[position].strip() if position < len(cells) else ""
+    return parsed
+
+
+# ---------------------------------------------------------------------------
+# Scan view page
+# ---------------------------------------------------------------------------
+
+
+def parse_scan_view(html: str) -> dict[str, Any]:
+    """
+    Extract scan metadata and grid parameters from a scan view page.
+
+    Three sources are consulted, in this order:
+      1. the query string of the first ``tile_view.php?...`` URL in the page,
+      2. standalone ``var startX = ...`` style JS declarations (only for
+         grid values the URL did not supply),
+      3. label/value cell pairs in the page's tables.
+
+    Returned keys (each present only when found):
+      start_x, start_y, end_x, end_y, dx, dy (floats); nx, ny, total_tiles
+      (ints); scan_id, disk_space_mb; the table strings name, scan_time,
+      scan_lines, scan_mode, start_datetime, end_datetime, status, user; and
+      the raw ``*_label`` strings from the table's grid rows.
+    """
+    result: dict[str, Any] = {}
+    grid_names = ("startX", "startY", "endX", "endY", "deltaX", "deltaY")
+
+    # 1. Grid params embedded as query params in a JS string, e.g.:
+    #    "include/tile_view.php?...&sX=0&sY=0&eX=310&eY=740&dX=3.01&dY=2.26&..."
+    url_hit = re.search(r'tile_view\.php\?([^"\']+)', html)
+    if url_hit is not None:
+        query = url_hit.group(1)
+        short_names = ("sX", "sY", "eX", "eY", "dX", "dY")
+        for short, name in zip(short_names, grid_names):
+            hit = re.search(rf"(?:^|&){re.escape(short)}=([\d.]+)", query)
+            if hit is not None:
+                result[name] = float(hit.group(1))
+
+    # 2. Fallback: standalone JS var declarations (present in tile_view.php).
+    for name in grid_names:
+        if name in result:
+            continue
+        hit = re.search(rf"var\s+{name}\s*=\s*([\d.]+)", html)
+        if hit is not None:
+            result[name] = float(hit.group(1))
+
+    # 3. Table cells: a label cell is followed by its value cell.
+    text_fields = {
+        "Name:": "name",
+        "Scan Time:": "scan_time",
+        "Starting X:": "start_x_label",
+        "Starting Y:": "start_y_label",
+        "Ending X:": "end_x_label",
+        "Ending Y:": "end_y_label",
+        "DX:": "dx_label",
+        "DY:": "dy_label",
+        "Scan Lines:": "scan_lines",
+        "Scan Mode:": "scan_mode",
+        "Start Time:": "start_datetime",
+        "End Time:": "end_datetime",
+        "Scan Status:": "status",
+        "User:": "user",
+    }
+    soup = BeautifulSoup(html, "lxml")
+    for row in soup.find_all("tr"):
+        cells = [td.get_text(strip=True) for td in row.find_all("td")]
+        # Pairing each cell with its successor skips a trailing label that
+        # has no value cell after it.
+        for label, value in zip(cells, cells[1:]):
+            if label in text_fields:
+                result[text_fields[label]] = value
+            elif label == "Scan ID:":
+                try:
+                    result["scan_id"] = int(value)
+                except ValueError:
+                    pass
+            elif label == "Total number of images:":
+                # Format: "33784 (103x328)"
+                counts = re.match(r"(\d+)\s*\((\d+)x(\d+)\)", value)
+                if counts is not None:
+                    result["total_tiles"] = int(counts.group(1))
+                    result["nx"] = int(counts.group(2))
+                    result["ny"] = int(counts.group(3))
+            elif label == "Total Disk Space:":
+                space = re.search(r"([\d.]+)\s*Mb", value)
+                if space is not None:
+                    result["disk_space_mb"] = float(space.group(1))
+
+    # Rename the JS/URL grid names to the canonical snake_case keys.
+    canonical = ("start_x", "start_y", "end_x", "end_y", "dx", "dy")
+    for raw, canon in zip(grid_names, canonical):
+        if raw in result:
+            result[canon] = result.pop(raw)
+
+    # Derive nx/ny/total_tiles from the grid when the table did not give them.
+    if "nx" not in result and {"start_x", "end_x", "dx"} <= result.keys():
+        result["nx"] = _grid_count(result["start_x"], result["end_x"], result["dx"])
+    if "ny" not in result and {"start_y", "end_y", "dy"} <= result.keys():
+        result["ny"] = _grid_count(result["start_y"], result["end_y"], result["dy"])
+    if "total_tiles" not in result and "nx" in result and "ny" in result:
+        result["total_tiles"] = result["nx"] * result["ny"]
+
+    return result
+
+
+# ---------------------------------------------------------------------------
+# Grid helpers
+# ---------------------------------------------------------------------------
+
+
+def _grid_count(start: float, end: float, step: float) -> int:
+    """
+    Number of grid positions from start up to (but not including) end.
+
+    Returns 0 when ``step`` is not positive or when ``end`` does not lie
+    beyond ``start``; the result is never negative.
+    """
+    if step <= 0:
+        return 0
+    # Without the clamp a reversed range yields a negative count, and two
+    # negative counts would multiply into a bogus positive tile total.
+    return max(0, math.ceil((end - start) / step))
+
+
+def _grid_values(start: float, count: int, step: float) -> list[float]:
+    """Return ``count`` positions ``start + i * step``, each rounded to 2 dp."""
+    positions: list[float] = []
+    for index in range(count):
+        positions.append(round(start + index * step, 2))
+    return positions
diff --git a/spruce/paths.py b/spruce/paths.py
new file mode 100644
index 0000000..fcb0101
--- /dev/null
+++ b/spruce/paths.py
@@ -0,0 +1,62 @@
+"""
+Pure path-helper functions — no network, no JSON, no progress state.
+"""
+
+import re
+from pathlib import Path
+from typing import Any
+
+
+def machine_dir_name(machine: dict[str, Any]) -> str:
+    """
+    Turn the machine's label into a directory-safe name.
+
+    Every character other than a word character, "-" or "." becomes "_";
+    underscores at either end of the result are then removed.
+    """
+    label = machine["label"]
+    replaced = re.sub(r"[^\w\-.]", "_", label)
+    return replaced.strip("_")
+
+
+def _extract_date(dt_str: str) -> str:
+    """Return the first YYYY-MM-DD found in ``dt_str``, or 'unknown' if none."""
+    found = re.search(r"(\d{4}-\d{2}-\d{2})", dt_str)
+    if found is None:
+        return "unknown"
+    return found.group(1)
+
+
+def tile_dest(
+    output_dir: Path,
+    machine: dict[str, Any],
+    scan_meta: dict[str, Any],
+    tile: dict[str, Any],
+) -> Path:
+    """
+    Build the local path for one tile image.
+
+    Layout: <output_dir>/<machine>/<scan date>/<scan id>/tiles/<file>, where
+    <file> is ``tile_r<row>_c<col>.jpg`` with both indices zero-padded to the
+    width of the largest index in the scan's grid (``ny`` / ``nx``).
+    """
+
+    def index_width(count: int) -> int:
+        # Digits needed to print the largest zero-based index (count - 1).
+        return len(str(count - 1)) if count > 1 else 1
+
+    scan_date = _extract_date(scan_meta.get("scan_time", ""))
+    scan_folder = str(tile["scan_id"])
+    row_width = index_width(scan_meta.get("ny", 1))
+    col_width = index_width(scan_meta.get("nx", 1))
+    filename = "tile_r{:0{}d}_c{:0{}d}.jpg".format(
+        tile["row_index"], row_width, tile["col_index"], col_width
+    )
+    return output_dir.joinpath(
+        machine_dir_name(machine), scan_date, scan_folder, "tiles", filename
+    )
+
+
+def mosaic_dest(
+    output_dir: Path,
+    machine: dict[str, Any],
+    scan_meta: dict[str, Any],
+    scan_id: int,
+) -> Path:
+    """
+    Build the local path for a scan's mosaic image.
+
+    Layout: <output_dir>/<machine>/<scan date>/<scan id>/mosaic.jpg
+    """
+    scan_date = _extract_date(scan_meta.get("scan_time", ""))
+    return output_dir.joinpath(
+        machine_dir_name(machine), scan_date, str(scan_id), "mosaic.jpg"
+    )
diff --git a/spruce/progress.py b/spruce/progress.py
new file mode 100644
index 0000000..0d8b826
--- /dev/null
+++ b/spruce/progress.py
@@ -0,0 +1,82 @@
+"""
+Progress tracking (JSON) and CSV writing.
+"""
+
+import csv
+import json
+import logging
+from pathlib import Path
+from typing import Iterator
+
+log = logging.getLogger(__name__)
+
+
+class ProgressTracker:
+    """
+    Tracks successfully downloaded URLs in a JSON file so runs can be resumed.
+
+    The file holds one object, ``{"completed_urls": [...]}``. It is read once
+    when the tracker is created; changes live in memory until save() is
+    called.
+
+    Public API (all external code should use only these methods):
+      is_done(url)    — True if url has been downloaded
+      mark_done(url)  — Record url as complete (call save() to persist)
+      discard(url)    — Remove url from the completed set
+      iter_urls()     — Iterate over all completed URLs
+      __len__()       — Number of completed URLs
+      save()          — Flush state to disk
+    """
+
+    def __init__(self, path: Path) -> None:
+        self.path = path
+        self._done: set[str] = set()
+        self._load()
+
+    def _load(self) -> None:
+        """Fill the completed set from ``self.path``; keep it empty on error."""
+        if not self.path.exists():
+            return
+        try:
+            data = json.loads(self.path.read_text(encoding="utf-8"))
+            self._done = set(data.get("completed_urls", []))
+        except (OSError, ValueError, TypeError, AttributeError):
+            # Unreadable file, invalid JSON, or JSON of an unexpected shape.
+            log.warning("Could not read progress file; starting fresh.")
+            return
+        log.info("Resuming: %d URLs already downloaded.", len(self._done))
+
+    def is_done(self, url: str) -> bool:
+        """Return True if ``url`` is in the completed set."""
+        return url in self._done
+
+    def mark_done(self, url: str) -> None:
+        """Add ``url`` to the completed set (in memory only)."""
+        self._done.add(url)
+
+    def discard(self, url: str) -> None:
+        """Remove a URL from the completed set (re-queues it for download)."""
+        self._done.discard(url)
+
+    def iter_urls(self) -> Iterator[str]:
+        """Iterate over all completed URLs."""
+        return iter(self._done)
+
+    def __len__(self) -> int:
+        return len(self._done)
+
+    def save(self) -> None:
+        """
+        Write the completed set to ``self.path`` as sorted, indented JSON.
+
+        The data goes to a temporary sibling file that is then renamed over
+        the target, so an interrupted save cannot leave a truncated progress
+        file behind (which _load would treat as "start fresh").
+        """
+        import os
+        import threading
+
+        self.path.parent.mkdir(parents=True, exist_ok=True)
+        payload = json.dumps({"completed_urls": sorted(self._done)}, indent=2)
+        # Unique per process and thread so overlapping saves never share a
+        # temporary file.
+        tmp_path = self.path.with_name(
+            f"{self.path.name}.{os.getpid()}.{threading.get_ident()}.tmp"
+        )
+        try:
+            tmp_path.write_text(payload, encoding="utf-8")
+            tmp_path.replace(self.path)
+        except OSError:
+            tmp_path.unlink(missing_ok=True)
+            raise
+
+
+class CsvWriter:
+    """
+    Append-mode CSV writer that writes a header when the file has no content.
+
+    The header is written when the file is missing or exists but is empty.
+    Rows are restricted to ``fields``; keys a row lacks are written as "".
+    Every row is flushed immediately. Usable as a context manager, which
+    closes the file on exit.
+    """
+
+    def __init__(self, path: Path, fields: list[str]) -> None:
+        path.parent.mkdir(parents=True, exist_ok=True)
+        # An existing zero-byte file still needs a header row.
+        needs_header = not path.exists() or path.stat().st_size == 0
+        self._fh = open(path, "a", newline="", encoding="utf-8")
+        self._writer = csv.DictWriter(self._fh, fieldnames=fields)
+        self._fields = fields
+        if needs_header:
+            self._writer.writeheader()
+            self._fh.flush()
+
+    def write(self, row: dict) -> None:
+        """Append one row, keeping only the configured fields, and flush."""
+        self._writer.writerow({f: row.get(f, "") for f in self._fields})
+        self._fh.flush()
+
+    def close(self) -> None:
+        """Close the underlying file."""
+        self._fh.close()
+
+    def __enter__(self) -> "CsvWriter":
+        return self
+
+    def __exit__(self, *exc_info: object) -> None:
+        self.close()
diff --git a/spruce/recheck.py b/spruce/recheck.py
new file mode 100644
index 0000000..aeeffe6
--- /dev/null
+++ b/spruce/recheck.py
@@ -0,0 +1,156 @@
+"""
+Archive integrity checks — find corrupt / missing tiles and remove them
+from the progress tracker so they are re-downloaded on the next run.
+"""
+
+import logging
+import urllib.parse
+from pathlib import Path
+from typing import Any
+
+from spruce.progress import ProgressTracker
+
+log = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Private helpers
+# ---------------------------------------------------------------------------
+
+
+def _parse_tile_url(url: str) -> dict[str, str]:
+    """
+    Pull the ``id``, ``x`` and ``y`` query parameters out of a tile URL.
+
+    Returned under the keys scan_id, x and y; a parameter that is absent
+    (or blank) comes back as "".
+    """
+    query = urllib.parse.urlparse(url).query
+    params: dict[str, str] = {}
+    for name, value in urllib.parse.parse_qsl(query):
+        params[name] = value  # a repeated parameter keeps its last value
+    wanted = (("scan_id", "id"), ("x", "x"), ("y", "y"))
+    return {out_key: params.get(query_key, "") for out_key, query_key in wanted}
+
+
+def _build_disk_index(output_dir: Path) -> dict[Path, int]:
+    """
+    Map every ``tile_r*.jpg`` file found anywhere under ``output_dir`` to
+    its size in bytes.
+    """
+    index: dict[Path, int] = {}
+    for tile_file in output_dir.rglob("tile_r*.jpg"):
+        index[tile_file] = tile_file.stat().st_size
+    return index
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+
+def recheck_tile_files(output_dir: Path, progress: ProgressTracker) -> int:
+    """
+    Walk every tile file on disk and delete any that are zero bytes.
+    Also removes the affected scan's tile URLs from progress in the same
+    pass, so a single --recheck call is sufficient before resuming.
+
+    The discard is scan-level: one zero-byte tile drops every completed
+    tile URL of that scan from progress, because the exact x/y of a file
+    cannot be recovered from its name alone. Progress is saved only when
+    at least one file was deleted.
+
+    Returns the count of files deleted.
+    """
+    # Index completed tile URLs by scan id once, so each zero-byte file costs
+    # a single dict lookup rather than a pass over every recorded URL.
+    urls_by_scan: dict[str, list[str]] = {}
+    for url in progress.iter_urls():
+        if "cmd=image" in url:
+            scan_key = _parse_tile_url(url)["scan_id"]
+            urls_by_scan.setdefault(scan_key, []).append(url)
+
+    deleted = 0
+    for tile_path in output_dir.rglob("tile_r*.jpg"):
+        if tile_path.stat().st_size != 0:
+            continue
+        log.warning("Deleting zero-byte tile: %s", tile_path)
+        tile_path.unlink()
+        deleted += 1
+
+        scan_id = _scan_id_from_path(tile_path)
+        if scan_id:
+            # pop() so later zero-byte tiles of the same scan find nothing
+            # left to discard.
+            for url in urls_by_scan.pop(scan_id, ()):
+                progress.discard(url)
+
+    if deleted:
+        log.info("Deleted %d zero-byte tile file(s).", deleted)
+        progress.save()
+    else:
+        log.info("No zero-byte tile files found on disk.")
+    return deleted
+
+
+def recheck_archive(output_dir: Path, progress: ProgressTracker) -> int:
+    """
+    Check every tile URL in .progress.json against the files on disk and
+    drop the ones whose scan has nothing usable, so the next run
+    re-downloads them. Returns the count of entries removed.
+
+    The check is scan-level, not per-file: a tile URL is kept when at least
+    one non-empty ``tile_r*.jpg`` exists under a directory named after the
+    URL's scan id, and removed otherwise.
+
+    Only tile URLs are checked (mosaic URLs are skipped — mosaics are large
+    single files and are unlikely to be partially written due to streaming).
+    """
+    if len(progress) == 0:
+        log.info("Progress file is empty — nothing to recheck.")
+        return 0
+
+    tile_urls = [u for u in progress.iter_urls() if "cmd=image" in u]
+    mosaic_count = len(progress) - len(tile_urls)
+    log.info(
+        "Rechecking %d tile URLs (%d mosaic URLs not rechecked) …",
+        len(tile_urls),
+        mosaic_count,
+    )
+
+    # Build a disk index once
+    existing_files = _build_disk_index(output_dir)
+    log.debug("Found %d tile files on disk.", len(existing_files))
+
+    # Collect the path components of every non-empty tile file up front.
+    # A scan is healthy exactly when its id is one of those components, so
+    # each URL needs one set lookup instead of a scan of the whole index.
+    healthy_parts: set[str] = set()
+    for path, size in existing_files.items():
+        if size > 0:
+            healthy_parts.update(path.parts)
+
+    bad_urls = [
+        url
+        for url in tile_urls
+        if _parse_tile_url(url)["scan_id"] not in healthy_parts
+    ]
+
+    if not bad_urls:
+        log.info("All %d tile URLs look healthy.", len(tile_urls))
+        return 0
+
+    log.warning(
+        "Found %d suspect tile URL(s). Removing from progress.",
+        len(bad_urls),
+    )
+    for url in bad_urls:
+        progress.discard(url)
+    progress.save()
+    log.info(
+        "Removed %d URL(s) from .progress.json — they will be re-downloaded on next run.",
+        len(bad_urls),
+    )
+    return len(bad_urls)
+
+
+# ---------------------------------------------------------------------------
+# Internal utility
+# ---------------------------------------------------------------------------
+
+
+def _scan_id_from_path(tile_path: Path) -> str | None:
+    """
+    Given a tile path like .../158374/tiles/tile_r0_c0.jpg, return '158374'.
+
+    The scan id is the name of the directory two levels above the file
+    (structure: <machine>/<date>/<scan_id>/tiles/<filename>). Returns None
+    when the path is too shallow to have such a directory, or when the
+    argument is not path-like.
+    """
+    try:
+        scan_dir = tile_path.parent.parent.name
+    except AttributeError:
+        return None
+    # A path with fewer than two parent directories yields "" here.
+    return scan_dir or None
diff --git a/spruce/session.py b/spruce/session.py
new file mode 100644
index 0000000..d992c06
--- /dev/null
+++ b/spruce/session.py
@@ -0,0 +1,274 @@
+"""
+HTTP session for a single RootView machine: login, scan listing, tile downloads.
+"""
+
+import logging
+import time
+from datetime import datetime, timezone
+from pathlib import Path
+from urllib.parse import urljoin
+from typing import Any
+
+import requests
+from bs4 import BeautifulSoup
+
+from spruce.parsers import parse_scan_row, parse_scan_view, _grid_values
+
+log = logging.getLogger(__name__)
+
+USER_AGENT = "spruce-scraper/1.0"
+
+
+class MachineSession:
+    """
+    Manages an authenticated HTTP session for one RootView machine.
+
+    A single ``requests.Session`` is used for every call, so whatever
+    cookies the login response sets are sent with the later scan-list,
+    scan-view and download requests.
+
+    Config keys read here: base_url, image_base_url, username, password,
+    timeout, request_delay and tile_scale.
+    """
+
+    def __init__(self, machine: dict[str, Any], config: dict[str, Any]) -> None:
+        self.machine = machine
+        self.cfg = config
+        self.http = requests.Session()
+        self.http.headers["User-Agent"] = USER_AGENT
+        self.base_url: str = config["base_url"]
+        self.image_base_url: str = config.get(
+            "image_base_url", "http://205.149.147.131:8011/"
+        )
+
+    # ------------------------------------------------------------------
+    # Auth
+    # ------------------------------------------------------------------
+
+    def login(self) -> bool:
+        """
+        POST the login form for this machine.
+
+        Returns False when the request fails, returns an HTTP error status,
+        or the response page contains a non-empty element with class
+        "error"; True otherwise.
+        """
+        url = urljoin(self.base_url, "index.php")
+        payload = {
+            "RTLLogin": "1",
+            "RTLNAME": self.machine["option_value"],
+            "RTLUSER": self.cfg["username"],
+            "RTLPWD": self.cfg["password"],
+            "rtl_latest_version": "3.0.0.18",
+            "submit": " submit ",
+        }
+        try:
+            resp = self.http.post(url, data=payload, timeout=self.cfg["timeout"])
+            resp.raise_for_status()
+        except requests.RequestException as exc:
+            log.error("[%s] Login failed: %s", self.machine["label"], exc)
+            return False
+
+        soup = BeautifulSoup(resp.text, "lxml")
+        error_tag = soup.find(class_="error")
+        if error_tag and error_tag.get_text(strip=True):
+            log.error(
+                "[%s] Login rejected: %s",
+                self.machine["label"],
+                error_tag.get_text(strip=True),
+            )
+            return False
+
+        log.info("[%s] Login succeeded.", self.machine["label"])
+        return True
+
+    # ------------------------------------------------------------------
+    # Scan list (paginated)
+    # ------------------------------------------------------------------
+
+    def get_all_scans(self) -> list[dict[str, Any]]:
+        """
+        Fetch the complete scan list across all pages.
+
+        Uses a large FilterCount (320) to minimise round-trips.
+        Falls back to repeated pages if the list is longer: paging stops at
+        the first page that is empty or shorter than the page size.
+        """
+        all_scans: list[dict[str, Any]] = []
+        start = 0
+        page_size = 320
+
+        while True:
+            page_scans = self._fetch_scan_page(start, page_size)
+            if not page_scans:
+                break
+            all_scans.extend(page_scans)
+            log.debug(
+                "[%s] Page start=%d: %d scans (total so far: %d)",
+                self.machine["label"],
+                start,
+                len(page_scans),
+                len(all_scans),
+            )
+            if len(page_scans) < page_size:
+                break
+            start += page_size
+            time.sleep(self.cfg["request_delay"])
+
+        log.info("[%s] Found %d scans.", self.machine["label"], len(all_scans))
+        return all_scans
+
+    def _fetch_scan_page(
+        self, start: int, page_size: int
+    ) -> list[dict[str, Any]]:
+        """
+        POST the scan list form and parse the returned table.
+
+        Sleeps ``request_delay`` before the request. Raises
+        ``requests.HTTPError`` on an error status. Rows that
+        ``parse_scan_row`` rejects (e.g. headers) are dropped.
+        """
+        time.sleep(self.cfg["request_delay"])
+        resp = self.http.post(
+            urljoin(self.base_url, "index.php"),
+            data={
+                "cmd": "scan",
+                "start": str(start),
+                "order": "0",
+                "order_dir": "1",
+                "FilterScanStatus": "2",  # Completed scans
+                "FilterUser": "",
+                "hidedate": "",
+                "FilterDtFrom": "",
+                "FilterDtTo": "",
+                "FilterIdFrom": "0",
+                "FilterIdTo": "0",
+                "FilterCount": str(page_size),
+            },
+            timeout=self.cfg["timeout"],
+        )
+        resp.raise_for_status()
+
+        soup = BeautifulSoup(resp.text, "lxml")
+        scans: list[dict[str, Any]] = []
+        for row in soup.find_all("tr"):
+            cells = [td.get_text(strip=True) for td in row.find_all("td")]
+            scan = parse_scan_row(cells)
+            if scan:
+                scans.append(scan)
+        return scans
+
+    # ------------------------------------------------------------------
+    # Scan detail
+    # ------------------------------------------------------------------
+
+    def get_scan_metadata(self, scan_id: int) -> dict[str, Any]:
+        """
+        Fetch the scan view page and extract grid parameters.
+
+        Sleeps ``request_delay`` before the request. Raises
+        ``requests.HTTPError`` on an error status.
+        """
+        time.sleep(self.cfg["request_delay"])
+        resp = self.http.get(
+            urljoin(self.base_url, "index.php"),
+            params={"cmd": "scan", "mode": "view", "id": str(scan_id)},
+            timeout=self.cfg["timeout"],
+        )
+        resp.raise_for_status()
+        return parse_scan_view(resp.text)
+
+    # ------------------------------------------------------------------
+    # Tile enumeration
+    # ------------------------------------------------------------------
+
+    def enumerate_tiles(self, scan_meta: dict[str, Any]) -> list[dict[str, Any]]:
+        """
+        Generate the full list of tile descriptors for a scan.
+
+        Each descriptor has: scan_id, row_index, col_index, x_mm, y_mm, url.
+        Tiles are listed row by row (all columns of the first y, then the
+        next y, ...). A scan without nx/ny yields an empty list.
+        """
+        scan_id = scan_meta["scan_id"]
+        nx: int = scan_meta.get("nx", 0)
+        ny: int = scan_meta.get("ny", 0)
+        start_x: float = scan_meta.get("start_x", 0.0)
+        start_y: float = scan_meta.get("start_y", 0.0)
+        dx: float = scan_meta.get("dx", 1.0)
+        dy: float = scan_meta.get("dy", 1.0)
+        scale: int = self.cfg.get("tile_scale", 1)
+
+        xs = _grid_values(start_x, nx, dx)
+        ys = _grid_values(start_y, ny, dy)
+
+        tiles: list[dict[str, Any]] = []
+        for row_idx, y in enumerate(ys):
+            for col_idx, x in enumerate(xs):
+                url = (
+                    urljoin(self.base_url, "index.php")
+                    + f"?cmd=image&mode=image_scan&id={scan_id}"
+                    + f"&s={scale}&x={x}&y={y}"
+                )
+                tiles.append(
+                    {
+                        "scan_id": scan_id,
+                        "row_index": row_idx,
+                        "col_index": col_idx,
+                        "x_mm": x,
+                        "y_mm": y,
+                        "url": url,
+                    }
+                )
+        return tiles
+
+    # ------------------------------------------------------------------
+    # Mosaic URL
+    # ------------------------------------------------------------------
+
+    def mosaic_url(self, scan_id: int) -> str:
+        """Return the URL of the scan's mosaic under ``image_base_url``."""
+        return urljoin(
+            self.image_base_url, f"RootView_Database/{scan_id}/mosaic.jpg"
+        )
+
+    # ------------------------------------------------------------------
+    # Downloads
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _remove_quietly(path: Path) -> None:
+        """Delete ``path`` if present; cleanup is best effort."""
+        try:
+            path.unlink(missing_ok=True)
+        except OSError:
+            # A leftover partial file is overwritten by the next attempt.
+            pass
+
+    def download_file(self, url: str, dest: Path, retries: int = 3) -> int:
+        """
+        Stream-download url to dest with retries.
+
+        The body is streamed into a sibling ``<name>.part`` file and renamed
+        onto ``dest`` only once it has been received completely, so ``dest``
+        never holds a truncated download. A failed attempt or an empty body
+        leaves ``dest`` untouched. Failed attempts are retried with a delay
+        that starts at 5 seconds and doubles each time.
+
+        Returns bytes written (0 on failure or empty body).
+        """
+        dest.parent.mkdir(parents=True, exist_ok=True)
+        partial = dest.with_name(dest.name + ".part")
+        backoff = 5.0
+        for attempt in range(1, retries + 1):
+            try:
+                size = 0
+                # The with-block releases the connection on success and on
+                # error alike.
+                with self.http.get(
+                    url, timeout=self.cfg["timeout"], stream=True
+                ) as resp:
+                    resp.raise_for_status()
+                    with open(partial, "wb") as fh:
+                        for chunk in resp.iter_content(chunk_size=65536):
+                            if chunk:
+                                fh.write(chunk)
+                                size += len(chunk)
+                if size == 0:
+                    # Reported as a failure (0); do not leave an empty file.
+                    self._remove_quietly(partial)
+                    return 0
+                partial.replace(dest)
+                return size
+            except Exception as exc:
+                # Deliberately broad: any failure of one download is logged
+                # and reported as 0 rather than aborting the whole run.
+                self._remove_quietly(partial)
+                if attempt < retries:
+                    log.debug(
+                        "Attempt %d/%d failed %s: %s — retrying in %.0fs",
+                        attempt,
+                        retries,
+                        url,
+                        exc,
+                        backoff,
+                    )
+                    time.sleep(backoff)
+                    backoff *= 2
+                else:
+                    log.warning(
+                        "Download failed after %d attempts %s: %s",
+                        retries,
+                        url,
+                        exc,
+                    )
+        return 0
+
+    def download_tile(
+        self, tile: dict[str, Any], dest: Path, dry_run: bool
+    ) -> dict[str, Any]:
+        """
+        Download a single tile. Returns a metadata row dict.
+
+        ``downloaded_at`` is "" for a dry run or a failed download,
+        "already_exists" when a non-empty file is already at ``dest``, and a
+        UTC ISO timestamp after a successful download. A zero-byte file at
+        ``dest`` is not treated as existing; the tile is fetched again.
+        """
+        row: dict[str, Any] = {
+            "machine": self.machine["label"],
+            "machine_id": self.machine["machine_id"],
+            "scan_id": tile["scan_id"],
+            "scan_time": tile.get("scan_time", ""),
+            "row_index": tile["row_index"],
+            "col_index": tile["col_index"],
+            "x_mm": tile["x_mm"],
+            "y_mm": tile["y_mm"],
+            "url": tile["url"],
+            "local_path": str(dest),
+            "downloaded_at": "",
+            "file_size_bytes": "",
+        }
+        if dry_run:
+            return row
+        existing_size = dest.stat().st_size if dest.exists() else 0
+        if existing_size > 0:
+            row["downloaded_at"] = "already_exists"
+            row["file_size_bytes"] = existing_size
+            return row
+        size = self.download_file(tile["url"], dest)
+        if size:
+            row["downloaded_at"] = datetime.now(timezone.utc).isoformat()
+            row["file_size_bytes"] = size
+        return row
diff --git a/spruce/settings.py b/spruce/settings.py
new file mode 100644
index 0000000..d33f910
--- /dev/null
+++ b/spruce/settings.py
@@ -0,0 +1,109 @@
+"""
+Constants, field lists, and config loading for the spruce scraper.
+"""
+
+import logging
+import sys
+
+import yaml
+
+log = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# File-name constants
+# ---------------------------------------------------------------------------
+
+# Presumably the config path used when none is given on the command line —
+# confirm against the CLI.
+DEFAULT_CONFIG = "config.yaml"
+# Name of the JSON progress file (the recheck log messages refer to it).
+PROGRESS_FILENAME = ".progress.json"
+# Names of the two CSV outputs; presumably created inside the output
+# directory — confirm against the CLI.
+SCANS_CSV_FILENAME = "scans.csv"
+TILES_CSV_FILENAME = "tiles.csv"
+
+# ---------------------------------------------------------------------------
+# CSV field lists
+# ---------------------------------------------------------------------------
+
+# Column names, in order, for the per-scan CSV (presumably handed to
+# CsvWriter as its field list — confirm in the CLI). Most entries match keys
+# produced by parse_scan_view; the mosaic_* entries are added by
+# process_scan.
+SCANS_CSV_FIELDS: list[str] = [
+    "machine",
+    "machine_id",
+    "scan_id",
+    "name",
+    "scan_time",
+    "start_x",
+    "start_y",
+    "end_x",
+    "end_y",
+    "dx",
+    "dy",
+    "nx",
+    "ny",
+    "total_tiles",
+    "scan_lines",
+    "scan_mode",
+    "start_datetime",
+    "end_datetime",
+    "status",
+    "user",
+    "disk_space_mb",
+    "mosaic_url",
+    "mosaic_local_path",
+    "mosaic_downloaded",
+]
+
+# Column names, in order, for the per-tile CSV. These are the same keys, in
+# the same order, as the row dict built by MachineSession.download_tile.
+TILES_CSV_FIELDS: list[str] = [
+    "machine",
+    "machine_id",
+    "scan_id",
+    "scan_time",
+    "row_index",
+    "col_index",
+    "x_mm",
+    "y_mm",
+    "url",
+    "local_path",
+    "downloaded_at",
+    "file_size_bytes",
+]
+
+# ---------------------------------------------------------------------------
+# Worker safety
+# ---------------------------------------------------------------------------
+
+MAX_SAFE_WORKERS = 4  # above this the RootView server starts timing out
+
+
+def _clamp_workers(n: int) -> int:
+    """
+    Cap the requested worker count at MAX_SAFE_WORKERS.
+
+    Values at or below the limit are returned unchanged; a larger value is
+    replaced by the limit and a warning is logged.
+    """
+    if not n > MAX_SAFE_WORKERS:
+        return n
+    log.warning(
+        "workers=%d exceeds the safe limit of %d. "
+        "The RootView server will time out under this load, causing lost tiles. "
+        "Capping at %d.",
+        n,
+        MAX_SAFE_WORKERS,
+        MAX_SAFE_WORKERS,
+    )
+    return MAX_SAFE_WORKERS
+
+
+# ---------------------------------------------------------------------------
+# Config loader
+# ---------------------------------------------------------------------------
+
+
+def load_config(path: str) -> dict:
+    """
+    Load and validate config.yaml. Exits on missing required fields.
+
+    ``username`` and ``password`` are required and must be non-empty. An
+    empty file is treated as an empty mapping (so it is reported as missing
+    both fields); a file whose top level is not a mapping exits with an
+    explanatory message. Defaults are filled in for base_url,
+    image_base_url, output_dir, workers, timeout, request_delay and
+    tile_scale, and ``workers`` is capped by ``_clamp_workers``.
+    """
+    # YAML streams are Unicode; do not depend on the platform's locale.
+    with open(path, encoding="utf-8") as fh:
+        cfg = yaml.safe_load(fh)
+    if cfg is None:
+        # safe_load returns None for an empty document.
+        cfg = {}
+    if not isinstance(cfg, dict):
+        sys.exit(
+            f"Config {path} must contain a YAML mapping, "
+            f"not {type(cfg).__name__}"
+        )
+    missing = [k for k in ("username", "password") if not cfg.get(k)]
+    if missing:
+        sys.exit(f"Config {path} is missing required fields: {missing}")
+    cfg.setdefault("base_url", "http://205.149.147.131:8010/")
+    cfg.setdefault("image_base_url", "http://205.149.147.131:8011/")
+    cfg.setdefault("output_dir", "archives")
+    cfg.setdefault("workers", 2)
+    cfg.setdefault("timeout", 60)
+    cfg.setdefault("request_delay", 0.5)
+    cfg.setdefault("tile_scale", 1)
+    cfg["workers"] = _clamp_workers(cfg["workers"])
+    return cfg
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..35ab945
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,17 @@
+"""Shared pytest fixtures."""
+
+from pathlib import Path
+
+import pytest
+
+FIXTURES = Path(__file__).parent / "fixtures"
+
+
+@pytest.fixture
+def scan_list_html() -> str:
+    return (FIXTURES / "scan_list.html").read_text(encoding="utf-8")
+
+
+@pytest.fixture
+def scan_view_html() -> str:
+    return (FIXTURES / "scan_view.html").read_text(encoding="utf-8")
diff --git a/tests/fixtures/scan_list.html b/tests/fixtures/scan_list.html
new file mode 100644
index 0000000..d117d5e
--- /dev/null
+++ b/tests/fixtures/scan_list.html
@@ -0,0 +1,782 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html>
+<head>
+  <title>Root View</title>
+
+<!-- <script src="menuscript.js" language="javascript" type="text/javascript"></script> -->
+<link rel="stylesheet" type="text/css" href="menustyle.css" media="screen, print" />
+
+<script src="menuscript.js" language="javascript" type="text/javascript"></script>
+</head>
+<body>
+<style>
+P {
+  font: 9pt Arial, Helvetica, sans-serif;
+}
+.header {
+  font: bold 22pt Arial, Helvetica, sans-serif;
+  color: #008080;
+  text-align: center;
+}
+.header_desc {
+  font: bold 10pt Arial, Helvetica, sans-serif;
+  color: #800000;
+  text-align: center;
+}
+
+.error {
+  font: bold 16pt Arial, Helvetica, sans-serif;
+  color: #ff0000;
+  text-align: center;
+}
+
+.information {
+  font:  10pt Arial, Helvetica, sans-serif;
+  color: #0000ff;
+  text-align: left;
+}
+
+.data_title {
+  font: bold 9pt Arial, Helvetica, sans-serif;
+  color: white;
+  background: #696969;
+  text-align: left;
+}
+
+.data_odd {
+  font: 9pt Arial, Helvetica, sans-serif;
+  font-weight      : lighter;
+  background-color : #eeeeee;
+  color            : #000000;
+}
+
+.data_even {
+  font: 9pt Arial, Helvetica, sans-serif;
+  font-weight      : lighter;
+  background-color : #ffffff;
+  color            : #000000;
+}
+
+.data {
+  font: 9pt Arial, Helvetica, sans-serif;
+  color: #000000;
+}
+
+.login {
+  font: bold 9pt Arial, Helvetica, sans-serif;
+}
+
+.header_link {
+  font: bold 9pt Arial, Helvetica, sans-serif;
+  color: white;
+  background: #696969;
+  text-align: left;
+  text-decoration: none;
+}
+
+#pointer_div {
+    position:relative;
+    border-style:solid;
+    border-width:2px;
+    border-color:red;
+    margin:5px;
+    /*cursor:crosshair;*/
+}
+
+#data_left {
+  text-align: left;
+}
+
+#image_frame {
+  background-color: #70a0b0;
+  font-size:2px;
+}
+
+.red_text {
+  color: #ff0000;
+  font: bold 9pt Arial, Helvetica, sans-serif;
+}
+</style>
+<script>
+
+
+function validateFilename(filenameID) {
+  var item = getObj(filenameID);
+  var result = item.value;
+
+  // based on code found on http://www.codingforums.com/showthread.php?t=194468
+  if (/^[a-z0-9\.\s\-\_\[\]]*$/i.test(result) === false) {  // anything but a-zA-Z0-9, [,], dot, hypen, space, underscore is disallowed
+     alert ("File name " + result + " contains invalid character(s)! \r\nAnything but a-z, A-Z, 0-9, square brackets, dot, hypen, space, underscore is disallowed");
+     item.focus();
+     return false;
+  }
+  // If we allow using spaces in filenames, we should at least strip any leading or trailing spaces
+  item.value = item.value.replace(/^\s+|\s+$/g,"");  // the g switch is essential!!
+  return true;
+}
+
+function getObj(ElementId)
+{
+  if (document.getElementById) // BYI now this method works perfect for both IE and NS
+    return document.getElementById(ElementId);
+
+  return false;
+}
+function showit(ElementId)
+{
+  getObj(ElementId).style.display='block';
+}
+function hideit(ElementId)
+{
+  getObj(ElementId).style.display='none';
+}
+function FormatFloat(v)
+{
+  if (v=="")
+    return v;
+  return Math.round(v*100)/100;
+}
+function validateNumber(objId){
+  // BYI the function validates if the value of the data entry element on the form is an integer or a float
+  // an alert message is popped up in case of error, the problem field is focused
+  // the function returns false so that if used in OnSubmit clause it will force the user to re-enter the data
+  var item = getObj(objId);
+  var result = item.value;
+
+  if (isNaN(Number(result))) {
+     alert("The value must be an integer or float value.");
+     item.focus();
+     return false;
+  }
+  return true;
+}
+
+function validatePositiveNumber(objId) {
+  var item = getObj(objId);
+  var result = item.value;
+
+  if (!validateNumber(objId))
+    return false;
+  if (result < 0) {
+    alert("The value must be a positive value.");
+    item.focus();
+    return false;
+  }
+
+  return true;
+}
+
+</script>
+
+ <script>
+
+function PanicStop()
+{
+   // srn - do not ask for confirmation
+   //if (!confirm("Are you sure you want to stop all process?"))
+   //  return;
+
+   var f = document.stop_form;
+   f.submit();
+}
+</script>
+
+<form name="stop_form" action="index.php" method="post">
+<input type="hidden" name="cmd" value="stop">
+</form>
+
+<!-- Button menuing system added 8/2/2009 by gbr -->
+
+<table border="0" cellpadding="0" cellspacing="0" width=100%><tr><td>
+<a href="index.php?cmd=scan"
+   onmouseover="setOverImg('2','');overSub=true;showSubMenu('submenu2','button2');"
+   onmouseout="setOutImg('2','');overSub=false;setTimeout('hideSubMenu(\'submenu2\')',delay);" target="">
+<img src="buttons/button2up.png" border="0" id="button2" vspace="0" hspace="0"></a>
+
+
+<a href="index.php?cmd=about&mode=photos"
+   onmouseover="setOverImg('5','');overSub=true;showSubMenu('submenu5','button5');"
+   onmouseout="setOutImg('5','');overSub=false;setTimeout('hideSubMenu(\'submenu5\')',delay);" target="">
+<img src="buttons/button5up.png" border="0" id="button5" vspace="0" hspace="0"></a>
+
+<a href="index.php?cmd=about"
+   onmouseover="setOverImg('6','');overSub=true;showSubMenu('submenu6','button6');"
+   onmouseout="setOutImg('6','');overSub=false;setTimeout('hideSubMenu(\'submenu6\')',delay);" target="">
+   <img src="buttons/button6up.png" border="0" id="button6" vspace="0" hspace="0"></a>
+
+<a href="index.php?cmd=movies"
+   onmouseover="setOverImg('9','');overSub=true;showSubMenu('submenu9','button9');"
+   onmouseout="setOutImg('9','');overSub=false;setTimeout('hideSubMenu(\'submenu9\')',delay);" target="">
+<img src="buttons/movies_up.png" border="0" id="button9" vspace="0" hspace="0"></a>
+
+<a href="index.php?cmd=logoff"
+   onmouseover="setOverImg('10','');overSub=true;showSubMenu('submenu10','button10');"
+   onmouseout="setOutImg('10','');overSub=false;setTimeout('hideSubMenu(\'submenu10\')',delay);" target="">
+<img src="buttons/button9up.png" border="0" id="button10" vspace="0" hspace="0"></a><br>
+</td></tr></table>
+  
+<script>
+function explainWhyNot()
+{
+ document.getElementById("WhyNot").innerHTML="<CENTER><font color=red>Cannot connect to the RootView server you selected.</font></CENTER><br>The RootView server you selected is supposed to be a service running on the machine at IP address 205.149.147.130 and Port 17026. The possible problems and suggested solutions are:<br><br>1. The host machine 205.149.147.130 is not running, or is not connected to the internet. Try pinging 205.149.147.130.<br><br>2. The RootView service is not started, or paused. On the host machine 205.149.147.130, log in as the Administrator. Right click on My Computer, and click on Manage|Services. Find RootView Service in the list of services. In the Status column of the Services window, confirm that the service is started. If the service is stopped or paused, Start the service.<br><br>3. The Rootview service is attempting to open the wrong port, that is, not opening the socket at port 17026. This can be checked by opening the rootviewsrv.ini file in the same directory at the rootviewsrv.exe file and looking for the section [TCPIP]. In that section check the Port value is 17026. If it is incorrect, change it to the correct port, save the file, and restart the service.<br><br>4. The firewall on the computer that is running the service is blocking port 17026. Check Start|Control Panel|Firewall and click on the Exceptions tab. Check for a RootView entry. Select it and click on Edit to view the port number. Make sure that you have a RootView entry with a Port number that equals 17026.<br><br>";
+}
+</script><TABLE ALIGN=CENTER><TR><TD ALIGN=CENTER><div class="header">BW3-20 [AMR-26] <FONT SIZE="-1">v3.0.0.33</font></div><div class="header_desc"></div><font size='+1'>(Scan)</font></TD><TD> &nbsp; &nbsp;</TD><TD></TD></TR></TABLE>
+<table>
+<tr>
+<td valign=top>
+<table cellpassing=0 cellspacing=0 border=0>
+<form name="filterform" method="POST" action="index.php">
+<input type="hidden" name="cmd" value="scan">
+<input type="hidden" name="start" value="0">
+<input type="hidden" name="order" value="0">
+<input type="hidden" name="order_dir" value="1">
+
+  <!-- labels for edit controls in first row. 2/7/2011 gbr -->
+  <tr>
+    <td VALIGN=TOP>Filter Scans:&nbsp;</td>
+    <td VALIGN=TOP>&nbsp;
+    <td VALIGN=TOP>&nbsp;User:</td>
+    <td VALIGN=TOP>&nbsp;Date From:</td>
+    <td VALIGN=TOP>&nbsp;Date To:</td>
+    <TD VALIGN=TOP>From Scan ID</TD>   <!-- added 2/7/2011 gbr per block 51 -->
+    <TD VALIGN=TOP>To Scan ID</TD>     <!-- added 2/7/2011 gbr per block 51 -->
+    <td VALIGN=TOP>&nbsp;Scans per page:</td>
+  </tr>
+
+<!-- edit controls in second row. 2/7/2011 gbr  -->
+  <tr>
+    <TD>&nbsp;</TD>
+    <td VALIGN=TOP>
+<input type="hidden" name="FilterScanStatus" value="2">
+    &nbsp;</td>
+    <td VALIGN=TOP><select name="FilterUser">
+<option value="0">All</option>
+<option value="1">yuri</option>
+<option value="2">George Rothbart</option>
+<option value="3">Mike Taggart</option>
+<option value="23">Joanne Childs</option>
+<option value="7">Mike Allen</option>
+<option value="14">Tom Unwin</option>
+<option value="26" selected>Mark/Kyle</option>
+<option value="22">Colleen Iverson</option>
+</select></td>
+    <td VALIGN=TOP><input type=hidden name=hidedate value=""><input type="text" name="FilterDtFrom" id="FilterDtFrom" value="" size=10 maxlength=10 ><a href="javascript:void(0)" onclick="gfPop.fStartPop(document.filterform.FilterDtFrom,document.filterform.hidedate);return false;" HIDEFOCUS><img name="popcal" align="absmiddle" src="js/popcalendarrange/calbtn.gif" width="34" height="22" border="0" alt="date"></a></TD>
+    <td VALIGN=TOP><input type="text" name="FilterDtTo" id="FilterDtTo" value="" size=10 maxlength=10 ><a href="javascript:void(0)" onclick="gfPop.fStartPop(document.filterform.FilterDtTo,document.filterform.hidedate);return false;" HIDEFOCUS><img name="popcal" align="absmiddle" src="js/popcalendarrange/calbtn.gif" width="34" height="22" border="0" alt="date"></a>    <TD VALIGN=TOP><INPUT name="FilterIdFrom" value=0 SIZE=9></TD><TD VALIGN=TOP><INPUT name="FilterIdTo" value=0 SIZE=9></TD>    <td VALIGN=TOP><select name="FilterCount">
+    <option value="20" selected>20</option>
+<option value="40">40</option>
+<option value="80">80</option>
+<option value="160">160</option>
+<option value="320">320</option>
+</select></td>
+    <td VALIGN=TOP><input type=submit value="  Go  "></td>
+  </tr>
+
+</form>
+</table>
+</td>
+<td width=100>&nbsp;&nbsp;</td>
+<td>
+<FIELDSET>
+  <LEGEND>Comparison Mode</LEGEND>
+  <table cellpadding=0 cellspacing=0 border=0>
+  <form name="compareform" method="POST" action="index.php" onsubmit="return validateScanList();">
+  <input type="hidden" name="cmd" value="scan">
+  <input type="hidden" name="mode" value="compare">
+  <input type="hidden" name="scanList" value="">
+  <tr><td align=center><table cellpadding=0 cellspacing=0 border=0>
+<tr>
+<td><input type="RADIO" name="cmp_mode" value="0" CHECKED></td>
+<td  align='left'>Fit To Screen</td>
+</tr>
+<tr>
+<td><input type="RADIO" name="cmp_mode" value="1"></td>
+<td  align='left'>Normal Size</td>
+</tr>
+</table></td></tr>
+  <tr><td align=center><input type="submit" value="     Compare     ">
+  </form>  
+  </td></tr>
+  </table>
+</FIELDSET>  
+</td>
+</tr>
+</table>
+
+   <!--  PopCalendar(tag name and id must match) Tags should sit at the page bottom -->
+<iframe width=174 height=189 name="gToday:normal:agenda.js" id="gToday:normal:agenda.js"
+   src="js/popcalendarrange/ipopeng_001.htm" scrolling="no" frameborder="0"
+   style="visibility:visible; z-index:999; position:absolute; left:-500px; top:0px;">
+</iframe>
+     <p align=center>
+First&nbsp;
+Previous&nbsp;
+1&nbsp;
+<a href="JavaScript: SetPage(20);">2</a>&nbsp;
+<a href="JavaScript: SetPage(20);">Next</a>&nbsp;
+<a href="JavaScript: SetPage(20);">Last</a>&nbsp;
+</p>
+<script>
+function DeleteScan(id, name)
+{
+   if (!confirm("Are you sure you want to delete \""+name+"\" scan?"))
+     return;
+
+   var frm = document.deleteform;
+   frm.id.value=id;
+   frm.submit();
+}
+function SetOrder(new_order)
+{
+   // current order and direction
+   order     = 0;
+   order_dir = 1;
+
+   // if same order then change direction
+   if (order==new_order)
+     order_dir = (order_dir==0 ? 1 : 0);
+   else {
+     order     = new_order;
+     order_dir = 0;
+   }
+
+   // update the screen
+   var f = document.filterform;
+   f.order.value     = order;
+   f.order_dir.value = order_dir;
+   f.submit();
+}
+function SetPage(n)
+{
+   var f = document.filterform;
+   f.start.value = n;
+   f.submit();
+}
+
+function processScanId(current_state, scanID) {
+  var scan = scanID.toString();
+  var f = document.compareform;
+  var str = f.scanList.value;
+  if ((current_state == true) && (str.indexOf(" "+ scan) == -1)) {
+      str = str + " " + scan;
+      f.scanList.value = str;
+  }
+  if ((current_state == false) && (str.indexOf(" "+ scan) > -1)) {
+      str = str.replace(" "+ scan, "");
+      f.scanList.value = str;
+  }
+//alert("ScanID(s)=>" + str+"<, current state is "+ (current_state? "checked" : "unchecked"));  
+}
+
+function validateScanList() {
+  var f = document.compareform;
+  var str = f.scanList.value;
+  if ((str == '') || (str.indexOf(" ") == str.lastIndexOf(" "))) {
+     alert("Please select at least 2 scans for comparison!");
+     return false;
+  }
+//alert("Selected scans for comparison: "+f.scanList.value);
+  return true;
+}
+
+</script>
+<table cellpadding=1 cellspacing=0 border="0" bgcolor="#000000" width="100%">
+
+<form action="index.php" name="deleteform" method="post">
+<input type="hidden" name="cmd"  value="scan">
+<input type="hidden" name="mode" value="delete">
+<input type="hidden" name="id"   value="">
+</form>
+
+<tr><td>
+   <table cellspacing=0 cellpadding=3 border=0 width="100%">
+    <tr class="data_title">
+    <td><table cellpadding=0 cellspacing=0 border=0><tr><td><a class=header_link href="JavaScript: SetOrder(0);">ID</a></td>
+<td>&nbsp;<img src="images/order_desc.gif" width=18 height=18 border=0></td>
+</tr></table></td>
+<td><a class=header_link href="JavaScript: SetOrder(1);">Name</a></td>
+<td><a class=header_link href="JavaScript: SetOrder(2);">Scan Time</a></td>
+<td><a class=header_link href="JavaScript: SetOrder(3);">Step Units</a></td>
+<td><a class=header_link href="JavaScript: SetOrder(4);">(X,Y)-(X,Y)-(DX,DY)</a></td>
+<td><a class=header_link href="JavaScript: SetOrder(5);">Dwell Time, ms</a></td>
+<td><a class=header_link href="JavaScript: SetOrder(6);">Scan Lines</a></td>
+<td><a class=header_link href="JavaScript: SetOrder(7);">Scan Mode</a></td>
+<td><a class=header_link href="JavaScript: SetOrder(8);">Start Time</a></td>
+<td><a class=header_link href="JavaScript: SetOrder(9);">End Time</a></td>
+<td><a class=header_link href="JavaScript: SetOrder(10);">Can- celled</a></td>
+<td><a class=header_link href="JavaScript: SetOrder(11);">User</a></td>
+      <td>Scan Status</td>
+      <td>Arc- hived</td>
+      <td>&nbsp;</td>
+      <td>&nbsp;</td>
+    </tr>
+    <tr class="data_even">
+      <td>158374</td>
+      <td>Plot 20 AMR26 Full Tube Scan </td>
+      <td>2024-07-29 05:00</td>
+      <td>mm</td>
+      <td>(0,0)-(310,740)- (3.01,2.26)</td>
+      <td>100</td>
+      <td>Horizontal</td>
+      <td>Raster</td>
+      <td>2024-07-29 04:59:46</td>
+      <td>2024-07-30 02:51:07</td>
+      <td align="center">0</td>
+      <td>SPRUCE</td>
+      <td>Completed</td>
+      <td align="center" style="cursor:help" title="All Images archived, Mosaic archived">X      </td>
+      <td><a href="index.php?cmd=scan&mode=view&id=158374">View</a>&nbsp;&nbsp;</td>
+      <td><input type="CHECKBOX" name="compare0" value="158374" onclick='processScanId(this.checked, 158374);'>      </td>
+    </tr>
+    <tr class="data_odd">
+      <td>158222</td>
+      <td>Plot 20 AMR26 Full Tube Scan </td>
+      <td>2024-07-22 05:00</td>
+      <td>mm</td>
+      <td>(0,0)-(310,740)- (3.01,2.26)</td>
+      <td>100</td>
+      <td>Horizontal</td>
+      <td>Raster</td>
+      <td>2024-07-22 04:59:36</td>
+      <td>2024-07-23 02:50:57</td>
+      <td align="center">0</td>
+      <td>SPRUCE</td>
+      <td>Completed</td>
+      <td align="center" style="cursor:help" title="All Images archived, Mosaic archived">X      </td>
+      <td><a href="index.php?cmd=scan&mode=view&id=158222">View</a>&nbsp;&nbsp;</td>
+      <td><input type="CHECKBOX" name="compare1" value="158222" onclick='processScanId(this.checked, 158222);'>      </td>
+    </tr>
+    <tr class="data_even">
+      <td>158069</td>
+      <td>Plot 20 AMR26 Full Tube Scan </td>
+      <td>2024-07-15 05:00</td>
+      <td>mm</td>
+      <td>(0,0)-(310,740)- (3.01,2.26)</td>
+      <td>100</td>
+      <td>Horizontal</td>
+      <td>Raster</td>
+      <td>2024-07-15 05:00:19</td>
+      <td>2024-07-16 02:51:26</td>
+      <td align="center">0</td>
+      <td>SPRUCE</td>
+      <td>Completed</td>
+      <td align="center" style="cursor:help" title="All Images archived, Mosaic archived">X      </td>
+      <td><a href="index.php?cmd=scan&mode=view&id=158069">View</a>&nbsp;&nbsp;</td>
+      <td><input type="CHECKBOX" name="compare2" value="158069" onclick='processScanId(this.checked, 158069);'>      </td>
+    </tr>
+    <tr class="data_odd">
+      <td>157971</td>
+      <td>Plot 20 AMR26 Full Tube Scan </td>
+      <td>2024-07-08 05:00</td>
+      <td>mm</td>
+      <td>(0,0)-(310,740)- (3.01,2.26)</td>
+      <td>100</td>
+      <td>Horizontal</td>
+      <td>Raster</td>
+      <td>2024-07-08 04:59:48</td>
+      <td>2024-07-09 02:51:10</td>
+      <td align="center">0</td>
+      <td>SPRUCE</td>
+      <td>Completed</td>
+      <td align="center" style="cursor:help" title="All Images archived, Mosaic archived">X      </td>
+      <td><a href="index.php?cmd=scan&mode=view&id=157971">View</a>&nbsp;&nbsp;</td>
+      <td><input type="CHECKBOX" name="compare3" value="157971" onclick='processScanId(this.checked, 157971);'>      </td>
+    </tr>
+    <tr class="data_even">
+      <td>157813</td>
+      <td>Plot 20 AMR26 Full Tube Scan </td>
+      <td>2024-07-01 05:00</td>
+      <td>mm</td>
+      <td>(0,0)-(310,740)- (3.01,2.26)</td>
+      <td>100</td>
+      <td>Horizontal</td>
+      <td>Raster</td>
+      <td>2024-07-01 05:00:17</td>
+      <td>2024-07-02 02:51:29</td>
+      <td align="center">0</td>
+      <td>SPRUCE</td>
+      <td>Completed</td>
+      <td align="center" style="cursor:help" title="All Images archived, Mosaic archived">X      </td>
+      <td><a href="index.php?cmd=scan&mode=view&id=157813">View</a>&nbsp;&nbsp;</td>
+      <td><input type="CHECKBOX" name="compare4" value="157813" onclick='processScanId(this.checked, 157813);'>      </td>
+    </tr>
+    <tr class="data_odd">
+      <td>157656</td>
+      <td>Plot 20 AMR26 Full Tube Scan </td>
+      <td>2024-06-24 05:00</td>
+      <td>mm</td>
+      <td>(0,0)-(310,740)- (3.01,2.26)</td>
+      <td>100</td>
+      <td>Horizontal</td>
+      <td>Raster</td>
+      <td>2024-06-24 05:00:17</td>
+      <td>2024-06-25 02:49:49</td>
+      <td align="center">0</td>
+      <td>SPRUCE</td>
+      <td>Completed</td>
+      <td align="center" style="cursor:help" title="All Images archived, Mosaic archived">X      </td>
+      <td><a href="index.php?cmd=scan&mode=view&id=157656">View</a>&nbsp;&nbsp;</td>
+      <td><input type="CHECKBOX" name="compare5" value="157656" onclick='processScanId(this.checked, 157656);'>      </td>
+    </tr>
+    <tr class="data_even">
+      <td>157498</td>
+      <td>Plot 20 AMR26 Full Tube Scan </td>
+      <td>2024-06-17 05:00</td>
+      <td>mm</td>
+      <td>(0,0)-(310,740)- (3.01,2.26)</td>
+      <td>100</td>
+      <td>Horizontal</td>
+      <td>Raster</td>
+      <td>2024-06-17 05:00:19</td>
+      <td>2024-06-18 02:51:50</td>
+      <td align="center">0</td>
+      <td>SPRUCE</td>
+      <td>Completed</td>
+      <td align="center" style="cursor:help" title="All Images archived, Mosaic archived">X      </td>
+      <td><a href="index.php?cmd=scan&mode=view&id=157498">View</a>&nbsp;&nbsp;</td>
+      <td><input type="CHECKBOX" name="compare6" value="157498" onclick='processScanId(this.checked, 157498);'>      </td>
+    </tr>
+    <tr class="data_odd">
+      <td>157340</td>
+      <td>Plot 20 AMR26 Full Tube Scan </td>
+      <td>2024-06-10 05:00</td>
+      <td>mm</td>
+      <td>(0,0)-(310,740)- (3.01,2.26)</td>
+      <td>100</td>
+      <td>Horizontal</td>
+      <td>Raster</td>
+      <td>2024-06-10 05:00:18</td>
+      <td>2024-06-11 02:51:48</td>
+      <td align="center">0</td>
+      <td>SPRUCE</td>
+      <td>Completed</td>
+      <td align="center" style="cursor:help" title="All Images archived, Mosaic archived">X      </td>
+      <td><a href="index.php?cmd=scan&mode=view&id=157340">View</a>&nbsp;&nbsp;</td>
+      <td><input type="CHECKBOX" name="compare7" value="157340" onclick='processScanId(this.checked, 157340);'>      </td>
+    </tr>
+    <tr class="data_even">
+      <td>157091</td>
+      <td>Plot 20 AMR26 Full Tube Scan </td>
+      <td>2024-06-03 05:00</td>
+      <td>mm</td>
+      <td>(0,0)-(310,740)- (3.01,2.26)</td>
+      <td>100</td>
+      <td>Horizontal</td>
+      <td>Raster</td>
+      <td>2024-06-03 04:59:54</td>
+      <td>2024-06-04 02:46:57</td>
+      <td align="center">0</td>
+      <td>SPRUCE</td>
+      <td>Completed</td>
+      <td align="center" style="cursor:help" title="All Images archived, Mosaic archived">X      </td>
+      <td><a href="index.php?cmd=scan&mode=view&id=157091">View</a>&nbsp;&nbsp;</td>
+      <td><input type="CHECKBOX" name="compare8" value="157091" onclick='processScanId(this.checked, 157091);'>      </td>
+    </tr>
+    <tr class="data_odd">
+      <td>156743</td>
+      <td>Plot 20 AMR26 Full Tube Scan </td>
+      <td>2024-05-27 05:00</td>
+      <td>mm</td>
+      <td>(0,0)-(310,740)- (3.01,2.26)</td>
+      <td>100</td>
+      <td>Horizontal</td>
+      <td>Raster</td>
+      <td>2024-05-27 04:59:56</td>
+      <td>2024-05-28 02:44:55</td>
+      <td align="center">0</td>
+      <td>SPRUCE</td>
+      <td>Completed</td>
+      <td align="center" style="cursor:help" title="All Images archived, Mosaic archived">X      </td>
+      <td><a href="index.php?cmd=scan&mode=view&id=156743">View</a>&nbsp;&nbsp;</td>
+      <td><input type="CHECKBOX" name="compare9" value="156743" onclick='processScanId(this.checked, 156743);'>      </td>
+    </tr>
+    <tr class="data_even">
+      <td>156416</td>
+      <td>Plot 20 AMR26 Full Tube Scan </td>
+      <td>2024-05-20 05:00</td>
+      <td>mm</td>
+      <td>(0,0)-(310,740)- (3.01,2.26)</td>
+      <td>100</td>
+      <td>Horizontal</td>
+      <td>Raster</td>
+      <td>2024-05-20 04:59:33</td>
+      <td>2024-05-21 02:46:29</td>
+      <td align="center">0</td>
+      <td>SPRUCE</td>
+      <td>Completed</td>
+      <td align="center" style="cursor:help" title="All Images archived, Mosaic archived">X      </td>
+      <td><a href="index.php?cmd=scan&mode=view&id=156416">View</a>&nbsp;&nbsp;</td>
+      <td><input type="CHECKBOX" name="compare10" value="156416" onclick='processScanId(this.checked, 156416);'>      </td>
+    </tr>
+    <tr class="data_odd">
+      <td>156089</td>
+      <td>Plot 20 AMR26 Full Tube Scan </td>
+      <td>2024-05-13 05:00</td>
+      <td>mm</td>
+      <td>(0,0)-(310,740)- (3.01,2.26)</td>
+      <td>100</td>
+      <td>Horizontal</td>
+      <td>Raster</td>
+      <td>2024-05-13 05:00:02</td>
+      <td>2024-05-14 02:46:12</td>
+      <td align="center">0</td>
+      <td>SPRUCE</td>
+      <td>Completed</td>
+      <td align="center" style="cursor:help" title="All Images archived, Mosaic archived">X      </td>
+      <td><a href="index.php?cmd=scan&mode=view&id=156089">View</a>&nbsp;&nbsp;</td>
+      <td><input type="CHECKBOX" name="compare11" value="156089" onclick='processScanId(this.checked, 156089);'>      </td>
+    </tr>
+    <tr class="data_even">
+      <td>155763</td>
+      <td>Plot 20 AMR26 Full Tube Scan </td>
+      <td>2024-05-06 05:00</td>
+      <td>mm</td>
+      <td>(0,0)-(310,740)- (3.01,2.26)</td>
+      <td>100</td>
+      <td>Horizontal</td>
+      <td>Raster</td>
+      <td>2024-05-06 05:00:08</td>
+      <td>2024-05-07 02:46:49</td>
+      <td align="center">0</td>
+      <td>SPRUCE</td>
+      <td>Completed</td>
+      <td align="center" style="cursor:help" title="All Images archived, Mosaic archived">X      </td>
+      <td><a href="index.php?cmd=scan&mode=view&id=155763">View</a>&nbsp;&nbsp;</td>
+      <td><input type="CHECKBOX" name="compare12" value="155763" onclick='processScanId(this.checked, 155763);'>      </td>
+    </tr>
+    <tr class="data_odd">
+      <td>155391</td>
+      <td>Plot 20 AMR26 Full Tube Scan </td>
+      <td>2024-04-29 05:00</td>
+      <td>mm</td>
+      <td>(0,0)-(310,740)- (3.01,2.26)</td>
+      <td>100</td>
+      <td>Horizontal</td>
+      <td>Raster</td>
+      <td>2024-04-29 05:00:11</td>
+      <td>2024-04-30 02:46:58</td>
+      <td align="center">0</td>
+      <td>SPRUCE</td>
+      <td>Completed</td>
+      <td align="center" style="cursor:help" title="All Images archived, Mosaic archived">X      </td>
+      <td><a href="index.php?cmd=scan&mode=view&id=155391">View</a>&nbsp;&nbsp;</td>
+      <td><input type="CHECKBOX" name="compare13" value="155391" onclick='processScanId(this.checked, 155391);'>      </td>
+    </tr>
+    <tr class="data_even">
+      <td>154869</td>
+      <td>Plot 20 AMR26 Full Tube Scan </td>
+      <td>2024-04-22 05:00</td>
+      <td>mm</td>
+      <td>(0,0)-(310,740)- (3.01,2.26)</td>
+      <td>100</td>
+      <td>Horizontal</td>
+      <td>Raster</td>
+      <td>2024-04-22 04:59:48</td>
+      <td>2024-04-23 02:45:46</td>
+      <td align="center">0</td>
+      <td>SPRUCE</td>
+      <td>Completed</td>
+      <td align="center" style="cursor:help" title="All Images archived, Mosaic archived">X      </td>
+      <td><a href="index.php?cmd=scan&mode=view&id=154869">View</a>&nbsp;&nbsp;</td>
+      <td><input type="CHECKBOX" name="compare14" value="154869" onclick='processScanId(this.checked, 154869);'>      </td>
+    </tr>
+    <tr class="data_odd">
+      <td>154416</td>
+      <td>Plot 20 AMR26 Full Tube Scan </td>
+      <td>2024-04-15 05:00</td>
+      <td>mm</td>
+      <td>(0,0)-(310,740)- (3.01,2.26)</td>
+      <td>100</td>
+      <td>Horizontal</td>
+      <td>Raster</td>
+      <td>2024-04-15 04:59:31</td>
+      <td>2024-04-16 02:46:16</td>
+      <td align="center">0</td>
+      <td>SPRUCE</td>
+      <td>Completed</td>
+      <td align="center" style="cursor:help" title="All Images archived, Mosaic archived">X      </td>
+      <td><a href="index.php?cmd=scan&mode=view&id=154416">View</a>&nbsp;&nbsp;</td>
+      <td><input type="CHECKBOX" name="compare15" value="154416" onclick='processScanId(this.checked, 154416);'>      </td>
+    </tr>
+    <tr class="data_even">
+      <td>153954</td>
+      <td>Plot 20 AMR26 Full Tube Scan </td>
+      <td>2024-04-08 05:00</td>
+      <td>mm</td>
+      <td>(0,0)-(310,740)- (3.01,2.26)</td>
+      <td>100</td>
+      <td>Horizontal</td>
+      <td>Raster</td>
+      <td>2024-04-08 04:59:37</td>
+      <td>2024-04-09 02:45:47</td>
+      <td align="center">0</td>
+      <td>SPRUCE</td>
+      <td>Completed</td>
+      <td align="center" style="cursor:help" title="All Images archived, Mosaic archived">X      </td>
+      <td><a href="index.php?cmd=scan&mode=view&id=153954">View</a>&nbsp;&nbsp;</td>
+      <td><input type="CHECKBOX" name="compare16" value="153954" onclick='processScanId(this.checked, 153954);'>      </td>
+    </tr>
+    <tr class="data_odd">
+      <td>153488</td>
+      <td>Plot 20 AMR26 Full Tube Scan </td>
+      <td>2024-04-01 05:00</td>
+      <td>mm</td>
+      <td>(0,0)-(310,740)- (3.01,2.26)</td>
+      <td>100</td>
+      <td>Horizontal</td>
+      <td>Raster</td>
+      <td>2024-04-01 05:00:01</td>
+      <td>2024-04-02 02:44:44</td>
+      <td align="center">0</td>
+      <td>SPRUCE</td>
+      <td>Completed</td>
+      <td align="center" style="cursor:help" title="All Images archived, Mosaic archived">X      </td>
+      <td><a href="index.php?cmd=scan&mode=view&id=153488">View</a>&nbsp;&nbsp;</td>
+      <td><input type="CHECKBOX" name="compare17" value="153488" onclick='processScanId(this.checked, 153488);'>      </td>
+    </tr>
+    <tr class="data_even">
+      <td>153018</td>
+      <td>Plot 20 AMR26 Full Tube Scan </td>
+      <td>2024-03-25 05:00</td>
+      <td>mm</td>
+      <td>(0,0)-(310,740)- (3.01,2.26)</td>
+      <td>100</td>
+      <td>Horizontal</td>
+      <td>Raster</td>
+      <td>2024-03-25 05:00:07</td>
+      <td>2024-03-26 02:46:29</td>
+      <td align="center">0</td>
+      <td>SPRUCE</td>
+      <td>Completed</td>
+      <td align="center" style="cursor:help" title="All Images archived, Mosaic archived">X      </td>
+      <td><a href="index.php?cmd=scan&mode=view&id=153018">View</a>&nbsp;&nbsp;</td>
+      <td><input type="CHECKBOX" name="compare18" value="153018" onclick='processScanId(this.checked, 153018);'>      </td>
+    </tr>
+    <tr class="data_odd">
+      <td>152549</td>
+      <td>Plot 20 AMR26 Full Tube Scan </td>
+      <td>2024-03-18 05:00</td>
+      <td>mm</td>
+      <td>(0,0)-(310,740)- (3.01,2.26)</td>
+      <td>100</td>
+      <td>Horizontal</td>
+      <td>Raster</td>
+      <td>2024-03-18 04:59:30</td>
+      <td>2024-03-19 02:44:26</td>
+      <td align="center">0</td>
+      <td>SPRUCE</td>
+      <td>Completed</td>
+      <td align="center" style="cursor:help" title="All Images archived, Mosaic archived">X      </td>
+      <td><a href="index.php?cmd=scan&mode=view&id=152549">View</a>&nbsp;&nbsp;</td>
+      <td><input type="CHECKBOX" name="compare19" value="152549" onclick='processScanId(this.checked, 152549);'>      </td>
+    </tr>
+  </table>
+</td></tr>
+</table>
+<p align=center>
+First&nbsp;
+Previous&nbsp;
+1&nbsp;
+<a href="JavaScript: SetPage(20);">2</a>&nbsp;
+<a href="JavaScript: SetPage(20);">Next</a>&nbsp;
+<a href="JavaScript: SetPage(20);">Last</a>&nbsp;
+</p>
diff --git a/tests/fixtures/scan_view.html b/tests/fixtures/scan_view.html
new file mode 100644
index 0000000..23fd48c
--- /dev/null
+++ b/tests/fixtures/scan_view.html
@@ -0,0 +1,663 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html>
+<head>
+  <title>Root View</title>
+
+<!-- <script src="menuscript.js" language="javascript" type="text/javascript"></script> -->
+<link rel="stylesheet" type="text/css" href="menustyle.css" media="screen, print" />
+
+<script src="menuscript.js" language="javascript" type="text/javascript"></script>
+</head>
+<body>
+<style>
+P {
+  font: 9pt Arial, Helvetica, sans-serif;
+}
+.header {
+  font: bold 22pt Arial, Helvetica, sans-serif;
+  color: #008080;
+  text-align: center;
+}
+.header_desc {
+  font: bold 10pt Arial, Helvetica, sans-serif;
+  color: #800000;
+  text-align: center;
+}
+
+.error {
+  font: bold 16pt Arial, Helvetica, sans-serif;
+  color: #ff0000;
+  text-align: center;
+}
+
+.information {
+  font:  10pt Arial, Helvetica, sans-serif;
+  color: #0000ff;
+  text-align: left;
+}
+
+.data_title {
+  font: bold 9pt Arial, Helvetica, sans-serif;
+  color: white;
+  background: #696969;
+  text-align: left;
+}
+
+.data_odd {
+  font: 9pt Arial, Helvetica, sans-serif;
+  font-weight      : lighter;
+  background-color : #eeeeee;
+  color            : #000000;
+}
+
+.data_even {
+  font: 9pt Arial, Helvetica, sans-serif;
+  font-weight      : lighter;
+  background-color : #ffffff;
+  color            : #000000;
+}
+
+.data {
+  font: 9pt Arial, Helvetica, sans-serif;
+  color: #000000;
+}
+
+.login {
+  font: bold 9pt Arial, Helvetica, sans-serif;
+}
+
+.header_link {
+  font: bold 9pt Arial, Helvetica, sans-serif;
+  color: white;
+  background: #696969;
+  text-align: left;
+  text-decoration: none;
+}
+
+#pointer_div {
+    position:relative;
+    border-style:solid;
+    border-width:2px;
+    border-color:red;
+    margin:5px;
+    /*cursor:crosshair;*/
+}
+
+#data_left {
+  text-align: left;
+}
+
+#image_frame {
+  background-color: #70a0b0;
+  font-size:2px;
+}
+
+.red_text {
+  color: #ff0000;
+  font: bold 9pt Arial, Helvetica, sans-serif;
+}
+</style>
+<script>
+
+
+function validateFilename(filenameID) {
+  var item = getObj(filenameID);
+  var result = item.value;
+
+  // based on code found on http://www.codingforums.com/showthread.php?t=194468
+  if (/^[a-z0-9\.\s\-\_\[\]]*$/i.test(result) === false) {  // anything but a-zA-Z0-9, [,], dot, hypen, space, underscore is disallowed
+     alert ("File name " + result + " contains invalid character(s)! \r\nAnything but a-z, A-Z, 0-9, square brackets, dot, hypen, space, underscore is disallowed");
+     item.focus();
+     return false;
+  }
+  // If we allow using spaces in filenames, we should at least strip any leading or trailing spaces
+  item.value = item.value.replace(/^\s+|\s+$/g,"");  // the g switch is essential!!
+  return true;
+}
+
+function getObj(ElementId)
+{
+  if (document.getElementById) // BYI now this method works perfect for both IE and NS
+    return document.getElementById(ElementId);
+
+  return false;
+}
+function showit(ElementId)
+{
+  getObj(ElementId).style.display='block';
+}
+function hideit(ElementId)
+{
+  getObj(ElementId).style.display='none';
+}
+function FormatFloat(v)
+{
+  if (v=="")
+    return v;
+  return Math.round(v*100)/100;
+}
+function validateNumber(objId){
+  // BYI the function validates if the value of the data entry element on the form is an integer or a float
+  // an alert message is popped up in case of error, the problem field is focused
+  // the function returns false so that if used in OnSubmit clause it will force the user to re-enter the data
+  var item = getObj(objId);
+  var result = item.value;
+
+  if (isNaN(Number(result))) {
+     alert("The value must be an integer or float value.");
+     item.focus();
+     return false;
+  }
+  return true;
+}
+
+function validatePositiveNumber(objId) {
+  var item = getObj(objId);
+  var result = item.value;
+
+  if (!validateNumber(objId))
+    return false;
+  if (result < 0) {
+    alert("The value must be a positive value.");
+    item.focus();
+    return false;
+  }
+
+  return true;
+}
+
+</script>
+
+ <script>
+
+function PanicStop()
+{
+   // srn - do not ask for confirmation
+   //if (!confirm("Are you sure you want to stop all process?"))
+   //  return;
+
+   var f = document.stop_form;
+   f.submit();
+}
+</script>
+
+<form name="stop_form" action="index.php" method="post">
+<input type="hidden" name="cmd" value="stop">
+</form>
+
+<!-- Button menuing system added 8/2/2009 by gbr -->
+
+<table border="0" cellpadding="0" cellspacing="0" width=100%><tr><td>
+<a href="index.php?cmd=scan"
+   onmouseover="setOverImg('2','');overSub=true;showSubMenu('submenu2','button2');"
+   onmouseout="setOutImg('2','');overSub=false;setTimeout('hideSubMenu(\'submenu2\')',delay);" target="">
+<img src="buttons/button2up.png" border="0" id="button2" vspace="0" hspace="0"></a>
+
+
+<a href="index.php?cmd=about&mode=photos"
+   onmouseover="setOverImg('5','');overSub=true;showSubMenu('submenu5','button5');"
+   onmouseout="setOutImg('5','');overSub=false;setTimeout('hideSubMenu(\'submenu5\')',delay);" target="">
+<img src="buttons/button5up.png" border="0" id="button5" vspace="0" hspace="0"></a>
+
+<a href="index.php?cmd=about"
+   onmouseover="setOverImg('6','');overSub=true;showSubMenu('submenu6','button6');"
+   onmouseout="setOutImg('6','');overSub=false;setTimeout('hideSubMenu(\'submenu6\')',delay);" target="">
+   <img src="buttons/button6up.png" border="0" id="button6" vspace="0" hspace="0"></a>
+
+<a href="index.php?cmd=movies"
+   onmouseover="setOverImg('9','');overSub=true;showSubMenu('submenu9','button9');"
+   onmouseout="setOutImg('9','');overSub=false;setTimeout('hideSubMenu(\'submenu9\')',delay);" target="">
+<img src="buttons/movies_up.png" border="0" id="button9" vspace="0" hspace="0"></a>
+
+<a href="index.php?cmd=logoff"
+   onmouseover="setOverImg('10','');overSub=true;showSubMenu('submenu10','button10');"
+   onmouseout="setOutImg('10','');overSub=false;setTimeout('hideSubMenu(\'submenu10\')',delay);" target="">
+<img src="buttons/button9up.png" border="0" id="button10" vspace="0" hspace="0"></a><br>
+</td></tr></table>
+  <p class="header">"Plot 20 AMR26 Full Tube Scan " Scan View</p>
+
+<script type="text/javascript" src="js/prototype.js" ></script>
+
+<script type="text/javascript" >
+
+function getcordsInDiv(e){
+    //get the position of the container
+    var containerLeft = Position.page($('pointer_div'))[0];
+    var absLeft = Position.realOffset($('pointer_div'))[0];
+    var containerTop = Position.page($('pointer_div'))[1];
+    var absTop = Position.realOffset($('pointer_div'))[1];
+
+    //get the mouse coordinates
+    mouseX = Event.pointerX(e);
+    mouseY = Event.pointerY(e);
+
+    //calculate the absolute mouse position in the div
+    horizontalPosition = mouseX - containerLeft;
+    verticalPosition = mouseY - containerTop;
+
+    //use prototypes function to get the dimension
+    //this is a VERY usefull function because it also checks for borders
+    containerDimensions = $('pointer_div').getDimensions();
+    height   = containerDimensions.height;
+    width = containerDimensions.width;
+
+    //check if the mouse is out or inside the div
+//				if(horizontalPosition < 0 || verticalPosition < 0 || mouseX > (width + containerLeft) || mouseY > (height + containerTop) ){
+    if(horizontalPosition < 0 || verticalPosition < 0  ){
+        TooltipString = 'hp='+horizontalPosition+',vp='+verticalPosition;
+    }else{
+        if (!RootDown) {
+            var xxx = Math.floor((horizontalPosition - absLeft)/33);
+            var yyy = Math.floor((height - verticalPosition + absTop - 4)/25);
+            var xx  = Math.round(100*((horizontalPosition - absLeft)/33*3.01+0))/100;
+            var yy  = Math.round(100*((height - verticalPosition + absTop - 4)/25*2.26+0))/100;
+        }
+        else {
+            var xxx = Math.floor((width - horizontalPosition + absLeft - 4)/33);
+            var yyy = Math.floor((verticalPosition - absTop)/25);
+            var xx  = Math.round(100*((width - horizontalPosition + absLeft - 4)/33*3.01+0))/100;
+            var yy  = Math.round(100*((verticalPosition - absTop)/25*2.26+0))/100;
+        }
+        TooltipString = 'Tiles X=' + xxx + ', Y=' + yyy + '<br>'+
+//                    'h=' + height + ', w=' + width + ', tt=' + (absTop+containerTop);
+        'Xmm=' + xx +  ', Ymm=' + yy + '<br>' +
+        
+        '(Depth undefined offline)';
+        
+    }
+}
+
+
+Event.observe(document, 'mousemove', getcordsInDiv);
+
+var TooltipString = '';
+
+var tooltip = {
+
+	options: {
+		attr_name: "tooltip",
+		blank_text: "(????????? ? ????? ????)",
+		newline_entity: "  ",
+		max_width: 0,
+		delay: 100,
+		skip_tags: ["link", "style"]
+	},
+
+	t: document.createElement("DIV"),
+	c: null,
+	g: false,
+	canvas: null,
+
+	m: function(e){
+		if (tooltip.g){
+			var x = window.event ? event.clientX + (tooltip.canvas.scrollLeft || document.body.scrollLeft) : e.pageX;
+			var y = window.event ? event.clientY + (tooltip.canvas.scrollTop || document.body.scrollTop) : e.pageY;
+			tooltip.a(x, y);
+		}
+	},
+
+	d: function(){
+		tooltip.canvas = document.getElementsByTagName(document.compatMode && document.compatMode == "CSS1Compat" ? "HTML" : "BODY")[0];
+		tooltip.t.setAttribute("id", "tooltip");
+		document.body.appendChild(tooltip.t);
+		if (tooltip.options.max_width) tooltip.t.style.maxWidth = tooltip.options.max_width + "px"; // all but ie
+		var a = document.all && !window.opera ? document.all : document.getElementsByTagName("*"); // in opera 9 document.all produces type mismatch error
+		var l = a.length;
+		for (var i = 0; i < l; i++){
+
+			if (!a[i] || tooltip.options.skip_tags.in_array(a[i].tagName.toLowerCase())) continue;
+
+			var tooltip_title = a[i].getAttribute("title"); // returns form object if IE & name="title"; then IE crashes; so...
+			if (tooltip_title && typeof tooltip_title != "string") tooltip_title = "";
+
+			var tooltip_alt = a[i].getAttribute("alt");
+			var tooltip_blank = a[i].getAttribute("target") && a[i].getAttribute("target") == "_blank" && tooltip.options.blank_text;
+			if (tooltip_title || tooltip_blank){
+				a[i].setAttribute(tooltip.options.attr_name, tooltip_blank ? (tooltip_title ? tooltip_title + " " + tooltip.options.blank_text : tooltip.options.blank_text) : tooltip_title);
+				if (a[i].getAttribute(tooltip.options.attr_name)){
+					a[i].removeAttribute("title");
+					if (tooltip_alt && a[i].complete) a[i].removeAttribute("alt");
+					tooltip.l(a[i], "mouseover", tooltip.s);
+					tooltip.l(a[i], "mouseout", tooltip.h);
+				}
+			}else if (tooltip_alt && a[i].complete){
+				a[i].setAttribute(tooltip.options.attr_name, tooltip_alt);
+				if (a[i].getAttribute(tooltip.options.attr_name)){
+					a[i].removeAttribute("alt");
+					tooltip.l(a[i], "mouseover", tooltip.s);
+					tooltip.l(a[i], "mouseout", tooltip.h);
+				}
+			}
+			if (!a[i].getAttribute(tooltip.options.attr_name) && tooltip_blank){
+				//
+			}
+		}
+		document.onmousemove = tooltip.m;
+		window.onscroll = tooltip.h;
+		tooltip.a(-99, -99);
+	},
+
+	_: function(s){
+		s = s.replace(/\&/g,"&amp;");
+		s = s.replace(/\</g,"&lt;");
+		s = s.replace(/\>/g,"&gt;");
+		return s;
+	},
+
+	s: function(e){
+		if (typeof tooltip == "undefined") return;
+		var d = window.event ? window.event.srcElement : e.target;
+		if (!d.getAttribute(tooltip.options.attr_name)) return;
+		var s = d.getAttribute(tooltip.options.attr_name);
+		if (tooltip.options.newline_entity){
+			var s = tooltip._(s);
+			s = s.replace(eval("/" + tooltip._(tooltip.options.newline_entity) + "/g"), "<br />");
+			tooltip.t.innerHTML = s;
+		}else{
+			if (tooltip.t.firstChild) tooltip.t.removeChild(tooltip.t.firstChild);
+			tooltip.t.appendChild(document.createTextNode(s));
+		}
+		tooltip.c = setTimeout(function(){
+			tooltip.t.style.visibility = 'visible';
+		}, tooltip.options.delay);
+		tooltip.g = true;
+	},
+
+	h: function(e){
+		if (typeof tooltip == "undefined") return;
+		tooltip.t.style.visibility = "hidden";
+		if (!tooltip.options.newline_entity && tooltip.t.firstChild) tooltip.t.removeChild(tooltip.t.firstChild);
+		clearTimeout(tooltip.c);
+		tooltip.g = false;
+		tooltip.a(-99, -99);
+	},
+
+	l: function(o, e, a){
+		if (o.addEventListener) o.addEventListener(e, a, false); // was true--Opera 7b workaround!
+		else if (o.attachEvent) o.attachEvent("on" + e, a);
+			else return null;
+	},
+
+	a: function(x, y){
+		var w_width = tooltip.canvas.clientWidth ? tooltip.canvas.clientWidth + (tooltip.canvas.scrollLeft || document.body.scrollLeft) : window.innerWidth + window.pageXOffset;
+		var w_height = window.innerHeight ? window.innerHeight + window.pageYOffset : tooltip.canvas.clientHeight + (tooltip.canvas.scrollTop || document.body.scrollTop); // should be vice verca since Opera 7 is crazy!
+
+		if (document.all && document.all.item && !window.opera) tooltip.t.style.width = "300px"; //tooltip.options.max_width && tooltip.t.offsetWidth > tooltip.options.max_width ? tooltip.options.max_width + "px" : "auto";
+
+		var t_width = tooltip.t.offsetWidth;
+		var t_height = tooltip.t.offsetHeight;
+
+		tooltip.t.style.left = x + 15 + "px";
+		tooltip.t.style.top = y + 8 + "px";
+
+		if (x + t_width > w_width) tooltip.t.style.left = -10 + w_width - t_width + "px";
+		if (y + t_height > w_height) tooltip.t.style.top = -45 + w_height - t_height + "px";
+        tooltip.t.innerHTML = TooltipString;
+        tooltip.t.style.width = 210 + "px";
+        // should be 55 instead of 40 for one more line
+        tooltip.t.style.height = 55 + "px";
+	}
+}
+
+Array.prototype.in_array = function(value){
+	var l = this.length;
+	for (var i = 0; i < l; i++)
+		if (this[i] === value) return true;
+	return false;
+};
+
+var root = window.addEventListener || window.attachEvent ? window : document.addEventListener ? document : null;
+if (root){
+	if (root.addEventListener) root.addEventListener("load", tooltip.d, false);
+	else if (root.attachEvent) root.attachEvent("onload", tooltip.d);
+}
+</script>
+<style>
+#tooltip{
+background:#FFFF00;
+border:1px solid #666666;
+color:#333333;
+font:menu;
+margin:0px;
+padding:3px 5px;
+position:absolute;
+visibility:hidden;
+width : 180 px;
+height : 35 px;
+}
+</style>
+
+
+<script type=text/javascript>
+
+var ScaleFactor = 1;
+
+function show_tile(x,y)
+{
+    var TilePopup=window.open("include/tile_view.php?cmd=image&mode=image_scan&id=158374&sX=0&sY=0&eX=310&eY=740&dX=3.01&dY=2.26&rd=1&sz=1"+"&s="+ScaleFactor+"&x="+x+"&y="+y+"&Tx="+Tx+"&Ty="+Ty, 'RTL_window',"width=670,height=540,left=200,top=150, titlebar=no,location=no,toolbar=no");
+    TilePopup.focus();
+}
+
+</script>
+<script type="text/javascript"> <!--
+
+var tt, ll, bb, rr = "";
+var RootDown = 1;
+running = 1;
+var t, l, b, r = "";
+var Tx = -1, 
+    Ty = -1;
+
+      
+function clearTile() {
+    if ($('my_pointer') )
+        $('pointer_div').removeChild($('my_pointer'));
+}
+
+function hiliteTile() {
+    var xTile, yTile;
+    var rootGrowsDown;
+
+    xTile = $('xTile').value;
+    yTile = $('yTile').value;
+    rootGrowsDown = 1;
+
+    if (validatePositiveNumber('xTile')) {
+        xx = 102;
+        if (xTile > xx) {
+          alert("The max value for X tile is "+ xx+" (we count from zero tile)");
+          $('xTile').focus();
+          return false;
+        }
+    }
+    else
+      return false;
+    if (validatePositiveNumber('yTile')) {
+        yy = 327;
+        if (yTile > yy) {
+          alert("The max value for Y tile is "+ yy+" (we count from zero tile)");
+          $('yTile').focus();
+          return false;
+        }
+    }
+    else
+      return false;
+    
+    clearTile();
+    
+    my_pointer = document.createElement("div");
+    my_pointer.setAttribute("id","my_pointer");
+    if (rootGrowsDown)
+      my_pointer.setAttribute("style", "background-color:#6ff;filter:alpha(opacity=75);opacity:0.75;width:17px;height:9px;border:3px solid #f00;position:absolute;top:" + (25*yTile + 5) + "px;left:" + (33*(102- xTile)+5) + "px;");
+    else
+      my_pointer.setAttribute("style", "background-color:#6ff;filter:alpha(opacity=75);opacity:0.75;width:17px;height:9px;border:3px solid #f00;position:absolute;top:"+(25*(327- yTile)+5)+"px;left:"+(33*xTile+5)+"px;");
+
+    $('pointer_div').appendChild(my_pointer);
+    return false;
+}
+
+function point_it(event){
+
+  pos_x = event.offsetX ? (event.offsetX) : event.pageX - document.getElementById("pointer_div").offsetLeft;
+  pos_y = event.offsetY?(event.offsetY):event.pageY - document.getElementById("pointer_div").offsetTop;
+
+  ll = pos_x;
+  tt = pos_y;
+  rr = pos_x;
+  bb = pos_y;
+
+  fx(pos_y,pos_x,0);
+}
+
+function fx(top,left, FirstTime){
+
+	if (running == 2){
+    current = document.getElementById('pointer');
+		current.parentNode.removeChild(current);
+		running = 1;
+  }
+	if (running == 1){
+    t=top;
+    l=left;
+    b=top;
+    r=left;
+
+    // now round up the rectangle to the covering tiles
+    Tx = Math.floor(l/33);
+    Ty = Math.floor(t/25);
+    t = 25* Ty;
+    l = 33* Tx;
+    b = 25* Math.ceil(b/25);
+    r = 33* Math.ceil(r/33);
+
+    if (RootDown == 0) {
+      Ty = 328 - Ty - 1;
+      rectL = Math.floor(ll/33)*3.01+0;
+      rectB = 2.26*(328 - Math.floor(bb/25) -1)+0;
+    }
+    else {
+      rectL = 3.01*(103 - Math.floor(ll/33) -1)+0;
+      rectB = Math.floor(bb/25)*2.26+0;
+    }
+//    Tx = Math.round(rectL/3.01);
+//    Ty = Math.round(rectB/2.26);
+
+    element = document.createElement("div");
+    element.setAttribute("id","pointer");
+//        element.setAttribute("style", "background-color:#fff;filter:alpha(opacity=75);opacity:0.75;width:"+(r-l)+"px;height:"+(b-t)+"px;border:1px solid #f00;position:relative;top:"+(t-8200-7)+"px;left:"+(l-2)+"px;");
+    element.setAttribute("style", "background-color:#fff;filter:alpha(opacity=75);opacity:0.75;width:"+(r-l-22)+"px;height:"+(b-t-18)+"px;border:4px solid #0F0;position:relative;top:"+(t-8200)+"px;left:"+(l+6)+"px;");
+		document.getElementById('pointer_div').appendChild(element);
+
+		running=2;
+    if (!FirstTime)
+      show_tile(rectL,rectB);
+	}
+}
+
+function showSubscanPage() {
+  subscanURL = 'index.php?cmd=scan&mode=subscan&id=158374';
+  if (Tx != -1)
+    subscanURL = subscanURL + "&Tx=" + Tx + "&Ty=" + Ty;
+  window.location.href=subscanURL;
+}
+
+
+//-->
+</script>
+
+<center>
+  <table id="data_left" cellpadding=0 cellspacing=3 border=0 width="100%">
+  <tr><td valign="top" width="35%">
+  <table cellpadding=0 cellspacing=3 border=0>
+    <tr><td>Scan ID:</td><td>158374</td></tr>
+    <tr><td>Name:</td><td>Plot 20 AMR26 Full Tube Scan </td></tr>
+    <tr><td>Scan Time:</td><td>2024-07-29 05:00:00</td></tr>
+    <tr><td>Starting X:</td><td>0 mm</td></tr>
+    <tr><td>Starting Y:</td><td>0 mm</td></tr>
+    <tr><td>Ending X:</td><td>310 mm</td></tr>
+    <tr><td>Ending Y:</td><td>740 mm</td></tr>
+    <tr><td>DX:</td><td>3.01 mm</td></tr>
+    <tr><td>DY:</td><td>2.26 mm</td></tr>
+    <tr><td>Dwell Time:</td><td>100 ms</td></tr>
+    <tr><td>Scan Lines:</td><td>Horizontal</td></tr>
+    <tr><td>Scan Mode:</td><td>Raster</td></tr>
+    <tr><td>Step Units:</td><td>mm</td></tr>
+    <tr><td>Start Time:</td><td>2024-07-29 04:59:46</td></tr>
+    <tr><td>End Time:</td><td>2024-07-30 02:51:07</td></tr>
+    <tr><td>Scan Status:&nbsp;&nbsp;&nbsp;</td><td>Completed</td></tr>
+    <tr><td>Root grows down:&nbsp;&nbsp;</td><td>Yes</td></tr>
+    <tr><td>Notes:</td><td></td></tr>
+    <tr><td>User:</td><td>SPRUCE</td></tr>
+    <tr><td>Total number of images:</td><td>33784 (103x328)</td>
+    </tr>
+    <tr><td>Total Disk Space:</td><td>1949.001 Mb</td></tr>
+    <tr><td>Total Travel distance:</td><td>204098.72 mm</td></tr>
+    <tr><td nowrap>Estimated Scan Time (HH:MM:SS):</td><td>
+15:05:16    </td>
+    <tr>
+      <td>Scan Time (HH:MM:SS):</td>
+      <td>21:51:21</td>
+    </tr>
+
+  </table>
+  </td>
+  <td valign="top" width="65%">
+  <form  method="post" name="imageform" action="index.php">
+  <input type="hidden" name="cmd" value="scan">
+  <input type="hidden" name="mode" value="view">
+  <input type="hidden" name="id" value="158374">
+  <input type="hidden" name="fZ" value="1">
+  <table cellpadding=0 cellspacing=0 border=0>
+    <tr>
+      <td>&nbsp;</td>
+      <td>Popup NxN:&nbsp;&nbsp;</td>
+      <td><select name="fS" onclick="ScaleFactor=this.value;">
+<option value="1" selected>1X1</option>
+<option value="2">2X2</option>
+<option value="3">3X3</option>
+<option value="4">4X4</option>
+<option value="5">5X5</option>
+<option value="6">6X6</option>
+<option value="7">7X7</option>
+      </select></td>
+      <td>&nbsp;&nbsp;</td>
+      <td>&nbsp;
+      </td>
+      <td>&nbsp;</td>
+      <td>
+<!-- this works just fine as a traditional submit button: <input type="button" value="Switch to Subscan Mode" onClick="window.location.href='index.php?cmd=scan&mode=subscan&id=158374'"> -->
+<!-- however, this will use an image as a button, and does the same thing as the line above. gbr 10/3/2009 -->
+       <input type=image src="buttons\tosubscan.png" onClick="showSubscanPage(); return false;">
+       
+       &nbsp;&nbsp;</td>
+      <td>&nbsp;</td>
+    </tr>
+    <tr>
+     <td>&nbsp;</td>
+     <td colspan=3>Highlight tile at X:&nbsp;
+       <input type="text"  name="xTile" id="xTile" value="0" size="4" id="xTile">,&nbsp; Y:&nbsp;<input type="text"  name="yTile" id="yTile" value="0" size="4" id="yTile">       &nbsp;
+       </td><td colspan=3><input type=image src="buttons\do_it_up.png" ID=toggler ONCLICK="hiliteTile(); return false;">&nbsp;<input type=image src="buttons\clear_up.png" ID=toggler1 ONCLICK="clearTile(); return false;"></td><td>&nbsp;</td>
+    </tr>
+  </table>
+</form>
+    <div id="pointer_div"  onclick="point_it(event)" style = "position:absolute;width:3399px;height:8200px;">
+    <img  id ="container" title="Please wait while the mosaic is loaded completely..." src="http://205.149.147.131:8011/RootView_Database/158374/mosaic.jpg">   
+    </div>
+  </td>
+  </tr>
+</table>
+<script type="text/javascript">
+
+if (Tx != -1) {
+  if (1 == 0) {
+    ll = rr = Tx * 33 + 1;
+    tt = bb = (328 - Ty - 1) * 25 + 1;
+  }
+  else {
+    ll = rr = (103 - Tx -1) * 33 + 1;  
+    tt = bb = Ty * 25 + 1;
+  }
+  fx(tt, ll, 1);
+}
+
+</script>
diff --git a/tests/test_parsers.py b/tests/test_parsers.py
new file mode 100644
index 0000000..55db906
--- /dev/null
+++ b/tests/test_parsers.py
@@ -0,0 +1,153 @@
+"""Tests for spruce.parsers — all pure, no network required."""
+
+import pytest
+from spruce.parsers import (
+    _grid_count,
+    _grid_values,
+    parse_machine_option,
+    parse_scan_row,
+    parse_scan_view,
+)
+
+
+# ---------------------------------------------------------------------------
+# parse_machine_option
+# ---------------------------------------------------------------------------
+
+
+def test_parse_machine_option_basic():
+    raw = "BW3%2D20%20%5BAMR%2D26%5D|205.149.147.130|17026|26|8026|3.0.0.33|50.00"
+    m = parse_machine_option("BW3-20 [AMR-26]", raw)
+    assert m["label"] == "BW3-20 [AMR-26]"
+    assert m["machine_id"] == "26"
+    assert m["ip"] == "205.149.147.130"
+    assert m["version"] == "3.0.0.33"
+    assert m["option_value"] == raw
+
+
+def test_parse_machine_option_short_value():
+    # A value with fewer pipe-delimited parts must not raise; absent fields default to ""
+    m = parse_machine_option("Lonely Machine", "LM|10.0.0.1")
+    assert m["ip"] == "10.0.0.1"
+    assert m["machine_id"] == ""
+    assert m["port2"] == ""
+
+
+# ---------------------------------------------------------------------------
+# parse_scan_row
+# ---------------------------------------------------------------------------
+
+
+VALID_CELLS = [
+    "158374",
+    "Plot 20 AMR26 Full Tube Scan",
+    "2024-07-29 05:00",
+    "mm",
+    "(0,0)-(310,740)-(3.01,2.26)",
+    "100",
+    "Horizontal",
+    "Raster",
+    "2024-07-29 04:59:46",
+    "2024-07-30 02:51:07",
+    "0",
+    "SPRUCE",
+    "Completed",
+    "X",
+]
+
+
+def test_parse_scan_row_valid():
+    sc = parse_scan_row(VALID_CELLS)
+    assert sc is not None
+    assert sc["scan_id"] == 158374
+    assert sc["name"] == "Plot 20 AMR26 Full Tube Scan"
+    assert sc["status"] == "Completed"
+    assert sc["scan_time"] == "2024-07-29 05:00"
+
+
+def test_parse_scan_row_ignores_non_digit_first_cell():
+    assert parse_scan_row(["ID", "Name", "Scan Time"]) is None
+    assert parse_scan_row(["Scan ID:", "158374"]) is None
+
+
+def test_parse_scan_row_empty():
+    assert parse_scan_row([]) is None
+
+
+def test_parse_scan_row_minimal():
+    sc = parse_scan_row(["42"])
+    assert sc is not None
+    assert sc["scan_id"] == 42
+    assert sc["name"] == ""
+
+
+# ---------------------------------------------------------------------------
+# parse_scan_view (uses the scan_view_html fixture provided by conftest.py)
+# ---------------------------------------------------------------------------
+
+
+def test_parse_scan_view_scan_id(scan_view_html):
+    meta = parse_scan_view(scan_view_html)
+    assert meta.get("scan_id") == 158374
+
+
+def test_parse_scan_view_grid_dimensions(scan_view_html):
+    meta = parse_scan_view(scan_view_html)
+    assert meta.get("nx") == 103
+    assert meta.get("ny") == 328
+
+
+def test_parse_scan_view_step_sizes(scan_view_html):
+    meta = parse_scan_view(scan_view_html)
+    assert meta.get("dx") == pytest.approx(3.01)
+    assert meta.get("dy") == pytest.approx(2.26)
+
+
+def test_parse_scan_view_total_tiles(scan_view_html):
+    meta = parse_scan_view(scan_view_html)
+    # 103 × 328 = 33784
+    assert meta.get("total_tiles") == 103 * 328
+
+
+def test_parse_scan_view_empty_string():
+    meta = parse_scan_view("")
+    assert meta == {}
+
+
+# ---------------------------------------------------------------------------
+# _grid_count
+# ---------------------------------------------------------------------------
+
+
+def test_grid_count_typical():
+    assert _grid_count(0, 310, 3.01) == 103
+    assert _grid_count(0, 740, 2.26) == 328
+
+
+def test_grid_count_zero_step():
+    assert _grid_count(0, 100, 0) == 0
+
+
+def test_grid_count_single():
+    assert _grid_count(0, 1, 1) == 1
+
+
+# ---------------------------------------------------------------------------
+# _grid_values
+# ---------------------------------------------------------------------------
+
+
+def test_grid_values_basic():
+    vals = _grid_values(0.0, 3, 3.01)
+    assert vals == [0.0, 3.01, 6.02]
+
+
+def test_grid_values_empty():
+    assert _grid_values(0.0, 0, 1.0) == []
+
+
+def test_grid_values_rounded():
+    # Accumulated floating-point error must be rounded away: each value has at most 2 decimal places
+    vals = _grid_values(0.0, 4, 0.1)
+    for v in vals:
+        assert v == round(v, 2)
diff --git a/tests/test_paths.py b/tests/test_paths.py
new file mode 100644
index 0000000..b1fd7a4
--- /dev/null
+++ b/tests/test_paths.py
@@ -0,0 +1,102 @@
+"""Tests for spruce.paths — pure path helpers, no network."""
+
+from pathlib import Path
+
+import pytest
+from spruce.paths import _extract_date, machine_dir_name, mosaic_dest, tile_dest
+
+
+MACHINE = {"label": "BW3-20 [AMR-26]", "machine_id": "26"}
+SCAN_META = {"scan_time": "2024-07-29 05:00", "nx": 103, "ny": 328}
+TILE = {"scan_id": 158374, "row_index": 0, "col_index": 2}
+
+
+# ---------------------------------------------------------------------------
+# machine_dir_name
+# ---------------------------------------------------------------------------
+
+
+def test_machine_dir_name_sanitises_brackets():
+    name = machine_dir_name({"label": "BW3-20 [AMR-26]"})
+    # Brackets and spaces must not survive in the directory name (only their absence is asserted here)
+    assert "[" not in name
+    assert "]" not in name
+    assert " " not in name
+
+
+def test_machine_dir_name_no_leading_trailing_underscores():
+    name = machine_dir_name({"label": "BW3-20 [AMR-26]"})
+    assert not name.startswith("_")
+    assert not name.endswith("_")
+
+
+def test_machine_dir_name_stable():
+    # Same label should always produce the same dir name
+    assert machine_dir_name(MACHINE) == machine_dir_name(MACHINE)
+
+
+# ---------------------------------------------------------------------------
+# _extract_date
+# ---------------------------------------------------------------------------
+
+
+def test_extract_date_standard():
+    assert _extract_date("2024-07-29 05:00:00") == "2024-07-29"
+
+
+def test_extract_date_date_only():
+    assert _extract_date("2024-07-29") == "2024-07-29"
+
+
+def test_extract_date_no_date():
+    assert _extract_date("no date here") == "unknown"
+
+
+def test_extract_date_empty():
+    assert _extract_date("") == "unknown"
+
+
+# ---------------------------------------------------------------------------
+# tile_dest
+# ---------------------------------------------------------------------------
+
+
+def test_tile_dest_structure(tmp_path):
+    dest = tile_dest(tmp_path, MACHINE, SCAN_META, TILE)
+    parts = dest.parts
+    assert str(TILE["scan_id"]) in parts
+    assert "tiles" in parts
+    assert dest.suffix == ".jpg"
+    assert "tile_r" in dest.name
+
+
+def test_tile_dest_zero_padded(tmp_path):
+    # ny=328 → row index needs 3 digits; nx=103 → col index needs 3 digits
+    dest = tile_dest(tmp_path, MACHINE, SCAN_META, TILE)
+    # row_index=0 padded to 3 digits (max is 327) → "000"
+    assert "tile_r000_c" in dest.name
+
+
+def test_tile_dest_contains_date(tmp_path):
+    dest = tile_dest(tmp_path, MACHINE, SCAN_META, TILE)
+    assert "2024-07-29" in str(dest)
+
+
+# ---------------------------------------------------------------------------
+# mosaic_dest
+# ---------------------------------------------------------------------------
+
+
+def test_mosaic_dest_filename(tmp_path):
+    dest = mosaic_dest(tmp_path, MACHINE, SCAN_META, 158374)
+    assert dest.name == "mosaic.jpg"
+
+
+def test_mosaic_dest_contains_scan_id(tmp_path):
+    dest = mosaic_dest(tmp_path, MACHINE, SCAN_META, 158374)
+    assert "158374" in str(dest)
+
+
+def test_mosaic_dest_contains_date(tmp_path):
+    dest = mosaic_dest(tmp_path, MACHINE, SCAN_META, 158374)
+    assert "2024-07-29" in str(dest)
diff --git a/tests/test_progress.py b/tests/test_progress.py
new file mode 100644
index 0000000..c7a6d3e
--- /dev/null
+++ b/tests/test_progress.py
@@ -0,0 +1,138 @@
+"""Tests for spruce.progress — file I/O only, uses tmp_path."""
+
+import csv
+import json
+from pathlib import Path
+
+import pytest
+from spruce.progress import CsvWriter, ProgressTracker
+
+
+# ---------------------------------------------------------------------------
+# ProgressTracker
+# ---------------------------------------------------------------------------
+
+
+def test_progress_mark_and_check(tmp_path):  # a URL is done only after mark_done()
+    tracker = ProgressTracker(tmp_path / ".progress.json")
+    assert not tracker.is_done("http://example.com/a")
+    tracker.mark_done("http://example.com/a")
+    assert tracker.is_done("http://example.com/a")
+
+
+def test_progress_roundtrip(tmp_path):
+    """URLs saved by one tracker are seen by a fresh tracker on the same file."""
+    path = tmp_path / ".progress.json"
+    writer = ProgressTracker(path)
+    for url in ("http://example.com/a", "http://example.com/b"):
+        writer.mark_done(url)
+    writer.save()
+    reader = ProgressTracker(path)
+    assert reader.is_done("http://example.com/a")
+    assert reader.is_done("http://example.com/b")
+    assert not reader.is_done("http://example.com/c")
+
+
+def test_progress_discard(tmp_path):  # discard() undoes an earlier mark_done()
+    tracker = ProgressTracker(tmp_path / ".progress.json")
+    tracker.mark_done("http://example.com/x")
+    assert tracker.is_done("http://example.com/x")
+    tracker.discard("http://example.com/x")
+    assert not tracker.is_done("http://example.com/x")
+
+
+def test_progress_discard_nonexistent_is_noop(tmp_path):
+    """Discarding a URL that was never marked must not raise."""
+    ProgressTracker(tmp_path / ".progress.json").discard("http://example.com/never")
+
+
+def test_progress_iter_urls(tmp_path):
+    """iter_urls() yields exactly the URLs marked done (order is not checked)."""
+    urls = ("http://example.com/1", "http://example.com/2")
+    tracker = ProgressTracker(tmp_path / ".progress.json")
+    for url in urls:
+        tracker.mark_done(url)
+
+    assert set(tracker.iter_urls()) == set(urls)
+
+
+def test_progress_len(tmp_path):
+    """len() follows the number of done URLs as they are marked and discarded."""
+    tracker = ProgressTracker(tmp_path / ".progress.json")
+    assert len(tracker) == 0
+    for count, url in enumerate(("http://example.com/1", "http://example.com/2"), 1):
+        tracker.mark_done(url)
+        assert len(tracker) == count
+    tracker.discard("http://example.com/1")
+    assert len(tracker) == 1
+
+
+def test_progress_save_creates_parent(tmp_path):
+    """save() creates missing parent dirs and stores URLs under "completed_urls"."""
+    path = tmp_path / "nested" / "dir" / ".progress.json"
+    tracker = ProgressTracker(path)
+    tracker.mark_done("http://example.com/z")
+    tracker.save()
+    assert path.exists()
+    assert "http://example.com/z" in json.loads(path.read_text())["completed_urls"]
+
+
+def test_progress_corrupt_file_starts_fresh(tmp_path):
+    """An unparseable progress file yields an empty tracker, not an exception."""
+    progress_file = tmp_path / ".progress.json"
+    progress_file.write_text("not valid json")
+    assert len(ProgressTracker(progress_file)) == 0
+
+
+# ---------------------------------------------------------------------------
+# CsvWriter
+# ---------------------------------------------------------------------------
+
+FIELDS = ["a", "b", "c"]
+
+
+def test_csv_writer_creates_header(tmp_path):
+    """Opening and closing a writer on a new path leaves a header-only CSV."""
+    path = tmp_path / "out.csv"
+    CsvWriter(path, FIELDS).close()
+    # read_text() closes the file; a bare path.open() handle was never closed
+    lines = path.read_text().splitlines()
+    assert list(csv.DictReader(lines)) == []
+    assert lines[0] == "a,b,c"
+
+
+def test_csv_writer_write_row(tmp_path):
+    """A written row is read back once, with values under the right columns."""
+    path = tmp_path / "out.csv"
+    w = CsvWriter(path, FIELDS)
+    w.write({"a": "1", "b": "2", "c": "3"})
+    w.close()
+    rows = list(csv.DictReader(path.read_text().splitlines()))  # no leaked handle
+    assert len(rows) == 1
+    assert (rows[0]["a"], rows[0]["c"]) == ("1", "3")
+
+
+def test_csv_writer_missing_fields_fill_empty(tmp_path):
+    """Fields absent from the written dict come back as empty strings."""
+    path = tmp_path / "out.csv"
+    w = CsvWriter(path, FIELDS)
+    w.write({"a": "hello"})  # b and c missing
+    w.close()
+    rows = list(csv.DictReader(path.read_text().splitlines()))  # no leaked handle
+    assert (rows[0]["b"], rows[0]["c"]) == ("", "")
+
+
+def test_csv_writer_appends_on_second_open(tmp_path):
+    """Reopening an existing CSV appends rows; earlier rows are still there."""
+    path = tmp_path / "out.csv"
+    w = CsvWriter(path, FIELDS)
+    w.write({"a": "first"})
+    w.close()
+
+    w2 = CsvWriter(path, FIELDS)
+    w2.write({"a": "second"})
+    w2.close()
+
+    # read_text() closes the file; a bare path.open() handle was never closed
+    rows = list(csv.DictReader(path.read_text().splitlines()))
+    assert [row["a"] for row in rows] == ["first", "second"]
diff --git a/tests/test_recheck.py b/tests/test_recheck.py
new file mode 100644
index 0000000..998997a
--- /dev/null
+++ b/tests/test_recheck.py
@@ -0,0 +1,149 @@
+"""
+Tests for spruce.recheck — synthetic archive tree under tmp_path.
+
+These tests verify the key improvement: a single --recheck pass is enough.
+Zero-byte tiles are deleted from disk AND their URLs removed from progress
+without needing a second pass.
+"""
+
+from pathlib import Path
+
+import pytest
+from spruce.progress import ProgressTracker
+from spruce.recheck import recheck_archive, recheck_tile_files
+
+
+BASE_URL = "http://192.0.2.1:8010/index.php"
+
+
+def _tile_url(scan_id: int, x: float, y: float) -> str:  # synthetic tile-image URL
+    return f"{BASE_URL}?cmd=image&mode=image_scan&id={scan_id}&s=1&x={x}&y={y}"
+
+
+def _make_tile(path: Path, size: int = 1024) -> None:
+    """Write *size* filler bytes to *path*; size=0 mimics a zero-byte download."""
+    path.parent.mkdir(exist_ok=True, parents=True)
+    path.write_bytes(bytes([0xFF]) * size)
+
+
+def _archive_tile_path(tmp_path: Path, scan_id: int, row: int, col: int) -> Path:
+    """Return the path of one tile inside the synthetic archive tree.
+
+    Layout: machine dir / scan date / scan id / "tiles" / zero-padded tile name.
+    """
+    machine_dir = tmp_path / "BW3-20__AMR-26_"
+    scan_dir = machine_dir / "2024-07-29" / str(scan_id)
+    tile_name = f"tile_r{row:03d}_c{col:03d}.jpg"
+    return scan_dir / "tiles" / tile_name
+
+
+# ---------------------------------------------------------------------------
+# recheck_tile_files
+# ---------------------------------------------------------------------------
+
+
+def test_recheck_tile_files_no_zero_bytes(tmp_path):
+    """A non-empty tile stays on disk and stays marked done; nothing is deleted."""
+    progress = ProgressTracker(tmp_path / ".progress.json")
+    tile_path = _archive_tile_path(tmp_path, 158374, 0, 0)
+    tile_url = _tile_url(158374, 0.0, 0.0)
+    _make_tile(tile_path, size=1024)
+    progress.mark_done(tile_url)
+    progress.save()
+
+    assert recheck_tile_files(tmp_path, progress) == 0
+    assert tile_path.exists()
+    assert progress.is_done(tile_url)
+
+
+def test_recheck_tile_files_deletes_zero_byte(tmp_path):
+    """A zero-byte tile is removed from disk and counted in the return value."""
+    progress = ProgressTracker(tmp_path / ".progress.json")
+    tile_path = _archive_tile_path(tmp_path, 158374, 0, 0)
+    _make_tile(tile_path, size=0)
+    progress.mark_done(_tile_url(158374, 0.0, 0.0))
+    progress.save()
+
+    removed_count = recheck_tile_files(tmp_path, progress)
+    assert removed_count == 1
+    assert not tile_path.exists()
+
+
+def test_recheck_tile_files_single_pass_removes_url(tmp_path):
+    """
+    One pass is enough: recheck_tile_files itself drops the zero-byte tile's
+    URL from progress and persists that, so no second run is needed.
+    """
+    progress_file = tmp_path / ".progress.json"
+    progress = ProgressTracker(progress_file)
+    tile_url = _tile_url(158374, 0.0, 0.0)
+    _make_tile(_archive_tile_path(tmp_path, 158374, 0, 0), size=0)
+    progress.mark_done(tile_url)
+    progress.save()
+
+    recheck_tile_files(tmp_path, progress)
+
+    # A tracker freshly loaded from disk proves the removal was saved.
+    assert not ProgressTracker(progress_file).is_done(tile_url)
+
+
+def test_recheck_tile_files_healthy_tiles_untouched(tmp_path):
+    """Only the zero-byte tile is deleted; its non-empty neighbour survives."""
+    progress = ProgressTracker(tmp_path / ".progress.json")
+    healthy = _archive_tile_path(tmp_path, 158374, 0, 0)
+    empty = _archive_tile_path(tmp_path, 158374, 0, 1)
+    _make_tile(healthy, size=512)
+    _make_tile(empty, size=0)
+    # x=3.01 is presumably the URL offset of column 1 — confirm against the mapping
+    for url in (_tile_url(158374, 0.0, 0.0), _tile_url(158374, 3.01, 0.0)):
+        progress.mark_done(url)
+    progress.save()
+
+    deleted = recheck_tile_files(tmp_path, progress)
+    assert deleted == 1
+    assert healthy.exists()
+    assert not empty.exists()
+
+
+# ---------------------------------------------------------------------------
+# recheck_archive
+# ---------------------------------------------------------------------------
+
+
+def test_recheck_archive_empty_progress(tmp_path):
+    """With nothing recorded as done, recheck_archive removes nothing."""
+    progress = ProgressTracker(tmp_path / ".progress.json")
+    assert recheck_archive(tmp_path, progress) == 0
+
+
+def test_recheck_archive_healthy(tmp_path):
+    """recheck_archive reports zero removals when the scan's tile is on disk."""
+    progress = ProgressTracker(tmp_path / ".progress.json")
+    _make_tile(_archive_tile_path(tmp_path, 158374, 0, 0), size=1024)
+    progress.mark_done(_tile_url(158374, 0.0, 0.0))
+    progress.save()
+
+    removed = recheck_archive(tmp_path, progress)
+    assert removed == 0
+
+
+def test_recheck_archive_removes_missing_scan(tmp_path):
+    """A done URL with no files on disk is dropped from progress and counted."""
+    progress = ProgressTracker(tmp_path / ".progress.json")
+    orphan_url = _tile_url(999999, 0.0, 0.0)
+    progress.mark_done(orphan_url)  # marked done, but nothing is created on disk
+    progress.save()
+
+    assert recheck_archive(tmp_path, progress) == 1
+    assert not progress.is_done(orphan_url)
+
+
+def test_recheck_archive_skips_mosaic_urls(tmp_path):
+    """A mosaic URL stays in progress even though no file backs it on disk."""
+    mosaic_url = "http://192.0.2.1:8011/RootView_Database/158374/mosaic.jpg"
+    progress = ProgressTracker(tmp_path / ".progress.json")
+    progress.mark_done(mosaic_url)
+    progress.save()
+
+    assert recheck_archive(tmp_path, progress) == 0
+    assert progress.is_done(mosaic_url)  # mosaics are never touched
diff --git a/tests/test_settings.py b/tests/test_settings.py
new file mode 100644
index 0000000..6c7cb59
--- /dev/null
+++ b/tests/test_settings.py
@@ -0,0 +1,91 @@
+"""Tests for spruce.settings — config loading and worker clamping."""
+
+import logging
+
+import pytest
+import yaml
+
+from spruce.settings import (
+    MAX_SAFE_WORKERS,
+    _clamp_workers,
+    load_config,
+)
+
+
+# ---------------------------------------------------------------------------
+# _clamp_workers
+# ---------------------------------------------------------------------------
+
+
+def test_clamp_workers_below_limit():  # 2 is returned unchanged
+    assert _clamp_workers(2) == 2
+
+
+def test_clamp_workers_at_limit():  # the limit itself is returned unchanged
+    assert _clamp_workers(MAX_SAFE_WORKERS) == MAX_SAFE_WORKERS
+
+
+def test_clamp_workers_above_limit_caps(caplog):
+    """One over the limit is capped to MAX_SAFE_WORKERS and a warning is logged."""
+    with caplog.at_level(logging.WARNING):
+        assert _clamp_workers(MAX_SAFE_WORKERS + 1) == MAX_SAFE_WORKERS
+    assert "exceeds the safe limit" in caplog.text
+
+
+def test_clamp_workers_zero():  # zero is returned as-is, not raised to a minimum
+    assert _clamp_workers(0) == 0
+
+
+# ---------------------------------------------------------------------------
+# load_config
+# ---------------------------------------------------------------------------
+
+
+def _write_config(tmp_path, **overrides):
+    """Write a config.yaml under tmp_path and return its path as a string.
+
+    The file holds test credentials, with any keyword overrides applied on top.
+    """
+    settings = {"username": "testuser", "password": "testpass", **overrides}
+    config_file = tmp_path / "config.yaml"
+    config_file.write_text(yaml.dump(settings))
+    return str(config_file)
+
+
+def test_load_config_defaults(tmp_path):
+    """A credentials-only config gets default values for the remaining settings."""
+    cfg = load_config(_write_config(tmp_path))
+    assert cfg["base_url"] == "http://205.149.147.131:8010/"
+    assert cfg["workers"] == 2
+    assert cfg["timeout"] == 60
+    assert cfg["request_delay"] == 0.5
+    assert cfg["output_dir"] == "archives"
+
+
+def test_load_config_overrides(tmp_path):
+    """Values present in the file win over the defaults."""
+    cfg = load_config(_write_config(tmp_path, workers=3, output_dir="my_archives"))
+    assert cfg["workers"] == 3
+    assert cfg["output_dir"] == "my_archives"
+
+
+def test_load_config_caps_workers(tmp_path, caplog):
+    """load_config caps an over-limit workers value and logs the warning."""
+    config_path = _write_config(tmp_path, workers=MAX_SAFE_WORKERS + 2)
+    with caplog.at_level(logging.WARNING):
+        assert load_config(config_path)["workers"] == MAX_SAFE_WORKERS
+    assert "exceeds the safe limit" in caplog.text
+
+
+def test_load_config_missing_username_exits(tmp_path):
+    """A config without a username makes load_config raise SystemExit."""
+    (tmp_path / "config.yaml").write_text(yaml.dump({"password": "x"}))
+    with pytest.raises(SystemExit):
+        load_config(str(tmp_path / "config.yaml"))
+
+
+def test_load_config_missing_password_exits(tmp_path):
+    """A config without a password makes load_config raise SystemExit."""
+    (tmp_path / "config.yaml").write_text(yaml.dump({"username": "x"}))
+    with pytest.raises(SystemExit):
+        load_config(str(tmp_path / "config.yaml"))