#!/usr/bin/env python3
"""Download configured geodata tiles into raw/ based on a TOML config."""

from __future__ import annotations

import argparse
import os
import queue
import shutil
import sys
import threading
import time
import zipfile
from concurrent.futures import Future, as_completed
from dataclasses import dataclass
from typing import Dict, Iterable, List, Optional, Tuple
from urllib.parse import urlparse

try:
    import tomllib
except ImportError:  # pragma: no cover - tomllib is required
    raise SystemExit("tomllib is required (Python 3.11+).")

import requests

DEFAULT_CONFIG = "geodata_download.toml"
DEFAULT_OUTPUT_DIR = "raw"

# Maps dataset keys to their subdirectory below the output directory.
OUTPUT_SUBDIRS = {
    "dgm1": "dgm1",
    "dom1": "dom1",
    "dop20": "dop20",
    "geb3dlo": os.path.join("citygml", "lod2"),
    "citygml": os.path.join("citygml", "lod2"),
    "bdom20rgbi": "bdom20rgbi",
    "lpg": "lpolpg",
    "lpo": "lpolpg",
    "lpolpg": "lpolpg",
}

# Maps per-file "type" values to subdirectories within a dataset folder.
FILE_TYPE_SUBDIRS = {
    "image": "jp2",
    "worldfile": "j2w",
    "metadata": "meta",
}


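# Illustrative sketch of the TOML layout this script consumes; the target name,
# dataset key, URL, and tile numbers below are placeholders, not real endpoints:
#
#   [download.geobasis]
#   base_url = "https://example.org/tiles"
#   output_directory = "raw"
#   parallel_downloads = 4
#   retry_attempts = 3
#
#   [tile_ranges.city]
#   x_start = 350
#   x_end = 360
#   y_start = 5600
#   y_end = 5610
#
#   [datasets.dgm1]
#   tile_range = "city"
#   url_template = "{base_url}/dgm1_{x}_{y}.xyz.gz"

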
@dataclass(frozen=True)
class DownloadTask:
    """One file to fetch: where from, where to, and whether to unzip it."""

    dataset: str
    url: str
    output_path: str
    download_key: str
    unzip: bool = False
    unzip_dir: Optional[str] = None


class DownloadLogger:
    """Writes log lines to stdout and, optionally, to a log file."""

    def __init__(
        self,
        log_file: Optional[str] = None,
        log_format: Optional[str] = None,
        report_progress: bool = True,
    ) -> None:
        self._log_file = log_file
        self._log_format = log_format
        self._report_progress = report_progress

    def log(self, level: str, message: str) -> None:
        line = self._format(level, message)
        if self._report_progress:
            print(line)
        if self._log_file:
            with open(self._log_file, "a", encoding="utf-8") as fh:
                fh.write(line + "\n")

    def _format(self, level: str, message: str) -> str:
        if not self._log_format:
            return f"[{level}] {message}"
        timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
        try:
            return self._log_format.format(timestamp=timestamp, level=level, message=message)
        except Exception:
            # Fall back to the plain format if the configured template is malformed.
            return f"[{level}] {message}"


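# A hypothetical log_format value such as "{timestamp} [{level}] {message}"
# would render a line like: 2024-01-01 12:00:00 [INFO] Skipped 3 existing file(s).

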
class DownloadProgress:
    """Thread-safe, throttled progress bar rendered to stderr."""

    def __init__(self, total: int, enabled: bool) -> None:
        self._total = max(total, 1)
        self._enabled = enabled
        self._downloaded = 0
        self._missing = 0
        self._failed = 0
        self._bytes = 0
        self._lock = threading.Lock()
        self._start = time.time()
        self._last_label = ""
        self._last_render = 0.0
        self._min_interval = 0.25  # seconds between repaints, to avoid flooding stderr

    def add_bytes(self, bytes_delta: int, label: Optional[str] = None) -> None:
        if not self._enabled or bytes_delta <= 0:
            return
        with self._lock:
            self._bytes += bytes_delta
            if label:
                self._last_label = os.path.basename(label)
            self._render_locked(force=False)

    def set_counts(self, downloaded: int, missing: int, failed: int, label: Optional[str] = None) -> None:
        if not self._enabled:
            return
        with self._lock:
            self._downloaded = downloaded
            self._missing = missing
            self._failed = failed
            if label:
                self._last_label = os.path.basename(label)
            self._render_locked(force=True)

    def finish(self) -> None:
        if not self._enabled:
            return
        sys.stderr.write("\n")
        sys.stderr.flush()

    def _render_locked(self, force: bool) -> None:
        # Caller must hold self._lock.
        now = time.time()
        if not force and (now - self._last_render) < self._min_interval:
            return
        self._last_render = now
        elapsed = max(now - self._start, 0.001)
        done = self._downloaded + self._missing + self._failed
        rate = done / elapsed
        remaining = max(self._total - done, 0)
        eta = int(remaining / rate) if rate > 0 else 0
        bytes_mb = self._bytes / (1024 * 1024)
        bytes_gb = bytes_mb / 1024
        byte_rate = bytes_mb / elapsed
        width = 28
        filled = int(width * done / self._total)
        bar = "#" * filled + "-" * (width - filled)
        line = (
            f"\r[{bar}] {done}/{self._total} "
            f"{rate:.2f}/s eta {eta}s ok={self._downloaded} miss={self._missing} fail={self._failed} "
            f"{bytes_gb:.1f}GB {byte_rate:.1f}MB/s "
            f"{self._last_label}"
        )
        sys.stderr.write(line[:200])  # clamp so long filenames cannot wrap the bar
        sys.stderr.flush()


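# A rendered progress line looks roughly like this (values illustrative):
#   [##############--------------] 50/100 2.50/s eta 20s ok=48 miss=1 fail=1 1.2GB 30.0MB/s dgm1_350_5600.xyz.gz

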
class DaemonThreadPool:
    """Minimal daemon-thread pool that supports submit() + shutdown()."""

    def __init__(self, max_workers: int) -> None:
        if max_workers <= 0:
            raise ValueError("max_workers must be greater than 0")
        self._tasks: queue.Queue = queue.Queue()
        self._threads: list[threading.Thread] = []
        self._shutdown = False
        for idx in range(max_workers):
            thread = threading.Thread(
                name=f"download-worker-{idx}",
                target=self._worker,
                daemon=True,
            )
            thread.start()
            self._threads.append(thread)

    def submit(self, fn, *args, **kwargs) -> Future:
        if self._shutdown:
            raise RuntimeError("Thread pool already shut down")
        future: Future = Future()
        self._tasks.put((future, fn, args, kwargs))
        return future

    def shutdown(self, wait: bool = True, cancel_futures: bool = False) -> None:
        self._shutdown = True
        if cancel_futures:
            # Drain queued work and cancel futures that have not started yet.
            while True:
                try:
                    item = self._tasks.get_nowait()
                except queue.Empty:
                    break
                if item is None:
                    continue
                future, _, _, _ = item
                future.cancel()
        # One sentinel per worker thread tells each one to exit.
        for _ in self._threads:
            self._tasks.put(None)
        if wait:
            for thread in self._threads:
                thread.join()

    def _worker(self) -> None:
        while True:
            item = self._tasks.get()
            if item is None:
                return
            future, fn, args, kwargs = item
            if not future.set_running_or_notify_cancel():
                continue
            try:
                result = fn(*args, **kwargs)
            except BaseException as exc:  # propagate any error through the future
                future.set_exception(exc)
            else:
                future.set_result(result)


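# Minimal usage sketch for DaemonThreadPool (values illustrative):
#
#   pool = DaemonThreadPool(max_workers=2)
#   future = pool.submit(pow, 2, 10)
#   assert future.result() == 1024
#   pool.shutdown(wait=True)

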
def _load_toml(path: str) -> dict:
    if not os.path.exists(path):
        raise SystemExit(f"Config not found: {path}")
    with open(path, "rb") as fh:
        return tomllib.load(fh)


def _parse_download_targets(cfg: dict) -> Tuple[Dict[str, dict], Optional[str]]:
    download_cfg = cfg.get("download", {})
    if not isinstance(download_cfg, dict):
        raise SystemExit("download must be a table in the TOML config.")
    has_targets = any(isinstance(value, dict) for value in download_cfg.values())
    if has_targets:
        targets = {name: value for name, value in download_cfg.items() if isinstance(value, dict)}
        if not targets:
            raise SystemExit("download targets must be tables, e.g. [download.geobasis].")
        default_key = download_cfg.get("default_target") or download_cfg.get("default")
        return targets, default_key
    # Flat [download] table without sub-tables: treat it as a single target.
    return {"default": download_cfg}, "default"


def _parse_tile_ranges(cfg: dict) -> Dict[str, dict]:
    ranges = cfg.get("tile_ranges", {})
    if not isinstance(ranges, dict):
        raise SystemExit("tile_ranges must be a table in the TOML config.")
    return ranges


def _iter_tiles(range_cfg: dict) -> Iterable[Tuple[int, int]]:
    """Yield (x, y) tile coordinates over the configured inclusive ranges."""
    x_start = int(range_cfg["x_start"])
    x_end = int(range_cfg["x_end"])
    x_step = int(range_cfg.get("x_step", 1))
    y_start = int(range_cfg["y_start"])
    y_end = int(range_cfg["y_end"])
    y_step = int(range_cfg.get("y_step", 1))

    for x in range(x_start, x_end + 1, x_step):
        for y in range(y_start, y_end + 1, y_step):
            yield x, y


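# For example, {"x_start": 1, "x_end": 2, "y_start": 5, "y_end": 6} yields
# (1, 5), (1, 6), (2, 5), (2, 6): both ends are inclusive, steps default to 1.

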
def _override_range(range_cfg: dict, start: Tuple[int, int], end: Tuple[int, int]) -> dict:
    new_range = dict(range_cfg)
    new_range["x_start"] = start[0]
    new_range["y_start"] = start[1]
    new_range["x_end"] = end[0]
    new_range["y_end"] = end[1]
    return new_range


def _resolve_output_dir(dataset_key: str, dataset_cfg: dict) -> str:
    if dataset_key in OUTPUT_SUBDIRS:
        return OUTPUT_SUBDIRS[dataset_key]
    return dataset_cfg.get("output_subdir", dataset_key)


def _format_url(
    template: str,
    base_url: str,
    x: Optional[int],
    y: Optional[int],
    extra: dict,
) -> str:
    format_vars = {"base_url": base_url}
    if x is not None:
        format_vars["x"] = x
    if y is not None:
        format_vars["y"] = y
    # Scalar config values may also be referenced from the template.
    for key, value in extra.items():
        if isinstance(value, (str, int, float)):
            format_vars[key] = value
    return template.format(**format_vars)


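# For example, the template "{base_url}/dgm1_{x}_{y}.xyz.gz" with the placeholder
# base_url "https://example.org/tiles", x=350, and y=5600 resolves to
# "https://example.org/tiles/dgm1_350_5600.xyz.gz".

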
def _select_download_target(
    download_targets: Dict[str, dict],
    default_key: Optional[str],
    dataset_key: str,
    dataset_cfg: dict,
) -> Tuple[str, dict]:
    requested = dataset_cfg.get("download")
    if requested:
        if requested not in download_targets:
            raise SystemExit(f"{dataset_key}: download target not found: {requested}")
        return requested, download_targets[requested]
    if default_key:
        if default_key not in download_targets:
            raise SystemExit(f"download.default_target not found: {default_key}")
        return default_key, download_targets[default_key]
    if len(download_targets) == 1:
        name = next(iter(download_targets))
        return name, download_targets[name]
    raise SystemExit(
        f"{dataset_key}: multiple download targets defined; set datasets.{dataset_key}.download "
        "or download.default_target."
    )


def _resolve_url(source_cfg: dict, base_url: str, x: Optional[int], y: Optional[int]) -> str:
    # A literal "url" wins over a "url_template".
    if "url" in source_cfg:
        return str(source_cfg["url"])
    template = source_cfg.get("url_template")
    if not template:
        raise SystemExit("url_template or url must be defined for the dataset.")
    return _format_url(str(template), base_url, x, y, source_cfg)


def _build_tasks(
    cfg: dict,
    datasets: Dict[str, dict],
    tile_ranges: Dict[str, dict],
    download_targets: Dict[str, dict],
    default_target: Optional[str],
    start_override: Optional[Tuple[int, int]],
    end_override: Optional[Tuple[int, int]],
) -> List[DownloadTask]:
    tasks: List[DownloadTask] = []

    for dataset_key, dataset_cfg in datasets.items():
        download_key, download_cfg = _select_download_target(
            download_targets,
            default_target,
            dataset_key,
            dataset_cfg,
        )
        base_url = download_cfg.get("base_url", "").rstrip("/")
        base_output_dir = download_cfg.get("output_directory", DEFAULT_OUTPUT_DIR)
        base_subdir = _resolve_output_dir(dataset_key, dataset_cfg)
        dataset_out_dir = os.path.join(base_output_dir, base_subdir)
        unzip = bool(dataset_cfg.get("unzip", False))
        unzip_dir = dataset_out_dir

        # Tiled datasets expand to one task per (x, y); untiled datasets get a
        # single task with x and y left unset.
        coords: Iterable[Tuple[Optional[int], Optional[int]]]
        tile_range_key = dataset_cfg.get("tile_range")
        if tile_range_key:
            if tile_range_key not in tile_ranges:
                raise SystemExit(f"{dataset_key}: tile_range not found: {tile_range_key}")
            range_cfg = tile_ranges[tile_range_key]
            if start_override and end_override:
                range_cfg = _override_range(range_cfg, start_override, end_override)
            coords = _iter_tiles(range_cfg)
        else:
            coords = [(None, None)]

        for x, y in coords:
            if "files" in dataset_cfg:
                # Multi-file datasets sort each file type into its own subdirectory.
                for file_cfg in dataset_cfg["files"]:
                    file_type = file_cfg.get("type", "file")
                    file_subdir = FILE_TYPE_SUBDIRS.get(file_type, file_type)
                    out_dir = os.path.join(dataset_out_dir, file_subdir)
                    url = _resolve_url(file_cfg, base_url, x, y)
                    filename = os.path.basename(urlparse(url).path)
                    tasks.append(
                        DownloadTask(
                            dataset_key,
                            url,
                            os.path.join(out_dir, filename),
                            download_key,
                            unzip,
                            unzip_dir,
                        )
                    )
            else:
                url = _resolve_url(dataset_cfg, base_url, x, y)
                filename = os.path.basename(urlparse(url).path)
                tasks.append(
                    DownloadTask(
                        dataset_key,
                        url,
                        os.path.join(dataset_out_dir, filename),
                        download_key,
                        unzip,
                        unzip_dir,
                    )
                )

    return tasks


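# Resulting path layout, using the "dop20" dataset as an example: files of type
# "image" land in raw/dop20/jp2/<filename>, worldfiles in raw/dop20/j2w, and
# metadata in raw/dop20/meta (per OUTPUT_SUBDIRS and FILE_TYPE_SUBDIRS above).

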
def _select_datasets(cfg: dict, requested: Optional[List[str]]) -> Dict[str, dict]:
    datasets = cfg.get("datasets", {})
    if not isinstance(datasets, dict) or not datasets:
        raise SystemExit("datasets must be defined in the TOML config.")

    if requested:
        missing = [name for name in requested if name not in datasets]
        if missing:
            raise SystemExit(f"Unknown dataset(s): {', '.join(missing)}")
        selected = {name: datasets[name] for name in requested}
    else:
        selected = {name: ds for name, ds in datasets.items() if ds.get("enabled", True)}

    if not selected:
        raise SystemExit("No datasets selected for download.")
    return selected


def _safe_remove_dir(base_dir: str, rel_dir: str) -> None:
    """Delete base_dir/rel_dir, refusing anything outside (or equal to) base_dir."""
    target = os.path.abspath(os.path.join(base_dir, rel_dir))
    base = os.path.abspath(base_dir)
    if os.path.commonpath([target, base]) != base:
        raise SystemExit(f"Refusing to delete outside base dir: {target}")
    if target == base:
        raise SystemExit(f"Refusing to delete base dir: {target}")
    if os.path.exists(target):
        shutil.rmtree(target)


def _unzipped_marker_path(task: DownloadTask) -> str:
    # Hidden marker file recording that this archive was already extracted.
    unzip_dir = task.unzip_dir or os.path.dirname(task.output_path)
    marker_name = f".{os.path.basename(task.output_path)}.unzipped"
    return os.path.join(unzip_dir, marker_name)


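# Example: once raw/citygml/lod2/tile.zip is extracted, the marker is written to
# raw/citygml/lod2/.tile.zip.unzipped and later runs skip re-extraction.

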
def _needs_unzip(task: DownloadTask) -> bool:
    if not task.unzip:
        return False
    if not os.path.exists(task.output_path):
        return False
    return not os.path.exists(_unzipped_marker_path(task))


def _maybe_unzip(task: DownloadTask, logger: DownloadLogger) -> bool:
    if not task.unzip:
        return True
    if not os.path.exists(task.output_path):
        logger.log("ERROR", f"Unzip failed; missing file: {task.output_path}")
        return False
    unzip_dir = task.unzip_dir or os.path.dirname(task.output_path)
    os.makedirs(unzip_dir, exist_ok=True)
    try:
        with zipfile.ZipFile(task.output_path, "r") as zf:
            zf.extractall(unzip_dir)
    except (zipfile.BadZipFile, OSError) as exc:
        logger.log("ERROR", f"Unzip failed: {task.output_path} ({exc})")
        return False
    # Write the marker so the next run skips re-extracting this archive.
    marker = _unzipped_marker_path(task)
    try:
        with open(marker, "w", encoding="utf-8") as fh:
            fh.write(time.strftime("%Y-%m-%d %H:%M:%S"))
    except OSError as exc:
        logger.log("WARN", f"Could not write unzip marker: {marker} ({exc})")
    logger.log("INFO", f"Unzipped: {task.output_path} -> {unzip_dir}")
    return True


def _download_task(
    session: requests.Session,
    task: DownloadTask,
    timeout: int,
    verify: bool | str,
    retries: int,
    stop_event: threading.Event,
    progress: DownloadProgress,
) -> Tuple[str, DownloadTask, Optional[str]]:
    os.makedirs(os.path.dirname(task.output_path), exist_ok=True)
    tmp_path = f"{task.output_path}.part"

    def _cleanup_tmp() -> None:
        # Drop a stale partial file so aborted runs do not leave .part debris behind.
        if os.path.exists(tmp_path):
            try:
                os.remove(tmp_path)
            except OSError:
                pass

    if stop_event.is_set():
        return "aborted", task, "Interrupted"

    for attempt in range(retries + 1):
        if stop_event.is_set():
            return "aborted", task, "Interrupted"
        try:
            with session.get(task.url, stream=True, timeout=timeout, verify=verify) as resp:
                if resp.status_code in (404, 410):
                    return "missing", task, f"HTTP {resp.status_code}"
                if resp.status_code >= 400:
                    return "failed", task, f"HTTP {resp.status_code}"
                aborted = False
                with open(tmp_path, "wb") as fh:
                    for chunk in resp.iter_content(chunk_size=1024 * 1024):
                        if stop_event.is_set():
                            aborted = True
                            break
                        if chunk:
                            fh.write(chunk)
                            progress.add_bytes(len(chunk), task.output_path)
                if aborted:
                    _cleanup_tmp()
                    return "aborted", task, "Interrupted"
                # Atomic rename: readers never observe a half-written file.
                os.replace(tmp_path, task.output_path)
                return "downloaded", task, None
        except requests.RequestException as exc:
            if attempt >= retries:
                _cleanup_tmp()
                return "failed", task, str(exc)
            time.sleep(1.0 + attempt * 0.5)  # linear backoff before the next attempt
        except OSError as exc:
            _cleanup_tmp()
            return "failed", task, str(exc)

    _cleanup_tmp()
    return "failed", task, "Unknown error"


def run_download(
    config_path: str,
    requested_datasets: Optional[List[str]] = None,
    start_override: Optional[Tuple[int, int]] = None,
    end_override: Optional[Tuple[int, int]] = None,
    clean_downloads: bool = False,
    ca_bundle_override: Optional[str] = None,
) -> int:
    cfg = _load_toml(config_path)
    download_targets, default_target = _parse_download_targets(cfg)
    tile_ranges = _parse_tile_ranges(cfg)
    datasets = _select_datasets(cfg, requested_datasets)

    logging_cfg = cfg.get("logging", {})
    progress_enabled = bool(logging_cfg.get("report_progress", True))
    logger = DownloadLogger(
        logging_cfg.get("log_file"),
        logging_cfg.get("log_format"),
        progress_enabled,
    )

    if len(download_targets) > 1 and not default_target:
        default_target = next(iter(download_targets))
        logger.log(
            "WARN",
            "No download.default_target set; using "
            f"{default_target} as default for datasets without a download key.",
        )

    if clean_downloads:
        for dataset_key, dataset_cfg in datasets.items():
            download_key, download_cfg = _select_download_target(
                download_targets,
                default_target,
                dataset_key,
                dataset_cfg,
            )
            base_output_dir = download_cfg.get("output_directory", DEFAULT_OUTPUT_DIR)
            _safe_remove_dir(base_output_dir, _resolve_output_dir(dataset_key, dataset_cfg))

    tasks = _build_tasks(
        cfg,
        datasets,
        tile_ranges,
        download_targets,
        default_target,
        start_override,
        end_override,
    )

    if not tasks:
        logger.log("INFO", "No download tasks generated.")
        return 0

    # Group tasks by download target so each target gets its own session settings.
    tasks_by_target: Dict[str, List[DownloadTask]] = {}
    for task in tasks:
        tasks_by_target.setdefault(task.download_key, []).append(task)

    total_downloaded = 0
    total_missing = 0
    total_failed = 0
    total_skipped = 0
    total_unzipped_existing = 0

    for target_key, target_cfg in download_targets.items():
        target_tasks = tasks_by_target.get(target_key)
        if not target_tasks:
            continue

        target_label = f"[{target_key}] "
        skip_existing = not clean_downloads
        pending: List[DownloadTask] = []
        unzip_only: List[DownloadTask] = []
        skipped = 0

        for task in target_tasks:
            if skip_existing and os.path.exists(task.output_path):
                if _needs_unzip(task):
                    unzip_only.append(task)
                else:
                    skipped += 1
                continue
            pending.append(task)

        if skipped:
            logger.log("INFO", f"{target_label}Skipped {skipped} existing file(s).")
        if unzip_only:
            logger.log("INFO", f"{target_label}Queued {len(unzip_only)} unzip(s) for existing files.")

        # Resolve TLS verification: a CA bundle (CLI override wins) when it exists,
        # otherwise system trust, or no verification when disabled in the config.
        verify_ssl = target_cfg.get("verify_ssl", True)
        ca_bundle = ca_bundle_override or target_cfg.get("ca_bundle")
        if verify_ssl and ca_bundle:
            if os.path.exists(ca_bundle):
                verify = ca_bundle
                source = "CLI" if ca_bundle_override else "config"
                logger.log("INFO", f"{target_label}Using CA bundle ({source}): {ca_bundle}")
            else:
                verify = True
                logger.log("WARN", f"{target_label}CA bundle not found, using system trust: {ca_bundle}")
        else:
            verify = bool(verify_ssl)
            if not verify_ssl:
                logger.log("WARN", f"{target_label}TLS verification disabled by config.")

        timeout = int(target_cfg.get("timeout_seconds", 300))
        retries = int(target_cfg.get("retry_attempts", 3))
        parallel = int(target_cfg.get("parallel_downloads", 4))
        user_agent = target_cfg.get("user_agent", "geodata-download/1.0")

        downloaded = 0
        missing = 0
        failed = 0

        if pending:
            progress = DownloadProgress(len(pending), progress_enabled)
            stop_event = threading.Event()
            interrupted = False

            with requests.Session() as session:
                session.headers.update({"User-Agent": user_agent})
                executor = DaemonThreadPool(max_workers=parallel)
                futures = [
                    executor.submit(_download_task, session, task, timeout, verify, retries, stop_event, progress)
                    for task in pending
                ]
                try:
                    for future in as_completed(futures):
                        status, task, detail = future.result()
                        if status == "downloaded":
                            downloaded += 1
                            if task.unzip and not _maybe_unzip(task, logger):
                                failed += 1
                        elif status == "missing":
                            missing += 1
                            logger.log("WARN", f"{target_label}Missing tile: {task.url} ({detail})")
                        elif status == "aborted":
                            failed += 1
                        else:
                            failed += 1
                            extra = f" ({detail})" if detail else ""
                            logger.log("ERROR", f"{target_label}Failed: {task.url}{extra}")
                        progress.set_counts(downloaded, missing, failed, task.output_path)
                except KeyboardInterrupt:
                    interrupted = True
                    stop_event.set()
                    logger.log("WARN", f"{target_label}Interrupted; stopping downloads.")
                finally:
                    if interrupted:
                        executor.shutdown(wait=False, cancel_futures=True)
                    else:
                        executor.shutdown(wait=True)

            progress.finish()
            if interrupted:
                return 130  # conventional exit code for SIGINT
        else:
            logger.log("INFO", f"{target_label}Nothing to download.")

        for task in unzip_only:
            if _maybe_unzip(task, logger):
                total_unzipped_existing += 1
            else:
                failed += 1

        total_downloaded += downloaded
        total_missing += missing
        total_failed += failed
        total_skipped += skipped

    logger.log(
        "INFO",
        "Done. "
        f"Downloaded={total_downloaded}, Missing={total_missing}, Failed={total_failed}, "
        f"Skipped={total_skipped}, UnzippedExisting={total_unzipped_existing}.",
    )
    return 1 if total_failed else 0


def parse_args(argv: Optional[Iterable[str]] = None) -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="Download geodata tiles from TOML config.")
    parser.add_argument(
        "--config",
        default=DEFAULT_CONFIG,
        help="Path to geodata_download.toml.",
    )
    parser.add_argument(
        "--datasets",
        help="Comma-separated dataset keys to download (default: enabled datasets).",
    )
    parser.add_argument(
        "--start",
        nargs=2,
        type=int,
        metavar=("X", "Y"),
        help="Override tile range start (x y).",
    )
    parser.add_argument(
        "--end",
        nargs=2,
        type=int,
        metavar=("X", "Y"),
        help="Override tile range end (x y).",
    )
    parser.add_argument(
        "--clean-downloads",
        action="store_true",
        help="Delete selected dataset folders before downloading.",
    )
    parser.add_argument(
        "--ca-bundle",
        help="Path to a CA bundle file to override the config.",
    )
    return parser.parse_args(argv)


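# Example invocations (dataset keys and tile numbers are illustrative; the
# script filename is a placeholder):
#
#   python geodata_download.py
#   python geodata_download.py --datasets dgm1,dop20
#   python geodata_download.py --start 350 5600 --end 360 5610 --clean-downloads

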
def main(argv: Optional[Iterable[str]] = None) -> int:
    args = parse_args(argv)
    # Filter out empty names so a trailing comma (e.g. "dgm1,") is harmless.
    datasets = [name.strip() for name in args.datasets.split(",") if name.strip()] if args.datasets else None
    start = tuple(args.start) if args.start else None
    end = tuple(args.end) if args.end else None
    if (start is None) != (end is None):
        raise SystemExit("--start and --end must be provided together.")

    return run_download(
        config_path=args.config,
        requested_datasets=datasets,
        start_override=start,
        end_override=end,
        clean_downloads=args.clean_downloads,
        ca_bundle_override=args.ca_bundle,
    )


if __name__ == "__main__":
    sys.exit(main())