Update downloader and river erosion
PIPELINE.md
@@ -198,6 +198,24 @@ TREES-ENHANCED:

---

### 7. River Erosion (Optional)
```
export_unity/height_png16/*.png ──► depth raster from HydroRIVERS (buffered by order)
export_unity/tile_index.csv ───────► bounds + scaling
│
▼
Depth burn = MAX depth (emulated by sorting burns ascending + REPLACE)
│
▼
export_unity/height_png16_river/{tile}.png
export_unity/tile_index_river_{vr|server}.csv
```
Notes:
- Uses `raw/hydrorivers/.../HydroRIVERS_v10_eu.shp` and runs after heightmap export.
- Rasterize merge supports only REPLACE/ADD; max-depth is achieved by ordered burns.

---

## File Format Reference

| Output | Format | Resolution | Purpose |

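The "sorting burns ascending + REPLACE" note above is the core trick of the erosion pass: GDAL's rasterize merge algorithms only offer REPLACE and ADD, so the deepest river wins only if shallower buffers are burned first. A minimal sketch of that idea, assuming a GDAL build whose Python bindings expose `gdal.RasterizeGeometries` (the commit also adds an OGR in-memory-layer fallback for older builds):

```python
# Sketch only: burn per-geometry depths so overlapping buffers keep the deepest value.
# Assumes `depth_ds` is an open GDAL raster dataset and `geoms` are OGR geometries
# already in the raster's CRS.
from osgeo import gdal

def burn_max_depth(depth_ds, geoms, depths):
    # MERGE_ALG only supports REPLACE/ADD, so sort shallow-to-deep and let REPLACE
    # overwrite: the last (deepest) burn survives wherever buffers overlap.
    ordered = sorted(zip(geoms, depths), key=lambda item: item[1])
    gdal.RasterizeGeometries(
        depth_ds,
        [1],
        [geom for geom, _ in ordered],
        burn_values=[float(depth) for _, depth in ordered],
        options=["MERGE_ALG=REPLACE"],
    )
```

Burning in ascending depth order means every later (deeper) burn overwrites shallower values, which is exactly a per-pixel maximum over the overlapping buffers.
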
@@ -1,8 +1,8 @@
[raw]
dgm1_dir = "raw/dgm1"
dop20_dir = "raw/dop20/jp2"
citygml_lod1_dir = "raw/citygml/lod1"
citygml_lod2_dir = "raw/citygml/lod2"
dgm1_dir = "raw/geo_rlp/dgm1"
dop20_dir = "raw/geo_rlp/dop20/jp2"
citygml_lod1_dir = "raw/geo_rlp/citygml/lod1"
citygml_lod2_dir = "raw/geo_rlp/citygml/lod2"

[archives]
dgm1_dir = "archive/dgm1"
@@ -29,7 +29,7 @@ use_tile_minmax = true

[river_erosion]
enabled = true
source_path = "raw/HydroRIVERS_v10_eu_shp/HydroRIVERS_v10_eu.shp"
source_path = "raw/hydrorivers/hydrorivers_eu_shp/HydroRIVERS_v10_eu_shp/HydroRIVERS_v10_eu.shp"
source_layer = "HydroRIVERS_v10_eu"
output_dir = "export_unity/height_png16_river"
manifest_vr = "export_unity/tile_index_river_vr.csv"
@@ -98,3 +98,13 @@ proxy_min_tris = 120
proxy_max_tris = 180
building_buffer_m = 1.5
instancing = false

[pointcloud]
lpg_dir = "raw/geo_rlp/lpg"
lpo_dir = "raw/geo_rlp/lpo"
lpolpg_dir = "raw/geo_rlp/lpolpg"
bdom_dir = "raw/geo_rlp/bdom20rgbi"
dom1_dir = "raw/geo_rlp/dom1"
chunk_size = 5000000
use_lpg_validation = true
use_lpo_refinement = true

@@ -1,8 +1,8 @@
[raw]
dgm1_dir = "raw/dgm1"
dop20_dir = "raw/dop20/jp2"
citygml_lod1_dir = "raw/citygml/lod1"
citygml_lod2_dir = "raw/citygml/lod2"
dgm1_dir = "raw/geo_rlp/dgm1"
dop20_dir = "raw/geo_rlp/dop20/jp2"
citygml_lod1_dir = "raw/geo_rlp/citygml/lod1"
citygml_lod2_dir = "raw/geo_rlp/citygml/lod2"

[archives]
dgm1_dir = "archive/dgm1"
@@ -27,7 +27,7 @@ tile_size_m = 1000

[river_erosion]
enabled = true
source_path = "raw/HydroRIVERS_v10_eu_shp/HydroRIVERS_v10_eu.shp"
source_path = "raw/hydrorivers/hydrorivers_eu_shp/HydroRIVERS_v10_eu_shp/HydroRIVERS_v10_eu.shp"
source_layer = "HydroRIVERS_v10_eu"
output_dir = "export_unity/height_png16_river"
manifest_vr = "export_unity/tile_index_river_vr.csv"
@@ -96,3 +96,13 @@ proxy_min_tris = 120
proxy_max_tris = 180
building_buffer_m = 1.5
instancing = false

[pointcloud]
lpg_dir = "raw/geo_rlp/lpg"
lpo_dir = "raw/geo_rlp/lpo"
lpolpg_dir = "raw/geo_rlp/lpolpg"
bdom_dir = "raw/geo_rlp/bdom20rgbi"
dom1_dir = "raw/geo_rlp/dom1"
chunk_size = 5000000
use_lpg_validation = true
use_lpo_refinement = true

@@ -9,6 +9,7 @@ import sys
import time
import queue
import threading
import zipfile
from concurrent.futures import Future, as_completed
from dataclasses import dataclass
from typing import Dict, Iterable, List, Optional, Tuple
@@ -48,6 +49,9 @@ class DownloadTask:
    dataset: str
    url: str
    output_path: str
    download_key: str
    unzip: bool = False
    unzip_dir: Optional[str] = None


class DownloadLogger:
@@ -211,6 +215,20 @@ def _load_toml(path: str) -> dict:
        return tomllib.load(fh)


def _parse_download_targets(cfg: dict) -> Tuple[Dict[str, dict], Optional[str]]:
    download_cfg = cfg.get("download", {})
    if not isinstance(download_cfg, dict):
        raise SystemExit("download must be a table in the TOML config.")
    has_targets = any(isinstance(value, dict) for value in download_cfg.values())
    if has_targets:
        targets = {name: value for name, value in download_cfg.items() if isinstance(value, dict)}
        if not targets:
            raise SystemExit("download targets must be tables, e.g. [download.geobasis].")
        default_key = download_cfg.get("default_target") or download_cfg.get("default")
        return targets, default_key
    return {"default": download_cfg}, "default"

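The new `_parse_download_targets` keeps the old single-table `[download]` layout working while allowing named targets such as `[download.geobasis_rlp_de]`. A small illustration of both shapes, using parsed-TOML dicts (the import path `download_geodata` is a placeholder, not a name confirmed by this commit):

```python
# Illustration only: how _parse_download_targets treats the two config shapes.
# `download_geodata` is a placeholder module name for the downloader script.
from download_geodata import _parse_download_targets

# Legacy flat form: the whole [download] table becomes the single "default" target.
flat = {"download": {"base_url": "https://geobasis-rlp.de/data", "parallel_downloads": 4}}
assert _parse_download_targets(flat) == ({"default": flat["download"]}, "default")

# Multi-target form: each sub-table is a target; default_target names the fallback.
multi = {
    "download": {
        "default_target": "geobasis_rlp_de",
        "geobasis_rlp_de": {"base_url": "https://geobasis-rlp.de/data"},
        "hydrosheds_org": {"base_url": "https://data.hydrosheds.org/file"},
    }
}
targets, default = _parse_download_targets(multi)
assert default == "geobasis_rlp_de"
assert set(targets) == {"geobasis_rlp_de", "hydrosheds_org"}
```
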

def _parse_tile_ranges(cfg: dict) -> Dict[str, dict]:
    ranges = cfg.get("tile_ranges", {})
    if not isinstance(ranges, dict):
@@ -249,52 +267,149 @@ def _resolve_output_dir(dataset_key: str, dataset_cfg: dict) -> str:
def _format_url(
    template: str,
    base_url: str,
    x: int,
    y: int,
    x: Optional[int],
    y: Optional[int],
    extra: dict,
) -> str:
    format_vars = {"base_url": base_url, "x": x, "y": y}
    format_vars = {"base_url": base_url}
    if x is not None:
        format_vars["x"] = x
    if y is not None:
        format_vars["y"] = y
    for key, value in extra.items():
        if isinstance(value, (str, int, float)):
            format_vars[key] = value
    return template.format(**format_vars)


def _select_download_target(
    download_targets: Dict[str, dict],
    default_key: Optional[str],
    dataset_key: str,
    dataset_cfg: dict,
) -> Tuple[str, dict]:
    requested = dataset_cfg.get("download")
    if requested:
        if requested not in download_targets:
            raise SystemExit(f"{dataset_key}: download target not found: {requested}")
        return requested, download_targets[requested]
    if default_key:
        if default_key not in download_targets:
            raise SystemExit(f"download.default_target not found: {default_key}")
        return default_key, download_targets[default_key]
    if len(download_targets) == 1:
        name = next(iter(download_targets))
        return name, download_targets[name]
    raise SystemExit(
        f"{dataset_key}: multiple download targets defined; set datasets.{dataset_key}.download "
        "or download.default_target.",
    )


def _resolve_url(source_cfg: dict, base_url: str, x: Optional[int], y: Optional[int]) -> str:
    if "url" in source_cfg:
        return str(source_cfg["url"])
    template = source_cfg.get("url_template")
    if not template:
        raise SystemExit("url_template or url must be defined for the dataset.")
    return _format_url(str(template), base_url, x, y, source_cfg)


def _build_tasks(
    cfg: dict,
    datasets: Dict[str, dict],
    tile_ranges: Dict[str, dict],
    base_output_dir: str,
    download_targets: Dict[str, dict],
    default_target: Optional[str],
    start_override: Optional[Tuple[int, int]],
    end_override: Optional[Tuple[int, int]],
) -> List[DownloadTask]:
    tasks: List[DownloadTask] = []
    base_url = cfg.get("download", {}).get("base_url", "").rstrip("/")

    for dataset_key, dataset_cfg in datasets.items():
        tile_range_key = dataset_cfg.get("tile_range")
        if not tile_range_key or tile_range_key not in tile_ranges:
            raise SystemExit(f"{dataset_key}: tile_range not found: {tile_range_key}")
        range_cfg = tile_ranges[tile_range_key]
        if start_override and end_override:
            range_cfg = _override_range(range_cfg, start_override, end_override)

        download_key, download_cfg = _select_download_target(
            download_targets,
            default_target,
            dataset_key,
            dataset_cfg,
        )
        base_url = download_cfg.get("base_url", "").rstrip("/")
        base_output_dir = download_cfg.get("output_directory", DEFAULT_OUTPUT_DIR)
        base_subdir = _resolve_output_dir(dataset_key, dataset_cfg)
        dataset_out_dir = os.path.join(base_output_dir, base_subdir)
        unzip = bool(dataset_cfg.get("unzip", False))
        unzip_dir = dataset_out_dir

        for x, y in _iter_tiles(range_cfg):
        tile_range_key = dataset_cfg.get("tile_range")
        if tile_range_key:
            if tile_range_key not in tile_ranges:
                raise SystemExit(f"{dataset_key}: tile_range not found: {tile_range_key}")
            range_cfg = tile_ranges[tile_range_key]
            if start_override and end_override:
                range_cfg = _override_range(range_cfg, start_override, end_override)

            for x, y in _iter_tiles(range_cfg):
                if "files" in dataset_cfg:
                    for file_cfg in dataset_cfg["files"]:
                        file_type = file_cfg.get("type", "file")
                        file_subdir = FILE_TYPE_SUBDIRS.get(file_type, file_type)
                        out_dir = os.path.join(dataset_out_dir, file_subdir)
                        url = _resolve_url(file_cfg, base_url, x, y)
                        filename = os.path.basename(urlparse(url).path)
                        tasks.append(
                            DownloadTask(
                                dataset_key,
                                url,
                                os.path.join(out_dir, filename),
                                download_key,
                                unzip,
                                unzip_dir,
                            )
                        )
                else:
                    url = _resolve_url(dataset_cfg, base_url, x, y)
                    filename = os.path.basename(urlparse(url).path)
                    tasks.append(
                        DownloadTask(
                            dataset_key,
                            url,
                            os.path.join(dataset_out_dir, filename),
                            download_key,
                            unzip,
                            unzip_dir,
                        )
                    )
        else:
            if "files" in dataset_cfg:
                for file_cfg in dataset_cfg["files"]:
                    file_type = file_cfg.get("type", "file")
                    file_subdir = FILE_TYPE_SUBDIRS.get(file_type, file_type)
                    out_dir = os.path.join(dataset_out_dir, file_subdir)
                    url = _format_url(file_cfg["url_template"], base_url, x, y, file_cfg)
                    url = _resolve_url(file_cfg, base_url, None, None)
                    filename = os.path.basename(urlparse(url).path)
                    tasks.append(DownloadTask(dataset_key, url, os.path.join(out_dir, filename)))
                    tasks.append(
                        DownloadTask(
                            dataset_key,
                            url,
                            os.path.join(out_dir, filename),
                            download_key,
                            unzip,
                            unzip_dir,
                        )
                    )
            else:
                url = _format_url(dataset_cfg["url_template"], base_url, x, y, dataset_cfg)
                url = _resolve_url(dataset_cfg, base_url, None, None)
                filename = os.path.basename(urlparse(url).path)
                tasks.append(DownloadTask(dataset_key, url, os.path.join(dataset_out_dir, filename)))
                tasks.append(
                    DownloadTask(
                        dataset_key,
                        url,
                        os.path.join(dataset_out_dir, filename),
                        download_key,
                        unzip,
                        unzip_dir,
                    )
                )

    return tasks

@@ -328,6 +443,44 @@ def _safe_remove_dir(base_dir: str, rel_dir: str) -> None:
    shutil.rmtree(target)


def _unzipped_marker_path(task: DownloadTask) -> str:
    unzip_dir = task.unzip_dir or os.path.dirname(task.output_path)
    marker_name = f".{os.path.basename(task.output_path)}.unzipped"
    return os.path.join(unzip_dir, marker_name)


def _needs_unzip(task: DownloadTask) -> bool:
    if not task.unzip:
        return False
    if not os.path.exists(task.output_path):
        return False
    return not os.path.exists(_unzipped_marker_path(task))


def _maybe_unzip(task: DownloadTask, logger: DownloadLogger) -> bool:
    if not task.unzip:
        return True
    if not os.path.exists(task.output_path):
        logger.log("ERROR", f"Unzip failed; missing file: {task.output_path}")
        return False
    unzip_dir = task.unzip_dir or os.path.dirname(task.output_path)
    os.makedirs(unzip_dir, exist_ok=True)
    try:
        with zipfile.ZipFile(task.output_path, "r") as zf:
            zf.extractall(unzip_dir)
    except (zipfile.BadZipFile, OSError) as exc:
        logger.log("ERROR", f"Unzip failed: {task.output_path} ({exc})")
        return False
    marker = _unzipped_marker_path(task)
    try:
        with open(marker, "w", encoding="utf-8") as fh:
            fh.write(time.strftime("%Y-%m-%d %H:%M:%S"))
    except OSError as exc:
        logger.log("WARN", f"Could not write unzip marker: {marker} ({exc})")
    logger.log("INFO", f"Unzipped: {task.output_path} -> {unzip_dir}")
    return True


def _download_task(
    session: requests.Session,
    task: DownloadTask,
@@ -386,7 +539,7 @@ def run_download(
    ca_bundle_override: Optional[str] = None,
) -> int:
    cfg = _load_toml(config_path)
    download_cfg = cfg.get("download", {})
    download_targets, default_target = _parse_download_targets(cfg)
    tile_ranges = _parse_tile_ranges(cfg)
    datasets = _select_datasets(cfg, requested_datasets)

@@ -398,23 +551,31 @@ def run_download(
        progress_enabled,
    )

    configured_output_dir = download_cfg.get("output_directory")
    base_output_dir = DEFAULT_OUTPUT_DIR
    if configured_output_dir and os.path.normpath(configured_output_dir) != DEFAULT_OUTPUT_DIR:
    if len(download_targets) > 1 and not default_target:
        default_target = next(iter(download_targets))
        logger.log(
            "WARN",
            f"Ignoring download.output_directory={configured_output_dir}; using raw/ to match pipeline.",
            "No download.default_target set; using "
            f"{default_target} as default for datasets without a download key.",
        )

    if clean_downloads:
        for dataset_key, dataset_cfg in datasets.items():
            download_key, download_cfg = _select_download_target(
                download_targets,
                default_target,
                dataset_key,
                dataset_cfg,
            )
            base_output_dir = download_cfg.get("output_directory", DEFAULT_OUTPUT_DIR)
            _safe_remove_dir(base_output_dir, _resolve_output_dir(dataset_key, dataset_cfg))

    tasks = _build_tasks(
        cfg,
        datasets,
        tile_ranges,
        base_output_dir,
        download_targets,
        default_target,
        start_override,
        end_override,
    )
@@ -423,91 +584,129 @@ def run_download(
        logger.log("INFO", "No download tasks generated.")
        return 0

    skip_existing = not clean_downloads
    pending: List[DownloadTask] = []
    skipped = 0
    tasks_by_target: Dict[str, List[DownloadTask]] = {}
    for task in tasks:
        if skip_existing and os.path.exists(task.output_path):
            skipped += 1
        tasks_by_target.setdefault(task.download_key, []).append(task)

    total_downloaded = 0
    total_missing = 0
    total_failed = 0
    total_skipped = 0
    total_unzipped_existing = 0

    for target_key, target_cfg in download_targets.items():
        target_tasks = tasks_by_target.get(target_key)
        if not target_tasks:
            continue
        pending.append(task)

    if skipped:
        logger.log("INFO", f"Skipped {skipped} existing file(s).")
        target_label = f"[{target_key}] "
        skip_existing = not clean_downloads
        pending: List[DownloadTask] = []
        unzip_only: List[DownloadTask] = []
        skipped = 0

    if not pending:
        logger.log("INFO", "Nothing to download after skipping existing files.")
        return 0
        for task in target_tasks:
            if skip_existing and os.path.exists(task.output_path):
                if _needs_unzip(task):
                    unzip_only.append(task)
                else:
                    skipped += 1
                continue
            pending.append(task)

    verify_ssl = download_cfg.get("verify_ssl", True)
    ca_bundle = ca_bundle_override or download_cfg.get("ca_bundle")
    if verify_ssl and ca_bundle:
        if os.path.exists(ca_bundle):
            verify = ca_bundle
            source = "CLI" if ca_bundle_override else "config"
            logger.log("INFO", f"Using CA bundle ({source}): {ca_bundle}")
        if skipped:
            logger.log("INFO", f"{target_label}Skipped {skipped} existing file(s).")
        if unzip_only:
            logger.log("INFO", f"{target_label}Queued {len(unzip_only)} unzip(s) for existing files.")

        verify_ssl = target_cfg.get("verify_ssl", True)
        ca_bundle = ca_bundle_override or target_cfg.get("ca_bundle")
        if verify_ssl and ca_bundle:
            if os.path.exists(ca_bundle):
                verify = ca_bundle
                source = "CLI" if ca_bundle_override else "config"
                logger.log("INFO", f"{target_label}Using CA bundle ({source}): {ca_bundle}")
            else:
                verify = True
                logger.log("WARN", f"{target_label}CA bundle not found, using system trust: {ca_bundle}")
        else:
            verify = True
            logger.log("WARN", f"CA bundle not found, using system trust: {ca_bundle}")
    else:
        verify = bool(verify_ssl)
        if not verify_ssl:
            logger.log("WARN", "TLS verification disabled by config.")
            verify = bool(verify_ssl)
            if not verify_ssl:
                logger.log("WARN", f"{target_label}TLS verification disabled by config.")

    timeout = int(download_cfg.get("timeout_seconds", 300))
    retries = int(download_cfg.get("retry_attempts", 3))
    parallel = int(download_cfg.get("parallel_downloads", 4))
    user_agent = download_cfg.get("user_agent", "geodata-download/1.0")
        timeout = int(target_cfg.get("timeout_seconds", 300))
        retries = int(target_cfg.get("retry_attempts", 3))
        parallel = int(target_cfg.get("parallel_downloads", 4))
        user_agent = target_cfg.get("user_agent", "geodata-download/1.0")

    downloaded = 0
    missing = 0
    failed = 0
        downloaded = 0
        missing = 0
        failed = 0

    progress = DownloadProgress(len(pending), progress_enabled)
    stop_event = threading.Event()
    interrupted = False
        if pending:
            progress = DownloadProgress(len(pending), progress_enabled)
            stop_event = threading.Event()
            interrupted = False

    with requests.Session() as session:
        session.headers.update({"User-Agent": user_agent})
        executor = DaemonThreadPool(max_workers=parallel)
        futures = [
            executor.submit(_download_task, session, task, timeout, verify, retries, stop_event, progress)
            for task in pending
        ]
        try:
            for future in as_completed(futures):
                status, task, detail = future.result()
                if status == "downloaded":
                    downloaded += 1
                elif status == "missing":
                    missing += 1
                    logger.log("WARN", f"Missing tile: {task.url} ({detail})")
                elif status == "aborted":
                    failed += 1
            with requests.Session() as session:
                session.headers.update({"User-Agent": user_agent})
                executor = DaemonThreadPool(max_workers=parallel)
                futures = [
                    executor.submit(_download_task, session, task, timeout, verify, retries, stop_event, progress)
                    for task in pending
                ]
                try:
                    for future in as_completed(futures):
                        status, task, detail = future.result()
                        if status == "downloaded":
                            downloaded += 1
                            if task.unzip and not _maybe_unzip(task, logger):
                                failed += 1
                        elif status == "missing":
                            missing += 1
                            logger.log("WARN", f"{target_label}Missing tile: {task.url} ({detail})")
                        elif status == "aborted":
                            failed += 1
                        else:
                            failed += 1
                            extra = f" ({detail})" if detail else ""
                            logger.log("ERROR", f"{target_label}Failed: {task.url}{extra}")
                        progress.set_counts(downloaded, missing, failed, task.output_path)
                except KeyboardInterrupt:
                    interrupted = True
                    stop_event.set()
                    logger.log("WARN", f"{target_label}Interrupted; stopping downloads.")
                finally:
                    if interrupted:
                        executor.shutdown(wait=False, cancel_futures=True)
                    else:
                        executor.shutdown(wait=True)

                progress.finish()
                if interrupted:
                    return 130
        else:
            logger.log("INFO", f"{target_label}Nothing to download.")

        if unzip_only:
            for task in unzip_only:
                if _maybe_unzip(task, logger):
                    total_unzipped_existing += 1
                else:
                    failed += 1
                    extra = f" ({detail})" if detail else ""
                    logger.log("ERROR", f"Failed: {task.url}{extra}")
                progress.set_counts(downloaded, missing, failed, task.output_path)
        except KeyboardInterrupt:
            interrupted = True
            stop_event.set()
            logger.log("WARN", "Interrupted; stopping downloads.")
        finally:
            if interrupted:
                executor.shutdown(wait=False, cancel_futures=True)
            else:
                executor.shutdown(wait=True)

    progress.finish()
    if interrupted:
        return 130
        total_downloaded += downloaded
        total_missing += missing
        total_failed += failed
        total_skipped += skipped

    logger.log(
        "INFO",
        f"Done. Downloaded={downloaded}, Missing={missing}, Failed={failed}, Skipped={skipped}.",
        "Done. "
        f"Downloaded={total_downloaded}, Missing={total_missing}, Failed={total_failed}, "
        f"Skipped={total_skipped}, UnzippedExisting={total_unzipped_existing}.",
    )
    return 1 if failed else 0
    return 1 if total_failed else 0


def parse_args(argv: Optional[Iterable[str]] = None) -> argparse.Namespace:

@@ -1,6 +1,6 @@
# Geodata Download Configuration for Trier Digital Twin Project
# Generated: 2026-01-19
# Data Source: Geobasis Rheinland-Pfalz (geobasis-rlp.de)
# Generated: 2026-01-23
# Data Source: Geobasis Rheinland-Pfalz (geobasis-rlp.de) + HydroSHEDS

[project]
name = "Trier Digital Twin - Geodata Collection"
@@ -9,9 +9,9 @@ coordinate_system = "ETRS89/UTM32"
vertical_datum = "DHHN2016"
region = "Stadt Trier and surrounding area"

[download]
[download.geobasis_rlp_de]
base_url = "https://geobasis-rlp.de/data"
output_directory = "raw"
output_directory = "raw/geo_rlp"
parallel_downloads = 4
retry_attempts = 3
timeout_seconds = 300
@@ -19,6 +19,14 @@ verify_ssl = true
user_agent = "TrierDigitalTwin/1.0"
ca_bundle = "certs/geobasis-ca.pem"

[download.hydrosheds_org]
base_url = "https://data.hydrosheds.org/file"
output_directory = "raw/hydrorivers"
parallel_downloads = 4
retry_attempts = 3
timeout_seconds = 300
verify_ssl = true

# Define tile range presets that datasets inherit from
[tile_ranges.range_1km]
description = "1x1km tiles covering full area"
@@ -42,6 +50,7 @@ y_step = 2
[datasets.dgm1]
enabled = true
name = "Digital Ground Model"
download = "geobasis_rlp_de"
tile_range = "range_1km"
resolution = "1m"
format = "GeoTIFF"
@@ -53,6 +62,7 @@ output_subdir = "dgm1"
[datasets.dom1]
enabled = true
name = "Digital Surface Model"
download = "geobasis_rlp_de"
tile_range = "range_1km"
resolution = "1m"
format = "GeoTIFF"
@@ -65,6 +75,7 @@ output_subdir = "dom1"
[datasets.geb3dlo]
enabled = true
name = "3D Buildings LoD2"
download = "geobasis_rlp_de"
tile_range = "range_2km"
resolution = "LoD2"
format = "CityGML"
@@ -77,6 +88,7 @@ notes = "Semantically rich building models with roof structures"
[datasets.dop20]
enabled = true
name = "Orthophotos 20cm"
download = "geobasis_rlp_de"
tile_range = "range_2km"
resolution = "20cm"
format = "JPEG2000"
@@ -100,6 +112,7 @@ url_template = "{base_url}/dop20rgb/current/metadata/dop20rgb_32_{x}_{y}_2_rp_20
[datasets.bdom20rgbi]
enabled = true
name = "Building/Object Point Cloud RGBI"
download = "geobasis_rlp_de"
tile_range = "range_2km"
resolution = "20cm"
format = "LAZ"
@@ -112,6 +125,7 @@ notes = "RGB + Intensity point cloud for buildings and objects"
[datasets.lpolpg]
enabled = true
name = "Laser Point Ground/Object"
download = "geobasis_rlp_de"
tile_range = "range_1km"
resolution = "High density"
format = "LAZ"
@@ -120,6 +134,18 @@ priority = 2
output_subdir = "lpolpg"
notes = "Combined point cloud (LPO/LPG) in one LAZ file"

# HydroRIVERS (for terrain erosion)
[datasets.hydrorivers]
enabled = true
name = "HydroRIVERS Europe and Middle East"
download = "hydrosheds_org"
format = "zip"
unzip = true
output_subdir = "hydrorivers_eu_shp"
priority = 2
url_template = "{base_url}/HydroRIVERS/HydroRIVERS_v10_eu_shp.zip"
notes = "HydroRIVERS Shapefile for erosion of rivers"

[processing]
convert_to_mesh = true
generate_lods = true

@@ -134,6 +134,8 @@ def _build_depth_raster(
    layer.SetSpatialFilterRect(minx, miny, maxx, maxy)
    layer.ResetReading()

    buffered_geoms: list[ogr.Geometry] = []
    burn_values: list[float] = []
    for feat in layer:
        order = _normalize_order(feat.GetField(profile.order_field), profile)
        if order is None:
@@ -152,13 +154,43 @@ def _build_depth_raster(
        buffered = geom.Buffer(width_m * 0.5)
        if buffered is None or buffered.IsEmpty():
            continue
        gdal.RasterizeGeometries(
            depth_ds,
            [1],
            [buffered],
            burn_values=[depth_m],
            options=["MERGE_ALG=MAX"],
        )
        buffered_geoms.append(buffered)
        burn_values.append(depth_m)

    if buffered_geoms:
        # GDAL only supports MERGE_ALG=REPLACE/ADD; emulate MAX by sorting ascending.
        ordered = sorted(zip(buffered_geoms, burn_values), key=lambda item: item[1])
        ordered_geoms = [geom for geom, _ in ordered]
        ordered_values = [float(value) for _, value in ordered]
        if hasattr(gdal, "RasterizeGeometries"):
            gdal.RasterizeGeometries(
                depth_ds,
                [1],
                ordered_geoms,
                burn_values=ordered_values,
                options=["MERGE_ALG=REPLACE"],
            )
        else:
            mem_driver = ogr.GetDriverByName("MEM") or ogr.GetDriverByName("Memory")
            if mem_driver is None:
                raise SystemExit("[river_erosion] OGR MEM driver missing; update GDAL.")
            mem_ds = mem_driver.CreateDataSource("")
            mem_layer = mem_ds.CreateLayer("burn", srs=tile_srs, geom_type=ogr.wkbPolygon)
            mem_layer.CreateField(ogr.FieldDefn("burn", ogr.OFTReal))
            layer_defn = mem_layer.GetLayerDefn()
            for geom, value in zip(ordered_geoms, ordered_values):
                feature = ogr.Feature(layer_defn)
                feature.SetField("burn", value)
                feature.SetGeometry(geom)
                mem_layer.CreateFeature(feature)
                feature = None
            gdal.RasterizeLayer(
                depth_ds,
                [1],
                mem_layer,
                options=["ATTRIBUTE=burn"],
            )
            mem_ds = None

    depth = band.ReadAsArray().astype(np.float32)
    if profile.smooth_sigma_m > 0.0:
@@ -281,11 +313,22 @@ def _process_profile(
        global_max = max(global_max, tile_max)

        out_path = os.path.join(output_dir, f"{tile_id}.png")
        driver = gdal.GetDriverByName("PNG")
        out_ds = driver.Create(out_path, out_u16.shape[1], out_u16.shape[0], 1, gdal.GDT_UInt16, options=["WORLDFILE=YES"])
        mem_driver = gdal.GetDriverByName("MEM")
        out_ds = mem_driver.Create("", out_u16.shape[1], out_u16.shape[0], 1, gdal.GDT_UInt16)
        out_ds.SetGeoTransform(gt)
        out_ds.SetProjection(tile_srs.ExportToWkt())
        out_ds.GetRasterBand(1).WriteArray(out_u16)
        trans_opts = gdal.TranslateOptions(
            outputType=gdal.GDT_UInt16,
            format="PNG",
            creationOptions=["WORLDFILE=YES"],
        )
        try:
            gdal.Translate(out_path, out_ds, options=trans_opts)
        except RuntimeError as exc:
            print(f"[river_erosion:{name}] Translate failed for {tile_id}: {exc}")
            skipped += 1
            continue
        out_ds = None

        rows_out.append(
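
The PNG write change above works around the fact that GDAL's PNG driver only supports copy-style creation, not a direct `Create()`: the tile is first built in an in-memory dataset and then converted with `gdal.Translate`. A minimal standalone sketch of the same pattern (array shape and georeferencing values are placeholders, not taken from the pipeline):

```python
# Sketch only: write a georeferenced 16-bit PNG via a MEM dataset + gdal.Translate,
# since the PNG driver cannot create datasets directly.
import numpy as np
from osgeo import gdal

def write_uint16_png(out_path: str, data: np.ndarray, geotransform, wkt: str) -> None:
    # Build the raster in memory first.
    mem = gdal.GetDriverByName("MEM").Create("", data.shape[1], data.shape[0], 1, gdal.GDT_UInt16)
    mem.SetGeoTransform(geotransform)
    mem.SetProjection(wkt)
    mem.GetRasterBand(1).WriteArray(data)
    # Then convert (CreateCopy-style) to PNG, emitting a world file alongside it.
    opts = gdal.TranslateOptions(format="PNG", outputType=gdal.GDT_UInt16,
                                 creationOptions=["WORLDFILE=YES"])
    gdal.Translate(out_path, mem, options=opts)

# Example call with dummy data (placeholder geotransform and empty projection):
# write_uint16_png("tile.png", np.zeros((256, 256), np.uint16), (0, 1, 0, 0, 0, -1), "")
```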