diff --git a/PIPELINE.md b/PIPELINE.md index 3bbfc96..55fd898 100644 --- a/PIPELINE.md +++ b/PIPELINE.md @@ -198,6 +198,24 @@ TREES-ENHANCED: --- +### 7. River Erosion (Optional) +``` +export_unity/height_png16/*.png ──► depth raster from HydroRIVERS (buffered by order) +export_unity/tile_index.csv ───────► bounds + scaling + │ + ▼ +Depth burn = MAX depth (emulated by sorting burns ascending + REPLACE) + │ + ▼ +export_unity/height_png16_river/{tile}.png +export_unity/tile_index_river_{vr|server}.csv +``` +Notes: +- Uses `raw/hydrorivers/.../HydroRIVERS_v10_eu.shp` and runs after heightmap export. +- Rasterize merge supports only REPLACE/ADD; max-depth is achieved by ordered burns. + +--- + ## File Format Reference | Output | Format | Resolution | Purpose | diff --git a/geodata_config.example.toml b/geodata_config.example.toml index 818713a..2624501 100644 --- a/geodata_config.example.toml +++ b/geodata_config.example.toml @@ -1,8 +1,8 @@ [raw] -dgm1_dir = "raw/dgm1" -dop20_dir = "raw/dop20/jp2" -citygml_lod1_dir = "raw/citygml/lod1" -citygml_lod2_dir = "raw/citygml/lod2" +dgm1_dir = "raw/geo_rlp/dgm1" +dop20_dir = "raw/geo_rlp/dop20/jp2" +citygml_lod1_dir = "raw/geo_rlp/citygml/lod1" +citygml_lod2_dir = "raw/geo_rlp/citygml/lod2" [archives] dgm1_dir = "archive/dgm1" @@ -29,7 +29,7 @@ use_tile_minmax = true [river_erosion] enabled = true -source_path = "raw/HydroRIVERS_v10_eu_shp/HydroRIVERS_v10_eu.shp" +source_path = "raw/hydrorivers/hydrorivers_eu_shp/HydroRIVERS_v10_eu_shp/HydroRIVERS_v10_eu.shp" source_layer = "HydroRIVERS_v10_eu" output_dir = "export_unity/height_png16_river" manifest_vr = "export_unity/tile_index_river_vr.csv" @@ -98,3 +98,13 @@ proxy_min_tris = 120 proxy_max_tris = 180 building_buffer_m = 1.5 instancing = false + +[pointcloud] +lpg_dir = "raw/geo_rlp/lpg" +lpo_dir = "raw/geo_rlp/lpo" +lpolpg_dir = "raw/geo_rlp/lpolpg" +bdom_dir = "raw/geo_rlp/bdom20rgbi" +dom1_dir = "raw/geo_rlp/dom1" +chunk_size = 5000000 +use_lpg_validation = true +use_lpo_refinement = true diff --git a/geodata_config.toml b/geodata_config.toml index 55bf691..f568ebd 100644 --- a/geodata_config.toml +++ b/geodata_config.toml @@ -1,8 +1,8 @@ [raw] -dgm1_dir = "raw/dgm1" -dop20_dir = "raw/dop20/jp2" -citygml_lod1_dir = "raw/citygml/lod1" -citygml_lod2_dir = "raw/citygml/lod2" +dgm1_dir = "raw/geo_rlp/dgm1" +dop20_dir = "raw/geo_rlp/dop20/jp2" +citygml_lod1_dir = "raw/geo_rlp/citygml/lod1" +citygml_lod2_dir = "raw/geo_rlp/citygml/lod2" [archives] dgm1_dir = "archive/dgm1" @@ -27,7 +27,7 @@ tile_size_m = 1000 [river_erosion] enabled = true -source_path = "raw/HydroRIVERS_v10_eu_shp/HydroRIVERS_v10_eu.shp" +source_path = "raw/hydrorivers/hydrorivers_eu_shp/HydroRIVERS_v10_eu_shp/HydroRIVERS_v10_eu.shp" source_layer = "HydroRIVERS_v10_eu" output_dir = "export_unity/height_png16_river" manifest_vr = "export_unity/tile_index_river_vr.csv" @@ -96,3 +96,13 @@ proxy_min_tris = 120 proxy_max_tris = 180 building_buffer_m = 1.5 instancing = false + +[pointcloud] +lpg_dir = "raw/geo_rlp/lpg" +lpo_dir = "raw/geo_rlp/lpo" +lpolpg_dir = "raw/geo_rlp/lpolpg" +bdom_dir = "raw/geo_rlp/bdom20rgbi" +dom1_dir = "raw/geo_rlp/dom1" +chunk_size = 5000000 +use_lpg_validation = true +use_lpo_refinement = true diff --git a/geodata_download.py b/geodata_download.py index f688284..19624ed 100644 --- a/geodata_download.py +++ b/geodata_download.py @@ -9,6 +9,7 @@ import sys import time import queue import threading +import zipfile from concurrent.futures import Future, as_completed from dataclasses import dataclass from typing 
import Dict, Iterable, List, Optional, Tuple @@ -48,6 +49,9 @@ class DownloadTask: dataset: str url: str output_path: str + download_key: str + unzip: bool = False + unzip_dir: Optional[str] = None class DownloadLogger: @@ -211,6 +215,20 @@ def _load_toml(path: str) -> dict: return tomllib.load(fh) +def _parse_download_targets(cfg: dict) -> Tuple[Dict[str, dict], Optional[str]]: + download_cfg = cfg.get("download", {}) + if not isinstance(download_cfg, dict): + raise SystemExit("download must be a table in the TOML config.") + has_targets = any(isinstance(value, dict) for value in download_cfg.values()) + if has_targets: + targets = {name: value for name, value in download_cfg.items() if isinstance(value, dict)} + if not targets: + raise SystemExit("download targets must be tables, e.g. [download.geobasis].") + default_key = download_cfg.get("default_target") or download_cfg.get("default") + return targets, default_key + return {"default": download_cfg}, "default" + + def _parse_tile_ranges(cfg: dict) -> Dict[str, dict]: ranges = cfg.get("tile_ranges", {}) if not isinstance(ranges, dict): @@ -249,52 +267,149 @@ def _resolve_output_dir(dataset_key: str, dataset_cfg: dict) -> str: def _format_url( template: str, base_url: str, - x: int, - y: int, + x: Optional[int], + y: Optional[int], extra: dict, ) -> str: - format_vars = {"base_url": base_url, "x": x, "y": y} + format_vars = {"base_url": base_url} + if x is not None: + format_vars["x"] = x + if y is not None: + format_vars["y"] = y for key, value in extra.items(): if isinstance(value, (str, int, float)): format_vars[key] = value return template.format(**format_vars) +def _select_download_target( + download_targets: Dict[str, dict], + default_key: Optional[str], + dataset_key: str, + dataset_cfg: dict, +) -> Tuple[str, dict]: + requested = dataset_cfg.get("download") + if requested: + if requested not in download_targets: + raise SystemExit(f"{dataset_key}: download target not found: {requested}") + return requested, download_targets[requested] + if default_key: + if default_key not in download_targets: + raise SystemExit(f"download.default_target not found: {default_key}") + return default_key, download_targets[default_key] + if len(download_targets) == 1: + name = next(iter(download_targets)) + return name, download_targets[name] + raise SystemExit( + f"{dataset_key}: multiple download targets defined; set datasets.{dataset_key}.download " + "or download.default_target.", + ) + + +def _resolve_url(source_cfg: dict, base_url: str, x: Optional[int], y: Optional[int]) -> str: + if "url" in source_cfg: + return str(source_cfg["url"]) + template = source_cfg.get("url_template") + if not template: + raise SystemExit("url_template or url must be defined for the dataset.") + return _format_url(str(template), base_url, x, y, source_cfg) + + def _build_tasks( cfg: dict, datasets: Dict[str, dict], tile_ranges: Dict[str, dict], - base_output_dir: str, + download_targets: Dict[str, dict], + default_target: Optional[str], start_override: Optional[Tuple[int, int]], end_override: Optional[Tuple[int, int]], ) -> List[DownloadTask]: tasks: List[DownloadTask] = [] - base_url = cfg.get("download", {}).get("base_url", "").rstrip("/") for dataset_key, dataset_cfg in datasets.items(): - tile_range_key = dataset_cfg.get("tile_range") - if not tile_range_key or tile_range_key not in tile_ranges: - raise SystemExit(f"{dataset_key}: tile_range not found: {tile_range_key}") - range_cfg = tile_ranges[tile_range_key] - if start_override and end_override: - range_cfg 
= _override_range(range_cfg, start_override, end_override) - + download_key, download_cfg = _select_download_target( + download_targets, + default_target, + dataset_key, + dataset_cfg, + ) + base_url = download_cfg.get("base_url", "").rstrip("/") + base_output_dir = download_cfg.get("output_directory", DEFAULT_OUTPUT_DIR) base_subdir = _resolve_output_dir(dataset_key, dataset_cfg) dataset_out_dir = os.path.join(base_output_dir, base_subdir) + unzip = bool(dataset_cfg.get("unzip", False)) + unzip_dir = dataset_out_dir - for x, y in _iter_tiles(range_cfg): + tile_range_key = dataset_cfg.get("tile_range") + if tile_range_key: + if tile_range_key not in tile_ranges: + raise SystemExit(f"{dataset_key}: tile_range not found: {tile_range_key}") + range_cfg = tile_ranges[tile_range_key] + if start_override and end_override: + range_cfg = _override_range(range_cfg, start_override, end_override) + + for x, y in _iter_tiles(range_cfg): + if "files" in dataset_cfg: + for file_cfg in dataset_cfg["files"]: + file_type = file_cfg.get("type", "file") + file_subdir = FILE_TYPE_SUBDIRS.get(file_type, file_type) + out_dir = os.path.join(dataset_out_dir, file_subdir) + url = _resolve_url(file_cfg, base_url, x, y) + filename = os.path.basename(urlparse(url).path) + tasks.append( + DownloadTask( + dataset_key, + url, + os.path.join(out_dir, filename), + download_key, + unzip, + unzip_dir, + ) + ) + else: + url = _resolve_url(dataset_cfg, base_url, x, y) + filename = os.path.basename(urlparse(url).path) + tasks.append( + DownloadTask( + dataset_key, + url, + os.path.join(dataset_out_dir, filename), + download_key, + unzip, + unzip_dir, + ) + ) + else: if "files" in dataset_cfg: for file_cfg in dataset_cfg["files"]: file_type = file_cfg.get("type", "file") file_subdir = FILE_TYPE_SUBDIRS.get(file_type, file_type) out_dir = os.path.join(dataset_out_dir, file_subdir) - url = _format_url(file_cfg["url_template"], base_url, x, y, file_cfg) + url = _resolve_url(file_cfg, base_url, None, None) filename = os.path.basename(urlparse(url).path) - tasks.append(DownloadTask(dataset_key, url, os.path.join(out_dir, filename))) + tasks.append( + DownloadTask( + dataset_key, + url, + os.path.join(out_dir, filename), + download_key, + unzip, + unzip_dir, + ) + ) else: - url = _format_url(dataset_cfg["url_template"], base_url, x, y, dataset_cfg) + url = _resolve_url(dataset_cfg, base_url, None, None) filename = os.path.basename(urlparse(url).path) - tasks.append(DownloadTask(dataset_key, url, os.path.join(dataset_out_dir, filename))) + tasks.append( + DownloadTask( + dataset_key, + url, + os.path.join(dataset_out_dir, filename), + download_key, + unzip, + unzip_dir, + ) + ) return tasks @@ -328,6 +443,44 @@ def _safe_remove_dir(base_dir: str, rel_dir: str) -> None: shutil.rmtree(target) +def _unzipped_marker_path(task: DownloadTask) -> str: + unzip_dir = task.unzip_dir or os.path.dirname(task.output_path) + marker_name = f".{os.path.basename(task.output_path)}.unzipped" + return os.path.join(unzip_dir, marker_name) + + +def _needs_unzip(task: DownloadTask) -> bool: + if not task.unzip: + return False + if not os.path.exists(task.output_path): + return False + return not os.path.exists(_unzipped_marker_path(task)) + + +def _maybe_unzip(task: DownloadTask, logger: DownloadLogger) -> bool: + if not task.unzip: + return True + if not os.path.exists(task.output_path): + logger.log("ERROR", f"Unzip failed; missing file: {task.output_path}") + return False + unzip_dir = task.unzip_dir or os.path.dirname(task.output_path) + 
os.makedirs(unzip_dir, exist_ok=True) + try: + with zipfile.ZipFile(task.output_path, "r") as zf: + zf.extractall(unzip_dir) + except (zipfile.BadZipFile, OSError) as exc: + logger.log("ERROR", f"Unzip failed: {task.output_path} ({exc})") + return False + marker = _unzipped_marker_path(task) + try: + with open(marker, "w", encoding="utf-8") as fh: + fh.write(time.strftime("%Y-%m-%d %H:%M:%S")) + except OSError as exc: + logger.log("WARN", f"Could not write unzip marker: {marker} ({exc})") + logger.log("INFO", f"Unzipped: {task.output_path} -> {unzip_dir}") + return True + + def _download_task( session: requests.Session, task: DownloadTask, @@ -386,7 +539,7 @@ def run_download( ca_bundle_override: Optional[str] = None, ) -> int: cfg = _load_toml(config_path) - download_cfg = cfg.get("download", {}) + download_targets, default_target = _parse_download_targets(cfg) tile_ranges = _parse_tile_ranges(cfg) datasets = _select_datasets(cfg, requested_datasets) @@ -398,23 +551,31 @@ def run_download( progress_enabled, ) - configured_output_dir = download_cfg.get("output_directory") - base_output_dir = DEFAULT_OUTPUT_DIR - if configured_output_dir and os.path.normpath(configured_output_dir) != DEFAULT_OUTPUT_DIR: + if len(download_targets) > 1 and not default_target: + default_target = next(iter(download_targets)) logger.log( "WARN", - f"Ignoring download.output_directory={configured_output_dir}; using raw/ to match pipeline.", + "No download.default_target set; using " + f"{default_target} as default for datasets without a download key.", ) if clean_downloads: for dataset_key, dataset_cfg in datasets.items(): + download_key, download_cfg = _select_download_target( + download_targets, + default_target, + dataset_key, + dataset_cfg, + ) + base_output_dir = download_cfg.get("output_directory", DEFAULT_OUTPUT_DIR) _safe_remove_dir(base_output_dir, _resolve_output_dir(dataset_key, dataset_cfg)) tasks = _build_tasks( cfg, datasets, tile_ranges, - base_output_dir, + download_targets, + default_target, start_override, end_override, ) @@ -423,91 +584,129 @@ def run_download( logger.log("INFO", "No download tasks generated.") return 0 - skip_existing = not clean_downloads - pending: List[DownloadTask] = [] - skipped = 0 + tasks_by_target: Dict[str, List[DownloadTask]] = {} for task in tasks: - if skip_existing and os.path.exists(task.output_path): - skipped += 1 + tasks_by_target.setdefault(task.download_key, []).append(task) + + total_downloaded = 0 + total_missing = 0 + total_failed = 0 + total_skipped = 0 + total_unzipped_existing = 0 + + for target_key, target_cfg in download_targets.items(): + target_tasks = tasks_by_target.get(target_key) + if not target_tasks: continue - pending.append(task) - if skipped: - logger.log("INFO", f"Skipped {skipped} existing file(s).") + target_label = f"[{target_key}] " + skip_existing = not clean_downloads + pending: List[DownloadTask] = [] + unzip_only: List[DownloadTask] = [] + skipped = 0 - if not pending: - logger.log("INFO", "Nothing to download after skipping existing files.") - return 0 + for task in target_tasks: + if skip_existing and os.path.exists(task.output_path): + if _needs_unzip(task): + unzip_only.append(task) + else: + skipped += 1 + continue + pending.append(task) - verify_ssl = download_cfg.get("verify_ssl", True) - ca_bundle = ca_bundle_override or download_cfg.get("ca_bundle") - if verify_ssl and ca_bundle: - if os.path.exists(ca_bundle): - verify = ca_bundle - source = "CLI" if ca_bundle_override else "config" - logger.log("INFO", f"Using CA bundle 
({source}): {ca_bundle}") + if skipped: + logger.log("INFO", f"{target_label}Skipped {skipped} existing file(s).") + if unzip_only: + logger.log("INFO", f"{target_label}Queued {len(unzip_only)} unzip(s) for existing files.") + + verify_ssl = target_cfg.get("verify_ssl", True) + ca_bundle = ca_bundle_override or target_cfg.get("ca_bundle") + if verify_ssl and ca_bundle: + if os.path.exists(ca_bundle): + verify = ca_bundle + source = "CLI" if ca_bundle_override else "config" + logger.log("INFO", f"{target_label}Using CA bundle ({source}): {ca_bundle}") + else: + verify = True + logger.log("WARN", f"{target_label}CA bundle not found, using system trust: {ca_bundle}") else: - verify = True - logger.log("WARN", f"CA bundle not found, using system trust: {ca_bundle}") - else: - verify = bool(verify_ssl) - if not verify_ssl: - logger.log("WARN", "TLS verification disabled by config.") + verify = bool(verify_ssl) + if not verify_ssl: + logger.log("WARN", f"{target_label}TLS verification disabled by config.") - timeout = int(download_cfg.get("timeout_seconds", 300)) - retries = int(download_cfg.get("retry_attempts", 3)) - parallel = int(download_cfg.get("parallel_downloads", 4)) - user_agent = download_cfg.get("user_agent", "geodata-download/1.0") + timeout = int(target_cfg.get("timeout_seconds", 300)) + retries = int(target_cfg.get("retry_attempts", 3)) + parallel = int(target_cfg.get("parallel_downloads", 4)) + user_agent = target_cfg.get("user_agent", "geodata-download/1.0") - downloaded = 0 - missing = 0 - failed = 0 + downloaded = 0 + missing = 0 + failed = 0 - progress = DownloadProgress(len(pending), progress_enabled) - stop_event = threading.Event() - interrupted = False + if pending: + progress = DownloadProgress(len(pending), progress_enabled) + stop_event = threading.Event() + interrupted = False - with requests.Session() as session: - session.headers.update({"User-Agent": user_agent}) - executor = DaemonThreadPool(max_workers=parallel) - futures = [ - executor.submit(_download_task, session, task, timeout, verify, retries, stop_event, progress) - for task in pending - ] - try: - for future in as_completed(futures): - status, task, detail = future.result() - if status == "downloaded": - downloaded += 1 - elif status == "missing": - missing += 1 - logger.log("WARN", f"Missing tile: {task.url} ({detail})") - elif status == "aborted": - failed += 1 + with requests.Session() as session: + session.headers.update({"User-Agent": user_agent}) + executor = DaemonThreadPool(max_workers=parallel) + futures = [ + executor.submit(_download_task, session, task, timeout, verify, retries, stop_event, progress) + for task in pending + ] + try: + for future in as_completed(futures): + status, task, detail = future.result() + if status == "downloaded": + downloaded += 1 + if task.unzip and not _maybe_unzip(task, logger): + failed += 1 + elif status == "missing": + missing += 1 + logger.log("WARN", f"{target_label}Missing tile: {task.url} ({detail})") + elif status == "aborted": + failed += 1 + else: + failed += 1 + extra = f" ({detail})" if detail else "" + logger.log("ERROR", f"{target_label}Failed: {task.url}{extra}") + progress.set_counts(downloaded, missing, failed, task.output_path) + except KeyboardInterrupt: + interrupted = True + stop_event.set() + logger.log("WARN", f"{target_label}Interrupted; stopping downloads.") + finally: + if interrupted: + executor.shutdown(wait=False, cancel_futures=True) + else: + executor.shutdown(wait=True) + + progress.finish() + if interrupted: + return 130 + else: + 
logger.log("INFO", f"{target_label}Nothing to download.") + + if unzip_only: + for task in unzip_only: + if _maybe_unzip(task, logger): + total_unzipped_existing += 1 else: failed += 1 - extra = f" ({detail})" if detail else "" - logger.log("ERROR", f"Failed: {task.url}{extra}") - progress.set_counts(downloaded, missing, failed, task.output_path) - except KeyboardInterrupt: - interrupted = True - stop_event.set() - logger.log("WARN", "Interrupted; stopping downloads.") - finally: - if interrupted: - executor.shutdown(wait=False, cancel_futures=True) - else: - executor.shutdown(wait=True) - progress.finish() - if interrupted: - return 130 + total_downloaded += downloaded + total_missing += missing + total_failed += failed + total_skipped += skipped logger.log( "INFO", - f"Done. Downloaded={downloaded}, Missing={missing}, Failed={failed}, Skipped={skipped}.", + "Done. " + f"Downloaded={total_downloaded}, Missing={total_missing}, Failed={total_failed}, " + f"Skipped={total_skipped}, UnzippedExisting={total_unzipped_existing}.", ) - return 1 if failed else 0 + return 1 if total_failed else 0 def parse_args(argv: Optional[Iterable[str]] = None) -> argparse.Namespace: diff --git a/geodata_download.toml b/geodata_download.toml index f9116a8..73a7f1d 100644 --- a/geodata_download.toml +++ b/geodata_download.toml @@ -1,6 +1,6 @@ # Geodata Download Configuration for Trier Digital Twin Project -# Generated: 2026-01-19 -# Data Source: Geobasis Rheinland-Pfalz (geobasis-rlp.de) +# Generated: 2026-01-23 +# Data Source: Geobasis Rheinland-Pfalz (geobasis-rlp.de) + HydroSHEDS [project] name = "Trier Digital Twin - Geodata Collection" @@ -9,9 +9,9 @@ coordinate_system = "ETRS89/UTM32" vertical_datum = "DHHN2016" region = "Stadt Trier and surrounding area" -[download] +[download.geobasis_rlp_de] base_url = "https://geobasis-rlp.de/data" -output_directory = "raw" +output_directory = "raw/geo_rlp" parallel_downloads = 4 retry_attempts = 3 timeout_seconds = 300 @@ -19,6 +19,14 @@ verify_ssl = true user_agent = "TrierDigitalTwin/1.0" ca_bundle = "certs/geobasis-ca.pem" +[download.hydrosheds_org] +base_url = "https://data.hydrosheds.org/file" +output_directory = "raw/hydrorivers" +parallel_downloads = 4 +retry_attempts = 3 +timeout_seconds = 300 +verify_ssl = true + # Define tile range presets that datasets inherit from [tile_ranges.range_1km] description = "1x1km tiles covering full area" @@ -42,6 +50,7 @@ y_step = 2 [datasets.dgm1] enabled = true name = "Digital Ground Model" +download = "geobasis_rlp_de" tile_range = "range_1km" resolution = "1m" format = "GeoTIFF" @@ -53,6 +62,7 @@ output_subdir = "dgm1" [datasets.dom1] enabled = true name = "Digital Surface Model" +download = "geobasis_rlp_de" tile_range = "range_1km" resolution = "1m" format = "GeoTIFF" @@ -65,6 +75,7 @@ output_subdir = "dom1" [datasets.geb3dlo] enabled = true name = "3D Buildings LoD2" +download = "geobasis_rlp_de" tile_range = "range_2km" resolution = "LoD2" format = "CityGML" @@ -77,6 +88,7 @@ notes = "Semantically rich building models with roof structures" [datasets.dop20] enabled = true name = "Orthophotos 20cm" +download = "geobasis_rlp_de" tile_range = "range_2km" resolution = "20cm" format = "JPEG2000" @@ -100,6 +112,7 @@ url_template = "{base_url}/dop20rgb/current/metadata/dop20rgb_32_{x}_{y}_2_rp_20 [datasets.bdom20rgbi] enabled = true name = "Building/Object Point Cloud RGBI" +download = "geobasis_rlp_de" tile_range = "range_2km" resolution = "20cm" format = "LAZ" @@ -112,6 +125,7 @@ notes = "RGB + Intensity point cloud for 
buildings and objects" [datasets.lpolpg] enabled = true name = "Laser Point Ground/Object" +download = "geobasis_rlp_de" tile_range = "range_1km" resolution = "High density" format = "LAZ" @@ -120,6 +134,18 @@ priority = 2 output_subdir = "lpolpg" notes = "Combined point cloud (LPO/LPG) in one LAZ file" +# HydroRIVERS (for terrain erosion) +[datasets.hydrorivers] +enabled = true +name = "HydroRIVERS Europe and Middle East" +download = "hydrosheds_org" +format = "zip" +unzip = true +output_subdir = "hydrorivers_eu_shp" +priority = 2 +url_template = "{base_url}/HydroRIVERS/HydroRIVERS_v10_eu_shp.zip" +notes = "HydroRIVERS Shapefile for erosion of rivers" + [processing] convert_to_mesh = true generate_lods = true diff --git a/geodata_pipeline/river_erosion.py b/geodata_pipeline/river_erosion.py index 1986d9c..1019a64 100644 --- a/geodata_pipeline/river_erosion.py +++ b/geodata_pipeline/river_erosion.py @@ -134,6 +134,8 @@ def _build_depth_raster( layer.SetSpatialFilterRect(minx, miny, maxx, maxy) layer.ResetReading() + buffered_geoms: list[ogr.Geometry] = [] + burn_values: list[float] = [] for feat in layer: order = _normalize_order(feat.GetField(profile.order_field), profile) if order is None: @@ -152,13 +154,43 @@ def _build_depth_raster( buffered = geom.Buffer(width_m * 0.5) if buffered is None or buffered.IsEmpty(): continue - gdal.RasterizeGeometries( - depth_ds, - [1], - [buffered], - burn_values=[depth_m], - options=["MERGE_ALG=MAX"], - ) + buffered_geoms.append(buffered) + burn_values.append(depth_m) + + if buffered_geoms: + # GDAL only supports MERGE_ALG=REPLACE/ADD; emulate MAX by sorting ascending. + ordered = sorted(zip(buffered_geoms, burn_values), key=lambda item: item[1]) + ordered_geoms = [geom for geom, _ in ordered] + ordered_values = [float(value) for _, value in ordered] + if hasattr(gdal, "RasterizeGeometries"): + gdal.RasterizeGeometries( + depth_ds, + [1], + ordered_geoms, + burn_values=ordered_values, + options=["MERGE_ALG=REPLACE"], + ) + else: + mem_driver = ogr.GetDriverByName("MEM") or ogr.GetDriverByName("Memory") + if mem_driver is None: + raise SystemExit("[river_erosion] OGR MEM driver missing; update GDAL.") + mem_ds = mem_driver.CreateDataSource("") + mem_layer = mem_ds.CreateLayer("burn", srs=tile_srs, geom_type=ogr.wkbPolygon) + mem_layer.CreateField(ogr.FieldDefn("burn", ogr.OFTReal)) + layer_defn = mem_layer.GetLayerDefn() + for geom, value in zip(ordered_geoms, ordered_values): + feature = ogr.Feature(layer_defn) + feature.SetField("burn", value) + feature.SetGeometry(geom) + mem_layer.CreateFeature(feature) + feature = None + gdal.RasterizeLayer( + depth_ds, + [1], + mem_layer, + options=["ATTRIBUTE=burn"], + ) + mem_ds = None depth = band.ReadAsArray().astype(np.float32) if profile.smooth_sigma_m > 0.0: @@ -281,11 +313,22 @@ def _process_profile( global_max = max(global_max, tile_max) out_path = os.path.join(output_dir, f"{tile_id}.png") - driver = gdal.GetDriverByName("PNG") - out_ds = driver.Create(out_path, out_u16.shape[1], out_u16.shape[0], 1, gdal.GDT_UInt16, options=["WORLDFILE=YES"]) + mem_driver = gdal.GetDriverByName("MEM") + out_ds = mem_driver.Create("", out_u16.shape[1], out_u16.shape[0], 1, gdal.GDT_UInt16) out_ds.SetGeoTransform(gt) out_ds.SetProjection(tile_srs.ExportToWkt()) out_ds.GetRasterBand(1).WriteArray(out_u16) + trans_opts = gdal.TranslateOptions( + outputType=gdal.GDT_UInt16, + format="PNG", + creationOptions=["WORLDFILE=YES"], + ) + try: + gdal.Translate(out_path, out_ds, options=trans_opts) + except RuntimeError as exc: + 
print(f"[river_erosion:{name}] Translate failed for {tile_id}: {exc}") + skipped += 1 + continue out_ds = None rows_out.append(