From 2dc6cf2b998fed100f6fccdac399a1a3d7051941 Mon Sep 17 00:00:00 2001 From: "s0wlz (Matthias Puchstein)" Date: Tue, 10 Feb 2026 22:02:08 +0100 Subject: [PATCH] Add SWE boundary v2 export with boundary inflow/source area cells --- geodata_config.example.toml | 9 ++ geodata_config.toml | 4 + geodata_pipeline/config.py | 4 + geodata_pipeline/swe_lods.py | 278 ++++++++++++++++++++++++++++++++--- 4 files changed, 278 insertions(+), 17 deletions(-) diff --git a/geodata_config.example.toml b/geodata_config.example.toml index ee37fcc..5f26852 100644 --- a/geodata_config.example.toml +++ b/geodata_config.example.toml @@ -98,6 +98,15 @@ enabled = true out_dir = "export_swe" tile_index_path = "export_swe/swe_tile_index.csv" manifest_path = "export_swe/swe_manifest.json" +boundary_manifest_path = "export_swe/swe_boundaries.json" +boundary_inflow_mask_dir = "raw/water_source_masks" +source_area_mask_dir = "raw/water_source_area_masks" +source_mask_dir = "raw/water_source_masks" +sink_mask_dir = "raw/water_sink_masks" +boundary_inflow_params_toml = "raw/water_source_masks/sources.toml" +source_area_params_toml = "raw/water_source_area_masks/source_areas.toml" +source_params_toml = "raw/water_source_masks/sources.toml" +sink_params_toml = "raw/water_sink_masks/sinks.toml" height_source = "river_erosion" height_source_dir = "" height_source_manifest = "" diff --git a/geodata_config.toml b/geodata_config.toml index 636676b..4ef27f1 100644 --- a/geodata_config.toml +++ b/geodata_config.toml @@ -115,8 +115,12 @@ out_dir = "export_swe" tile_index_path = "export_swe/swe_tile_index.csv" manifest_path = "export_swe/swe_manifest.json" boundary_manifest_path = "export_swe/swe_boundaries.json" +boundary_inflow_mask_dir = "raw/water_source_masks" +source_area_mask_dir = "raw/water_source_area_masks" source_mask_dir = "raw/water_source_masks" sink_mask_dir = "raw/water_sink_masks" +boundary_inflow_params_toml = "raw/water_source_masks/sources.toml" +source_area_params_toml = "raw/water_source_area_masks/source_areas.toml" source_params_toml = "raw/water_source_masks/sources.toml" sink_params_toml = "raw/water_sink_masks/sinks.toml" height_source = "river_erosion" diff --git a/geodata_pipeline/config.py b/geodata_pipeline/config.py index d325032..26d8ed9 100644 --- a/geodata_pipeline/config.py +++ b/geodata_pipeline/config.py @@ -264,8 +264,12 @@ class SweLodConfig: tile_index_path: str = "export_swe/swe_tile_index.csv" manifest_path: str = "export_swe/swe_manifest.json" boundary_manifest_path: str = "export_swe/swe_boundaries.json" + boundary_inflow_mask_dir: str = "raw/water_source_masks" + source_area_mask_dir: str = "raw/water_source_area_masks" source_mask_dir: str = "raw/water_source_masks" sink_mask_dir: str = "raw/water_sink_masks" + boundary_inflow_params_toml: str = "raw/water_source_masks/sources.toml" + source_area_params_toml: str = "raw/water_source_area_masks/source_areas.toml" source_params_toml: str = "raw/water_source_masks/sources.toml" sink_params_toml: str = "raw/water_sink_masks/sinks.toml" height_source: str = "river_erosion" diff --git a/geodata_pipeline/swe_lods.py b/geodata_pipeline/swe_lods.py index 0ae4cd3..3c814ed 100644 --- a/geodata_pipeline/swe_lods.py +++ b/geodata_pipeline/swe_lods.py @@ -741,24 +741,42 @@ def _write_boundary_manifest( *, force_vrt: bool, ) -> None: - source_ds = _open_boundary_mask_dataset( + boundary_inflow_mask_dir = swe_cfg.boundary_inflow_mask_dir or swe_cfg.source_mask_dir + source_area_mask_dir = swe_cfg.source_area_mask_dir + sink_mask_dir = swe_cfg.sink_mask_dir + + boundary_inflow_ds = _open_boundary_mask_dataset( cfg, - mask_dir=swe_cfg.source_mask_dir, - kind="source", + mask_dir=boundary_inflow_mask_dir, + kind="boundary_inflow", + force_vrt=force_vrt, + ) + source_area_ds = _open_boundary_mask_dataset( + cfg, + mask_dir=source_area_mask_dir, + kind="source_area", force_vrt=force_vrt, ) sink_ds = _open_boundary_mask_dataset( cfg, - mask_dir=swe_cfg.sink_mask_dir, + mask_dir=sink_mask_dir, kind="sink", force_vrt=force_vrt, ) - source_params = _load_boundary_params_toml(swe_cfg.source_params_toml, kind="sources") + boundary_inflow_params = _load_boundary_params_multi( + swe_cfg.boundary_inflow_params_toml or swe_cfg.source_params_toml, + kinds=("boundary_inflows", "boundary_inflow", "sources", "source"), + ) + source_area_params = _load_boundary_params_multi( + swe_cfg.source_area_params_toml, + kinds=("source_areas", "source_area", "sources", "source"), + ) sink_params = _load_boundary_params_toml(swe_cfg.sink_params_toml, kind="sinks") tiles_payload = [] - source_stats: dict[int, dict] = {} + boundary_inflow_stats: dict[int, dict] = {} + source_area_stats: dict[int, dict] = {} sink_stats: dict[int, dict] = {} for row in tile_rows: @@ -768,23 +786,49 @@ def _write_boundary_manifest( tile_x = int(row["tile_x"]) tile_y = int(row["tile_y"]) - source_arr = _warp_id_array(source_ds, bounds, resolution, resolution) if source_ds is not None else None + boundary_inflow_arr = ( + _warp_id_array(boundary_inflow_ds, bounds, resolution, resolution) + if boundary_inflow_ds is not None + else None + ) + source_area_arr = ( + _warp_id_array(source_area_ds, bounds, resolution, resolution) + if source_area_ds is not None + else None + ) sink_arr = _warp_id_array(sink_ds, bounds, resolution, resolution) if sink_ds is not None else None - source_ids = _ids_to_entries(source_arr) + boundary_inflow_ids = _ids_to_entries(boundary_inflow_arr) + source_area_ids = _ids_to_entries(source_area_arr) sink_ids = _ids_to_entries(sink_arr) - _accumulate_id_stats(source_stats, source_ids, lod) + _accumulate_id_stats(boundary_inflow_stats, boundary_inflow_ids, lod) + _accumulate_id_stats(source_area_stats, source_area_ids, lod) _accumulate_id_stats(sink_stats, sink_ids, lod) + boundary_cells = _boundary_cells_from_ids(boundary_inflow_arr) + source_area_cells = _cell_groups_from_ids(source_area_arr) + sink_cells = _cell_groups_from_ids(sink_arr) + + boundary_inflow_id_path = "" + source_area_id_path = "" source_id_path = "" sink_id_path = "" lod_dir = os.path.join(swe_cfg.out_dir, lod) - if source_arr is not None: + if boundary_inflow_arr is not None: source_dir = os.path.join(lod_dir, "source_ids") + boundary_dir = os.path.join(lod_dir, "boundary_inflow_ids") ensure_dir(source_dir) + ensure_dir(boundary_dir) source_id_path = os.path.join(source_dir, f"source_ids_{tile_x}_{tile_y}.exr") - _write_exr(source_id_path, source_arr.astype(np.float32, copy=False), swe_cfg.prefer_float16) + boundary_inflow_id_path = os.path.join(boundary_dir, f"boundary_inflow_ids_{tile_x}_{tile_y}.exr") + _write_exr(source_id_path, boundary_inflow_arr.astype(np.float32, copy=False), swe_cfg.prefer_float16) + _write_exr(boundary_inflow_id_path, boundary_inflow_arr.astype(np.float32, copy=False), swe_cfg.prefer_float16) + if source_area_arr is not None: + source_area_dir = os.path.join(lod_dir, "source_area_ids") + ensure_dir(source_area_dir) + source_area_id_path = os.path.join(source_area_dir, f"source_area_ids_{tile_x}_{tile_y}.exr") + _write_exr(source_area_id_path, source_area_arr.astype(np.float32, copy=False), swe_cfg.prefer_float16) if sink_arr is not None: sink_dir = os.path.join(lod_dir, "sink_ids") ensure_dir(sink_dir) @@ -799,22 +843,42 @@ def _write_boundary_manifest( "tile_size_m": float(row["tile_size_m"]), "resolution": resolution, "bounds": [bounds[0], bounds[1], bounds[2], bounds[3]], - "source_ids": source_ids, + "source_ids": boundary_inflow_ids, "sink_ids": sink_ids, + "boundary_inflow_ids": boundary_inflow_ids, + "source_area_ids": source_area_ids, "source_id_path": source_id_path, "sink_id_path": sink_id_path, + "boundary_inflow_id_path": boundary_inflow_id_path, + "source_area_id_path": source_area_id_path, + "boundary_cells": boundary_cells, + "source_area_cells": source_area_cells, + "sink_cells": sink_cells, } ) payload = { - "schema_version": 1, + "schema_version": 2, "id_encoding": "id = B + 256*G + 65536*R", - "source_mask_dir": swe_cfg.source_mask_dir, - "sink_mask_dir": swe_cfg.sink_mask_dir, - "source_params_toml": swe_cfg.source_params_toml, + "boundary_inflow_mask_dir": boundary_inflow_mask_dir, + "source_area_mask_dir": source_area_mask_dir, + "sink_mask_dir": sink_mask_dir, + "boundary_inflow_params_toml": swe_cfg.boundary_inflow_params_toml, + "source_area_params_toml": swe_cfg.source_area_params_toml, "sink_params_toml": swe_cfg.sink_params_toml, - "sources": _merge_stats_with_params(source_stats, source_params), + # Legacy aliases kept for existing tooling. + "source_mask_dir": boundary_inflow_mask_dir, + "source_params_toml": swe_cfg.boundary_inflow_params_toml or swe_cfg.source_params_toml, + "sources": _merge_stats_with_params(boundary_inflow_stats, boundary_inflow_params), "sinks": _merge_stats_with_params(sink_stats, sink_params), + "boundaries": _merge_boundary_definitions( + boundary_inflow_stats, + boundary_inflow_params, + source_area_stats, + source_area_params, + sink_stats, + sink_params, + ), "tiles": tiles_payload, } @@ -917,6 +981,66 @@ def _accumulate_id_stats(stats: dict[int, dict], ids: list[dict], lod: str) -> N node["lod_pixels"][lod] = int(node["lod_pixels"].get(lod, 0) + pixels) +def _boundary_cells_from_ids(ids: np.ndarray | None) -> list[dict]: + if ids is None: + return [] + + resolution = int(ids.shape[0]) + ghost_resolution = resolution + 2 + cells_per_id: dict[int, set[int]] = {} + + for y in range(resolution): + for x in range(resolution): + ident = int(ids[y, x]) + if ident <= 0: + continue + cell_set = cells_per_id.setdefault(ident, set()) + if x == 0: + cell_set.add(((y + 1) * ghost_resolution) + 0) + if x == resolution - 1: + cell_set.add(((y + 1) * ghost_resolution) + (ghost_resolution - 1)) + if y == 0: + cell_set.add((0 * ghost_resolution) + (x + 1)) + if y == resolution - 1: + cell_set.add(((ghost_resolution - 1) * ghost_resolution) + (x + 1)) + + out = [] + for ident in sorted(cells_per_id.keys()): + cells = sorted(cells_per_id[ident]) + out.append( + { + "id": int(ident), + "count": len(cells), + "cells": [int(cell) for cell in cells], + } + ) + return out + + +def _cell_groups_from_ids(ids: np.ndarray | None) -> list[dict]: + if ids is None: + return [] + + out = [] + unique_ids = np.unique(ids) + for ident in unique_ids.tolist(): + ident = int(ident) + if ident <= 0: + continue + flat = np.flatnonzero(ids == ident) + cells = [int(v) for v in flat.tolist()] + if not cells: + continue + out.append( + { + "id": ident, + "count": len(cells), + "cells": cells, + } + ) + return out + + def _load_boundary_params_toml(path: str, *, kind: str) -> dict[int, dict]: if not path or not os.path.exists(path): return {} @@ -957,6 +1081,16 @@ def _load_boundary_params_toml(path: str, *, kind: str) -> dict[int, dict]: return out +def _load_boundary_params_multi(path: str, *, kinds: tuple[str, ...]) -> dict[int, dict]: + if not path: + return {} + for kind in kinds: + out = _load_boundary_params_toml(path, kind=kind) + if out: + return out + return {} + + def _parse_int_id(value) -> int | None: try: return int(value) @@ -1002,6 +1136,116 @@ def _merge_stats_with_params(stats: dict[int, dict], params: dict[int, dict]) -> return out +def _merge_boundary_definitions( + boundary_inflow_stats: dict[int, dict], + boundary_inflow_params: dict[int, dict], + source_area_stats: dict[int, dict], + source_area_params: dict[int, dict], + sink_stats: dict[int, dict], + sink_params: dict[int, dict], +) -> list[dict]: + out = [] + out.extend( + _build_boundary_definitions( + "boundary_inflow", + boundary_inflow_stats, + boundary_inflow_params, + ) + ) + out.extend( + _build_boundary_definitions( + "source_area", + source_area_stats, + source_area_params, + ) + ) + out.extend( + _build_boundary_definitions( + "sink", + sink_stats, + sink_params, + ) + ) + return out + + +def _build_boundary_definitions(kind: str, stats: dict[int, dict], params: dict[int, dict]) -> list[dict]: + all_ids = sorted(set(stats.keys()) | set(params.keys())) + out = [] + for ident in all_ids: + node = { + "kind": kind, + "id": int(ident), + "tile_count": 0, + "total_pixels": 0, + "lod_pixels": {}, + "params": {}, + "default_state": _default_boundary_state(kind, params.get(ident, {})), + } + if ident in stats: + node["tile_count"] = int(stats[ident]["tile_count"]) + node["total_pixels"] = int(stats[ident]["total_pixels"]) + node["lod_pixels"] = dict(stats[ident]["lod_pixels"]) + if ident in params: + node["params"] = dict(params[ident]) + out.append(node) + return out + + +def _default_boundary_state(kind: str, params: dict) -> dict: + mode = str(params.get("mode", "")).strip().lower() + enabled = _parse_bool(params.get("enabled"), default=(kind == "sink" and mode == "free_outflow")) + + water_level = _parse_float_or_default( + params.get("water_level_m"), + _parse_float_or_default(params.get("base_level_offset_m"), 0.0), + ) + velocity_u = _parse_float_or_default( + params.get("velocity_u_mps"), + _parse_float_or_default(params.get("u_mps"), 0.0), + ) + velocity_v = _parse_float_or_default( + params.get("velocity_v_mps"), + _parse_float_or_default(params.get("v_mps"), 0.0), + ) + depth_rate = _parse_float_or_default( + params.get("depth_rate_mps"), + _parse_float_or_default(params.get("base_depth_rate_mps"), 0.0), + ) + + if kind == "sink" and depth_rate > 0.0: + depth_rate = -depth_rate + + return { + "enabled": enabled, + "water_level_m": float(water_level), + "velocity_u_mps": float(velocity_u), + "velocity_v_mps": float(velocity_v), + "depth_rate_mps": float(depth_rate), + } + + +def _parse_bool(value, *, default: bool) -> bool: + if isinstance(value, bool): + return value + if isinstance(value, (int, float)): + return bool(value) + if isinstance(value, str): + raw = value.strip().lower() + if raw in {"1", "true", "yes", "on"}: + return True + if raw in {"0", "false", "no", "off"}: + return False + return default + + +def _parse_float_or_default(value, default: float) -> float: + try: + return float(value) + except (TypeError, ValueError): + return float(default) + + def _cleanup_patterns(raw_dir: str) -> Iterable[str]: return [ os.path.join("work", "*_tmp.tif"),