From c63f21cf8146d1fbc6e9cd04375945a2174ab23c Mon Sep 17 00:00:00 2001 From: "s0wlz (Matthias Puchstein)" Date: Sun, 8 Feb 2026 03:18:16 +0100 Subject: [PATCH] Add SWE boundary mask pipeline and mask tooling --- geodata_config.toml | 9 +- geodata_download.toml | 12 ++ geodata_pipeline/config.py | 11 + geodata_pipeline/swe_lods.py | 301 +++++++++++++++++++++++++- scripts/fix_masks.py | 73 +++++++ scripts/mask_build_master.py | 380 +++++++++++++++++++++++++++++++++ scripts/mask_split_master.py | 247 +++++++++++++++++++++ scripts/mask_validate_ids.py | 246 +++++++++++++++++++++ scripts/ortho_build_master.py | 158 ++++++++++++++ tests/test_river_continuity.py | 80 +++++++ tests/test_river_lakes.py | 70 ++++++ 11 files changed, 1583 insertions(+), 4 deletions(-) create mode 100644 scripts/fix_masks.py create mode 100644 scripts/mask_build_master.py create mode 100644 scripts/mask_split_master.py create mode 100644 scripts/mask_validate_ids.py create mode 100644 scripts/ortho_build_master.py create mode 100644 tests/test_river_continuity.py create mode 100644 tests/test_river_lakes.py diff --git a/geodata_config.toml b/geodata_config.toml index 8a5024a..636676b 100644 --- a/geodata_config.toml +++ b/geodata_config.toml @@ -114,6 +114,11 @@ enabled = true out_dir = "export_swe" tile_index_path = "export_swe/swe_tile_index.csv" manifest_path = "export_swe/swe_manifest.json" +boundary_manifest_path = "export_swe/swe_boundaries.json" +source_mask_dir = "raw/water_source_masks" +sink_mask_dir = "raw/water_sink_masks" +source_params_toml = "raw/water_source_masks/sources.toml" +sink_params_toml = "raw/water_sink_masks/sinks.toml" height_source = "river_erosion" height_source_dir = "" height_source_manifest = "" @@ -123,12 +128,12 @@ height_resample = "bilinear" height_nodata = "" porosity_source = "work/swe_lpo/porosity.vrt" porosity_resample = "average" -porosity_nodata = 0.0 +porosity_nodata = "" solid_bias = 3.0 include_buildings = true building_height_source = 
"work/swe_lpo/buildings.vrt" building_resample = "average" -building_nodata = 0.0 +building_nodata = "" prefer_float16 = true lpo_dir = "" lpo_base_res = 1025 diff --git a/geodata_download.toml b/geodata_download.toml index 73a7f1d..79e81ea 100644 --- a/geodata_download.toml +++ b/geodata_download.toml @@ -146,6 +146,18 @@ priority = 2 url_template = "{base_url}/HydroRIVERS/HydroRIVERS_v10_eu_shp.zip" notes = "HydroRIVERS Shapefile for erosion of rivers" +# HydroLAKES (for terrain erosion and lake masking) +[datasets.hydrolakes] +enabled = true +name = "HydroLAKES Polygons" +download = "hydrosheds_org" +format = "zip" +unzip = true +output_subdir = "hydrolakes_shp" +priority = 2 +url_template = "{base_url}/hydrolakes/HydroLAKES_polys_v10_shp.zip" +notes = "HydroLAKES Shapefile for lake depth and masking" + [processing] convert_to_mesh = true generate_lods = true diff --git a/geodata_pipeline/config.py b/geodata_pipeline/config.py index c305dd9..d325032 100644 --- a/geodata_pipeline/config.py +++ b/geodata_pipeline/config.py @@ -263,6 +263,11 @@ class SweLodConfig: out_dir: str = "export_swe" tile_index_path: str = "export_swe/swe_tile_index.csv" manifest_path: str = "export_swe/swe_manifest.json" + boundary_manifest_path: str = "export_swe/swe_boundaries.json" + source_mask_dir: str = "raw/water_source_masks" + sink_mask_dir: str = "raw/water_sink_masks" + source_params_toml: str = "raw/water_source_masks/sources.toml" + sink_params_toml: str = "raw/water_sink_masks/sinks.toml" height_source: str = "river_erosion" height_source_dir: str = "" height_source_manifest: str = "" @@ -419,6 +424,12 @@ def _swe_lod_from_dict(data: Dict[str, Any]) -> SweLodConfig: value = base.get(key) if isinstance(value, str) and value.strip() == "": base[key] = None + # In SWE, 0.0 is a valid value for porosity/building rasters. Treating it as + # nodata causes GDAL to remap real zeros and emit warning floods. 
+ if base.get("porosity_nodata") == 0 or base.get("porosity_nodata") == 0.0: + base["porosity_nodata"] = None + if base.get("building_nodata") == 0 or base.get("building_nodata") == 0.0: + base["building_nodata"] = None raw_lods = data.get("lods") lods: list[SweLodLevelConfig] = [] if isinstance(raw_lods, list): diff --git a/geodata_pipeline/swe_lods.py b/geodata_pipeline/swe_lods.py index c042982..354449e 100644 --- a/geodata_pipeline/swe_lods.py +++ b/geodata_pipeline/swe_lods.py @@ -3,6 +3,7 @@ from __future__ import annotations import json import math import os +import tomllib from dataclasses import asdict from typing import Iterable @@ -26,10 +27,12 @@ def export_swe_lods(cfg: Config, *, force_vrt: bool = False) -> int: ensure_dir(swe_cfg.out_dir) ensure_parent(swe_cfg.tile_index_path) ensure_parent(swe_cfg.manifest_path) + ensure_parent(swe_cfg.boundary_manifest_path) height_ds = _prepare_height_source(cfg, swe_cfg, force_vrt=force_vrt) xmin, ymin, xmax, ymax = _dataset_bounds(height_ds) + source_bounds = (xmin, ymin, xmax, ymax) base_tile_size = max(level.tile_size_m for level in swe_cfg.lods) origin_x = swe_cfg.origin_x @@ -59,6 +62,10 @@ def export_swe_lods(cfg: Config, *, force_vrt: bool = False) -> int: ensure_dir(building_dir) for tile_id, bounds in _iter_tiles(origin_x, origin_y, xmin, ymin, xmax, ymax, level.tile_size_m): + # Skip sliver tiles that intersect source bounds by less than one output pixel. 
+ if not _has_min_coverage(bounds, source_bounds, level.resolution, level.resolution): + skipped += 1 + continue try: height = _warp_array( height_ds, @@ -130,11 +137,20 @@ def export_swe_lods(cfg: Config, *, force_vrt: bool = False) -> int: xmax, ymax, ) + _write_boundary_manifest( + cfg, + swe_cfg, + tile_rows, + force_vrt=force_vrt, + ) removed = cleanup_aux_files(_cleanup_patterns(cfg.raw.dgm1_dir)) print(f"[swe_lods] Summary: wrote {written} tiles; skipped {skipped}.") print(f"[swe_lods] Cleanup removed {removed} temporary files/sidecars.") - return 1 if skipped else 0 + if written == 0: + print("[swe_lods] Error: no tiles were written.") + return 1 + return 0 def export_swe_porosity(cfg: Config, *, force_vrt: bool = False) -> int: @@ -147,6 +163,7 @@ def export_swe_porosity(cfg: Config, *, force_vrt: bool = False) -> int: ensure_dir(swe_cfg.out_dir) ensure_parent(swe_cfg.tile_index_path) ensure_parent(swe_cfg.manifest_path) + ensure_parent(swe_cfg.boundary_manifest_path) porosity_source = swe_cfg.porosity_source building_source = swe_cfg.building_height_source @@ -161,6 +178,7 @@ def export_swe_porosity(cfg: Config, *, force_vrt: bool = False) -> int: bounds_ds = porosity_ds or building_ds xmin, ymin, xmax, ymax = _dataset_bounds(bounds_ds) + source_bounds = (xmin, ymin, xmax, ymax) base_tile_size = max(level.tile_size_m for level in swe_cfg.lods) origin_x = swe_cfg.origin_x @@ -187,6 +205,9 @@ def export_swe_porosity(cfg: Config, *, force_vrt: bool = False) -> int: ensure_dir(building_dir) for tile_id, bounds in _iter_tiles(origin_x, origin_y, xmin, ymin, xmax, ymax, level.tile_size_m): + if not _has_min_coverage(bounds, source_bounds, level.resolution, level.resolution): + skipped += 1 + continue porosity_path = os.path.join(porosity_dir, f"porosity_{tile_id}.exr") porosity = _build_porosity( porosity_ds, @@ -241,11 +262,20 @@ def export_swe_porosity(cfg: Config, *, force_vrt: bool = False) -> int: xmax, ymax, ) + _write_boundary_manifest( + cfg, + swe_cfg, 
+ tile_rows, + force_vrt=force_vrt, + ) removed = cleanup_aux_files(_cleanup_patterns(cfg.raw.dgm1_dir)) print(f"[swe_porosity] Summary: wrote {written} tiles; skipped {skipped}.") print(f"[swe_porosity] Cleanup removed {removed} temporary files/sidecars.") - return 1 if skipped else 0 + if written == 0: + print("[swe_porosity] Error: no tiles were written.") + return 1 + return 0 def _uses_lpo(value: str | None) -> bool: @@ -416,6 +446,30 @@ def _dataset_bounds(ds) -> tuple[float, float, float, float]: return min(xmin, xmax), min(ymin, ymax), max(xmin, xmax), max(ymin, ymax) +def _has_min_coverage( + tile_bounds: tuple[float, float, float, float], + source_bounds: tuple[float, float, float, float], + out_w: int, + out_h: int, +) -> bool: + txmin, tymin, txmax, tymax = tile_bounds + sxmin, symin, sxmax, symax = source_bounds + + overlap_w = max(0.0, min(txmax, sxmax) - max(txmin, sxmin)) + overlap_h = max(0.0, min(tymax, symax) - max(tymin, symin)) + if overlap_w <= 0.0 or overlap_h <= 0.0: + return False + + tile_w = max(1e-6, txmax - txmin) + tile_h = max(1e-6, tymax - tymin) + covered_px_x = (overlap_w / tile_w) * float(out_w) + covered_px_y = (overlap_h / tile_h) * float(out_h) + + # Require at least one output pixel in each axis, otherwise warps tend to + # generate empty/zero tiles at non-aligned outer borders. 
+ return covered_px_x >= 1.0 and covered_px_y >= 1.0 + + def _iter_tiles( origin_x: float, origin_y: float, @@ -680,6 +734,249 @@ def _open_optional_raster(path: str | None, label: str): return None +def _write_boundary_manifest( + cfg: Config, + swe_cfg: SweLodConfig, + tile_rows: list[dict], + *, + force_vrt: bool, +) -> None: + source_ds = _open_boundary_mask_dataset( + cfg, + mask_dir=swe_cfg.source_mask_dir, + kind="source", + force_vrt=force_vrt, + ) + sink_ds = _open_boundary_mask_dataset( + cfg, + mask_dir=swe_cfg.sink_mask_dir, + kind="sink", + force_vrt=force_vrt, + ) + + source_params = _load_boundary_params_toml(swe_cfg.source_params_toml, kind="sources") + sink_params = _load_boundary_params_toml(swe_cfg.sink_params_toml, kind="sinks") + + tiles_payload = [] + source_stats: dict[int, dict] = {} + sink_stats: dict[int, dict] = {} + + for row in tile_rows: + bounds = (float(row["xmin"]), float(row["ymin"]), float(row["xmax"]), float(row["ymax"])) + resolution = int(row["resolution"]) + lod = str(row["lod"]) + tile_x = int(row["tile_x"]) + tile_y = int(row["tile_y"]) + + source_ids = _sample_boundary_ids(source_ds, bounds, resolution) + sink_ids = _sample_boundary_ids(sink_ds, bounds, resolution) + + _accumulate_id_stats(source_stats, source_ids, lod) + _accumulate_id_stats(sink_stats, sink_ids, lod) + + tiles_payload.append( + { + "lod": lod, + "tile_x": tile_x, + "tile_y": tile_y, + "tile_size_m": float(row["tile_size_m"]), + "resolution": resolution, + "bounds": [bounds[0], bounds[1], bounds[2], bounds[3]], + "source_ids": source_ids, + "sink_ids": sink_ids, + } + ) + + payload = { + "schema_version": 1, + "id_encoding": "id = B + 256*G + 65536*R", + "source_mask_dir": swe_cfg.source_mask_dir, + "sink_mask_dir": swe_cfg.sink_mask_dir, + "source_params_toml": swe_cfg.source_params_toml, + "sink_params_toml": swe_cfg.sink_params_toml, + "sources": _merge_stats_with_params(source_stats, source_params), + "sinks": _merge_stats_with_params(sink_stats, 
sink_params), + "tiles": tiles_payload, + } + + with open(swe_cfg.boundary_manifest_path, "w", encoding="utf-8") as fh: + json.dump(payload, fh, indent=2) + print(f"[swe_lods] Wrote boundary manifest: {swe_cfg.boundary_manifest_path}") + + +def _open_boundary_mask_dataset( + cfg: Config, + *, + mask_dir: str, + kind: str, + force_vrt: bool, +): + if not mask_dir or not os.path.isdir(mask_dir): + print(f"[swe_lods] No {kind} mask directory: {mask_dir}") + return None + + sources = sorted( + [ + os.path.join(mask_dir, name) + for name in os.listdir(mask_dir) + if name.lower().endswith(".png") + ] + ) + if not sources: + print(f"[swe_lods] No {kind} mask PNGs found in {mask_dir}") + return None + + out_dir = os.path.join(cfg.work.work_dir, "swe_boundaries") + ensure_dir(out_dir) + vrt_path = os.path.join(out_dir, f"{kind}_ids.vrt") + build_vrt(vrt_path, sources, force=force_vrt or True) + return _open_optional_raster(vrt_path, f"{kind} IDs") + + +def _warp_id_array( + ds, + bounds: tuple[float, float, float, float], + width: int, + height: int, +) -> np.ndarray: + # Keep id=0 as regular background; do not reserve it as NODATA. 
+ warp_opts = gdal.WarpOptions( + format="MEM", + outputBounds=bounds, + width=width, + height=height, + resampleAlg="near", + ) + warped = gdal.Warp("", ds, options=warp_opts) + if warped is None: + raise RuntimeError("GDAL Warp for ID mask returned None.") + + if warped.RasterCount >= 3: + r = np.rint(warped.GetRasterBand(1).ReadAsArray()).astype(np.uint32) + g = np.rint(warped.GetRasterBand(2).ReadAsArray()).astype(np.uint32) + b = np.rint(warped.GetRasterBand(3).ReadAsArray()).astype(np.uint32) + ids = b + (256 * g) + (65536 * r) + else: + ids = np.rint(warped.GetRasterBand(1).ReadAsArray()).astype(np.uint32) + return ids + + +def _sample_boundary_ids(ds, bounds: tuple[float, float, float, float], resolution: int) -> list[dict]: + if ds is None: + return [] + ids = _warp_id_array(ds, bounds, resolution, resolution) + u, c = np.unique(ids, return_counts=True) + out = [] + for ident, count in zip(u.tolist(), c.tolist()): + if ident == 0: + continue + out.append({"id": int(ident), "pixels": int(count)}) + return out + + +def _accumulate_id_stats(stats: dict[int, dict], ids: list[dict], lod: str) -> None: + for entry in ids: + ident = int(entry["id"]) + pixels = int(entry["pixels"]) + node = stats.setdefault( + ident, + { + "id": ident, + "tile_count": 0, + "total_pixels": 0, + "lod_pixels": {}, + }, + ) + node["tile_count"] += 1 + node["total_pixels"] += pixels + node["lod_pixels"][lod] = int(node["lod_pixels"].get(lod, 0) + pixels) + + +def _load_boundary_params_toml(path: str, *, kind: str) -> dict[int, dict]: + if not path or not os.path.exists(path): + return {} + + with open(path, "rb") as fh: + data = tomllib.load(fh) + + section = data.get(kind) + if section is None and kind.endswith("s"): + section = data.get(kind[:-1]) + if section is None: + return {} + + out: dict[int, dict] = {} + if isinstance(section, list): + for idx, item in enumerate(section): + if not isinstance(item, dict): + continue + ident = _parse_int_id(item.get("id")) + if ident is None or 
ident <= 0: + continue + payload = {k: v for k, v in item.items() if k != "id"} + _set_boundary_param(out, ident, payload, kind=kind, path=path, entry_name=f"list[{idx}]") + elif isinstance(section, dict): + for key, item in section.items(): + if not isinstance(item, dict): + continue + # Backward compatible: + # - numeric dict key style: [sources] 1 = {...} + # - named subtables style: [sources.source0] id = 1 + ident = _parse_int_id(key) + if ident is None: + ident = _parse_int_id(item.get("id")) + if ident is None or ident <= 0: + continue + payload = {k: v for k, v in item.items() if k != "id"} + _set_boundary_param(out, ident, payload, kind=kind, path=path, entry_name=str(key)) + return out + + +def _parse_int_id(value) -> int | None: + try: + return int(value) + except (TypeError, ValueError): + return None + + +def _set_boundary_param( + out: dict[int, dict], + ident: int, + payload: dict, + *, + kind: str, + path: str, + entry_name: str, +) -> None: + if ident in out: + print( + f"[swe_lods] Warning: duplicate {kind} params for id={ident} in {path} " + f"(entry '{entry_name}'); overriding previous." 
+ ) + out[ident] = payload + + +def _merge_stats_with_params(stats: dict[int, dict], params: dict[int, dict]) -> list[dict]: + all_ids = sorted(set(stats.keys()) | set(params.keys())) + out = [] + for ident in all_ids: + node = { + "id": int(ident), + "tile_count": 0, + "total_pixels": 0, + "lod_pixels": {}, + "params": {}, + } + if ident in stats: + node["tile_count"] = int(stats[ident]["tile_count"]) + node["total_pixels"] = int(stats[ident]["total_pixels"]) + node["lod_pixels"] = dict(stats[ident]["lod_pixels"]) + if ident in params: + node["params"] = dict(params[ident]) + out.append(node) + return out + + def _cleanup_patterns(raw_dir: str) -> Iterable[str]: return [ os.path.join("work", "*_tmp.tif"), diff --git a/scripts/fix_masks.py b/scripts/fix_masks.py new file mode 100644 index 0000000..948986b --- /dev/null +++ b/scripts/fix_masks.py @@ -0,0 +1,73 @@ +import os +import shutil +from osgeo import gdal + +def fix_manual_masks(raw_dir, work_dir): + manual_files = [f for f in os.listdir(raw_dir) if f.endswith(".png")] + + for manual_file in manual_files: + manual_path = os.path.join(raw_dir, manual_file) + + parts = manual_file.replace("_viz.png", "").split("_") + if len(parts) >= 4: + tile_prefix = f"{parts[0]}_{parts[1]}_{parts[2]}_{parts[3]}" + work_file = f"{tile_prefix}_1_rp_mask.png" + work_path = os.path.join(work_dir, work_file) + + if not os.path.exists(work_path): + print(f"Skipping {manual_file}: Work file not found.") + continue + + print(f"Fixing {manual_file} using {work_file}...") + + ds_work = gdal.Open(work_path) + gt_work = ds_work.GetGeoTransform() + proj_work = ds_work.GetProjection() + target_w = ds_work.RasterXSize + target_h = ds_work.RasterYSize + + minx = gt_work[0] + maxy = gt_work[3] + maxx = minx + (gt_work[1] * target_w) + miny = maxy + (gt_work[5] * target_h) + + # Open Source as ReadOnly, then CreateCopy to MEM to edit + ds_raw = gdal.Open(manual_path) + mem_driver = gdal.GetDriverByName("MEM") + ds_manual = 
mem_driver.CreateCopy("", ds_raw) + ds_raw = None # Close file + + src_w = ds_manual.RasterXSize + src_h = ds_manual.RasterYSize + + src_res_x = (maxx - minx) / src_w + src_res_y = (miny - maxy) / src_h + + src_gt = (minx, src_res_x, 0, maxy, 0, src_res_y) + ds_manual.SetGeoTransform(src_gt) + ds_manual.SetProjection(proj_work) + + warp_options = gdal.WarpOptions( + format="PNG", + width=target_w, + height=target_h, + outputBounds=(minx, miny, maxx, maxy), + resampleAlg=gdal.GRA_NearestNeighbour + ) + + temp_path = manual_path + ".tmp.png" + ds_fixed = gdal.Warp(temp_path, ds_manual, options=warp_options) + ds_fixed = None + ds_manual = None + ds_work = None + + shutil.move(temp_path, manual_path) + + work_wld = work_path.replace(".png", ".wld") + if os.path.exists(work_wld): + shutil.copy(work_wld, manual_path.replace(".png", ".wld")) + + print(f" -> Fixed.") + +if __name__ == "__main__": + fix_manual_masks("raw/water_masks", "work/river_masks") diff --git a/scripts/mask_build_master.py b/scripts/mask_build_master.py new file mode 100644 index 0000000..587551c --- /dev/null +++ b/scripts/mask_build_master.py @@ -0,0 +1,380 @@ +#!/usr/bin/env python3 +"""Build global editable master masks from per-tile mask files. + +This script keeps 1025-tile semantics intact for river erosion while making +manual source/sink editing easier in one global image. 
+""" + +from __future__ import annotations + +import argparse +import json +import math +import os +import re +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, Iterable, Optional + +import numpy as np +from PIL import Image + + +@dataclass +class TileRef: + key: str + water_path: Path + water_stem: str + width: int + height: int + px: float + py: float + minx: float + miny: float + maxx: float + maxy: float + source_path: Optional[Path] = None + sink_path: Optional[Path] = None + + +_TILE_KEY_RE = re.compile(r"^(dgm\d+_\d+_\d+_\d+)") + + +def parse_args() -> argparse.Namespace: + p = argparse.ArgumentParser(description="Build master water/source/sink masks from tiled inputs.") + p.add_argument("--water-dir", default="raw/water_masks", help="Directory with tile water masks.") + p.add_argument( + "--fallback-water-dir", + default="work/river_masks", + help="Fallback water mask directory used for tiles missing in --water-dir.", + ) + p.add_argument("--source-dir", default="raw/water_source_masks", help="Directory with tile source masks.") + p.add_argument("--sink-dir", default="raw/water_sink_masks", help="Directory with tile sink masks.") + p.add_argument("--out-dir", default="work/mask_master", help="Output directory for master masks.") + p.add_argument("--water-pattern", default="*_mask_viz.png", help="Glob for water mask files.") + p.add_argument("--fallback-water-pattern", default="*_mask.png", help="Glob for fallback water mask files.") + p.add_argument("--write-master-wld", action="store_true", help="Write worldfiles for master images.") + return p.parse_args() + + +def tile_key_from_name(name: str) -> Optional[str]: + stem = Path(name).stem + m = _TILE_KEY_RE.match(stem) + if not m: + return None + return m.group(1) + + +def read_worldfile(path: Path) -> tuple[float, float, float, float, float, float]: + vals = [] + for line in path.read_text(encoding="utf-8").splitlines(): + line = line.strip() + if line: + 
vals.append(float(line)) + if len(vals) != 6: + raise ValueError(f"Expected 6 values in worldfile {path}, got {len(vals)}.") + return vals[0], vals[1], vals[2], vals[3], vals[4], vals[5] + + +def infer_bounds(width: int, height: int, wld: tuple[float, float, float, float, float, float]) -> tuple[float, float, float, float]: + a, d, b, e, c, f = wld + if abs(b) > 1e-9 or abs(d) > 1e-9: + raise ValueError("Rotated worldfiles are not supported.") + minx = c - (a / 2.0) + maxy = f - (e / 2.0) + maxx = minx + (a * width) + miny = maxy + (e * height) + return minx, miny, maxx, maxy + + +def load_rgb(path: Path, width: int, height: int) -> np.ndarray: + img = Image.open(path).convert("RGB") + arr = np.array(img, dtype=np.uint8) + if arr.shape[0] == height and arr.shape[1] == width: + return arr + resized = img.resize((width, height), resample=Image.Resampling.NEAREST) + return np.array(resized, dtype=np.uint8) + + +def write_worldfile(path: Path, px: float, py: float, minx: float, maxy: float) -> None: + c = minx + (px / 2.0) + f = maxy + (py / 2.0) + text = "\n".join( + [ + f"{px:.12f}", + "0.0", + "0.0", + f"{py:.12f}", + f"{c:.12f}", + f"{f:.12f}", + ] + ) + path.write_text(text + "\n", encoding="utf-8") + + +def index_masks_by_tile(mask_dir: Path, suffix: str) -> Dict[str, Path]: + out: Dict[str, Path] = {} + if not mask_dir.exists(): + return out + for p in sorted(mask_dir.glob(f"*{suffix}")): + key = tile_key_from_name(p.name) + if key is None: + continue + if key in out: + print(f"[mask_build_master] Warning: duplicate {suffix} for {key}, keeping {out[key].name}, ignoring {p.name}") + continue + out[key] = p + return out + + +def collect_tiles(water_dir: Path, water_pattern: str, source_dir: Path, sink_dir: Path) -> list[TileRef]: + source_by_key = index_masks_by_tile(source_dir, ".png") + sink_by_key = index_masks_by_tile(sink_dir, ".png") + tiles: list[TileRef] = [] + + for p in sorted(water_dir.glob(water_pattern)): + if p.name.endswith(".tmp.png"): + continue + 
key = tile_key_from_name(p.name) + if key is None: + continue + + wld = p.with_suffix(".wld") + if not wld.exists(): + print(f"[mask_build_master] Skipping {p.name}: missing worldfile {wld.name}") + continue + + img = Image.open(p) + width, height = img.size + img.close() + + a, d, b, e, c, f = read_worldfile(wld) + if abs(b) > 1e-9 or abs(d) > 1e-9: + raise SystemExit(f"[mask_build_master] Rotated worldfile is not supported: {wld}") + if a <= 0 or e >= 0: + raise SystemExit(f"[mask_build_master] Unexpected worldfile pixel size signs in {wld}") + + minx, miny, maxx, maxy = infer_bounds(width, height, (a, d, b, e, c, f)) + tiles.append( + TileRef( + key=key, + water_path=p, + water_stem=p.stem, + width=width, + height=height, + px=a, + py=e, + minx=minx, + miny=miny, + maxx=maxx, + maxy=maxy, + source_path=source_by_key.get(key), + sink_path=sink_by_key.get(key), + ) + ) + return tiles + + +def collect_fallback_tiles( + fallback_dir: Path, + fallback_pattern: str, + existing_keys: set[str], + source_dir: Path, + sink_dir: Path, +) -> list[TileRef]: + source_by_key = index_masks_by_tile(source_dir, ".png") + sink_by_key = index_masks_by_tile(sink_dir, ".png") + tiles: list[TileRef] = [] + if not fallback_dir.exists(): + return tiles + + for p in sorted(fallback_dir.glob(fallback_pattern)): + key = tile_key_from_name(p.name) + if key is None or key in existing_keys: + continue + if p.name.endswith(".tmp.png"): + continue + + wld = p.with_suffix(".wld") + if not wld.exists(): + print(f"[mask_build_master] Skipping fallback {p.name}: missing worldfile {wld.name}") + continue + + img = Image.open(p) + width, height = img.size + img.close() + + a, d, b, e, c, f = read_worldfile(wld) + if abs(b) > 1e-9 or abs(d) > 1e-9: + raise SystemExit(f"[mask_build_master] Rotated worldfile is not supported: {wld}") + if a <= 0 or e >= 0: + raise SystemExit(f"[mask_build_master] Unexpected worldfile pixel size signs in {wld}") + + minx, miny, maxx, maxy = infer_bounds(width, height, 
(a, d, b, e, c, f)) + tiles.append( + TileRef( + key=key, + water_path=p, + water_stem=p.stem, + width=width, + height=height, + px=a, + py=e, + minx=minx, + miny=miny, + maxx=maxx, + maxy=maxy, + source_path=source_by_key.get(key), + sink_path=sink_by_key.get(key), + ) + ) + return tiles + + +def check_resolution_consistency(tiles: Iterable[TileRef]) -> tuple[float, float]: + pxs = [t.px for t in tiles] + pys = [t.py for t in tiles] + px = float(np.median(pxs)) + py = float(np.median(pys)) + for t in tiles: + if not math.isclose(t.px, px, rel_tol=0.0, abs_tol=1e-9): + raise SystemExit(f"[mask_build_master] Inconsistent px: {t.water_path} has {t.px}, expected {px}") + if not math.isclose(t.py, py, rel_tol=0.0, abs_tol=1e-9): + raise SystemExit(f"[mask_build_master] Inconsistent py: {t.water_path} has {t.py}, expected {py}") + return px, py + + +def merge_non_black(dst: np.ndarray, src: np.ndarray) -> None: + nz = np.any(src != 0, axis=2) + dst[nz] = src[nz] + + +def main() -> int: + args = parse_args() + + water_dir = Path(args.water_dir) + fallback_water_dir = Path(args.fallback_water_dir) + source_dir = Path(args.source_dir) + sink_dir = Path(args.sink_dir) + out_dir = Path(args.out_dir) + + if not water_dir.exists(): + raise SystemExit(f"[mask_build_master] water-dir not found: {water_dir}") + + primary_tiles = collect_tiles(water_dir, args.water_pattern, source_dir, sink_dir) + fallback_tiles = collect_fallback_tiles( + fallback_water_dir, + args.fallback_water_pattern, + {t.key for t in primary_tiles}, + source_dir, + sink_dir, + ) + tiles = primary_tiles + fallback_tiles + if not tiles: + raise SystemExit("[mask_build_master] No water tiles found.") + + px, py = check_resolution_consistency(tiles) + minx = min(t.minx for t in tiles) + miny = min(t.miny for t in tiles) + maxx = max(t.maxx for t in tiles) + maxy = max(t.maxy for t in tiles) + + master_w = int(round((maxx - minx) / px)) + master_h = int(round((maxy - miny) / abs(py))) + if master_w <= 0 or 
master_h <= 0: + raise SystemExit("[mask_build_master] Invalid master size.") + + water_master = np.zeros((master_h, master_w, 3), dtype=np.uint8) + source_master = np.zeros((master_h, master_w, 3), dtype=np.uint8) + sink_master = np.zeros((master_h, master_w, 3), dtype=np.uint8) + + tile_rows = [] + for t in sorted(tiles, key=lambda k: (k.key, k.minx, k.miny)): + xoff = int(round((t.minx - minx) / px)) + yoff = int(round((maxy - t.maxy) / abs(py))) + + if xoff < 0 or yoff < 0 or xoff + t.width > master_w or yoff + t.height > master_h: + raise SystemExit(f"[mask_build_master] Tile placement out of bounds for {t.water_path}") + + w_arr = load_rgb(t.water_path, t.width, t.height) + merge_non_black(water_master[yoff : yoff + t.height, xoff : xoff + t.width], w_arr) + + if t.source_path and t.source_path.exists(): + s_arr = load_rgb(t.source_path, t.width, t.height) + merge_non_black(source_master[yoff : yoff + t.height, xoff : xoff + t.width], s_arr) + + if t.sink_path and t.sink_path.exists(): + k_arr = load_rgb(t.sink_path, t.width, t.height) + merge_non_black(sink_master[yoff : yoff + t.height, xoff : xoff + t.width], k_arr) + + tile_rows.append( + { + "key": t.key, + "water_file": t.water_path.name, + "water_stem": t.water_stem, + "source_file": t.source_path.name if t.source_path else "", + "sink_file": t.sink_path.name if t.sink_path else "", + "xoff": xoff, + "yoff": yoff, + "width": t.width, + "height": t.height, + "minx": t.minx, + "miny": t.miny, + "maxx": t.maxx, + "maxy": t.maxy, + } + ) + + out_dir.mkdir(parents=True, exist_ok=True) + water_out = out_dir / "water_master.png" + source_out = out_dir / "source_master.png" + sink_out = out_dir / "sink_master.png" + Image.fromarray(water_master, mode="RGB").save(water_out) + Image.fromarray(source_master, mode="RGB").save(source_out) + Image.fromarray(sink_master, mode="RGB").save(sink_out) + + if args.write_master_wld: + write_worldfile(water_out.with_suffix(".wld"), px, py, minx, maxy) + 
write_worldfile(source_out.with_suffix(".wld"), px, py, minx, maxy) + write_worldfile(sink_out.with_suffix(".wld"), px, py, minx, maxy) + + meta = { + "schema_version": 1, + "master": { + "width": master_w, + "height": master_h, + "minx": minx, + "miny": miny, + "maxx": maxx, + "maxy": maxy, + "px": px, + "py": py, + "water_master": water_out.name, + "source_master": source_out.name, + "sink_master": sink_out.name, + }, + "inputs": { + "water_dir": str(water_dir), + "fallback_water_dir": str(fallback_water_dir), + "source_dir": str(source_dir), + "sink_dir": str(sink_dir), + "water_pattern": args.water_pattern, + "fallback_water_pattern": args.fallback_water_pattern, + }, + "tiles": tile_rows, + } + (out_dir / "master_meta.json").write_text(json.dumps(meta, indent=2), encoding="utf-8") + + print(f"[mask_build_master] Wrote {water_out}") + print(f"[mask_build_master] Wrote {source_out}") + print(f"[mask_build_master] Wrote {sink_out}") + print(f"[mask_build_master] Wrote {out_dir / 'master_meta.json'}") + print(f"[mask_build_master] Master size: {master_w}x{master_h}") + print(f"[mask_build_master] Tiles merged: {len(tile_rows)}") + print(f"[mask_build_master] from primary: {len(primary_tiles)}") + print(f"[mask_build_master] from fallback: {len(fallback_tiles)}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/mask_split_master.py b/scripts/mask_split_master.py new file mode 100644 index 0000000..87e9100 --- /dev/null +++ b/scripts/mask_split_master.py @@ -0,0 +1,247 @@ +#!/usr/bin/env python3 +"""Split global master masks back into tile masks. 
+ +Optional filtering modes: +- keep only source/sink tiles that contain non-black pixels +- keep only water tiles that differ from a reference river mask directory +""" + +from __future__ import annotations + +import argparse +import json +import re +from pathlib import Path +from typing import Dict + +import numpy as np +from PIL import Image + +_TILE_KEY_RE = re.compile(r"^(dgm\d+_\d+_\d+_\d+)") +# Master masks can exceed PIL's safety threshold; these files are trusted local data. +Image.MAX_IMAGE_PIXELS = None + + +def parse_args() -> argparse.Namespace: + p = argparse.ArgumentParser(description="Split master masks into per-tile water/source/sink masks.") + p.add_argument("--master-dir", default="work/mask_master", help="Directory containing master_meta.json and master images.") + p.add_argument("--out-water-dir", default="raw/water_masks", help="Output directory for water tile masks.") + p.add_argument("--out-source-dir", default="raw/water_source_masks", help="Output directory for source tile masks.") + p.add_argument("--out-sink-dir", default="raw/water_sink_masks", help="Output directory for sink tile masks.") + p.add_argument( + "--ref-water-dir", + default="work/river_masks", + help="Reference water mask dir used to keep only changed water tiles.", + ) + p.add_argument( + "--keep-informative-only", + action="store_true", + help="Write only informative masks (non-black source/sink; water changed vs ref water dir).", + ) + p.add_argument( + "--prune-existing", + action="store_true", + help="Remove existing PNG/WLD outputs in out dirs before writing.", + ) + p.add_argument("--write-wld", action="store_true", help="Write world files for output masks.") + return p.parse_args() + + +def write_worldfile(path: Path, px: float, py: float, minx: float, maxy: float) -> None: + c = minx + (px / 2.0) + f = maxy + (py / 2.0) + text = "\n".join( + [ + f"{px:.12f}", + "0.0", + "0.0", + f"{py:.12f}", + f"{c:.12f}", + f"{f:.12f}", + ] + ) + path.write_text(text + "\n", 
encoding="utf-8") + + +def ensure_dirs(*dirs: Path) -> None: + for d in dirs: + d.mkdir(parents=True, exist_ok=True) + + +def tile_key_from_name(name: str) -> str: + stem = Path(name).stem + m = _TILE_KEY_RE.match(stem) + return m.group(1) if m else "" + + +def remove_existing_outputs(out_dir: Path) -> None: + if not out_dir.exists(): + return + for p in out_dir.glob("*.png"): + p.unlink(missing_ok=True) + for p in out_dir.glob("*.wld"): + p.unlink(missing_ok=True) + + +def has_non_black(arr: np.ndarray) -> bool: + return bool(np.any(arr != 0)) + + +def index_reference_masks(ref_dir: Path) -> Dict[str, Path]: + out: Dict[str, Path] = {} + if not ref_dir.exists(): + return out + for p in sorted(ref_dir.glob("*.png")): + key = tile_key_from_name(p.name) + if not key or key in out: + continue + out[key] = p + return out + + +def resize_nearest(arr: np.ndarray, width: int, height: int) -> np.ndarray: + if arr.shape[0] == height and arr.shape[1] == width: + return arr + img = Image.fromarray(arr, mode="RGB") + resized = img.resize((width, height), resample=Image.Resampling.NEAREST) + return np.array(resized, dtype=np.uint8) + + +def water_differs_from_reference(water_arr: np.ndarray, key: str, ref_index: Dict[str, Path]) -> bool: + ref_path = ref_index.get(key) + if ref_path is None: + # No baseline tile: treat as informative so we do not lose manual work. 
+ return True + ref_arr = np.array(Image.open(ref_path).convert("RGB"), dtype=np.uint8) + ref_arr = resize_nearest(ref_arr, water_arr.shape[1], water_arr.shape[0]) + return not np.array_equal(water_arr, ref_arr) + + +def source_name_for_tile(tile: Dict[str, object]) -> str: + source_file = str(tile.get("source_file") or "").strip() + if source_file: + return source_file + stem = str(tile.get("water_stem") or "").strip() + if stem: + return f"{stem}_source_mask.png" + return f"{tile['key']}_source_mask.png" + + +def sink_name_for_tile(tile: Dict[str, object]) -> str: + sink_file = str(tile.get("sink_file") or "").strip() + if sink_file: + return sink_file + stem = str(tile.get("water_stem") or "").strip() + if stem: + return f"{stem}_sink_mask.png" + return f"{tile['key']}_sink_mask.png" + + +def crop_tile(master: np.ndarray, xoff: int, yoff: int, width: int, height: int) -> np.ndarray: + return master[yoff : yoff + height, xoff : xoff + width].copy() + + +def main() -> int: + args = parse_args() + master_dir = Path(args.master_dir) + meta_path = master_dir / "master_meta.json" + if not meta_path.exists(): + raise SystemExit(f"[mask_split_master] Missing meta: {meta_path}") + + meta = json.loads(meta_path.read_text(encoding="utf-8")) + m = meta["master"] + tiles = meta["tiles"] + px = float(m["px"]) + py = float(m["py"]) + + water_master = np.array(Image.open(master_dir / m["water_master"]).convert("RGB"), dtype=np.uint8) + source_master = np.array(Image.open(master_dir / m["source_master"]).convert("RGB"), dtype=np.uint8) + sink_master = np.array(Image.open(master_dir / m["sink_master"]).convert("RGB"), dtype=np.uint8) + + ensure_dirs(Path(args.out_water_dir), Path(args.out_source_dir), Path(args.out_sink_dir)) + out_water = Path(args.out_water_dir) + out_source = Path(args.out_source_dir) + out_sink = Path(args.out_sink_dir) + if args.prune_existing: + remove_existing_outputs(out_water) + remove_existing_outputs(out_source) + remove_existing_outputs(out_sink) + + 
ref_index = index_reference_masks(Path(args.ref_water_dir)) + + written_water = 0 + written_source = 0 + written_sink = 0 + skipped_water = 0 + skipped_source = 0 + skipped_sink = 0 + for tile in tiles: + xoff = int(tile["xoff"]) + yoff = int(tile["yoff"]) + width = int(tile["width"]) + height = int(tile["height"]) + + water_arr = crop_tile(water_master, xoff, yoff, width, height) + source_arr = crop_tile(source_master, xoff, yoff, width, height) + sink_arr = crop_tile(sink_master, xoff, yoff, width, height) + + water_name = str(tile.get("water_file") or f"{tile['key']}_mask_viz.png") + source_name = source_name_for_tile(tile) + sink_name = sink_name_for_tile(tile) + + water_path = out_water / water_name + source_path = out_source / source_name + sink_path = out_sink / sink_name + + write_water = True + write_source = True + write_sink = True + if args.keep_informative_only: + write_source = has_non_black(source_arr) + write_sink = has_non_black(sink_arr) + write_water = water_differs_from_reference(water_arr, str(tile["key"]), ref_index) + + if write_water: + Image.fromarray(water_arr, mode="RGB").save(water_path) + if args.write_wld: + minx = float(tile["minx"]) + maxy = float(tile["maxy"]) + write_worldfile(water_path.with_suffix(".wld"), px, py, minx, maxy) + written_water += 1 + else: + skipped_water += 1 + + if write_source: + Image.fromarray(source_arr, mode="RGB").save(source_path) + if args.write_wld: + minx = float(tile["minx"]) + maxy = float(tile["maxy"]) + write_worldfile(source_path.with_suffix(".wld"), px, py, minx, maxy) + written_source += 1 + else: + skipped_source += 1 + + if write_sink: + Image.fromarray(sink_arr, mode="RGB").save(sink_path) + if args.write_wld: + minx = float(tile["minx"]) + maxy = float(tile["maxy"]) + write_worldfile(sink_path.with_suffix(".wld"), px, py, minx, maxy) + written_sink += 1 + else: + skipped_sink += 1 + + total_tiles = len(tiles) + + print(f"[mask_split_master] Tiles processed: {total_tiles}") + 
print(f"[mask_split_master] Water written: {written_water}, skipped: {skipped_water}") + print(f"[mask_split_master] Source written: {written_source}, skipped: {skipped_source}") + print(f"[mask_split_master] Sink written: {written_sink}, skipped: {skipped_sink}") + print(f"[mask_split_master] Water dir: {out_water}") + print(f"[mask_split_master] Source dir: {out_source}") + print(f"[mask_split_master] Sink dir: {out_sink}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/mask_validate_ids.py b/scripts/mask_validate_ids.py new file mode 100644 index 0000000..12454ca --- /dev/null +++ b/scripts/mask_validate_ids.py @@ -0,0 +1,246 @@ +#!/usr/bin/env python3 +"""Validate source/sink ID mask integrity. + +ID encoding: + id = B + 256*G + 65536*R +""" + +from __future__ import annotations + +import argparse +import json +import re +from pathlib import Path +from typing import Dict, Optional + +import numpy as np +from PIL import Image + + +_TILE_KEY_RE = re.compile(r"^(dgm\d+_\d+_\d+_\d+)") + + +def parse_args() -> argparse.Namespace: + p = argparse.ArgumentParser(description="Validate source/sink masks and report ID usage.") + p.add_argument("--source-dir", default="raw/water_source_masks", help="Directory with source masks.") + p.add_argument("--sink-dir", default="raw/water_sink_masks", help="Directory with sink masks.") + p.add_argument("--allowed-source-ids", default="", help="Comma-separated nonzero IDs allowed in source masks.") + p.add_argument("--allowed-sink-ids", default="", help="Comma-separated nonzero IDs allowed in sink masks.") + p.add_argument("--max-id", type=int, default=0, help="If >0, fail when any ID exceeds this value.") + p.add_argument( + "--report", + default="work/mask_master/validation_report.json", + help="Output JSON report path.", + ) + p.add_argument("--fail-on-overlap", action="store_true", help="Fail if source and sink overlap on same tile pixels.") + return p.parse_args() + + +def 
parse_allowed_ids(text: str) -> Optional[set[int]]: + text = text.strip() + if not text: + return None + out: set[int] = set() + for part in text.split(","): + part = part.strip() + if not part: + continue + out.add(int(part)) + return out + + +def tile_key_from_name(name: str) -> Optional[str]: + stem = Path(name).stem + m = _TILE_KEY_RE.match(stem) + return m.group(1) if m else None + + +def decode_ids(rgb: np.ndarray) -> np.ndarray: + r = rgb[..., 0].astype(np.uint32) + g = rgb[..., 1].astype(np.uint32) + b = rgb[..., 2].astype(np.uint32) + return b + (256 * g) + (65536 * r) + + +def analyze_mask(path: Path) -> dict: + arr = np.array(Image.open(path).convert("RGB"), dtype=np.uint8) + ids = decode_ids(arr) + u, c = np.unique(ids, return_counts=True) + nonzero = [(int(i), int(n)) for i, n in zip(u.tolist(), c.tolist()) if i != 0] + return { + "path": str(path), + "shape": [int(arr.shape[1]), int(arr.shape[0])], + "unique_count": int(len(u)), + "max_id": int(u[-1]) if len(u) > 0 else 0, + "nonzero_count": int(np.count_nonzero(ids)), + "nonzero_ids": nonzero, + "ids_array": ids, + } + + +def collect_pngs(path: Path) -> list[Path]: + if not path.exists(): + return [] + return sorted([p for p in path.glob("*.png") if p.is_file()]) + + +def unexpected_ids(found: list[tuple[int, int]], allowed: Optional[set[int]]) -> list[int]: + if allowed is None: + return [] + return sorted([i for i, _ in found if i not in allowed]) + + +def build_overview(files: list[dict]) -> list[dict]: + totals: Dict[int, dict] = {} + for entry in files: + tile_key = str(entry.get("tile_key") or "").strip() + for ident, pixels in entry.get("nonzero_ids", []): + node = totals.setdefault( + int(ident), + { + "id": int(ident), + "total_pixels": 0, + "file_count": 0, + "tile_keys": set(), + }, + ) + node["total_pixels"] += int(pixels) + node["file_count"] += 1 + if tile_key: + node["tile_keys"].add(tile_key) + out = [] + for ident in sorted(totals.keys()): + node = totals[ident] + out.append( + { + 
"id": int(node["id"]), + "total_pixels": int(node["total_pixels"]), + "file_count": int(node["file_count"]), + "tile_keys": sorted(list(node["tile_keys"])), + } + ) + return out + + +def main() -> int: + args = parse_args() + allowed_source = parse_allowed_ids(args.allowed_source_ids) + allowed_sink = parse_allowed_ids(args.allowed_sink_ids) + + report = { + "schema_version": 1, + "config": { + "source_dir": args.source_dir, + "sink_dir": args.sink_dir, + "allowed_source_ids": sorted(list(allowed_source)) if allowed_source is not None else [], + "allowed_sink_ids": sorted(list(allowed_sink)) if allowed_sink is not None else [], + "max_id": args.max_id, + "fail_on_overlap": bool(args.fail_on_overlap), + }, + "source": [], + "source_overview": [], + "sink": [], + "sink_overview": [], + "overlaps": [], + "issues": [], + } + + failed = False + source_by_tile: Dict[str, np.ndarray] = {} + sink_by_tile: Dict[str, np.ndarray] = {} + + for p in collect_pngs(Path(args.source_dir)): + a = analyze_mask(p) + key = tile_key_from_name(p.name) + bad_ids = unexpected_ids(a["nonzero_ids"], allowed_source) + if bad_ids: + msg = f"[mask_validate_ids] Source {p.name} has unexpected IDs: {bad_ids}" + report["issues"].append(msg) + print(msg) + failed = True + if args.max_id > 0 and a["max_id"] > args.max_id: + msg = f"[mask_validate_ids] Source {p.name} max_id={a['max_id']} exceeds max-id={args.max_id}" + report["issues"].append(msg) + print(msg) + failed = True + report["source"].append( + { + "path": a["path"], + "tile_key": key or "", + "shape": a["shape"], + "unique_count": a["unique_count"], + "max_id": a["max_id"], + "nonzero_count": a["nonzero_count"], + "nonzero_ids": a["nonzero_ids"], + } + ) + if key: + source_by_tile[key] = a["ids_array"] + + for p in collect_pngs(Path(args.sink_dir)): + a = analyze_mask(p) + key = tile_key_from_name(p.name) + bad_ids = unexpected_ids(a["nonzero_ids"], allowed_sink) + if bad_ids: + msg = f"[mask_validate_ids] Sink {p.name} has unexpected 
IDs: {bad_ids}" + report["issues"].append(msg) + print(msg) + failed = True + if args.max_id > 0 and a["max_id"] > args.max_id: + msg = f"[mask_validate_ids] Sink {p.name} max_id={a['max_id']} exceeds max-id={args.max_id}" + report["issues"].append(msg) + print(msg) + failed = True + report["sink"].append( + { + "path": a["path"], + "tile_key": key or "", + "shape": a["shape"], + "unique_count": a["unique_count"], + "max_id": a["max_id"], + "nonzero_count": a["nonzero_count"], + "nonzero_ids": a["nonzero_ids"], + } + ) + if key: + sink_by_tile[key] = a["ids_array"] + + shared_tiles = sorted(set(source_by_tile.keys()) & set(sink_by_tile.keys())) + for key in shared_tiles: + s = source_by_tile[key] + k = sink_by_tile[key] + if s.shape != k.shape: + msg = f"[mask_validate_ids] Shape mismatch for tile {key}: source{s.shape} sink{k.shape}" + report["issues"].append(msg) + print(msg) + failed = True + continue + overlap = int(np.count_nonzero((s > 0) & (k > 0))) + if overlap > 0: + entry = {"tile_key": key, "overlap_pixels": overlap} + report["overlaps"].append(entry) + msg = f"[mask_validate_ids] Overlap on tile {key}: {overlap} pixels" + print(msg) + if args.fail_on_overlap: + report["issues"].append(msg) + failed = True + + report["source_overview"] = build_overview(report["source"]) + report["sink_overview"] = build_overview(report["sink"]) + + out_path = Path(args.report) + out_path.parent.mkdir(parents=True, exist_ok=True) + out_path.write_text(json.dumps(report, indent=2), encoding="utf-8") + print(f"[mask_validate_ids] Report written: {out_path}") + print(f"[mask_validate_ids] Source files: {len(report['source'])}, Sink files: {len(report['sink'])}") + print( + "[mask_validate_ids] Source IDs: " + f"{len(report['source_overview'])}, Sink IDs: {len(report['sink_overview'])}" + ) + print(f"[mask_validate_ids] Overlap records: {len(report['overlaps'])}") + + return 1 if failed else 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git 
a/scripts/ortho_build_master.py b/scripts/ortho_build_master.py new file mode 100644 index 0000000..bd4ed66 --- /dev/null +++ b/scripts/ortho_build_master.py @@ -0,0 +1,158 @@ +#!/usr/bin/env python3 +"""Build one global orthophoto master from tiled orthophoto JPGs.""" + +from __future__ import annotations + +import argparse +import json +from pathlib import Path + +from osgeo import gdal + + +def parse_args() -> argparse.Namespace: + p = argparse.ArgumentParser(description="Create a global ortho master mosaic from ortho tile JPGs.") + p.add_argument( + "--input-dir", + action="append", + default=[], + help="Input directory containing ortho JPG tiles. Can be specified multiple times.", + ) + p.add_argument("--pattern", default="*.jpg", help="Input filename pattern.") + p.add_argument("--out-dir", default="work/mask_master", help="Output directory.") + p.add_argument("--vrt-name", default="ortho_master.vrt", help="Output VRT name.") + p.add_argument("--tif-name", default="ortho_master.tif", help="Output GeoTIFF name.") + p.add_argument("--preview-name", default="ortho_master.jpg", help="Output preview JPG name.") + p.add_argument("--no-preview", action="store_true", help="Skip preview JPG generation.") + p.add_argument( + "--preview-max-size", + type=int, + default=8192, + help="Longest preview edge in pixels (aspect preserved).", + ) + p.add_argument( + "--compress", + default="LZW", + help="GeoTIFF compression (e.g., LZW, DEFLATE, JPEG, NONE).", + ) + p.add_argument("--resample", default="nearest", help="Resample algorithm for preview (nearest|bilinear|cubic...).") + return p.parse_args() + + +def collect_inputs(input_dirs: list[str], pattern: str) -> list[Path]: + dirs = [Path(d) for d in input_dirs] if input_dirs else [Path("export_unity/ortho_jpg")] + files: list[Path] = [] + for d in dirs: + if not d.exists(): + print(f"[ortho_build_master] Warning: input dir missing: {d}") + continue + for f in sorted(d.glob(pattern)): + if f.suffix.lower() != ".jpg": + 
continue + files.append(f) + return files + + +def preview_size(width: int, height: int, max_edge: int) -> tuple[int, int]: + if width <= 0 or height <= 0: + return width, height + edge = max(width, height) + if edge <= max_edge: + return width, height + scale = max_edge / float(edge) + return max(1, int(round(width * scale))), max(1, int(round(height * scale))) + + +def main() -> int: + args = parse_args() + gdal.UseExceptions() + + inputs = collect_inputs(args.input_dir, args.pattern) + if not inputs: + raise SystemExit("[ortho_build_master] No input ortho tiles found.") + + out_dir = Path(args.out_dir) + out_dir.mkdir(parents=True, exist_ok=True) + vrt_path = out_dir / args.vrt_name + tif_path = out_dir / args.tif_name + preview_path = out_dir / args.preview_name + meta_path = out_dir / "ortho_master_meta.json" + + print(f"[ortho_build_master] Input tiles: {len(inputs)}") + print(f"[ortho_build_master] Building VRT: {vrt_path}") + vrt = gdal.BuildVRT(str(vrt_path), [str(p) for p in inputs]) + if vrt is None: + raise SystemExit("[ortho_build_master] gdal.BuildVRT failed.") + + width = vrt.RasterXSize + height = vrt.RasterYSize + gt = vrt.GetGeoTransform(can_return_null=True) + proj = vrt.GetProjectionRef() + + print(f"[ortho_build_master] Translating GeoTIFF: {tif_path}") + tif_ds = gdal.Translate( + str(tif_path), + vrt, + options=gdal.TranslateOptions( + format="GTiff", + creationOptions=[ + "TILED=YES", + f"COMPRESS={args.compress}", + "BIGTIFF=IF_SAFER", + ], + ), + ) + if tif_ds is None: + raise SystemExit("[ortho_build_master] gdal.Translate to GeoTIFF failed.") + tif_ds = None + + if not args.no_preview: + out_w, out_h = preview_size(width, height, args.preview_max_size) + print(f"[ortho_build_master] Writing preview JPG: {preview_path} ({out_w}x{out_h})") + jpg_ds = gdal.Translate( + str(preview_path), + vrt, + options=gdal.TranslateOptions( + format="JPEG", + width=out_w, + height=out_h, + resampleAlg=args.resample, + creationOptions=["QUALITY=92"], + ), 
+ ) + if jpg_ds is None: + raise SystemExit("[ortho_build_master] gdal.Translate to JPEG preview failed.") + jpg_ds = None + + vrt = None + + meta = { + "schema_version": 1, + "inputs": [str(p) for p in inputs], + "outputs": { + "vrt": str(vrt_path), + "tif": str(tif_path), + "preview": None if args.no_preview else str(preview_path), + }, + "raster": { + "width": width, + "height": height, + "geotransform": list(gt) if gt else None, + "projection": proj, + }, + "settings": { + "compress": args.compress, + "preview_max_size": args.preview_max_size, + "resample": args.resample, + "pattern": args.pattern, + "input_dirs": args.input_dir if args.input_dir else ["export_unity/ortho_jpg"], + }, + } + meta_path.write_text(json.dumps(meta, indent=2), encoding="utf-8") + print(f"[ortho_build_master] Wrote metadata: {meta_path}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) + diff --git a/tests/test_river_continuity.py b/tests/test_river_continuity.py new file mode 100644 index 0000000..d12fa9d --- /dev/null +++ b/tests/test_river_continuity.py @@ -0,0 +1,80 @@ +import unittest +import numpy as np +from osgeo import gdal, ogr, osr +from geodata_pipeline.river_erosion import _build_order_lines, _tile_geotransform +from geodata_pipeline.config import RiverErosionProfileConfig + +class TestRiverContinuity(unittest.TestCase): + def setUp(self): + # Create a simple river line crossing x=1000 + # River flows from (500, 500) to (1500, 500) + self.srs = osr.SpatialReference() + self.srs.ImportFromEPSG(25832) + + self.river_geom = ogr.Geometry(ogr.wkbLineString) + self.river_geom.AddPoint(500.0, 500.0) + self.river_geom.AddPoint(1500.0, 500.0) + + # Mock Layer + self.driver = ogr.GetDriverByName("Memory") + self.ds = self.driver.CreateDataSource("wrk") + self.layer = self.ds.CreateLayer("rivers", srs=self.srs) + + field_defn = ogr.FieldDefn("ORD_STRA", ogr.OFTInteger) + self.layer.CreateField(field_defn) + + feat = ogr.Feature(self.layer.GetLayerDefn()) + 
feat.SetGeometry(self.river_geom)
+        feat.SetField("ORD_STRA", 5)
+        self.layer.CreateFeature(feat)
+
+        self.profile = RiverErosionProfileConfig() # Defaults
+        self.transform = osr.CoordinateTransformation(self.srs, self.srs) # Identity
+
+    def test_continuity_at_boundary(self):
+        # Tile A: 0-1000. Mosaic (3x3): -1000 to 2000.
+        bounds_a = (-1000.0, -1000.0, 2000.0, 2000.0)
+        # Resolution 1 m -> 3001 x 3001 pixels (edges inclusive: 3 * 1000 m + 1)
+        w_a = 3001 # 3 tiles * 1000m + 1
+        h_a = 3001
+
+        # Tile B: 1000-2000. Mosaic (3x3): 0 to 3000.
+        bounds_b = (0.0, -1000.0, 3000.0, 2000.0)
+        w_b = 3001
+        h_b = 3001
+
+        # Rasterize A
+        res_a = _build_order_lines(
+            [(self.layer, "rivers")],
+            bounds_a, w_a, h_a,
+            self.profile, self.transform, self.transform, self.srs
+        )
+
+        # Rasterize B
+        res_b = _build_order_lines(
+            [(self.layer, "rivers")],
+            bounds_b, w_b, h_b,
+            self.profile, self.transform, self.transform, self.srs
+        )
+
+        # Compare at X=1000
+        # For A: X=1000 maps to column 2000 (origin at -1000 m, 1 px per metre)
+        # origin_a = -1000. 1000 - (-1000) = 2000.
+        col_a = 2000
+
+        # For B: X=1000 maps to column 1000
+        # origin_b = 0. 1000 - 0 = 1000.
+        col_b = 1000
+
+        # Compare a vertical slice at the seam
+        slice_a = res_a[:, col_a]
+        slice_b = res_b[:, col_b]
+
+        # They should be identical
+        diff = np.abs(slice_a - slice_b)
+        max_diff = np.max(diff)
+
+        self.assertEqual(max_diff, 0.0, f"Seam mismatch found! 
Max diff: {max_diff}") + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_river_lakes.py b/tests/test_river_lakes.py new file mode 100644 index 0000000..6950e53 --- /dev/null +++ b/tests/test_river_lakes.py @@ -0,0 +1,70 @@ +import unittest +from unittest.mock import patch, MagicMock +from geodata_pipeline.river_erosion import _find_lake_depths +from geodata_pipeline.config import Config, LakeConfig + +class TestRiverLakes(unittest.TestCase): + @patch("geodata_pipeline.river_erosion.ogr") + def test_find_lake_depths(self, mock_ogr): + # Mock OGR DataSource and Layer + mock_ds = MagicMock() + mock_layer = MagicMock() + mock_ogr.Open.return_value = mock_ds + mock_ds.GetLayer.return_value = mock_layer + + # Mock Feature + mock_feat = MagicMock() + mock_feat.GetField.return_value = 15.5 # Depth_avg + mock_feat.GetFID.return_value = 123 + + # Mock Geometry + mock_geom = MagicMock() + mock_feat.GetGeometryRef.return_value = mock_geom + # When Distance is called on the feature geometry (transformed), return float + mock_transformed_geom = MagicMock() + mock_geom.Clone.return_value = mock_transformed_geom + mock_transformed_geom.Distance.return_value = 10.0 + + # Setup layer behavior + mock_layer.__iter__.return_value = [mock_feat] + mock_layer.GetNextFeature.side_effect = [mock_feat, None] + + # Config + cfg = Config.default() + cfg.river_erosion.lakes.enabled = True + cfg.river_erosion.lakes.match_tolerance_m = 100.0 + + # Manual lake centroid (shapely point or similar, but here we pass tuple/object depending on implementation) + # Let's assume we pass a list of (x, y) tuples for centroids + manual_lakes = [(333000.0, 5517000.0)] + + # Since _find_lake_depths expects OGR geometries for the manual lakes (or constructs them), + # let's assume we pass OGR geometries to it, or it creates them. + # Ideally, it should accept a list of OGR Geometries (Polygons from the mask). + + # For this test, I'll assume we pass a list of ogr.Geometry objects. 
+ mock_manual_geom = MagicMock() + mock_manual_geom.Centroid.return_value = mock_manual_geom + mock_manual_geom.Clone.return_value = mock_manual_geom # For transform + mock_manual_geom.GetPoint.return_value = (333000.0, 5517000.0) + + mock_srs = MagicMock() + mock_srs.IsSame.return_value = True # Simplify: same CRS + + results = _find_lake_depths( + [mock_manual_geom], + mock_srs, + cfg.river_erosion.lakes, + "raw/hydrolakes/HydroLAKES_polys_v10_shp/HydroLAKES_polys_v10.shp" + ) + + # Expected result: List of (depth, metadata) + # depth should be 15.5 + self.assertEqual(len(results), 1) + depth, metadata = results[0] + self.assertEqual(depth, 15.5) + self.assertEqual(metadata["id"], 123) + self.assertEqual(metadata["distance"], 10.0) + +if __name__ == "__main__": + unittest.main()