Add SWE boundary mask pipeline and mask tooling

2026-02-08 03:18:16 +01:00
parent 65e5232a85
commit c63f21cf81
11 changed files with 1583 additions and 4 deletions

View File

@@ -114,6 +114,11 @@ enabled = true
out_dir = "export_swe"
tile_index_path = "export_swe/swe_tile_index.csv"
manifest_path = "export_swe/swe_manifest.json"
boundary_manifest_path = "export_swe/swe_boundaries.json"
source_mask_dir = "raw/water_source_masks"
sink_mask_dir = "raw/water_sink_masks"
source_params_toml = "raw/water_source_masks/sources.toml"
sink_params_toml = "raw/water_sink_masks/sinks.toml"
height_source = "river_erosion"
height_source_dir = ""
height_source_manifest = ""
@@ -123,12 +128,12 @@ height_resample = "bilinear"
height_nodata = ""
porosity_source = "work/swe_lpo/porosity.vrt"
porosity_resample = "average"
porosity_nodata = 0.0
porosity_nodata = ""
solid_bias = 3.0
include_buildings = true
building_height_source = "work/swe_lpo/buildings.vrt"
building_resample = "average"
building_nodata = 0.0
building_nodata = ""
prefer_float16 = true
lpo_dir = ""
lpo_base_res = 1025

View File

@@ -146,6 +146,18 @@ priority = 2
url_template = "{base_url}/HydroRIVERS/HydroRIVERS_v10_eu_shp.zip"
notes = "HydroRIVERS Shapefile for river erosion"
# HydroLAKES (for terrain erosion and lake masking)
[datasets.hydrolakes]
enabled = true
name = "HydroLAKES Polygons"
download = "hydrosheds_org"
format = "zip"
unzip = true
output_subdir = "hydrolakes_shp"
priority = 2
url_template = "{base_url}/hydrolakes/HydroLAKES_polys_v10_shp.zip"
notes = "HydroLAKES Shapefile for lake depth and masking"
[processing]
convert_to_mesh = true
generate_lods = true
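For reference, the url_template fields above presumably expand via plain string formatting in the hydrosheds_org downloader. The downloader itself is not part of this diff, so the base_url below is an illustrative placeholder, not a value taken from this commit:

# Hypothetical expansion of the HydroLAKES url_template; base_url is a
# placeholder assumption, not part of this diff.
template = "{base_url}/hydrolakes/HydroLAKES_polys_v10_shp.zip"
print(template.format(base_url="https://example.org/hydrosheds"))
# https://example.org/hydrosheds/hydrolakes/HydroLAKES_polys_v10_shp.zip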

View File

@@ -263,6 +263,11 @@ class SweLodConfig:
out_dir: str = "export_swe"
tile_index_path: str = "export_swe/swe_tile_index.csv"
manifest_path: str = "export_swe/swe_manifest.json"
boundary_manifest_path: str = "export_swe/swe_boundaries.json"
source_mask_dir: str = "raw/water_source_masks"
sink_mask_dir: str = "raw/water_sink_masks"
source_params_toml: str = "raw/water_source_masks/sources.toml"
sink_params_toml: str = "raw/water_sink_masks/sinks.toml"
height_source: str = "river_erosion"
height_source_dir: str = ""
height_source_manifest: str = ""
@@ -419,6 +424,12 @@ def _swe_lod_from_dict(data: Dict[str, Any]) -> SweLodConfig:
value = base.get(key)
if isinstance(value, str) and value.strip() == "":
base[key] = None
# In SWE, 0.0 is a valid value for porosity/building rasters. Treating it as
# nodata would make GDAL remap real zeros and emit floods of warnings.
if base.get("porosity_nodata") == 0:  # int/float compare equal, so this covers 0 and 0.0
base["porosity_nodata"] = None
if base.get("building_nodata") == 0:
base["building_nodata"] = None
raw_lods = data.get("lods")
lods: list[SweLodLevelConfig] = []
if isinstance(raw_lods, list):
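The guard above works because Python compares int and float numerically. A minimal standalone sketch of the same normalization:

# Standalone sketch of the nodata normalization in _swe_lod_from_dict:
# "" (unset in TOML) and 0/0.0 (a valid data value) both map to None.
base = {"porosity_nodata": "", "building_nodata": 0.0}
for key in ("porosity_nodata", "building_nodata"):
    value = base.get(key)
    if isinstance(value, str) and value.strip() == "":
        base[key] = None
    elif value == 0:  # True for both 0 and 0.0
        base[key] = None
print(base)  # {'porosity_nodata': None, 'building_nodata': None}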

View File

@@ -3,6 +3,7 @@ from __future__ import annotations
import json
import math
import os
import tomllib
from dataclasses import asdict
from typing import Iterable
@@ -26,10 +27,12 @@ def export_swe_lods(cfg: Config, *, force_vrt: bool = False) -> int:
ensure_dir(swe_cfg.out_dir)
ensure_parent(swe_cfg.tile_index_path)
ensure_parent(swe_cfg.manifest_path)
ensure_parent(swe_cfg.boundary_manifest_path)
height_ds = _prepare_height_source(cfg, swe_cfg, force_vrt=force_vrt)
xmin, ymin, xmax, ymax = _dataset_bounds(height_ds)
source_bounds = (xmin, ymin, xmax, ymax)
base_tile_size = max(level.tile_size_m for level in swe_cfg.lods)
origin_x = swe_cfg.origin_x
@@ -59,6 +62,10 @@ def export_swe_lods(cfg: Config, *, force_vrt: bool = False) -> int:
ensure_dir(building_dir)
for tile_id, bounds in _iter_tiles(origin_x, origin_y, xmin, ymin, xmax, ymax, level.tile_size_m):
# Skip sliver tiles that intersect source bounds by less than one output pixel.
if not _has_min_coverage(bounds, source_bounds, level.resolution, level.resolution):
skipped += 1
continue
try:
height = _warp_array(
height_ds,
@@ -130,11 +137,20 @@ def export_swe_lods(cfg: Config, *, force_vrt: bool = False) -> int:
xmax,
ymax,
)
_write_boundary_manifest(
cfg,
swe_cfg,
tile_rows,
force_vrt=force_vrt,
)
removed = cleanup_aux_files(_cleanup_patterns(cfg.raw.dgm1_dir))
print(f"[swe_lods] Summary: wrote {written} tiles; skipped {skipped}.")
print(f"[swe_lods] Cleanup removed {removed} temporary files/sidecars.")
return 1 if skipped else 0
if written == 0:
print("[swe_lods] Error: no tiles were written.")
return 1
return 0
def export_swe_porosity(cfg: Config, *, force_vrt: bool = False) -> int:
@@ -147,6 +163,7 @@ def export_swe_porosity(cfg: Config, *, force_vrt: bool = False) -> int:
ensure_dir(swe_cfg.out_dir)
ensure_parent(swe_cfg.tile_index_path)
ensure_parent(swe_cfg.manifest_path)
ensure_parent(swe_cfg.boundary_manifest_path)
porosity_source = swe_cfg.porosity_source
building_source = swe_cfg.building_height_source
@@ -161,6 +178,7 @@ def export_swe_porosity(cfg: Config, *, force_vrt: bool = False) -> int:
bounds_ds = porosity_ds or building_ds
xmin, ymin, xmax, ymax = _dataset_bounds(bounds_ds)
source_bounds = (xmin, ymin, xmax, ymax)
base_tile_size = max(level.tile_size_m for level in swe_cfg.lods)
origin_x = swe_cfg.origin_x
@@ -187,6 +205,9 @@ def export_swe_porosity(cfg: Config, *, force_vrt: bool = False) -> int:
ensure_dir(building_dir)
for tile_id, bounds in _iter_tiles(origin_x, origin_y, xmin, ymin, xmax, ymax, level.tile_size_m):
if not _has_min_coverage(bounds, source_bounds, level.resolution, level.resolution):
skipped += 1
continue
porosity_path = os.path.join(porosity_dir, f"porosity_{tile_id}.exr")
porosity = _build_porosity(
porosity_ds,
@@ -241,11 +262,20 @@ def export_swe_porosity(cfg: Config, *, force_vrt: bool = False) -> int:
xmax,
ymax,
)
_write_boundary_manifest(
cfg,
swe_cfg,
tile_rows,
force_vrt=force_vrt,
)
removed = cleanup_aux_files(_cleanup_patterns(cfg.raw.dgm1_dir))
print(f"[swe_porosity] Summary: wrote {written} tiles; skipped {skipped}.")
print(f"[swe_porosity] Cleanup removed {removed} temporary files/sidecars.")
return 1 if skipped else 0
if written == 0:
print("[swe_porosity] Error: no tiles were written.")
return 1
return 0
def _uses_lpo(value: str | None) -> bool:
@@ -416,6 +446,30 @@ def _dataset_bounds(ds) -> tuple[float, float, float, float]:
return min(xmin, xmax), min(ymin, ymax), max(xmin, xmax), max(ymin, ymax)
def _has_min_coverage(
tile_bounds: tuple[float, float, float, float],
source_bounds: tuple[float, float, float, float],
out_w: int,
out_h: int,
) -> bool:
txmin, tymin, txmax, tymax = tile_bounds
sxmin, symin, sxmax, symax = source_bounds
overlap_w = max(0.0, min(txmax, sxmax) - max(txmin, sxmin))
overlap_h = max(0.0, min(tymax, symax) - max(tymin, symin))
if overlap_w <= 0.0 or overlap_h <= 0.0:
return False
tile_w = max(1e-6, txmax - txmin)
tile_h = max(1e-6, tymax - tymin)
covered_px_x = (overlap_w / tile_w) * float(out_w)
covered_px_y = (overlap_h / tile_h) * float(out_h)
# Require at least one output pixel in each axis, otherwise warps tend to
# generate empty/zero tiles at non-aligned outer borders.
return covered_px_x >= 1.0 and covered_px_y >= 1.0
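# Worked example: at a 1024 px output, a 1000 m tile overlapping the source
# by 0.4 m covers (0.4 / 1000) * 1024 = ~0.41 px and is skipped, while a 2 m
# overlap covers ~2.05 px and is kept.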
def _iter_tiles(
origin_x: float,
origin_y: float,
@@ -680,6 +734,249 @@ def _open_optional_raster(path: str | None, label: str):
return None
def _write_boundary_manifest(
cfg: Config,
swe_cfg: SweLodConfig,
tile_rows: list[dict],
*,
force_vrt: bool,
) -> None:
source_ds = _open_boundary_mask_dataset(
cfg,
mask_dir=swe_cfg.source_mask_dir,
kind="source",
force_vrt=force_vrt,
)
sink_ds = _open_boundary_mask_dataset(
cfg,
mask_dir=swe_cfg.sink_mask_dir,
kind="sink",
force_vrt=force_vrt,
)
source_params = _load_boundary_params_toml(swe_cfg.source_params_toml, kind="sources")
sink_params = _load_boundary_params_toml(swe_cfg.sink_params_toml, kind="sinks")
tiles_payload = []
source_stats: dict[int, dict] = {}
sink_stats: dict[int, dict] = {}
for row in tile_rows:
bounds = (float(row["xmin"]), float(row["ymin"]), float(row["xmax"]), float(row["ymax"]))
resolution = int(row["resolution"])
lod = str(row["lod"])
tile_x = int(row["tile_x"])
tile_y = int(row["tile_y"])
source_ids = _sample_boundary_ids(source_ds, bounds, resolution)
sink_ids = _sample_boundary_ids(sink_ds, bounds, resolution)
_accumulate_id_stats(source_stats, source_ids, lod)
_accumulate_id_stats(sink_stats, sink_ids, lod)
tiles_payload.append(
{
"lod": lod,
"tile_x": tile_x,
"tile_y": tile_y,
"tile_size_m": float(row["tile_size_m"]),
"resolution": resolution,
"bounds": [bounds[0], bounds[1], bounds[2], bounds[3]],
"source_ids": source_ids,
"sink_ids": sink_ids,
}
)
payload = {
"schema_version": 1,
"id_encoding": "id = B + 256*G + 65536*R",
"source_mask_dir": swe_cfg.source_mask_dir,
"sink_mask_dir": swe_cfg.sink_mask_dir,
"source_params_toml": swe_cfg.source_params_toml,
"sink_params_toml": swe_cfg.sink_params_toml,
"sources": _merge_stats_with_params(source_stats, source_params),
"sinks": _merge_stats_with_params(sink_stats, sink_params),
"tiles": tiles_payload,
}
with open(swe_cfg.boundary_manifest_path, "w", encoding="utf-8") as fh:
json.dump(payload, fh, indent=2)
print(f"[swe_lods] Wrote boundary manifest: {swe_cfg.boundary_manifest_path}")
def _open_boundary_mask_dataset(
cfg: Config,
*,
mask_dir: str,
kind: str,
force_vrt: bool,
):
if not mask_dir or not os.path.isdir(mask_dir):
print(f"[swe_lods] No {kind} mask directory: {mask_dir}")
return None
sources = sorted(
[
os.path.join(mask_dir, name)
for name in os.listdir(mask_dir)
if name.lower().endswith(".png")
]
)
if not sources:
print(f"[swe_lods] No {kind} mask PNGs found in {mask_dir}")
return None
out_dir = os.path.join(cfg.work.work_dir, "swe_boundaries")
ensure_dir(out_dir)
vrt_path = os.path.join(out_dir, f"{kind}_ids.vrt")
# Mask VRTs are cheap to rebuild, so regenerate them unconditionally.
build_vrt(vrt_path, sources, force=True)
return _open_optional_raster(vrt_path, f"{kind} IDs")
def _warp_id_array(
ds,
bounds: tuple[float, float, float, float],
width: int,
height: int,
) -> np.ndarray:
# Keep id=0 as regular background; do not reserve it as NODATA.
warp_opts = gdal.WarpOptions(
format="MEM",
outputBounds=bounds,
width=width,
height=height,
resampleAlg="near",
)
warped = gdal.Warp("", ds, options=warp_opts)
if warped is None:
raise RuntimeError("GDAL Warp for ID mask returned None.")
if warped.RasterCount >= 3:
r = np.rint(warped.GetRasterBand(1).ReadAsArray()).astype(np.uint32)
g = np.rint(warped.GetRasterBand(2).ReadAsArray()).astype(np.uint32)
b = np.rint(warped.GetRasterBand(3).ReadAsArray()).astype(np.uint32)
ids = b + (256 * g) + (65536 * r)
else:
ids = np.rint(warped.GetRasterBand(1).ReadAsArray()).astype(np.uint32)
return ids
def _sample_boundary_ids(ds, bounds: tuple[float, float, float, float], resolution: int) -> list[dict]:
if ds is None:
return []
ids = _warp_id_array(ds, bounds, resolution, resolution)
u, c = np.unique(ids, return_counts=True)
out = []
for ident, count in zip(u.tolist(), c.tolist()):
if ident == 0:
continue
out.append({"id": int(ident), "pixels": int(count)})
return out
def _accumulate_id_stats(stats: dict[int, dict], ids: list[dict], lod: str) -> None:
for entry in ids:
ident = int(entry["id"])
pixels = int(entry["pixels"])
node = stats.setdefault(
ident,
{
"id": ident,
"tile_count": 0,
"total_pixels": 0,
"lod_pixels": {},
},
)
node["tile_count"] += 1
node["total_pixels"] += pixels
node["lod_pixels"][lod] = int(node["lod_pixels"].get(lod, 0) + pixels)
def _load_boundary_params_toml(path: str, *, kind: str) -> dict[int, dict]:
if not path or not os.path.exists(path):
return {}
with open(path, "rb") as fh:
data = tomllib.load(fh)
section = data.get(kind)
if section is None and kind.endswith("s"):
section = data.get(kind[:-1])
if section is None:
return {}
out: dict[int, dict] = {}
if isinstance(section, list):
for idx, item in enumerate(section):
if not isinstance(item, dict):
continue
ident = _parse_int_id(item.get("id"))
if ident is None or ident <= 0:
continue
payload = {k: v for k, v in item.items() if k != "id"}
_set_boundary_param(out, ident, payload, kind=kind, path=path, entry_name=f"list[{idx}]")
elif isinstance(section, dict):
for key, item in section.items():
if not isinstance(item, dict):
continue
# Backward compatible:
# - numeric dict key style: [sources] 1 = {...}
# - named subtables style: [sources.source0] id = 1
ident = _parse_int_id(key)
if ident is None:
ident = _parse_int_id(item.get("id"))
if ident is None or ident <= 0:
continue
payload = {k: v for k, v in item.items() if k != "id"}
_set_boundary_param(out, ident, payload, kind=kind, path=path, entry_name=str(key))
return out
def _parse_int_id(value) -> int | None:
try:
return int(value)
except (TypeError, ValueError):
return None
def _set_boundary_param(
out: dict[int, dict],
ident: int,
payload: dict,
*,
kind: str,
path: str,
entry_name: str,
) -> None:
if ident in out:
print(
f"[swe_lods] Warning: duplicate {kind} params for id={ident} in {path} "
f"(entry '{entry_name}'); overriding previous."
)
out[ident] = payload
def _merge_stats_with_params(stats: dict[int, dict], params: dict[int, dict]) -> list[dict]:
all_ids = sorted(set(stats.keys()) | set(params.keys()))
out = []
for ident in all_ids:
node = {
"id": int(ident),
"tile_count": 0,
"total_pixels": 0,
"lod_pixels": {},
"params": {},
}
if ident in stats:
node["tile_count"] = int(stats[ident]["tile_count"])
node["total_pixels"] = int(stats[ident]["total_pixels"])
node["lod_pixels"] = dict(stats[ident]["lod_pixels"])
if ident in params:
node["params"] = dict(params[ident])
out.append(node)
return out
def _cleanup_patterns(raw_dir: str) -> Iterable[str]:
return [
os.path.join("work", "*_tmp.tif"),
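The params TOML accepted by _load_boundary_params_toml supports the two dict layouts noted in the code (plus an array-of-tables form). A minimal sketch of both dict styles; the rate key is an illustrative placeholder, since this diff does not define the per-ID parameter schema:

import tomllib

doc = tomllib.loads("""
[sources.source0]   # named-subtable style: id comes from the "id" field
id = 1
rate = 2.5

[sources.2]         # numeric-key style: id comes from the table key
rate = 0.5
""")
for key, item in doc["sources"].items():
    ident = item.get("id", key)  # the "id" field wins, else the table key
    print(int(ident), {k: v for k, v in item.items() if k != "id"})
# 1 {'rate': 2.5}
# 2 {'rate': 0.5}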

scripts/fix_masks.py Normal file
View File

@@ -0,0 +1,73 @@
import os
import shutil
from osgeo import gdal
def fix_manual_masks(raw_dir, work_dir):
manual_files = [f for f in os.listdir(raw_dir) if f.endswith(".png")]
for manual_file in manual_files:
manual_path = os.path.join(raw_dir, manual_file)
parts = manual_file.replace("_viz.png", "").split("_")
if len(parts) >= 4:
tile_prefix = f"{parts[0]}_{parts[1]}_{parts[2]}_{parts[3]}"
work_file = f"{tile_prefix}_1_rp_mask.png"
work_path = os.path.join(work_dir, work_file)
if not os.path.exists(work_path):
print(f"Skipping {manual_file}: Work file not found.")
continue
print(f"Fixing {manual_file} using {work_file}...")
ds_work = gdal.Open(work_path)
gt_work = ds_work.GetGeoTransform()
proj_work = ds_work.GetProjection()
target_w = ds_work.RasterXSize
target_h = ds_work.RasterYSize
minx = gt_work[0]
maxy = gt_work[3]
maxx = minx + (gt_work[1] * target_w)
miny = maxy + (gt_work[5] * target_h)
# Open Source as ReadOnly, then CreateCopy to MEM to edit
ds_raw = gdal.Open(manual_path)
mem_driver = gdal.GetDriverByName("MEM")
ds_manual = mem_driver.CreateCopy("", ds_raw)
ds_raw = None # Close file
src_w = ds_manual.RasterXSize
src_h = ds_manual.RasterYSize
src_res_x = (maxx - minx) / src_w
src_res_y = (miny - maxy) / src_h
src_gt = (minx, src_res_x, 0, maxy, 0, src_res_y)
ds_manual.SetGeoTransform(src_gt)
ds_manual.SetProjection(proj_work)
# PNG is a CreateCopy-only driver, so warp into MEM first and then
# materialize the result with CreateCopy.
warp_options = gdal.WarpOptions(
format="MEM",
width=target_w,
height=target_h,
outputBounds=(minx, miny, maxx, maxy),
resampleAlg=gdal.GRA_NearestNeighbour
)
temp_path = manual_path + ".tmp.png"
ds_warped = gdal.Warp("", ds_manual, options=warp_options)
gdal.GetDriverByName("PNG").CreateCopy(temp_path, ds_warped)
ds_warped = None
ds_manual = None
ds_work = None
shutil.move(temp_path, manual_path)
work_wld = work_path.replace(".png", ".wld")
if os.path.exists(work_wld):
shutil.copy(work_wld, manual_path.replace(".png", ".wld"))
print(" -> Fixed.")
if __name__ == "__main__":
fix_manual_masks("raw/water_masks", "work/river_masks")
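The target bounds above come straight from the work mask's geotransform. A worked example with illustrative EPSG:25832-style numbers (a north-up geotransform is (origin_x, px, 0, origin_y, 0, -py)):

gt_work = (332000.0, 1.0, 0.0, 5518000.0, 0.0, -1.0)  # illustrative values
target_w, target_h = 1000, 1000
minx = gt_work[0]                    # 332000.0
maxy = gt_work[3]                    # 5518000.0
maxx = minx + gt_work[1] * target_w  # 333000.0
miny = maxy + gt_work[5] * target_h  # 5517000.0 (gt[5] is negative)
print((minx, miny, maxx, maxy))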

View File

@@ -0,0 +1,380 @@
#!/usr/bin/env python3
"""Build global editable master masks from per-tile mask files.
This script keeps 1025-tile semantics intact for river erosion while making
manual source/sink editing easier in one global image.
"""
from __future__ import annotations
import argparse
import json
import math
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Iterable, Optional
import numpy as np
from PIL import Image
@dataclass
class TileRef:
key: str
water_path: Path
water_stem: str
width: int
height: int
px: float
py: float
minx: float
miny: float
maxx: float
maxy: float
source_path: Optional[Path] = None
sink_path: Optional[Path] = None
_TILE_KEY_RE = re.compile(r"^(dgm\d+_\d+_\d+_\d+)")
def parse_args() -> argparse.Namespace:
p = argparse.ArgumentParser(description="Build master water/source/sink masks from tiled inputs.")
p.add_argument("--water-dir", default="raw/water_masks", help="Directory with tile water masks.")
p.add_argument(
"--fallback-water-dir",
default="work/river_masks",
help="Fallback water mask directory used for tiles missing in --water-dir.",
)
p.add_argument("--source-dir", default="raw/water_source_masks", help="Directory with tile source masks.")
p.add_argument("--sink-dir", default="raw/water_sink_masks", help="Directory with tile sink masks.")
p.add_argument("--out-dir", default="work/mask_master", help="Output directory for master masks.")
p.add_argument("--water-pattern", default="*_mask_viz.png", help="Glob for water mask files.")
p.add_argument("--fallback-water-pattern", default="*_mask.png", help="Glob for fallback water mask files.")
p.add_argument("--write-master-wld", action="store_true", help="Write worldfiles for master images.")
return p.parse_args()
def tile_key_from_name(name: str) -> Optional[str]:
stem = Path(name).stem
m = _TILE_KEY_RE.match(stem)
if not m:
return None
return m.group(1)
def read_worldfile(path: Path) -> tuple[float, float, float, float, float, float]:
vals = []
for line in path.read_text(encoding="utf-8").splitlines():
line = line.strip()
if line:
vals.append(float(line))
if len(vals) != 6:
raise ValueError(f"Expected 6 values in worldfile {path}, got {len(vals)}.")
return vals[0], vals[1], vals[2], vals[3], vals[4], vals[5]
def infer_bounds(width: int, height: int, wld: tuple[float, float, float, float, float, float]) -> tuple[float, float, float, float]:
a, d, b, e, c, f = wld
if abs(b) > 1e-9 or abs(d) > 1e-9:
raise ValueError("Rotated worldfiles are not supported.")
minx = c - (a / 2.0)
maxy = f - (e / 2.0)
maxx = minx + (a * width)
miny = maxy + (e * height)
return minx, miny, maxx, maxy
def load_rgb(path: Path, width: int, height: int) -> np.ndarray:
img = Image.open(path).convert("RGB")
arr = np.array(img, dtype=np.uint8)
if arr.shape[0] == height and arr.shape[1] == width:
return arr
resized = img.resize((width, height), resample=Image.Resampling.NEAREST)
return np.array(resized, dtype=np.uint8)
def write_worldfile(path: Path, px: float, py: float, minx: float, maxy: float) -> None:
c = minx + (px / 2.0)
f = maxy + (py / 2.0)
text = "\n".join(
[
f"{px:.12f}",
"0.0",
"0.0",
f"{py:.12f}",
f"{c:.12f}",
f"{f:.12f}",
]
)
path.write_text(text + "\n", encoding="utf-8")
def index_masks_by_tile(mask_dir: Path, suffix: str) -> Dict[str, Path]:
out: Dict[str, Path] = {}
if not mask_dir.exists():
return out
for p in sorted(mask_dir.glob(f"*{suffix}")):
key = tile_key_from_name(p.name)
if key is None:
continue
if key in out:
print(f"[mask_build_master] Warning: duplicate {suffix} for {key}, keeping {out[key].name}, ignoring {p.name}")
continue
out[key] = p
return out
def collect_tiles(water_dir: Path, water_pattern: str, source_dir: Path, sink_dir: Path) -> list[TileRef]:
source_by_key = index_masks_by_tile(source_dir, ".png")
sink_by_key = index_masks_by_tile(sink_dir, ".png")
tiles: list[TileRef] = []
for p in sorted(water_dir.glob(water_pattern)):
if p.name.endswith(".tmp.png"):
continue
key = tile_key_from_name(p.name)
if key is None:
continue
wld = p.with_suffix(".wld")
if not wld.exists():
print(f"[mask_build_master] Skipping {p.name}: missing worldfile {wld.name}")
continue
img = Image.open(p)
width, height = img.size
img.close()
a, d, b, e, c, f = read_worldfile(wld)
if abs(b) > 1e-9 or abs(d) > 1e-9:
raise SystemExit(f"[mask_build_master] Rotated worldfile is not supported: {wld}")
if a <= 0 or e >= 0:
raise SystemExit(f"[mask_build_master] Unexpected worldfile pixel size signs in {wld}")
minx, miny, maxx, maxy = infer_bounds(width, height, (a, d, b, e, c, f))
tiles.append(
TileRef(
key=key,
water_path=p,
water_stem=p.stem,
width=width,
height=height,
px=a,
py=e,
minx=minx,
miny=miny,
maxx=maxx,
maxy=maxy,
source_path=source_by_key.get(key),
sink_path=sink_by_key.get(key),
)
)
return tiles
def collect_fallback_tiles(
fallback_dir: Path,
fallback_pattern: str,
existing_keys: set[str],
source_dir: Path,
sink_dir: Path,
) -> list[TileRef]:
source_by_key = index_masks_by_tile(source_dir, ".png")
sink_by_key = index_masks_by_tile(sink_dir, ".png")
tiles: list[TileRef] = []
if not fallback_dir.exists():
return tiles
for p in sorted(fallback_dir.glob(fallback_pattern)):
key = tile_key_from_name(p.name)
if key is None or key in existing_keys:
continue
if p.name.endswith(".tmp.png"):
continue
wld = p.with_suffix(".wld")
if not wld.exists():
print(f"[mask_build_master] Skipping fallback {p.name}: missing worldfile {wld.name}")
continue
img = Image.open(p)
width, height = img.size
img.close()
a, d, b, e, c, f = read_worldfile(wld)
if abs(b) > 1e-9 or abs(d) > 1e-9:
raise SystemExit(f"[mask_build_master] Rotated worldfile is not supported: {wld}")
if a <= 0 or e >= 0:
raise SystemExit(f"[mask_build_master] Unexpected worldfile pixel size signs in {wld}")
minx, miny, maxx, maxy = infer_bounds(width, height, (a, d, b, e, c, f))
tiles.append(
TileRef(
key=key,
water_path=p,
water_stem=p.stem,
width=width,
height=height,
px=a,
py=e,
minx=minx,
miny=miny,
maxx=maxx,
maxy=maxy,
source_path=source_by_key.get(key),
sink_path=sink_by_key.get(key),
)
)
return tiles
def check_resolution_consistency(tiles: Iterable[TileRef]) -> tuple[float, float]:
pxs = [t.px for t in tiles]
pys = [t.py for t in tiles]
px = float(np.median(pxs))
py = float(np.median(pys))
for t in tiles:
if not math.isclose(t.px, px, rel_tol=0.0, abs_tol=1e-9):
raise SystemExit(f"[mask_build_master] Inconsistent px: {t.water_path} has {t.px}, expected {px}")
if not math.isclose(t.py, py, rel_tol=0.0, abs_tol=1e-9):
raise SystemExit(f"[mask_build_master] Inconsistent py: {t.water_path} has {t.py}, expected {py}")
return px, py
def merge_non_black(dst: np.ndarray, src: np.ndarray) -> None:
nz = np.any(src != 0, axis=2)
dst[nz] = src[nz]
def main() -> int:
args = parse_args()
water_dir = Path(args.water_dir)
fallback_water_dir = Path(args.fallback_water_dir)
source_dir = Path(args.source_dir)
sink_dir = Path(args.sink_dir)
out_dir = Path(args.out_dir)
if not water_dir.exists():
raise SystemExit(f"[mask_build_master] water-dir not found: {water_dir}")
primary_tiles = collect_tiles(water_dir, args.water_pattern, source_dir, sink_dir)
fallback_tiles = collect_fallback_tiles(
fallback_water_dir,
args.fallback_water_pattern,
{t.key for t in primary_tiles},
source_dir,
sink_dir,
)
tiles = primary_tiles + fallback_tiles
if not tiles:
raise SystemExit("[mask_build_master] No water tiles found.")
px, py = check_resolution_consistency(tiles)
minx = min(t.minx for t in tiles)
miny = min(t.miny for t in tiles)
maxx = max(t.maxx for t in tiles)
maxy = max(t.maxy for t in tiles)
master_w = int(round((maxx - minx) / px))
master_h = int(round((maxy - miny) / abs(py)))
if master_w <= 0 or master_h <= 0:
raise SystemExit("[mask_build_master] Invalid master size.")
water_master = np.zeros((master_h, master_w, 3), dtype=np.uint8)
source_master = np.zeros((master_h, master_w, 3), dtype=np.uint8)
sink_master = np.zeros((master_h, master_w, 3), dtype=np.uint8)
tile_rows = []
for t in sorted(tiles, key=lambda t: (t.key, t.minx, t.miny)):
xoff = int(round((t.minx - minx) / px))
yoff = int(round((maxy - t.maxy) / abs(py)))
if xoff < 0 or yoff < 0 or xoff + t.width > master_w or yoff + t.height > master_h:
raise SystemExit(f"[mask_build_master] Tile placement out of bounds for {t.water_path}")
w_arr = load_rgb(t.water_path, t.width, t.height)
merge_non_black(water_master[yoff : yoff + t.height, xoff : xoff + t.width], w_arr)
if t.source_path and t.source_path.exists():
s_arr = load_rgb(t.source_path, t.width, t.height)
merge_non_black(source_master[yoff : yoff + t.height, xoff : xoff + t.width], s_arr)
if t.sink_path and t.sink_path.exists():
k_arr = load_rgb(t.sink_path, t.width, t.height)
merge_non_black(sink_master[yoff : yoff + t.height, xoff : xoff + t.width], k_arr)
tile_rows.append(
{
"key": t.key,
"water_file": t.water_path.name,
"water_stem": t.water_stem,
"source_file": t.source_path.name if t.source_path else "",
"sink_file": t.sink_path.name if t.sink_path else "",
"xoff": xoff,
"yoff": yoff,
"width": t.width,
"height": t.height,
"minx": t.minx,
"miny": t.miny,
"maxx": t.maxx,
"maxy": t.maxy,
}
)
out_dir.mkdir(parents=True, exist_ok=True)
water_out = out_dir / "water_master.png"
source_out = out_dir / "source_master.png"
sink_out = out_dir / "sink_master.png"
Image.fromarray(water_master, mode="RGB").save(water_out)
Image.fromarray(source_master, mode="RGB").save(source_out)
Image.fromarray(sink_master, mode="RGB").save(sink_out)
if args.write_master_wld:
write_worldfile(water_out.with_suffix(".wld"), px, py, minx, maxy)
write_worldfile(source_out.with_suffix(".wld"), px, py, minx, maxy)
write_worldfile(sink_out.with_suffix(".wld"), px, py, minx, maxy)
meta = {
"schema_version": 1,
"master": {
"width": master_w,
"height": master_h,
"minx": minx,
"miny": miny,
"maxx": maxx,
"maxy": maxy,
"px": px,
"py": py,
"water_master": water_out.name,
"source_master": source_out.name,
"sink_master": sink_out.name,
},
"inputs": {
"water_dir": str(water_dir),
"fallback_water_dir": str(fallback_water_dir),
"source_dir": str(source_dir),
"sink_dir": str(sink_dir),
"water_pattern": args.water_pattern,
"fallback_water_pattern": args.fallback_water_pattern,
},
"tiles": tile_rows,
}
(out_dir / "master_meta.json").write_text(json.dumps(meta, indent=2), encoding="utf-8")
print(f"[mask_build_master] Wrote {water_out}")
print(f"[mask_build_master] Wrote {source_out}")
print(f"[mask_build_master] Wrote {sink_out}")
print(f"[mask_build_master] Wrote {out_dir / 'master_meta.json'}")
print(f"[mask_build_master] Master size: {master_w}x{master_h}")
print(f"[mask_build_master] Tiles merged: {len(tile_rows)}")
print(f"[mask_build_master] from primary: {len(primary_tiles)}")
print(f"[mask_build_master] from fallback: {len(fallback_tiles)}")
return 0
if __name__ == "__main__":
raise SystemExit(main())
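write_worldfile and infer_bounds agree on the worldfile convention: lines 5 and 6 hold the center of the top-left pixel, so half a pixel is added on write and subtracted back on read. A round-trip sketch with illustrative numbers:

px, py = 1.0, -1.0                # py is negative for north-up masks
minx, maxy = 332000.0, 5518000.0  # illustrative tile corner
c = minx + px / 2.0               # 332000.5, worldfile line 5
f = maxy + py / 2.0               # 5517999.5, worldfile line 6
assert c - px / 2.0 == minx       # infer_bounds inverts the half-pixel shift
assert f - py / 2.0 == maxy
width = height = 1000
assert minx + px * width == 333000.0    # maxx
assert maxy + py * height == 5517000.0  # miny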

View File

@@ -0,0 +1,247 @@
#!/usr/bin/env python3
"""Split global master masks back into tile masks.
Optional filtering modes:
- keep only source/sink tiles that contain non-black pixels
- keep only water tiles that differ from a reference river mask directory
"""
from __future__ import annotations
import argparse
import json
import re
from pathlib import Path
from typing import Dict
import numpy as np
from PIL import Image
_TILE_KEY_RE = re.compile(r"^(dgm\d+_\d+_\d+_\d+)")
# Master masks can exceed PIL's safety threshold; these files are trusted local data.
Image.MAX_IMAGE_PIXELS = None
def parse_args() -> argparse.Namespace:
p = argparse.ArgumentParser(description="Split master masks into per-tile water/source/sink masks.")
p.add_argument("--master-dir", default="work/mask_master", help="Directory containing master_meta.json and master images.")
p.add_argument("--out-water-dir", default="raw/water_masks", help="Output directory for water tile masks.")
p.add_argument("--out-source-dir", default="raw/water_source_masks", help="Output directory for source tile masks.")
p.add_argument("--out-sink-dir", default="raw/water_sink_masks", help="Output directory for sink tile masks.")
p.add_argument(
"--ref-water-dir",
default="work/river_masks",
help="Reference water mask dir used to keep only changed water tiles.",
)
p.add_argument(
"--keep-informative-only",
action="store_true",
help="Write only informative masks (non-black source/sink; water changed vs ref water dir).",
)
p.add_argument(
"--prune-existing",
action="store_true",
help="Remove existing PNG/WLD outputs in out dirs before writing.",
)
p.add_argument("--write-wld", action="store_true", help="Write world files for output masks.")
return p.parse_args()
def write_worldfile(path: Path, px: float, py: float, minx: float, maxy: float) -> None:
c = minx + (px / 2.0)
f = maxy + (py / 2.0)
text = "\n".join(
[
f"{px:.12f}",
"0.0",
"0.0",
f"{py:.12f}",
f"{c:.12f}",
f"{f:.12f}",
]
)
path.write_text(text + "\n", encoding="utf-8")
def ensure_dirs(*dirs: Path) -> None:
for d in dirs:
d.mkdir(parents=True, exist_ok=True)
def tile_key_from_name(name: str) -> str:
stem = Path(name).stem
m = _TILE_KEY_RE.match(stem)
return m.group(1) if m else ""
def remove_existing_outputs(out_dir: Path) -> None:
if not out_dir.exists():
return
for p in out_dir.glob("*.png"):
p.unlink(missing_ok=True)
for p in out_dir.glob("*.wld"):
p.unlink(missing_ok=True)
def has_non_black(arr: np.ndarray) -> bool:
return bool(np.any(arr != 0))
def index_reference_masks(ref_dir: Path) -> Dict[str, Path]:
out: Dict[str, Path] = {}
if not ref_dir.exists():
return out
for p in sorted(ref_dir.glob("*.png")):
key = tile_key_from_name(p.name)
if not key or key in out:
continue
out[key] = p
return out
def resize_nearest(arr: np.ndarray, width: int, height: int) -> np.ndarray:
if arr.shape[0] == height and arr.shape[1] == width:
return arr
img = Image.fromarray(arr, mode="RGB")
resized = img.resize((width, height), resample=Image.Resampling.NEAREST)
return np.array(resized, dtype=np.uint8)
def water_differs_from_reference(water_arr: np.ndarray, key: str, ref_index: Dict[str, Path]) -> bool:
ref_path = ref_index.get(key)
if ref_path is None:
# No baseline tile: treat as informative so we do not lose manual work.
return True
ref_arr = np.array(Image.open(ref_path).convert("RGB"), dtype=np.uint8)
ref_arr = resize_nearest(ref_arr, water_arr.shape[1], water_arr.shape[0])
return not np.array_equal(water_arr, ref_arr)
def source_name_for_tile(tile: Dict[str, object]) -> str:
source_file = str(tile.get("source_file") or "").strip()
if source_file:
return source_file
stem = str(tile.get("water_stem") or "").strip()
if stem:
return f"{stem}_source_mask.png"
return f"{tile['key']}_source_mask.png"
def sink_name_for_tile(tile: Dict[str, object]) -> str:
sink_file = str(tile.get("sink_file") or "").strip()
if sink_file:
return sink_file
stem = str(tile.get("water_stem") or "").strip()
if stem:
return f"{stem}_sink_mask.png"
return f"{tile['key']}_sink_mask.png"
def crop_tile(master: np.ndarray, xoff: int, yoff: int, width: int, height: int) -> np.ndarray:
return master[yoff : yoff + height, xoff : xoff + width].copy()
def main() -> int:
args = parse_args()
master_dir = Path(args.master_dir)
meta_path = master_dir / "master_meta.json"
if not meta_path.exists():
raise SystemExit(f"[mask_split_master] Missing meta: {meta_path}")
meta = json.loads(meta_path.read_text(encoding="utf-8"))
m = meta["master"]
tiles = meta["tiles"]
px = float(m["px"])
py = float(m["py"])
water_master = np.array(Image.open(master_dir / m["water_master"]).convert("RGB"), dtype=np.uint8)
source_master = np.array(Image.open(master_dir / m["source_master"]).convert("RGB"), dtype=np.uint8)
sink_master = np.array(Image.open(master_dir / m["sink_master"]).convert("RGB"), dtype=np.uint8)
ensure_dirs(Path(args.out_water_dir), Path(args.out_source_dir), Path(args.out_sink_dir))
out_water = Path(args.out_water_dir)
out_source = Path(args.out_source_dir)
out_sink = Path(args.out_sink_dir)
if args.prune_existing:
remove_existing_outputs(out_water)
remove_existing_outputs(out_source)
remove_existing_outputs(out_sink)
ref_index = index_reference_masks(Path(args.ref_water_dir))
written_water = 0
written_source = 0
written_sink = 0
skipped_water = 0
skipped_source = 0
skipped_sink = 0
for tile in tiles:
xoff = int(tile["xoff"])
yoff = int(tile["yoff"])
width = int(tile["width"])
height = int(tile["height"])
water_arr = crop_tile(water_master, xoff, yoff, width, height)
source_arr = crop_tile(source_master, xoff, yoff, width, height)
sink_arr = crop_tile(sink_master, xoff, yoff, width, height)
water_name = str(tile.get("water_file") or f"{tile['key']}_mask_viz.png")
source_name = source_name_for_tile(tile)
sink_name = sink_name_for_tile(tile)
water_path = out_water / water_name
source_path = out_source / source_name
sink_path = out_sink / sink_name
write_water = True
write_source = True
write_sink = True
if args.keep_informative_only:
write_source = has_non_black(source_arr)
write_sink = has_non_black(sink_arr)
write_water = water_differs_from_reference(water_arr, str(tile["key"]), ref_index)
if write_water:
Image.fromarray(water_arr, mode="RGB").save(water_path)
if args.write_wld:
minx = float(tile["minx"])
maxy = float(tile["maxy"])
write_worldfile(water_path.with_suffix(".wld"), px, py, minx, maxy)
written_water += 1
else:
skipped_water += 1
if write_source:
Image.fromarray(source_arr, mode="RGB").save(source_path)
if args.write_wld:
minx = float(tile["minx"])
maxy = float(tile["maxy"])
write_worldfile(source_path.with_suffix(".wld"), px, py, minx, maxy)
written_source += 1
else:
skipped_source += 1
if write_sink:
Image.fromarray(sink_arr, mode="RGB").save(sink_path)
if args.write_wld:
minx = float(tile["minx"])
maxy = float(tile["maxy"])
write_worldfile(sink_path.with_suffix(".wld"), px, py, minx, maxy)
written_sink += 1
else:
skipped_sink += 1
total_tiles = len(tiles)
print(f"[mask_split_master] Tiles processed: {total_tiles}")
print(f"[mask_split_master] Water written: {written_water}, skipped: {skipped_water}")
print(f"[mask_split_master] Source written: {written_source}, skipped: {skipped_source}")
print(f"[mask_split_master] Sink written: {written_sink}, skipped: {skipped_sink}")
print(f"[mask_split_master] Water dir: {out_water}")
print(f"[mask_split_master] Source dir: {out_source}")
print(f"[mask_split_master] Sink dir: {out_sink}")
return 0
if __name__ == "__main__":
raise SystemExit(main())
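Because master_meta.json records each tile's pixel window, any consumer can crop a tile without running the splitter. A minimal sketch, assuming the master-build step has already written its default outputs under work/mask_master:

import json
from pathlib import Path

import numpy as np
from PIL import Image

Image.MAX_IMAGE_PIXELS = None  # masters can exceed PIL's default limit
master_dir = Path("work/mask_master")
meta = json.loads((master_dir / "master_meta.json").read_text(encoding="utf-8"))
water = np.array(
    Image.open(master_dir / meta["master"]["water_master"]).convert("RGB"),
    dtype=np.uint8,
)
tile = meta["tiles"][0]
crop = water[
    tile["yoff"] : tile["yoff"] + tile["height"],
    tile["xoff"] : tile["xoff"] + tile["width"],
]
print(tile["key"], crop.shape)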

View File

@@ -0,0 +1,246 @@
#!/usr/bin/env python3
"""Validate source/sink ID mask integrity.
ID encoding:
id = B + 256*G + 65536*R
"""
from __future__ import annotations
import argparse
import json
import re
from pathlib import Path
from typing import Dict, Optional
import numpy as np
from PIL import Image
_TILE_KEY_RE = re.compile(r"^(dgm\d+_\d+_\d+_\d+)")
def parse_args() -> argparse.Namespace:
p = argparse.ArgumentParser(description="Validate source/sink masks and report ID usage.")
p.add_argument("--source-dir", default="raw/water_source_masks", help="Directory with source masks.")
p.add_argument("--sink-dir", default="raw/water_sink_masks", help="Directory with sink masks.")
p.add_argument("--allowed-source-ids", default="", help="Comma-separated nonzero IDs allowed in source masks.")
p.add_argument("--allowed-sink-ids", default="", help="Comma-separated nonzero IDs allowed in sink masks.")
p.add_argument("--max-id", type=int, default=0, help="If >0, fail when any ID exceeds this value.")
p.add_argument(
"--report",
default="work/mask_master/validation_report.json",
help="Output JSON report path.",
)
p.add_argument("--fail-on-overlap", action="store_true", help="Fail if source and sink overlap on same tile pixels.")
return p.parse_args()
def parse_allowed_ids(text: str) -> Optional[set[int]]:
text = text.strip()
if not text:
return None
out: set[int] = set()
for part in text.split(","):
part = part.strip()
if not part:
continue
out.add(int(part))
return out
def tile_key_from_name(name: str) -> Optional[str]:
stem = Path(name).stem
m = _TILE_KEY_RE.match(stem)
return m.group(1) if m else None
def decode_ids(rgb: np.ndarray) -> np.ndarray:
r = rgb[..., 0].astype(np.uint32)
g = rgb[..., 1].astype(np.uint32)
b = rgb[..., 2].astype(np.uint32)
return b + (256 * g) + (65536 * r)
def analyze_mask(path: Path) -> dict:
arr = np.array(Image.open(path).convert("RGB"), dtype=np.uint8)
ids = decode_ids(arr)
u, c = np.unique(ids, return_counts=True)
nonzero = [(int(i), int(n)) for i, n in zip(u.tolist(), c.tolist()) if i != 0]
return {
"path": str(path),
"shape": [int(arr.shape[1]), int(arr.shape[0])],
"unique_count": int(len(u)),
"max_id": int(u[-1]) if len(u) > 0 else 0,
"nonzero_count": int(np.count_nonzero(ids)),
"nonzero_ids": nonzero,
"ids_array": ids,
}
def collect_pngs(path: Path) -> list[Path]:
if not path.exists():
return []
return sorted([p for p in path.glob("*.png") if p.is_file()])
def unexpected_ids(found: list[tuple[int, int]], allowed: Optional[set[int]]) -> list[int]:
if allowed is None:
return []
return sorted([i for i, _ in found if i not in allowed])
def build_overview(files: list[dict]) -> list[dict]:
totals: Dict[int, dict] = {}
for entry in files:
tile_key = str(entry.get("tile_key") or "").strip()
for ident, pixels in entry.get("nonzero_ids", []):
node = totals.setdefault(
int(ident),
{
"id": int(ident),
"total_pixels": 0,
"file_count": 0,
"tile_keys": set(),
},
)
node["total_pixels"] += int(pixels)
node["file_count"] += 1
if tile_key:
node["tile_keys"].add(tile_key)
out = []
for ident in sorted(totals.keys()):
node = totals[ident]
out.append(
{
"id": int(node["id"]),
"total_pixels": int(node["total_pixels"]),
"file_count": int(node["file_count"]),
"tile_keys": sorted(list(node["tile_keys"])),
}
)
return out
def main() -> int:
args = parse_args()
allowed_source = parse_allowed_ids(args.allowed_source_ids)
allowed_sink = parse_allowed_ids(args.allowed_sink_ids)
report = {
"schema_version": 1,
"config": {
"source_dir": args.source_dir,
"sink_dir": args.sink_dir,
"allowed_source_ids": sorted(list(allowed_source)) if allowed_source is not None else [],
"allowed_sink_ids": sorted(list(allowed_sink)) if allowed_sink is not None else [],
"max_id": args.max_id,
"fail_on_overlap": bool(args.fail_on_overlap),
},
"source": [],
"source_overview": [],
"sink": [],
"sink_overview": [],
"overlaps": [],
"issues": [],
}
failed = False
source_by_tile: Dict[str, np.ndarray] = {}
sink_by_tile: Dict[str, np.ndarray] = {}
for p in collect_pngs(Path(args.source_dir)):
a = analyze_mask(p)
key = tile_key_from_name(p.name)
bad_ids = unexpected_ids(a["nonzero_ids"], allowed_source)
if bad_ids:
msg = f"[mask_validate_ids] Source {p.name} has unexpected IDs: {bad_ids}"
report["issues"].append(msg)
print(msg)
failed = True
if args.max_id > 0 and a["max_id"] > args.max_id:
msg = f"[mask_validate_ids] Source {p.name} max_id={a['max_id']} exceeds max-id={args.max_id}"
report["issues"].append(msg)
print(msg)
failed = True
report["source"].append(
{
"path": a["path"],
"tile_key": key or "",
"shape": a["shape"],
"unique_count": a["unique_count"],
"max_id": a["max_id"],
"nonzero_count": a["nonzero_count"],
"nonzero_ids": a["nonzero_ids"],
}
)
if key:
source_by_tile[key] = a["ids_array"]
for p in collect_pngs(Path(args.sink_dir)):
a = analyze_mask(p)
key = tile_key_from_name(p.name)
bad_ids = unexpected_ids(a["nonzero_ids"], allowed_sink)
if bad_ids:
msg = f"[mask_validate_ids] Sink {p.name} has unexpected IDs: {bad_ids}"
report["issues"].append(msg)
print(msg)
failed = True
if args.max_id > 0 and a["max_id"] > args.max_id:
msg = f"[mask_validate_ids] Sink {p.name} max_id={a['max_id']} exceeds max-id={args.max_id}"
report["issues"].append(msg)
print(msg)
failed = True
report["sink"].append(
{
"path": a["path"],
"tile_key": key or "",
"shape": a["shape"],
"unique_count": a["unique_count"],
"max_id": a["max_id"],
"nonzero_count": a["nonzero_count"],
"nonzero_ids": a["nonzero_ids"],
}
)
if key:
sink_by_tile[key] = a["ids_array"]
shared_tiles = sorted(set(source_by_tile.keys()) & set(sink_by_tile.keys()))
for key in shared_tiles:
s = source_by_tile[key]
k = sink_by_tile[key]
if s.shape != k.shape:
msg = f"[mask_validate_ids] Shape mismatch for tile {key}: source{s.shape} sink{k.shape}"
report["issues"].append(msg)
print(msg)
failed = True
continue
overlap = int(np.count_nonzero((s > 0) & (k > 0)))
if overlap > 0:
entry = {"tile_key": key, "overlap_pixels": overlap}
report["overlaps"].append(entry)
msg = f"[mask_validate_ids] Overlap on tile {key}: {overlap} pixels"
print(msg)
if args.fail_on_overlap:
report["issues"].append(msg)
failed = True
report["source_overview"] = build_overview(report["source"])
report["sink_overview"] = build_overview(report["sink"])
out_path = Path(args.report)
out_path.parent.mkdir(parents=True, exist_ok=True)
out_path.write_text(json.dumps(report, indent=2), encoding="utf-8")
print(f"[mask_validate_ids] Report written: {out_path}")
print(f"[mask_validate_ids] Source files: {len(report['source'])}, Sink files: {len(report['sink'])}")
print(
"[mask_validate_ids] Source IDs: "
f"{len(report['source_overview'])}, Sink IDs: {len(report['sink_overview'])}"
)
print(f"[mask_validate_ids] Overlap records: {len(report['overlaps'])}")
return 1 if failed else 0
if __name__ == "__main__":
raise SystemExit(main())
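A worked example of the id = B + 256*G + 65536*R encoding that decode_ids implements:

import numpy as np

rgb = np.array([[[0, 0, 7], [1, 17, 112]]], dtype=np.uint8)  # 1x2 RGB image
r = rgb[..., 0].astype(np.uint32)
g = rgb[..., 1].astype(np.uint32)
b = rgb[..., 2].astype(np.uint32)
print(b + 256 * g + 65536 * r)  # [[    7 70000]]
# And back: 70000 // 65536 = 1 (R), (70000 % 65536) // 256 = 17 (G),
# 70000 % 256 = 112 (B).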

View File

@@ -0,0 +1,158 @@
#!/usr/bin/env python3
"""Build one global orthophoto master from tiled orthophoto JPGs."""
from __future__ import annotations
import argparse
import json
from pathlib import Path
from osgeo import gdal
def parse_args() -> argparse.Namespace:
p = argparse.ArgumentParser(description="Create a global ortho master mosaic from ortho tile JPGs.")
p.add_argument(
"--input-dir",
action="append",
default=[],
help="Input directory containing ortho JPG tiles. Can be specified multiple times.",
)
p.add_argument("--pattern", default="*.jpg", help="Input filename pattern.")
p.add_argument("--out-dir", default="work/mask_master", help="Output directory.")
p.add_argument("--vrt-name", default="ortho_master.vrt", help="Output VRT name.")
p.add_argument("--tif-name", default="ortho_master.tif", help="Output GeoTIFF name.")
p.add_argument("--preview-name", default="ortho_master.jpg", help="Output preview JPG name.")
p.add_argument("--no-preview", action="store_true", help="Skip preview JPG generation.")
p.add_argument(
"--preview-max-size",
type=int,
default=8192,
help="Longest preview edge in pixels (aspect preserved).",
)
p.add_argument(
"--compress",
default="LZW",
help="GeoTIFF compression (e.g., LZW, DEFLATE, JPEG, NONE).",
)
p.add_argument("--resample", default="nearest", help="Resample algorithm for preview (nearest|bilinear|cubic...).")
return p.parse_args()
def collect_inputs(input_dirs: list[str], pattern: str) -> list[Path]:
dirs = [Path(d) for d in input_dirs] if input_dirs else [Path("export_unity/ortho_jpg")]
files: list[Path] = []
for d in dirs:
if not d.exists():
print(f"[ortho_build_master] Warning: input dir missing: {d}")
continue
for f in sorted(d.glob(pattern)):
if f.suffix.lower() != ".jpg":
continue
files.append(f)
return files
def preview_size(width: int, height: int, max_edge: int) -> tuple[int, int]:
if width <= 0 or height <= 0:
return width, height
edge = max(width, height)
if edge <= max_edge:
return width, height
scale = max_edge / float(edge)
return max(1, int(round(width * scale))), max(1, int(round(height * scale)))
def main() -> int:
args = parse_args()
gdal.UseExceptions()
inputs = collect_inputs(args.input_dir, args.pattern)
if not inputs:
raise SystemExit("[ortho_build_master] No input ortho tiles found.")
out_dir = Path(args.out_dir)
out_dir.mkdir(parents=True, exist_ok=True)
vrt_path = out_dir / args.vrt_name
tif_path = out_dir / args.tif_name
preview_path = out_dir / args.preview_name
meta_path = out_dir / "ortho_master_meta.json"
print(f"[ortho_build_master] Input tiles: {len(inputs)}")
print(f"[ortho_build_master] Building VRT: {vrt_path}")
vrt = gdal.BuildVRT(str(vrt_path), [str(p) for p in inputs])
if vrt is None:
raise SystemExit("[ortho_build_master] gdal.BuildVRT failed.")
width = vrt.RasterXSize
height = vrt.RasterYSize
gt = vrt.GetGeoTransform(can_return_null=True)
proj = vrt.GetProjectionRef()
print(f"[ortho_build_master] Translating GeoTIFF: {tif_path}")
tif_ds = gdal.Translate(
str(tif_path),
vrt,
options=gdal.TranslateOptions(
format="GTiff",
creationOptions=[
"TILED=YES",
f"COMPRESS={args.compress}",
"BIGTIFF=IF_SAFER",
],
),
)
if tif_ds is None:
raise SystemExit("[ortho_build_master] gdal.Translate to GeoTIFF failed.")
tif_ds = None
if not args.no_preview:
out_w, out_h = preview_size(width, height, args.preview_max_size)
print(f"[ortho_build_master] Writing preview JPG: {preview_path} ({out_w}x{out_h})")
jpg_ds = gdal.Translate(
str(preview_path),
vrt,
options=gdal.TranslateOptions(
format="JPEG",
width=out_w,
height=out_h,
resampleAlg=args.resample,
creationOptions=["QUALITY=92"],
),
)
if jpg_ds is None:
raise SystemExit("[ortho_build_master] gdal.Translate to JPEG preview failed.")
jpg_ds = None
vrt = None
meta = {
"schema_version": 1,
"inputs": [str(p) for p in inputs],
"outputs": {
"vrt": str(vrt_path),
"tif": str(tif_path),
"preview": None if args.no_preview else str(preview_path),
},
"raster": {
"width": width,
"height": height,
"geotransform": list(gt) if gt else None,
"projection": proj,
},
"settings": {
"compress": args.compress,
"preview_max_size": args.preview_max_size,
"resample": args.resample,
"pattern": args.pattern,
"input_dirs": args.input_dir if args.input_dir else ["export_unity/ortho_jpg"],
},
}
meta_path.write_text(json.dumps(meta, indent=2), encoding="utf-8")
print(f"[ortho_build_master] Wrote metadata: {meta_path}")
return 0
if __name__ == "__main__":
raise SystemExit(main())
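preview_size only ever downscales and preserves the aspect ratio. Worked numbers for the default --preview-max-size of 8192:

width, height, max_edge = 40000, 30000, 8192  # illustrative master size
scale = max_edge / float(max(width, height))  # 0.2048
print(int(round(width * scale)), int(round(height * scale)))  # 8192 6144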

View File

@@ -0,0 +1,80 @@
import unittest
import numpy as np
from osgeo import gdal, ogr, osr
from geodata_pipeline.river_erosion import _build_order_lines, _tile_geotransform
from geodata_pipeline.config import RiverErosionProfileConfig
class TestRiverContinuity(unittest.TestCase):
def setUp(self):
# Create a simple river line crossing x=1000
# River flows from (500, 500) to (1500, 500)
self.srs = osr.SpatialReference()
self.srs.ImportFromEPSG(25832)
self.river_geom = ogr.Geometry(ogr.wkbLineString)
self.river_geom.AddPoint(500.0, 500.0)
self.river_geom.AddPoint(1500.0, 500.0)
# Mock Layer
self.driver = ogr.GetDriverByName("Memory")
self.ds = self.driver.CreateDataSource("wrk")
self.layer = self.ds.CreateLayer("rivers", srs=self.srs)
field_defn = ogr.FieldDefn("ORD_STRA", ogr.OFTInteger)
self.layer.CreateField(field_defn)
feat = ogr.Feature(self.layer.GetLayerDefn())
feat.SetGeometry(self.river_geom)
feat.SetField("ORD_STRA", 5)
self.layer.CreateFeature(feat)
self.profile = RiverErosionProfileConfig() # Defaults
self.transform = osr.CoordinateTransformation(self.srs, self.srs) # Identity
def test_continuity_at_boundary(self):
# Tile A: 0-1000. Mosaic (3x3): -1000 to 2000.
bounds_a = (-1000.0, -1000.0, 2000.0, 2000.0)
# 1 m resolution -> 3001 x 3001 pixels (3 tiles * 1000 m + 1 shared edge pixel)
w_a = 3001
h_a = 3001
# Tile B: 1000-2000. Mosaic (3x3): 0 to 3000.
bounds_b = (0.0, -1000.0, 3000.0, 2000.0)
w_b = 3001
h_b = 3001
# Rasterize A
res_a = _build_order_lines(
[(self.layer, "rivers")],
bounds_a, w_a, h_a,
self.profile, self.transform, self.transform, self.srs
)
# Rasterize B
res_b = _build_order_lines(
[(self.layer, "rivers")],
bounds_b, w_b, h_b,
self.profile, self.transform, self.transform, self.srs
)
# Compare at X=1000 (the tile seam).
# For A: origin_a = -1000, so X=1000 maps to column (1000 - (-1000)) / 1 m = 2000.
col_a = 2000
# For B: origin_b = 0, so X=1000 maps to column (1000 - 0) / 1 m = 1000.
col_b = 1000
# Compare a vertical slice at the seam
slice_a = res_a[:, col_a]
slice_b = res_b[:, col_b]
# They should be identical
diff = np.abs(slice_a - slice_b)
max_diff = np.max(diff)
self.assertEqual(max_diff, 0.0, f"Seam mismatch found! Max diff: {max_diff}")
if __name__ == "__main__":
unittest.main()

tests/test_river_lakes.py Normal file
View File

@@ -0,0 +1,70 @@
import unittest
from unittest.mock import patch, MagicMock
from geodata_pipeline.river_erosion import _find_lake_depths
from geodata_pipeline.config import Config
class TestRiverLakes(unittest.TestCase):
@patch("geodata_pipeline.river_erosion.ogr")
def test_find_lake_depths(self, mock_ogr):
# Mock OGR DataSource and Layer
mock_ds = MagicMock()
mock_layer = MagicMock()
mock_ogr.Open.return_value = mock_ds
mock_ds.GetLayer.return_value = mock_layer
# Mock Feature
mock_feat = MagicMock()
mock_feat.GetField.return_value = 15.5 # Depth_avg
mock_feat.GetFID.return_value = 123
# Mock Geometry
mock_geom = MagicMock()
mock_feat.GetGeometryRef.return_value = mock_geom
# When Distance is called on the feature geometry (transformed), return float
mock_transformed_geom = MagicMock()
mock_geom.Clone.return_value = mock_transformed_geom
mock_transformed_geom.Distance.return_value = 10.0
# Setup layer behavior
# __iter__ must return an iterator, not a bare list.
mock_layer.__iter__.return_value = iter([mock_feat])
mock_layer.GetNextFeature.side_effect = [mock_feat, None]
# Config
cfg = Config.default()
cfg.river_erosion.lakes.enabled = True
cfg.river_erosion.lakes.match_tolerance_m = 100.0
# _find_lake_depths is expected to accept OGR geometries for the manual lakes
# (polygons extracted from the mask), so pass a mocked geometry whose centroid
# lies at (333000.0, 5517000.0).
mock_manual_geom = MagicMock()
mock_manual_geom.Centroid.return_value = mock_manual_geom
mock_manual_geom.Clone.return_value = mock_manual_geom # For transform
mock_manual_geom.GetPoint.return_value = (333000.0, 5517000.0)
mock_srs = MagicMock()
mock_srs.IsSame.return_value = True # Simplify: same CRS
results = _find_lake_depths(
[mock_manual_geom],
mock_srs,
cfg.river_erosion.lakes,
"raw/hydrolakes/HydroLAKES_polys_v10_shp/HydroLAKES_polys_v10.shp"
)
# Expected result: List of (depth, metadata)
# depth should be 15.5
self.assertEqual(len(results), 1)
depth, metadata = results[0]
self.assertEqual(depth, 15.5)
self.assertEqual(metadata["id"], 123)
self.assertEqual(metadata["distance"], 10.0)
if __name__ == "__main__":
unittest.main()