Files
GeoData/scripts/mask_split_master.py

248 lines
8.5 KiB
Python

#!/usr/bin/env python3
"""Split global master masks back into tile masks.
Optional filtering modes:
- keep only source/sink tiles that contain non-black pixels
- keep only water tiles that differ from a reference river mask directory
"""
from __future__ import annotations
import argparse
import json
import re
from pathlib import Path
from typing import Dict
import numpy as np
from PIL import Image
# Matches the tile key at the start of a file stem: the literal "dgm" followed by
# digits, then three more underscore-separated digit groups. Only this prefix is
# captured; whatever follows it in the stem is ignored.
_TILE_KEY_RE = re.compile(r"^(dgm\d+_\d+_\d+_\d+)")
# Master masks can exceed PIL's safety threshold; these files are trusted local data.
# Setting the limit to None disables the check for every image opened by this process.
Image.MAX_IMAGE_PIXELS = None
def parse_args() -> argparse.Namespace:
    """Build the command-line parser and parse the process arguments.

    Returns:
        Namespace carrying the string options ``master_dir``, ``out_water_dir``,
        ``out_source_dir``, ``out_sink_dir`` and ``ref_water_dir`` plus the
        boolean switches ``keep_informative_only``, ``prune_existing`` and
        ``write_wld`` (all ``False`` unless given).
    """
    parser = argparse.ArgumentParser(description="Split master masks into per-tile water/source/sink masks.")

    # Directory-valued options as (flag, default, help); registration order is
    # kept so the generated --help output lists them in the same sequence.
    directory_options = (
        ("--master-dir", "work/mask_master", "Directory containing master_meta.json and master images."),
        ("--out-water-dir", "raw/water_masks", "Output directory for water tile masks."),
        ("--out-source-dir", "raw/water_source_masks", "Output directory for source tile masks."),
        ("--out-sink-dir", "raw/water_sink_masks", "Output directory for sink tile masks."),
        ("--ref-water-dir", "work/river_masks", "Reference water mask dir used to keep only changed water tiles."),
    )
    for flag, default, help_text in directory_options:
        parser.add_argument(flag, default=default, help=help_text)

    # Boolean switches as (flag, help).
    switch_options = (
        (
            "--keep-informative-only",
            "Write only informative masks (non-black source/sink; water changed vs ref water dir).",
        ),
        ("--prune-existing", "Remove existing PNG/WLD outputs in out dirs before writing."),
        ("--write-wld", "Write world files for output masks."),
    )
    for flag, help_text in switch_options:
        parser.add_argument(flag, action="store_true", help=help_text)

    return parser.parse_args()
def write_worldfile(path: Path, px: float, py: float, minx: float, maxy: float) -> None:
    """Write a six-line world file to *path*.

    The lines are, in order: ``px``, two ``0.0`` terms, ``py``, then the x and
    y reference coordinates. Numeric values are formatted with 12 decimals.

    Args:
        path: Destination file; overwritten if it exists.
        px: Pixel size along x.
        py: Pixel size along y (presumably negative for north-up rasters;
            confirm against whatever produces the metadata).
        minx: X coordinate of the tile edge the reference point is derived from.
        maxy: Y coordinate of the tile edge the reference point is derived from.
    """
    # Shift by half a pixel so the reference point is a pixel centre rather
    # than the (minx, maxy) corner itself.
    ref_x = minx + (px / 2.0)
    ref_y = maxy + (py / 2.0)
    rows = (
        f"{px:.12f}",
        "0.0",
        "0.0",
        f"{py:.12f}",
        f"{ref_x:.12f}",
        f"{ref_y:.12f}",
    )
    path.write_text("".join(f"{row}\n" for row in rows), encoding="utf-8")
def ensure_dirs(*dirs: Path) -> None:
    """Create every directory in *dirs*, including missing parents.

    Directories that already exist are left untouched.
    """
    for directory in dirs:
        directory.mkdir(exist_ok=True, parents=True)
def tile_key_from_name(name: str) -> str:
    """Extract the tile key from a file name.

    Args:
        name: File name or path; only its stem (final component without the
            last suffix) is examined.

    Returns:
        The prefix captured by ``_TILE_KEY_RE``, or ``""`` when the stem does
        not start with a tile key.
    """
    match = _TILE_KEY_RE.match(Path(name).stem)
    if match is None:
        return ""
    return match.group(1)
def remove_existing_outputs(out_dir: Path) -> None:
    """Delete the ``*.png`` and ``*.wld`` files directly inside *out_dir*.

    Other files and subdirectories are not touched. A missing *out_dir* is a
    no-op.
    """
    if out_dir.exists():
        for pattern in ("*.png", "*.wld"):
            for stale in out_dir.glob(pattern):
                # missing_ok tolerates a file vanishing between glob and unlink.
                stale.unlink(missing_ok=True)
def has_non_black(arr: np.ndarray) -> bool:
    """Return ``True`` when at least one element of *arr* is non-zero."""
    non_zero = arr != 0
    return bool(non_zero.any())
def index_reference_masks(ref_dir: Path) -> Dict[str, Path]:
    """Map tile keys to the reference PNG found for each key in *ref_dir*.

    Files are visited in sorted order; names without a recognisable tile key
    are ignored and, when several files share a key, the first one wins.

    Returns:
        ``{tile_key: png_path}``; empty when *ref_dir* does not exist.
    """
    index: Dict[str, Path] = {}
    if ref_dir.exists():
        for candidate in sorted(ref_dir.glob("*.png")):
            tile_key = tile_key_from_name(candidate.name)
            if tile_key:
                # setdefault keeps the earliest path already stored for this key.
                index.setdefault(tile_key, candidate)
    return index
def resize_nearest(arr: np.ndarray, width: int, height: int) -> np.ndarray:
    """Resize an RGB array to ``height`` x ``width`` with nearest-neighbour sampling.

    Args:
        arr: Image array whose first two axes are rows and columns; it is
            handed to PIL as mode ``"RGB"``.
        width: Target number of columns.
        height: Target number of rows.

    Returns:
        *arr* itself (not a copy) when it already has the requested size,
        otherwise a new ``uint8`` array holding the resampled image.
    """
    if arr.shape[0] != height or arr.shape[1] != width:
        scaled = Image.fromarray(arr, mode="RGB").resize(
            (width, height), resample=Image.Resampling.NEAREST
        )
        return np.array(scaled, dtype=np.uint8)
    return arr
def water_differs_from_reference(water_arr: np.ndarray, key: str, ref_index: Dict[str, Path]) -> bool:
    """Tell whether a water tile differs from its reference mask.

    Args:
        water_arr: Water tile pixels; axis 0 is rows and axis 1 is columns.
        key: Tile key used to look up the reference image.
        ref_index: Mapping of tile key to reference image path.

    Returns:
        ``True`` when no reference exists for *key* or when the reference
        (converted to RGB and resized to the tile's size with nearest-neighbour
        sampling) is not element-wise equal to *water_arr*; ``False`` otherwise.
    """
    ref_path = ref_index.get(key)
    if ref_path is None:
        # No baseline tile: treat as informative so we do not lose manual work.
        return True
    # Open inside a context manager so the file handle is released even if
    # decoding the reference image raises.
    with Image.open(ref_path) as ref_img:
        ref_arr = np.array(ref_img.convert("RGB"), dtype=np.uint8)
    ref_arr = resize_nearest(ref_arr, water_arr.shape[1], water_arr.shape[0])
    return not np.array_equal(water_arr, ref_arr)
def source_name_for_tile(tile: Dict[str, object]) -> str:
    """Choose the output file name for a tile's source mask.

    Preference order: a non-blank ``source_file`` entry (returned stripped),
    then ``<water_stem>_source_mask.png``, then ``<key>_source_mask.png``.
    ``key`` is only read when the other two entries are missing or blank.
    """
    explicit = str(tile.get("source_file") or "").strip()
    if not explicit:
        base = str(tile.get("water_stem") or "").strip() or tile["key"]
        return f"{base}_source_mask.png"
    return explicit
def sink_name_for_tile(tile: Dict[str, object]) -> str:
    """Choose the output file name for a tile's sink mask.

    Preference order: a non-blank ``sink_file`` entry (returned stripped),
    then ``<water_stem>_sink_mask.png``, then ``<key>_sink_mask.png``.
    ``key`` is only read when the other two entries are missing or blank.
    """
    explicit = str(tile.get("sink_file") or "").strip()
    if not explicit:
        base = str(tile.get("water_stem") or "").strip() or tile["key"]
        return f"{base}_sink_mask.png"
    return explicit
def crop_tile(master: np.ndarray, xoff: int, yoff: int, width: int, height: int) -> np.ndarray:
    """Return an independent copy of one tile window from *master*.

    Args:
        master: Array indexed as ``[row, column, ...]``.
        xoff: First column of the window.
        yoff: First row of the window.
        width: Number of columns to take.
        height: Number of rows to take.

    Returns:
        A copy of the window, so later edits do not write through to *master*.
        Standard slicing applies: a window reaching past the array edge is
        truncated rather than rejected.
    """
    row_span = slice(yoff, yoff + height)
    col_span = slice(xoff, xoff + width)
    return master[row_span, col_span].copy()
def _load_rgb_array(path: Path) -> np.ndarray:
    """Read the image at *path*, convert it to RGB and return it as a uint8 array."""
    # The context manager releases the file handle once the pixels are copied
    # out, including when decoding fails part-way.
    with Image.open(path) as img:
        return np.array(img.convert("RGB"), dtype=np.uint8)


def _write_tile_mask(arr: np.ndarray, out_path: Path, tile: dict, px: float, py: float, write_wld: bool) -> None:
    """Save one tile mask as an RGB image, optionally with a ``.wld`` file beside it."""
    Image.fromarray(arr, mode="RGB").save(out_path)
    if write_wld:
        # minx/maxy are only read when a world file is requested, so metadata
        # without them still works for plain image output.
        minx = float(tile["minx"])
        maxy = float(tile["maxy"])
        write_worldfile(out_path.with_suffix(".wld"), px, py, minx, maxy)


def main() -> int:
    """Split the master water/source/sink images into per-tile mask files.

    Reads ``master_meta.json`` from the master directory. Its ``master`` entry
    supplies ``px``, ``py`` and the three master image file names; its
    ``tiles`` entry lists one record per tile with ``key``, ``xoff``, ``yoff``,
    ``width``, ``height`` and optionally ``water_file``, ``source_file``,
    ``sink_file``, ``water_stem``, ``minx`` and ``maxy``.

    With ``--keep-informative-only``, source/sink tiles are written only when
    they contain a non-zero pixel and water tiles only when they differ from
    the reference mask directory.

    Returns:
        ``0`` on success.

    Raises:
        SystemExit: If ``master_meta.json`` is missing.
    """
    args = parse_args()

    master_dir = Path(args.master_dir)
    meta_path = master_dir / "master_meta.json"
    if not meta_path.exists():
        raise SystemExit(f"[mask_split_master] Missing meta: {meta_path}")

    meta = json.loads(meta_path.read_text(encoding="utf-8"))
    master = meta["master"]
    tiles = meta["tiles"]
    px = float(master["px"])
    py = float(master["py"])

    water_master = _load_rgb_array(master_dir / master["water_master"])
    source_master = _load_rgb_array(master_dir / master["source_master"])
    sink_master = _load_rgb_array(master_dir / master["sink_master"])

    out_water = Path(args.out_water_dir)
    out_source = Path(args.out_source_dir)
    out_sink = Path(args.out_sink_dir)
    ensure_dirs(out_water, out_source, out_sink)

    if args.prune_existing:
        for out_dir in (out_water, out_source, out_sink):
            remove_existing_outputs(out_dir)

    ref_index = index_reference_masks(Path(args.ref_water_dir))

    written = {"water": 0, "source": 0, "sink": 0}
    skipped = {"water": 0, "source": 0, "sink": 0}

    for tile in tiles:
        xoff = int(tile["xoff"])
        yoff = int(tile["yoff"])
        width = int(tile["width"])
        height = int(tile["height"])

        water_arr = crop_tile(water_master, xoff, yoff, width, height)
        source_arr = crop_tile(source_master, xoff, yoff, width, height)
        sink_arr = crop_tile(sink_master, xoff, yoff, width, height)

        water_name = str(tile.get("water_file") or f"{tile['key']}_mask_viz.png")
        source_name = source_name_for_tile(tile)
        sink_name = sink_name_for_tile(tile)

        write_water = True
        write_source = True
        write_sink = True
        if args.keep_informative_only:
            write_source = has_non_black(source_arr)
            write_sink = has_non_black(sink_arr)
            write_water = water_differs_from_reference(water_arr, str(tile["key"]), ref_index)

        # One entry per mask kind, processed in the fixed order water, source, sink.
        jobs = (
            ("water", water_arr, out_water / water_name, write_water),
            ("source", source_arr, out_source / source_name, write_source),
            ("sink", sink_arr, out_sink / sink_name, write_sink),
        )
        for kind, arr, out_path, should_write in jobs:
            if should_write:
                _write_tile_mask(arr, out_path, tile, px, py, args.write_wld)
                written[kind] += 1
            else:
                skipped[kind] += 1

    total_tiles = len(tiles)
    print(f"[mask_split_master] Tiles processed: {total_tiles}")
    print(f"[mask_split_master] Water written: {written['water']}, skipped: {skipped['water']}")
    print(f"[mask_split_master] Source written: {written['source']}, skipped: {skipped['source']}")
    print(f"[mask_split_master] Sink written: {written['sink']}, skipped: {skipped['sink']}")
    print(f"[mask_split_master] Water dir: {out_water}")
    print(f"[mask_split_master] Source dir: {out_source}")
    print(f"[mask_split_master] Sink dir: {out_sink}")
    return 0
# Script entry point: run main() and use its return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())