Files
GeoData/scripts/verify_pipeline.py

201 lines
6.9 KiB
Python

#!/usr/bin/env python3
"""Lightweight checks for the CityGML → CityJSON → GLB pipeline."""
from __future__ import annotations
import argparse
import sys
from pathlib import Path
from typing import Iterable, Sequence
# Default locations for each stage of the CityGML → CityJSON → GLB pipeline.
DEFAULT_RAW_DIR = Path("raw/citygml/lod2")
DEFAULT_CITYJSON_DIR = Path("work/cityjson_lod2")
DEFAULT_CITYJSON_TRI_DIR = Path("work/cityjson_lod2/tri")
DEFAULT_CITYJSON_SPLIT_DIR = Path("work/cityjson_lod2/split")
DEFAULT_GLB_DIR = Path("export_unity/buildings_tiles")
DEFAULT_GLB_SPLIT_DIR = Path("export_unity/buildings_glb_split")
DEFAULT_TILE_INDEX = Path("export_unity/tile_index.csv")


def parse_args(argv: Iterable[str] | None = None) -> argparse.Namespace:
    """Build the CLI parser and parse *argv* (``None`` means ``sys.argv[1:]``)."""
    parser = argparse.ArgumentParser(
        description="Validate presence of CityGML/CityJSON/GLB artifacts for the CityGML pipeline."
    )
    parser.add_argument(
        "--mode",
        choices=["cityjson", "glb", "both"],
        default="both",
        help="Which parts of the pipeline to check.",
    )
    parser.add_argument(
        "--tiles",
        nargs="*",
        help="Optional list of tile IDs to check (e.g., LoD2_32_328_5511). Defaults to all GML tiles.",
    )
    # Every remaining directory/file option is a plain Path with a default,
    # so declare them as data and register them in one loop.
    path_options = (
        ("--raw-dir", DEFAULT_RAW_DIR, "Directory containing LoD2 CityGML tiles."),
        ("--cityjson-dir", DEFAULT_CITYJSON_DIR, "Directory containing CityJSON exports."),
        ("--tri-dir", DEFAULT_CITYJSON_TRI_DIR, "Directory containing cleaned + triangulated CityJSON files."),
        (
            "--split-dir",
            DEFAULT_CITYJSON_SPLIT_DIR,
            "Directory containing split CityJSON files (roof/wall). Optional; skipped unless --check-split.",
        ),
        ("--glb-dir", DEFAULT_GLB_DIR, "Directory containing base GLB exports."),
        (
            "--glb-split-dir",
            DEFAULT_GLB_SPLIT_DIR,
            "Directory containing split GLB exports. Optional; skipped unless --check-split.",
        ),
        ("--tile-index", DEFAULT_TILE_INDEX, "Path to the tile_index.csv placement manifest."),
    )
    for flag, default, help_text in path_options:
        parser.add_argument(flag, type=Path, default=default, help=help_text)
    parser.add_argument(
        "--check-split",
        action="store_true",
        help="Also verify split roof/wall CityJSON and GLB outputs (current pipeline does not emit them by default).",
    )
    return parser.parse_args(argv)
def discover_tiles(raw_dir: Path, provided: Sequence[str] | None, tile_index: Path | None) -> list[str]:
    """Determine which tile IDs to verify.

    Resolution order:
    1. An explicit ``provided`` list (deduplicated and sorted).
    2. The ``tile_id`` column of ``tile_index``, if the CSV exists and yields rows.
    3. The stems of ``LoD2_*.gml`` files found in ``raw_dir``.

    Returns an empty list when no source yields any tile IDs.
    """
    if provided:
        return sorted(set(provided))
    if tile_index and tile_index.exists():
        import csv  # local import: only needed on this branch

        try:
            with tile_index.open(newline="", encoding="utf-8") as handle:
                # row.get() covers both a missing column and an empty cell.
                tiles = [row["tile_id"] for row in csv.DictReader(handle) if row.get("tile_id")]
        except (OSError, UnicodeDecodeError, csv.Error):
            # Best effort: an unreadable or malformed index silently falls
            # through to the raw_dir scan below (same intent as before, but
            # without the over-broad `except Exception` that hid real bugs).
            tiles = []
        if tiles:
            return sorted(set(tiles))
    if raw_dir.exists():
        return sorted(path.stem for path in raw_dir.glob("LoD2_*.gml"))
    return []
def path_exists(path: Path) -> bool:
    """Return True if *path* is present on disk (file or directory).

    The original implementation had two unreachable branches: a separate
    ``is_dir()`` check (``Path.exists`` already returns True for existing
    directories), and a ``*.city.json`` directory fallback that probed
    ``path / f"{path.stem}.json"`` — which can only exist when ``path``
    itself exists as a directory, in which case the first check has
    already returned True. Behavior is therefore exactly ``path.exists()``.

    NOTE(review): if the intent was to require that a citygml-tools
    ``X.city.json`` output *directory* actually contains its inner JSON
    payload, that would be a behavior change — confirm before tightening.
    """
    return path.exists()
def collect_missing(tile_ids: Sequence[str], directory: Path, pattern: str) -> list[Path]:
    """Return the expected artifact paths (one per tile ID) that are absent.

    ``pattern`` is a format string with a ``{tile_id}`` placeholder, e.g.
    ``"{tile_id}.glb"``.
    """
    missing: list[Path] = []
    for tile_id in tile_ids:
        candidate = directory / pattern.format(tile_id=tile_id)
        if not path_exists(candidate):
            missing.append(candidate)
    return missing
def summarize_missing(label: str, missing: list[Path]) -> str:
    """Format a one-line report for *missing*, listing up to five example paths."""
    if not missing:
        return f"{label}: missing 0 file(s)"
    examples = ", ".join(str(path) for path in missing[:5])
    return f"{label}: missing {len(missing)} file(s) (e.g., {examples})"
def verify_cityjson(tile_ids: Sequence[str], args: argparse.Namespace) -> list[str]:
    """Check raw CityGML plus CityJSON-stage artifacts; return issue summaries.

    Reads ``args.raw_dir``, ``args.cityjson_dir``, ``args.tri_dir``,
    ``args.split_dir`` and ``args.check_split``.
    """
    # (label, directory, filename pattern) triples, checked in order.
    checks = [
        ("raw CityGML tiles", args.raw_dir, "{tile_id}.gml"),
        ("CityJSON exports", args.cityjson_dir, "{tile_id}.city.json"),
        ("clean + triangulated CityJSON exports", args.tri_dir, "{tile_id}.tri.city.json"),
    ]
    if args.check_split:
        checks.append(("split roof CityJSON exports", args.split_dir, "{tile_id}.roof.city.json"))
        checks.append(("split wall CityJSON exports", args.split_dir, "{tile_id}.wall.city.json"))
    issues: list[str] = []
    for label, directory, pattern in checks:
        missing = collect_missing(tile_ids, directory, pattern)
        if missing:
            issues.append(summarize_missing(label, missing))
    return issues
def verify_glb(tile_ids: Sequence[str], args: argparse.Namespace) -> list[str]:
    """Check GLB-stage artifacts; return issue summaries.

    Reads ``args.glb_dir``, ``args.glb_split_dir`` and ``args.check_split``.
    """
    # (label, directory, filename pattern) triples, checked in order.
    checks = [("base GLB exports", args.glb_dir, "{tile_id}.glb")]
    if args.check_split:
        checks.append(("split roof GLB exports", args.glb_split_dir, "{tile_id}_roof.glb"))
        checks.append(("split wall GLB exports", args.glb_split_dir, "{tile_id}_wall.glb"))
    issues: list[str] = []
    for label, directory, pattern in checks:
        missing = collect_missing(tile_ids, directory, pattern)
        if missing:
            issues.append(summarize_missing(label, missing))
    return issues
def main(argv: Iterable[str] | None = None) -> int:
    """Run the requested verification passes.

    Returns 0 when every expected artifact exists, 1 when anything is
    missing or no tile IDs could be discovered.
    """
    args = parse_args(argv)
    tile_ids = discover_tiles(args.raw_dir, args.tiles, args.tile_index)
    if not tile_ids:
        print("No tile IDs found. Add LoD2 GML files or pass --tiles.", file=sys.stderr)
        return 1

    issues: list[str] = []
    if args.tile_index and not args.tile_index.exists():
        issues.append(f"tile_index.csv missing: {args.tile_index}")
    # --mode is restricted to {cityjson, glb, both}, so "not the other one"
    # is equivalent to "this one or both".
    if args.mode != "glb":
        issues.extend(verify_cityjson(tile_ids, args))
    if args.mode != "cityjson":
        issues.extend(verify_glb(tile_ids, args))

    if not issues:
        print(f"Pipeline verification passed for {len(tile_ids)} tile(s) in mode='{args.mode}'.")
        return 0
    print("Pipeline verification failed:")
    for issue in issues:
        print(f"- {issue}")
    return 1
# Propagate main()'s return value as the process exit code so CI jobs
# fail when artifacts are missing.
if __name__ == "__main__":
    sys.exit(main())