#!/usr/bin/env python3
"""Lightweight checks for the CityGML → CityJSON → GLB pipeline."""

from __future__ import annotations

import argparse
import sys
from pathlib import Path
from typing import Iterable, Sequence

DEFAULT_RAW_DIR = Path("raw/citygml/lod2")
DEFAULT_CITYJSON_DIR = Path("work/cityjson")
DEFAULT_CITYJSON_TRI_DIR = Path("work/cityjson_tri")
DEFAULT_CITYJSON_SPLIT_DIR = Path("work/cityjson_split")
DEFAULT_GLB_DIR = Path("export_unity/buildings_glb")
DEFAULT_GLB_SPLIT_DIR = Path("export_unity/buildings_glb_split")
DEFAULT_TILE_INDEX = Path("export_unity/tile_index.csv")

# Each check is (report label, argparse attribute holding the directory,
# filename pattern).  Order matters: it is the order issues are reported in.
_CITYJSON_CHECKS: tuple[tuple[str, str, str], ...] = (
    ("raw CityGML tiles", "raw_dir", "{tile_id}.gml"),
    ("CityJSON exports", "cityjson_dir", "{tile_id}.city.json"),
    ("clean + triangulated CityJSON exports", "tri_dir", "{tile_id}.tri.city.json"),
    ("split roof CityJSON exports", "split_dir", "{tile_id}.roof.city.json"),
    ("split wall CityJSON exports", "split_dir", "{tile_id}.wall.city.json"),
)
_GLB_CHECKS: tuple[tuple[str, str, str], ...] = (
    ("base GLB exports", "glb_dir", "{tile_id}.glb"),
    ("split roof GLB exports", "glb_split_dir", "{tile_id}_roof.glb"),
    ("split wall GLB exports", "glb_split_dir", "{tile_id}_wall.glb"),
)


def parse_args(argv: Iterable[str] | None = None) -> argparse.Namespace:
    """Parse command-line options for the pipeline check.

    Args:
        argv: Argument strings to parse; ``None`` lets argparse read
            ``sys.argv[1:]``.

    Returns:
        Namespace with ``mode``, ``tiles``, ``raw_dir``, ``cityjson_dir``,
        ``tri_dir``, ``split_dir``, ``glb_dir``, ``glb_split_dir`` and
        ``tile_index``.
    """
    parser = argparse.ArgumentParser(
        description="Validate presence of CityGML/CityJSON/GLB artifacts for the CityGML pipeline."
    )
    parser.add_argument(
        "--mode",
        choices=["cityjson", "glb", "both"],
        default="both",
        help="Which parts of the pipeline to check.",
    )
    parser.add_argument(
        "--tiles",
        nargs="*",
        help="Optional list of tile IDs to check (e.g., LoD2_32_328_5511). Defaults to all GML tiles.",
    )
    parser.add_argument(
        "--raw-dir",
        type=Path,
        default=DEFAULT_RAW_DIR,
        help="Directory containing LoD2 CityGML tiles.",
    )
    parser.add_argument(
        "--cityjson-dir",
        type=Path,
        default=DEFAULT_CITYJSON_DIR,
        help="Directory containing CityJSON exports.",
    )
    parser.add_argument(
        "--tri-dir",
        type=Path,
        default=DEFAULT_CITYJSON_TRI_DIR,
        help="Directory containing cleaned + triangulated CityJSON files.",
    )
    parser.add_argument(
        "--split-dir",
        type=Path,
        default=DEFAULT_CITYJSON_SPLIT_DIR,
        help="Directory containing split CityJSON files (roof/wall).",
    )
    parser.add_argument(
        "--glb-dir",
        type=Path,
        default=DEFAULT_GLB_DIR,
        help="Directory containing base GLB exports.",
    )
    parser.add_argument(
        "--glb-split-dir",
        type=Path,
        default=DEFAULT_GLB_SPLIT_DIR,
        help="Directory containing split GLB exports.",
    )
    parser.add_argument(
        "--tile-index",
        type=Path,
        default=DEFAULT_TILE_INDEX,
        help="Path to the tile_index.csv placement manifest.",
    )
    return parser.parse_args(argv)


def discover_tiles(raw_dir: Path, provided: Sequence[str] | None) -> list[str]:
    """Return the sorted tile IDs to verify.

    Explicitly provided IDs win and are de-duplicated.  Otherwise the IDs are
    the stems of the ``LoD2_*.gml`` entries found directly in ``raw_dir``; a
    missing ``raw_dir`` yields an empty list.
    """
    if provided:
        return sorted(set(provided))
    if not raw_dir.exists():
        return []
    return sorted(path.stem for path in raw_dir.glob("LoD2_*.gml"))


def path_exists(path: Path) -> bool:
    """Return True when ``path`` exists as a file or as a directory.

    The original note here says citygml-tools may output a directory named
    ``*.city.json`` containing a ``*.json``; such a wrapper directory counts
    as present and its contents are not inspected.

    The earlier implementation followed ``path.exists()`` with an ``is_dir()``
    test and a probe for a child of ``path``.  Both could only run when
    ``path`` did not exist, so neither could ever succeed; they were removed
    without changing the result.
    """
    return path.exists()


def collect_missing(tile_ids: Sequence[str], directory: Path, pattern: str) -> list[Path]:
    """List the expected artifact paths that are absent.

    Args:
        tile_ids: Tile identifiers substituted into ``pattern``.
        directory: Directory the artifacts are expected in.
        pattern: ``str.format`` template with a ``{tile_id}`` placeholder.

    Returns:
        Missing paths, in the order of ``tile_ids``.
    """
    expected = (directory / pattern.format(tile_id=tile_id) for tile_id in tile_ids)
    return [path for path in expected if not path_exists(path)]


def summarize_missing(label: str, missing: list[Path]) -> str:
    """Format a one-line summary of ``missing``, quoting at most five paths."""
    sample = ", ".join(str(path) for path in missing[:5])
    examples = f" (e.g., {sample})" if sample else ""
    return f"{label}: missing {len(missing)} file(s){examples}"


def _run_checks(
    tile_ids: Sequence[str],
    args: argparse.Namespace,
    checks: Sequence[tuple[str, str, str]],
) -> list[str]:
    """Run each (label, directory attribute, pattern) check; return issue lines."""
    issues: list[str] = []
    for label, dir_attr, pattern in checks:
        missing = collect_missing(tile_ids, getattr(args, dir_attr), pattern)
        if missing:
            issues.append(summarize_missing(label, missing))
    return issues


def verify_cityjson(tile_ids: Sequence[str], args: argparse.Namespace) -> list[str]:
    """Check the raw GML, CityJSON, triangulated and split roof/wall files.

    Returns one summary line per artifact kind that has missing files.
    """
    return _run_checks(tile_ids, args, _CITYJSON_CHECKS)


def verify_glb(tile_ids: Sequence[str], args: argparse.Namespace) -> list[str]:
    """Check the base GLB and the split roof/wall GLB files.

    Returns one summary line per artifact kind that has missing files.
    """
    return _run_checks(tile_ids, args, _GLB_CHECKS)


def main(argv: Iterable[str] | None = None) -> int:
    """Run the selected checks and return a process exit code.

    Returns:
        0 when every expected artifact is present; 1 when no tile IDs could
        be determined or when any artifact (including the tile index) is
        missing.
    """
    args = parse_args(argv)
    tile_ids = discover_tiles(args.raw_dir, args.tiles)
    if not tile_ids:
        print("No tile IDs found. Add LoD2 GML files or pass --tiles.", file=sys.stderr)
        return 1

    issues: list[str] = []
    # The tile index is checked regardless of --mode.
    if args.tile_index and not args.tile_index.exists():
        issues.append(f"tile_index.csv missing: {args.tile_index}")
    if args.mode in ("cityjson", "both"):
        issues.extend(verify_cityjson(tile_ids, args))
    if args.mode in ("glb", "both"):
        issues.extend(verify_glb(tile_ids, args))

    if issues:
        print("Pipeline verification failed:")
        for issue in issues:
            print(f"- {issue}")
        return 1

    print(f"Pipeline verification passed for {len(tile_ids)} tile(s) in mode='{args.mode}'.")
    return 0


if __name__ == "__main__":
    sys.exit(main())