Files
GeoData/scripts/verify_pipeline.py

169 lines
5.7 KiB
Python

#!/usr/bin/env python3
"""Lightweight checks for the CityGML → CityJSON → GLB pipeline."""
from __future__ import annotations
import argparse
import sys
from pathlib import Path
from typing import Iterable, Sequence
# Default artifact locations. All of them are relative paths, so they are
# resolved against the current working directory when the script runs.

# Input: raw LoD2 CityGML tiles.
DEFAULT_RAW_DIR = Path("raw", "citygml", "lod2")

# Intermediate CityJSON products (plain, triangulated, split).
DEFAULT_CITYJSON_DIR = Path("work", "cityjson")
DEFAULT_CITYJSON_TRI_DIR = Path("work", "cityjson_tri")
DEFAULT_CITYJSON_SPLIT_DIR = Path("work", "cityjson_split")

# Final exports: GLB files plus the tile placement manifest.
DEFAULT_GLB_DIR = Path("export_unity", "buildings_glb")
DEFAULT_GLB_SPLIT_DIR = Path("export_unity", "buildings_glb_split")
DEFAULT_TILE_INDEX = Path("export_unity", "tile_index.csv")
def parse_args(argv: Iterable[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the pipeline verifier.

    Args:
        argv: Argument strings to parse. ``None`` lets argparse fall back to
            ``sys.argv[1:]``.

    Returns:
        Namespace carrying ``mode``, ``tiles``, ``raw_dir``, ``cityjson_dir``,
        ``tri_dir``, ``split_dir``, ``glb_dir``, ``glb_split_dir`` and
        ``tile_index``.
    """
    cli = argparse.ArgumentParser(
        description="Validate presence of CityGML/CityJSON/GLB artifacts for the CityGML pipeline."
    )
    cli.add_argument(
        "--mode",
        choices=["cityjson", "glb", "both"],
        default="both",
        help="Which parts of the pipeline to check.",
    )
    cli.add_argument(
        "--tiles",
        nargs="*",
        help="Optional list of tile IDs to check (e.g., LoD2_32_328_5511). Defaults to all GML tiles.",
    )

    # Every remaining option is a filesystem location with the same shape:
    # (flag, default path, help text). Registration order is kept so the
    # generated --help output lists them in the same sequence.
    path_options = (
        ("--raw-dir", DEFAULT_RAW_DIR, "Directory containing LoD2 CityGML tiles."),
        ("--cityjson-dir", DEFAULT_CITYJSON_DIR, "Directory containing CityJSON exports."),
        (
            "--tri-dir",
            DEFAULT_CITYJSON_TRI_DIR,
            "Directory containing cleaned + triangulated CityJSON files.",
        ),
        (
            "--split-dir",
            DEFAULT_CITYJSON_SPLIT_DIR,
            "Directory containing split CityJSON files (roof/wall).",
        ),
        ("--glb-dir", DEFAULT_GLB_DIR, "Directory containing base GLB exports."),
        ("--glb-split-dir", DEFAULT_GLB_SPLIT_DIR, "Directory containing split GLB exports."),
        ("--tile-index", DEFAULT_TILE_INDEX, "Path to the tile_index.csv placement manifest."),
    )
    for flag, fallback, description in path_options:
        cli.add_argument(flag, type=Path, default=fallback, help=description)

    return cli.parse_args(argv)
def discover_tiles(raw_dir: Path, provided: Sequence[str] | None) -> list[str]:
    """Return the sorted list of tile IDs to verify.

    Args:
        raw_dir: Directory scanned for ``LoD2_*.gml`` files when no IDs are
            given explicitly.
        provided: Tile IDs supplied by the caller; ``None`` or an empty
            sequence triggers discovery from ``raw_dir``.

    Returns:
        The de-duplicated, sorted ``provided`` IDs when any were given.
        Otherwise the sorted file stems of the matching GML files, or an
        empty list when ``raw_dir`` does not exist.
    """
    if provided:
        # Explicit IDs win; collapse duplicates before sorting.
        unique_ids = list(set(provided))
        unique_ids.sort()
        return unique_ids

    if raw_dir.exists():
        stems = [gml_path.stem for gml_path in raw_dir.glob("LoD2_*.gml")]
        stems.sort()
        return stems

    return []
def collect_missing(tile_ids: Sequence[str], directory: Path, pattern: str) -> list[Path]:
    """List the expected per-tile paths that are absent from ``directory``.

    Args:
        tile_ids: Tile identifiers to check, in the order results are reported.
        directory: Directory the expected files should live in.
        pattern: ``str.format`` template for the file name; it is formatted
            with the keyword ``tile_id``.

    Returns:
        ``directory / <formatted name>`` for every tile whose path does not
        exist, in ``tile_ids`` order.
    """
    absent: list[Path] = []
    for tile_id in tile_ids:
        candidate = directory / pattern.format(tile_id=tile_id)
        if not candidate.exists():
            absent.append(candidate)
    return absent
def summarize_missing(label: str, missing: list[Path]) -> str:
    """Format a one-line report about missing files.

    Args:
        label: Human-readable name of the artifact group.
        missing: Paths that were not found.

    Returns:
        ``"<label>: missing <count> file(s)"``, followed by an
        ``" (e.g., ...)"`` suffix listing at most the first five paths when
        there is anything to show.
    """
    # Cap the preview at five entries to keep the report line short.
    preview = ", ".join(map(str, missing[:5]))
    suffix = f" (e.g., {preview})" if preview else ""
    return f"{label}: missing {len(missing)} file(s){suffix}"
def verify_cityjson(tile_ids: Sequence[str], args: argparse.Namespace) -> list[str]:
    """Check that every CityGML/CityJSON artifact exists for each tile.

    Args:
        tile_ids: Tile identifiers to check.
        args: Parsed options; reads ``raw_dir``, ``cityjson_dir``,
            ``tri_dir`` and ``split_dir``.

    Returns:
        One summary line per artifact group with at least one missing file,
        in the order the groups are listed below. Empty when nothing is
        missing.
    """
    # (report label, directory, file-name template) for each artifact group.
    checks = (
        ("raw CityGML tiles", args.raw_dir, "{tile_id}.gml"),
        ("CityJSON exports", args.cityjson_dir, "{tile_id}.city.json"),
        ("clean + triangulated CityJSON exports", args.tri_dir, "{tile_id}.tri.city.json"),
        ("split roof CityJSON exports", args.split_dir, "{tile_id}.roof.city.json"),
        ("split wall CityJSON exports", args.split_dir, "{tile_id}.wall.city.json"),
    )

    problems: list[str] = []
    for label, directory, pattern in checks:
        absent = collect_missing(tile_ids, directory, pattern)
        if absent:
            problems.append(summarize_missing(label, absent))
    return problems
def verify_glb(tile_ids: Sequence[str], args: argparse.Namespace) -> list[str]:
    """Check that every GLB export exists for each tile.

    Args:
        tile_ids: Tile identifiers to check.
        args: Parsed options; reads ``glb_dir`` and ``glb_split_dir``.

    Returns:
        One summary line per export group (base, split roof, split wall)
        with at least one missing file, in that order. Empty when nothing
        is missing.
    """
    # (report label, directory, file-name template) for each export group.
    checks = (
        ("base GLB exports", args.glb_dir, "{tile_id}.glb"),
        ("split roof GLB exports", args.glb_split_dir, "{tile_id}_roof.glb"),
        ("split wall GLB exports", args.glb_split_dir, "{tile_id}_wall.glb"),
    )

    problems: list[str] = []
    for label, directory, pattern in checks:
        absent = collect_missing(tile_ids, directory, pattern)
        if absent:
            problems.append(summarize_missing(label, absent))
    return problems
def main(argv: Iterable[str] | None = None) -> int:
    """Run the pipeline verification and report the outcome.

    Args:
        argv: Command-line arguments, forwarded to ``parse_args``.

    Returns:
        ``0`` when every requested check passes; ``1`` when no tile IDs
        could be determined or at least one artifact is missing.
    """
    options = parse_args(argv)

    tiles = discover_tiles(options.raw_dir, options.tiles)
    if not tiles:
        print("No tile IDs found. Add LoD2 GML files or pass --tiles.", file=sys.stderr)
        return 1

    problems: list[str] = []

    # The placement manifest is checked regardless of the selected mode.
    index_path = options.tile_index
    if index_path and not index_path.exists():
        problems.append(f"tile_index.csv missing: {index_path}")

    mode = options.mode
    if mode in ("cityjson", "both"):
        problems.extend(verify_cityjson(tiles, options))
    if mode in ("glb", "both"):
        problems.extend(verify_glb(tiles, options))

    if not problems:
        print(f"Pipeline verification passed for {len(tiles)} tile(s) in mode='{mode}'.")
        return 0

    print("Pipeline verification failed:")
    for problem in problems:
        print(f"- {problem}")
    return 1
# Script entry point: propagate main()'s return value as the process exit code.
if __name__ == "__main__":
    raise SystemExit(main())