Add CityGML semantics pipeline utilities
This commit is contained in:
193
scripts/split_semantics.py
Normal file
193
scripts/split_semantics.py
Normal file
@@ -0,0 +1,193 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Split triangulated CityJSON files by semantic surface type."""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable
|
||||
|
||||
# Maps a CLI preset name to the CityJSON semantic surface types it keeps.
# Values are sets so membership tests during filtering are O(1).
SEMANTIC_PRESETS = {
    "roof": {"RoofSurface"},
    "wall": {"WallSurface"},
    "ground": {"GroundSurface"},
    "closure": {"ClosureSurface"},
}
|
||||
|
||||
|
||||
def parse_args(argv: Iterable[str] | None = None) -> argparse.Namespace:
    """Parse CLI options for the semantic splitter.

    When *argv* is ``None``, argparse falls back to ``sys.argv``. The returned
    namespace carries ``input_dir``, ``output_dir``, ``targets`` and ``pattern``.
    """
    parser = argparse.ArgumentParser(
        description="Split triangulated CityJSON files into per-semantic outputs (e.g., roof/wall)."
    )
    add = parser.add_argument
    add("--input-dir", type=Path, default=Path("work/cityjson_tri"),
        help="Directory containing triangulated CityJSON files.")
    add("--output-dir", type=Path, default=Path("work/cityjson_split"),
        help="Directory to write filtered CityJSON files.")
    add("--targets", nargs="+", choices=sorted(SEMANTIC_PRESETS), default=["roof", "wall"],
        help="Semantic presets to emit. Defaults to roof and wall.")
    add("--pattern", default="*.city.json",
        help="Glob pattern for input files (defaults to all .city.json files).")
    return parser.parse_args(argv)
|
||||
|
||||
|
||||
def read_json(path: Path) -> dict[str, Any]:
    """Load a UTF-8 JSON document from *path* and return the parsed object."""
    return json.loads(path.read_text(encoding="utf-8"))
|
||||
|
||||
|
||||
def write_json(path: Path, payload: dict[str, Any]) -> None:
    """Serialize *payload* as pretty-printed UTF-8 JSON at *path*.

    Parent directories are created on demand and the file always ends with a
    trailing newline so it diffs cleanly.

    Non-ASCII text (e.g. German attribute values in CityGML-derived data) is
    written verbatim instead of as ``\\uXXXX`` escapes; the stream is already
    UTF-8, so escaping would only bloat the file and hurt readability.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as handle:
        # ensure_ascii=False: keep non-ASCII characters readable in the
        # UTF-8 output instead of escaping every one of them.
        json.dump(payload, handle, ensure_ascii=False, indent=2)
        handle.write("\n")
|
||||
|
||||
|
||||
def base_name(path: Path) -> str:
    """Strip the CityJSON suffix chain from *path* and return the tile id.

    ``X.tri.city.json`` and ``X.city.json`` both map to ``X``; anything else
    falls back to the plain stem.
    """
    for suffix in (".tri.city.json", ".city.json"):
        if path.name.endswith(suffix):
            return path.name[: -len(suffix)]
    return path.stem
|
||||
|
||||
|
||||
def allowed_semantic_indices(semantics: dict[str, Any], allowed_types: set[str]) -> set[int]:
    """Return indices of semantic surfaces whose "type" is in *allowed_types*.

    A missing or null "surfaces" entry yields the empty set.
    """
    indices: set[int] = set()
    for position, surface in enumerate(semantics.get("surfaces") or []):
        if surface.get("type") in allowed_types:
            indices.add(position)
    return indices
|
||||
|
||||
|
||||
def filter_boundaries(
    boundaries: list[Any],
    values: list[Any],
    allowed_indices: set[int],
) -> tuple[list[Any], list[Any]]:
    """Recursively keep only boundaries whose semantic value is allowed.

    *boundaries* and *values* mirror each other structurally (CityJSON
    semantics arrays): a list value means one more level of nesting, a leaf
    value is a surface index or ``None``. Returns the pruned pair; branches
    that end up empty are dropped entirely.
    """
    kept_boundaries: list[Any] = []
    kept_values: list[Any] = []

    for boundary, value in zip(boundaries, values):
        if isinstance(value, list):
            sub_boundaries, sub_values = filter_boundaries(boundary, value, allowed_indices)
            if sub_boundaries:
                kept_boundaries.append(sub_boundaries)
                kept_values.append(sub_values)
        elif value is not None and value in allowed_indices:
            kept_boundaries.append(boundary)
            kept_values.append(value)

    return kept_boundaries, kept_values
|
||||
|
||||
|
||||
def filter_geometry(geometry: dict[str, Any], allowed_types: set[str]) -> dict[str, Any] | None:
    """Return a copy of *geometry* reduced to surfaces of *allowed_types*.

    Returns ``None`` when the geometry lacks semantics/boundaries, when no
    surface type matches, or when nothing survives the filter. The input
    dict is never mutated; "boundaries" and "semantics" are replaced with
    pruned copies.
    """
    semantics = geometry.get("semantics")
    boundaries = geometry.get("boundaries")
    values = semantics.get("values") if semantics else None
    if boundaries is None or values is None:
        return None

    allowed_indices = allowed_semantic_indices(semantics, allowed_types)
    if not allowed_indices:
        return None

    kept_boundaries, kept_values = filter_boundaries(boundaries, values, allowed_indices)
    if not kept_boundaries:
        return None

    result = {**geometry, "boundaries": kept_boundaries}
    result["semantics"] = {**semantics, "values": kept_values}
    return result
|
||||
|
||||
|
||||
def filter_cityobject(cityobject: dict[str, Any], allowed_types: set[str]) -> dict[str, Any] | None:
    """Reduce a CityObject's geometry list to the allowed semantic types.

    Returns a shallow copy carrying only the surviving geometries, or
    ``None`` when no geometry survives.
    """
    kept = [
        filtered
        for geometry in (cityobject.get("geometry") or [])
        if (filtered := filter_geometry(geometry, allowed_types))
    ]
    if not kept:
        return None
    return {**cityobject, "geometry": kept}
|
||||
|
||||
|
||||
def filter_cityjson(cityjson: dict[str, Any], allowed_types: set[str]) -> dict[str, Any] | None:
    """Produce a copy of *cityjson* keeping only objects with allowed surfaces.

    Every top-level key other than "CityObjects" (version, transform,
    vertices, ...) is carried over untouched. Returns ``None`` when no city
    object retains any geometry.
    """
    survivors: dict[str, Any] = {}
    for object_id, cityobject in (cityjson.get("CityObjects") or {}).items():
        kept = filter_cityobject(cityobject, allowed_types)
        if kept:
            survivors[object_id] = kept

    if not survivors:
        return None

    result = {key: value for key, value in cityjson.items() if key != "CityObjects"}
    result["CityObjects"] = survivors
    return result
|
||||
|
||||
|
||||
def process_file(path: Path, targets: list[str], output_dir: Path) -> int:
    """Write one filtered CityJSON file per target for *path*.

    Loads the file once, applies each preset in *targets*, and reports
    skipped targets on stderr. Returns the number of files written.
    """
    source = read_json(path)
    base = base_name(path)
    written = 0

    for target in targets:
        filtered = filter_cityjson(source, SEMANTIC_PRESETS[target])
        if not filtered:
            print(f"[skip] {base} has no geometry for target '{target}'", file=sys.stderr)
            continue
        write_json(output_dir / f"{base}.{target}.city.json", filtered)
        written += 1

    return written
|
||||
|
||||
|
||||
def main(argv: Iterable[str] | None = None) -> int:
    """Entry point: split every matching input file.

    Returns 0 when at least one output file was written, 1 otherwise
    (including the no-inputs case).
    """
    args = parse_args(argv)
    files = sorted(args.input_dir.glob(args.pattern))
    if not files:
        print(f"No input files matched pattern '{args.pattern}' in {args.input_dir}", file=sys.stderr)
        return 1

    total_written = sum(process_file(path, args.targets, args.output_dir) for path in files)

    print(f"Wrote {total_written} files to {args.output_dir}")
    return 0 if total_written else 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit code so shell
    # pipelines and Makefiles can detect failures.
    sys.exit(main())
|
||||
168
scripts/verify_pipeline.py
Normal file
168
scripts/verify_pipeline.py
Normal file
@@ -0,0 +1,168 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Lightweight checks for the CityGML → CityJSON → GLB pipeline."""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Iterable, Sequence
|
||||
|
||||
# Default locations for each stage of the CityGML -> CityJSON -> GLB pipeline.
DEFAULT_RAW_DIR = Path("raw/citygml/lod2")  # source LoD2 CityGML tiles
DEFAULT_CITYJSON_DIR = Path("work/cityjson")  # CityJSON conversions
DEFAULT_CITYJSON_TRI_DIR = Path("work/cityjson_tri")  # cleaned + triangulated
DEFAULT_CITYJSON_SPLIT_DIR = Path("work/cityjson_split")  # roof/wall splits
DEFAULT_GLB_DIR = Path("export_unity/buildings_glb")  # base GLB exports
DEFAULT_GLB_SPLIT_DIR = Path("export_unity/buildings_glb_split")  # split GLB exports
DEFAULT_TILE_INDEX = Path("export_unity/tile_index.csv")  # placement manifest
|
||||
|
||||
|
||||
def parse_args(argv: Iterable[str] | None = None) -> argparse.Namespace:
    """Parse CLI options for the pipeline verifier.

    When *argv* is ``None``, argparse falls back to ``sys.argv``.
    """
    parser = argparse.ArgumentParser(
        description="Validate presence of CityGML/CityJSON/GLB artifacts for the CityGML pipeline."
    )
    add = parser.add_argument
    add("--mode", choices=["cityjson", "glb", "both"], default="both",
        help="Which parts of the pipeline to check.")
    add("--tiles", nargs="*",
        help="Optional list of tile IDs to check (e.g., LoD2_32_328_5511). Defaults to all GML tiles.")
    add("--raw-dir", type=Path, default=DEFAULT_RAW_DIR,
        help="Directory containing LoD2 CityGML tiles.")
    add("--cityjson-dir", type=Path, default=DEFAULT_CITYJSON_DIR,
        help="Directory containing CityJSON exports.")
    add("--tri-dir", type=Path, default=DEFAULT_CITYJSON_TRI_DIR,
        help="Directory containing cleaned + triangulated CityJSON files.")
    add("--split-dir", type=Path, default=DEFAULT_CITYJSON_SPLIT_DIR,
        help="Directory containing split CityJSON files (roof/wall).")
    add("--glb-dir", type=Path, default=DEFAULT_GLB_DIR,
        help="Directory containing base GLB exports.")
    add("--glb-split-dir", type=Path, default=DEFAULT_GLB_SPLIT_DIR,
        help="Directory containing split GLB exports.")
    add("--tile-index", type=Path, default=DEFAULT_TILE_INDEX,
        help="Path to the tile_index.csv placement manifest.")
    return parser.parse_args(argv)
|
||||
|
||||
|
||||
def discover_tiles(raw_dir: Path, provided: Sequence[str] | None) -> list[str]:
    """Resolve the tile IDs to verify.

    An explicit non-empty *provided* list wins (deduplicated and sorted);
    otherwise every ``LoD2_*.gml`` file in *raw_dir* contributes its stem.
    A missing raw directory yields an empty list.
    """
    if provided:
        return sorted(set(provided))
    if not raw_dir.exists():
        return []
    return sorted(gml.stem for gml in raw_dir.glob("LoD2_*.gml"))
|
||||
|
||||
|
||||
def collect_missing(tile_ids: Sequence[str], directory: Path, pattern: str) -> list[Path]:
    """List the expected artifact paths that do not exist on disk.

    *pattern* is a format string containing a ``{tile_id}`` placeholder;
    one path per tile id is checked under *directory*.
    """
    return [
        candidate
        for tile_id in tile_ids
        if not (candidate := directory / pattern.format(tile_id=tile_id)).exists()
    ]
|
||||
|
||||
|
||||
def summarize_missing(label: str, missing: list[Path]) -> str:
    """Render a one-line report for *missing*, previewing at most five paths."""
    message = f"{label}: missing {len(missing)} file(s)"
    preview = ", ".join(str(path) for path in missing[:5])
    if preview:
        message += f" (e.g., {preview})"
    return message
|
||||
|
||||
|
||||
def verify_cityjson(tile_ids: Sequence[str], args: argparse.Namespace) -> list[str]:
    """Check the CityJSON half of the pipeline and describe any gaps.

    Each stage (raw GML, converted, triangulated, roof/wall split)
    contributes one summary line when files are absent; stages with no
    missing files are silent.
    """
    checks = [
        (args.raw_dir, "{tile_id}.gml", "raw CityGML tiles"),
        (args.cityjson_dir, "{tile_id}.city.json", "CityJSON exports"),
        (args.tri_dir, "{tile_id}.tri.city.json", "clean + triangulated CityJSON exports"),
        (args.split_dir, "{tile_id}.roof.city.json", "split roof CityJSON exports"),
        (args.split_dir, "{tile_id}.wall.city.json", "split wall CityJSON exports"),
    ]
    issues: list[str] = []
    for directory, pattern, label in checks:
        missing = collect_missing(tile_ids, directory, pattern)
        if missing:
            issues.append(summarize_missing(label, missing))
    return issues
|
||||
|
||||
|
||||
def verify_glb(tile_ids: Sequence[str], args: argparse.Namespace) -> list[str]:
    """Check the GLB half of the pipeline and describe any gaps.

    Covers the base GLB exports plus the roof/wall split exports; each
    check contributes one summary line when files are absent.
    """
    checks = [
        (args.glb_dir, "{tile_id}.glb", "base GLB exports"),
        (args.glb_split_dir, "{tile_id}_roof.glb", "split roof GLB exports"),
        (args.glb_split_dir, "{tile_id}_wall.glb", "split wall GLB exports"),
    ]
    issues: list[str] = []
    for directory, pattern, label in checks:
        missing = collect_missing(tile_ids, directory, pattern)
        if missing:
            issues.append(summarize_missing(label, missing))
    return issues
|
||||
|
||||
|
||||
def main(argv: Iterable[str] | None = None) -> int:
    """Entry point: run the requested checks.

    Returns 0 when every expected artifact exists, 1 when tiles cannot be
    discovered or any artifact is missing.
    """
    args = parse_args(argv)
    tile_ids = discover_tiles(args.raw_dir, args.tiles)
    if not tile_ids:
        print("No tile IDs found. Add LoD2 GML files or pass --tiles.", file=sys.stderr)
        return 1

    issues: list[str] = []
    if args.tile_index and not args.tile_index.exists():
        issues.append(f"tile_index.csv missing: {args.tile_index}")

    if args.mode in ("cityjson", "both"):
        issues += verify_cityjson(tile_ids, args)
    if args.mode in ("glb", "both"):
        issues += verify_glb(tile_ids, args)

    if issues:
        print("Pipeline verification failed:")
        for issue in issues:
            print(f"- {issue}")
        return 1

    print(f"Pipeline verification passed for {len(tile_ids)} tile(s) in mode='{args.mode}'.")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit code so CI jobs
    # and shell scripts can react to verification failures.
    sys.exit(main())
|
||||
Reference in New Issue
Block a user