#!/usr/bin/env python3
"""Drop faces that reference out-of-range vertex indices in CityJSON files."""

from __future__ import annotations

import argparse
import json
import sys
from pathlib import Path
from typing import Any, Iterable

# Number of list levels that sit above an individual surface in the
# "boundaries" array of each surface-bearing geometry type. It is only used
# for geometries without semantic values, where there is no parallel "values"
# array to reveal the nesting.
_SURFACE_LIST_DEPTH = {
    "MultiSurface": 1,
    "CompositeSurface": 1,
    "Solid": 2,
    "MultiSolid": 3,
    "CompositeSolid": 3,
}


def parse_args(argv: Iterable[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options.

    Args:
        argv: Argument strings to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        Namespace with ``input_dir``, ``output_dir`` and ``pattern``.
    """
    parser = argparse.ArgumentParser(
        description="Remove faces referencing invalid vertex indices from CityJSON files."
    )
    parser.add_argument(
        "--input-dir",
        type=Path,
        default=Path("work/cityjson"),
        help="Directory containing CityJSON exports (files or citygml-tools folders).",
    )
    parser.add_argument(
        "--output-dir",
        type=Path,
        default=Path("work/cityjson_clean"),
        help="Directory to write cleaned CityJSON files.",
    )
    parser.add_argument(
        "--pattern",
        default="**/*.json",
        help="Glob pattern for input files (defaults to any .json under the input dir).",
    )
    return parser.parse_args(argv)


def read_json(path: Path) -> dict[str, Any]:
    """Load and return the JSON document stored at *path* (read as UTF-8)."""
    with path.open("r", encoding="utf-8") as handle:
        return json.load(handle)


def write_json(path: Path, payload: dict[str, Any]) -> None:
    """Write *payload* to *path* as indented, ASCII-escaped JSON.

    Missing parent directories are created, and a trailing newline is
    appended after the document.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as handle:
        json.dump(payload, handle, ensure_ascii=True, indent=2)
        handle.write("\n")


def base_name(path: Path) -> str:
    """Return the file name of *path* without its ``.city.json``/``.json`` suffix.

    Names with neither suffix fall back to ``path.stem``.
    """
    name = path.name
    # Check the longer suffix first so "x.city.json" becomes "x", not "x.city".
    for suffix in (".city.json", ".json"):
        if name.endswith(suffix):
            return name[: -len(suffix)]
    return path.stem


def resolve_input_file(path: Path) -> Path | None:
    """Locate the JSON file that *path* stands for.

    A regular file resolves to itself. A directory resolves to
    ``<dir>/<dir stem>.json`` when that file exists, otherwise to the only
    ``*.json`` regular file directly inside it.

    Returns:
        The resolved file, or ``None`` when nothing unambiguous is found.
    """
    if path.is_file():
        return path
    if path.is_dir():
        candidate = path / f"{path.stem}.json"
        if candidate.is_file():
            return candidate
        # Only regular files qualify; a sub-directory named "*.json" cannot
        # be opened as a document.
        matches = [match for match in path.glob("*.json") if match.is_file()]
        if len(matches) == 1:
            return matches[0]
    return None


def boundary_valid(boundary: Any, vertex_count: int) -> bool:
    """Report whether every vertex index nested in *boundary* is in range.

    Args:
        boundary: A vertex index or an arbitrarily nested list of indices.
        vertex_count: Number of available vertices; valid indices are
            ``0 <= index < vertex_count``.

    Returns:
        ``True`` when all leaves are in-range integers (an empty list is
        vacuously valid); ``False`` for out-of-range indices and for any
        leaf that is not an integer.
    """
    # bool is a subclass of int, so a JSON true/false must be rejected
    # explicitly or it would pass as index 1/0.
    if isinstance(boundary, bool):
        return False
    if isinstance(boundary, int):
        return 0 <= boundary < vertex_count
    if isinstance(boundary, list):
        return all(boundary_valid(item, vertex_count) for item in boundary)
    return False


def filter_boundaries(
    boundaries: list[Any], values: list[Any], vertex_count: int
) -> tuple[list[Any], list[Any]]:
    """Filter *boundaries* and the parallel semantic *values* together.

    The nesting of *values* drives the recursion: where a value is a list,
    the matching boundary is descended into; where it is a scalar (an index
    or ``None``), the matching boundary is kept only if all of its vertex
    indices are valid. Nested groups that end up empty are removed.

    Args:
        boundaries: Boundary array, or a nested part of one.
        values: Semantic values parallel to *boundaries*. If it is shorter
            than *boundaries*, the missing entries are treated as ``None``
            so the extra boundaries are still checked rather than discarded.
        vertex_count: Number of available vertices.

    Returns:
        ``(filtered_boundaries, filtered_values)``, kept in lock-step.
    """
    filtered_boundaries: list[Any] = []
    filtered_values: list[Any] = []
    value_count = len(values)
    for index, boundary in enumerate(boundaries):
        # Pad instead of truncating: zip() would silently drop every
        # boundary beyond the end of a short values list.
        value = values[index] if index < value_count else None
        if isinstance(value, list):
            if not isinstance(boundary, list):
                # Structure mismatch between values and boundaries; this
                # entry cannot be interpreted, so it is dropped.
                continue
            nested_boundaries, nested_values = filter_boundaries(
                boundary, value, vertex_count
            )
            if nested_boundaries:
                filtered_boundaries.append(nested_boundaries)
                filtered_values.append(nested_values)
            continue
        if boundary_valid(boundary, vertex_count):
            filtered_boundaries.append(boundary)
            filtered_values.append(value)
    return filtered_boundaries, filtered_values


def _filter_untagged_boundaries(
    boundaries: list[Any], depth: int, vertex_count: int
) -> list[Any]:
    """Filter surfaces located *depth* list levels below *boundaries*.

    Used when no semantic values exist. Entries at the surface level are
    kept only if all their indices are valid; intermediate groups that end
    up empty (or are not lists) are removed.
    """
    if depth <= 1:
        return [item for item in boundaries if boundary_valid(item, vertex_count)]
    kept: list[Any] = []
    for group in boundaries:
        if not isinstance(group, list):
            continue
        nested = _filter_untagged_boundaries(group, depth - 1, vertex_count)
        if nested:
            kept.append(nested)
    return kept


def filter_geometry(geometry: dict[str, Any], vertex_count: int) -> dict[str, Any] | None:
    """Return a copy of *geometry* without faces that use invalid indices.

    With semantic values present, boundaries and values are filtered
    together. Without them, surfaces are filtered according to the nesting
    implied by the geometry ``type``. Geometries with no ``boundaries``, or
    with no semantic values and a type not listed in ``_SURFACE_LIST_DEPTH``,
    are returned unchanged.

    Args:
        geometry: A geometry object; it is not modified.
        vertex_count: Number of available vertices.

    Returns:
        The filtered geometry (a shallow copy with fresh ``boundaries`` and,
        where applicable, ``semantics``), the original object when nothing
        could be checked, or ``None`` when no boundary survives.
    """
    boundaries = geometry.get("boundaries")
    if boundaries is None:
        return geometry

    semantics = geometry.get("semantics")
    values = semantics.get("values") if semantics else None

    if values is None:
        # No semantic values to mirror, so fall back to the geometry type
        # to find the level at which individual surfaces live.
        geometry_type = geometry.get("type")
        depth = (
            _SURFACE_LIST_DEPTH.get(geometry_type)
            if isinstance(geometry_type, str)
            else None
        )
        if depth is None:
            return geometry
        kept = _filter_untagged_boundaries(boundaries, depth, vertex_count)
        if not kept:
            return None
        filtered_geometry = dict(geometry)
        filtered_geometry["boundaries"] = kept
        return filtered_geometry

    filtered_boundaries, filtered_values = filter_boundaries(
        boundaries, values, vertex_count
    )
    if not filtered_boundaries:
        return None
    filtered_geometry = dict(geometry)
    filtered_geometry["boundaries"] = filtered_boundaries
    # Copy the semantics dict so the input document is left untouched.
    filtered_geometry["semantics"] = dict(semantics)
    filtered_geometry["semantics"]["values"] = filtered_values
    return filtered_geometry


def filter_cityobject(cityobject: dict[str, Any], vertex_count: int) -> dict[str, Any] | None:
    """Return a copy of *cityobject* with every geometry filtered.

    Objects that carry no geometry at all are kept as they are: they have
    nothing to clean, and removing them would orphan any objects that refer
    to them. Objects whose geometries are all removed by filtering are
    dropped; references to them from other objects are not rewritten.

    Args:
        cityobject: A city object; it is not modified.
        vertex_count: Number of available vertices.

    Returns:
        A shallow copy of the object, or ``None`` when it had geometry but
        none of it survived.
    """
    geometries = cityobject.get("geometry")
    if not geometries:
        return dict(cityobject)

    filtered_geometries = []
    for geometry in geometries:
        filtered_geometry = filter_geometry(geometry, vertex_count)
        if filtered_geometry:
            filtered_geometries.append(filtered_geometry)
    if not filtered_geometries:
        return None

    filtered_object = dict(cityobject)
    filtered_object["geometry"] = filtered_geometries
    return filtered_object


def clean_cityjson(cityjson: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of *cityjson* whose city objects have been filtered.

    The vertex count is taken from the top-level ``vertices`` list (zero
    when absent). All other top-level members are carried over; the
    ``CityObjects`` member is rebuilt and always present in the result.
    """
    vertex_count = len(cityjson.get("vertices") or [])
    cityobjects = cityjson.get("CityObjects") or {}

    cleaned_objects = {}
    for object_id, cityobject in cityobjects.items():
        cleaned_object = filter_cityobject(cityobject, vertex_count)
        # Compare with None: a kept object may legitimately be an empty dict.
        if cleaned_object is not None:
            cleaned_objects[object_id] = cleaned_object

    cleaned_cityjson = {k: v for k, v in cityjson.items() if k != "CityObjects"}
    cleaned_cityjson["CityObjects"] = cleaned_objects
    return cleaned_cityjson


def process_file(path: Path, output_dir: Path) -> int:
    """Clean one input and write ``<base name>.city.json`` into *output_dir*.

    Inputs that cannot be resolved, read, or parsed as a JSON object are
    reported on stderr and skipped so a single bad file does not abort the
    whole run.

    Returns:
        ``1`` when a cleaned file was written, ``0`` when the input was
        skipped.
    """
    resolved = resolve_input_file(path)
    if not resolved:
        print(f"[skip] cannot resolve CityJSON file for {path}", file=sys.stderr)
        return 0

    try:
        cityjson = read_json(resolved)
    except (OSError, ValueError) as error:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"[skip] cannot read {resolved}: {error}", file=sys.stderr)
        return 0
    if not isinstance(cityjson, dict):
        print(f"[skip] {resolved} does not contain a JSON object", file=sys.stderr)
        return 0

    cleaned = clean_cityjson(cityjson)
    output_path = output_dir / f"{base_name(path)}.city.json"
    write_json(output_path, cleaned)
    return 1


def main(argv: Iterable[str] | None = None) -> int:
    """Run the cleaner over every input matching the configured pattern.

    Returns:
        Process exit status: ``0`` when at least one file was written,
        ``1`` when nothing matched or nothing could be written.
    """
    args = parse_args(argv)
    files = sorted(args.input_dir.glob(args.pattern))
    if not files:
        print(
            f"No input files matched pattern '{args.pattern}' in {args.input_dir}",
            file=sys.stderr,
        )
        return 1

    total = 0
    for path in files:
        total += process_file(path, args.output_dir)

    print(f"Wrote {total} cleaned file(s) to {args.output_dir}")
    return 0 if total else 1


if __name__ == "__main__":
    sys.exit(main())