#!/usr/bin/env python3
"""Drop faces that reference out-of-range vertex indices in CityJSON files.

The script walks an input directory, loads every CityJSON document it can
resolve (plain ``.json`` files or the ``<tile>.city.json/`` folders written
by citygml-tools), removes faces whose vertex indices fall outside the
document's ``vertices`` array, and writes each result as a flat file
``<output-dir>/<tile>.city.json``.

Usage (from the project README)::

    mkdir -p work/cityjson_clean
    uv run python scripts/clean_cityjson_vertices.py --input-dir work/cityjson --output-dir work/cityjson_clean
"""
# ---------------------------------------------------------------------------
# Provenance: this file arrived as a git patch
# (scripts/clean_cityjson_vertices.py, new file mode 100644,
# index 0000000..00da2fa).  The README.md hunk from the same patch is kept
# below for reference; its original indentation was not recoverable.
#
#   diff --git a/README.md b/README.md
#   index 2004c3d..57068a0 100644
#   --- a/README.md
#   +++ b/README.md
#   @@ -79,9 +79,16 @@ LoD2 CityGML tiles can be converted to GLB per tile while preserving roof/wall s
#      -o "work/cityjson/${base}.city.json"
#      done
#      ```
#   +  If citygml-tools outputs directories named `.city.json/`, they contain `.json` inside.
#   +  If cjio later reports out-of-range vertex indices during triangulation, clean first:
#   +  ```bash
#   +  mkdir -p work/cityjson_clean
#   +  uv run python scripts/clean_cityjson_vertices.py --input-dir work/cityjson --output-dir work/cityjson_clean
#   +  ```
#    2. Clean + triangulate:
#      ```bash
#   -  for f in work/cityjson/*.city.json; do
#   +  mkdir -p work/cityjson_tri
#   +  for f in work/cityjson_clean/*.city.json; do
#      base="$(basename "$f" .city.json)"
#      input_json="$f/$base.json" # citygml-tools writes a .json inside the .city.json folder
#      uv run cjio --ignore_duplicate_keys "$input_json" upgrade triangulate vertices_clean \
#
# NOTE(review): the README loop above still reads "$f/$base.json", but this
# script writes every cleaned tile as a flat file "<output-dir>/<tile>.city.json"
# (see process_file).  When looping over work/cityjson_clean the input should
# be "$f" itself -- confirm and update the README.
# ---------------------------------------------------------------------------
from __future__ import annotations

import argparse
import json
import sys
from pathlib import Path
from typing import Any, Iterable

# Number of list levels between ``boundaries`` and the items that may be
# dropped individually (points, line strings, or surfaces), per geometry type.
# Used only when a geometry carries no semantic values to guide the descent.
_FACE_DEPTH: dict[str, int] = {
    "MultiPoint": 1,
    "MultiLineString": 1,
    "MultiSurface": 1,
    "CompositeSurface": 1,
    "Solid": 2,
    "MultiSolid": 3,
    "CompositeSolid": 3,
}


def parse_args(argv: Iterable[str] | None = None) -> argparse.Namespace:
    """Parse command-line options.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        Namespace with ``input_dir`` (Path), ``output_dir`` (Path) and
        ``pattern`` (str).
    """
    parser = argparse.ArgumentParser(
        description="Remove faces referencing invalid vertex indices from CityJSON files."
    )
    parser.add_argument(
        "--input-dir",
        type=Path,
        default=Path("work/cityjson"),
        help="Directory containing CityJSON exports (files or citygml-tools folders).",
    )
    parser.add_argument(
        "--output-dir",
        type=Path,
        default=Path("work/cityjson_clean"),
        help="Directory to write cleaned CityJSON files.",
    )
    parser.add_argument(
        "--pattern",
        default="**/*.json",
        help="Glob pattern for input files (defaults to any .json under the input dir).",
    )
    return parser.parse_args(argv)


def read_json(path: Path) -> dict[str, Any]:
    """Load and return the JSON document stored at *path* (UTF-8)."""
    with path.open("r", encoding="utf-8") as handle:
        return json.load(handle)


def write_json(path: Path, payload: dict[str, Any]) -> None:
    """Write *payload* to *path* as indented JSON, creating parent directories."""
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as handle:
        json.dump(payload, handle, ensure_ascii=True, indent=2)
        handle.write("\n")


def base_name(path: Path) -> str:
    """Return the tile name of *path*: its file name minus ``.city.json`` or ``.json``.

    Falls back to ``path.stem`` when neither suffix is present.
    """
    name = path.name
    # Longest suffix first so "foo.city.json" yields "foo", not "foo.city".
    for suffix in (".city.json", ".json"):
        if name.endswith(suffix):
            return name[: -len(suffix)]
    return path.stem


def resolve_input_file(path: Path) -> Path | None:
    """Map a glob hit to the JSON file that should actually be read.

    Args:
        path: A regular file, or a folder such as ``<tile>.city.json/``.

    Returns:
        *path* itself when it is a file; for a folder, the contained
        ``<tile>.json`` (or, failing that, the only ``*.json`` file inside);
        ``None`` when nothing unambiguous can be found.
    """
    if path.is_file():
        return path
    if not path.is_dir():
        return None

    # The README describes folders "<tile>.city.json/" holding "<tile>.json",
    # so the tile name is tried first; the ``path.stem`` spelling is kept as a
    # second candidate for backward compatibility.  dict.fromkeys de-duplicates
    # while preserving order.
    for stem in dict.fromkeys((base_name(path), path.stem)):
        candidate = path / f"{stem}.json"
        if candidate.is_file():
            return candidate

    matches = [entry for entry in path.glob("*.json") if entry.is_file()]
    if len(matches) == 1:
        return matches[0]
    return None


def boundary_valid(boundary: Any, vertex_count: int) -> bool:
    """Return True when every index nested in *boundary* is in ``[0, vertex_count)``.

    Anything that is neither an int nor a list (including JSON booleans, which
    Python would otherwise accept as the ints 0/1) is treated as invalid.
    """
    if isinstance(boundary, bool):
        return False
    if isinstance(boundary, int):
        return 0 <= boundary < vertex_count
    if isinstance(boundary, list):
        return all(boundary_valid(item, vertex_count) for item in boundary)
    return False


def filter_boundaries(
    boundaries: list[Any], values: list[Any], vertex_count: int
) -> tuple[list[Any], list[Any]]:
    """Filter *boundaries* and the parallel semantic *values* in lock-step.

    The nesting of *values* guides the descent: where a value is a list the
    matching boundary is filtered recursively, otherwise the boundary is a
    single face that is kept or dropped as a whole.  Entries are paired
    positionally with ``zip``, so surplus items on either side are ignored.

    Returns:
        ``(kept_boundaries, kept_values)`` with identical nesting; containers
        that end up empty are removed from both.
    """
    filtered_boundaries: list[Any] = []
    filtered_values: list[Any] = []

    for boundary, value in zip(boundaries, values):
        if isinstance(value, list):
            nested_boundaries, nested_values = filter_boundaries(boundary, value, vertex_count)
            if nested_boundaries:
                filtered_boundaries.append(nested_boundaries)
                filtered_values.append(nested_values)
            continue

        if boundary_valid(boundary, vertex_count):
            filtered_boundaries.append(boundary)
            filtered_values.append(value)

    return filtered_boundaries, filtered_values


def _prune_boundaries(boundaries: list[Any], depth: int, vertex_count: int) -> list[Any]:
    """Drop invalid items found *depth* list levels below *boundaries*.

    Used for geometries without semantic values; containers that become empty
    are removed, as are non-list entries found above the target depth.
    """
    kept: list[Any] = []
    for item in boundaries:
        if depth <= 1:
            if boundary_valid(item, vertex_count):
                kept.append(item)
        elif isinstance(item, list):
            nested = _prune_boundaries(item, depth - 1, vertex_count)
            if nested:
                kept.append(nested)
    return kept


def filter_geometry(geometry: dict[str, Any], vertex_count: int) -> dict[str, Any] | None:
    """Return a copy of *geometry* without faces that use invalid vertex indices.

    Args:
        geometry: One entry of a city object's ``geometry`` array.
        vertex_count: Length of the document's ``vertices`` array.

    Returns:
        The filtered geometry (a shallow copy; the input is not mutated),
        the original object when there is nothing that can be filtered
        (no list-valued ``boundaries``, or no semantic values and a type
        missing from ``_FACE_DEPTH``), or ``None`` when no face survives.
    """
    boundaries = geometry.get("boundaries")
    if not isinstance(boundaries, list):
        return geometry

    semantics = geometry.get("semantics")
    values = semantics.get("values") if semantics else None

    if values is not None:
        filtered_boundaries, filtered_values = filter_boundaries(boundaries, values, vertex_count)
    else:
        # Without semantic values the nesting must come from the geometry
        # type; previously such geometries were passed through unchecked.
        depth = _FACE_DEPTH.get(geometry.get("type"))
        if depth is None:
            return geometry
        filtered_boundaries = _prune_boundaries(boundaries, depth, vertex_count)
        filtered_values = None

    if not filtered_boundaries:
        return None

    # NOTE(review): "material"/"texture" value arrays, if present, are not
    # filtered in step with the boundaries -- confirm inputs carry none, or
    # extend this function.
    filtered_geometry = dict(geometry)
    filtered_geometry["boundaries"] = filtered_boundaries
    if filtered_values is not None:
        filtered_geometry["semantics"] = dict(semantics)
        filtered_geometry["semantics"]["values"] = filtered_values
    return filtered_geometry


def filter_cityobject(cityobject: dict[str, Any], vertex_count: int) -> dict[str, Any] | None:
    """Filter every geometry of *cityobject*.

    Returns:
        *cityobject* unchanged when it has no geometry to begin with (for
        example a parent object that only lists children); a shallow copy with
        the surviving geometries otherwise; ``None`` when the object had
        geometry and none of it survived.
    """
    geometries = cityobject.get("geometry") or []
    if not geometries:
        return cityobject

    surviving = [
        filtered
        for filtered in (filter_geometry(geometry, vertex_count) for geometry in geometries)
        if filtered
    ]
    if not surviving:
        return None
    return {**cityobject, "geometry": surviving}


def _drop_references(cityobject: dict[str, Any], removed_ids: set[str]) -> dict[str, Any]:
    """Return *cityobject* without ``children``/``parents`` ids in *removed_ids*.

    The input is never mutated; it is returned as-is when nothing changes.  A
    relation list that becomes empty is removed altogether.
    """
    result = cityobject
    for key in ("children", "parents"):
        refs = cityobject.get(key)
        if not isinstance(refs, list):
            continue
        live = [ref for ref in refs if not (isinstance(ref, str) and ref in removed_ids)]
        if len(live) == len(refs):
            continue
        if result is cityobject:
            result = dict(cityobject)
        if live:
            result[key] = live
        else:
            del result[key]
    return result


def clean_cityjson(cityjson: dict[str, Any]) -> dict[str, Any]:
    """Return a cleaned copy of a CityJSON document.

    City objects are filtered against ``len(cityjson["vertices"])``; objects
    that lose all their geometry are removed, and references to them are
    stripped from the ``children``/``parents`` lists of the remaining objects.
    All other top-level members are carried over unchanged, with
    ``CityObjects`` placed last.
    """
    vertex_count = len(cityjson.get("vertices") or [])
    cityobjects = cityjson.get("CityObjects") or {}

    cleaned_objects: dict[str, Any] = {}
    for object_id, cityobject in cityobjects.items():
        cleaned_object = filter_cityobject(cityobject, vertex_count)
        if cleaned_object is not None:
            cleaned_objects[object_id] = cleaned_object

    removed_ids = set(cityobjects) - set(cleaned_objects)
    if removed_ids:
        cleaned_objects = {
            object_id: _drop_references(cityobject, removed_ids)
            for object_id, cityobject in cleaned_objects.items()
        }

    cleaned_cityjson = {k: v for k, v in cityjson.items() if k != "CityObjects"}
    cleaned_cityjson["CityObjects"] = cleaned_objects
    return cleaned_cityjson


def process_file(path: Path, output_dir: Path) -> int:
    """Clean the CityJSON behind *path* and write it to *output_dir*.

    The output is named ``<tile>.city.json`` after ``base_name(path)``.

    Returns:
        1 when a file was written, 0 when *path* could not be resolved to a
        JSON file (a ``[skip]`` line is printed to stderr).
    """
    resolved = resolve_input_file(path)
    if resolved is None:
        print(f"[skip] cannot resolve CityJSON file for {path}", file=sys.stderr)
        return 0

    cleaned = clean_cityjson(read_json(resolved))
    write_json(output_dir / f"{base_name(path)}.city.json", cleaned)
    return 1


def main(argv: Iterable[str] | None = None) -> int:
    """Command-line entry point.

    Returns:
        Process exit status: 0 when at least one file was written, 1 otherwise.
    """
    args = parse_args(argv)
    files = sorted(args.input_dir.glob(args.pattern))
    if not files:
        print(f"No input files matched pattern '{args.pattern}' in {args.input_dir}", file=sys.stderr)
        return 1

    total = 0
    # The default pattern matches both a "<tile>.city.json/" folder and the
    # .json inside it; remember which files were already handled so each
    # document is cleaned, written and counted once.  Sorting puts the folder
    # first, so the output is named after the tile folder.
    handled: set[Path] = set()
    for path in files:
        resolved = resolve_input_file(path)
        if resolved is not None:
            if resolved in handled:
                continue
            handled.add(resolved)
        total += process_file(path, args.output_dir)

    print(f"Wrote {total} cleaned file(s) to {args.output_dir}")
    return 0 if total else 1


if __name__ == "__main__":
    sys.exit(main())