Add CityJSON cleaner for invalid vertex indices
This commit is contained in:
@@ -79,9 +79,16 @@ LoD2 CityGML tiles can be converted to GLB per tile while preserving roof/wall s
|
||||
-o "work/cityjson/${base}.city.json"
|
||||
done
|
||||
```
|
||||
If citygml-tools outputs directories named `<tile>.city.json/`, they contain `<tile>.json` inside.
|
||||
If cjio later reports out-of-range vertex indices during triangulation, clean first:
|
||||
```bash
|
||||
mkdir -p work/cityjson_clean
|
||||
uv run python scripts/clean_cityjson_vertices.py --input-dir work/cityjson --output-dir work/cityjson_clean
|
||||
```
|
||||
2. Clean + triangulate:
|
||||
```bash
|
||||
for f in work/cityjson/*.city.json; do
|
||||
mkdir -p work/cityjson_tri
|
||||
for f in work/cityjson_clean/*.city.json; do
|
||||
base="$(basename "$f" .city.json)"
|
||||
input_json="$f/$base.json" # citygml-tools writes a .json inside the .city.json folder
|
||||
uv run cjio --ignore_duplicate_keys "$input_json" upgrade triangulate vertices_clean \
|
||||
|
||||
176
scripts/clean_cityjson_vertices.py
Normal file
176
scripts/clean_cityjson_vertices.py
Normal file
@@ -0,0 +1,176 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Drop faces that reference out-of-range vertex indices in CityJSON files."""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable
|
||||
|
||||
|
||||
def parse_args(argv: Iterable[str] | None = None) -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Remove faces referencing invalid vertex indices from CityJSON files."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--input-dir",
|
||||
type=Path,
|
||||
default=Path("work/cityjson"),
|
||||
help="Directory containing CityJSON exports (files or citygml-tools folders).",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output-dir",
|
||||
type=Path,
|
||||
default=Path("work/cityjson_clean"),
|
||||
help="Directory to write cleaned CityJSON files.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--pattern",
|
||||
default="**/*.json",
|
||||
help="Glob pattern for input files (defaults to any .json under the input dir).",
|
||||
)
|
||||
return parser.parse_args(argv)
|
||||
|
||||
|
||||
def read_json(path: Path) -> dict[str, Any]:
    """Load the UTF-8 encoded JSON document stored at *path*.

    Args:
        path: File to read.

    Returns:
        The decoded JSON value (a dict for a CityJSON document).
    """
    raw_text = path.read_text(encoding="utf-8")
    return json.loads(raw_text)
|
||||
|
||||
|
||||
def write_json(path: Path, payload: dict[str, Any]) -> None:
    """Serialize *payload* to *path* as indented, ASCII-escaped JSON.

    Missing parent directories are created first, and the file ends with a
    single trailing newline.

    Args:
        path: Destination file; overwritten if it already exists.
        payload: JSON-serializable document to write.
    """
    destination_dir = path.parent
    destination_dir.mkdir(parents=True, exist_ok=True)

    with open(path, mode="w", encoding="utf-8") as sink:
        json.dump(payload, sink, indent=2, ensure_ascii=True)
        sink.write("\n")
|
||||
|
||||
|
||||
def base_name(path: Path) -> str:
    """Return the file or folder name of *path* without its CityJSON suffix.

    ``.city.json`` is stripped in preference to plain ``.json``; any other
    name falls back to ``path.stem``.

    Args:
        path: Path whose final component should be reduced to a tile name.

    Returns:
        The name with the recognised suffix removed.
    """
    filename = path.name
    # Longest suffix first so "x.city.json" becomes "x", not "x.city".
    for suffix in (".city.json", ".json"):
        if filename.endswith(suffix):
            return filename[: len(filename) - len(suffix)]
    return path.stem
|
||||
|
||||
|
||||
def resolve_input_file(path: Path) -> Path | None:
|
||||
if path.is_file():
|
||||
return path
|
||||
if path.is_dir():
|
||||
candidate = path / f"{path.stem}.json"
|
||||
if candidate.is_file():
|
||||
return candidate
|
||||
matches = list(path.glob("*.json"))
|
||||
if len(matches) == 1:
|
||||
return matches[0]
|
||||
return None
|
||||
|
||||
|
||||
def boundary_valid(boundary: Any, vertex_count: int) -> bool:
    """Check that every vertex index inside *boundary* is in range.

    Args:
        boundary: A single index or an arbitrarily nested list of indices.
        vertex_count: Number of entries in the vertex list; valid indices
            are ``0 <= index < vertex_count``.

    Returns:
        ``True`` when all leaves are in-range integers (an empty list is
        trivially valid); ``False`` for any out-of-range index or any leaf
        that is not an integer.
    """
    # bool is a subclass of int: without this guard a JSON true/false would
    # be accepted as vertex index 1/0.
    if isinstance(boundary, bool):
        return False
    if isinstance(boundary, int):
        return 0 <= boundary < vertex_count
    if isinstance(boundary, list):
        return all(boundary_valid(item, vertex_count) for item in boundary)
    return False
|
||||
|
||||
|
||||
def filter_boundaries(boundaries: list[Any], values: list[Any], vertex_count: int) -> tuple[list[Any], list[Any]]:
    """Filter boundaries and their semantic values in lockstep.

    The two lists are walked pairwise. Where the semantic value is itself a
    list, the pair is filtered recursively and kept only if something
    survives. Otherwise the boundary is kept, with its value, only when
    ``boundary_valid`` accepts it.

    Args:
        boundaries: Boundary entries at the current nesting level.
        values: Semantic values paired positionally with *boundaries*.
        vertex_count: Number of vertices available to index into.

    Returns:
        ``(kept_boundaries, kept_values)``, two lists of equal length.
    """
    kept_boundaries: list[Any] = []
    kept_values: list[Any] = []

    for part, semantic in zip(boundaries, values):
        if not isinstance(semantic, list):
            # Leaf level: one semantic value describes this whole boundary.
            if boundary_valid(part, vertex_count):
                kept_boundaries.append(part)
                kept_values.append(semantic)
            continue

        # Container level: descend, and drop the container if it ends up empty.
        inner_boundaries, inner_values = filter_boundaries(part, semantic, vertex_count)
        if inner_boundaries:
            kept_boundaries.append(inner_boundaries)
            kept_values.append(inner_values)

    return kept_boundaries, kept_values
|
||||
|
||||
|
||||
def filter_geometry(geometry: dict[str, Any], vertex_count: int) -> dict[str, Any] | None:
|
||||
semantics = geometry.get("semantics")
|
||||
boundaries = geometry.get("boundaries")
|
||||
values = semantics.get("values") if semantics else None
|
||||
if boundaries is None or values is None:
|
||||
return geometry
|
||||
|
||||
filtered_boundaries, filtered_values = filter_boundaries(boundaries, values, vertex_count)
|
||||
if not filtered_boundaries:
|
||||
return None
|
||||
|
||||
filtered_geometry = dict(geometry)
|
||||
filtered_geometry["boundaries"] = filtered_boundaries
|
||||
filtered_geometry["semantics"] = dict(semantics)
|
||||
filtered_geometry["semantics"]["values"] = filtered_values
|
||||
return filtered_geometry
|
||||
|
||||
|
||||
def filter_cityobject(cityobject: dict[str, Any], vertex_count: int) -> dict[str, Any] | None:
|
||||
geometries = cityobject.get("geometry") or []
|
||||
filtered_geometries = []
|
||||
for geometry in geometries:
|
||||
filtered_geometry = filter_geometry(geometry, vertex_count)
|
||||
if filtered_geometry:
|
||||
filtered_geometries.append(filtered_geometry)
|
||||
|
||||
if not filtered_geometries:
|
||||
return None
|
||||
|
||||
filtered_object = dict(cityobject)
|
||||
filtered_object["geometry"] = filtered_geometries
|
||||
return filtered_object
|
||||
|
||||
|
||||
def clean_cityjson(cityjson: dict[str, Any]) -> dict[str, Any]:
    """Build a cleaned copy of a CityJSON document.

    Each entry of ``CityObjects`` is passed through ``filter_cityobject``
    with the length of the ``vertices`` list; entries for which it returns
    a falsy result are left out. All other top-level members are carried
    over unchanged, and ``CityObjects`` is re-added as the last key.

    Args:
        cityjson: The parsed CityJSON document.

    Returns:
        A new top-level dict; the input dict is not modified.
    """
    vertex_count = len(cityjson.get("vertices") or [])

    surviving = {}
    for object_id, cityobject in (cityjson.get("CityObjects") or {}).items():
        cleaned_object = filter_cityobject(cityobject, vertex_count)
        if cleaned_object:
            surviving[object_id] = cleaned_object

    # Copy, remove, then re-insert so "CityObjects" ends up as the final key.
    result = dict(cityjson)
    result.pop("CityObjects", None)
    result["CityObjects"] = surviving
    return result
|
||||
|
||||
|
||||
def process_file(path: Path, output_dir: Path) -> int:
    """Clean one matched input and write the result under *output_dir*.

    The output is named ``<base_name(path)>.city.json``.

    Args:
        path: A file or directory matched by the input glob.
        output_dir: Directory that receives the cleaned file.

    Returns:
        ``1`` when a file was written, ``0`` when *path* could not be
        resolved to a CityJSON file (a ``[skip]`` line goes to stderr).
    """
    source = resolve_input_file(path)
    if source:
        cleaned = clean_cityjson(read_json(source))
        destination = output_dir / f"{base_name(path)}.city.json"
        write_json(destination, cleaned)
        return 1

    print(f"[skip] cannot resolve CityJSON file for {path}", file=sys.stderr)
    return 0
|
||||
|
||||
|
||||
def main(argv: Iterable[str] | None = None) -> int:
    """Run the cleaner over every path matching the configured glob.

    Args:
        argv: Command-line arguments; ``None`` defers to ``parse_args``'
            default handling.

    Returns:
        Process exit status: ``0`` when at least one file was written,
        ``1`` when nothing matched or nothing could be written.
    """
    args = parse_args(argv)

    # Sorted so files are processed in a deterministic order.
    matched = sorted(args.input_dir.glob(args.pattern))
    if not matched:
        print(f"No input files matched pattern '{args.pattern}' in {args.input_dir}", file=sys.stderr)
        return 1

    written = sum(process_file(candidate, args.output_dir) for candidate in matched)

    print(f"Wrote {written} cleaned file(s) to {args.output_dir}")
    if written:
        return 0
    return 1
|
||||
|
||||
|
||||
# Script entry point: exit the interpreter with main()'s status code.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user