# GeoData/geodata_pipeline/buildings.py
# (570 lines, 22 KiB, Python)
from __future__ import annotations
import json
import math
import os
import shlex
import struct
import subprocess
from typing import Dict, Iterable, List, Tuple
import numpy as np
from osgeo import gdal
from .citygml_utils import find_citygml_lod2
from .config import Config
from .gdal_utils import ensure_dir
# External tool locations. Both can be overridden via environment variables.
CITYGML_TOOLS = os.environ.get("CITYGML_TOOLS", "tools/citygml-tools-2.4.0/citygml-tools")
# Default cjio command assumes we are already running inside the project venv.
# If you invoke the exporter from outside the venv, set CJIO="uv run cjio".
CJIO_CMD = os.environ.get("CJIO", "cjio")
def _pad4(data: bytes, pad_byte: bytes = b"\x20") -> bytes:
pad_len = (4 - (len(data) % 4)) % 4
return data + pad_byte * pad_len
def _tile_suffix(tile_id: str) -> str:
parts = tile_id.split("_")
return "_".join(parts[-3:]) if len(parts) >= 3 else tile_id
def _load_cityjson(path: str) -> dict | None:
if not os.path.exists(path):
return None
with open(path, "r", encoding="utf-8") as handle:
return json.load(handle)
def _dedup_cityjson_ids(path: str) -> bool:
"""Rewrite CityJSON so CityObjects keys are unique (cjio rejects duplicates)."""
if os.path.isdir(path):
candidate = os.path.join(path, f"{os.path.basename(path)}.json")
if os.path.isfile(candidate):
path = candidate
else:
matches = [p for p in os.listdir(path) if p.endswith(".json")]
if matches:
path = os.path.join(path, matches[0])
else:
return False
try:
raw = json.load(open(path, "r", encoding="utf-8"), object_pairs_hook=lambda pairs: [(k, v) for k, v in pairs])
except (OSError, json.JSONDecodeError):
return False
first_names: dict[str, str] = {}
def convert(node, in_cityobjects: bool = False):
# object represented as list of pairs from object_pairs_hook
if isinstance(node, list) and node and all(isinstance(p, tuple) and len(p) == 2 for p in node):
if in_cityobjects:
counts: dict[str, int] = {}
obj: dict[str, object] = {}
for key, val in node:
new_val = convert(val, False)
idx = counts.get(key, 0)
new_key = key if idx == 0 else f"{key}_{idx}"
counts[key] = idx + 1
# avoid accidental collision if suffix already existed
while new_key in obj:
idx = counts[key]
new_key = f"{key}_{idx}"
counts[key] = idx + 1
first_names.setdefault(key, new_key)
obj[new_key] = new_val
return obj
obj: dict[str, object] = {}
for key, val in node:
obj[key] = convert(val, key == "CityObjects")
return obj
if isinstance(node, list):
return [convert(x, False) for x in node]
return node
data = convert(raw, False)
cityobjects = data.get("CityObjects", {})
if isinstance(cityobjects, dict):
for co in cityobjects.values():
if not isinstance(co, dict):
continue
for rel in ("children", "parents"):
if rel in co and isinstance(co[rel], list):
co[rel] = [first_names.get(cid, cid) for cid in co[rel] if isinstance(cid, str)]
try:
with open(path, "w", encoding="utf-8") as handle_out:
json.dump(data, handle_out, ensure_ascii=True, indent=2)
handle_out.write("\n")
return True
except OSError:
return False
def _flatten_faces(boundaries, values, surfaces, current_type: str | None, faces: List[Tuple[List[int], str]]) -> None:
"""Recursively flatten boundaries with semantics values into faces."""
if isinstance(values, list) and isinstance(boundaries, list):
for b, v in zip(boundaries, values):
_flatten_faces(b, v, surfaces, current_type, faces)
return
sem_type = current_type
if isinstance(values, int):
if surfaces and 0 <= values < len(surfaces):
sem_type = surfaces[values].get("type") or current_type
# boundaries at this point should be list of indices or list of rings
if not isinstance(boundaries, list):
return
# handle polygon ring lists
if boundaries and isinstance(boundaries[0], list):
# take outer ring only
ring = boundaries[0]
else:
ring = boundaries
if len(ring) < 3:
return
# simple fan triangulation
for i in range(1, len(ring) - 1):
faces.append(([ring[0], ring[i], ring[i + 1]], sem_type or "WallSurface"))
def _collect_faces(cityjson: dict) -> Tuple[List[List[float]], List[Tuple[List[int], str]]]:
    """Gather all triangulated faces (with semantic labels) from a CityJSON dict.

    Returns the raw vertex list and a flat list of (index-triple, type) faces.
    """
    faces: list[Tuple[List[int], str]] = []
    for city_object in (cityjson.get("CityObjects") or {}).values():
        for geom in city_object.get("geometry") or []:
            boundaries = geom.get("boundaries")
            if boundaries is None:
                continue
            semantics = geom.get("semantics") or {}
            _flatten_faces(boundaries, semantics.get("values"), semantics.get("surfaces") or [], None, faces)
    return cityjson.get("vertices") or [], faces
def _split_roof_wall(faces: List[Tuple[List[int], str]]) -> Tuple[List[List[int]], List[List[int]]]:
roofs: list[List[int]] = []
walls: list[List[int]] = []
for face, sem_type in faces:
target = roofs if (sem_type and "roof" in sem_type.lower()) else walls
target.append(face)
return roofs, walls
def _compute_normals(vertices: np.ndarray, faces: np.ndarray) -> np.ndarray:
normals = np.zeros_like(vertices)
if faces.size == 0:
return normals
tri = vertices[faces]
v1 = tri[:, 1] - tri[:, 0]
v2 = tri[:, 2] - tri[:, 0]
face_normals = np.cross(v1, v2)
lens = np.linalg.norm(face_normals, axis=1)
lens[lens == 0] = 1.0
face_normals = face_normals / lens[:, None]
for i, fn in enumerate(face_normals):
for vi in faces[i]:
normals[vi] += fn
lens_v = np.linalg.norm(normals, axis=1)
lens_v[lens_v == 0] = 1.0
normals /= lens_v[:, None]
return normals
def _decimate(faces: np.ndarray, budget: int, seed: int = 42) -> np.ndarray:
if len(faces) <= budget:
return faces
rng = np.random.default_rng(seed)
idx = rng.choice(len(faces), size=budget, replace=False)
return faces[idx]
def _build_materials(embed_roof: bytes | None, unlit_roof: bool) -> Tuple[List[dict], List[str]]:
materials = []
extensions_used: list[str] = []
roof_mat = {
"name": "Roof",
"pbrMetallicRoughness": {"metallicFactor": 0.0, "roughnessFactor": 1.0},
}
if embed_roof is not None:
roof_mat["pbrMetallicRoughness"]["baseColorTexture"] = {"index": 0}
if unlit_roof:
roof_mat.setdefault("extensions", {})["KHR_materials_unlit"] = {}
extensions_used.append("KHR_materials_unlit")
materials.append(roof_mat)
wall_mat = {
"name": "Wall",
"pbrMetallicRoughness": {
"baseColorFactor": [0.75, 0.75, 0.75, 1.0],
"metallicFactor": 0.0,
"roughnessFactor": 1.0,
},
}
materials.append(wall_mat)
return materials, extensions_used
def _compose_glb(
    roof_vertices: np.ndarray,
    roof_normals: np.ndarray,
    roof_uv: np.ndarray,
    roof_faces: np.ndarray,
    wall_vertices: np.ndarray,
    wall_normals: np.ndarray,
    wall_colors: np.ndarray,
    wall_faces: np.ndarray,
    embed_roof_tex: bytes | None,
    unlit_roof: bool,
) -> bytes:
    """Assemble a binary glTF (GLB) with up to two meshes: roof and wall.

    The roof primitive carries TEXCOORD_0 (plus an embedded JPEG texture when
    *embed_roof_tex* is given); the wall primitive carries COLOR_0 vertex
    colors. Empty face arrays skip the corresponding mesh entirely.
    """
    buffer_views: list[dict] = []
    accessors: list[dict] = []
    meshes: list[dict] = []
    bin_data = bytearray()

    def add_view(data: bytes) -> int:
        # Buffer views must start on a 4-byte boundary; zero-fill up to it.
        offset = int(math.ceil(len(bin_data) / 4.0) * 4)
        if offset > len(bin_data):
            bin_data.extend(b"\x00" * (offset - len(bin_data)))
        bin_data.extend(data)
        buffer_views.append({"buffer": 0, "byteOffset": offset, "byteLength": len(data)})
        return len(buffer_views) - 1

    def add_accessor(view_idx: int, count: int, comp: int, type_str: str, min_val=None, max_val=None) -> int:
        acc = {"bufferView": view_idx, "componentType": comp, "count": count, "type": type_str}
        if min_val is not None:
            acc["min"] = min_val
        if max_val is not None:
            acc["max"] = max_val
        accessors.append(acc)
        return len(accessors) - 1

    materials, extensions_used = _build_materials(embed_roof_tex, unlit_roof)

    def add_primitive(
        positions: np.ndarray,
        normals: np.ndarray,
        faces: np.ndarray,
        material_idx: int,
        texcoord: np.ndarray | None = None,
        colors: np.ndarray | None = None,
    ) -> dict:
        # POSITION accessors require min/max per the glTF spec; others do not.
        pos_view = add_view(positions.astype(np.float32).tobytes())
        pos_acc = add_accessor(pos_view, len(positions), 5126, "VEC3", positions.min(axis=0).tolist(), positions.max(axis=0).tolist())
        nor_view = add_view(normals.astype(np.float32).tobytes())
        nor_acc = add_accessor(nor_view, len(normals), 5126, "VEC3")
        idx_view = add_view(faces.astype(np.uint32).reshape(-1).tobytes())
        idx_acc = add_accessor(idx_view, faces.size, 5125, "SCALAR")
        attrs = {"POSITION": pos_acc, "NORMAL": nor_acc}
        if texcoord is not None:
            uv_view = add_view(texcoord.astype(np.float32).tobytes())
            attrs["TEXCOORD_0"] = add_accessor(uv_view, len(texcoord), 5126, "VEC2")
        if colors is not None:
            col_view = add_view(colors.astype(np.float32).tobytes())
            attrs["COLOR_0"] = add_accessor(col_view, len(colors), 5126, "VEC3")
        return {"attributes": attrs, "indices": idx_acc, "material": material_idx}

    if roof_faces.size:
        meshes.append({"primitives": [add_primitive(roof_vertices, roof_normals, roof_faces, 0, texcoord=roof_uv)]})
    if wall_faces.size:
        meshes.append({"primitives": [add_primitive(wall_vertices, wall_normals, wall_faces, 1, colors=wall_colors)]})
    nodes = [{"mesh": i} for i in range(len(meshes))]
    gltf = {
        "asset": {"version": "2.0"},
        "scene": 0,
        "scenes": [{"nodes": list(range(len(nodes)))}],
        "nodes": nodes,
        "meshes": meshes,
        "materials": materials,
    }
    # Embed the roof texture (raw JPEG bytes) directly in the binary chunk.
    if embed_roof_tex is not None:
        img_view_idx = add_view(embed_roof_tex)
        gltf["textures"] = [{"source": 0}]
        gltf["images"] = [{"bufferView": img_view_idx, "mimeType": "image/jpeg"}]
        gltf["samplers"] = [{"magFilter": 9729, "minFilter": 9729, "wrapS": 10497, "wrapT": 10497}]
    gltf["buffers"] = [{"byteLength": len(bin_data)}]
    gltf["bufferViews"] = buffer_views
    gltf["accessors"] = accessors
    if extensions_used:
        gltf["extensionsUsed"] = extensions_used
    # GLB container: 12-byte header, then JSON chunk, then BIN chunk. Per the
    # glTF 2.0 spec the JSON chunk is padded with 0x20 (space) and the BIN
    # chunk must be padded with zero bytes — not spaces.
    json_padded = _pad4(json.dumps(gltf, separators=(",", ":")).encode("utf-8"))
    bin_padded = _pad4(bytes(bin_data), b"\x00")
    total_len = 12 + 8 + len(json_padded) + 8 + len(bin_padded)
    header = struct.pack("<4sII", b"glTF", 2, total_len)
    json_header = struct.pack("<I4s", len(json_padded), b"JSON")
    bin_header = struct.pack("<I4s", len(bin_padded), b"BIN\x00")
    return b"".join([header, json_header, json_padded, bin_header, bin_padded])
def _load_ortho(tile_id: str, path: str) -> bytes | None:
if not os.path.exists(path):
print(f"[buildings] missing ortho for {tile_id}: {path}")
return None
with open(path, "rb") as handle:
return handle.read()
def _bounds_from_row(row: Dict[str, str]) -> Tuple[float, float, float, float]:
return float(row["xmin"]), float(row["ymin"]), float(row["xmax"]), float(row["ymax"])
def _run(cmd: list[str], desc: str) -> bool:
argv = []
for part in cmd:
# Allow space-separated env overrides (e.g., "uv run cjio")
if " " in part:
argv.extend(shlex.split(part))
else:
argv.append(part)
try:
res = subprocess.run(argv, check=False)
if res.returncode != 0:
print(f"[buildings] {desc} failed with code {res.returncode}: {' '.join(argv)}")
return False
return True
except FileNotFoundError:
print(f"[buildings] missing tool for {desc}: {' '.join(argv)}")
return False
def _rebase_cityjson(path: str, bounds: Tuple[float, float, float, float], out_path: str) -> bool:
    """Apply the CityJSON transform and shift vertices to tile-local coordinates.

    X/Y are offset by the tile's (xmin, ymin); Z keeps its absolute value. The
    "transform" entry is dropped and metadata.geographicalExtent recomputed.
    Returns False when the source file cannot be loaded.
    """
    data = _load_cityjson(path)
    if not data:
        return False
    xmin, ymin = bounds[0], bounds[1]
    transform = data.get("transform") or {}
    scale = transform.get("scale") or [1.0, 1.0, 1.0]
    translate = transform.get("translate") or [0.0, 0.0, 0.0]
    rebased: list[list[float]] = [
        [
            v[0] * scale[0] + translate[0] - xmin,
            v[1] * scale[1] + translate[1] - ymin,
            v[2] * scale[2] + translate[2],
        ]
        for v in (data.get("vertices") or [])
        if len(v) >= 3
    ]
    data["vertices"] = rebased
    data.pop("transform", None)
    if rebased:
        xs = [v[0] for v in rebased]
        ys = [v[1] for v in rebased]
        zs = [v[2] for v in rebased]
        data.setdefault("metadata", {})["geographicalExtent"] = [
            min(xs), min(ys), min(zs), max(xs), max(ys), max(zs),
        ]
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    with open(out_path, "w", encoding="utf-8") as handle:
        json.dump(data, handle, ensure_ascii=True, indent=2)
    return True
def _ensure_cityjson_for_tile(tile_id: str, bounds: Tuple[float, float, float, float], cfg: Config) -> str | None:
    """Create CityJSON -> triangulated -> rebased file if missing. Returns rebased path.

    Stages (each skipped when its output file already exists):
      1. citygml-tools converts the tile's LoD2 CityGML to CityJSON.
      2. _dedup_cityjson_ids makes CityObject keys unique (cjio rejects duplicates).
      3. cjio upgrades, triangulates and cleans the CityJSON.
      4. _rebase_cityjson shifts vertices to tile-local coordinates.
    Returns None on any failure (missing GML, tool error, unreadable output).
    """
    gml_path = find_citygml_lod2(tile_id, cfg)
    if not gml_path:
        print(f"[buildings] missing GML for {tile_id} in {cfg.raw.citygml_lod2_dir}")
        return None

    def resolve_cityjson(path: str) -> str | None:
        # citygml-tools may produce a directory instead of a single file; look
        # inside it, preferring "<dirname>.json", otherwise the first *.json.
        if os.path.isfile(path):
            return path
        if os.path.isdir(path):
            candidate = os.path.join(path, f"{os.path.basename(path)}.json")
            if os.path.isfile(candidate):
                return candidate
            matches = [p for p in os.listdir(path) if p.endswith(".json")]
            if matches:
                return os.path.join(path, matches[0])
        return None

    base = os.path.splitext(os.path.basename(gml_path))[0]
    cityjson_dir = cfg.buildings.work_cityjson_dir
    tri_dir = os.path.join(cityjson_dir, "tri")
    rebased_dir = cfg.buildings.work_rebased_dir
    os.makedirs(cityjson_dir, exist_ok=True)
    os.makedirs(tri_dir, exist_ok=True)
    os.makedirs(rebased_dir, exist_ok=True)
    cityjson_path = os.path.join(cityjson_dir, f"{base}.city.json")
    tri_path = os.path.join(tri_dir, f"{base}.tri.city.json")
    rebased_path = os.path.join(rebased_dir, f"{base}.tri.city.json")
    if not os.path.exists(cityjson_path):
        ok = _run([CITYGML_TOOLS, "to-cityjson", gml_path, "-o", cityjson_path], f"CityGML->CityJSON ({tile_id})")
        if not ok:
            return None
    if not os.path.isfile(cityjson_path):
        # Conversion output may be a directory; point at the real JSON file.
        resolved = resolve_cityjson(cityjson_path)
        if resolved:
            cityjson_path = resolved
    if not _dedup_cityjson_ids(cityjson_path):
        print(f"[buildings] could not sanitize CityJSON IDs for {tile_id}: {cityjson_path}")
        return None
    if not os.path.exists(tri_path):
        ok = _run([CJIO_CMD, cityjson_path, "upgrade", "triangulate", "vertices_clean", "save", tri_path], f"triangulate ({tile_id})")
        if not ok:
            return None
    if not os.path.exists(rebased_path):
        ok = _rebase_cityjson(tri_path, bounds, rebased_path)
        if not ok:
            return None
    else:
        # An existing rebased file that still carries a "transform" predates
        # the rebasing step — regenerate it.
        existing = _load_cityjson(rebased_path) or {}
        if existing.get("transform"):
            ok = _rebase_cityjson(tri_path, bounds, rebased_path)
            if not ok:
                return None
    return rebased_path
def export_buildings(cfg: Config) -> int:
    """Export one GLB per manifest tile containing roof and wall meshes.

    Reads the tile manifest CSV (written by the heightmap export), converts
    each tile's CityGML to a rebased CityJSON, triangulates and decimates the
    geometry, snaps below-ground vertices up to the DTM, samples wall colors
    from the orthophoto, and writes <tile_id>.glb to cfg.buildings.out_dir.
    Returns 0 when at least one tile was written, 1 otherwise.
    """
    ensure_dir(cfg.buildings.out_dir)
    if not os.path.exists(cfg.export.manifest_path):
        raise SystemExit(f"Tile index missing: {cfg.export.manifest_path}. Run heightmap export first.")
    import csv
    written = 0
    with open(cfg.export.manifest_path, newline="", encoding="utf-8") as handle:
        reader = csv.DictReader(handle)
        for row in reader:
            tile_id = row.get("tile_id")
            if not tile_id:
                continue
            bounds = _bounds_from_row(row)
            cityjson_path = _ensure_cityjson_for_tile(tile_id, bounds, cfg)
            if not cityjson_path:
                continue
            data = _load_cityjson(cityjson_path)
            if not data:
                print(f"[buildings] could not read {cityjson_path}")
                continue
            verts, faces_all = _collect_faces(data)
            if not verts or not faces_all:
                print(f"[buildings] no geometry for {tile_id}")
                continue
            roof_faces_raw, wall_faces_raw = _split_roof_wall(faces_all)
            vertices = np.array(verts, dtype=np.float32)
            roof_faces = np.array(roof_faces_raw, dtype=np.uint32) if roof_faces_raw else np.zeros((0, 3), dtype=np.uint32)
            wall_faces = np.array(wall_faces_raw, dtype=np.uint32) if wall_faces_raw else np.zeros((0, 3), dtype=np.uint32)
            # Decimate if over budget; the budget is split between roof and
            # wall proportionally to their share of the triangle count.
            total = len(roof_faces) + len(wall_faces)
            if total > 0:
                max_budget = cfg.buildings.triangle_budget_max
                min_budget = cfg.buildings.triangle_budget_min
                target = min(total, max_budget)
                if target < min_budget and total > min_budget:
                    target = min_budget
                if total > target:
                    roof_share = len(roof_faces) / total if total else 0.0
                    roof_budget = max(0, int(target * roof_share))
                    wall_budget = max(0, target - roof_budget)
                    if roof_faces.size:
                        roof_faces = _decimate(roof_faces, roof_budget or len(roof_faces))
                    if wall_faces.size:
                        wall_faces = _decimate(wall_faces, wall_budget or len(wall_faces))
            # Ground snap (simple: clamp below-ground vertices up to DTM).
            # Best-effort: any GDAL failure leaves the vertices untouched.
            try:
                dgm_ds = gdal.Open(cfg.work.heightmap_vrt)
                if dgm_ds:
                    gt = dgm_ds.GetGeoTransform()
                    band = dgm_ds.GetRasterBand(1)
                    arr = band.ReadAsArray()
                    nodata = band.GetNoDataValue()
                    for idx, (vx, vy, vz) in enumerate(vertices):
                        wx = vx + bounds[0]
                        wy = vy + bounds[1]
                        col = int((wx - gt[0]) / gt[1])
                        # NOTE(review): `row` shadows the CSV row dict; the CSV
                        # row is not used again in this iteration, but renaming
                        # would avoid confusion.
                        row = int((wy - gt[3]) / gt[5])
                        if 0 <= row < arr.shape[0] and 0 <= col < arr.shape[1]:
                            g = float(arr[row, col])
                            if nodata is not None and g == nodata:
                                continue
                            if vz < g:
                                vertices[idx, 2] = g
            except Exception:
                pass
            xmin, ymin, xmax, ymax = bounds
            w = xmax - xmin
            h = ymax - ymin
            if w == 0 or h == 0:
                print(f"[buildings] invalid bounds for {tile_id}")
                continue
            # Convert to glTF-friendly axes: x=east, y=height (z), z=-north
            source_xy = vertices[:, :2].copy()
            gltf_vertices = np.zeros_like(vertices)
            gltf_vertices[:, 0] = vertices[:, 0]
            gltf_vertices[:, 1] = vertices[:, 2]
            gltf_vertices[:, 2] = -vertices[:, 1]
            roof_normals = _compute_normals(gltf_vertices, roof_faces) if roof_faces.size else np.zeros_like(gltf_vertices)
            wall_normals = _compute_normals(gltf_vertices, wall_faces) if wall_faces.size else np.zeros_like(gltf_vertices)
            # Roof UVs map tile-local XY onto the texture (V axis flipped).
            uv = np.zeros((len(vertices), 2), dtype=np.float32)
            uv[:, 0] = source_xy[:, 0] / w
            uv[:, 1] = 1.0 - (source_xy[:, 1] / h)
            # Wall colors sampled from ortho if available (fallback constant)
            wall_color = np.zeros((len(vertices), 3), dtype=np.float32) + 0.75
            ortho_path = os.path.join(cfg.export.ortho_dir, f"{tile_id}.jpg")
            ortho_bytes = _load_ortho(tile_id, ortho_path)
            if ortho_bytes:
                # Best-effort nearest-pixel sampling; failures keep the gray fallback.
                try:
                    ortho_ds = gdal.Open(ortho_path)
                    if ortho_ds:
                        gt_o = ortho_ds.GetGeoTransform()
                        bands = [ortho_ds.GetRasterBand(i + 1).ReadAsArray() for i in range(min(3, ortho_ds.RasterCount))]
                        for idx, (vx, vy, _) in enumerate(vertices):
                            wx = vx + xmin
                            wy = vy + ymin
                            col = int((wx - gt_o[0]) / gt_o[1])
                            row = int((wy - gt_o[3]) / gt_o[5])
                            if 0 <= row < ortho_ds.RasterYSize and 0 <= col < ortho_ds.RasterXSize:
                                rgb = [bands[i][row, col] for i in range(len(bands))]
                                if rgb:
                                    wall_color[idx] = np.array(rgb[:3], dtype=np.float32) / 255.0
                except Exception:
                    pass
            glb_bytes = _compose_glb(
                gltf_vertices,
                roof_normals,
                uv,
                roof_faces,
                gltf_vertices,
                wall_normals,
                wall_color,
                wall_faces,
                ortho_bytes,
                cfg.buildings.roof_unlit,
            )
            out_path = os.path.join(cfg.buildings.out_dir, f"{tile_id}.glb")
            ensure_dir(cfg.buildings.out_dir)
            with open(out_path, "wb") as handle_out:
                handle_out.write(glb_bytes)
            written += 1
            print(f"[buildings] wrote {out_path}")
    print(f"[buildings] Summary: wrote {written} tile GLB(s).")
    return 0 if written else 1