fix(buildings): Implement windowed reading for ground snapping

Optimized memory usage by reading only the required pixel window from the heightmap VRT instead of loading the entire dataset into memory. This prevents crashes on large datasets while maintaining scientific accuracy for ground-snapping.
This commit is contained in:
2026-02-03 23:12:47 +01:00
parent 3d05459a2d
commit f9668f0549
2 changed files with 85 additions and 5 deletions

View File

@@ -493,13 +493,26 @@ def export_buildings(cfg: Config) -> int:
if dgm_ds:
gt = dgm_ds.GetGeoTransform()
band = dgm_ds.GetRasterBand(1)
arr = band.ReadAsArray()
# Calculate srcwin for the tile
xmin, ymin, xmax, ymax = bounds
xoff = int((xmin - gt[0]) / gt[1])
yoff = int((ymax - gt[3]) / gt[5])
xsize = int((xmax - xmin) / gt[1]) + 1
ysize = int((ymax - ymin) / abs(gt[5])) + 1
# Clamp to raster dimensions
xoff_clamped = max(0, min(xoff, dgm_ds.RasterXSize - 1))
yoff_clamped = max(0, min(yoff, dgm_ds.RasterYSize - 1))
xsize_clamped = max(1, min(xsize, dgm_ds.RasterXSize - xoff_clamped))
ysize_clamped = max(1, min(ysize, dgm_ds.RasterYSize - yoff_clamped))
arr = band.ReadAsArray(xoff_clamped, yoff_clamped, xsize_clamped, ysize_clamped)
nodata = band.GetNoDataValue()
for idx, (vx, vy, vz) in enumerate(vertices):
wx = vx + bounds[0]
wy = vy + bounds[1]
col = int((wx - gt[0]) / gt[1])
row = int((wy - gt[3]) / gt[5])
wx = vx + xmin
wy = vy + ymin
col = int((wx - gt[0]) / gt[1]) - xoff_clamped
row = int((wy - gt[3]) / gt[5]) - yoff_clamped
if 0 <= row < arr.shape[0] and 0 <= col < arr.shape[1]:
g = float(arr[row, col])
if nodata is not None and g == nodata:

View File

@@ -0,0 +1,67 @@
import unittest
from unittest.mock import patch, MagicMock
import numpy as np
from geodata_pipeline.buildings import export_buildings
from geodata_pipeline.config import Config
class TestBuildingsMemory(unittest.TestCase):
@patch("geodata_pipeline.buildings.gdal.Open")
@patch("geodata_pipeline.buildings._ensure_cityjson_for_tile")
@patch("geodata_pipeline.buildings._load_cityjson")
@patch("geodata_pipeline.buildings._collect_faces")
@patch("geodata_pipeline.buildings._load_ortho")
@patch("geodata_pipeline.buildings._compose_glb")
@patch("geodata_pipeline.buildings.ensure_dir")
@patch("os.path.exists")
@patch("builtins.open")
def test_ground_snapping_windowed_read(self, mock_open_file, mock_exists, mock_ensure_dir, mock_compose, mock_load_ortho, mock_collect, mock_load_cj, mock_ensure_cj, mock_gdal_open):
# Setup mocks
mock_exists.return_value = True
# Mock file handle for manifest CSV
mock_handle = MagicMock()
mock_open_file.return_value.__enter__.return_value = mock_handle
mock_handle.__iter__.return_value = [
"tile_id,xmin,ymin,xmax,ymax,global_min,global_max,out_res,tile_key,tile_min,tile_max\n",
"tile1,1000,1000,2000,2000,0,100,1025,1_1,0,100\n"
]
mock_ensure_cj.return_value = "dummy.json"
mock_load_cj.return_value = {"CityObjects": {}}
mock_collect.return_value = (
[[10.0, 10.0, 5.0]], # vertices (local to tile)
[([0, 0, 0], "WallSurface")] # faces
)
mock_load_ortho.return_value = None
# Mock GDAL for heightmap
mock_ds = MagicMock()
mock_gdal_open.return_value = mock_ds
mock_ds.GetGeoTransform.return_value = (0, 1, 0, 3000, 0, -1) # Origin (0, 3000), 1m resolution
mock_ds.RasterXSize = 5000
mock_ds.RasterYSize = 5000
mock_band = MagicMock()
mock_ds.GetRasterBand.return_value = mock_band
mock_band.GetNoDataValue.return_value = -9999
mock_band.ReadAsArray.return_value = np.array([[50.0, 50.0], [50.0, 50.0]]) # Return 2x2 array
cfg = Config.default()
cfg.work.heightmap_vrt = "dgm.vrt"
cfg.export.manifest_path = "manifest.csv"
export_buildings(cfg)
# Verify ReadAsArray was called with a window
args, kwargs = mock_band.ReadAsArray.call_args
# ReadAsArray(xoff, yoff, xsize, ysize)
self.assertEqual(len(args), 4)
xoff, yoff, xsize, ysize = args
# tile1 starts at 1000, 1000 (xmin, ymin) -> (xmin, ymax) = (1000, 2000) for GDAL
# xoff = (1000 - 0) / 1 = 1000
# yoff = (2000 - 3000) / -1 = 1000
self.assertEqual(xoff, 1000)
self.assertEqual(yoff, 1000)
if __name__ == "__main__":
unittest.main()