From f9668f0549f2b2c3f09141c9fb73d8e668616451 Mon Sep 17 00:00:00 2001 From: "s0wlz (Matthias Puchstein)" Date: Tue, 3 Feb 2026 23:12:47 +0100 Subject: [PATCH] fix(buildings): Implement windowed reading for ground snapping Optimized memory usage by reading only the required pixel window from the heightmap VRT instead of loading the entire dataset into memory. This prevents crashes on large datasets while maintaining scientific accuracy for ground-snapping. --- geodata_pipeline/buildings.py | 23 +++++++++--- tests/test_buildings_memory.py | 67 ++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 5 deletions(-) create mode 100644 tests/test_buildings_memory.py diff --git a/geodata_pipeline/buildings.py b/geodata_pipeline/buildings.py index 3cbebcd..4bb3531 100644 --- a/geodata_pipeline/buildings.py +++ b/geodata_pipeline/buildings.py @@ -493,13 +493,26 @@ def export_buildings(cfg: Config) -> int: if dgm_ds: gt = dgm_ds.GetGeoTransform() band = dgm_ds.GetRasterBand(1) - arr = band.ReadAsArray() + # Calculate srcwin for the tile + xmin, ymin, xmax, ymax = bounds + xoff = int((xmin - gt[0]) / gt[1]) + yoff = int((ymax - gt[3]) / gt[5]) + xsize = int((xmax - xmin) / gt[1]) + 1 + ysize = int((ymax - ymin) / abs(gt[5])) + 1 + + # Clamp to raster dimensions + xoff_clamped = max(0, min(xoff, dgm_ds.RasterXSize - 1)) + yoff_clamped = max(0, min(yoff, dgm_ds.RasterYSize - 1)) + xsize_clamped = max(1, min(xsize, dgm_ds.RasterXSize - xoff_clamped)) + ysize_clamped = max(1, min(ysize, dgm_ds.RasterYSize - yoff_clamped)) + + arr = band.ReadAsArray(xoff_clamped, yoff_clamped, xsize_clamped, ysize_clamped) nodata = band.GetNoDataValue() for idx, (vx, vy, vz) in enumerate(vertices): - wx = vx + bounds[0] - wy = vy + bounds[1] - col = int((wx - gt[0]) / gt[1]) - row = int((wy - gt[3]) / gt[5]) + wx = vx + xmin + wy = vy + ymin + col = int((wx - gt[0]) / gt[1]) - xoff_clamped + row = int((wy - gt[3]) / gt[5]) - yoff_clamped if 0 <= row < arr.shape[0] and 0 <= col < arr.shape[1]: g = float(arr[row, col]) if nodata is not None and g == nodata: diff --git a/tests/test_buildings_memory.py b/tests/test_buildings_memory.py new file mode 100644 index 0000000..0e367dc --- /dev/null +++ b/tests/test_buildings_memory.py @@ -0,0 +1,67 @@ +import unittest +from unittest.mock import patch, MagicMock +import numpy as np +from geodata_pipeline.buildings import export_buildings +from geodata_pipeline.config import Config + +class TestBuildingsMemory(unittest.TestCase): + @patch("geodata_pipeline.buildings.gdal.Open") + @patch("geodata_pipeline.buildings._ensure_cityjson_for_tile") + @patch("geodata_pipeline.buildings._load_cityjson") + @patch("geodata_pipeline.buildings._collect_faces") + @patch("geodata_pipeline.buildings._load_ortho") + @patch("geodata_pipeline.buildings._compose_glb") + @patch("geodata_pipeline.buildings.ensure_dir") + @patch("os.path.exists") + @patch("builtins.open") + def test_ground_snapping_windowed_read(self, mock_open_file, mock_exists, mock_ensure_dir, mock_compose, mock_load_ortho, mock_collect, mock_load_cj, mock_ensure_cj, mock_gdal_open): + # Setup mocks + mock_exists.return_value = True + + # Mock file handle for manifest CSV + mock_handle = MagicMock() + mock_open_file.return_value.__enter__.return_value = mock_handle + mock_handle.__iter__.return_value = [ + "tile_id,xmin,ymin,xmax,ymax,global_min,global_max,out_res,tile_key,tile_min,tile_max\n", + "tile1,1000,1000,2000,2000,0,100,1025,1_1,0,100\n" + ] + + mock_ensure_cj.return_value = "dummy.json" + mock_load_cj.return_value = {"CityObjects": {}} + mock_collect.return_value = ( + [[10.0, 10.0, 5.0]], # vertices (local to tile) + [([0, 0, 0], "WallSurface")] # faces + ) + mock_load_ortho.return_value = None + + # Mock GDAL for heightmap + mock_ds = MagicMock() + mock_gdal_open.return_value = mock_ds + mock_ds.GetGeoTransform.return_value = (0, 1, 0, 3000, 0, -1) # Origin (0, 3000), 1m resolution + mock_ds.RasterXSize = 5000 + mock_ds.RasterYSize = 5000 + + mock_band = MagicMock() + mock_ds.GetRasterBand.return_value = mock_band + mock_band.GetNoDataValue.return_value = -9999 + mock_band.ReadAsArray.return_value = np.array([[50.0, 50.0], [50.0, 50.0]]) # Return 2x2 array + + cfg = Config.default() + cfg.work.heightmap_vrt = "dgm.vrt" + cfg.export.manifest_path = "manifest.csv" + + export_buildings(cfg) + + # Verify ReadAsArray was called with a window + args, kwargs = mock_band.ReadAsArray.call_args + # ReadAsArray(xoff, yoff, xsize, ysize) + self.assertEqual(len(args), 4) + xoff, yoff, xsize, ysize = args + # tile1 starts at 1000, 1000 (xmin, ymin) -> (xmin, ymax) = (1000, 2000) for GDAL + # xoff = (1000 - 0) / 1 = 1000 + # yoff = (2000 - 3000) / -1 = 1000 + self.assertEqual(xoff, 1000) + self.assertEqual(yoff, 1000) + +if __name__ == "__main__": + unittest.main() \ No newline at end of file