diff --git a/geodata_pipeline/swe_lods.py b/geodata_pipeline/swe_lods.py index 354449e..0ae4cd3 100644 --- a/geodata_pipeline/swe_lods.py +++ b/geodata_pipeline/swe_lods.py @@ -768,12 +768,29 @@ def _write_boundary_manifest( tile_x = int(row["tile_x"]) tile_y = int(row["tile_y"]) - source_ids = _sample_boundary_ids(source_ds, bounds, resolution) - sink_ids = _sample_boundary_ids(sink_ds, bounds, resolution) + source_arr = _warp_id_array(source_ds, bounds, resolution, resolution) if source_ds is not None else None + sink_arr = _warp_id_array(sink_ds, bounds, resolution, resolution) if sink_ds is not None else None + + source_ids = _ids_to_entries(source_arr) + sink_ids = _ids_to_entries(sink_arr) _accumulate_id_stats(source_stats, source_ids, lod) _accumulate_id_stats(sink_stats, sink_ids, lod) + source_id_path = "" + sink_id_path = "" + lod_dir = os.path.join(swe_cfg.out_dir, lod) + if source_arr is not None: + source_dir = os.path.join(lod_dir, "source_ids") + ensure_dir(source_dir) + source_id_path = os.path.join(source_dir, f"source_ids_{tile_x}_{tile_y}.exr") + _write_exr(source_id_path, source_arr.astype(np.float32, copy=False), swe_cfg.prefer_float16) + if sink_arr is not None: + sink_dir = os.path.join(lod_dir, "sink_ids") + ensure_dir(sink_dir) + sink_id_path = os.path.join(sink_dir, f"sink_ids_{tile_x}_{tile_y}.exr") + _write_exr(sink_id_path, sink_arr.astype(np.float32, copy=False), swe_cfg.prefer_float16) + tiles_payload.append( { "lod": lod, @@ -784,6 +801,8 @@ def _write_boundary_manifest( "bounds": [bounds[0], bounds[1], bounds[2], bounds[3]], "source_ids": source_ids, "sink_ids": sink_ids, + "source_id_path": source_id_path, + "sink_id_path": sink_id_path, } ) @@ -865,6 +884,12 @@ def _sample_boundary_ids(ds, bounds: tuple[float, float, float, float], resoluti if ds is None: return [] ids = _warp_id_array(ds, bounds, resolution, resolution) + return _ids_to_entries(ids) + + +def _ids_to_entries(ids: np.ndarray | None) -> list[dict]: + if ids is None: + return [] u, c = np.unique(ids, return_counts=True) out = [] for ident, count in zip(u.tolist(), c.tolist()):