Files
GeoData/geodata_pipeline/heightmaps_enhanced.py

299 lines
9.0 KiB
Python

"""Enhanced heightmap export with LPG ground point validation.
Extends the standard heightmap export by:
1. Validating DGM1 raster against LPG ground points
2. Flagging areas where DGM1 and LPG diverge significantly
3. Optionally exporting quality metrics
"""
from __future__ import annotations
import csv
import os
from typing import Optional, Tuple
import numpy as np
from osgeo import gdal
from .config import Config
from .gdal_utils import build_vrt, open_dataset
from .heightmaps import export_heightmaps # Reuse existing export
from .pointcloud import find_pointcloud_file, read_pointcloud_file
def validate_tile_with_lpg(
    tile_id: str,
    dgm1_path: str,
    xmin: float,
    ymin: float,
    xmax: float,
    ymax: float,
    cfg: Config,
) -> Optional[dict]:
    """Validate DGM1 tile against LPG ground points.

    Samples the DGM1 raster at every LPG ground-point location within the
    tile bounds and aggregates the elevation differences (LPG z minus DGM1)
    into summary statistics.

    Args:
        tile_id: Tile identifier
        dgm1_path: Path to DGM1 raster
        xmin, ymin, xmax, ymax: Tile bounds
        cfg: Configuration

    Returns:
        Dict with validation metrics, or None if no LPG data is available
        or the DGM1 raster cannot be opened.
    """
    pc_cfg = cfg.pointcloud
    eh_cfg = cfg.heightmap_enhanced

    # Check for LPG data
    lpg_file = find_pointcloud_file(pc_cfg.lpg_dir, tile_id)
    if not lpg_file:
        return None

    # Load DGM1 raster
    dgm1_ds = open_dataset(dgm1_path, required=False)
    if dgm1_ds is None:
        return None
    band = dgm1_ds.GetRasterBand(1)
    dgm1 = band.ReadAsArray().astype(np.float32)
    gt = dgm1_ds.GetGeoTransform()
    # BUG FIX: `GetNoDataValue() or -9999` silently replaced a legitimate
    # nodata value of 0.0 (falsy) with -9999. Test for None explicitly.
    nodata = band.GetNoDataValue()
    if nodata is None:
        nodata = -9999
    dgm1_ds = None  # release the GDAL dataset handle

    # Load LPG points for tile
    lpg = read_pointcloud_file(lpg_file, bounds=(xmin, ymin, xmax, ymax))
    if len(lpg) == 0:
        return {"point_count": 0, "valid": True}

    # Sample DGM1 at LPG point locations (vectorized world -> pixel mapping;
    # truncation toward zero matches the previous per-point int() conversion,
    # and out-of-raster points are masked away).
    xs = np.asarray(lpg.x, dtype=np.float64)
    ys = np.asarray(lpg.y, dtype=np.float64)
    zs = np.asarray(lpg.z, dtype=np.float64)
    cols = ((xs - gt[0]) / gt[1]).astype(np.int64)
    rows = ((ys - gt[3]) / gt[5]).astype(np.int64)
    inside = (
        (rows >= 0) & (rows < dgm1.shape[0]) &
        (cols >= 0) & (cols < dgm1.shape[1])
    )
    dgm_vals = dgm1[rows[inside], cols[inside]]
    # Skip cells at/near the nodata value (1 m tolerance above nodata,
    # matching the original `> nodata + 1` test).
    sampled_mask = dgm_vals > nodata + 1
    diffs = zs[inside][sampled_mask] - dgm_vals[sampled_mask]
    if diffs.size == 0:
        return {"point_count": len(lpg), "sampled": 0, "valid": True}

    # Compute validation metrics
    threshold = eh_cfg.lpg_validation_threshold_m
    abs_diffs = np.abs(diffs)
    metrics = {
        "point_count": len(lpg),
        "sampled": int(diffs.size),
        "mean_diff": float(np.mean(diffs)),
        "std_diff": float(np.std(diffs)),
        "median_diff": float(np.median(diffs)),
        "min_diff": float(np.min(diffs)),
        "max_diff": float(np.max(diffs)),
        "rmse": float(np.sqrt(np.mean(diffs ** 2))),
        "outlier_count": int(np.sum(abs_diffs > threshold)),
        "outlier_pct": float(np.mean(abs_diffs > threshold) * 100),
        # Tile passes when the median offset stays within the threshold.
        "valid": bool(np.abs(np.median(diffs)) < threshold),
    }
    return metrics
def export_quality_map(
    tile_id: str,
    dgm1_path: str,
    xmin: float,
    ymin: float,
    xmax: float,
    ymax: float,
    cfg: Config,
) -> Optional[str]:
    """Export quality/difference map comparing DGM1 to LPG.

    Accumulates per-cell mean (LPG z - DGM1) differences and writes them as
    an 8-bit PNG where [-2*threshold, +2*threshold] maps to [0, 255]
    (128 == perfect agreement).

    Args:
        tile_id: Tile identifier
        dgm1_path: Path to DGM1 raster
        xmin, ymin, xmax, ymax: Tile bounds
        cfg: Configuration

    Returns:
        Path to quality map PNG, or None if no LPG data
    """
    pc_cfg = cfg.pointcloud
    eh_cfg = cfg.heightmap_enhanced

    lpg_file = find_pointcloud_file(pc_cfg.lpg_dir, tile_id)
    if not lpg_file:
        return None
    dgm1_ds = open_dataset(dgm1_path, required=False)
    if dgm1_ds is None:
        return None
    band = dgm1_ds.GetRasterBand(1)
    dgm1 = band.ReadAsArray().astype(np.float32)
    gt = dgm1_ds.GetGeoTransform()
    # BUG FIX: `GetNoDataValue() or -9999` silently replaced a legitimate
    # nodata value of 0.0 (falsy) with -9999. Test for None explicitly.
    nodata = band.GetNoDataValue()
    if nodata is None:
        nodata = -9999
    dgm1_ds = None  # release the GDAL dataset handle

    # Load LPG points
    lpg = read_pointcloud_file(lpg_file, bounds=(xmin, ymin, xmax, ymax))
    if len(lpg) == 0:
        return None

    # Accumulate differences per raster cell (vectorized; np.add.at handles
    # multiple points landing in the same cell).
    xs = np.asarray(lpg.x, dtype=np.float64)
    ys = np.asarray(lpg.y, dtype=np.float64)
    zs = np.asarray(lpg.z, dtype=np.float64)
    cols = ((xs - gt[0]) / gt[1]).astype(np.int64)
    rows = ((ys - gt[3]) / gt[5]).astype(np.int64)
    inside = (
        (rows >= 0) & (rows < dgm1.shape[0]) &
        (cols >= 0) & (cols < dgm1.shape[1])
    )
    rows, cols, zs = rows[inside], cols[inside], zs[inside]
    dgm_vals = dgm1[rows, cols]
    # Skip nodata cells (1 m tolerance above nodata, as in validation).
    keep = dgm_vals > nodata + 1
    rows, cols = rows[keep], cols[keep]
    diff_sum = np.zeros_like(dgm1)
    diff_count = np.zeros_like(dgm1, dtype=np.int32)
    np.add.at(diff_sum, (rows, cols), zs[keep] - dgm_vals[keep])
    np.add.at(diff_count, (rows, cols), 1)

    # Compute mean difference per cell; cells without points stay 0.
    with np.errstate(divide='ignore', invalid='ignore'):
        diff_mean = np.where(diff_count > 0, diff_sum / diff_count, 0)

    # Scale to 8-bit for visualization:
    # map [-threshold, +threshold] to [0, 255].
    threshold = eh_cfg.lpg_validation_threshold_m * 2
    quality = (diff_mean + threshold) / (2 * threshold) * 255
    quality = np.clip(quality, 0, 255).astype(np.uint8)

    # Export as PNG. BUG FIX: GDAL's PNG driver is CreateCopy-only, so
    # `driver.Create()` fails; write to an in-memory (MEM) dataset first
    # and then CreateCopy it to PNG.
    os.makedirs(eh_cfg.quality_map_dir, exist_ok=True)
    out_path = os.path.join(eh_cfg.quality_map_dir, f"{tile_id}_quality.png")
    mem_ds = gdal.GetDriverByName("MEM").Create(
        "", dgm1.shape[1], dgm1.shape[0], 1, gdal.GDT_Byte
    )
    mem_ds.SetGeoTransform(gt)
    mem_ds.GetRasterBand(1).WriteArray(quality)
    out_ds = gdal.GetDriverByName("PNG").CreateCopy(out_path, mem_ds)
    out_ds = None  # flush and close the PNG
    mem_ds = None
    return out_path
def export_heightmaps_enhanced(cfg: Config) -> int:
    """Export heightmaps with LPG validation.

    This wraps the standard heightmap export and adds:
    1. LPG validation metrics per tile
    2. Optional quality map export

    Args:
        cfg: Configuration

    Returns:
        Exit code (0 = success)
    """
    # Hoisted out of the per-tile loop where it was previously re-imported
    # on every iteration.
    import glob

    pc_cfg = cfg.pointcloud
    eh_cfg = cfg.heightmap_enhanced

    # First, run standard heightmap export; propagate its failure code.
    result = export_heightmaps(cfg)
    if result != 0:
        return result

    # LPG validation is an opt-in extra.
    if not pc_cfg.use_lpg_validation:
        print("[heightmaps_enhanced] LPG validation disabled, skipping.")
        return 0

    # Load the tile manifest produced by the standard export.
    if not os.path.exists(cfg.export.manifest_path):
        print(f"[heightmaps_enhanced] Manifest not found: {cfg.export.manifest_path}")
        return 1
    tiles = []
    with open(cfg.export.manifest_path, "r") as f:
        reader = csv.DictReader(f)
        for row in reader:
            tiles.append({
                "tile_id": row["tile_id"],
                "xmin": float(row["xmin"]),
                "ymin": float(row["ymin"]),
                "xmax": float(row["xmax"]),
                "ymax": float(row["ymax"]),
            })

    validation_results = []
    quality_maps = []
    for tile in tiles:
        tile_id = tile["tile_id"]
        # Find the matching DGM1 raster by tile-id substring.
        dgm1_pattern = os.path.join(cfg.raw.dgm1_dir, f"*{tile_id}*.tif")
        dgm1_files = glob.glob(dgm1_pattern)
        if not dgm1_files:
            print(f"[heightmaps_enhanced] No DGM1 for {tile_id}, skipping validation")
            continue
        dgm1_path = dgm1_files[0]

        # Validate against LPG
        metrics = validate_tile_with_lpg(
            tile_id,
            dgm1_path,
            tile["xmin"],
            tile["ymin"],
            tile["xmax"],
            tile["ymax"],
            cfg,
        )
        if metrics:
            metrics["tile_id"] = tile_id
            validation_results.append(metrics)
            status = "OK" if metrics.get("valid", True) else "WARN"
            rmse = metrics.get("rmse", 0)
            outlier_pct = metrics.get("outlier_pct", 0)
            print(f"[heightmaps_enhanced] {tile_id}: {status} (RMSE={rmse:.3f}m, outliers={outlier_pct:.1f}%)")
            # Export quality map if enabled
            if eh_cfg.export_quality_map:
                qm_path = export_quality_map(
                    tile_id,
                    dgm1_path,
                    tile["xmin"],
                    tile["ymin"],
                    tile["xmax"],
                    tile["ymax"],
                    cfg,
                )
                if qm_path:
                    quality_maps.append(qm_path)
        else:
            print(f"[heightmaps_enhanced] {tile_id}: No LPG data")

    # Export validation summary CSV (one row per validated tile).
    if validation_results:
        summary_path = os.path.join(eh_cfg.quality_map_dir, "validation_summary.csv")
        os.makedirs(eh_cfg.quality_map_dir, exist_ok=True)
        fieldnames = ["tile_id", "point_count", "sampled", "mean_diff", "std_diff",
                      "median_diff", "min_diff", "max_diff", "rmse", "outlier_count",
                      "outlier_pct", "valid"]
        with open(summary_path, "w", newline="") as f:
            # extrasaction="ignore" tolerates short-circuit metric dicts
            # (e.g. tiles that returned only point_count/valid).
            writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction="ignore")
            writer.writeheader()
            writer.writerows(validation_results)
        print(f"[heightmaps_enhanced] Validation summary: {summary_path}")

    valid_count = sum(1 for r in validation_results if r.get("valid", True))
    total_count = len(validation_results)
    print(f"[heightmaps_enhanced] DONE. {valid_count}/{total_count} tiles passed validation.")
    if quality_maps:
        print(f"[heightmaps_enhanced] Quality maps exported: {len(quality_maps)}")
    return 0