# GeoData/geodata_pipeline/gdal_utils.py
# (Python, 143 lines, 3.9 KiB)

from __future__ import annotations
import glob
import os
import subprocess
import tempfile
import xml.etree.ElementTree as ET
from typing import Iterable, Sequence
from osgeo import gdal
# Enable GDAL's exception mode so GDAL errors surface as Python exceptions;
# the helpers in this module catch RuntimeError around GDAL calls.
gdal.UseExceptions()
def ensure_dir(path: str) -> None:
    """Create directory ``path`` and any missing ancestors.

    Does nothing if the directory is already present.
    """
    os.makedirs(name=path, exist_ok=True)
def ensure_parent(path: str) -> None:
    """Create the directory that contains ``path``, if ``path`` names one.

    A bare filename (no directory component) requires no work.
    """
    directory = os.path.dirname(path)
    if not directory:
        return
    ensure_dir(directory)
def open_dataset(path: str, purpose: str):
    """Open ``path`` with GDAL and return the dataset, or exit with a message.

    Args:
        path: Location passed to ``gdal.Open``.
        purpose: Short label prefixed to the error message on failure.

    Raises:
        SystemExit: If ``gdal.Open`` raises ``RuntimeError`` or returns None.
    """
    try:
        dataset = gdal.Open(path)
    except RuntimeError as err:
        raise SystemExit(f"{purpose}: {err}") from err
    if dataset is not None:
        return dataset
    raise SystemExit(f"{purpose}: GDAL returned None for {path}")
def _vrt_has_missing_sources(vrt_path: str) -> bool:
try:
tree = ET.parse(vrt_path)
except (ET.ParseError, OSError):
return True
vrt_dir = os.path.dirname(os.path.abspath(vrt_path))
missing = []
for source in tree.iterfind(".//SourceFilename"):
raw = (source.text or "").strip()
if not raw:
continue
candidates = []
if os.path.isabs(raw):
candidates.append(raw)
else:
candidates.extend(
[
os.path.abspath(raw),
os.path.abspath(os.path.join(vrt_dir, raw)),
]
)
if not any(os.path.exists(path) for path in candidates):
missing.append(raw)
return bool(missing)
def _vrt_has_sources(vrt_path: str) -> bool:
try:
tree = ET.parse(vrt_path)
except (ET.ParseError, OSError):
return False
return any(True for _ in tree.iterfind(".//SourceFilename"))
def _build_vrt_cli(vrt_path: str, sources: Sequence[str]) -> None:
    """Build ``vrt_path`` by invoking the ``gdalbuildvrt`` command-line tool.

    The source paths are written one per line to a temporary list file that is
    passed to ``gdalbuildvrt -input_file_list``. Any existing file at
    ``vrt_path`` is removed first.

    Args:
        vrt_path: Output VRT location; its parent directory is created.
        sources: Paths to include in the VRT.

    Raises:
        SystemExit: If ``gdalbuildvrt`` cannot be launched or exits non-zero.
    """
    ensure_parent(vrt_path)
    # safe_remove already tolerates a missing file, so no existence check.
    safe_remove(vrt_path)

    list_path = None
    try:
        # delete=False: the file must outlive the ``with`` so the subprocess
        # can read it after it has been closed.
        with tempfile.NamedTemporaryFile(
            "w", delete=False, encoding="utf-8"
        ) as handle:
            list_path = handle.name
            handle.writelines(f"{src}\n" for src in sources)
        try:
            subprocess.run(
                ["gdalbuildvrt", "-input_file_list", list_path, vrt_path],
                check=True,
            )
        except (OSError, subprocess.CalledProcessError) as exc:
            raise SystemExit(
                f"Could not build {vrt_path} using gdalbuildvrt: {exc}"
            ) from exc
    finally:
        # Covers failures while writing the list as well as while running the
        # tool, so the temporary file is never left behind.
        if list_path is not None:
            safe_remove(list_path)
def build_vrt(vrt_path: str, sources: Sequence[str], force: bool = False) -> bool:
    """Build a VRT at ``vrt_path`` from ``sources`` unless a usable one exists.

    An existing VRT is kept when ``force`` is false and none of its referenced
    sources are missing. Otherwise the VRT is rebuilt with ``gdal.BuildVRT``;
    if the result lists no sources, the ``gdalbuildvrt`` CLI is used as a
    fallback.

    Args:
        vrt_path: Output VRT location.
        sources: Paths to include in the VRT.
        force: Rebuild even if a VRT already exists at ``vrt_path``.

    Returns:
        False if the existing VRT was kept, True if a build was performed.

    Raises:
        SystemExit: If ``sources`` is empty, the old VRT cannot be removed, or
            the CLI fallback fails.
    """
    needs_rebuild = force
    if not needs_rebuild and os.path.exists(vrt_path):
        if not _vrt_has_missing_sources(vrt_path):
            return False
        print(f"{vrt_path} references missing sources; rebuilding...")
        needs_rebuild = True

    # Validate the inputs before touching the existing file: aborting for lack
    # of sources must not destroy a VRT that is already on disk.
    if not sources:
        raise SystemExit(f"No sources available to build VRT {vrt_path}.")

    if needs_rebuild and os.path.exists(vrt_path):
        try:
            os.remove(vrt_path)
        except OSError as exc:
            raise SystemExit(
                f"Could not remove existing VRT {vrt_path}: {exc}"
            ) from exc

    ensure_parent(vrt_path)
    print(f"Building {vrt_path} from {len(sources)} files...")
    try:
        ds = gdal.BuildVRT(vrt_path, list(sources))
        if ds is not None:
            ds.FlushCache()
        # Drop the reference so the dataset is released before the file is
        # re-read below.
        ds = None
    except RuntimeError as exc:
        # Not fatal: the check below falls back to the CLI if needed.
        print(f"Warning: GDAL BuildVRT failed for {vrt_path}: {exc}")

    if not _vrt_has_sources(vrt_path):
        print(f"Warning: {vrt_path} missing sources after BuildVRT; retrying with gdalbuildvrt...")
        _build_vrt_cli(vrt_path, sources)
    return True
def safe_remove(path: str) -> bool:
    """Delete ``path`` without raising; return True only if it was removed.

    A missing file is silently reported as False. Any other ``OSError`` is
    printed as a warning and also reported as False.
    """
    try:
        os.remove(path)
    except FileNotFoundError:
        return False
    except OSError as err:
        print(f"Warning: could not remove {path}: {err}")
        return False
    return True
def cleanup_aux_files(patterns: Iterable[str]) -> int:
    """Delete every file matching any of the glob ``patterns``.

    Returns:
        The number of files that were actually removed.
    """
    return sum(
        1
        for pattern in patterns
        for candidate in glob.glob(pattern)
        if safe_remove(candidate)
    )