Extend lpolpg split workflow

Changelog:

- add --split-lpolpg-delete-source to remove source files after split

- auto-split after downloads when lpolpg is enabled in config
This commit is contained in:
2026-01-22 00:59:37 +01:00
parent 8ef494f446
commit a8f954805e
2 changed files with 42 additions and 0 deletions

View File

@@ -43,6 +43,7 @@ def split_lpolpg(
formats: Iterable[str] = ("laz", "xyz"),
ground_classes: Iterable[int] = (2,),
overwrite: bool = False,
delete_source: bool = False,
) -> int:
try:
import laspy
@@ -139,5 +140,11 @@ def split_lpolpg(
f"[lpolpg] {name}: ground={stats.ground_points}, "
f"objects={stats.object_points}, total={stats.total}"
)
if delete_source:
try:
os.remove(in_path)
print(f"[lpolpg] removed source {name}")
except OSError as exc:
print(f"[lpolpg] warn: could not remove {name}: {exc}")
return 0

View File

@@ -7,6 +7,11 @@ import os
import sys
from typing import Iterable
try:
import tomllib
except ImportError: # pragma: no cover - tomllib is required
tomllib = None
from geodata_download import run_download
from geodata_pipeline.config import Config, DEFAULT_CONFIG_PATH, ensure_default_config
@@ -113,6 +118,11 @@ def parse_args(argv: Iterable[str] | None = None) -> argparse.Namespace:
action="store_true",
help="Overwrite existing LPG/LPO outputs when splitting lpolpg.",
)
parser.add_argument(
"--split-lpolpg-delete-source",
action="store_true",
help="Delete lpolpg source files after a successful split.",
)
return parser.parse_args(argv)
@@ -129,6 +139,27 @@ def load_config(args: argparse.Namespace) -> Config:
return cfg.with_overrides(raw_dgm1_path=args.raw_dgm1_path, raw_dop20_path=args.raw_dop20_path)
def _download_requests_lpolpg(download_config: str, requested: list[str] | None) -> bool:
lpolpg_keys = {"lpolpg", "lpg", "lpo"}
if requested:
return any(name in lpolpg_keys for name in requested)
if tomllib is None:
return False
try:
with open(download_config, "rb") as fh:
cfg = tomllib.load(fh)
except OSError:
return False
datasets = cfg.get("datasets", {})
if not isinstance(datasets, dict):
return False
for key in lpolpg_keys:
dataset_cfg = datasets.get(key)
if isinstance(dataset_cfg, dict) and dataset_cfg.get("enabled", True):
return True
return False
def main(argv: Iterable[str] | None = None) -> int:
args = parse_args(argv)
cfg = load_config(args)
@@ -167,6 +198,9 @@ def main(argv: Iterable[str] | None = None) -> int:
)
if download_exit != 0:
return download_exit
if not args.split_lpolpg and _download_requests_lpolpg(args.download_config, datasets):
print("[download] lpolpg detected; splitting into lpg/lpo.")
args.split_lpolpg = True
if args.split_lpolpg:
formats = [fmt.strip() for fmt in args.split_lpolpg_formats.split(",") if fmt.strip()]
ground = [int(val) for val in args.split_lpolpg_ground_classes.split(",") if val.strip()]
@@ -175,6 +209,7 @@ def main(argv: Iterable[str] | None = None) -> int:
formats=formats,
ground_classes=ground,
overwrite=args.split_lpolpg_overwrite,
delete_source=args.split_lpolpg_delete_source,
)
if split_exit != 0:
return split_exit