Add monolithic archive build mode with clean+validate

@@ -40,7 +40,7 @@ This repository converts DGM1 elevation tiles into Unity-ready 16-bit PNG height
 ### How the export works
 - Heightmaps: the pipeline builds `work/dgm.vrt` from all `raw/dgm1/*.tif`, computes a global min/max once (legacy fallback), and warps each tile footprint to `heightmap.out_res` with `srcNodata=-9999`. Per-tile min/max are computed from the warped tile and used to scale PNGs to `[0, 65535]` by default (`heightmap.use_tile_minmax=false` keeps global scaling). `export_unity/tile_index.csv` records `global_min/global_max`, `tile_min/tile_max`, and `tile_key = f"{floor((xmin + overlap_x) / tile_size_x)}_{floor((ymin + overlap_y) / tile_size_y)}"` (defaults: `tile_size_x=1000.0`, `tile_size_y=1000.0`, `overlap_x=0.5`, `overlap_y=0.5` in `[tile_key]`).
 - Orthophotos: `work/dop.vrt` is built from `raw/dop20/jp2/*.jp2`; the manifest drives the cropping bounds. JPEG tiles are written to `export_unity/ortho_jpg/` with matching `.jgw` worldfiles. If the manifest is missing, the orthophoto export aborts; run the heightmap export first or use `--export all`.
-- Archives: `--build-from-archive` expands every `*.zip` under `archive/*` into the matching `raw/*` directories and copies `archive/dop20/filelist.txt` next to `raw/dop20/` for the downloader.
+- Archives: `--build-from-archive` supports a monolithic ZIP (`archive/archive_raw.zip`) and expands every `*.zip` under `archive/*` into the matching `raw/*` directories; dataset zips overlay the monolithic data. It also copies `archive/dop20/filelist.txt` next to `raw/dop20/` for the downloader.
 - Cleanup: temporary `_tmp.tif` and GDAL aux XML files under `work/` and `raw/dgm1/` are removed at the end of the heightmap export; avoid storing non-GDAL metadata in those folders.
 
 ### Key Commands
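
The `tile_index.csv` fields quoted in the heightmap bullet above reduce to two small formulas. A minimal sketch, assuming per-tile scaling and ordinary rounding (the exact rounding mode is not stated in the diff); the coordinates and elevations are illustrative inputs, not pipeline API:

```python
from math import floor

# Defaults quoted in the bullet above ([tile_key] config section).
TILE_SIZE_X, TILE_SIZE_Y = 1000.0, 1000.0
OVERLAP_X, OVERLAP_Y = 0.5, 0.5


def tile_key(xmin: float, ymin: float) -> str:
    # Same expression as recorded in export_unity/tile_index.csv.
    return f"{floor((xmin + OVERLAP_X) / TILE_SIZE_X)}_{floor((ymin + OVERLAP_Y) / TILE_SIZE_Y)}"


def to_png16(height: float, tile_min: float, tile_max: float) -> int:
    # Per-tile scaling to [0, 65535] (the default); rounding is an assumption.
    span = max(tile_max - tile_min, 1e-9)
    return round((height - tile_min) / span * 65535)


print(tile_key(329999.5, 5513999.5))  # "330_5514" (illustrative UTM-like coords)
print(to_png16(142.7, 120.0, 260.0))  # 10626
```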
@@ -48,7 +48,8 @@ This repository converts DGM1 elevation tiles into Unity-ready 16-bit PNG height
 - Run export pipeline: `uv run python geodata_to_unity.py --export all`
 - Inspect an output tile: `gdalinfo export_unity/height_png16/<tile>.png | head`
 - Override config paths: use `--config <path>`, `--raw-dgm1-path <dir>`, `--raw-dop20-path <dir>`.
-- Build raws from archives: `uv run python geodata_to_unity.py --build-from-archive --export all` (unzips `archive/*`; dop20 filelist stays in `archive/dop20/` for the downloader).
+- Build raws from archives: `uv run python geodata_to_unity.py --build-from-archive --export all` (uses `archive/archive_raw.zip` when present, then overlays `archive/*/*.zip`).
+- Deterministic submission rebuild: `uv run python geodata_to_unity.py --build-from-archive --clean-raw --validate --export all --force-vrt`.
 - Rebuild VRTs after moving data: add `--force-vrt`.
 
 ### Workflow Notes
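
The updated build command describes a layered extraction: the monolithic ZIP is the base and per-dataset zips overlay it. A simplified sketch of that ordering, assuming a repository-relative `archive/` layout; the real implementation (see the module diff below) extracts member-by-member with path sanitization rather than `extractall`:

```python
import glob
import os
import zipfile

ARCHIVE_DIR = "archive"  # assumption: repository-relative layout
MONOLITHIC_ZIP = os.path.join(ARCHIVE_DIR, "archive_raw.zip")


def overlay_archives(raw_root: str = "raw") -> None:
    layers: list[str] = []
    if os.path.isfile(MONOLITHIC_ZIP):
        layers.append(MONOLITHIC_ZIP)  # base layer
    # Per-dataset zips come last, so their files win on conflicts.
    layers += sorted(glob.glob(os.path.join(ARCHIVE_DIR, "*", "*.zip")))
    for zpath in layers:
        with zipfile.ZipFile(zpath) as zf:
            zf.extractall(raw_root)
```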
@@ -58,6 +59,8 @@ This repository converts DGM1 elevation tiles into Unity-ready 16-bit PNG height
 - Keep file names stable to avoid churn in Unity scenes; re-exports overwrite in place.
 - Large raw datasets are intentionally excluded from version control; document download sources or scripts instead of committing data.
 - Additional inputs: download helper lives in `scripts/dlscript_dop20.sh` and pulls JP2/J2W/XML orthophotos listed in `archive/dop20/filelist.txt` (one URL per line); `archive/` can hold zipped 3D building tiles for future use.
+- `--clean-raw` only removes managed ingestion dirs (`raw/dgm1`, `raw/dop20/jp2`, `raw/dop20/j2w`, `raw/dop20/meta`, `raw/citygml/lod1`, `raw/citygml/lod2`) and intentionally keeps custom masks.
+- `--validate` writes `work/archive_materialize_report.json` and fails only when core datasets are missing (`dgm1 tif`, `dop20 jp2`, `citygml lod2`); optional sidecar gaps are warnings.
 - Handoff to Unity: copy/sync `export_unity/height_png16/` and `export_unity/tile_index.csv` into `DTrierFlood/Assets/GeoData/` before running the Unity-side importer. Keep `heightmap.out_res` aligned with the importer's expected resolution (currently 1025).
 
 ### Orthophotos (textures)
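
The `--validate` bullet points at `work/archive_materialize_report.json`. A sketch of consuming that report, using the `validation` keys defined in the module diff below; treating it as a CI-style gate is an assumption:

```python
import json
import sys

REPORT_PATH = "work/archive_materialize_report.json"

with open(REPORT_PATH, encoding="utf-8") as fh:
    report = json.load(fh)

validation = report["validation"]
for message in validation["warnings"]:
    print(f"warning: {message}")
if validation["enabled"] and not validation["passed"]:
    for message in validation["errors"]:
        print(f"error: {message}")
    sys.exit(1)
print("dataset counts:", validation["counts"])
```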
@@ -1,9 +1,11 @@
 from __future__ import annotations
 
+import json
 import glob
 import os
 import shutil
 import zipfile
+from datetime import datetime, timezone
 from typing import Iterable
 
 from .config import Config, ensure_default_config
@@ -31,33 +33,404 @@ def _paths_from_config(cfg: Config) -> Iterable[str]:
     ]


-def materialize_archives(cfg: Config) -> None:
-    """Best-effort expansion of archive zips into raw inputs."""
+def materialize_archives(
+    cfg: Config,
+    *,
+    clean_raw: bool = False,
+    validate: bool = False,
+    archive_raw_zip: str | None = None,
+) -> int:
+    """Expand archive zips into raw inputs with optional cleanup and validation."""
     ensure_directories(cfg)
-    _unpack_all(cfg.archives.dgm1_dir, cfg.raw.dgm1_dir)
-    _unpack_all(cfg.archives.citygml_lod1_dir, cfg.raw.citygml_lod1_dir)
-    _unpack_all(cfg.archives.citygml_lod2_dir, cfg.raw.citygml_lod2_dir)
-    _unpack_all(cfg.archives.dop20_dir, cfg.raw.dop20_dir)

+    report = {
+        "schema_version": 1,
+        "timestamp_utc": datetime.now(timezone.utc).isoformat(),
+        "mode": {
+            "clean_raw": bool(clean_raw),
+            "validate": bool(validate),
+        },
+        "paths": {
+            "archive_raw_zip_default": os.path.join("archive", "archive_raw.zip"),
+            "archive_raw_zip_requested": archive_raw_zip or "",
+            "raw_dgm1_dir": cfg.raw.dgm1_dir,
+            "raw_dop20_jp2_dir": cfg.raw.dop20_dir,
+            "raw_citygml_lod1_dir": cfg.raw.citygml_lod1_dir,
+            "raw_citygml_lod2_dir": cfg.raw.citygml_lod2_dir,
+        },
+        "clean_raw": {
+            "removed_dirs": [],
+            "errors": [],
+        },
+        "monolithic_zip": {
+            "used": False,
+            "path": "",
+            "entries_total": 0,
+            "entries_extracted": 0,
+            "entries_skipped": 0,
+            "files_overwritten": 0,
+            "unsafe_entries": 0,
+            "errors": [],
+        },
+        "per_dataset": {},
+        "filelist_copy": {
+            "source": os.path.join(cfg.archives.dop20_dir, "filelist.txt"),
+            "destination": os.path.join(os.path.dirname(cfg.raw.dop20_dir), "filelist.txt"),
+            "copied": False,
+            "missing_source": False,
+            "error": "",
+        },
+        "validation": {
+            "enabled": bool(validate),
+            "passed": True,
+            "errors": [],
+            "warnings": [],
+            "counts": {},
+        },
+    }
+
+    if clean_raw:
+        _clean_managed_raw_dirs(cfg, report)
+
+    monolithic_zip = archive_raw_zip or os.path.join("archive", "archive_raw.zip")
+    report["monolithic_zip"]["path"] = monolithic_zip
+
+    if archive_raw_zip and not os.path.isfile(monolithic_zip):
+        msg = f"[archive] Requested monolithic ZIP not found: {monolithic_zip}"
+        print(msg)
+        report["monolithic_zip"]["errors"].append(msg)
+        _write_materialize_report(cfg, report)
+        return 1
+
+    if os.path.isfile(monolithic_zip):
+        report["monolithic_zip"]["used"] = True
+        if not _extract_monolithic_zip(monolithic_zip, cfg, report):
+            _write_materialize_report(cfg, report)
+            return 1
+    else:
+        print(f"[archive] Monolithic ZIP not found (optional): {monolithic_zip}")
+
+    unpack_ok = True
+    unpack_ok &= _unpack_all(
+        cfg.archives.dgm1_dir,
+        cfg.raw.dgm1_dir,
+        report_node=report["per_dataset"].setdefault("dgm1", {}),
+    )
+    unpack_ok &= _unpack_all(
+        cfg.archives.citygml_lod1_dir,
+        cfg.raw.citygml_lod1_dir,
+        report_node=report["per_dataset"].setdefault("citygml_lod1", {}),
+    )
+    unpack_ok &= _unpack_all(
+        cfg.archives.citygml_lod2_dir,
+        cfg.raw.citygml_lod2_dir,
+        report_node=report["per_dataset"].setdefault("citygml_lod2", {}),
+    )
+    unpack_ok &= _unpack_all(
+        cfg.archives.dop20_dir,
+        cfg.raw.dop20_dir,
+        report_node=report["per_dataset"].setdefault("dop20_jp2", {}),
+    )
+    if not unpack_ok:
+        _write_materialize_report(cfg, report)
+        return 1
+
-    _copy_filelist(
-        os.path.join(cfg.archives.dop20_dir, "filelist.txt"),
-        os.path.join(os.path.dirname(cfg.raw.dop20_dir), "filelist.txt"),
-    )
+    _copy_filelist(
+        report["filelist_copy"]["source"],
+        report["filelist_copy"]["destination"],
+        report["filelist_copy"],
+    )
+
+    if validate:
+        valid = _validate_materialized_raw(cfg, report)
+        _write_materialize_report(cfg, report)
+        if not valid:
+            return 1
+    else:
+        _write_materialize_report(cfg, report)
+
+    print(
+        "[archive] Materialization complete: "
+        f"monolithic_used={report['monolithic_zip']['used']}, "
+        f"report=work/archive_materialize_report.json"
+    )
+    return 0


+def _clean_managed_raw_dirs(cfg: Config, report: dict) -> None:
+    dop20_root = os.path.dirname(cfg.raw.dop20_dir)
+    managed_dirs = [
+        cfg.raw.dgm1_dir,
+        cfg.raw.citygml_lod1_dir,
+        cfg.raw.citygml_lod2_dir,
+        cfg.raw.dop20_dir,
+        os.path.join(dop20_root, "j2w"),
+        os.path.join(dop20_root, "meta"),
+    ]
+
+    for path in managed_dirs:
+        if os.path.isdir(path):
+            try:
+                shutil.rmtree(path)
+                report["clean_raw"]["removed_dirs"].append(path)
+                print(f"[archive] Removed raw dir: {path}")
+            except OSError as exc:
+                msg = f"[archive] Failed to remove {path}: {exc}"
+                report["clean_raw"]["errors"].append(msg)
+                print(msg)
+
+        os.makedirs(path, exist_ok=True)


+def _extract_monolithic_zip(zip_path: str, cfg: Config, report: dict) -> bool:
+    targets = _monolithic_targets(cfg)
+    node = report["monolithic_zip"]
+
+    try:
+        with zipfile.ZipFile(zip_path, "r") as zf:
+            entries = zf.infolist()
+            node["entries_total"] = len(entries)
+            for info in entries:
+                if info.is_dir():
+                    continue
+                parts = _safe_zip_parts(info.filename)
+                if parts is None:
+                    node["unsafe_entries"] += 1
+                    node["entries_skipped"] += 1
+                    continue
+
+                resolved = _resolve_monolithic_member(parts, targets)
+                if resolved is None:
+                    node["entries_skipped"] += 1
+                    continue
+
+                _, out_path = resolved
+                if os.path.exists(out_path):
+                    node["files_overwritten"] += 1
+                _extract_member(zf, info, out_path)
+                node["entries_extracted"] += 1
+    except (OSError, zipfile.BadZipFile, zipfile.LargeZipFile) as exc:
+        msg = f"[archive] Could not unpack monolithic ZIP {zip_path}: {exc}"
+        node["errors"].append(msg)
+        print(msg)
+        return False
+
+    print(
+        f"[archive] Monolithic ZIP extracted: {zip_path} "
+        f"(extracted={node['entries_extracted']}, skipped={node['entries_skipped']})"
+    )
+    return True


+def _monolithic_targets(cfg: Config) -> dict[str, dict]:
+    dop20_root = os.path.dirname(cfg.raw.dop20_dir)
+    return {
+        "dgm1": {
+            "dest": cfg.raw.dgm1_dir,
+            "markers": [("raw", "dgm1"), ("dgm1",)],
+        },
+        "dop20_jp2": {
+            "dest": cfg.raw.dop20_dir,
+            "markers": [("raw", "dop20", "jp2"), ("dop20", "jp2")],
+        },
+        "dop20_j2w": {
+            "dest": os.path.join(dop20_root, "j2w"),
+            "markers": [("raw", "dop20", "j2w"), ("dop20", "j2w")],
+        },
+        "dop20_meta": {
+            "dest": os.path.join(dop20_root, "meta"),
+            "markers": [("raw", "dop20", "meta"), ("dop20", "meta")],
+        },
+        "citygml_lod1": {
+            "dest": cfg.raw.citygml_lod1_dir,
+            "markers": [("raw", "citygml", "lod1"), ("citygml", "lod1")],
+        },
+        "citygml_lod2": {
+            "dest": cfg.raw.citygml_lod2_dir,
+            "markers": [("raw", "citygml", "lod2"), ("citygml", "lod2")],
+        },
+        "dop20_filelist": {
+            "dest": os.path.join(dop20_root, "filelist.txt"),
+            "markers": [("raw", "dop20"), ("dop20",)],
+        },
+    }


+def _resolve_monolithic_member(parts: list[str], targets: dict[str, dict]) -> tuple[str, str] | None:
+    lower_parts = [p.lower() for p in parts]
+    # Prefer more specific markers first.
+    keys = (
+        "dop20_jp2",
+        "dop20_j2w",
+        "dop20_meta",
+        "citygml_lod1",
+        "citygml_lod2",
+        "dgm1",
+        "dop20_filelist",
+    )
+    for key in keys:
+        target = targets[key]
+        for marker in target["markers"]:
+            idx = _find_marker(lower_parts, marker)
+            if idx is None:
+                continue
+            tail = parts[idx + len(marker) :]
+            if key == "dop20_filelist":
+                if len(tail) == 1 and tail[0].lower() == "filelist.txt":
+                    return key, target["dest"]
+                continue
+            if not tail:
+                continue
+            return key, os.path.join(target["dest"], *tail)
+    return None


+def _find_marker(parts: list[str], marker: tuple[str, ...]) -> int | None:
+    width = len(marker)
+    if width == 0 or len(parts) < width:
+        return None
+    for idx in range(0, len(parts) - width + 1):
+        if tuple(parts[idx : idx + width]) == marker:
+            return idx
+    return None


+def _safe_zip_parts(member_name: str) -> list[str] | None:
+    # Normalize to POSIX separators to make archive parsing deterministic.
+    normalized = member_name.replace("\\", "/")
+    normalized = normalized.strip("/")
+    if not normalized:
+        return None
+
+    parts = []
+    for part in normalized.split("/"):
+        token = part.strip()
+        if token in ("", "."):
+            continue
+        if token == "..":
+            return None
+        parts.append(token)
+    if not parts:
+        return None
+    return parts


+def _extract_member(zf: zipfile.ZipFile, info: zipfile.ZipInfo, out_path: str) -> None:
+    os.makedirs(os.path.dirname(out_path), exist_ok=True)
+    with zf.open(info, "r") as src, open(out_path, "wb") as dst:
+        shutil.copyfileobj(src, dst)


-def _unpack_all(archive_dir: str, dest_dir: str) -> None:
+def _unpack_all(archive_dir: str, dest_dir: str, *, report_node: dict) -> bool:
     os.makedirs(dest_dir, exist_ok=True)
-    for zpath in glob.glob(os.path.join(archive_dir, "*.zip")):
+    zips = sorted(glob.glob(os.path.join(archive_dir, "*.zip")))
+    report_node["archive_dir"] = archive_dir
+    report_node["dest_dir"] = dest_dir
+    report_node["zip_count"] = len(zips)
+    report_node["files_extracted"] = 0
+    report_node["files_overwritten"] = 0
+    report_node["unsafe_entries"] = 0
+    report_node["errors"] = []
+
+    for zpath in zips:
         print(f"Unpacking {zpath} -> {dest_dir}")
-        with zipfile.ZipFile(zpath, "r") as zf:
-            zf.extractall(dest_dir)
+        try:
+            with zipfile.ZipFile(zpath, "r") as zf:
+                for info in zf.infolist():
+                    if info.is_dir():
+                        continue
+                    parts = _safe_zip_parts(info.filename)
+                    if parts is None:
+                        report_node["unsafe_entries"] += 1
+                        continue
+                    out_path = os.path.join(dest_dir, *parts)
+                    if os.path.exists(out_path):
+                        report_node["files_overwritten"] += 1
+                    _extract_member(zf, info, out_path)
+                    report_node["files_extracted"] += 1
+        except (OSError, zipfile.BadZipFile, zipfile.LargeZipFile) as exc:
+            msg = f"[archive] Could not unpack {zpath}: {exc}"
+            report_node["errors"].append(msg)
+            print(msg)
+            return False
+    return True


-def _copy_filelist(src: str, dest: str) -> None:
+def _copy_filelist(src: str, dest: str, report_node: dict) -> None:
     if not os.path.exists(src):
+        report_node["missing_source"] = True
         print(f"[archive] Optional dop20 filelist missing: {src}")
         return
     os.makedirs(os.path.dirname(dest), exist_ok=True)
-    shutil.copy2(src, dest)
-    print(f"Copied filelist: {src} -> {dest}")
+    try:
+        shutil.copy2(src, dest)
+        report_node["copied"] = True
+        print(f"Copied filelist: {src} -> {dest}")
+    except OSError as exc:
+        report_node["error"] = str(exc)
+        print(f"[archive] Failed to copy filelist {src} -> {dest}: {exc}")


+def _count_ext(root: str, suffixes: tuple[str, ...]) -> int:
+    if not os.path.isdir(root):
+        return 0
+    total = 0
+    for cur_root, _, files in os.walk(root):
+        for name in files:
+            if name.lower().endswith(suffixes):
+                total += 1
+    return total


+def _validate_materialized_raw(cfg: Config, report: dict) -> bool:
+    node = report["validation"]
+    dop20_root = os.path.dirname(cfg.raw.dop20_dir)
+
+    counts = {
+        "dgm1_tif": _count_ext(cfg.raw.dgm1_dir, (".tif", ".tiff")),
+        "dop20_jp2": _count_ext(cfg.raw.dop20_dir, (".jp2",)),
+        "citygml_lod2": _count_ext(cfg.raw.citygml_lod2_dir, (".gml", ".xml")),
+        "citygml_lod1": _count_ext(cfg.raw.citygml_lod1_dir, (".gml", ".xml")),
+        "dop20_j2w": _count_ext(os.path.join(dop20_root, "j2w"), (".j2w", ".wld")),
+        "dop20_meta": _count_ext(os.path.join(dop20_root, "meta"), (".xml",)),
+    }
+    node["counts"] = counts
+
+    if counts["dgm1_tif"] == 0:
+        node["errors"].append(f"Missing required DGM1 TIFFs in {cfg.raw.dgm1_dir}")
+    if counts["dop20_jp2"] == 0:
+        node["errors"].append(f"Missing required DOP20 JP2s in {cfg.raw.dop20_dir}")
+    if counts["citygml_lod2"] == 0:
+        node["errors"].append(f"Missing required CityGML LoD2 files in {cfg.raw.citygml_lod2_dir}")
+
+    if counts["citygml_lod1"] == 0:
+        node["warnings"].append(f"No CityGML LoD1 files found in {cfg.raw.citygml_lod1_dir}")
+    if counts["dop20_j2w"] == 0:
+        node["warnings"].append(f"No DOP20 worldfiles found in {os.path.join(dop20_root, 'j2w')}")
+    if counts["dop20_meta"] == 0:
+        node["warnings"].append(f"No DOP20 metadata XML files found in {os.path.join(dop20_root, 'meta')}")
+    if not os.path.exists(os.path.join(dop20_root, "filelist.txt")):
+        node["warnings"].append(f"No dop20 filelist found at {os.path.join(dop20_root, 'filelist.txt')}")
+
+    for msg in node["warnings"]:
+        print(f"[archive][validate] Warning: {msg}")
+    for msg in node["errors"]:
+        print(f"[archive][validate] Error: {msg}")
+
+    node["passed"] = len(node["errors"]) == 0
+    if node["passed"]:
+        print("[archive][validate] Validation passed.")
+    else:
+        print("[archive][validate] Validation failed.")
+    return bool(node["passed"])


+def _write_materialize_report(cfg: Config, report: dict) -> None:
+    out_path = os.path.join(cfg.work.work_dir, "archive_materialize_report.json")
+    os.makedirs(os.path.dirname(out_path), exist_ok=True)
+    with open(out_path, "w", encoding="utf-8") as handle:
+        json.dump(report, handle, indent=2)
+    print(f"[archive] Wrote report: {out_path}")


 __all__ = ["ensure_directories", "materialize_archives", "ensure_default_config"]

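For reference, a hedged usage sketch of the new `materialize_archives` entry point; the import paths and config file name are hypothetical, and only the keyword arguments and the 0/1 return convention come from this diff:

```python
import sys

from geodata.archive import materialize_archives  # hypothetical import path
from geodata.config import load_config            # hypothetical import path


def rebuild_raw_inputs(config_path: str) -> int:
    cfg = load_config(config_path)  # hypothetical helper, mirroring the CLI's load_config(args)
    # clean_raw wipes the managed raw dirs first; validate makes the call return 1
    # when core datasets (DGM1 TIFFs, DOP20 JP2s, CityGML LoD2) are missing.
    return materialize_archives(cfg, clean_raw=True, validate=True)


if __name__ == "__main__":
    sys.exit(rebuild_raw_inputs("config.toml"))  # hypothetical config file name
```

The CLI changes in the next hunks propagate this return value as the process exit code.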
@@ -51,7 +51,22 @@ def parse_args(argv: Iterable[str] | None = None) -> argparse.Namespace:
     parser.add_argument(
         "--build-from-archive",
         action="store_true",
-        help="Populate raw inputs from archives (unzips zips, leaves dop20 filelist in archive).",
+        help="Populate raw inputs from archives (supports monolithic archive/archive_raw.zip + per-dataset zips).",
     )
+    parser.add_argument(
+        "--clean-raw",
+        action="store_true",
+        help="With --build-from-archive: remove managed raw input dirs before unpacking archives.",
+    )
+    parser.add_argument(
+        "--validate",
+        action="store_true",
+        help="With --build-from-archive: validate materialized raw inputs and fail on missing core datasets.",
+    )
+    parser.add_argument(
+        "--archive-raw-zip",
+        default=None,
+        help="Optional path to a monolithic raw ZIP (default when omitted: archive/archive_raw.zip if present).",
+    )
     parser.add_argument(
         "--setup",
@@ -170,6 +185,10 @@ def _download_requests_lpolpg(download_config: str, requested: list[str] | None)
 def main(argv: Iterable[str] | None = None) -> int:
     args = parse_args(argv)
     cfg = load_config(args)
+
+    if (args.clean_raw or args.validate or args.archive_raw_zip) and not args.build_from_archive:
+        print("[archive] --clean-raw/--validate/--archive-raw-zip are ignored without --build-from-archive.")
+
     target_export = None
     action_flags = args.download or args.split_lpolpg or args.erode_rivers
     if args.export is not None:
@@ -181,12 +200,26 @@
         ensure_directories(cfg)
         print(f"Directories ensured. Config at {args.config}.")
         if args.build_from_archive:
-            materialize_archives(cfg)
+            archive_exit = materialize_archives(
+                cfg,
+                clean_raw=args.clean_raw,
+                validate=args.validate,
+                archive_raw_zip=args.archive_raw_zip,
+            )
+            if archive_exit != 0:
+                return archive_exit
         if args.export is None and not args.download:
             return 0
 
     if args.build_from_archive and not args.setup:
-        materialize_archives(cfg)
+        archive_exit = materialize_archives(
+            cfg,
+            clean_raw=args.clean_raw,
+            validate=args.validate,
+            archive_raw_zip=args.archive_raw_zip,
+        )
+        if archive_exit != 0:
+            return archive_exit
 
     if args.download:
         datasets = (