diff --git a/AGENTS.md b/AGENTS.md index 0b5cb50..5d8436b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -19,6 +19,7 @@ - Rebuild VRTs after moving data: add `--force-vrt`. - Expected warning: `Computed -srcwin ... falls partially outside source raster extent` means the DOP coverage is slightly smaller than the tile footprint; edge pixels will be filled with NoData/zeros. Add adjacent JP2s or shrink the requested window if you need to silence it. - Scripts accept CLI overrides (e.g., `--config`, `--raw-dgm1-path`, `--raw-dop20-path`, `--export`, `--build-from-archive`); run `uv run python geodata_to_unity.py -h` to see options. +- DOP20 downloader assumes Linux/OpenSSL with system CA at `/etc/ssl/certs/ca-certificates.crt` to build a trust chain from the geobasis site. macOS/Windows users should either set `CURL_CA_BUNDLE` to a combined CA or download manually and place files in `raw/dop20/`. ## Coding Style & Naming Conventions - Python scripts use 4-space indentation, early-exit error handling, and `SystemExit` for fatal issues; follow PEP 8 where practical. diff --git a/README.md b/README.md index 72a173e..e5ed9d7 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,7 @@ This repository converts DGM1 elevation tiles into Unity-ready 16-bit PNG height ``` This builds `work/dop.vrt` if missing and writes `export_unity/ortho_jpg/.jpg` + `.jgw` aligned to `tile_index.csv`. - If you see `Computed -srcwin ... falls partially outside source raster extent` warnings, the DOP coverage is slightly smaller than the tile footprint; edge pixels will be filled with NoData/zeros. Add adjacent JP2s or shrink the requested window if you need to avoid the warning. + - The download script relies on a Linux/OpenSSL toolchain with system CA bundle at `/etc/ssl/certs/ca-certificates.crt`; it builds a trust chain by fetching the geobasis intermediate. macOS/Windows users should either provide a combined CA via `CURL_CA_BUNDLE` or download with a browser/wget and place files manually. 
#!/usr/bin/env bash
#
# Download DOP20 assets (JP2/J2W/XML) listed line-by-line in a file list.
#
# Usage:
#   dlscript_dop20.sh [LIST_FILE]
#
# LIST_FILE resolution order: first positional argument, then the LIST
# environment variable (accepted by the previous version of this script),
# then archive/dop20/filelist.txt below the repository root.
#
# Environment:
#   CURL_CA_BUNDLE  Path to a CA bundle that already lets curl verify the
#                   download host (i.e. includes the intermediate). When set,
#                   it is used as-is and the OpenSSL trust-chain bootstrap is
#                   skipped, so the script also works on hosts without
#                   /etc/ssl/certs/ca-certificates.crt (macOS, Windows).
#   LIST            Legacy way to pass LIST_FILE.
#
# Output: files are written to raw/dop20/{jp2,j2w,meta}; certificates fetched
# during the bootstrap are kept in raw/dop20/certs.
set -euo pipefail

ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")"/.. && pwd)"
LIST_FILE="${1:-${LIST:-$ROOT/archive/dop20/filelist.txt}}"

DOP_ROOT="$ROOT/raw/dop20"
OUT_JP2="$DOP_ROOT/jp2"
OUT_J2W="$DOP_ROOT/j2w"
OUT_XML="$DOP_ROOT/meta"
CERT_DIR="$DOP_ROOT/certs"

TLS_HOST="geobasis-rlp.de"
SYSTEM_CA="/etc/ssl/certs/ca-certificates.crt"
LEAF_PEM="$CERT_DIR/geobasis-leaf.pem"
INT_DER="$CERT_DIR/geobasis-intermediate.der"
INT_PEM="$CERT_DIR/geobasis-intermediate.pem"
COMBINED_CA="$CERT_DIR/geobasis-ca.pem"

readonly ROOT LIST_FILE DOP_ROOT OUT_JP2 OUT_J2W OUT_XML CERT_DIR
readonly TLS_HOST SYSTEM_CA LEAF_PEM INT_DER INT_PEM COMBINED_CA

# Print an error to stderr and abort the script.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Abort unless every named tool is available on PATH.
require_tools() {
  local tool
  for tool in "$@"; do
    command -v "$tool" >/dev/null || die "Required tool not found: $tool"
  done
}

#######################################
# Build $COMBINED_CA = intermediate certificate + system CA bundle.
# The intermediate is located through the AIA "CA Issuers" URI of the
# server's leaf certificate. Trust is still anchored in the system roots;
# the intermediate only completes the chain.
# Globals: TLS_HOST, SYSTEM_CA, LEAF_PEM, INT_DER, INT_PEM, COMBINED_CA (read)
# Outputs: progress on stdout, errors on stderr; writes files in $CERT_DIR
#######################################
build_ca_bundle() {
  local aia_url

  require_tools openssl
  [[ -f "$SYSTEM_CA" ]] \
    || die "System CA bundle not found: $SYSTEM_CA (set CURL_CA_BUNDLE to use your own bundle)"
  mkdir -p "$CERT_DIR"

  echo "[1/3] Extracting leaf certificate from geobasis-rlp.de ..."
  # stdin from /dev/null makes s_client quit right after the handshake
  # instead of waiting for input; its handshake chatter on stderr is dropped.
  openssl s_client -connect "$TLS_HOST:443" -servername "$TLS_HOST" \
    </dev/null 2>/dev/null \
    | openssl x509 -outform PEM > "$LEAF_PEM"

  echo "[2/3] Reading AIA (CA Issuers) URL from leaf certificate ..."
  # awk consumes all input (no early exit) so the upstream openssl cannot be
  # killed by SIGPIPE, which would fail the pipeline under pipefail.
  aia_url="$(openssl x509 -in "$LEAF_PEM" -noout -text \
    | awk -F'URI:' '/CA Issuers - URI:/ && !seen { print $2; seen = 1 }' \
    | tr -d '\r\n[:space:]')"

  if [[ -z "$aia_url" ]]; then
    {
      echo "ERROR: Could not find CA Issuers URI in certificate (AIA missing)."
      echo "You can inspect: openssl x509 -in $LEAF_PEM -noout -text | sed -n '/Authority Information Access/,+12p'"
    } >&2
    exit 1
  fi

  echo "AIA URL: $aia_url"
  echo "Downloading intermediate certificate ..."
  curl -L --fail --retry 10 --retry-delay 2 --retry-all-errors \
    -o "$INT_DER" "$aia_url"

  echo "Converting intermediate to PEM ..."
  openssl x509 -inform DER -in "$INT_DER" -out "$INT_PEM"

  echo "Building combined CA bundle: $COMBINED_CA"
  cat "$INT_PEM" "$SYSTEM_CA" > "$COMBINED_CA"
}

#######################################
# Download every URL listed in $LIST_FILE into the directory matching its
# file extension. Blank lines and lines starting with '#' are ignored.
# Arguments: $1 - CA bundle passed to curl via --cacert
# Globals:   LIST_FILE, OUT_JP2, OUT_J2W, OUT_XML (read)
#######################################
download_assets() {
  local ca_bundle=$1
  local url fname outdir outpath

  # '|| [[ -n $url ]]' keeps a final line that has no trailing newline.
  while IFS= read -r url || [[ -n "$url" ]]; do
    url=${url%$'\r'} # tolerate CRLF line endings in the list
    if [[ -z "$url" || "$url" =~ ^# ]]; then
      continue
    fi

    fname="${url##*/}"
    case "$fname" in
      *.xml) outdir="$OUT_XML" ;;
      *.jp2) outdir="$OUT_JP2" ;;
      *.j2w) outdir="$OUT_J2W" ;;
      *)
        echo "Skipping unknown type: $fname" >&2
        continue
        ;;
    esac

    outpath="${outdir}/${fname}"
    echo "-> $outpath"

    # -C - resumes a partially downloaded file from its current size.
    curl -L --fail \
      --cacert "$ca_bundle" \
      --retry 10 --retry-delay 2 --retry-all-errors \
      -C - \
      -o "$outpath" \
      "$url"
  done < "$LIST_FILE"
}

main() {
  local ca_bundle

  require_tools curl
  [[ -f "$LIST_FILE" ]] || die "List file not found: $LIST_FILE"
  mkdir -p "$OUT_JP2" "$OUT_J2W" "$OUT_XML"

  if [[ -n "${CURL_CA_BUNDLE:-}" ]]; then
    # A user-supplied bundle replaces the Linux/OpenSSL-specific bootstrap.
    [[ -f "$CURL_CA_BUNDLE" ]] \
      || die "CURL_CA_BUNDLE does not point to a file: $CURL_CA_BUNDLE"
    echo "Using CA bundle from CURL_CA_BUNDLE: $CURL_CA_BUNDLE"
    ca_bundle="$CURL_CA_BUNDLE"
  else
    build_ca_bundle
    ca_bundle="$COMBINED_CA"
  fi

  echo "[3/3] Downloading files from $LIST_FILE into jp2/j2w/xml ..."
  download_assets "$ca_bundle"

  echo "Done."
}

main "$@"