Handle external redirects and add sync options

This commit is contained in:
2025-11-16 00:01:46 +01:00
parent f724216fb8
commit b5ea4b901c
5 changed files with 304 additions and 51 deletions

62
Cargo.lock generated
View File

@@ -258,6 +258,35 @@ version = "0.4.30"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a9b614a5787ef0c8802a55766480563cb3a93b435898c422ed2a359cf811582" checksum = "3a9b614a5787ef0c8802a55766480563cb3a93b435898c422ed2a359cf811582"
[[package]]
name = "cookie"
version = "0.18.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ddef33a339a91ea89fb53151bd0a4689cfce27055c291dfa69945475d22c747"
dependencies = [
"percent-encoding",
"time",
"version_check",
]
[[package]]
name = "cookie_store"
version = "0.21.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2eac901828f88a5241ee0600950ab981148a18f2f756900ffba1b125ca6a3ef9"
dependencies = [
"cookie",
"document-features",
"idna",
"log",
"publicsuffix",
"serde",
"serde_derive",
"serde_json",
"time",
"url",
]
[[package]] [[package]]
name = "cpufeatures" name = "cpufeatures"
version = "0.2.17" version = "0.2.17"
@@ -337,6 +366,15 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "document-features"
version = "0.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4b8a88685455ed29a21542a33abd9cb6510b6b129abadabdcef0f4c55bc8f61"
dependencies = [
"litrs",
]
[[package]] [[package]]
name = "equivalent" name = "equivalent"
version = "1.0.2" version = "1.0.2"
@@ -773,6 +811,12 @@ version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77"
[[package]]
name = "litrs"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092"
[[package]] [[package]]
name = "log" name = "log"
version = "0.4.28" version = "0.4.28"
@@ -905,6 +949,22 @@ dependencies = [
"unicode-ident", "unicode-ident",
] ]
[[package]]
name = "psl-types"
version = "2.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33cb294fe86a74cbcf50d4445b37da762029549ebeea341421c7c70370f86cac"
[[package]]
name = "publicsuffix"
version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f42ea446cab60335f76979ec15e12619a2165b5ae2c12166bef27d283a9fadf"
dependencies = [
"idna",
"psl-types",
]
[[package]] [[package]]
name = "quinn" name = "quinn"
version = "0.11.9" version = "0.11.9"
@@ -1041,6 +1101,8 @@ dependencies = [
"async-compression", "async-compression",
"base64", "base64",
"bytes", "bytes",
"cookie",
"cookie_store",
"futures-core", "futures-core",
"futures-util", "futures-util",
"http", "http",

View File

@@ -8,7 +8,7 @@ anyhow = "1.0"
base64 = "0.22" base64 = "0.22"
clap = { version = "4.5", features = ["derive"] } clap = { version = "4.5", features = ["derive"] }
directories = "5.0" directories = "5.0"
reqwest = { version = "0.12", default-features = false, features = ["json", "stream", "gzip", "brotli", "deflate", "rustls-tls"] } reqwest = { version = "0.12", default-features = false, features = ["json", "stream", "gzip", "brotli", "deflate", "rustls-tls", "cookies"] }
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0" serde_json = "1.0"
thiserror = "1.0" thiserror = "1.0"

View File

@@ -7,7 +7,7 @@
- `init-config` writes a ready-to-edit config template (respecting `--download-root` and `--force` to overwrite). - `init-config` writes a ready-to-edit config template (respecting `--download-root` and `--force` to overwrite).
- `auth` subcommand stores Base64-encoded credentials per profile (passwords are never logged). - `auth` subcommand stores Base64-encoded credentials per profile (passwords are never logged).
- `list-courses` fetches `/users/me`, paginates enrolled courses, infers semester keys, caches the metadata, and prints a concise table. - `list-courses` fetches `/users/me`, paginates enrolled courses, infers semester keys, caches the metadata, and prints a concise table.
- `sync` traverses every course folder/file tree, normalizes names (Unicode NFKD + transliteration so `Ökologie/ß/œ` becomes `Oekologie/ss/oe`), streams downloads to disk, tracks checksums/remote timestamps, and supports `--dry-run`, `--prune`, and `--since <semester|date>` filters (e.g., `--since ws2526` or `--since 01032024`). - `sync` traverses every course folder/file tree, normalizes names (Unicode NFKD + transliteration so `Ökologie/ß/œ` becomes `Oekologie/ss/oe`), streams downloads to disk, tracks checksums/remote timestamps, and supports `--dry-run`, `--prune`, `--prune-empty-dirs`, `--write-external-links`, and `--since <semester|date>` filters (e.g., `--since ws2526` or `--since 01032024`).
- XDG-compliant config (`~/.config/studip-sync/config.toml`) and state (`~/.local/share/studip-sync/state.toml`) stores everything in TOML. - XDG-compliant config (`~/.config/studip-sync/config.toml`) and state (`~/.local/share/studip-sync/state.toml`) stores everything in TOML.
- Extensive logging controls: `--quiet`, `--verbose/-v`, `--debug`, and `--json`. - Extensive logging controls: `--quiet`, `--verbose/-v`, `--debug`, and `--json`.
@@ -41,7 +41,12 @@
cargo run -- sync --dry-run cargo run -- sync --dry-run
# Run the real sync (omit --dry-run); add --prune to delete stray files # Run the real sync (omit --dry-run); add --prune to delete stray files
# or --prune-empty-dirs to only remove empty directories
cargo run -- sync --prune cargo run -- sync --prune
cargo run -- sync --prune-empty-dirs
# Use --write-external-links to drop .url shortcuts whenever Stud.IP
# points to files hosted on third-party sites you can't fetch directly
cargo run -- sync --write-external-links
``` ```
Use `--profile`, `--config-dir`, or `--data-dir` when working with multiple identities or non-standard paths. Use `--profile`, `--config-dir`, or `--data-dir` when working with multiple identities or non-standard paths.
@@ -70,7 +75,7 @@ max_concurrent_downloads = 3 # placeholder for future concurrency control
| `init-config` | Write a default config template (fails if config exists unless forced). | `--force`, `--download-root` | | `init-config` | Write a default config template (fails if config exists unless forced). | `--force`, `--download-root` |
| `auth` | Collect username/password, encode them, and save them to the active profile. | `--non-interactive`, `--username`, `--password` | | `auth` | Collect username/password, encode them, and save them to the active profile. | `--non-interactive`, `--username`, `--password` |
| `list-courses` | List cached or freshly fetched courses with semester keys and IDs. | `--refresh` | | `list-courses` | List cached or freshly fetched courses with semester keys and IDs. | `--refresh` |
| `sync` | Download files for every enrolled course into the local tree. | `--dry-run`, `--prune`, `--since <semester key | DDMMYY | DDMMYYYY | RFC3339>` | | `sync` | Download files for every enrolled course into the local tree. | `--dry-run`, `--prune`, `--prune-empty-dirs`, `--write-external-links`, `--since <semester key \| DDMMYY \| DDMMYYYY \| RFC3339>` |
Global flags: `--quiet`, `--debug`, `--json`, `-v/--verbose` (stackable), `--config-dir`, `--data-dir` (state + default downloads), `--profile`. Global flags: `--quiet`, `--debug`, `--json`, `-v/--verbose` (stackable), `--config-dir`, `--data-dir` (state + default downloads), `--profile`.
@@ -83,7 +88,7 @@ Global flags: `--quiet`, `--debug`, `--json`, `-v/--verbose` (stackable), `--con
- List file refs via `/folders/{id}/file-refs`, normalize filenames (including transliteration of umlauts/ligatures like `ä→ae`, `Ö→Oe`, `ß→ss`, `œ→oe`), and ensure unique siblings through a `NameRegistry`. - List file refs via `/folders/{id}/file-refs`, normalize filenames (including transliteration of umlauts/ligatures like `ä→ae`, `Ö→Oe`, `ß→ss`, `œ→oe`), and ensure unique siblings through a `NameRegistry`.
- Skip downloads when the local file exists and matches the stored checksum / size / remote `chdate`. - Skip downloads when the local file exists and matches the stored checksum / size / remote `chdate`.
- Stream downloads to `*.part`, hash contents on the fly, then rename atomically to the final path. - Stream downloads to `*.part`, hash contents on the fly, then rename atomically to the final path.
4. Maintain a set of remote files so `--prune` can remove local files that no longer exist remotely (and optionally delete now-empty directories). When `--since` is provided, files whose remote `chdate` precedes the resolved timestamp (semester start or explicit date) are skipped; newer files continue through the regular checksum/size logic. 4. Maintain a set of remote files so `--prune` can remove local files that no longer exist remotely (and clean up any directories left empty). When `--prune-empty-dirs` is used instead, only empty directories are removed without touching files. When `--write-external-links` is enabled, any file that redirects to an unsupported host gets a `filename.ext.url` shortcut so you can open it manually later. When `--since` is provided, files whose remote `chdate` precedes the resolved timestamp (semester start or explicit date) are skipped; newer files continue through the regular checksum/size logic.
5. `--dry-run` prints planned work but never writes to disk. 5. `--dry-run` prints planned work but never writes to disk.
## Development Notes ## Development Notes

View File

@@ -5,11 +5,14 @@ use crate::{
paths::{AppPaths, PathOverrides}, paths::{AppPaths, PathOverrides},
semesters, semesters,
state::{CourseState, ProfileState, SemesterState, StateFile}, state::{CourseState, ProfileState, SemesterState, StateFile},
studip_client::{Course, FileRef, Folder, SemesterData, SemesterResponse, StudipClient}, studip_client::{
Course, FileRef, Folder, SemesterData, SemesterResponse, StudipClient, StudipHttpError,
},
}; };
use anyhow::{Context, anyhow, bail}; use anyhow::{Context, anyhow, bail};
use base64::{Engine, engine::general_purpose::STANDARD as BASE64}; use base64::{Engine, engine::general_purpose::STANDARD as BASE64};
use clap::{ArgAction, Parser, Subcommand, ValueHint}; use clap::{ArgAction, Parser, Subcommand, ValueHint};
use reqwest::StatusCode;
use rpassword::prompt_password; use rpassword::prompt_password;
use sha2::{Digest, Sha256}; use sha2::{Digest, Sha256};
use std::{ use std::{
@@ -25,8 +28,9 @@ use time::{
format_description::well_known::Rfc3339, macros::format_description, format_description::well_known::Rfc3339, macros::format_description,
}; };
use tokio::{fs, io::AsyncWriteExt}; use tokio::{fs, io::AsyncWriteExt};
use tracing::info; use tracing::{info, warn};
use unicode_normalization::{UnicodeNormalization, char::is_combining_mark}; use unicode_normalization::{UnicodeNormalization, char::is_combining_mark};
use url::Url;
use walkdir::WalkDir; use walkdir::WalkDir;
const USERNAME_ENV: &str = "STUDIP_SYNC_USERNAME"; const USERNAME_ENV: &str = "STUDIP_SYNC_USERNAME";
@@ -94,6 +98,12 @@ pub struct SyncArgs {
pub dry_run: bool, pub dry_run: bool,
#[arg(long = "prune", action = ArgAction::SetTrue)] #[arg(long = "prune", action = ArgAction::SetTrue)]
pub prune: bool, pub prune: bool,
/// Remove empty directories under the download root once syncing is finished.
#[arg(long = "prune-empty-dirs", action = ArgAction::SetTrue)]
pub prune_empty_dirs: bool,
/// Write .url shortcuts for files that redirect to unsupported external hosts.
#[arg(long = "write-external-links", action = ArgAction::SetTrue)]
pub write_external_links: bool,
#[arg(long = "since")] #[arg(long = "since")]
pub since: Option<String>, pub since: Option<String>,
} }
@@ -487,23 +497,44 @@ impl SyncArgs {
if self.prune { if self.prune {
let prune = prune_local(&download_root, &remote_files, self.dry_run)?; let prune = prune_local(&download_root, &remote_files, self.dry_run)?;
stats.pruned_files = prune.removed_files; stats.pruned_files += prune.removed_files;
stats.pruned_dirs = prune.removed_dirs; stats.pruned_dirs += prune.removed_dirs;
}
if self.prune_empty_dirs && !self.prune {
let removed = prune_empty_directories(&download_root, self.dry_run)?;
stats.pruned_dirs += removed;
} }
info!( info!(
profile = ctx.profile_name(), profile = ctx.profile_name(),
dry_run = self.dry_run, dry_run = self.dry_run,
prune = self.prune, prune = self.prune,
prune_empty_dirs = self.prune_empty_dirs,
write_external_links = self.write_external_links,
downloaded = stats.downloaded, downloaded = stats.downloaded,
skipped = stats.skipped, skipped = stats.skipped,
planned = stats.planned, planned = stats.planned,
skipped_since = stats.skipped_since, skipped_since = stats.skipped_since,
skipped_external = stats.skipped_external,
pruned_files = stats.pruned_files, pruned_files = stats.pruned_files,
pruned_dirs = stats.pruned_dirs, pruned_dirs = stats.pruned_dirs,
since = self.since.as_deref().unwrap_or(""), since = self.since.as_deref().unwrap_or(""),
"sync completed" "sync completed"
); );
if !stats.skipped_external_details.is_empty() {
println!("\nSkipped external downloads:");
for detail in &stats.skipped_external_details {
println!(
" {} -> {} {}",
detail.path.display(),
detail.status,
detail.url
);
}
}
Ok(()) Ok(())
} }
} }
@@ -674,6 +705,8 @@ struct SyncStats {
skipped_since: usize, skipped_since: usize,
pruned_files: usize, pruned_files: usize,
pruned_dirs: usize, pruned_dirs: usize,
skipped_external: usize,
skipped_external_details: Vec<ExternalSkip>,
} }
#[derive(Default)] #[derive(Default)]
@@ -842,10 +875,45 @@ async fn sync_file_ref(
} }
if needs_download { if needs_download {
let checksum = download_file_to(client, &file_ref, &local_path).await?; match download_file_to(client, &file_ref, &local_path).await {
update_file_state(ctx, &file_ref, &local_path, Some(checksum))?; Ok(checksum) => {
println!("Downloaded {}", relative_path.display()); if args.write_external_links {
stats.downloaded += 1; remove_external_link(&local_path).await?;
}
update_file_state(ctx, &file_ref, &local_path, Some(checksum))?;
println!("Downloaded {}", relative_path.display());
stats.downloaded += 1;
}
Err(err) => {
if let Some(http_err) = err.downcast_ref::<StudipHttpError>()
&& http_err.external
{
warn!(
target: "studip_sync",
url = %http_err.url,
status = %http_err.status,
"External download failed; skipping"
);
println!(
"Skipped {} (external download failed: {} {})",
relative_path.display(),
http_err.status,
http_err.url
);
stats.skipped_external += 1;
if args.write_external_links {
write_external_link(&local_path, &http_err.url, args.dry_run).await?;
}
stats.skipped_external_details.push(ExternalSkip {
path: relative_path.clone(),
status: http_err.status,
url: http_err.url.clone(),
});
return Ok(());
}
return Err(err);
}
}
} else { } else {
stats.skipped += 1; stats.skipped += 1;
} }
@@ -983,8 +1051,6 @@ fn normalize_component(input: &str) -> String {
if ch.is_alphanumeric() { if ch.is_alphanumeric() {
sanitized.push(ch); sanitized.push(ch);
last_was_separator = false; last_was_separator = false;
} else if ch.is_whitespace() || matches!(ch, '-' | '_' | '.') {
push_separator(&mut sanitized, &mut last_was_separator);
} else { } else {
push_separator(&mut sanitized, &mut last_was_separator); push_separator(&mut sanitized, &mut last_was_separator);
} }
@@ -1102,6 +1168,18 @@ fn prune_local(
} }
} }
stats.removed_dirs += prune_empty_directories(root, dry_run)?;
Ok(stats)
}
fn prune_empty_directories(root: &Path, dry_run: bool) -> Result<usize> {
if !root.exists() {
return Ok(0);
}
let mut removed = 0;
for entry in WalkDir::new(root) for entry in WalkDir::new(root)
.contents_first(true) .contents_first(true)
.into_iter() .into_iter()
@@ -1111,29 +1189,65 @@ fn prune_local(
if entry.path() == root { if entry.path() == root {
continue; continue;
} }
if entry
let is_empty = entry
.path() .path()
.read_dir() .read_dir()
.map(|mut i| i.next().is_none()) .map(|mut iter| iter.next().is_none())
.unwrap_or(false) .unwrap_or(false);
{ if !is_empty {
if dry_run { continue;
println!(
"Would remove empty directory {}",
entry
.path()
.strip_prefix(root)
.unwrap_or(entry.path())
.display()
);
} else {
std::fs::remove_dir(entry.path()).ok();
}
stats.removed_dirs += 1;
} }
let rel = entry.path().strip_prefix(root).unwrap_or(entry.path());
if dry_run {
println!("Would remove empty directory {}", rel.display());
} else {
let _ = std::fs::remove_dir(entry.path());
}
removed += 1;
} }
Ok(stats) Ok(removed)
}
async fn write_external_link(destination: &Path, url: &Url, dry_run: bool) -> Result<()> {
let link_path = external_link_path(destination);
if dry_run {
println!(
"Would write external link {} -> {}",
link_path.display(),
url
);
return Ok(());
}
if let Some(parent) = link_path.parent() {
fs::create_dir_all(parent).await?;
}
let mut file = fs::File::create(&link_path).await?;
let content = format!("[InternetShortcut]\nURL={}\n", url);
file.write_all(content.as_bytes()).await?;
file.flush().await?;
Ok(())
}
/// Delete a previously written `.url` shortcut for `destination`, if any.
///
/// Removal is best-effort: the original code probed with `try_exists` and
/// then ignored the removal error anyway, which is both redundant and a
/// TOCTOU race. We simply attempt the removal unconditionally and ignore
/// the outcome (a missing shortcut is the common, harmless case).
async fn remove_external_link(destination: &Path) -> Result<()> {
    let link_path = external_link_path(destination);
    // Ignore NotFound and any other error: the shortcut is purely auxiliary.
    let _ = fs::remove_file(link_path).await;
    Ok(())
}
/// Derive the sibling `.url` shortcut path for `destination` by appending
/// `.url` to the full file name (extension included), e.g.
/// `notes/report.pdf` -> `notes/report.pdf.url`.
fn external_link_path(destination: &Path) -> PathBuf {
    match destination.file_name() {
        Some(file_name) => {
            let mut shortcut = file_name.to_os_string();
            shortcut.push(".url");
            destination.with_file_name(shortcut)
        }
        // No final component (e.g. the path ends in `..`): fall back to a
        // bare `.url` name, matching the original's empty-OsString default.
        None => destination.with_file_name(".url"),
    }
}
} }
#[derive(Default)] #[derive(Default)]
@@ -1298,3 +1412,8 @@ max_concurrent_downloads = 3
"# "#
) )
} }
/// Record of a file skipped because its download resolved to an external
/// host and failed; collected in `SyncStats::skipped_external_details` and
/// printed in the end-of-sync summary.
struct ExternalSkip {
    /// Local path of the would-be file (the `relative_path` at the skip site).
    path: PathBuf,
    /// HTTP status the final (external) URL responded with.
    status: StatusCode,
    /// Final post-redirect URL the download resolved to.
    url: Url,
}

View File

@@ -1,10 +1,12 @@
use crate::{Result, config::ConfigProfile}; use crate::{Result, config::ConfigProfile};
use anyhow::{Context, anyhow, bail}; use anyhow::{Context, anyhow, bail};
use reqwest::{ use reqwest::{
Client, Response, Client, Response, StatusCode,
header::{AUTHORIZATION, HeaderValue}, header::{AUTHORIZATION, HeaderValue, LOCATION},
redirect::Policy,
}; };
use serde::{Deserialize, de::DeserializeOwned}; use serde::{Deserialize, de::DeserializeOwned};
use thiserror::Error;
use url::Url; use url::Url;
#[derive(Clone)] #[derive(Clone)]
@@ -29,6 +31,8 @@ impl StudipClient {
let http = Client::builder() let http = Client::builder()
.user_agent("studip-sync/0.1") .user_agent("studip-sync/0.1")
.cookie_store(true)
.redirect(Policy::none())
.build() .build()
.context("Failed to build HTTP client")?; .context("Failed to build HTTP client")?;
@@ -125,27 +129,90 @@ impl StudipClient {
Ok(items) Ok(items)
} }
fn download_endpoint(&self, path: &str) -> Result<Url> { fn download_endpoint(&self, path: &str) -> Result<Url> {
let normalized = path.trim_start_matches('/'); if let Ok(url) = Url::parse(path) {
self.base.join(normalized).map_err(Into::into) return Ok(url);
}
self.base.join(path).map_err(Into::into)
} }
async fn send_request(&self, url: Url) -> Result<Response> { async fn send_request(&self, url: Url) -> Result<Response> {
let response = self self.follow_redirects(url, 10).await
.http
.get(url.clone())
.header(AUTHORIZATION, self.auth_header.clone())
.send()
.await
.with_context(|| format!("GET {}", url))?;
if !response.status().is_success() {
let status = response.status();
let body = response.text().await.unwrap_or_default();
bail!("Stud.IP request failed ({status}) - {body}");
}
Ok(response)
} }
/// Manually follow HTTP redirects for `url`, up to `max_redirects` hops.
///
/// The client is built with `redirect::Policy::none()`, so every 3xx is
/// surfaced here. Following redirects by hand lets us (a) stop sending the
/// `Authorization` header once a hop leaves the Stud.IP origin, and (b)
/// detect that a failure happened on an external host so callers can treat
/// it as a skippable external download (`StudipHttpError::external`).
///
/// # Errors
/// Fails on network errors, when the redirect limit is exceeded, on a
/// missing/invalid `Location` header, or — wrapped in `StudipHttpError` —
/// when the final response is not a success status.
async fn follow_redirects(&self, url: Url, max_redirects: usize) -> Result<Response> {
    let mut current_url = url;
    let mut redirects_left = max_redirects;
    // Send credentials only while we stay on the Stud.IP origin; once a hop
    // crosses to another origin this latches to false and is never re-enabled,
    // even if a later hop returns to the base origin.
    let mut include_auth = true;
    loop {
        let mut request = self.http.get(current_url.clone());
        if include_auth {
            request = request.header(AUTHORIZATION, self.auth_header.clone());
        }
        let response = request
            .send()
            .await
            .with_context(|| format!("GET {}", current_url))?;
        if response.status().is_redirection() {
            if redirects_left == 0 {
                bail!(
                    "Exceeded redirect limit while requesting {}",
                    response.url()
                );
            }
            let location = response.headers().get(LOCATION).ok_or_else(|| {
                anyhow!("Redirect from {} missing Location header", response.url())
            })?;
            let location = location
                .to_str()
                .context("Invalid redirect Location header")?;
            // Location may be absolute or relative; resolve relative targets
            // against the URL that issued the redirect.
            let next_url = if let Ok(absolute) = Url::parse(location) {
                absolute
            } else {
                response
                    .url()
                    .join(location)
                    .with_context(|| format!("Invalid redirect location {location}"))?
            };
            // Drop credentials BEFORE issuing the cross-origin request so the
            // Authorization header never leaks to a third-party host.
            if include_auth && next_url.origin() != self.base.origin() {
                include_auth = false;
            }
            current_url = next_url;
            redirects_left -= 1;
            continue;
        }
        if !response.status().is_success() {
            let status = response.status();
            let final_url = response.url().clone();
            let body = response.text().await.unwrap_or_default();
            // Mark the failure as external when the final URL left the base
            // origin; sync uses this to skip (not abort) such downloads.
            let external = final_url.origin() != self.base.origin();
            return Err(StudipHttpError {
                status,
                url: final_url,
                body,
                external,
            }
            .into());
        }
        return Ok(response);
    }
}
}
#[derive(Debug, Error)]
#[error("Stud.IP request failed ({status}) at {url} - {body}")]
pub struct StudipHttpError {
pub status: StatusCode,
pub url: Url,
pub body: String,
pub external: bool,
} }
fn build_root_and_api_urls(profile: &ConfigProfile) -> Result<(Url, Url)> { fn build_root_and_api_urls(profile: &ConfigProfile) -> Result<(Url, Url)> {