Handle external redirects and add sync options
Cargo.lock (generated, 62 changed lines)
@@ -258,6 +258,35 @@ version = "0.4.30"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3a9b614a5787ef0c8802a55766480563cb3a93b435898c422ed2a359cf811582"
 
+[[package]]
+name = "cookie"
+version = "0.18.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4ddef33a339a91ea89fb53151bd0a4689cfce27055c291dfa69945475d22c747"
+dependencies = [
+ "percent-encoding",
+ "time",
+ "version_check",
+]
+
+[[package]]
+name = "cookie_store"
+version = "0.21.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2eac901828f88a5241ee0600950ab981148a18f2f756900ffba1b125ca6a3ef9"
+dependencies = [
+ "cookie",
+ "document-features",
+ "idna",
+ "log",
+ "publicsuffix",
+ "serde",
+ "serde_derive",
+ "serde_json",
+ "time",
+ "url",
+]
+
 [[package]]
 name = "cpufeatures"
 version = "0.2.17"
@@ -337,6 +366,15 @@ dependencies = [
  "syn",
 ]
 
+[[package]]
+name = "document-features"
+version = "0.2.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d4b8a88685455ed29a21542a33abd9cb6510b6b129abadabdcef0f4c55bc8f61"
+dependencies = [
+ "litrs",
+]
+
 [[package]]
 name = "equivalent"
 version = "1.0.2"
@@ -773,6 +811,12 @@ version = "0.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77"
 
+[[package]]
+name = "litrs"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092"
+
 [[package]]
 name = "log"
 version = "0.4.28"
@@ -905,6 +949,22 @@ dependencies = [
  "unicode-ident",
 ]
 
+[[package]]
+name = "psl-types"
+version = "2.0.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "33cb294fe86a74cbcf50d4445b37da762029549ebeea341421c7c70370f86cac"
+
+[[package]]
+name = "publicsuffix"
+version = "2.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6f42ea446cab60335f76979ec15e12619a2165b5ae2c12166bef27d283a9fadf"
+dependencies = [
+ "idna",
+ "psl-types",
+]
+
 [[package]]
 name = "quinn"
 version = "0.11.9"
@@ -1041,6 +1101,8 @@ dependencies = [
  "async-compression",
  "base64",
  "bytes",
+ "cookie",
+ "cookie_store",
  "futures-core",
  "futures-util",
  "http",
Cargo.toml
@@ -8,7 +8,7 @@ anyhow = "1.0"
 base64 = "0.22"
 clap = { version = "4.5", features = ["derive"] }
 directories = "5.0"
-reqwest = { version = "0.12", default-features = false, features = ["json", "stream", "gzip", "brotli", "deflate", "rustls-tls"] }
+reqwest = { version = "0.12", default-features = false, features = ["json", "stream", "gzip", "brotli", "deflate", "rustls-tls", "cookies"] }
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
 thiserror = "1.0"
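The new `cookies` feature backs the client changes in src/studip_client.rs further down: the builder enables reqwest's cookie store while disabling its automatic redirect handling, so Stud.IP session cookies survive the manually followed hops. A minimal sketch of that combination, using the same builder calls the diff introduces:

```rust
// Sketch of the client configuration this commit adopts; cookie_store(true)
// requires the "cookies" feature enabled in Cargo.toml above.
use reqwest::{Client, redirect::Policy};

fn build_client() -> reqwest::Result<Client> {
    Client::builder()
        .user_agent("studip-sync/0.1")
        .cookie_store(true)       // persist session cookies across requests
        .redirect(Policy::none()) // redirects are followed by hand instead
        .build()
}
```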
README.md (11 changed lines)
@@ -7,7 +7,7 @@
 - `init-config` writes a ready-to-edit config template (respecting `--download-root` and `--force` to overwrite).
 - `auth` subcommand stores Base64-encoded credentials per profile (passwords are never logged).
 - `list-courses` fetches `/users/me`, paginates enrolled courses, infers semester keys, caches the metadata, and prints a concise table.
-- `sync` traverses every course folder/file tree, normalizes names (Unicode NFKD + transliteration so `Ökologie/ß/œ` becomes `Oekologie/ss/oe`), streams downloads to disk, tracks checksums/remote timestamps, and supports `--dry-run`, `--prune`, and `--since <semester|date>` filters (e.g., `--since ws2526` or `--since 01032024`).
+- `sync` traverses every course folder/file tree, normalizes names (Unicode NFKD + transliteration so `Ökologie/ß/œ` becomes `Oekologie/ss/oe`), streams downloads to disk, tracks checksums/remote timestamps, and supports `--dry-run`, `--prune`, `--prune-empty-dirs`, `--write-external-links`, and `--since <semester|date>` filters (e.g., `--since ws2526` or `--since 01032024`).
 - XDG-compliant config (`~/.config/studip-sync/config.toml`) and state (`~/.local/share/studip-sync/state.toml`) stores everything in TOML.
 - Extensive logging controls: `--quiet`, `--verbose/-v`, `--debug`, and `--json`.
@@ -41,7 +41,12 @@
 cargo run -- sync --dry-run
 
 # Run the real sync (omit --dry-run); add --prune to delete stray files
+# or --prune-empty-dirs to only remove empty directories
 cargo run -- sync --prune
+cargo run -- sync --prune-empty-dirs
+# Use --write-external-links to drop .url shortcuts whenever Stud.IP
+# points to files hosted on third-party sites you can't fetch directly
+cargo run -- sync --write-external-links
 ```
 
 Use `--profile`, `--config-dir`, or `--data-dir` when working with multiple identities or non-standard paths.
@@ -70,7 +75,7 @@ max_concurrent_downloads = 3 # placeholder for future concurrency control
 | `init-config` | Write a default config template (fails if config exists unless forced). | `--force`, `--download-root` |
 | `auth` | Collect username/password, encode them, and save them to the active profile. | `--non-interactive`, `--username`, `--password` |
 | `list-courses` | List cached or freshly fetched courses with semester keys and IDs. | `--refresh` |
-| `sync` | Download files for every enrolled course into the local tree. | `--dry-run`, `--prune`, `--since <semester key | DDMMYY | DDMMYYYY | RFC3339>` |
+| `sync` | Download files for every enrolled course into the local tree. | `--dry-run`, `--prune`, `--prune-empty-dirs`, `--write-external-links`, `--since <semester key \| DDMMYY \| DDMMYYYY \| RFC3339>` |
 
 Global flags: `--quiet`, `--debug`, `--json`, `-v/--verbose` (stackable), `--config-dir`, `--data-dir` (state + default downloads), `--profile`.
 
@@ -83,7 +88,7 @@ Global flags: `--quiet`, `--debug`, `--json`, `-v/--verbose` (stackable), `--con
 - List file refs via `/folders/{id}/file-refs`, normalize filenames (including transliteration of umlauts/ligatures like `ä→ae`, `Ö→Oe`, `ß→ss`, `œ→oe`), and ensure unique siblings through a `NameRegistry`.
 - Skip downloads when the local file exists and matches the stored checksum / size / remote `chdate`.
 - Stream downloads to `*.part`, hash contents on the fly, then rename atomically to the final path.
-4. Maintain a set of remote files so `--prune` can remove local files that no longer exist remotely (and optionally delete now-empty directories). When `--since` is provided, files whose remote `chdate` precedes the resolved timestamp (semester start or explicit date) are skipped; newer files continue through the regular checksum/size logic.
+4. Maintain a set of remote files so `--prune` can remove local files that no longer exist remotely (and clean up any directories left empty). When `--prune-empty-dirs` is used instead, only empty directories are removed without touching files. When `--write-external-links` is enabled, any file that redirects to an unsupported host gets a `filename.ext.url` shortcut so you can open it manually later. When `--since` is provided, files whose remote `chdate` precedes the resolved timestamp (semester start or explicit date) are skipped; newer files continue through the regular checksum/size logic.
 5. `--dry-run` prints planned work but never writes to disk.
 
 ## Development Notes
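The `filename.ext.url` shortcuts the README describes are plain InternetShortcut files, matching the format string in `write_external_link` in src/cli.rs below. For a hypothetical externally hosted `Lecture01.pdf`, the sibling `Lecture01.pdf.url` would contain (URL illustrative):

```
[InternetShortcut]
URL=https://files.example.org/Lecture01.pdf
```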
src/cli.rs (171 changed lines)
@@ -5,11 +5,14 @@ use crate::{
     paths::{AppPaths, PathOverrides},
     semesters,
     state::{CourseState, ProfileState, SemesterState, StateFile},
-    studip_client::{Course, FileRef, Folder, SemesterData, SemesterResponse, StudipClient},
+    studip_client::{
+        Course, FileRef, Folder, SemesterData, SemesterResponse, StudipClient, StudipHttpError,
+    },
 };
 use anyhow::{Context, anyhow, bail};
 use base64::{Engine, engine::general_purpose::STANDARD as BASE64};
 use clap::{ArgAction, Parser, Subcommand, ValueHint};
+use reqwest::StatusCode;
 use rpassword::prompt_password;
 use sha2::{Digest, Sha256};
 use std::{
@@ -25,8 +28,9 @@ use time::{
     format_description::well_known::Rfc3339, macros::format_description,
 };
 use tokio::{fs, io::AsyncWriteExt};
-use tracing::info;
+use tracing::{info, warn};
 use unicode_normalization::{UnicodeNormalization, char::is_combining_mark};
+use url::Url;
 use walkdir::WalkDir;
 
 const USERNAME_ENV: &str = "STUDIP_SYNC_USERNAME";
@@ -94,6 +98,12 @@ pub struct SyncArgs {
     pub dry_run: bool,
     #[arg(long = "prune", action = ArgAction::SetTrue)]
     pub prune: bool,
+    /// Remove empty directories under the download root once syncing is finished.
+    #[arg(long = "prune-empty-dirs", action = ArgAction::SetTrue)]
+    pub prune_empty_dirs: bool,
+    /// Write .url shortcuts for files that redirect to unsupported external hosts.
+    #[arg(long = "write-external-links", action = ArgAction::SetTrue)]
+    pub write_external_links: bool,
     #[arg(long = "since")]
     pub since: Option<String>,
 }
@@ -487,23 +497,44 @@ impl SyncArgs {
 
         if self.prune {
             let prune = prune_local(&download_root, &remote_files, self.dry_run)?;
-            stats.pruned_files = prune.removed_files;
-            stats.pruned_dirs = prune.removed_dirs;
+            stats.pruned_files += prune.removed_files;
+            stats.pruned_dirs += prune.removed_dirs;
         }
 
+        if self.prune_empty_dirs && !self.prune {
+            let removed = prune_empty_directories(&download_root, self.dry_run)?;
+            stats.pruned_dirs += removed;
+        }
+
         info!(
             profile = ctx.profile_name(),
             dry_run = self.dry_run,
             prune = self.prune,
+            prune_empty_dirs = self.prune_empty_dirs,
+            write_external_links = self.write_external_links,
             downloaded = stats.downloaded,
             skipped = stats.skipped,
             planned = stats.planned,
             skipped_since = stats.skipped_since,
+            skipped_external = stats.skipped_external,
             pruned_files = stats.pruned_files,
             pruned_dirs = stats.pruned_dirs,
             since = self.since.as_deref().unwrap_or(""),
             "sync completed"
         );
 
+        if !stats.skipped_external_details.is_empty() {
+            println!("\nSkipped external downloads:");
+            for detail in &stats.skipped_external_details {
+                println!(
+                    " {} -> {} {}",
+                    detail.path.display(),
+                    detail.status,
+                    detail.url
+                );
+            }
+        }
+
         Ok(())
     }
 }
@@ -674,6 +705,8 @@ struct SyncStats {
     skipped_since: usize,
     pruned_files: usize,
     pruned_dirs: usize,
+    skipped_external: usize,
+    skipped_external_details: Vec<ExternalSkip>,
 }
 
 #[derive(Default)]
@@ -842,10 +875,45 @@ async fn sync_file_ref(
     }
 
     if needs_download {
-        let checksum = download_file_to(client, &file_ref, &local_path).await?;
-        update_file_state(ctx, &file_ref, &local_path, Some(checksum))?;
-        println!("Downloaded {}", relative_path.display());
-        stats.downloaded += 1;
+        match download_file_to(client, &file_ref, &local_path).await {
+            Ok(checksum) => {
+                if args.write_external_links {
+                    remove_external_link(&local_path).await?;
+                }
+                update_file_state(ctx, &file_ref, &local_path, Some(checksum))?;
+                println!("Downloaded {}", relative_path.display());
+                stats.downloaded += 1;
+            }
+            Err(err) => {
+                if let Some(http_err) = err.downcast_ref::<StudipHttpError>()
+                    && http_err.external
+                {
+                    warn!(
+                        target: "studip_sync",
+                        url = %http_err.url,
+                        status = %http_err.status,
+                        "External download failed; skipping"
+                    );
+                    println!(
+                        "Skipped {} (external download failed: {} {})",
+                        relative_path.display(),
+                        http_err.status,
+                        http_err.url
+                    );
+                    stats.skipped_external += 1;
+                    if args.write_external_links {
+                        write_external_link(&local_path, &http_err.url, args.dry_run).await?;
+                    }
+                    stats.skipped_external_details.push(ExternalSkip {
+                        path: relative_path.clone(),
+                        status: http_err.status,
+                        url: http_err.url.clone(),
+                    });
+                    return Ok(());
+                }
+                return Err(err);
+            }
+        }
     } else {
         stats.skipped += 1;
     }
@@ -983,8 +1051,6 @@ fn normalize_component(input: &str) -> String {
         if ch.is_alphanumeric() {
             sanitized.push(ch);
             last_was_separator = false;
-        } else if ch.is_whitespace() || matches!(ch, '-' | '_' | '.') {
-            push_separator(&mut sanitized, &mut last_was_separator);
         } else {
             push_separator(&mut sanitized, &mut last_was_separator);
         }
@@ -1102,6 +1168,18 @@ fn prune_local(
         }
     }
 
+    stats.removed_dirs += prune_empty_directories(root, dry_run)?;
+
+    Ok(stats)
+}
+
+fn prune_empty_directories(root: &Path, dry_run: bool) -> Result<usize> {
+    if !root.exists() {
+        return Ok(0);
+    }
+
+    let mut removed = 0;
+
     for entry in WalkDir::new(root)
         .contents_first(true)
         .into_iter()
@@ -1111,29 +1189,65 @@ fn prune_local(
         if entry.path() == root {
            continue;
         }
-        if entry
+
+        let is_empty = entry
             .path()
             .read_dir()
-            .map(|mut i| i.next().is_none())
-            .unwrap_or(false)
-        {
-            if dry_run {
-                println!(
-                    "Would remove empty directory {}",
-                    entry
-                        .path()
-                        .strip_prefix(root)
-                        .unwrap_or(entry.path())
-                        .display()
-                );
-            } else {
-                std::fs::remove_dir(entry.path()).ok();
-            }
-            stats.removed_dirs += 1;
-        }
-    }
-
-    Ok(stats)
+            .map(|mut iter| iter.next().is_none())
+            .unwrap_or(false);
+        if !is_empty {
+            continue;
+        }
+
+        let rel = entry.path().strip_prefix(root).unwrap_or(entry.path());
+        if dry_run {
+            println!("Would remove empty directory {}", rel.display());
+        } else {
+            let _ = std::fs::remove_dir(entry.path());
+        }
+        removed += 1;
+    }
+
+    Ok(removed)
 }
 
+async fn write_external_link(destination: &Path, url: &Url, dry_run: bool) -> Result<()> {
+    let link_path = external_link_path(destination);
+    if dry_run {
+        println!(
+            "Would write external link {} -> {}",
+            link_path.display(),
+            url
+        );
+        return Ok(());
+    }
+
+    if let Some(parent) = link_path.parent() {
+        fs::create_dir_all(parent).await?;
+    }
+
+    let mut file = fs::File::create(&link_path).await?;
+    let content = format!("[InternetShortcut]\nURL={}\n", url);
+    file.write_all(content.as_bytes()).await?;
+    file.flush().await?;
+    Ok(())
+}
+
+async fn remove_external_link(destination: &Path) -> Result<()> {
+    let link_path = external_link_path(destination);
+    if tokio::fs::try_exists(&link_path).await.unwrap_or(false) {
+        let _ = fs::remove_file(link_path).await;
+    }
+    Ok(())
+}
+
+fn external_link_path(destination: &Path) -> PathBuf {
+    let mut name = destination
        .file_name()
+        .map(|n| n.to_os_string())
+        .unwrap_or_default();
+    name.push(".url");
+    destination.with_file_name(name)
+}
+
 #[derive(Default)]
@@ -1298,3 +1412,8 @@ max_concurrent_downloads = 3
 "#
     )
 }
+struct ExternalSkip {
+    path: PathBuf,
+    status: StatusCode,
+    url: Url,
+}
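Note that `external_link_path` appends `.url` to the whole file name rather than replacing the extension, so the shortcut sits directly next to the file it stands in for. A small standalone check of that behavior (the function body is copied from the diff above; the paths are illustrative):

```rust
use std::path::{Path, PathBuf};

// Copy of external_link_path from src/cli.rs, reproduced for a standalone test.
fn external_link_path(destination: &Path) -> PathBuf {
    let mut name = destination
        .file_name()
        .map(|n| n.to_os_string())
        .unwrap_or_default();
    name.push(".url");
    destination.with_file_name(name)
}

#[test]
fn appends_url_suffix_instead_of_replacing_extension() {
    let link = external_link_path(Path::new("courses/Oekologie/notes.pdf"));
    // "notes.pdf" -> "notes.pdf.url", not "notes.url"
    assert_eq!(link, PathBuf::from("courses/Oekologie/notes.pdf.url"));
}
```

`prune_empty_directories` in the same diff relies on `contents_first(true)`, which makes WalkDir yield children before their parents, so a chain of nested empty directories collapses in a single pass.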
src/studip_client.rs
@@ -1,10 +1,12 @@
 use crate::{Result, config::ConfigProfile};
 use anyhow::{Context, anyhow, bail};
 use reqwest::{
-    Client, Response,
-    header::{AUTHORIZATION, HeaderValue},
+    Client, Response, StatusCode,
+    header::{AUTHORIZATION, HeaderValue, LOCATION},
+    redirect::Policy,
 };
 use serde::{Deserialize, de::DeserializeOwned};
+use thiserror::Error;
 use url::Url;
 
 #[derive(Clone)]
@@ -29,6 +31,8 @@ impl StudipClient {
 
         let http = Client::builder()
             .user_agent("studip-sync/0.1")
+            .cookie_store(true)
+            .redirect(Policy::none())
             .build()
             .context("Failed to build HTTP client")?;
 
@@ -125,28 +129,91 @@ impl StudipClient {
         Ok(items)
     }
 
     fn download_endpoint(&self, path: &str) -> Result<Url> {
-        let normalized = path.trim_start_matches('/');
-        self.base.join(normalized).map_err(Into::into)
+        if let Ok(url) = Url::parse(path) {
+            return Ok(url);
+        }
+
+        self.base.join(path).map_err(Into::into)
     }
 
     async fn send_request(&self, url: Url) -> Result<Response> {
-        let response = self
-            .http
-            .get(url.clone())
-            .header(AUTHORIZATION, self.auth_header.clone())
+        self.follow_redirects(url, 10).await
+    }
+
+    async fn follow_redirects(&self, url: Url, max_redirects: usize) -> Result<Response> {
+        let mut current_url = url;
+        let mut redirects_left = max_redirects;
+        let mut include_auth = true;
+
+        loop {
+            let mut request = self.http.get(current_url.clone());
+            if include_auth {
+                request = request.header(AUTHORIZATION, self.auth_header.clone());
+            }
+            let response = request
                 .send()
                 .await
-                .with_context(|| format!("GET {}", url))?;
+                .with_context(|| format!("GET {}", current_url))?;
 
-        if !response.status().is_success() {
-            let status = response.status();
-            let body = response.text().await.unwrap_or_default();
-            bail!("Stud.IP request failed ({status}) - {body}");
-        }
+            if response.status().is_redirection() {
+                if redirects_left == 0 {
+                    bail!(
+                        "Exceeded redirect limit while requesting {}",
+                        response.url()
+                    );
+                }
+
+                let location = response.headers().get(LOCATION).ok_or_else(|| {
+                    anyhow!("Redirect from {} missing Location header", response.url())
+                })?;
+                let location = location
+                    .to_str()
+                    .context("Invalid redirect Location header")?;
+                let next_url = if let Ok(absolute) = Url::parse(location) {
+                    absolute
+                } else {
+                    response
+                        .url()
+                        .join(location)
+                        .with_context(|| format!("Invalid redirect location {location}"))?
+                };
+
+                if include_auth && next_url.origin() != self.base.origin() {
+                    include_auth = false;
+                }
+
+                current_url = next_url;
+                redirects_left -= 1;
+                continue;
+            }
 
-        Ok(response)
+            if !response.status().is_success() {
+                let status = response.status();
+                let final_url = response.url().clone();
+                let body = response.text().await.unwrap_or_default();
+                let external = final_url.origin() != self.base.origin();
+                return Err(StudipHttpError {
+                    status,
+                    url: final_url,
+                    body,
+                    external,
+                }
+                .into());
+            }
+
+            return Ok(response);
+        }
     }
 }
 
+#[derive(Debug, Error)]
+#[error("Stud.IP request failed ({status}) at {url} - {body}")]
+pub struct StudipHttpError {
+    pub status: StatusCode,
+    pub url: Url,
+    pub body: String,
+    pub external: bool,
+}
+
 fn build_root_and_api_urls(profile: &ConfigProfile) -> Result<(Url, Url)> {
     let base = profile.base_url.trim_end_matches('/');
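Because the builder sets `Policy::none()`, every redirect now passes through `follow_redirects`, and the `Authorization` header is dropped as soon as a hop leaves the Stud.IP origin; the same origin comparison is what marks a failed download as `external`. A minimal sketch of that check with the `url` crate (URLs are illustrative):

```rust
use url::Url;

fn main() {
    let base = Url::parse("https://studip.example.edu/api/").unwrap();
    let hop = Url::parse("https://cdn.example.org/file.pdf").unwrap();
    // Origin compares scheme, host, and port only, so any path on the same
    // Stud.IP host keeps credentials while third-party hosts lose them.
    assert_ne!(base.origin(), hop.origin());
}
```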