diff --git a/AGENTS.md b/AGENTS.md index 3f6ba4c..120ccff 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -30,14 +30,14 @@ This document equips future agents with the current mental model for the `studip 1. `studip-sync init-config` writes a config template (optionally overriding `download_root` and guarded by `--force`). 2. `studip-sync auth` collects credentials (CL flags, env, or interactive prompts), Base64-encodes `username:password`, and persists it in the active profile. -3. `studip-sync list-courses` builds a `StudipClient`, resolves/caches the user ID via `/users/me`, paginates `/users/{id}/courses`, fetches missing semesters, upserts course metadata into `state.toml`, and prints a table sorted by semester/title. +3. `studip-sync list-courses` builds a `StudipClient`, resolves/caches the user ID via `/users/me`, paginates `/users/{id}/courses`, fetches missing semesters (including start/end timestamps and re-fetching cached semesters that lack them), upserts course metadata into `state.toml`, and prints a table sorted by semester/title. 4. `studip-sync sync`: - Resolves download root (`config.download_root` or `$XDG_DATA_HOME/studip-sync/downloads`) and ensures directories exist unless `--dry-run`. - Refreshes course + semester info, then for each course performs a depth-first walk: `/courses/{id}/folders` ➜ `/folders/{id}/file-refs` ➜ `/folders/{id}/folders`. Pagination is handled by `fetch_all_pages`. - Normalizes path components and uses `NameRegistry` to avoid collisions, guaranteeing human-readable yet unique names. - Checks file state (size, modified timestamp, checksum) against `state.toml` to skip unchanged files; downloads stream to `*.part` before rename. - - Records remote metadata + local path hints in state. `--dry-run` reports actions without touching disk; `--prune` (plus non–dry-run) deletes stray files/dirs with `walkdir`. - - `--since ` is accepted for future incremental sync work; at the moment it is recorded in logs/state but no API filters are issued. + - Records remote metadata + local path hints in state. `--dry-run` reports actions without touching disk; `--prune` (plus non–dry-run) deletes stray files/dirs with `walkdir`. + - `--since ` now resolves either a semester key (e.g., `ws2526`) to its cached start timestamp or a date string (`DDMMYY`, `DDMMYYYY`, RFC3339) and skips remote files whose `chdate` precedes that cutoff. 5. HTTP errors propagate via `anyhow`, but 401/403 currently surface as generic failures—production UX should point users to `studip-sync auth`. ## Configuration & State @@ -45,7 +45,7 @@ This document equips future agents with the current mental model for the `studip - Config path: `${XDG_CONFIG_HOME:-~/.config}/studip-sync/config.toml`. Override with `--config-dir` when needed. Example keys: `base_url`, `jsonapi_path`, `basic_auth_b64`, `download_root`, `max_concurrent_downloads`. - State path: `${XDG_DATA_HOME:-~/.local/share}/studip-sync/state.toml`. `--data-dir` relocates this tree (and the default `downloads/` folder). Explicitly setting `download_root` decouples downloads from the data dir; otherwise it defaults to `/downloads`. - `profiles..user_id` caches `/users/me`. - - `profiles..semesters.` stores semester IDs/titles/keys. + - `profiles..semesters.` stores semester IDs/titles/keys plus `start`/`end` timestamps (needed for `--since ws2526`). Running `list-courses --refresh` will also refresh already-known semesters missing those timestamps so `sync --since …` can resolve properly. - `profiles..courses.` keeps display names + `last_sync`. - `profiles..files.` remembers size, checksum, timestamps, and the last local path to avoid redundant downloads. - Multiple profiles are supported; `--profile` switches, otherwise the config’s `default_profile` is used. @@ -65,7 +65,7 @@ This document equips future agents with the current mental model for the `studip ## Known Gaps / Backlog - `ConfigProfile::max_concurrent_downloads` is defined but unused; downloads happen sequentially. Introduce a bounded task queue if concurrency is needed. -- `SyncArgs::since` exists but is not wired into any API calls; ideal future work would leverage Stud.IP filters or local timestamps. +- Server-side filtering (`filter[since]`, if supported) is not used yet. Local cutoff logic relies on Stud.IP timestamps; future work could add conditional API query parameters once officially documented. - No automated tests (unit/integration) are present; critical helpers like `semesters::infer_key`, `normalize_component`, and state transitions should gain coverage. - Error UX for auth failures could be clearer (detect 401/403 and prompt users to re-run `auth`). - There is no CI config; if one is added, ensure it runs fmt/clippy/test. diff --git a/Cargo.toml b/Cargo.toml index bdaabd4..a667eee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,5 +20,5 @@ url = "2.4" atty = "0.2" rpassword = "7.3" walkdir = "2.5" -time = "0.3" +time = { version = "0.3", features = ["formatting", "parsing", "macros"] } sha2 = "0.10" diff --git a/README.md b/README.md index e85dac1..9ce2369 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ - `init-config` writes a ready-to-edit config template (respecting `--download-root` and `--force` to overwrite). - `auth` subcommand stores Base64-encoded credentials per profile (passwords are never logged). - `list-courses` fetches `/users/me`, paginates enrolled courses, infers semester keys, caches the metadata, and prints a concise table. -- `sync` traverses every course folder/file tree, normalizes names, streams downloads to disk, tracks checksums/remote timestamps, and supports `--dry-run` plus `--prune` to delete orphaned files. +- `sync` traverses every course folder/file tree, normalizes names, streams downloads to disk, tracks checksums/remote timestamps, and supports `--dry-run`, `--prune`, and `--since ` filters (e.g., `--since ws2526` or `--since 01032024`). - XDG-compliant config (`~/.config/studip-sync/config.toml`) and state (`~/.local/share/studip-sync/state.toml`) stores everything in TOML. - Extensive logging controls: `--quiet`, `--verbose/-v`, `--debug`, and `--json`. @@ -15,7 +15,7 @@ - Config lives under `${XDG_CONFIG_HOME:-~/.config}/studip-sync/config.toml`. Override this with `--config-dir` if you want the config somewhere else. - State is cached in `${XDG_DATA_HOME:-~/.local/share}/studip-sync/state.toml`; `--data-dir` only changes this location (and anything else the tool stores under data, such as the default downloads folder). Use this when you want the state cache on a different disk but keep the config where it is. -- `download_root` determines where files land. If omitted, it falls back to `/downloads`, so moving the data dir automatically relocates the default downloads. Setting `download_root` explicitly decouples it from the data dir. Each path segment is sanitized to keep names human-readable yet filesystem-safe. +- `download_root` determines where files land. If omitted, it falls back to `/downloads`, so moving the data dir automatically relocates the default downloads. Setting `download_root` explicitly decouples it from the data dir. Each path segment is sanitized to keep names human-readable yet filesystem-safe. Semester entries cached in `state.toml` now include start/end timestamps so CLI filters such as `--since ws2526` know when a term begins (`list-courses --refresh` also re-fetches any cached semester still missing those timestamps). ## Getting Started @@ -70,20 +70,20 @@ max_concurrent_downloads = 3 # placeholder for future concurrency control | `init-config` | Write a default config template (fails if config exists unless forced). | `--force`, `--download-root` | | `auth` | Collect username/password, encode them, and save them to the active profile. | `--non-interactive`, `--username`, `--password` | | `list-courses` | List cached or freshly fetched courses with semester keys and IDs. | `--refresh` | -| `sync` | Download files for every enrolled course into the local tree. | `--dry-run`, `--prune`, `--since` *(currently just records the user-provided timestamp; the API filtering hook is planned but not implemented yet)* | +| `sync` | Download files for every enrolled course into the local tree. | `--dry-run`, `--prune`, `--since ` | Global flags: `--quiet`, `--debug`, `--json`, `-v/--verbose` (stackable), `--config-dir`, `--data-dir` (state + default downloads), `--profile`. ## Sync Behavior 1. Resolve user ID (cached in `state.toml`) and fetch current courses. -2. Cache missing semesters via `/semesters/{id}` and infer keys like `ws2425` / `ss25`. +2. Cache missing semesters via `/semesters/{id}` and infer keys like `ws2425` / `ss25`. When `--refresh` is passed, already-known semesters that never recorded a `start` timestamp are re-fetched so `--since` filters have the data they need. 3. For each course: - Walk folders using the JSON:API pagination helpers; fetch nested folders via `/folders/{id}/folders`. - List file refs via `/folders/{id}/file-refs`, normalize filenames, and ensure unique siblings through a `NameRegistry`. - Skip downloads when the local file exists and matches the stored checksum / size / remote `chdate`. - Stream downloads to `*.part`, hash contents on the fly, then rename atomically to the final path. -4. Maintain a set of remote files so `--prune` can remove local files that no longer exist remotely (and optionally delete now-empty directories). `--since` is accepted for future incremental sync work and currently acts as an annotation only—no API filters are applied yet. +4. Maintain a set of remote files so `--prune` can remove local files that no longer exist remotely (and optionally delete now-empty directories). When `--since` is provided, files whose remote `chdate` precedes the resolved timestamp (semester start or explicit date) are skipped; newer files continue through the regular checksum/size logic. 5. `--dry-run` prints planned work but never writes to disk. ## Development Notes diff --git a/src/cli.rs b/src/cli.rs index 221e50c..b2f857e 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -20,7 +20,10 @@ use std::{ io::{self, BufReader, Read, Write}, path::{Path, PathBuf}, }; -use time::{OffsetDateTime, format_description::well_known::Rfc3339}; +use time::{ + Date, OffsetDateTime, PrimitiveDateTime, Time, UtcOffset, + format_description::well_known::Rfc3339, macros::format_description, +}; use tokio::{fs, io::AsyncWriteExt}; use tracing::info; use walkdir::WalkDir; @@ -217,6 +220,11 @@ impl CommandContext { self.state.profile(&self.active_profile) } + pub fn semester_by_key(&self, key: &str) -> Option<&SemesterState> { + self.state_profile() + .and_then(|profile| profile.semesters.get(key)) + } + pub fn state_profile_mut(&mut self) -> &mut ProfileState { self.state_dirty = true; self.state.profile_mut(&self.active_profile) @@ -240,6 +248,15 @@ impl CommandContext { mut semester: SemesterState, ) -> String { if let Some(existing_key) = self.semester_key_by_id(&semester.id) { + if let Some(existing) = self.state_profile_mut().semesters.get_mut(&existing_key) { + existing.title = semester.title.clone(); + if semester.start.is_some() { + existing.start = semester.start.clone(); + } + if semester.end.is_some() { + existing.end = semester.end.clone(); + } + } return existing_key; } @@ -435,6 +452,8 @@ impl SyncArgs { ensure_semesters_cached(ctx, &client, &courses).await?; + let since_filter = resolve_since_filter(ctx, self.since.as_deref())?; + let fallback_root = ctx.paths().data_dir.join("downloads"); let download_root = ctx .config_profile() @@ -458,6 +477,7 @@ impl SyncArgs { &download_root, &mut name_registry, self, + since_filter, &mut remote_files, &mut stats, ) @@ -477,8 +497,10 @@ impl SyncArgs { downloaded = stats.downloaded, skipped = stats.skipped, planned = stats.planned, + skipped_since = stats.skipped_since, pruned_files = stats.pruned_files, pruned_dirs = stats.pruned_dirs, + since = self.since.as_deref().unwrap_or(""), "sync completed" ); Ok(()) @@ -579,13 +601,12 @@ async fn resolve_user_id( client: &StudipClient, force_refresh: bool, ) -> Result { - if !force_refresh { - if let Some(existing) = ctx + if !force_refresh + && let Some(existing) = ctx .state_profile() .and_then(|profile| profile.user_id.clone()) - { - return Ok(existing); - } + { + return Ok(existing); } let response = client.current_user().await?; @@ -602,7 +623,14 @@ async fn ensure_semesters_cached( let mut missing = HashSet::new(); for course in courses { if let Some(semester_id) = course.relationships.semester_id() { - if ctx.semester_key_by_id(semester_id).is_none() { + let needs_fetch = match ctx.semester_key_by_id(semester_id) { + Some(existing_key) => ctx + .semester_by_key(&existing_key) + .is_none_or(|semester| semester.start.is_none()), + None => true, + }; + + if needs_fetch { missing.insert(semester_id.to_string()); } } @@ -611,7 +639,7 @@ async fn ensure_semesters_cached( for semester_id in missing { let SemesterResponse { data } = client.semester(&semester_id).await?; let SemesterData { id, attributes } = data; - let title = attributes.title; + let title = attributes.title.clone(); let preferred_key = semesters::infer_key(&title); ctx.insert_semester( preferred_key, @@ -619,6 +647,8 @@ async fn ensure_semesters_cached( id, title, key: String::new(), + start: attributes.start.clone(), + end: attributes.end.clone(), }, ); } @@ -640,6 +670,7 @@ struct SyncStats { downloaded: usize, skipped: usize, planned: usize, + skipped_since: usize, pruned_files: usize, pruned_dirs: usize, } @@ -650,6 +681,7 @@ struct PruneStats { removed_dirs: usize, } +#[allow(clippy::too_many_arguments)] async fn sync_course( ctx: &mut CommandContext, client: &StudipClient, @@ -657,6 +689,7 @@ async fn sync_course( download_root: &Path, name_registry: &mut NameRegistry, args: &SyncArgs, + since: Option, remote_files: &mut HashSet, stats: &mut SyncStats, ) -> Result<()> { @@ -687,15 +720,25 @@ async fn sync_course( download_root, name_registry, args, + since, remote_files, stats, ) .await?; } + let last_sync = current_timestamp()?; + ctx.upsert_course(CourseState { + id: course.id.clone(), + name: course_display_title(course), + semester_key, + last_sync: Some(last_sync), + }); + Ok(()) } +#[allow(clippy::too_many_arguments)] async fn sync_folder( ctx: &mut CommandContext, client: &StudipClient, @@ -704,6 +747,7 @@ async fn sync_folder( download_root: &Path, name_registry: &mut NameRegistry, args: &SyncArgs, + since: Option, remote_files: &mut HashSet, stats: &mut SyncStats, ) -> Result<()> { @@ -728,6 +772,7 @@ async fn sync_folder( download_root, name_registry, args, + since, remote_files, stats, ) @@ -743,6 +788,7 @@ async fn sync_folder( Ok(()) } +#[allow(clippy::too_many_arguments)] async fn sync_file_ref( ctx: &mut CommandContext, client: &StudipClient, @@ -751,6 +797,7 @@ async fn sync_file_ref( download_root: &Path, name_registry: &mut NameRegistry, args: &SyncArgs, + since: Option, remote_files: &mut HashSet, stats: &mut SyncStats, ) -> Result<()> { @@ -767,6 +814,20 @@ async fn sync_file_ref( remote_files.insert(relative_path.clone()); + if let Some(threshold) = since + && let Some(remote_modified) = parse_remote_timestamp(&file_ref.attributes.modified) + && remote_modified < threshold + { + if args.dry_run { + println!( + "Would skip {} (before --since threshold)", + relative_path.display() + ); + } + stats.skipped_since += 1; + return Ok(()); + } + let needs_download = file_needs_download(ctx, &file_ref, &local_path); if args.dry_run { @@ -800,16 +861,16 @@ fn file_needs_download(ctx: &CommandContext, file_ref: &FileRef, local_path: &Pa .and_then(|profile| profile.files.get(&file_ref.id)); if let Some(state) = existing { - if let Some(saved) = state.remote_modified.as_deref() { - if saved != file_ref.attributes.modified.as_str() { - return true; - } + if let Some(saved) = state.remote_modified.as_deref() + && saved != file_ref.attributes.modified.as_str() + { + return true; } - if let (Some(saved_size), Some(remote_size)) = (state.size, file_ref.attributes.file_size) { - if saved_size != remote_size { - return true; - } + if let (Some(saved_size), Some(remote_size)) = (state.size, file_ref.attributes.file_size) + && saved_size != remote_size + { + return true; } if let Some(expected_checksum) = state.checksum.as_deref() { @@ -1087,6 +1148,85 @@ fn hex_encode(bytes: &[u8]) -> String { out } +fn resolve_since_filter(ctx: &CommandContext, raw: Option<&str>) -> Result> { + let Some(value) = raw.map(|s| s.trim()).filter(|s| !s.is_empty()) else { + return Ok(None); + }; + + if let Some(ts) = resolve_semester_since(ctx, value)? { + return Ok(Some(ts)); + } + + if let Some(ts) = parse_compact_since(value)? { + return Ok(Some(ts)); + } + + if let Ok(ts) = OffsetDateTime::parse(value, &Rfc3339) { + return Ok(Some(ts)); + } + + bail!( + "Unable to interpret --since value `{}`. Use a semester key (e.g., ws2526) or a date in DDMMYY / DDMMYYYY / RFC3339 format.", + value + ); +} + +fn resolve_semester_since(ctx: &CommandContext, key: &str) -> Result> { + let normalized = key.trim().to_ascii_lowercase(); + let semester = ctx + .semester_by_key(&normalized) + .or_else(|| ctx.semester_by_key(key)); + + let Some(semester) = semester else { + return Ok(None); + }; + + let start = semester.start.as_deref().ok_or_else(|| { + anyhow!( + "Semester `{}` is known but does not expose a start timestamp yet. Re-run `studip-sync list-courses --refresh` to refresh state.", + key + ) + })?; + + let parsed = OffsetDateTime::parse(start, &Rfc3339).with_context(|| { + format!( + "Failed to parse start timestamp `{}` for semester `{}`", + start, key + ) + })?; + Ok(Some(parsed)) +} + +fn parse_compact_since(input: &str) -> Result> { + match input.len() { + 6 => { + let fmt = format_description!("[day][month][year repr:last_two]"); + let date = Date::parse(input, &fmt) + .with_context(|| format!("Failed to parse `{}` as DDMMYY date", input))?; + let dt = PrimitiveDateTime::new(date, Time::MIDNIGHT); + Ok(Some(dt.assume_offset(UtcOffset::UTC))) + } + 8 => { + let fmt = format_description!("[day][month][year]"); + let date = Date::parse(input, &fmt) + .with_context(|| format!("Failed to parse `{}` as DDMMYYYY date", input))?; + let dt = PrimitiveDateTime::new(date, Time::MIDNIGHT); + Ok(Some(dt.assume_offset(UtcOffset::UTC))) + } + _ => Ok(None), + } +} + +fn parse_remote_timestamp(raw: &str) -> Option { + if let Ok(ts) = OffsetDateTime::parse(raw, &Rfc3339) { + return Some(ts); + } + if let Ok(epoch) = raw.parse::() { + return OffsetDateTime::from_unix_timestamp(epoch).ok(); + } + None +} + fn render_default_config_template(download_root: &Path) -> String { let download_root = download_root.to_string_lossy(); format!( diff --git a/src/semesters.rs b/src/semesters.rs index 9550b1d..4f79448 100644 --- a/src/semesters.rs +++ b/src/semesters.rs @@ -7,7 +7,7 @@ pub fn infer_key(title: &str) -> String { match season { Winter => { let first = numbers - .get(0) + .first() .map(|value| last_two_digits(value)) .unwrap_or_else(|| "00".into()); let second = numbers @@ -16,7 +16,7 @@ pub fn infer_key(title: &str) -> String { .unwrap_or_else(|| { // winter spans two years; if only one provided, assume +1 numbers - .get(0) + .first() .map(|n| increment_two_digits(n)) .unwrap_or_else(|| "00".into()) }); @@ -24,7 +24,7 @@ pub fn infer_key(title: &str) -> String { } Summer => { let year = numbers - .get(0) + .first() .map(|value| last_two_digits(value)) .unwrap_or_else(|| "00".into()); format!("ss{year}") diff --git a/src/state.rs b/src/state.rs index d50eeef..d19fc6b 100644 --- a/src/state.rs +++ b/src/state.rs @@ -29,6 +29,10 @@ pub struct SemesterState { pub id: String, pub title: String, pub key: String, + #[serde(default)] + pub start: Option, + #[serde(default)] + pub end: Option, } #[derive(Debug, Default, Serialize, Deserialize, Clone)] @@ -74,9 +78,7 @@ impl StateFile { } pub fn profile_mut(&mut self, profile: &str) -> &mut ProfileState { - self.profiles - .entry(profile.to_string()) - .or_insert_with(ProfileState::default) + self.profiles.entry(profile.to_string()).or_default() } pub fn profile(&self, profile: &str) -> Option<&ProfileState> { diff --git a/src/studip_client.rs b/src/studip_client.rs index 0763972..d9163dd 100644 --- a/src/studip_client.rs +++ b/src/studip_client.rs @@ -194,6 +194,12 @@ pub struct SemesterData { #[derive(Debug, Deserialize)] pub struct SemesterAttributes { pub title: String, + #[serde(default)] + pub description: Option, + #[serde(default)] + pub start: Option, + #[serde(default)] + pub end: Option, } #[derive(Debug, Deserialize)]