diff --git a/Cargo.lock b/Cargo.lock index 93dcf04..a3f49f5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -93,9 +93,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.98" +version = "1.0.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" +checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100" [[package]] name = "atomic-waker" @@ -190,9 +190,9 @@ checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" [[package]] name = "cc" -version = "1.2.31" +version = "1.2.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3a42d84bb6b69d3a8b3eaacf0d88f179e1929695e1ad012b6cf64d9caaa5fd2" +checksum = "2352e5597e9c544d5e6d9c95190d5d27738ade584fa8db0a16e130e5c2b5296e" dependencies = [ "shlex", ] @@ -239,9 +239,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.43" +version = "4.5.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50fd97c9dc2399518aa331917ac6f274280ec5eb34e555dd291899745c48ec6f" +checksum = "1c1f056bae57e3e54c3375c41ff79619ddd13460a17d7438712bd0d83fda4ff8" dependencies = [ "clap_builder", "clap_derive", @@ -249,9 +249,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.43" +version = "4.5.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c35b5830294e1fa0462034af85cc95225a4cb07092c088c55bda3147cfcd8f65" +checksum = "b3e7f4214277f3c7aa526a59dd3fbe306a370daee1f8b7b8c987069cd8e888a8" dependencies = [ "anstream", "anstyle", @@ -261,9 +261,9 @@ dependencies = [ [[package]] name = "clap_complete" -version = "4.5.56" +version = "4.5.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67e4efcbb5da11a92e8a609233aa1e8a7d91e38de0be865f016d14700d45a7fd" +checksum = "4d9501bd3f5f09f7bbee01da9a511073ed30a80cd7a509f1214bb74eadea71ad" dependencies = [ "clap", ] @@ -564,9 +564,9 @@ checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" [[package]] name = "glob" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" [[package]] name = "h2" @@ -940,9 +940,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.174" +version = "0.2.175" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" +checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" [[package]] name = "libloading" @@ -1190,9 +1190,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.95" +version = "1.0.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +checksum = "d61789d7719defeb74ea5fe81f2fdfdbd28a803847077cecce2ff14e1472f6f1" dependencies = [ "unicode-ident", ] @@ -1363,9 +1363,9 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.21" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "ryu" @@ -1477,9 +1477,9 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "slab" -version = "0.4.10" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04dc19736151f35336d325007ac991178d504a119863a2fcb3758cdb5e52c50d" +checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" [[package]] name = "smallvec" diff --git a/src/lib.rs b/src/lib.rs index 153c5d3..2720de1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -694,10 +694,11 @@ pub fn decode_audio_to_pcm_f32_ffmpeg(audio_path: &Path) -> Result> { } }; if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); return Err(anyhow!( - "ffmpeg failed for {}: {}", + "Failed to decode audio from {} using ffmpeg. This may indicate the file is not a valid or supported audio/video file, is corrupted, or cannot be opened. ffmpeg stderr: {}", audio_path.display(), - String::from_utf8_lossy(&output.stderr) + stderr.trim() )); } let bytes = output.stdout; diff --git a/src/main.rs b/src/main.rs index 7b8f383..64295f3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -186,6 +186,22 @@ fn is_audio_file(path: &Path) -> bool { false } +fn validate_input_path(path: &Path) -> anyhow::Result<()> { + use anyhow::{anyhow, Context}; + let display = path.display(); + if !path.exists() { + return Err(anyhow!("Input not found: {}", display)); + } + let md = std::fs::metadata(path).with_context(|| format!("Failed to stat input: {}", display))?; + if md.is_dir() { + return Err(anyhow!("Input is a directory (expected a file): {}", display)); + } + // Attempt to open to catch permission errors early + std::fs::File::open(path) + .with_context(|| format!("Failed to open input file: {}", display)) + .map(|_| ()) +} + struct LastModelCleanup { path: PathBuf, } @@ -315,6 +331,18 @@ fn run() -> Result<()> { return Err(anyhow!("No input files provided")); } + // Preflight: validate each input path and type + for inp in &inputs { + let p = Path::new(inp); + validate_input_path(p)?; + if !(is_audio_file(p) || is_json_file(p)) { + return Err(anyhow!( + "Unsupported input type (expected .json transcript or common audio/video): {}", + p.display() + )); + } + } + // Language must be provided via CLI when transcribing audio (no detection from JSON/env) let lang_hint: Option = if let Some(ref l) = args.language { normalize_lang_code(l).or_else(|| Some(l.trim().to_lowercase())) diff --git a/src/models.rs b/src/models.rs index c574649..4144c71 100644 --- a/src/models.rs +++ b/src/models.rs @@ -620,10 +620,13 @@ fn download_one_model(client: &Client, models_dir: &Path, entry: &ModelEntry) -> url ); let mut resp = client - .get(url) + .get(url.clone()) .send() .and_then(|r| r.error_for_status()) - .context("Failed to download model")?; + .with_context(|| format!( + "Failed to download model {} from {}. If your terminal has display/TTY issues, try running with --no-progress.", + entry.name, url + ))?; let tmp_path = models_dir.join(format!("ggml-{}.bin.part", entry.name)); if tmp_path.exists() { diff --git a/tests/integration_validation.rs b/tests/integration_validation.rs new file mode 100644 index 0000000..640f38d --- /dev/null +++ b/tests/integration_validation.rs @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: MIT +// Validation and error-handling integration tests + +use std::fs; +use std::io::Read; +use std::path::PathBuf; +use std::process::Command; + +fn bin() -> &'static str { + env!("CARGO_BIN_EXE_polyscribe") +} + +fn manifest_path(relative: &str) -> PathBuf { + let mut p = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + p.push(relative); + p +} + +#[test] +fn error_on_missing_input_file() { + let exe = bin(); + let missing = manifest_path("input/definitely_missing_123.json"); + let out = Command::new(exe) + .arg(missing.as_os_str()) + .output() + .expect("failed to run polyscribe with missing input"); + assert!(!out.status.success(), "command should fail on missing input"); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("Input not found") || stderr.contains("No input files provided"), + "stderr should mention missing input; got: {}", + stderr + ); +} + +#[test] +fn error_on_directory_as_input() { + let exe = bin(); + // Use the repo's input directory which exists and is a directory + let input_dir = manifest_path("input"); + let out = Command::new(exe) + .arg(input_dir.as_os_str()) + .output() + .expect("failed to run polyscribe with directory input"); + assert!(!out.status.success(), "command should fail on dir input"); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("directory") || stderr.contains("Unsupported input type"), + "stderr should mention directory/unsupported; got: {}", + stderr + ); +} + +#[test] +fn error_on_no_ffmpeg_present() { + let exe = bin(); + // Create a tiny temp .wav file (may be empty; ffmpeg will be attempted but PATH will be empty) + let tmp_dir = manifest_path("target/tmp/itest_no_ffmpeg"); + let _ = fs::remove_dir_all(&tmp_dir); + fs::create_dir_all(&tmp_dir).unwrap(); + let wav = tmp_dir.join("dummy.wav"); + fs::write(&wav, b"\0\0\0\0").unwrap(); + + let out = Command::new(exe) + .arg(wav.as_os_str()) + .env("PATH", "") // simulate ffmpeg missing + .env_remove("WHISPER_MODEL") + .env("POLYSCRIBE_MODELS_BASE_COPY_DIR", manifest_path("models").as_os_str()) + .arg("--language").arg("en") + .output() + .expect("failed to run polyscribe with empty PATH"); + assert!( + !out.status.success(), + "command should fail when ffmpeg is not found" + ); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("ffmpeg not found") || stderr.contains("Failed to execute ffmpeg"), + "stderr should mention ffmpeg not found; got: {}", + stderr + ); +} + +#[cfg(unix)] +#[test] +fn error_on_readonly_output_dir() { + use std::os::unix::fs::PermissionsExt; + + let exe = bin(); + let input1 = manifest_path("input/1-s0wlz.json"); + + // Prepare a read-only directory + let tmp_dir = manifest_path("target/tmp/itest_readonly_out"); + let _ = fs::remove_dir_all(&tmp_dir); + fs::create_dir_all(&tmp_dir).unwrap(); + let mut perms = fs::metadata(&tmp_dir).unwrap().permissions(); + perms.set_mode(0o555); // read & execute, no write + fs::set_permissions(&tmp_dir, perms).unwrap(); + + let out = Command::new(exe) + .arg(input1.as_os_str()) + .arg("-o") + .arg(tmp_dir.as_os_str()) + .output() + .expect("failed to run polyscribe with read-only output dir"); + + // Restore perms for cleanup + let mut perms2 = fs::metadata(&tmp_dir).unwrap().permissions(); + perms2.set_mode(0o755); + let _ = fs::set_permissions(&tmp_dir, perms2); + + assert!( + !out.status.success(), + "command should fail when outputs cannot be created" + ); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("Failed to create output") || stderr.contains("permission"), + "stderr should mention failure to create outputs; got: {}", + stderr + ); + + // Cleanup + let _ = fs::remove_dir_all(&tmp_dir); +}