[feat] add input validation, enhanced error messages, and new integration tests
This commit is contained in:
40
Cargo.lock
generated
40
Cargo.lock
generated
@@ -93,9 +93,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.98"
|
||||
version = "1.0.99"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"
|
||||
checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100"
|
||||
|
||||
[[package]]
|
||||
name = "atomic-waker"
|
||||
@@ -190,9 +190,9 @@ checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.2.31"
|
||||
version = "1.2.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c3a42d84bb6b69d3a8b3eaacf0d88f179e1929695e1ad012b6cf64d9caaa5fd2"
|
||||
checksum = "2352e5597e9c544d5e6d9c95190d5d27738ade584fa8db0a16e130e5c2b5296e"
|
||||
dependencies = [
|
||||
"shlex",
|
||||
]
|
||||
@@ -239,9 +239,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "4.5.43"
|
||||
version = "4.5.44"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "50fd97c9dc2399518aa331917ac6f274280ec5eb34e555dd291899745c48ec6f"
|
||||
checksum = "1c1f056bae57e3e54c3375c41ff79619ddd13460a17d7438712bd0d83fda4ff8"
|
||||
dependencies = [
|
||||
"clap_builder",
|
||||
"clap_derive",
|
||||
@@ -249,9 +249,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "clap_builder"
|
||||
version = "4.5.43"
|
||||
version = "4.5.44"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c35b5830294e1fa0462034af85cc95225a4cb07092c088c55bda3147cfcd8f65"
|
||||
checksum = "b3e7f4214277f3c7aa526a59dd3fbe306a370daee1f8b7b8c987069cd8e888a8"
|
||||
dependencies = [
|
||||
"anstream",
|
||||
"anstyle",
|
||||
@@ -261,9 +261,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "clap_complete"
|
||||
version = "4.5.56"
|
||||
version = "4.5.57"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "67e4efcbb5da11a92e8a609233aa1e8a7d91e38de0be865f016d14700d45a7fd"
|
||||
checksum = "4d9501bd3f5f09f7bbee01da9a511073ed30a80cd7a509f1214bb74eadea71ad"
|
||||
dependencies = [
|
||||
"clap",
|
||||
]
|
||||
@@ -564,9 +564,9 @@ checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
|
||||
|
||||
[[package]]
|
||||
name = "glob"
|
||||
version = "0.3.2"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2"
|
||||
checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
|
||||
|
||||
[[package]]
|
||||
name = "h2"
|
||||
@@ -940,9 +940,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.174"
|
||||
version = "0.2.175"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776"
|
||||
checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543"
|
||||
|
||||
[[package]]
|
||||
name = "libloading"
|
||||
@@ -1190,9 +1190,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.95"
|
||||
version = "1.0.97"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778"
|
||||
checksum = "d61789d7719defeb74ea5fe81f2fdfdbd28a803847077cecce2ff14e1472f6f1"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
@@ -1363,9 +1363,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "rustversion"
|
||||
version = "1.0.21"
|
||||
version = "1.0.22"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d"
|
||||
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
@@ -1477,9 +1477,9 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
||||
|
||||
[[package]]
|
||||
name = "slab"
|
||||
version = "0.4.10"
|
||||
version = "0.4.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "04dc19736151f35336d325007ac991178d504a119863a2fcb3758cdb5e52c50d"
|
||||
checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589"
|
||||
|
||||
[[package]]
|
||||
name = "smallvec"
|
||||
|
@@ -694,10 +694,11 @@ pub fn decode_audio_to_pcm_f32_ffmpeg(audio_path: &Path) -> Result<Vec<f32>> {
|
||||
}
|
||||
};
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
return Err(anyhow!(
|
||||
"ffmpeg failed for {}: {}",
|
||||
"Failed to decode audio from {} using ffmpeg. This may indicate the file is not a valid or supported audio/video file, is corrupted, or cannot be opened. ffmpeg stderr: {}",
|
||||
audio_path.display(),
|
||||
String::from_utf8_lossy(&output.stderr)
|
||||
stderr.trim()
|
||||
));
|
||||
}
|
||||
let bytes = output.stdout;
|
||||
|
28
src/main.rs
28
src/main.rs
@@ -186,6 +186,22 @@ fn is_audio_file(path: &Path) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn validate_input_path(path: &Path) -> anyhow::Result<()> {
|
||||
use anyhow::{anyhow, Context};
|
||||
let display = path.display();
|
||||
if !path.exists() {
|
||||
return Err(anyhow!("Input not found: {}", display));
|
||||
}
|
||||
let md = std::fs::metadata(path).with_context(|| format!("Failed to stat input: {}", display))?;
|
||||
if md.is_dir() {
|
||||
return Err(anyhow!("Input is a directory (expected a file): {}", display));
|
||||
}
|
||||
// Attempt to open to catch permission errors early
|
||||
std::fs::File::open(path)
|
||||
.with_context(|| format!("Failed to open input file: {}", display))
|
||||
.map(|_| ())
|
||||
}
|
||||
|
||||
struct LastModelCleanup {
|
||||
path: PathBuf,
|
||||
}
|
||||
@@ -315,6 +331,18 @@ fn run() -> Result<()> {
|
||||
return Err(anyhow!("No input files provided"));
|
||||
}
|
||||
|
||||
// Preflight: validate each input path and type
|
||||
for inp in &inputs {
|
||||
let p = Path::new(inp);
|
||||
validate_input_path(p)?;
|
||||
if !(is_audio_file(p) || is_json_file(p)) {
|
||||
return Err(anyhow!(
|
||||
"Unsupported input type (expected .json transcript or common audio/video): {}",
|
||||
p.display()
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// Language must be provided via CLI when transcribing audio (no detection from JSON/env)
|
||||
let lang_hint: Option<String> = if let Some(ref l) = args.language {
|
||||
normalize_lang_code(l).or_else(|| Some(l.trim().to_lowercase()))
|
||||
|
@@ -620,10 +620,13 @@ fn download_one_model(client: &Client, models_dir: &Path, entry: &ModelEntry) ->
|
||||
url
|
||||
);
|
||||
let mut resp = client
|
||||
.get(url)
|
||||
.get(url.clone())
|
||||
.send()
|
||||
.and_then(|r| r.error_for_status())
|
||||
.context("Failed to download model")?;
|
||||
.with_context(|| format!(
|
||||
"Failed to download model {} from {}. If your terminal has display/TTY issues, try running with --no-progress.",
|
||||
entry.name, url
|
||||
))?;
|
||||
|
||||
let tmp_path = models_dir.join(format!("ggml-{}.bin.part", entry.name));
|
||||
if tmp_path.exists() {
|
||||
|
125
tests/integration_validation.rs
Normal file
125
tests/integration_validation.rs
Normal file
@@ -0,0 +1,125 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Validation and error-handling integration tests
|
||||
|
||||
use std::fs;
|
||||
use std::io::Read;
|
||||
use std::path::PathBuf;
|
||||
use std::process::Command;
|
||||
|
||||
fn bin() -> &'static str {
|
||||
env!("CARGO_BIN_EXE_polyscribe")
|
||||
}
|
||||
|
||||
fn manifest_path(relative: &str) -> PathBuf {
|
||||
let mut p = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
|
||||
p.push(relative);
|
||||
p
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn error_on_missing_input_file() {
|
||||
let exe = bin();
|
||||
let missing = manifest_path("input/definitely_missing_123.json");
|
||||
let out = Command::new(exe)
|
||||
.arg(missing.as_os_str())
|
||||
.output()
|
||||
.expect("failed to run polyscribe with missing input");
|
||||
assert!(!out.status.success(), "command should fail on missing input");
|
||||
let stderr = String::from_utf8_lossy(&out.stderr);
|
||||
assert!(
|
||||
stderr.contains("Input not found") || stderr.contains("No input files provided"),
|
||||
"stderr should mention missing input; got: {}",
|
||||
stderr
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn error_on_directory_as_input() {
|
||||
let exe = bin();
|
||||
// Use the repo's input directory which exists and is a directory
|
||||
let input_dir = manifest_path("input");
|
||||
let out = Command::new(exe)
|
||||
.arg(input_dir.as_os_str())
|
||||
.output()
|
||||
.expect("failed to run polyscribe with directory input");
|
||||
assert!(!out.status.success(), "command should fail on dir input");
|
||||
let stderr = String::from_utf8_lossy(&out.stderr);
|
||||
assert!(
|
||||
stderr.contains("directory") || stderr.contains("Unsupported input type"),
|
||||
"stderr should mention directory/unsupported; got: {}",
|
||||
stderr
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn error_on_no_ffmpeg_present() {
|
||||
let exe = bin();
|
||||
// Create a tiny temp .wav file (may be empty; ffmpeg will be attempted but PATH will be empty)
|
||||
let tmp_dir = manifest_path("target/tmp/itest_no_ffmpeg");
|
||||
let _ = fs::remove_dir_all(&tmp_dir);
|
||||
fs::create_dir_all(&tmp_dir).unwrap();
|
||||
let wav = tmp_dir.join("dummy.wav");
|
||||
fs::write(&wav, b"\0\0\0\0").unwrap();
|
||||
|
||||
let out = Command::new(exe)
|
||||
.arg(wav.as_os_str())
|
||||
.env("PATH", "") // simulate ffmpeg missing
|
||||
.env_remove("WHISPER_MODEL")
|
||||
.env("POLYSCRIBE_MODELS_BASE_COPY_DIR", manifest_path("models").as_os_str())
|
||||
.arg("--language").arg("en")
|
||||
.output()
|
||||
.expect("failed to run polyscribe with empty PATH");
|
||||
assert!(
|
||||
!out.status.success(),
|
||||
"command should fail when ffmpeg is not found"
|
||||
);
|
||||
let stderr = String::from_utf8_lossy(&out.stderr);
|
||||
assert!(
|
||||
stderr.contains("ffmpeg not found") || stderr.contains("Failed to execute ffmpeg"),
|
||||
"stderr should mention ffmpeg not found; got: {}",
|
||||
stderr
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
#[test]
|
||||
fn error_on_readonly_output_dir() {
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
|
||||
let exe = bin();
|
||||
let input1 = manifest_path("input/1-s0wlz.json");
|
||||
|
||||
// Prepare a read-only directory
|
||||
let tmp_dir = manifest_path("target/tmp/itest_readonly_out");
|
||||
let _ = fs::remove_dir_all(&tmp_dir);
|
||||
fs::create_dir_all(&tmp_dir).unwrap();
|
||||
let mut perms = fs::metadata(&tmp_dir).unwrap().permissions();
|
||||
perms.set_mode(0o555); // read & execute, no write
|
||||
fs::set_permissions(&tmp_dir, perms).unwrap();
|
||||
|
||||
let out = Command::new(exe)
|
||||
.arg(input1.as_os_str())
|
||||
.arg("-o")
|
||||
.arg(tmp_dir.as_os_str())
|
||||
.output()
|
||||
.expect("failed to run polyscribe with read-only output dir");
|
||||
|
||||
// Restore perms for cleanup
|
||||
let mut perms2 = fs::metadata(&tmp_dir).unwrap().permissions();
|
||||
perms2.set_mode(0o755);
|
||||
let _ = fs::set_permissions(&tmp_dir, perms2);
|
||||
|
||||
assert!(
|
||||
!out.status.success(),
|
||||
"command should fail when outputs cannot be created"
|
||||
);
|
||||
let stderr = String::from_utf8_lossy(&out.stderr);
|
||||
assert!(
|
||||
stderr.contains("Failed to create output") || stderr.contains("permission"),
|
||||
"stderr should mention failure to create outputs; got: {}",
|
||||
stderr
|
||||
);
|
||||
|
||||
// Cleanup
|
||||
let _ = fs::remove_dir_all(&tmp_dir);
|
||||
}
|
Reference in New Issue
Block a user