diff --git a/TODO.md b/TODO.md
index 201c46d..f8e653d 100644
--- a/TODO.md
+++ b/TODO.md
@@ -1,7 +1,7 @@
 - [x] update the project to no more use features
 - [x] update last_model to be only used during one run
 - [x] rename project to "PolyScribe"
-- add tests
+- [x] add tests
 - update local models using hashes (--update-models)
 - create folder models/ if not present -> use /usr/share/polyscribe/models/ for release version, use ./models/ for development version
 - create missing folders for output files
diff --git a/src/main.rs b/src/main.rs
index f0a3caa..916c11d 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -512,3 +512,85 @@ fn main() -> Result<()> {
 
     Ok(())
 }
+
+
+#[cfg(test)]
+mod tests {
+    // Unit tests for the helper functions of this file; `super::*` brings them into scope.
+    use super::*;
+    use std::path::Path;
+
+    /// Timestamps render as `HH:MM:SS,mmm`, rounded to the nearest millisecond.
+    #[test]
+    fn test_format_srt_time_basic_and_rounding() {
+        assert_eq!(format_srt_time(0.0), "00:00:00,000");
+        assert_eq!(format_srt_time(1.0), "00:00:01,000");
+        assert_eq!(format_srt_time(61.0), "00:01:01,000");
+        assert_eq!(format_srt_time(3661.789), "01:01:01,789");
+        // rounding: sub-millisecond remainders go to the nearest millisecond
+        assert_eq!(format_srt_time(0.0014), "00:00:00,001");
+        assert_eq!(format_srt_time(0.0015), "00:00:00,002");
+    }
+
+    /// Cues are numbered from 1; the `speaker: ` prefix is omitted when the speaker is empty.
+    #[test]
+    fn test_render_srt_with_and_without_speaker() {
+        let items = vec![
+            OutputEntry { id: 0, speaker: "Alice".to_string(), start: 0.0, end: 1.0, text: "Hello".to_string() },
+            OutputEntry { id: 1, speaker: String::new(), start: 1.0, end: 2.0, text: "World".to_string() },
+        ];
+        let srt = render_srt(&items);
+        let expected = "1\n00:00:00,000 --> 00:00:01,000\nAlice: Hello\n\n2\n00:00:01,000 --> 00:00:02,000\nWorld\n\n";
+        assert_eq!(srt, expected);
+    }
+
+    /// A leading `<digits>-` prefix is stripped; names without such a prefix stay unchanged.
+    #[test]
+    fn test_sanitize_speaker_name() {
+        assert_eq!(sanitize_speaker_name("123-bob"), "bob");
+        assert_eq!(sanitize_speaker_name("00123-alice"), "alice");
+        assert_eq!(sanitize_speaker_name("abc-bob"), "abc-bob");
+        assert_eq!(sanitize_speaker_name("123"), "123");
+        assert_eq!(sanitize_speaker_name("-bob"), "-bob");
+        assert_eq!(sanitize_speaker_name("123-"), "");
+    }
+
+    /// Extension matching ignores case (`.JSON`, `.WAV`); non-matching paths are rejected.
+    #[test]
+    fn test_is_json_file_and_is_audio_file() {
+        assert!(is_json_file(Path::new("foo.json")));
+        assert!(is_json_file(Path::new("foo.JSON")));
+        assert!(!is_json_file(Path::new("foo.txt")));
+        assert!(!is_json_file(Path::new("foo")));
+
+        assert!(is_audio_file(Path::new("a.mp3")));
+        assert!(is_audio_file(Path::new("b.WAV")));
+        assert!(is_audio_file(Path::new("c.m4a")));
+        assert!(!is_audio_file(Path::new("d.txt")));
+    }
+
+    /// Language names and locale strings map to two-letter codes; `AUTO` and blank input give `None`.
+    #[test]
+    fn test_normalize_lang_code() {
+        assert_eq!(normalize_lang_code("en"), Some("en".to_string()));
+        assert_eq!(normalize_lang_code("German"), Some("de".to_string()));
+        assert_eq!(normalize_lang_code("en_US.UTF-8"), Some("en".to_string()));
+        assert_eq!(normalize_lang_code("AUTO"), None);
+        assert_eq!(normalize_lang_code(" \t "), None);
+        assert_eq!(normalize_lang_code("zh"), Some("zh".to_string()));
+    }
+
+    /// `date_prefix` has the shape `YYYY-MM-DD`: ten bytes, digits everywhere except the two dashes.
+    #[test]
+    fn test_date_prefix_format_shape() {
+        let d = date_prefix();
+        assert_eq!(d.len(), 10, "unexpected date prefix {:?}", d);
+        for (i, b) in d.bytes().enumerate() {
+            if i == 4 || i == 7 {
+                assert_eq!(b, b'-', "expected '-' at byte {} of {:?}", i, d);
+            } else {
+                assert!(b.is_ascii_digit(), "expected a digit at byte {} of {:?}", i, d);
+            }
+        }
+    }
+}
diff --git a/tests/integration_cli.rs b/tests/integration_cli.rs
new file mode 100644
index 0000000..7f247a5
--- /dev/null
+++ b/tests/integration_cli.rs
@@ -0,0 +1,125 @@
+use std::fs;
+use std::io::Read;
+use std::path::{Path, PathBuf};
+use std::process::Command;
+
+use chrono::Local;
+use serde::Deserialize;
+
+/// One entry of the `items` array in the CLI's JSON output.
+///
+/// Only `speaker` is read by the tests; the other fields are declared so that
+/// deserialization fails if they are missing or have the wrong type.
+#[allow(dead_code)] // fields other than `speaker` are only deserialized, never read
+#[derive(Deserialize)]
+struct OutputEntry {
+    id: u64,
+    speaker: String,
+    start: f64,
+    end: f64,
+    text: String,
+}
+
+/// Top-level object of the CLI's JSON output.
+#[derive(Deserialize)]
+struct OutputRoot {
+    items: Vec<OutputEntry>,
+}
+
+/// Scratch directory under the system temp dir, removed again on drop.
+struct TestDir(PathBuf);
+impl TestDir {
+    /// Creates `polyscribe_test_<pid>_<timestamp>` below `std::env::temp_dir()`.
+    fn new() -> Self {
+        let mut p = std::env::temp_dir();
+        let ts = Local::now().format("%Y%m%d%H%M%S%3f");
+        let pid = std::process::id();
+        p.push(format!("polyscribe_test_{}_{}", pid, ts));
+        fs::create_dir_all(&p).expect("Failed to create temp dir");
+        TestDir(p)
+    }
+    /// Path of the scratch directory.
+    fn path(&self) -> &Path { &self.0 }
+}
+impl Drop for TestDir {
+    fn drop(&mut self) {
+        // Best-effort cleanup: a failed removal must not panic while dropping.
+        let _ = fs::remove_dir_all(&self.0);
+    }
+}
+
+/// Resolves `relative` against the crate root (`CARGO_MANIFEST_DIR`).
+fn manifest_path(relative: &str) -> PathBuf {
+    let mut p = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+    p.push(relative);
+    p
+}
+
+/// With `-o <dir>/out`, the CLI is expected to write `<date>_out.{json,toml,srt}` into `<dir>`.
+#[test]
+fn cli_merges_json_inputs_and_writes_outputs_to_temp_dir() {
+    let exe = env!("CARGO_BIN_EXE_polyscribe");
+    let tmp = TestDir::new();
+    let base = tmp.path().join("out");
+
+    let input1 = manifest_path("input/1-s0wlz.json");
+    let input2 = manifest_path("input/2-vikingowl.json");
+
+    // Run the CLI to write outputs into temp directory
+    let status = Command::new(exe)
+        .arg(input1.as_os_str())
+        .arg(input2.as_os_str())
+        .arg("-o")
+        .arg(base.as_os_str())
+        .status()
+        .expect("failed to spawn polyscribe");
+    assert!(status.success(), "CLI did not exit successfully");
+
+    // Expect files with today's date prefix
+    // NOTE(review): the date is sampled after the CLI ran, so a run that straddles midnight would look for the wrong prefix.
+    let date = Local::now().format("%Y-%m-%d").to_string();
+    let stem = format!("{}_{}", date, "out");
+    let json_path = tmp.path().join(format!("{}.json", stem));
+    let toml_path = tmp.path().join(format!("{}.toml", stem));
+    let srt_path = tmp.path().join(format!("{}.srt", stem));
+
+    assert!(json_path.is_file(), "missing JSON output: {}", json_path.display());
+    assert!(toml_path.is_file(), "missing TOML output: {}", toml_path.display());
+    assert!(srt_path.is_file(), "missing SRT output: {}", srt_path.display());
+
+    // Parse JSON and perform sanity checks
+    let mut json_str = String::new();
+    fs::File::open(&json_path).unwrap().read_to_string(&mut json_str).unwrap();
+    let parsed: OutputRoot = serde_json::from_str(&json_str).expect("invalid JSON in output");
+    assert!(!parsed.items.is_empty(), "no items in JSON output");
+    // Speakers should include sanitized stems from inputs
+    let speakers: std::collections::HashSet<_> = parsed.items.iter().map(|e| e.speaker.as_str()).collect();
+    assert!(speakers.contains("s0wlz"), "expected speaker s0wlz");
+    assert!(speakers.contains("vikingowl"), "expected speaker vikingowl");
+
+    // Check SRT has expected basic structure and speaker label present at least once
+    let mut srt = String::new();
+    fs::File::open(&srt_path).unwrap().read_to_string(&mut srt).unwrap();
+    assert!(srt.starts_with("1\n"), "SRT should start with index 1");
+    assert!(srt.contains("s0wlz:") || srt.contains("vikingowl:"), "SRT should contain at least one speaker label");
+}
+
+/// Without `-o`, the CLI is expected to exit successfully and print JSON containing `"items"` to stdout.
+#[test]
+fn cli_prints_json_to_stdout_when_no_output_path() {
+    let exe = env!("CARGO_BIN_EXE_polyscribe");
+    let input1 = manifest_path("input/1-s0wlz.json");
+    let input2 = manifest_path("input/2-vikingowl.json");
+
+    let output = Command::new(exe)
+        .arg(input1.as_os_str())
+        .arg(input2.as_os_str())
+        .output()
+        .expect("failed to spawn polyscribe");
+    assert!(output.status.success(), "CLI failed");
+
+    let stdout = String::from_utf8(output.stdout).expect("stdout not UTF-8");
+    assert!(stdout.contains("\"items\""), "stdout should contain items JSON array");
+    // NOTE: this test only inspects stdout; it does not verify that no files are
+    // written to the repository's output/ directory when -o is omitted.
+}