[test] add unit and integration tests for core functions and CLI behavior

This commit is contained in:
2025-08-08 10:29:18 +02:00
parent 3495d69da9
commit 29b6a2493b
3 changed files with 186 additions and 1 deletions

View File

@@ -1,7 +1,7 @@
- [x] update the project to no longer use features
- [x] update last_model to be only used during one run
- [x] rename project to "PolyScribe"
- add tests
- [x] add tests
- update local models using hashes (--update-models)
- create folder models/ if not present -> use /usr/share/polyscribe/models/ for release version, use ./models/ for development version
- create missing folders for output files

View File

@@ -512,3 +512,77 @@ fn main() -> Result<()> {
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use std::path::Path;
/// Checks `format_srt_time` against whole-second inputs, a mixed
/// hours/minutes/seconds/milliseconds value, and two sub-millisecond inputs
/// that sit on either side of a rounding boundary.
#[test]
fn test_format_srt_time_basic_and_rounding() {
    // (input seconds, expected `HH:MM:SS,mmm` rendering)
    let cases = [
        (0.0, "00:00:00,000"),
        (1.0, "00:00:01,000"),
        (61.0, "00:01:01,000"),
        (3661.789, "01:01:01,789"),
        // rounding
        (0.0014, "00:00:00,001"),
        (0.0015, "00:00:00,002"),
    ];
    for (seconds, expected) in cases {
        assert_eq!(format_srt_time(seconds), expected);
    }
}
/// Renders one entry that has a speaker and one whose speaker is empty, and
/// compares the complete SRT text: the first cue is expected to carry an
/// `Alice: ` prefix, the second to contain the bare text only.
#[test]
fn test_render_srt_with_and_without_speaker() {
    let with_speaker = OutputEntry {
        id: 0,
        speaker: String::from("Alice"),
        start: 0.0,
        end: 1.0,
        text: String::from("Hello"),
    };
    let without_speaker = OutputEntry {
        id: 1,
        speaker: String::new(),
        start: 1.0,
        end: 2.0,
        text: String::from("World"),
    };
    let entries = vec![with_speaker, without_speaker];

    let rendered = render_srt(&entries);

    // Cue numbers in the expected text start at 1 although the entry ids start at 0.
    assert_eq!(
        rendered,
        "1\n00:00:00,000 --> 00:00:01,000\nAlice: Hello\n\n2\n00:00:01,000 --> 00:00:02,000\nWorld\n\n"
    );
}
/// Table-driven check of `sanitize_speaker_name`: a leading all-digit prefix
/// followed by `-` is expected to be removed, while every other input is
/// expected to come back unchanged.
#[test]
fn test_sanitize_speaker_name() {
    // (raw name, expected sanitized name)
    let cases = [
        ("123-bob", "bob"),
        ("00123-alice", "alice"),
        ("abc-bob", "abc-bob"),
        ("123", "123"),
        ("-bob", "-bob"),
        ("123-", ""),
    ];
    for (raw, expected) in cases {
        assert_eq!(sanitize_speaker_name(raw), expected);
    }
}
/// Exercises the extension predicates with accepted and rejected file names,
/// including upper-case extensions and a name without any extension.
#[test]
fn test_is_json_file_and_is_audio_file() {
    for name in ["foo.json", "foo.JSON"] {
        assert!(is_json_file(Path::new(name)));
    }
    for name in ["foo.txt", "foo"] {
        assert!(!is_json_file(Path::new(name)));
    }

    for name in ["a.mp3", "b.WAV", "c.m4a"] {
        assert!(is_audio_file(Path::new(name)));
    }
    assert!(!is_audio_file(Path::new("d.txt")));
}
/// Table-driven check of `normalize_lang_code`: plain codes, a language name,
/// a locale string, and the inputs that are expected to yield `None`
/// (`AUTO` and whitespace-only).
#[test]
fn test_normalize_lang_code() {
    // (input, expected code; `None` where no code should be produced)
    let cases: [(&str, Option<&str>); 6] = [
        ("en", Some("en")),
        ("German", Some("de")),
        ("en_US.UTF-8", Some("en")),
        ("AUTO", None),
        (" \t ", None),
        ("zh", Some("zh")),
    ];
    for (input, expected) in cases {
        assert_eq!(normalize_lang_code(input), expected.map(str::to_string));
    }
}
/// Verifies only the shape of `date_prefix()`: ten bytes, ASCII digits
/// everywhere except a `-` at byte offsets 4 and 7. The actual date value is
/// not checked.
#[test]
fn test_date_prefix_format_shape() {
    let prefix = date_prefix();
    assert_eq!(prefix.len(), 10);

    for (idx, &byte) in prefix.as_bytes().iter().enumerate() {
        match idx {
            // separator positions
            4 | 7 => assert_eq!(byte, b'-'),
            // every remaining position must be a digit
            _ => assert!(byte.is_ascii_digit()),
        }
    }
}
}

111
tests/integration_cli.rs Normal file
View File

@@ -0,0 +1,111 @@
use std::fs;
use std::io::Read;
use std::path::{Path, PathBuf};
use std::process::Command;
use chrono::Local;
use serde::Deserialize;
/// One entry of the `items` array in the CLI's JSON output, as seen by these
/// integration tests.
///
/// Every field is required by the derived `Deserialize` impl, so parsing the
/// output fails if the CLI stops emitting one of them or changes its type.
// The derived impl fills in all fields, but the tests only read `speaker`;
// without this allow, rustc reports the other fields as never read.
#[allow(dead_code)]
#[derive(Debug, Deserialize)]
struct OutputEntry {
    id: u64,
    speaker: String,
    start: f64,
    end: f64,
    text: String,
}
/// Top-level object of the CLI's JSON output as parsed by these tests: an
/// object carrying an `items` array of entries.
#[derive(Deserialize)]
struct OutputRoot {
// Entries in the order they appear in the JSON document.
items: Vec<OutputEntry>,
}
/// Scratch directory for a single test.
///
/// The directory is created by [`TestDir::new`]; the `Drop` impl for this
/// type removes it again on a best-effort basis.
struct TestDir(PathBuf);

impl TestDir {
    /// Creates a new directory under the system temp dir and returns a handle
    /// to it.
    ///
    /// The directory name combines the process id, a millisecond timestamp,
    /// and a per-process sequence number. The sequence number is what keeps
    /// names distinct when several tests of the same test binary (same pid)
    /// call this within the same millisecond; without it they would share one
    /// directory and the first handle dropped would delete it for the others.
    ///
    /// # Panics
    ///
    /// Panics if the directory cannot be created.
    fn new() -> Self {
        // Only uniqueness of the returned values matters, so relaxed ordering suffices.
        static NEXT_SEQ: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0);
        let seq = NEXT_SEQ.fetch_add(1, std::sync::atomic::Ordering::Relaxed);

        let ts = Local::now().format("%Y%m%d%H%M%S%3f");
        let pid = std::process::id();
        let dir = std::env::temp_dir().join(format!("polyscribe_test_{}_{}_{}", pid, ts, seq));
        fs::create_dir_all(&dir).expect("Failed to create temp dir");
        TestDir(dir)
    }

    /// Returns the path of the scratch directory.
    fn path(&self) -> &Path {
        &self.0
    }
}
impl Drop for TestDir {
    /// Removes the scratch directory and everything inside it.
    fn drop(&mut self) {
        // Best effort: a failed removal is deliberately ignored so that
        // cleanup can never panic while a test is already unwinding.
        fs::remove_dir_all(self.path()).ok();
    }
}
fn manifest_path(relative: &str) -> PathBuf {
let mut p = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
p.push(relative);
p
}
/// End-to-end check of the `-o` mode: runs the binary on the two sample JSON
/// inputs, then verifies that date-prefixed `.json`, `.toml` and `.srt` files
/// appear next to the requested base path and that their contents pass a few
/// sanity checks.
#[test]
fn cli_merges_json_inputs_and_writes_outputs_to_temp_dir() {
    let exe = env!("CARGO_BIN_EXE_polyscribe");
    let tmp = TestDir::new();
    let base = tmp.path().join("out");
    let input1 = manifest_path("input/1-s0wlz.json");
    let input2 = manifest_path("input/2-vikingowl.json");

    // The expected file names carry a date prefix. Sample the local date on
    // both sides of the run so the test does not fail spuriously when the
    // run happens to cross midnight.
    let date_before = Local::now().format("%Y-%m-%d").to_string();

    // Run the CLI to write outputs into temp directory. Output is captured so
    // that a failure message can include what the CLI printed to stderr.
    let output = Command::new(exe)
        .arg(input1.as_os_str())
        .arg(input2.as_os_str())
        .arg("-o")
        .arg(base.as_os_str())
        .output()
        .expect("failed to spawn polyscribe");
    assert!(
        output.status.success(),
        "CLI did not exit successfully: {}\nstderr:\n{}",
        output.status,
        String::from_utf8_lossy(&output.stderr)
    );

    let date_after = Local::now().format("%Y-%m-%d").to_string();

    // Prefer the pre-run date if its JSON file exists; otherwise fall back to
    // the post-run date (both are identical except across midnight).
    let stem_before = format!("{}_{}", date_before, "out");
    let stem_after = format!("{}_{}", date_after, "out");
    let stem = if tmp.path().join(format!("{}.json", stem_before)).is_file() {
        stem_before
    } else {
        stem_after
    };

    let json_path = tmp.path().join(format!("{}.json", stem));
    let toml_path = tmp.path().join(format!("{}.toml", stem));
    let srt_path = tmp.path().join(format!("{}.srt", stem));
    assert!(json_path.is_file(), "missing JSON output: {}", json_path.display());
    assert!(toml_path.is_file(), "missing TOML output: {}", toml_path.display());
    assert!(srt_path.is_file(), "missing SRT output: {}", srt_path.display());

    // Parse JSON and perform sanity checks
    let mut json_str = String::new();
    fs::File::open(&json_path).unwrap().read_to_string(&mut json_str).unwrap();
    let parsed: OutputRoot = serde_json::from_str(&json_str).expect("invalid JSON in output");
    assert!(!parsed.items.is_empty(), "no items in JSON output");

    // Speakers should include sanitized stems from inputs
    let speakers: std::collections::HashSet<_> =
        parsed.items.iter().map(|e| e.speaker.as_str()).collect();
    assert!(speakers.contains("s0wlz"), "expected speaker s0wlz");
    assert!(speakers.contains("vikingowl"), "expected speaker vikingowl");

    // Check SRT has expected basic structure and speaker label present at least once
    let mut srt = String::new();
    fs::File::open(&srt_path).unwrap().read_to_string(&mut srt).unwrap();
    assert!(srt.starts_with("1\n"), "SRT should start with index 1");
    assert!(
        srt.contains("s0wlz:") || srt.contains("vikingowl:"),
        "SRT should contain at least one speaker label"
    );
}
/// Runs the binary on the two sample JSON inputs without `-o` and checks that
/// it exits successfully and that its stdout mentions an `"items"` key.
#[test]
fn cli_prints_json_to_stdout_when_no_output_path() {
    let exe = env!("CARGO_BIN_EXE_polyscribe");
    let input1 = manifest_path("input/1-s0wlz.json");
    let input2 = manifest_path("input/2-vikingowl.json");

    let output = Command::new(exe)
        .arg(input1.as_os_str())
        .arg(input2.as_os_str())
        .output()
        .expect("failed to spawn polyscribe");
    // stderr is captured by `output()`, so surface it here; otherwise a
    // failing run would give no hint about what went wrong.
    assert!(
        output.status.success(),
        "CLI failed: {}\nstderr:\n{}",
        output.status,
        String::from_utf8_lossy(&output.stderr)
    );

    let stdout = String::from_utf8(output.stdout).expect("stdout not UTF-8");
    assert!(stdout.contains("\"items\""), "stdout should contain items JSON array");

    // NOTE: only stdout is inspected. This test does not verify that no files
    // are written (e.g. into the repo's output/ directory) when -o is omitted.
}