[feat] add --set-speaker-names CLI flag; implement prompt-based speaker name assignment with tests

This commit is contained in:
2025-08-08 13:06:24 +02:00
parent 1d23be8066
commit 53a7471b99
4 changed files with 122 additions and 4 deletions

View File

@@ -7,7 +7,7 @@
- [x] create missing folders for output files
- [x] for merging (command line flag) -> if not present, treat each file as separate output (--merge | -m)
- [x] for merge + separate output -> if present, treat each file as separate output and also output a merged version (--merge-and-separate)
- set speaker-names per input-file -> prompt user for each file if flag is set (--set-speaker-names)
- [x] set speaker-names per input-file -> prompt user for each file if flag is set (--set-speaker-names)
- fix cli output for model display
- refactor into proper cli app
- add support for video files -> use ffmpeg to extract audio

View File

@@ -76,6 +76,10 @@ struct Args {
/// Update local Whisper models by comparing hashes/sizes with remote manifest
#[arg(long)]
update_models: bool,
/// Prompt for speaker names per input file
#[arg(long = "set-speaker-names")]
set_speaker_names: bool,
}
#[derive(Debug, Deserialize)]
@@ -145,6 +149,27 @@ fn sanitize_speaker_name(raw: &str) -> String {
raw.to_string()
}
/// Resolve the speaker name to use for one input file.
///
/// When `enabled` is `false`, `default_name` is returned without any prompt.
/// Otherwise a prompt naming the file is written to stderr and one line is
/// read from stdin; the trimmed line is returned if it is non-empty. A blank
/// line, end of input, or a read error all fall back to `default_name`.
fn prompt_speaker_name_for_path(path: &Path, default_name: &str, enabled: bool) -> String {
    if enabled {
        // Show just the file name when it is valid UTF-8; otherwise fall back
        // to a lossy rendering of the whole path.
        let label = match path.file_name().and_then(|name| name.to_str()) {
            Some(name) => name.to_string(),
            None => path.to_string_lossy().to_string(),
        };
        eprint!("Enter speaker name for {} [default: {}]: ", label, default_name);
        // Best-effort flush so the prompt appears before we block on stdin.
        io::stderr().flush().ok();

        let mut line = String::new();
        if io::stdin().read_line(&mut line).is_ok() {
            let answer = line.trim();
            if !answer.is_empty() {
                return answer.to_string();
            }
        }
    }
    default_name.to_string()
}
// --- Helpers for audio transcription ---
fn is_json_file(path: &Path) -> bool {
matches!(path.extension().and_then(|s| s.to_str()).map(|s| s.to_lowercase()), Some(ext) if ext == "json")
@@ -467,9 +492,10 @@ fn main() -> Result<()> {
for input_path in &inputs {
let path = Path::new(input_path);
let speaker = sanitize_speaker_name(
let default_speaker = sanitize_speaker_name(
path.file_stem().and_then(|s| s.to_str()).unwrap_or("speaker")
);
let speaker = prompt_speaker_name_for_path(path, &default_speaker, args.set_speaker_names);
// Collect entries per file and extend merged
let mut entries: Vec<OutputEntry> = Vec::new();
@@ -557,11 +583,12 @@ fn main() -> Result<()> {
let mut entries: Vec<OutputEntry> = Vec::new();
for input_path in &inputs {
let path = Path::new(input_path);
let speaker = sanitize_speaker_name(
let default_speaker = sanitize_speaker_name(
path.file_stem()
.and_then(|s| s.to_str())
.unwrap_or("speaker")
);
let speaker = prompt_speaker_name_for_path(path, &default_speaker, args.set_speaker_names);
let mut buf = String::new();
if is_audio_file(path) {
@@ -657,9 +684,10 @@ fn main() -> Result<()> {
for input_path in &inputs {
let path = Path::new(input_path);
let speaker = sanitize_speaker_name(
let default_speaker = sanitize_speaker_name(
path.file_stem().and_then(|s| s.to_str()).unwrap_or("speaker")
);
let speaker = prompt_speaker_name_for_path(path, &default_speaker, args.set_speaker_names);
// Collect entries per file
let mut entries: Vec<OutputEntry> = Vec::new();

View File

@@ -643,6 +643,10 @@ mod tests {
#[test]
fn test_update_local_models_offline_copy_and_manifest() {
use std::sync::{Mutex, OnceLock};
static ENV_LOCK: OnceLock<Mutex<()>> = OnceLock::new();
let _guard = ENV_LOCK.get_or_init(|| Mutex::new(())).lock().unwrap();
let tmp_models = tempdir().unwrap();
let tmp_base = tempdir().unwrap();
let tmp_manifest = tempdir().unwrap();

View File

@@ -204,3 +204,89 @@ fn cli_merge_and_separate_writes_both_kinds_of_outputs() {
// Cleanup
let _ = fs::remove_dir_all(&out_dir);
}
/// In merge mode (`-m`) with `--set-speaker-names`, the CLI should prompt once
/// per input file and use the names supplied on stdin as the `speaker` of the
/// merged output items written to stdout.
#[test]
fn cli_set_speaker_names_merge_prompts_and_uses_names() {
    use std::io::Write as _;
    use std::process::Stdio;

    let exe = env!("CARGO_BIN_EXE_polyscribe");
    let input1 = manifest_path("input/1-s0wlz.json");
    let input2 = manifest_path("input/2-vikingowl.json");

    // stderr is piped (not inherited) so the interactive prompts do not leak
    // into the test runner's output and can be shown if the run fails.
    let mut child = Command::new(exe)
        .arg(input1.as_os_str())
        .arg(input2.as_os_str())
        .arg("-m")
        .arg("--set-speaker-names")
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .expect("failed to spawn polyscribe");

    {
        // Take ownership of the handle so it is closed at the end of this
        // scope and the child sees end-of-input after the two answers.
        let mut stdin = child.stdin.take().expect("failed to open stdin");
        // Provide two names for two files
        writeln!(stdin, "Alpha").unwrap();
        writeln!(stdin, "Beta").unwrap();
    }

    let output = child.wait_with_output().expect("failed to wait on child");
    assert!(
        output.status.success(),
        "CLI did not exit successfully; stderr:\n{}",
        String::from_utf8_lossy(&output.stderr)
    );

    let stdout = String::from_utf8(output.stdout).expect("stdout not UTF-8");
    let root: OutputRoot = serde_json::from_str(&stdout).unwrap();
    let speakers: std::collections::HashSet<String> =
        root.items.into_iter().map(|e| e.speaker).collect();
    assert!(speakers.contains("Alpha"), "Alpha not found in speakers");
    assert!(speakers.contains("Beta"), "Beta not found in speakers");
}
/// With a single input, `--set-speaker-names` and an output directory (`-o`),
/// the name supplied on stdin should be the `speaker` of every item in the
/// JSON file written to that directory.
#[test]
fn cli_set_speaker_names_separate_single_input() {
    use std::io::Write as _;
    use std::process::Stdio;

    let exe = env!("CARGO_BIN_EXE_polyscribe");
    let out_dir = manifest_path("target/tmp/itest_set_speaker_separate");
    // Start from a clean directory so stale outputs cannot satisfy the checks.
    let _ = fs::remove_dir_all(&out_dir);
    fs::create_dir_all(&out_dir).unwrap();
    let input1 = manifest_path("input/3-schmendrizzle.json");

    let mut child = Command::new(exe)
        .arg(input1.as_os_str())
        .arg("--set-speaker-names")
        .arg("-o")
        .arg(out_dir.as_os_str())
        .stdin(Stdio::piped())
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .spawn()
        .expect("failed to spawn polyscribe");

    {
        // Take ownership of the handle so it is closed at the end of this
        // scope and the child sees end-of-input after the answer.
        let mut stdin = child.stdin.take().expect("failed to open stdin");
        writeln!(stdin, "ChosenOne").unwrap();
    }

    let status = child.wait().expect("failed to wait on child");
    assert!(status.success(), "CLI did not exit successfully");

    // Find created JSON
    let json_paths: Vec<std::path::PathBuf> = fs::read_dir(&out_dir)
        .unwrap()
        .map(|entry| entry.unwrap().path())
        .filter(|p| {
            p.file_name()
                .and_then(|s| s.to_str())
                .is_some_and(|name| name.ends_with(".json"))
        })
        .collect();
    assert!(!json_paths.is_empty(), "no JSON outputs created");

    // `fs::read_to_string` avoids needing the `Read` trait in scope here.
    let buf = fs::read_to_string(&json_paths[0]).unwrap();
    let root: OutputRoot = serde_json::from_str(&buf).unwrap();
    assert!(root.items.iter().all(|e| e.speaker == "ChosenOne"));

    let _ = fs::remove_dir_all(&out_dir);
}