# Review notes (free text ahead of the first file header; patch tools skip it).
# * Line breaks, blank context lines and indentation were rebuilt from a
#   whitespace-collapsed copy. If context fails to match, apply with
#   `git apply --ignore-whitespace`.
# * NOTE(review): the src/main.rs context line `output: Option,` lost its type
#   parameter during extraction and is left as found; restore it from the
#   target file before applying.
# * The added field was restored to `Vec<OutFormatCli>` (the element type that
#   compute_output_formats matches on) and the README placeholder to
#   `<json|toml|srt|all>` (the flag's value_name).
# * Dropped the tests/integration_cli.rs hunk: it only appended a 489-line
#   /* ... */ block of commented-out tests, duplicating
#   cli_set_speaker_names_separate_single_input and the tests that now live in
#   tests/out_format.rs.
diff --git a/README.md b/README.md
index 8bbe6e2..152813b 100644
--- a/README.md
+++ b/README.md
@@ -46,6 +46,7 @@ Model locations
 
 Most-used CLI flags
 - -o, --output FILE_OR_DIR: Output path base (date prefix added). If omitted, JSON prints to stdout.
+- --out-format <json|toml|srt|all>: Which on-disk format(s) to write; repeatable; default all. Example: --out-format json --out-format srt
 - -m, --merge: Merge all inputs into one output; otherwise one output per input.
 - --merge-and-separate: Write both merged output and separate per-input outputs (requires -o dir).
 - --set-speaker-names: Prompt for a speaker label per input file.
diff --git a/src/main.rs b/src/main.rs
index 35053dc..9754614 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -41,6 +41,15 @@ enum GpuBackendCli {
     Vulkan,
 }
 
+#[derive(clap::ValueEnum, Debug, Clone, Copy, PartialEq, Eq)]
+#[value(rename_all = "kebab-case")]
+enum OutFormatCli {
+    Json,
+    Toml,
+    Srt,
+    All,
+}
+
 #[derive(Parser, Debug)]
 #[command(
     name = "PolyScribe",
@@ -76,6 +85,10 @@ struct Args {
     #[arg(short, long, value_name = "FILE")]
     output: Option,
 
+    /// Which output format(s) to write when writing to files: json|toml|srt|all. Repeatable. Default: all
+    #[arg(long = "out-format", value_enum, value_name = "json|toml|srt|all")]
+    out_format: Vec<OutFormatCli>,
+
     /// Merge all inputs into a single output; if not set, each input is written as a separate output
     #[arg(short = 'm', long = "merge")]
     merge: bool,
@@ -226,9 +239,27 @@ where
 }
 
 fn run() -> Result<()> {
+    // Compute selected output formats from CLI flags (default: all)
+    fn compute_output_formats(args: &Args) -> OutputFormats {
+        if args.out_format.is_empty() {
+            return OutputFormats::all();
+        }
+        let mut formats = OutputFormats { json: false, toml: false, srt: false };
+        for f in &args.out_format {
+            match f {
+                OutFormatCli::All => return OutputFormats::all(),
+                OutFormatCli::Json => formats.json = true,
+                OutFormatCli::Toml => formats.toml = true,
+                OutFormatCli::Srt => formats.srt = true,
+            }
+        }
+        formats
+    }
     use polyscribe::progress::ProgressFactory;
     // Parse CLI
     let args = Args::parse();
+    // Determine which on-disk output formats to write
+    let selected_formats = compute_output_formats(&args);
 
     // Initialize runtime flags
     polyscribe::set_verbose(args.verbose);
@@ -534,7 +565,7 @@ fn run() -> Result<()> {
             let date = date_prefix();
             let base_name = format!("{date}_{stem}");
             let base_path = out_dir.join(&base_name);
-            write_outputs(&base_path, &out, &OutputFormats::all())?;
+            write_outputs(&base_path, &out, &selected_formats)?;
 
             // Extend merged with per-file entries
             merged_entries.extend(out.items.into_iter());
@@ -569,7 +600,7 @@ fn run() -> Result<()> {
         let date = date_prefix();
         let merged_base = format!("{date}_merged");
         let base_path = out_dir.join(&merged_base);
-        write_outputs(&base_path, &merged_out, &OutputFormats::all())?;
+        write_outputs(&base_path, &merged_out, &selected_formats)?;
 
         // Final concise summary table to stderr (below progress bars)
         if !args.quiet && !summary.is_empty() {
@@ -739,7 +770,7 @@ fn run() -> Result<()> {
             let base_name = format!("{date}_{stem}");
             let dir = parent_opt.unwrap_or(Path::new(""));
             let base_path = dir.join(&base_name);
-            write_outputs(&base_path, &out, &OutputFormats::all())?;
+            write_outputs(&base_path, &out, &selected_formats)?;
         } else {
             let stdout = io::stdout();
             let mut handle = stdout.lock();
@@ -898,7 +929,7 @@ fn run() -> Result<()> {
                 let date = date_prefix();
                 let base_name = format!("{date}_{stem}");
                 let base_path = dir.join(&base_name);
-                write_outputs(&base_path, &out, &OutputFormats::all())?;
+                write_outputs(&base_path, &out, &selected_formats)?;
             } else {
                 // stdout (only single input reaches here)
                 let stdout = io::stdout();
diff --git a/tests/out_format.rs b/tests/out_format.rs
new file mode 100644
index 0000000..420e60d
--- /dev/null
+++ b/tests/out_format.rs
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: MIT
+// Tests for --out-format flag behavior
+
+use std::fs;
+use std::process::Command;
+use std::path::PathBuf;
+
+fn manifest_path(relative: &str) -> PathBuf {
+    let mut p = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+    p.push(relative);
+    p
+}
+
+#[test]
+fn out_format_single_json_only() {
+    let exe = env!("CARGO_BIN_EXE_polyscribe");
+    let out_dir = manifest_path("target/tmp/itest_outfmt_json_only");
+    let _ = fs::remove_dir_all(&out_dir);
+    fs::create_dir_all(&out_dir).unwrap();
+
+    let input1 = manifest_path("input/1-s0wlz.json");
+
+    let status = Command::new(exe)
+        .arg(input1.as_os_str())
+        .arg("-o")
+        .arg(&out_dir)
+        .arg("--out-format")
+        .arg("json")
+        .status()
+        .expect("failed to spawn polyscribe");
+    assert!(status.success(), "CLI did not exit successfully");
+
+    let mut has_json = false;
+    let mut has_toml = false;
+    let mut has_srt = false;
+    for e in fs::read_dir(&out_dir).unwrap() {
+        let p = e.unwrap().path();
+        if let Some(name) = p.file_name().and_then(|s| s.to_str()) {
+            if name.ends_with(".json") { has_json = true; }
+            if name.ends_with(".toml") { has_toml = true; }
+            if name.ends_with(".srt") { has_srt = true; }
+        }
+    }
+    assert!(has_json, "expected JSON file to be written");
+    assert!(!has_toml, "did not expect TOML file");
+    assert!(!has_srt, "did not expect SRT file");
+
+    let _ = fs::remove_dir_all(&out_dir);
+}
+
+#[test]
+fn out_format_multiple_json_and_srt() {
+    let exe = env!("CARGO_BIN_EXE_polyscribe");
+    let out_dir = manifest_path("target/tmp/itest_outfmt_json_srt");
+    let _ = fs::remove_dir_all(&out_dir);
+    fs::create_dir_all(&out_dir).unwrap();
+
+    let input1 = manifest_path("input/2-vikingowl.json");
+
+    let status = Command::new(exe)
+        .arg(input1.as_os_str())
+        .arg("-o")
+        .arg(&out_dir)
+        .arg("--out-format")
+        .arg("json")
+        .arg("--out-format")
+        .arg("srt")
+        .status()
+        .expect("failed to spawn polyscribe");
+    assert!(status.success(), "CLI did not exit successfully");
+
+    let mut has_json = false;
+    let mut has_toml = false;
+    let mut has_srt = false;
+    for e in fs::read_dir(&out_dir).unwrap() {
+        let p = e.unwrap().path();
+        if let Some(name) = p.file_name().and_then(|s| s.to_str()) {
+            if name.ends_with(".json") { has_json = true; }
+            if name.ends_with(".toml") { has_toml = true; }
+            if name.ends_with(".srt") { has_srt = true; }
+        }
+    }
+    assert!(has_json, "expected JSON file to be written");
+    assert!(has_srt, "expected SRT file to be written");
+    assert!(!has_toml, "did not expect TOML file");
+
+    let _ = fs::remove_dir_all(&out_dir);
+}