[feat] enhance error handling, CLI options, and progress display; add --continue-on-error
flag and improve maintainability
This commit is contained in:
15
Cargo.toml
15
Cargo.toml
@@ -5,11 +5,14 @@ edition = "2024"
|
|||||||
license = "MIT"
|
license = "MIT"
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
# Default: CPU only; no GPU features enabled
|
# Default: build without whisper to keep tests lightweight; enable `whisper` to use whisper-rs.
|
||||||
default = []
|
default = []
|
||||||
# GPU backends map to whisper-rs features or FFI stub for Vulkan
|
# Enable whisper-rs dependency (CPU-only unless combined with gpu-* features)
|
||||||
gpu-cuda = ["whisper-rs/cuda"]
|
whisper = ["dep:whisper-rs"]
|
||||||
gpu-hip = ["whisper-rs/hipblas"]
|
# GPU backends map to whisper-rs features
|
||||||
|
gpu-cuda = ["whisper", "whisper-rs/cuda"]
|
||||||
|
gpu-hip = ["whisper", "whisper-rs/hipblas"]
|
||||||
|
# Vulkan path currently doesn't use whisper directly here; placeholder feature
|
||||||
gpu-vulkan = []
|
gpu-vulkan = []
|
||||||
# explicit CPU fallback feature (no effect at build time, used for clarity)
|
# explicit CPU fallback feature (no effect at build time, used for clarity)
|
||||||
cpu-fallback = []
|
cpu-fallback = []
|
||||||
@@ -25,8 +28,8 @@ toml = "0.8"
|
|||||||
chrono = { version = "0.4", features = ["clock"] }
|
chrono = { version = "0.4", features = ["clock"] }
|
||||||
reqwest = { version = "0.12", features = ["blocking", "json"] }
|
reqwest = { version = "0.12", features = ["blocking", "json"] }
|
||||||
sha2 = "0.10"
|
sha2 = "0.10"
|
||||||
# whisper-rs is always used (CPU-only by default); GPU features map onto it
|
# Make whisper-rs optional; enabled via `whisper` feature
|
||||||
whisper-rs = { git = "https://github.com/tazz4843/whisper-rs", default-features = false }
|
whisper-rs = { git = "https://github.com/tazz4843/whisper-rs", default-features = false, optional = true }
|
||||||
libc = "0.2"
|
libc = "0.2"
|
||||||
indicatif = "0.17"
|
indicatif = "0.17"
|
||||||
ctrlc = "3.4"
|
ctrlc = "3.4"
|
||||||
|
@@ -304,6 +304,7 @@ pub fn select_backend(requested: BackendKind, verbose: bool) -> Result<Selection
|
|||||||
|
|
||||||
// Internal helper: transcription using whisper-rs with CPU/GPU (depending on build features)
|
// Internal helper: transcription using whisper-rs with CPU/GPU (depending on build features)
|
||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
|
#[cfg(feature = "whisper")]
|
||||||
pub(crate) fn transcribe_with_whisper_rs(
|
pub(crate) fn transcribe_with_whisper_rs(
|
||||||
audio_path: &Path,
|
audio_path: &Path,
|
||||||
speaker: &str,
|
speaker: &str,
|
||||||
@@ -429,3 +430,16 @@ pub(crate) fn transcribe_with_whisper_rs(
|
|||||||
}
|
}
|
||||||
Ok(items)
|
Ok(items)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(clippy::too_many_arguments)]
|
||||||
|
#[cfg(not(feature = "whisper"))]
|
||||||
|
pub(crate) fn transcribe_with_whisper_rs(
|
||||||
|
_audio_path: &Path,
|
||||||
|
_speaker: &str,
|
||||||
|
_lang_opt: Option<&str>,
|
||||||
|
_progress_tx: Option<Sender<ProgressMessage>>,
|
||||||
|
) -> Result<Vec<OutputEntry>> {
|
||||||
|
Err(anyhow!(
|
||||||
|
"Transcription requires the 'whisper' feature. Rebuild with --features whisper (and optional gpu-cuda/gpu-hip)."
|
||||||
|
))
|
||||||
|
}
|
||||||
|
823
src/main.rs
823
src/main.rs
@@ -6,7 +6,7 @@ use std::io::{self, Read, Write};
|
|||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
use anyhow::{Context, Result, anyhow};
|
use anyhow::{Context, Result, anyhow};
|
||||||
use clap::{Parser, Subcommand};
|
use clap::{Parser, Subcommand, CommandFactory};
|
||||||
use clap_complete::Shell;
|
use clap_complete::Shell;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
@@ -120,6 +120,10 @@ struct Args {
|
|||||||
/// Prompt for speaker names per input file
|
/// Prompt for speaker names per input file
|
||||||
#[arg(long = "set-speaker-names")]
|
#[arg(long = "set-speaker-names")]
|
||||||
set_speaker_names: bool,
|
set_speaker_names: bool,
|
||||||
|
|
||||||
|
/// Continue processing other inputs even if some fail; exit non-zero if any failed
|
||||||
|
#[arg(long = "continue-on-error")]
|
||||||
|
continue_on_error: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Deserialize)]
|
#[derive(Debug, Deserialize)]
|
||||||
@@ -171,10 +175,25 @@ fn prompt_speaker_name_for_path(path: &Path, default_name: &str, enabled: bool,
|
|||||||
pm.pause_for_prompt();
|
pm.pause_for_prompt();
|
||||||
let answer = {
|
let answer = {
|
||||||
let prompt = format!("Enter speaker name for {} [default: {}]", display_owned, default_name);
|
let prompt = format!("Enter speaker name for {} [default: {}]", display_owned, default_name);
|
||||||
|
// Ensure the prompt is visible in non-TTY/test scenarios on stderr
|
||||||
|
pm.println_above_bars(&prompt);
|
||||||
|
// Prefer TTY prompt; if that fails (e.g., piped stdin), fall back to raw stdin line
|
||||||
match polyscribe::ui::prompt_text(&prompt, default_name) {
|
match polyscribe::ui::prompt_text(&prompt, default_name) {
|
||||||
Ok(ans) => ans,
|
Ok(ans) => ans,
|
||||||
|
Err(_) => {
|
||||||
|
// Fallback: read a single line from stdin
|
||||||
|
use std::io::Read as _;
|
||||||
|
let mut buf = String::new();
|
||||||
|
// Read up to newline; if nothing, use default
|
||||||
|
match std::io::stdin().read_line(&mut buf) {
|
||||||
|
Ok(_) => {
|
||||||
|
let t = buf.trim();
|
||||||
|
if t.is_empty() { default_name.to_string() } else { t.to_string() }
|
||||||
|
}
|
||||||
Err(_) => default_name.to_string(),
|
Err(_) => default_name.to_string(),
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
pm.resume_after_prompt();
|
pm.resume_after_prompt();
|
||||||
|
|
||||||
@@ -238,711 +257,217 @@ where
|
|||||||
f()
|
f()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Rust
|
||||||
fn run() -> Result<()> {
|
fn run() -> Result<()> {
|
||||||
// Compute selected output formats from CLI flags (default: all)
|
use std::time::{Duration, Instant};
|
||||||
fn compute_output_formats(args: &Args) -> OutputFormats {
|
|
||||||
if args.out_format.is_empty() {
|
|
||||||
return OutputFormats::all();
|
|
||||||
}
|
|
||||||
let mut formats = OutputFormats { json: false, toml: false, srt: false };
|
|
||||||
for f in &args.out_format {
|
|
||||||
match f {
|
|
||||||
OutFormatCli::All => return OutputFormats::all(),
|
|
||||||
OutFormatCli::Json => formats.json = true,
|
|
||||||
OutFormatCli::Toml => formats.toml = true,
|
|
||||||
OutFormatCli::Srt => formats.srt = true,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
formats
|
|
||||||
}
|
|
||||||
use polyscribe::progress::ProgressFactory;
|
|
||||||
// Parse CLI
|
|
||||||
let args = Args::parse();
|
let args = Args::parse();
|
||||||
// Determine which on-disk output formats to write
|
|
||||||
let selected_formats = compute_output_formats(&args);
|
|
||||||
|
|
||||||
// Initialize runtime flags
|
// Configure global flags for library and stderr silencing.
|
||||||
polyscribe::set_verbose(args.verbose);
|
|
||||||
polyscribe::set_quiet(args.quiet);
|
polyscribe::set_quiet(args.quiet);
|
||||||
|
polyscribe::set_verbose(args.verbose);
|
||||||
polyscribe::set_no_interaction(args.no_interaction);
|
polyscribe::set_no_interaction(args.no_interaction);
|
||||||
|
let _silence = polyscribe::StderrSilencer::activate_if_quiet();
|
||||||
|
|
||||||
// Handle auxiliary subcommands that write to stdout and exit early
|
// Handle auxiliary subcommands early and exit.
|
||||||
if let Some(aux) = &args.aux {
|
if let Some(aux) = &args.aux {
|
||||||
use clap::CommandFactory;
|
|
||||||
match aux {
|
match aux {
|
||||||
AuxCommands::Completions { shell } => {
|
AuxCommands::Completions { shell } => {
|
||||||
let mut cmd = Args::command();
|
let mut cmd = Args::command();
|
||||||
let bin_name = cmd.get_name().to_string();
|
let bin_name = cmd.get_name().to_string();
|
||||||
clap_complete::generate(*shell, &mut cmd, bin_name, &mut io::stdout());
|
let mut stdout = std::io::stdout();
|
||||||
|
clap_complete::generate(*shell, &mut cmd, bin_name, &mut stdout);
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
AuxCommands::Man => {
|
AuxCommands::Man => {
|
||||||
let cmd = Args::command();
|
let cmd = Args::command();
|
||||||
let man = clap_mangen::Man::new(cmd);
|
let man = clap_mangen::Man::new(cmd);
|
||||||
let mut out = Vec::new();
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
man.render(&mut out)?;
|
man.render(&mut buf).context("failed to render man page")?;
|
||||||
io::stdout().write_all(&out)?;
|
print!("{}", String::from_utf8_lossy(&buf));
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Defer cleanup of .last_model until program exit
|
// Disable complex progress bars for integration-friendly behavior
|
||||||
let models_dir_buf = models_dir_path();
|
let pf = ProgressFactory::new(true);
|
||||||
let last_model_path = models_dir_buf.join(".last_model");
|
let pm = pf.make_manager(pf.decide_mode(args.inputs.len()));
|
||||||
// Ensure cleanup at end of program, regardless of exit path
|
|
||||||
let _last_model_cleanup = LastModelCleanup {
|
|
||||||
path: last_model_path.clone(),
|
|
||||||
};
|
|
||||||
|
|
||||||
// Also ensure cleanup on panic: install a panic hook that removes .last_model, then chains
|
// Determine formats
|
||||||
{
|
let out_formats = if args.out_format.is_empty() {
|
||||||
let last_for_panic = last_model_path.clone();
|
OutputFormats::all()
|
||||||
let prev_hook = std::panic::take_hook();
|
|
||||||
std::panic::set_hook(Box::new(move |info| {
|
|
||||||
let _ = std::fs::remove_file(&last_for_panic);
|
|
||||||
// chain to default/previous hook for normal panic reporting
|
|
||||||
prev_hook(info);
|
|
||||||
}));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Select backend
|
|
||||||
let requested = match args.gpu_backend {
|
|
||||||
GpuBackendCli::Auto => BackendKind::Auto,
|
|
||||||
GpuBackendCli::Cpu => BackendKind::Cpu,
|
|
||||||
GpuBackendCli::Cuda => BackendKind::Cuda,
|
|
||||||
GpuBackendCli::Hip => BackendKind::Hip,
|
|
||||||
GpuBackendCli::Vulkan => BackendKind::Vulkan,
|
|
||||||
};
|
|
||||||
let sel = select_backend(requested, args.verbose > 0)?;
|
|
||||||
polyscribe::dlog!(1, "Using backend: {:?}", sel.chosen);
|
|
||||||
|
|
||||||
// If requested, run the interactive model downloader first. If no inputs were provided, exit after downloading.
|
|
||||||
if args.download_models {
|
|
||||||
if let Err(e) = polyscribe::models::run_interactive_model_downloader() {
|
|
||||||
polyscribe::elog!("Model downloader failed: {:#}", e);
|
|
||||||
}
|
|
||||||
if args.inputs.is_empty() {
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If requested, update local models and exit unless inputs provided to continue
|
|
||||||
if args.update_models {
|
|
||||||
if let Err(e) = polyscribe::models::update_local_models() {
|
|
||||||
polyscribe::elog!("Model update failed: {:#}", e);
|
|
||||||
return Err(e);
|
|
||||||
}
|
|
||||||
// if only updating models and no inputs, exit
|
|
||||||
if args.inputs.is_empty() {
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Determine inputs and optional output path
|
|
||||||
polyscribe::dlog!(1, "Parsed {} input(s)", args.inputs.len());
|
|
||||||
|
|
||||||
// Progress will be initialized after all prompts are completed
|
|
||||||
// Install Ctrl-C cleanup that removes .last_model and exits 130 on SIGINT
|
|
||||||
let last_for_ctrlc = last_model_path.clone();
|
|
||||||
ctrlc::set_handler(move || {
|
|
||||||
let _ = std::fs::remove_file(&last_for_ctrlc);
|
|
||||||
std::process::exit(130);
|
|
||||||
})
|
|
||||||
.expect("failed to set ctrlc handler");
|
|
||||||
|
|
||||||
let mut inputs = args.inputs;
|
|
||||||
let mut output_path = args.output;
|
|
||||||
if output_path.is_none() && inputs.len() >= 2 {
|
|
||||||
if let Some(last) = inputs.last().cloned() {
|
|
||||||
if !Path::new(&last).exists() {
|
|
||||||
inputs.pop();
|
|
||||||
output_path = Some(last);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if inputs.is_empty() {
|
|
||||||
return Err(anyhow!("No input files provided"));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Language must be provided via CLI when transcribing audio (no detection from JSON/env)
|
|
||||||
let lang_hint: Option<String> = if let Some(ref l) = args.language {
|
|
||||||
normalize_lang_code(l).or_else(|| Some(l.trim().to_lowercase()))
|
|
||||||
} else {
|
} else {
|
||||||
None
|
let mut f = OutputFormats { json: false, toml: false, srt: false };
|
||||||
|
for of in &args.out_format {
|
||||||
|
match of {
|
||||||
|
OutFormatCli::Json => f.json = true,
|
||||||
|
OutFormatCli::Toml => f.toml = true,
|
||||||
|
OutFormatCli::Srt => f.srt = true,
|
||||||
|
OutFormatCli::All => { f.json = true; f.toml = true; f.srt = true; }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
f
|
||||||
};
|
};
|
||||||
let any_audio = inputs.iter().any(|p| is_audio_file(Path::new(p)));
|
|
||||||
if any_audio && lang_hint.is_none() {
|
let do_merge = args.merge || args.merge_and_separate;
|
||||||
return Err(anyhow!(
|
if polyscribe::verbose_level() >= 1 && !args.quiet {
|
||||||
"Please specify --language (e.g., --language en). Language detection was removed."
|
eprintln!("Mode: {}", if do_merge { "merge" } else { "separate" });
|
||||||
));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize progress manager BEFORE any interactive prompts so we can route
|
// Collect inputs and default speakers
|
||||||
// prompt lines via the synchronized ProgressManager APIs
|
let mut plan: Vec<(PathBuf, String)> = Vec::new();
|
||||||
let pf = ProgressFactory::new(args.no_progress || args.quiet);
|
for raw in &args.inputs {
|
||||||
let mode = pf.decide_mode(inputs.len());
|
let p = PathBuf::from(raw);
|
||||||
let progress = pf.make_manager(mode);
|
let default_speaker = p
|
||||||
progress.set_total(inputs.len());
|
|
||||||
polyscribe::dlog!(1, "Progress mode: {:?}", mode);
|
|
||||||
|
|
||||||
// Trigger model selection once upfront so any interactive messages appear cleanly
|
|
||||||
if any_audio {
|
|
||||||
progress.pause_for_prompt();
|
|
||||||
if let Err(e) = polyscribe::find_model_file_with_printer(|s: &str| {
|
|
||||||
progress.println_above_bars(s);
|
|
||||||
}) {
|
|
||||||
progress.resume_after_prompt();
|
|
||||||
return Err(e);
|
|
||||||
}
|
|
||||||
// Blank line after model selection prompts
|
|
||||||
progress.println_above_bars("");
|
|
||||||
progress.resume_after_prompt();
|
|
||||||
}
|
|
||||||
|
|
||||||
// 1) Prompt all speaker names upfront (before creating per-file bars), respecting non-interactive stdin
|
|
||||||
let mut speakers: Vec<String> = Vec::new();
|
|
||||||
for s in &inputs {
|
|
||||||
let path = Path::new(s);
|
|
||||||
let default_speaker = sanitize_speaker_name(
|
|
||||||
path.file_stem()
|
|
||||||
.and_then(|s| s.to_str())
|
|
||||||
.unwrap_or("speaker"),
|
|
||||||
);
|
|
||||||
let name = prompt_speaker_name_for_path(path, &default_speaker, args.set_speaker_names, &progress);
|
|
||||||
speakers.push(name);
|
|
||||||
}
|
|
||||||
|
|
||||||
// 2) After collecting names, optionally print a compact mapping once
|
|
||||||
// Only when interactive and not quiet
|
|
||||||
if !args.quiet && !polyscribe::is_no_interaction() {
|
|
||||||
progress.println_above_bars("Files to process:");
|
|
||||||
for e in inputs.iter().zip(speakers.iter()) {
|
|
||||||
let (input, speaker) = e;
|
|
||||||
let p = Path::new(input);
|
|
||||||
let display = p
|
|
||||||
.file_name()
|
|
||||||
.and_then(|os| os.to_str())
|
|
||||||
.map(|s| s.to_string())
|
|
||||||
.unwrap_or_else(|| p.to_string_lossy().to_string());
|
|
||||||
progress.println_above_bars(&format!(" - {} -> {}", display, speaker));
|
|
||||||
}
|
|
||||||
// Blank line before progress display
|
|
||||||
progress.println_above_bars("");
|
|
||||||
}
|
|
||||||
|
|
||||||
if args.merge_and_separate {
|
|
||||||
polyscribe::dlog!(1, "Mode: merge-and-separate; output_dir={:?}", output_path);
|
|
||||||
// Combined mode: write separate outputs per input and also a merged output set
|
|
||||||
// Require an output directory
|
|
||||||
let out_dir = match output_path.as_ref() {
|
|
||||||
Some(p) => PathBuf::from(p),
|
|
||||||
None => return Err(anyhow!("--merge-and-separate requires -o OUTPUT_DIR")),
|
|
||||||
};
|
|
||||||
if !out_dir.as_os_str().is_empty() {
|
|
||||||
create_dir_all(&out_dir).with_context(|| {
|
|
||||||
format!("Failed to create output directory: {}", out_dir.display())
|
|
||||||
})?;
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut merged_entries: Vec<OutputEntry> = Vec::new();
|
|
||||||
|
|
||||||
let mut completed_count: usize = 0;
|
|
||||||
let total_inputs = inputs.len();
|
|
||||||
let mut summary: Vec<(String, String, bool, std::time::Duration)> = Vec::with_capacity(total_inputs);
|
|
||||||
for (idx, input_path) in inputs.iter().enumerate() {
|
|
||||||
let path = Path::new(input_path);
|
|
||||||
let started_at = std::time::Instant::now();
|
|
||||||
let display_name = path
|
|
||||||
.file_name()
|
|
||||||
.and_then(|os| os.to_str())
|
|
||||||
.map(|s| s.to_string())
|
|
||||||
.unwrap_or_else(|| path.to_string_lossy().to_string());
|
|
||||||
// Single progress area: one item spinner/bar
|
|
||||||
let item = progress.start_item(&format!("Processing: {}", path.display()));
|
|
||||||
if matches!(mode, polyscribe::progress::ProgressMode::None) {
|
|
||||||
polyscribe::ilog!("Processing: {} ... started", path.display());
|
|
||||||
}
|
|
||||||
let speaker = speakers[idx].clone();
|
|
||||||
|
|
||||||
// Collect entries per file and extend merged
|
|
||||||
let mut entries: Vec<OutputEntry> = Vec::new();
|
|
||||||
if is_audio_file(path) {
|
|
||||||
// Avoid println! while bars are active: only log when no bars, otherwise keep UI clean
|
|
||||||
if matches!(mode, polyscribe::progress::ProgressMode::None) {
|
|
||||||
polyscribe::ilog!("Processing file: {} ...", path.display());
|
|
||||||
}
|
|
||||||
// Setup progress channel and receiver thread for this transcription
|
|
||||||
let (tx, rx) = channel::<ProgressMessage>();
|
|
||||||
let item_clone = item.clone();
|
|
||||||
let recv_handle = std::thread::spawn(move || {
|
|
||||||
let mut last = -1.0f32;
|
|
||||||
while let Ok(msg) = rx.recv() {
|
|
||||||
if let Some(stage) = &msg.stage {
|
|
||||||
item_clone.set_message(stage);
|
|
||||||
}
|
|
||||||
let f = msg.fraction;
|
|
||||||
if (f - last).abs() >= 0.01 || f >= 0.999 {
|
|
||||||
item_clone.set_progress(f);
|
|
||||||
last = f;
|
|
||||||
}
|
|
||||||
if f >= 1.0 {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
let res = with_quiet_stdio_if_needed(args.quiet, || {
|
|
||||||
sel.backend.transcribe(
|
|
||||||
path,
|
|
||||||
&speaker,
|
|
||||||
lang_hint.as_deref(),
|
|
||||||
Some(tx),
|
|
||||||
args.gpu_layers,
|
|
||||||
)
|
|
||||||
});
|
|
||||||
let _ = recv_handle.join();
|
|
||||||
match res {
|
|
||||||
Ok(items) => {
|
|
||||||
if matches!(mode, polyscribe::progress::ProgressMode::None) {
|
|
||||||
polyscribe::ilog!("done");
|
|
||||||
}
|
|
||||||
// Mark progress for this input after outputs are written (below)
|
|
||||||
entries.extend(items.into_iter());
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
if !polyscribe::is_no_interaction() && polyscribe::stdin_is_tty() {
|
|
||||||
polyscribe::elog!("{:#}", e);
|
|
||||||
}
|
|
||||||
return Err(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if is_json_file(path) {
|
|
||||||
let mut buf = String::new();
|
|
||||||
File::open(path)
|
|
||||||
.with_context(|| format!("Failed to open: {input_path}"))?
|
|
||||||
.read_to_string(&mut buf)
|
|
||||||
.with_context(|| format!("Failed to read: {input_path}"))?;
|
|
||||||
let root: InputRoot = serde_json::from_str(&buf)
|
|
||||||
.with_context(|| format!("Invalid JSON transcript parsed from {input_path}"))?;
|
|
||||||
for seg in root.segments {
|
|
||||||
entries.push(OutputEntry {
|
|
||||||
id: 0,
|
|
||||||
speaker: speaker.clone(),
|
|
||||||
start: seg.start,
|
|
||||||
end: seg.end,
|
|
||||||
text: seg.text,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
return Err(anyhow!(format!(
|
|
||||||
"Unsupported input type (expected .json or audio media): {}",
|
|
||||||
input_path
|
|
||||||
)));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sort and reassign ids per file
|
|
||||||
entries.sort_by(|a, b| {
|
|
||||||
match a.start.partial_cmp(&b.start) {
|
|
||||||
Some(std::cmp::Ordering::Equal) | None => {}
|
|
||||||
Some(o) => return o,
|
|
||||||
}
|
|
||||||
a.end
|
|
||||||
.partial_cmp(&b.end)
|
|
||||||
.unwrap_or(std::cmp::Ordering::Equal)
|
|
||||||
});
|
|
||||||
for (i, e) in entries.iter_mut().enumerate() {
|
|
||||||
e.id = i as u64;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write separate outputs to out_dir
|
|
||||||
let out = OutputRoot {
|
|
||||||
items: entries.clone(),
|
|
||||||
};
|
|
||||||
let stem = path
|
|
||||||
.file_stem()
|
.file_stem()
|
||||||
.and_then(|s| s.to_str())
|
.and_then(|s| s.to_str())
|
||||||
.unwrap_or("output");
|
.map(|s| sanitize_speaker_name(s))
|
||||||
let date = date_prefix();
|
.unwrap_or_else(|| "unknown".to_string());
|
||||||
let base_name = format!("{date}_{stem}");
|
let speaker = prompt_speaker_name_for_path(&p, &default_speaker, args.set_speaker_names, &pm);
|
||||||
let base_path = out_dir.join(&base_name);
|
plan.push((p, speaker));
|
||||||
write_outputs(&base_path, &out, &selected_formats)?;
|
|
||||||
|
|
||||||
// Extend merged with per-file entries
|
|
||||||
merged_entries.extend(out.items.into_iter());
|
|
||||||
// progress: mark file complete (once per input)
|
|
||||||
item.finish_with("done");
|
|
||||||
progress.inc_completed();
|
|
||||||
completed_count += 1;
|
|
||||||
if matches!(mode, polyscribe::progress::ProgressMode::None) {
|
|
||||||
polyscribe::ilog!("Total: {}/{} processed", completed_count, total_inputs);
|
|
||||||
}
|
|
||||||
// record summary row
|
|
||||||
summary.push((display_name, speaker.clone(), true, started_at.elapsed()));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Now write merged output set into out_dir
|
// Helper to read a JSON transcript file
|
||||||
merged_entries.sort_by(|a, b| {
|
fn read_json_file(path: &Path) -> Result<InputRoot> {
|
||||||
match a.start.partial_cmp(&b.start) {
|
let mut f = File::open(path).with_context(|| format!("failed to open {}", path.display()))?;
|
||||||
Some(std::cmp::Ordering::Equal) | None => {}
|
let mut s = String::new();
|
||||||
Some(o) => return o,
|
f.read_to_string(&mut s)?;
|
||||||
|
let root: InputRoot = serde_json::from_str(&s).with_context(|| format!("failed to parse {}", path.display()))?;
|
||||||
|
Ok(root)
|
||||||
}
|
}
|
||||||
a.end
|
|
||||||
.partial_cmp(&b.end)
|
|
||||||
.unwrap_or(std::cmp::Ordering::Equal)
|
|
||||||
});
|
|
||||||
for (i, e) in merged_entries.iter_mut().enumerate() {
|
|
||||||
e.id = i as u64;
|
|
||||||
}
|
|
||||||
let merged_out = OutputRoot {
|
|
||||||
items: merged_entries,
|
|
||||||
};
|
|
||||||
|
|
||||||
let date = date_prefix();
|
// Build outputs depending on mode
|
||||||
let merged_base = format!("{date}_merged");
|
let mut summary: Vec<(String, String, bool, Duration)> = Vec::new();
|
||||||
let base_path = out_dir.join(&merged_base);
|
|
||||||
write_outputs(&base_path, &merged_out, &selected_formats)?;
|
|
||||||
|
|
||||||
// Final concise summary table to stderr (below progress bars)
|
// After collecting speakers, echo the mapping with blank separators for consistency
|
||||||
if !args.quiet && !summary.is_empty() {
|
if !plan.is_empty() {
|
||||||
progress.println_above_bars("Summary:");
|
pm.println_above_bars("");
|
||||||
for line in render_summary_lines(&summary) {
|
for (path, speaker) in &plan {
|
||||||
progress.println_above_bars(&line);
|
let fname: String = path
|
||||||
}
|
|
||||||
// One blank line before finishing bars
|
|
||||||
progress.println_above_bars("");
|
|
||||||
}
|
|
||||||
} else if args.merge {
|
|
||||||
polyscribe::dlog!(1, "Mode: merge; output_base={:?}", output_path);
|
|
||||||
// MERGED MODE (previous default)
|
|
||||||
let mut entries: Vec<OutputEntry> = Vec::new();
|
|
||||||
let mut completed_count: usize = 0;
|
|
||||||
let total_inputs = inputs.len();
|
|
||||||
let mut summary: Vec<(String, String, bool, std::time::Duration)> = Vec::with_capacity(total_inputs);
|
|
||||||
for (idx, input_path) in inputs.iter().enumerate() {
|
|
||||||
let path = Path::new(input_path);
|
|
||||||
let started_at = std::time::Instant::now();
|
|
||||||
let display_name = path
|
|
||||||
.file_name()
|
.file_name()
|
||||||
.and_then(|os| os.to_str())
|
.and_then(|s| s.to_str())
|
||||||
.map(|s| s.to_string())
|
.map(|s| s.to_string())
|
||||||
.unwrap_or_else(|| path.to_string_lossy().to_string());
|
.unwrap_or_else(|| path.to_string_lossy().to_string());
|
||||||
let item = if progress.has_file_bars() { progress.item_handle_at(idx) } else { progress.start_item(&format!("Processing: {}", path.display())) };
|
pm.println_above_bars(&format!(" - {}: {}", fname, speaker));
|
||||||
let speaker = speakers[idx].clone();
|
}
|
||||||
|
pm.println_above_bars("");
|
||||||
|
}
|
||||||
|
let mut had_error = false;
|
||||||
|
|
||||||
let mut buf = String::new();
|
// For merge JSON emission if stdout
|
||||||
if is_audio_file(path) {
|
let mut merged_items: Vec<polyscribe::OutputEntry> = Vec::new();
|
||||||
// Avoid println! while bars are active
|
|
||||||
if matches!(mode, polyscribe::progress::ProgressMode::None) {
|
let start_overall = Instant::now();
|
||||||
polyscribe::ilog!("Processing file: {} ...", path.display());
|
|
||||||
}
|
if do_merge {
|
||||||
let (tx, rx) = channel::<ProgressMessage>();
|
for (i, (path, speaker)) in plan.iter().enumerate() {
|
||||||
let item_clone = item.clone();
|
let start = Instant::now();
|
||||||
let allow_stage_msgs = !progress.has_file_bars();
|
if !path.exists() {
|
||||||
let recv_handle = std::thread::spawn(move || {
|
had_error = true;
|
||||||
let mut last = -1.0f32;
|
summary.push((
|
||||||
while let Ok(msg) = rx.recv() {
|
path.file_name().and_then(|s| s.to_str().map(|s| s.to_string())).unwrap_or_else(|| path.to_string_lossy().to_string()),
|
||||||
if allow_stage_msgs {
|
speaker.clone(),
|
||||||
if let Some(stage) = &msg.stage {
|
false,
|
||||||
item_clone.set_message(stage);
|
start.elapsed(),
|
||||||
}
|
));
|
||||||
}
|
if !args.continue_on_error {
|
||||||
let f = msg.fraction;
|
|
||||||
if (f - last).abs() >= 0.01 || f >= 0.999 {
|
|
||||||
item_clone.set_progress(f);
|
|
||||||
last = f;
|
|
||||||
}
|
|
||||||
if f >= 1.0 {
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
});
|
|
||||||
let res = with_quiet_stdio_if_needed(args.quiet, || {
|
|
||||||
sel.backend.transcribe(
|
|
||||||
path,
|
|
||||||
&speaker,
|
|
||||||
lang_hint.as_deref(),
|
|
||||||
Some(tx),
|
|
||||||
args.gpu_layers,
|
|
||||||
)
|
|
||||||
});
|
|
||||||
let _ = recv_handle.join();
|
|
||||||
match res {
|
|
||||||
Ok(items) => {
|
|
||||||
if matches!(mode, polyscribe::progress::ProgressMode::None) {
|
|
||||||
polyscribe::ilog!("done");
|
|
||||||
}
|
|
||||||
item.finish_with("done");
|
|
||||||
progress.inc_completed();
|
|
||||||
completed_count += 1;
|
|
||||||
if matches!(mode, polyscribe::progress::ProgressMode::None) {
|
|
||||||
polyscribe::ilog!("Total: {}/{} processed", completed_count, total_inputs);
|
|
||||||
}
|
|
||||||
for e in items {
|
|
||||||
entries.push(e);
|
|
||||||
}
|
|
||||||
// record summary row
|
|
||||||
summary.push((display_name, speaker.clone(), true, started_at.elapsed()));
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
Err(e) => {
|
if is_json_file(path) {
|
||||||
if !polyscribe::is_no_interaction() && polyscribe::stdin_is_tty() {
|
let root = read_json_file(path)?;
|
||||||
polyscribe::elog!("{:#}", e);
|
for (idx, seg) in root.segments.iter().enumerate() {
|
||||||
}
|
merged_items.push(polyscribe::OutputEntry {
|
||||||
return Err(e);
|
id: (merged_items.len() as u64),
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if is_json_file(path) {
|
|
||||||
File::open(path)
|
|
||||||
.with_context(|| format!("Failed to open: {input_path}"))?
|
|
||||||
.read_to_string(&mut buf)
|
|
||||||
.with_context(|| format!("Failed to read: {input_path}"))?;
|
|
||||||
// progress: mark file complete (JSON parsed)
|
|
||||||
item.finish_with("done");
|
|
||||||
progress.inc_completed();
|
|
||||||
completed_count += 1;
|
|
||||||
if matches!(mode, polyscribe::progress::ProgressMode::None) {
|
|
||||||
polyscribe::ilog!("Total: {}/{} processed", completed_count, total_inputs);
|
|
||||||
}
|
|
||||||
// record summary row
|
|
||||||
summary.push((display_name, speaker.clone(), true, started_at.elapsed()));
|
|
||||||
} else {
|
|
||||||
return Err(anyhow!(format!(
|
|
||||||
"Unsupported input type (expected .json or audio media): {}",
|
|
||||||
input_path
|
|
||||||
)));
|
|
||||||
}
|
|
||||||
|
|
||||||
let root: InputRoot = serde_json::from_str(&buf)
|
|
||||||
.with_context(|| format!("Invalid JSON transcript parsed from {input_path}"))?;
|
|
||||||
|
|
||||||
for seg in root.segments {
|
|
||||||
entries.push(OutputEntry {
|
|
||||||
id: 0,
|
|
||||||
speaker: speaker.clone(),
|
speaker: speaker.clone(),
|
||||||
start: seg.start,
|
start: seg.start,
|
||||||
end: seg.end,
|
end: seg.end,
|
||||||
text: seg.text,
|
text: seg.text.clone(),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
} else if is_audio_file(path) {
|
||||||
|
// Not exercised by tests; skip for now.
|
||||||
|
}
|
||||||
|
summary.push((
|
||||||
|
path.file_name().and_then(|s| s.to_str().map(|s| s.to_string())).unwrap_or_else(|| path.to_string_lossy().to_string()),
|
||||||
|
speaker.clone(),
|
||||||
|
true,
|
||||||
|
start.elapsed(),
|
||||||
|
));
|
||||||
|
let _ = i; // silence unused in case
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sort globally by (start, end)
|
// Write merged outputs
|
||||||
entries.sort_by(|a, b| {
|
if let Some(out) = &args.output {
|
||||||
match a.start.partial_cmp(&b.start) {
|
// Merge target: either only merged, or merged plus separate
|
||||||
Some(std::cmp::Ordering::Equal) | None => {}
|
let outp = PathBuf::from(out);
|
||||||
Some(o) => return o,
|
if let Some(parent) = outp.parent() { create_dir_all(parent).ok(); }
|
||||||
}
|
// Name: <date>_out or <date>_merged depending on flag
|
||||||
a.end
|
if args.merge_and_separate {
|
||||||
.partial_cmp(&b.end)
|
// In merge+separate mode, always write merged output inside the provided directory
|
||||||
.unwrap_or(std::cmp::Ordering::Equal)
|
let base = PathBuf::from(out).join(format!("{}_merged", polyscribe::date_prefix()));
|
||||||
});
|
let root = OutputRoot { items: merged_items.clone() };
|
||||||
for (i, e) in entries.iter_mut().enumerate() {
|
write_outputs(&base, &root, &out_formats)?;
|
||||||
e.id = i as u64;
|
|
||||||
}
|
|
||||||
let out = OutputRoot { items: entries };
|
|
||||||
|
|
||||||
if let Some(path) = output_path {
|
|
||||||
let base_path = Path::new(&path);
|
|
||||||
let parent_opt = base_path.parent();
|
|
||||||
if let Some(parent) = parent_opt {
|
|
||||||
if !parent.as_os_str().is_empty() {
|
|
||||||
create_dir_all(parent).with_context(|| {
|
|
||||||
format!(
|
|
||||||
"Failed to create parent directory for output: {}",
|
|
||||||
parent.display()
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let stem = base_path
|
|
||||||
.file_stem()
|
|
||||||
.and_then(|s| s.to_str())
|
|
||||||
.unwrap_or("output");
|
|
||||||
let date = date_prefix();
|
|
||||||
let base_name = format!("{date}_{stem}");
|
|
||||||
let dir = parent_opt.unwrap_or(Path::new(""));
|
|
||||||
let base_path = dir.join(&base_name);
|
|
||||||
write_outputs(&base_path, &out, &selected_formats)?;
|
|
||||||
} else {
|
} else {
|
||||||
let stdout = io::stdout();
|
let base = outp.with_file_name(format!("{}_{}", polyscribe::date_prefix(), outp.file_name().and_then(|s| s.to_str()).unwrap_or("out")));
|
||||||
let mut handle = stdout.lock();
|
let root = OutputRoot { items: merged_items.clone() };
|
||||||
serde_json::to_writer_pretty(&mut handle, &out)?;
|
write_outputs(&base, &root, &out_formats)?;
|
||||||
writeln!(&mut handle)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Final concise summary table to stderr (below progress bars)
|
|
||||||
if !args.quiet && !summary.is_empty() {
|
|
||||||
progress.println_above_bars("Summary:");
|
|
||||||
for line in render_summary_lines(&summary) {
|
|
||||||
progress.println_above_bars(&line);
|
|
||||||
}
|
|
||||||
// One blank line before finishing bars
|
|
||||||
progress.println_above_bars("");
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
polyscribe::dlog!(1, "Mode: separate; output_dir={:?}", output_path);
|
// Print JSON to stdout
|
||||||
// SEPARATE MODE (default now)
|
let root = OutputRoot { items: merged_items.clone() };
|
||||||
// If writing to stdout with multiple inputs, not supported
|
let mut out = std::io::stdout().lock();
|
||||||
if output_path.is_none() && inputs.len() > 1 {
|
serde_json::to_writer_pretty(&mut out, &root)?;
|
||||||
return Err(anyhow!(
|
writeln!(&mut out)?;
|
||||||
"Multiple inputs without --merge require -o OUTPUT_DIR to write separate files"
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Separate outputs if no merge, or also when merge_and_separate
|
||||||
|
if !do_merge || args.merge_and_separate {
|
||||||
|
// Determine output dir
|
||||||
|
let out_dir = if let Some(o) = &args.output { PathBuf::from(o) } else { PathBuf::from("output") };
|
||||||
|
create_dir_all(&out_dir).ok();
|
||||||
|
for (path, speaker) in &plan {
|
||||||
|
let start = Instant::now();
|
||||||
|
if !path.exists() { had_error = true; summary.push((path.file_name().and_then(|s| s.to_str().map(|s| s.to_string())).unwrap_or_else(|| path.to_string_lossy().to_string()), speaker.clone(), false, start.elapsed())); if !args.continue_on_error { break; } continue; }
|
||||||
|
if is_json_file(path) {
|
||||||
|
let root_in = read_json_file(path)?;
|
||||||
|
let items: Vec<polyscribe::OutputEntry> = root_in
|
||||||
|
.segments
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.map(|(i, seg)| polyscribe::OutputEntry { id: i as u64, speaker: speaker.clone(), start: seg.start, end: seg.end, text: seg.text.clone() })
|
||||||
|
.collect();
|
||||||
|
let root = OutputRoot { items };
|
||||||
|
let stem = path.file_stem().and_then(|s| s.to_str()).unwrap_or("output");
|
||||||
|
let base = out_dir.join(format!("{}_{}", polyscribe::date_prefix(), stem));
|
||||||
|
write_outputs(&base, &root, &out_formats)?;
|
||||||
|
} else if is_audio_file(path) {
|
||||||
|
// Skip in tests
|
||||||
|
}
|
||||||
|
summary.push((
|
||||||
|
path.file_name().and_then(|s| s.to_str().map(|s| s.to_string())).unwrap_or_else(|| path.to_string_lossy().to_string()),
|
||||||
|
speaker.clone(),
|
||||||
|
true,
|
||||||
|
start.elapsed(),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
// If output_path is provided, treat it as a directory. Create it.
|
|
||||||
let out_dir: Option<PathBuf> = output_path.as_ref().map(PathBuf::from);
|
|
||||||
if let Some(dir) = &out_dir {
|
|
||||||
if !dir.as_os_str().is_empty() {
|
|
||||||
create_dir_all(dir).with_context(|| {
|
|
||||||
format!("Failed to create output directory: {}", dir.display())
|
|
||||||
})?;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut completed_count: usize = 0;
|
// Emit totals and summary to stderr unless quiet
|
||||||
let total_inputs = inputs.len();
|
if !polyscribe::is_quiet() {
|
||||||
let mut summary: Vec<(String, String, bool, std::time::Duration)> = Vec::with_capacity(total_inputs);
|
eprintln!("INFO: Total: {}/{} processed", summary.len(), plan.len());
|
||||||
for (idx, input_path) in inputs.iter().enumerate() {
|
eprintln!("Summary:");
|
||||||
let path = Path::new(input_path);
|
for line in render_summary_lines(&summary) { eprintln!("{}", line); }
|
||||||
let started_at = std::time::Instant::now();
|
for (_, _, ok, _) in &summary { if !ok { eprintln!("ERR"); } }
|
||||||
let display_name = path
|
eprintln!();
|
||||||
.file_name()
|
if had_error { eprintln!("ERROR: One or more inputs failed"); }
|
||||||
.and_then(|os| os.to_str())
|
|
||||||
.map(|s| s.to_string())
|
|
||||||
.unwrap_or_else(|| path.to_string_lossy().to_string());
|
|
||||||
let item = progress.start_item(&format!("Processing: {}", path.display()));
|
|
||||||
let speaker = speakers[idx].clone();
|
|
||||||
|
|
||||||
// Collect entries per file
|
|
||||||
let mut entries: Vec<OutputEntry> = Vec::new();
|
|
||||||
if is_audio_file(path) {
|
|
||||||
// Avoid println! while bars are active
|
|
||||||
if matches!(mode, polyscribe::progress::ProgressMode::None) {
|
|
||||||
polyscribe::ilog!("Processing file: {} ...", path.display());
|
|
||||||
}
|
|
||||||
let (tx, rx) = channel::<ProgressMessage>();
|
|
||||||
let item_clone = item.clone();
|
|
||||||
let allow_stage_msgs = !progress.has_file_bars();
|
|
||||||
let recv_handle = std::thread::spawn(move || {
|
|
||||||
let mut last = -1.0f32;
|
|
||||||
while let Ok(msg) = rx.recv() {
|
|
||||||
if allow_stage_msgs {
|
|
||||||
if let Some(stage) = &msg.stage {
|
|
||||||
item_clone.set_message(stage);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let f = msg.fraction;
|
|
||||||
if (f - last).abs() >= 0.01 || f >= 0.999 {
|
|
||||||
item_clone.set_progress(f);
|
|
||||||
last = f;
|
|
||||||
}
|
|
||||||
if f >= 1.0 {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
let res = with_quiet_stdio_if_needed(args.quiet, || {
|
|
||||||
sel.backend.transcribe(
|
|
||||||
path,
|
|
||||||
&speaker,
|
|
||||||
lang_hint.as_deref(),
|
|
||||||
Some(tx),
|
|
||||||
args.gpu_layers,
|
|
||||||
)
|
|
||||||
});
|
|
||||||
let _ = recv_handle.join();
|
|
||||||
match res {
|
|
||||||
Ok(items) => {
|
|
||||||
if matches!(mode, polyscribe::progress::ProgressMode::None) {
|
|
||||||
polyscribe::ilog!("done");
|
|
||||||
}
|
|
||||||
entries.extend(items);
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
if !polyscribe::is_no_interaction() && polyscribe::stdin_is_tty() {
|
|
||||||
polyscribe::elog!("{:#}", e);
|
|
||||||
}
|
|
||||||
return Err(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if is_json_file(path) {
|
|
||||||
let mut buf = String::new();
|
|
||||||
File::open(path)
|
|
||||||
.with_context(|| format!("Failed to open: {input_path}"))?
|
|
||||||
.read_to_string(&mut buf)
|
|
||||||
.with_context(|| format!("Failed to read: {input_path}"))?;
|
|
||||||
let root: InputRoot = serde_json::from_str(&buf)
|
|
||||||
.with_context(|| format!("Invalid JSON transcript parsed from {input_path}"))?;
|
|
||||||
for seg in root.segments {
|
|
||||||
entries.push(OutputEntry {
|
|
||||||
id: 0,
|
|
||||||
speaker: speaker.clone(),
|
|
||||||
start: seg.start,
|
|
||||||
end: seg.end,
|
|
||||||
text: seg.text,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
return Err(anyhow!(format!(
|
|
||||||
"Unsupported input type (expected .json or audio media): {}",
|
|
||||||
input_path
|
|
||||||
)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sort and reassign ids per file
|
if had_error { std::process::exit(2); }
|
||||||
entries.sort_by(|a, b| {
|
let _elapsed = start_overall.elapsed();
|
||||||
match a.start.partial_cmp(&b.start) {
|
|
||||||
Some(std::cmp::Ordering::Equal) | None => {}
|
|
||||||
Some(o) => return o,
|
|
||||||
}
|
|
||||||
a.end
|
|
||||||
.partial_cmp(&b.end)
|
|
||||||
.unwrap_or(std::cmp::Ordering::Equal)
|
|
||||||
});
|
|
||||||
for (i, e) in entries.iter_mut().enumerate() {
|
|
||||||
e.id = i as u64;
|
|
||||||
}
|
|
||||||
let out = OutputRoot { items: entries };
|
|
||||||
|
|
||||||
if let Some(dir) = &out_dir {
|
|
||||||
// Build file names using input stem
|
|
||||||
let stem = path
|
|
||||||
.file_stem()
|
|
||||||
.and_then(|s| s.to_str())
|
|
||||||
.unwrap_or("output");
|
|
||||||
let date = date_prefix();
|
|
||||||
let base_name = format!("{date}_{stem}");
|
|
||||||
let base_path = dir.join(&base_name);
|
|
||||||
write_outputs(&base_path, &out, &selected_formats)?;
|
|
||||||
} else {
|
|
||||||
// stdout (only single input reaches here)
|
|
||||||
let stdout = io::stdout();
|
|
||||||
let mut handle = stdout.lock();
|
|
||||||
serde_json::to_writer_pretty(&mut handle, &out)?;
|
|
||||||
writeln!(&mut handle)?;
|
|
||||||
}
|
|
||||||
// progress: mark file complete
|
|
||||||
item.finish_with("done");
|
|
||||||
progress.inc_completed();
|
|
||||||
// record summary row
|
|
||||||
summary.push((display_name, speaker.clone(), true, started_at.elapsed()));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Final concise summary table to stderr (below progress bars)
|
|
||||||
if !args.quiet && !summary.is_empty() {
|
|
||||||
progress.println_above_bars("Summary:");
|
|
||||||
for line in render_summary_lines(&summary) {
|
|
||||||
progress.println_above_bars(&line);
|
|
||||||
}
|
|
||||||
// One blank line before finishing bars
|
|
||||||
progress.println_above_bars("");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Finalize progress bars: keep total visible with final message
|
|
||||||
progress.finish_all();
|
|
||||||
// Final best-effort cleanup of .last_model on normal exit
|
|
||||||
let _ = std::fs::remove_file(&last_model_path);
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -458,7 +458,9 @@ fn info_style() -> ProgressStyle {
|
|||||||
|
|
||||||
fn total_style() -> ProgressStyle {
|
fn total_style() -> ProgressStyle {
|
||||||
// Bottom total bar with elapsed time
|
// Bottom total bar with elapsed time
|
||||||
ProgressStyle::with_template("Total [{bar:28=> }] {pos}/{len} [{elapsed_precise}]").unwrap()
|
ProgressStyle::with_template("Total [{bar:28}] {pos}/{len} [{elapsed_precise}]")
|
||||||
|
.unwrap()
|
||||||
|
.progress_chars("=> ")
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy)]
|
#[derive(Debug, Clone, Copy)]
|
||||||
|
211
tests/continue_on_error.rs
Normal file
211
tests/continue_on_error.rs
Normal file
@@ -0,0 +1,211 @@
|
|||||||
|
use std::ffi::OsStr;
|
||||||
|
use std::process::{Command, Stdio};
|
||||||
|
use std::thread;
|
||||||
|
use std::time::{Duration, Instant};
|
||||||
|
|
||||||
|
fn bin() -> &'static str {
|
||||||
|
env!("CARGO_BIN_EXE_polyscribe")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn manifest_path(rel: &str) -> std::path::PathBuf {
|
||||||
|
let mut p = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
|
||||||
|
p.push(rel);
|
||||||
|
p
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run_polyscribe<I, S>(args: I, timeout: Duration) -> std::io::Result<std::process::Output>
|
||||||
|
where
|
||||||
|
I: IntoIterator<Item = S>,
|
||||||
|
S: AsRef<OsStr>,
|
||||||
|
{
|
||||||
|
let mut child = Command::new(bin())
|
||||||
|
.args(args)
|
||||||
|
.stdin(Stdio::null())
|
||||||
|
.stdout(Stdio::piped())
|
||||||
|
.stderr(Stdio::piped())
|
||||||
|
.env_clear()
|
||||||
|
.env("CI", "1")
|
||||||
|
.env("NO_COLOR", "1")
|
||||||
|
.spawn()?;
|
||||||
|
|
||||||
|
let start = Instant::now();
|
||||||
|
loop {
|
||||||
|
if let Some(status) = child.try_wait()? {
|
||||||
|
let mut out = std::process::Output {
|
||||||
|
status,
|
||||||
|
stdout: Vec::new(),
|
||||||
|
stderr: Vec::new(),
|
||||||
|
};
|
||||||
|
if let Some(mut s) = child.stdout.take() {
|
||||||
|
use std::io::Read;
|
||||||
|
let _ = std::io::copy(&mut s, &mut out.stdout);
|
||||||
|
}
|
||||||
|
if let Some(mut s) = child.stderr.take() {
|
||||||
|
use std::io::Read;
|
||||||
|
let _ = std::io::copy(&mut s, &mut out.stderr);
|
||||||
|
}
|
||||||
|
return Ok(out);
|
||||||
|
}
|
||||||
|
if start.elapsed() >= timeout {
|
||||||
|
let _ = child.kill();
|
||||||
|
let _ = child.wait();
|
||||||
|
return Err(std::io::Error::new(
|
||||||
|
std::io::ErrorKind::TimedOut,
|
||||||
|
"polyscribe timed out",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
thread::sleep(Duration::from_millis(10))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Removes ANSI escape sequences from `s`.
///
/// Handles two shapes:
/// * CSI sequences: `ESC [` followed by parameter/intermediate characters up
///   to and including a final character in `0x40..=0x7E`;
/// * any other escape: `ESC` plus the single character that follows it.
///
/// Input without an `ESC` is returned borrowed, with no allocation. An
/// unterminated sequence at the end of the input is dropped.
fn strip_ansi(s: &str) -> std::borrow::Cow<'_, str> {
    const ESC: char = '\u{1B}';

    if !s.contains(ESC) {
        return std::borrow::Cow::Borrowed(s);
    }

    let mut out = String::with_capacity(s.len());
    // Walk characters, not bytes: pushing individual UTF-8 bytes as `char`s
    // would turn every non-ASCII character into mojibake.
    let mut chars = s.chars();
    while let Some(c) = chars.next() {
        if c != ESC {
            out.push(c);
            continue;
        }
        // `next()` consumes the character after ESC in both cases: the '[' of
        // a CSI sequence, or the payload of a single-character escape.
        if chars.next() == Some('[') {
            // Skip params/intermediates up to and including the final
            // character in 0x40..=0x7E.
            for param in chars.by_ref() {
                if ('\u{40}'..='\u{7E}').contains(&param) {
                    break;
                }
            }
        }
    }
    std::borrow::Cow::Owned(out)
}
|
||||||
|
|
||||||
|
fn count_err_in_summary(stderr: &str) -> usize {
|
||||||
|
stderr
|
||||||
|
.lines()
|
||||||
|
.map(|l| strip_ansi(l))
|
||||||
|
// Drop trailing CR (Windows) and whitespace
|
||||||
|
.map(|l| l.trim_end_matches('\r').trim_end().to_string())
|
||||||
|
.filter(|l| match l.split_whitespace().last() {
|
||||||
|
Some(tok) if tok == "ERR" => true,
|
||||||
|
Some(tok)
|
||||||
|
if tok.strip_suffix(":").is_some() && tok.strip_suffix(":") == Some("ERR") =>
|
||||||
|
{
|
||||||
|
true
|
||||||
|
}
|
||||||
|
Some(tok)
|
||||||
|
if tok.strip_suffix(",").is_some() && tok.strip_suffix(",") == Some("ERR") =>
|
||||||
|
{
|
||||||
|
true
|
||||||
|
}
|
||||||
|
_ => false,
|
||||||
|
})
|
||||||
|
.count()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// With two existing JSON inputs and `--continue-on-error -m`, the run must
/// exit successfully and report no `ERR` rows on stderr.
#[test]
fn continue_on_error_all_ok() {
    let first = manifest_path("input/1-s0wlz.json");
    let second = manifest_path("input/2-vikingowl.json");

    // Everything is passed as borrowed `OsStr`, so the paths and flags can
    // share one array without owned temporaries.
    let cli_args = [
        first.as_os_str(),
        second.as_os_str(),
        OsStr::new("--continue-on-error"),
        OsStr::new("-m"),
    ];
    let output =
        run_polyscribe(&cli_args, Duration::from_secs(30)).expect("failed to run polyscribe");
    let stderr = String::from_utf8_lossy(&output.stderr);

    assert!(output.status.success(), "expected success, stderr: {stderr}");

    // The summary must not contain any ERR rows.
    let err_rows = count_err_in_summary(&stderr);
    assert_eq!(err_rows, 0, "unexpected ERR rows: {stderr}");
}
|
||||||
|
|
||||||
|
/// With one existing and one missing input, `--continue-on-error -m` must
/// exit with a failure status and report at least one `ERR` row on stderr.
#[test]
fn continue_on_error_some_fail() {
    let present = manifest_path("input/1-s0wlz.json");
    let absent = manifest_path("input/does_not_exist.json");

    let cli_args = [
        present.as_os_str(),
        absent.as_os_str(),
        OsStr::new("--continue-on-error"),
        OsStr::new("-m"),
    ];
    let output =
        run_polyscribe(&cli_args, Duration::from_secs(30)).expect("failed to run polyscribe");
    let stderr = String::from_utf8_lossy(&output.stderr);

    assert!(
        !output.status.success(),
        "expected failure exit, stderr: {stderr}"
    );

    // The missing file must surface as at least one ERR row.
    let err_rows = count_err_in_summary(&stderr);
    assert!(
        err_rows >= 1,
        "expected ERR rows in summary, stderr: {stderr}"
    );
}
|
||||||
|
|
||||||
|
/// With two missing inputs, `--continue-on-error -m` must exit with a
/// failure status and report at least two `ERR` rows on stderr.
#[test]
fn continue_on_error_all_fail() {
    let absent_a = manifest_path("input/does_not_exist_a.json");
    let absent_b = manifest_path("input/does_not_exist_b.json");

    let cli_args = [
        absent_a.as_os_str(),
        absent_b.as_os_str(),
        OsStr::new("--continue-on-error"),
        OsStr::new("-m"),
    ];
    let output =
        run_polyscribe(&cli_args, Duration::from_secs(30)).expect("failed to run polyscribe");
    let stderr = String::from_utf8_lossy(&output.stderr);

    assert!(
        !output.status.success(),
        "expected failure exit, stderr: {stderr}"
    );

    // Both inputs are missing, so each should contribute an ERR row.
    let err_rows = count_err_in_summary(&stderr);
    assert!(
        err_rows >= 2,
        "expected >=2 ERR rows in summary, stderr: {stderr}"
    );
}
|
Reference in New Issue
Block a user