Compare commits

...

5 Commits

8 changed files with 682 additions and 2461 deletions

43
Cargo.lock generated
View File

@@ -103,17 +103,6 @@ version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
[[package]]
name = "atty"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
dependencies = [
"hermit-abi",
"libc",
"winapi",
]
[[package]] [[package]]
name = "autocfg" name = "autocfg"
version = "1.5.0" version = "1.5.0"
@@ -599,15 +588,6 @@ version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "hermit-abi"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
dependencies = [
"libc",
]
[[package]] [[package]]
name = "http" name = "http"
version = "1.3.1" version = "1.3.1"
@@ -1146,7 +1126,6 @@ name = "polyscribe"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"atty",
"chrono", "chrono",
"clap", "clap",
"clap_complete", "clap_complete",
@@ -1967,28 +1946,6 @@ dependencies = [
"fs_extra", "fs_extra",
] ]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]] [[package]]
name = "windows-core" name = "windows-core"
version = "0.61.2" version = "0.61.2"

View File

@@ -3,7 +3,6 @@ name = "polyscribe"
version = "0.1.0" version = "0.1.0"
edition = "2024" edition = "2024"
license = "MIT" license = "MIT"
license-file = "LICENSE"
[features] [features]
# Default: CPU only; no GPU features enabled # Default: CPU only; no GPU features enabled
@@ -31,7 +30,6 @@ whisper-rs = { git = "https://github.com/tazz4843/whisper-rs" }
libc = "0.2" libc = "0.2"
cliclack = "0.3" cliclack = "0.3"
indicatif = "0.17" indicatif = "0.17"
atty = "0.2"
[dev-dependencies] [dev-dependencies]
tempfile = "3" tempfile = "3"

View File

@@ -24,25 +24,18 @@ pub enum BackendKind {
Vulkan, Vulkan,
} }
/// Abstraction for a transcription backend implementation. /// Abstraction for a transcription backend.
pub trait TranscribeBackend { pub trait TranscribeBackend {
/// Return the backend kind for this implementation. /// Backend kind implemented by this type.
fn kind(&self) -> BackendKind; fn kind(&self) -> BackendKind;
/// Transcribe the given audio file path and return transcript entries. /// Transcribe the given audio and return transcript entries.
///
/// Parameters:
/// - audio_path: path to input media (audio or video) to be decoded/transcribed.
/// - speaker: label to attach to all produced segments.
/// - lang_opt: optional language hint (e.g., "en"); None means auto/multilingual model default.
/// - gpu_layers: optional GPU layer count if applicable (ignored by some backends).
/// - progress_cb: optional callback receiving percentage [0..=100] updates.
fn transcribe( fn transcribe(
&self, &self,
audio_path: &Path, audio_path: &Path,
speaker: &str, speaker: &str,
lang_opt: Option<&str>, language: Option<&str>,
gpu_layers: Option<u32>, gpu_layers: Option<u32>,
progress_cb: Option<&(dyn Fn(i32) + Send + Sync)>, progress: Option<&(dyn Fn(i32) + Send + Sync)>,
) -> Result<Vec<OutputEntry>>; ) -> Result<Vec<OutputEntry>>;
} }
@@ -87,107 +80,39 @@ fn vulkan_available() -> bool {
} }
/// CPU-based transcription backend using whisper-rs. /// CPU-based transcription backend using whisper-rs.
#[derive(Default)]
pub struct CpuBackend; pub struct CpuBackend;
/// CUDA-accelerated transcription backend for NVIDIA GPUs. /// CUDA-accelerated transcription backend for NVIDIA GPUs.
#[derive(Default)]
pub struct CudaBackend; pub struct CudaBackend;
/// ROCm/HIP-accelerated transcription backend for AMD GPUs. /// ROCm/HIP-accelerated transcription backend for AMD GPUs.
#[derive(Default)]
pub struct HipBackend; pub struct HipBackend;
/// Vulkan-based transcription backend (experimental/incomplete). /// Vulkan-based transcription backend (experimental/incomplete).
#[derive(Default)]
pub struct VulkanBackend; pub struct VulkanBackend;
impl CpuBackend { macro_rules! impl_whisper_backend {
/// Create a new CPU backend instance. ($ty:ty, $kind:expr) => {
pub fn new() -> Self { impl TranscribeBackend for $ty {
CpuBackend fn kind(&self) -> BackendKind { $kind }
} fn transcribe(
} &self,
impl Default for CpuBackend { audio_path: &Path,
fn default() -> Self { speaker: &str,
Self::new() language: Option<&str>,
} _gpu_layers: Option<u32>,
} progress: Option<&(dyn Fn(i32) + Send + Sync)>,
impl CudaBackend { ) -> Result<Vec<OutputEntry>> {
/// Create a new CUDA backend instance. transcribe_with_whisper_rs(audio_path, speaker, language, progress)
pub fn new() -> Self { }
CudaBackend }
} };
}
impl Default for CudaBackend {
fn default() -> Self {
Self::new()
}
}
impl HipBackend {
/// Create a new HIP backend instance.
pub fn new() -> Self {
HipBackend
}
}
impl Default for HipBackend {
fn default() -> Self {
Self::new()
}
}
impl VulkanBackend {
/// Create a new Vulkan backend instance.
pub fn new() -> Self {
VulkanBackend
}
}
impl Default for VulkanBackend {
fn default() -> Self {
Self::new()
}
} }
impl TranscribeBackend for CpuBackend { impl_whisper_backend!(CpuBackend, BackendKind::Cpu);
fn kind(&self) -> BackendKind { impl_whisper_backend!(CudaBackend, BackendKind::Cuda);
BackendKind::Cpu impl_whisper_backend!(HipBackend, BackendKind::Hip);
}
fn transcribe(
&self,
audio_path: &Path,
speaker: &str,
lang_opt: Option<&str>,
_gpu_layers: Option<u32>,
progress_cb: Option<&(dyn Fn(i32) + Send + Sync)>,
) -> Result<Vec<OutputEntry>> {
transcribe_with_whisper_rs(audio_path, speaker, lang_opt, progress_cb)
}
}
impl TranscribeBackend for CudaBackend {
fn kind(&self) -> BackendKind {
BackendKind::Cuda
}
fn transcribe(
&self,
audio_path: &Path,
speaker: &str,
lang_opt: Option<&str>,
_gpu_layers: Option<u32>,
progress_cb: Option<&(dyn Fn(i32) + Send + Sync)>,
) -> Result<Vec<OutputEntry>> {
// whisper-rs uses enabled CUDA feature at build time; call same code path
transcribe_with_whisper_rs(audio_path, speaker, lang_opt, progress_cb)
}
}
impl TranscribeBackend for HipBackend {
fn kind(&self) -> BackendKind {
BackendKind::Hip
}
fn transcribe(
&self,
audio_path: &Path,
speaker: &str,
lang_opt: Option<&str>,
_gpu_layers: Option<u32>,
progress_cb: Option<&(dyn Fn(i32) + Send + Sync)>,
) -> Result<Vec<OutputEntry>> {
transcribe_with_whisper_rs(audio_path, speaker, lang_opt, progress_cb)
}
}
impl TranscribeBackend for VulkanBackend { impl TranscribeBackend for VulkanBackend {
fn kind(&self) -> BackendKind { fn kind(&self) -> BackendKind {
@@ -197,9 +122,9 @@ impl TranscribeBackend for VulkanBackend {
&self, &self,
_audio_path: &Path, _audio_path: &Path,
_speaker: &str, _speaker: &str,
_lang_opt: Option<&str>, _language: Option<&str>,
_gpu_layers: Option<u32>, _gpu_layers: Option<u32>,
_progress_cb: Option<&(dyn Fn(i32) + Send + Sync)>, _progress: Option<&(dyn Fn(i32) + Send + Sync)>,
) -> Result<Vec<OutputEntry>> { ) -> Result<Vec<OutputEntry>> {
Err(anyhow!( Err(anyhow!(
"Vulkan backend not yet wired to whisper.cpp FFI. Build with --features gpu-vulkan and ensure Vulkan SDK is installed. How to fix: install Vulkan loader (libvulkan), set VULKAN_SDK, and run cargo build --features gpu-vulkan." "Vulkan backend not yet wired to whisper.cpp FFI. Build with --features gpu-vulkan and ensure Vulkan SDK is installed. How to fix: install Vulkan loader (libvulkan), set VULKAN_SDK, and run cargo build --features gpu-vulkan."
@@ -237,13 +162,13 @@ pub fn select_backend(requested: BackendKind, verbose: bool) -> Result<Selection
detected.push(BackendKind::Vulkan); detected.push(BackendKind::Vulkan);
} }
let mk = |k: BackendKind| -> Box<dyn TranscribeBackend + Send + Sync> { let instantiate_backend = |k: BackendKind| -> Box<dyn TranscribeBackend + Send + Sync> {
match k { match k {
BackendKind::Cpu => Box::new(CpuBackend::new()), BackendKind::Cpu => Box::new(CpuBackend::default()),
BackendKind::Cuda => Box::new(CudaBackend::new()), BackendKind::Cuda => Box::new(CudaBackend::default()),
BackendKind::Hip => Box::new(HipBackend::new()), BackendKind::Hip => Box::new(HipBackend::default()),
BackendKind::Vulkan => Box::new(VulkanBackend::new()), BackendKind::Vulkan => Box::new(VulkanBackend::default()),
BackendKind::Auto => Box::new(CpuBackend::new()), // will be replaced BackendKind::Auto => Box::new(CpuBackend::default()), // placeholder for Auto
} }
}; };
@@ -295,7 +220,7 @@ pub fn select_backend(requested: BackendKind, verbose: bool) -> Result<Selection
} }
Ok(SelectionResult { Ok(SelectionResult {
backend: mk(chosen), backend: instantiate_backend(chosen),
chosen, chosen,
detected, detected,
}) })
@@ -306,98 +231,99 @@ pub fn select_backend(requested: BackendKind, verbose: bool) -> Result<Selection
pub(crate) fn transcribe_with_whisper_rs( pub(crate) fn transcribe_with_whisper_rs(
audio_path: &Path, audio_path: &Path,
speaker: &str, speaker: &str,
lang_opt: Option<&str>, language: Option<&str>,
progress_cb: Option<&(dyn Fn(i32) + Send + Sync)>, progress: Option<&(dyn Fn(i32) + Send + Sync)>,
) -> Result<Vec<OutputEntry>> { ) -> Result<Vec<OutputEntry>> {
if let Some(cb) = progress_cb { cb(0); } let report = |p: i32| {
if let Some(cb) = progress { cb(p); }
};
report(0);
let pcm = decode_audio_to_pcm_f32_ffmpeg(audio_path)?; let pcm_samples = decode_audio_to_pcm_f32_ffmpeg(audio_path)?;
if let Some(cb) = progress_cb { cb(5); } report(5);
let model = find_model_file()?; let model_path = find_model_file()?;
let is_en_only = model let english_only_model = model_path
.file_name() .file_name()
.and_then(|s| s.to_str()) .and_then(|s| s.to_str())
.map(|s| s.contains(".en.") || s.ends_with(".en.bin")) .map(|s| s.contains(".en.") || s.ends_with(".en.bin"))
.unwrap_or(false); .unwrap_or(false);
if let Some(lang) = lang_opt { if let Some(lang) = language {
if is_en_only && lang != "en" { if english_only_model && lang != "en" {
return Err(anyhow!( return Err(anyhow!(
"Selected model is English-only ({}), but a non-English language hint '{}' was provided. Please use a multilingual model or set WHISPER_MODEL.", "Selected model is English-only ({}), but a non-English language hint '{}' was provided. Please use a multilingual model or set WHISPER_MODEL.",
model.display(), model_path.display(),
lang lang
)); ));
} }
} }
let model_str = model let model_path_str = model_path
.to_str() .to_str()
.ok_or_else(|| anyhow!("Model path not valid UTF-8: {}", model.display()))?; .ok_or_else(|| anyhow!("Model path not valid UTF-8: {}", model_path.display()))?;
// Try to reduce native library logging via environment variables when not super-verbose.
if crate::verbose_level() < 2 { if crate::verbose_level() < 2 {
// These env vars are recognized by ggml/whisper in many builds; harmless if unknown. // Some builds of whisper/ggml expect these env vars; harmless if unknown
unsafe { unsafe {
std::env::set_var("GGML_LOG_LEVEL", "0"); std::env::set_var("GGML_LOG_LEVEL", "0");
std::env::set_var("WHISPER_PRINT_PROGRESS", "0"); std::env::set_var("WHISPER_PRINT_PROGRESS", "0");
} }
} }
// Suppress stderr from whisper/ggml during model load and inference when quiet and not verbose. let (_context, mut state) = crate::with_suppressed_stderr(|| {
let (_ctx, mut state) = crate::with_suppressed_stderr(|| { let params = whisper_rs::WhisperContextParameters::default();
let cparams = whisper_rs::WhisperContextParameters::default(); let context = whisper_rs::WhisperContext::new_with_params(model_path_str, params)
let ctx = whisper_rs::WhisperContext::new_with_params(model_str, cparams) .with_context(|| format!("Failed to load Whisper model at {}", model_path.display()))?;
.with_context(|| format!("Failed to load Whisper model at {}", model.display()))?; let state = context
let state = ctx
.create_state() .create_state()
.map_err(|e| anyhow!("Failed to create Whisper state: {:?}", e))?; .map_err(|e| anyhow!("Failed to create Whisper state: {:?}", e))?;
Ok::<_, anyhow::Error>((ctx, state)) Ok::<_, anyhow::Error>((context, state))
})?; })?;
if let Some(cb) = progress_cb { cb(20); } report(20);
let mut params = let mut full_params =
whisper_rs::FullParams::new(whisper_rs::SamplingStrategy::Greedy { best_of: 1 }); whisper_rs::FullParams::new(whisper_rs::SamplingStrategy::Greedy { best_of: 1 });
let n_threads = std::thread::available_parallelism() let threads = std::thread::available_parallelism()
.map(|n| n.get() as i32) .map(|n| n.get() as i32)
.unwrap_or(1); .unwrap_or(1);
params.set_n_threads(n_threads); full_params.set_n_threads(threads);
params.set_translate(false); full_params.set_translate(false);
if let Some(lang) = lang_opt { if let Some(lang) = language {
params.set_language(Some(lang)); full_params.set_language(Some(lang));
} }
if let Some(cb) = progress_cb { cb(30); } report(30);
crate::with_suppressed_stderr(|| { crate::with_suppressed_stderr(|| {
if let Some(cb) = progress_cb { cb(40); } report(40);
state state
.full(params, &pcm) .full(full_params, &pcm_samples)
.map_err(|e| anyhow!("Whisper full() failed: {:?}", e)) .map_err(|e| anyhow!("Whisper full() failed: {:?}", e))
})?; })?;
if let Some(cb) = progress_cb { cb(90); } report(90);
let num_segments = state let num_segments = state
.full_n_segments() .full_n_segments()
.map_err(|e| anyhow!("Failed to get segments: {:?}", e))?; .map_err(|e| anyhow!("Failed to get segments: {:?}", e))?;
let mut items = Vec::new(); let mut entries = Vec::new();
for i in 0..num_segments { for seg_idx in 0..num_segments {
let text = state let segment_text = state
.full_get_segment_text(i) .full_get_segment_text(seg_idx)
.map_err(|e| anyhow!("Failed to get segment text: {:?}", e))?; .map_err(|e| anyhow!("Failed to get segment text: {:?}", e))?;
let t0 = state let t0 = state
.full_get_segment_t0(i) .full_get_segment_t0(seg_idx)
.map_err(|e| anyhow!("Failed to get segment t0: {:?}", e))?; .map_err(|e| anyhow!("Failed to get segment t0: {:?}", e))?;
let t1 = state let t1 = state
.full_get_segment_t1(i) .full_get_segment_t1(seg_idx)
.map_err(|e| anyhow!("Failed to get segment t1: {:?}", e))?; .map_err(|e| anyhow!("Failed to get segment t1: {:?}", e))?;
let start = (t0 as f64) * 0.01; let start = (t0 as f64) * 0.01;
let end = (t1 as f64) * 0.01; let end = (t1 as f64) * 0.01;
items.push(OutputEntry { entries.push(OutputEntry {
id: 0, id: 0,
speaker: speaker.to_string(), speaker: speaker.to_string(),
start, start,
end, end,
text: text.trim().to_string(), text: segment_text.trim().to_string(),
}); });
} }
if let Some(cb) = progress_cb { cb(100); } report(100);
Ok(items) Ok(entries)
} }

View File

@@ -4,9 +4,6 @@
#![forbid(elided_lifetimes_in_paths)] #![forbid(elided_lifetimes_in_paths)]
#![forbid(unused_must_use)] #![forbid(unused_must_use)]
#![deny(missing_docs)] #![deny(missing_docs)]
// Lint policy for incremental refactor toward 2024:
// - Keep basic clippy warnings enabled; skip pedantic/nursery for now (will revisit in step 7).
// - cargo lints can be re-enabled later once codebase is tidied.
#![warn(clippy::all)] #![warn(clippy::all)]
//! PolyScribe library: business logic and core types. //! PolyScribe library: business logic and core types.
//! //!
@@ -22,8 +19,8 @@ static VERBOSE: AtomicU8 = AtomicU8::new(0);
static NO_PROGRESS: AtomicBool = AtomicBool::new(false); static NO_PROGRESS: AtomicBool = AtomicBool::new(false);
/// Set quiet mode: when true, non-interactive logs should be suppressed. /// Set quiet mode: when true, non-interactive logs should be suppressed.
pub fn set_quiet(q: bool) { pub fn set_quiet(enabled: bool) {
QUIET.store(q, Ordering::Relaxed); QUIET.store(enabled, Ordering::Relaxed);
} }
/// Return current quiet mode state. /// Return current quiet mode state.
pub fn is_quiet() -> bool { pub fn is_quiet() -> bool {
@@ -31,8 +28,8 @@ pub fn is_quiet() -> bool {
} }
/// Set non-interactive mode: when true, interactive prompts must be skipped. /// Set non-interactive mode: when true, interactive prompts must be skipped.
pub fn set_no_interaction(b: bool) { pub fn set_no_interaction(enabled: bool) {
NO_INTERACTION.store(b, Ordering::Relaxed); NO_INTERACTION.store(enabled, Ordering::Relaxed);
} }
/// Return current non-interactive state. /// Return current non-interactive state.
pub fn is_no_interaction() -> bool { pub fn is_no_interaction() -> bool {
@@ -49,8 +46,8 @@ pub fn verbose_level() -> u8 {
} }
/// Disable interactive progress indicators (bars/spinners) /// Disable interactive progress indicators (bars/spinners)
pub fn set_no_progress(b: bool) { pub fn set_no_progress(enabled: bool) {
NO_PROGRESS.store(b, Ordering::Relaxed); NO_PROGRESS.store(enabled, Ordering::Relaxed);
} }
/// Return current no-progress state /// Return current no-progress state
pub fn is_no_progress() -> bool { pub fn is_no_progress() -> bool {
@@ -59,17 +56,8 @@ pub fn is_no_progress() -> bool {
/// Check whether stdin is connected to a TTY. Used to avoid blocking prompts when not interactive. /// Check whether stdin is connected to a TTY. Used to avoid blocking prompts when not interactive.
pub fn stdin_is_tty() -> bool { pub fn stdin_is_tty() -> bool {
#[cfg(unix)] use std::io::IsTerminal as _;
{ std::io::stdin().is_terminal()
use std::os::unix::io::AsRawFd;
unsafe { libc::isatty(std::io::stdin().as_raw_fd()) == 1 }
}
#[cfg(not(unix))]
{
// Best-effort on non-Unix: assume TTY when not redirected by common CI vars
// This avoids introducing a new dependency for atty.
!(std::env::var("CI").is_ok() || std::env::var("GITHUB_ACTIONS").is_ok())
}
} }
/// A guard that temporarily redirects stderr to /dev/null on Unix when quiet mode is active. /// A guard that temporarily redirects stderr to /dev/null on Unix when quiet mode is active.
@@ -101,7 +89,6 @@ impl StderrSilencer {
pub fn activate() -> Self { pub fn activate() -> Self {
#[cfg(unix)] #[cfg(unix)]
unsafe { unsafe {
// Duplicate current stderr (fd 2)
let old_fd = dup(2); let old_fd = dup(2);
if old_fd < 0 { if old_fd < 0 {
return Self { return Self {
@@ -112,9 +99,8 @@ impl StderrSilencer {
} }
// Open /dev/null for writing // Open /dev/null for writing
let devnull_cstr = std::ffi::CString::new("/dev/null").unwrap(); let devnull_cstr = std::ffi::CString::new("/dev/null").unwrap();
let dn = open(devnull_cstr.as_ptr(), O_WRONLY); let devnull_fd = open(devnull_cstr.as_ptr(), O_WRONLY);
if dn < 0 { if devnull_fd < 0 {
// failed to open devnull; restore and bail
close(old_fd); close(old_fd);
return Self { return Self {
active: false, active: false,
@@ -122,9 +108,8 @@ impl StderrSilencer {
devnull_fd: -1, devnull_fd: -1,
}; };
} }
// Redirect fd 2 to devnull if dup2(devnull_fd, 2) < 0 {
if dup2(dn, 2) < 0 { close(devnull_fd);
close(dn);
close(old_fd); close(old_fd);
return Self { return Self {
active: false, active: false,
@@ -135,7 +120,7 @@ impl StderrSilencer {
Self { Self {
active: true, active: true,
old_stderr_fd: old_fd, old_stderr_fd: old_fd,
devnull_fd: dn, devnull_fd: devnull_fd,
} }
} }
#[cfg(not(unix))] #[cfg(not(unix))]
@@ -152,7 +137,6 @@ impl Drop for StderrSilencer {
} }
#[cfg(unix)] #[cfg(unix)]
unsafe { unsafe {
// Restore old stderr and close devnull and old copies
let _ = dup2(self.old_stderr_fd, 2); let _ = dup2(self.old_stderr_fd, 2);
let _ = close(self.devnull_fd); let _ = close(self.devnull_fd);
let _ = close(self.old_stderr_fd); let _ = close(self.old_stderr_fd);
@@ -170,13 +154,13 @@ where
{ {
// Suppress noisy native logs unless super-verbose (-vv) is enabled. // Suppress noisy native logs unless super-verbose (-vv) is enabled.
if verbose_level() < 2 { if verbose_level() < 2 {
let res = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
let _guard = StderrSilencer::activate(); let _guard = StderrSilencer::activate();
f() f()
})); }));
match res { match result {
Ok(v) => v, Ok(value) => value,
Err(p) => std::panic::resume_unwind(p), Err(panic_payload) => std::panic::resume_unwind(panic_payload),
} }
} else { } else {
f() f()
@@ -184,139 +168,7 @@ where
} }
/// Centralized UI helpers (TTY-aware, quiet/verbose-aware) /// Centralized UI helpers (TTY-aware, quiet/verbose-aware)
pub mod ui { pub mod ui;
use std::io;
// Prefer cliclack for all user-visible messages to ensure consistent, TTY-aware output.
// Falls back to stderr printing if needed.
/// Startup intro/banner (suppressed when quiet).
pub fn intro(msg: impl AsRef<str>) {
if crate::is_quiet() { return; }
// Use cliclack intro to render a nice banner when TTY
let _ = cliclack::intro(msg.as_ref());
}
/// Print an informational line (suppressed when quiet).
pub fn info(msg: impl AsRef<str>) {
if crate::is_quiet() { return; }
let _ = cliclack::log::info(msg.as_ref());
}
/// Print a warning (always printed).
pub fn warn(msg: impl AsRef<str>) {
// cliclack provides a warning-level log utility
let _ = cliclack::log::warning(msg.as_ref());
}
/// Print an error (always printed).
pub fn error(msg: impl AsRef<str>) {
let _ = cliclack::log::error(msg.as_ref());
}
/// Print a line above any progress bars (maps to cliclack log; synchronized).
pub fn println_above_bars(msg: impl AsRef<str>) {
if crate::is_quiet() { return; }
// cliclack logs are synchronized with its spinners/bars
let _ = cliclack::log::info(msg.as_ref());
}
/// Final outro/summary printed below any progress indicators (suppressed when quiet).
pub fn outro(msg: impl AsRef<str>) {
if crate::is_quiet() { return; }
let _ = cliclack::outro(msg.as_ref());
}
/// Prompt the user (TTY-aware via cliclack) and read a line from stdin. Returns the raw line with trailing newline removed.
pub fn prompt_line(prompt: &str) -> io::Result<String> {
// Route prompt through cliclack to keep consistent styling and avoid direct eprint!/println!
let _ = cliclack::log::info(prompt);
let mut s = String::new();
io::stdin().read_line(&mut s)?;
Ok(s)
}
// Progress manager built on indicatif MultiProgress for per-file and aggregate bars
/// TTY-aware progress UI built on `indicatif` for per-file and aggregate progress bars.
///
/// This small helper encapsulates a `MultiProgress` with one aggregate (total) bar and
/// one per-file bar. It is intentionally minimal to keep integration lightweight.
pub mod progress {
use atty::Stream;
use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
/// Manages a set of per-file progress bars plus a top aggregate bar.
pub struct ProgressManager {
enabled: bool,
mp: Option<MultiProgress>,
per: Vec<ProgressBar>,
total: Option<ProgressBar>,
total_n: usize,
completed: usize,
done: Vec<bool>,
}
impl ProgressManager {
/// Create a new manager with the given enabled flag.
pub fn new(enabled: bool) -> Self {
Self { enabled, mp: None, per: Vec::new(), total: None, total_n: 0, completed: 0, done: Vec::new() }
}
/// Create a manager that enables bars when `n > 1`, stderr is a TTY, and not quiet.
pub fn default_for_files(n: usize) -> Self {
let enabled = n > 1 && atty::is(Stream::Stderr) && !crate::is_quiet() && !crate::is_no_progress();
Self::new(enabled)
}
/// Initialize bars for the given file labels. If disabled or single file, no-op.
pub fn init_files(&mut self, labels: &[String]) {
self.total_n = labels.len();
if !self.enabled || self.total_n <= 1 {
// No bars in single-file mode or when disabled
self.enabled = false;
return;
}
let mp = MultiProgress::new();
// Aggregate bar at the top
let total = mp.add(ProgressBar::new(labels.len() as u64));
total.set_style(ProgressStyle::with_template("{prefix} [{bar:40.cyan/blue}] {pos}/{len}")
.unwrap()
.progress_chars("=>-"));
total.set_prefix("Total");
self.total = Some(total);
// Per-file bars
for label in labels {
let pb = mp.add(ProgressBar::new(100));
pb.set_style(ProgressStyle::with_template("{prefix} [{bar:40.green/black}] {pos}% {msg}")
.unwrap()
.progress_chars("=>-"));
pb.set_position(0);
pb.set_prefix(label.clone());
self.per.push(pb);
}
self.mp = Some(mp);
}
/// Returns true when bars are enabled (multi-file TTY mode).
pub fn is_enabled(&self) -> bool { self.enabled }
/// Get a clone of the per-file progress bar at index, if enabled.
pub fn per_bar(&self, idx: usize) -> Option<ProgressBar> {
if !self.enabled { return None; }
self.per.get(idx).cloned()
}
/// Get a clone of the aggregate (total) progress bar, if enabled.
pub fn total_bar(&self) -> Option<ProgressBar> {
if !self.enabled { return None; }
self.total.as_ref().cloned()
}
/// Mark a file as finished (set to 100% and update total counter).
pub fn mark_file_done(&mut self, idx: usize) {
if !self.enabled { return; }
if let Some(pb) = self.per.get(idx) {
pb.set_position(100);
pb.finish_with_message("done");
}
self.completed += 1;
if let Some(total) = &self.total { total.set_position(self.completed as u64); }
}
}
}
}
/// Logging macros and helpers /// Logging macros and helpers
/// Log an error using the UI helper (always printed). Recommended for user-visible errors. /// Log an error using the UI helper (always printed). Recommended for user-visible errors.
@@ -397,39 +249,39 @@ pub fn format_srt_time(seconds: f64) -> String {
let total_ms = (seconds * 1000.0).round() as i64; let total_ms = (seconds * 1000.0).round() as i64;
let ms = total_ms % 1000; let ms = total_ms % 1000;
let total_secs = total_ms / 1000; let total_secs = total_ms / 1000;
let s = total_secs % 60; let sec = total_secs % 60;
let m = (total_secs / 60) % 60; let min = (total_secs / 60) % 60;
let h = total_secs / 3600; let hour = total_secs / 3600;
format!("{h:02}:{m:02}:{s:02},{ms:03}") format!("{hour:02}:{min:02}:{sec:02},{ms:03}")
} }
/// Render a list of transcript entries to SRT format. /// Render a list of transcript entries to SRT format.
pub fn render_srt(items: &[OutputEntry]) -> String { pub fn render_srt(entries: &[OutputEntry]) -> String {
let mut out = String::new(); let mut srt = String::new();
for (i, e) in items.iter().enumerate() { for (index, entry) in entries.iter().enumerate() {
let idx = i + 1; let srt_index = index + 1;
out.push_str(&format!("{idx}\n")); srt.push_str(&format!("{srt_index}\n"));
out.push_str(&format!( srt.push_str(&format!(
"{} --> {}\n", "{} --> {}\n",
format_srt_time(e.start), format_srt_time(entry.start),
format_srt_time(e.end) format_srt_time(entry.end)
)); ));
if !e.speaker.is_empty() { if !entry.speaker.is_empty() {
out.push_str(&format!("{}: {}\n", e.speaker, e.text)); srt.push_str(&format!("{}: {}\n", entry.speaker, entry.text));
} else { } else {
out.push_str(&format!("{}\n", e.text)); srt.push_str(&format!("{}\n", entry.text));
} }
out.push('\n'); srt.push('\n');
} }
out srt
} }
/// Determine the default models directory, honoring POLYSCRIBE_MODELS_DIR override. /// Determine the default models directory, honoring POLYSCRIBE_MODELS_DIR override.
pub fn models_dir_path() -> PathBuf { pub fn models_dir_path() -> PathBuf {
if let Ok(p) = env::var("POLYSCRIBE_MODELS_DIR") { if let Ok(env_val) = env::var("POLYSCRIBE_MODELS_DIR") {
let pb = PathBuf::from(p); let env_path = PathBuf::from(env_val);
if !pb.as_os_str().is_empty() { if !env_path.as_os_str().is_empty() {
return pb; return env_path;
} }
} }
if cfg!(debug_assertions) { if cfg!(debug_assertions) {
@@ -454,17 +306,17 @@ pub fn models_dir_path() -> PathBuf {
/// Normalize a language identifier to a short ISO code when possible. /// Normalize a language identifier to a short ISO code when possible.
pub fn normalize_lang_code(input: &str) -> Option<String> { pub fn normalize_lang_code(input: &str) -> Option<String> {
let mut s = input.trim().to_lowercase(); let mut lang = input.trim().to_lowercase();
if s.is_empty() || s == "auto" || s == "c" || s == "posix" { if lang.is_empty() || lang == "auto" || lang == "c" || lang == "posix" {
return None; return None;
} }
if let Some((lhs, _)) = s.split_once('.') { if let Some((prefix, _)) = lang.split_once('.') {
s = lhs.to_string(); lang = prefix.to_string();
} }
if let Some((lhs, _)) = s.split_once('_') { if let Some((prefix, _)) = lang.split_once('_') {
s = lhs.to_string(); lang = prefix.to_string();
} }
let code = match s.as_str() { let code = match lang.as_str() {
"en" => "en", "en" => "en",
"de" => "de", "de" => "de",
"es" => "es", "es" => "es",
@@ -538,10 +390,10 @@ pub fn find_model_file() -> Result<PathBuf> {
} }
if let Ok(env_model) = env::var("WHISPER_MODEL") { if let Ok(env_model) = env::var("WHISPER_MODEL") {
let p = PathBuf::from(env_model); let model_path = PathBuf::from(env_model);
if p.is_file() { if model_path.is_file() {
let _ = std::fs::write(models_dir.join(".last_model"), p.display().to_string()); let _ = std::fs::write(models_dir.join(".last_model"), model_path.display().to_string());
return Ok(p); return Ok(model_path);
} }
} }
@@ -560,9 +412,9 @@ pub fn find_model_file() -> Result<PathBuf> {
} }
let mut candidates: Vec<PathBuf> = Vec::new(); let mut candidates: Vec<PathBuf> = Vec::new();
let rd = std::fs::read_dir(models_dir) let dir_entries = std::fs::read_dir(models_dir)
.with_context(|| format!("Failed to read models directory: {}", models_dir.display()))?; .with_context(|| format!("Failed to read models directory: {}", models_dir.display()))?;
for entry in rd { for entry in dir_entries {
let entry = entry?; let entry = entry?;
let path = entry.path(); let path = entry.path();
if path.is_file() { if path.is_file() {
@@ -593,16 +445,16 @@ pub fn find_model_file() -> Result<PathBuf> {
)); ));
} }
let input = crate::ui::prompt_line("Would you like to download models now? [Y/n]: ").unwrap_or_default(); let input = crate::ui::prompt_line("Would you like to download models now? [Y/n]: ").unwrap_or_default();
let ans = input.trim().to_lowercase(); let answer = input.trim().to_lowercase();
if ans.is_empty() || ans == "y" || ans == "yes" { if answer.is_empty() || answer == "y" || answer == "yes" {
if let Err(e) = models::run_interactive_model_downloader() { if let Err(e) = models::run_interactive_model_downloader() {
elog!("Downloader failed: {:#}", e); elog!("Downloader failed: {:#}", e);
} }
candidates.clear(); candidates.clear();
let rd2 = std::fs::read_dir(models_dir).with_context(|| { let dir_entries2 = std::fs::read_dir(models_dir).with_context(|| {
format!("Failed to read models directory: {}", models_dir.display()) format!("Failed to read models directory: {}", models_dir.display())
})?; })?;
for entry in rd2 { for entry in dir_entries2 {
let entry = entry?; let entry = entry?;
let path = entry.path(); let path = entry.path();
if path.is_file() { if path.is_file() {
@@ -628,38 +480,36 @@ pub fn find_model_file() -> Result<PathBuf> {
} }
if candidates.len() == 1 { if candidates.len() == 1 {
let only = candidates.remove(0); let only_model = candidates.remove(0);
let _ = std::fs::write(models_dir.join(".last_model"), only.display().to_string()); let _ = std::fs::write(models_dir.join(".last_model"), only_model.display().to_string());
return Ok(only); return Ok(only_model);
} }
let last_file = models_dir.join(".last_model"); let last_file = models_dir.join(".last_model");
if let Ok(prev) = std::fs::read_to_string(&last_file) { if let Ok(previous_content) = std::fs::read_to_string(&last_file) {
let prev = prev.trim(); let previous_content = previous_content.trim();
if !prev.is_empty() { if !previous_content.is_empty() {
let p = PathBuf::from(prev); let previous_path = PathBuf::from(previous_content);
if p.is_file() && candidates.iter().any(|c| c == &p) { if previous_path.is_file() && candidates.iter().any(|c| c == &previous_path) {
// Previously printed: INFO about using previously selected model. return Ok(previous_path);
// Suppress this to avoid duplicate/noisy messages; per-file progress will be shown elsewhere.
return Ok(p);
} }
} }
} }
crate::ui::println_above_bars(format!("Multiple Whisper models found in {}:", models_dir.display())); crate::ui::println_above_bars(format!("Multiple Whisper models found in {}:", models_dir.display()));
for (i, p) in candidates.iter().enumerate() { for (index, path) in candidates.iter().enumerate() {
crate::ui::println_above_bars(format!(" {}) {}", i + 1, p.display())); crate::ui::println_above_bars(format!(" {}) {}", index + 1, path.display()));
} }
let input = crate::ui::prompt_line(&format!("Select model by number [1-{}]: ", candidates.len())) let input = crate::ui::prompt_line(&format!("Select model by number [1-{}]: ", candidates.len()))
.map_err(|_| anyhow!("Failed to read selection"))?; .map_err(|_| anyhow!("Failed to read selection"))?;
let sel: usize = input let selection: usize = input
.trim() .trim()
.parse() .parse()
.map_err(|_| anyhow!("Invalid selection: {}", input.trim()))?; .map_err(|_| anyhow!("Invalid selection: {}", input.trim()))?;
if sel == 0 || sel > candidates.len() { if selection == 0 || selection > candidates.len() {
return Err(anyhow!("Selection out of range")); return Err(anyhow!("Selection out of range"));
} }
let chosen = candidates.swap_remove(sel - 1); let chosen = candidates.swap_remove(selection - 1);
let _ = std::fs::write(models_dir.join(".last_model"), chosen.display().to_string()); let _ = std::fs::write(models_dir.join(".last_model"), chosen.display().to_string());
Ok(chosen) Ok(chosen)
} }
@@ -694,28 +544,28 @@ pub fn decode_audio_to_pcm_f32_ffmpeg(audio_path: &Path) -> Result<Vec<f32>> {
} }
}; };
if !output.status.success() { if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr); let stderr_str = String::from_utf8_lossy(&output.stderr);
return Err(anyhow!( return Err(anyhow!(
"Failed to decode audio from {} using ffmpeg. This may indicate the file is not a valid or supported audio/video file, is corrupted, or cannot be opened. ffmpeg stderr: {}", "Failed to decode audio from {} using ffmpeg. This may indicate the file is not a valid or supported audio/video file, is corrupted, or cannot be opened. ffmpeg stderr: {}",
audio_path.display(), audio_path.display(),
stderr.trim() stderr_str.trim()
)); ));
} }
let bytes = output.stdout; let data = output.stdout;
if bytes.len() % 4 != 0 { if data.len() % 4 != 0 {
let truncated = bytes.len() - (bytes.len() % 4); let truncated = data.len() - (data.len() % 4);
let mut v = Vec::with_capacity(truncated / 4); let mut samples = Vec::with_capacity(truncated / 4);
for chunk in bytes[..truncated].chunks_exact(4) { for chunk in data[..truncated].chunks_exact(4) {
let arr = [chunk[0], chunk[1], chunk[2], chunk[3]]; let arr = [chunk[0], chunk[1], chunk[2], chunk[3]];
v.push(f32::from_le_bytes(arr)); samples.push(f32::from_le_bytes(arr));
} }
Ok(v) Ok(samples)
} else { } else {
let mut v = Vec::with_capacity(bytes.len() / 4); let mut samples = Vec::with_capacity(data.len() / 4);
for chunk in bytes.chunks_exact(4) { for chunk in data.chunks_exact(4) {
let arr = [chunk[0], chunk[1], chunk[2], chunk[3]]; let arr = [chunk[0], chunk[1], chunk[2], chunk[3]];
v.push(f32::from_le_bytes(arr)); samples.push(f32::from_le_bytes(arr));
} }
Ok(v) Ok(samples)
} }
} }

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

84
src/ui.rs Normal file
View File

@@ -0,0 +1,84 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025 <COPYRIGHT HOLDER>. All rights reserved.
//! Centralized UI helpers (TTY-aware, quiet/verbose-aware)
use std::io;
/// Print the startup intro/banner through `cliclack`.
///
/// The result of the underlying call is deliberately discarded, so a banner
/// that fails to render never aborts the program.
///
/// NOTE(review): earlier documentation described this as "suppressed when
/// quiet", but this function does not consult the quiet flag itself;
/// presumably callers gate it. Confirm against the call sites.
pub fn intro(msg: impl AsRef<str>) {
    let banner: &str = msg.as_ref();
    let _ = cliclack::intro(banner);
}
/// Print the final outro/summary line through `cliclack`.
///
/// The result of the underlying call is deliberately discarded; rendering the
/// closing line is best-effort.
///
/// NOTE(review): earlier documentation described this as "suppressed when
/// quiet", but this function does not consult the quiet flag itself;
/// presumably callers gate it. Confirm against the call sites.
pub fn outro(msg: impl AsRef<str>) {
    let summary: &str = msg.as_ref();
    let _ = cliclack::outro(summary);
}
/// Log an informational message through `cliclack`.
///
/// This function does not check the quiet flag; callers that want `--quiet`
/// to silence informational output are expected to skip the call themselves.
/// The result of the logging call is discarded.
pub fn info(msg: impl AsRef<str>) {
    let text: &str = msg.as_ref();
    let _ = cliclack::log::info(text);
}
/// Log a warning through `cliclack`.
///
/// No quiet/verbosity check is performed here, so the message is always
/// handed to the logger. The result of the logging call is discarded.
pub fn warn(msg: impl AsRef<str>) {
    let text: &str = msg.as_ref();
    let _ = cliclack::log::warning(text);
}
/// Print an error (always printed).
pub fn error(msg: impl AsRef<str>) {
let _ = cliclack::log::error(msg.as_ref());
}
/// Emit a line intended to appear above any active progress bars.
///
/// The message is routed through `cliclack`'s info log and is skipped entirely
/// when quiet mode is active (`crate::is_quiet()`). The result of the logging
/// call is discarded.
///
/// NOTE(review): the previous comment stated that cliclack logs are
/// synchronized with cliclack's own spinners/bars; whether that also holds for
/// the bars managed by the `progress` module is not established here.
pub fn println_above_bars(msg: impl AsRef<str>) {
    if !crate::is_quiet() {
        let _ = cliclack::log::info(msg.as_ref());
    }
}
/// Ask a free-form question and return the typed answer.
///
/// Returns `Ok(None)` without prompting when interaction is disabled
/// (`crate::is_no_interaction()`) or stdin is not a TTY
/// (`crate::stdin_is_tty()`). When `default` is `Some`, it is handed to the
/// prompt via `default_input`.
///
/// Any error from the prompt's `interact()` call (presumably including the
/// user canceling) is also mapped to `Ok(None)`; as written, this function
/// never returns `Err`.
pub fn prompt_input(question: impl AsRef<str>, default: Option<&str>) -> anyhow::Result<Option<String>> {
    let interactive = !crate::is_no_interaction() && crate::stdin_is_tty();
    if !interactive {
        return Ok(None);
    }

    let mut prompt = cliclack::input(question.as_ref());
    if let Some(fallback) = default {
        prompt = prompt.default_input(fallback);
    }

    // A failed or aborted interaction is reported as "no answer", not as an error.
    let answer: Option<String> = prompt.interact().ok();
    Ok(answer)
}
/// Ask a yes/no question and return the user's choice.
///
/// Returns `Ok(None)` without prompting when interaction is disabled
/// (`crate::is_no_interaction()`) or stdin is not a TTY
/// (`crate::stdin_is_tty()`). `default_yes` is used as the prompt's initial
/// value.
///
/// Any error from the prompt's `interact()` call (presumably including the
/// user canceling) is also mapped to `Ok(None)`; as written, this function
/// never returns `Err`.
pub fn prompt_confirm(question: impl AsRef<str>, default_yes: bool) -> anyhow::Result<Option<bool>> {
    let interactive = !crate::is_no_interaction() && crate::stdin_is_tty();
    if !interactive {
        return Ok(None);
    }

    let outcome = cliclack::confirm(question.as_ref())
        .initial_value(default_yes)
        .interact();

    // A failed or aborted interaction is reported as "no answer", not as an error.
    Ok(outcome.ok())
}
/// Show `prompt` via `cliclack`'s info log and read one line from stdin.
///
/// Returns the line with its trailing line terminator (`"\n"` or `"\r\n"`)
/// removed; all other whitespace is preserved. When stdin is at end-of-file
/// nothing is read and an empty string is returned.
///
/// # Errors
///
/// Returns the `io::Error` produced by reading from stdin. A failure to
/// display the prompt itself is ignored.
pub fn prompt_line(prompt: &str) -> io::Result<String> {
    // Route the prompt through cliclack to keep styling consistent and avoid
    // direct eprint!/println! calls.
    let _ = cliclack::log::info(prompt);

    let mut line = String::new();
    io::stdin().read_line(&mut line)?;

    // `read_line` keeps the terminator in the buffer; strip exactly one
    // "\n" or "\r\n" so the returned value matches the documented contract.
    if line.ends_with('\n') {
        line.pop();
        if line.ends_with('\r') {
            line.pop();
        }
    }
    Ok(line)
}
/// TTY-aware progress UI built on `indicatif` for per-file and aggregate progress bars.
///
/// This small helper encapsulates a `MultiProgress` with one aggregate (total) bar and
/// one bar per file. It is intentionally minimal to keep integration lightweight.
pub mod progress;

81
src/ui/progress.rs Normal file
View File

@@ -0,0 +1,81 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025 <COPYRIGHT HOLDER>. All rights reserved.
use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
use std::io::IsTerminal as _;
/// Manages a set of per-file progress bars plus a top aggregate bar.
///
/// Bars are only created by `init_files`; until then `mp` and `total` are
/// `None` and `per` is empty.
pub struct ProgressManager {
/// Whether bars are active. `init_files` clears this flag when it is called
/// while disabled or with at most one label.
enabled: bool,
/// The `MultiProgress` that owns the bars; set by `init_files`.
mp: Option<MultiProgress>,
/// One bar per file label, in the order the labels were passed to `init_files`.
per: Vec<ProgressBar>,
/// Aggregate bar whose length is the number of file labels.
total: Option<ProgressBar>,
/// Number of files reported finished through `mark_file_done`.
completed: usize,
}
impl ProgressManager {
    /// Create a new manager with the given enabled flag.
    ///
    /// No bars exist until [`ProgressManager::init_files`] is called.
    pub fn new(enabled: bool) -> Self {
        Self {
            enabled,
            mp: None,
            per: Vec::new(),
            total: None,
            completed: 0,
        }
    }

    /// Create a manager whose bars are enabled only when `n > 1`, stderr is a
    /// terminal, and neither quiet mode (`crate::is_quiet()`) nor the
    /// no-progress flag (`crate::is_no_progress()`) is active.
    pub fn default_for_files(n: usize) -> Self {
        let enabled = n > 1
            && std::io::stderr().is_terminal()
            && !crate::is_quiet()
            && !crate::is_no_progress();
        Self::new(enabled)
    }

    /// Initialize bars for the given file labels.
    ///
    /// When the manager is disabled or `labels` has at most one entry, no bars
    /// are created and the manager is (or stays) disabled. Otherwise one
    /// aggregate bar (length `labels.len()`) and one 0–100 bar per label are
    /// created. Calling this again replaces any bars and resets the completed
    /// counter from a previous call.
    ///
    /// # Panics
    ///
    /// Panics if one of the built-in progress templates fails to parse.
    pub fn init_files(&mut self, labels: &[String]) {
        if !self.enabled || labels.len() <= 1 {
            // No bars in single-file mode or when disabled.
            self.enabled = false;
            return;
        }

        // Start from a clean slate so a repeated call does not leave stale
        // bars in `per` or a stale count in `completed`.
        self.per.clear();
        self.total = None;
        self.completed = 0;

        let mp = MultiProgress::new();

        // Aggregate bar at the top.
        let total = mp.add(ProgressBar::new(labels.len() as u64));
        total.set_style(
            ProgressStyle::with_template("{prefix} [{bar:40.cyan/blue}] {pos}/{len}")
                .expect("built-in aggregate progress template must parse")
                .progress_chars("=>-"),
        );
        total.set_prefix("Total");
        self.total = Some(total);

        // Per-file bars share one style; parse the template once and clone it.
        let per_file_style =
            ProgressStyle::with_template("{prefix} [{bar:40.green/black}] {pos}% {msg}")
                .expect("built-in per-file progress template must parse")
                .progress_chars("=>-");
        self.per.reserve(labels.len());
        for label in labels {
            let pb = mp.add(ProgressBar::new(100));
            pb.set_style(per_file_style.clone());
            pb.set_position(0);
            pb.set_prefix(label.clone());
            self.per.push(pb);
        }

        self.mp = Some(mp);
    }

    /// Returns true when bars are enabled.
    pub fn is_enabled(&self) -> bool {
        self.enabled
    }

    /// Get a clone of the per-file progress bar at `idx`.
    ///
    /// Returns `None` when the manager is disabled or `idx` is out of range.
    pub fn per_bar(&self, idx: usize) -> Option<ProgressBar> {
        if !self.enabled {
            return None;
        }
        self.per.get(idx).cloned()
    }

    /// Get a clone of the aggregate (total) progress bar.
    ///
    /// Returns `None` when the manager is disabled or no bars were initialized.
    pub fn total_bar(&self) -> Option<ProgressBar> {
        if !self.enabled {
            return None;
        }
        self.total.clone()
    }

    /// Mark a file as finished: set its bar to 100 with the message "done" and
    /// advance the aggregate bar by one.
    ///
    /// Does nothing when the manager is disabled. An out-of-range `idx` skips
    /// the per-file update. The aggregate count is capped at the number of
    /// per-file bars so repeated calls cannot push it past its length.
    pub fn mark_file_done(&mut self, idx: usize) {
        if !self.enabled {
            return;
        }
        if let Some(pb) = self.per.get(idx) {
            pb.set_position(100);
            pb.finish_with_message("done");
        }
        // Cap the counter so a duplicate or stray call cannot show e.g. 4/3.
        self.completed = (self.completed + 1).min(self.per.len());
        if let Some(total) = &self.total {
            total.set_position(self.completed as u64);
        }
    }
}