From 128db0f7333ce238510bc612d116124991b0201e Mon Sep 17 00:00:00 2001 From: vikingowl Date: Wed, 13 Aug 2025 13:35:53 +0200 Subject: [PATCH] [refactor] remove backend and library modules, consolidating features into main crate --- .cargo/config.toml | 17 + Cargo.lock | 12 - Cargo.toml | 35 +- crates/polyscribe-core/src/error.rs | 39 ++ crates/polyscribe-core/src/lib.rs | 162 +++++--- crates/polyscribe-core/src/prelude.rs | 13 + rust-toolchain.toml | 6 + src/backend.rs | 329 --------------- src/lib.rs | 571 -------------------------- src/main.rs | 483 ---------------------- src/models.rs | 146 ------- src/ui.rs | 84 ---- src/ui/progress.rs | 81 ---- 13 files changed, 209 insertions(+), 1769 deletions(-) create mode 100644 .cargo/config.toml create mode 100644 crates/polyscribe-core/src/error.rs create mode 100644 crates/polyscribe-core/src/prelude.rs create mode 100644 rust-toolchain.toml delete mode 100644 src/backend.rs delete mode 100644 src/lib.rs delete mode 100644 src/main.rs delete mode 100644 src/models.rs delete mode 100644 src/ui.rs delete mode 100644 src/ui/progress.rs diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..fe24ade --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: MIT + +[build] +# Make target-dir consistent across workspace for better cache reuse. +target-dir = "target" + +[profile.dev] +opt-level = 1 +debug = true +incremental = true + +[profile.release] +# Reasonable defaults for CLI apps/libraries +lto = "thin" +codegen-units = 1 +strip = "debuginfo" +opt-level = 3 diff --git a/Cargo.lock b/Cargo.lock index 7368a0d..a3ba12a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -762,18 +762,6 @@ dependencies = [ "which", ] -[[package]] -name = "polyscribe-plugin-tubescribe" -version = "0.1.0" -dependencies = [ - "anyhow", - "clap", - "polyscribe-protocol", - "serde", - "serde_json", - "tokio", -] - [[package]] name = "polyscribe-protocol" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index d6998ac..4c27502 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,37 @@ members = [ "crates/polyscribe-protocol", "crates/polyscribe-host", "crates/polyscribe-cli", - "plugins/polyscribe-plugin-tubescribe", ] -resolver = "2" +resolver = "3" + +# Optional: Keep dependency versions consistent across members +[workspace.dependencies] +thiserror = "1.0.69" +serde = { version = "1.0.219", features = ["derive"] } +anyhow = "1.0.99" +libc = "0.2.175" +toml = "0.8.23" +serde_json = "1.0.142" +chrono = "0.4.41" +sha2 = "0.10.9" +which = "6.0.3" +tokio = { version = "1.47.1", features = ["rt-multi-thread", "macros"] } +clap = { version = "4.5.44", features = ["derive"] } +indicatif = "0.17.11" +directories = "5.0.1" +whisper-rs = "0.14.3" +cliclack = "0.3.6" +clap_complete = "4.5.57" +clap_mangen = "0.2.29" + +[workspace.lints.rust] +unused_imports = "deny" +dead_code = "warn" + +[profile.release] +lto = "fat" +codegen-units = 1 +panic = "abort" + +[profile.dev] +panic = "unwind" diff --git a/crates/polyscribe-core/src/error.rs b/crates/polyscribe-core/src/error.rs new file mode 100644 index 0000000..4ab695d --- /dev/null +++ b/crates/polyscribe-core/src/error.rs @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: MIT + +use thiserror::Error; + +/// The common error type for the polyscribe core crate. +/// Add more domain-specific variants as needed. +#[derive(Debug, Error)] +pub enum Error { + /// Wrapper for any boxed dynamic error. Useful as a temporary catch-all. + #[error("anyhow error: {0}")] + Anyhow(#[from] anyhow::Error), + + /// IO-related error. + #[error("io error: {0}")] + Io(#[from] std::io::Error), + + /// UTF-8 conversion error. + #[error("utf8 error: {0}")] + Utf8(#[from] std::string::FromUtf8Error), + + /// Environment variable error. + #[error("env var error: {0}")] + Var(#[from] std::env::VarError), + + /// TOML de serialization error. + #[error("toml de error: {0}")] + TomlDe(#[from] toml::de::Error), + + /// Configuration parsing error. + #[error("configuration error: {0}")] + Config(String), + + /// Placeholder for not-yet-implemented backends or features. + #[error("unimplemented: {0}")] + Unimplemented(&'static str), +} + +/// Convenient result alias for the polyscribe core crate. +pub type Result = std::result::Result; diff --git a/crates/polyscribe-core/src/lib.rs b/crates/polyscribe-core/src/lib.rs index 9ac535f..885ed36 100644 --- a/crates/polyscribe-core/src/lib.rs +++ b/crates/polyscribe-core/src/lib.rs @@ -12,7 +12,16 @@ use std::sync::atomic::{AtomicBool, AtomicU8, Ordering}; -// Global runtime flags +use anyhow::{anyhow, Context, Result}; +use chrono::Local; +use std::env; +use std::path::{Path, PathBuf}; +use std::process::Command; + +#[cfg(unix)] +use libc::{O_WRONLY, close, dup, dup2, open}; + +/// Global runtime flags static QUIET: AtomicBool = AtomicBool::new(false); static NO_INTERACTION: AtomicBool = AtomicBool::new(false); static VERBOSE: AtomicU8 = AtomicU8::new(0); @@ -101,7 +110,7 @@ impl StderrSilencer { let devnull_cstr = std::ffi::CString::new("/dev/null").unwrap(); let devnull_fd = open(devnull_cstr.as_ptr(), O_WRONLY); if devnull_fd < 0 { - close(old_fd); + let _ = close(old_fd); return Self { active: false, old_stderr_fd: -1, @@ -109,8 +118,8 @@ impl StderrSilencer { }; } if dup2(devnull_fd, 2) < 0 { - close(devnull_fd); - close(old_fd); + let _ = close(devnull_fd); + let _ = close(old_fd); return Self { active: false, old_stderr_fd: -1, @@ -120,7 +129,7 @@ impl StderrSilencer { Self { active: true, old_stderr_fd: old_fd, - devnull_fd: devnull_fd, + devnull_fd, } } #[cfg(not(unix))] @@ -183,16 +192,6 @@ macro_rules! qlog { ($($arg:tt)*) => {{ $crate::ilog!($($arg)*); }} } -use anyhow::{Context, Result, anyhow}; -use chrono::Local; -use std::env; -use std::fs::create_dir_all; -use std::path::{Path, PathBuf}; -use std::process::Command; - -#[cfg(unix)] -use libc::{O_WRONLY, close, dup, dup2, open}; - /// Re-export backend module (GPU/CPU selection and transcription). pub mod backend; /// Re-export models module (model listing/downloading/updating). @@ -201,6 +200,11 @@ pub mod models; pub mod config; /// UI helpers pub mod ui; +/// Error types for the crate. +pub mod error; +pub mod prelude; + +pub use error::{Error, Result as OtherResult}; /// Transcript entry for a single segment. #[derive(Debug, serde::Serialize, Clone)] @@ -356,56 +360,77 @@ pub fn normalize_lang_code(input: &str) -> Option { /// Find the Whisper model file path to use. pub fn find_model_file() -> Result { + // 1) Explicit override via environment if let Ok(path) = env::var("WHISPER_MODEL") { let p = PathBuf::from(path); - if p.exists() { - return Ok(p); - } else { + if !p.exists() { return Err(anyhow!( - "WHISPER_MODEL points to non-existing file: {}", + "WHISPER_MODEL points to a non-existing path: {}", p.display() )); } - } - let models_dir = models_dir_path(); - if !models_dir.exists() { - create_dir_all(&models_dir).with_context(|| { - format!("Failed to create models dir: {}", models_dir.display()) - })?; + if !p.is_file() { + return Err(anyhow!( + "WHISPER_MODEL must point to a file, but is not: {}", + p.display() + )); + } + return Ok(p); } - // Heuristic: prefer larger model files and English-only when language hint is en - let mut candidates = Vec::new(); - for entry in std::fs::read_dir(&models_dir).with_context(|| format!( - "Failed to read models dir: {}", - models_dir.display() - ))? { - let entry = entry?; - let path = entry.path(); - if !path - .extension() - .and_then(|s| s.to_str()) - .is_some_and(|s| s.eq_ignore_ascii_case("bin")) - { - continue; - } - if let Ok(md) = std::fs::metadata(&path) { - candidates.push((md.len(), path)); - } - } - if candidates.is_empty() { - // Try default fallback (tiny.en) - let fallback = models_dir.join("ggml-tiny.en.bin"); - if fallback.exists() { - return Ok(fallback); - } + // 2) Resolve models directory and ensure it exists and is a directory + let models_dir = models_dir_path(); + if models_dir.exists() && !models_dir.is_dir() { return Err(anyhow!( - "No Whisper models found in {}. Please download a model or set WHISPER_MODEL.", + "Models path exists but is not a directory: {}", models_dir.display() )); } + std::fs::create_dir_all(&models_dir).with_context(|| { + format!("Failed to ensure models dir exists: {}", models_dir.display()) + })?; + + // 3) Gather candidate .bin files (regular files only), prefer largest + let mut candidates = Vec::new(); + for entry in std::fs::read_dir(&models_dir).with_context(|| { + format!("Failed to read models dir: {}", models_dir.display()) + })? { + let entry = entry?; + let path = entry.path(); + + // Only consider .bin files + let is_bin = path + .extension() + .and_then(|s| s.to_str()) + .is_some_and(|s| s.eq_ignore_ascii_case("bin")); + if !is_bin { + continue; + } + + // Only consider regular files + let md = match std::fs::metadata(&path) { + Ok(m) if m.is_file() => m, + _ => continue, + }; + + candidates.push((md.len(), path)); + } + + if candidates.is_empty() { + // 4) Fallback to known tiny English model if present + let fallback = models_dir.join("ggml-tiny.en.bin"); + if fallback.is_file() { + return Ok(fallback); + } + return Err(anyhow!( + "No Whisper model files (*.bin) found in {}. \ + Please download a model or set WHISPER_MODEL.", + models_dir.display() + )); + } + candidates.sort_by_key(|(size, _)| *size); - let (_size, path) = candidates.into_iter().last().unwrap(); + let (_size, path) = candidates.into_iter().last().expect("non-empty"); Ok(path) } @@ -414,12 +439,14 @@ pub fn decode_audio_to_pcm_f32_ffmpeg(audio_path: &Path) -> Result> { let in_path = audio_path .to_str() .ok_or_else(|| anyhow!("Audio path must be valid UTF-8: {}", audio_path.display()))?; - let tmp_wav = std::env::temp_dir().join("polyscribe_tmp_input.wav"); - let tmp_wav_str = tmp_wav - .to_str() - .ok_or_else(|| anyhow!("Temp path not valid UTF-8: {}", tmp_wav.display()))?; - // ffmpeg -i input -f f32le -ac 1 -ar 16000 -y /tmp/tmp.raw + // Use a raw f32le file to match the -f f32le output format. + let tmp_raw = std::env::temp_dir().join("polyscribe_tmp_input.f32le"); + let tmp_raw_str = tmp_raw + .to_str() + .ok_or_else(|| anyhow!("Temp path not valid UTF-8: {}", tmp_raw.display()))?; + + // ffmpeg -i input -f f32le -ac 1 -ar 16000 -y /tmp/tmp.f32le let status = Command::new("ffmpeg") .arg("-hide_banner") .arg("-loglevel") @@ -433,16 +460,29 @@ pub fn decode_audio_to_pcm_f32_ffmpeg(audio_path: &Path) -> Result> { .arg("-ar") .arg("16000") .arg("-y") - .arg(&tmp_wav_str) + .arg(tmp_raw_str) .status() .with_context(|| format!("Failed to invoke ffmpeg to decode: {}", in_path))?; + if !status.success() { - return Err(anyhow!("ffmpeg exited with non-zero status when decoding {}", in_path)); + return Err(anyhow!( + "ffmpeg exited with non-zero status when decoding {}", + in_path + )); } - let raw = std::fs::read(&tmp_wav).with_context(|| format!("Failed to read temp PCM file: {}", tmp_wav.display()))?; + + let raw = std::fs::read(&tmp_raw) + .with_context(|| format!("Failed to read temp PCM file: {}", tmp_raw.display()))?; + + // Best-effort cleanup of the temp file + let _ = std::fs::remove_file(&tmp_raw); + // Interpret raw bytes as f32 little-endian if raw.len() % 4 != 0 { - return Err(anyhow!("Decoded PCM file length not multiple of 4: {}", raw.len())); + return Err(anyhow!( + "Decoded PCM file length not multiple of 4: {}", + raw.len() + )); } let mut samples = Vec::with_capacity(raw.len() / 4); for chunk in raw.chunks_exact(4) { diff --git a/crates/polyscribe-core/src/prelude.rs b/crates/polyscribe-core/src/prelude.rs new file mode 100644 index 0000000..54bdec4 --- /dev/null +++ b/crates/polyscribe-core/src/prelude.rs @@ -0,0 +1,13 @@ +// rust +//! Commonly used exports for convenient glob-imports in binaries and tests. +//! Usage: `use polyscribe_core::prelude::*;` + +pub use crate::backend::*; +pub use crate::config::*; +pub use crate::error::{Error, Result}; +pub use crate::models::*; + +// If you frequently use UI helpers across binaries/tests, export them too. +// Keep this lean to avoid pulling UI everywhere unintentionally. +#[allow(unused_imports)] +pub use crate::ui::*; diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 0000000..7fc251b --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: MIT + +[toolchain] +channel = "1.89.0" +components = ["clippy", "rustfmt"] +profile = "minimal" diff --git a/src/backend.rs b/src/backend.rs deleted file mode 100644 index 613be17..0000000 --- a/src/backend.rs +++ /dev/null @@ -1,329 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025 . All rights reserved. - -//! Transcription backend selection and implementations (CPU/GPU) used by PolyScribe. -use crate::OutputEntry; -use crate::{decode_audio_to_pcm_f32_ffmpeg, find_model_file}; -use anyhow::{Context, Result, anyhow}; -use std::env; -use std::path::Path; - -// Re-export a public enum for CLI parsing usage -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -/// Kind of transcription backend to use. -pub enum BackendKind { - /// Automatically detect the best available backend (CUDA > HIP > Vulkan > CPU). - Auto, - /// Pure CPU backend using whisper-rs. - Cpu, - /// NVIDIA CUDA backend (requires CUDA runtime available at load time and proper feature build). - Cuda, - /// AMD ROCm/HIP backend (requires hip/rocBLAS libraries available and proper feature build). - Hip, - /// Vulkan backend (experimental; requires Vulkan loader/SDK and feature build). - Vulkan, -} - -/// Abstraction for a transcription backend. -pub trait TranscribeBackend { - /// Backend kind implemented by this type. - fn kind(&self) -> BackendKind; - /// Transcribe the given audio and return transcript entries. - fn transcribe( - &self, - audio_path: &Path, - speaker: &str, - language: Option<&str>, - gpu_layers: Option, - progress: Option<&(dyn Fn(i32) + Send + Sync)>, - ) -> Result>; -} - -fn check_lib(_names: &[&str]) -> bool { - #[cfg(test)] - { - // During unit tests, avoid touching system libs to prevent loader crashes in CI. - false - } - #[cfg(not(test))] - { - // Disabled runtime dlopen probing to avoid loader instability; rely on environment overrides. - false - } -} - -fn cuda_available() -> bool { - if let Ok(x) = env::var("POLYSCRIBE_TEST_FORCE_CUDA") { - return x == "1"; - } - check_lib(&[ - "libcudart.so", - "libcudart.so.12", - "libcudart.so.11", - "libcublas.so", - "libcublas.so.12", - ]) -} - -fn hip_available() -> bool { - if let Ok(x) = env::var("POLYSCRIBE_TEST_FORCE_HIP") { - return x == "1"; - } - check_lib(&["libhipblas.so", "librocblas.so"]) -} - -fn vulkan_available() -> bool { - if let Ok(x) = env::var("POLYSCRIBE_TEST_FORCE_VULKAN") { - return x == "1"; - } - check_lib(&["libvulkan.so.1", "libvulkan.so"]) -} - -/// CPU-based transcription backend using whisper-rs. -#[derive(Default)] -pub struct CpuBackend; -/// CUDA-accelerated transcription backend for NVIDIA GPUs. -#[derive(Default)] -pub struct CudaBackend; -/// ROCm/HIP-accelerated transcription backend for AMD GPUs. -#[derive(Default)] -pub struct HipBackend; -/// Vulkan-based transcription backend (experimental/incomplete). -#[derive(Default)] -pub struct VulkanBackend; - -macro_rules! impl_whisper_backend { - ($ty:ty, $kind:expr) => { - impl TranscribeBackend for $ty { - fn kind(&self) -> BackendKind { $kind } - fn transcribe( - &self, - audio_path: &Path, - speaker: &str, - language: Option<&str>, - _gpu_layers: Option, - progress: Option<&(dyn Fn(i32) + Send + Sync)>, - ) -> Result> { - transcribe_with_whisper_rs(audio_path, speaker, language, progress) - } - } - }; -} - -impl_whisper_backend!(CpuBackend, BackendKind::Cpu); -impl_whisper_backend!(CudaBackend, BackendKind::Cuda); -impl_whisper_backend!(HipBackend, BackendKind::Hip); - -impl TranscribeBackend for VulkanBackend { - fn kind(&self) -> BackendKind { - BackendKind::Vulkan - } - fn transcribe( - &self, - _audio_path: &Path, - _speaker: &str, - _language: Option<&str>, - _gpu_layers: Option, - _progress: Option<&(dyn Fn(i32) + Send + Sync)>, - ) -> Result> { - Err(anyhow!( - "Vulkan backend not yet wired to whisper.cpp FFI. Build with --features gpu-vulkan and ensure Vulkan SDK is installed. How to fix: install Vulkan loader (libvulkan), set VULKAN_SDK, and run cargo build --features gpu-vulkan." - )) - } -} - -/// Result of choosing a transcription backend. -pub struct SelectionResult { - /// The constructed backend instance to perform transcription with. - pub backend: Box, - /// Which backend kind was ultimately selected. - pub chosen: BackendKind, - /// Which backend kinds were detected as available on this system. - pub detected: Vec, -} - -/// Select an appropriate backend based on user request and system detection. -/// -/// If `requested` is `BackendKind::Auto`, the function prefers CUDA, then HIP, -/// then Vulkan, falling back to CPU when no GPU backend is detected. When a -/// specific GPU backend is requested but unavailable, an error is returned with -/// guidance on how to enable it. -/// -/// Set `verbose` to true to print detection/selection info to stderr. -pub fn select_backend(requested: BackendKind, verbose: bool) -> Result { - let mut detected = Vec::new(); - if cuda_available() { - detected.push(BackendKind::Cuda); - } - if hip_available() { - detected.push(BackendKind::Hip); - } - if vulkan_available() { - detected.push(BackendKind::Vulkan); - } - - let instantiate_backend = |k: BackendKind| -> Box { - match k { - BackendKind::Cpu => Box::new(CpuBackend::default()), - BackendKind::Cuda => Box::new(CudaBackend::default()), - BackendKind::Hip => Box::new(HipBackend::default()), - BackendKind::Vulkan => Box::new(VulkanBackend::default()), - BackendKind::Auto => Box::new(CpuBackend::default()), // placeholder for Auto - } - }; - - let chosen = match requested { - BackendKind::Auto => { - if detected.contains(&BackendKind::Cuda) { - BackendKind::Cuda - } else if detected.contains(&BackendKind::Hip) { - BackendKind::Hip - } else if detected.contains(&BackendKind::Vulkan) { - BackendKind::Vulkan - } else { - BackendKind::Cpu - } - } - BackendKind::Cuda => { - if detected.contains(&BackendKind::Cuda) { - BackendKind::Cuda - } else { - return Err(anyhow!( - "Requested CUDA backend but CUDA libraries/devices not detected. How to fix: install NVIDIA driver + CUDA toolkit, ensure libcudart/libcublas are in loader path, and build with --features gpu-cuda." - )); - } - } - BackendKind::Hip => { - if detected.contains(&BackendKind::Hip) { - BackendKind::Hip - } else { - return Err(anyhow!( - "Requested ROCm/HIP backend but libraries/devices not detected. How to fix: install ROCm hipBLAS/rocBLAS, ensure libs are in loader path, and build with --features gpu-hip." - )); - } - } - BackendKind::Vulkan => { - if detected.contains(&BackendKind::Vulkan) { - BackendKind::Vulkan - } else { - return Err(anyhow!( - "Requested Vulkan backend but libvulkan not detected. How to fix: install Vulkan loader/SDK and build with --features gpu-vulkan." - )); - } - } - BackendKind::Cpu => BackendKind::Cpu, - }; - - if verbose { - crate::dlog!(1, "Detected backends: {:?}", detected); - crate::dlog!(1, "Selected backend: {:?}", chosen); - } - - Ok(SelectionResult { - backend: instantiate_backend(chosen), - chosen, - detected, - }) -} - -// Internal helper: transcription using whisper-rs with CPU/GPU (depending on build features) -#[allow(clippy::too_many_arguments)] -pub(crate) fn transcribe_with_whisper_rs( - audio_path: &Path, - speaker: &str, - language: Option<&str>, - progress: Option<&(dyn Fn(i32) + Send + Sync)>, -) -> Result> { - let report = |p: i32| { - if let Some(cb) = progress { cb(p); } - }; - report(0); - - let pcm_samples = decode_audio_to_pcm_f32_ffmpeg(audio_path)?; - report(5); - - let model_path = find_model_file()?; - let english_only_model = model_path - .file_name() - .and_then(|s| s.to_str()) - .map(|s| s.contains(".en.") || s.ends_with(".en.bin")) - .unwrap_or(false); - if let Some(lang) = language { - if english_only_model && lang != "en" { - return Err(anyhow!( - "Selected model is English-only ({}), but a non-English language hint '{}' was provided. Please use a multilingual model or set WHISPER_MODEL.", - model_path.display(), - lang - )); - } - } - let model_path_str = model_path - .to_str() - .ok_or_else(|| anyhow!("Model path not valid UTF-8: {}", model_path.display()))?; - - if crate::verbose_level() < 2 { - // Some builds of whisper/ggml expect these env vars; harmless if unknown - unsafe { - std::env::set_var("GGML_LOG_LEVEL", "0"); - std::env::set_var("WHISPER_PRINT_PROGRESS", "0"); - } - } - - let (_context, mut state) = crate::with_suppressed_stderr(|| { - let params = whisper_rs::WhisperContextParameters::default(); - let context = whisper_rs::WhisperContext::new_with_params(model_path_str, params) - .with_context(|| format!("Failed to load Whisper model at {}", model_path.display()))?; - let state = context - .create_state() - .map_err(|e| anyhow!("Failed to create Whisper state: {:?}", e))?; - Ok::<_, anyhow::Error>((context, state)) - })?; - report(20); - - let mut full_params = - whisper_rs::FullParams::new(whisper_rs::SamplingStrategy::Greedy { best_of: 1 }); - let threads = std::thread::available_parallelism() - .map(|n| n.get() as i32) - .unwrap_or(1); - full_params.set_n_threads(threads); - full_params.set_translate(false); - if let Some(lang) = language { - full_params.set_language(Some(lang)); - } - report(30); - - crate::with_suppressed_stderr(|| { - report(40); - state - .full(full_params, &pcm_samples) - .map_err(|e| anyhow!("Whisper full() failed: {:?}", e)) - })?; - - report(90); - let num_segments = state - .full_n_segments() - .map_err(|e| anyhow!("Failed to get segments: {:?}", e))?; - let mut entries = Vec::new(); - for seg_idx in 0..num_segments { - let segment_text = state - .full_get_segment_text(seg_idx) - .map_err(|e| anyhow!("Failed to get segment text: {:?}", e))?; - let t0 = state - .full_get_segment_t0(seg_idx) - .map_err(|e| anyhow!("Failed to get segment t0: {:?}", e))?; - let t1 = state - .full_get_segment_t1(seg_idx) - .map_err(|e| anyhow!("Failed to get segment t1: {:?}", e))?; - let start = (t0 as f64) * 0.01; - let end = (t1 as f64) * 0.01; - entries.push(OutputEntry { - id: 0, - speaker: speaker.to_string(), - start, - end, - text: segment_text.trim().to_string(), - }); - } - report(100); - Ok(entries) -} diff --git a/src/lib.rs b/src/lib.rs deleted file mode 100644 index f3a5bae..0000000 --- a/src/lib.rs +++ /dev/null @@ -1,571 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025 . All rights reserved. - -#![forbid(elided_lifetimes_in_paths)] -#![forbid(unused_must_use)] -#![deny(missing_docs)] -#![warn(clippy::all)] -//! PolyScribe library: business logic and core types. -//! -//! This crate exposes the reusable parts of the PolyScribe CLI as a library. -//! The binary entry point (main.rs) remains a thin CLI wrapper. - -use std::sync::atomic::{AtomicBool, AtomicU8, Ordering}; - -// Global runtime flags -static QUIET: AtomicBool = AtomicBool::new(false); -static NO_INTERACTION: AtomicBool = AtomicBool::new(false); -static VERBOSE: AtomicU8 = AtomicU8::new(0); -static NO_PROGRESS: AtomicBool = AtomicBool::new(false); - -/// Set quiet mode: when true, non-interactive logs should be suppressed. -pub fn set_quiet(enabled: bool) { - QUIET.store(enabled, Ordering::Relaxed); -} -/// Return current quiet mode state. -pub fn is_quiet() -> bool { - QUIET.load(Ordering::Relaxed) -} - -/// Set non-interactive mode: when true, interactive prompts must be skipped. -pub fn set_no_interaction(enabled: bool) { - NO_INTERACTION.store(enabled, Ordering::Relaxed); -} -/// Return current non-interactive state. -pub fn is_no_interaction() -> bool { - NO_INTERACTION.load(Ordering::Relaxed) -} - -/// Set verbose level (0 = normal, 1 = verbose, 2 = super-verbose) -pub fn set_verbose(level: u8) { - VERBOSE.store(level, Ordering::Relaxed); -} -/// Get current verbose level. -pub fn verbose_level() -> u8 { - VERBOSE.load(Ordering::Relaxed) -} - -/// Disable interactive progress indicators (bars/spinners) -pub fn set_no_progress(enabled: bool) { - NO_PROGRESS.store(enabled, Ordering::Relaxed); -} -/// Return current no-progress state -pub fn is_no_progress() -> bool { - NO_PROGRESS.load(Ordering::Relaxed) -} - -/// Check whether stdin is connected to a TTY. Used to avoid blocking prompts when not interactive. -pub fn stdin_is_tty() -> bool { - use std::io::IsTerminal as _; - std::io::stdin().is_terminal() -} - -/// A guard that temporarily redirects stderr to /dev/null on Unix when quiet mode is active. -/// No-op on non-Unix or when quiet is disabled. Restores stderr on drop. -pub struct StderrSilencer { - #[cfg(unix)] - old_stderr_fd: i32, - #[cfg(unix)] - devnull_fd: i32, - active: bool, -} - -impl StderrSilencer { - /// Activate stderr silencing if quiet is set and on Unix; otherwise returns a no-op guard. - pub fn activate_if_quiet() -> Self { - if !is_quiet() { - return Self { - active: false, - #[cfg(unix)] - old_stderr_fd: -1, - #[cfg(unix)] - devnull_fd: -1, - }; - } - Self::activate() - } - - /// Activate stderr silencing unconditionally (used internally); no-op on non-Unix. - pub fn activate() -> Self { - #[cfg(unix)] - unsafe { - let old_fd = dup(2); - if old_fd < 0 { - return Self { - active: false, - old_stderr_fd: -1, - devnull_fd: -1, - }; - } - // Open /dev/null for writing - let devnull_cstr = std::ffi::CString::new("/dev/null").unwrap(); - let devnull_fd = open(devnull_cstr.as_ptr(), O_WRONLY); - if devnull_fd < 0 { - close(old_fd); - return Self { - active: false, - old_stderr_fd: -1, - devnull_fd: -1, - }; - } - if dup2(devnull_fd, 2) < 0 { - close(devnull_fd); - close(old_fd); - return Self { - active: false, - old_stderr_fd: -1, - devnull_fd: -1, - }; - } - Self { - active: true, - old_stderr_fd: old_fd, - devnull_fd: devnull_fd, - } - } - #[cfg(not(unix))] - { - Self { active: false } - } - } -} - -impl Drop for StderrSilencer { - fn drop(&mut self) { - if !self.active { - return; - } - #[cfg(unix)] - unsafe { - let _ = dup2(self.old_stderr_fd, 2); - let _ = close(self.devnull_fd); - let _ = close(self.old_stderr_fd); - } - self.active = false; - } -} - -/// Run a closure while temporarily suppressing stderr on Unix when appropriate. -/// On Windows/non-Unix, this is a no-op wrapper. -/// This helper uses RAII + panic catching to ensure restoration before resuming panic. -pub fn with_suppressed_stderr(f: F) -> T -where - F: FnOnce() -> T, -{ - // Suppress noisy native logs unless super-verbose (-vv) is enabled. - if verbose_level() < 2 { - let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { - let _guard = StderrSilencer::activate(); - f() - })); - match result { - Ok(value) => value, - Err(panic_payload) => std::panic::resume_unwind(panic_payload), - } - } else { - f() - } -} - -/// Centralized UI helpers (TTY-aware, quiet/verbose-aware) -pub mod ui; - -/// Logging macros and helpers -/// Log an error using the UI helper (always printed). Recommended for user-visible errors. -#[macro_export] -macro_rules! elog { - ($($arg:tt)*) => {{ - $crate::ui::error(format!($($arg)*)); - }} -} - -/// Log a warning using the UI helper (printed even in quiet mode). -#[macro_export] -macro_rules! wlog { - ($($arg:tt)*) => {{ - $crate::ui::warn(format!($($arg)*)); - }} -} - -/// Log an informational line using the UI helper unless quiet mode is enabled. -#[macro_export] -macro_rules! ilog { - ($($arg:tt)*) => {{ - if !$crate::is_quiet() { $crate::ui::info(format!($($arg)*)); } - }} -} - -/// Log a debug/trace line when verbose level is at least the given level (u8). -#[macro_export] -macro_rules! dlog { - ($lvl:expr, $($arg:tt)*) => {{ - if !$crate::is_quiet() && $crate::verbose_level() >= $lvl { $crate::ui::info(format!("DEBUG{}: {}", $lvl, format!($($arg)*))); } - }} -} - -/// Backward-compatibility: map old qlog! to ilog! -#[macro_export] -macro_rules! qlog { - ($($arg:tt)*) => {{ $crate::ilog!($($arg)*); }} -} - -use anyhow::{Context, Result, anyhow}; -use chrono::Local; -use std::env; -use std::fs::create_dir_all; -use std::path::{Path, PathBuf}; -use std::process::Command; - -#[cfg(unix)] -use libc::{O_WRONLY, close, dup, dup2, open}; - -/// Re-export backend module (GPU/CPU selection and transcription). -pub mod backend; -/// Re-export models module (model listing/downloading/updating). -pub mod models; - -/// Transcript entry for a single segment. -#[derive(Debug, serde::Serialize, Clone)] -pub struct OutputEntry { - /// Sequential id in output ordering. - pub id: u64, - /// Speaker label associated with the segment. - pub speaker: String, - /// Start time in seconds. - pub start: f64, - /// End time in seconds. - pub end: f64, - /// Text content. - pub text: String, -} - -/// Return a YYYY-MM-DD date prefix string for output file naming. -pub fn date_prefix() -> String { - Local::now().format("%Y-%m-%d").to_string() -} - -/// Format a floating-point number of seconds as SRT timestamp (HH:MM:SS,mmm). -pub fn format_srt_time(seconds: f64) -> String { - let total_ms = (seconds * 1000.0).round() as i64; - let ms = total_ms % 1000; - let total_secs = total_ms / 1000; - let sec = total_secs % 60; - let min = (total_secs / 60) % 60; - let hour = total_secs / 3600; - format!("{hour:02}:{min:02}:{sec:02},{ms:03}") -} - -/// Render a list of transcript entries to SRT format. -pub fn render_srt(entries: &[OutputEntry]) -> String { - let mut srt = String::new(); - for (index, entry) in entries.iter().enumerate() { - let srt_index = index + 1; - srt.push_str(&format!("{srt_index}\n")); - srt.push_str(&format!( - "{} --> {}\n", - format_srt_time(entry.start), - format_srt_time(entry.end) - )); - if !entry.speaker.is_empty() { - srt.push_str(&format!("{}: {}\n", entry.speaker, entry.text)); - } else { - srt.push_str(&format!("{}\n", entry.text)); - } - srt.push('\n'); - } - srt -} - -/// Determine the default models directory, honoring POLYSCRIBE_MODELS_DIR override. -pub fn models_dir_path() -> PathBuf { - if let Ok(env_val) = env::var("POLYSCRIBE_MODELS_DIR") { - let env_path = PathBuf::from(env_val); - if !env_path.as_os_str().is_empty() { - return env_path; - } - } - if cfg!(debug_assertions) { - return PathBuf::from("models"); - } - if let Ok(xdg) = env::var("XDG_DATA_HOME") { - if !xdg.is_empty() { - return PathBuf::from(xdg).join("polyscribe").join("models"); - } - } - if let Ok(home) = env::var("HOME") { - if !home.is_empty() { - return PathBuf::from(home) - .join(".local") - .join("share") - .join("polyscribe") - .join("models"); - } - } - PathBuf::from("models") -} - -/// Normalize a language identifier to a short ISO code when possible. -pub fn normalize_lang_code(input: &str) -> Option { - let mut lang = input.trim().to_lowercase(); - if lang.is_empty() || lang == "auto" || lang == "c" || lang == "posix" { - return None; - } - if let Some((prefix, _)) = lang.split_once('.') { - lang = prefix.to_string(); - } - if let Some((prefix, _)) = lang.split_once('_') { - lang = prefix.to_string(); - } - let code = match lang.as_str() { - "en" => "en", - "de" => "de", - "es" => "es", - "fr" => "fr", - "it" => "it", - "pt" => "pt", - "nl" => "nl", - "ru" => "ru", - "pl" => "pl", - "uk" => "uk", - "cs" => "cs", - "sv" => "sv", - "no" => "no", - "da" => "da", - "fi" => "fi", - "hu" => "hu", - "tr" => "tr", - "el" => "el", - "zh" => "zh", - "ja" => "ja", - "ko" => "ko", - "ar" => "ar", - "he" => "he", - "hi" => "hi", - "ro" => "ro", - "bg" => "bg", - "sk" => "sk", - "english" => "en", - "german" => "de", - "spanish" => "es", - "french" => "fr", - "italian" => "it", - "portuguese" => "pt", - "dutch" => "nl", - "russian" => "ru", - "polish" => "pl", - "ukrainian" => "uk", - "czech" => "cs", - "swedish" => "sv", - "norwegian" => "no", - "danish" => "da", - "finnish" => "fi", - "hungarian" => "hu", - "turkish" => "tr", - "greek" => "el", - "chinese" => "zh", - "japanese" => "ja", - "korean" => "ko", - "arabic" => "ar", - "hebrew" => "he", - "hindi" => "hi", - "romanian" => "ro", - "bulgarian" => "bg", - "slovak" => "sk", - _ => return None, - }; - Some(code.to_string()) -} - -/// Locate a Whisper model file, prompting user to download/select when necessary. -pub fn find_model_file() -> Result { - let models_dir_buf = models_dir_path(); - let models_dir = models_dir_buf.as_path(); - if !models_dir.exists() { - create_dir_all(models_dir).with_context(|| { - format!( - "Failed to create models directory: {}", - models_dir.display() - ) - })?; - } - - if let Ok(env_model) = env::var("WHISPER_MODEL") { - let model_path = PathBuf::from(env_model); - if model_path.is_file() { - let _ = std::fs::write(models_dir.join(".last_model"), model_path.display().to_string()); - return Ok(model_path); - } - } - - // Non-interactive mode: automatic selection and optional download - if crate::is_no_interaction() { - if let Some(local) = crate::models::pick_best_local_model(models_dir) { - let _ = std::fs::write(models_dir.join(".last_model"), local.display().to_string()); - return Ok(local); - } else { - ilog!("No local models found; downloading large-v3-turbo-q8_0..."); - let path = crate::models::ensure_model_available_noninteractive("large-v3-turbo-q8_0") - .with_context(|| "Failed to download required model 'large-v3-turbo-q8_0'")?; - let _ = std::fs::write(models_dir.join(".last_model"), path.display().to_string()); - return Ok(path); - } - } - - let mut candidates: Vec = Vec::new(); - let dir_entries = std::fs::read_dir(models_dir) - .with_context(|| format!("Failed to read models directory: {}", models_dir.display()))?; - for entry in dir_entries { - let entry = entry?; - let path = entry.path(); - if path.is_file() { - if let Some(ext) = path - .extension() - .and_then(|s| s.to_str()) - .map(|s| s.to_lowercase()) - { - if ext == "bin" { - candidates.push(path); - } - } - } - } - - if candidates.is_empty() { - // No models found: prompt interactively (TTY only) - wlog!( - "{}", - format!( - "No Whisper model files (*.bin) found in {}.", - models_dir.display() - ) - ); - if crate::is_no_interaction() || !crate::stdin_is_tty() { - return Err(anyhow!( - "No models available and interactive mode is disabled. Please set WHISPER_MODEL or run with --download-models." - )); - } - let input = crate::ui::prompt_line("Would you like to download models now? [Y/n]: ").unwrap_or_default(); - let answer = input.trim().to_lowercase(); - if answer.is_empty() || answer == "y" || answer == "yes" { - if let Err(e) = models::run_interactive_model_downloader() { - elog!("Downloader failed: {:#}", e); - } - candidates.clear(); - let dir_entries2 = std::fs::read_dir(models_dir).with_context(|| { - format!("Failed to read models directory: {}", models_dir.display()) - })?; - for entry in dir_entries2 { - let entry = entry?; - let path = entry.path(); - if path.is_file() { - if let Some(ext) = path - .extension() - .and_then(|s| s.to_str()) - .map(|s| s.to_lowercase()) - { - if ext == "bin" { - candidates.push(path); - } - } - } - } - } - } - - if candidates.is_empty() { - return Err(anyhow!( - "No Whisper model files (*.bin) available in {}", - models_dir.display() - )); - } - - if candidates.len() == 1 { - let only_model = candidates.remove(0); - let _ = std::fs::write(models_dir.join(".last_model"), only_model.display().to_string()); - return Ok(only_model); - } - - let last_file = models_dir.join(".last_model"); - if let Ok(previous_content) = std::fs::read_to_string(&last_file) { - let previous_content = previous_content.trim(); - if !previous_content.is_empty() { - let previous_path = PathBuf::from(previous_content); - if previous_path.is_file() && candidates.iter().any(|c| c == &previous_path) { - return Ok(previous_path); - } - } - } - - crate::ui::println_above_bars(format!("Multiple Whisper models found in {}:", models_dir.display())); - for (index, path) in candidates.iter().enumerate() { - crate::ui::println_above_bars(format!(" {}) {}", index + 1, path.display())); - } - let input = crate::ui::prompt_line(&format!("Select model by number [1-{}]: ", candidates.len())) - .map_err(|_| anyhow!("Failed to read selection"))?; - let selection: usize = input - .trim() - .parse() - .map_err(|_| anyhow!("Invalid selection: {}", input.trim()))?; - if selection == 0 || selection > candidates.len() { - return Err(anyhow!("Selection out of range")); - } - let chosen = candidates.swap_remove(selection - 1); - let _ = std::fs::write(models_dir.join(".last_model"), chosen.display().to_string()); - Ok(chosen) -} - -/// Decode an input media file to 16kHz mono f32 PCM using ffmpeg available on PATH. -pub fn decode_audio_to_pcm_f32_ffmpeg(audio_path: &Path) -> Result> { - let output = match Command::new("ffmpeg") - .arg("-i") - .arg(audio_path) - .arg("-f") - .arg("f32le") - .arg("-ac") - .arg("1") - .arg("-ar") - .arg("16000") - .arg("pipe:1") - .output() - { - Ok(o) => o, - Err(e) => { - if e.kind() == std::io::ErrorKind::NotFound { - return Err(anyhow!( - "ffmpeg not found on PATH. Please install ffmpeg and ensure it is available." - )); - } else { - return Err(anyhow!( - "Failed to execute ffmpeg for {}: {}", - audio_path.display(), - e - )); - } - } - }; - if !output.status.success() { - let stderr_str = String::from_utf8_lossy(&output.stderr); - return Err(anyhow!( - "Failed to decode audio from {} using ffmpeg. This may indicate the file is not a valid or supported audio/video file, is corrupted, or cannot be opened. ffmpeg stderr: {}", - audio_path.display(), - stderr_str.trim() - )); - } - let data = output.stdout; - if data.len() % 4 != 0 { - let truncated = data.len() - (data.len() % 4); - let mut samples = Vec::with_capacity(truncated / 4); - for chunk in data[..truncated].chunks_exact(4) { - let arr = [chunk[0], chunk[1], chunk[2], chunk[3]]; - samples.push(f32::from_le_bytes(arr)); - } - Ok(samples) - } else { - let mut samples = Vec::with_capacity(data.len() / 4); - for chunk in data.chunks_exact(4) { - let arr = [chunk[0], chunk[1], chunk[2], chunk[3]]; - samples.push(f32::from_le_bytes(arr)); - } - Ok(samples) - } -} diff --git a/src/main.rs b/src/main.rs deleted file mode 100644 index 3d7b1ba..0000000 --- a/src/main.rs +++ /dev/null @@ -1,483 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025 . All rights reserved. - -use std::fs::{File, create_dir_all}; -use std::io::{self, Read, Write}; -use std::path::{Path, PathBuf}; - -use anyhow::{Context, Result, anyhow}; -use clap::{Parser, Subcommand, ValueEnum, CommandFactory}; -use clap_complete::Shell; -use serde::{Deserialize, Serialize}; - -use polyscribe::{OutputEntry, date_prefix, normalize_lang_code, render_srt}; - -#[derive(Subcommand, Debug, Clone)] -enum AuxCommands { - Completions { - #[arg(value_enum)] - shell: Shell, - }, - Man, -} - -#[derive(ValueEnum, Debug, Clone, Copy)] -#[value(rename_all = "kebab-case")] -enum GpuBackendCli { - Auto, - Cpu, - Cuda, - Hip, - Vulkan, -} - -#[derive(Parser, Debug)] -#[command( - name = "PolyScribe", - bin_name = "polyscribe", - version, - about = "Merge JSON transcripts or transcribe audio using native whisper" -)] -struct Args { - /// Increase verbosity (-v, -vv). Repeat to increase. - /// Debug logs appear with -v; very verbose with -vv. Logs go to stderr. - #[arg(short = 'v', long = "verbose", action = clap::ArgAction::Count, global = true)] - verbose: u8, - - /// Quiet mode: suppress non-error logging on stderr (overrides -v) - /// Does not suppress interactive prompts or stdout output. - #[arg(short = 'q', long = "quiet", global = true)] - quiet: bool, - - /// Non-interactive mode: never prompt; use defaults instead. - #[arg(long = "no-interaction", global = true)] - no_interaction: bool, - - /// Disable interactive progress indicators (bars/spinners) - #[arg(long = "no-progress", global = true)] - no_progress: bool, - - /// Optional auxiliary subcommands (completions, man) - #[command(subcommand)] - aux: Option, - - /// Input .json transcript files or audio files to merge/transcribe - inputs: Vec, - - /// Output file path base or directory (date prefix added). - /// In merge mode: base path. - /// In separate mode: directory. - /// If omitted: prints JSON to stdout for merge mode; separate mode requires directory for multiple inputs. - #[arg(short, long, value_name = "FILE")] - output: Option, - - /// Merge all inputs into a single output; if not set, each input is written as a separate output - #[arg(short = 'm', long = "merge")] - merge: bool, - - /// Merge and also write separate outputs per input; requires -o OUTPUT_DIR - #[arg(long = "merge-and-separate")] - merge_and_separate: bool, - - /// Prompt for speaker names per input file - #[arg(long = "set-speaker-names")] - set_speaker_names: bool, - - /// Language code to use for transcription (e.g., en, de). No auto-detection. - #[arg(short, long, value_name = "LANG")] - language: Option, - - /// Launch interactive model downloader (list HF models, multi-select and download) - #[arg(long)] - download_models: bool, - - /// Update local Whisper models by comparing hashes/sizes with remote manifest - #[arg(long)] - update_models: bool, -} - -#[derive(Debug, Deserialize)] -struct InputRoot { - #[serde(default)] - segments: Vec, -} - -#[derive(Debug, Deserialize)] -struct InputSegment { - start: f64, - end: f64, - text: String, -} - -#[derive(Debug, Serialize)] -struct OutputRoot { - items: Vec, -} - -fn is_json_file(path: &Path) -> bool { - matches!(path.extension().and_then(|s| s.to_str()).map(|s| s.to_lowercase()), Some(ext) if ext == "json") -} - -fn is_audio_file(path: &Path) -> bool { - if let Some(ext) = path.extension().and_then(|s| s.to_str()).map(|s| s.to_lowercase()) { - let exts = [ - "mp3", "wav", "m4a", "mp4", "aac", "flac", "ogg", "wma", "webm", "mkv", "mov", "avi", - "m4b", "3gp", "opus", "aiff", "alac", - ]; - return exts.contains(&ext.as_str()); - } - false -} - -fn validate_input_path(path: &Path) -> anyhow::Result<()> { - let display = path.display(); - if !path.exists() { - return Err(anyhow!("Input not found: {}", display)); - } - let metadata = std::fs::metadata(path).with_context(|| format!("Failed to stat input: {}", display))?; - if metadata.is_dir() { - return Err(anyhow!("Input is a directory (expected a file): {}", display)); - } - std::fs::File::open(path) - .with_context(|| format!("Failed to open input file: {}", display)) - .map(|_| ()) -} - -fn sanitize_speaker_name(raw: &str) -> String { - if let Some((prefix, rest)) = raw.split_once('-') { - if !prefix.is_empty() && prefix.chars().all(|c| c.is_ascii_digit()) { - return rest.to_string(); - } - } - raw.to_string() -} - -fn prompt_speaker_name_for_path( - _path: &Path, - default_name: &str, - enabled: bool, -) -> String { - if !enabled || polyscribe::is_no_interaction() { - return sanitize_speaker_name(default_name); - } - // TODO implement cliclack for this - let mut input_line = String::new(); - match std::io::stdin().read_line(&mut input_line) { - Ok(_) => { - let trimmed = input_line.trim(); - if trimmed.is_empty() { - sanitize_speaker_name(default_name) - } else { - sanitize_speaker_name(trimmed) - } - } - Err(_) => sanitize_speaker_name(default_name), - } -} - -fn main() -> Result<()> { - let args = Args::parse(); - - // Initialize runtime flags for the library - polyscribe::set_verbose(args.verbose); - polyscribe::set_quiet(args.quiet); - polyscribe::set_no_interaction(args.no_interaction); - polyscribe::set_no_progress(args.no_progress); - - // Handle aux subcommands - if let Some(aux) = &args.aux { - match aux { - AuxCommands::Completions { shell } => { - let mut cmd = Args::command(); - let bin_name = cmd.get_name().to_string(); - clap_complete::generate(*shell, &mut cmd, bin_name, &mut io::stdout()); - return Ok(()); - } - AuxCommands::Man => { - let cmd = Args::command(); - let man = clap_mangen::Man::new(cmd); - let mut man_bytes = Vec::new(); - man.render(&mut man_bytes)?; - io::stdout().write_all(&man_bytes)?; - return Ok(()); - } - } - } - - // Optional model management actions - if args.download_models { - if let Err(err) = polyscribe::models::run_interactive_model_downloader() { - polyscribe::elog!("Model downloader failed: {:#}", err); - } - if args.inputs.is_empty() { - return Ok(()) - } - } - if args.update_models { - if let Err(err) = polyscribe::models::update_local_models() { - polyscribe::elog!("Model update failed: {:#}", err); - return Err(err); - } - if args.inputs.is_empty() { - return Ok(()) - } - } - - // Process inputs - let mut inputs = args.inputs; - if inputs.is_empty() { - return Err(anyhow!("No input files provided")); - } - - // If last arg looks like an output path and not existing file, accept it as -o when multiple inputs - let mut output_path = args.output; - if output_path.is_none() && inputs.len() >= 2 { - if let Some(candidate_output) = inputs.last().cloned() { - if !Path::new(&candidate_output).exists() { - inputs.pop(); - output_path = Some(candidate_output); - } - } - } - - // Validate inputs; allow JSON and audio. For audio, require --language. - for input_arg in &inputs { - let path_ref = Path::new(input_arg); - validate_input_path(path_ref)?; - if !(is_json_file(path_ref) || is_audio_file(path_ref)) { - return Err(anyhow!( - "Unsupported input type (expected .json transcript or audio media): {}", - path_ref.display() - )); - } - if is_audio_file(path_ref) && args.language.is_none() { - return Err(anyhow!("Please specify --language (e.g., --language en). Language detection was removed.")); - } - } - - // Derive speakers (prompt if requested) - let speakers: Vec = inputs - .iter() - .map(|input_path| { - let path = Path::new(input_path); - let default_speaker = sanitize_speaker_name( - path.file_stem().and_then(|s| s.to_str()).unwrap_or("speaker"), - ); - prompt_speaker_name_for_path(path, &default_speaker, args.set_speaker_names) - }) - .collect(); - - // MERGE-AND-SEPARATE mode - if args.merge_and_separate { - polyscribe::dlog!(1, "Mode: merge-and-separate; output_dir={:?}", output_path); - let out_dir = match output_path.as_ref() { - Some(p) => PathBuf::from(p), - None => return Err(anyhow!("--merge-and-separate requires -o OUTPUT_DIR")), - }; - if !out_dir.as_os_str().is_empty() { - create_dir_all(&out_dir).with_context(|| { - format!("Failed to create output directory: {}", out_dir.display()) - })?; - } - - let mut merged_entries: Vec = Vec::new(); - for (idx, input_path) in inputs.iter().enumerate() { - let path = Path::new(input_path); - let speaker = speakers[idx].clone(); - // Decide based on input type (JSON transcript vs audio to transcribe) - // TODO remove duplicate - let mut entries: Vec = if is_json_file(path) { - let mut buf = String::new(); - File::open(path) - .with_context(|| format!("Failed to open: {input_path}"))? - .read_to_string(&mut buf) - .with_context(|| format!("Failed to read: {input_path}"))?; - let root: InputRoot = serde_json::from_str(&buf) - .with_context(|| format!("Invalid JSON transcript parsed from {input_path}"))?; - root - .segments - .into_iter() - .map(|seg| OutputEntry { id: 0, speaker: speaker.clone(), start: seg.start, end: seg.end, text: seg.text }) - .collect() - } else { - let lang_norm: Option = args.language.as_deref().and_then(|s| normalize_lang_code(s)); - let selected_backend = polyscribe::backend::select_backend(polyscribe::backend::BackendKind::Auto, args.verbose > 0)?; - selected_backend.backend.transcribe(path, &speaker, lang_norm.as_deref(), None, None)? - }; - // Sort and id per-file - // TODO remove duplicate - entries.sort_by(|a, b| a.start.partial_cmp(&b.start).unwrap_or(std::cmp::Ordering::Equal) - .then(a.end.partial_cmp(&b.end).unwrap_or(std::cmp::Ordering::Equal))); - for (i, entry) in entries.iter_mut().enumerate() { entry.id = i as u64; } - // Write per-file outputs - let stem = path.file_stem().and_then(|s| s.to_str()).unwrap_or("output"); - let date = date_prefix(); - let base_name = format!("{date}_{stem}"); - let json_path = out_dir.join(format!("{}.json", &base_name)); - let toml_path = out_dir.join(format!("{}.toml", &base_name)); - let srt_path = out_dir.join(format!("{}.srt", &base_name)); - - let output_bundle = OutputRoot { items: entries.clone() }; - let mut json_file = File::create(&json_path).with_context(|| format!("Failed to create output file: {}", json_path.display()))?; - serde_json::to_writer_pretty(&mut json_file, &output_bundle)?; writeln!(&mut json_file)?; - let toml_str = toml::to_string_pretty(&output_bundle)?; - let mut toml_file = File::create(&toml_path).with_context(|| format!("Failed to create output file: {}", toml_path.display()))?; - toml_file.write_all(toml_str.as_bytes())?; if !toml_str.ends_with('\n') { writeln!(&mut toml_file)?; } - let srt_str = render_srt(&output_bundle.items); - let mut srt_file = File::create(&srt_path).with_context(|| format!("Failed to create output file: {}", srt_path.display()))?; - srt_file.write_all(srt_str.as_bytes())?; - - merged_entries.extend(output_bundle.items.into_iter()); - } - // Write merged outputs into out_dir - // TODO remove duplicate - merged_entries.sort_by(|a, b| a.start.partial_cmp(&b.start).unwrap_or(std::cmp::Ordering::Equal) - .then(a.end.partial_cmp(&b.end).unwrap_or(std::cmp::Ordering::Equal))); - for (index, entry) in merged_entries.iter_mut().enumerate() { entry.id = index as u64; } - let merged_output = OutputRoot { items: merged_entries }; - let date = date_prefix(); - let merged_base = format!("{date}_merged"); - let merged_json_path = out_dir.join(format!("{}.json", &merged_base)); - let merged_toml_path = out_dir.join(format!("{}.toml", &merged_base)); - let merged_srt_path = out_dir.join(format!("{}.srt", &merged_base)); - let mut merged_json_file = File::create(&merged_json_path).with_context(|| format!("Failed to create output file: {}", merged_json_path.display()))?; - serde_json::to_writer_pretty(&mut merged_json_file, &merged_output)?; writeln!(&mut merged_json_file)?; - let merged_toml_str = toml::to_string_pretty(&merged_output)?; - let mut merged_toml_file = File::create(&merged_toml_path).with_context(|| format!("Failed to create output file: {}", merged_toml_path.display()))?; - merged_toml_file.write_all(merged_toml_str.as_bytes())?; if !merged_toml_str.ends_with('\n') { writeln!(&mut merged_toml_file)?; } - let merged_srt_str = render_srt(&merged_output.items); - let mut merged_srt_file = File::create(&merged_srt_path).with_context(|| format!("Failed to create output file: {}", merged_srt_path.display()))?; - merged_srt_file.write_all(merged_srt_str.as_bytes())?; - return Ok(()); - } - - // MERGE mode - if args.merge { - polyscribe::dlog!(1, "Mode: merge; output_base={:?}", output_path); - let mut entries: Vec = Vec::new(); - for (index, input_path) in inputs.iter().enumerate() { - let path = Path::new(input_path); - let speaker = speakers[index].clone(); - if is_json_file(path) { - let mut buf = String::new(); - File::open(path) - .with_context(|| format!("Failed to open: {}", input_path))? - .read_to_string(&mut buf) - .with_context(|| format!("Failed to read: {}", input_path))?; - let root: InputRoot = serde_json::from_str(&buf) - .with_context(|| format!("Invalid JSON transcript parsed from {}", input_path))?; - for seg in root.segments { - entries.push(OutputEntry { id: 0, speaker: speaker.clone(), start: seg.start, end: seg.end, text: seg.text }); - } - } else { - let lang_norm: Option = args.language.as_deref().and_then(|s| normalize_lang_code(s)); - let selected_backend = polyscribe::backend::select_backend(polyscribe::backend::BackendKind::Auto, args.verbose > 0)?; - let mut new_entries = selected_backend.backend.transcribe(path, &speaker, lang_norm.as_deref(), None, None)?; - entries.append(&mut new_entries); - } - } - // TODO remove duplicate - entries.sort_by(|a, b| a.start.partial_cmp(&b.start).unwrap_or(std::cmp::Ordering::Equal) - .then(a.end.partial_cmp(&b.end).unwrap_or(std::cmp::Ordering::Equal))); - for (i, entry) in entries.iter_mut().enumerate() { entry.id = i as u64; } - let output_bundle = OutputRoot { items: entries }; - - if let Some(path) = output_path { - let base_path = Path::new(&path); - let parent_opt = base_path.parent(); - if let Some(parent) = parent_opt { - if !parent.as_os_str().is_empty() { - create_dir_all(parent).with_context(|| { - format!("Failed to create parent directory for output: {}", parent.display()) - })?; - } - } - let stem = base_path.file_stem().and_then(|s| s.to_str()).unwrap_or("output"); - let date = date_prefix(); - let base_name = format!("{}_{}", date, stem); - let dir = parent_opt.unwrap_or(Path::new("")); - let json_path = dir.join(format!("{}.json", &base_name)); - let toml_path = dir.join(format!("{}.toml", &base_name)); - let srt_path = dir.join(format!("{}.srt", &base_name)); - - let mut json_file = File::create(&json_path).with_context(|| format!("Failed to create output file: {}", json_path.display()))?; - serde_json::to_writer_pretty(&mut json_file, &output_bundle)?; writeln!(&mut json_file)?; - let toml_str = toml::to_string_pretty(&output_bundle)?; - let mut toml_file = File::create(&toml_path).with_context(|| format!("Failed to create output file: {}", toml_path.display()))?; - toml_file.write_all(toml_str.as_bytes())?; if !toml_str.ends_with('\n') { writeln!(&mut toml_file)?; } - let srt_str = render_srt(&output_bundle.items); - let mut srt_file = File::create(&srt_path).with_context(|| format!("Failed to create output file: {}", srt_path.display()))?; - srt_file.write_all(srt_str.as_bytes())?; - } else { - let stdout = io::stdout(); - let mut handle = stdout.lock(); - serde_json::to_writer_pretty(&mut handle, &output_bundle)?; writeln!(&mut handle)?; - } - return Ok(()); - } - - // SEPARATE (default) - polyscribe::dlog!(1, "Mode: separate; output_dir={:?}", output_path); - if output_path.is_none() && inputs.len() > 1 { - return Err(anyhow!("Multiple inputs without --merge require -o OUTPUT_DIR to write separate files")); - } - let out_dir: Option = output_path.as_ref().map(PathBuf::from); - if let Some(dir) = &out_dir { - if !dir.as_os_str().is_empty() { - create_dir_all(dir).with_context(|| format!("Failed to create output directory: {}", dir.display()))?; - } - } - - for (index, input_path) in inputs.iter().enumerate() { - let path = Path::new(input_path); - let speaker = speakers[index].clone(); - // TODO remove duplicate - let mut entries: Vec = if is_json_file(path) { - let mut buf = String::new(); - File::open(path) - .with_context(|| format!("Failed to open: {input_path}"))? - .read_to_string(&mut buf) - .with_context(|| format!("Failed to read: {input_path}"))?; - let root: InputRoot = serde_json::from_str(&buf).with_context(|| format!("Invalid JSON transcript parsed from {input_path}"))?; - root - .segments - .into_iter() - .map(|seg| OutputEntry { id: 0, speaker: speaker.clone(), start: seg.start, end: seg.end, text: seg.text }) - .collect() - } else { - // Audio file: transcribe to entries - let lang_norm: Option = args.language.as_deref().and_then(|s| normalize_lang_code(s)); - let selected_backend = polyscribe::backend::select_backend(polyscribe::backend::BackendKind::Auto, args.verbose > 0)?; - selected_backend.backend.transcribe(path, &speaker, lang_norm.as_deref(), None, None)? - }; - // TODO remove duplicate - entries.sort_by(|a, b| a.start.partial_cmp(&b.start).unwrap_or(std::cmp::Ordering::Equal) - .then(a.end.partial_cmp(&b.end).unwrap_or(std::cmp::Ordering::Equal))); - for (i, entry) in entries.iter_mut().enumerate() { entry.id = i as u64; } - let output_bundle = OutputRoot { items: entries }; - - if let Some(dir) = &out_dir { - let stem = path.file_stem().and_then(|s| s.to_str()).unwrap_or("output"); - let date = date_prefix(); - let base_name = format!("{date}_{stem}"); - let json_path = dir.join(format!("{}.json", &base_name)); - let toml_path = dir.join(format!("{}.toml", &base_name)); - let srt_path = dir.join(format!("{}.srt", &base_name)); - - let mut json_file = File::create(&json_path).with_context(|| format!("Failed to create output file: {}", json_path.display()))?; - serde_json::to_writer_pretty(&mut json_file, &output_bundle)?; writeln!(&mut json_file)?; - let toml_str = toml::to_string_pretty(&output_bundle)?; - let mut toml_file = File::create(&toml_path).with_context(|| format!("Failed to create output file: {}", toml_path.display()))?; - toml_file.write_all(toml_str.as_bytes())?; if !toml_str.ends_with('\n') { writeln!(&mut toml_file)?; } - let srt_str = render_srt(&output_bundle.items); - let mut srt_file = File::create(&srt_path).with_context(|| format!("Failed to create output file: {}", srt_path.display()))?; - srt_file.write_all(srt_str.as_bytes())?; - } else { - let stdout = io::stdout(); - let mut handle = stdout.lock(); - serde_json::to_writer_pretty(&mut handle, &output_bundle)?; writeln!(&mut handle)?; - } - } - - Ok(()) -} diff --git a/src/models.rs b/src/models.rs deleted file mode 100644 index 3cc4eb0..0000000 --- a/src/models.rs +++ /dev/null @@ -1,146 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025 . All rights reserved. - -//! Minimal model management API for PolyScribe used by the library and CLI. -//! This implementation focuses on filesystem operations sufficient for tests -//! and basic non-interactive workflows. It can be extended later to support -//! remote discovery and verification. - -use anyhow::{Context, Result}; -use std::fs::{self, File}; -use std::io::Write; -use std::path::{Path, PathBuf}; - -/// Pick the best local Whisper model in the given directory. -/// -/// Heuristic: choose the largest .bin file by size. Returns None if none found. -pub fn pick_best_local_model(dir: &Path) -> Option { - let rd = fs::read_dir(dir).ok()?; - rd.flatten() - .map(|e| e.path()) - .filter(|p| p.is_file() && p.extension().and_then(|s| s.to_str()).is_some_and(|s| s.eq_ignore_ascii_case("bin"))) - .filter_map(|p| fs::metadata(&p).ok().map(|md| (md.len(), p))) - .max_by_key(|(sz, _)| *sz) - .map(|(_, p)| p) -} - -/// Ensure a model file with the given short name exists locally (non-interactive). -/// -/// This stub creates an empty file named `.bin` inside the models dir if it -/// does not yet exist, and returns its path. In a full implementation, this would -/// download and verify the file from a remote source. -pub fn ensure_model_available_noninteractive(name: &str) -> Result { - let models_dir = crate::models_dir_path(); - if !models_dir.exists() { - fs::create_dir_all(&models_dir).with_context(|| { - format!("Failed to create models dir: {}", models_dir.display()) - })?; - } - let filename = if name.ends_with(".bin") { name.to_string() } else { format!("{}.bin", name) }; - let path = models_dir.join(filename); - if !path.exists() { - // Create a small placeholder file to satisfy path checks - let mut f = File::create(&path).with_context(|| format!("Failed to create model file: {}", path.display()))?; - // Write a short header marker (harmless for tests; real models are large) - let _ = f.write_all(b"POLYSCRIBE_PLACEHOLDER_MODEL\n"); - } - Ok(path) -} - -/// Run an interactive model downloader UI. -/// -/// Minimal implementation: -/// - Presents a short list of common Whisper model names. -/// - Prompts the user to select models by comma-separated indices. -/// - Ensures the selected models exist locally (placeholder files), -/// using `ensure_model_available_noninteractive`. -/// - Respects --no-interaction by returning early with an info message. -pub fn run_interactive_model_downloader() -> Result<()> { - use crate::ui; - - // Respect non-interactive mode - if crate::is_no_interaction() || !crate::stdin_is_tty() { - ui::info("Non-interactive mode: skipping interactive model downloader."); - return Ok(()); - } - - // Available models (ordered from small to large). In a full implementation, - // this would come from a remote manifest. - let available = vec![ - ("tiny.en", "English-only tiny model (~75 MB)"), - ("tiny", "Multilingual tiny model (~75 MB)"), - ("base.en", "English-only base model (~142 MB)"), - ("base", "Multilingual base model (~142 MB)"), - ("small.en", "English-only small model (~466 MB)"), - ("small", "Multilingual small model (~466 MB)"), - ("medium.en", "English-only medium model (~1.5 GB)"), - ("medium", "Multilingual medium model (~1.5 GB)"), - ("large-v2", "Multilingual large v2 (~3.1 GB)"), - ("large-v3", "Multilingual large v3 (~3.1 GB)"), - ("large-v3-turbo", "Multilingual large v3 turbo (~1.5 GB)"), - ]; - - ui::intro("PolyScribe model downloader"); - ui::info("Select one or more models to download. Enter comma-separated numbers (e.g., 1,3,4). Press Enter to accept default [1]."); - ui::println_above_bars("Available models:"); - for (i, (name, desc)) in available.iter().enumerate() { - ui::println_above_bars(format!(" {}. {:<16} – {}", i + 1, name, desc)); - } - - let answer = ui::prompt_input("Your selection", Some("1"))?; - let selection_raw = match answer { - Some(s) => s.trim().to_string(), - None => "1".to_string(), - }; - let selection = if selection_raw.is_empty() { "1" } else { &selection_raw }; - - // Parse indices - use std::collections::BTreeSet; - let mut picked_set: BTreeSet = BTreeSet::new(); - for part in selection.split([',', ' ', ';']) { - let t = part.trim(); - if t.is_empty() { continue; } - match t.parse::() { - Ok(n) if (1..=available.len()).contains(&n) => { - picked_set.insert(n - 1); - } - _ => ui::warn(format!("Ignoring invalid selection: '{}'", t)), - } - } - let mut picked_indices: Vec = picked_set.into_iter().collect(); - if picked_indices.is_empty() { - // Fallback to default first item - picked_indices.push(0); - } - - // Prepare progress (TTY-aware) - let labels: Vec = picked_indices - .iter() - .map(|&i| available[i].0.to_string()) - .collect(); - let mut pm = ui::progress::ProgressManager::default_for_files(labels.len()); - pm.init_files(&labels); - - // Ensure models exist - for (i, idx) in picked_indices.iter().enumerate() { - let (name, _desc) = available[*idx]; - if let Some(pb) = pm.per_bar(i) { - pb.set_message("creating placeholder"); - } - let path = ensure_model_available_noninteractive(name)?; - ui::println_above_bars(format!("Ready: {}", path.display())); - pm.mark_file_done(i); - } - - if let Some(total) = pm.total_bar() { total.finish_with_message("all done"); } - ui::outro("Model selection complete."); - Ok(()) -} - -/// Verify/update local models by comparing with a remote manifest. -/// -/// Stub that currently succeeds and logs a short message. -pub fn update_local_models() -> Result<()> { - crate::ui::info("Model update check is not implemented yet. Nothing to do."); - Ok(()) -} diff --git a/src/ui.rs b/src/ui.rs deleted file mode 100644 index 72de16d..0000000 --- a/src/ui.rs +++ /dev/null @@ -1,84 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025 . All rights reserved. - -//! Centralized UI helpers (TTY-aware, quiet/verbose-aware) - -use std::io; - -/// Startup intro/banner (suppressed when quiet). -pub fn intro(msg: impl AsRef) { - let _ = cliclack::intro(msg.as_ref()); -} - -/// Final outro/summary printed below any progress indicators (suppressed when quiet). -pub fn outro(msg: impl AsRef) { - let _ = cliclack::outro(msg.as_ref()); -} - -/// Info message (TTY-aware; suppressed by --quiet is handled by outer callers if needed) -pub fn info(msg: impl AsRef) { - let _ = cliclack::log::info(msg.as_ref()); -} - -/// Print a warning (always printed). -pub fn warn(msg: impl AsRef) { - // cliclack provides a warning-level log utility - let _ = cliclack::log::warning(msg.as_ref()); -} - -/// Print an error (always printed). -pub fn error(msg: impl AsRef) { - let _ = cliclack::log::error(msg.as_ref()); -} - -/// Print a line above any progress bars (maps to cliclack log; synchronized). -pub fn println_above_bars(msg: impl AsRef) { - if crate::is_quiet() { return; } - // cliclack logs are synchronized with its spinners/bars - let _ = cliclack::log::info(msg.as_ref()); -} - -/// Input prompt with a question: returns Ok(None) if non-interactive or canceled -pub fn prompt_input(question: impl AsRef, default: Option<&str>) -> anyhow::Result> { - if crate::is_no_interaction() || !crate::stdin_is_tty() { - return Ok(None); - } - let mut p = cliclack::input(question.as_ref()); - if let Some(d) = default { - // Use default_input when available in 0.3.x - p = p.default_input(d); - } - match p.interact() { - Ok(s) => Ok(Some(s)), - Err(_) => Ok(None), - } -} - -/// Confirmation prompt; returns Ok(None) if non-interactive or canceled -pub fn prompt_confirm(question: impl AsRef, default_yes: bool) -> anyhow::Result> { - if crate::is_no_interaction() || !crate::stdin_is_tty() { - return Ok(None); - } - let res = cliclack::confirm(question.as_ref()) - .initial_value(default_yes) - .interact(); - match res { - Ok(v) => Ok(Some(v)), - Err(_) => Ok(None), - } -} - -/// Prompt the user (TTY-aware via cliclack) and read a line from stdin. Returns the raw line with trailing newline removed. -pub fn prompt_line(prompt: &str) -> io::Result { - // Route prompt through cliclack to keep consistent styling and avoid direct eprint!/println! - let _ = cliclack::log::info(prompt); - let mut s = String::new(); - io::stdin().read_line(&mut s)?; - Ok(s) -} - -/// TTY-aware progress UI built on `indicatif` for per-file and aggregate progress bars. -/// -/// This small helper encapsulates a `MultiProgress` with one aggregate (total) bar and -/// one per-file bar. It is intentionally minimal to keep integration lightweight. -pub mod progress; diff --git a/src/ui/progress.rs b/src/ui/progress.rs deleted file mode 100644 index e558f75..0000000 --- a/src/ui/progress.rs +++ /dev/null @@ -1,81 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025 . All rights reserved. - -use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; -use std::io::IsTerminal as _; - -/// Manages a set of per-file progress bars plus a top aggregate bar. -pub struct ProgressManager { - enabled: bool, - mp: Option, - per: Vec, - total: Option, - completed: usize, -} - -impl ProgressManager { - /// Create a new manager with the given enabled flag. - pub fn new(enabled: bool) -> Self { - Self { enabled, mp: None, per: Vec::new(), total: None, completed: 0 } - } - - /// Create a manager that enables bars when `n > 1`, stderr is a TTY, and not quiet. - pub fn default_for_files(n: usize) -> Self { - let enabled = n > 1 && std::io::stderr().is_terminal() && !crate::is_quiet() && !crate::is_no_progress(); - Self::new(enabled) - } - - /// Initialize bars for the given file labels. If disabled or single file, no-op. - pub fn init_files(&mut self, labels: &[String]) { - if !self.enabled || labels.len() <= 1 { - // No bars in single-file mode or when disabled - self.enabled = false; - return; - } - let mp = MultiProgress::new(); - // Aggregate bar at the top - let total = mp.add(ProgressBar::new(labels.len() as u64)); - total.set_style(ProgressStyle::with_template("{prefix} [{bar:40.cyan/blue}] {pos}/{len}") - .unwrap() - .progress_chars("=>-")); - total.set_prefix("Total"); - self.total = Some(total); - // Per-file bars - for label in labels { - let pb = mp.add(ProgressBar::new(100)); - pb.set_style(ProgressStyle::with_template("{prefix} [{bar:40.green/black}] {pos}% {msg}") - .unwrap() - .progress_chars("=>-")); - pb.set_position(0); - pb.set_prefix(label.clone()); - self.per.push(pb); - } - self.mp = Some(mp); - } - - /// Returns true when bars are enabled (multi-file TTY mode). - pub fn is_enabled(&self) -> bool { self.enabled } - - /// Get a clone of the per-file progress bar at index, if enabled. - pub fn per_bar(&self, idx: usize) -> Option { - if !self.enabled { return None; } - self.per.get(idx).cloned() - } - - /// Get a clone of the aggregate (total) progress bar, if enabled. - pub fn total_bar(&self) -> Option { - if !self.enabled { return None; } - self.total.as_ref().cloned() - } - - /// Mark a file as finished (set to 100% and update total counter). - pub fn mark_file_done(&mut self, idx: usize) { - if !self.enabled { return; } - if let Some(pb) = self.per.get(idx) { - pb.set_position(100); - pb.finish_with_message("done"); - } - self.completed += 1; - if let Some(total) = &self.total { total.set_position(self.completed as u64); } - } -}