// SPDX-License-Identifier: MIT // Copyright (c) 2025 . All rights reserved. #![forbid(elided_lifetimes_in_paths)] #![forbid(unused_must_use)] #![deny(missing_docs)] #![warn(clippy::all)] //! PolyScribe library: business logic and core types. //! //! This crate exposes the reusable parts of the PolyScribe CLI as a library. //! The binary entry point (main.rs) remains a thin CLI wrapper. use std::sync::atomic::{AtomicBool, AtomicU8, Ordering}; use anyhow::{anyhow, Context, Result}; use chrono::Local; use std::env; use std::path::{Path, PathBuf}; use std::process::Command; #[cfg(unix)] use libc::{O_WRONLY, close, dup, dup2, open}; /// Global runtime flags static QUIET: AtomicBool = AtomicBool::new(false); static NO_INTERACTION: AtomicBool = AtomicBool::new(false); static VERBOSE: AtomicU8 = AtomicU8::new(0); static NO_PROGRESS: AtomicBool = AtomicBool::new(false); /// Set quiet mode: when true, non-interactive logs should be suppressed. pub fn set_quiet(enabled: bool) { QUIET.store(enabled, Ordering::Relaxed); } /// Return current quiet mode state. pub fn is_quiet() -> bool { QUIET.load(Ordering::Relaxed) } /// Set non-interactive mode: when true, interactive prompts must be skipped. pub fn set_no_interaction(enabled: bool) { NO_INTERACTION.store(enabled, Ordering::Relaxed); } /// Return current non-interactive state. pub fn is_no_interaction() -> bool { NO_INTERACTION.load(Ordering::Relaxed) } /// Set verbose level (0 = normal, 1 = verbose, 2 = super-verbose) pub fn set_verbose(level: u8) { VERBOSE.store(level, Ordering::Relaxed); } /// Get current verbose level. pub fn verbose_level() -> u8 { VERBOSE.load(Ordering::Relaxed) } /// Disable interactive progress indicators (bars/spinners) pub fn set_no_progress(enabled: bool) { NO_PROGRESS.store(enabled, Ordering::Relaxed); } /// Return current no-progress state pub fn is_no_progress() -> bool { NO_PROGRESS.load(Ordering::Relaxed) } /// Check whether stdin is connected to a TTY. 
Used to avoid blocking prompts when not interactive. pub fn stdin_is_tty() -> bool { use std::io::IsTerminal as _; std::io::stdin().is_terminal() } /// A guard that temporarily redirects stderr to /dev/null on Unix when quiet mode is active. /// No-op on non-Unix or when quiet is disabled. Restores stderr on drop. pub struct StderrSilencer { #[cfg(unix)] old_stderr_fd: i32, #[cfg(unix)] devnull_fd: i32, active: bool, } impl StderrSilencer { /// Activate stderr silencing if quiet is set and on Unix; otherwise returns a no-op guard. pub fn activate_if_quiet() -> Self { if !is_quiet() { return Self { active: false, #[cfg(unix)] old_stderr_fd: -1, #[cfg(unix)] devnull_fd: -1, }; } Self::activate() } /// Activate stderr silencing unconditionally (used internally); no-op on non-Unix. pub fn activate() -> Self { #[cfg(unix)] unsafe { let old_fd = dup(2); if old_fd < 0 { return Self { active: false, old_stderr_fd: -1, devnull_fd: -1, }; } // Open /dev/null for writing let devnull_cstr = std::ffi::CString::new("/dev/null").unwrap(); let devnull_fd = open(devnull_cstr.as_ptr(), O_WRONLY); if devnull_fd < 0 { let _ = close(old_fd); return Self { active: false, old_stderr_fd: -1, devnull_fd: -1, }; } if dup2(devnull_fd, 2) < 0 { let _ = close(devnull_fd); let _ = close(old_fd); return Self { active: false, old_stderr_fd: -1, devnull_fd: -1, }; } Self { active: true, old_stderr_fd: old_fd, devnull_fd, } } #[cfg(not(unix))] { Self { active: false } } } } impl Drop for StderrSilencer { fn drop(&mut self) { if !self.active { return; } #[cfg(unix)] unsafe { let _ = dup2(self.old_stderr_fd, 2); let _ = close(self.old_stderr_fd); let _ = close(self.devnull_fd); } } } /// Run the given closure with stderr temporarily silenced (Unix-only). Returns the closure result. pub fn with_suppressed_stderr(f: F) -> T where F: FnOnce() -> T, { let silencer = StderrSilencer::activate_if_quiet(); let result = f(); drop(silencer); result } /// Log an error line (always printed). 
/// Accepts `format!`-style arguments and passes the formatted message to `ui::error`.
#[macro_export]
macro_rules! elog {
    ($($arg:tt)*) => {{
        $crate::ui::error(format!($($arg)*));
    }}
}

/// Log an informational line using the UI helper unless quiet mode is enabled.
#[macro_export]
macro_rules! ilog {
    ($($arg:tt)*) => {{
        if !$crate::is_quiet() {
            $crate::ui::info(format!($($arg)*));
        }
    }}
}

/// Log a debug/trace line when verbose level is at least the given level (u8).
///
/// Nothing is printed in quiet mode. The message is prefixed with `DEBUG<level>: `.
#[macro_export]
macro_rules! dlog {
    ($lvl:expr, $($arg:tt)*) => {{
        // Bind the level once so an argument with side effects is evaluated a
        // single time instead of once per use.
        let lvl: u8 = $lvl;
        if !$crate::is_quiet() && $crate::verbose_level() >= lvl {
            $crate::ui::info(format!("DEBUG{}: {}", lvl, format!($($arg)*)));
        }
    }}
}

/// Backward-compatibility: map old qlog! to ilog!
#[macro_export]
macro_rules! qlog {
    ($($arg:tt)*) => {{
        $crate::ilog!($($arg)*);
    }}
}

/// Backend module (GPU/CPU selection and transcription).
pub mod backend;
/// Models module (model listing/downloading/updating).
pub mod models;
/// Configuration service (XDG + atomic writes)
pub mod config;
/// UI helpers
pub mod ui;
/// Error types for the crate.
pub mod error;
/// Prelude module (its contents live in the `prelude` source file).
pub mod prelude;

pub use error::{Error, Result as OtherResult};

/// Transcript entry for a single segment.
#[derive(Debug, serde::Serialize, Clone)]
pub struct OutputEntry {
    /// Sequential id in output ordering.
    pub id: u64,
    /// Speaker label associated with the segment.
    pub speaker: String,
    /// Start time in seconds.
    pub start: f64,
    /// End time in seconds.
    pub end: f64,
    /// Text content.
    pub text: String,
}

/// Return a YYYY-MM-DD date prefix string for output file naming.
///
/// The date is taken from the local clock (`chrono::Local`).
pub fn date_prefix() -> String {
    Local::now().format("%Y-%m-%d").to_string()
}

/// Format a floating-point number of seconds as SRT timestamp (HH:MM:SS,mmm).
/// Fractional seconds are rounded to the nearest millisecond; negative or NaN
/// input is clamped to `00:00:00,000`. Hours are not wrapped at 24.
pub fn format_srt_time(seconds: f64) -> String {
    // `f64::max` returns the non-NaN operand, so NaN collapses to zero as well.
    // Without the clamp a negative input would render as e.g. `00:00:00,-500`.
    let total_ms = (seconds.max(0.0) * 1000.0).round() as u64;
    let ms = total_ms % 1000;
    let total_secs = total_ms / 1000;
    let sec = total_secs % 60;
    let min = (total_secs / 60) % 60;
    let hour = total_secs / 3600;
    format!("{hour:02}:{min:02}:{sec:02},{ms:03}")
}

/// Render a list of transcript entries to SRT format.
///
/// Cues are numbered from 1 in slice order (the entries' own `id` is not
/// used). A non-empty speaker is emitted as a `speaker: ` prefix on the text
/// line, and every cue is followed by a blank line.
pub fn render_srt(entries: &[OutputEntry]) -> String {
    use std::fmt::Write as _;

    let mut srt = String::new();
    for (index, entry) in entries.iter().enumerate() {
        // Writing into a `String` cannot fail, so the `fmt::Result`s are discarded.
        let _ = writeln!(srt, "{}", index + 1);
        let _ = writeln!(
            srt,
            "{} --> {}",
            format_srt_time(entry.start),
            format_srt_time(entry.end)
        );
        if entry.speaker.is_empty() {
            let _ = writeln!(srt, "{}", entry.text);
        } else {
            let _ = writeln!(srt, "{}: {}", entry.speaker, entry.text);
        }
        srt.push('\n');
    }
    srt
}

/// Determine the default models directory, honoring POLYSCRIBE_MODELS_DIR override.
///
/// Resolution order:
/// 1. `POLYSCRIBE_MODELS_DIR`, if set and non-empty;
/// 2. `models` (relative) in builds with debug assertions enabled;
/// 3. `$XDG_DATA_HOME/polyscribe/models`, if `XDG_DATA_HOME` is set and non-empty;
/// 4. `$HOME/.local/share/polyscribe/models`, if `HOME` is set and non-empty;
/// 5. `models` (relative) otherwise.
pub fn models_dir_path() -> PathBuf {
    // `var_os` keeps paths that are not valid UTF-8 instead of ignoring them.
    let dir_from_env =
        |name: &str| env::var_os(name).filter(|v| !v.is_empty()).map(PathBuf::from);

    if let Some(dir) = dir_from_env("POLYSCRIBE_MODELS_DIR") {
        return dir;
    }
    if cfg!(debug_assertions) {
        return PathBuf::from("models");
    }
    if let Some(data_home) = dir_from_env("XDG_DATA_HOME") {
        return data_home.join("polyscribe").join("models");
    }
    if let Some(home) = dir_from_env("HOME") {
        return home
            .join(".local")
            .join("share")
            .join("polyscribe")
            .join("models");
    }
    PathBuf::from("models")
}

/// Normalize a language identifier to a short ISO code when possible.
/// Matching is case-insensitive and ignores surrounding whitespace. Only the
/// part before the first `.`, `_`, `-` or `@` is considered, so locale strings
/// such as `en_US.UTF-8`, `en-US` or `de_DE@euro` reduce to their language.
/// Two-letter codes and English language names from the table below are
/// recognised; `auto`, `C`, `POSIX`, the empty string and anything unknown
/// yield `None`.
pub fn normalize_lang_code(input: &str) -> Option<String> {
    let lowered = input.trim().to_lowercase();
    // `str::split` always yields at least one piece, even for an empty string.
    let primary = lowered
        .split(|c: char| matches!(c, '.' | '_' | '-' | '@'))
        .next()
        .unwrap_or("");

    let code = match primary {
        // Placeholders that do not name a language.
        "" | "auto" | "c" | "posix" => return None,
        "en" | "english" => "en",
        "de" | "german" => "de",
        "es" | "spanish" => "es",
        "fr" | "french" => "fr",
        "it" | "italian" => "it",
        "pt" | "portuguese" => "pt",
        "nl" | "dutch" => "nl",
        "ru" | "russian" => "ru",
        "pl" | "polish" => "pl",
        "uk" | "ukrainian" => "uk",
        "cs" | "czech" => "cs",
        "sv" | "swedish" => "sv",
        "no" | "norwegian" => "no",
        "da" | "danish" => "da",
        "fi" | "finnish" => "fi",
        "hu" | "hungarian" => "hu",
        "tr" | "turkish" => "tr",
        "el" | "greek" => "el",
        "zh" | "chinese" => "zh",
        "ja" | "japanese" => "ja",
        "ko" | "korean" => "ko",
        "ar" | "arabic" => "ar",
        "he" | "hebrew" => "he",
        "hi" | "hindi" => "hi",
        "ro" | "romanian" => "ro",
        "bg" | "bulgarian" => "bg",
        "sk" | "slovak" => "sk",
        _ => return None,
    };
    Some(code.to_owned())
}

/// Find the Whisper model file path to use.
pub fn find_model_file() -> Result { // 1) Explicit override via environment if let Ok(path) = env::var("WHISPER_MODEL") { let p = PathBuf::from(path); if !p.exists() { return Err(anyhow!( "WHISPER_MODEL points to a non-existing path: {}", p.display() )); } if !p.is_file() { return Err(anyhow!( "WHISPER_MODEL must point to a file, but is not: {}", p.display() )); } return Ok(p); } // 2) Resolve models directory and ensure it exists and is a directory let models_dir = models_dir_path(); if models_dir.exists() && !models_dir.is_dir() { return Err(anyhow!( "Models path exists but is not a directory: {}", models_dir.display() )); } std::fs::create_dir_all(&models_dir).with_context(|| { format!("Failed to ensure models dir exists: {}", models_dir.display()) })?; // 3) Gather candidate .bin files (regular files only), prefer largest let mut candidates = Vec::new(); for entry in std::fs::read_dir(&models_dir).with_context(|| { format!("Failed to read models dir: {}", models_dir.display()) })? { let entry = entry?; let path = entry.path(); // Only consider .bin files let is_bin = path .extension() .and_then(|s| s.to_str()) .is_some_and(|s| s.eq_ignore_ascii_case("bin")); if !is_bin { continue; } // Only consider regular files let md = match std::fs::metadata(&path) { Ok(m) if m.is_file() => m, _ => continue, }; candidates.push((md.len(), path)); } if candidates.is_empty() { // 4) Fallback to known tiny English model if present let fallback = models_dir.join("ggml-tiny.en.bin"); if fallback.is_file() { return Ok(fallback); } return Err(anyhow!( "No Whisper model files (*.bin) found in {}. \ Please download a model or set WHISPER_MODEL.", models_dir.display() )); } candidates.sort_by_key(|(size, _)| *size); let (_size, path) = candidates.into_iter().last().expect("non-empty"); Ok(path) } /// Decode an audio file into PCM f32 samples using ffmpeg (ffmpeg executable required). 
pub fn decode_audio_to_pcm_f32_ffmpeg(audio_path: &Path) -> Result> { let in_path = audio_path .to_str() .ok_or_else(|| anyhow!("Audio path must be valid UTF-8: {}", audio_path.display()))?; // Use a raw f32le file to match the -f f32le output format. let tmp_raw = std::env::temp_dir().join("polyscribe_tmp_input.f32le"); let tmp_raw_str = tmp_raw .to_str() .ok_or_else(|| anyhow!("Temp path not valid UTF-8: {}", tmp_raw.display()))?; // ffmpeg -i input -f f32le -ac 1 -ar 16000 -y /tmp/tmp.f32le let status = Command::new("ffmpeg") .arg("-hide_banner") .arg("-loglevel") .arg("error") .arg("-i") .arg(in_path) .arg("-f") .arg("f32le") .arg("-ac") .arg("1") .arg("-ar") .arg("16000") .arg("-y") .arg(tmp_raw_str) .status() .with_context(|| format!("Failed to invoke ffmpeg to decode: {}", in_path))?; if !status.success() { return Err(anyhow!( "ffmpeg exited with non-zero status when decoding {}", in_path )); } let raw = std::fs::read(&tmp_raw) .with_context(|| format!("Failed to read temp PCM file: {}", tmp_raw.display()))?; // Best-effort cleanup of the temp file let _ = std::fs::remove_file(&tmp_raw); // Interpret raw bytes as f32 little-endian if raw.len() % 4 != 0 { return Err(anyhow!( "Decoded PCM file length not multiple of 4: {}", raw.len() )); } let mut samples = Vec::with_capacity(raw.len() / 4); for chunk in raw.chunks_exact(4) { let v = f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]); samples.push(v); } Ok(samples) }