[refactor] remove backend and library modules, consolidating features into main crate

2025-08-13 13:35:53 +02:00
parent 06fd3efd1f
commit 128db0f733
13 changed files with 209 additions and 1769 deletions

17
.cargo/config.toml Normal file
View File

@@ -0,0 +1,17 @@
# SPDX-License-Identifier: MIT
[build]
# Make target-dir consistent across workspace for better cache reuse.
target-dir = "target"
[profile.dev]
opt-level = 1
debug = true
incremental = true
[profile.release]
# Reasonable defaults for CLI apps/libraries
lto = "thin"
codegen-units = 1
strip = "debuginfo"
opt-level = 3

12
Cargo.lock generated
View File

@@ -762,18 +762,6 @@ dependencies = [
"which",
]
[[package]]
name = "polyscribe-plugin-tubescribe"
version = "0.1.0"
dependencies = [
"anyhow",
"clap",
"polyscribe-protocol",
"serde",
"serde_json",
"tokio",
]
[[package]]
name = "polyscribe-protocol"
version = "0.1.0"

View File

@@ -4,6 +4,37 @@ members = [
"crates/polyscribe-protocol",
"crates/polyscribe-host",
"crates/polyscribe-cli",
"plugins/polyscribe-plugin-tubescribe",
]
resolver = "2"
resolver = "3"
# Optional: Keep dependency versions consistent across members
[workspace.dependencies]
thiserror = "1.0.69"
serde = { version = "1.0.219", features = ["derive"] }
anyhow = "1.0.99"
libc = "0.2.175"
toml = "0.8.23"
serde_json = "1.0.142"
chrono = "0.4.41"
sha2 = "0.10.9"
which = "6.0.3"
tokio = { version = "1.47.1", features = ["rt-multi-thread", "macros"] }
clap = { version = "4.5.44", features = ["derive"] }
indicatif = "0.17.11"
directories = "5.0.1"
whisper-rs = "0.14.3"
cliclack = "0.3.6"
clap_complete = "4.5.57"
clap_mangen = "0.2.29"
[workspace.lints.rust]
unused_imports = "deny"
dead_code = "warn"
[profile.release]
lto = "fat"
codegen-units = 1
panic = "abort"
[profile.dev]
panic = "unwind"

View File

@@ -0,0 +1,39 @@
// SPDX-License-Identifier: MIT
use thiserror::Error;
/// The common error type for the polyscribe core crate.
/// Add more domain-specific variants as needed.
#[derive(Debug, Error)]
pub enum Error {
/// Wrapper for an `anyhow::Error`. Useful as a temporary catch-all.
#[error("anyhow error: {0}")]
Anyhow(#[from] anyhow::Error),
/// IO-related error.
#[error("io error: {0}")]
Io(#[from] std::io::Error),
/// UTF-8 conversion error.
#[error("utf8 error: {0}")]
Utf8(#[from] std::string::FromUtf8Error),
/// Environment variable error.
#[error("env var error: {0}")]
Var(#[from] std::env::VarError),
/// TOML deserialization error.
#[error("toml de error: {0}")]
TomlDe(#[from] toml::de::Error),
/// Configuration parsing error.
#[error("configuration error: {0}")]
Config(String),
/// Placeholder for not-yet-implemented backends or features.
#[error("unimplemented: {0}")]
Unimplemented(&'static str),
}
/// Convenient result alias for the polyscribe core crate.
pub type Result<T> = std::result::Result<T, Error>;
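For reference, a minimal caller sketch (hypothetical, not part of this commit; assumes the crate name `polyscribe_core` used by the prelude below) showing how the `#[from]` conversions let `?` lift std and toml errors into this `Error`:
use polyscribe_core::error::{Error, Result};

fn load_config(path: &std::path::Path) -> Result<toml::Value> {
    // std::io::Error -> Error::Io via #[from]
    let text = std::fs::read_to_string(path)?;
    // toml::de::Error -> Error::TomlDe via #[from]
    let value: toml::Value = toml::from_str(&text)?;
    if value.get("models").is_none() {
        return Err(Error::Config("missing [models] table".into()));
    }
    Ok(value)
}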

View File

@@ -12,7 +12,16 @@
use std::sync::atomic::{AtomicBool, AtomicU8, Ordering};
// Global runtime flags
use anyhow::{anyhow, Context, Result};
use chrono::Local;
use std::env;
use std::path::{Path, PathBuf};
use std::process::Command;
#[cfg(unix)]
use libc::{O_WRONLY, close, dup, dup2, open};
/// Global runtime flags
static QUIET: AtomicBool = AtomicBool::new(false);
static NO_INTERACTION: AtomicBool = AtomicBool::new(false);
static VERBOSE: AtomicU8 = AtomicU8::new(0);
@@ -101,7 +110,7 @@ impl StderrSilencer {
let devnull_cstr = std::ffi::CString::new("/dev/null").unwrap();
let devnull_fd = open(devnull_cstr.as_ptr(), O_WRONLY);
if devnull_fd < 0 {
close(old_fd);
let _ = close(old_fd);
return Self {
active: false,
old_stderr_fd: -1,
@@ -109,8 +118,8 @@ impl StderrSilencer {
};
}
if dup2(devnull_fd, 2) < 0 {
close(devnull_fd);
close(old_fd);
let _ = close(devnull_fd);
let _ = close(old_fd);
return Self {
active: false,
old_stderr_fd: -1,
@@ -120,7 +129,7 @@ impl StderrSilencer {
Self {
active: true,
old_stderr_fd: old_fd,
devnull_fd: devnull_fd,
devnull_fd,
}
}
#[cfg(not(unix))]
@@ -183,16 +192,6 @@ macro_rules! qlog {
($($arg:tt)*) => {{ $crate::ilog!($($arg)*); }}
}
use anyhow::{Context, Result, anyhow};
use chrono::Local;
use std::env;
use std::fs::create_dir_all;
use std::path::{Path, PathBuf};
use std::process::Command;
#[cfg(unix)]
use libc::{O_WRONLY, close, dup, dup2, open};
/// Re-export backend module (GPU/CPU selection and transcription).
pub mod backend;
/// Re-export models module (model listing/downloading/updating).
@@ -201,6 +200,11 @@ pub mod models;
pub mod config;
/// UI helpers
pub mod ui;
/// Error types for the crate.
pub mod error;
pub mod prelude;
pub use error::{Error, Result as OtherResult};
/// Transcript entry for a single segment.
#[derive(Debug, serde::Serialize, Clone)]
@@ -356,56 +360,77 @@ pub fn normalize_lang_code(input: &str) -> Option<String> {
/// Find the Whisper model file path to use.
pub fn find_model_file() -> Result<PathBuf> {
// 1) Explicit override via environment
if let Ok(path) = env::var("WHISPER_MODEL") {
let p = PathBuf::from(path);
if p.exists() {
return Ok(p);
} else {
if !p.exists() {
return Err(anyhow!(
"WHISPER_MODEL points to non-existing file: {}",
"WHISPER_MODEL points to a non-existing path: {}",
p.display()
));
}
}
let models_dir = models_dir_path();
if !models_dir.exists() {
create_dir_all(&models_dir).with_context(|| {
format!("Failed to create models dir: {}", models_dir.display())
})?;
if !p.is_file() {
return Err(anyhow!(
"WHISPER_MODEL must point to a file, but is not: {}",
p.display()
));
}
return Ok(p);
}
// Heuristic: prefer larger model files and English-only when language hint is en
let mut candidates = Vec::new();
for entry in std::fs::read_dir(&models_dir).with_context(|| format!(
"Failed to read models dir: {}",
models_dir.display()
))? {
let entry = entry?;
let path = entry.path();
if !path
.extension()
.and_then(|s| s.to_str())
.is_some_and(|s| s.eq_ignore_ascii_case("bin"))
{
continue;
}
if let Ok(md) = std::fs::metadata(&path) {
candidates.push((md.len(), path));
}
}
if candidates.is_empty() {
// Try default fallback (tiny.en)
let fallback = models_dir.join("ggml-tiny.en.bin");
if fallback.exists() {
return Ok(fallback);
}
// 2) Resolve models directory and ensure it exists and is a directory
let models_dir = models_dir_path();
if models_dir.exists() && !models_dir.is_dir() {
return Err(anyhow!(
"No Whisper models found in {}. Please download a model or set WHISPER_MODEL.",
"Models path exists but is not a directory: {}",
models_dir.display()
));
}
std::fs::create_dir_all(&models_dir).with_context(|| {
format!("Failed to ensure models dir exists: {}", models_dir.display())
})?;
// 3) Gather candidate .bin files (regular files only), prefer largest
let mut candidates = Vec::new();
for entry in std::fs::read_dir(&models_dir).with_context(|| {
format!("Failed to read models dir: {}", models_dir.display())
})? {
let entry = entry?;
let path = entry.path();
// Only consider .bin files
let is_bin = path
.extension()
.and_then(|s| s.to_str())
.is_some_and(|s| s.eq_ignore_ascii_case("bin"));
if !is_bin {
continue;
}
// Only consider regular files
let md = match std::fs::metadata(&path) {
Ok(m) if m.is_file() => m,
_ => continue,
};
candidates.push((md.len(), path));
}
if candidates.is_empty() {
// 4) Fallback to known tiny English model if present
let fallback = models_dir.join("ggml-tiny.en.bin");
if fallback.is_file() {
return Ok(fallback);
}
return Err(anyhow!(
"No Whisper model files (*.bin) found in {}. \
Please download a model or set WHISPER_MODEL.",
models_dir.display()
));
}
candidates.sort_by_key(|(size, _)| *size);
let (_size, path) = candidates.into_iter().last().unwrap();
let (_size, path) = candidates.into_iter().last().expect("non-empty");
Ok(path)
}
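A hedged usage sketch (path is illustrative, not part of this commit): the WHISPER_MODEL override wins, otherwise the largest *.bin in the models directory is chosen, with ggml-tiny.en.bin as the last resort:
fn resolve_model() -> Result<PathBuf> {
    // Assumption for illustration only: the override points at an existing file.
    unsafe { std::env::set_var("WHISPER_MODEL", "/models/ggml-large-v3.bin") };
    let model = find_model_file()?;
    println!("using model: {}", model.display());
    Ok(model)
}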
@@ -414,12 +439,14 @@ pub fn decode_audio_to_pcm_f32_ffmpeg(audio_path: &Path) -> Result<Vec<f32>> {
let in_path = audio_path
.to_str()
.ok_or_else(|| anyhow!("Audio path must be valid UTF-8: {}", audio_path.display()))?;
let tmp_wav = std::env::temp_dir().join("polyscribe_tmp_input.wav");
let tmp_wav_str = tmp_wav
.to_str()
.ok_or_else(|| anyhow!("Temp path not valid UTF-8: {}", tmp_wav.display()))?;
// ffmpeg -i input -f f32le -ac 1 -ar 16000 -y /tmp/tmp.raw
// Use a raw f32le file to match the -f f32le output format.
let tmp_raw = std::env::temp_dir().join("polyscribe_tmp_input.f32le");
let tmp_raw_str = tmp_raw
.to_str()
.ok_or_else(|| anyhow!("Temp path not valid UTF-8: {}", tmp_raw.display()))?;
// ffmpeg -i input -f f32le -ac 1 -ar 16000 -y /tmp/tmp.f32le
let status = Command::new("ffmpeg")
.arg("-hide_banner")
.arg("-loglevel")
@@ -433,16 +460,29 @@ pub fn decode_audio_to_pcm_f32_ffmpeg(audio_path: &Path) -> Result<Vec<f32>> {
.arg("-ar")
.arg("16000")
.arg("-y")
.arg(&tmp_wav_str)
.arg(tmp_raw_str)
.status()
.with_context(|| format!("Failed to invoke ffmpeg to decode: {}", in_path))?;
if !status.success() {
return Err(anyhow!("ffmpeg exited with non-zero status when decoding {}", in_path));
return Err(anyhow!(
"ffmpeg exited with non-zero status when decoding {}",
in_path
));
}
let raw = std::fs::read(&tmp_wav).with_context(|| format!("Failed to read temp PCM file: {}", tmp_wav.display()))?;
let raw = std::fs::read(&tmp_raw)
.with_context(|| format!("Failed to read temp PCM file: {}", tmp_raw.display()))?;
// Best-effort cleanup of the temp file
let _ = std::fs::remove_file(&tmp_raw);
// Interpret raw bytes as f32 little-endian
if raw.len() % 4 != 0 {
return Err(anyhow!("Decoded PCM file length not multiple of 4: {}", raw.len()));
return Err(anyhow!(
"Decoded PCM file length not multiple of 4: {}",
raw.len()
));
}
let mut samples = Vec::with_capacity(raw.len() / 4);
for chunk in raw.chunks_exact(4) {

View File

@@ -0,0 +1,13 @@
// rust
//! Commonly used exports for convenient glob-imports in binaries and tests.
//! Usage: `use polyscribe_core::prelude::*;`
pub use crate::backend::*;
pub use crate::config::*;
pub use crate::error::{Error, Result};
pub use crate::models::*;
// If you frequently use UI helpers across binaries/tests, export them too.
// Keep this lean to avoid pulling UI everywhere unintentionally.
#[allow(unused_imports)]
pub use crate::ui::*;

6
rust-toolchain.toml Normal file
View File

@@ -0,0 +1,6 @@
# SPDX-License-Identifier: MIT
[toolchain]
channel = "1.89.0"
components = ["clippy", "rustfmt"]
profile = "minimal"

View File

@@ -1,329 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025 <COPYRIGHT HOLDER>. All rights reserved.
//! Transcription backend selection and implementations (CPU/GPU) used by PolyScribe.
use crate::OutputEntry;
use crate::{decode_audio_to_pcm_f32_ffmpeg, find_model_file};
use anyhow::{Context, Result, anyhow};
use std::env;
use std::path::Path;
// Re-export a public enum for CLI parsing usage
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
/// Kind of transcription backend to use.
pub enum BackendKind {
/// Automatically detect the best available backend (CUDA > HIP > Vulkan > CPU).
Auto,
/// Pure CPU backend using whisper-rs.
Cpu,
/// NVIDIA CUDA backend (requires CUDA runtime available at load time and proper feature build).
Cuda,
/// AMD ROCm/HIP backend (requires hip/rocBLAS libraries available and proper feature build).
Hip,
/// Vulkan backend (experimental; requires Vulkan loader/SDK and feature build).
Vulkan,
}
/// Abstraction for a transcription backend.
pub trait TranscribeBackend {
/// Backend kind implemented by this type.
fn kind(&self) -> BackendKind;
/// Transcribe the given audio and return transcript entries.
fn transcribe(
&self,
audio_path: &Path,
speaker: &str,
language: Option<&str>,
gpu_layers: Option<u32>,
progress: Option<&(dyn Fn(i32) + Send + Sync)>,
) -> Result<Vec<OutputEntry>>;
}
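To make the trait surface concrete, a hedged sketch of a no-op implementation one might use in tests (hypothetical type, not part of this commit):
struct NullBackend;

impl TranscribeBackend for NullBackend {
    fn kind(&self) -> BackendKind {
        BackendKind::Cpu
    }
    fn transcribe(
        &self,
        _audio_path: &Path,
        speaker: &str,
        _language: Option<&str>,
        _gpu_layers: Option<u32>,
        progress: Option<&(dyn Fn(i32) + Send + Sync)>,
    ) -> Result<Vec<OutputEntry>> {
        if let Some(cb) = progress {
            cb(100); // report completion immediately
        }
        Ok(vec![OutputEntry {
            id: 0,
            speaker: speaker.to_string(),
            start: 0.0,
            end: 0.0,
            text: String::new(),
        }])
    }
}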
fn check_lib(_names: &[&str]) -> bool {
#[cfg(test)]
{
// During unit tests, avoid touching system libs to prevent loader crashes in CI.
false
}
#[cfg(not(test))]
{
// Disabled runtime dlopen probing to avoid loader instability; rely on environment overrides.
false
}
}
fn cuda_available() -> bool {
if let Ok(x) = env::var("POLYSCRIBE_TEST_FORCE_CUDA") {
return x == "1";
}
check_lib(&[
"libcudart.so",
"libcudart.so.12",
"libcudart.so.11",
"libcublas.so",
"libcublas.so.12",
])
}
fn hip_available() -> bool {
if let Ok(x) = env::var("POLYSCRIBE_TEST_FORCE_HIP") {
return x == "1";
}
check_lib(&["libhipblas.so", "librocblas.so"])
}
fn vulkan_available() -> bool {
if let Ok(x) = env::var("POLYSCRIBE_TEST_FORCE_VULKAN") {
return x == "1";
}
check_lib(&["libvulkan.so.1", "libvulkan.so"])
}
/// CPU-based transcription backend using whisper-rs.
#[derive(Default)]
pub struct CpuBackend;
/// CUDA-accelerated transcription backend for NVIDIA GPUs.
#[derive(Default)]
pub struct CudaBackend;
/// ROCm/HIP-accelerated transcription backend for AMD GPUs.
#[derive(Default)]
pub struct HipBackend;
/// Vulkan-based transcription backend (experimental/incomplete).
#[derive(Default)]
pub struct VulkanBackend;
macro_rules! impl_whisper_backend {
($ty:ty, $kind:expr) => {
impl TranscribeBackend for $ty {
fn kind(&self) -> BackendKind { $kind }
fn transcribe(
&self,
audio_path: &Path,
speaker: &str,
language: Option<&str>,
_gpu_layers: Option<u32>,
progress: Option<&(dyn Fn(i32) + Send + Sync)>,
) -> Result<Vec<OutputEntry>> {
transcribe_with_whisper_rs(audio_path, speaker, language, progress)
}
}
};
}
impl_whisper_backend!(CpuBackend, BackendKind::Cpu);
impl_whisper_backend!(CudaBackend, BackendKind::Cuda);
impl_whisper_backend!(HipBackend, BackendKind::Hip);
impl TranscribeBackend for VulkanBackend {
fn kind(&self) -> BackendKind {
BackendKind::Vulkan
}
fn transcribe(
&self,
_audio_path: &Path,
_speaker: &str,
_language: Option<&str>,
_gpu_layers: Option<u32>,
_progress: Option<&(dyn Fn(i32) + Send + Sync)>,
) -> Result<Vec<OutputEntry>> {
Err(anyhow!(
"Vulkan backend not yet wired to whisper.cpp FFI. Build with --features gpu-vulkan and ensure Vulkan SDK is installed. How to fix: install Vulkan loader (libvulkan), set VULKAN_SDK, and run cargo build --features gpu-vulkan."
))
}
}
/// Result of choosing a transcription backend.
pub struct SelectionResult {
/// The constructed backend instance to perform transcription with.
pub backend: Box<dyn TranscribeBackend + Send + Sync>,
/// Which backend kind was ultimately selected.
pub chosen: BackendKind,
/// Which backend kinds were detected as available on this system.
pub detected: Vec<BackendKind>,
}
/// Select an appropriate backend based on user request and system detection.
///
/// If `requested` is `BackendKind::Auto`, the function prefers CUDA, then HIP,
/// then Vulkan, falling back to CPU when no GPU backend is detected. When a
/// specific GPU backend is requested but unavailable, an error is returned with
/// guidance on how to enable it.
///
/// Set `verbose` to true to print detection/selection info to stderr.
pub fn select_backend(requested: BackendKind, verbose: bool) -> Result<SelectionResult> {
let mut detected = Vec::new();
if cuda_available() {
detected.push(BackendKind::Cuda);
}
if hip_available() {
detected.push(BackendKind::Hip);
}
if vulkan_available() {
detected.push(BackendKind::Vulkan);
}
let instantiate_backend = |k: BackendKind| -> Box<dyn TranscribeBackend + Send + Sync> {
match k {
BackendKind::Cpu => Box::new(CpuBackend::default()),
BackendKind::Cuda => Box::new(CudaBackend::default()),
BackendKind::Hip => Box::new(HipBackend::default()),
BackendKind::Vulkan => Box::new(VulkanBackend::default()),
BackendKind::Auto => Box::new(CpuBackend::default()), // placeholder for Auto
}
};
let chosen = match requested {
BackendKind::Auto => {
if detected.contains(&BackendKind::Cuda) {
BackendKind::Cuda
} else if detected.contains(&BackendKind::Hip) {
BackendKind::Hip
} else if detected.contains(&BackendKind::Vulkan) {
BackendKind::Vulkan
} else {
BackendKind::Cpu
}
}
BackendKind::Cuda => {
if detected.contains(&BackendKind::Cuda) {
BackendKind::Cuda
} else {
return Err(anyhow!(
"Requested CUDA backend but CUDA libraries/devices not detected. How to fix: install NVIDIA driver + CUDA toolkit, ensure libcudart/libcublas are in loader path, and build with --features gpu-cuda."
));
}
}
BackendKind::Hip => {
if detected.contains(&BackendKind::Hip) {
BackendKind::Hip
} else {
return Err(anyhow!(
"Requested ROCm/HIP backend but libraries/devices not detected. How to fix: install ROCm hipBLAS/rocBLAS, ensure libs are in loader path, and build with --features gpu-hip."
));
}
}
BackendKind::Vulkan => {
if detected.contains(&BackendKind::Vulkan) {
BackendKind::Vulkan
} else {
return Err(anyhow!(
"Requested Vulkan backend but libvulkan not detected. How to fix: install Vulkan loader/SDK and build with --features gpu-vulkan."
));
}
}
BackendKind::Cpu => BackendKind::Cpu,
};
if verbose {
crate::dlog!(1, "Detected backends: {:?}", detected);
crate::dlog!(1, "Selected backend: {:?}", chosen);
}
Ok(SelectionResult {
backend: instantiate_backend(chosen),
chosen,
detected,
})
}
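A hedged caller-side sketch (file name is illustrative), mirroring how main.rs later in this diff drives the selection:
let selection = select_backend(BackendKind::Auto, true)?;
let entries = selection.backend.transcribe(
    Path::new("input.wav"),
    "speaker",
    Some("en"),
    None,
    None,
)?;
println!("chosen: {:?}, segments: {}", selection.chosen, entries.len());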
// Internal helper: transcription using whisper-rs with CPU/GPU (depending on build features)
#[allow(clippy::too_many_arguments)]
pub(crate) fn transcribe_with_whisper_rs(
audio_path: &Path,
speaker: &str,
language: Option<&str>,
progress: Option<&(dyn Fn(i32) + Send + Sync)>,
) -> Result<Vec<OutputEntry>> {
let report = |p: i32| {
if let Some(cb) = progress { cb(p); }
};
report(0);
let pcm_samples = decode_audio_to_pcm_f32_ffmpeg(audio_path)?;
report(5);
let model_path = find_model_file()?;
let english_only_model = model_path
.file_name()
.and_then(|s| s.to_str())
.map(|s| s.contains(".en.") || s.ends_with(".en.bin"))
.unwrap_or(false);
if let Some(lang) = language {
if english_only_model && lang != "en" {
return Err(anyhow!(
"Selected model is English-only ({}), but a non-English language hint '{}' was provided. Please use a multilingual model or set WHISPER_MODEL.",
model_path.display(),
lang
));
}
}
let model_path_str = model_path
.to_str()
.ok_or_else(|| anyhow!("Model path not valid UTF-8: {}", model_path.display()))?;
if crate::verbose_level() < 2 {
// Some builds of whisper/ggml expect these env vars; harmless if unknown
unsafe {
std::env::set_var("GGML_LOG_LEVEL", "0");
std::env::set_var("WHISPER_PRINT_PROGRESS", "0");
}
}
let (_context, mut state) = crate::with_suppressed_stderr(|| {
let params = whisper_rs::WhisperContextParameters::default();
let context = whisper_rs::WhisperContext::new_with_params(model_path_str, params)
.with_context(|| format!("Failed to load Whisper model at {}", model_path.display()))?;
let state = context
.create_state()
.map_err(|e| anyhow!("Failed to create Whisper state: {:?}", e))?;
Ok::<_, anyhow::Error>((context, state))
})?;
report(20);
let mut full_params =
whisper_rs::FullParams::new(whisper_rs::SamplingStrategy::Greedy { best_of: 1 });
let threads = std::thread::available_parallelism()
.map(|n| n.get() as i32)
.unwrap_or(1);
full_params.set_n_threads(threads);
full_params.set_translate(false);
if let Some(lang) = language {
full_params.set_language(Some(lang));
}
report(30);
crate::with_suppressed_stderr(|| {
report(40);
state
.full(full_params, &pcm_samples)
.map_err(|e| anyhow!("Whisper full() failed: {:?}", e))
})?;
report(90);
let num_segments = state
.full_n_segments()
.map_err(|e| anyhow!("Failed to get segments: {:?}", e))?;
let mut entries = Vec::new();
for seg_idx in 0..num_segments {
let segment_text = state
.full_get_segment_text(seg_idx)
.map_err(|e| anyhow!("Failed to get segment text: {:?}", e))?;
let t0 = state
.full_get_segment_t0(seg_idx)
.map_err(|e| anyhow!("Failed to get segment t0: {:?}", e))?;
let t1 = state
.full_get_segment_t1(seg_idx)
.map_err(|e| anyhow!("Failed to get segment t1: {:?}", e))?;
let start = (t0 as f64) * 0.01;
let end = (t1 as f64) * 0.01;
entries.push(OutputEntry {
id: 0,
speaker: speaker.to_string(),
start,
end,
text: segment_text.trim().to_string(),
});
}
report(100);
Ok(entries)
}

View File

@@ -1,571 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025 <COPYRIGHT HOLDER>. All rights reserved.
#![forbid(elided_lifetimes_in_paths)]
#![forbid(unused_must_use)]
#![deny(missing_docs)]
#![warn(clippy::all)]
//! PolyScribe library: business logic and core types.
//!
//! This crate exposes the reusable parts of the PolyScribe CLI as a library.
//! The binary entry point (main.rs) remains a thin CLI wrapper.
use std::sync::atomic::{AtomicBool, AtomicU8, Ordering};
// Global runtime flags
static QUIET: AtomicBool = AtomicBool::new(false);
static NO_INTERACTION: AtomicBool = AtomicBool::new(false);
static VERBOSE: AtomicU8 = AtomicU8::new(0);
static NO_PROGRESS: AtomicBool = AtomicBool::new(false);
/// Set quiet mode: when true, non-interactive logs should be suppressed.
pub fn set_quiet(enabled: bool) {
QUIET.store(enabled, Ordering::Relaxed);
}
/// Return current quiet mode state.
pub fn is_quiet() -> bool {
QUIET.load(Ordering::Relaxed)
}
/// Set non-interactive mode: when true, interactive prompts must be skipped.
pub fn set_no_interaction(enabled: bool) {
NO_INTERACTION.store(enabled, Ordering::Relaxed);
}
/// Return current non-interactive state.
pub fn is_no_interaction() -> bool {
NO_INTERACTION.load(Ordering::Relaxed)
}
/// Set verbose level (0 = normal, 1 = verbose, 2 = super-verbose)
pub fn set_verbose(level: u8) {
VERBOSE.store(level, Ordering::Relaxed);
}
/// Get current verbose level.
pub fn verbose_level() -> u8 {
VERBOSE.load(Ordering::Relaxed)
}
/// Disable interactive progress indicators (bars/spinners)
pub fn set_no_progress(enabled: bool) {
NO_PROGRESS.store(enabled, Ordering::Relaxed);
}
/// Return current no-progress state
pub fn is_no_progress() -> bool {
NO_PROGRESS.load(Ordering::Relaxed)
}
/// Check whether stdin is connected to a TTY. Used to avoid blocking prompts when not interactive.
pub fn stdin_is_tty() -> bool {
use std::io::IsTerminal as _;
std::io::stdin().is_terminal()
}
/// A guard that temporarily redirects stderr to /dev/null on Unix when quiet mode is active.
/// No-op on non-Unix or when quiet is disabled. Restores stderr on drop.
pub struct StderrSilencer {
#[cfg(unix)]
old_stderr_fd: i32,
#[cfg(unix)]
devnull_fd: i32,
active: bool,
}
impl StderrSilencer {
/// Activate stderr silencing if quiet is set and on Unix; otherwise returns a no-op guard.
pub fn activate_if_quiet() -> Self {
if !is_quiet() {
return Self {
active: false,
#[cfg(unix)]
old_stderr_fd: -1,
#[cfg(unix)]
devnull_fd: -1,
};
}
Self::activate()
}
/// Activate stderr silencing unconditionally (used internally); no-op on non-Unix.
pub fn activate() -> Self {
#[cfg(unix)]
unsafe {
let old_fd = dup(2);
if old_fd < 0 {
return Self {
active: false,
old_stderr_fd: -1,
devnull_fd: -1,
};
}
// Open /dev/null for writing
let devnull_cstr = std::ffi::CString::new("/dev/null").unwrap();
let devnull_fd = open(devnull_cstr.as_ptr(), O_WRONLY);
if devnull_fd < 0 {
close(old_fd);
return Self {
active: false,
old_stderr_fd: -1,
devnull_fd: -1,
};
}
if dup2(devnull_fd, 2) < 0 {
close(devnull_fd);
close(old_fd);
return Self {
active: false,
old_stderr_fd: -1,
devnull_fd: -1,
};
}
Self {
active: true,
old_stderr_fd: old_fd,
devnull_fd: devnull_fd,
}
}
#[cfg(not(unix))]
{
Self { active: false }
}
}
}
impl Drop for StderrSilencer {
fn drop(&mut self) {
if !self.active {
return;
}
#[cfg(unix)]
unsafe {
let _ = dup2(self.old_stderr_fd, 2);
let _ = close(self.devnull_fd);
let _ = close(self.old_stderr_fd);
}
self.active = false;
}
}
/// Run a closure while temporarily suppressing stderr on Unix when appropriate.
/// On Windows/non-Unix, this is a no-op wrapper.
/// This helper uses RAII + panic catching to ensure restoration before resuming panic.
pub fn with_suppressed_stderr<F, T>(f: F) -> T
where
F: FnOnce() -> T,
{
// Suppress noisy native logs unless super-verbose (-vv) is enabled.
if verbose_level() < 2 {
let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
let _guard = StderrSilencer::activate();
f()
}));
match result {
Ok(value) => value,
Err(panic_payload) => std::panic::resume_unwind(panic_payload),
}
} else {
f()
}
}
/// Centralized UI helpers (TTY-aware, quiet/verbose-aware)
pub mod ui;
/// Logging macros and helpers
/// Log an error using the UI helper (always printed). Recommended for user-visible errors.
#[macro_export]
macro_rules! elog {
($($arg:tt)*) => {{
$crate::ui::error(format!($($arg)*));
}}
}
/// Log a warning using the UI helper (printed even in quiet mode).
#[macro_export]
macro_rules! wlog {
($($arg:tt)*) => {{
$crate::ui::warn(format!($($arg)*));
}}
}
/// Log an informational line using the UI helper unless quiet mode is enabled.
#[macro_export]
macro_rules! ilog {
($($arg:tt)*) => {{
if !$crate::is_quiet() { $crate::ui::info(format!($($arg)*)); }
}}
}
/// Log a debug/trace line when verbose level is at least the given level (u8).
#[macro_export]
macro_rules! dlog {
($lvl:expr, $($arg:tt)*) => {{
if !$crate::is_quiet() && $crate::verbose_level() >= $lvl { $crate::ui::info(format!("DEBUG{}: {}", $lvl, format!($($arg)*))); }
}}
}
/// Backward-compatibility: map old qlog! to ilog!
#[macro_export]
macro_rules! qlog {
($($arg:tt)*) => {{ $crate::ilog!($($arg)*); }}
}
use anyhow::{Context, Result, anyhow};
use chrono::Local;
use std::env;
use std::fs::create_dir_all;
use std::path::{Path, PathBuf};
use std::process::Command;
#[cfg(unix)]
use libc::{O_WRONLY, close, dup, dup2, open};
/// Re-export backend module (GPU/CPU selection and transcription).
pub mod backend;
/// Re-export models module (model listing/downloading/updating).
pub mod models;
/// Transcript entry for a single segment.
#[derive(Debug, serde::Serialize, Clone)]
pub struct OutputEntry {
/// Sequential id in output ordering.
pub id: u64,
/// Speaker label associated with the segment.
pub speaker: String,
/// Start time in seconds.
pub start: f64,
/// End time in seconds.
pub end: f64,
/// Text content.
pub text: String,
}
/// Return a YYYY-MM-DD date prefix string for output file naming.
pub fn date_prefix() -> String {
Local::now().format("%Y-%m-%d").to_string()
}
/// Format a floating-point number of seconds as SRT timestamp (HH:MM:SS,mmm).
pub fn format_srt_time(seconds: f64) -> String {
let total_ms = (seconds * 1000.0).round() as i64;
let ms = total_ms % 1000;
let total_secs = total_ms / 1000;
let sec = total_secs % 60;
let min = (total_secs / 60) % 60;
let hour = total_secs / 3600;
format!("{hour:02}:{min:02}:{sec:02},{ms:03}")
}
/// Render a list of transcript entries to SRT format.
pub fn render_srt(entries: &[OutputEntry]) -> String {
let mut srt = String::new();
for (index, entry) in entries.iter().enumerate() {
let srt_index = index + 1;
srt.push_str(&format!("{srt_index}\n"));
srt.push_str(&format!(
"{} --> {}\n",
format_srt_time(entry.start),
format_srt_time(entry.end)
));
if !entry.speaker.is_empty() {
srt.push_str(&format!("{}: {}\n", entry.speaker, entry.text));
} else {
srt.push_str(&format!("{}\n", entry.text));
}
srt.push('\n');
}
srt
}
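A small worked example (values chosen for illustration) of what a single entry renders to:
let entries = vec![OutputEntry {
    id: 0,
    speaker: "Alice".to_string(),
    start: 1.0,
    end: 2.5,
    text: "Hello".to_string(),
}];
assert_eq!(
    render_srt(&entries),
    "1\n00:00:01,000 --> 00:00:02,500\nAlice: Hello\n\n"
);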
/// Determine the default models directory, honoring POLYSCRIBE_MODELS_DIR override.
pub fn models_dir_path() -> PathBuf {
if let Ok(env_val) = env::var("POLYSCRIBE_MODELS_DIR") {
let env_path = PathBuf::from(env_val);
if !env_path.as_os_str().is_empty() {
return env_path;
}
}
if cfg!(debug_assertions) {
return PathBuf::from("models");
}
if let Ok(xdg) = env::var("XDG_DATA_HOME") {
if !xdg.is_empty() {
return PathBuf::from(xdg).join("polyscribe").join("models");
}
}
if let Ok(home) = env::var("HOME") {
if !home.is_empty() {
return PathBuf::from(home)
.join(".local")
.join("share")
.join("polyscribe")
.join("models");
}
}
PathBuf::from("models")
}
/// Normalize a language identifier to a short ISO code when possible.
pub fn normalize_lang_code(input: &str) -> Option<String> {
let mut lang = input.trim().to_lowercase();
if lang.is_empty() || lang == "auto" || lang == "c" || lang == "posix" {
return None;
}
if let Some((prefix, _)) = lang.split_once('.') {
lang = prefix.to_string();
}
if let Some((prefix, _)) = lang.split_once('_') {
lang = prefix.to_string();
}
let code = match lang.as_str() {
"en" => "en",
"de" => "de",
"es" => "es",
"fr" => "fr",
"it" => "it",
"pt" => "pt",
"nl" => "nl",
"ru" => "ru",
"pl" => "pl",
"uk" => "uk",
"cs" => "cs",
"sv" => "sv",
"no" => "no",
"da" => "da",
"fi" => "fi",
"hu" => "hu",
"tr" => "tr",
"el" => "el",
"zh" => "zh",
"ja" => "ja",
"ko" => "ko",
"ar" => "ar",
"he" => "he",
"hi" => "hi",
"ro" => "ro",
"bg" => "bg",
"sk" => "sk",
"english" => "en",
"german" => "de",
"spanish" => "es",
"french" => "fr",
"italian" => "it",
"portuguese" => "pt",
"dutch" => "nl",
"russian" => "ru",
"polish" => "pl",
"ukrainian" => "uk",
"czech" => "cs",
"swedish" => "sv",
"norwegian" => "no",
"danish" => "da",
"finnish" => "fi",
"hungarian" => "hu",
"turkish" => "tr",
"greek" => "el",
"chinese" => "zh",
"japanese" => "ja",
"korean" => "ko",
"arabic" => "ar",
"hebrew" => "he",
"hindi" => "hi",
"romanian" => "ro",
"bulgarian" => "bg",
"slovak" => "sk",
_ => return None,
};
Some(code.to_string())
}
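A few worked examples of the normalization (asserts for illustration only):
assert_eq!(normalize_lang_code("de_DE.UTF-8"), Some("de".to_string()));
assert_eq!(normalize_lang_code("German"), Some("de".to_string()));
assert_eq!(normalize_lang_code("auto"), None);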
/// Locate a Whisper model file, prompting user to download/select when necessary.
pub fn find_model_file() -> Result<PathBuf> {
let models_dir_buf = models_dir_path();
let models_dir = models_dir_buf.as_path();
if !models_dir.exists() {
create_dir_all(models_dir).with_context(|| {
format!(
"Failed to create models directory: {}",
models_dir.display()
)
})?;
}
if let Ok(env_model) = env::var("WHISPER_MODEL") {
let model_path = PathBuf::from(env_model);
if model_path.is_file() {
let _ = std::fs::write(models_dir.join(".last_model"), model_path.display().to_string());
return Ok(model_path);
}
}
// Non-interactive mode: automatic selection and optional download
if crate::is_no_interaction() {
if let Some(local) = crate::models::pick_best_local_model(models_dir) {
let _ = std::fs::write(models_dir.join(".last_model"), local.display().to_string());
return Ok(local);
} else {
ilog!("No local models found; downloading large-v3-turbo-q8_0...");
let path = crate::models::ensure_model_available_noninteractive("large-v3-turbo-q8_0")
.with_context(|| "Failed to download required model 'large-v3-turbo-q8_0'")?;
let _ = std::fs::write(models_dir.join(".last_model"), path.display().to_string());
return Ok(path);
}
}
let mut candidates: Vec<PathBuf> = Vec::new();
let dir_entries = std::fs::read_dir(models_dir)
.with_context(|| format!("Failed to read models directory: {}", models_dir.display()))?;
for entry in dir_entries {
let entry = entry?;
let path = entry.path();
if path.is_file() {
if let Some(ext) = path
.extension()
.and_then(|s| s.to_str())
.map(|s| s.to_lowercase())
{
if ext == "bin" {
candidates.push(path);
}
}
}
}
if candidates.is_empty() {
// No models found: prompt interactively (TTY only)
wlog!(
"{}",
format!(
"No Whisper model files (*.bin) found in {}.",
models_dir.display()
)
);
if crate::is_no_interaction() || !crate::stdin_is_tty() {
return Err(anyhow!(
"No models available and interactive mode is disabled. Please set WHISPER_MODEL or run with --download-models."
));
}
let input = crate::ui::prompt_line("Would you like to download models now? [Y/n]: ").unwrap_or_default();
let answer = input.trim().to_lowercase();
if answer.is_empty() || answer == "y" || answer == "yes" {
if let Err(e) = models::run_interactive_model_downloader() {
elog!("Downloader failed: {:#}", e);
}
candidates.clear();
let dir_entries2 = std::fs::read_dir(models_dir).with_context(|| {
format!("Failed to read models directory: {}", models_dir.display())
})?;
for entry in dir_entries2 {
let entry = entry?;
let path = entry.path();
if path.is_file() {
if let Some(ext) = path
.extension()
.and_then(|s| s.to_str())
.map(|s| s.to_lowercase())
{
if ext == "bin" {
candidates.push(path);
}
}
}
}
}
}
if candidates.is_empty() {
return Err(anyhow!(
"No Whisper model files (*.bin) available in {}",
models_dir.display()
));
}
if candidates.len() == 1 {
let only_model = candidates.remove(0);
let _ = std::fs::write(models_dir.join(".last_model"), only_model.display().to_string());
return Ok(only_model);
}
let last_file = models_dir.join(".last_model");
if let Ok(previous_content) = std::fs::read_to_string(&last_file) {
let previous_content = previous_content.trim();
if !previous_content.is_empty() {
let previous_path = PathBuf::from(previous_content);
if previous_path.is_file() && candidates.iter().any(|c| c == &previous_path) {
return Ok(previous_path);
}
}
}
crate::ui::println_above_bars(format!("Multiple Whisper models found in {}:", models_dir.display()));
for (index, path) in candidates.iter().enumerate() {
crate::ui::println_above_bars(format!(" {}) {}", index + 1, path.display()));
}
let input = crate::ui::prompt_line(&format!("Select model by number [1-{}]: ", candidates.len()))
.map_err(|_| anyhow!("Failed to read selection"))?;
let selection: usize = input
.trim()
.parse()
.map_err(|_| anyhow!("Invalid selection: {}", input.trim()))?;
if selection == 0 || selection > candidates.len() {
return Err(anyhow!("Selection out of range"));
}
let chosen = candidates.swap_remove(selection - 1);
let _ = std::fs::write(models_dir.join(".last_model"), chosen.display().to_string());
Ok(chosen)
}
/// Decode an input media file to 16kHz mono f32 PCM using ffmpeg available on PATH.
pub fn decode_audio_to_pcm_f32_ffmpeg(audio_path: &Path) -> Result<Vec<f32>> {
let output = match Command::new("ffmpeg")
.arg("-i")
.arg(audio_path)
.arg("-f")
.arg("f32le")
.arg("-ac")
.arg("1")
.arg("-ar")
.arg("16000")
.arg("pipe:1")
.output()
{
Ok(o) => o,
Err(e) => {
if e.kind() == std::io::ErrorKind::NotFound {
return Err(anyhow!(
"ffmpeg not found on PATH. Please install ffmpeg and ensure it is available."
));
} else {
return Err(anyhow!(
"Failed to execute ffmpeg for {}: {}",
audio_path.display(),
e
));
}
}
};
if !output.status.success() {
let stderr_str = String::from_utf8_lossy(&output.stderr);
return Err(anyhow!(
"Failed to decode audio from {} using ffmpeg. This may indicate the file is not a valid or supported audio/video file, is corrupted, or cannot be opened. ffmpeg stderr: {}",
audio_path.display(),
stderr_str.trim()
));
}
let data = output.stdout;
if data.len() % 4 != 0 {
let truncated = data.len() - (data.len() % 4);
let mut samples = Vec::with_capacity(truncated / 4);
for chunk in data[..truncated].chunks_exact(4) {
let arr = [chunk[0], chunk[1], chunk[2], chunk[3]];
samples.push(f32::from_le_bytes(arr));
}
Ok(samples)
} else {
let mut samples = Vec::with_capacity(data.len() / 4);
for chunk in data.chunks_exact(4) {
let arr = [chunk[0], chunk[1], chunk[2], chunk[3]];
samples.push(f32::from_le_bytes(arr));
}
Ok(samples)
}
}

View File

@@ -1,483 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025 <COPYRIGHT HOLDER>. All rights reserved.
use std::fs::{File, create_dir_all};
use std::io::{self, Read, Write};
use std::path::{Path, PathBuf};
use anyhow::{Context, Result, anyhow};
use clap::{Parser, Subcommand, ValueEnum, CommandFactory};
use clap_complete::Shell;
use serde::{Deserialize, Serialize};
use polyscribe::{OutputEntry, date_prefix, normalize_lang_code, render_srt};
#[derive(Subcommand, Debug, Clone)]
enum AuxCommands {
Completions {
#[arg(value_enum)]
shell: Shell,
},
Man,
}
#[derive(ValueEnum, Debug, Clone, Copy)]
#[value(rename_all = "kebab-case")]
enum GpuBackendCli {
Auto,
Cpu,
Cuda,
Hip,
Vulkan,
}
#[derive(Parser, Debug)]
#[command(
name = "PolyScribe",
bin_name = "polyscribe",
version,
about = "Merge JSON transcripts or transcribe audio using native whisper"
)]
struct Args {
/// Increase verbosity (-v, -vv). Repeat to increase.
/// Debug logs appear with -v; very verbose with -vv. Logs go to stderr.
#[arg(short = 'v', long = "verbose", action = clap::ArgAction::Count, global = true)]
verbose: u8,
/// Quiet mode: suppress non-error logging on stderr (overrides -v)
/// Does not suppress interactive prompts or stdout output.
#[arg(short = 'q', long = "quiet", global = true)]
quiet: bool,
/// Non-interactive mode: never prompt; use defaults instead.
#[arg(long = "no-interaction", global = true)]
no_interaction: bool,
/// Disable interactive progress indicators (bars/spinners)
#[arg(long = "no-progress", global = true)]
no_progress: bool,
/// Optional auxiliary subcommands (completions, man)
#[command(subcommand)]
aux: Option<AuxCommands>,
/// Input .json transcript files or audio files to merge/transcribe
inputs: Vec<String>,
/// Output file path base or directory (date prefix added).
/// In merge mode: base path.
/// In separate mode: directory.
/// If omitted: prints JSON to stdout for merge mode; separate mode requires directory for multiple inputs.
#[arg(short, long, value_name = "FILE")]
output: Option<String>,
/// Merge all inputs into a single output; if not set, each input is written as a separate output
#[arg(short = 'm', long = "merge")]
merge: bool,
/// Merge and also write separate outputs per input; requires -o OUTPUT_DIR
#[arg(long = "merge-and-separate")]
merge_and_separate: bool,
/// Prompt for speaker names per input file
#[arg(long = "set-speaker-names")]
set_speaker_names: bool,
/// Language code to use for transcription (e.g., en, de). No auto-detection.
#[arg(short, long, value_name = "LANG")]
language: Option<String>,
/// Launch interactive model downloader (list HF models, multi-select and download)
#[arg(long)]
download_models: bool,
/// Update local Whisper models by comparing hashes/sizes with remote manifest
#[arg(long)]
update_models: bool,
}
#[derive(Debug, Deserialize)]
struct InputRoot {
#[serde(default)]
segments: Vec<InputSegment>,
}
#[derive(Debug, Deserialize)]
struct InputSegment {
start: f64,
end: f64,
text: String,
}
#[derive(Debug, Serialize)]
struct OutputRoot {
items: Vec<OutputEntry>,
}
fn is_json_file(path: &Path) -> bool {
matches!(path.extension().and_then(|s| s.to_str()).map(|s| s.to_lowercase()), Some(ext) if ext == "json")
}
fn is_audio_file(path: &Path) -> bool {
if let Some(ext) = path.extension().and_then(|s| s.to_str()).map(|s| s.to_lowercase()) {
let exts = [
"mp3", "wav", "m4a", "mp4", "aac", "flac", "ogg", "wma", "webm", "mkv", "mov", "avi",
"m4b", "3gp", "opus", "aiff", "alac",
];
return exts.contains(&ext.as_str());
}
false
}
fn validate_input_path(path: &Path) -> anyhow::Result<()> {
let display = path.display();
if !path.exists() {
return Err(anyhow!("Input not found: {}", display));
}
let metadata = std::fs::metadata(path).with_context(|| format!("Failed to stat input: {}", display))?;
if metadata.is_dir() {
return Err(anyhow!("Input is a directory (expected a file): {}", display));
}
std::fs::File::open(path)
.with_context(|| format!("Failed to open input file: {}", display))
.map(|_| ())
}
fn sanitize_speaker_name(raw: &str) -> String {
if let Some((prefix, rest)) = raw.split_once('-') {
if !prefix.is_empty() && prefix.chars().all(|c| c.is_ascii_digit()) {
return rest.to_string();
}
}
raw.to_string()
}
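Worked examples of the prefix-stripping heuristic (asserts for illustration only):
assert_eq!(sanitize_speaker_name("01-Alice"), "Alice");
assert_eq!(sanitize_speaker_name("Alice"), "Alice");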
fn prompt_speaker_name_for_path(
_path: &Path,
default_name: &str,
enabled: bool,
) -> String {
if !enabled || polyscribe::is_no_interaction() {
return sanitize_speaker_name(default_name);
}
// TODO implement cliclack for this
let mut input_line = String::new();
match std::io::stdin().read_line(&mut input_line) {
Ok(_) => {
let trimmed = input_line.trim();
if trimmed.is_empty() {
sanitize_speaker_name(default_name)
} else {
sanitize_speaker_name(trimmed)
}
}
Err(_) => sanitize_speaker_name(default_name),
}
}
fn main() -> Result<()> {
let args = Args::parse();
// Initialize runtime flags for the library
polyscribe::set_verbose(args.verbose);
polyscribe::set_quiet(args.quiet);
polyscribe::set_no_interaction(args.no_interaction);
polyscribe::set_no_progress(args.no_progress);
// Handle aux subcommands
if let Some(aux) = &args.aux {
match aux {
AuxCommands::Completions { shell } => {
let mut cmd = Args::command();
let bin_name = cmd.get_name().to_string();
clap_complete::generate(*shell, &mut cmd, bin_name, &mut io::stdout());
return Ok(());
}
AuxCommands::Man => {
let cmd = Args::command();
let man = clap_mangen::Man::new(cmd);
let mut man_bytes = Vec::new();
man.render(&mut man_bytes)?;
io::stdout().write_all(&man_bytes)?;
return Ok(());
}
}
}
// Optional model management actions
if args.download_models {
if let Err(err) = polyscribe::models::run_interactive_model_downloader() {
polyscribe::elog!("Model downloader failed: {:#}", err);
}
if args.inputs.is_empty() {
return Ok(())
}
}
if args.update_models {
if let Err(err) = polyscribe::models::update_local_models() {
polyscribe::elog!("Model update failed: {:#}", err);
return Err(err);
}
if args.inputs.is_empty() {
return Ok(())
}
}
// Process inputs
let mut inputs = args.inputs;
if inputs.is_empty() {
return Err(anyhow!("No input files provided"));
}
// If the last argument looks like an output path and does not exist on disk, treat it as -o when multiple inputs are given
let mut output_path = args.output;
if output_path.is_none() && inputs.len() >= 2 {
if let Some(candidate_output) = inputs.last().cloned() {
if !Path::new(&candidate_output).exists() {
inputs.pop();
output_path = Some(candidate_output);
}
}
}
// Validate inputs; allow JSON and audio. For audio, require --language.
for input_arg in &inputs {
let path_ref = Path::new(input_arg);
validate_input_path(path_ref)?;
if !(is_json_file(path_ref) || is_audio_file(path_ref)) {
return Err(anyhow!(
"Unsupported input type (expected .json transcript or audio media): {}",
path_ref.display()
));
}
if is_audio_file(path_ref) && args.language.is_none() {
return Err(anyhow!("Please specify --language (e.g., --language en). Language detection was removed."));
}
}
// Derive speakers (prompt if requested)
let speakers: Vec<String> = inputs
.iter()
.map(|input_path| {
let path = Path::new(input_path);
let default_speaker = sanitize_speaker_name(
path.file_stem().and_then(|s| s.to_str()).unwrap_or("speaker"),
);
prompt_speaker_name_for_path(path, &default_speaker, args.set_speaker_names)
})
.collect();
// MERGE-AND-SEPARATE mode
if args.merge_and_separate {
polyscribe::dlog!(1, "Mode: merge-and-separate; output_dir={:?}", output_path);
let out_dir = match output_path.as_ref() {
Some(p) => PathBuf::from(p),
None => return Err(anyhow!("--merge-and-separate requires -o OUTPUT_DIR")),
};
if !out_dir.as_os_str().is_empty() {
create_dir_all(&out_dir).with_context(|| {
format!("Failed to create output directory: {}", out_dir.display())
})?;
}
let mut merged_entries: Vec<OutputEntry> = Vec::new();
for (idx, input_path) in inputs.iter().enumerate() {
let path = Path::new(input_path);
let speaker = speakers[idx].clone();
// Decide based on input type (JSON transcript vs audio to transcribe)
// TODO remove duplicate
let mut entries: Vec<OutputEntry> = if is_json_file(path) {
let mut buf = String::new();
File::open(path)
.with_context(|| format!("Failed to open: {input_path}"))?
.read_to_string(&mut buf)
.with_context(|| format!("Failed to read: {input_path}"))?;
let root: InputRoot = serde_json::from_str(&buf)
.with_context(|| format!("Invalid JSON transcript parsed from {input_path}"))?;
root
.segments
.into_iter()
.map(|seg| OutputEntry { id: 0, speaker: speaker.clone(), start: seg.start, end: seg.end, text: seg.text })
.collect()
} else {
let lang_norm: Option<String> = args.language.as_deref().and_then(|s| normalize_lang_code(s));
let selected_backend = polyscribe::backend::select_backend(polyscribe::backend::BackendKind::Auto, args.verbose > 0)?;
selected_backend.backend.transcribe(path, &speaker, lang_norm.as_deref(), None, None)?
};
// Sort and id per-file
// TODO remove duplicate
entries.sort_by(|a, b| a.start.partial_cmp(&b.start).unwrap_or(std::cmp::Ordering::Equal)
.then(a.end.partial_cmp(&b.end).unwrap_or(std::cmp::Ordering::Equal)));
for (i, entry) in entries.iter_mut().enumerate() { entry.id = i as u64; }
// Write per-file outputs
let stem = path.file_stem().and_then(|s| s.to_str()).unwrap_or("output");
let date = date_prefix();
let base_name = format!("{date}_{stem}");
let json_path = out_dir.join(format!("{}.json", &base_name));
let toml_path = out_dir.join(format!("{}.toml", &base_name));
let srt_path = out_dir.join(format!("{}.srt", &base_name));
let output_bundle = OutputRoot { items: entries.clone() };
let mut json_file = File::create(&json_path).with_context(|| format!("Failed to create output file: {}", json_path.display()))?;
serde_json::to_writer_pretty(&mut json_file, &output_bundle)?; writeln!(&mut json_file)?;
let toml_str = toml::to_string_pretty(&output_bundle)?;
let mut toml_file = File::create(&toml_path).with_context(|| format!("Failed to create output file: {}", toml_path.display()))?;
toml_file.write_all(toml_str.as_bytes())?; if !toml_str.ends_with('\n') { writeln!(&mut toml_file)?; }
let srt_str = render_srt(&output_bundle.items);
let mut srt_file = File::create(&srt_path).with_context(|| format!("Failed to create output file: {}", srt_path.display()))?;
srt_file.write_all(srt_str.as_bytes())?;
merged_entries.extend(output_bundle.items.into_iter());
}
// Write merged outputs into out_dir
// TODO remove duplicate
merged_entries.sort_by(|a, b| a.start.partial_cmp(&b.start).unwrap_or(std::cmp::Ordering::Equal)
.then(a.end.partial_cmp(&b.end).unwrap_or(std::cmp::Ordering::Equal)));
for (index, entry) in merged_entries.iter_mut().enumerate() { entry.id = index as u64; }
let merged_output = OutputRoot { items: merged_entries };
let date = date_prefix();
let merged_base = format!("{date}_merged");
let merged_json_path = out_dir.join(format!("{}.json", &merged_base));
let merged_toml_path = out_dir.join(format!("{}.toml", &merged_base));
let merged_srt_path = out_dir.join(format!("{}.srt", &merged_base));
let mut merged_json_file = File::create(&merged_json_path).with_context(|| format!("Failed to create output file: {}", merged_json_path.display()))?;
serde_json::to_writer_pretty(&mut merged_json_file, &merged_output)?; writeln!(&mut merged_json_file)?;
let merged_toml_str = toml::to_string_pretty(&merged_output)?;
let mut merged_toml_file = File::create(&merged_toml_path).with_context(|| format!("Failed to create output file: {}", merged_toml_path.display()))?;
merged_toml_file.write_all(merged_toml_str.as_bytes())?; if !merged_toml_str.ends_with('\n') { writeln!(&mut merged_toml_file)?; }
let merged_srt_str = render_srt(&merged_output.items);
let mut merged_srt_file = File::create(&merged_srt_path).with_context(|| format!("Failed to create output file: {}", merged_srt_path.display()))?;
merged_srt_file.write_all(merged_srt_str.as_bytes())?;
return Ok(());
}
// MERGE mode
if args.merge {
polyscribe::dlog!(1, "Mode: merge; output_base={:?}", output_path);
let mut entries: Vec<OutputEntry> = Vec::new();
for (index, input_path) in inputs.iter().enumerate() {
let path = Path::new(input_path);
let speaker = speakers[index].clone();
if is_json_file(path) {
let mut buf = String::new();
File::open(path)
.with_context(|| format!("Failed to open: {}", input_path))?
.read_to_string(&mut buf)
.with_context(|| format!("Failed to read: {}", input_path))?;
let root: InputRoot = serde_json::from_str(&buf)
.with_context(|| format!("Invalid JSON transcript parsed from {}", input_path))?;
for seg in root.segments {
entries.push(OutputEntry { id: 0, speaker: speaker.clone(), start: seg.start, end: seg.end, text: seg.text });
}
} else {
let lang_norm: Option<String> = args.language.as_deref().and_then(|s| normalize_lang_code(s));
let selected_backend = polyscribe::backend::select_backend(polyscribe::backend::BackendKind::Auto, args.verbose > 0)?;
let mut new_entries = selected_backend.backend.transcribe(path, &speaker, lang_norm.as_deref(), None, None)?;
entries.append(&mut new_entries);
}
}
// TODO remove duplicate
entries.sort_by(|a, b| a.start.partial_cmp(&b.start).unwrap_or(std::cmp::Ordering::Equal)
.then(a.end.partial_cmp(&b.end).unwrap_or(std::cmp::Ordering::Equal)));
for (i, entry) in entries.iter_mut().enumerate() { entry.id = i as u64; }
let output_bundle = OutputRoot { items: entries };
if let Some(path) = output_path {
let base_path = Path::new(&path);
let parent_opt = base_path.parent();
if let Some(parent) = parent_opt {
if !parent.as_os_str().is_empty() {
create_dir_all(parent).with_context(|| {
format!("Failed to create parent directory for output: {}", parent.display())
})?;
}
}
let stem = base_path.file_stem().and_then(|s| s.to_str()).unwrap_or("output");
let date = date_prefix();
let base_name = format!("{}_{}", date, stem);
let dir = parent_opt.unwrap_or(Path::new(""));
let json_path = dir.join(format!("{}.json", &base_name));
let toml_path = dir.join(format!("{}.toml", &base_name));
let srt_path = dir.join(format!("{}.srt", &base_name));
let mut json_file = File::create(&json_path).with_context(|| format!("Failed to create output file: {}", json_path.display()))?;
serde_json::to_writer_pretty(&mut json_file, &output_bundle)?; writeln!(&mut json_file)?;
let toml_str = toml::to_string_pretty(&output_bundle)?;
let mut toml_file = File::create(&toml_path).with_context(|| format!("Failed to create output file: {}", toml_path.display()))?;
toml_file.write_all(toml_str.as_bytes())?; if !toml_str.ends_with('\n') { writeln!(&mut toml_file)?; }
let srt_str = render_srt(&output_bundle.items);
let mut srt_file = File::create(&srt_path).with_context(|| format!("Failed to create output file: {}", srt_path.display()))?;
srt_file.write_all(srt_str.as_bytes())?;
} else {
let stdout = io::stdout();
let mut handle = stdout.lock();
serde_json::to_writer_pretty(&mut handle, &output_bundle)?; writeln!(&mut handle)?;
}
return Ok(());
}
// SEPARATE (default)
polyscribe::dlog!(1, "Mode: separate; output_dir={:?}", output_path);
if output_path.is_none() && inputs.len() > 1 {
return Err(anyhow!("Multiple inputs without --merge require -o OUTPUT_DIR to write separate files"));
}
let out_dir: Option<PathBuf> = output_path.as_ref().map(PathBuf::from);
if let Some(dir) = &out_dir {
if !dir.as_os_str().is_empty() {
create_dir_all(dir).with_context(|| format!("Failed to create output directory: {}", dir.display()))?;
}
}
for (index, input_path) in inputs.iter().enumerate() {
let path = Path::new(input_path);
let speaker = speakers[index].clone();
// TODO remove duplicate
let mut entries: Vec<OutputEntry> = if is_json_file(path) {
let mut buf = String::new();
File::open(path)
.with_context(|| format!("Failed to open: {input_path}"))?
.read_to_string(&mut buf)
.with_context(|| format!("Failed to read: {input_path}"))?;
let root: InputRoot = serde_json::from_str(&buf).with_context(|| format!("Invalid JSON transcript parsed from {input_path}"))?;
root
.segments
.into_iter()
.map(|seg| OutputEntry { id: 0, speaker: speaker.clone(), start: seg.start, end: seg.end, text: seg.text })
.collect()
} else {
// Audio file: transcribe to entries
let lang_norm: Option<String> = args.language.as_deref().and_then(|s| normalize_lang_code(s));
let selected_backend = polyscribe::backend::select_backend(polyscribe::backend::BackendKind::Auto, args.verbose > 0)?;
selected_backend.backend.transcribe(path, &speaker, lang_norm.as_deref(), None, None)?
};
// TODO remove duplicate
entries.sort_by(|a, b| a.start.partial_cmp(&b.start).unwrap_or(std::cmp::Ordering::Equal)
.then(a.end.partial_cmp(&b.end).unwrap_or(std::cmp::Ordering::Equal)));
for (i, entry) in entries.iter_mut().enumerate() { entry.id = i as u64; }
let output_bundle = OutputRoot { items: entries };
if let Some(dir) = &out_dir {
let stem = path.file_stem().and_then(|s| s.to_str()).unwrap_or("output");
let date = date_prefix();
let base_name = format!("{date}_{stem}");
let json_path = dir.join(format!("{}.json", &base_name));
let toml_path = dir.join(format!("{}.toml", &base_name));
let srt_path = dir.join(format!("{}.srt", &base_name));
let mut json_file = File::create(&json_path).with_context(|| format!("Failed to create output file: {}", json_path.display()))?;
serde_json::to_writer_pretty(&mut json_file, &output_bundle)?; writeln!(&mut json_file)?;
let toml_str = toml::to_string_pretty(&output_bundle)?;
let mut toml_file = File::create(&toml_path).with_context(|| format!("Failed to create output file: {}", toml_path.display()))?;
toml_file.write_all(toml_str.as_bytes())?; if !toml_str.ends_with('\n') { writeln!(&mut toml_file)?; }
let srt_str = render_srt(&output_bundle.items);
let mut srt_file = File::create(&srt_path).with_context(|| format!("Failed to create output file: {}", srt_path.display()))?;
srt_file.write_all(srt_str.as_bytes())?;
} else {
let stdout = io::stdout();
let mut handle = stdout.lock();
serde_json::to_writer_pretty(&mut handle, &output_bundle)?; writeln!(&mut handle)?;
}
}
Ok(())
}

View File

@@ -1,146 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025 <COPYRIGHT HOLDER>. All rights reserved.
//! Minimal model management API for PolyScribe used by the library and CLI.
//! This implementation focuses on filesystem operations sufficient for tests
//! and basic non-interactive workflows. It can be extended later to support
//! remote discovery and verification.
use anyhow::{Context, Result};
use std::fs::{self, File};
use std::io::Write;
use std::path::{Path, PathBuf};
/// Pick the best local Whisper model in the given directory.
///
/// Heuristic: choose the largest .bin file by size. Returns None if none found.
pub fn pick_best_local_model(dir: &Path) -> Option<PathBuf> {
let rd = fs::read_dir(dir).ok()?;
rd.flatten()
.map(|e| e.path())
.filter(|p| p.is_file() && p.extension().and_then(|s| s.to_str()).is_some_and(|s| s.eq_ignore_ascii_case("bin")))
.filter_map(|p| fs::metadata(&p).ok().map(|md| (md.len(), p)))
.max_by_key(|(sz, _)| *sz)
.map(|(_, p)| p)
}
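A hedged usage sketch (directory path is illustrative):
if let Some(best) = pick_best_local_model(Path::new("/data/polyscribe/models")) {
    println!("best local model: {}", best.display());
}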
/// Ensure a model file with the given short name exists locally (non-interactive).
///
/// This stub creates a small placeholder file named `<name>.bin` inside the models dir
/// if it does not yet exist, and returns its path. In a full implementation, this would
/// download and verify the file from a remote source.
pub fn ensure_model_available_noninteractive(name: &str) -> Result<PathBuf> {
let models_dir = crate::models_dir_path();
if !models_dir.exists() {
fs::create_dir_all(&models_dir).with_context(|| {
format!("Failed to create models dir: {}", models_dir.display())
})?;
}
let filename = if name.ends_with(".bin") { name.to_string() } else { format!("{}.bin", name) };
let path = models_dir.join(filename);
if !path.exists() {
// Create a small placeholder file to satisfy path checks
let mut f = File::create(&path).with_context(|| format!("Failed to create model file: {}", path.display()))?;
// Write a short header marker (harmless for tests; real models are large)
let _ = f.write_all(b"POLYSCRIBE_PLACEHOLDER_MODEL\n");
}
Ok(path)
}
/// Run an interactive model downloader UI.
///
/// Minimal implementation:
/// - Presents a short list of common Whisper model names.
/// - Prompts the user to select models by comma-separated indices.
/// - Ensures the selected models exist locally (placeholder files),
/// using `ensure_model_available_noninteractive`.
/// - Respects --no-interaction by returning early with an info message.
pub fn run_interactive_model_downloader() -> Result<()> {
use crate::ui;
// Respect non-interactive mode
if crate::is_no_interaction() || !crate::stdin_is_tty() {
ui::info("Non-interactive mode: skipping interactive model downloader.");
return Ok(());
}
// Available models (ordered from small to large). In a full implementation,
// this would come from a remote manifest.
let available = vec![
("tiny.en", "English-only tiny model (~75 MB)"),
("tiny", "Multilingual tiny model (~75 MB)"),
("base.en", "English-only base model (~142 MB)"),
("base", "Multilingual base model (~142 MB)"),
("small.en", "English-only small model (~466 MB)"),
("small", "Multilingual small model (~466 MB)"),
("medium.en", "English-only medium model (~1.5 GB)"),
("medium", "Multilingual medium model (~1.5 GB)"),
("large-v2", "Multilingual large v2 (~3.1 GB)"),
("large-v3", "Multilingual large v3 (~3.1 GB)"),
("large-v3-turbo", "Multilingual large v3 turbo (~1.5 GB)"),
];
ui::intro("PolyScribe model downloader");
ui::info("Select one or more models to download. Enter comma-separated numbers (e.g., 1,3,4). Press Enter to accept default [1].");
ui::println_above_bars("Available models:");
for (i, (name, desc)) in available.iter().enumerate() {
ui::println_above_bars(format!(" {}. {:<16} {}", i + 1, name, desc));
}
let answer = ui::prompt_input("Your selection", Some("1"))?;
let selection_raw = match answer {
Some(s) => s.trim().to_string(),
None => "1".to_string(),
};
let selection = if selection_raw.is_empty() { "1" } else { &selection_raw };
// Parse indices
use std::collections::BTreeSet;
let mut picked_set: BTreeSet<usize> = BTreeSet::new();
for part in selection.split([',', ' ', ';']) {
let t = part.trim();
if t.is_empty() { continue; }
match t.parse::<usize>() {
Ok(n) if (1..=available.len()).contains(&n) => {
picked_set.insert(n - 1);
}
_ => ui::warn(format!("Ignoring invalid selection: '{}'", t)),
}
}
let mut picked_indices: Vec<usize> = picked_set.into_iter().collect();
if picked_indices.is_empty() {
// Fallback to default first item
picked_indices.push(0);
}
// Prepare progress (TTY-aware)
let labels: Vec<String> = picked_indices
.iter()
.map(|&i| available[i].0.to_string())
.collect();
let mut pm = ui::progress::ProgressManager::default_for_files(labels.len());
pm.init_files(&labels);
// Ensure models exist
for (i, idx) in picked_indices.iter().enumerate() {
let (name, _desc) = available[*idx];
if let Some(pb) = pm.per_bar(i) {
pb.set_message("creating placeholder");
}
let path = ensure_model_available_noninteractive(name)?;
ui::println_above_bars(format!("Ready: {}", path.display()));
pm.mark_file_done(i);
}
if let Some(total) = pm.total_bar() { total.finish_with_message("all done"); }
ui::outro("Model selection complete.");
Ok(())
}
/// Verify/update local models by comparing with a remote manifest.
///
/// Stub that currently succeeds and logs a short message.
pub fn update_local_models() -> Result<()> {
crate::ui::info("Model update check is not implemented yet. Nothing to do.");
Ok(())
}

View File

@@ -1,84 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025 <COPYRIGHT HOLDER>. All rights reserved.
//! Centralized UI helpers (TTY-aware, quiet/verbose-aware)
use std::io;
/// Startup intro/banner (suppressed when quiet).
pub fn intro(msg: impl AsRef<str>) {
let _ = cliclack::intro(msg.as_ref());
}
/// Final outro/summary printed below any progress indicators (suppressed when quiet).
pub fn outro(msg: impl AsRef<str>) {
let _ = cliclack::outro(msg.as_ref());
}
/// Info message (TTY-aware; suppression under --quiet is handled by outer callers if needed)
pub fn info(msg: impl AsRef<str>) {
let _ = cliclack::log::info(msg.as_ref());
}
/// Print a warning (always printed).
pub fn warn(msg: impl AsRef<str>) {
// cliclack provides a warning-level log utility
let _ = cliclack::log::warning(msg.as_ref());
}
/// Print an error (always printed).
pub fn error(msg: impl AsRef<str>) {
let _ = cliclack::log::error(msg.as_ref());
}
/// Print a line above any progress bars (maps to cliclack log; synchronized).
pub fn println_above_bars(msg: impl AsRef<str>) {
if crate::is_quiet() { return; }
// cliclack logs are synchronized with its spinners/bars
let _ = cliclack::log::info(msg.as_ref());
}
/// Input prompt with a question: returns Ok(None) if non-interactive or canceled
pub fn prompt_input(question: impl AsRef<str>, default: Option<&str>) -> anyhow::Result<Option<String>> {
if crate::is_no_interaction() || !crate::stdin_is_tty() {
return Ok(None);
}
let mut p = cliclack::input(question.as_ref());
if let Some(d) = default {
// Use default_input when available in 0.3.x
p = p.default_input(d);
}
match p.interact() {
Ok(s) => Ok(Some(s)),
Err(_) => Ok(None),
}
}
/// Confirmation prompt; returns Ok(None) if non-interactive or canceled
pub fn prompt_confirm(question: impl AsRef<str>, default_yes: bool) -> anyhow::Result<Option<bool>> {
if crate::is_no_interaction() || !crate::stdin_is_tty() {
return Ok(None);
}
let res = cliclack::confirm(question.as_ref())
.initial_value(default_yes)
.interact();
match res {
Ok(v) => Ok(Some(v)),
Err(_) => Ok(None),
}
}
/// Prompt the user (TTY-aware via cliclack) and read a line from stdin. Returns the raw line (trailing newline included); callers are expected to trim it.
pub fn prompt_line(prompt: &str) -> io::Result<String> {
// Route prompt through cliclack to keep consistent styling and avoid direct eprint!/println!
let _ = cliclack::log::info(prompt);
let mut s = String::new();
io::stdin().read_line(&mut s)?;
Ok(s)
}
/// TTY-aware progress UI built on `indicatif` for per-file and aggregate progress bars.
///
/// This small helper encapsulates a `MultiProgress` with one aggregate (total) bar and
/// one per-file bar. It is intentionally minimal to keep integration lightweight.
pub mod progress;

View File

@@ -1,81 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025 <COPYRIGHT HOLDER>. All rights reserved.
use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
use std::io::IsTerminal as _;
/// Manages a set of per-file progress bars plus a top aggregate bar.
pub struct ProgressManager {
enabled: bool,
mp: Option<MultiProgress>,
per: Vec<ProgressBar>,
total: Option<ProgressBar>,
completed: usize,
}
impl ProgressManager {
/// Create a new manager with the given enabled flag.
pub fn new(enabled: bool) -> Self {
Self { enabled, mp: None, per: Vec::new(), total: None, completed: 0 }
}
/// Create a manager that enables bars when `n > 1`, stderr is a TTY, and neither quiet nor no-progress mode is set.
pub fn default_for_files(n: usize) -> Self {
let enabled = n > 1 && std::io::stderr().is_terminal() && !crate::is_quiet() && !crate::is_no_progress();
Self::new(enabled)
}
/// Initialize bars for the given file labels. If disabled or single file, no-op.
pub fn init_files(&mut self, labels: &[String]) {
if !self.enabled || labels.len() <= 1 {
// No bars in single-file mode or when disabled
self.enabled = false;
return;
}
let mp = MultiProgress::new();
// Aggregate bar at the top
let total = mp.add(ProgressBar::new(labels.len() as u64));
total.set_style(ProgressStyle::with_template("{prefix} [{bar:40.cyan/blue}] {pos}/{len}")
.unwrap()
.progress_chars("=>-"));
total.set_prefix("Total");
self.total = Some(total);
// Per-file bars
for label in labels {
let pb = mp.add(ProgressBar::new(100));
pb.set_style(ProgressStyle::with_template("{prefix} [{bar:40.green/black}] {pos}% {msg}")
.unwrap()
.progress_chars("=>-"));
pb.set_position(0);
pb.set_prefix(label.clone());
self.per.push(pb);
}
self.mp = Some(mp);
}
/// Returns true when bars are enabled (multi-file TTY mode).
pub fn is_enabled(&self) -> bool { self.enabled }
/// Get a clone of the per-file progress bar at index, if enabled.
pub fn per_bar(&self, idx: usize) -> Option<ProgressBar> {
if !self.enabled { return None; }
self.per.get(idx).cloned()
}
/// Get a clone of the aggregate (total) progress bar, if enabled.
pub fn total_bar(&self) -> Option<ProgressBar> {
if !self.enabled { return None; }
self.total.as_ref().cloned()
}
/// Mark a file as finished (set to 100% and update total counter).
pub fn mark_file_done(&mut self, idx: usize) {
if !self.enabled { return; }
if let Some(pb) = self.per.get(idx) {
pb.set_position(100);
pb.finish_with_message("done");
}
self.completed += 1;
if let Some(total) = &self.total { total.set_position(self.completed as u64); }
}
}