[feat] enhance CLI flags with --quiet and --no-interaction; update logging to respect verbosity and quiet modes; refactor log macros and add related tests
This commit is contained in:
1
Cargo.lock
generated
1
Cargo.lock
generated
@@ -1078,6 +1078,7 @@ dependencies = [
|
||||
"clap",
|
||||
"clap_complete",
|
||||
"clap_mangen",
|
||||
"libc",
|
||||
"libloading",
|
||||
"reqwest",
|
||||
"serde",
|
||||
|
@@ -27,6 +27,7 @@ sha2 = "0.10"
|
||||
# whisper-rs is always used (CPU-only by default); GPU features map onto it
|
||||
whisper-rs = { git = "https://github.com/tazz4843/whisper-rs" }
|
||||
libloading = { version = "0.8" }
|
||||
libc = "0.2"
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3"
|
||||
|
4
TODO.md
4
TODO.md
@@ -13,8 +13,8 @@
|
||||
- [x] add support for video files -> use ffmpeg to extract audio
|
||||
- [x] detect gpus and use them
|
||||
- [x] refactor project
|
||||
- add error handling
|
||||
- add verbose flag (--verbose | -v) + add logging
|
||||
- [x] add error handling
|
||||
- [x] add verbose flag (--verbose | -v) + add logging
|
||||
- add documentation
|
||||
- refactor project
|
||||
- package into executable
|
||||
|
@@ -263,8 +263,8 @@ pub fn select_backend(requested: BackendKind, verbose: bool) -> Result<Selection
|
||||
};
|
||||
|
||||
if verbose {
|
||||
eprintln!("INFO: Detected backends: {:?}", detected);
|
||||
eprintln!("INFO: Selected backend: {:?}", chosen);
|
||||
crate::dlog!(1, "Detected backends: {:?}", detected);
|
||||
crate::dlog!(1, "Selected backend: {:?}", chosen);
|
||||
}
|
||||
|
||||
Ok(SelectionResult {
|
||||
@@ -301,12 +301,25 @@ pub(crate) fn transcribe_with_whisper_rs(
|
||||
.to_str()
|
||||
.ok_or_else(|| anyhow!("Model path not valid UTF-8: {}", model.display()))?;
|
||||
|
||||
let cparams = whisper_rs::WhisperContextParameters::default();
|
||||
let ctx = whisper_rs::WhisperContext::new_with_params(model_str, cparams)
|
||||
.with_context(|| format!("Failed to load Whisper model at {}", model.display()))?;
|
||||
let mut state = ctx
|
||||
.create_state()
|
||||
.map_err(|e| anyhow!("Failed to create Whisper state: {:?}", e))?;
|
||||
// Try to reduce native library logging via environment variables when not super-verbose.
|
||||
if crate::verbose_level() < 2 {
|
||||
// These env vars are recognized by ggml/whisper in many builds; harmless if unknown.
|
||||
unsafe {
|
||||
std::env::set_var("GGML_LOG_LEVEL", "0");
|
||||
std::env::set_var("WHISPER_PRINT_PROGRESS", "0");
|
||||
}
|
||||
}
|
||||
|
||||
// Suppress stderr from whisper/ggml during model load and inference when quiet and not verbose.
|
||||
let (ctx, mut state) = crate::with_suppressed_stderr(|| {
|
||||
let cparams = whisper_rs::WhisperContextParameters::default();
|
||||
let ctx = whisper_rs::WhisperContext::new_with_params(model_str, cparams)
|
||||
.with_context(|| format!("Failed to load Whisper model at {}", model.display()))?;
|
||||
let state = ctx
|
||||
.create_state()
|
||||
.map_err(|e| anyhow!("Failed to create Whisper state: {:?}", e))?;
|
||||
Ok::<_, anyhow::Error>((ctx, state))
|
||||
})?;
|
||||
|
||||
let mut params =
|
||||
whisper_rs::FullParams::new(whisper_rs::SamplingStrategy::Greedy { best_of: 1 });
|
||||
@@ -319,9 +332,11 @@ pub(crate) fn transcribe_with_whisper_rs(
|
||||
params.set_language(Some(lang));
|
||||
}
|
||||
|
||||
state
|
||||
.full(params, &pcm)
|
||||
.map_err(|e| anyhow!("Whisper full() failed: {:?}", e))?;
|
||||
crate::with_suppressed_stderr(|| {
|
||||
state
|
||||
.full(params, &pcm)
|
||||
.map_err(|e| anyhow!("Whisper full() failed: {:?}", e))
|
||||
})?;
|
||||
|
||||
let num_segments = state
|
||||
.full_n_segments()
|
||||
|
210
src/lib.rs
210
src/lib.rs
@@ -10,6 +10,182 @@
|
||||
//! This crate exposes the reusable parts of the PolyScribe CLI as a library.
|
||||
//! The binary entry point (main.rs) remains a thin CLI wrapper.
|
||||
|
||||
use std::sync::atomic::{AtomicBool, AtomicU8, Ordering};
|
||||
|
||||
// Process-wide runtime switches. Each flag is a private atomic that is read and
// written only through the accessor pair that follows it.

// --- quiet mode -------------------------------------------------------------

static QUIET: AtomicBool = AtomicBool::new(false);

/// Turn quiet mode on or off; when on, informational logging is expected to stay silent.
pub fn set_quiet(q: bool) {
    QUIET.store(q, Ordering::Relaxed);
}

/// Report whether quiet mode is currently on.
pub fn is_quiet() -> bool {
    QUIET.load(Ordering::Relaxed)
}

// --- non-interactive mode ---------------------------------------------------

static NO_INTERACTION: AtomicBool = AtomicBool::new(false);

/// Turn non-interactive mode on or off; when on, interactive prompts must be skipped.
pub fn set_no_interaction(b: bool) {
    NO_INTERACTION.store(b, Ordering::Relaxed);
}

/// Report whether non-interactive mode is currently on.
pub fn is_no_interaction() -> bool {
    NO_INTERACTION.load(Ordering::Relaxed)
}

// --- verbosity --------------------------------------------------------------

static VERBOSE: AtomicU8 = AtomicU8::new(0);

/// Store the verbosity level (0 = normal, 1 = verbose, 2 = super-verbose).
pub fn set_verbose(level: u8) {
    VERBOSE.store(level, Ordering::Relaxed);
}

/// Read back the verbosity level last stored with [`set_verbose`] (0 if never set).
pub fn verbose_level() -> u8 {
    VERBOSE.load(Ordering::Relaxed)
}
|
||||
|
||||
/// Report whether standard input is attached to an interactive terminal.
///
/// Used to avoid blocking on prompts when input is piped or redirected.
/// Detection is delegated to the standard library's [`std::io::IsTerminal`], which is
/// implemented for stdin on every supported platform, so neither an `unsafe` FFI call nor
/// guessing from CI environment variables is needed.
pub fn stdin_is_tty() -> bool {
    // Fully qualified call so the trait does not have to be imported at file level.
    std::io::IsTerminal::is_terminal(&std::io::stdin())
}
|
||||
|
||||
/// Guard that may have redirected stderr (fd 2) to `/dev/null` and puts it back when dropped.
///
/// Redirection only ever happens on Unix; on other platforms the guard is always inert.
/// Whether a given guard is live is recorded in `active`; an inert guard does nothing on drop.
#[derive(Debug)]
#[must_use = "stderr is restored as soon as the guard is dropped"]
pub struct StderrSilencer {
    /// Duplicate of the stderr descriptor taken before redirection; -1 on an inert guard.
    #[cfg(unix)]
    old_stderr_fd: i32,
    /// Descriptor opened on `/dev/null`; -1 on an inert guard.
    #[cfg(unix)]
    devnull_fd: i32,
    /// True only when stderr was actually redirected and must be restored on drop.
    active: bool,
}
|
||||
|
||||
impl StderrSilencer {
|
||||
/// Activate stderr silencing if quiet is set and on Unix; otherwise returns a no-op guard.
|
||||
pub fn activate_if_quiet() -> Self {
|
||||
if !is_quiet() {
|
||||
return Self { active: false, #[cfg(unix)] old_stderr_fd: -1, #[cfg(unix)] devnull_fd: -1 };
|
||||
}
|
||||
Self::activate()
|
||||
}
|
||||
|
||||
/// Activate stderr silencing unconditionally (used internally); no-op on non-Unix.
|
||||
pub fn activate() -> Self {
|
||||
#[cfg(unix)]
|
||||
unsafe {
|
||||
// Duplicate current stderr (fd 2)
|
||||
let old_fd = dup(2);
|
||||
if old_fd < 0 {
|
||||
return Self { active: false, old_stderr_fd: -1, devnull_fd: -1 };
|
||||
}
|
||||
// Open /dev/null for writing
|
||||
let devnull_cstr = std::ffi::CString::new("/dev/null").unwrap();
|
||||
let dn = open(devnull_cstr.as_ptr(), O_WRONLY);
|
||||
if dn < 0 {
|
||||
// failed to open devnull; restore and bail
|
||||
close(old_fd);
|
||||
return Self { active: false, old_stderr_fd: -1, devnull_fd: -1 };
|
||||
}
|
||||
// Redirect fd 2 to devnull
|
||||
if dup2(dn, 2) < 0 {
|
||||
close(dn);
|
||||
close(old_fd);
|
||||
return Self { active: false, old_stderr_fd: -1, devnull_fd: -1 };
|
||||
}
|
||||
Self { active: true, old_stderr_fd: old_fd, devnull_fd: dn }
|
||||
}
|
||||
#[cfg(not(unix))]
|
||||
{
|
||||
Self { active: false }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for StderrSilencer {
|
||||
fn drop(&mut self) {
|
||||
if !self.active {
|
||||
return;
|
||||
}
|
||||
#[cfg(unix)]
|
||||
unsafe {
|
||||
// Restore old stderr and close devnull and old copies
|
||||
let _ = dup2(self.old_stderr_fd, 2);
|
||||
let _ = close(self.devnull_fd);
|
||||
let _ = close(self.old_stderr_fd);
|
||||
}
|
||||
self.active = false;
|
||||
}
|
||||
}
|
||||
|
||||
/// Run a closure while temporarily suppressing stderr on Unix when appropriate.
|
||||
/// On Windows/non-Unix, this is a no-op wrapper.
|
||||
/// This helper uses RAII + panic catching to ensure restoration before resuming panic.
|
||||
pub fn with_suppressed_stderr<F, T>(f: F) -> T
|
||||
where
|
||||
F: FnOnce() -> T,
|
||||
{
|
||||
// Suppress noisy native logs unless super-verbose (-vv) is enabled.
|
||||
if verbose_level() < 2 {
|
||||
let res = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
|
||||
let _guard = StderrSilencer::activate();
|
||||
f()
|
||||
}));
|
||||
match res {
|
||||
Ok(v) => v,
|
||||
Err(p) => std::panic::resume_unwind(p),
|
||||
}
|
||||
} else {
|
||||
f()
|
||||
}
|
||||
}
|
||||
|
||||
// Logging macros and helpers.

/// Log an error line to stderr, prefixed with `ERROR: `.
///
/// Always printed; neither quiet mode nor the verbosity level is consulted.
/// Takes the same arguments as `format!`.
#[macro_export]
macro_rules! elog {
    ($($arg:tt)*) => {{
        // `format_args!` writes straight to stderr instead of allocating a temporary `String`.
        eprintln!("ERROR: {}", format_args!($($arg)*));
    }};
}
|
||||
/// Log a warning line to stderr, prefixed with `WARN: `.
///
/// Always printed; quiet mode is not consulted, so warnings stay visible under `-q`.
/// Takes the same arguments as `format!`.
#[macro_export]
macro_rules! wlog {
    ($($arg:tt)*) => {{
        // `format_args!` writes straight to stderr instead of allocating a temporary `String`.
        eprintln!("WARN: {}", format_args!($($arg)*));
    }};
}
|
||||
/// Log an informational line to stderr, prefixed with `INFO: `, unless quiet mode is on.
///
/// Takes the same arguments as `format!`. The arguments are not formatted at all when
/// `$crate::is_quiet()` returns true.
#[macro_export]
macro_rules! ilog {
    ($($arg:tt)*) => {{
        if !$crate::is_quiet() {
            // `format_args!` writes straight to stderr instead of allocating a temporary `String`.
            eprintln!("INFO: {}", format_args!($($arg)*));
        }
    }};
}
|
||||
/// Log a debug/trace line to stderr when the verbosity is at least the given level.
///
/// The first argument is the required level (a `u8`); the rest are `format!` arguments.
/// The line is prefixed with `DEBUG<level>: ` and is suppressed entirely in quiet mode.
/// The level expression is evaluated exactly once per invocation, before the quiet check.
#[macro_export]
macro_rules! dlog {
    ($lvl:expr, $($arg:tt)*) => {{
        // Bind the level once: the expression is needed for both the comparison and the prefix,
        // and expanding it twice would run any side effects twice.
        let lvl: u8 = $lvl;
        if !$crate::is_quiet() && $crate::verbose_level() >= lvl {
            eprintln!("DEBUG{}: {}", lvl, format_args!($($arg)*));
        }
    }};
}
|
||||
|
||||
/// Compatibility alias kept for call sites that still use the old name.
///
/// Forwards every argument unchanged to [`ilog!`].
#[macro_export]
macro_rules! qlog {
    ($($arg:tt)*) => {{
        $crate::ilog!($($arg)*);
    }};
}
|
||||
|
||||
use anyhow::{Context, Result, anyhow};
|
||||
use chrono::Local;
|
||||
use std::env;
|
||||
@@ -18,6 +194,9 @@ use std::io::{self, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::Command;
|
||||
|
||||
#[cfg(unix)]
|
||||
use libc::{close, dup, dup2, open, O_WRONLY};
|
||||
|
||||
/// Re-export backend module (GPU/CPU selection and transcription).
|
||||
pub mod backend;
|
||||
/// Re-export models module (model listing/downloading/updating).
|
||||
@@ -196,6 +375,20 @@ pub fn find_model_file() -> Result<PathBuf> {
|
||||
}
|
||||
}
|
||||
|
||||
// Non-interactive mode: automatic selection and optional download
|
||||
if crate::is_no_interaction() {
|
||||
if let Some(local) = crate::models::pick_best_local_model(models_dir) {
|
||||
let _ = std::fs::write(models_dir.join(".last_model"), local.display().to_string());
|
||||
return Ok(local);
|
||||
} else {
|
||||
ilog!("No local models found; downloading large-v3-turbo-q8_0...");
|
||||
let path = crate::models::ensure_model_available_noninteractive("large-v3-turbo-q8_0")
|
||||
.with_context(|| "Failed to download required model 'large-v3-turbo-q8_0'")?;
|
||||
let _ = std::fs::write(models_dir.join(".last_model"), path.display().to_string());
|
||||
return Ok(path);
|
||||
}
|
||||
}
|
||||
|
||||
let mut candidates: Vec<PathBuf> = Vec::new();
|
||||
let rd = std::fs::read_dir(models_dir)
|
||||
.with_context(|| format!("Failed to read models directory: {}", models_dir.display()))?;
|
||||
@@ -216,10 +409,16 @@ pub fn find_model_file() -> Result<PathBuf> {
|
||||
}
|
||||
|
||||
if candidates.is_empty() {
|
||||
eprintln!(
|
||||
"WARN: No Whisper model files (*.bin) found in {}.",
|
||||
// No models found: prompt interactively (TTY only)
|
||||
wlog!("{}", format!(
|
||||
"No Whisper model files (*.bin) found in {}.",
|
||||
models_dir.display()
|
||||
);
|
||||
));
|
||||
if crate::is_no_interaction() || !crate::stdin_is_tty() {
|
||||
return Err(anyhow!(
|
||||
"No models available and interactive mode is disabled. Please set WHISPER_MODEL or run with --download-models."
|
||||
));
|
||||
}
|
||||
eprint!("Would you like to download models now? [Y/n]: ");
|
||||
io::stderr().flush().ok();
|
||||
let mut input = String::new();
|
||||
@@ -227,7 +426,7 @@ pub fn find_model_file() -> Result<PathBuf> {
|
||||
let ans = input.trim().to_lowercase();
|
||||
if ans.is_empty() || ans == "y" || ans == "yes" {
|
||||
if let Err(e) = models::run_interactive_model_downloader() {
|
||||
eprintln!("ERROR: Downloader failed: {:#}", e);
|
||||
elog!("Downloader failed: {:#}", e);
|
||||
}
|
||||
candidates.clear();
|
||||
let rd2 = std::fs::read_dir(models_dir).with_context(|| {
|
||||
@@ -271,7 +470,8 @@ pub fn find_model_file() -> Result<PathBuf> {
|
||||
let p = PathBuf::from(prev);
|
||||
if p.is_file() {
|
||||
if candidates.iter().any(|c| c == &p) {
|
||||
eprintln!("INFO: Using previously selected model: {}", p.display());
|
||||
// Previously printed: INFO about using previously selected model.
|
||||
// Suppress this to avoid duplicate/noisy messages; per-file progress will be shown elsewhere.
|
||||
return Ok(p);
|
||||
}
|
||||
}
|
||||
|
181
src/main.rs
181
src/main.rs
@@ -1,38 +1,17 @@
|
||||
use std::fs::{File, create_dir_all};
|
||||
use std::io::{self, Read, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
#[cfg(unix)]
|
||||
use std::os::unix::io::AsRawFd;
|
||||
|
||||
use anyhow::{Context, Result, anyhow};
|
||||
use clap::{Parser, Subcommand};
|
||||
use clap_complete::Shell;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::sync::atomic::{AtomicU8, Ordering};
|
||||
|
||||
// whisper-rs is used from the library crate
|
||||
use polyscribe::backend::{BackendKind, select_backend};
|
||||
|
||||
/// Verbosity level for this binary, stored once from the parsed CLI arguments.
static VERBOSE: AtomicU8 = AtomicU8::new(0);

/// Print an `INFO: ` line to stderr when the stored verbosity is at least `$lvl`.
///
/// Level 0 always prints. The level expression is evaluated once. The expansion is a single
/// block expression of type `()`, so the macro works in statement and expression position.
macro_rules! vlog {
    ($lvl:expr, $($arg:tt)*) => {{
        let lvl: u8 = $lvl;
        // One comparison covers every level, including levels above 2.
        if VERBOSE.load(Ordering::Relaxed) >= lvl {
            eprintln!("INFO: {}", format_args!($($arg)*));
        }
    }};
}
|
||||
|
||||
/// Print a `WARN: ` line to stderr; takes the same arguments as `format!`.
///
/// Expands to a single block expression, so it can be used in expression position.
// Allowed to be unused: not every build configuration emits warnings through this macro.
#[allow(unused_macros)]
macro_rules! warnlog {
    ($($arg:tt)*) => {{
        // `format_args!` avoids building a temporary `String` just to print it.
        eprintln!("WARN: {}", format_args!($($arg)*));
    }};
}
|
||||
|
||||
/// Print an `ERROR: ` line to stderr; takes the same arguments as `format!`.
///
/// Expands to a single block expression, so it can be used in expression position.
macro_rules! errorlog {
    ($($arg:tt)*) => {{
        // `format_args!` avoids building a temporary `String` just to print it.
        eprintln!("ERROR: {}", format_args!($($arg)*));
    }};
}
|
||||
|
||||
#[derive(Subcommand, Debug, Clone)]
|
||||
enum AuxCommands {
|
||||
@@ -64,10 +43,18 @@ enum GpuBackendCli {
|
||||
about = "Merge JSON transcripts or transcribe audio using native whisper"
|
||||
)]
|
||||
struct Args {
|
||||
/// Increase verbosity (-v, -vv). Logs go to stderr.
|
||||
/// Increase verbosity (-v, -vv). Repeat to increase. Debug logs appear with -v; very verbose with -vv. Logs go to stderr.
|
||||
#[arg(short = 'v', long = "verbose", action = clap::ArgAction::Count, global = true)]
|
||||
verbose: u8,
|
||||
|
||||
/// Quiet mode: suppress non-error logging on stderr (overrides -v). Does not suppress interactive prompts or stdout output.
|
||||
#[arg(short = 'q', long = "quiet", global = true)]
|
||||
quiet: bool,
|
||||
|
||||
/// Non-interactive mode: never prompt; use defaults instead.
|
||||
#[arg(long = "no-interaction", global = true)]
|
||||
no_interaction: bool,
|
||||
|
||||
/// Optional auxiliary subcommands (completions, man)
|
||||
#[command(subcommand)]
|
||||
aux: Option<AuxCommands>,
|
||||
@@ -146,6 +133,10 @@ fn prompt_speaker_name_for_path(path: &Path, default_name: &str, enabled: bool)
|
||||
if !enabled {
|
||||
return default_name.to_string();
|
||||
}
|
||||
if polyscribe::is_no_interaction() {
|
||||
// Explicitly non-interactive: never prompt
|
||||
return default_name.to_string();
|
||||
}
|
||||
let display_owned: String = path
|
||||
.file_name()
|
||||
.and_then(|s| s.to_str())
|
||||
@@ -203,18 +194,37 @@ impl Drop for LastModelCleanup {
|
||||
if let Err(e) = std::fs::remove_file(&self.path) {
|
||||
// Best-effort cleanup; ignore missing file; warn for other errors
|
||||
if e.kind() != std::io::ErrorKind::NotFound {
|
||||
warnlog!("Failed to remove {}: {}", self.path.display(), e);
|
||||
polyscribe::wlog!("Failed to remove {}: {}", self.path.display(), e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Run `f` and hand back its result unchanged.
///
/// `_quiet` is accepted so existing call sites keep compiling, but it is ignored: quiet mode
/// no longer redirects stdio globally, only logging is silenced. The behaviour is the same on
/// every platform, so a single definition without `cfg` gating is sufficient.
fn with_quiet_stdio_if_needed<F, R>(_quiet: bool, f: F) -> R
where
    F: FnOnce() -> R,
{
    f()
}
|
||||
|
||||
fn run() -> Result<()> {
|
||||
// Parse CLI
|
||||
let args = Args::parse();
|
||||
|
||||
// Initialize verbosity
|
||||
VERBOSE.store(args.verbose, Ordering::Relaxed);
|
||||
// Initialize runtime flags
|
||||
polyscribe::set_verbose(args.verbose);
|
||||
polyscribe::set_quiet(args.quiet);
|
||||
polyscribe::set_no_interaction(args.no_interaction);
|
||||
|
||||
// Handle auxiliary subcommands that write to stdout and exit early
|
||||
if let Some(aux) = &args.aux {
|
||||
@@ -254,12 +264,12 @@ fn run() -> Result<()> {
|
||||
GpuBackendCli::Vulkan => BackendKind::Vulkan,
|
||||
};
|
||||
let sel = select_backend(requested, args.verbose > 0)?;
|
||||
vlog!(0, "Using backend: {:?}", sel.chosen);
|
||||
polyscribe::dlog!(1, "Using backend: {:?}", sel.chosen);
|
||||
|
||||
// If requested, run the interactive model downloader first. If no inputs were provided, exit after downloading.
|
||||
if args.download_models {
|
||||
if let Err(e) = polyscribe::models::run_interactive_model_downloader() {
|
||||
errorlog!("Model downloader failed: {:#}", e);
|
||||
polyscribe::elog!("Model downloader failed: {:#}", e);
|
||||
}
|
||||
if args.inputs.is_empty() {
|
||||
return Ok(());
|
||||
@@ -269,7 +279,7 @@ fn run() -> Result<()> {
|
||||
// If requested, update local models and exit unless inputs provided to continue
|
||||
if args.update_models {
|
||||
if let Err(e) = polyscribe::models::update_local_models() {
|
||||
errorlog!("Model update failed: {:#}", e);
|
||||
polyscribe::elog!("Model update failed: {:#}", e);
|
||||
return Err(e);
|
||||
}
|
||||
// if only updating models and no inputs, exit
|
||||
@@ -279,7 +289,7 @@ fn run() -> Result<()> {
|
||||
}
|
||||
|
||||
// Determine inputs and optional output path
|
||||
vlog!(1, "Parsed {} input(s)", args.inputs.len());
|
||||
polyscribe::dlog!(1, "Parsed {} input(s)", args.inputs.len());
|
||||
let mut inputs = args.inputs;
|
||||
let mut output_path = args.output;
|
||||
if output_path.is_none() && inputs.len() >= 2 {
|
||||
@@ -308,7 +318,7 @@ fn run() -> Result<()> {
|
||||
}
|
||||
|
||||
if args.merge_and_separate {
|
||||
vlog!(1, "Mode: merge-and-separate; output_dir={:?}", output_path);
|
||||
polyscribe::dlog!(1, "Mode: merge-and-separate; output_dir={:?}", output_path);
|
||||
// Combined mode: write separate outputs per input and also a merged output set
|
||||
// Require an output directory
|
||||
let out_dir = match output_path.as_ref() {
|
||||
@@ -336,13 +346,28 @@ fn run() -> Result<()> {
|
||||
// Collect entries per file and extend merged
|
||||
let mut entries: Vec<OutputEntry> = Vec::new();
|
||||
if is_audio_file(path) {
|
||||
let items = sel.backend.transcribe(
|
||||
path,
|
||||
&speaker,
|
||||
lang_hint.as_deref(),
|
||||
args.gpu_layers,
|
||||
)?;
|
||||
entries.extend(items.into_iter());
|
||||
// Progress log to stderr (suppressed by -q); avoid partial lines
|
||||
polyscribe::ilog!("Processing file: {} ...", path.display());
|
||||
let res = with_quiet_stdio_if_needed(args.quiet, || {
|
||||
sel.backend.transcribe(
|
||||
path,
|
||||
&speaker,
|
||||
lang_hint.as_deref(),
|
||||
args.gpu_layers,
|
||||
)
|
||||
});
|
||||
match res {
|
||||
Ok(items) => {
|
||||
polyscribe::ilog!("done");
|
||||
entries.extend(items.into_iter());
|
||||
}
|
||||
Err(e) => {
|
||||
if !(polyscribe::is_no_interaction() || !polyscribe::stdin_is_tty()) {
|
||||
polyscribe::elog!("{:#}", e);
|
||||
}
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
} else if is_json_file(path) {
|
||||
let mut buf = String::new();
|
||||
File::open(path)
|
||||
@@ -461,7 +486,7 @@ fn run() -> Result<()> {
|
||||
.with_context(|| format!("Failed to create output file: {}", m_srt.display()))?;
|
||||
ms.write_all(m_srt_str.as_bytes())?;
|
||||
} else if args.merge {
|
||||
vlog!(1, "Mode: merge; output_base={:?}", output_path);
|
||||
polyscribe::dlog!(1, "Mode: merge; output_base={:?}", output_path);
|
||||
// MERGED MODE (previous default)
|
||||
let mut entries: Vec<OutputEntry> = Vec::new();
|
||||
for input_path in &inputs {
|
||||
@@ -476,16 +501,31 @@ fn run() -> Result<()> {
|
||||
|
||||
let mut buf = String::new();
|
||||
if is_audio_file(path) {
|
||||
let items = sel.backend.transcribe(
|
||||
path,
|
||||
&speaker,
|
||||
lang_hint.as_deref(),
|
||||
args.gpu_layers,
|
||||
)?;
|
||||
for e in items {
|
||||
entries.push(e);
|
||||
// Progress log to stderr (suppressed by -q)
|
||||
polyscribe::ilog!("Processing file: {} ...", path.display());
|
||||
let res = with_quiet_stdio_if_needed(args.quiet, || {
|
||||
sel.backend.transcribe(
|
||||
path,
|
||||
&speaker,
|
||||
lang_hint.as_deref(),
|
||||
args.gpu_layers,
|
||||
)
|
||||
});
|
||||
match res {
|
||||
Ok(items) => {
|
||||
polyscribe::ilog!("done");
|
||||
for e in items {
|
||||
entries.push(e);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
Err(e) => {
|
||||
if !(polyscribe::is_no_interaction() || !polyscribe::stdin_is_tty()) {
|
||||
polyscribe::elog!("{:#}", e);
|
||||
}
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
continue;
|
||||
} else if is_json_file(path) {
|
||||
File::open(path)
|
||||
.with_context(|| format!("Failed to open: {}", input_path))?
|
||||
@@ -577,7 +617,7 @@ fn run() -> Result<()> {
|
||||
writeln!(&mut handle)?;
|
||||
}
|
||||
} else {
|
||||
vlog!(1, "Mode: separate; output_dir={:?}", output_path);
|
||||
polyscribe::dlog!(1, "Mode: separate; output_dir={:?}", output_path);
|
||||
// SEPARATE MODE (default now)
|
||||
// If writing to stdout with multiple inputs, not supported
|
||||
if output_path.is_none() && inputs.len() > 1 {
|
||||
@@ -609,13 +649,28 @@ fn run() -> Result<()> {
|
||||
// Collect entries per file
|
||||
let mut entries: Vec<OutputEntry> = Vec::new();
|
||||
if is_audio_file(path) {
|
||||
let items = sel.backend.transcribe(
|
||||
path,
|
||||
&speaker,
|
||||
lang_hint.as_deref(),
|
||||
args.gpu_layers,
|
||||
)?;
|
||||
entries.extend(items);
|
||||
// Progress log to stderr (suppressed by -q)
|
||||
polyscribe::ilog!("Processing file: {} ...", path.display());
|
||||
let res = with_quiet_stdio_if_needed(args.quiet, || {
|
||||
sel.backend.transcribe(
|
||||
path,
|
||||
&speaker,
|
||||
lang_hint.as_deref(),
|
||||
args.gpu_layers,
|
||||
)
|
||||
});
|
||||
match res {
|
||||
Ok(items) => {
|
||||
polyscribe::ilog!("done");
|
||||
entries.extend(items);
|
||||
}
|
||||
Err(e) => {
|
||||
if !(polyscribe::is_no_interaction() || !polyscribe::stdin_is_tty()) {
|
||||
polyscribe::elog!("{:#}", e);
|
||||
}
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
} else if is_json_file(path) {
|
||||
let mut buf = String::new();
|
||||
File::open(path)
|
||||
@@ -703,11 +758,11 @@ fn run() -> Result<()> {
|
||||
|
||||
fn main() {
|
||||
if let Err(e) = run() {
|
||||
errorlog!("{}", e);
|
||||
if VERBOSE.load(Ordering::Relaxed) >= 1 {
|
||||
polyscribe::elog!("{}", e);
|
||||
if polyscribe::verbose_level() >= 1 {
|
||||
let mut src = e.source();
|
||||
while let Some(s) = src {
|
||||
errorlog!("caused by: {}", s);
|
||||
polyscribe::elog!("caused by: {}", s);
|
||||
src = s.source();
|
||||
}
|
||||
}
|
||||
@@ -897,6 +952,9 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_backend_auto_order_prefers_cuda_then_hip_then_vulkan_then_cpu() {
|
||||
use std::sync::{Mutex, OnceLock};
|
||||
static ENV_LOCK: OnceLock<Mutex<()>> = OnceLock::new();
|
||||
let _guard = ENV_LOCK.get_or_init(|| Mutex::new(())).lock().unwrap();
|
||||
// Clear overrides
|
||||
unsafe {
|
||||
std_env::remove_var("POLYSCRIBE_TEST_FORCE_CUDA");
|
||||
@@ -935,6 +993,9 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_backend_explicit_missing_errors() {
|
||||
use std::sync::{Mutex, OnceLock};
|
||||
static ENV_LOCK: OnceLock<Mutex<()>> = OnceLock::new();
|
||||
let _guard = ENV_LOCK.get_or_init(|| Mutex::new(())).lock().unwrap();
|
||||
// Ensure all off
|
||||
unsafe {
|
||||
std_env::remove_var("POLYSCRIBE_TEST_FORCE_CUDA");
|
||||
|
120
src/models.rs
120
src/models.rs
@@ -11,12 +11,6 @@ use reqwest::redirect::Policy;
|
||||
use serde::Deserialize;
|
||||
use sha2::{Digest, Sha256};
|
||||
|
||||
// Progress/status line on stderr; accepts exactly what `eprintln!` accepts.
// NOTE(review): despite the name, this prints unconditionally; no quiet flag is checked here.
macro_rules! qlog {
    ($($tokens:tt)*) => {
        eprintln!($($tokens)*);
    };
}
|
||||
|
||||
// --- Model downloader: list & download ggml models from Hugging Face ---
|
||||
|
||||
@@ -208,7 +202,9 @@ fn fill_meta_via_head(repo: &str, name: &str) -> (Option<u64>, Option<String>) {
|
||||
}
|
||||
|
||||
fn hf_fetch_repo_models(client: &Client, repo: &'static str) -> Result<Vec<ModelEntry>> {
|
||||
qlog!("Fetching online data: listing models from {}...", repo);
|
||||
if !(crate::is_no_interaction() && crate::verbose_level() < 2) {
|
||||
ilog!("Fetching online data: listing models from {}...", repo);
|
||||
}
|
||||
// Prefer the tree endpoint for reliable size/hash metadata, then fall back to model metadata
|
||||
let tree_url = format!(
|
||||
"https://huggingface.co/api/models/{}/tree/main?recursive=1",
|
||||
@@ -291,10 +287,12 @@ fn hf_fetch_repo_models(client: &Client, repo: &'static str) -> Result<Vec<Model
|
||||
|
||||
// Fill missing metadata (size/hash) via HEAD request if necessary
|
||||
if out.iter().any(|m| m.size == 0 || m.sha256.is_none()) {
|
||||
qlog!(
|
||||
"Fetching online data: completing metadata checks for models in {}...",
|
||||
repo
|
||||
);
|
||||
if !(crate::is_no_interaction() && crate::verbose_level() < 2) {
|
||||
ilog!(
|
||||
"Fetching online data: completing metadata checks for models in {}...",
|
||||
repo
|
||||
);
|
||||
}
|
||||
}
|
||||
for m in out.iter_mut() {
|
||||
if m.size == 0 || m.sha256.is_none() {
|
||||
@@ -321,7 +319,9 @@ fn hf_fetch_repo_models(client: &Client, repo: &'static str) -> Result<Vec<Model
|
||||
}
|
||||
|
||||
fn fetch_all_models(client: &Client) -> Result<Vec<ModelEntry>> {
|
||||
qlog!("Fetching online data: aggregating available models from Hugging Face...");
|
||||
if !(crate::is_no_interaction() && crate::verbose_level() < 2) {
|
||||
ilog!("Fetching online data: aggregating available models from Hugging Face...");
|
||||
}
|
||||
let mut v1 = hf_fetch_repo_models(client, "ggerganov/whisper.cpp")?; // main repo must succeed
|
||||
|
||||
// Optional tinydiarize repo; ignore errors but log to stderr
|
||||
@@ -329,7 +329,7 @@ fn fetch_all_models(client: &Client) -> Result<Vec<ModelEntry>> {
|
||||
match hf_fetch_repo_models(client, "akashmjn/tinydiarize-whisper.cpp") {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
qlog!(
|
||||
ilog!(
|
||||
"Warning: failed to fetch optional repo akashmjn/tinydiarize-whisper.cpp: {:#}",
|
||||
e
|
||||
);
|
||||
@@ -396,6 +396,10 @@ fn format_model_list(models: &[ModelEntry]) -> String {
|
||||
}
|
||||
|
||||
fn prompt_select_models_two_stage(models: &[ModelEntry]) -> Result<Vec<ModelEntry>> {
|
||||
if crate::is_no_interaction() || !crate::stdin_is_tty() {
|
||||
// Non-interactive: do not prompt, return empty selection to skip
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
// 1) Choose base (tiny, small, medium, etc.)
|
||||
let mut bases: Vec<String> = Vec::new();
|
||||
let mut last = String::new();
|
||||
@@ -547,7 +551,7 @@ pub fn run_interactive_model_downloader() -> Result<()> {
|
||||
.build()
|
||||
.context("Failed to build HTTP client")?;
|
||||
|
||||
qlog!(
|
||||
ilog!(
|
||||
"Fetching online data: contacting Hugging Face to retrieve available models (this may take a moment)..."
|
||||
);
|
||||
let models = fetch_all_models(&client)?;
|
||||
@@ -562,7 +566,7 @@ pub fn run_interactive_model_downloader() -> Result<()> {
|
||||
}
|
||||
for m in selected {
|
||||
if let Err(e) = download_one_model(&client, models_dir, &m) {
|
||||
qlog!("Error: {:#}", e);
|
||||
elog!("Error: {:#}", e);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
@@ -846,6 +850,62 @@ pub fn update_local_models() -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Pick the best local `ggml-*.bin` model in `models_dir`.
///
/// "Best" means the largest file by size; equal sizes are resolved in favour of the
/// lexicographically smaller file name.
///
/// Returns `None` when the directory cannot be read or contains no matching regular file.
/// Entries that cannot be inspected (unreadable metadata, non-UTF-8 name) are skipped, so a
/// single bad entry never hides the other candidates.
pub fn pick_best_local_model(models_dir: &Path) -> Option<std::path::PathBuf> {
    let mut best: Option<(u64, String, std::path::PathBuf)> = None;

    for entry in std::fs::read_dir(models_dir).ok()?.flatten() {
        let path = entry.path();

        // One metadata lookup serves both the regular-file check and the size. A failure only
        // disqualifies this entry instead of aborting the whole scan.
        let Ok(meta) = std::fs::metadata(&path) else { continue };
        if !meta.is_file() {
            continue;
        }

        let Some(fname) = path.file_name().and_then(|s| s.to_str()).map(str::to_owned) else {
            continue;
        };
        if !(fname.starts_with("ggml-") && fname.ends_with(".bin")) {
            continue;
        }

        let size = meta.len();
        let is_better = match &best {
            None => true,
            Some((best_size, best_name, _)) => {
                size > *best_size || (size == *best_size && fname < *best_name)
            }
        };
        if is_better {
            best = Some((size, fname, path));
        }
    }

    best.map(|(_, _, path)| path)
}
|
||||
|
||||
/// Ensure a specific model is available locally without any interactive prompts.
|
||||
/// If found locally, returns its path. Otherwise downloads it and returns the path.
|
||||
pub fn ensure_model_available_noninteractive(model_name: &str) -> Result<std::path::PathBuf> {
|
||||
let models_dir_buf = crate::models_dir_path();
|
||||
let models_dir = models_dir_buf.as_path();
|
||||
if !models_dir.exists() {
|
||||
create_dir_all(models_dir).context("Failed to create models directory")?;
|
||||
}
|
||||
let final_path = models_dir.join(format!("ggml-{}.bin", model_name));
|
||||
if final_path.exists() {
|
||||
return Ok(final_path);
|
||||
}
|
||||
|
||||
let client = Client::builder()
|
||||
.user_agent("PolyScribe/0.1 (+https://github.com/)")
|
||||
.timeout(Duration::from_secs(600))
|
||||
.redirect(Policy::limited(10))
|
||||
.build()
|
||||
.context("Failed to build HTTP client")?;
|
||||
|
||||
// Prefer fetching metadata to construct a proper ModelEntry
|
||||
let models = fetch_all_models(&client)?;
|
||||
if let Some(entry) = models.into_iter().find(|m| m.name == model_name) {
|
||||
download_one_model(&client, models_dir, &entry)?;
|
||||
return Ok(models_dir.join(format!("ggml-{}.bin", entry.name)));
|
||||
}
|
||||
Err(anyhow!(
|
||||
"Model '{}' not found in remote listings; cannot download non-interactively.",
|
||||
model_name
|
||||
))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -912,6 +972,36 @@ mod tests {
|
||||
assert!(s.contains("Enter selection by indices"));
|
||||
}
|
||||
|
||||
#[test]
fn test_format_model_list_unaffected_by_quiet_flag() {
    // Two entries that differ only in name, base and size.
    let models: Vec<ModelEntry> = [("tiny.en-q5_1", "tiny", 1024), ("base.en-q5_1", "base", 2048)]
        .into_iter()
        .map(|(name, base, size)| ModelEntry {
            name: name.to_string(),
            base: base.to_string(),
            subtype: "en-q5_1".to_string(),
            size,
            sha256: None,
            repo: "ggerganov/whisper.cpp".to_string(),
        })
        .collect();

    // Render once with quiet off and once with quiet on; the formatter must not care.
    let render = |quiet: bool| {
        crate::set_quiet(quiet);
        format_model_list(&models)
    };
    let with_quiet_off = render(false);
    let with_quiet_on = render(true);
    assert_eq!(with_quiet_off, with_quiet_on);

    // Leave the global flag cleared for whichever test runs next.
    crate::set_quiet(false);
}
|
||||
|
||||
fn sha256_hex(data: &[u8]) -> String {
|
||||
use sha2::{Digest, Sha256};
|
||||
let mut hasher = Sha256::new();
|
||||
|
@@ -256,6 +256,7 @@ fn cli_merge_and_separate_writes_both_kinds_of_outputs() {
|
||||
|
||||
#[test]
|
||||
fn cli_set_speaker_names_merge_prompts_and_uses_names() {
|
||||
// Also validate that -q does not suppress prompts by running with -q
|
||||
use std::io::{Read as _, Write as _};
|
||||
use std::process::Stdio;
|
||||
|
||||
@@ -269,6 +270,7 @@ fn cli_set_speaker_names_merge_prompts_and_uses_names() {
|
||||
.arg(input2.as_os_str())
|
||||
.arg("-m")
|
||||
.arg("--set-speaker-names")
|
||||
.arg("-q")
|
||||
.stdin(Stdio::piped())
|
||||
.stdout(Stdio::piped())
|
||||
.spawn()
|
||||
@@ -292,6 +294,107 @@ fn cli_set_speaker_names_merge_prompts_and_uses_names() {
|
||||
assert!(speakers.contains("Beta"), "Beta not found in speakers");
|
||||
}
|
||||
|
||||
/// Runs the `polyscribe` binary on two input JSON files with
/// `-m --set-speaker-names --no-interaction` and checks that it exits
/// successfully and that the speakers in the JSON written to stdout include
/// the default names `s0wlz` and `vikingowl`.
///
/// NOTE(review): `Command::output` does not inherit the parent's stdin by
/// default, so the child has no interactive input available in this test.
#[test]
|
||||
fn cli_no_interaction_skips_speaker_prompts_and_uses_defaults() {
|
||||
// Path to the compiled binary; Cargo sets `CARGO_BIN_EXE_<name>` when building integration tests.
let exe = env!("CARGO_BIN_EXE_polyscribe");
|
||||
|
||||
// Presumably resolved relative to the crate's manifest directory — verify against `manifest_path`.
let input1 = manifest_path("input/1-s0wlz.json");
|
||||
let input2 = manifest_path("input/2-vikingowl.json");
|
||||
|
||||
// `output()` waits for the child to finish and captures its stdout and stderr.
let output = Command::new(exe)
|
||||
.arg(input1.as_os_str())
|
||||
.arg(input2.as_os_str())
|
||||
.arg("-m")
|
||||
.arg("--set-speaker-names")
|
||||
.arg("--no-interaction")
|
||||
.output()
|
||||
.expect("failed to spawn polyscribe");
|
||||
|
||||
assert!(output.status.success(), "CLI did not exit successfully");
|
||||
|
||||
let stdout = String::from_utf8(output.stdout).expect("stdout not UTF-8");
|
||||
// The whole of stdout must parse as a single `OutputRoot` JSON document.
let root: OutputRoot = serde_json::from_str(&stdout).unwrap();
|
||||
// Collect the distinct speaker names across all items.
let speakers: std::collections::HashSet<String> =
|
||||
root.items.into_iter().map(|e| e.speaker).collect();
|
||||
// Expected defaults are the sanitized file stems: sanitizing removes the numeric prefix, so "1-s0wlz" -> "s0wlz" and "2-vikingowl" -> "vikingowl".
|
||||
assert!(speakers.contains("s0wlz"), "default s0wlz not used");
|
||||
assert!(speakers.contains("vikingowl"), "default vikingowl not used");
|
||||
}
|
||||
|
||||
// New verbosity behavior tests
|
||||
/// Runs the `polyscribe` binary with both `-q` and `-v` (plus `-m` and two
/// inputs) and checks that the process succeeds, stdout still contains the
/// `"items"` JSON key, and stderr is empty once surrounding whitespace is
/// trimmed.
#[test]
|
||||
fn verbosity_quiet_suppresses_logs_but_keeps_stdout() {
|
||||
// Path to the compiled binary; Cargo sets `CARGO_BIN_EXE_<name>` when building integration tests.
let exe = env!("CARGO_BIN_EXE_polyscribe");
|
||||
let input1 = manifest_path("input/1-s0wlz.json");
|
||||
let input2 = manifest_path("input/2-vikingowl.json");
|
||||
|
||||
// `output()` waits for the child to finish and captures its stdout and stderr.
let output = Command::new(exe)
|
||||
.arg("-q")
|
||||
.arg("-v") // ensure -q overrides -v
|
||||
.arg(input1.as_os_str())
|
||||
.arg(input2.as_os_str())
|
||||
.arg("-m")
|
||||
.output()
|
||||
.expect("failed to spawn polyscribe");
|
||||
|
||||
assert!(output.status.success());
|
||||
let stdout = String::from_utf8(output.stdout).unwrap();
|
||||
// Substring check only: stdout is not parsed as JSON here.
assert!(stdout.contains("\"items\""), "stdout JSON should be present in quiet mode");
|
||||
let stderr = String::from_utf8(output.stderr).unwrap();
|
||||
// Whitespace-only stderr is accepted because of the `trim()`.
assert!(stderr.trim().is_empty(), "stderr should be empty in quiet mode, got: {}", stderr);
|
||||
}
|
||||
|
||||
/// Runs the `polyscribe` binary with `-m -v` on two inputs and checks that
/// the process succeeds, stdout contains the `"items"` JSON key, and stderr
/// contains the text `Mode: merge`.
#[test]
|
||||
fn verbosity_verbose_emits_debug_logs_on_stderr() {
|
||||
// Path to the compiled binary; Cargo sets `CARGO_BIN_EXE_<name>` when building integration tests.
let exe = env!("CARGO_BIN_EXE_polyscribe");
|
||||
let input1 = manifest_path("input/1-s0wlz.json");
|
||||
let input2 = manifest_path("input/2-vikingowl.json");
|
||||
|
||||
// `output()` waits for the child to finish and captures its stdout and stderr.
let output = Command::new(exe)
|
||||
.arg(input1.as_os_str())
|
||||
.arg(input2.as_os_str())
|
||||
.arg("-m")
|
||||
.arg("-v")
|
||||
.output()
|
||||
.expect("failed to spawn polyscribe");
|
||||
|
||||
assert!(output.status.success());
|
||||
let stdout = String::from_utf8(output.stdout).unwrap();
|
||||
// Verbose logging must not replace the JSON result on stdout.
assert!(stdout.contains("\"items\""));
|
||||
let stderr = String::from_utf8(output.stderr).unwrap();
|
||||
// "Mode: merge" is the log text this test uses as evidence that `-v` took effect.
assert!(stderr.contains("Mode: merge"), "stderr should contain debug log with -v");
|
||||
}
|
||||
|
||||
/// Runs the `polyscribe` binary twice on the same two inputs with `-m`,
/// once with `-v` as the first argument and once with `-v` as the last, and
/// checks that stderr contains `Mode: merge` in both runs.
///
/// NOTE(review): neither run's exit status is asserted; only stderr content
/// is checked.
#[test]
|
||||
fn verbosity_flag_position_is_global() {
|
||||
// Path to the compiled binary; Cargo sets `CARGO_BIN_EXE_<name>` when building integration tests.
let exe = env!("CARGO_BIN_EXE_polyscribe");
|
||||
let input1 = manifest_path("input/1-s0wlz.json");
|
||||
let input2 = manifest_path("input/2-vikingowl.json");
|
||||
|
||||
// -v before args
|
||||
let out1 = Command::new(exe)
|
||||
.arg("-v")
|
||||
.arg(input1.as_os_str())
|
||||
.arg(input2.as_os_str())
|
||||
.arg("-m")
|
||||
.output()
|
||||
.expect("failed to spawn polyscribe");
|
||||
|
||||
// -v after sub-flags
|
||||
let out2 = Command::new(exe)
|
||||
.arg(input1.as_os_str())
|
||||
.arg(input2.as_os_str())
|
||||
.arg("-m")
|
||||
.arg("-v")
|
||||
.output()
|
||||
.expect("failed to spawn polyscribe");
|
||||
|
||||
// Compare only stderr; stdout of both runs is ignored here.
let s1 = String::from_utf8(out1.stderr).unwrap();
|
||||
let s2 = String::from_utf8(out2.stderr).unwrap();
|
||||
assert!(s1.contains("Mode: merge"));
|
||||
assert!(s2.contains("Mode: merge"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cli_set_speaker_names_separate_single_input() {
|
||||
use std::io::Write as _;
|
||||
|
Reference in New Issue
Block a user