[refactor] clean up string formatting, improve control flow, and enhance readability in core modules and tests

This commit is contained in:
2025-08-08 20:01:56 +02:00
parent ff9de91dcf
commit fe98bd36b6
5 changed files with 106 additions and 77 deletions

View File

@@ -100,7 +100,9 @@ impl CpuBackend {
} }
} }
impl Default for CpuBackend { impl Default for CpuBackend {
fn default() -> Self { Self::new() } fn default() -> Self {
Self::new()
}
} }
impl CudaBackend { impl CudaBackend {
/// Create a new CUDA backend instance. /// Create a new CUDA backend instance.
@@ -109,7 +111,9 @@ impl CudaBackend {
} }
} }
impl Default for CudaBackend { impl Default for CudaBackend {
fn default() -> Self { Self::new() } fn default() -> Self {
Self::new()
}
} }
impl HipBackend { impl HipBackend {
/// Create a new HIP backend instance. /// Create a new HIP backend instance.
@@ -118,7 +122,9 @@ impl HipBackend {
} }
} }
impl Default for HipBackend { impl Default for HipBackend {
fn default() -> Self { Self::new() } fn default() -> Self {
Self::new()
}
} }
impl VulkanBackend { impl VulkanBackend {
/// Create a new Vulkan backend instance. /// Create a new Vulkan backend instance.
@@ -127,7 +133,9 @@ impl VulkanBackend {
} }
} }
impl Default for VulkanBackend { impl Default for VulkanBackend {
fn default() -> Self { Self::new() } fn default() -> Self {
Self::new()
}
} }
impl TranscribeBackend for CpuBackend { impl TranscribeBackend for CpuBackend {

View File

@@ -73,7 +73,13 @@ impl StderrSilencer {
/// Activate stderr silencing if quiet is set and on Unix; otherwise returns a no-op guard. /// Activate stderr silencing if quiet is set and on Unix; otherwise returns a no-op guard.
pub fn activate_if_quiet() -> Self { pub fn activate_if_quiet() -> Self {
if !is_quiet() { if !is_quiet() {
return Self { active: false, #[cfg(unix)] old_stderr_fd: -1, #[cfg(unix)] devnull_fd: -1 }; return Self {
active: false,
#[cfg(unix)]
old_stderr_fd: -1,
#[cfg(unix)]
devnull_fd: -1,
};
} }
Self::activate() Self::activate()
} }
@@ -85,7 +91,11 @@ impl StderrSilencer {
// Duplicate current stderr (fd 2) // Duplicate current stderr (fd 2)
let old_fd = dup(2); let old_fd = dup(2);
if old_fd < 0 { if old_fd < 0 {
return Self { active: false, old_stderr_fd: -1, devnull_fd: -1 }; return Self {
active: false,
old_stderr_fd: -1,
devnull_fd: -1,
};
} }
// Open /dev/null for writing // Open /dev/null for writing
let devnull_cstr = std::ffi::CString::new("/dev/null").unwrap(); let devnull_cstr = std::ffi::CString::new("/dev/null").unwrap();
@@ -93,15 +103,27 @@ impl StderrSilencer {
if dn < 0 { if dn < 0 {
// failed to open devnull; restore and bail // failed to open devnull; restore and bail
close(old_fd); close(old_fd);
return Self { active: false, old_stderr_fd: -1, devnull_fd: -1 }; return Self {
active: false,
old_stderr_fd: -1,
devnull_fd: -1,
};
} }
// Redirect fd 2 to devnull // Redirect fd 2 to devnull
if dup2(dn, 2) < 0 { if dup2(dn, 2) < 0 {
close(dn); close(dn);
close(old_fd); close(old_fd);
return Self { active: false, old_stderr_fd: -1, devnull_fd: -1 }; return Self {
active: false,
old_stderr_fd: -1,
devnull_fd: -1,
};
}
Self {
active: true,
old_stderr_fd: old_fd,
devnull_fd: dn,
} }
Self { active: true, old_stderr_fd: old_fd, devnull_fd: dn }
} }
#[cfg(not(unix))] #[cfg(not(unix))]
{ {
@@ -195,7 +217,7 @@ use std::path::{Path, PathBuf};
use std::process::Command; use std::process::Command;
#[cfg(unix)] #[cfg(unix)]
use libc::{close, dup, dup2, open, O_WRONLY}; use libc::{O_WRONLY, close, dup, dup2, open};
/// Re-export backend module (GPU/CPU selection and transcription). /// Re-export backend module (GPU/CPU selection and transcription).
pub mod backend; pub mod backend;
@@ -225,12 +247,12 @@ pub fn date_prefix() -> String {
/// Format a floating-point number of seconds as SRT timestamp (HH:MM:SS,mmm). /// Format a floating-point number of seconds as SRT timestamp (HH:MM:SS,mmm).
pub fn format_srt_time(seconds: f64) -> String { pub fn format_srt_time(seconds: f64) -> String {
let total_ms = (seconds * 1000.0).round() as i64; let total_ms = (seconds * 1000.0).round() as i64;
let ms = (total_ms % 1000) as i64; let ms = total_ms % 1000;
let total_secs = total_ms / 1000; let total_secs = total_ms / 1000;
let s = (total_secs % 60) as i64; let s = total_secs % 60;
let m = ((total_secs / 60) % 60) as i64; let m = (total_secs / 60) % 60;
let h = (total_secs / 3600) as i64; let h = total_secs / 3600;
format!("{:02}:{:02}:{:02},{:03}", h, m, s, ms) format!("{h:02}:{m:02}:{s:02},{ms:03}")
} }
/// Render a list of transcript entries to SRT format. /// Render a list of transcript entries to SRT format.
@@ -238,7 +260,7 @@ pub fn render_srt(items: &[OutputEntry]) -> String {
let mut out = String::new(); let mut out = String::new();
for (i, e) in items.iter().enumerate() { for (i, e) in items.iter().enumerate() {
let idx = i + 1; let idx = i + 1;
out.push_str(&format!("{}\n", idx)); out.push_str(&format!("{idx}\n"));
out.push_str(&format!( out.push_str(&format!(
"{} --> {}\n", "{} --> {}\n",
format_srt_time(e.start), format_srt_time(e.start),
@@ -410,10 +432,13 @@ pub fn find_model_file() -> Result<PathBuf> {
if candidates.is_empty() { if candidates.is_empty() {
// No models found: prompt interactively (TTY only) // No models found: prompt interactively (TTY only)
wlog!("{}", format!( wlog!(
"No Whisper model files (*.bin) found in {}.", "{}",
models_dir.display() format!(
)); "No Whisper model files (*.bin) found in {}.",
models_dir.display()
)
);
if crate::is_no_interaction() || !crate::stdin_is_tty() { if crate::is_no_interaction() || !crate::stdin_is_tty() {
return Err(anyhow!( return Err(anyhow!(
"No models available and interactive mode is disabled. Please set WHISPER_MODEL or run with --download-models." "No models available and interactive mode is disabled. Please set WHISPER_MODEL or run with --download-models."
@@ -468,12 +493,10 @@ pub fn find_model_file() -> Result<PathBuf> {
let prev = prev.trim(); let prev = prev.trim();
if !prev.is_empty() { if !prev.is_empty() {
let p = PathBuf::from(prev); let p = PathBuf::from(prev);
if p.is_file() { if p.is_file() && candidates.iter().any(|c| c == &p) {
if candidates.iter().any(|c| c == &p) { // Previously printed: INFO about using previously selected model.
// Previously printed: INFO about using previously selected model. // Suppress this to avoid duplicate/noisy messages; per-file progress will be shown elsewhere.
// Suppress this to avoid duplicate/noisy messages; per-file progress will be shown elsewhere. return Ok(p);
return Ok(p);
}
} }
} }
} }

View File

@@ -10,7 +10,6 @@ use serde::{Deserialize, Serialize};
// whisper-rs is used from the library crate // whisper-rs is used from the library crate
use polyscribe::backend::{BackendKind, select_backend}; use polyscribe::backend::{BackendKind, select_backend};
#[derive(Subcommand, Debug, Clone)] #[derive(Subcommand, Debug, Clone)]
enum AuxCommands { enum AuxCommands {
/// Generate shell completion script to stdout /// Generate shell completion script to stdout
@@ -141,8 +140,7 @@ fn prompt_speaker_name_for_path(path: &Path, default_name: &str, enabled: bool)
.map(|s| s.to_string()) .map(|s| s.to_string())
.unwrap_or_else(|| path.to_string_lossy().to_string()); .unwrap_or_else(|| path.to_string_lossy().to_string());
eprint!( eprint!(
"Enter speaker name for {} [default: {}]: ", "Enter speaker name for {display_owned} [default: {default_name}]: "
display_owned, default_name
); );
io::stderr().flush().ok(); io::stderr().flush().ok();
let mut buf = String::new(); let mut buf = String::new();
@@ -347,12 +345,8 @@ fn run() -> Result<()> {
// Progress log to stderr (suppressed by -q); avoid partial lines // Progress log to stderr (suppressed by -q); avoid partial lines
polyscribe::ilog!("Processing file: {} ...", path.display()); polyscribe::ilog!("Processing file: {} ...", path.display());
let res = with_quiet_stdio_if_needed(args.quiet, || { let res = with_quiet_stdio_if_needed(args.quiet, || {
sel.backend.transcribe( sel.backend
path, .transcribe(path, &speaker, lang_hint.as_deref(), args.gpu_layers)
&speaker,
lang_hint.as_deref(),
args.gpu_layers,
)
}); });
match res { match res {
Ok(items) => { Ok(items) => {
@@ -360,7 +354,7 @@ fn run() -> Result<()> {
entries.extend(items.into_iter()); entries.extend(items.into_iter());
} }
Err(e) => { Err(e) => {
if !(polyscribe::is_no_interaction() || !polyscribe::stdin_is_tty()) { if !polyscribe::is_no_interaction() && polyscribe::stdin_is_tty() {
polyscribe::elog!("{:#}", e); polyscribe::elog!("{:#}", e);
} }
return Err(e); return Err(e);
@@ -369,11 +363,11 @@ fn run() -> Result<()> {
} else if is_json_file(path) { } else if is_json_file(path) {
let mut buf = String::new(); let mut buf = String::new();
File::open(path) File::open(path)
.with_context(|| format!("Failed to open: {}", input_path))? .with_context(|| format!("Failed to open: {input_path}"))?
.read_to_string(&mut buf) .read_to_string(&mut buf)
.with_context(|| format!("Failed to read: {}", input_path))?; .with_context(|| format!("Failed to read: {input_path}"))?;
let root: InputRoot = serde_json::from_str(&buf).with_context(|| { let root: InputRoot = serde_json::from_str(&buf).with_context(|| {
format!("Invalid JSON transcript parsed from {}", input_path) format!("Invalid JSON transcript parsed from {input_path}")
})?; })?;
for seg in root.segments { for seg in root.segments {
entries.push(OutputEntry { entries.push(OutputEntry {
@@ -414,7 +408,7 @@ fn run() -> Result<()> {
.and_then(|s| s.to_str()) .and_then(|s| s.to_str())
.unwrap_or("output"); .unwrap_or("output");
let date = date_prefix(); let date = date_prefix();
let base_name = format!("{}_{}", date, stem); let base_name = format!("{date}_{stem}");
let json_path = out_dir.join(format!("{}.json", &base_name)); let json_path = out_dir.join(format!("{}.json", &base_name));
let toml_path = out_dir.join(format!("{}.toml", &base_name)); let toml_path = out_dir.join(format!("{}.toml", &base_name));
let srt_path = out_dir.join(format!("{}.srt", &base_name)); let srt_path = out_dir.join(format!("{}.srt", &base_name));
@@ -461,7 +455,7 @@ fn run() -> Result<()> {
}; };
let date = date_prefix(); let date = date_prefix();
let merged_base = format!("{}_merged", date); let merged_base = format!("{date}_merged");
let m_json = out_dir.join(format!("{}.json", &merged_base)); let m_json = out_dir.join(format!("{}.json", &merged_base));
let m_toml = out_dir.join(format!("{}.toml", &merged_base)); let m_toml = out_dir.join(format!("{}.toml", &merged_base));
let m_srt = out_dir.join(format!("{}.srt", &merged_base)); let m_srt = out_dir.join(format!("{}.srt", &merged_base));
@@ -502,12 +496,8 @@ fn run() -> Result<()> {
// Progress log to stderr (suppressed by -q) // Progress log to stderr (suppressed by -q)
polyscribe::ilog!("Processing file: {} ...", path.display()); polyscribe::ilog!("Processing file: {} ...", path.display());
let res = with_quiet_stdio_if_needed(args.quiet, || { let res = with_quiet_stdio_if_needed(args.quiet, || {
sel.backend.transcribe( sel.backend
path, .transcribe(path, &speaker, lang_hint.as_deref(), args.gpu_layers)
&speaker,
lang_hint.as_deref(),
args.gpu_layers,
)
}); });
match res { match res {
Ok(items) => { Ok(items) => {
@@ -625,7 +615,7 @@ fn run() -> Result<()> {
} }
// If output_path is provided, treat it as a directory. Create it. // If output_path is provided, treat it as a directory. Create it.
let out_dir: Option<PathBuf> = output_path.as_ref().map(|p| PathBuf::from(p)); let out_dir: Option<PathBuf> = output_path.as_ref().map(PathBuf::from);
if let Some(dir) = &out_dir { if let Some(dir) = &out_dir {
if !dir.as_os_str().is_empty() { if !dir.as_os_str().is_empty() {
create_dir_all(dir).with_context(|| { create_dir_all(dir).with_context(|| {
@@ -650,12 +640,8 @@ fn run() -> Result<()> {
// Progress log to stderr (suppressed by -q) // Progress log to stderr (suppressed by -q)
polyscribe::ilog!("Processing file: {} ...", path.display()); polyscribe::ilog!("Processing file: {} ...", path.display());
let res = with_quiet_stdio_if_needed(args.quiet, || { let res = with_quiet_stdio_if_needed(args.quiet, || {
sel.backend.transcribe( sel.backend
path, .transcribe(path, &speaker, lang_hint.as_deref(), args.gpu_layers)
&speaker,
lang_hint.as_deref(),
args.gpu_layers,
)
}); });
match res { match res {
Ok(items) => { Ok(items) => {
@@ -663,7 +649,7 @@ fn run() -> Result<()> {
entries.extend(items); entries.extend(items);
} }
Err(e) => { Err(e) => {
if !(polyscribe::is_no_interaction() || !polyscribe::stdin_is_tty()) { if !polyscribe::is_no_interaction() && polyscribe::stdin_is_tty() {
polyscribe::elog!("{:#}", e); polyscribe::elog!("{:#}", e);
} }
return Err(e); return Err(e);
@@ -672,11 +658,11 @@ fn run() -> Result<()> {
} else if is_json_file(path) { } else if is_json_file(path) {
let mut buf = String::new(); let mut buf = String::new();
File::open(path) File::open(path)
.with_context(|| format!("Failed to open: {}", input_path))? .with_context(|| format!("Failed to open: {input_path}"))?
.read_to_string(&mut buf) .read_to_string(&mut buf)
.with_context(|| format!("Failed to read: {}", input_path))?; .with_context(|| format!("Failed to read: {input_path}"))?;
let root: InputRoot = serde_json::from_str(&buf).with_context(|| { let root: InputRoot = serde_json::from_str(&buf).with_context(|| {
format!("Invalid JSON transcript parsed from {}", input_path) format!("Invalid JSON transcript parsed from {input_path}")
})?; })?;
for seg in root.segments { for seg in root.segments {
entries.push(OutputEntry { entries.push(OutputEntry {

View File

@@ -12,7 +12,6 @@ use reqwest::redirect::Policy;
use serde::Deserialize; use serde::Deserialize;
use sha2::{Digest, Sha256}; use sha2::{Digest, Sha256};
// --- Model downloader: list & download ggml models from Hugging Face --- // --- Model downloader: list & download ggml models from Hugging Face ---
#[derive(Debug, Deserialize)] #[derive(Debug, Deserialize)]
@@ -159,9 +158,7 @@ fn fill_meta_via_head(repo: &str, name: &str) -> (Option<u64>, Option<String>) {
Ok(c) => c, Ok(c) => c,
Err(_) => return (None, None), Err(_) => return (None, None),
}; };
let url = format!( let url = format!("https://huggingface.co/{repo}/resolve/main/ggml-{name}.bin");
"https://huggingface.co/{repo}/resolve/main/ggml-{name}.bin"
);
let resp = match head_client let resp = match head_client
.head(url) .head(url)
.send() .send()
@@ -206,9 +203,7 @@ fn hf_fetch_repo_models(client: &Client, repo: &'static str) -> Result<Vec<Model
ilog!("Fetching online data: listing models from {}...", repo); ilog!("Fetching online data: listing models from {}...", repo);
} }
// Prefer the tree endpoint for reliable size/hash metadata, then fall back to model metadata // Prefer the tree endpoint for reliable size/hash metadata, then fall back to model metadata
let tree_url = format!( let tree_url = format!("https://huggingface.co/api/models/{repo}/tree/main?recursive=1");
"https://huggingface.co/api/models/{repo}/tree/main?recursive=1"
);
let mut out: Vec<ModelEntry> = Vec::new(); let mut out: Vec<ModelEntry> = Vec::new();
match client match client
@@ -452,12 +447,12 @@ fn prompt_select_models_two_stage(models: &[ModelEntry]) -> Result<Vec<ModelEntr
let filtered: Vec<ModelEntry> = let filtered: Vec<ModelEntry> =
models.iter().filter(|m| m.base == base).cloned().collect(); models.iter().filter(|m| m.base == base).cloned().collect();
if filtered.is_empty() { if filtered.is_empty() {
eprintln!("No models found for base '{}'.", base); eprintln!("No models found for base '{base}'.");
continue; continue;
} }
// Reuse the formatter but only for the chosen base list // Reuse the formatter but only for the chosen base list
let listing = format_model_list(&filtered); let listing = format_model_list(&filtered);
eprint!("{}", listing); eprint!("{listing}");
// Build index map for filtered list // Build index map for filtered list
let mut index_map: Vec<usize> = Vec::with_capacity(filtered.len()); let mut index_map: Vec<usize> = Vec::with_capacity(filtered.len());
@@ -482,7 +477,7 @@ fn prompt_select_models_two_stage(models: &[ModelEntry]) -> Result<Vec<ModelEntr
if s2 == "all" || s2 == "*" { if s2 == "all" || s2 == "*" {
selected = (1..idx).collect(); selected = (1..idx).collect();
} else if !s2.is_empty() { } else if !s2.is_empty() {
for part in s2.split(|c| c == ',' || c == ' ' || c == ';') { for part in s2.split([',', ' ', ';']) {
let part = part.trim(); let part = part.trim();
if part.is_empty() { if part.is_empty() {
continue; continue;
@@ -759,9 +754,9 @@ pub fn update_local_models() -> Result<()> {
let models: Vec<ModelEntry> = if let Ok(manifest_path) = env::var("POLYSCRIBE_MODELS_MANIFEST") let models: Vec<ModelEntry> = if let Ok(manifest_path) = env::var("POLYSCRIBE_MODELS_MANIFEST")
{ {
let data = std::fs::read_to_string(&manifest_path) let data = std::fs::read_to_string(&manifest_path)
.with_context(|| format!("Failed to read manifest at {}", manifest_path))?; .with_context(|| format!("Failed to read manifest at {manifest_path}"))?;
let mut list: Vec<ModelEntry> = serde_json::from_str(&data) let mut list: Vec<ModelEntry> = serde_json::from_str(&data)
.with_context(|| format!("Invalid JSON manifest: {}", manifest_path))?; .with_context(|| format!("Invalid JSON manifest: {manifest_path}"))?;
// sort for stability // sort for stability
list.sort_by(|a, b| a.name.cmp(&b.name)); list.sort_by(|a, b| a.name.cmp(&b.name));
list list
@@ -855,9 +850,16 @@ pub fn pick_best_local_model(models_dir: &Path) -> Option<std::path::PathBuf> {
let rd = std::fs::read_dir(models_dir).ok()?; let rd = std::fs::read_dir(models_dir).ok()?;
for entry in rd.flatten() { for entry in rd.flatten() {
let path = entry.path(); let path = entry.path();
if !path.is_file() { continue; } if !path.is_file() {
let fname = match path.file_name().and_then(|s| s.to_str()) { Some(s) => s.to_string(), None => continue }; continue;
if !fname.starts_with("ggml-") || !fname.ends_with(".bin") { continue; } }
let fname = match path.file_name().and_then(|s| s.to_str()) {
Some(s) => s.to_string(),
None => continue,
};
if !fname.starts_with("ggml-") || !fname.ends_with(".bin") {
continue;
}
let size = std::fs::metadata(&path).ok()?.len(); let size = std::fs::metadata(&path).ok()?.len();
match &mut best { match &mut best {
None => best = Some((size, fname, path.clone())), None => best = Some((size, fname, path.clone())),
@@ -881,7 +883,7 @@ pub fn ensure_model_available_noninteractive(model_name: &str) -> Result<std::pa
if !models_dir.exists() { if !models_dir.exists() {
create_dir_all(models_dir).context("Failed to create models directory")?; create_dir_all(models_dir).context("Failed to create models directory")?;
} }
let final_path = models_dir.join(format!("ggml-{}.bin", model_name)); let final_path = models_dir.join(format!("ggml-{model_name}.bin"));
if final_path.exists() { if final_path.exists() {
return Ok(final_path); return Ok(final_path);
} }

View File

@@ -340,9 +340,16 @@ fn verbosity_quiet_suppresses_logs_but_keeps_stdout() {
assert!(output.status.success()); assert!(output.status.success());
let stdout = String::from_utf8(output.stdout).unwrap(); let stdout = String::from_utf8(output.stdout).unwrap();
assert!(stdout.contains("\"items\""), "stdout JSON should be present in quiet mode"); assert!(
stdout.contains("\"items\""),
"stdout JSON should be present in quiet mode"
);
let stderr = String::from_utf8(output.stderr).unwrap(); let stderr = String::from_utf8(output.stderr).unwrap();
assert!(stderr.trim().is_empty(), "stderr should be empty in quiet mode, got: {}", stderr); assert!(
stderr.trim().is_empty(),
"stderr should be empty in quiet mode, got: {}",
stderr
);
} }
#[test] #[test]
@@ -363,7 +370,10 @@ fn verbosity_verbose_emits_debug_logs_on_stderr() {
let stdout = String::from_utf8(output.stdout).unwrap(); let stdout = String::from_utf8(output.stdout).unwrap();
assert!(stdout.contains("\"items\"")); assert!(stdout.contains("\"items\""));
let stderr = String::from_utf8(output.stderr).unwrap(); let stderr = String::from_utf8(output.stderr).unwrap();
assert!(stderr.contains("Mode: merge"), "stderr should contain debug log with -v"); assert!(
stderr.contains("Mode: merge"),
"stderr should contain debug log with -v"
);
} }
#[test] #[test]