[refactor] improve variable naming and simplify logic across multiple functions and structs
src/backend.rs: 118 lines changed

--- a/src/backend.rs
+++ b/src/backend.rs
@@ -24,25 +24,18 @@ pub enum BackendKind {
     Vulkan,
 }

-/// Abstraction for a transcription backend implementation.
+/// Abstraction for a transcription backend.
 pub trait TranscribeBackend {
-    /// Return the backend kind for this implementation.
+    /// Backend kind implemented by this type.
     fn kind(&self) -> BackendKind;
-    /// Transcribe the given audio file path and return transcript entries.
-    ///
-    /// Parameters:
-    /// - audio_path: path to input media (audio or video) to be decoded/transcribed.
-    /// - speaker: label to attach to all produced segments.
-    /// - lang_opt: optional language hint (e.g., "en"); None means auto/multilingual model default.
-    /// - gpu_layers: optional GPU layer count if applicable (ignored by some backends).
-    /// - progress_cb: optional callback receiving percentage [0..=100] updates.
+    /// Transcribe the given audio and return transcript entries.
     fn transcribe(
         &self,
         audio_path: &Path,
         speaker: &str,
-        lang_opt: Option<&str>,
+        language: Option<&str>,
         gpu_layers: Option<u32>,
-        progress_cb: Option<&(dyn Fn(i32) + Send + Sync)>,
+        progress: Option<&(dyn Fn(i32) + Send + Sync)>,
     ) -> Result<Vec<OutputEntry>>;
 }

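The parameter renames above (lang_opt → language, progress_cb → progress) are signature-only; call sites keep the same shape. A minimal caller sketch, assuming a value implementing `TranscribeBackend` and the crate's `OutputEntry`/`anyhow::Result` types — the file path, speaker label, and closure are illustrative, not part of this commit:

```rust
use std::path::Path;

// Hypothetical call site for the renamed signature.
fn run_one(backend: &dyn TranscribeBackend) -> anyhow::Result<Vec<OutputEntry>> {
    let print_pct = |pct: i32| eprintln!("transcribing: {pct}%");
    let progress: &(dyn Fn(i32) + Send + Sync) = &print_pct;
    backend.transcribe(
        Path::new("input.wav"), // illustrative input path
        "Speaker 1",            // label attached to every produced segment
        Some("en"),             // language hint; None lets the model decide
        None,                   // gpu_layers: ignored by CPU-style backends
        Some(progress),         // optional 0..=100 progress callback
    )
}
```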
@@ -107,11 +100,11 @@ macro_rules! impl_whisper_backend {
                 &self,
                 audio_path: &Path,
                 speaker: &str,
-                lang_opt: Option<&str>,
+                language: Option<&str>,
                 _gpu_layers: Option<u32>,
-                progress_cb: Option<&(dyn Fn(i32) + Send + Sync)>,
+                progress: Option<&(dyn Fn(i32) + Send + Sync)>,
             ) -> Result<Vec<OutputEntry>> {
-                transcribe_with_whisper_rs(audio_path, speaker, lang_opt, progress_cb)
+                transcribe_with_whisper_rs(audio_path, speaker, language, progress)
             }
         }
     };
@@ -129,9 +122,9 @@ impl TranscribeBackend for VulkanBackend {
         &self,
         _audio_path: &Path,
         _speaker: &str,
-        _lang_opt: Option<&str>,
+        _language: Option<&str>,
         _gpu_layers: Option<u32>,
-        _progress_cb: Option<&(dyn Fn(i32) + Send + Sync)>,
+        _progress: Option<&(dyn Fn(i32) + Send + Sync)>,
     ) -> Result<Vec<OutputEntry>> {
         Err(anyhow!(
             "Vulkan backend not yet wired to whisper.cpp FFI. Build with --features gpu-vulkan and ensure Vulkan SDK is installed. How to fix: install Vulkan loader (libvulkan), set VULKAN_SDK, and run cargo build --features gpu-vulkan."
@@ -169,13 +162,13 @@ pub fn select_backend(requested: BackendKind, verbose: bool) -> Result<Selection
         detected.push(BackendKind::Vulkan);
     }

-    let mk = |k: BackendKind| -> Box<dyn TranscribeBackend + Send + Sync> {
+    let instantiate_backend = |k: BackendKind| -> Box<dyn TranscribeBackend + Send + Sync> {
         match k {
             BackendKind::Cpu => Box::new(CpuBackend::default()),
             BackendKind::Cuda => Box::new(CudaBackend::default()),
             BackendKind::Hip => Box::new(HipBackend::default()),
             BackendKind::Vulkan => Box::new(VulkanBackend::default()),
-            BackendKind::Auto => Box::new(CpuBackend::default()), // will be replaced
+            BackendKind::Auto => Box::new(CpuBackend::default()), // placeholder for Auto
         }
     };

@@ -227,7 +220,7 @@ pub fn select_backend(requested: BackendKind, verbose: bool) -> Result<Selection
     }

     Ok(SelectionResult {
-        backend: mk(chosen),
+        backend: instantiate_backend(chosen),
         chosen,
         detected,
     })
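For reference, the renamed instantiate_backend closure stays private to select_backend, which still returns a SelectionResult carrying the backend, chosen, and detected fields seen above. A hedged usage sketch — the Auto request and the input path are illustrative:

```rust
// Hypothetical caller; uses only items visible in this diff.
fn pick_and_run() -> anyhow::Result<Vec<OutputEntry>> {
    // Auto lets select_backend pick from the detected backends; false = non-verbose.
    let selection = select_backend(BackendKind::Auto, false)?;
    selection.backend.transcribe(
        std::path::Path::new("meeting.mp4"), // illustrative input
        "Speaker 1",
        None, // language hint: model default
        None, // gpu_layers
        None, // no progress callback
    )
}
```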
@@ -238,98 +231,99 @@ pub fn select_backend(requested: BackendKind, verbose: bool) -> Result<Selection
 pub(crate) fn transcribe_with_whisper_rs(
     audio_path: &Path,
     speaker: &str,
-    lang_opt: Option<&str>,
-    progress_cb: Option<&(dyn Fn(i32) + Send + Sync)>,
+    language: Option<&str>,
+    progress: Option<&(dyn Fn(i32) + Send + Sync)>,
 ) -> Result<Vec<OutputEntry>> {
-    if let Some(cb) = progress_cb { cb(0); }
+    let report = |p: i32| {
+        if let Some(cb) = progress { cb(p); }
+    };
+    report(0);

-    let pcm = decode_audio_to_pcm_f32_ffmpeg(audio_path)?;
-    if let Some(cb) = progress_cb { cb(5); }
+    let pcm_samples = decode_audio_to_pcm_f32_ffmpeg(audio_path)?;
+    report(5);

-    let model = find_model_file()?;
-    let is_en_only = model
+    let model_path = find_model_file()?;
+    let english_only_model = model_path
         .file_name()
         .and_then(|s| s.to_str())
         .map(|s| s.contains(".en.") || s.ends_with(".en.bin"))
         .unwrap_or(false);
-    if let Some(lang) = lang_opt {
-        if is_en_only && lang != "en" {
+    if let Some(lang) = language {
+        if english_only_model && lang != "en" {
             return Err(anyhow!(
                 "Selected model is English-only ({}), but a non-English language hint '{}' was provided. Please use a multilingual model or set WHISPER_MODEL.",
-                model.display(),
+                model_path.display(),
                 lang
             ));
         }
     }
-    let model_str = model
+    let model_path_str = model_path
         .to_str()
-        .ok_or_else(|| anyhow!("Model path not valid UTF-8: {}", model.display()))?;
+        .ok_or_else(|| anyhow!("Model path not valid UTF-8: {}", model_path.display()))?;

-    // Try to reduce native library logging via environment variables when not super-verbose.
     if crate::verbose_level() < 2 {
-        // These env vars are recognized by ggml/whisper in many builds; harmless if unknown.
+        // Some builds of whisper/ggml expect these env vars; harmless if unknown
         unsafe {
             std::env::set_var("GGML_LOG_LEVEL", "0");
             std::env::set_var("WHISPER_PRINT_PROGRESS", "0");
         }
     }

-    // Suppress stderr from whisper/ggml during model load and inference when quiet and not verbose.
-    let (_ctx, mut state) = crate::with_suppressed_stderr(|| {
-        let cparams = whisper_rs::WhisperContextParameters::default();
-        let ctx = whisper_rs::WhisperContext::new_with_params(model_str, cparams)
-            .with_context(|| format!("Failed to load Whisper model at {}", model.display()))?;
-        let state = ctx
+    let (_context, mut state) = crate::with_suppressed_stderr(|| {
+        let params = whisper_rs::WhisperContextParameters::default();
+        let context = whisper_rs::WhisperContext::new_with_params(model_path_str, params)
+            .with_context(|| format!("Failed to load Whisper model at {}", model_path.display()))?;
+        let state = context
             .create_state()
             .map_err(|e| anyhow!("Failed to create Whisper state: {:?}", e))?;
-        Ok::<_, anyhow::Error>((ctx, state))
+        Ok::<_, anyhow::Error>((context, state))
     })?;
-    if let Some(cb) = progress_cb { cb(20); }
+    report(20);

-    let mut params =
+    let mut full_params =
         whisper_rs::FullParams::new(whisper_rs::SamplingStrategy::Greedy { best_of: 1 });
-    let n_threads = std::thread::available_parallelism()
+    let threads = std::thread::available_parallelism()
         .map(|n| n.get() as i32)
         .unwrap_or(1);
-    params.set_n_threads(n_threads);
-    params.set_translate(false);
-    if let Some(lang) = lang_opt {
-        params.set_language(Some(lang));
+    full_params.set_n_threads(threads);
+    full_params.set_translate(false);
+    if let Some(lang) = language {
+        full_params.set_language(Some(lang));
     }
-    if let Some(cb) = progress_cb { cb(30); }
+    report(30);

     crate::with_suppressed_stderr(|| {
-        if let Some(cb) = progress_cb { cb(40); }
+        report(40);
         state
-            .full(params, &pcm)
+            .full(full_params, &pcm_samples)
             .map_err(|e| anyhow!("Whisper full() failed: {:?}", e))
     })?;

-    if let Some(cb) = progress_cb { cb(90); }
+    report(90);
     let num_segments = state
         .full_n_segments()
         .map_err(|e| anyhow!("Failed to get segments: {:?}", e))?;
-    let mut items = Vec::new();
-    for i in 0..num_segments {
-        let text = state
-            .full_get_segment_text(i)
+    let mut entries = Vec::new();
+    for seg_idx in 0..num_segments {
+        let segment_text = state
+            .full_get_segment_text(seg_idx)
             .map_err(|e| anyhow!("Failed to get segment text: {:?}", e))?;
         let t0 = state
-            .full_get_segment_t0(i)
+            .full_get_segment_t0(seg_idx)
             .map_err(|e| anyhow!("Failed to get segment t0: {:?}", e))?;
         let t1 = state
-            .full_get_segment_t1(i)
+            .full_get_segment_t1(seg_idx)
             .map_err(|e| anyhow!("Failed to get segment t1: {:?}", e))?;
         let start = (t0 as f64) * 0.01;
         let end = (t1 as f64) * 0.01;
-        items.push(OutputEntry {
+        entries.push(OutputEntry {
             id: 0,
             speaker: speaker.to_string(),
             start,
             end,
-            text: text.trim().to_string(),
+            text: segment_text.trim().to_string(),
         });
     }
-    if let Some(cb) = progress_cb { cb(100); }
-    Ok(items)
+    report(100);
+    Ok(entries)
 }
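The substantive change in this hunk is replacing every `if let Some(cb) = progress_cb { cb(n); }` occurrence with one local report closure. The pattern in isolation, as a self-contained sketch with illustrative names:

```rust
// Wrapping an optional callback in a tiny closure keeps each progress update
// to a single call and avoids repeating the Option handling.
fn run_with_progress(progress: Option<&(dyn Fn(i32) + Send + Sync)>) {
    let report = |p: i32| {
        if let Some(cb) = progress {
            cb(p);
        }
    };
    report(0);
    // ... work happens here ...
    report(100);
}

fn main() {
    let print_pct = |p: i32| println!("{p}%");
    let cb: &(dyn Fn(i32) + Send + Sync) = &print_pct;
    run_with_progress(Some(cb));
    run_with_progress(None); // silent run
}
```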
src/lib.rs: 169 lines changed

--- a/src/lib.rs
+++ b/src/lib.rs
@@ -4,9 +4,6 @@
 #![forbid(elided_lifetimes_in_paths)]
 #![forbid(unused_must_use)]
 #![deny(missing_docs)]
-// Lint policy for incremental refactor toward 2024:
-// - Keep basic clippy warnings enabled; skip pedantic/nursery for now (will revisit in step 7).
-// - cargo lints can be re-enabled later once codebase is tidied.
 #![warn(clippy::all)]
 //! PolyScribe library: business logic and core types.
 //!
@@ -22,8 +19,8 @@ static VERBOSE: AtomicU8 = AtomicU8::new(0);
 static NO_PROGRESS: AtomicBool = AtomicBool::new(false);

 /// Set quiet mode: when true, non-interactive logs should be suppressed.
-pub fn set_quiet(q: bool) {
-    QUIET.store(q, Ordering::Relaxed);
+pub fn set_quiet(enabled: bool) {
+    QUIET.store(enabled, Ordering::Relaxed);
 }
 /// Return current quiet mode state.
 pub fn is_quiet() -> bool {
@@ -31,8 +28,8 @@ pub fn is_quiet() -> bool {
 }

 /// Set non-interactive mode: when true, interactive prompts must be skipped.
-pub fn set_no_interaction(b: bool) {
-    NO_INTERACTION.store(b, Ordering::Relaxed);
+pub fn set_no_interaction(enabled: bool) {
+    NO_INTERACTION.store(enabled, Ordering::Relaxed);
 }
 /// Return current non-interactive state.
 pub fn is_no_interaction() -> bool {
@@ -49,8 +46,8 @@ pub fn verbose_level() -> u8 {
 }

 /// Disable interactive progress indicators (bars/spinners)
-pub fn set_no_progress(b: bool) {
-    NO_PROGRESS.store(b, Ordering::Relaxed);
+pub fn set_no_progress(enabled: bool) {
+    NO_PROGRESS.store(enabled, Ordering::Relaxed);
 }
 /// Return current no-progress state
 pub fn is_no_progress() -> bool {
@@ -92,7 +89,6 @@ impl StderrSilencer {
     pub fn activate() -> Self {
         #[cfg(unix)]
         unsafe {
-            // Duplicate current stderr (fd 2)
             let old_fd = dup(2);
             if old_fd < 0 {
                 return Self {
@@ -103,9 +99,8 @@ impl StderrSilencer {
             }
             // Open /dev/null for writing
             let devnull_cstr = std::ffi::CString::new("/dev/null").unwrap();
-            let dn = open(devnull_cstr.as_ptr(), O_WRONLY);
-            if dn < 0 {
-                // failed to open devnull; restore and bail
+            let devnull_fd = open(devnull_cstr.as_ptr(), O_WRONLY);
+            if devnull_fd < 0 {
                 close(old_fd);
                 return Self {
                     active: false,
@@ -113,9 +108,8 @@ impl StderrSilencer {
                     devnull_fd: -1,
                 };
             }
-            // Redirect fd 2 to devnull
-            if dup2(dn, 2) < 0 {
-                close(dn);
+            if dup2(devnull_fd, 2) < 0 {
+                close(devnull_fd);
                 close(old_fd);
                 return Self {
                     active: false,
@@ -126,7 +120,7 @@ impl StderrSilencer {
             Self {
                 active: true,
                 old_stderr_fd: old_fd,
-                devnull_fd: dn,
+                devnull_fd: devnull_fd,
             }
         }
         #[cfg(not(unix))]
@@ -143,7 +137,6 @@ impl Drop for StderrSilencer {
         }
         #[cfg(unix)]
         unsafe {
-            // Restore old stderr and close devnull and old copies
             let _ = dup2(self.old_stderr_fd, 2);
             let _ = close(self.devnull_fd);
             let _ = close(self.old_stderr_fd);
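activate() and Drop above form an RAII guard around raw file-descriptor redirection; the deleted comments described the individual dup/dup2 steps. A reduced sketch of the same idea using the libc crate directly (error handling elided; illustrative, not the crate's API):

```rust
// Minimal RAII guard: point fd 2 (stderr) at /dev/null, restore it on drop.
struct Silence {
    old_stderr: i32,
    devnull: i32,
}

impl Silence {
    unsafe fn new() -> Self {
        let old_stderr = libc::dup(2); // keep a copy of the current stderr
        let path = std::ffi::CString::new("/dev/null").unwrap();
        let devnull = libc::open(path.as_ptr(), libc::O_WRONLY);
        libc::dup2(devnull, 2); // redirect fd 2 to /dev/null
        Self { old_stderr, devnull }
    }
}

impl Drop for Silence {
    fn drop(&mut self) {
        unsafe {
            libc::dup2(self.old_stderr, 2); // restore the original stderr
            libc::close(self.devnull);
            libc::close(self.old_stderr);
        }
    }
}
```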
@@ -161,13 +154,13 @@ where
 {
     // Suppress noisy native logs unless super-verbose (-vv) is enabled.
     if verbose_level() < 2 {
-        let res = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
+        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
             let _guard = StderrSilencer::activate();
             f()
         }));
-        match res {
-            Ok(v) => v,
-            Err(p) => std::panic::resume_unwind(p),
+        match result {
+            Ok(value) => value,
+            Err(panic_payload) => std::panic::resume_unwind(panic_payload),
         }
     } else {
         f()
@@ -256,39 +249,39 @@ pub fn format_srt_time(seconds: f64) -> String {
     let total_ms = (seconds * 1000.0).round() as i64;
     let ms = total_ms % 1000;
     let total_secs = total_ms / 1000;
-    let s = total_secs % 60;
-    let m = (total_secs / 60) % 60;
-    let h = total_secs / 3600;
-    format!("{h:02}:{m:02}:{s:02},{ms:03}")
+    let sec = total_secs % 60;
+    let min = (total_secs / 60) % 60;
+    let hour = total_secs / 3600;
+    format!("{hour:02}:{min:02}:{sec:02},{ms:03}")
 }

 /// Render a list of transcript entries to SRT format.
-pub fn render_srt(items: &[OutputEntry]) -> String {
-    let mut out = String::new();
-    for (i, e) in items.iter().enumerate() {
-        let idx = i + 1;
-        out.push_str(&format!("{idx}\n"));
-        out.push_str(&format!(
+pub fn render_srt(entries: &[OutputEntry]) -> String {
+    let mut srt = String::new();
+    for (index, entry) in entries.iter().enumerate() {
+        let srt_index = index + 1;
+        srt.push_str(&format!("{srt_index}\n"));
+        srt.push_str(&format!(
             "{} --> {}\n",
-            format_srt_time(e.start),
-            format_srt_time(e.end)
+            format_srt_time(entry.start),
+            format_srt_time(entry.end)
         ));
-        if !e.speaker.is_empty() {
-            out.push_str(&format!("{}: {}\n", e.speaker, e.text));
+        if !entry.speaker.is_empty() {
+            srt.push_str(&format!("{}: {}\n", entry.speaker, entry.text));
         } else {
-            out.push_str(&format!("{}\n", e.text));
+            srt.push_str(&format!("{}\n", entry.text));
         }
-        out.push('\n');
+        srt.push('\n');
     }
-    out
+    srt
 }

 /// Determine the default models directory, honoring POLYSCRIBE_MODELS_DIR override.
 pub fn models_dir_path() -> PathBuf {
-    if let Ok(p) = env::var("POLYSCRIBE_MODELS_DIR") {
-        let pb = PathBuf::from(p);
-        if !pb.as_os_str().is_empty() {
-            return pb;
+    if let Ok(env_val) = env::var("POLYSCRIBE_MODELS_DIR") {
+        let env_path = PathBuf::from(env_val);
+        if !env_path.as_os_str().is_empty() {
+            return env_path;
         }
     }
     if cfg!(debug_assertions) {
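The renamed locals leave the timestamp math untouched: rounded milliseconds are still split into HH:MM:SS,mmm. A quick worked example (hypothetical test, not part of the commit):

```rust
#[test]
fn format_srt_time_examples() {
    // 3661.047 s = 1 h, 1 min, 1 s, 47 ms
    assert_eq!(format_srt_time(3661.047), "01:01:01,047");
    assert_eq!(format_srt_time(0.5), "00:00:00,500");
}
```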
@@ -313,17 +306,17 @@ pub fn models_dir_path() -> PathBuf {

 /// Normalize a language identifier to a short ISO code when possible.
 pub fn normalize_lang_code(input: &str) -> Option<String> {
-    let mut s = input.trim().to_lowercase();
-    if s.is_empty() || s == "auto" || s == "c" || s == "posix" {
+    let mut lang = input.trim().to_lowercase();
+    if lang.is_empty() || lang == "auto" || lang == "c" || lang == "posix" {
         return None;
     }
-    if let Some((lhs, _)) = s.split_once('.') {
-        s = lhs.to_string();
+    if let Some((prefix, _)) = lang.split_once('.') {
+        lang = prefix.to_string();
     }
-    if let Some((lhs, _)) = s.split_once('_') {
-        s = lhs.to_string();
+    if let Some((prefix, _)) = lang.split_once('_') {
+        lang = prefix.to_string();
     }
-    let code = match s.as_str() {
+    let code = match lang.as_str() {
         "en" => "en",
         "de" => "de",
         "es" => "es",
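Renaming s to lang does not alter the normalization steps: lowercase, strip anything after '.' or '_', then map the remaining tag. Assuming the match arms shown here feed into the returned Some(code), the behavior looks like this (hypothetical test):

```rust
#[test]
fn normalize_lang_code_examples() {
    // Locale-style input is reduced to its language prefix.
    assert_eq!(normalize_lang_code("de_DE.UTF-8"), Some("de".to_string()));
    // "auto" (and empty/C/POSIX) means no usable hint.
    assert_eq!(normalize_lang_code("auto"), None);
}
```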
@@ -397,10 +390,10 @@ pub fn find_model_file() -> Result<PathBuf> {
     }

     if let Ok(env_model) = env::var("WHISPER_MODEL") {
-        let p = PathBuf::from(env_model);
-        if p.is_file() {
-            let _ = std::fs::write(models_dir.join(".last_model"), p.display().to_string());
-            return Ok(p);
+        let model_path = PathBuf::from(env_model);
+        if model_path.is_file() {
+            let _ = std::fs::write(models_dir.join(".last_model"), model_path.display().to_string());
+            return Ok(model_path);
         }
     }

@@ -419,9 +412,9 @@ pub fn find_model_file() -> Result<PathBuf> {
     }

     let mut candidates: Vec<PathBuf> = Vec::new();
-    let rd = std::fs::read_dir(models_dir)
+    let dir_entries = std::fs::read_dir(models_dir)
         .with_context(|| format!("Failed to read models directory: {}", models_dir.display()))?;
-    for entry in rd {
+    for entry in dir_entries {
         let entry = entry?;
         let path = entry.path();
         if path.is_file() {
@@ -452,16 +445,16 @@ pub fn find_model_file() -> Result<PathBuf> {
         ));
     }
     let input = crate::ui::prompt_line("Would you like to download models now? [Y/n]: ").unwrap_or_default();
-    let ans = input.trim().to_lowercase();
-    if ans.is_empty() || ans == "y" || ans == "yes" {
+    let answer = input.trim().to_lowercase();
+    if answer.is_empty() || answer == "y" || answer == "yes" {
         if let Err(e) = models::run_interactive_model_downloader() {
             elog!("Downloader failed: {:#}", e);
         }
         candidates.clear();
-        let rd2 = std::fs::read_dir(models_dir).with_context(|| {
+        let dir_entries2 = std::fs::read_dir(models_dir).with_context(|| {
             format!("Failed to read models directory: {}", models_dir.display())
         })?;
-        for entry in rd2 {
+        for entry in dir_entries2 {
             let entry = entry?;
             let path = entry.path();
             if path.is_file() {
@@ -487,38 +480,36 @@ pub fn find_model_file() -> Result<PathBuf> {
     }

     if candidates.len() == 1 {
-        let only = candidates.remove(0);
-        let _ = std::fs::write(models_dir.join(".last_model"), only.display().to_string());
-        return Ok(only);
+        let only_model = candidates.remove(0);
+        let _ = std::fs::write(models_dir.join(".last_model"), only_model.display().to_string());
+        return Ok(only_model);
     }

     let last_file = models_dir.join(".last_model");
-    if let Ok(prev) = std::fs::read_to_string(&last_file) {
-        let prev = prev.trim();
-        if !prev.is_empty() {
-            let p = PathBuf::from(prev);
-            if p.is_file() && candidates.iter().any(|c| c == &p) {
-                // Previously printed: INFO about using previously selected model.
-                // Suppress this to avoid duplicate/noisy messages; per-file progress will be shown elsewhere.
-                return Ok(p);
+    if let Ok(previous_content) = std::fs::read_to_string(&last_file) {
+        let previous_content = previous_content.trim();
+        if !previous_content.is_empty() {
+            let previous_path = PathBuf::from(previous_content);
+            if previous_path.is_file() && candidates.iter().any(|c| c == &previous_path) {
+                return Ok(previous_path);
             }
         }
     }

     crate::ui::println_above_bars(format!("Multiple Whisper models found in {}:", models_dir.display()));
-    for (i, p) in candidates.iter().enumerate() {
-        crate::ui::println_above_bars(format!(" {}) {}", i + 1, p.display()));
+    for (index, path) in candidates.iter().enumerate() {
+        crate::ui::println_above_bars(format!(" {}) {}", index + 1, path.display()));
     }
     let input = crate::ui::prompt_line(&format!("Select model by number [1-{}]: ", candidates.len()))
         .map_err(|_| anyhow!("Failed to read selection"))?;
-    let sel: usize = input
+    let selection: usize = input
         .trim()
         .parse()
         .map_err(|_| anyhow!("Invalid selection: {}", input.trim()))?;
-    if sel == 0 || sel > candidates.len() {
+    if selection == 0 || selection > candidates.len() {
         return Err(anyhow!("Selection out of range"));
     }
-    let chosen = candidates.swap_remove(sel - 1);
+    let chosen = candidates.swap_remove(selection - 1);
     let _ = std::fs::write(models_dir.join(".last_model"), chosen.display().to_string());
     Ok(chosen)
 }
@@ -553,28 +544,28 @@ pub fn decode_audio_to_pcm_f32_ffmpeg(audio_path: &Path) -> Result<Vec<f32>> {
         }
     };
     if !output.status.success() {
-        let stderr = String::from_utf8_lossy(&output.stderr);
+        let stderr_str = String::from_utf8_lossy(&output.stderr);
         return Err(anyhow!(
             "Failed to decode audio from {} using ffmpeg. This may indicate the file is not a valid or supported audio/video file, is corrupted, or cannot be opened. ffmpeg stderr: {}",
             audio_path.display(),
-            stderr.trim()
+            stderr_str.trim()
         ));
     }
-    let bytes = output.stdout;
-    if bytes.len() % 4 != 0 {
-        let truncated = bytes.len() - (bytes.len() % 4);
-        let mut v = Vec::with_capacity(truncated / 4);
-        for chunk in bytes[..truncated].chunks_exact(4) {
+    let data = output.stdout;
+    if data.len() % 4 != 0 {
+        let truncated = data.len() - (data.len() % 4);
+        let mut samples = Vec::with_capacity(truncated / 4);
+        for chunk in data[..truncated].chunks_exact(4) {
             let arr = [chunk[0], chunk[1], chunk[2], chunk[3]];
-            v.push(f32::from_le_bytes(arr));
+            samples.push(f32::from_le_bytes(arr));
         }
-        Ok(v)
+        Ok(samples)
     } else {
-        let mut v = Vec::with_capacity(bytes.len() / 4);
-        for chunk in bytes.chunks_exact(4) {
+        let mut samples = Vec::with_capacity(data.len() / 4);
+        for chunk in data.chunks_exact(4) {
             let arr = [chunk[0], chunk[1], chunk[2], chunk[3]];
-            v.push(f32::from_le_bytes(arr));
+            samples.push(f32::from_le_bytes(arr));
         }
-        Ok(v)
+        Ok(samples)
     }
 }
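The data/samples renames keep the same decode step: ffmpeg writes raw little-endian f32 PCM to stdout, and every 4-byte chunk becomes one sample, with a trailing partial chunk dropped. The core conversion in isolation (illustrative helper, not the crate's API):

```rust
// Turn raw little-endian f32 bytes (e.g. ffmpeg -f f32le output) into samples,
// ignoring a trailing partial chunk the same way the function above does.
fn le_bytes_to_f32_samples(data: &[u8]) -> Vec<f32> {
    data.chunks_exact(4)
        .map(|chunk| f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]))
        .collect()
}

fn main() {
    let bytes = 1.0_f32.to_le_bytes();
    assert_eq!(le_bytes_to_f32_samples(&bytes), vec![1.0_f32]);
}
```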