[feat] integrate native whisper support for audio transcription
This commit is contained in:
182
Cargo.lock
generated
182
Cargo.lock
generated
@@ -2,6 +2,15 @@
|
|||||||
# It is not intended for manual editing.
|
# It is not intended for manual editing.
|
||||||
version = 4
|
version = 4
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "aho-corasick"
|
||||||
|
version = "1.1.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "android-tzdata"
|
name = "android-tzdata"
|
||||||
version = "0.1.1"
|
version = "0.1.1"
|
||||||
@@ -79,6 +88,32 @@ version = "1.5.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
|
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "bindgen"
|
||||||
|
version = "0.71.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"cexpr",
|
||||||
|
"clang-sys",
|
||||||
|
"itertools",
|
||||||
|
"log",
|
||||||
|
"prettyplease",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"regex",
|
||||||
|
"rustc-hash",
|
||||||
|
"shlex",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "bitflags"
|
||||||
|
version = "2.9.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bumpalo"
|
name = "bumpalo"
|
||||||
version = "3.19.0"
|
version = "3.19.0"
|
||||||
@@ -94,6 +129,15 @@ dependencies = [
|
|||||||
"shlex",
|
"shlex",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cexpr"
|
||||||
|
version = "0.6.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
|
||||||
|
dependencies = [
|
||||||
|
"nom",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cfg-if"
|
name = "cfg-if"
|
||||||
version = "1.0.1"
|
version = "1.0.1"
|
||||||
@@ -114,6 +158,17 @@ dependencies = [
|
|||||||
"windows-link",
|
"windows-link",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clang-sys"
|
||||||
|
version = "1.8.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
|
||||||
|
dependencies = [
|
||||||
|
"glob",
|
||||||
|
"libc",
|
||||||
|
"libloading",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "clap"
|
name = "clap"
|
||||||
version = "4.5.43"
|
version = "4.5.43"
|
||||||
@@ -154,6 +209,15 @@ version = "0.7.5"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675"
|
checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cmake"
|
||||||
|
version = "0.1.54"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e7caa3f9de89ddbe2c607f4101924c5abec803763ae9534e4f4d7d8f84aa81f0"
|
||||||
|
dependencies = [
|
||||||
|
"cc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "colorchoice"
|
name = "colorchoice"
|
||||||
version = "1.0.4"
|
version = "1.0.4"
|
||||||
@@ -176,14 +240,33 @@ dependencies = [
|
|||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"toml",
|
"toml",
|
||||||
|
"whisper-rs",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "either"
|
||||||
|
version = "1.15.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "equivalent"
|
name = "equivalent"
|
||||||
version = "1.0.2"
|
version = "1.0.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
|
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "fs_extra"
|
||||||
|
version = "1.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "glob"
|
||||||
|
version = "0.3.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hashbrown"
|
name = "hashbrown"
|
||||||
version = "0.15.5"
|
version = "0.15.5"
|
||||||
@@ -236,6 +319,15 @@ version = "1.70.1"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
|
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "itertools"
|
||||||
|
version = "0.13.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
|
||||||
|
dependencies = [
|
||||||
|
"either",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "itoa"
|
name = "itoa"
|
||||||
version = "1.0.15"
|
version = "1.0.15"
|
||||||
@@ -258,6 +350,16 @@ version = "0.2.174"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776"
|
checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libloading"
|
||||||
|
version = "0.8.8"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"windows-targets",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "log"
|
name = "log"
|
||||||
version = "0.4.27"
|
version = "0.4.27"
|
||||||
@@ -270,6 +372,22 @@ version = "2.7.5"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
|
checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "minimal-lexical"
|
||||||
|
version = "0.2.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "nom"
|
||||||
|
version = "7.1.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
"minimal-lexical",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "num-traits"
|
name = "num-traits"
|
||||||
version = "0.2.19"
|
version = "0.2.19"
|
||||||
@@ -291,6 +409,16 @@ version = "1.70.1"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
|
checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "prettyplease"
|
||||||
|
version = "0.2.36"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ff24dfcda44452b9816fff4cd4227e1bb73ff5a2f1bc1105aa92fb8565ce44d2"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "proc-macro2"
|
name = "proc-macro2"
|
||||||
version = "1.0.95"
|
version = "1.0.95"
|
||||||
@@ -309,6 +437,41 @@ dependencies = [
|
|||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex"
|
||||||
|
version = "1.11.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
|
||||||
|
dependencies = [
|
||||||
|
"aho-corasick",
|
||||||
|
"memchr",
|
||||||
|
"regex-automata",
|
||||||
|
"regex-syntax",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex-automata"
|
||||||
|
version = "0.4.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
|
||||||
|
dependencies = [
|
||||||
|
"aho-corasick",
|
||||||
|
"memchr",
|
||||||
|
"regex-syntax",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex-syntax"
|
||||||
|
version = "0.8.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rustc-hash"
|
||||||
|
version = "2.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rustversion"
|
name = "rustversion"
|
||||||
version = "1.0.21"
|
version = "1.0.21"
|
||||||
@@ -496,6 +659,25 @@ dependencies = [
|
|||||||
"unicode-ident",
|
"unicode-ident",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "whisper-rs"
|
||||||
|
version = "0.14.3"
|
||||||
|
source = "git+https://github.com/tazz4843/whisper-rs#135b60b85a15714862806b6ea9f76abec38156f1"
|
||||||
|
dependencies = [
|
||||||
|
"whisper-rs-sys",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "whisper-rs-sys"
|
||||||
|
version = "0.13.0"
|
||||||
|
source = "git+https://github.com/tazz4843/whisper-rs#135b60b85a15714862806b6ea9f76abec38156f1"
|
||||||
|
dependencies = [
|
||||||
|
"bindgen",
|
||||||
|
"cfg-if",
|
||||||
|
"cmake",
|
||||||
|
"fs_extra",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows-core"
|
name = "windows-core"
|
||||||
version = "0.61.2"
|
version = "0.61.2"
|
||||||
|
@@ -10,3 +10,8 @@ serde = { version = "1.0.219", features = ["derive"] }
|
|||||||
serde_json = "1.0.142"
|
serde_json = "1.0.142"
|
||||||
toml = "0.8"
|
toml = "0.8"
|
||||||
chrono = { version = "0.4", features = ["clock"] }
|
chrono = { version = "0.4", features = ["clock"] }
|
||||||
|
whisper-rs = { git = "https://github.com/tazz4843/whisper-rs", optional = true }
|
||||||
|
|
||||||
|
[features]
|
||||||
|
default = ["native-whisper"]
|
||||||
|
native-whisper = ["whisper-rs"]
|
||||||
|
253
src/main.rs
253
src/main.rs
@@ -8,16 +8,23 @@ use clap::Parser;
|
|||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use chrono::Local;
|
use chrono::Local;
|
||||||
|
|
||||||
|
#[cfg(feature = "native-whisper")]
|
||||||
|
use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters};
|
||||||
|
|
||||||
#[derive(Parser, Debug)]
|
#[derive(Parser, Debug)]
|
||||||
#[command(name = "merge_transcripts", version, about = "Merge multiple JSON transcripts into one")]
|
#[command(name = "merge_transcripts", version, about = "Merge multiple JSON transcripts into one or transcribe audio using native whisper")]
|
||||||
struct Args {
|
struct Args {
|
||||||
/// Input JSON files to merge
|
/// Input .json transcript files or audio files to merge/transcribe
|
||||||
#[arg(required = true)]
|
#[arg(required = true)]
|
||||||
inputs: Vec<String>,
|
inputs: Vec<String>,
|
||||||
|
|
||||||
/// Output file path (if omitted, writes to stdout)
|
/// Output file path base (date prefix will be added); if omitted, writes JSON to stdout
|
||||||
#[arg(short, long, value_name = "FILE")]
|
#[arg(short, long, value_name = "FILE")]
|
||||||
output: Option<String>,
|
output: Option<String>,
|
||||||
|
|
||||||
|
/// Language code to use for transcription (e.g., en, de). No auto-detection.
|
||||||
|
#[arg(short, long, value_name = "LANG")]
|
||||||
|
language: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Deserialize)]
|
#[derive(Debug, Deserialize)]
|
||||||
@@ -78,7 +85,7 @@ fn render_srt(items: &[OutputEntry]) -> String {
|
|||||||
out
|
out
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- Helpers for audio transcription via openai-whisper ---
|
// --- Helpers for audio transcription ---
|
||||||
fn is_json_file(path: &Path) -> bool {
|
fn is_json_file(path: &Path) -> bool {
|
||||||
matches!(path.extension().and_then(|s| s.to_str()).map(|s| s.to_lowercase()), Some(ext) if ext == "json")
|
matches!(path.extension().and_then(|s| s.to_str()).map(|s| s.to_lowercase()), Some(ext) if ext == "json")
|
||||||
}
|
}
|
||||||
@@ -93,75 +100,153 @@ fn is_audio_file(path: &Path) -> bool {
|
|||||||
false
|
false
|
||||||
}
|
}
|
||||||
|
|
||||||
fn ensure_whisper_venv() -> Result<PathBuf> {
|
fn normalize_lang_code(input: &str) -> Option<String> {
|
||||||
let venv_dir = Path::new(".venv");
|
let mut s = input.trim().to_lowercase();
|
||||||
let bin_dir = venv_dir.join("bin");
|
if s.is_empty() || s == "auto" || s == "c" || s == "posix" { return None; }
|
||||||
let python_bin = bin_dir.join("python");
|
if let Some((lhs, _)) = s.split_once('.') { s = lhs.to_string(); }
|
||||||
// Create venv if needed
|
if let Some((lhs, _)) = s.split_once('_') { s = lhs.to_string(); }
|
||||||
if !python_bin.exists() {
|
let code = match s.as_str() {
|
||||||
let status = Command::new("python")
|
// ISO codes directly
|
||||||
.arg("-m").arg("venv").arg(".venv")
|
"en"=>"en","de"=>"de","es"=>"es","fr"=>"fr","it"=>"it","pt"=>"pt","nl"=>"nl","ru"=>"ru","pl"=>"pl",
|
||||||
.status()
|
"uk"=>"uk","cs"=>"cs","sv"=>"sv","no"=>"no","da"=>"da","fi"=>"fi","hu"=>"hu","tr"=>"tr","el"=>"el",
|
||||||
.with_context(|| "Failed to execute: python -m venv .venv")?;
|
"zh"=>"zh","ja"=>"ja","ko"=>"ko","ar"=>"ar","he"=>"he","hi"=>"hi","ro"=>"ro","bg"=>"bg","sk"=>"sk",
|
||||||
if !status.success() {
|
// Common English names
|
||||||
return Err(anyhow!("Creating virtual environment failed (python -m venv .venv)"));
|
"english"=>"en","german"=>"de","spanish"=>"es","french"=>"fr","italian"=>"it","portuguese"=>"pt",
|
||||||
}
|
"dutch"=>"nl","russian"=>"ru","polish"=>"pl","ukrainian"=>"uk","czech"=>"cs","swedish"=>"sv",
|
||||||
}
|
"norwegian"=>"no","danish"=>"da","finnish"=>"fi","hungarian"=>"hu","turkish"=>"tr","greek"=>"el",
|
||||||
// Install openai-whisper if needed
|
"chinese"=>"zh","japanese"=>"ja","korean"=>"ko","arabic"=>"ar","hebrew"=>"he","hindi"=>"hi",
|
||||||
let whisper_bin = bin_dir.join("whisper");
|
"romanian"=>"ro","bulgarian"=>"bg","slovak"=>"sk",
|
||||||
if !whisper_bin.exists() {
|
_ => return None,
|
||||||
let status = Command::new(&python_bin)
|
};
|
||||||
.arg("-m").arg("pip").arg("install").arg("-U").arg("openai-whisper")
|
Some(code.to_string())
|
||||||
.status()
|
|
||||||
.with_context(|| "Failed to execute: .venv/bin/python -m pip install -U openai-whisper")?;
|
|
||||||
if !status.success() {
|
|
||||||
return Err(anyhow!("Installing openai-whisper failed"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(whisper_bin)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn run_whisper(audio_path: &Path, out_dir: &Path, whisper_bin: &Path) -> Result<PathBuf> {
|
|
||||||
create_dir_all(out_dir).with_context(|| format!("Failed to create whisper output dir: {}", out_dir.display()))?;
|
|
||||||
let out_dir_str = out_dir.to_string_lossy().to_string();
|
|
||||||
|
|
||||||
// Try with -o first (as per requested command)
|
|
||||||
let attempt = |use_short_o: bool| -> Result<()> {
|
#[cfg(feature = "native-whisper")]
|
||||||
let mut cmd = Command::new(whisper_bin);
|
fn find_model_file() -> Result<PathBuf> {
|
||||||
cmd.arg(audio_path)
|
let models_dir = Path::new("models");
|
||||||
.arg("--model").arg("turbo")
|
if !models_dir.exists() {
|
||||||
.arg("--output_format").arg("json");
|
return Err(anyhow!("No models directory found at {}", models_dir.display()));
|
||||||
if use_short_o {
|
}
|
||||||
cmd.arg("-o").arg(&out_dir_str);
|
let mut candidates: Vec<PathBuf> = Vec::new();
|
||||||
} else {
|
let rd = std::fs::read_dir(models_dir)
|
||||||
cmd.arg("--output_dir").arg(&out_dir_str);
|
.with_context(|| format!("Failed to read models directory: {}", models_dir.display()))?;
|
||||||
|
for entry in rd {
|
||||||
|
let entry = entry?;
|
||||||
|
let path = entry.path();
|
||||||
|
if path.is_file() {
|
||||||
|
if let Some(ext) = path.extension().and_then(|s| s.to_str()).map(|s| s.to_lowercase()) {
|
||||||
|
if ext == "bin" {
|
||||||
|
candidates.push(path);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
let output = cmd.output()
|
}
|
||||||
.with_context(|| format!("Failed to execute whisper for {}", audio_path.display()))?;
|
if candidates.is_empty() {
|
||||||
if !output.status.success() {
|
return Err(anyhow!("No Whisper model files (*.bin) found in {}", models_dir.display()));
|
||||||
|
}
|
||||||
|
if candidates.len() == 1 {
|
||||||
|
return Ok(candidates.remove(0));
|
||||||
|
}
|
||||||
|
// Multiple models: prompt user to choose
|
||||||
|
eprintln!("Multiple Whisper models found in {}:", models_dir.display());
|
||||||
|
for (i, p) in candidates.iter().enumerate() {
|
||||||
|
eprintln!(" {}) {}", i + 1, p.display());
|
||||||
|
}
|
||||||
|
eprint!("Select model by number [1-{}]: ", candidates.len());
|
||||||
|
io::stderr().flush().ok();
|
||||||
|
let mut input = String::new();
|
||||||
|
io::stdin().read_line(&mut input).context("Failed to read selection")?;
|
||||||
|
let sel: usize = input.trim().parse().map_err(|_| anyhow!("Invalid selection: {}", input.trim()))?;
|
||||||
|
if sel == 0 || sel > candidates.len() {
|
||||||
|
return Err(anyhow!("Selection out of range"));
|
||||||
|
}
|
||||||
|
Ok(candidates.swap_remove(sel - 1))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "native-whisper")]
|
||||||
|
fn decode_audio_to_pcm_f32_ffmpeg(audio_path: &Path) -> Result<Vec<f32>> {
|
||||||
|
let output = Command::new("ffmpeg")
|
||||||
|
.arg("-i").arg(audio_path)
|
||||||
|
.arg("-f").arg("f32le")
|
||||||
|
.arg("-ac").arg("1")
|
||||||
|
.arg("-ar").arg("16000")
|
||||||
|
.arg("pipe:1")
|
||||||
|
.output()
|
||||||
|
.with_context(|| format!("Failed to execute ffmpeg for {}", audio_path.display()))?;
|
||||||
|
if !output.status.success() {
|
||||||
|
return Err(anyhow!(
|
||||||
|
"ffmpeg failed for {}: {}",
|
||||||
|
audio_path.display(),
|
||||||
|
String::from_utf8_lossy(&output.stderr)
|
||||||
|
));
|
||||||
|
}
|
||||||
|
let bytes = output.stdout;
|
||||||
|
if bytes.len() % 4 != 0 {
|
||||||
|
// Truncate to nearest multiple of 4 bytes to avoid partial f32
|
||||||
|
let truncated = bytes.len() - (bytes.len() % 4);
|
||||||
|
let mut v = Vec::with_capacity(truncated / 4);
|
||||||
|
for chunk in bytes[..truncated].chunks_exact(4) {
|
||||||
|
let arr = [chunk[0], chunk[1], chunk[2], chunk[3]];
|
||||||
|
v.push(f32::from_le_bytes(arr));
|
||||||
|
}
|
||||||
|
Ok(v)
|
||||||
|
} else {
|
||||||
|
let mut v = Vec::with_capacity(bytes.len() / 4);
|
||||||
|
for chunk in bytes.chunks_exact(4) {
|
||||||
|
let arr = [chunk[0], chunk[1], chunk[2], chunk[3]];
|
||||||
|
v.push(f32::from_le_bytes(arr));
|
||||||
|
}
|
||||||
|
Ok(v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "native-whisper")]
|
||||||
|
fn transcribe_native(audio_path: &Path, speaker: &str, lang_opt: Option<&str>) -> Result<Vec<OutputEntry>> {
|
||||||
|
let pcm = decode_audio_to_pcm_f32_ffmpeg(audio_path)?;
|
||||||
|
let model = find_model_file()?;
|
||||||
|
let is_en_only = model
|
||||||
|
.file_name()
|
||||||
|
.and_then(|s| s.to_str())
|
||||||
|
.map(|s| s.contains(".en.") || s.ends_with(".en.bin"))
|
||||||
|
.unwrap_or(false);
|
||||||
|
if let Some(lang) = lang_opt {
|
||||||
|
if is_en_only && lang != "en" {
|
||||||
return Err(anyhow!(
|
return Err(anyhow!(
|
||||||
"Whisper transcription failed for {}: {}",
|
"Selected model is English-only ({}), but a non-English language hint '{}' was provided. Please use a multilingual model like models/ggml-base.bin or set WHISPER_MODEL accordingly.",
|
||||||
audio_path.display(),
|
model.display(),
|
||||||
String::from_utf8_lossy(&output.stderr)
|
lang
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
Ok(())
|
|
||||||
};
|
|
||||||
|
|
||||||
if let Err(_e) = attempt(true) {
|
|
||||||
// Fallback to --output_dir if -o not supported by CLI installed
|
|
||||||
attempt(false)?;
|
|
||||||
}
|
}
|
||||||
|
let model_str = model.to_str().ok_or_else(|| anyhow!("Model path not valid UTF-8: {}", model.display()))?;
|
||||||
|
let ctx = WhisperContext::new_with_params(model_str, WhisperContextParameters::default())
|
||||||
|
.with_context(|| format!("Failed to load Whisper model at {}", model.display()))?;
|
||||||
|
let mut state = ctx.create_state()
|
||||||
|
.map_err(|e| anyhow!("Failed to create Whisper state: {:?}", e))?;
|
||||||
|
|
||||||
// Try to locate the resulting JSON file
|
let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 1 });
|
||||||
let stem = audio_path.file_stem().and_then(|s| s.to_str()).unwrap_or("transcript");
|
let n_threads = std::thread::available_parallelism().map(|n| n.get() as i32).unwrap_or(1);
|
||||||
let cand1 = out_dir.join(format!("{}.json", stem));
|
params.set_n_threads(n_threads);
|
||||||
if cand1.exists() { return Ok(cand1); }
|
params.set_translate(false);
|
||||||
if let Some(file_name) = audio_path.file_name().and_then(|s| s.to_str()) {
|
if let Some(lang) = lang_opt { params.set_language(Some(lang)); }
|
||||||
let cand2 = out_dir.join(format!("{}.json", file_name));
|
|
||||||
if cand2.exists() { return Ok(cand2); }
|
state.full(params, &pcm)
|
||||||
|
.map_err(|e| anyhow!("Whisper full() failed: {:?}", e))?;
|
||||||
|
|
||||||
|
let num_segments = state.full_n_segments().map_err(|e| anyhow!("Failed to get segments: {:?}", e))?;
|
||||||
|
let mut items = Vec::new();
|
||||||
|
for i in 0..num_segments {
|
||||||
|
let text = state.full_get_segment_text(i)
|
||||||
|
.map_err(|e| anyhow!("Failed to get segment text: {:?}", e))?;
|
||||||
|
let t0 = state.full_get_segment_t0(i).map_err(|e| anyhow!("Failed to get segment t0: {:?}", e))?;
|
||||||
|
let t1 = state.full_get_segment_t1(i).map_err(|e| anyhow!("Failed to get segment t1: {:?}", e))?;
|
||||||
|
let start = (t0 as f64) * 0.01;
|
||||||
|
let end = (t1 as f64) * 0.01;
|
||||||
|
items.push(OutputEntry { id: 0, speaker: speaker.to_string(), start, end, text: text.trim().to_string() });
|
||||||
}
|
}
|
||||||
Err(anyhow!("Could not find whisper JSON output in {} for input {}", out_dir.display(), audio_path.display()))
|
Ok(items)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn main() -> Result<()> {
|
fn main() -> Result<()> {
|
||||||
@@ -182,22 +267,16 @@ fn main() -> Result<()> {
|
|||||||
return Err(anyhow!("No input files provided"));
|
return Err(anyhow!("No input files provided"));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Determine directory for intermediate Whisper transcriptions
|
// Language must be provided via CLI when transcribing audio (no detection from JSON/env)
|
||||||
let transcripts_dir: PathBuf = if let Some(ref out_path) = output_path {
|
let lang_hint: Option<String> = if let Some(ref l) = args.language {
|
||||||
let base_path = Path::new(out_path);
|
normalize_lang_code(l).or_else(|| Some(l.trim().to_lowercase()))
|
||||||
if let Some(parent) = base_path.parent() {
|
|
||||||
if parent.as_os_str().is_empty() {
|
|
||||||
PathBuf::from("output").join("transcripts")
|
|
||||||
} else {
|
|
||||||
parent.join("transcripts")
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
PathBuf::from("output").join("transcripts")
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
PathBuf::from("output").join("transcripts")
|
None
|
||||||
};
|
};
|
||||||
let mut whisper_bin_opt: Option<PathBuf> = None;
|
let any_audio = inputs.iter().any(|p| is_audio_file(Path::new(p)));
|
||||||
|
if any_audio && lang_hint.is_none() {
|
||||||
|
return Err(anyhow!("Please specify --language (e.g., --language en). Language detection was removed."));
|
||||||
|
}
|
||||||
|
|
||||||
let mut entries: Vec<OutputEntry> = Vec::new();
|
let mut entries: Vec<OutputEntry> = Vec::new();
|
||||||
|
|
||||||
@@ -211,16 +290,18 @@ fn main() -> Result<()> {
|
|||||||
|
|
||||||
let mut buf = String::new();
|
let mut buf = String::new();
|
||||||
if is_audio_file(path) {
|
if is_audio_file(path) {
|
||||||
// Ensure whisper is available
|
#[cfg(feature = "native-whisper")]
|
||||||
if whisper_bin_opt.is_none() {
|
{
|
||||||
whisper_bin_opt = Some(ensure_whisper_venv()?);
|
let items = transcribe_native(path, &speaker, lang_hint.as_deref())?;
|
||||||
|
for e in items {
|
||||||
|
entries.push(e);
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
#[cfg(not(feature = "native-whisper"))]
|
||||||
|
{
|
||||||
|
return Err(anyhow!("Python transcription has been removed. Please build with --features native-whisper to transcribe audio."));
|
||||||
}
|
}
|
||||||
let whisper_bin = whisper_bin_opt.as_ref().unwrap();
|
|
||||||
let json_path = run_whisper(path, &transcripts_dir, whisper_bin)?;
|
|
||||||
File::open(&json_path)
|
|
||||||
.with_context(|| format!("Failed to open Whisper JSON: {}", json_path.display()))?
|
|
||||||
.read_to_string(&mut buf)
|
|
||||||
.with_context(|| format!("Failed to read Whisper JSON: {}", json_path.display()))?;
|
|
||||||
} else if is_json_file(path) {
|
} else if is_json_file(path) {
|
||||||
File::open(path)
|
File::open(path)
|
||||||
.with_context(|| format!("Failed to open: {}", input_path))?
|
.with_context(|| format!("Failed to open: {}", input_path))?
|
||||||
|
Reference in New Issue
Block a user