Revert "[test] add unit tests for validate_model_lang_compat ensuring model-language compatibility validation"

This reverts commit abe81b643b.
This commit is contained in:
2025-08-12 06:00:09 +02:00
parent 90f9849cc0
commit f551cc3498

View File

@@ -141,28 +141,6 @@ impl Default for VulkanBackend {
}
}
/// Validate that a provided language hint is compatible with the selected model.
///
/// English-only models (filenames containing ".en." or ending with ".en.bin") reject non-"en" hints.
/// When no language is provided, this check passes and downstream behavior remains unchanged.
pub(crate) fn validate_model_lang_compat(model: &Path, lang_opt: Option<&str>) -> Result<()> {
let is_en_only = model
.file_name()
.and_then(|s| s.to_str())
.map(|s| s.contains(".en.") || s.ends_with(".en.bin"))
.unwrap_or(false);
if let Some(lang) = lang_opt {
if is_en_only && lang != "en" {
return Err(anyhow!(
"Selected model is English-only ({}), but a non-English language hint '{}' was provided. Please use a multilingual model or set WHISPER_MODEL.",
model.display(),
lang
));
}
}
Ok(())
}
impl TranscribeBackend for CpuBackend {
fn kind(&self) -> BackendKind {
BackendKind::Cpu
@@ -350,8 +328,20 @@ pub(crate) fn transcribe_with_whisper_rs(
note: Some("model selected".to_string()),
});
}
// Validate language hint compatibility with the selected model
validate_model_lang_compat(&model, lang_opt)?;
let is_en_only = model
.file_name()
.and_then(|s| s.to_str())
.map(|s| s.contains(".en.") || s.ends_with(".en.bin"))
.unwrap_or(false);
if let Some(lang) = lang_opt {
if is_en_only && lang != "en" {
return Err(anyhow!(
"Selected model is English-only ({}), but a non-English language hint '{}' was provided. Please use a multilingual model or set WHISPER_MODEL.",
model.display(),
lang
));
}
}
let model_str = model
.to_str()
.ok_or_else(|| anyhow!("Model path not valid UTF-8: {}", model.display()))?;
@@ -461,35 +451,6 @@ mod tests {
use std::env as std_env;
use std::sync::{Mutex, OnceLock};
#[test]
fn test_validate_model_lang_guard_table() {
struct case<'a> { model: &'a str, lang: Option<&'a str>, ok: bool }
let cases = vec![
// English-only model with en hint: OK
case { model: "ggml-base.en.bin", lang: Some("en"), ok: true },
// English-only model with de hint: Error
case { model: "ggml-small.en.bin", lang: Some("de"), ok: false },
// Multilingual model with de hint: OK
case { model: "ggml-large-v3.bin", lang: Some("de"), ok: true },
// No language provided (audio path scenario): guard should pass (existing behavior elsewhere)
case { model: "ggml-medium.en.bin", lang: None, ok: true },
];
for c in cases {
let p = std::path::Path::new(c.model);
let res = validate_model_lang_compat(p, c.lang);
match (c.ok, res) {
(true, Ok(())) => {}
(false, Err(e)) => {
let msg = format!("{}", e);
assert!(msg.contains("English-only"), "unexpected error: {msg}");
if let Some(l) = c.lang { assert!(msg.contains(l), "missing lang in msg: {msg}"); }
}
(true, Err(e)) => panic!("expected Ok for model={}, lang={:?}, got error: {}", c.model, c.lang, e),
(false, Ok(())) => panic!("expected Err for model={}, lang={:?}", c.model, c.lang),
}
}
}
// Serialize environment variable modifications across tests in this module
static ENV_LOCK: OnceLock<Mutex<()>> = OnceLock::new();