From 5550a100485934bc8646b133efe8f2fa3f4e7ebc Mon Sep 17 00:00:00 2001
From: vikingowl
Date: Thu, 26 Mar 2026 15:21:52 +0100
Subject: [PATCH] feat(converter): implement natural speech query parser

---
Review notes (text in this area is ignored by `git am`):
 * Hunks were revised by hand during review; the commit id above and the
   `index` blob ids below still describe the pre-review content.
 * Connector search no longer slices the input with offsets taken from a
   lowercased copy (could panic on non-ASCII input).
 * Currency lookups hand out string literals instead of leaking a new
   allocation on every call.

 crates/owlry-plugin-converter/src/currency.rs |  37 ++-
 crates/owlry-plugin-converter/src/parser.rs   | 234 +++++++++++++++++-
 crates/owlry-plugin-converter/src/units.rs    |   7 +-
 3 files changed, 271 insertions(+), 7 deletions(-)

diff --git a/crates/owlry-plugin-converter/src/currency.rs b/crates/owlry-plugin-converter/src/currency.rs
index b243fc8..9f7eefd 100644
--- a/crates/owlry-plugin-converter/src/currency.rs
+++ b/crates/owlry-plugin-converter/src/currency.rs
@@ -8,10 +8,39 @@ pub fn get_rates() -> Option {
     None
 }
 
-pub fn resolve_currency_code(_alias: &str) -> Option<String> {
-    None
+/// Maps a currency alias (code, name, or symbol) to its three-letter code, e.g. `EUR`.
+///
+/// Matching is case-insensitive; unrecognized aliases yield `None`. The code is a
+/// string literal, so it can be handed out as `&'static str` without leaking
+/// memory.
+pub fn static_currency_code(alias: &str) -> Option<&'static str> {
+    let lower = alias.to_lowercase();
+    match lower.as_str() {
+        "eur" | "euro" | "euros" | "€" => Some("EUR"),
+        "usd" | "dollar" | "dollars" | "$" | "us_dollar" => Some("USD"),
+        "gbp" | "pound_sterling" | "£" | "british_pound" | "pounds" => Some("GBP"),
+        "jpy" | "yen" | "¥" | "japanese_yen" => Some("JPY"),
+        "chf" | "swiss_franc" | "francs" => Some("CHF"),
+        "cad" | "canadian_dollar" | "c$" => Some("CAD"),
+        "aud" | "australian_dollar" | "a$" => Some("AUD"),
+        "cny" | "yuan" | "renminbi" | "rmb" => Some("CNY"),
+        "sek" | "swedish_krona" | "kronor" => Some("SEK"),
+        "nok" | "norwegian_krone" => Some("NOK"),
+        "dkk" | "danish_krone" => Some("DKK"),
+        "pln" | "zloty" | "złoty" => Some("PLN"),
+        "czk" | "czech_koruna" => Some("CZK"),
+        "huf" | "forint" => Some("HUF"),
+        "try" | "turkish_lira" | "lira" => Some("TRY"),
+        _ => None,
+    }
+}
+
+/// Owned-`String` convenience wrapper around [`static_currency_code`].
+pub fn resolve_currency_code(alias: &str) -> Option<String> {
+    static_currency_code(alias).map(str::to_owned)
 }
 
-pub fn is_currency_alias(_alias: &str) -> bool {
-    false
+/// Returns `true` when `alias` names a supported currency.
+pub fn is_currency_alias(alias: &str) -> bool {
+    static_currency_code(alias).is_some()
 }
diff --git a/crates/owlry-plugin-converter/src/parser.rs b/crates/owlry-plugin-converter/src/parser.rs
index d3242ff..952d797 100644
--- a/crates/owlry-plugin-converter/src/parser.rs
+++ b/crates/owlry-plugin-converter/src/parser.rs
@@ -1,3 +1,5 @@
+use crate::units;
+
 pub struct ParsedQuery {
     pub value: f64,
     pub from_unit: String,
@@ -5,6 +7,234 @@ pub struct ParsedQuery {
     pub target_unit: Option<String>,
 }
 
-pub fn parse_conversion(_input: &str) -> Option<ParsedQuery> {
-    None
+/// Parses a query such as `100 km to mi`, `102F in C` or `.5 kg`.
+///
+/// The input must start with a number followed by a source unit accepted by
+/// [`units::find_unit`]; an optional ` to ` / ` in ` connector introduces a
+/// target unit.
+///
+/// Returns `None` when there is no leading number, no unit text, or the source
+/// unit is unknown. An unknown or empty target is not an error: the query is
+/// still returned, with `target_unit` set to `None`.
+pub fn parse_conversion(input: &str) -> Option<ParsedQuery> {
+    let (value, rest) = extract_number(input.trim())?;
+    let rest = rest.trim();
+    if rest.is_empty() {
+        return None;
+    }
+
+    let (from_str, target_str) = split_on_connector(rest);
+
+    // Unit names are reported back lowercased, exactly as they were looked up.
+    let from_unit = from_str.trim().to_lowercase();
+    let from_symbol = units::find_unit(&from_unit)?.to_string();
+
+    let target_unit = target_str
+        .map(|target| target.trim().to_lowercase())
+        .filter(|target| !target.is_empty() && units::find_unit(target).is_some());
+
+    Some(ParsedQuery {
+        value,
+        from_unit,
+        from_symbol,
+        target_unit,
+    })
+}
+
+/// Counts the ASCII digits at the start of `bytes`.
+fn leading_digits(bytes: &[u8]) -> usize {
+    bytes.iter().take_while(|b| b.is_ascii_digit()).count()
+}
+
+/// Splits a leading decimal number off `input`, returning it with the remainder.
+///
+/// Accepts an optional `-`, then digits with an optional fractional part
+/// (`12`, `12.5`, `12.`, `.5`). At least one digit is required; exponents and a
+/// leading `+` are not recognized.
+fn extract_number(input: &str) -> Option<(f64, &str)> {
+    let bytes = input.as_bytes();
+
+    let sign_len = usize::from(bytes.first() == Some(&b'-'));
+    let int_len = leading_digits(&bytes[sign_len..]);
+    let mut end = sign_len + int_len;
+
+    let mut frac_len = 0;
+    if bytes.get(end) == Some(&b'.') {
+        frac_len = leading_digits(&bytes[end + 1..]);
+        end += 1 + frac_len;
+    }
+
+    if int_len + frac_len == 0 {
+        return None;
+    }
+
+    // Only ASCII bytes were consumed, so `end` always lies on a char boundary.
+    let (number, rest) = input.split_at(end);
+    let value: f64 = number.parse().ok()?;
+    Some((value, rest))
+}
+
+/// Finds the first byte offset where `needle` occurs in `haystack`, ignoring ASCII case.
+///
+/// `needle` must be non-empty ASCII; a match then starts and ends on char
+/// boundaries of `haystack`, so the offsets are safe to slice with.
+fn find_ascii_ignore_case(haystack: &str, needle: &str) -> Option<usize> {
+    haystack
+        .as_bytes()
+        .windows(needle.len())
+        .position(|window| window.eq_ignore_ascii_case(needle.as_bytes()))
+}
+
+/// Splits `input` at the first ` to ` connector, or else the first ` in `.
+///
+/// Returns the text before the connector and, if one was found, the text after it.
+/// The search runs on the original bytes instead of a lowercased copy: Unicode
+/// lowercasing can change byte lengths, which would make offsets from the copy
+/// invalid (or panic-inducing) for slicing `input`.
+fn split_on_connector(input: &str) -> (&str, Option<&str>) {
+    for connector in [" to ", " in "] {
+        if let Some(pos) = find_ascii_ignore_case(input, connector) {
+            let target = &input[pos + connector.len()..];
+            return (&input[..pos], Some(target));
+        }
+    }
+    (input, None)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_number_and_unit_with_space() {
+        let p = parse_conversion("100 km").unwrap();
+        assert!((p.value - 100.0).abs() < 0.001);
+        assert_eq!(p.from_unit, "km");
+        assert!(p.target_unit.is_none());
+    }
+
+    #[test]
+    fn test_number_and_unit_no_space() {
+        let p = parse_conversion("100km").unwrap();
+        assert!((p.value - 100.0).abs() < 0.001);
+        assert_eq!(p.from_unit, "km");
+    }
+
+    #[test]
+    fn test_with_target_to() {
+        let p = parse_conversion("100 km to mi").unwrap();
+        assert!((p.value - 100.0).abs() < 0.001);
+        assert_eq!(p.from_unit, "km");
+        assert_eq!(p.target_unit.as_deref(), Some("mi"));
+    }
+
+    #[test]
+    fn test_with_target_in() {
+        let p = parse_conversion("100 km in mi").unwrap();
+        assert_eq!(p.target_unit.as_deref(), Some("mi"));
+    }
+
+    #[test]
+    fn test_temperature_no_space() {
+        let p = parse_conversion("102F to C").unwrap();
+        assert!((p.value - 102.0).abs() < 0.001);
+        assert_eq!(p.from_unit, "f");
+        assert_eq!(p.target_unit.as_deref(), Some("c"));
+    }
+
+    #[test]
+    fn test_temperature_with_space() {
+        let p = parse_conversion("102 F in K").unwrap();
+        assert!((p.value - 102.0).abs() < 0.001);
+        assert_eq!(p.from_unit, "f");
+        assert_eq!(p.target_unit.as_deref(), Some("k"));
+    }
+
+    #[test]
+    fn test_decimal_number() {
+        let p = parse_conversion("3.5 kg to lb").unwrap();
+        assert!((p.value - 3.5).abs() < 0.001);
+    }
+
+    #[test]
+    fn test_decimal_starting_with_dot() {
+        let p = parse_conversion(".5 kg").unwrap();
+        assert!((p.value - 0.5).abs() < 0.001);
+    }
+
+    #[test]
+    fn test_full_unit_names() {
+        let p = parse_conversion("100 kilometers to miles").unwrap();
+        assert_eq!(p.from_unit, "kilometers");
+        assert_eq!(p.target_unit.as_deref(), Some("miles"));
+    }
+
+    #[test]
+    fn test_case_insensitive() {
+        let p = parse_conversion("100 KM TO MI").unwrap();
+        assert_eq!(p.from_unit, "km");
+        assert_eq!(p.target_unit.as_deref(), Some("mi"));
+    }
+
+    #[test]
+    fn test_currency() {
+        let p = parse_conversion("100 eur to usd").unwrap();
+        assert_eq!(p.from_unit, "eur");
+        assert_eq!(p.target_unit.as_deref(), Some("usd"));
+    }
+
+    #[test]
+    fn test_no_number_returns_none() {
+        assert!(parse_conversion("km to mi").is_none());
+    }
+
+    #[test]
+    fn test_unknown_unit_returns_none() {
+        assert!(parse_conversion("100 xyz to abc").is_none());
+    }
+
+    #[test]
+    fn test_empty_returns_none() {
+        assert!(parse_conversion("").is_none());
+    }
+
+    #[test]
+    fn test_number_only_returns_none() {
+        assert!(parse_conversion("100").is_none());
+    }
+
+    #[test]
+    fn test_compound_unit_alias() {
+        let p = parse_conversion("100 km/h to mph").unwrap();
+        assert_eq!(p.from_unit, "km/h");
+        assert_eq!(p.target_unit.as_deref(), Some("mph"));
+    }
+
+    #[test]
+    fn test_multi_word_unit() {
+        let p = parse_conversion("100 fl_oz to ml").unwrap();
+        assert_eq!(p.from_unit, "fl_oz");
+    }
+
+    #[test]
+    fn test_split_is_ascii_case_insensitive() {
+        assert_eq!(split_on_connector("km TO mi"), ("km", Some("mi")));
+        assert_eq!(split_on_connector("km In mi"), ("km", Some("mi")));
+        assert_eq!(split_on_connector("km"), ("km", None));
+    }
+
+    #[test]
+    fn test_split_with_length_changing_lowercase() {
+        // `İ` grows from two bytes to three when lowercased, which used to shift
+        // the connector offsets and slice `é` in half.
+        assert_eq!(split_on_connector("İ to é"), ("İ", Some("é")));
+    }
+
+    #[test]
+    fn test_number_edge_cases() {
+        assert_eq!(extract_number("-2.5km"), Some((-2.5, "km")));
+        assert_eq!(extract_number("5. km"), Some((5.0, " km")));
+        assert!(extract_number("-km").is_none());
+        assert!(extract_number(".").is_none());
+    }
 }
diff --git a/crates/owlry-plugin-converter/src/units.rs b/crates/owlry-plugin-converter/src/units.rs
index 9587b9b..c8509e1 100644
--- a/crates/owlry-plugin-converter/src/units.rs
+++ b/crates/owlry-plugin-converter/src/units.rs
@@ -89,7 +89,12 @@ static COMMON_TARGETS: LazyLock>> = LazyLock
 
 pub fn find_unit(alias: &str) -> Option<&'static str> {
     let lower = alias.to_lowercase();
-    ALIAS_MAP.get(&lower).map(|&i| UNITS[i].symbol)
+    ALIAS_MAP
+        .get(&lower)
+        .map(|&i| UNITS[i].symbol)
+        // Fall back to currencies. Their codes are string literals, so handing out
+        // `&'static str` needs no allocation (and no `Box::leak` per lookup).
+        .or_else(|| currency::static_currency_code(&lower))
 }
 
 pub fn lookup_unit(alias: &str) -> Option<(usize, &UnitDef)> {