feat(tool/web): route searches through provider

Acceptance Criteria:
- web.search proxies Ollama Cloud's /api/web_search via the configured provider endpoint
- Tool is only registered when remote search is enabled and the cloud provider is active
- Consent prompts, docs, and MCP tooling no longer reference DuckDuckGo or expose web_search_detailed

Test Notes:
- cargo check
This commit is contained in:
2025-10-24 01:29:37 +02:00
parent 79fdafce97
commit bbb94367e1
9 changed files with 242 additions and 266 deletions

View File

@@ -39,9 +39,7 @@ crossterm = { workspace = true }
urlencoding = { workspace = true }
rpassword = { workspace = true }
sqlx = { workspace = true }
duckduckgo = "0.2.0"
reqwest = { workspace = true, features = ["default"] }
reqwest_011 = { version = "0.11", package = "reqwest" }
path-clean = "1.0"
tokio-stream = { workspace = true }
tokio-tungstenite = "0.21"

View File

@@ -3,15 +3,13 @@ use super::protocol::{
PROTOCOL_VERSION, RequestId, RpcErrorResponse, RpcNotification, RpcRequest, RpcResponse,
};
use super::{McpClient, McpToolCall, McpToolDescriptor, McpToolResponse};
use crate::consent::{ConsentManager, ConsentScope};
use crate::tools::{Tool, WebScrapeTool, WebSearchTool};
use crate::tools::{Tool, WebScrapeTool};
use crate::types::ModelInfo;
use crate::types::{ChatResponse, Message, Role};
use crate::{
ChatStream, Error, LlmProvider, Result, facade::llm_client::LlmClient, mode::Mode,
send_via_stream,
};
use anyhow::anyhow;
use futures::{StreamExt, future::BoxFuture, stream};
use reqwest::Client as HttpClient;
use serde_json::json;
@@ -443,34 +441,6 @@ impl McpClient for RemoteMcpClient {
duration_ms: 0,
});
}
// Local handling for web tools to avoid needing an external MCP server.
if call.name == "web_search" {
// Auto-grant consent for the web_search tool (permanent for this process).
let consent_manager = std::sync::Arc::new(std::sync::Mutex::new(ConsentManager::new()));
{
let mut cm = consent_manager
.lock()
.map_err(|_| Error::Provider(anyhow!("Consent manager mutex poisoned")))?;
cm.grant_consent_with_scope(
"web_search",
Vec::new(),
Vec::new(),
ConsentScope::Permanent,
);
}
let tool = WebSearchTool::new(consent_manager.clone(), None, None);
let result = tool
.execute(call.arguments.clone())
.await
.map_err(|e| Error::Provider(e.into()))?;
return Ok(McpToolResponse {
name: call.name,
success: true,
output: result.output,
metadata: std::collections::HashMap::new(),
duration_ms: result.duration.as_millis() as u128,
});
}
if call.name == "web_scrape" {
let tool = WebScrapeTool::new();
let result = tool

View File

@@ -1,10 +1,14 @@
use crate::config::{Config, McpResourceConfig, McpServerConfig};
use crate::config::{
Config, LEGACY_OLLAMA_CLOUD_API_KEY_ENV, LEGACY_OWLEN_OLLAMA_CLOUD_API_KEY_ENV,
McpResourceConfig, McpServerConfig, OLLAMA_API_KEY_ENV, OLLAMA_CLOUD_BASE_URL,
};
use crate::consent::{ConsentManager, ConsentScope};
use crate::conversation::ConversationManager;
use crate::credentials::CredentialManager;
use crate::encryption::{self, VaultHandle};
use crate::formatting::MessageFormatter;
use crate::input::InputBuffer;
use crate::llm::ProviderConfig;
use crate::mcp::McpToolCall;
use crate::mcp::client::McpClient;
use crate::mcp::factory::McpClientFactory;
@@ -24,22 +28,154 @@ use crate::validation::{SchemaValidator, get_builtin_schemas};
use crate::{ChatStream, Provider};
use crate::{
CodeExecTool, ResourcesDeleteTool, ResourcesGetTool, ResourcesListTool, ResourcesWriteTool,
ToolRegistry, WebScrapeTool, WebSearchDetailedTool, WebSearchTool,
ToolRegistry, WebScrapeTool, WebSearchSettings, WebSearchTool,
};
use crate::{Error, Result};
use chrono::Utc;
use log::warn;
use reqwest::Url;
use serde_json::{Value, json};
use std::collections::{HashMap, HashSet};
use std::env;
use std::path::PathBuf;
use std::sync::{Arc, Mutex};
use std::time::SystemTime;
use std::time::{Duration, SystemTime};
use tokio::fs;
use tokio::sync::Mutex as TokioMutex;
use tokio::sync::mpsc::UnboundedSender;
use uuid::Uuid;
/// Reads the environment variable `name` and returns its whitespace-trimmed value.
///
/// Yields `None` when the variable cannot be read or when nothing remains
/// after trimming surrounding whitespace.
fn env_var_non_empty(name: &str) -> Option<String> {
    let raw = env::var(name).ok()?;
    let trimmed = raw.trim();
    if trimmed.is_empty() {
        None
    } else {
        Some(trimmed.to_string())
    }
}
/// Derives the web search settings for the provider identified by `provider_id`.
///
/// Returns `Ok(None)` when the provider is not configured, is disabled, is not
/// of type `ollama_cloud` (compared case-insensitively after trimming), or when
/// no API key can be resolved. Returns `Err` when the search URL cannot be
/// built from the provider's `base_url` and `web_search_endpoint` values.
fn compute_web_search_settings(
    config: &Config,
    provider_id: &str,
) -> Result<Option<WebSearchSettings>> {
    let provider_id = provider_id.trim();

    let provider_config = match config.providers.get(provider_id) {
        Some(cfg) if cfg.enabled => cfg,
        _ => return Ok(None),
    };

    // Only the cloud flavour qualifies. This single check also rules out a
    // local `ollama` provider, which does not expose web search.
    let provider_type = provider_config.provider_type.trim();
    if !provider_type.eq_ignore_ascii_case("ollama_cloud") {
        return Ok(None);
    }

    // A missing or blank base URL falls back to the default cloud base URL.
    let base_url = match provider_config.base_url.as_deref() {
        Some(url) if !url.trim().is_empty() => url,
        _ => OLLAMA_CLOUD_BASE_URL,
    };

    // The endpoint may be overridden per provider; non-string values are ignored.
    let endpoint = match provider_config.extra.get("web_search_endpoint") {
        Some(value) => value.as_str().unwrap_or("/api/web_search"),
        None => "/api/web_search",
    };

    let endpoint_url = build_search_url(base_url, endpoint)?;

    // Key lookup order: provider configuration first, then the current
    // environment variable, then the two legacy ones.
    let env_fallbacks = [
        OLLAMA_API_KEY_ENV,
        LEGACY_OLLAMA_CLOUD_API_KEY_ENV,
        LEGACY_OWLEN_OLLAMA_CLOUD_API_KEY_ENV,
    ];
    let resolved_key = resolve_web_search_api_key(provider_config)
        .or_else(|| {
            env_fallbacks
                .iter()
                .find_map(|var_name| env_var_non_empty(var_name))
        })
        .filter(|key| !key.is_empty());

    let api_key = match resolved_key {
        Some(key) => key,
        None => return Ok(None),
    };

    Ok(Some(WebSearchSettings {
        endpoint: endpoint_url,
        api_key,
        provider_label: provider_id.to_string(),
        timeout: Duration::from_secs(20),
    }))
}
/// Resolves the API key configured on a provider.
///
/// The inline `api_key` value takes precedence; when it yields nothing, the
/// environment variable named by `api_key_env` (trimmed) is consulted.
fn resolve_web_search_api_key(provider_config: &ProviderConfig) -> Option<String> {
    if let Some(inline_key) = resolve_inline_api_key(provider_config.api_key.as_deref()) {
        return Some(inline_key);
    }

    let var_name = provider_config.api_key_env.as_deref()?;
    env_var_non_empty(var_name.trim())
}
fn resolve_inline_api_key(value: Option<&str>) -> Option<String> {
let raw = value?.trim();
if raw.is_empty() {
return None;
}
if let Some(inner) = raw
.strip_prefix("${")
.and_then(|value| value.strip_suffix('}'))
.map(str::trim)
{
return env_var_non_empty(inner);
}
if let Some(inner) = raw.strip_prefix('$').map(str::trim) {
return env_var_non_empty(inner);
}
Some(raw.to_string())
}
fn build_search_url(base_url: &str, endpoint: &str) -> Result<Url> {
let endpoint = endpoint.trim();
if let Ok(url) = Url::parse(endpoint) {
return Ok(url);
}
let trimmed_base = base_url.trim();
let normalized_base = if trimmed_base.ends_with('/') {
trimmed_base.to_string()
} else {
format!("{}/", trimmed_base)
};
let base = Url::parse(&normalized_base).map_err(|err| {
Error::Config(format!("Invalid provider base_url '{}': {}", base_url, err))
})?;
if endpoint.is_empty() {
return Ok(base);
}
base.join(endpoint.trim_start_matches('/')).map_err(|err| {
Error::Config(format!(
"Invalid web_search_endpoint '{}': {}",
endpoint, err
))
})
}
pub enum SessionOutcome {
Complete(ChatResponse),
Streaming {
@@ -251,8 +387,8 @@ async fn build_tools(
ui: Arc<dyn UiController>,
enable_code_tools: bool,
consent_manager: Arc<Mutex<ConsentManager>>,
credential_manager: Option<Arc<CredentialManager>>,
vault: Option<Arc<Mutex<VaultHandle>>>,
_credential_manager: Option<Arc<CredentialManager>>,
_vault: Option<Arc<Mutex<VaultHandle>>>,
) -> Result<(Arc<ToolRegistry>, Arc<SchemaValidator>)> {
let mut registry = ToolRegistry::new(config.clone(), ui);
let mut validator = SchemaValidator::new();
@@ -265,7 +401,9 @@ async fn build_tools(
}
}
if config_guard
let active_provider_id = config_guard.general.default_provider.clone();
let web_search_settings = if config_guard
.security
.allowed_tools
.iter()
@@ -273,11 +411,19 @@ async fn build_tools(
&& config_guard.tools.web_search.enabled
&& config_guard.privacy.enable_remote_search
{
let tool = WebSearchTool::new(
consent_manager.clone(),
credential_manager.clone(),
vault.clone(),
);
match compute_web_search_settings(&config_guard, &active_provider_id) {
Ok(settings) => settings,
Err(err) => {
warn!("Skipping web_search tool: {}", err);
None
}
}
} else {
None
};
if let Some(settings) = web_search_settings {
let tool = WebSearchTool::new(consent_manager.clone(), settings);
registry.register(tool);
}
@@ -294,22 +440,6 @@ async fn build_tools(
registry.register(tool);
}
if config_guard
.security
.allowed_tools
.iter()
.any(|tool| tool == "web_search")
&& config_guard.tools.web_search.enabled
&& config_guard.privacy.enable_remote_search
{
let tool = WebSearchDetailedTool::new(
consent_manager.clone(),
credential_manager.clone(),
vault.clone(),
);
registry.register(tool);
}
if enable_code_tools
&& config_guard
.security
@@ -905,9 +1035,9 @@ impl SessionController {
seen_tools.insert(tool_call.name.clone());
let (data_types, endpoints) = match tool_call.name.as_str() {
"web_search" | "web_search_detailed" => (
"web_search" => (
vec!["search query".to_string()],
vec!["duckduckgo.com".to_string()],
vec!["cloud provider".to_string()],
),
"code_exec" => (
vec!["code to execute".to_string()],

View File

@@ -10,7 +10,6 @@ pub mod fs_tools;
pub mod registry;
pub mod web_scrape;
pub mod web_search;
pub mod web_search_detailed;
use async_trait::async_trait;
use serde_json::{Value, json};
@@ -93,5 +92,4 @@ pub use code_exec::CodeExecTool;
pub use fs_tools::{ResourcesDeleteTool, ResourcesGetTool, ResourcesListTool, ResourcesWriteTool};
pub use registry::ToolRegistry;
pub use web_scrape::WebScrapeTool;
pub use web_search::WebSearchTool;
pub use web_search_detailed::WebSearchDetailedTool;
pub use web_search::{WebSearchSettings, WebSearchTool};

View File

@@ -2,6 +2,7 @@ use super::{Tool, ToolResult};
use crate::Result;
use anyhow::Context;
use async_trait::async_trait;
use reqwest::Client;
use serde_json::{Value, json};
/// Tool that fetches the raw HTML content for a list of URLs.
@@ -10,8 +11,7 @@ use serde_json::{Value, json};
/// urls: array of strings (max 5 URLs)
/// timeout_secs: optional integer per-request timeout (default 10)
pub struct WebScrapeTool {
// No special dependencies; uses reqwest_011 for compatibility with existing web_search.
client: reqwest_011::Client,
client: Client,
}
impl Default for WebScrapeTool {
@@ -22,7 +22,7 @@ impl Default for WebScrapeTool {
impl WebScrapeTool {
pub fn new() -> Self {
let client = reqwest_011::Client::builder()
let client = Client::builder()
.user_agent("OwlenWebScrape/0.1")
.build()
.expect("Failed to build reqwest client");

View File

@@ -1,36 +1,42 @@
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use std::time::Instant;
use std::time::{Duration, Instant};
use crate::Result;
use anyhow::Context;
use anyhow::{Context, anyhow};
use async_trait::async_trait;
use reqwest::{Client, StatusCode, Url};
use serde_json::{Value, json};
use super::{Tool, ToolResult};
use crate::consent::ConsentManager;
use crate::credentials::CredentialManager;
use crate::encryption::VaultHandle;
/// Configuration applied to the web search tool at registration time.
///
/// `Debug` is implemented by hand rather than derived so that the API key
/// never appears in logs or panic messages.
#[derive(Clone)]
pub struct WebSearchSettings {
    /// URL the search request is sent to.
    pub endpoint: Url,
    /// Secret sent as the bearer token with each search request.
    pub api_key: String,
    /// Provider name reported back in the tool's output and metadata.
    pub provider_label: String,
    /// Timeout applied to the HTTP client built for the tool.
    pub timeout: Duration,
}

impl std::fmt::Debug for WebSearchSettings {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("WebSearchSettings")
            .field("endpoint", &self.endpoint)
            // Never print the secret itself.
            .field("api_key", &"<redacted>")
            .field("provider_label", &self.provider_label)
            .field("timeout", &self.timeout)
            .finish()
    }
}
pub struct WebSearchTool {
consent_manager: Arc<Mutex<ConsentManager>>,
_credential_manager: Option<Arc<CredentialManager>>,
browser: duckduckgo::browser::Browser,
client: Client,
settings: WebSearchSettings,
}
impl WebSearchTool {
pub fn new(
consent_manager: Arc<Mutex<ConsentManager>>,
credential_manager: Option<Arc<CredentialManager>>,
_vault: Option<Arc<Mutex<VaultHandle>>>,
) -> Self {
// Create a reqwest client compatible with duckduckgo crate (v0.11)
let client = reqwest_011::Client::new();
let browser = duckduckgo::browser::Browser::new(client);
pub fn new(consent_manager: Arc<Mutex<ConsentManager>>, settings: WebSearchSettings) -> Self {
let client = Client::builder()
.timeout(settings.timeout)
.build()
.expect("failed to construct reqwest client for web search");
Self {
consent_manager,
_credential_manager: credential_manager,
browser,
client,
settings,
}
}
}
@@ -42,7 +48,7 @@ impl Tool for WebSearchTool {
}
fn description(&self) -> &'static str {
"Search the web for information using DuckDuckGo API"
"Search the web using the active cloud provider."
}
fn schema(&self) -> Value {
@@ -53,14 +59,14 @@ impl Tool for WebSearchTool {
"type": "string",
"minLength": 1,
"maxLength": 500,
"description": "Search query"
"description": "Search query text"
},
"max_results": {
"type": "integer",
"minimum": 1,
"maximum": 10,
"default": 5,
"description": "Maximum number of results"
"description": "Maximum number of search results to retrieve"
}
},
"required": ["query"],
@@ -75,8 +81,6 @@ impl Tool for WebSearchTool {
async fn execute(&self, args: Value) -> Result<ToolResult> {
let start = Instant::now();
// Check if consent has been granted (non-blocking check)
// Consent should have been granted via TUI dialog before tool execution
{
let consent = self
.consent_manager
@@ -85,7 +89,7 @@ impl Tool for WebSearchTool {
if !consent.has_consent(self.name()) {
return Ok(ToolResult::error(
"Consent not granted for web search. This should have been handled by the TUI.",
"Consent not granted for web search. Enable the tool from the UI before invoking it.",
));
}
}
@@ -93,61 +97,67 @@ impl Tool for WebSearchTool {
let query = args
.get("query")
.and_then(Value::as_str)
.context("Missing query parameter")?;
let max_results = args.get("max_results").and_then(Value::as_u64).unwrap_or(5) as usize;
.map(str::trim)
.filter(|q| !q.is_empty())
.ok_or_else(|| anyhow!("Missing query parameter"))?;
let user_agent = duckduckgo::user_agents::get("firefox").unwrap_or(
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0",
);
let max_results = args.get("max_results").and_then(Value::as_u64).unwrap_or(5) as u32;
// Detect if this is a news query - use news endpoint for better snippets
let is_news_query = query.to_lowercase().contains("news")
|| query.to_lowercase().contains("latest")
|| query.to_lowercase().contains("today")
|| query.to_lowercase().contains("recent");
let payload = json!({
"query": query,
"max_results": max_results
});
let mut formatted_results = Vec::new();
let response = self
.client
.post(self.settings.endpoint.clone())
.bearer_auth(&self.settings.api_key)
.json(&payload)
.send()
.await
.context("Web search request failed")?;
if is_news_query {
// Use news endpoint which returns excerpts/snippets
let news_results = self
.browser
.news(query, "wt-wt", false, Some(max_results), user_agent)
.await
.context("DuckDuckGo news search failed")?;
for result in news_results {
formatted_results.push(json!({
"title": result.title,
"url": result.url,
"snippet": result.body, // news has body/excerpt
"source": result.source,
"date": result.date
}));
match response.status() {
StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN => {
return Ok(ToolResult::error(
"Cloud web search request was not authorized. Verify your Ollama Cloud API key.",
));
}
} else {
// Use lite search for general queries (fast but no snippets)
let search_results = self
.browser
.lite_search(query, "wt-wt", Some(max_results), user_agent)
.await
.context("DuckDuckGo search failed")?;
for result in search_results {
formatted_results.push(json!({
"title": result.title,
"url": result.url,
"snippet": result.snippet
}));
StatusCode::TOO_MANY_REQUESTS => {
return Ok(ToolResult::error(
"Cloud web search is rate limited. Please wait before retrying.",
));
}
status if !status.is_success() => {
return Ok(ToolResult::error(&format!(
"Cloud web search failed with status {}",
status
)));
}
_ => {}
}
let body: Value = response
.json()
.await
.context("Failed to decode cloud search response")?;
let results = body
.get("results")
.and_then(|value| value.as_array())
.cloned()
.unwrap_or_else(Vec::new);
let mut metadata = HashMap::new();
metadata.insert("provider".to_string(), self.settings.provider_label.clone());
let mut result = ToolResult::success(json!({
"query": query,
"results": formatted_results,
"total_found": formatted_results.len()
"provider": self.settings.provider_label,
"results": results,
}));
result.duration = start.elapsed();
result.metadata = metadata;
Ok(result)
}

View File

@@ -1,133 +0,0 @@
use std::sync::{Arc, Mutex};
use std::time::Instant;
use crate::Result;
use anyhow::Context;
use async_trait::async_trait;
use serde_json::{Value, json};
use super::{Tool, ToolResult};
use crate::consent::ConsentManager;
use crate::credentials::CredentialManager;
use crate::encryption::VaultHandle;
/// Tool exposed under the name `web_search_detailed`; it runs its searches
/// through the `duckduckgo` crate's `Browser`.
pub struct WebSearchDetailedTool {
// Shared consent state, consulted by `execute` before any request is made.
consent_manager: Arc<Mutex<ConsentManager>>,
// Stored by the constructor; not read by any code visible in this file.
_credential_manager: Option<Arc<CredentialManager>>,
// DuckDuckGo client used to perform the search.
browser: duckduckgo::browser::Browser,
}
impl WebSearchDetailedTool {
    /// Builds the tool around a freshly created DuckDuckGo browser.
    ///
    /// `credential_manager` is stored as given; `_vault` is accepted but unused.
    pub fn new(
        consent_manager: Arc<Mutex<ConsentManager>>,
        credential_manager: Option<Arc<CredentialManager>>,
        _vault: Option<Arc<Mutex<VaultHandle>>>,
    ) -> Self {
        // The duckduckgo crate needs a reqwest 0.11 client, hence `reqwest_011`.
        let browser = duckduckgo::browser::Browser::new(reqwest_011::Client::new());

        Self {
            consent_manager,
            _credential_manager: credential_manager,
            browser,
        }
    }
}
#[async_trait]
impl Tool for WebSearchDetailedTool {
    /// Name under which the tool is exposed.
    fn name(&self) -> &'static str {
        "web_search_detailed"
    }

    /// Human-readable summary of what the tool returns.
    fn description(&self) -> &'static str {
        "Search for recent articles and web content with detailed snippets and descriptions. \
         Returns results with publication dates, sources, and full text excerpts. \
         Best for finding recent information, articles, and detailed context about topics."
    }

    /// JSON schema for the arguments: a required `query` string and an
    /// optional `max_results` integer (1 to 10, default 5).
    fn schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "minLength": 1,
                    "maxLength": 500,
                    "description": "Search query"
                },
                "max_results": {
                    "type": "integer",
                    "minimum": 1,
                    "maximum": 10,
                    "default": 5,
                    "description": "Maximum number of results"
                }
            },
            "required": ["query"],
            "additionalProperties": false
        })
    }

    /// This tool always needs network access.
    fn requires_network(&self) -> bool {
        true
    }

    /// Runs a DuckDuckGo news search for `query` and returns the hits as JSON.
    ///
    /// Consent is only checked here, never requested: without it, an error
    /// result is returned and no request is made.
    async fn execute(&self, args: Value) -> Result<ToolResult> {
        let start = Instant::now();

        // The guard is a temporary, so the lock is released at the end of
        // this statement, before any `.await`.
        let consent_granted = self
            .consent_manager
            .lock()
            .expect("Consent manager mutex poisoned")
            .has_consent(self.name());
        if !consent_granted {
            return Ok(ToolResult::error(
                "Consent not granted for detailed web search. This should have been handled by the TUI.",
            ));
        }

        let query = args
            .get("query")
            .and_then(|value| value.as_str())
            .context("Missing query parameter")?;

        let max_results = match args.get("max_results").and_then(|value| value.as_u64()) {
            Some(requested) => requested as usize,
            None => 5,
        };

        let user_agent = duckduckgo::user_agents::get("firefox").unwrap_or(
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0",
        );

        // The news endpoint is used for every query because its results carry
        // full excerpts, sources and dates.
        let articles = self
            .browser
            .news(query, "wt-wt", false, Some(max_results), user_agent)
            .await
            .context("DuckDuckGo detailed search failed")?;

        let formatted_results: Vec<Value> = articles
            .into_iter()
            .map(|article| {
                json!({
                    "title": article.title,
                    "url": article.url,
                    "snippet": article.body,
                    "source": article.source,
                    "date": article.date
                })
            })
            .collect();
        let total_found = formatted_results.len();

        let mut result = ToolResult::success(json!({
            "query": query,
            "results": formatted_results,
            "total_found": total_found
        }));
        result.duration = start.elapsed();
        Ok(result)
    }
}