feat(tool/web): route searches through provider

Acceptance Criteria:
- web.search proxies Ollama Cloud's /api/web_search via the configured provider endpoint
- Tool is only registered when remote search is enabled and the cloud provider is active
- Consent prompts, docs, and MCP tooling no longer reference DuckDuckGo or expose web_search_detailed

Test Notes:
- cargo check
This commit is contained in:
2025-10-24 01:29:37 +02:00
parent 79fdafce97
commit bbb94367e1
9 changed files with 242 additions and 266 deletions

View File

@@ -39,6 +39,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- `McpMode` support was restored with explicit validation; `remote_only`, `remote_preferred`, and `local_only` now behave predictably.
- Configuration loading performs structural validation and fails fast on missing default providers or invalid MCP definitions.
- Ollama provider error handling now distinguishes timeouts, missing models, and authentication failures.
- The `web.search` tool now proxies through Ollama Cloud's `/api/web_search` endpoint and is hidden whenever the active provider cannot reach the cloud.
- `owlen` warns when the active terminal likely lacks 256-color support.
- `config.toml` now carries a schema version (`1.2.0`) and is migrated automatically; deprecated keys such as `agent.max_tool_calls` trigger warnings instead of hard failures.
- Model selector navigation (Tab/Shift-Tab) now switches between local and cloud tabs while preserving selection state.

View File

@@ -39,9 +39,7 @@ crossterm = { workspace = true }
urlencoding = { workspace = true }
rpassword = { workspace = true }
sqlx = { workspace = true }
duckduckgo = "0.2.0"
reqwest = { workspace = true, features = ["default"] }
reqwest_011 = { version = "0.11", package = "reqwest" }
path-clean = "1.0"
tokio-stream = { workspace = true }
tokio-tungstenite = "0.21"

View File

@@ -3,15 +3,13 @@ use super::protocol::{
PROTOCOL_VERSION, RequestId, RpcErrorResponse, RpcNotification, RpcRequest, RpcResponse,
};
use super::{McpClient, McpToolCall, McpToolDescriptor, McpToolResponse};
use crate::consent::{ConsentManager, ConsentScope};
use crate::tools::{Tool, WebScrapeTool, WebSearchTool};
use crate::tools::{Tool, WebScrapeTool};
use crate::types::ModelInfo;
use crate::types::{ChatResponse, Message, Role};
use crate::{
ChatStream, Error, LlmProvider, Result, facade::llm_client::LlmClient, mode::Mode,
send_via_stream,
};
use anyhow::anyhow;
use futures::{StreamExt, future::BoxFuture, stream};
use reqwest::Client as HttpClient;
use serde_json::json;
@@ -443,34 +441,6 @@ impl McpClient for RemoteMcpClient {
duration_ms: 0,
});
}
// Local handling for web tools to avoid needing an external MCP server.
if call.name == "web_search" {
// Autogrant consent for the web_search tool (permanent for this process).
let consent_manager = std::sync::Arc::new(std::sync::Mutex::new(ConsentManager::new()));
{
let mut cm = consent_manager
.lock()
.map_err(|_| Error::Provider(anyhow!("Consent manager mutex poisoned")))?;
cm.grant_consent_with_scope(
"web_search",
Vec::new(),
Vec::new(),
ConsentScope::Permanent,
);
}
let tool = WebSearchTool::new(consent_manager.clone(), None, None);
let result = tool
.execute(call.arguments.clone())
.await
.map_err(|e| Error::Provider(e.into()))?;
return Ok(McpToolResponse {
name: call.name,
success: true,
output: result.output,
metadata: std::collections::HashMap::new(),
duration_ms: result.duration.as_millis() as u128,
});
}
if call.name == "web_scrape" {
let tool = WebScrapeTool::new();
let result = tool

View File

@@ -1,10 +1,14 @@
use crate::config::{Config, McpResourceConfig, McpServerConfig};
use crate::config::{
Config, LEGACY_OLLAMA_CLOUD_API_KEY_ENV, LEGACY_OWLEN_OLLAMA_CLOUD_API_KEY_ENV,
McpResourceConfig, McpServerConfig, OLLAMA_API_KEY_ENV, OLLAMA_CLOUD_BASE_URL,
};
use crate::consent::{ConsentManager, ConsentScope};
use crate::conversation::ConversationManager;
use crate::credentials::CredentialManager;
use crate::encryption::{self, VaultHandle};
use crate::formatting::MessageFormatter;
use crate::input::InputBuffer;
use crate::llm::ProviderConfig;
use crate::mcp::McpToolCall;
use crate::mcp::client::McpClient;
use crate::mcp::factory::McpClientFactory;
@@ -24,22 +28,154 @@ use crate::validation::{SchemaValidator, get_builtin_schemas};
use crate::{ChatStream, Provider};
use crate::{
CodeExecTool, ResourcesDeleteTool, ResourcesGetTool, ResourcesListTool, ResourcesWriteTool,
ToolRegistry, WebScrapeTool, WebSearchDetailedTool, WebSearchTool,
ToolRegistry, WebScrapeTool, WebSearchSettings, WebSearchTool,
};
use crate::{Error, Result};
use chrono::Utc;
use log::warn;
use reqwest::Url;
use serde_json::{Value, json};
use std::collections::{HashMap, HashSet};
use std::env;
use std::path::PathBuf;
use std::sync::{Arc, Mutex};
use std::time::SystemTime;
use std::time::{Duration, SystemTime};
use tokio::fs;
use tokio::sync::Mutex as TokioMutex;
use tokio::sync::mpsc::UnboundedSender;
use uuid::Uuid;
/// Reads the environment variable `name` and returns its whitespace-trimmed value.
///
/// Yields `None` when the variable cannot be read or when it is blank after
/// trimming, so callers never receive an empty string.
fn env_var_non_empty(name: &str) -> Option<String> {
    let raw = env::var(name).ok()?;
    let trimmed = raw.trim();
    if trimmed.is_empty() {
        None
    } else {
        Some(trimmed.to_string())
    }
}
/// Derives the web search settings for `provider_id`, if that provider can serve searches.
///
/// Returns `Ok(None)` when the provider is not configured, is disabled, is not of
/// type `ollama_cloud` (a local `ollama` provider does not expose web search), or
/// when no API key can be resolved. An error is returned only when the search URL
/// cannot be built from the configured base URL and endpoint.
fn compute_web_search_settings(
    config: &Config,
    provider_id: &str,
) -> Result<Option<WebSearchSettings>> {
    const DEFAULT_ENDPOINT: &str = "/api/web_search";

    let provider_id = provider_id.trim();
    let Some(provider_config) = config.providers.get(provider_id) else {
        return Ok(None);
    };

    // Only an enabled cloud provider qualifies; every other provider type,
    // including local Ollama, has no web search endpoint to proxy.
    let is_cloud = provider_config
        .provider_type
        .trim()
        .eq_ignore_ascii_case("ollama_cloud");
    if !provider_config.enabled || !is_cloud {
        return Ok(None);
    }

    // A blank base URL counts as unset and falls back to the built-in default.
    let base_url = match provider_config.base_url.as_deref() {
        Some(url) if !url.trim().is_empty() => url,
        _ => OLLAMA_CLOUD_BASE_URL,
    };
    let endpoint = match provider_config.extra.get("web_search_endpoint") {
        Some(value) => value.as_str().unwrap_or(DEFAULT_ENDPOINT),
        None => DEFAULT_ENDPOINT,
    };
    let endpoint_url = build_search_url(base_url, endpoint)?;

    // Provider-level credentials win; the environment variables are consulted in order.
    let fallback_envs = [
        OLLAMA_API_KEY_ENV,
        LEGACY_OLLAMA_CLOUD_API_KEY_ENV,
        LEGACY_OWLEN_OLLAMA_CLOUD_API_KEY_ENV,
    ];
    let resolved_key = resolve_web_search_api_key(provider_config)
        .or_else(|| {
            fallback_envs
                .iter()
                .find_map(|name| env_var_non_empty(name))
        })
        .filter(|key| !key.is_empty());
    let Some(api_key) = resolved_key else {
        return Ok(None);
    };

    Ok(Some(WebSearchSettings {
        endpoint: endpoint_url,
        api_key,
        provider_label: provider_id.to_string(),
        timeout: Duration::from_secs(20),
    }))
}
/// Resolves the API key declared on a provider configuration.
///
/// The inline `api_key` value is tried first; if it yields nothing, the
/// environment variable named by `api_key_env` is read instead.
fn resolve_web_search_api_key(provider_config: &ProviderConfig) -> Option<String> {
    if let Some(key) = resolve_inline_api_key(provider_config.api_key.as_deref()) {
        return Some(key);
    }
    let env_name = provider_config.api_key_env.as_deref()?;
    env_var_non_empty(env_name.trim())
}
fn resolve_inline_api_key(value: Option<&str>) -> Option<String> {
let raw = value?.trim();
if raw.is_empty() {
return None;
}
if let Some(inner) = raw
.strip_prefix("${")
.and_then(|value| value.strip_suffix('}'))
.map(str::trim)
{
return env_var_non_empty(inner);
}
if let Some(inner) = raw.strip_prefix('$').map(str::trim) {
return env_var_non_empty(inner);
}
Some(raw.to_string())
}
fn build_search_url(base_url: &str, endpoint: &str) -> Result<Url> {
let endpoint = endpoint.trim();
if let Ok(url) = Url::parse(endpoint) {
return Ok(url);
}
let trimmed_base = base_url.trim();
let normalized_base = if trimmed_base.ends_with('/') {
trimmed_base.to_string()
} else {
format!("{}/", trimmed_base)
};
let base = Url::parse(&normalized_base).map_err(|err| {
Error::Config(format!("Invalid provider base_url '{}': {}", base_url, err))
})?;
if endpoint.is_empty() {
return Ok(base);
}
base.join(endpoint.trim_start_matches('/')).map_err(|err| {
Error::Config(format!(
"Invalid web_search_endpoint '{}': {}",
endpoint, err
))
})
}
pub enum SessionOutcome {
Complete(ChatResponse),
Streaming {
@@ -251,8 +387,8 @@ async fn build_tools(
ui: Arc<dyn UiController>,
enable_code_tools: bool,
consent_manager: Arc<Mutex<ConsentManager>>,
credential_manager: Option<Arc<CredentialManager>>,
vault: Option<Arc<Mutex<VaultHandle>>>,
_credential_manager: Option<Arc<CredentialManager>>,
_vault: Option<Arc<Mutex<VaultHandle>>>,
) -> Result<(Arc<ToolRegistry>, Arc<SchemaValidator>)> {
let mut registry = ToolRegistry::new(config.clone(), ui);
let mut validator = SchemaValidator::new();
@@ -265,7 +401,9 @@ async fn build_tools(
}
}
if config_guard
let active_provider_id = config_guard.general.default_provider.clone();
let web_search_settings = if config_guard
.security
.allowed_tools
.iter()
@@ -273,11 +411,19 @@ async fn build_tools(
&& config_guard.tools.web_search.enabled
&& config_guard.privacy.enable_remote_search
{
let tool = WebSearchTool::new(
consent_manager.clone(),
credential_manager.clone(),
vault.clone(),
);
match compute_web_search_settings(&config_guard, &active_provider_id) {
Ok(settings) => settings,
Err(err) => {
warn!("Skipping web_search tool: {}", err);
None
}
}
} else {
None
};
if let Some(settings) = web_search_settings {
let tool = WebSearchTool::new(consent_manager.clone(), settings);
registry.register(tool);
}
@@ -294,22 +440,6 @@ async fn build_tools(
registry.register(tool);
}
if config_guard
.security
.allowed_tools
.iter()
.any(|tool| tool == "web_search")
&& config_guard.tools.web_search.enabled
&& config_guard.privacy.enable_remote_search
{
let tool = WebSearchDetailedTool::new(
consent_manager.clone(),
credential_manager.clone(),
vault.clone(),
);
registry.register(tool);
}
if enable_code_tools
&& config_guard
.security
@@ -905,9 +1035,9 @@ impl SessionController {
seen_tools.insert(tool_call.name.clone());
let (data_types, endpoints) = match tool_call.name.as_str() {
"web_search" | "web_search_detailed" => (
"web_search" => (
vec!["search query".to_string()],
vec!["duckduckgo.com".to_string()],
vec!["cloud provider".to_string()],
),
"code_exec" => (
vec!["code to execute".to_string()],

View File

@@ -10,7 +10,6 @@ pub mod fs_tools;
pub mod registry;
pub mod web_scrape;
pub mod web_search;
pub mod web_search_detailed;
use async_trait::async_trait;
use serde_json::{Value, json};
@@ -93,5 +92,4 @@ pub use code_exec::CodeExecTool;
pub use fs_tools::{ResourcesDeleteTool, ResourcesGetTool, ResourcesListTool, ResourcesWriteTool};
pub use registry::ToolRegistry;
pub use web_scrape::WebScrapeTool;
pub use web_search::WebSearchTool;
pub use web_search_detailed::WebSearchDetailedTool;
pub use web_search::{WebSearchSettings, WebSearchTool};

View File

@@ -2,6 +2,7 @@ use super::{Tool, ToolResult};
use crate::Result;
use anyhow::Context;
use async_trait::async_trait;
use reqwest::Client;
use serde_json::{Value, json};
/// Tool that fetches the raw HTML content for a list of URLs.
@@ -10,8 +11,7 @@ use serde_json::{Value, json};
/// urls: array of strings (max 5 URLs)
/// timeout_secs: optional integer per-request timeout (default 10)
pub struct WebScrapeTool {
// No special dependencies; uses reqwest_011 for compatibility with existing web_search.
client: reqwest_011::Client,
client: Client,
}
impl Default for WebScrapeTool {
@@ -22,7 +22,7 @@ impl Default for WebScrapeTool {
impl WebScrapeTool {
pub fn new() -> Self {
let client = reqwest_011::Client::builder()
let client = Client::builder()
.user_agent("OwlenWebScrape/0.1")
.build()
.expect("Failed to build reqwest client");

View File

@@ -1,36 +1,42 @@
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use std::time::Instant;
use std::time::{Duration, Instant};
use crate::Result;
use anyhow::Context;
use anyhow::{Context, anyhow};
use async_trait::async_trait;
use reqwest::{Client, StatusCode, Url};
use serde_json::{Value, json};
use super::{Tool, ToolResult};
use crate::consent::ConsentManager;
use crate::credentials::CredentialManager;
use crate::encryption::VaultHandle;
/// Configuration applied to the web search tool at registration time.
///
/// The values are fixed when the tool is constructed; `WebSearchTool` keeps its
/// own copy and reads from it on every search.
#[derive(Clone, Debug)]
pub struct WebSearchSettings {
/// Fully resolved URL that search requests are POSTed to.
pub endpoint: Url,
/// Credential sent as the bearer token on each search request.
pub api_key: String,
/// Provider identifier echoed back in the tool output and result metadata.
pub provider_label: String,
/// Timeout configured on the HTTP client built for the tool.
pub timeout: Duration,
}
pub struct WebSearchTool {
consent_manager: Arc<Mutex<ConsentManager>>,
_credential_manager: Option<Arc<CredentialManager>>,
browser: duckduckgo::browser::Browser,
client: Client,
settings: WebSearchSettings,
}
impl WebSearchTool {
pub fn new(
consent_manager: Arc<Mutex<ConsentManager>>,
credential_manager: Option<Arc<CredentialManager>>,
_vault: Option<Arc<Mutex<VaultHandle>>>,
) -> Self {
// Create a reqwest client compatible with duckduckgo crate (v0.11)
let client = reqwest_011::Client::new();
let browser = duckduckgo::browser::Browser::new(client);
pub fn new(consent_manager: Arc<Mutex<ConsentManager>>, settings: WebSearchSettings) -> Self {
let client = Client::builder()
.timeout(settings.timeout)
.build()
.expect("failed to construct reqwest client for web search");
Self {
consent_manager,
_credential_manager: credential_manager,
browser,
client,
settings,
}
}
}
@@ -42,7 +48,7 @@ impl Tool for WebSearchTool {
}
fn description(&self) -> &'static str {
"Search the web for information using DuckDuckGo API"
"Search the web using the active cloud provider."
}
fn schema(&self) -> Value {
@@ -53,14 +59,14 @@ impl Tool for WebSearchTool {
"type": "string",
"minLength": 1,
"maxLength": 500,
"description": "Search query"
"description": "Search query text"
},
"max_results": {
"type": "integer",
"minimum": 1,
"maximum": 10,
"default": 5,
"description": "Maximum number of results"
"description": "Maximum number of search results to retrieve"
}
},
"required": ["query"],
@@ -75,8 +81,6 @@ impl Tool for WebSearchTool {
async fn execute(&self, args: Value) -> Result<ToolResult> {
let start = Instant::now();
// Check if consent has been granted (non-blocking check)
// Consent should have been granted via TUI dialog before tool execution
{
let consent = self
.consent_manager
@@ -85,7 +89,7 @@ impl Tool for WebSearchTool {
if !consent.has_consent(self.name()) {
return Ok(ToolResult::error(
"Consent not granted for web search. This should have been handled by the TUI.",
"Consent not granted for web search. Enable the tool from the UI before invoking it.",
));
}
}
@@ -93,61 +97,67 @@ impl Tool for WebSearchTool {
let query = args
.get("query")
.and_then(Value::as_str)
.context("Missing query parameter")?;
let max_results = args.get("max_results").and_then(Value::as_u64).unwrap_or(5) as usize;
.map(str::trim)
.filter(|q| !q.is_empty())
.ok_or_else(|| anyhow!("Missing query parameter"))?;
let user_agent = duckduckgo::user_agents::get("firefox").unwrap_or(
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0",
);
let max_results = args.get("max_results").and_then(Value::as_u64).unwrap_or(5) as u32;
// Detect if this is a news query - use news endpoint for better snippets
let is_news_query = query.to_lowercase().contains("news")
|| query.to_lowercase().contains("latest")
|| query.to_lowercase().contains("today")
|| query.to_lowercase().contains("recent");
let payload = json!({
"query": query,
"max_results": max_results
});
let mut formatted_results = Vec::new();
let response = self
.client
.post(self.settings.endpoint.clone())
.bearer_auth(&self.settings.api_key)
.json(&payload)
.send()
.await
.context("Web search request failed")?;
if is_news_query {
// Use news endpoint which returns excerpts/snippets
let news_results = self
.browser
.news(query, "wt-wt", false, Some(max_results), user_agent)
.await
.context("DuckDuckGo news search failed")?;
for result in news_results {
formatted_results.push(json!({
"title": result.title,
"url": result.url,
"snippet": result.body, // news has body/excerpt
"source": result.source,
"date": result.date
}));
match response.status() {
StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN => {
return Ok(ToolResult::error(
"Cloud web search request was not authorized. Verify your Ollama Cloud API key.",
));
}
} else {
// Use lite search for general queries (fast but no snippets)
let search_results = self
.browser
.lite_search(query, "wt-wt", Some(max_results), user_agent)
.await
.context("DuckDuckGo search failed")?;
for result in search_results {
formatted_results.push(json!({
"title": result.title,
"url": result.url,
"snippet": result.snippet
}));
StatusCode::TOO_MANY_REQUESTS => {
return Ok(ToolResult::error(
"Cloud web search is rate limited. Please wait before retrying.",
));
}
status if !status.is_success() => {
return Ok(ToolResult::error(&format!(
"Cloud web search failed with status {}",
status
)));
}
_ => {}
}
let body: Value = response
.json()
.await
.context("Failed to decode cloud search response")?;
let results = body
.get("results")
.and_then(|value| value.as_array())
.cloned()
.unwrap_or_else(Vec::new);
let mut metadata = HashMap::new();
metadata.insert("provider".to_string(), self.settings.provider_label.clone());
let mut result = ToolResult::success(json!({
"query": query,
"results": formatted_results,
"total_found": formatted_results.len()
"provider": self.settings.provider_label,
"results": results,
}));
result.duration = start.elapsed();
result.metadata = metadata;
Ok(result)
}

View File

@@ -1,133 +0,0 @@
use std::sync::{Arc, Mutex};
use std::time::Instant;
use crate::Result;
use anyhow::Context;
use async_trait::async_trait;
use serde_json::{Value, json};
use super::{Tool, ToolResult};
use crate::consent::ConsentManager;
use crate::credentials::CredentialManager;
use crate::encryption::VaultHandle;
/// Tool that runs a "detailed" web search through the DuckDuckGo news endpoint.
pub struct WebSearchDetailedTool {
/// Shared consent registry; `execute` consults it before issuing any request.
consent_manager: Arc<Mutex<ConsentManager>>,
/// Stored at construction time; not read by any code visible in this file.
_credential_manager: Option<Arc<CredentialManager>>,
/// DuckDuckGo client used to perform the news query.
browser: duckduckgo::browser::Browser,
}
impl WebSearchDetailedTool {
    /// Creates the tool with a fresh DuckDuckGo browser.
    ///
    /// `credential_manager` is stored as-is and `_vault` is accepted but ignored.
    pub fn new(
        consent_manager: Arc<Mutex<ConsentManager>>,
        credential_manager: Option<Arc<CredentialManager>>,
        _vault: Option<Arc<Mutex<VaultHandle>>>,
    ) -> Self {
        // The duckduckgo crate needs a reqwest 0.11 client, hence the aliased dependency.
        let http = reqwest_011::Client::new();
        Self {
            browser: duckduckgo::browser::Browser::new(http),
            _credential_manager: credential_manager,
            consent_manager,
        }
    }
}
#[async_trait]
impl Tool for WebSearchDetailedTool {
    /// Name under which the tool is registered and consent is tracked.
    fn name(&self) -> &'static str {
        "web_search_detailed"
    }

    /// Human-readable summary of what the tool returns.
    fn description(&self) -> &'static str {
        "Search for recent articles and web content with detailed snippets and descriptions. \
         Returns results with publication dates, sources, and full text excerpts. \
         Best for finding recent information, articles, and detailed context about topics."
    }

    /// JSON schema for the arguments: a required `query` string (1–500 chars)
    /// and an optional `max_results` integer (1–10, default 5).
    fn schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "minLength": 1,
                    "maxLength": 500,
                    "description": "Search query"
                },
                "max_results": {
                    "type": "integer",
                    "minimum": 1,
                    "maximum": 10,
                    "default": 5,
                    "description": "Maximum number of results"
                }
            },
            "required": ["query"],
            "additionalProperties": false
        })
    }

    /// The tool always performs outbound HTTP requests.
    fn requires_network(&self) -> bool {
        true
    }

    /// Runs the search and returns the formatted results.
    ///
    /// Returns an error-flavoured `ToolResult` when consent has not been granted.
    ///
    /// # Errors
    /// Fails when the consent manager lock is poisoned, when `query` is missing,
    /// or when the DuckDuckGo request fails.
    async fn execute(&self, args: Value) -> Result<ToolResult> {
        let start = Instant::now();

        // Consent is expected to have been granted through the TUI beforehand;
        // this is only a non-blocking check. The guard lives in its own scope so
        // it is released before the first `.await`.
        {
            // A poisoned lock is reported as an error instead of panicking.
            let consent = self
                .consent_manager
                .lock()
                .ok()
                .context("Consent manager mutex poisoned")?;
            if !consent.has_consent(self.name()) {
                return Ok(ToolResult::error(
                    "Consent not granted for detailed web search. This should have been handled by the TUI.",
                ));
            }
        }

        let query = args
            .get("query")
            .and_then(Value::as_str)
            .context("Missing query parameter")?;

        // Enforce the schema bounds (1..=10) here as well; this also makes the
        // narrowing cast to `usize` lossless.
        let max_results = args
            .get("max_results")
            .and_then(Value::as_u64)
            .map_or(5, |requested| requested.clamp(1, 10)) as usize;

        let user_agent = duckduckgo::user_agents::get("firefox").unwrap_or(
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0",
        );

        // The news endpoint returns full excerpts, and often surfaces recent
        // articles with useful descriptions even for non-news queries.
        let news_results = self
            .browser
            .news(query, "wt-wt", false, Some(max_results), user_agent)
            .await
            .context("DuckDuckGo detailed search failed")?;

        let formatted_results: Vec<Value> = news_results
            .into_iter()
            .map(|item| {
                json!({
                    "title": item.title,
                    "url": item.url,
                    "snippet": item.body, // news endpoint includes full excerpts
                    "source": item.source,
                    "date": item.date
                })
            })
            .collect();

        let mut result = ToolResult::success(json!({
            "query": query,
            "results": formatted_results,
            "total_found": formatted_results.len()
        }));
        result.duration = start.elapsed();
        Ok(result)
    }
}

View File

@@ -185,6 +185,8 @@ Requests target the same `/api/chat` endpoint documented by Ollama and automatic
The quota fields are optional and purely informational—they are never sent to the provider. Owlen uses them to display hourly/weekly token usage in the chat header, emit pre-limit toasts at 80% and 95%, and power the `:limits` command. Adjust the numbers to reflect the soft limits on your account or remove the keys altogether if you do not want usage tracking.
If your deployment exposes the web search endpoint under a different path, set `web_search_endpoint` in the same table. The default (`/api/web_search`) matches the Ollama Cloud REST API documented in the web retrieval guide.
> **Tip:** If the official `ollama signin` flow fails on Linux v0.12.3, follow the [Linux Ollama sign-in workaround](#linux-ollama-sign-in-workaround-v0123) in the troubleshooting guide to copy keys from a working machine or register them manually.
### Managing cloud credentials via CLI