Files
owlen/crates/owlen-core/src/tools/web_scrape.rs
vikingowl bbb94367e1 feat(tool/web): route searches through provider
Acceptance Criteria:\n- web.search proxies Ollama Cloud's /api/web_search via the configured provider endpoint\n- Tool is only registered when remote search is enabled and the cloud provider is active\n- Consent prompts, docs, and MCP tooling no longer reference DuckDuckGo or expose web_search_detailed

Test Notes:\n- cargo check
2025-10-24 01:29:37 +02:00

103 lines
2.8 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
use super::{Tool, ToolResult};
use crate::Result;
use anyhow::Context;
use async_trait::async_trait;
use reqwest::Client;
use serde_json::{Value, json};
/// Tool that fetches the raw HTML content for a list of URLs.
///
/// Input schema expects:
/// - `urls`: array of strings (max 5 URLs)
/// - `timeout_secs`: optional integer per-request timeout in seconds (default 10)
pub struct WebScrapeTool {
/// HTTP client used to issue the request for each URL.
client: Client,
}
impl Default for WebScrapeTool {
fn default() -> Self {
Self::new()
}
}
impl WebScrapeTool {
    /// Creates a scraper whose HTTP client sends the `OwlenWebScrape/0.1`
    /// user agent.
    ///
    /// # Panics
    ///
    /// Panics with "Failed to build reqwest client" if the underlying
    /// `reqwest` client cannot be constructed.
    pub fn new() -> Self {
        let builder = Client::builder().user_agent("OwlenWebScrape/0.1");
        Self {
            client: builder.build().expect("Failed to build reqwest client"),
        }
    }
}
#[async_trait]
impl Tool for WebScrapeTool {
fn name(&self) -> &'static str {
"web_scrape"
}
fn description(&self) -> &'static str {
"Fetch raw HTML content for a list of URLs"
}
fn schema(&self) -> Value {
json!({
"type": "object",
"properties": {
"urls": {
"type": "array",
"items": { "type": "string", "format": "uri" },
"minItems": 1,
"maxItems": 5,
"description": "List of URLs to scrape"
},
"timeout_secs": {
"type": "integer",
"minimum": 1,
"maximum": 30,
"default": 10,
"description": "Perrequest timeout in seconds"
}
},
"required": ["urls"],
"additionalProperties": false
})
}
fn requires_network(&self) -> bool {
true
}
async fn execute(&self, args: Value) -> Result<ToolResult> {
let urls = args
.get("urls")
.and_then(|v| v.as_array())
.context("Missing 'urls' array")?;
let timeout_secs = args
.get("timeout_secs")
.and_then(|v| v.as_u64())
.unwrap_or(10);
let mut results = Vec::new();
for url_val in urls {
let url = url_val.as_str().unwrap_or("");
let resp = self
.client
.get(url)
.timeout(std::time::Duration::from_secs(timeout_secs))
.send()
.await;
match resp {
Ok(r) => {
let text = r.text().await.unwrap_or_default();
results.push(json!({ "url": url, "content": text }));
}
Err(e) => {
results.push(json!({ "url": url, "error": e.to_string() }));
}
}
}
Ok(ToolResult::success(json!({ "pages": results })))
}
}