use super::{Tool, ToolResult}; use crate::Result; use anyhow::Context; use async_trait::async_trait; use serde_json::{json, Value}; /// Tool that fetches the raw HTML content for a list of URLs. /// /// Input schema expects: /// urls: array of strings (max 5 URLs) /// timeout_secs: optional integer per‑request timeout (default 10) pub struct WebScrapeTool { // No special dependencies; uses reqwest_011 for compatibility with existing web_search. client: reqwest_011::Client, } impl Default for WebScrapeTool { fn default() -> Self { Self::new() } } impl WebScrapeTool { pub fn new() -> Self { let client = reqwest_011::Client::builder() .user_agent("OwlenWebScrape/0.1") .build() .expect("Failed to build reqwest client"); Self { client } } } #[async_trait] impl Tool for WebScrapeTool { fn name(&self) -> &'static str { "web_scrape" } fn description(&self) -> &'static str { "Fetch raw HTML content for a list of URLs" } fn schema(&self) -> Value { json!({ "type": "object", "properties": { "urls": { "type": "array", "items": { "type": "string", "format": "uri" }, "minItems": 1, "maxItems": 5, "description": "List of URLs to scrape" }, "timeout_secs": { "type": "integer", "minimum": 1, "maximum": 30, "default": 10, "description": "Per‑request timeout in seconds" } }, "required": ["urls"], "additionalProperties": false }) } fn requires_network(&self) -> bool { true } async fn execute(&self, args: Value) -> Result { let urls = args .get("urls") .and_then(|v| v.as_array()) .context("Missing 'urls' array")?; let timeout_secs = args .get("timeout_secs") .and_then(|v| v.as_u64()) .unwrap_or(10); let mut results = Vec::new(); for url_val in urls { let url = url_val.as_str().unwrap_or(""); let resp = self .client .get(url) .timeout(std::time::Duration::from_secs(timeout_secs)) .send() .await; match resp { Ok(r) => { let text = r.text().await.unwrap_or_default(); results.push(json!({ "url": url, "content": text })); } Err(e) => { results.push(json!({ "url": url, "error": e.to_string() })); } } } Ok(ToolResult::success(json!({ "pages": results }))) } }