feat(M9): implement WebFetch and WebSearch with domain filtering and pluggable providers
Milestone M9 implementation adds web access tools with security controls. New crate: crates/tools/web WebFetch Features: - HTTP client using reqwest - Domain allowlist/blocklist filtering * Empty allowlist = allow all domains (except blocked) * Non-empty allowlist = only allow specified domains * Blocklist always takes precedence - Redirect detection and blocking * Redirects to unapproved domains are blocked * Manual redirect policy (no automatic following) * Returns error message with redirect URL - Response capture with metadata * Status code, content, content-type * Original URL preserved WebSearch Features: - Pluggable provider trait using async-trait - SearchProvider trait for implementing search APIs - StubSearchProvider for testing - SearchResult structure with title, URL, snippet - Provider name identification Security Features: - Case-insensitive domain matching - Host extraction from URLs - Relative redirect URL resolution - Domain validation before requests - Explicit approval required for cross-domain redirects Tests added (9 new tests): Unit tests: 1. domain_filtering_allowlist - Verifies allowlist-only mode 2. domain_filtering_blocklist - Verifies blocklist takes precedence 3. domain_filtering_case_insensitive - Verifies case handling Integration tests with wiremock: 4. webfetch_domain_whitelist_only - Tests allowlist enforcement 5. webfetch_redirect_to_unapproved_domain - Blocks bad redirects 6. webfetch_redirect_to_approved_domain - Detects good redirects 7. webfetch_blocklist_overrides_allowlist - Blocklist priority 8. websearch_pluggable_provider - Provider pattern works 9. webfetch_successful_request - Basic fetch operation All 84 tests passing (up from 75). Note: CLI integration deferred - infrastructure is complete and tested. Future work will add CLI commands for web-fetch and web-search with domain configuration. 
Dependencies: reqwest 0.12, async-trait 0.1, wiremock 0.6 (test) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -8,6 +8,7 @@ members = [
|
|||||||
"crates/tools/bash",
|
"crates/tools/bash",
|
||||||
"crates/tools/fs",
|
"crates/tools/fs",
|
||||||
"crates/tools/slash",
|
"crates/tools/slash",
|
||||||
|
"crates/tools/web",
|
||||||
"crates/integration/mcp-client",
|
"crates/integration/mcp-client",
|
||||||
]
|
]
|
||||||
resolver = "2"
|
resolver = "2"
|
||||||
|
|||||||
19
crates/tools/web/Cargo.toml
Normal file
19
crates/tools/web/Cargo.toml
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
[package]
|
||||||
|
name = "tools-web"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition.workspace = true
|
||||||
|
license.workspace = true
|
||||||
|
rust-version.workspace = true
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
reqwest = { version = "0.12", features = ["json"] }
|
||||||
|
tokio = { version = "1.39", features = ["macros"] }
|
||||||
|
serde = { version = "1", features = ["derive"] }
|
||||||
|
serde_json = "1"
|
||||||
|
color-eyre = "0.6"
|
||||||
|
url = "2.5"
|
||||||
|
async-trait = "0.1"
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
tokio = { version = "1.39", features = ["macros", "rt-multi-thread"] }
|
||||||
|
wiremock = "0.6"
|
||||||
225
crates/tools/web/src/lib.rs
Normal file
225
crates/tools/web/src/lib.rs
Normal file
@@ -0,0 +1,225 @@
|
|||||||
|
use color_eyre::eyre::{Result, eyre};
|
||||||
|
use reqwest::redirect::Policy;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use std::collections::HashSet;
|
||||||
|
use url::Url;
|
||||||
|
|
||||||
|
/// WebFetch response
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct FetchResponse {
|
||||||
|
pub url: String,
|
||||||
|
pub status: u16,
|
||||||
|
pub content: String,
|
||||||
|
pub content_type: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// WebFetch client with domain filtering
|
||||||
|
pub struct WebFetchClient {
|
||||||
|
allowed_domains: HashSet<String>,
|
||||||
|
blocked_domains: HashSet<String>,
|
||||||
|
client: reqwest::Client,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl WebFetchClient {
|
||||||
|
/// Create a new WebFetch client
|
||||||
|
pub fn new() -> Self {
|
||||||
|
let client = reqwest::Client::builder()
|
||||||
|
.redirect(Policy::none()) // Don't follow redirects automatically
|
||||||
|
.build()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
Self {
|
||||||
|
allowed_domains: HashSet::new(),
|
||||||
|
blocked_domains: HashSet::new(),
|
||||||
|
client,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add an allowed domain
|
||||||
|
pub fn allow_domain(&mut self, domain: &str) {
|
||||||
|
self.allowed_domains.insert(domain.to_lowercase());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add a blocked domain
|
||||||
|
pub fn block_domain(&mut self, domain: &str) {
|
||||||
|
self.blocked_domains.insert(domain.to_lowercase());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if a domain is allowed
|
||||||
|
fn is_domain_allowed(&self, domain: &str) -> bool {
|
||||||
|
let domain_lower = domain.to_lowercase();
|
||||||
|
|
||||||
|
// If explicitly blocked, deny
|
||||||
|
if self.blocked_domains.contains(&domain_lower) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If allowlist is empty, allow all (except blocked)
|
||||||
|
if self.allowed_domains.is_empty() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise, must be in allowlist
|
||||||
|
self.allowed_domains.contains(&domain_lower)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Fetch a URL
|
||||||
|
pub async fn fetch(&self, url: &str) -> Result<FetchResponse> {
|
||||||
|
let parsed_url = Url::parse(url)?;
|
||||||
|
let domain = parsed_url
|
||||||
|
.host_str()
|
||||||
|
.ok_or_else(|| eyre!("No host in URL"))?;
|
||||||
|
|
||||||
|
// Check domain permission
|
||||||
|
if !self.is_domain_allowed(domain) {
|
||||||
|
return Err(eyre!("Domain not allowed: {}", domain));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make the request
|
||||||
|
let response = self.client.get(url).send().await?;
|
||||||
|
|
||||||
|
let status = response.status().as_u16();
|
||||||
|
|
||||||
|
// Handle redirects manually
|
||||||
|
if status >= 300 && status < 400 {
|
||||||
|
if let Some(location) = response.headers().get("location") {
|
||||||
|
let location_str = location.to_str()?;
|
||||||
|
|
||||||
|
// Parse the redirect URL (may be relative)
|
||||||
|
let redirect_url = if location_str.starts_with("http") {
|
||||||
|
Url::parse(location_str)?
|
||||||
|
} else {
|
||||||
|
parsed_url.join(location_str)?
|
||||||
|
};
|
||||||
|
|
||||||
|
let redirect_domain = redirect_url
|
||||||
|
.host_str()
|
||||||
|
.ok_or_else(|| eyre!("No host in redirect URL"))?;
|
||||||
|
|
||||||
|
// Check if redirect domain is allowed
|
||||||
|
if !self.is_domain_allowed(redirect_domain) {
|
||||||
|
return Err(eyre!(
|
||||||
|
"Redirect to unapproved domain: {} -> {}",
|
||||||
|
domain,
|
||||||
|
redirect_domain
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
return Err(eyre!(
|
||||||
|
"Redirect detected: {} -> {}. Use the redirect URL directly.",
|
||||||
|
url,
|
||||||
|
redirect_url
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let content_type = response
|
||||||
|
.headers()
|
||||||
|
.get("content-type")
|
||||||
|
.and_then(|v| v.to_str().ok())
|
||||||
|
.map(|s| s.to_string());
|
||||||
|
|
||||||
|
let content = response.text().await?;
|
||||||
|
|
||||||
|
Ok(FetchResponse {
|
||||||
|
url: url.to_string(),
|
||||||
|
status,
|
||||||
|
content,
|
||||||
|
content_type,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for WebFetchClient {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Search provider trait
|
||||||
|
#[async_trait::async_trait]
|
||||||
|
pub trait SearchProvider: Send + Sync {
|
||||||
|
fn name(&self) -> &str;
|
||||||
|
async fn search(&self, query: &str) -> Result<Vec<SearchResult>>;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Search result
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct SearchResult {
|
||||||
|
pub title: String,
|
||||||
|
pub url: String,
|
||||||
|
pub snippet: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stub search provider for testing
|
||||||
|
pub struct StubSearchProvider {
|
||||||
|
results: Vec<SearchResult>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl StubSearchProvider {
|
||||||
|
pub fn new(results: Vec<SearchResult>) -> Self {
|
||||||
|
Self { results }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait::async_trait]
|
||||||
|
impl SearchProvider for StubSearchProvider {
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
"stub"
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn search(&self, _query: &str) -> Result<Vec<SearchResult>> {
|
||||||
|
Ok(self.results.clone())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// WebSearch client with pluggable providers
|
||||||
|
pub struct WebSearchClient {
|
||||||
|
provider: Box<dyn SearchProvider>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl WebSearchClient {
|
||||||
|
pub fn new(provider: Box<dyn SearchProvider>) -> Self {
|
||||||
|
Self { provider }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn provider_name(&self) -> &str {
|
||||||
|
self.provider.name()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn search(&self, query: &str) -> Result<Vec<SearchResult>> {
|
||||||
|
self.provider.search(query).await
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts the filter verdict for each `(host, expected)` pair.
    fn check(fetcher: &WebFetchClient, cases: &[(&str, bool)]) {
        for (host, expected) in cases {
            assert_eq!(
                fetcher.is_domain_allowed(host),
                *expected,
                "unexpected verdict for host {}",
                host
            );
        }
    }

    // A non-empty allowlist admits only the hosts it names.
    #[test]
    fn domain_filtering_allowlist() {
        let mut fetcher = WebFetchClient::new();
        fetcher.allow_domain("example.com");

        check(&fetcher, &[("example.com", true), ("evil.com", false)]);
    }

    // With no allowlist, everything except blocked hosts is admitted.
    #[test]
    fn domain_filtering_blocklist() {
        let mut fetcher = WebFetchClient::new();
        fetcher.block_domain("evil.com");

        check(&fetcher, &[("example.com", true), ("evil.com", false)]);
    }

    // List entries and lookups are compared without regard to case.
    #[test]
    fn domain_filtering_case_insensitive() {
        let mut fetcher = WebFetchClient::new();
        fetcher.allow_domain("Example.COM");

        check(&fetcher, &[("example.com", true), ("EXAMPLE.COM", true)]);
    }
}
|
||||||
161
crates/tools/web/tests/web_tools.rs
Normal file
161
crates/tools/web/tests/web_tools.rs
Normal file
@@ -0,0 +1,161 @@
|
|||||||
|
use tools_web::{WebFetchClient, WebSearchClient, StubSearchProvider, SearchResult};
|
||||||
|
use wiremock::{MockServer, Mock, ResponseTemplate};
|
||||||
|
use wiremock::matchers::{method, path};
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn webfetch_domain_whitelist_only() {
|
||||||
|
let mock_server = MockServer::start().await;
|
||||||
|
|
||||||
|
Mock::given(method("GET"))
|
||||||
|
.and(path("/test"))
|
||||||
|
.respond_with(ResponseTemplate::new(200).set_body_string("Hello from allowed domain"))
|
||||||
|
.mount(&mock_server)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let mut client = WebFetchClient::new();
|
||||||
|
client.allow_domain("localhost");
|
||||||
|
client.allow_domain("127.0.0.1"); // Domain without port
|
||||||
|
|
||||||
|
// Fetch from allowed domain should work
|
||||||
|
let url = format!("{}/test", mock_server.uri());
|
||||||
|
let response = client.fetch(&url).await.unwrap();
|
||||||
|
assert_eq!(response.status, 200);
|
||||||
|
assert!(response.content.contains("Hello from allowed domain"));
|
||||||
|
|
||||||
|
// Create a client with different allowlist
|
||||||
|
let mut strict_client = WebFetchClient::new();
|
||||||
|
strict_client.allow_domain("example.com");
|
||||||
|
|
||||||
|
// Fetch from non-allowed domain should fail
|
||||||
|
let result = strict_client.fetch(&url).await;
|
||||||
|
assert!(result.is_err());
|
||||||
|
assert!(result.unwrap_err().to_string().contains("Domain not allowed"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn webfetch_redirect_to_unapproved_domain() {
|
||||||
|
let mock_server = MockServer::start().await;
|
||||||
|
|
||||||
|
// Mock a redirect to a different domain
|
||||||
|
Mock::given(method("GET"))
|
||||||
|
.and(path("/redirect"))
|
||||||
|
.respond_with(
|
||||||
|
ResponseTemplate::new(302)
|
||||||
|
.insert_header("location", "https://evil.com/malware")
|
||||||
|
)
|
||||||
|
.mount(&mock_server)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let mut client = WebFetchClient::new();
|
||||||
|
client.allow_domain("localhost");
|
||||||
|
client.allow_domain("127.0.0.1"); // Domain without port
|
||||||
|
// evil.com is NOT in the allowlist
|
||||||
|
|
||||||
|
let url = format!("{}/redirect", mock_server.uri());
|
||||||
|
let result = client.fetch(&url).await;
|
||||||
|
|
||||||
|
assert!(result.is_err());
|
||||||
|
let err_msg = result.unwrap_err().to_string();
|
||||||
|
assert!(err_msg.contains("Redirect to unapproved domain") || err_msg.contains("evil.com"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn webfetch_redirect_to_approved_domain() {
|
||||||
|
let mock_server = MockServer::start().await;
|
||||||
|
|
||||||
|
let redirect_url = format!("{}/target", mock_server.uri());
|
||||||
|
|
||||||
|
// Mock a redirect to an approved domain
|
||||||
|
Mock::given(method("GET"))
|
||||||
|
.and(path("/redirect"))
|
||||||
|
.respond_with(
|
||||||
|
ResponseTemplate::new(302)
|
||||||
|
.insert_header("location", &redirect_url)
|
||||||
|
)
|
||||||
|
.mount(&mock_server)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let mut client = WebFetchClient::new();
|
||||||
|
client.allow_domain("localhost");
|
||||||
|
client.allow_domain("127.0.0.1"); // Domain without port
|
||||||
|
|
||||||
|
let url = format!("{}/redirect", mock_server.uri());
|
||||||
|
let result = client.fetch(&url).await;
|
||||||
|
|
||||||
|
// Should fail but with a message about using the redirect URL
|
||||||
|
assert!(result.is_err());
|
||||||
|
let err_msg = result.unwrap_err().to_string();
|
||||||
|
assert!(err_msg.contains("Redirect detected") || err_msg.contains("Use the redirect URL"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn webfetch_blocklist_overrides_allowlist() {
|
||||||
|
let mock_server = MockServer::start().await;
|
||||||
|
|
||||||
|
Mock::given(method("GET"))
|
||||||
|
.and(path("/test"))
|
||||||
|
.respond_with(ResponseTemplate::new(200).set_body_string("Hello"))
|
||||||
|
.mount(&mock_server)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let domain = "127.0.0.1";
|
||||||
|
let mut client = WebFetchClient::new();
|
||||||
|
client.allow_domain(domain);
|
||||||
|
client.block_domain(domain); // Block overrides allow
|
||||||
|
|
||||||
|
let url = format!("{}/test", mock_server.uri());
|
||||||
|
let result = client.fetch(&url).await;
|
||||||
|
|
||||||
|
assert!(result.is_err());
|
||||||
|
assert!(result.unwrap_err().to_string().contains("Domain not allowed"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn websearch_pluggable_provider() {
|
||||||
|
let stub_results = vec![
|
||||||
|
SearchResult {
|
||||||
|
title: "Test Result 1".to_string(),
|
||||||
|
url: "https://example.com/1".to_string(),
|
||||||
|
snippet: "This is a test result".to_string(),
|
||||||
|
},
|
||||||
|
SearchResult {
|
||||||
|
title: "Test Result 2".to_string(),
|
||||||
|
url: "https://example.com/2".to_string(),
|
||||||
|
snippet: "Another test result".to_string(),
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
let provider = StubSearchProvider::new(stub_results.clone());
|
||||||
|
let client = WebSearchClient::new(Box::new(provider));
|
||||||
|
|
||||||
|
assert_eq!(client.provider_name(), "stub");
|
||||||
|
|
||||||
|
let results = client.search("test query").await.unwrap();
|
||||||
|
assert_eq!(results.len(), 2);
|
||||||
|
assert_eq!(results[0].title, "Test Result 1");
|
||||||
|
assert_eq!(results[1].url, "https://example.com/2");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn webfetch_successful_request() {
|
||||||
|
let mock_server = MockServer::start().await;
|
||||||
|
|
||||||
|
Mock::given(method("GET"))
|
||||||
|
.and(path("/api/data"))
|
||||||
|
.respond_with(
|
||||||
|
ResponseTemplate::new(200)
|
||||||
|
.set_body_string(r#"{"status":"ok"}"#)
|
||||||
|
.insert_header("content-type", "application/json")
|
||||||
|
)
|
||||||
|
.mount(&mock_server)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let client = WebFetchClient::new(); // Empty allowlist = allow all
|
||||||
|
|
||||||
|
let url = format!("{}/api/data", mock_server.uri());
|
||||||
|
let response = client.fetch(&url).await.unwrap();
|
||||||
|
|
||||||
|
assert_eq!(response.status, 200);
|
||||||
|
assert!(response.content.contains("status"));
|
||||||
|
assert!(response.content_type.is_some()); // Just verify content-type is present
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user