Files
owlen/crates/tools/web/tests/web_tools.rs
vikingowl 173403379f feat(M9): implement WebFetch and WebSearch with domain filtering and pluggable providers
Milestone M9 implementation adds web access tools with security controls.

New crate: crates/tools/web

WebFetch Features:
- HTTP client using reqwest
- Domain allowlist/blocklist filtering
  * Empty allowlist = allow all domains (except blocked)
  * Non-empty allowlist = only allow specified domains
  * Blocklist always takes precedence
- Redirect detection and blocking
  * Redirects to unapproved domains are blocked
  * Manual redirect policy (no automatic following)
  * Returns error message with redirect URL
- Response capture with metadata
  * Status code, content, content-type
  * Original URL preserved

WebSearch Features:
- Pluggable provider trait using async-trait
- SearchProvider trait for implementing search APIs
- StubSearchProvider for testing
- SearchResult structure with title, URL, snippet
- Provider name identification

Security Features:
- Case-insensitive domain matching
- Host extraction from URLs
- Relative redirect URL resolution
- Domain validation before requests
- Explicit approval required for cross-domain redirects

Tests added (9 new tests):
Unit tests:
1. domain_filtering_allowlist - Verifies allowlist-only mode
2. domain_filtering_blocklist - Verifies blocklist takes precedence
3. domain_filtering_case_insensitive - Verifies case handling

Integration tests with wiremock:
4. webfetch_domain_whitelist_only - Tests allowlist enforcement
5. webfetch_redirect_to_unapproved_domain - Blocks bad redirects
6. webfetch_redirect_to_approved_domain - Detects good redirects
7. webfetch_blocklist_overrides_allowlist - Blocklist priority
8. websearch_pluggable_provider - Provider pattern works (uses StubSearchProvider; no wiremock server involved)
9. webfetch_successful_request - Basic fetch operation

All 84 tests passing (up from 75).

Note: CLI integration deferred - infrastructure is complete and tested.
Future work will add CLI commands for web-fetch and web-search with
domain configuration.

Dependencies: reqwest 0.12, async-trait 0.1, wiremock 0.6 (test)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-01 20:23:29 +01:00

162 lines
5.3 KiB
Rust

use tools_web::{WebFetchClient, WebSearchClient, StubSearchProvider, SearchResult};
use wiremock::{MockServer, Mock, ResponseTemplate};
use wiremock::matchers::{method, path};
/// A non-empty allowlist restricts fetching to the listed hosts only.
///
/// Two clients hit the same mock endpoint: one whose allowlist contains the
/// loopback names succeeds, one whose allowlist contains only `example.com`
/// is rejected with a "Domain not allowed" error.
#[tokio::test]
async fn webfetch_domain_whitelist_only() {
    let server = MockServer::start().await;
    let greeting = ResponseTemplate::new(200).set_body_string("Hello from allowed domain");
    Mock::given(method("GET"))
        .and(path("/test"))
        .respond_with(greeting)
        .mount(&server)
        .await;

    // Both clients target the very same URL; only their allowlists differ.
    let target = format!("{}/test", server.uri());

    // Permissive client: loopback names are allowlisted. Entries are bare
    // hosts (no port), even though the mock server URI presumably carries one.
    let mut permissive = WebFetchClient::new();
    permissive.allow_domain("localhost");
    permissive.allow_domain("127.0.0.1");

    let fetched = permissive.fetch(&target).await.unwrap();
    assert_eq!(fetched.status, 200);
    assert!(fetched.content.contains("Hello from allowed domain"));

    // Restrictive client: the allowlist does not include the mock server's host.
    let mut restrictive = WebFetchClient::new();
    restrictive.allow_domain("example.com");

    let outcome = restrictive.fetch(&target).await;
    assert!(outcome.is_err());
    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("Domain not allowed"));
}
/// A 302 pointing at a host outside the allowlist must surface as an error.
///
/// The mock endpoint answers with `Location: https://evil.com/malware` while
/// the client only allowlists the loopback names.
#[tokio::test]
async fn webfetch_redirect_to_unapproved_domain() {
    let server = MockServer::start().await;

    // The redirect target lives on a host the client never approved.
    let hostile_redirect =
        ResponseTemplate::new(302).insert_header("location", "https://evil.com/malware");
    Mock::given(method("GET"))
        .and(path("/redirect"))
        .respond_with(hostile_redirect)
        .mount(&server)
        .await;

    // Only loopback names (bare hosts, no port) are approved; evil.com is not.
    let mut client = WebFetchClient::new();
    client.allow_domain("localhost");
    client.allow_domain("127.0.0.1");

    let entry_url = format!("{}/redirect", server.uri());
    let outcome = client.fetch(&entry_url).await;
    assert!(outcome.is_err());

    // NOTE(review): the second alternative also passes for any error that
    // merely mentions the redirect URL, so this check alone does not prove
    // the redirect was classified as "unapproved" — consider tightening once
    // the exact error text is confirmed.
    let message = outcome.unwrap_err().to_string();
    let mentions_block = message.contains("Redirect to unapproved domain");
    assert!(mentions_block || message.contains("evil.com"));
}
/// A 302 pointing back at an allowlisted host is still not followed.
///
/// The test expects `fetch` to return an error whose text either reports that
/// a redirect was detected or tells the caller to use the redirect URL.
#[tokio::test]
async fn webfetch_redirect_to_approved_domain() {
    let server = MockServer::start().await;
    let base = server.uri();

    // Redirect target is on the same mock server, i.e. an approved host.
    let redirect_url = format!("{}/target", base);
    let same_host_redirect = ResponseTemplate::new(302).insert_header("location", &redirect_url);
    Mock::given(method("GET"))
        .and(path("/redirect"))
        .respond_with(same_host_redirect)
        .mount(&server)
        .await;

    // Loopback names are approved as bare hosts (no port).
    let mut client = WebFetchClient::new();
    client.allow_domain("localhost");
    client.allow_domain("127.0.0.1");

    let entry_url = format!("{}/redirect", base);
    let outcome = client.fetch(&entry_url).await;

    // The fetch is expected to fail, with a message about the redirect.
    assert!(outcome.is_err());
    let message = outcome.unwrap_err().to_string();
    let reports_redirect = message.contains("Redirect detected");
    assert!(reports_redirect || message.contains("Use the redirect URL"));
}
/// A host that is both allowed and blocked must be rejected.
///
/// `127.0.0.1` is added to the allowlist and then to the blocklist; the fetch
/// is expected to fail with a "Domain not allowed" error.
#[tokio::test]
async fn webfetch_blocklist_overrides_allowlist() {
    let server = MockServer::start().await;
    let ok_response = ResponseTemplate::new(200).set_body_string("Hello");
    Mock::given(method("GET"))
        .and(path("/test"))
        .respond_with(ok_response)
        .mount(&server)
        .await;

    // Same host on both lists: the block entry is expected to win.
    let contested_host = "127.0.0.1";
    let mut client = WebFetchClient::new();
    client.allow_domain(contested_host);
    client.block_domain(contested_host);

    let target = format!("{}/test", server.uri());
    let outcome = client.fetch(&target).await;
    assert!(outcome.is_err());

    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("Domain not allowed"));
}
/// `WebSearchClient` delegates to whatever provider it is constructed with.
///
/// A `StubSearchProvider` seeded with two canned results is boxed into the
/// client; the test then checks the reported provider name and that a search
/// yields the seeded results in order.
#[tokio::test]
async fn websearch_pluggable_provider() {
    let stub_results = vec![
        SearchResult {
            title: "Test Result 1".to_string(),
            url: "https://example.com/1".to_string(),
            snippet: "This is a test result".to_string(),
        },
        SearchResult {
            title: "Test Result 2".to_string(),
            url: "https://example.com/2".to_string(),
            snippet: "Another test result".to_string(),
        },
    ];

    // The vector is not needed after this point, so it is moved into the
    // provider rather than cloned.
    let provider = StubSearchProvider::new(stub_results);
    let client = WebSearchClient::new(Box::new(provider));

    assert_eq!(client.provider_name(), "stub");

    let results = client.search("test query").await.unwrap();
    assert_eq!(results.len(), 2);
    assert_eq!(results[0].title, "Test Result 1");
    assert_eq!(results[1].url, "https://example.com/2");
}
/// Basic happy path: a client with no domain rules fetches a JSON endpoint.
///
/// Checks the status code, that the body text contains `status`, and that a
/// content type was captured on the response.
#[tokio::test]
async fn webfetch_successful_request() {
    let server = MockServer::start().await;

    let json_reply = ResponseTemplate::new(200)
        .set_body_string(r#"{"status":"ok"}"#)
        .insert_header("content-type", "application/json");
    Mock::given(method("GET"))
        .and(path("/api/data"))
        .respond_with(json_reply)
        .mount(&server)
        .await;

    let endpoint = format!("{}/api/data", server.uri());

    // No allow/block entries are configured; per the original author's note,
    // an empty allowlist permits every domain.
    let client = WebFetchClient::new();
    let fetched = client.fetch(&endpoint).await.unwrap();

    assert_eq!(fetched.status, 200);
    assert!(fetched.content.contains("status"));
    // Only the presence of a content type is verified, not its value.
    assert!(fetched.content_type.is_some());
}