From 26b4f342fc83b244858bc4da2efa4a85c6b2e488 Mon Sep 17 00:00:00 2001 From: vikingowl Date: Wed, 31 Dec 2025 20:04:09 +0100 Subject: [PATCH] feat: add web search and location tools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add web_search built-in tool that searches via DuckDuckGo - Add get_location tool to get user's geographic location - Create backend search proxy endpoint (/api/v1/proxy/search) - DuckDuckGo HTML scraping with title, URL, and snippet extraction - Geolocation with OpenStreetMap reverse geocoding for city/country - Fix StreamingIndicator visibility in dark mode - Improve tool descriptions to encourage proper tool usage - Better error messages with suggestions when location fails 🤖 Generated with [Claude Code](https://claude.ai/claude-code) Co-Authored-By: Claude Opus 4.5 --- backend/internal/api/routes.go | 3 + backend/internal/api/search.go | 234 ++++++++++++++++++ .../src/lib/components/chat/ChatWindow.svelte | 2 +- .../components/chat/StreamingIndicator.svelte | 6 +- frontend/src/lib/tools/builtin.ts | 212 ++++++++++++++++ frontend/src/routes/+page.svelte | 1 + 6 files changed, 454 insertions(+), 4 deletions(-) create mode 100644 backend/internal/api/search.go diff --git a/backend/internal/api/routes.go b/backend/internal/api/routes.go index 26a8e4a..0ba5e71 100644 --- a/backend/internal/api/routes.go +++ b/backend/internal/api/routes.go @@ -39,6 +39,9 @@ func SetupRoutes(r *gin.Engine, db *sql.DB, ollamaURL string) { // URL fetch proxy (for tools that need to fetch external URLs) v1.POST("/proxy/fetch", URLFetchProxyHandler()) + // Web search proxy (for web_search tool) + v1.POST("/proxy/search", WebSearchProxyHandler()) + // Ollama proxy (optional) v1.Any("/ollama/*path", OllamaProxyHandler(ollamaURL)) } diff --git a/backend/internal/api/search.go b/backend/internal/api/search.go new file mode 100644 index 0000000..d57ca93 --- /dev/null +++ b/backend/internal/api/search.go @@ -0,0 +1,234 @@ +package api + +import ( + "fmt" + "io" + "net/http" + "net/url" + "regexp" + "strings" + "time" + + "github.com/gin-gonic/gin" +) + +// SearchRequest represents a web search request +type SearchRequest struct { + Query string `json:"query" binding:"required"` + MaxResults int `json:"maxResults"` +} + +// SearchResult represents a single search result +type SearchResult struct { + Title string `json:"title"` + URL string `json:"url"` + Snippet string `json:"snippet"` +} + +// WebSearchProxyHandler returns a handler that performs web searches via DuckDuckGo +func WebSearchProxyHandler() gin.HandlerFunc { + return func(c *gin.Context) { + var req SearchRequest + if err := c.ShouldBindJSON(&req); err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request: " + err.Error()}) + return + } + + // Set default and max results + maxResults := req.MaxResults + if maxResults <= 0 { + maxResults = 5 + } + if maxResults > 10 { + maxResults = 10 + } + + // Build DuckDuckGo HTML search URL + searchURL := fmt.Sprintf("https://html.duckduckgo.com/html/?q=%s", url.QueryEscape(req.Query)) + + // Create HTTP client with timeout + client := &http.Client{ + Timeout: 15 * time.Second, + } + + // Create request + httpReq, err := http.NewRequestWithContext(c.Request.Context(), "GET", searchURL, nil) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to create request: " + err.Error()}) + return + } + + // Set headers to mimic a browser + httpReq.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36") + httpReq.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") + httpReq.Header.Set("Accept-Language", "en-US,en;q=0.5") + + // Execute request + resp, err := client.Do(httpReq) + if err != nil { + c.JSON(http.StatusBadGateway, gin.H{"error": "failed to perform search: " + err.Error()}) + return + } + defer resp.Body.Close() + + // Check status + if resp.StatusCode >= 400 { + c.JSON(http.StatusBadGateway, gin.H{"error": "search failed: HTTP " + resp.Status}) + return + } + + // Read response body + body, err := io.ReadAll(io.LimitReader(resp.Body, 500000)) // 500KB limit + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to read response: " + err.Error()}) + return + } + + // Parse results from HTML + results := parseDuckDuckGoResults(string(body), maxResults) + + c.JSON(http.StatusOK, gin.H{ + "query": req.Query, + "results": results, + "count": len(results), + }) + } +} + +// parseDuckDuckGoResults extracts search results from DuckDuckGo HTML +func parseDuckDuckGoResults(html string, maxResults int) []SearchResult { + var results []SearchResult + + // DuckDuckGo HTML result structure: + // + + // Match each result block (more permissive pattern) + resultPattern := regexp.MustCompile(`(?s)]*class="[^"]*results_links[^"]*"[^>]*>(.*?)\s*`) + + // Patterns for extracting components + titleURLPattern := regexp.MustCompile(`(?s)]*class="result__a"[^>]*href="([^"]*)"[^>]*>([^<]+)`) + snippetPattern := regexp.MustCompile(`(?s)]*class="result__snippet"[^>]*>(.*?)`) + + resultBlocks := resultPattern.FindAllStringSubmatch(html, maxResults*3) + + for _, match := range resultBlocks { + if len(results) >= maxResults { + break + } + if len(match) < 2 { + continue + } + + block := match[1] + var result SearchResult + + // Extract title and URL + titleMatch := titleURLPattern.FindStringSubmatch(block) + if len(titleMatch) >= 3 { + result.URL = decodeURL(titleMatch[1]) + result.Title = cleanHTML(titleMatch[2]) + } + + // Extract snippet (can contain HTML like tags) + snippetMatch := snippetPattern.FindStringSubmatch(block) + if len(snippetMatch) >= 2 { + result.Snippet = cleanHTML(snippetMatch[1]) + } + + // Only add if we have a title and URL + if result.Title != "" && result.URL != "" { + // Skip DuckDuckGo internal links + if strings.Contains(result.URL, "duckduckgo.com") { + continue + } + results = append(results, result) + } + } + + // Fallback: try a simpler pattern if no results found + if len(results) == 0 { + results = parseSimpleDuckDuckGo(html, maxResults) + } + + return results +} + +// parseSimpleDuckDuckGo is a fallback parser using simpler patterns +func parseSimpleDuckDuckGo(html string, maxResults int) []SearchResult { + var results []SearchResult + + // Look for result__a links (main result titles) + pattern := regexp.MustCompile(`(?s)]*class="result__a"[^>]*href="([^"]*)"[^>]*>([^<]*)`) + matches := pattern.FindAllStringSubmatch(html, maxResults*2) + + for _, match := range matches { + if len(results) >= maxResults { + break + } + + if len(match) >= 3 { + url := decodeURL(match[1]) + title := cleanHTML(match[2]) + + // Skip empty or DuckDuckGo internal + if url == "" || title == "" || strings.Contains(url, "duckduckgo.com") { + continue + } + + results = append(results, SearchResult{ + Title: title, + URL: url, + Snippet: "", // Snippet extraction is more complex + }) + } + } + + return results +} + +// decodeURL extracts the actual URL from DuckDuckGo's redirect URL +func decodeURL(ddgURL string) string { + // DuckDuckGo wraps URLs in redirect links like: + // //duckduckgo.com/l/?uddg=https%3A%2F%2Fexample.com&... + if strings.Contains(ddgURL, "uddg=") { + parsed, err := url.Parse(ddgURL) + if err == nil { + uddg := parsed.Query().Get("uddg") + if uddg != "" { + return uddg + } + } + } + + // Sometimes URLs start with // (protocol-relative) + if strings.HasPrefix(ddgURL, "//") { + return "https:" + ddgURL + } + + return ddgURL +} + +// cleanHTML removes HTML tags and decodes entities +func cleanHTML(s string) string { + // Remove HTML tags + tagPattern := regexp.MustCompile(`<[^>]*>`) + s = tagPattern.ReplaceAllString(s, "") + + // Decode common HTML entities + s = strings.ReplaceAll(s, "&", "&") + s = strings.ReplaceAll(s, "<", "<") + s = strings.ReplaceAll(s, ">", ">") + s = strings.ReplaceAll(s, """, "\"") + s = strings.ReplaceAll(s, "'", "'") + s = strings.ReplaceAll(s, " ", " ") + + // Clean up whitespace + s = strings.TrimSpace(s) + spacePattern := regexp.MustCompile(`\s+`) + s = spacePattern.ReplaceAllString(s, " ") + + return s +} diff --git a/frontend/src/lib/components/chat/ChatWindow.svelte b/frontend/src/lib/components/chat/ChatWindow.svelte index 39bb4ee..92da478 100644 --- a/frontend/src/lib/components/chat/ChatWindow.svelte +++ b/frontend/src/lib/components/chat/ChatWindow.svelte @@ -312,9 +312,9 @@ // Debug logging console.log('[Chat] Tools enabled:', toolsState.toolsEnabled); console.log('[Chat] Tools count:', tools?.length ?? 0); + console.log('[Chat] Tool names:', tools?.map(t => t.function.name) ?? []); console.log('[Chat] USE_FUNCTION_MODEL:', USE_FUNCTION_MODEL); console.log('[Chat] Using model:', chatModel, '(original:', model, ')'); - if (tools?.length) console.log('[Chat] Tool definitions:', tools); await ollamaClient.streamChatWithCallbacks( { diff --git a/frontend/src/lib/components/chat/StreamingIndicator.svelte b/frontend/src/lib/components/chat/StreamingIndicator.svelte index 636d712..4351837 100644 --- a/frontend/src/lib/components/chat/StreamingIndicator.svelte +++ b/frontend/src/lib/components/chat/StreamingIndicator.svelte @@ -29,15 +29,15 @@ aria-label="Generating response" > Generating response... diff --git a/frontend/src/lib/tools/builtin.ts b/frontend/src/lib/tools/builtin.ts index f7c190e..830d19a 100644 --- a/frontend/src/lib/tools/builtin.ts +++ b/frontend/src/lib/tools/builtin.ts @@ -439,6 +439,205 @@ const fetchUrlHandler: BuiltinToolHandler = async (args) => { } }; +// ============================================================================ +// Get Location Tool +// ============================================================================ + +interface GetLocationArgs { + highAccuracy?: boolean; +} + +interface LocationResult { + latitude: number; + longitude: number; + accuracy: number; + city?: string; + country?: string; +} + +const getLocationDefinition: ToolDefinition = { + type: 'function', + function: { + name: 'get_location', + description: 'Get the user\'s current location (city, country, coordinates). Call this IMMEDIATELY when you need location for weather, local info, or nearby places. Do NOT ask the user where they are - use this tool instead.', + parameters: { + type: 'object', + properties: { + highAccuracy: { + type: 'boolean', + description: 'Whether to request high accuracy GPS location (may take longer and use more battery). Default is false.' + } + } + } + } +}; + +const getLocationHandler: BuiltinToolHandler = async (args) => { + const { highAccuracy = false } = args; + + // Check if geolocation is available + if (!navigator.geolocation) { + return { error: 'Geolocation is not supported by this browser' }; + } + + try { + const position = await new Promise((resolve, reject) => { + navigator.geolocation.getCurrentPosition(resolve, reject, { + enableHighAccuracy: highAccuracy, + timeout: 30000, // 30 seconds - user needs time to accept permission prompt + maximumAge: 300000 // Cache for 5 minutes + }); + }); + + const result: LocationResult = { + latitude: position.coords.latitude, + longitude: position.coords.longitude, + accuracy: Math.round(position.coords.accuracy) + }; + + // Try to get city/country via reverse geocoding (using a free service) + try { + const geoResponse = await fetch( + `https://nominatim.openstreetmap.org/reverse?lat=${result.latitude}&lon=${result.longitude}&format=json`, + { + headers: { + 'User-Agent': 'OllamaWebUI/1.0' + } + } + ); + + if (geoResponse.ok) { + const geoData = await geoResponse.json(); + if (geoData.address) { + result.city = geoData.address.city || geoData.address.town || geoData.address.village || geoData.address.municipality; + result.country = geoData.address.country; + } + } + } catch { + // Reverse geocoding failed, but we still have coordinates + } + + return { + location: result, + message: result.city + ? `User is located in ${result.city}${result.country ? ', ' + result.country : ''}` + : `User is at coordinates ${result.latitude.toFixed(4)}, ${result.longitude.toFixed(4)}` + }; + } catch (error) { + if (error instanceof GeolocationPositionError) { + switch (error.code) { + case error.PERMISSION_DENIED: + return { + error: 'Location permission denied', + suggestion: 'Ask the user for their city/location directly, then use web_search with that location.' + }; + case error.POSITION_UNAVAILABLE: + return { + error: 'Location services unavailable on this device', + suggestion: 'Ask the user for their city/location directly, then use web_search with that location.' + }; + case error.TIMEOUT: + return { + error: 'Location request timed out', + suggestion: 'Ask the user for their city/location directly, then use web_search with that location.' + }; + } + } + return { + error: `Failed to get location: ${error instanceof Error ? error.message : 'Unknown error'}`, + suggestion: 'Ask the user for their city/location directly, then use web_search with that location.' + }; + } +}; + +// ============================================================================ +// Web Search Tool +// ============================================================================ + +interface WebSearchArgs { + query: string; + maxResults?: number; +} + +interface WebSearchResult { + title: string; + url: string; + snippet: string; +} + +const webSearchDefinition: ToolDefinition = { + type: 'function', + function: { + name: 'web_search', + description: 'Search the web for current information. You MUST call this tool immediately when the user asks about weather, news, current events, sports, stocks, prices, or any real-time information. Do NOT ask the user for clarification - just search. If no location is specified for weather, call get_location first.', + parameters: { + type: 'object', + properties: { + query: { + type: 'string', + description: 'The search query (e.g., "weather Berlin tomorrow", "latest news", "Bitcoin price")' + }, + maxResults: { + type: 'number', + description: 'Maximum number of results to return (1-10, default 5)' + } + }, + required: ['query'] + } + } +}; + +const webSearchHandler: BuiltinToolHandler = async (args) => { + const { query, maxResults = 5 } = args; + + if (!query || query.trim() === '') { + return { error: 'Search query is required' }; + } + + // Try backend proxy first + try { + const proxyResponse = await fetch('/api/v1/proxy/search', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ query, maxResults: Math.min(Math.max(1, maxResults), 10) }) + }); + + if (proxyResponse.ok) { + const data = await proxyResponse.json(); + const results = data.results as WebSearchResult[]; + + if (results.length === 0) { + return { message: 'No search results found for the query.', query }; + } + + // Format results for the AI + return { + query, + resultCount: results.length, + results: results.map((r, i) => ({ + rank: i + 1, + title: r.title, + url: r.url, + snippet: r.snippet || '(no snippet available)' + })) + }; + } + + // If proxy returns an error, extract it + const errorData = await proxyResponse.json().catch(() => null); + if (errorData?.error) { + return { error: errorData.error }; + } + } catch { + // Proxy not available + } + + return { + error: 'Web search is not available. Please start the backend server to enable web search functionality.', + hint: 'Run the backend server with: cd backend && go run cmd/server/main.go' + }; +}; + // ============================================================================ // Registry of Built-in Tools // ============================================================================ @@ -458,6 +657,16 @@ export const builtinTools: Map = new Map([ definition: fetchUrlDefinition, handler: fetchUrlHandler as unknown as BuiltinToolHandler, isBuiltin: true + }], + ['get_location', { + definition: getLocationDefinition, + handler: getLocationHandler as unknown as BuiltinToolHandler, + isBuiltin: true + }], + ['web_search', { + definition: webSearchDefinition, + handler: webSearchHandler as unknown as BuiltinToolHandler, + isBuiltin: true }] ]); @@ -465,3 +674,6 @@ export const builtinTools: Map = new Map([ export function getBuiltinToolDefinitions(): ToolDefinition[] { return Array.from(builtinTools.values()).map(entry => entry.definition); } + +// Log available builtin tools at startup +console.log('[Builtin Tools] Available:', Array.from(builtinTools.keys())); diff --git a/frontend/src/routes/+page.svelte b/frontend/src/routes/+page.svelte index 0f8ca63..82b41b4 100644 --- a/frontend/src/routes/+page.svelte +++ b/frontend/src/routes/+page.svelte @@ -156,6 +156,7 @@ console.log('[NewChat] Tools enabled:', toolsState.toolsEnabled); console.log('[NewChat] Tools count:', tools?.length ?? 0); + console.log('[NewChat] Tool names:', tools?.map(t => t.function.name) ?? []); console.log('[NewChat] Using model:', chatModel, '(original:', model, ')'); await ollamaClient.streamChatWithCallbacks(