From 774ed4c57a2bb2610a7a63f5c081b6b54e6cac91 Mon Sep 17 00:00:00 2001 From: vikingowl Date: Thu, 1 Jan 2026 07:29:49 +0100 Subject: [PATCH] fix: web search failing with BusyBox wget in Docker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Detect BusyBox wget (limited options) vs GNU wget - Use compatible flags for BusyBox: -q -O -T -U only - Add curl to Docker image for better reliability - curl is now preferred and will be used over BusyBox wget BusyBox wget doesn't support --max-redirect, --header, or long-form options which caused web search to fail. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- backend/Dockerfile | 3 +- backend/internal/api/fetcher.go | 64 +++++++++++++++++++++++---------- 2 files changed, 47 insertions(+), 20 deletions(-) diff --git a/backend/Dockerfile b/backend/Dockerfile index eb02a46..b2cb3d5 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -18,7 +18,8 @@ RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o server ./cmd/serv # Final stage FROM alpine:latest -RUN apk --no-cache add ca-certificates +# curl for web fetching, ca-certificates for HTTPS +RUN apk --no-cache add ca-certificates curl WORKDIR /app diff --git a/backend/internal/api/fetcher.go b/backend/internal/api/fetcher.go index ee13000..46b98f2 100644 --- a/backend/internal/api/fetcher.go +++ b/backend/internal/api/fetcher.go @@ -65,13 +65,14 @@ func DefaultFetchOptions() FetchOptions { // Fetcher provides URL fetching with multiple backend support type Fetcher struct { - curlPath string - wgetPath string - chromePath string - httpClient *http.Client - method FetchMethod - hasChrome bool - mu sync.RWMutex + curlPath string + wgetPath string + wgetIsBusyBox bool // BusyBox wget has limited options + chromePath string + httpClient *http.Client + method FetchMethod + hasChrome bool + mu sync.RWMutex // chromedp allocator context (reused for efficiency) allocCtx context.Context @@ -114,6 +115,13 @@ func (f *Fetcher) detectTools() { // Check for wget if path, err := exec.LookPath("wget"); err == nil { f.wgetPath = path + // Check if it's BusyBox wget (has limited options) + versionCmd := exec.Command(path, "--version") + versionOut, _ := versionCmd.CombinedOutput() + f.wgetIsBusyBox = strings.Contains(string(versionOut), "BusyBox") + if f.wgetIsBusyBox { + log.Printf("[Fetcher] Found BusyBox wget (limited options)") + } if f.method == "" { f.method = FetchMethodWget } @@ -483,19 +491,37 @@ func (f *Fetcher) fetchWithCurl(ctx context.Context, url string, curlPath string // fetchWithWget uses wget to fetch the URL func (f *Fetcher) fetchWithWget(ctx context.Context, url string, wgetPath string, opts FetchOptions) (*FetchResult, error) { - args := []string{ - "-q", // Quiet - "-O", "-", // Output to stdout - "--timeout", fmt.Sprintf("%d", int(opts.Timeout.Seconds())), - "--user-agent", opts.UserAgent, - "--max-redirect", "10", // Follow up to 10 redirects - "--header", "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", - "--header", "Accept-Language: en-US,en;q=0.5", - } + f.mu.RLock() + isBusyBox := f.wgetIsBusyBox + f.mu.RUnlock() - // Add custom headers - for key, value := range opts.Headers { - args = append(args, "--header", fmt.Sprintf("%s: %s", key, value)) + var args []string + + if isBusyBox { + // BusyBox wget has limited options - use short flags only + args = []string{ + "-q", // Quiet + "-O", "-", // Output to stdout + "-T", fmt.Sprintf("%d", int(opts.Timeout.Seconds())), // Timeout + "-U", opts.UserAgent, // User agent + } + // BusyBox wget doesn't support custom headers or max-redirect + } else { + // GNU wget supports full options + args = []string{ + "-q", // Quiet + "-O", "-", // Output to stdout + "--timeout", fmt.Sprintf("%d", int(opts.Timeout.Seconds())), + "--user-agent", opts.UserAgent, + "--max-redirect", "10", // Follow up to 10 redirects + "--header", "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "--header", "Accept-Language: en-US,en;q=0.5", + } + + // Add custom headers (GNU wget only) + for key, value := range opts.Headers { + args = append(args, "--header", fmt.Sprintf("%s: %s", key, value)) + } } args = append(args, url)