fix: web search failing with BusyBox wget in Docker

- Detect BusyBox wget (limited options) vs GNU wget
- Use compatible flags for BusyBox: -q -O -T -U only
- Add curl to Docker image for better reliability
- curl is now preferred and will be used over BusyBox wget

BusyBox wget doesn't support --max-redirect, --header, or other
long-form options, which caused web search to fail.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-01 07:29:49 +01:00
parent 3faf1e9f34
commit 774ed4c57a
2 changed files with 47 additions and 20 deletions

View File

@@ -18,7 +18,8 @@ RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o server ./cmd/serv
# Final stage
FROM alpine:latest
RUN apk --no-cache add ca-certificates
# curl for web fetching, ca-certificates for HTTPS
RUN apk --no-cache add ca-certificates curl
WORKDIR /app

View File

@@ -65,13 +65,14 @@ func DefaultFetchOptions() FetchOptions {
// Fetcher provides URL fetching with multiple backend support
type Fetcher struct {
curlPath string
wgetPath string
chromePath string
httpClient *http.Client
method FetchMethod
hasChrome bool
mu sync.RWMutex
curlPath string
wgetPath string
wgetIsBusyBox bool // BusyBox wget has limited options
chromePath string
httpClient *http.Client
method FetchMethod
hasChrome bool
mu sync.RWMutex
// chromedp allocator context (reused for efficiency)
allocCtx context.Context
@@ -114,6 +115,13 @@ func (f *Fetcher) detectTools() {
// Check for wget
if path, err := exec.LookPath("wget"); err == nil {
f.wgetPath = path
// Check if it's BusyBox wget (has limited options)
versionCmd := exec.Command(path, "--version")
versionOut, _ := versionCmd.CombinedOutput()
f.wgetIsBusyBox = strings.Contains(string(versionOut), "BusyBox")
if f.wgetIsBusyBox {
log.Printf("[Fetcher] Found BusyBox wget (limited options)")
}
if f.method == "" {
f.method = FetchMethodWget
}
@@ -483,19 +491,37 @@ func (f *Fetcher) fetchWithCurl(ctx context.Context, url string, curlPath string
// fetchWithWget uses wget to fetch the URL
func (f *Fetcher) fetchWithWget(ctx context.Context, url string, wgetPath string, opts FetchOptions) (*FetchResult, error) {
args := []string{
"-q", // Quiet
"-O", "-", // Output to stdout
"--timeout", fmt.Sprintf("%d", int(opts.Timeout.Seconds())),
"--user-agent", opts.UserAgent,
"--max-redirect", "10", // Follow up to 10 redirects
"--header", "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"--header", "Accept-Language: en-US,en;q=0.5",
}
f.mu.RLock()
isBusyBox := f.wgetIsBusyBox
f.mu.RUnlock()
// Add custom headers
for key, value := range opts.Headers {
args = append(args, "--header", fmt.Sprintf("%s: %s", key, value))
var args []string
if isBusyBox {
// BusyBox wget has limited options - use short flags only
args = []string{
"-q", // Quiet
"-O", "-", // Output to stdout
"-T", fmt.Sprintf("%d", int(opts.Timeout.Seconds())), // Timeout
"-U", opts.UserAgent, // User agent
}
// BusyBox wget doesn't support custom headers or max-redirect
} else {
// GNU wget supports full options
args = []string{
"-q", // Quiet
"-O", "-", // Output to stdout
"--timeout", fmt.Sprintf("%d", int(opts.Timeout.Seconds())),
"--user-agent", opts.UserAgent,
"--max-redirect", "10", // Follow up to 10 redirects
"--header", "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"--header", "Accept-Language: en-US,en;q=0.5",
}
// Add custom headers (GNU wget only)
for key, value := range opts.Headers {
args = append(args, "--header", fmt.Sprintf("%s: %s", key, value))
}
}
args = append(args, url)