Files
gnoma/internal/provider/errors.go
vikingowl 99529e6156 fix: deterministic 500 retry, OpenAI error wrapping, local /init prompt
Stop retrying llama.cpp 500s that are deterministic tool-parse failures
by inspecting the error message body (ClassifyHTTPError). Wrap OpenAI SDK
errors as ProviderError so the engine's retry logic classifies them. Add
localInitPrompt for local models that uses sequential fs_* calls instead
of spawn_elfs (which local models can't produce reliably).
2026-04-12 18:35:18 +02:00

106 lines
3.0 KiB
Go

package provider
import (
"fmt"
"strings"
"time"
)
// ErrorKind is the coarse category assigned to a provider failure; retry
// decisions are made from it.
type ErrorKind int

// Numbering starts at 1, so the zero value of ErrorKind is not one of the
// named kinds.
const (
	ErrTransient  ErrorKind = iota + 1 // 429, 500, 502, 503, 529: retry with backoff
	ErrAuth                            // 401, 403: do not retry
	ErrBadRequest                      // 400: do not retry, the request must be fixed
	ErrNotFound                        // 404: model or endpoint not found
	ErrOverloaded                      // capacity exhausted: back off, then retry
)

// String returns the snake_case label of k. Values outside the declared
// constants are rendered as "unknown(N)", where N is the numeric value.
func (k ErrorKind) String() string {
	// Indexed by kind; slot 0 is unused because the constants start at 1.
	labels := [...]string{
		ErrTransient:  "transient",
		ErrAuth:       "auth",
		ErrBadRequest: "bad_request",
		ErrNotFound:   "not_found",
		ErrOverloaded: "overloaded",
	}
	if k >= ErrTransient && int(k) < len(labels) {
		return labels[k]
	}
	return fmt.Sprintf("unknown(%d)", k)
}
// ProviderError wraps an SDK error with classification metadata.
// Its Error method renders Provider, Kind, StatusCode, Message and, when set,
// Err; RetryAfter and Retryable are not part of the rendered string.
type ProviderError struct {
// Kind is the classification of this error.
Kind ErrorKind
// Provider is printed first by Error; presumably the provider's name (confirm against callers).
Provider string
// StatusCode is printed in parentheses by Error; presumably the HTTP status of the response (confirm against callers).
StatusCode int
// Message is the descriptive text included in the rendered error string.
Message string
// Retryable presumably tells callers whether the request may be retried; it is not read in the visible code.
Retryable bool
RetryAfter time.Duration // from Retry-After or rate limit headers
Err error // underlying SDK error; may be nil, in which case Error omits it
}
// Error renders the error as "<provider> <kind> (<status>): <message>" and
// appends ": <underlying error>" when Err is set.
func (e *ProviderError) Error() string {
	head := fmt.Sprintf("%s %s (%d): %s", e.Provider, e.Kind, e.StatusCode, e.Message)
	if e.Err == nil {
		return head
	}
	return fmt.Sprintf("%s: %v", head, e.Err)
}
// Unwrap returns the underlying error stored in Err (possibly nil), which
// lets errors.Is and errors.As inspect the wrapped SDK error.
func (e *ProviderError) Unwrap() error {
return e.Err
}
// nonRetryable500Substrings lists error messages from servers (e.g. llama.cpp)
// that return 500 for deterministic client-side failures. These should not be
// retried because the same request will always produce the same error.
//
// ClassifyHTTPError lowercases both the message and each entry before
// comparing, so matching is case-insensitive and the casing used here does
// not matter.
var nonRetryable500Substrings = []string{
"Failed to parse tool call", // llama.cpp: model output invalid tool call JSON
"failed to parse tool call", // lowercase variant; redundant under case-insensitive matching
"tool_call_error", // some servers use this error type
"invalid_tool_call", // OpenAI-compat servers
}
// ClassifyHTTPError decides the ErrorKind and retryability of an HTTP failure
// from its status code and message body.
//
// A 500 whose message contains any entry of nonRetryable500Substrings
// (compared case-insensitively) is reported as ErrBadRequest and not
// retryable, since repeating the request would fail the same way. Every other
// input is delegated to ClassifyHTTPStatus.
func ClassifyHTTPError(status int, message string) (ErrorKind, bool) {
	// Only non-empty 500 bodies are inspected; everything else goes by status.
	if status != 500 || message == "" {
		return ClassifyHTTPStatus(status)
	}

	haystack := strings.ToLower(message)
	for i := range nonRetryable500Substrings {
		needle := strings.ToLower(nonRetryable500Substrings[i])
		if strings.Contains(haystack, needle) {
			return ErrBadRequest, false
		}
	}
	return ClassifyHTTPStatus(status)
}
// ClassifyHTTPStatus maps an HTTP status code to its ErrorKind and reports
// whether the request may be retried.
//
//	401, 403                -> ErrAuth, not retryable
//	400                     -> ErrBadRequest, not retryable
//	404                     -> ErrNotFound, not retryable
//	429, 500, 502, 503, 529 -> ErrTransient, retryable
//	504                     -> ErrOverloaded, retryable
//	any other code >= 500   -> ErrTransient, retryable
//	anything else           -> ErrBadRequest, not retryable
//
// NOTE(review): 529 is classified as ErrTransient while 504 is classified as
// ErrOverloaded; confirm this split is intended.
func ClassifyHTTPStatus(status int) (ErrorKind, bool) {
	switch status {
	case 401, 403:
		return ErrAuth, false
	case 400:
		return ErrBadRequest, false
	case 404:
		return ErrNotFound, false
	case 429, 529, 500, 502, 503:
		return ErrTransient, true
	case 504:
		return ErrOverloaded, true
	}

	// Codes without an explicit entry: server-side errors are assumed to be
	// transient, everything else is treated as a bad request.
	if status >= 500 {
		return ErrTransient, true
	}
	return ErrBadRequest, false
}