Files
gnoma/internal/provider/errors.go
T
vikingowl c4fde583f5 chore(lint): gofmt sweep + errcheck cleanups in router discovery
Apply gofmt -w across the codebase (struct field comment realignment
only — no semantic changes) and silence two errcheck warnings on
fmt.Sscanf / fmt.Fprintf return values in internal/router/discovery
with explicit `_, _ =` discards. Required so `make check` is green
before tagging v0.1.0.
2026-05-20 03:13:05 +02:00

106 lines
2.9 KiB
Go

package provider
import (
"fmt"
"strings"
"time"
)
// ErrorKind is the category assigned to a provider error. Callers use it to
// decide whether a failed request is worth retrying.
type ErrorKind int

// Known error kinds. Numbering starts at 1, so the zero value of ErrorKind
// matches none of them.
const (
	ErrTransient  ErrorKind = iota + 1 // 429, 500, 502, 503, 529 — retry with backoff
	ErrAuth                            // 401, 403 — don't retry
	ErrBadRequest                      // 400 — don't retry, fix request
	ErrNotFound                        // 404 — model/endpoint not found
	ErrOverloaded                      // capacity exhausted — backoff + retry
)

// errorKindNames holds the label of every known ErrorKind, indexed by the
// kind's numeric value. Index 0 is intentionally left empty because no kind
// has the value 0.
var errorKindNames = [...]string{
	ErrTransient:  "transient",
	ErrAuth:       "auth",
	ErrBadRequest: "bad_request",
	ErrNotFound:   "not_found",
	ErrOverloaded: "overloaded",
}

// String returns the lowercase label of k, or "unknown(N)" — N being the
// numeric value — when k is not one of the declared kinds.
func (k ErrorKind) String() string {
	if k >= ErrTransient && int(k) < len(errorKindNames) {
		return errorKindNames[k]
	}
	// %d formats the integer directly and never calls String, so this cannot
	// recurse.
	return fmt.Sprintf("unknown(%d)", int(k))
}
// ProviderError pairs an underlying SDK error with the classification
// metadata needed to report it and to decide how to react to it.
type ProviderError struct {
	Kind       ErrorKind     // classification used for retry decisions
	Provider   string        // provider identifier; printed first by Error
	StatusCode int           // status code; printed in parentheses by Error
	Message    string        // message text; printed after the status code by Error
	Retryable  bool          // whether the request may be retried (set by the constructing code)
	RetryAfter time.Duration // from Retry-After or rate limit headers
	Err        error         // underlying SDK error
}

// Error formats the error as "<provider> <kind> (<status>): <message>" and,
// when an underlying error is present, appends ": <underlying error>".
func (e *ProviderError) Error() string {
	summary := fmt.Sprintf("%s %s (%d): %s", e.Provider, e.Kind, e.StatusCode, e.Message)
	if e.Err == nil {
		return summary
	}
	return fmt.Sprintf("%s: %v", summary, e.Err)
}

// Unwrap exposes the underlying SDK error so that errors.Is and errors.As can
// look through a ProviderError. It returns nil when no error was wrapped.
func (e *ProviderError) Unwrap() error {
	return e.Err
}
// nonRetryable500Substrings lists fragments of error messages that mark an
// HTTP 500 response as a deterministic, request-caused failure (e.g. llama.cpp
// failing to parse a tool call). ClassifyHTTPError compares each entry against
// the message case-insensitively and, on a match, reports ErrBadRequest with
// retrying disabled, because the same request will always produce the same
// error.
var nonRetryable500Substrings = []string{
"Failed to parse tool call", // llama.cpp: model output invalid tool call JSON
"failed to parse tool call", // lowercase variant; redundant under ClassifyHTTPError's case-insensitive match
"tool_call_error", // some servers use this error type
"invalid_tool_call", // OpenAI-compat servers
}
// ClassifyHTTPError returns the ErrorKind and retryability for an HTTP error,
// taking the error message into account as well as the status code.
//
// Only a 500 with a non-empty message is inspected: if the message contains
// any entry of nonRetryable500Substrings (compared case-insensitively) the
// error is reported as ErrBadRequest and not retryable, which covers
// deterministic server-side 500s such as llama.cpp tool parse failures. Every
// other case is delegated to ClassifyHTTPStatus.
func ClassifyHTTPError(status int, message string) (ErrorKind, bool) {
	// Nothing to inspect unless this is a 500 that carries a message.
	if status != 500 || message == "" {
		return ClassifyHTTPStatus(status)
	}

	haystack := strings.ToLower(message)
	for i := range nonRetryable500Substrings {
		needle := strings.ToLower(nonRetryable500Substrings[i])
		if strings.Contains(haystack, needle) {
			return ErrBadRequest, false
		}
	}

	// No known deterministic failure matched; classify by status alone.
	return ClassifyHTTPStatus(status)
}
// ClassifyHTTPStatus returns the ErrorKind and retryability for an HTTP status
// code:
//
//   - 401, 403: ErrAuth, not retryable
//   - 404: ErrNotFound, not retryable
//   - 504: ErrOverloaded, retryable
//   - 429 and every other status >= 500: ErrTransient, retryable
//   - anything else (400 included): ErrBadRequest, not retryable
//
// NOTE(review): 504 is reported as ErrOverloaded while 529 is reported as
// ErrTransient — confirm this split is intended.
func ClassifyHTTPStatus(status int) (ErrorKind, bool) {
	switch {
	case status == 401 || status == 403:
		return ErrAuth, false
	case status == 404:
		return ErrNotFound, false
	case status == 504:
		// Must be tested before the generic >= 500 case below.
		return ErrOverloaded, true
	case status == 429 || status >= 500:
		return ErrTransient, true
	default:
		// 400 and every remaining status below 500.
		return ErrBadRequest, false
	}
}