ec9433d783
Brings the project to a clean `make lint` baseline (0 issues). Mechanical: - Wrap deferred resp.Body.Close() in closures (router/discovery.go, router/probe.go) so the unchecked return surfaces as `_ = ...`. - Apply `_ = ...` (single or multi-return blank) to test-file calls that intentionally ignore errors: os.MkdirAll / os.WriteFile / os.Chdir in setup paths, Close / Shutdown in teardown, Submit / Spawn / Send / LoadDir in tests that assert on side effects. Structural: - engine.handleRequestTooLarge drops the unused req parameter and rebuilds the request from compacted history (SA4009 — argument was overwritten before first use). - provider.ClassifyHTTPStatus and google.applyCapabilityOverrides switch to tagged switches over the discriminator (QF1002). - tui.app.go MouseWheel + inputMode and cmd/gnoma main slm-status use tagged switches in place of equality chains (QF1003). - cmd/gnoma main.go merges a var decl with its immediate assignment (S1021). - Three empty-branch sites (dispatcher_test, loader_test, coordinator_test) become real assertions or get the dead `if` removed (SA9003).
106 lines
2.9 KiB
Go
106 lines
2.9 KiB
Go
package provider
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// ErrorKind is the category assigned to a provider error; retry decisions are
// made from it.
type ErrorKind int

const (
	// ErrTransient covers 429, 500, 502, 503 and 529: retry with backoff.
	ErrTransient ErrorKind = iota + 1

	// ErrAuth covers 401 and 403: do not retry.
	ErrAuth

	// ErrBadRequest covers 400: do not retry, fix the request instead.
	ErrBadRequest

	// ErrNotFound covers 404: the model or endpoint was not found.
	ErrNotFound

	// ErrOverloaded means capacity is exhausted: back off, then retry.
	ErrOverloaded
)

// String returns the snake_case label of the kind. Values outside the declared
// constants are rendered as "unknown(N)", where N is the numeric value.
func (k ErrorKind) String() string {
	// Indexed by kind; slot 0 is unused because the constants start at 1.
	labels := [...]string{
		ErrTransient:  "transient",
		ErrAuth:       "auth",
		ErrBadRequest: "bad_request",
		ErrNotFound:   "not_found",
		ErrOverloaded: "overloaded",
	}
	if k >= ErrTransient && int(k) < len(labels) {
		return labels[k]
	}
	return fmt.Sprintf("unknown(%d)", k)
}
|
|
|
|
// ProviderError wraps an SDK error with classification metadata.
|
|
type ProviderError struct {
|
|
Kind ErrorKind
|
|
Provider string
|
|
StatusCode int
|
|
Message string
|
|
Retryable bool
|
|
RetryAfter time.Duration // from Retry-After or rate limit headers
|
|
Err error // underlying SDK error
|
|
}
|
|
|
|
func (e *ProviderError) Error() string {
|
|
if e.Err != nil {
|
|
return fmt.Sprintf("%s %s (%d): %s: %v", e.Provider, e.Kind, e.StatusCode, e.Message, e.Err)
|
|
}
|
|
return fmt.Sprintf("%s %s (%d): %s", e.Provider, e.Kind, e.StatusCode, e.Message)
|
|
}
|
|
|
|
func (e *ProviderError) Unwrap() error {
|
|
return e.Err
|
|
}
|
|
|
|
// nonRetryable500Substrings holds message fragments that mark an HTTP 500 as a
// deterministic client-side failure (llama.cpp is one server that reports such
// failures as 500). The same request always produces the same error, so these
// responses should not be retried.
var nonRetryable500Substrings = []string{
	// llama.cpp: the model output was invalid tool call JSON.
	"Failed to parse tool call",
	// Lowercase variant of the entry above.
	"failed to parse tool call",
	// Error type used by some servers.
	"tool_call_error",
	// OpenAI-compatible servers.
	"invalid_tool_call",
}
|
|
|
|
// ClassifyHTTPError classifies an HTTP error using both status code and the
|
|
// error message. This catches deterministic 500s (e.g. llama.cpp tool parse
|
|
// failures) that should not be retried.
|
|
func ClassifyHTTPError(status int, message string) (ErrorKind, bool) {
|
|
if status == 500 && message != "" {
|
|
lower := strings.ToLower(message)
|
|
for _, substr := range nonRetryable500Substrings {
|
|
if strings.Contains(lower, strings.ToLower(substr)) {
|
|
return ErrBadRequest, false
|
|
}
|
|
}
|
|
}
|
|
return ClassifyHTTPStatus(status)
|
|
}
|
|
|
|
// ClassifyHTTPStatus returns the ErrorKind and retryability for an HTTP status code.
|
|
func ClassifyHTTPStatus(status int) (ErrorKind, bool) {
|
|
switch status {
|
|
case 401, 403:
|
|
return ErrAuth, false
|
|
case 400:
|
|
return ErrBadRequest, false
|
|
case 404:
|
|
return ErrNotFound, false
|
|
case 429, 529:
|
|
return ErrTransient, true
|
|
case 500, 502, 503:
|
|
return ErrTransient, true
|
|
case 504:
|
|
return ErrOverloaded, true
|
|
default:
|
|
if status >= 500 {
|
|
return ErrTransient, true
|
|
}
|
|
return ErrBadRequest, false
|
|
}
|
|
}
|