Stop retrying llama.cpp 500s that are deterministic tool-parse failures by inspecting the error message body (ClassifyHTTPError). Wrap OpenAI SDK errors as ProviderError so the engine's retry logic classifies them. Add localInitPrompt for local models that uses sequential fs_* calls instead of spawn_elfs (which local models can't produce reliably).
106 lines
3.0 KiB
Go
106 lines
3.0 KiB
Go
package provider
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// ErrorKind classifies provider errors for retry decisions.
//
// Valid kinds start at 1, so the zero value is not a valid kind; String
// renders it (and any other unrecognized value) as "unknown(N)".
type ErrorKind int

// The status codes noted on each kind describe how ClassifyHTTPStatus assigns
// it.
const (
	// ErrTransient covers 429, 529 and every 5xx other than 504 — retry with
	// backoff.
	ErrTransient ErrorKind = iota + 1
	// ErrAuth covers 401 and 403 — don't retry.
	ErrAuth
	// ErrBadRequest covers 400 and any other status below 500 that has no
	// dedicated kind — don't retry, fix the request. ClassifyHTTPError also
	// returns it for 500 responses whose message marks a deterministic failure.
	ErrBadRequest
	// ErrNotFound covers 404 — model/endpoint not found; not retried.
	ErrNotFound
	// ErrOverloaded means capacity exhausted — backoff + retry. It is the kind
	// assigned to 504.
	ErrOverloaded
)

// String returns the lowercase label of k: "transient", "auth",
// "bad_request", "not_found" or "overloaded". Any other value, including the
// zero value, is rendered as "unknown(N)" where N is the numeric value.
func (k ErrorKind) String() string {
	switch k {
	case ErrTransient:
		return "transient"
	case ErrAuth:
		return "auth"
	case ErrBadRequest:
		return "bad_request"
	case ErrNotFound:
		return "not_found"
	case ErrOverloaded:
		return "overloaded"
	}
	// Format the underlying integer explicitly so this can never re-enter String.
	return fmt.Sprintf("unknown(%d)", int(k))
}
|
|
|
|
// ProviderError wraps an SDK error with classification metadata.
|
|
type ProviderError struct {
|
|
Kind ErrorKind
|
|
Provider string
|
|
StatusCode int
|
|
Message string
|
|
Retryable bool
|
|
RetryAfter time.Duration // from Retry-After or rate limit headers
|
|
Err error // underlying SDK error
|
|
}
|
|
|
|
func (e *ProviderError) Error() string {
|
|
if e.Err != nil {
|
|
return fmt.Sprintf("%s %s (%d): %s: %v", e.Provider, e.Kind, e.StatusCode, e.Message, e.Err)
|
|
}
|
|
return fmt.Sprintf("%s %s (%d): %s", e.Provider, e.Kind, e.StatusCode, e.Message)
|
|
}
|
|
|
|
func (e *ProviderError) Unwrap() error {
|
|
return e.Err
|
|
}
|
|
|
|
// nonRetryable500Substrings lists error messages from servers (e.g. llama.cpp)
|
|
// that return 500 for deterministic client-side failures. These should not be
|
|
// retried because the same request will always produce the same error.
|
|
var nonRetryable500Substrings = []string{
|
|
"Failed to parse tool call", // llama.cpp: model output invalid tool call JSON
|
|
"failed to parse tool call", // lowercase variant
|
|
"tool_call_error", // some servers use this error type
|
|
"invalid_tool_call", // OpenAI-compat servers
|
|
}
|
|
|
|
// ClassifyHTTPError classifies an HTTP error using both status code and the
|
|
// error message. This catches deterministic 500s (e.g. llama.cpp tool parse
|
|
// failures) that should not be retried.
|
|
func ClassifyHTTPError(status int, message string) (ErrorKind, bool) {
|
|
if status == 500 && message != "" {
|
|
lower := strings.ToLower(message)
|
|
for _, substr := range nonRetryable500Substrings {
|
|
if strings.Contains(lower, strings.ToLower(substr)) {
|
|
return ErrBadRequest, false
|
|
}
|
|
}
|
|
}
|
|
return ClassifyHTTPStatus(status)
|
|
}
|
|
|
|
// ClassifyHTTPStatus returns the ErrorKind and retryability for an HTTP status code.
|
|
func ClassifyHTTPStatus(status int) (ErrorKind, bool) {
|
|
switch {
|
|
case status == 401 || status == 403:
|
|
return ErrAuth, false
|
|
case status == 400:
|
|
return ErrBadRequest, false
|
|
case status == 404:
|
|
return ErrNotFound, false
|
|
case status == 429 || status == 529:
|
|
return ErrTransient, true
|
|
case status == 500 || status == 502 || status == 503:
|
|
return ErrTransient, true
|
|
case status == 504:
|
|
return ErrOverloaded, true
|
|
default:
|
|
if status >= 500 {
|
|
return ErrTransient, true
|
|
}
|
|
return ErrBadRequest, false
|
|
}
|
|
}
|