feat: Phase 5 retry + resilience — exponential backoff, jitter, Retry-After

Add retry logic to all HTTP methods:
- doRetry() core loop with configurable max retries
- Exponential backoff with 0.5-1.5x jitter
- Retry-After header support (seconds and HTTP-date)
- Retry on 429 and 5xx; no retry on 4xx client errors
- Context cancellation respected during retry delays
- Multipart uploads also retry via doRetry()
- 9 new tests: 429/500 recovery, exhaustion, no-retry-on-400, backoff math
This commit is contained in:
2026-03-05 19:44:32 +01:00
parent 6c22c9810b
commit 2fecdbc2cc
2 changed files with 386 additions and 21 deletions

View File

@@ -6,25 +6,90 @@ import (
"encoding/json"
"fmt"
"io"
"math/rand/v2"
"mime/multipart"
"net/http"
"strconv"
"time"
)
// doRetry executes an HTTP request with retry logic.
// buildReq is called on each attempt to create a fresh request.
func (c *Client) doRetry(ctx context.Context, buildReq func() (*http.Request, error)) (*http.Response, error) {
maxAttempts := 1 + c.maxRetries
var lastErr error
var lastResp *http.Response
for attempt := range maxAttempts {
if attempt > 0 {
delay := c.backoff(attempt)
if lastResp != nil {
if ra := retryAfterDelay(lastResp); ra > delay {
delay = ra
}
}
select {
case <-ctx.Done():
return nil, ctx.Err()
case <-time.After(delay):
}
}
req, err := buildReq()
if err != nil {
return nil, fmt.Errorf("mistral: create request: %w", err)
}
resp, err := c.httpClient.Do(req)
if err != nil {
lastErr = fmt.Errorf("mistral: send request: %w", err)
if attempt < maxAttempts-1 {
continue
}
return nil, lastErr
}
if !shouldRetry(resp.StatusCode) || attempt >= maxAttempts-1 {
return resp, nil
}
io.Copy(io.Discard, resp.Body)
resp.Body.Close()
lastResp = resp
}
if lastErr != nil {
return nil, lastErr
}
return lastResp, nil
}
func (c *Client) do(ctx context.Context, method, path string, body io.Reader) (*http.Response, error) {
req, err := http.NewRequestWithContext(ctx, method, c.baseURL+path, body)
if err != nil {
return nil, fmt.Errorf("mistral: create request: %w", err)
}
req.Header.Set("Authorization", "Bearer "+c.apiKey)
req.Header.Set("Accept", "application/json")
var bodyBytes []byte
if body != nil {
req.Header.Set("Content-Type", "application/json")
var err error
bodyBytes, err = io.ReadAll(body)
if err != nil {
return nil, fmt.Errorf("mistral: buffer request body: %w", err)
}
}
resp, err := c.httpClient.Do(req)
if err != nil {
return nil, fmt.Errorf("mistral: send request: %w", err)
}
return resp, nil
return c.doRetry(ctx, func() (*http.Request, error) {
var br io.Reader
if bodyBytes != nil {
br = bytes.NewReader(bodyBytes)
}
req, err := http.NewRequestWithContext(ctx, method, c.baseURL+path, br)
if err != nil {
return nil, err
}
req.Header.Set("Authorization", "Bearer "+c.apiKey)
req.Header.Set("Accept", "application/json")
if bodyBytes != nil {
req.Header.Set("Content-Type", "application/json")
}
return req, nil
})
}
func (c *Client) doJSON(ctx context.Context, method, path string, reqBody, respBody any) error {
@@ -91,17 +156,21 @@ func (c *Client) doMultipart(ctx context.Context, path string, filename string,
return fmt.Errorf("mistral: close multipart: %w", err)
}
req, err := http.NewRequestWithContext(ctx, "POST", c.baseURL+path, &buf)
if err != nil {
return fmt.Errorf("mistral: create request: %w", err)
}
req.Header.Set("Authorization", "Bearer "+c.apiKey)
req.Header.Set("Accept", "application/json")
req.Header.Set("Content-Type", w.FormDataContentType())
bodyBytes := buf.Bytes()
ct := w.FormDataContentType()
resp, err := c.httpClient.Do(req)
resp, err := c.doRetry(ctx, func() (*http.Request, error) {
req, err := http.NewRequestWithContext(ctx, "POST", c.baseURL+path, bytes.NewReader(bodyBytes))
if err != nil {
return nil, err
}
req.Header.Set("Authorization", "Bearer "+c.apiKey)
req.Header.Set("Accept", "application/json")
req.Header.Set("Content-Type", ct)
return req, nil
})
if err != nil {
return fmt.Errorf("mistral: send request: %w", err)
return err
}
defer resp.Body.Close()
if resp.StatusCode >= 400 {
@@ -115,6 +184,38 @@ func (c *Client) doMultipart(ctx context.Context, path string, filename string,
return nil
}
// backoff computes the retry delay with exponential backoff and jitter.
func (c *Client) backoff(attempt int) time.Duration {
if c.retryDelay <= 0 {
return 0
}
delay := c.retryDelay * (1 << uint(attempt-1))
jitter := 0.5 + rand.Float64() // 0.51.5x
return time.Duration(float64(delay) * jitter)
}
// shouldRetry returns true if the status code is retryable.
func shouldRetry(statusCode int) bool {
return statusCode == http.StatusTooManyRequests || statusCode >= 500
}
// retryAfterDelay parses the Retry-After header.
func retryAfterDelay(resp *http.Response) time.Duration {
header := resp.Header.Get("Retry-After")
if header == "" {
return 0
}
if secs, err := strconv.Atoi(header); err == nil {
return time.Duration(secs) * time.Second
}
if t, err := http.ParseTime(header); err == nil {
if d := time.Until(t); d > 0 {
return d
}
}
return 0
}
func parseAPIError(resp *http.Response) error {
body, err := io.ReadAll(resp.Body)
if err != nil {

264
retry_test.go Normal file
View File

@@ -0,0 +1,264 @@
package mistral
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"sync/atomic"
"testing"
"time"
"somegit.dev/vikingowl/mistral-go-sdk/chat"
)
func TestRetry_429ThenSuccess(t *testing.T) {
var attempts atomic.Int32
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
n := attempts.Add(1)
if n <= 2 {
w.WriteHeader(http.StatusTooManyRequests)
json.NewEncoder(w).Encode(map[string]any{"message": "rate limited"})
return
}
json.NewEncoder(w).Encode(map[string]any{
"id": "ok", "object": "chat.completion",
"model": "m", "created": 0,
"choices": []map[string]any{{
"index": 0, "message": map[string]any{"role": "assistant", "content": "success"},
"finish_reason": "stop",
}},
"usage": map[string]any{"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
})
}))
defer server.Close()
client := NewClient("key",
WithBaseURL(server.URL),
WithRetry(3, 1*time.Millisecond),
)
resp, err := client.ChatComplete(context.Background(), &chat.CompletionRequest{
Model: "m",
Messages: []chat.Message{&chat.UserMessage{Content: chat.TextContent("Hi")}},
})
if err != nil {
t.Fatal(err)
}
if resp.Choices[0].Message.Content.String() != "success" {
t.Errorf("got %q", resp.Choices[0].Message.Content.String())
}
if attempts.Load() != 3 {
t.Errorf("expected 3 attempts, got %d", attempts.Load())
}
}
func TestRetry_500ThenSuccess(t *testing.T) {
var attempts atomic.Int32
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
n := attempts.Add(1)
if n == 1 {
w.WriteHeader(http.StatusInternalServerError)
json.NewEncoder(w).Encode(map[string]any{"message": "server error"})
return
}
json.NewEncoder(w).Encode(map[string]any{
"id": "ok", "object": "chat.completion",
"model": "m", "created": 0,
"choices": []map[string]any{{
"index": 0, "message": map[string]any{"role": "assistant", "content": "ok"},
"finish_reason": "stop",
}},
"usage": map[string]any{"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
})
}))
defer server.Close()
client := NewClient("key",
WithBaseURL(server.URL),
WithRetry(2, 1*time.Millisecond),
)
resp, err := client.ChatComplete(context.Background(), &chat.CompletionRequest{
Model: "m",
Messages: []chat.Message{&chat.UserMessage{Content: chat.TextContent("Hi")}},
})
if err != nil {
t.Fatal(err)
}
if resp.Choices[0].Message.Content.String() != "ok" {
t.Errorf("got %q", resp.Choices[0].Message.Content.String())
}
if attempts.Load() != 2 {
t.Errorf("expected 2 attempts, got %d", attempts.Load())
}
}
func TestRetry_NoRetryOn400(t *testing.T) {
var attempts atomic.Int32
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
attempts.Add(1)
w.WriteHeader(http.StatusBadRequest)
json.NewEncoder(w).Encode(map[string]any{"message": "bad request"})
}))
defer server.Close()
client := NewClient("key",
WithBaseURL(server.URL),
WithRetry(3, 1*time.Millisecond),
)
_, err := client.ChatComplete(context.Background(), &chat.CompletionRequest{
Model: "m",
Messages: []chat.Message{&chat.UserMessage{Content: chat.TextContent("Hi")}},
})
if err == nil {
t.Fatal("expected error")
}
if attempts.Load() != 1 {
t.Errorf("expected 1 attempt (no retry on 400), got %d", attempts.Load())
}
}
func TestRetry_ExhaustedRetries(t *testing.T) {
var attempts atomic.Int32
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
attempts.Add(1)
w.WriteHeader(http.StatusTooManyRequests)
json.NewEncoder(w).Encode(map[string]any{"message": "rate limited"})
}))
defer server.Close()
client := NewClient("key",
WithBaseURL(server.URL),
WithRetry(2, 1*time.Millisecond),
)
_, err := client.ChatComplete(context.Background(), &chat.CompletionRequest{
Model: "m",
Messages: []chat.Message{&chat.UserMessage{Content: chat.TextContent("Hi")}},
})
if err == nil {
t.Fatal("expected error after exhausting retries")
}
if !IsRateLimit(err) {
t.Errorf("expected rate limit error, got: %v", err)
}
if attempts.Load() != 3 {
t.Errorf("expected 3 attempts (1 + 2 retries), got %d", attempts.Load())
}
}
func TestRetry_NoRetryByDefault(t *testing.T) {
var attempts atomic.Int32
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
attempts.Add(1)
w.WriteHeader(http.StatusTooManyRequests)
json.NewEncoder(w).Encode(map[string]any{"message": "rate limited"})
}))
defer server.Close()
client := NewClient("key", WithBaseURL(server.URL))
_, err := client.ChatComplete(context.Background(), &chat.CompletionRequest{
Model: "m",
Messages: []chat.Message{&chat.UserMessage{Content: chat.TextContent("Hi")}},
})
if err == nil {
t.Fatal("expected error")
}
if attempts.Load() != 1 {
t.Errorf("expected 1 attempt (no retries configured), got %d", attempts.Load())
}
}
func TestRetry_RetryAfterHeader(t *testing.T) {
var attempts atomic.Int32
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
n := attempts.Add(1)
if n == 1 {
w.Header().Set("Retry-After", "0")
w.WriteHeader(http.StatusTooManyRequests)
json.NewEncoder(w).Encode(map[string]any{"message": "rate limited"})
return
}
json.NewEncoder(w).Encode(map[string]any{
"id": "ok", "object": "chat.completion",
"model": "m", "created": 0,
"choices": []map[string]any{{
"index": 0, "message": map[string]any{"role": "assistant", "content": "ok"},
"finish_reason": "stop",
}},
"usage": map[string]any{"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
})
}))
defer server.Close()
client := NewClient("key",
WithBaseURL(server.URL),
WithRetry(1, 1*time.Millisecond),
)
resp, err := client.ChatComplete(context.Background(), &chat.CompletionRequest{
Model: "m",
Messages: []chat.Message{&chat.UserMessage{Content: chat.TextContent("Hi")}},
})
if err != nil {
t.Fatal(err)
}
if resp.ID != "ok" {
t.Errorf("got id %q", resp.ID)
}
}
func TestRetry_ContextCanceled(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusTooManyRequests)
json.NewEncoder(w).Encode(map[string]any{"message": "rate limited"})
}))
defer server.Close()
client := NewClient("key",
WithBaseURL(server.URL),
WithRetry(10, 10*time.Second),
)
ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
defer cancel()
_, err := client.ChatComplete(ctx, &chat.CompletionRequest{
Model: "m",
Messages: []chat.Message{&chat.UserMessage{Content: chat.TextContent("Hi")}},
})
if err == nil {
t.Fatal("expected error")
}
}
func TestBackoff(t *testing.T) {
c := &Client{retryDelay: 100 * time.Millisecond}
for i := 1; i <= 5; i++ {
d := c.backoff(i)
base := 100 * time.Millisecond * (1 << uint(i-1))
minD := time.Duration(float64(base) * 0.5)
maxD := time.Duration(float64(base) * 1.5)
if d < minD || d > maxD {
t.Errorf("attempt %d: backoff %v not in [%v, %v]", i, d, minD, maxD)
}
}
}
func TestShouldRetry(t *testing.T) {
tests := []struct {
code int
want bool
}{
{200, false},
{400, false},
{401, false},
{404, false},
{429, true},
{500, true},
{502, true},
{503, true},
}
for _, tt := range tests {
if got := shouldRetry(tt.code); got != tt.want {
t.Errorf("shouldRetry(%d) = %v, want %v", tt.code, got, tt.want)
}
}
}