Files
mistral-go-sdk/retry_test.go
vikingowl 9778dd6a8e feat: Phase 5 retry + resilience — exponential backoff, jitter, Retry-After
Add retry logic to all HTTP methods:
- doRetry() core loop with configurable max retries
- Exponential backoff with 0.5-1.5x jitter
- Retry-After header support (seconds and HTTP-date)
- Retry on 429 and 5xx; no retry on 4xx client errors
- Context cancellation respected during retry delays
- Multipart uploads also retry via doRetry()
- 9 new tests: 429/500 recovery, exhaustion, no-retry-on-400, backoff math
2026-03-05 19:44:32 +01:00

265 lines
7.4 KiB
Go

package mistral
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"sync/atomic"
"testing"
"time"
"somegit.dev/vikingowl/mistral-go-sdk/chat"
)
// TestRetry_429ThenSuccess verifies that a request answered with two
// consecutive 429 responses is retried and succeeds on the third attempt.
func TestRetry_429ThenSuccess(t *testing.T) {
	var attempts atomic.Int32
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Reject the first two requests; the third one gets a completion.
		n := attempts.Add(1)
		if n <= 2 {
			w.WriteHeader(http.StatusTooManyRequests)
			json.NewEncoder(w).Encode(map[string]any{"message": "rate limited"})
			return
		}
		json.NewEncoder(w).Encode(map[string]any{
			"id": "ok", "object": "chat.completion",
			"model": "m", "created": 0,
			"choices": []map[string]any{{
				"index": 0, "message": map[string]any{"role": "assistant", "content": "success"},
				"finish_reason": "stop",
			}},
			"usage": map[string]any{"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
		})
	}))
	defer server.Close()

	// Three retries are allowed, which is enough to get past the two 429s.
	client := NewClient("key",
		WithBaseURL(server.URL),
		WithRetry(3, 1*time.Millisecond),
	)
	resp, err := client.ChatComplete(context.Background(), &chat.CompletionRequest{
		Model:    "m",
		Messages: []chat.Message{&chat.UserMessage{Content: chat.TextContent("Hi")}},
	})
	if err != nil {
		t.Fatal(err)
	}
	// Fail with a readable message instead of panicking on an out-of-range
	// index if the response carries no choices.
	if len(resp.Choices) == 0 {
		t.Fatal("expected at least one choice in response")
	}
	if got := resp.Choices[0].Message.Content.String(); got != "success" {
		t.Errorf("got %q", got)
	}
	if got := attempts.Load(); got != 3 {
		t.Errorf("expected 3 attempts, got %d", got)
	}
}
// TestRetry_500ThenSuccess verifies that a single 500 response is retried and
// that the following successful response is returned to the caller.
func TestRetry_500ThenSuccess(t *testing.T) {
	var attempts atomic.Int32
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Only the very first request fails with a server error.
		n := attempts.Add(1)
		if n == 1 {
			w.WriteHeader(http.StatusInternalServerError)
			json.NewEncoder(w).Encode(map[string]any{"message": "server error"})
			return
		}
		json.NewEncoder(w).Encode(map[string]any{
			"id": "ok", "object": "chat.completion",
			"model": "m", "created": 0,
			"choices": []map[string]any{{
				"index": 0, "message": map[string]any{"role": "assistant", "content": "ok"},
				"finish_reason": "stop",
			}},
			"usage": map[string]any{"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
		})
	}))
	defer server.Close()

	client := NewClient("key",
		WithBaseURL(server.URL),
		WithRetry(2, 1*time.Millisecond),
	)
	resp, err := client.ChatComplete(context.Background(), &chat.CompletionRequest{
		Model:    "m",
		Messages: []chat.Message{&chat.UserMessage{Content: chat.TextContent("Hi")}},
	})
	if err != nil {
		t.Fatal(err)
	}
	// Fail with a readable message instead of panicking on an out-of-range
	// index if the response carries no choices.
	if len(resp.Choices) == 0 {
		t.Fatal("expected at least one choice in response")
	}
	if got := resp.Choices[0].Message.Content.String(); got != "ok" {
		t.Errorf("got %q", got)
	}
	if got := attempts.Load(); got != 2 {
		t.Errorf("expected 2 attempts, got %d", got)
	}
}
// TestRetry_NoRetryOn400 checks that a 400 response is reported to the caller
// after a single request even though retries are configured.
func TestRetry_NoRetryOn400(t *testing.T) {
	var calls atomic.Int32
	alwaysBadRequest := func(w http.ResponseWriter, _ *http.Request) {
		calls.Add(1)
		w.WriteHeader(http.StatusBadRequest)
		json.NewEncoder(w).Encode(map[string]any{"message": "bad request"})
	}
	srv := httptest.NewServer(http.HandlerFunc(alwaysBadRequest))
	defer srv.Close()

	sdk := NewClient("key",
		WithBaseURL(srv.URL),
		WithRetry(3, time.Millisecond),
	)
	req := &chat.CompletionRequest{
		Model:    "m",
		Messages: []chat.Message{&chat.UserMessage{Content: chat.TextContent("Hi")}},
	}
	if _, err := sdk.ChatComplete(context.Background(), req); err == nil {
		t.Fatal("expected error")
	}
	if got := calls.Load(); got != 1 {
		t.Errorf("expected 1 attempt (no retry on 400), got %d", got)
	}
}
// TestRetry_ExhaustedRetries checks that when every response is a 429 the
// client gives up after the initial request plus the configured two retries
// and surfaces an error that IsRateLimit recognizes.
func TestRetry_ExhaustedRetries(t *testing.T) {
	var calls atomic.Int32
	alwaysRateLimited := func(w http.ResponseWriter, _ *http.Request) {
		calls.Add(1)
		w.WriteHeader(http.StatusTooManyRequests)
		json.NewEncoder(w).Encode(map[string]any{"message": "rate limited"})
	}
	srv := httptest.NewServer(http.HandlerFunc(alwaysRateLimited))
	defer srv.Close()

	sdk := NewClient("key",
		WithBaseURL(srv.URL),
		WithRetry(2, time.Millisecond),
	)
	req := &chat.CompletionRequest{
		Model:    "m",
		Messages: []chat.Message{&chat.UserMessage{Content: chat.TextContent("Hi")}},
	}
	_, err := sdk.ChatComplete(context.Background(), req)
	if err == nil {
		t.Fatal("expected error after exhausting retries")
	}
	if !IsRateLimit(err) {
		t.Errorf("expected rate limit error, got: %v", err)
	}
	if got := calls.Load(); got != 3 {
		t.Errorf("expected 3 attempts (1 + 2 retries), got %d", got)
	}
}
// TestRetry_NoRetryByDefault checks that a client built without WithRetry
// sends exactly one request when the server answers 429.
func TestRetry_NoRetryByDefault(t *testing.T) {
	var calls atomic.Int32
	alwaysRateLimited := func(w http.ResponseWriter, _ *http.Request) {
		calls.Add(1)
		w.WriteHeader(http.StatusTooManyRequests)
		json.NewEncoder(w).Encode(map[string]any{"message": "rate limited"})
	}
	srv := httptest.NewServer(http.HandlerFunc(alwaysRateLimited))
	defer srv.Close()

	// No retry option is passed here on purpose.
	sdk := NewClient("key", WithBaseURL(srv.URL))
	req := &chat.CompletionRequest{
		Model:    "m",
		Messages: []chat.Message{&chat.UserMessage{Content: chat.TextContent("Hi")}},
	}
	if _, err := sdk.ChatComplete(context.Background(), req); err == nil {
		t.Fatal("expected error")
	}
	if got := calls.Load(); got != 1 {
		t.Errorf("expected 1 attempt (no retries configured), got %d", got)
	}
}
// TestRetry_RetryAfterHeader checks that a 429 response carrying a
// "Retry-After: 0" header is retried and that the second response is returned.
// NOTE(review): the test does not measure how long the client waited, so it
// does not by itself prove the header value was used for the delay.
func TestRetry_RetryAfterHeader(t *testing.T) {
	var calls atomic.Int32
	handler := func(w http.ResponseWriter, _ *http.Request) {
		// Any request after the first one is answered with a completion.
		if calls.Add(1) != 1 {
			json.NewEncoder(w).Encode(map[string]any{
				"id": "ok", "object": "chat.completion",
				"model": "m", "created": 0,
				"choices": []map[string]any{{
					"index": 0, "message": map[string]any{"role": "assistant", "content": "ok"},
					"finish_reason": "stop",
				}},
				"usage": map[string]any{"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
			})
			return
		}
		// Headers must be set before WriteHeader to be sent.
		w.Header().Set("Retry-After", "0")
		w.WriteHeader(http.StatusTooManyRequests)
		json.NewEncoder(w).Encode(map[string]any{"message": "rate limited"})
	}
	srv := httptest.NewServer(http.HandlerFunc(handler))
	defer srv.Close()

	sdk := NewClient("key",
		WithBaseURL(srv.URL),
		WithRetry(1, time.Millisecond),
	)
	req := &chat.CompletionRequest{
		Model:    "m",
		Messages: []chat.Message{&chat.UserMessage{Content: chat.TextContent("Hi")}},
	}
	resp, err := sdk.ChatComplete(context.Background(), req)
	if err != nil {
		t.Fatal(err)
	}
	if id := resp.ID; id != "ok" {
		t.Errorf("got id %q", id)
	}
}
// TestRetry_ContextCanceled verifies that a context deadline interrupts the
// wait between retries: the call must fail, and it must do so long before the
// configured retry delay would have elapsed.
func TestRetry_ContextCanceled(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusTooManyRequests)
		json.NewEncoder(w).Encode(map[string]any{"message": "rate limited"})
	}))
	defer server.Close()

	// A 10s base delay means the first backoff is far longer than the 50ms
	// context deadline below, so the deadline fires during the retry wait.
	client := NewClient("key",
		WithBaseURL(server.URL),
		WithRetry(10, 10*time.Second),
	)
	ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
	defer cancel()

	start := time.Now()
	_, err := client.ChatComplete(ctx, &chat.CompletionRequest{
		Model:    "m",
		Messages: []chat.Message{&chat.UserMessage{Content: chat.TextContent("Hi")}},
	})
	elapsed := time.Since(start)
	if err == nil {
		t.Fatal("expected error")
	}
	// Checking only for an error is not enough: a client that ignores the
	// context while sleeping would still return an error eventually. With
	// 0.5x jitter the shortest first backoff is 5s, so anything well below
	// that shows the wait was cut short by the context.
	if limit := 2 * time.Second; elapsed >= limit {
		t.Errorf("call took %v, expected it to return within %v of context expiry", elapsed, limit)
	}
}
// TestBackoff checks that, for attempts 1 through 5, Client.backoff returns a
// delay within 0.5x to 1.5x of the base delay doubled once per prior attempt.
func TestBackoff(t *testing.T) {
	const baseDelay = 100 * time.Millisecond
	client := &Client{retryDelay: baseDelay}

	// expected tracks baseDelay * 2^(attempt-1) by doubling after each round.
	expected := baseDelay
	for attempt := 1; attempt <= 5; attempt++ {
		got := client.backoff(attempt)
		lower := time.Duration(float64(expected) * 0.5)
		upper := time.Duration(float64(expected) * 1.5)
		if got < lower || got > upper {
			t.Errorf("attempt %d: backoff %v not in [%v, %v]", attempt, got, lower, upper)
		}
		expected *= 2
	}
}
// TestShouldRetry is a table-driven check of shouldRetry: 429 and the listed
// 5xx codes are expected to be retryable, 200 and the listed 4xx codes are not.
func TestShouldRetry(t *testing.T) {
	type retryCase struct {
		status int
		retry  bool
	}
	cases := []retryCase{
		{status: 200, retry: false},
		{status: 400, retry: false},
		{status: 401, retry: false},
		{status: 404, retry: false},
		{status: 429, retry: true},
		{status: 500, retry: true},
		{status: 502, retry: true},
		{status: 503, retry: true},
	}
	for i := range cases {
		tc := cases[i]
		got := shouldRetry(tc.status)
		if got == tc.retry {
			continue
		}
		t.Errorf("shouldRetry(%d) = %v, want %v", tc.status, got, tc.retry)
	}
}