99fa0ff08e
Bump hard-coded provider defaults to the May 2026 lineup: - Anthropic: claude-sonnet-4-6 (default); Opus 4.7 and Haiku 4.5 in the fallback list. 4.6/4.7 generation has 1M context standard. - OpenAI: gpt-5.5 (default); 5.5-pro / 5.2 / 5.2-chat-latest in fallback. ThinkingModes now baseline on GPT-5.x. - Google: gemini-3.5-flash (default); 3.1 Pro / Flash Lite in fallback. - Mistral: mistral-large-latest unchanged (Mistral Large 3); add mistral-medium-3.5, mistral-medium-2511, mistral-large-2512 to the rate-limit map. Legacy dated IDs retained in fallback lists and ratelimits maps so configs pinned to claude-sonnet-4-20250514 / gpt-4o / gemini-2.5-flash keep resolving. Capability tables (ContextWindow, MaxOutput, ThinkingModes) updated to match each generation. CLI help text in cmd/gnoma/main.go also updated.
178 lines
6.3 KiB
Go
178 lines
6.3 KiB
Go
package provider
|
|
|
|
import "math"
|
|
|
|
// RateLimits captures the quota ceilings that apply to one provider+model
// combination. A field left at its zero value means that limit is either
// absent ("no limit") or not known.
type RateLimits struct {
	// RPS is the requests-per-second ceiling (Mistral global).
	RPS float64

	// RPM is the requests-per-minute ceiling.
	RPM int

	// RPD is the requests-per-day ceiling.
	RPD int

	// TPM is the tokens-per-minute ceiling, counting input and output together.
	TPM int

	// ITPM is the input-tokens-per-minute ceiling (Anthropic).
	ITPM int

	// OTPM is the output-tokens-per-minute ceiling (Anthropic).
	OTPM int

	// TokensMonth is the tokens-per-month ceiling.
	TokensMonth int64

	// SpendCap is the monthly spend cap, expressed in the provider's currency.
	SpendCap float64
}
|
|
|
|
// MaxConcurrent returns the maximum number of concurrent in-flight requests
|
|
// that this rate limit allows. Returns 0 when there is no meaningful concurrency
|
|
// constraint (provider has high or unknown limits).
|
|
func (rl RateLimits) MaxConcurrent() int {
|
|
if rl.RPS > 0 {
|
|
n := int(math.Ceil(rl.RPS))
|
|
if n < 1 {
|
|
n = 1
|
|
}
|
|
return n
|
|
}
|
|
if rl.RPM > 0 {
|
|
// Allow 1 concurrent slot per 30 RPM (conservative heuristic).
|
|
n := rl.RPM / 30
|
|
if n < 1 {
|
|
n = 1
|
|
}
|
|
if n > 16 {
|
|
n = 16
|
|
}
|
|
return n
|
|
}
|
|
return 0
|
|
}
|
|
|
|
// ProviderDefaults holds default rate limits keyed by model glob.
|
|
// The special key "*" matches any model not explicitly listed.
|
|
type ProviderDefaults struct {
|
|
Provider string
|
|
Tier string // "free", "tier1", "tier2", etc.
|
|
Models map[string]RateLimits
|
|
}
|
|
|
|
// DefaultRateLimits returns conservative defaults for known providers.
|
|
// These are "starter tier" limits — users should override via config.
|
|
func DefaultRateLimits(providerName string) ProviderDefaults {
|
|
switch providerName {
|
|
case "mistral":
|
|
return mistralDefaults()
|
|
case "anthropic":
|
|
return anthropicDefaults()
|
|
case "openai":
|
|
return openaiDefaults()
|
|
case "google":
|
|
return googleDefaults()
|
|
default:
|
|
return ProviderDefaults{Provider: providerName}
|
|
}
|
|
}
|
|
|
|
// LookupModel finds rate limits for a specific model, falling back to "*".
|
|
func (pd ProviderDefaults) LookupModel(model string) (RateLimits, bool) {
|
|
if rl, ok := pd.Models[model]; ok {
|
|
return rl, true
|
|
}
|
|
if rl, ok := pd.Models["*"]; ok {
|
|
return rl, true
|
|
}
|
|
return RateLimits{}, false
|
|
}
|
|
|
|
func mistralDefaults() ProviderDefaults {
|
|
// Starter tier from Mistral dashboard. Spend cap is variable — not hardcoded.
|
|
base := RateLimits{RPS: 1, TPM: 50_000, TokensMonth: 4_000_000}
|
|
return ProviderDefaults{
|
|
Provider: "mistral",
|
|
Tier: "starter",
|
|
Models: map[string]RateLimits{
|
|
"*": base,
|
|
// Mistral 3 (released Dec 2025) — flagship.
|
|
"mistral-large-3": {RPS: 1, TPM: 600_000, TokensMonth: 200_000_000_000},
|
|
"mistral-large-2512": {RPS: 1, TPM: 600_000, TokensMonth: 200_000_000_000},
|
|
"mistral-large-latest": {RPS: 1, TPM: 50_000, TokensMonth: 4_000_000},
|
|
"mistral-medium-3.5": {RPS: 1, TPM: 375_000},
|
|
"mistral-medium-2511": {RPS: 1, TPM: 375_000},
|
|
// Magistral models get higher limits
|
|
"magistral-medium-2509": {RPS: 1, TPM: 75_000, TokensMonth: 1_000_000_000},
|
|
"magistral-small-2509": {RPS: 1, TPM: 75_000, TokensMonth: 1_000_000_000},
|
|
// Older Large/medium
|
|
"mistral-large-2411": {RPS: 1, TPM: 600_000, TokensMonth: 200_000_000_000},
|
|
"mistral-medium-2505": {RPS: 1, TPM: 375_000},
|
|
"mistral-medium-2508": {RPS: 1, TPM: 375_000},
|
|
"mistral-small-2603": {RPS: 1, TPM: 375_000},
|
|
// Codestral
|
|
"codestral-2508": {RPS: 1, TPM: 50_000, TokensMonth: 4_000_000},
|
|
// Pixtral
|
|
"pixtral-large-2411": {RPS: 1, TPM: 50_000, TokensMonth: 4_000_000},
|
|
},
|
|
}
|
|
}
|
|
|
|
func anthropicDefaults() ProviderDefaults {
|
|
// Tier 1 (lowest paid tier, $5 deposit). Users on higher tiers override via config.
|
|
return ProviderDefaults{
|
|
Provider: "anthropic",
|
|
Tier: "tier1",
|
|
Models: map[string]RateLimits{
|
|
"*": {RPM: 50, ITPM: 30_000, OTPM: 8_000},
|
|
// Claude 4.6 / 4.7 generation — dateless IDs.
|
|
"claude-opus-4-7": {RPM: 50, ITPM: 30_000, OTPM: 8_000},
|
|
"claude-opus-4-6": {RPM: 50, ITPM: 30_000, OTPM: 8_000},
|
|
"claude-sonnet-4-6": {RPM: 50, ITPM: 30_000, OTPM: 8_000},
|
|
"claude-haiku-4-5": {RPM: 50, ITPM: 50_000, OTPM: 10_000},
|
|
"claude-haiku-4-5-20251001": {RPM: 50, ITPM: 50_000, OTPM: 10_000},
|
|
// Legacy dated 4.0 IDs.
|
|
"claude-opus-4-20250514": {RPM: 50, ITPM: 30_000, OTPM: 8_000},
|
|
"claude-opus-4-0": {RPM: 50, ITPM: 30_000, OTPM: 8_000},
|
|
"claude-sonnet-4-20250514": {RPM: 50, ITPM: 30_000, OTPM: 8_000},
|
|
"claude-sonnet-4-0": {RPM: 50, ITPM: 30_000, OTPM: 8_000},
|
|
"claude-3-5-haiku-20241022": {RPM: 50, ITPM: 50_000, OTPM: 10_000},
|
|
},
|
|
}
|
|
}
|
|
|
|
func openaiDefaults() ProviderDefaults {
|
|
// Tier 1 ($5 paid). Higher tiers have dramatically higher limits.
|
|
return ProviderDefaults{
|
|
Provider: "openai",
|
|
Tier: "tier1",
|
|
Models: map[string]RateLimits{
|
|
"*": {RPM: 500, TPM: 30_000, RPD: 10_000},
|
|
// GPT-5.5 generation.
|
|
"gpt-5.5": {RPM: 500, TPM: 30_000, RPD: 10_000},
|
|
"gpt-5.5-pro": {RPM: 500, TPM: 30_000, RPD: 10_000},
|
|
"gpt-5.5-2026-04-23": {RPM: 500, TPM: 30_000, RPD: 10_000},
|
|
// GPT-5.2 generation.
|
|
"gpt-5.2": {RPM: 500, TPM: 200_000, RPD: 10_000},
|
|
"gpt-5.2-chat-latest": {RPM: 500, TPM: 200_000, RPD: 10_000},
|
|
// Legacy.
|
|
"gpt-4o": {RPM: 500, TPM: 30_000, RPD: 10_000},
|
|
"gpt-4o-mini": {RPM: 500, TPM: 200_000, RPD: 10_000},
|
|
"o1": {RPM: 500, TPM: 30_000},
|
|
"o3": {RPM: 500, TPM: 30_000},
|
|
"o3-mini": {RPM: 500, TPM: 200_000},
|
|
"o4-mini": {RPM: 500, TPM: 200_000},
|
|
},
|
|
}
|
|
}
|
|
|
|
func googleDefaults() ProviderDefaults {
|
|
// Free tier. Pay-as-you-go Tier 1 is significantly higher.
|
|
return ProviderDefaults{
|
|
Provider: "google",
|
|
Tier: "free",
|
|
Models: map[string]RateLimits{
|
|
"*": {RPM: 15, TPM: 250_000, RPD: 250},
|
|
// Gemini 3.x generation.
|
|
"gemini-3.1-pro-preview": {RPM: 5, TPM: 250_000, RPD: 100},
|
|
"gemini-3.5-flash": {RPM: 15, TPM: 250_000, RPD: 250},
|
|
"gemini-3.1-flash-lite": {RPM: 15, TPM: 250_000, RPD: 250},
|
|
"gemini-3.1-flash-image-preview": {RPM: 15, TPM: 250_000, RPD: 250},
|
|
// Legacy.
|
|
"gemini-2.5-pro": {RPM: 5, TPM: 250_000, RPD: 100},
|
|
"gemini-2.5-pro-preview-05-06": {RPM: 5, TPM: 250_000, RPD: 100},
|
|
"gemini-2.5-flash": {RPM: 15, TPM: 250_000, RPD: 250},
|
|
"gemini-2.5-flash-preview-04-17": {RPM: 15, TPM: 250_000, RPD: 250},
|
|
"gemini-2.0-flash": {RPM: 10, RPD: 1_500},
|
|
},
|
|
}
|
|
}
|