2f8d4c412f
Closes R-4 and R-5 of the routing-defaults plan.
R-4: Strengths + CostWeight defaults for closed frontier models.
Cloud entries land in the same knownFamilyDefaults table as local
ones, with MaxComplexity intentionally left zero (cloud arms get
no complexity ceiling). CostWeight tuned per the plan's rationale:
claude-opus-4-7 → Planning/SecurityReview/Debug/Refactor, 0.3
claude-sonnet-4-6 → Generation/Refactor/Review, 0.7
gpt-5.5 → Planning/SecurityReview/Generation, 0.3
gpt-5.3-codex → Generation/Refactor/Debug/UnitTest, 0.6
gpt-5.2 → Orchestration/Review, 0.8
gemini-3.1-pro → Planning/Review/Orchestration, 0.5
gemini-3.5-flash → Boilerplate/Explain/Orchestration, 1.2
The 0.3 weight on frontier arms keeps them competitive on
SecurityReview / Planning despite $4+/Mtok; 1.2 on Gemini Flash
penalizes cost more so it only wins when cost is genuinely
decisive (boilerplate, explain).
Mechanism: extracted applyFamilyDefaults into defaults.go and call
it from Router.RegisterArm. Single source of truth — both local
discovery and the primary-provider path in cmd/gnoma/main.go now
flow through the same defaults application. Removed the duplicate
apply block from RegisterDiscoveredModels.
Legacy model IDs (claude-opus-4-20250514, gpt-4o, o3, gemini-2.5-pro,
etc.) intentionally do not match any table entry — keeps users on
pinned older models safe from imposed 2026 Strengths.
R-5: gpt-5.3-codex registration.
- internal/provider/openai/provider.go: added to fallbackModels
and inferOpenAIModelCapabilities (400K context, 32K output).
- internal/provider/ratelimits.go: gpt-5.3-codex and its dated
alias gpt-5.3-codex-2026-02-15 added with the same Tier 1
quotas as gpt-5.2.
Gemini 3.x (3.1-pro-preview, 3.5-flash, 3.1-flash-lite) was already
registered in both google/provider.go and ratelimits.go — no change
needed for that part of R-5.
Test coverage:
- ResolveFamilyDefaults table-driven across all 7 cloud entries
including prefix-sharing (gpt-5.5-pro → gpt-5.5 defaults,
gemini-3.1-pro-preview → gemini-3.1-pro defaults).
- Legacy IDs return !ok.
- RegisterArm applies cloud defaults end-to-end.
- User-supplied Strengths and CostWeight are not overridden.
- ID.Model() fallback works when ModelName is empty (test code
often constructs arms this way).
Refs: docs/superpowers/plans/2026-05-23-routing-defaults-refresh.md
476 lines
18 KiB
Go
476 lines
18 KiB
Go
package router
|
|
|
|
import (
|
|
"reflect"
|
|
"sort"
|
|
"testing"
|
|
|
|
"somegit.dev/Owlibou/gnoma/internal/provider"
|
|
"somegit.dev/Owlibou/gnoma/internal/security"
|
|
)
|
|
|
|
// --- parseSizeFromModelID -------------------------------------------------
|
|
|
|
func TestParseSizeFromModelID(t *testing.T) {
|
|
cases := []struct {
|
|
name string
|
|
id string
|
|
want float64
|
|
wantOK bool
|
|
}{
|
|
{"ollama colon", "qwen3:14b", 14, true},
|
|
{"ollama colon decimal", "tiny3.5:1.5b", 1.5, true},
|
|
{"ollama colon millions", "reecdev/tiny3.5:500m", 0.5, true},
|
|
{"hyphen middle", "qwen3.5-9b-glm5.1-distill-v1", 9, true},
|
|
{"moe total wins over active", "qwen3-coder:30b-a3b-q4_K_M", 30, true},
|
|
{"namespace stripped", "google/functiongemma-270m-it", 0.27, true},
|
|
{"no size tag", "phi-4", 0, false},
|
|
{"plain version no b", "qwen3.5", 0, false},
|
|
{"gemma e-tag not pure size", "gemma-4-e2b-it", 0, false},
|
|
{"starcoder digit-only family", "starcoder2", 0, false},
|
|
{"large MoE", "qwen3-coder:480b", 480, true},
|
|
}
|
|
for _, tc := range cases {
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
got, ok := parseSizeFromModelID(tc.id)
|
|
if ok != tc.wantOK {
|
|
t.Fatalf("parseSizeFromModelID(%q) ok=%v, want %v (got value %v)", tc.id, ok, tc.wantOK, got)
|
|
}
|
|
if ok && got != tc.want {
|
|
t.Errorf("parseSizeFromModelID(%q) = %v, want %v", tc.id, got, tc.want)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
// --- ResolveFamilyDefaults: longest-prefix discipline ---------------------
|
|
|
|
func TestResolveFamilyDefaults_LongestPrefixWins(t *testing.T) {
|
|
cases := []struct {
|
|
modelID string
|
|
wantFamily string // expected family key (longest matching)
|
|
}{
|
|
{"qwen3-coder:30b", "qwen3-coder"},
|
|
{"qwen3:14b", "qwen3"},
|
|
{"qwen3.5:4b", "qwen3.5"},
|
|
{"qwen3.5-9b-glm5.1-distill-v1", "qwen3.5"},
|
|
{"qwen2.5-coder:14b", "qwen2.5-coder"},
|
|
{"qwen2.5:7b", "qwen2.5"},
|
|
{"qwen-novel:7b", "qwen"},
|
|
{"mistral-small-3:24b", "mistral-small-3"},
|
|
{"mistral-7b-instruct-v0.3", "mistral"},
|
|
{"ministral-3:14b", "ministral-3"},
|
|
{"gemma4:latest", "gemma4"},
|
|
{"gemma4-e4b-uc:latest", "gemma4-e"},
|
|
{"gemma-4-e2b-it", "gemma-4-e"},
|
|
{"phi-4-mini", "phi-4-mini"},
|
|
{"phi-4:14b", "phi-4"},
|
|
{"tiny3.5:1.5b", "tiny3.5"},
|
|
{"reecdev/tiny3.5:500m", "tiny3.5"},
|
|
{"google/functiongemma-270m-it", "functiongemma"},
|
|
{"glm-ocr", "glm-ocr"},
|
|
{"glm-5.1", "glm"},
|
|
}
|
|
for _, tc := range cases {
|
|
t.Run(tc.modelID, func(t *testing.T) {
|
|
defaults, ok := ResolveFamilyDefaults(tc.modelID)
|
|
if !ok {
|
|
t.Fatalf("ResolveFamilyDefaults(%q) returned !ok", tc.modelID)
|
|
}
|
|
expected, ok := knownFamilyDefaults[tc.wantFamily]
|
|
if !ok {
|
|
t.Fatalf("test bug: %q not in knownFamilyDefaults", tc.wantFamily)
|
|
}
|
|
if !reflect.DeepEqual(defaults.Strengths, expected.Strengths) ||
|
|
defaults.MaxComplexity != expected.MaxComplexity ||
|
|
defaults.Disabled != expected.Disabled {
|
|
t.Errorf("%q resolved to wrong family — got Strengths=%v MaxComplexity=%v Disabled=%v, want family %q Strengths=%v MaxComplexity=%v Disabled=%v",
|
|
tc.modelID, defaults.Strengths, defaults.MaxComplexity, defaults.Disabled,
|
|
tc.wantFamily, expected.Strengths, expected.MaxComplexity, expected.Disabled)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestResolveFamilyDefaults_Unknown(t *testing.T) {
|
|
for _, id := range []string{
|
|
"some-novel-model:1.5b",
|
|
"falcon:7b",
|
|
"command-r:35b",
|
|
} {
|
|
if _, ok := ResolveFamilyDefaults(id); ok {
|
|
t.Errorf("ResolveFamilyDefaults(%q) should not match anything in the table", id)
|
|
}
|
|
}
|
|
}
|
|
|
|
// --- ResolveMaxComplexity: size-keyed lookup -----------------------------
|
|
|
|
func TestResolveMaxComplexity_SizeKeyed(t *testing.T) {
|
|
cases := []struct {
|
|
id string
|
|
want float64
|
|
}{
|
|
// ministral-3 ladder: 14b → 0.70, 8b → 0.55, 3b → 0.35
|
|
{"ministral-3:14b", 0.70},
|
|
{"ministral-3:8b", 0.55},
|
|
{"ministral-3:3b", 0.35},
|
|
// qwen3 ladder: 14b → 0.75, 7-13b → 0.65, <7b → 0.50
|
|
{"qwen3:14b", 0.75},
|
|
{"qwen3:7b", 0.65},
|
|
{"qwen3:4b", 0.50},
|
|
// qwen3.5 ladder: 9b → 0.65, 4-8b → 0.50, <4b → 0.40
|
|
{"qwen3.5-9b-glm5.1-distill-v1", 0.65},
|
|
{"qwen3.5:4b", 0.50},
|
|
// tiny3.5 ladder: 1.5b → 0.30, 0.5b → 0.20
|
|
{"reecdev/tiny3.5:1.5b", 0.30},
|
|
{"reecdev/tiny3.5:500m", 0.20},
|
|
// flat caps still resolve correctly
|
|
{"qwen3-coder:30b", 0.85},
|
|
{"phi-4:14b", 0.65},
|
|
{"gemma4-e4b-uc:latest", 0.45},
|
|
}
|
|
for _, tc := range cases {
|
|
t.Run(tc.id, func(t *testing.T) {
|
|
got, ok := ResolveMaxComplexity(tc.id)
|
|
if !ok {
|
|
t.Fatalf("ResolveMaxComplexity(%q) returned !ok", tc.id)
|
|
}
|
|
if got != tc.want {
|
|
t.Errorf("ResolveMaxComplexity(%q) = %v, want %v", tc.id, got, tc.want)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestResolveMaxComplexity_SizeParseFailsFallsBack(t *testing.T) {
|
|
// "qwen3" with no size tag — uses smallest SizeCap as conservative fallback.
|
|
got, ok := ResolveMaxComplexity("qwen3")
|
|
if !ok {
|
|
t.Fatal("ResolveMaxComplexity should resolve unsized qwen3 via fallback")
|
|
}
|
|
if got != 0.50 {
|
|
t.Errorf("ResolveMaxComplexity(\"qwen3\") = %v, want 0.50 (smallest SizeCap fallback)", got)
|
|
}
|
|
}
|
|
|
|
// --- Table integrity ------------------------------------------------------
|
|
|
|
// TestKnownFamilyDefaults_SizeCapsOrdered confirms SizeCaps entries are
|
|
// stored largest-first, since ResolveMaxComplexity iterates and stops at
|
|
// the first match.
|
|
func TestKnownFamilyDefaults_SizeCapsOrdered(t *testing.T) {
|
|
for key, fd := range knownFamilyDefaults {
|
|
if len(fd.SizeCaps) < 2 {
|
|
continue
|
|
}
|
|
thresholds := make([]float64, len(fd.SizeCaps))
|
|
for i, sc := range fd.SizeCaps {
|
|
thresholds[i] = sc.MinSizeB
|
|
}
|
|
sorted := append([]float64(nil), thresholds...)
|
|
sort.Sort(sort.Reverse(sort.Float64Slice(sorted)))
|
|
if !reflect.DeepEqual(thresholds, sorted) {
|
|
t.Errorf("family %q SizeCaps not ordered largest-first: %v", key, thresholds)
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestKnownFamilyDefaults_NoDualSpec confirms entries don't declare both
|
|
// SizeCaps and MaxComplexity — they're mutually exclusive in the lookup.
|
|
func TestKnownFamilyDefaults_NoDualSpec(t *testing.T) {
|
|
for key, fd := range knownFamilyDefaults {
|
|
if len(fd.SizeCaps) > 0 && fd.MaxComplexity > 0 {
|
|
t.Errorf("family %q declares both SizeCaps and MaxComplexity; pick one", key)
|
|
}
|
|
}
|
|
}
|
|
|
|
// --- Cloud defaults --------------------------------------------------------
|
|
|
|
func TestResolveFamilyDefaults_CloudArms(t *testing.T) {
|
|
cases := []struct {
|
|
modelID string
|
|
wantStrengths []TaskType
|
|
wantCostWeight float64
|
|
}{
|
|
{"claude-opus-4-7", []TaskType{TaskPlanning, TaskSecurityReview, TaskDebug, TaskRefactor}, 0.3},
|
|
{"claude-sonnet-4-6", []TaskType{TaskGeneration, TaskRefactor, TaskReview}, 0.7},
|
|
{"gpt-5.5", []TaskType{TaskPlanning, TaskSecurityReview, TaskGeneration}, 0.3},
|
|
{"gpt-5.5-pro", []TaskType{TaskPlanning, TaskSecurityReview, TaskGeneration}, 0.3}, // shares prefix with gpt-5.5
|
|
{"gpt-5.3-codex", []TaskType{TaskGeneration, TaskRefactor, TaskDebug, TaskUnitTest}, 0.6},
|
|
{"gpt-5.2", []TaskType{TaskOrchestration, TaskReview}, 0.8},
|
|
{"gpt-5.2-chat-latest", []TaskType{TaskOrchestration, TaskReview}, 0.8},
|
|
{"gemini-3.1-pro", []TaskType{TaskPlanning, TaskReview, TaskOrchestration}, 0.5},
|
|
{"gemini-3.1-pro-preview", []TaskType{TaskPlanning, TaskReview, TaskOrchestration}, 0.5},
|
|
{"gemini-3.5-flash", []TaskType{TaskBoilerplate, TaskExplain, TaskOrchestration}, 1.2},
|
|
}
|
|
for _, tc := range cases {
|
|
t.Run(tc.modelID, func(t *testing.T) {
|
|
got, ok := ResolveFamilyDefaults(tc.modelID)
|
|
if !ok {
|
|
t.Fatalf("ResolveFamilyDefaults(%q) returned !ok", tc.modelID)
|
|
}
|
|
if !reflect.DeepEqual(got.Strengths, tc.wantStrengths) {
|
|
t.Errorf("%q Strengths = %v, want %v", tc.modelID, got.Strengths, tc.wantStrengths)
|
|
}
|
|
if got.CostWeight != tc.wantCostWeight {
|
|
t.Errorf("%q CostWeight = %v, want %v", tc.modelID, got.CostWeight, tc.wantCostWeight)
|
|
}
|
|
if got.MaxComplexity != 0 {
|
|
t.Errorf("%q MaxComplexity = %v, want 0 (cloud arms have no ceiling)", tc.modelID, got.MaxComplexity)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestResolveFamilyDefaults_CloudLegacyUnaffected(t *testing.T) {
|
|
// Legacy / unrelated cloud IDs must NOT pick up defaults — keeping
|
|
// users on older pinned models safe from imposed Strengths.
|
|
noMatch := []string{
|
|
"claude-opus-4-20250514",
|
|
"claude-sonnet-4-20250514",
|
|
"claude-haiku-4-5-20251001",
|
|
"gpt-4o",
|
|
"gpt-4o-mini",
|
|
"o3",
|
|
"o3-mini",
|
|
"gemini-2.5-pro",
|
|
"gemini-2.0-flash",
|
|
}
|
|
for _, id := range noMatch {
|
|
if _, ok := ResolveFamilyDefaults(id); ok {
|
|
t.Errorf("ResolveFamilyDefaults(%q) should not match (legacy model)", id)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestRegisterArm_AppliesCloudDefaults(t *testing.T) {
|
|
r := New(Config{})
|
|
r.RegisterArm(&Arm{
|
|
ID: NewArmID("openai", "gpt-5.3-codex"),
|
|
ModelName: "gpt-5.3-codex",
|
|
Capabilities: provider.Capabilities{
|
|
ToolUse: true, JSONOutput: true,
|
|
ContextWindow: 400000,
|
|
},
|
|
})
|
|
arm, ok := r.LookupArm(NewArmID("openai", "gpt-5.3-codex"))
|
|
if !ok {
|
|
t.Fatal("gpt-5.3-codex arm should be registered")
|
|
}
|
|
wantStrengths := []TaskType{TaskGeneration, TaskRefactor, TaskDebug, TaskUnitTest}
|
|
if !reflect.DeepEqual(arm.Strengths, wantStrengths) {
|
|
t.Errorf("Strengths = %v, want %v", arm.Strengths, wantStrengths)
|
|
}
|
|
if arm.CostWeight != 0.6 {
|
|
t.Errorf("CostWeight = %v, want 0.6", arm.CostWeight)
|
|
}
|
|
if arm.MaxComplexity != 0 {
|
|
t.Errorf("MaxComplexity = %v, want 0 (cloud arm)", arm.MaxComplexity)
|
|
}
|
|
}
|
|
|
|
func TestRegisterArm_DoesNotOverrideUserStrengths(t *testing.T) {
|
|
r := New(Config{})
|
|
r.RegisterArm(&Arm{
|
|
ID: NewArmID("anthropic", "claude-opus-4-7"),
|
|
ModelName: "claude-opus-4-7",
|
|
Strengths: []TaskType{TaskUnitTest}, // user-supplied; defaults should not overwrite
|
|
CostWeight: 0.5, // user-supplied
|
|
})
|
|
arm, _ := r.LookupArm(NewArmID("anthropic", "claude-opus-4-7"))
|
|
if !reflect.DeepEqual(arm.Strengths, []TaskType{TaskUnitTest}) {
|
|
t.Errorf("user-supplied Strengths overridden by defaults: got %v", arm.Strengths)
|
|
}
|
|
if arm.CostWeight != 0.5 {
|
|
t.Errorf("user-supplied CostWeight overridden: got %v", arm.CostWeight)
|
|
}
|
|
}
|
|
|
|
func TestRegisterArm_FallsBackToIDWhenModelNameMissing(t *testing.T) {
|
|
// Some test code constructs arms with ID but no ModelName.
|
|
// applyFamilyDefaults should fall back to ID.Model() so defaults
|
|
// still flow through.
|
|
r := New(Config{})
|
|
r.RegisterArm(&Arm{
|
|
ID: NewArmID("openai", "gpt-5.3-codex"),
|
|
// ModelName intentionally empty
|
|
})
|
|
arm, _ := r.LookupArm(NewArmID("openai", "gpt-5.3-codex"))
|
|
if arm.CostWeight != 0.6 {
|
|
t.Errorf("CostWeight = %v, want 0.6 (defaults should resolve via ID.Model() fallback)", arm.CostWeight)
|
|
}
|
|
}
|
|
|
|
// --- Integration: routing-payoff scenario --------------------------------
|
|
|
|
// TestRoutingDefaults_PayoffScenario is the user-facing demonstration that
|
|
// out-of-the-box selection now picks sensibly across a realistic local
|
|
// fleet, without any [[arms]] override. Per
|
|
// docs/superpowers/plans/2026-05-23-routing-defaults-refresh.md the
|
|
// motivating goal: incognito stops feeling random.
|
|
//
|
|
// Note on Thinking capability: real phi-4 supports extended reasoning,
|
|
// but DiscoveredModel today has no SupportsThinking field — discovery
|
|
// only flips ToolUse and Vision. The selector's heuristicQuality gives
|
|
// a +0.2 bump for Thinking+Planning that would otherwise push phi-4
|
|
// over the TaskPlanning quality floor (0.60). The test mutates the arm
|
|
// after registration to reflect what the model actually supports;
|
|
// surfacing a thinking flag in discovery is tracked separately (out of
|
|
// scope for the defaults-refresh plan).
|
|
func TestRoutingDefaults_PayoffScenario(t *testing.T) {
|
|
r := New(Config{})
|
|
factory := func(name, model string) SecureProvider {
|
|
return security.WrapProvider(&stubProvider{name: name, model: model}, nil)
|
|
}
|
|
|
|
models := []DiscoveredModel{
|
|
{ID: "reecdev/tiny3.5:1.5b", Provider: "ollama", SupportsTools: true, ContextSize: 32768},
|
|
{ID: "phi-4:14b", Provider: "ollama", SupportsTools: true, ContextSize: 16384},
|
|
{ID: "qwen3-coder:30b", Provider: "ollama", SupportsTools: true, ContextSize: 262144},
|
|
}
|
|
RegisterDiscoveredModels(r, models, factory)
|
|
|
|
// Reflect phi-4's real Thinking capability — see test comment.
|
|
if arm, ok := r.LookupArm("ollama/phi-4:14b"); ok {
|
|
arm.Capabilities.ThinkingModes = []provider.EffortLevel{provider.EffortMedium}
|
|
}
|
|
|
|
cases := []struct {
|
|
name string
|
|
task Task
|
|
wantArmID ArmID
|
|
reason string
|
|
}{
|
|
{
|
|
name: "Generation picks qwen3-coder",
|
|
task: Task{Type: TaskGeneration, RequiresTools: true, ComplexityScore: 0.7, Priority: PriorityNormal, EstimatedTokens: 2000},
|
|
wantArmID: "ollama/qwen3-coder:30b",
|
|
reason: "qwen3-coder is Strengths-promoted for TaskGeneration and has the highest MaxComplexity (0.85)",
|
|
},
|
|
{
|
|
name: "Planning picks phi-4",
|
|
task: Task{Type: TaskPlanning, RequiresTools: true, ComplexityScore: 0.5, Priority: PriorityNormal, EstimatedTokens: 1500},
|
|
wantArmID: "ollama/phi-4:14b",
|
|
reason: "phi-4 is Strengths-promoted for TaskPlanning; qwen3-coder's strengths don't include Planning",
|
|
},
|
|
{
|
|
name: "Boilerplate picks tiny3.5",
|
|
task: Task{Type: TaskBoilerplate, RequiresTools: true, ComplexityScore: 0.1, Priority: PriorityLow, EstimatedTokens: 200},
|
|
wantArmID: "ollama/reecdev/tiny3.5:1.5b",
|
|
reason: "tiny3.5 Strengths include TaskBoilerplate; it's the cheapest viable arm for a trivial task",
|
|
},
|
|
}
|
|
for _, tc := range cases {
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
decision := r.Select(tc.task)
|
|
if decision.Error != nil {
|
|
t.Fatalf("Select returned error: %v", decision.Error)
|
|
}
|
|
if decision.Arm == nil {
|
|
t.Fatal("Select returned nil arm")
|
|
}
|
|
if decision.Arm.ID != tc.wantArmID {
|
|
t.Errorf("got arm %q, want %q\n reason: %s", decision.Arm.ID, tc.wantArmID, tc.reason)
|
|
}
|
|
decision.Rollback()
|
|
})
|
|
}
|
|
}
|
|
|
|
// TestRoutingDefaults_LocalFleetVisibility makes sure the maintainer's
|
|
// actual Ollama inventory all register correctly (none accidentally
|
|
// excluded by the non-chat filter, all get sensible defaults).
|
|
func TestRoutingDefaults_LocalFleetVisibility(t *testing.T) {
|
|
r := New(Config{})
|
|
factory := func(name, model string) SecureProvider {
|
|
return security.WrapProvider(&stubProvider{name: name, model: model}, nil)
|
|
}
|
|
|
|
// Models from the maintainer's `ollama ls` output (2026-05-23 session).
|
|
models := []DiscoveredModel{
|
|
{ID: "reecdev/tiny3.5:1.5b", Provider: "ollama", SupportsTools: true, ContextSize: 32768},
|
|
{ID: "reecdev/tiny3.5:500m", Provider: "ollama", ContextSize: 32768},
|
|
{ID: "ministral-3:3b", Provider: "ollama", SupportsTools: true, ContextSize: 32768},
|
|
{ID: "qwen3.5:4b", Provider: "ollama", SupportsTools: true, ContextSize: 32768},
|
|
{ID: "gemma4-e4b-uc:latest", Provider: "ollama", SupportsTools: true, ContextSize: 32768},
|
|
{ID: "gemma4:latest", Provider: "ollama", SupportsTools: true, ContextSize: 32768},
|
|
{ID: "qwen3:14b", Provider: "ollama", SupportsTools: true, ContextSize: 32768},
|
|
{ID: "devstral-small-2:24b", Provider: "ollama", SupportsTools: true, ContextSize: 131072},
|
|
{ID: "qwen2.5-coder:14b", Provider: "ollama", SupportsTools: true, ContextSize: 32768},
|
|
{ID: "embeddinggemma:latest", Provider: "ollama", ContextSize: 8192},
|
|
{ID: "functiongemma:latest", Provider: "ollama", SupportsTools: true, ContextSize: 32768},
|
|
{ID: "ministral-3:14b", Provider: "ollama", SupportsTools: true, ContextSize: 32768},
|
|
{ID: "ministral-3:8b", Provider: "ollama", SupportsTools: true, ContextSize: 32768},
|
|
}
|
|
|
|
RegisterDiscoveredModels(r, models, factory)
|
|
registered := make(map[ArmID]*Arm)
|
|
for _, a := range r.Arms() {
|
|
registered[a.ID] = a
|
|
}
|
|
|
|
// embeddinggemma must be skipped entirely.
|
|
if _, ok := registered["ollama/embeddinggemma:latest"]; ok {
|
|
t.Error("embeddinggemma should be skipped by non-chat filter")
|
|
}
|
|
|
|
// Every other model must be registered.
|
|
wantRegistered := []ArmID{
|
|
"ollama/reecdev/tiny3.5:1.5b",
|
|
"ollama/reecdev/tiny3.5:500m",
|
|
"ollama/ministral-3:3b",
|
|
"ollama/qwen3.5:4b",
|
|
"ollama/gemma4-e4b-uc:latest",
|
|
"ollama/gemma4:latest",
|
|
"ollama/qwen3:14b",
|
|
"ollama/devstral-small-2:24b",
|
|
"ollama/qwen2.5-coder:14b",
|
|
"ollama/functiongemma:latest",
|
|
"ollama/ministral-3:14b",
|
|
"ollama/ministral-3:8b",
|
|
}
|
|
for _, id := range wantRegistered {
|
|
if _, ok := registered[id]; !ok {
|
|
t.Errorf("expected %q to be registered", id)
|
|
}
|
|
}
|
|
|
|
// Spot-check that defaults flowed through to the arms.
|
|
checks := []struct {
|
|
id ArmID
|
|
wantMaxComp float64
|
|
wantDisabled bool
|
|
wantStrengths []TaskType
|
|
}{
|
|
{"ollama/qwen3-coder:30b", 0, false, nil}, // not in fleet, sanity skip
|
|
{"ollama/devstral-small-2:24b", 0.85, false, []TaskType{TaskGeneration, TaskRefactor, TaskDebug}},
|
|
{"ollama/qwen3:14b", 0.75, false, []TaskType{TaskGeneration, TaskRefactor, TaskDebug}},
|
|
{"ollama/ministral-3:14b", 0.70, false, []TaskType{TaskOrchestration, TaskPlanning}},
|
|
{"ollama/ministral-3:8b", 0.55, false, []TaskType{TaskOrchestration, TaskPlanning}},
|
|
{"ollama/ministral-3:3b", 0.35, false, []TaskType{TaskOrchestration, TaskPlanning}},
|
|
{"ollama/reecdev/tiny3.5:1.5b", 0.30, false, []TaskType{TaskBoilerplate, TaskExplain}},
|
|
{"ollama/reecdev/tiny3.5:500m", 0.20, false, []TaskType{TaskBoilerplate, TaskExplain}},
|
|
{"ollama/functiongemma:latest", 0.40, true, []TaskType{TaskOrchestration}},
|
|
{"ollama/gemma4-e4b-uc:latest", 0.45, false, []TaskType{TaskExplain, TaskBoilerplate}},
|
|
{"ollama/qwen3.5:4b", 0.50, false, []TaskType{TaskBoilerplate, TaskExplain, TaskOrchestration}},
|
|
}
|
|
for _, c := range checks {
|
|
arm, ok := registered[c.id]
|
|
if !ok {
|
|
continue // already reported above
|
|
}
|
|
if arm.MaxComplexity != c.wantMaxComp {
|
|
t.Errorf("%s MaxComplexity = %v, want %v", c.id, arm.MaxComplexity, c.wantMaxComp)
|
|
}
|
|
if arm.Disabled != c.wantDisabled {
|
|
t.Errorf("%s Disabled = %v, want %v", c.id, arm.Disabled, c.wantDisabled)
|
|
}
|
|
if c.wantStrengths != nil && !reflect.DeepEqual(arm.Strengths, c.wantStrengths) {
|
|
t.Errorf("%s Strengths = %v, want %v", c.id, arm.Strengths, c.wantStrengths)
|
|
}
|
|
}
|
|
}
|
|
|