Files
gnoma/internal/router/task.go
T
vikingowl eb0583f606 fix(router): unpin config-default provider + complexity floor by task type
Two routing bugs were keeping the SLM out of every real prompt and,
once it was eligible, pulling complex tasks into it as well.

Bug 1: ForceArm was called unconditionally when a primary provider was
configured (cmd/gnoma/main.go:378). That short-circuited the entire
router — every prompt went straight to whatever was set as
[provider].default, regardless of tier, score, or feasibility. The SLM
arm appeared in `gnoma router stats` registration logs but had zero
observations after dozens of prompts.

Fix: only pin when the user passed --provider on the command line.
Config defaults register the arm but don't force it; the router picks
freely. Verified end-to-end — trivial prompts now reach slm/ollama
via the tier-0 priority.

Bug 2: A short prompt like "refactor the SLM module" classifies as
TaskRefactor with complexity 0.015 — well under the SLM arm's 0.3
ceiling. The arm became eligible despite the task being inherently
non-trivial. Once eligible, tier-0 priority then pulled it in over
the CLI agents.

Fix: add MinComplexityForType, applied in both ClassifyTask
(heuristic path) and slm.Classifier.Classify (SLM-overlay path). The
floor is per-task-type:

  - TaskSecurityReview, TaskOrchestration  → 0.60
  - TaskRefactor, TaskPlanning, TaskDebug  → 0.40
  - TaskUnitTest, TaskReview               → 0.35

Tasks like Explain/Generation/Boilerplate keep their organic
complexity score so trivial knowledge prompts (≤0.15) still fall to
the SLM. Tasks that imply existing code or multi-step reasoning are
clamped above the SLM's MaxComplexity, naturally routing them to a
bigger arm.

After both fixes, observed routing in a clean run:

  What is 2+2?              → slm/ollama (complexity 0.015)
  Define a closure          → slm/ollama (complexity 0.015)
  What is HTTP?             → slm/ollama (complexity 0.015)
  Refactor the SLM module   → subprocess/gemini (complexity 0.40)
  Audit for race conditions → subprocess/gemini (complexity 0.35)
  Plan a migration          → subprocess/gemini (complexity 0.40)
2026-05-19 19:22:16 +02:00

378 lines
12 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package router
import (
"fmt"
"strings"
"somegit.dev/Owlibou/gnoma/internal/provider"
)
// TaskType is the routing category assigned to a unit of work.
type TaskType int

// Task categories. Values are assigned by iota in declaration order,
// starting at zero with TaskBoilerplate.
const (
	TaskBoilerplate    TaskType = iota // simple scaffolding, templates
	TaskGeneration                     // new code creation
	TaskRefactor                       // restructuring existing code
	TaskReview                         // code review, analysis
	TaskUnitTest                       // writing tests
	TaskPlanning                       // architecture, design
	TaskOrchestration                  // multi-step coordination
	TaskSecurityReview                 // security-focused analysis
	TaskDebug                          // finding and fixing bugs
	TaskExplain                        // explaining code or concepts
)

// String returns the snake_case label of t. Values outside the declared
// range are rendered as "unknown(N)", where N is the raw integer value.
func (t TaskType) String() string {
	labels := [...]string{
		TaskBoilerplate:    "boilerplate",
		TaskGeneration:     "generation",
		TaskRefactor:       "refactor",
		TaskReview:         "review",
		TaskUnitTest:       "unit_test",
		TaskPlanning:       "planning",
		TaskOrchestration:  "orchestration",
		TaskSecurityReview: "security_review",
		TaskDebug:          "debug",
		TaskExplain:        "explain",
	}
	if t < 0 || int(t) >= len(labels) {
		// %d formats the underlying integer and does not call String again.
		return fmt.Sprintf("unknown(%d)", t)
	}
	return labels[t]
}
// Priority indicates task importance for routing decisions.
// Levels are declared in ascending order of importance; Task.ValueScore
// maps each level to a base weight.
type Priority int
const (
// PriorityLow is the zero value of Priority.
PriorityLow Priority = iota
// PriorityNormal is the priority ClassifyTask starts from.
PriorityNormal
// PriorityHigh is what ClassifyTask assigns to security-review and
// orchestration prompts.
PriorityHigh
// PriorityCritical carries the largest base weight in Task.ValueScore.
PriorityCritical
)
// ClassifierSource records which classifier produced a Task.
// Phase 4 routing decisions need to distinguish an SLM that is actually
// firing from a heuristic that is silently doing all the work.
type ClassifierSource int

// Classifier origins. ClassifierUnknown is the zero value.
const (
	ClassifierUnknown     ClassifierSource = iota // unset / pre-classification
	ClassifierHeuristic                           // router.HeuristicClassifier
	ClassifierSLM                                 // slm.Classifier (SLM call succeeded)
	ClassifierSLMFallback                         // slm.Classifier fell back internally (timeout, parse error)
)

// String returns a short label for s. ClassifierUnknown and any value
// outside the declared set both render as "unknown".
func (s ClassifierSource) String() string {
	if s == ClassifierHeuristic {
		return "heuristic"
	}
	if s == ClassifierSLM {
		return "slm"
	}
	if s == ClassifierSLMFallback {
		return "slm_fallback"
	}
	return "unknown"
}
// Task represents a classified unit of work for routing.
//
// ClassifyTask fills Type, Priority, RequiresTools, ComplexityScore and
// RequiredEffort from the prompt text; it leaves EstimatedTokens,
// ExcludedArms and ClassifierSource at their zero values.
type Task struct {
Type TaskType
Priority Priority
EstimatedTokens int
RequiresTools bool
ComplexityScore float64 // 0-1
RequiredEffort provider.EffortLevel // EffortAuto = no constraint on thinking
ExcludedArms []ArmID // Arms to avoid (e.g. due to recent 429 errors)
ClassifierSource ClassifierSource // which classifier produced this Task
}
// ValueScore returns the routing value of the task: a base weight chosen by
// priority, scaled by the per-type factor in taskTypeMultiplier. A priority
// outside the declared levels contributes a base weight of zero, and so does
// a task type missing from taskTypeMultiplier.
func (t Task) ValueScore() float64 {
	var weight float64
	switch t.Priority {
	case PriorityLow:
		weight = 0.5
	case PriorityNormal:
		weight = 1.0
	case PriorityHigh:
		weight = 2.0
	case PriorityCritical:
		weight = 5.0
	}
	return weight * taskTypeMultiplier[t.Type]
}
// taskTypeMultiplier is the per-type factor that Task.ValueScore multiplies
// with the priority base weight. A type missing from the map yields the
// map's zero value, i.e. a multiplier of 0.
var taskTypeMultiplier = map[TaskType]float64{
TaskBoilerplate: 0.6,
TaskGeneration: 1.0,
TaskRefactor: 0.9,
TaskReview: 1.1,
TaskUnitTest: 0.8,
TaskPlanning: 1.4,
TaskOrchestration: 1.5,
TaskSecurityReview: 2.0,
TaskDebug: 1.2,
TaskExplain: 0.7,
}
// QualityThreshold defines minimum acceptable quality for a task type.
// The three levels are listed from lowest to highest; DefaultThresholds
// populates them positionally in this field order.
type QualityThreshold struct {
Minimum float64 // below → output is harmful, never accept
Acceptable float64 // good enough
Target float64 // ideal
}
// DefaultThresholds holds the per-task-type quality thresholds, written
// positionally as {Minimum, Acceptable, Target}. They are calibrated for
// M4 heuristic scores (range roughly 0 to 0.85).
// M9 will replace these with bandit-derived values once quality data accumulates.
var DefaultThresholds = map[TaskType]QualityThreshold{
TaskBoilerplate: {0.40, 0.55, 0.70}, // any capable arm works
TaskGeneration: {0.45, 0.60, 0.75},
TaskRefactor: {0.50, 0.65, 0.78},
TaskReview: {0.55, 0.68, 0.80},
TaskUnitTest: {0.45, 0.60, 0.75},
TaskPlanning: {0.60, 0.72, 0.82},
TaskOrchestration: {0.65, 0.75, 0.83},
TaskSecurityReview: {0.70, 0.78, 0.84}, // requires thinking or large context window
TaskDebug: {0.50, 0.65, 0.78},
TaskExplain: {0.40, 0.55, 0.72},
}
// inferEffort maps a task's type and complexity score to the minimum
// reasoning effort it requires. Task types without a dedicated rule, and
// scores below every cutoff for their type, yield provider.EffortAuto.
func inferEffort(task Task) provider.EffortLevel {
	score := task.ComplexityScore

	switch task.Type {
	case TaskSecurityReview:
		// Security reviews always get high effort, whatever the score.
		return provider.EffortHigh

	case TaskOrchestration, TaskPlanning:
		// Both are at least medium; orchestration escalates to high earlier.
		cutoff := 0.7
		if task.Type == TaskOrchestration {
			cutoff = 0.5
		}
		if score >= cutoff {
			return provider.EffortHigh
		}
		return provider.EffortMedium

	case TaskDebug, TaskRefactor, TaskReview:
		switch {
		case score >= 0.7:
			return provider.EffortMedium
		case score >= 0.4:
			return provider.EffortLow
		}

	case TaskGeneration:
		if score >= 0.8 {
			return provider.EffortMedium
		}
	}

	return provider.EffortAuto
}
// ClassifyTask derives a routing Task from the user's prompt using keyword
// heuristics. It selects the task type (and with it the priority and the
// default tool requirement), scores complexity, raises the score to the
// per-type floor, relaxes the tool requirement for trivial prompts, and
// finally infers the required reasoning effort.
func ClassifyTask(prompt string) Task {
	lower := strings.ToLower(prompt)

	// Rules are tried top to bottom and the first match wins, so order
	// matters — more specific/common types come first.
	// Orchestration is placed late: its keywords ("dispatch", "pipeline",
	// "orchestrat") appear as nouns in non-orchestration prompts (e.g.
	// "refactor the pipeline dispatch", "review the orchestration layer").
	// Operational task types must gate first.
	rules := []struct {
		kind     TaskType
		priority Priority
		tools    bool
		keywords []string
	}{
		{TaskSecurityReview, PriorityHigh, true,
			[]string{"security", "vulnerability", "cve", "owasp", "xss", "injection", "audit security"}},
		{TaskDebug, PriorityNormal, true,
			[]string{"debug", "fix", "troubleshoot", "not working", "error", "crash", "failing", "bug"}},
		{TaskReview, PriorityNormal, true,
			[]string{"review", "check", "analyze", "audit", "inspect"}},
		{TaskRefactor, PriorityNormal, true,
			[]string{"refactor", "restructure", "reorganize", "clean up", "simplify"}},
		{TaskUnitTest, PriorityNormal, true,
			[]string{"test", "spec", "coverage", "assert"}},
		{TaskExplain, PriorityNormal, false,
			[]string{"explain", "what is", "how does", "describe", "tell me about"}},
		{TaskPlanning, PriorityNormal, true,
			[]string{"plan", "architect", "design", "strategy", "roadmap"}},
		{TaskOrchestration, PriorityHigh, true,
			[]string{"orchestrat", "coordinate", "dispatch", "pipeline",
				"fan out", "subtask", "delegate to", "spawn elf"}},
		{TaskGeneration, PriorityNormal, true,
			[]string{"create", "implement", "build", "add", "write", "generate", "make"}},
		{TaskBoilerplate, PriorityNormal, true,
			[]string{"scaffold", "boilerplate", "template", "stub", "skeleton"}},
	}

	// Fallback when no rule matches: plain generation, normal priority,
	// tools assumed necessary.
	task := Task{
		Type:          TaskGeneration,
		Priority:      PriorityNormal,
		RequiresTools: true,
	}
	for _, r := range rules {
		if containsAny(lower, r.keywords...) {
			task.Type = r.kind
			task.Priority = r.priority
			task.RequiresTools = r.tools
			break
		}
	}

	// Complexity comes from prompt length and keywords, then is clamped up
	// to the per-task-type floor. A short "refactor X" prompt looks trivial
	// by word count, but the task itself implies existing code and
	// non-trivial reasoning — the floor keeps such tasks out of the SLM
	// arm's MaxComplexity ceiling.
	score := estimateComplexity(lower)
	if floor := MinComplexityForType(task.Type); floor > score {
		score = floor
	}
	task.ComplexityScore = score

	// Trivial-prompt override: short, knowledge-only prompts whose task type
	// doesn't imply existing code to read or modify can run without tools —
	// making the SLM arm (ToolUse=false) feasible for genuinely tiny
	// questions like "what is 2+2?" or "explain a closure".
	task.RequiresTools = task.RequiresTools && !isTrivialPrompt(lower, task.Type, task.ComplexityScore)

	task.RequiredEffort = inferEffort(task)
	return task
}
// MinComplexityForType reports the lowest complexity score a task of type t
// may carry. Types that imply existing code or multi-step reasoning have a
// non-zero floor so that a short prompt cannot slip under the SLM arm's
// MaxComplexity ceiling; every other type has a floor of 0.
func MinComplexityForType(t TaskType) float64 {
	if t == TaskSecurityReview || t == TaskOrchestration {
		return 0.6
	}
	if t == TaskRefactor || t == TaskPlanning || t == TaskDebug {
		return 0.4
	}
	if t == TaskUnitTest || t == TaskReview {
		return 0.35
	}
	return 0
}
// trivialEligibleTypes are the task types where a "no tools needed" verdict
// is plausible from a short prompt alone. Debug / Refactor / Review / Test /
// SecurityReview / Orchestration all imply existing code or processes to
// touch — keep RequiresTools=true even if the wording is brief.
// Types absent from the map read as false, so isTrivialPrompt rejects them.
var trivialEligibleTypes = map[TaskType]bool{
TaskExplain: true,
TaskGeneration: true,
TaskBoilerplate: true,
}
// toolNeedingTokens name actions/objects that always require tool execution.
// Matched as whole words (string-fields), not substrings — avoids treating
// "tester" as "test".
// isTrivialPrompt looks each word up against the lower-cased prompt, so the
// keys here are all lower case.
var toolNeedingTokens = map[string]bool{
"read": true, "write": true, "edit": true, "create": true,
"delete": true, "remove": true, "list": true, "find": true,
"search": true, "grep": true, "open": true, "save": true,
"run": true, "execute": true, "compile": true, "build": true,
"test": true, "tests": true, "install": true, "commit": true,
"push": true, "pull": true, "diff": true, "file": true, "files": true,
}
// isTrivialPrompt reports whether a prompt can be answered from knowledge
// alone, without tools. All of the following must hold:
//
//   - taskType is listed in trivialEligibleTypes,
//   - complexity is at most 0.15,
//   - the prompt has at most 12 whitespace-separated words,
//   - no word, once surrounding punctuation is stripped, is listed in
//     toolNeedingTokens.
//
// lower is expected to be the lower-cased prompt, because the keys of
// toolNeedingTokens are lower case.
func isTrivialPrompt(lower string, taskType TaskType, complexity float64) bool {
	// Punctuation stripped from both ends of each word before the lookup.
	// Besides sentence punctuation this covers quotes, backticks and
	// brackets, so a wrapped token such as "push" or `file` or (run) is
	// recognised exactly like its bare form.
	const edgePunct = ".,!?;:\"'`()[]{}<>"

	if !trivialEligibleTypes[taskType] || complexity > 0.15 {
		return false
	}

	words := strings.Fields(lower)
	if len(words) > 12 {
		return false
	}

	for _, word := range words {
		if toolNeedingTokens[strings.Trim(word, edgePunct)] {
			return false
		}
	}
	return true
}
// containsAny reports whether s contains at least one of keywords as a
// substring. With no keywords it returns false.
func containsAny(s string, keywords ...string) bool {
	for i := 0; i < len(keywords); i++ {
		if strings.Index(s, keywords[i]) >= 0 {
			return true
		}
	}
	return false
}
// estimateComplexity scores a prompt in [0, 1] from its length and wording.
// The word count contributes words/200 (200 words = 1.0), each "complex"
// keyword found as a substring adds 0.15, each "simple" keyword subtracts
// 0.15, and the result is clamped to the unit interval. Matching is
// case-sensitive; the caller in this file passes the lower-cased prompt.
func estimateComplexity(prompt string) float64 {
	const step = 0.15

	raisers := [...]string{"implement", "design", "architect", "system", "integration", "migrate", "optimize"}
	reducers := [...]string{"rename", "format", "add field", "change name", "typo", "simple"}

	// Length term first, then keyword adjustments in list order.
	total := float64(len(strings.Fields(prompt))) / 200.0

	for i := range raisers {
		if strings.Contains(prompt, raisers[i]) {
			total += step
		}
	}
	for i := range reducers {
		if strings.Contains(prompt, reducers[i]) {
			total -= step
		}
	}

	switch {
	case total < 0:
		return 0
	case total > 1:
		return 1
	}
	return total
}
// ParseTaskType converts a string from an SLM JSON response to a TaskType.
//
// Matching is case-insensitive and ignores underscores, hyphens and
// whitespace, so "unit_test", "Unit-Test", "unit test" and " unittest\n" all
// resolve to TaskUnitTest. Model output is not reliably normalised, and
// without this tolerance such variants would silently be treated as
// unknown. Unknown strings fall back to TaskGeneration.
func ParseTaskType(s string) TaskType {
	key := strings.ToLower(strings.Map(func(r rune) rune {
		switch r {
		case '_', '-', ' ', '\t', '\r', '\n':
			return -1 // a negative result drops the rune
		}
		return r
	}, s))

	switch key {
	case "debug":
		return TaskDebug
	case "explain":
		return TaskExplain
	case "generation":
		return TaskGeneration
	case "refactor":
		return TaskRefactor
	case "unittest":
		return TaskUnitTest
	case "boilerplate":
		return TaskBoilerplate
	case "planning":
		return TaskPlanning
	case "orchestration":
		return TaskOrchestration
	case "securityreview":
		return TaskSecurityReview
	case "review":
		return TaskReview
	default:
		return TaskGeneration
	}
}