58beb7ce3c
Phase 4 routing decisions depend on knowing whether the SLM classifier is actually firing or whether the heuristic is silently doing all the work. Adds the instrumentation to make that observable. router.ClassifierSource enum (heuristic / slm / slm_fallback) is set on Task by every classifier: - HeuristicClassifier → ClassifierHeuristic - slm.Classifier → ClassifierSLM on success, ClassifierSLMFallback when the SLM call fails or returns unparseable output The source is plumbed through router.Outcome to QualityTracker, which now maintains per-source counters alongside the existing per-arm × task EMA scores. QualitySnapshot serializes both (classifier_counts is omitempty for back-compat with pre-feature quality.json files). lazyClassifier logs at INFO the first time it falls back to heuristic because the SLM hasn't booted yet — distinguishes operational fallback from an unconfigured-SLM run. slm.Manager.Start() now records elapsed-to-healthy and the main.go goroutine logs it as part of the "SLM ready" event. Confirms whether short-lived runs are racing the boot cycle. New `gnoma router stats` subcommand prints both tables (arm × task quality, classifier source breakdown) from quality.json with a Phase 4 trust hint when the data is too sparse or the SLM share is low. 6 new tests cover ClassifierSource string/enum, heuristic + SLM source propagation, QualityTracker counter round-trip, and back-compat restore from a legacy quality.json without classifier_counts.
94 lines
2.5 KiB
Go
94 lines
2.5 KiB
Go
package router
|
|
|
|
import "sync"
|
|
|
|
const (
	// qualityAlpha is the EMA smoothing factor: the weight given to the
	// newest observation (~3-sample memory).
	qualityAlpha = 0.3

	// minObservations is the minimum number of samples required before an
	// observed score overrides the heuristic.
	minObservations = 3
)
|
|
|
|
// EMAScore holds an exponential moving average of quality observations
// together with the number of observations folded into it.
type EMAScore struct {
	// Value is the current moving-average score.
	Value float64
	// Count is how many observations have been recorded so far.
	Count int
}
|
|
|
|
// QualityTracker records per-arm, per-task-type EMA quality scores from elf
|
|
// outcomes and per-classifier-source counts used by Phase 4 routing decisions.
|
|
type QualityTracker struct {
|
|
mu sync.RWMutex
|
|
scores map[ArmID]map[TaskType]*EMAScore
|
|
classifierCount map[ClassifierSource]int
|
|
}
|
|
|
|
// NewQualityTracker returns an empty QualityTracker.
|
|
func NewQualityTracker() *QualityTracker {
|
|
return &QualityTracker{
|
|
scores: make(map[ArmID]map[TaskType]*EMAScore),
|
|
classifierCount: make(map[ClassifierSource]int),
|
|
}
|
|
}
|
|
|
|
// RecordClassifier increments the count for a classifier source. Used to
|
|
// answer "how often did the SLM actually classify vs fall back?" — Phase 4
|
|
// trust signal.
|
|
func (qt *QualityTracker) RecordClassifier(src ClassifierSource) {
|
|
if src == ClassifierUnknown {
|
|
return // pre-classification / forced; don't pollute counters
|
|
}
|
|
qt.mu.Lock()
|
|
defer qt.mu.Unlock()
|
|
qt.classifierCount[src]++
|
|
}
|
|
|
|
// ClassifierCounts returns a copy of the per-source observation counts.
|
|
func (qt *QualityTracker) ClassifierCounts() map[ClassifierSource]int {
|
|
qt.mu.RLock()
|
|
defer qt.mu.RUnlock()
|
|
out := make(map[ClassifierSource]int, len(qt.classifierCount))
|
|
for k, v := range qt.classifierCount {
|
|
out[k] = v
|
|
}
|
|
return out
|
|
}
|
|
|
|
// Record updates the EMA score for the given arm and task type.
|
|
func (qt *QualityTracker) Record(armID ArmID, taskType TaskType, success bool) {
|
|
observation := 0.0
|
|
if success {
|
|
observation = 1.0
|
|
}
|
|
qt.mu.Lock()
|
|
defer qt.mu.Unlock()
|
|
if qt.scores[armID] == nil {
|
|
qt.scores[armID] = make(map[TaskType]*EMAScore)
|
|
}
|
|
s := qt.scores[armID][taskType]
|
|
if s == nil {
|
|
s = &EMAScore{}
|
|
qt.scores[armID][taskType] = s
|
|
}
|
|
if s.Count == 0 {
|
|
s.Value = observation
|
|
} else {
|
|
s.Value = qualityAlpha*observation + (1-qualityAlpha)*s.Value
|
|
}
|
|
s.Count++
|
|
}
|
|
|
|
// Quality returns the observed EMA score for an arm+task combination.
|
|
// Returns (0, false) when fewer than minObservations have been recorded.
|
|
func (qt *QualityTracker) Quality(armID ArmID, taskType TaskType) (score float64, hasData bool) {
|
|
qt.mu.RLock()
|
|
defer qt.mu.RUnlock()
|
|
m, ok := qt.scores[armID]
|
|
if !ok {
|
|
return 0, false
|
|
}
|
|
s, ok := m[taskType]
|
|
if !ok || s.Count < minObservations {
|
|
return 0, false
|
|
}
|
|
return s.Value, true
|
|
}
|