feat: accurate context window sizing from arm capabilities + prefix token baseline + tokenizer wiring

This commit is contained in:
2026-04-05 22:26:31 +02:00
parent 27ca12f863
commit dd9f4e390a
2 changed files with 33 additions and 4 deletions

View File

@@ -22,6 +22,7 @@ import (
"somegit.dev/Owlibou/gnoma/internal/provider"
"somegit.dev/Owlibou/gnoma/internal/router"
"somegit.dev/Owlibou/gnoma/internal/security"
"somegit.dev/Owlibou/gnoma/internal/tokenizer"
anthropicprov "somegit.dev/Owlibou/gnoma/internal/provider/anthropic"
"somegit.dev/Owlibou/gnoma/internal/provider/mistral"
googleprov "somegit.dev/Owlibou/gnoma/internal/provider/google"
@@ -329,16 +330,32 @@ func main() {
logger.Debug("loaded project docs as context prefix", "file", name, "size", len(data))
}
// Derive the context window size from the registered arm's capabilities when it reports a positive ContextWindow; otherwise fall back to the max_tokens * 20 heuristic
contextWindowSize := int64(cfg.Provider.MaxTokens) * 20
if arm, ok := rtr.LookupArm(armID); ok && arm.Capabilities.ContextWindow > 0 {
contextWindowSize = int64(arm.Capabilities.ContextWindow)
logger.Debug("context window from arm capabilities", "arm", armID, "context_window", contextWindowSize)
}
// Create context window with summarize strategy (falls back to truncation)
var compactStrategy gnomactx.Strategy
compactStrategy = gnomactx.NewSummarizeStrategy(prov)
ctxWindow := gnomactx.NewWindow(gnomactx.WindowConfig{
MaxTokens: cfg.Provider.MaxTokens * 20, // rough: max_tokens is per-turn, context window ~20x
MaxTokens: contextWindowSize,
Strategy: compactStrategy,
PrefixMessages: prefixMsgs,
Logger: logger,
})
// Wire the tokenizer selected for this provider into the tracker, then seed the tracker with the token count of the prefix messages
tok := tokenizer.ForProvider(prov.Name())
ctxWindow.Tracker().SetTokenizer(tok)
if len(prefixMsgs) > 0 {
prefixTokens := ctxWindow.Tracker().CountMessages(prefixMsgs)
ctxWindow.Tracker().Set(prefixTokens)
logger.Debug("prefix token baseline set", "tokens", prefixTokens)
}
// Create engine
eng, err := engine.New(engine.Config{
Provider: prov,

View File

@@ -76,7 +76,11 @@ func (e *Engine) runLoop(ctx context.Context, cb Callback) (*Turn, error) {
}
}
task := router.ClassifyTask(prompt)
task.EstimatedTokens = int(gnomactx.EstimateTokens(prompt))
if e.cfg.Context != nil {
task.EstimatedTokens = int(e.cfg.Context.Tracker().CountTokens(prompt))
} else {
task.EstimatedTokens = int(gnomactx.EstimateTokens(prompt))
}
e.logger.Debug("routing request",
"task_type", task.Type,
@@ -117,7 +121,11 @@ func (e *Engine) runLoop(ctx context.Context, cb Callback) (*Turn, error) {
}
}
task := router.ClassifyTask(prompt)
task.EstimatedTokens = int(gnomactx.EstimateTokens(prompt))
if e.cfg.Context != nil {
task.EstimatedTokens = int(e.cfg.Context.Tracker().CountTokens(prompt))
} else {
task.EstimatedTokens = int(gnomactx.EstimateTokens(prompt))
}
var retryDecision router.RoutingDecision
s, retryDecision, err = e.cfg.Router.Stream(ctx, task, req)
decision = retryDecision // adopt new reservation on retry
@@ -455,7 +463,11 @@ func (e *Engine) handleRequestTooLarge(ctx context.Context, origErr error, req p
}
}
task := router.ClassifyTask(prompt)
task.EstimatedTokens = int(gnomactx.EstimateTokens(prompt))
if e.cfg.Context != nil {
task.EstimatedTokens = int(e.cfg.Context.Tracker().CountTokens(prompt))
} else {
task.EstimatedTokens = int(gnomactx.EstimateTokens(prompt))
}
s, _, err := e.cfg.Router.Stream(ctx, task, req)
return s, err
}