From dd9f4e390a3860838da992dda22c250270d66067 Mon Sep 17 00:00:00 2001
From: vikingowl
Date: Sun, 5 Apr 2026 22:26:31 +0200
Subject: [PATCH] feat: accurate context window sizing from arm capabilities + prefix token baseline + tokenizer wiring

---
 cmd/gnoma/main.go       | 19 ++++++++++++++++++-
 internal/engine/loop.go | 18 +++++++++++++++---
 2 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/cmd/gnoma/main.go b/cmd/gnoma/main.go
index 4c82535..b783e0d 100644
--- a/cmd/gnoma/main.go
+++ b/cmd/gnoma/main.go
@@ -22,6 +22,7 @@ import (
 	"somegit.dev/Owlibou/gnoma/internal/provider"
 	"somegit.dev/Owlibou/gnoma/internal/router"
 	"somegit.dev/Owlibou/gnoma/internal/security"
+	"somegit.dev/Owlibou/gnoma/internal/tokenizer"
 	anthropicprov "somegit.dev/Owlibou/gnoma/internal/provider/anthropic"
 	"somegit.dev/Owlibou/gnoma/internal/provider/mistral"
 	googleprov "somegit.dev/Owlibou/gnoma/internal/provider/google"
@@ -329,16 +330,32 @@ func main() {
 		logger.Debug("loaded project docs as context prefix", "file", name, "size", len(data))
 	}
 
+	// Derive context window size from registered arm capabilities (accurate) or fall back to heuristic
+	contextWindowSize := int64(cfg.Provider.MaxTokens) * 20
+	if arm, ok := rtr.LookupArm(armID); ok && arm.Capabilities.ContextWindow > 0 {
+		contextWindowSize = int64(arm.Capabilities.ContextWindow)
+		logger.Debug("context window from arm capabilities", "arm", armID, "context_window", contextWindowSize)
+	}
+
 	// Create context window with summarize strategy (falls back to truncation)
 	var compactStrategy gnomactx.Strategy
 	compactStrategy = gnomactx.NewSummarizeStrategy(prov)
 	ctxWindow := gnomactx.NewWindow(gnomactx.WindowConfig{
-		MaxTokens:      cfg.Provider.MaxTokens * 20, // rough: max_tokens is per-turn, context window ~20x
+		MaxTokens:      contextWindowSize,
 		Strategy:       compactStrategy,
 		PrefixMessages: prefixMsgs,
 		Logger:         logger,
 	})
 
+	// Wire tokenizer and seed tracker with prefix cost
+	tok := tokenizer.ForProvider(prov.Name())
+	ctxWindow.Tracker().SetTokenizer(tok)
+	if len(prefixMsgs) > 0 {
+		prefixTokens := ctxWindow.Tracker().CountMessages(prefixMsgs)
+		ctxWindow.Tracker().Set(prefixTokens)
+		logger.Debug("prefix token baseline set", "tokens", prefixTokens)
+	}
+
 	// Create engine
 	eng, err := engine.New(engine.Config{
 		Provider: prov,
diff --git a/internal/engine/loop.go b/internal/engine/loop.go
index cb89e76..13f5466 100644
--- a/internal/engine/loop.go
+++ b/internal/engine/loop.go
@@ -76,7 +76,11 @@ func (e *Engine) runLoop(ctx context.Context, cb Callback) (*Turn, error) {
 			}
 		}
 		task := router.ClassifyTask(prompt)
-		task.EstimatedTokens = int(gnomactx.EstimateTokens(prompt))
+		if e.cfg.Context != nil {
+			task.EstimatedTokens = int(e.cfg.Context.Tracker().CountTokens(prompt))
+		} else {
+			task.EstimatedTokens = int(gnomactx.EstimateTokens(prompt))
+		}
 
 		e.logger.Debug("routing request",
 			"task_type", task.Type,
@@ -117,7 +121,11 @@ func (e *Engine) runLoop(ctx context.Context, cb Callback) (*Turn, error) {
 				}
 			}
 			task := router.ClassifyTask(prompt)
-			task.EstimatedTokens = int(gnomactx.EstimateTokens(prompt))
+			if e.cfg.Context != nil {
+				task.EstimatedTokens = int(e.cfg.Context.Tracker().CountTokens(prompt))
+			} else {
+				task.EstimatedTokens = int(gnomactx.EstimateTokens(prompt))
+			}
 			var retryDecision router.RoutingDecision
 			s, retryDecision, err = e.cfg.Router.Stream(ctx, task, req)
 			decision = retryDecision // adopt new reservation on retry
@@ -455,7 +463,11 @@ func (e *Engine) handleRequestTooLarge(ctx context.Context, origErr error, req p
 		}
 	}
 	task := router.ClassifyTask(prompt)
-	task.EstimatedTokens = int(gnomactx.EstimateTokens(prompt))
+	if e.cfg.Context != nil {
+		task.EstimatedTokens = int(e.cfg.Context.Tracker().CountTokens(prompt))
+	} else {
+		task.EstimatedTokens = int(gnomactx.EstimateTokens(prompt))
+	}
 	s, _, err := e.cfg.Router.Stream(ctx, task, req)
 	return s, err
 }