feat: accurate context window sizing from arm capabilities + prefix token baseline + tokenizer wiring
This commit is contained in:
@@ -22,6 +22,7 @@ import (
|
||||
"somegit.dev/Owlibou/gnoma/internal/provider"
|
||||
"somegit.dev/Owlibou/gnoma/internal/router"
|
||||
"somegit.dev/Owlibou/gnoma/internal/security"
|
||||
"somegit.dev/Owlibou/gnoma/internal/tokenizer"
|
||||
anthropicprov "somegit.dev/Owlibou/gnoma/internal/provider/anthropic"
|
||||
"somegit.dev/Owlibou/gnoma/internal/provider/mistral"
|
||||
googleprov "somegit.dev/Owlibou/gnoma/internal/provider/google"
|
||||
@@ -329,16 +330,32 @@ func main() {
|
||||
logger.Debug("loaded project docs as context prefix", "file", name, "size", len(data))
|
||||
}
|
||||
|
||||
// Derive the context window size from the registered arm's capabilities when it reports a positive value; otherwise fall back to the rough heuristic of 20x the per-turn max_tokens
|
||||
contextWindowSize := int64(cfg.Provider.MaxTokens) * 20
|
||||
if arm, ok := rtr.LookupArm(armID); ok && arm.Capabilities.ContextWindow > 0 {
|
||||
contextWindowSize = int64(arm.Capabilities.ContextWindow)
|
||||
logger.Debug("context window from arm capabilities", "arm", armID, "context_window", contextWindowSize)
|
||||
}
|
||||
|
||||
// Create context window with summarize strategy (falls back to truncation)
|
||||
var compactStrategy gnomactx.Strategy
|
||||
compactStrategy = gnomactx.NewSummarizeStrategy(prov)
|
||||
ctxWindow := gnomactx.NewWindow(gnomactx.WindowConfig{
|
||||
MaxTokens: cfg.Provider.MaxTokens * 20, // rough: max_tokens is per-turn, context window ~20x
|
||||
MaxTokens: contextWindowSize,
|
||||
Strategy: compactStrategy,
|
||||
PrefixMessages: prefixMsgs,
|
||||
Logger: logger,
|
||||
})
|
||||
|
||||
// Wire the provider's tokenizer into the context tracker, then seed the tracker with the token count of the prefix messages
|
||||
tok := tokenizer.ForProvider(prov.Name())
|
||||
ctxWindow.Tracker().SetTokenizer(tok)
|
||||
if len(prefixMsgs) > 0 {
|
||||
prefixTokens := ctxWindow.Tracker().CountMessages(prefixMsgs)
|
||||
ctxWindow.Tracker().Set(prefixTokens)
|
||||
logger.Debug("prefix token baseline set", "tokens", prefixTokens)
|
||||
}
|
||||
|
||||
// Create engine
|
||||
eng, err := engine.New(engine.Config{
|
||||
Provider: prov,
|
||||
|
||||
@@ -76,7 +76,11 @@ func (e *Engine) runLoop(ctx context.Context, cb Callback) (*Turn, error) {
|
||||
}
|
||||
}
|
||||
task := router.ClassifyTask(prompt)
|
||||
task.EstimatedTokens = int(gnomactx.EstimateTokens(prompt))
|
||||
if e.cfg.Context != nil {
|
||||
task.EstimatedTokens = int(e.cfg.Context.Tracker().CountTokens(prompt))
|
||||
} else {
|
||||
task.EstimatedTokens = int(gnomactx.EstimateTokens(prompt))
|
||||
}
|
||||
|
||||
e.logger.Debug("routing request",
|
||||
"task_type", task.Type,
|
||||
@@ -117,7 +121,11 @@ func (e *Engine) runLoop(ctx context.Context, cb Callback) (*Turn, error) {
|
||||
}
|
||||
}
|
||||
task := router.ClassifyTask(prompt)
|
||||
task.EstimatedTokens = int(gnomactx.EstimateTokens(prompt))
|
||||
if e.cfg.Context != nil {
|
||||
task.EstimatedTokens = int(e.cfg.Context.Tracker().CountTokens(prompt))
|
||||
} else {
|
||||
task.EstimatedTokens = int(gnomactx.EstimateTokens(prompt))
|
||||
}
|
||||
var retryDecision router.RoutingDecision
|
||||
s, retryDecision, err = e.cfg.Router.Stream(ctx, task, req)
|
||||
decision = retryDecision // adopt new reservation on retry
|
||||
@@ -455,7 +463,11 @@ func (e *Engine) handleRequestTooLarge(ctx context.Context, origErr error, req p
|
||||
}
|
||||
}
|
||||
task := router.ClassifyTask(prompt)
|
||||
task.EstimatedTokens = int(gnomactx.EstimateTokens(prompt))
|
||||
if e.cfg.Context != nil {
|
||||
task.EstimatedTokens = int(e.cfg.Context.Tracker().CountTokens(prompt))
|
||||
} else {
|
||||
task.EstimatedTokens = int(gnomactx.EstimateTokens(prompt))
|
||||
}
|
||||
s, _, err := e.cfg.Router.Stream(ctx, task, req)
|
||||
return s, err
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user