fix: M1-M7 gap audit phase 1 — bug fix + 5 quick wins
Bug fix:
- window.go: token ratio after compaction used len(w.messages) after reassignment, always producing ratio ~1.0. Fixed by saving original length before assignment.

Gap 1 (M3): Scanner patterns 13 → 47
- Added 34 new patterns: Azure, DigitalOcean, HuggingFace, Grafana, GitHub extended (app/oauth/refresh), Shopify, Twilio, SendGrid, NPM, PyPI, Databricks, Pulumi, Postman, Sentry, Anthropic admin, OpenAI extended, Vault, Supabase, Telegram, Discord, JWT, Heroku, Mailgun, Figma

Gap 2 (M3): Config security section
- SecuritySection with EntropyThreshold + custom PatternConfig
- Wire custom patterns from TOML into scanner at startup

Gap 3 (M4): Polling discovery loop
- StartDiscoveryLoop with 30s ticker, reconciles arms vs discovered
- Router.RemoveArm for disappeared local models

Gap 4 (M5): Incognito LocalOnly enforcement
- Router.SetLocalOnly filters non-local arms in Select()
- TUI incognito toggle (Ctrl+X, /incognito) sets local-only routing

Gap 5 (M6): Reactive 413 compaction
- Window.ForceCompact() bypasses ShouldCompact threshold
- Engine handles 413 with emergency compact + retry
This commit is contained in:
+41
-1
@@ -108,7 +108,11 @@ func (e *Engine) runLoop(ctx context.Context, cb Callback) (*Turn, error) {
|
||||
return e.cfg.Provider.Stream(ctx, req)
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("provider stream: %w", err)
|
||||
// Try reactive compaction on 413 (request too large)
|
||||
s, err = e.handleRequestTooLarge(ctx, err, req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("provider stream: %w", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -341,6 +345,42 @@ func truncate(s string, maxLen int) string {
|
||||
return s[:maxLen] + "..."
|
||||
}
|
||||
|
||||
// handleRequestTooLarge attempts compaction on 413 and retries once.
|
||||
func (e *Engine) handleRequestTooLarge(ctx context.Context, origErr error, req provider.Request) (stream.Stream, error) {
|
||||
var provErr *provider.ProviderError
|
||||
if !errors.As(origErr, &provErr) || provErr.StatusCode != 413 {
|
||||
return nil, origErr
|
||||
}
|
||||
|
||||
if e.cfg.Context == nil {
|
||||
return nil, origErr
|
||||
}
|
||||
|
||||
e.logger.Warn("413 received, forcing emergency compaction")
|
||||
compacted, compactErr := e.cfg.Context.ForceCompact()
|
||||
if compactErr != nil || !compacted {
|
||||
return nil, origErr
|
||||
}
|
||||
|
||||
e.history = e.cfg.Context.Messages()
|
||||
req = e.buildRequest(ctx)
|
||||
|
||||
if e.cfg.Router != nil {
|
||||
prompt := ""
|
||||
for i := len(e.history) - 1; i >= 0; i-- {
|
||||
if e.history[i].Role == message.RoleUser {
|
||||
prompt = e.history[i].TextContent()
|
||||
break
|
||||
}
|
||||
}
|
||||
task := router.ClassifyTask(prompt)
|
||||
task.EstimatedTokens = 4000
|
||||
s, _, err := e.cfg.Router.Stream(ctx, task, req)
|
||||
return s, err
|
||||
}
|
||||
return e.cfg.Provider.Stream(ctx, req)
|
||||
}
|
||||
|
||||
// retryOnTransient retries the stream call on 429/5xx with exponential backoff.
|
||||
// Returns the original error if not retryable or all retries exhausted.
|
||||
func (e *Engine) retryOnTransient(ctx context.Context, firstErr error, fn func() (stream.Stream, error)) (stream.Stream, error) {
|
||||
|
||||
Reference in New Issue
Block a user