fix: M1-M7 gap audit phase 1 — bug fix + 5 quick wins

Bug fix:
- window.go: token ratio after compaction used len(w.messages) after
  reassignment, always producing ratio ~1.0. Fixed by saving original
  length before assignment.

Gap 1 (M3): Scanner patterns 13 → 47
- Added 34 new patterns: Azure, DigitalOcean, HuggingFace, Grafana,
  GitHub extended (app/oauth/refresh), Shopify, Twilio, SendGrid,
  NPM, PyPI, Databricks, Pulumi, Postman, Sentry, Anthropic admin,
  OpenAI extended, Vault, Supabase, Telegram, Discord, JWT, Heroku,
  Mailgun, Figma

Gap 2 (M3): Config security section
- SecuritySection with EntropyThreshold + custom PatternConfig
- Wire custom patterns from TOML into scanner at startup

Gap 3 (M4): Polling discovery loop
- StartDiscoveryLoop with 30s ticker; reconciles registered router arms against discovered models
- Router.RemoveArm for disappeared local models

Gap 4 (M5): Incognito LocalOnly enforcement
- Router.SetLocalOnly filters non-local arms in Select()
- TUI incognito toggle (Ctrl+X, /incognito) sets local-only routing

Gap 5 (M6): Reactive 413 compaction
- Window.ForceCompact() bypasses ShouldCompact threshold
- Engine handles 413 with emergency compact + retry
This commit is contained in:
2026-04-03 23:11:08 +02:00
parent 6aea2a9e3a
commit de1798ff5c
8 changed files with 268 additions and 23 deletions
+41 -1
View File
@@ -108,7 +108,11 @@ func (e *Engine) runLoop(ctx context.Context, cb Callback) (*Turn, error) {
return e.cfg.Provider.Stream(ctx, req)
})
if err != nil {
return nil, fmt.Errorf("provider stream: %w", err)
// Try reactive compaction on 413 (request too large)
s, err = e.handleRequestTooLarge(ctx, err, req)
if err != nil {
return nil, fmt.Errorf("provider stream: %w", err)
}
}
}
@@ -341,6 +345,42 @@ func truncate(s string, maxLen int) string {
return s[:maxLen] + "..."
}
// handleRequestTooLarge reacts to a provider 413 (request too large) by forcing
// an emergency compaction of the context window and retrying the stream once.
//
// origErr is returned unchanged when it is not a *provider.ProviderError with
// StatusCode 413, when no context window is configured, or when ForceCompact
// reports that nothing was compacted. When ForceCompact itself fails, the
// returned error wraps origErr (so errors.As/errors.Is still match the 413)
// and carries the compaction failure in its message instead of dropping it.
//
// After a successful compaction the engine history is replaced with the
// window's messages, the request is rebuilt, and the retry goes through the
// router when one is configured, otherwise straight to the provider. The
// result of that single retry is returned as-is.
func (e *Engine) handleRequestTooLarge(ctx context.Context, origErr error, req provider.Request) (stream.Stream, error) {
	var provErr *provider.ProviderError
	if !errors.As(origErr, &provErr) || provErr.StatusCode != 413 {
		return nil, origErr
	}

	// Without a context window there is nothing to compact.
	if e.cfg.Context == nil {
		return nil, origErr
	}

	e.logger.Warn("413 received, forcing emergency compaction")

	compacted, compactErr := e.cfg.Context.ForceCompact()
	if compactErr != nil {
		// Keep origErr as the wrapped cause, but surface why recovery failed.
		return nil, fmt.Errorf("%w (emergency compaction failed: %v)", origErr, compactErr)
	}
	if !compacted {
		// Nothing was compacted, so a retry would send the same request.
		return nil, origErr
	}

	// Adopt the compacted messages and rebuild the request from them; the
	// incoming req was built before compaction and is discarded.
	e.history = e.cfg.Context.Messages()
	req = e.buildRequest(ctx)

	if e.cfg.Router == nil {
		return e.cfg.Provider.Stream(ctx, req)
	}

	// Classify the retry using the text of the most recent user message.
	prompt := ""
	for i := len(e.history) - 1; i >= 0; i-- {
		if e.history[i].Role == message.RoleUser {
			prompt = e.history[i].TextContent()
			break
		}
	}

	task := router.ClassifyTask(prompt)
	// NOTE(review): fixed token estimate for the retry — confirm it matches
	// the value used on the normal routing path.
	task.EstimatedTokens = 4000

	// The router's second return value is intentionally discarded here.
	// NOTE(review): confirm the caller does not need it for the retried call.
	s, _, err := e.cfg.Router.Stream(ctx, task, req)
	return s, err
}
// retryOnTransient retries the stream call on 429/5xx with exponential backoff.
// Returns the original error if not retryable or all retries exhausted.
func (e *Engine) retryOnTransient(ctx context.Context, firstErr error, fn func() (stream.Stream, error)) (stream.Stream, error) {