feat: M1-M7 gap audit phase 3 — context prefix, deferred tools, compact hooks

Gap 11 (M6): Fixed context prefix
- Window.PrefixMessages stores immutable docs (CLAUDE.md, .gnoma/GNOMA.md)
- Prefix is stored separately from the mutable history and never passed to the compaction strategy — survives all compaction
- AllMessages() returns prefix + history for provider requests
- main.go loads CLAUDE.md and .gnoma/GNOMA.md at startup as prefix

Gap 12 (M6): Deferred tool loading
- DeferrableTool optional interface: ShouldDefer() bool
- buildRequest() skips deferred tools until activated
- Tools auto-activate the first time the model calls them (tracked in the activatedTools map)
- agent + spawn_elfs marked as deferrable (large schemas, rarely needed early)
- Saves ~800 tokens per deferred tool per request

Gap 13 (M6): Pre/post compact hooks
- OnPreCompact/OnPostCompact callbacks in WindowConfig
- Called in doCompact() (shared by CompactIfNeeded + ForceCompact)
- M8 hooks system will extend these to full protocol
This commit is contained in:
2026-04-04 20:46:50 +02:00
parent 11363f3b97
commit 95dfd0cf0c
7 changed files with 121 additions and 61 deletions
+19 -3
View File
@@ -15,6 +15,7 @@ import (
"encoding/json"
gnomacfg "somegit.dev/Owlibou/gnoma/internal/config"
gnomactx "somegit.dev/Owlibou/gnoma/internal/context"
"somegit.dev/Owlibou/gnoma/internal/message"
"somegit.dev/Owlibou/gnoma/internal/permission"
"somegit.dev/Owlibou/gnoma/internal/provider"
"somegit.dev/Owlibou/gnoma/internal/router"
@@ -270,13 +271,28 @@ func main() {
systemPrompt = systemPrompt + "\n\n" + summary
}
// Load project docs as immutable context prefix
var prefixMsgs []message.Message
for _, name := range []string{"CLAUDE.md", ".gnoma/GNOMA.md"} {
data, err := os.ReadFile(name)
if err != nil {
continue
}
prefixMsgs = append(prefixMsgs,
message.NewUserText(fmt.Sprintf("[Project docs: %s]\n\n%s", name, string(data))),
message.NewAssistantText("I've read the project documentation and will follow these guidelines."),
)
logger.Debug("loaded project docs as context prefix", "file", name, "size", len(data))
}
// Create context window with summarize strategy (falls back to truncation)
var compactStrategy gnomactx.Strategy
compactStrategy = gnomactx.NewSummarizeStrategy(prov)
ctxWindow := gnomactx.NewWindow(gnomactx.WindowConfig{
MaxTokens: cfg.Provider.MaxTokens * 20, // rough: max_tokens is per-turn, context window ~20x
Strategy: compactStrategy,
Logger: logger,
MaxTokens: cfg.Provider.MaxTokens * 20, // rough: max_tokens is per-turn, context window ~20x
Strategy: compactStrategy,
PrefixMessages: prefixMsgs,
Logger: logger,
})
// Create engine
+72 -55
View File
@@ -18,18 +18,26 @@ type Strategy interface {
type Window struct {
tracker *Tracker
strategy Strategy
messages []message.Message
prefix []message.Message // immutable prefix (project docs), never compacted
messages []message.Message // mutable conversation history
logger *slog.Logger
// Compact hooks
onPreCompact func([]message.Message)
onPostCompact func([]message.Message)
// Circuit breaker: stop retrying after consecutive failures
consecutiveFailures int
maxFailures int
}
type WindowConfig struct {
MaxTokens int64
Strategy Strategy
Logger *slog.Logger
MaxTokens int64
Strategy Strategy
PrefixMessages []message.Message // immutable prefix, survives compaction
OnPreCompact func([]message.Message)
OnPostCompact func([]message.Message)
Logger *slog.Logger
}
func NewWindow(cfg WindowConfig) *Window {
@@ -38,11 +46,14 @@ func NewWindow(cfg WindowConfig) *Window {
logger = slog.Default()
}
return &Window{
tracker: NewTracker(cfg.MaxTokens),
strategy: cfg.Strategy,
messages: nil,
logger: logger,
maxFailures: 3,
tracker: NewTracker(cfg.MaxTokens),
strategy: cfg.Strategy,
prefix: cfg.PrefixMessages,
messages: nil,
logger: logger,
onPreCompact: cfg.OnPreCompact,
onPostCompact: cfg.OnPostCompact,
maxFailures: 3,
}
}
@@ -52,12 +63,23 @@ func (w *Window) Append(msg message.Message, usage message.Usage) {
w.tracker.Add(usage)
}
// Messages returns the current message history.
// Messages exposes only the mutable conversation history; the immutable
// prefix is not part of the returned slice.
func (w *Window) Messages() []message.Message { return w.messages }
// SetMessages replaces the message history (used after compaction).
// AllMessages returns the immutable prefix followed by the mutable history.
// Use this for building provider requests. When no prefix is configured the
// history slice itself is returned; otherwise a newly allocated slice holding
// prefix + history is built.
func (w *Window) AllMessages() []message.Message {
	prefixLen := len(w.prefix)
	if prefixLen == 0 {
		return w.messages
	}
	// Allocate once with room for both parts, then fill prefix and history.
	combined := make([]message.Message, prefixLen, prefixLen+len(w.messages))
	copy(combined, w.prefix)
	return append(combined, w.messages...)
}
// SetMessages swaps in msgs as the mutable conversation history (used after
// compaction). The prefix is left as it is.
func (w *Window) SetMessages(msgs []message.Message) { w.messages = msgs }
@@ -73,13 +95,25 @@ func (w *Window) CompactIfNeeded() (bool, error) {
if !w.tracker.ShouldCompact() {
return false, nil
}
return w.doCompact(false)
}
// ForceCompact compacts the mutable history without consulting the token
// threshold. Used for reactive compaction (e.g., after a 413 response).
// A history of two messages or fewer is left alone and reported as
// (false, nil).
func (w *Window) ForceCompact() (bool, error) {
	if len(w.messages) > 2 {
		return w.doCompact(true)
	}
	return false, nil
}
func (w *Window) doCompact(force bool) (bool, error) {
if w.strategy == nil {
return false, fmt.Errorf("no compaction strategy configured")
}
// Circuit breaker
if w.consecutiveFailures >= w.maxFailures {
// Circuit breaker (skip for forced)
if !force && w.consecutiveFailures >= w.maxFailures {
w.logger.Warn("compaction circuit breaker open",
"failures", w.consecutiveFailures,
"max", w.maxFailures,
@@ -87,18 +121,33 @@ func (w *Window) CompactIfNeeded() (bool, error) {
return false, nil
}
budget := w.tracker.Remaining() + w.tracker.Used()/2 // target: half of current usage
if budget < 0 {
var budget int64
if force {
budget = w.tracker.MaxTokens() / 2
} else {
budget = w.tracker.Remaining() + w.tracker.Used()/2
if budget < 0 {
budget = w.tracker.MaxTokens() / 2
}
}
w.logger.Info("compacting context",
label := "compacting"
if force {
label = "forced compacting"
}
w.logger.Info(label+" context",
"messages", len(w.messages),
"prefix", len(w.prefix),
"used", w.tracker.Used(),
"budget", budget,
"strategy", fmt.Sprintf("%T", w.strategy),
)
// Pre-compact hook
if w.onPreCompact != nil {
w.onPreCompact(w.messages)
}
// Compact only mutable messages — prefix is preserved separately
compacted, err := w.strategy.Compact(w.messages, budget)
if err != nil {
w.consecutiveFailures++
@@ -113,7 +162,6 @@ func (w *Window) CompactIfNeeded() (bool, error) {
originalLen := len(w.messages)
w.messages = compacted
// Rough estimate: reduce tracked tokens proportionally
ratio := float64(len(compacted)) / float64(originalLen+1)
w.tracker.Set(int64(float64(w.tracker.Used()) * ratio))
@@ -123,46 +171,15 @@ func (w *Window) CompactIfNeeded() (bool, error) {
"tokens_after", w.tracker.Used(),
)
// Post-compact hook
if w.onPostCompact != nil {
w.onPostCompact(compacted)
}
return true, nil
}
// ForceCompact runs compaction regardless of the token threshold.
// Used for reactive compaction (e.g., after a 413 response).
func (w *Window) ForceCompact() (bool, error) {
if w.strategy == nil {
return false, fmt.Errorf("no compaction strategy configured")
}
if len(w.messages) <= 2 {
return false, nil // nothing to compact
}
budget := w.tracker.MaxTokens() / 2
w.logger.Info("forced compaction",
"messages", len(w.messages),
"used", w.tracker.Used(),
"budget", budget,
)
compacted, err := w.strategy.Compact(w.messages, budget)
if err != nil {
return false, err
}
originalLen := len(w.messages)
w.messages = compacted
ratio := float64(len(compacted)) / float64(originalLen+1)
w.tracker.Set(int64(float64(w.tracker.Used()) * ratio))
w.logger.Info("forced compaction complete",
"messages_before", originalLen,
"messages_after", len(compacted),
"tokens_after", w.tracker.Used(),
)
return true, nil
}
// Reset clears all messages and usage.
// Reset clears all messages and usage (prefix is preserved).
func (w *Window) Reset() {
w.messages = nil
w.tracker.Reset()
+7 -2
View File
@@ -55,6 +55,10 @@ type Engine struct {
// Cached model capabilities, resolved lazily
modelCaps *provider.Capabilities
modelCapsFor string // model ID the cached caps are for
// Deferred tool loading: tools with ShouldDefer() are excluded until
// the model requests them. Activated on first use.
activatedTools map[string]bool
}
// New creates an engine.
@@ -67,8 +71,9 @@ func New(cfg Config) (*Engine, error) {
logger = slog.Default()
}
return &Engine{
cfg: cfg,
logger: logger,
cfg: cfg,
logger: logger,
activatedTools: make(map[string]bool),
}, nil
}
+14 -1
View File
@@ -190,8 +190,11 @@ func (e *Engine) runLoop(ctx context.Context, cb Callback) (*Turn, error) {
}
func (e *Engine) buildRequest(ctx context.Context) provider.Request {
// Scan messages through firewall if configured
// Use AllMessages (prefix + history) if context window manages prefix docs
messages := e.history
if e.cfg.Context != nil {
messages = e.cfg.Context.AllMessages()
}
systemPrompt := e.cfg.System
if e.cfg.Firewall != nil {
messages = e.cfg.Firewall.ScanOutgoingMessages(messages)
@@ -209,6 +212,10 @@ func (e *Engine) buildRequest(ctx context.Context) provider.Request {
if caps == nil || caps.ToolUse {
// nil caps = unknown model, include tools optimistically
for _, t := range e.cfg.Tools.All() {
// Skip deferred tools until the model requests them
if dt, ok := t.(tool.DeferrableTool); ok && dt.ShouldDefer() && !e.activatedTools[t.Name()] {
continue
}
req.Tools = append(req.Tools, provider.ToolDefinition{
Name: t.Name(),
Description: t.Description(),
@@ -237,6 +244,12 @@ func (e *Engine) executeTools(ctx context.Context, calls []message.ToolCall, cb
for _, call := range calls {
t, ok := e.cfg.Tools.Get(call.Name)
if ok {
// Activate deferred tools on first use
if dt, isDeferrable := t.(tool.DeferrableTool); isDeferrable && dt.ShouldDefer() {
e.activatedTools[call.Name] = true
}
}
if !ok {
e.logger.Warn("unknown tool", "name", call.Name)
unknownResults = append(unknownResults, message.ToolResult{
+1
View File
@@ -53,6 +53,7 @@ func (t *Tool) Description() string { return "Spawn a sub-agent (elf) to
func (t *Tool) Parameters() json.RawMessage { return paramSchema }
func (t *Tool) IsReadOnly() bool { return true }
func (t *Tool) IsDestructive() bool { return false }
// ShouldDefer always returns true, marking this tool as deferrable.
func (t *Tool) ShouldDefer() bool { return true }
type agentArgs struct {
Prompt string `json:"prompt"`
+1
View File
@@ -64,6 +64,7 @@ func (t *BatchTool) Description() string { return "Spawn multiple elfs (
func (t *BatchTool) Parameters() json.RawMessage { return batchSchema }
func (t *BatchTool) IsReadOnly() bool { return true }
func (t *BatchTool) IsDestructive() bool { return false }
// ShouldDefer always returns true, marking this tool as deferrable.
func (t *BatchTool) ShouldDefer() bool { return true }
type batchArgs struct {
Tasks []batchTask `json:"tasks"`
+7
View File
@@ -20,3 +20,10 @@ type Tool interface {
// IsDestructive returns true if the tool can cause irreversible changes.
IsDestructive() bool
}
// DeferrableTool is an optional interface for tools that can be excluded
// from initial requests and loaded on demand. Reduces token overhead
// for rarely-used tools with large schemas.
//
// A tool opts in by implementing this interface in addition to Tool.
type DeferrableTool interface {
// ShouldDefer reports whether the tool should be left out of requests
// until it has been activated.
ShouldDefer() bool
}