feat: M1-M7 gap audit phase 3 — context prefix, deferred tools, compact hooks

Gap 11 (M6): Fixed context prefix
- WindowConfig.PrefixMessages stores immutable docs (CLAUDE.md, .gnoma/GNOMA.md)
- Prefix stripped before compaction, prepended after — survives all compaction
- AllMessages() returns prefix + history for provider requests
- main.go loads CLAUDE.md and .gnoma/GNOMA.md at startup as prefix

Gap 12 (M6): Deferred tool loading
- DeferrableTool optional interface: ShouldDefer() bool
- buildRequest() skips deferred tools until activated
- Tools auto-activate on first model request (activatedTools map)
- agent + spawn_elfs marked as deferrable (large schemas, rarely needed early)
- Saves ~800 tokens per deferred tool per request

Gap 13 (M6): Pre/post compact hooks
- OnPreCompact/OnPostCompact callbacks in WindowConfig
- Called in doCompact() (shared by CompactIfNeeded + ForceCompact)
- M8 hooks system will extend these to full protocol
2026-04-04 20:46:50 +02:00
parent 11363f3b97
commit 95dfd0cf0c
7 changed files with 121 additions and 61 deletions
@@ -15,6 +15,7 @@ import (
 	"encoding/json"
 	gnomacfg "somegit.dev/Owlibou/gnoma/internal/config"
 	gnomactx "somegit.dev/Owlibou/gnoma/internal/context"
+	"somegit.dev/Owlibou/gnoma/internal/message"
 	"somegit.dev/Owlibou/gnoma/internal/permission"
 	"somegit.dev/Owlibou/gnoma/internal/provider"
 	"somegit.dev/Owlibou/gnoma/internal/router"
@@ -270,13 +271,28 @@ func main() {
 		systemPrompt = systemPrompt + "\n\n" + summary
 	}

+	// Load project docs as immutable context prefix
+	var prefixMsgs []message.Message
+	for _, name := range []string{"CLAUDE.md", ".gnoma/GNOMA.md"} {
+		data, err := os.ReadFile(name)
+		if err != nil {
+			continue
+		}
+		prefixMsgs = append(prefixMsgs,
+			message.NewUserText(fmt.Sprintf("[Project docs: %s]\n\n%s", name, string(data))),
+			message.NewAssistantText("I've read the project documentation and will follow these guidelines."),
+		)
+		logger.Debug("loaded project docs as context prefix", "file", name, "size", len(data))
+	}
+
 	// Create context window with summarize strategy (falls back to truncation)
 	var compactStrategy gnomactx.Strategy
 	compactStrategy = gnomactx.NewSummarizeStrategy(prov)
 	ctxWindow := gnomactx.NewWindow(gnomactx.WindowConfig{
-		MaxTokens: cfg.Provider.MaxTokens * 20, // rough: max_tokens is per-turn, context window ~20x
-		Strategy:  compactStrategy,
-		Logger:    logger,
+		MaxTokens:      cfg.Provider.MaxTokens * 20, // rough: max_tokens is per-turn, context window ~20x
+		Strategy:       compactStrategy,
+		PrefixMessages: prefixMsgs,
+		Logger:         logger,
 	})

 	// Create engine
@@ -18,18 +18,26 @@ type Strategy interface {
 type Window struct {
 	tracker  *Tracker
 	strategy Strategy
-	messages []message.Message
+	prefix   []message.Message // immutable prefix (project docs), never compacted
+	messages []message.Message // mutable conversation history
 	logger   *slog.Logger

+	// Compact hooks
+	onPreCompact  func([]message.Message)
+	onPostCompact func([]message.Message)
+
 	// Circuit breaker: stop retrying after consecutive failures
 	consecutiveFailures int
 	maxFailures         int
 }

 type WindowConfig struct {
-	MaxTokens int64
-	Strategy  Strategy
-	Logger    *slog.Logger
+	MaxTokens      int64
+	Strategy       Strategy
+	PrefixMessages []message.Message // immutable prefix, survives compaction
+	OnPreCompact   func([]message.Message)
+	OnPostCompact  func([]message.Message)
+	Logger         *slog.Logger
 }

 func NewWindow(cfg WindowConfig) *Window {
@@ -38,11 +46,14 @@ func NewWindow(cfg WindowConfig) *Window {
 		logger = slog.Default()
 	}
 	return &Window{
-		tracker:     NewTracker(cfg.MaxTokens),
-		strategy:    cfg.Strategy,
-		messages:    nil,
-		logger:      logger,
-		maxFailures: 3,
+		tracker:       NewTracker(cfg.MaxTokens),
+		strategy:      cfg.Strategy,
+		prefix:        cfg.PrefixMessages,
+		messages:      nil,
+		logger:        logger,
+		onPreCompact:  cfg.OnPreCompact,
+		onPostCompact: cfg.OnPostCompact,
+		maxFailures:   3,
 	}
 }

@@ -52,12 +63,23 @@ func (w *Window) Append(msg message.Message, usage message.Usage) {
 	w.tracker.Add(usage)
 }

-// Messages returns the current message history.
+// Messages returns the mutable conversation history (without prefix).
 func (w *Window) Messages() []message.Message {
 	return w.messages
 }

-// SetMessages replaces the message history (used after compaction).
+// AllMessages returns prefix + mutable history. Use this for building provider requests.
+func (w *Window) AllMessages() []message.Message {
+	if len(w.prefix) == 0 {
+		return w.messages
+	}
+	all := make([]message.Message, 0, len(w.prefix)+len(w.messages))
+	all = append(all, w.prefix...)
+	all = append(all, w.messages...)
+	return all
+}
+
+// SetMessages replaces the mutable message history (used after compaction).
 func (w *Window) SetMessages(msgs []message.Message) {
 	w.messages = msgs
 }
@@ -73,13 +95,25 @@ func (w *Window) CompactIfNeeded() (bool, error) {
 	if !w.tracker.ShouldCompact() {
 		return false, nil
 	}
+	return w.doCompact(false)
+}

+// ForceCompact runs compaction regardless of the token threshold.
+// Used for reactive compaction (e.g., after a 413 response).
+func (w *Window) ForceCompact() (bool, error) {
+	if len(w.messages) <= 2 {
+		return false, nil
+	}
+	return w.doCompact(true)
+}
+
+func (w *Window) doCompact(force bool) (bool, error) {
 	if w.strategy == nil {
 		return false, fmt.Errorf("no compaction strategy configured")
 	}

-	// Circuit breaker
-	if w.consecutiveFailures >= w.maxFailures {
+	// Circuit breaker (skip for forced)
+	if !force && w.consecutiveFailures >= w.maxFailures {
 		w.logger.Warn("compaction circuit breaker open",
 			"failures", w.consecutiveFailures,
 			"max", w.maxFailures,
@@ -87,18 +121,33 @@ func (w *Window) CompactIfNeeded() (bool, error) {
 		return false, nil
 	}

-	budget := w.tracker.Remaining() + w.tracker.Used()/2 // target: half of current usage
-	if budget < 0 {
+	var budget int64
+	if force {
 		budget = w.tracker.MaxTokens() / 2
+	} else {
+		budget = w.tracker.Remaining() + w.tracker.Used()/2
+		if budget < 0 {
+			budget = w.tracker.MaxTokens() / 2
+		}
 	}

-	w.logger.Info("compacting context",
+	label := "compacting"
+	if force {
+		label = "forced compacting"
+	}
+	w.logger.Info(label+" context",
 		"messages", len(w.messages),
+		"prefix", len(w.prefix),
 		"used", w.tracker.Used(),
 		"budget", budget,
-		"strategy", fmt.Sprintf("%T", w.strategy),
 	)

+	// Pre-compact hook
+	if w.onPreCompact != nil {
+		w.onPreCompact(w.messages)
+	}
+
+	// Compact only mutable messages — prefix is preserved separately
 	compacted, err := w.strategy.Compact(w.messages, budget)
 	if err != nil {
 		w.consecutiveFailures++
@@ -113,7 +162,6 @@ func (w *Window) CompactIfNeeded() (bool, error) {
 	originalLen := len(w.messages)
 	w.messages = compacted

-	// Rough estimate: reduce tracked tokens proportionally
 	ratio := float64(len(compacted)) / float64(originalLen+1)
 	w.tracker.Set(int64(float64(w.tracker.Used()) * ratio))

@@ -123,46 +171,15 @@ func (w *Window) CompactIfNeeded() (bool, error) {
 		"tokens_after", w.tracker.Used(),
 	)

+	// Post-compact hook
+	if w.onPostCompact != nil {
+		w.onPostCompact(compacted)
+	}
+
 	return true, nil
 }

-// ForceCompact runs compaction regardless of the token threshold.
-// Used for reactive compaction (e.g., after a 413 response).
-func (w *Window) ForceCompact() (bool, error) {
-	if w.strategy == nil {
-		return false, fmt.Errorf("no compaction strategy configured")
-	}
-	if len(w.messages) <= 2 {
-		return false, nil // nothing to compact
-	}
-
-	budget := w.tracker.MaxTokens() / 2
-
-	w.logger.Info("forced compaction",
-		"messages", len(w.messages),
-		"used", w.tracker.Used(),
-		"budget", budget,
-	)
-
-	compacted, err := w.strategy.Compact(w.messages, budget)
-	if err != nil {
-		return false, err
-	}
-
-	originalLen := len(w.messages)
-	w.messages = compacted
-	ratio := float64(len(compacted)) / float64(originalLen+1)
-	w.tracker.Set(int64(float64(w.tracker.Used()) * ratio))
-
-	w.logger.Info("forced compaction complete",
-		"messages_before", originalLen,
-		"messages_after", len(compacted),
-		"tokens_after", w.tracker.Used(),
-	)
-	return true, nil
-}
-
-// Reset clears all messages and usage.
+// Reset clears all messages and usage (prefix is preserved).
 func (w *Window) Reset() {
 	w.messages = nil
 	w.tracker.Reset()
@@ -55,6 +55,10 @@ type Engine struct {
 	// Cached model capabilities, resolved lazily
 	modelCaps    *provider.Capabilities
 	modelCapsFor string // model ID the cached caps are for
+
+	// Deferred tool loading: tools with ShouldDefer() are excluded until
+	// the model requests them. Activated on first use.
+	activatedTools map[string]bool
 }

 // New creates an engine.
@@ -67,8 +71,9 @@ func New(cfg Config) (*Engine, error) {
 		logger = slog.Default()
 	}
 	return &Engine{
-		cfg:    cfg,
-		logger: logger,
+		cfg:            cfg,
+		logger:         logger,
+		activatedTools: make(map[string]bool),
 	}, nil
 }

@@ -190,8 +190,11 @@ func (e *Engine) runLoop(ctx context.Context, cb Callback) (*Turn, error) {
 }

 func (e *Engine) buildRequest(ctx context.Context) provider.Request {
-	// Scan messages through firewall if configured
+	// Use AllMessages (prefix + history) if context window manages prefix docs
 	messages := e.history
+	if e.cfg.Context != nil {
+		messages = e.cfg.Context.AllMessages()
+	}
 	systemPrompt := e.cfg.System
 	if e.cfg.Firewall != nil {
 		messages = e.cfg.Firewall.ScanOutgoingMessages(messages)
@@ -209,6 +212,10 @@ func (e *Engine) buildRequest(ctx context.Context) provider.Request {
 	if caps == nil || caps.ToolUse {
 		// nil caps = unknown model, include tools optimistically
 		for _, t := range e.cfg.Tools.All() {
+			// Skip deferred tools until the model requests them
+			if dt, ok := t.(tool.DeferrableTool); ok && dt.ShouldDefer() && !e.activatedTools[t.Name()] {
+				continue
+			}
 			req.Tools = append(req.Tools, provider.ToolDefinition{
 				Name:        t.Name(),
 				Description: t.Description(),
@@ -237,6 +244,12 @@ func (e *Engine) executeTools(ctx context.Context, calls []message.ToolCall, cb

 	for _, call := range calls {
 		t, ok := e.cfg.Tools.Get(call.Name)
+		if ok {
+			// Activate deferred tools on first use
+			if dt, isDeferrable := t.(tool.DeferrableTool); isDeferrable && dt.ShouldDefer() {
+				e.activatedTools[call.Name] = true
+			}
+		}
 		if !ok {
 			e.logger.Warn("unknown tool", "name", call.Name)
 			unknownResults = append(unknownResults, message.ToolResult{
@@ -53,6 +53,7 @@ func (t *Tool) Description() string         { return "Spawn a sub-agent (elf) to
 func (t *Tool) Parameters() json.RawMessage { return paramSchema }
 func (t *Tool) IsReadOnly() bool            { return true }
 func (t *Tool) IsDestructive() bool         { return false }
+func (t *Tool) ShouldDefer() bool           { return true }

 type agentArgs struct {
 	Prompt   string `json:"prompt"`
@@ -64,6 +64,7 @@ func (t *BatchTool) Description() string         { return "Spawn multiple elfs (
 func (t *BatchTool) Parameters() json.RawMessage { return batchSchema }
 func (t *BatchTool) IsReadOnly() bool            { return true }
 func (t *BatchTool) IsDestructive() bool         { return false }
+func (t *BatchTool) ShouldDefer() bool           { return true }

 type batchArgs struct {
 	Tasks    []batchTask `json:"tasks"`
@@ -20,3 +20,10 @@ type Tool interface {
 	// IsDestructive returns true if the tool can cause irreversible changes.
 	IsDestructive() bool
 }
+
+// DeferrableTool is an optional interface for tools that can be excluded
+// from initial requests and loaded on demand. Reduces token overhead
+// for rarely-used tools with large schemas.
+type DeferrableTool interface {
+	ShouldDefer() bool
+}