feat: M1-M7 gap audit phase 3 — context prefix, deferred tools, compact hooks
Gap 11 (M6): Fixed context prefix
- WindowConfig.PrefixMessages stores immutable docs (CLAUDE.md, .gnoma/GNOMA.md)
- Prefix stripped before compaction, prepended after — survives all compaction
- AllMessages() returns prefix + history for provider requests
- main.go loads CLAUDE.md and .gnoma/GNOMA.md at startup as prefix

Gap 12 (M6): Deferred tool loading
- DeferrableTool optional interface: ShouldDefer() bool
- buildRequest() skips deferred tools until activated
- Tools auto-activate on first model request (activatedTools map)
- agent + spawn_elfs marked as deferrable (large schemas, rarely needed early)
- Saves ~800 tokens per deferred tool per request

Gap 13 (M6): Pre/post compact hooks
- OnPreCompact/OnPostCompact callbacks in WindowConfig
- Called in doCompact() (shared by CompactIfNeeded + ForceCompact)
- M8 hooks system will extend these to full protocol
This commit is contained in:
+19
-3
@@ -15,6 +15,7 @@ import (
|
||||
"encoding/json"
|
||||
gnomacfg "somegit.dev/Owlibou/gnoma/internal/config"
|
||||
gnomactx "somegit.dev/Owlibou/gnoma/internal/context"
|
||||
"somegit.dev/Owlibou/gnoma/internal/message"
|
||||
"somegit.dev/Owlibou/gnoma/internal/permission"
|
||||
"somegit.dev/Owlibou/gnoma/internal/provider"
|
||||
"somegit.dev/Owlibou/gnoma/internal/router"
|
||||
@@ -270,13 +271,28 @@ func main() {
|
||||
systemPrompt = systemPrompt + "\n\n" + summary
|
||||
}
|
||||
|
||||
// Load project docs as immutable context prefix
|
||||
var prefixMsgs []message.Message
|
||||
for _, name := range []string{"CLAUDE.md", ".gnoma/GNOMA.md"} {
|
||||
data, err := os.ReadFile(name)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
prefixMsgs = append(prefixMsgs,
|
||||
message.NewUserText(fmt.Sprintf("[Project docs: %s]\n\n%s", name, string(data))),
|
||||
message.NewAssistantText("I've read the project documentation and will follow these guidelines."),
|
||||
)
|
||||
logger.Debug("loaded project docs as context prefix", "file", name, "size", len(data))
|
||||
}
|
||||
|
||||
// Create context window with summarize strategy (falls back to truncation)
|
||||
var compactStrategy gnomactx.Strategy
|
||||
compactStrategy = gnomactx.NewSummarizeStrategy(prov)
|
||||
ctxWindow := gnomactx.NewWindow(gnomactx.WindowConfig{
|
||||
MaxTokens: cfg.Provider.MaxTokens * 20, // rough: max_tokens is per-turn, context window ~20x
|
||||
Strategy: compactStrategy,
|
||||
Logger: logger,
|
||||
MaxTokens: cfg.Provider.MaxTokens * 20, // rough: max_tokens is per-turn, context window ~20x
|
||||
Strategy: compactStrategy,
|
||||
PrefixMessages: prefixMsgs,
|
||||
Logger: logger,
|
||||
})
|
||||
|
||||
// Create engine
|
||||
|
||||
+72
-55
@@ -18,18 +18,26 @@ type Strategy interface {
|
||||
type Window struct {
|
||||
tracker *Tracker
|
||||
strategy Strategy
|
||||
messages []message.Message
|
||||
prefix []message.Message // immutable prefix (project docs), never compacted
|
||||
messages []message.Message // mutable conversation history
|
||||
logger *slog.Logger
|
||||
|
||||
// Compact hooks
|
||||
onPreCompact func([]message.Message)
|
||||
onPostCompact func([]message.Message)
|
||||
|
||||
// Circuit breaker: stop retrying after consecutive failures
|
||||
consecutiveFailures int
|
||||
maxFailures int
|
||||
}
|
||||
|
||||
type WindowConfig struct {
|
||||
MaxTokens int64
|
||||
Strategy Strategy
|
||||
Logger *slog.Logger
|
||||
MaxTokens int64
|
||||
Strategy Strategy
|
||||
PrefixMessages []message.Message // immutable prefix, survives compaction
|
||||
OnPreCompact func([]message.Message)
|
||||
OnPostCompact func([]message.Message)
|
||||
Logger *slog.Logger
|
||||
}
|
||||
|
||||
func NewWindow(cfg WindowConfig) *Window {
|
||||
@@ -38,11 +46,14 @@ func NewWindow(cfg WindowConfig) *Window {
|
||||
logger = slog.Default()
|
||||
}
|
||||
return &Window{
|
||||
tracker: NewTracker(cfg.MaxTokens),
|
||||
strategy: cfg.Strategy,
|
||||
messages: nil,
|
||||
logger: logger,
|
||||
maxFailures: 3,
|
||||
tracker: NewTracker(cfg.MaxTokens),
|
||||
strategy: cfg.Strategy,
|
||||
prefix: cfg.PrefixMessages,
|
||||
messages: nil,
|
||||
logger: logger,
|
||||
onPreCompact: cfg.OnPreCompact,
|
||||
onPostCompact: cfg.OnPostCompact,
|
||||
maxFailures: 3,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -52,12 +63,23 @@ func (w *Window) Append(msg message.Message, usage message.Usage) {
|
||||
w.tracker.Add(usage)
|
||||
}
|
||||
|
||||
// Messages returns the current message history.
|
||||
// Messages returns the mutable conversation history (without prefix).
|
||||
func (w *Window) Messages() []message.Message {
|
||||
return w.messages
|
||||
}
|
||||
|
||||
// SetMessages replaces the message history (used after compaction).
|
||||
// AllMessages returns prefix + mutable history. Use this for building provider requests.
|
||||
func (w *Window) AllMessages() []message.Message {
|
||||
if len(w.prefix) == 0 {
|
||||
return w.messages
|
||||
}
|
||||
all := make([]message.Message, 0, len(w.prefix)+len(w.messages))
|
||||
all = append(all, w.prefix...)
|
||||
all = append(all, w.messages...)
|
||||
return all
|
||||
}
|
||||
|
||||
// SetMessages replaces the mutable message history (used after compaction).
|
||||
func (w *Window) SetMessages(msgs []message.Message) {
|
||||
w.messages = msgs
|
||||
}
|
||||
@@ -73,13 +95,25 @@ func (w *Window) CompactIfNeeded() (bool, error) {
|
||||
if !w.tracker.ShouldCompact() {
|
||||
return false, nil
|
||||
}
|
||||
return w.doCompact(false)
|
||||
}
|
||||
|
||||
// ForceCompact runs compaction regardless of the token threshold.
|
||||
// Used for reactive compaction (e.g., after a 413 response).
|
||||
func (w *Window) ForceCompact() (bool, error) {
|
||||
if len(w.messages) <= 2 {
|
||||
return false, nil
|
||||
}
|
||||
return w.doCompact(true)
|
||||
}
|
||||
|
||||
func (w *Window) doCompact(force bool) (bool, error) {
|
||||
if w.strategy == nil {
|
||||
return false, fmt.Errorf("no compaction strategy configured")
|
||||
}
|
||||
|
||||
// Circuit breaker
|
||||
if w.consecutiveFailures >= w.maxFailures {
|
||||
// Circuit breaker (skip for forced)
|
||||
if !force && w.consecutiveFailures >= w.maxFailures {
|
||||
w.logger.Warn("compaction circuit breaker open",
|
||||
"failures", w.consecutiveFailures,
|
||||
"max", w.maxFailures,
|
||||
@@ -87,18 +121,33 @@ func (w *Window) CompactIfNeeded() (bool, error) {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
budget := w.tracker.Remaining() + w.tracker.Used()/2 // target: half of current usage
|
||||
if budget < 0 {
|
||||
var budget int64
|
||||
if force {
|
||||
budget = w.tracker.MaxTokens() / 2
|
||||
} else {
|
||||
budget = w.tracker.Remaining() + w.tracker.Used()/2
|
||||
if budget < 0 {
|
||||
budget = w.tracker.MaxTokens() / 2
|
||||
}
|
||||
}
|
||||
|
||||
w.logger.Info("compacting context",
|
||||
label := "compacting"
|
||||
if force {
|
||||
label = "forced compacting"
|
||||
}
|
||||
w.logger.Info(label+" context",
|
||||
"messages", len(w.messages),
|
||||
"prefix", len(w.prefix),
|
||||
"used", w.tracker.Used(),
|
||||
"budget", budget,
|
||||
"strategy", fmt.Sprintf("%T", w.strategy),
|
||||
)
|
||||
|
||||
// Pre-compact hook
|
||||
if w.onPreCompact != nil {
|
||||
w.onPreCompact(w.messages)
|
||||
}
|
||||
|
||||
// Compact only mutable messages — prefix is preserved separately
|
||||
compacted, err := w.strategy.Compact(w.messages, budget)
|
||||
if err != nil {
|
||||
w.consecutiveFailures++
|
||||
@@ -113,7 +162,6 @@ func (w *Window) CompactIfNeeded() (bool, error) {
|
||||
originalLen := len(w.messages)
|
||||
w.messages = compacted
|
||||
|
||||
// Rough estimate: reduce tracked tokens proportionally
|
||||
ratio := float64(len(compacted)) / float64(originalLen+1)
|
||||
w.tracker.Set(int64(float64(w.tracker.Used()) * ratio))
|
||||
|
||||
@@ -123,46 +171,15 @@ func (w *Window) CompactIfNeeded() (bool, error) {
|
||||
"tokens_after", w.tracker.Used(),
|
||||
)
|
||||
|
||||
// Post-compact hook
|
||||
if w.onPostCompact != nil {
|
||||
w.onPostCompact(compacted)
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// ForceCompact runs compaction regardless of the token threshold.
|
||||
// Used for reactive compaction (e.g., after a 413 response).
|
||||
func (w *Window) ForceCompact() (bool, error) {
|
||||
if w.strategy == nil {
|
||||
return false, fmt.Errorf("no compaction strategy configured")
|
||||
}
|
||||
if len(w.messages) <= 2 {
|
||||
return false, nil // nothing to compact
|
||||
}
|
||||
|
||||
budget := w.tracker.MaxTokens() / 2
|
||||
|
||||
w.logger.Info("forced compaction",
|
||||
"messages", len(w.messages),
|
||||
"used", w.tracker.Used(),
|
||||
"budget", budget,
|
||||
)
|
||||
|
||||
compacted, err := w.strategy.Compact(w.messages, budget)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
originalLen := len(w.messages)
|
||||
w.messages = compacted
|
||||
ratio := float64(len(compacted)) / float64(originalLen+1)
|
||||
w.tracker.Set(int64(float64(w.tracker.Used()) * ratio))
|
||||
|
||||
w.logger.Info("forced compaction complete",
|
||||
"messages_before", originalLen,
|
||||
"messages_after", len(compacted),
|
||||
"tokens_after", w.tracker.Used(),
|
||||
)
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// Reset clears all messages and usage.
|
||||
// Reset clears all messages and usage (prefix is preserved).
|
||||
func (w *Window) Reset() {
|
||||
w.messages = nil
|
||||
w.tracker.Reset()
|
||||
|
||||
@@ -55,6 +55,10 @@ type Engine struct {
|
||||
// Cached model capabilities, resolved lazily
|
||||
modelCaps *provider.Capabilities
|
||||
modelCapsFor string // model ID the cached caps are for
|
||||
|
||||
// Deferred tool loading: tools with ShouldDefer() are excluded until
|
||||
// the model requests them. Activated on first use.
|
||||
activatedTools map[string]bool
|
||||
}
|
||||
|
||||
// New creates an engine.
|
||||
@@ -67,8 +71,9 @@ func New(cfg Config) (*Engine, error) {
|
||||
logger = slog.Default()
|
||||
}
|
||||
return &Engine{
|
||||
cfg: cfg,
|
||||
logger: logger,
|
||||
cfg: cfg,
|
||||
logger: logger,
|
||||
activatedTools: make(map[string]bool),
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
||||
+14
-1
@@ -190,8 +190,11 @@ func (e *Engine) runLoop(ctx context.Context, cb Callback) (*Turn, error) {
|
||||
}
|
||||
|
||||
func (e *Engine) buildRequest(ctx context.Context) provider.Request {
|
||||
// Scan messages through firewall if configured
|
||||
// Use AllMessages (prefix + history) if context window manages prefix docs
|
||||
messages := e.history
|
||||
if e.cfg.Context != nil {
|
||||
messages = e.cfg.Context.AllMessages()
|
||||
}
|
||||
systemPrompt := e.cfg.System
|
||||
if e.cfg.Firewall != nil {
|
||||
messages = e.cfg.Firewall.ScanOutgoingMessages(messages)
|
||||
@@ -209,6 +212,10 @@ func (e *Engine) buildRequest(ctx context.Context) provider.Request {
|
||||
if caps == nil || caps.ToolUse {
|
||||
// nil caps = unknown model, include tools optimistically
|
||||
for _, t := range e.cfg.Tools.All() {
|
||||
// Skip deferred tools until the model requests them
|
||||
if dt, ok := t.(tool.DeferrableTool); ok && dt.ShouldDefer() && !e.activatedTools[t.Name()] {
|
||||
continue
|
||||
}
|
||||
req.Tools = append(req.Tools, provider.ToolDefinition{
|
||||
Name: t.Name(),
|
||||
Description: t.Description(),
|
||||
@@ -237,6 +244,12 @@ func (e *Engine) executeTools(ctx context.Context, calls []message.ToolCall, cb
|
||||
|
||||
for _, call := range calls {
|
||||
t, ok := e.cfg.Tools.Get(call.Name)
|
||||
if ok {
|
||||
// Activate deferred tools on first use
|
||||
if dt, isDeferrable := t.(tool.DeferrableTool); isDeferrable && dt.ShouldDefer() {
|
||||
e.activatedTools[call.Name] = true
|
||||
}
|
||||
}
|
||||
if !ok {
|
||||
e.logger.Warn("unknown tool", "name", call.Name)
|
||||
unknownResults = append(unknownResults, message.ToolResult{
|
||||
|
||||
@@ -53,6 +53,7 @@ func (t *Tool) Description() string { return "Spawn a sub-agent (elf) to
|
||||
// Parameters returns the tool's parameter schema (paramSchema) as raw JSON.
func (t *Tool) Parameters() json.RawMessage { return paramSchema }
|
||||
// IsReadOnly always reports true for this tool.
func (t *Tool) IsReadOnly() bool { return true }
|
||||
// IsDestructive always reports false for this tool.
func (t *Tool) IsDestructive() bool { return false }
|
||||
// ShouldDefer always reports true, marking this tool as deferrable so it can
// be left out of requests until it has been activated.
func (t *Tool) ShouldDefer() bool { return true }
|
||||
|
||||
type agentArgs struct {
|
||||
Prompt string `json:"prompt"`
|
||||
|
||||
@@ -64,6 +64,7 @@ func (t *BatchTool) Description() string { return "Spawn multiple elfs (
|
||||
// Parameters returns the batch tool's parameter schema (batchSchema) as raw JSON.
func (t *BatchTool) Parameters() json.RawMessage { return batchSchema }
|
||||
// IsReadOnly always reports true for this tool.
func (t *BatchTool) IsReadOnly() bool { return true }
|
||||
// IsDestructive always reports false for this tool.
func (t *BatchTool) IsDestructive() bool { return false }
|
||||
// ShouldDefer always reports true, marking this tool as deferrable so it can
// be left out of requests until it has been activated.
func (t *BatchTool) ShouldDefer() bool { return true }
|
||||
|
||||
type batchArgs struct {
|
||||
Tasks []batchTask `json:"tasks"`
|
||||
|
||||
@@ -20,3 +20,10 @@ type Tool interface {
|
||||
// IsDestructive returns true if the tool can cause irreversible changes.
|
||||
IsDestructive() bool
|
||||
}
|
||||
|
||||
// DeferrableTool is an optional interface for tools that can be excluded
|
||||
// from initial requests and loaded on demand. Reduces token overhead
|
||||
// for rarely-used tools with large schemas.
//
// A tool opts in by implementing ShouldDefer in addition to the Tool
// interface; callers detect it with a type assertion, so tools that do not
// implement it are never deferred.
|
||||
type DeferrableTool interface {
|
||||
// ShouldDefer reports whether the tool should be left out of requests
// until it has been activated.
ShouldDefer() bool
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user