feat: Ollama/gemma4 compat — /init flow, stream filter, safety fixes
provider/openai: - Fix doubled tool call args (argsComplete flag): Ollama sends complete args in the first streaming chunk then repeats them as delta, causing doubled JSON and 400 errors in elfs - Handle fs: prefix (gemma4 uses fs:grep instead of fs.grep) - Add Reasoning field support for Ollama thinking output cmd/gnoma: - Early TTY detection so logger is created with correct destination before any component gets a reference to it (fixes slog WARN bleed into TUI textarea) permission: - Exempt spawn_elfs and agent tools from safety scanner: elf prompt text may legitimately mention .env/.ssh/credentials patterns and should not be blocked tui/app: - /init retry chain: no-tool-calls → spawn_elfs nudge → write nudge (ask for plain text output) → TUI fallback write from streamBuf - looksLikeAgentsMD + extractMarkdownDoc: validate and clean fallback content before writing (reject refusals, strip narrative preambles) - Collapse thinking output to 3 lines; ctrl+o to expand (live stream and committed messages) - Stream-level filter for model pseudo-tool-call blocks: suppresses <<tool_code>>...</tool_code>> and <<function_call>>...<tool_call|> from entering streamBuf across chunk boundaries - sanitizeAssistantText regex covers both block formats - Reset streamFilterClose at every turn start
This commit is contained in:
34
internal/context/compact.go
Normal file
34
internal/context/compact.go
Normal file
@@ -0,0 +1,34 @@
|
||||
package context
|
||||
|
||||
import "somegit.dev/Owlibou/gnoma/internal/message"
|
||||
|
||||
// safeSplitPoint adjusts a compaction split index to avoid orphaning tool
|
||||
// results. If history[target] is a tool-result message, it walks backward
|
||||
// until it finds a message that is not a tool result, so the assistant message
|
||||
// that issued the tool calls stays in the "recent" window alongside its results.
|
||||
//
|
||||
// target is the index of the first message to keep in the recent window.
|
||||
// Returns an adjusted index guaranteed to keep tool-call/tool-result pairs together.
|
||||
func safeSplitPoint(history []message.Message, target int) int {
|
||||
if target <= 0 || len(history) == 0 {
|
||||
return 0
|
||||
}
|
||||
if target >= len(history) {
|
||||
target = len(history) - 1
|
||||
}
|
||||
idx := target
|
||||
for idx > 0 && hasToolResults(history[idx]) {
|
||||
idx--
|
||||
}
|
||||
return idx
|
||||
}
|
||||
|
||||
// hasToolResults reports whether msg contains any ContentToolResult blocks.
|
||||
func hasToolResults(msg message.Message) bool {
|
||||
for _, c := range msg.Content {
|
||||
if c.Type == message.ContentToolResult {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
@@ -197,3 +197,215 @@ func (s *failingStrategy) Compact(msgs []message.Message, budget int64) ([]messa
|
||||
}
|
||||
|
||||
var _ Strategy = (*failingStrategy)(nil)
|
||||
|
||||
func TestWindow_AppendMessage_NoTokenTracking(t *testing.T) {
|
||||
w := NewWindow(WindowConfig{MaxTokens: 100_000})
|
||||
|
||||
before := w.Tracker().Used()
|
||||
w.AppendMessage(message.NewUserText("hello"))
|
||||
after := w.Tracker().Used()
|
||||
|
||||
if after != before {
|
||||
t.Errorf("AppendMessage should not change tracker: before=%d, after=%d", before, after)
|
||||
}
|
||||
if len(w.Messages()) != 1 {
|
||||
t.Errorf("expected 1 message, got %d", len(w.Messages()))
|
||||
}
|
||||
}
|
||||
|
||||
func TestWindow_CompactionUsesEstimateNotRatio(t *testing.T) {
|
||||
// Add many small messages then compact to 2.
|
||||
// The token estimate post-compaction should reflect actual content,
|
||||
// not a message-count ratio of the previous token count.
|
||||
w := NewWindow(WindowConfig{
|
||||
MaxTokens: 200_000,
|
||||
Strategy: &TruncateStrategy{KeepRecent: 2},
|
||||
})
|
||||
|
||||
// Push 20 messages, each costing 8000 tokens (total: 160K).
|
||||
// Compaction should leave 2 messages.
|
||||
for i := 0; i < 10; i++ {
|
||||
w.Append(message.NewUserText("msg"), message.Usage{InputTokens: 4000})
|
||||
w.Append(message.NewAssistantText("reply"), message.Usage{OutputTokens: 4000})
|
||||
}
|
||||
|
||||
// Push past critical
|
||||
w.Tracker().Set(200_000 - DefaultAutocompactBuffer)
|
||||
|
||||
compacted, err := w.CompactIfNeeded()
|
||||
if err != nil {
|
||||
t.Fatalf("CompactIfNeeded: %v", err)
|
||||
}
|
||||
if !compacted {
|
||||
t.Skip("compaction did not trigger")
|
||||
}
|
||||
|
||||
// After compaction to ~2 messages, EstimateMessages(2 short messages) ~ <100 tokens.
|
||||
// The old ratio approach would give ~(2/21) * ~(200K-13K) = ~17800 tokens.
|
||||
// Verify we're well below 17000, indicating the estimate-based approach.
|
||||
if w.Tracker().Used() >= 17_000 {
|
||||
t.Errorf("token tracker after compaction seems to use ratio (got %d tokens, expected <17000 for estimate-based)", w.Tracker().Used())
|
||||
}
|
||||
}
|
||||
|
||||
func TestWindow_AddPrefix_AppendsToPrefix(t *testing.T) {
|
||||
w := NewWindow(WindowConfig{
|
||||
MaxTokens: 100_000,
|
||||
PrefixMessages: []message.Message{message.NewSystemText("initial prefix")},
|
||||
})
|
||||
w.AppendMessage(message.NewUserText("hello"))
|
||||
|
||||
w.AddPrefix(
|
||||
message.NewUserText("[Project docs: AGENTS.md]\n\nBuild: make build"),
|
||||
message.NewAssistantText("Understood."),
|
||||
)
|
||||
|
||||
all := w.AllMessages()
|
||||
// prefix (1 initial + 2 added) + messages (1)
|
||||
if len(all) != 4 {
|
||||
t.Errorf("AllMessages() = %d, want 4", len(all))
|
||||
}
|
||||
// The added prefix messages come after the initial prefix, before conversation
|
||||
if all[1].Role != "user" {
|
||||
t.Errorf("all[1].Role = %q, want user", all[1].Role)
|
||||
}
|
||||
if all[3].Role != "user" {
|
||||
t.Errorf("all[3].Role = %q, want user (conversation msg)", all[3].Role)
|
||||
}
|
||||
}
|
||||
|
||||
func TestWindow_AddPrefix_SurvivesReset(t *testing.T) {
|
||||
w := NewWindow(WindowConfig{MaxTokens: 100_000})
|
||||
w.AppendMessage(message.NewUserText("hello"))
|
||||
|
||||
w.AddPrefix(message.NewSystemText("added prefix"))
|
||||
w.Reset()
|
||||
|
||||
all := w.AllMessages()
|
||||
// Prefix should survive Reset(), conversation messages cleared
|
||||
if len(all) != 1 {
|
||||
t.Errorf("AllMessages() after Reset = %d, want 1 (just added prefix)", len(all))
|
||||
}
|
||||
}
|
||||
|
||||
func TestWindow_Reset_ClearsMessages(t *testing.T) {
|
||||
w := NewWindow(WindowConfig{
|
||||
MaxTokens: 100_000,
|
||||
PrefixMessages: []message.Message{message.NewSystemText("prefix")},
|
||||
})
|
||||
w.AppendMessage(message.NewUserText("hello"))
|
||||
w.Tracker().Set(5000)
|
||||
|
||||
w.Reset()
|
||||
|
||||
if len(w.Messages()) != 0 {
|
||||
t.Errorf("Messages after reset = %d, want 0", len(w.Messages()))
|
||||
}
|
||||
if w.Tracker().Used() != 0 {
|
||||
t.Errorf("Tracker after reset = %d, want 0", w.Tracker().Used())
|
||||
}
|
||||
// Prefix should be preserved
|
||||
if len(w.AllMessages()) != 1 {
|
||||
t.Errorf("AllMessages after reset should have prefix only, got %d", len(w.AllMessages()))
|
||||
}
|
||||
}
|
||||
|
||||
// --- Compaction safety (safeSplitPoint) ---
|
||||
|
||||
func toolCallMsg() message.Message {
|
||||
return message.NewAssistantContent(
|
||||
message.NewToolCallContent(message.ToolCall{
|
||||
ID: "call-123",
|
||||
Name: "bash",
|
||||
}),
|
||||
)
|
||||
}
|
||||
|
||||
func toolResultMsg() message.Message {
|
||||
return message.NewToolResults(message.ToolResult{
|
||||
ToolCallID: "call-123",
|
||||
Content: "result",
|
||||
})
|
||||
}
|
||||
|
||||
func TestSafeSplitPoint_NoAdjustmentNeeded(t *testing.T) {
|
||||
history := []message.Message{
|
||||
message.NewUserText("hello"), // 0
|
||||
message.NewAssistantText("hi"), // 1
|
||||
message.NewUserText("do something"), // 2 — plain user text, safe split point
|
||||
}
|
||||
// Target split at index 2: keep history[2:] as recent. Not a tool result.
|
||||
got := safeSplitPoint(history, 2)
|
||||
if got != 2 {
|
||||
t.Errorf("safeSplitPoint = %d, want 2 (no adjustment needed)", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSafeSplitPoint_WalksBackPastToolResult(t *testing.T) {
|
||||
history := []message.Message{
|
||||
message.NewUserText("hello"), // 0
|
||||
message.NewAssistantText("hi"), // 1
|
||||
toolCallMsg(), // 2 — assistant with tool call
|
||||
toolResultMsg(), // 3 — tool result (should NOT be split point)
|
||||
message.NewAssistantText("done"), // 4
|
||||
}
|
||||
// Target split at 3 would orphan the tool result (no matching tool call in recent window)
|
||||
got := safeSplitPoint(history, 3)
|
||||
if got != 2 {
|
||||
t.Errorf("safeSplitPoint = %d, want 2 (walk back past tool result to tool call)", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSafeSplitPoint_NeverGoesNegative(t *testing.T) {
|
||||
// All messages are tool results — should return 0 (not go below 0)
|
||||
history := []message.Message{
|
||||
toolResultMsg(),
|
||||
toolResultMsg(),
|
||||
}
|
||||
got := safeSplitPoint(history, 0)
|
||||
if got != 0 {
|
||||
t.Errorf("safeSplitPoint = %d, want 0 (floor at 0)", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTruncate_NeverOrphansToolResult(t *testing.T) {
|
||||
s := NewTruncateStrategy() // keepRecent = 10
|
||||
s.KeepRecent = 3
|
||||
|
||||
// History: user, assistant+toolcall, user+toolresult, assistant, user
|
||||
// With keepRecent=3, naive split at index 2 would grab [toolresult, assistant, user]
|
||||
// — orphaning the tool call. safeSplitPoint should walk back to index 1 instead.
|
||||
history := []message.Message{
|
||||
message.NewUserText("start"), // 0
|
||||
toolCallMsg(), // 1 — assistant with tool call
|
||||
toolResultMsg(), // 2 — must stay paired with index 1
|
||||
message.NewAssistantText("done"), // 3
|
||||
message.NewUserText("next"), // 4
|
||||
}
|
||||
|
||||
result, err := s.Compact(history, 100_000)
|
||||
if err != nil {
|
||||
t.Fatalf("Compact error: %v", err)
|
||||
}
|
||||
|
||||
// Find the tool result message in result and verify its tool call ID
|
||||
// appears somewhere in a preceding assistant message
|
||||
toolCallIDs := make(map[string]bool)
|
||||
for _, m := range result {
|
||||
for _, c := range m.Content {
|
||||
if c.Type == message.ContentToolCall && c.ToolCall != nil {
|
||||
toolCallIDs[c.ToolCall.ID] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, m := range result {
|
||||
for _, c := range m.Content {
|
||||
if c.Type == message.ContentToolResult && c.ToolResult != nil {
|
||||
if !toolCallIDs[c.ToolResult.ToolCallID] {
|
||||
t.Errorf("orphaned tool result: ToolCallID %q has no matching tool call in compacted history",
|
||||
c.ToolResult.ToolCallID)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -56,13 +56,16 @@ func (s *SummarizeStrategy) Compact(messages []message.Message, budget int64) ([
|
||||
return messages, nil
|
||||
}
|
||||
|
||||
// Split: old messages to summarize, recent to keep
|
||||
// Split: old messages to summarize, recent to keep.
|
||||
// Adjust split to never orphan tool results — the assistant message with
|
||||
// matching tool calls must stay in the recent window with its results.
|
||||
keepRecent := 6
|
||||
if keepRecent > len(history) {
|
||||
keepRecent = len(history)
|
||||
}
|
||||
oldMessages := history[:len(history)-keepRecent]
|
||||
recentMessages := history[len(history)-keepRecent:]
|
||||
splitAt := safeSplitPoint(history, len(history)-keepRecent)
|
||||
oldMessages := history[:splitAt]
|
||||
recentMessages := history[splitAt:]
|
||||
|
||||
// Build conversation text for summarization
|
||||
var convText strings.Builder
|
||||
|
||||
@@ -46,7 +46,10 @@ func (s *TruncateStrategy) Compact(messages []message.Message, budget int64) ([]
|
||||
marker := message.NewUserText("[Earlier conversation was summarized to save context]")
|
||||
ack := message.NewAssistantText("Understood, I'll continue from here.")
|
||||
|
||||
recent := history[len(history)-keepRecent:]
|
||||
// Adjust split to never orphan tool results (the assistant message with
|
||||
// matching tool calls must stay in the recent window with its results).
|
||||
splitAt := safeSplitPoint(history, len(history)-keepRecent)
|
||||
recent := history[splitAt:]
|
||||
result := append(systemMsgs, marker, ack)
|
||||
result = append(result, recent...)
|
||||
return result, nil
|
||||
|
||||
@@ -57,12 +57,20 @@ func NewWindow(cfg WindowConfig) *Window {
|
||||
}
|
||||
}
|
||||
|
||||
// Append adds a message and tracks usage.
|
||||
// Append adds a message and tracks usage (legacy: accumulates InputTokens+OutputTokens).
|
||||
// Prefer AppendMessage + Tracker().Set() for accurate per-round tracking.
|
||||
func (w *Window) Append(msg message.Message, usage message.Usage) {
|
||||
w.messages = append(w.messages, msg)
|
||||
w.tracker.Add(usage)
|
||||
}
|
||||
|
||||
// AppendMessage adds a message without touching the token tracker.
|
||||
// Use this for user messages, tool results, and injected context — callers
|
||||
// are responsible for updating the tracker separately (e.g., via Tracker().Set).
|
||||
func (w *Window) AppendMessage(msg message.Message) {
|
||||
w.messages = append(w.messages, msg)
|
||||
}
|
||||
|
||||
// Messages returns the mutable conversation history (without prefix).
|
||||
func (w *Window) Messages() []message.Message {
|
||||
return w.messages
|
||||
@@ -162,8 +170,9 @@ func (w *Window) doCompact(force bool) (bool, error) {
|
||||
originalLen := len(w.messages)
|
||||
w.messages = compacted
|
||||
|
||||
ratio := float64(len(compacted)) / float64(originalLen+1)
|
||||
w.tracker.Set(int64(float64(w.tracker.Used()) * ratio))
|
||||
// Re-estimate tokens from actual message content rather than using a
|
||||
// message-count ratio (which is unrelated to token count).
|
||||
w.tracker.Set(EstimateMessages(compacted))
|
||||
|
||||
w.logger.Info("compaction complete",
|
||||
"messages_before", originalLen,
|
||||
@@ -179,6 +188,12 @@ func (w *Window) doCompact(force bool) (bool, error) {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// AddPrefix appends messages to the immutable prefix.
|
||||
// Used to hot-load project docs (e.g., after /init generates AGENTS.md).
|
||||
func (w *Window) AddPrefix(msgs ...message.Message) {
|
||||
w.prefix = append(w.prefix, msgs...)
|
||||
}
|
||||
|
||||
// Reset clears all messages and usage (prefix is preserved).
|
||||
func (w *Window) Reset() {
|
||||
w.messages = nil
|
||||
|
||||
Reference in New Issue
Block a user