feat: Ollama/gemma4 compat — /init flow, stream filter, safety fixes
provider/openai: - Fix doubled tool call args (argsComplete flag): Ollama sends complete args in the first streaming chunk then repeats them as delta, causing doubled JSON and 400 errors in elfs - Handle fs: prefix (gemma4 uses fs:grep instead of fs.grep) - Add Reasoning field support for Ollama thinking output cmd/gnoma: - Early TTY detection so logger is created with correct destination before any component gets a reference to it (fixes slog WARN bleed into TUI textarea) permission: - Exempt spawn_elfs and agent tools from safety scanner: elf prompt text may legitimately mention .env/.ssh/credentials patterns and should not be blocked tui/app: - /init retry chain: no-tool-calls → spawn_elfs nudge → write nudge (ask for plain text output) → TUI fallback write from streamBuf - looksLikeAgentsMD + extractMarkdownDoc: validate and clean fallback content before writing (reject refusals, strip narrative preambles) - Collapse thinking output to 3 lines; ctrl+o to expand (live stream and committed messages) - Stream-level filter for model pseudo-tool-call blocks: suppresses <<tool_code>>...</tool_code>> and <<function_call>>...<tool_call|> from entering streamBuf across chunk boundaries - sanitizeAssistantText regex covers both block formats - Reset streamFilterClose at every turn start
This commit is contained in:
@@ -45,6 +45,11 @@ type Turn struct {
|
||||
Rounds int // number of API round-trips
|
||||
}
|
||||
|
||||
// TurnOptions carries per-turn overrides that apply for a single Submit call.
|
||||
type TurnOptions struct {
|
||||
ToolChoice provider.ToolChoiceMode // "" = use provider default
|
||||
}
|
||||
|
||||
// Engine orchestrates the conversation.
|
||||
type Engine struct {
|
||||
cfg Config
|
||||
@@ -59,6 +64,9 @@ type Engine struct {
|
||||
// Deferred tool loading: tools with ShouldDefer() are excluded until
|
||||
// the model requests them. Activated on first use.
|
||||
activatedTools map[string]bool
|
||||
|
||||
// Per-turn options, set for the duration of SubmitWithOptions.
|
||||
turnOpts TurnOptions
|
||||
}
|
||||
|
||||
// New creates an engine.
|
||||
@@ -124,6 +132,9 @@ func (e *Engine) ContextWindow() *gnomactx.Window {
|
||||
// the model should see as context in subsequent turns.
|
||||
func (e *Engine) InjectMessage(msg message.Message) {
|
||||
e.history = append(e.history, msg)
|
||||
if e.cfg.Context != nil {
|
||||
e.cfg.Context.AppendMessage(msg)
|
||||
}
|
||||
}
|
||||
|
||||
// Usage returns cumulative token usage.
|
||||
@@ -145,4 +156,8 @@ func (e *Engine) SetModel(model string) {
|
||||
func (e *Engine) Reset() {
|
||||
e.history = nil
|
||||
e.usage = message.Usage{}
|
||||
if e.cfg.Context != nil {
|
||||
e.cfg.Context.Reset()
|
||||
}
|
||||
e.activatedTools = make(map[string]bool)
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
gnomactx "somegit.dev/Owlibou/gnoma/internal/context"
|
||||
"somegit.dev/Owlibou/gnoma/internal/message"
|
||||
"somegit.dev/Owlibou/gnoma/internal/provider"
|
||||
"somegit.dev/Owlibou/gnoma/internal/stream"
|
||||
@@ -446,6 +447,109 @@ func TestEngine_Reset(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestEngine_Reset_ClearsContextWindow(t *testing.T) {
|
||||
ctxWindow := gnomactx.NewWindow(gnomactx.WindowConfig{MaxTokens: 200_000})
|
||||
mp := &mockProvider{
|
||||
name: "test",
|
||||
streams: []stream.Stream{
|
||||
newEventStream(message.StopEndTurn, "",
|
||||
stream.Event{Type: stream.EventTextDelta, Text: "hi"},
|
||||
),
|
||||
},
|
||||
}
|
||||
e, _ := New(Config{
|
||||
Provider: mp,
|
||||
Tools: tool.NewRegistry(),
|
||||
Context: ctxWindow,
|
||||
})
|
||||
e.Submit(context.Background(), "hello", nil)
|
||||
|
||||
if len(ctxWindow.Messages()) == 0 {
|
||||
t.Fatal("context window should have messages before reset")
|
||||
}
|
||||
|
||||
e.Reset()
|
||||
|
||||
if len(ctxWindow.Messages()) != 0 {
|
||||
t.Errorf("context window should be empty after reset, got %d messages", len(ctxWindow.Messages()))
|
||||
}
|
||||
}
|
||||
|
||||
func TestSubmit_ContextWindowTracksUserAndToolMessages(t *testing.T) {
|
||||
reg := tool.NewRegistry()
|
||||
reg.Register(&mockTool{
|
||||
name: "bash",
|
||||
execFn: func(_ context.Context, _ json.RawMessage) (tool.Result, error) {
|
||||
return tool.Result{Output: "output"}, nil
|
||||
},
|
||||
})
|
||||
|
||||
mp := &mockProvider{
|
||||
name: "test",
|
||||
streams: []stream.Stream{
|
||||
newEventStream(message.StopToolUse, "model",
|
||||
stream.Event{Type: stream.EventToolCallStart, ToolCallID: "tc1", ToolCallName: "bash"},
|
||||
stream.Event{Type: stream.EventToolCallDone, ToolCallID: "tc1", Args: json.RawMessage(`{"command":"ls"}`)},
|
||||
stream.Event{Type: stream.EventUsage, Usage: &message.Usage{InputTokens: 100, OutputTokens: 20}},
|
||||
),
|
||||
newEventStream(message.StopEndTurn, "model",
|
||||
stream.Event{Type: stream.EventTextDelta, Text: "Done."},
|
||||
),
|
||||
},
|
||||
}
|
||||
|
||||
ctxWindow := gnomactx.NewWindow(gnomactx.WindowConfig{MaxTokens: 200_000})
|
||||
e, _ := New(Config{
|
||||
Provider: mp,
|
||||
Tools: reg,
|
||||
Context: ctxWindow,
|
||||
})
|
||||
|
||||
_, err := e.Submit(context.Background(), "list files", nil)
|
||||
if err != nil {
|
||||
t.Fatalf("Submit: %v", err)
|
||||
}
|
||||
|
||||
allMsgs := ctxWindow.AllMessages()
|
||||
// Expect: user msg, assistant (tool call), tool results, assistant (final)
|
||||
if len(allMsgs) < 4 {
|
||||
t.Errorf("context window has %d messages, want at least 4 (user+assistant+tool_results+assistant)", len(allMsgs))
|
||||
for i, m := range allMsgs {
|
||||
t.Logf(" [%d] role=%s content=%s", i, m.Role, m.TextContent())
|
||||
}
|
||||
}
|
||||
// First message should be user
|
||||
if len(allMsgs) > 0 && allMsgs[0].Role != message.RoleUser {
|
||||
t.Errorf("allMsgs[0].Role = %q, want user", allMsgs[0].Role)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSubmit_TrackerReflectsInputTokens(t *testing.T) {
|
||||
// Verify the tracker is set from InputTokens (not accumulated).
|
||||
// After 3 rounds, tracker should equal last round's InputTokens+OutputTokens,
|
||||
// not the sum of all rounds.
|
||||
ctxWindow := gnomactx.NewWindow(gnomactx.WindowConfig{MaxTokens: 200_000})
|
||||
|
||||
mp := &mockProvider{
|
||||
name: "test",
|
||||
streams: []stream.Stream{
|
||||
newEventStream(message.StopEndTurn, "",
|
||||
stream.Event{Type: stream.EventUsage, Usage: &message.Usage{InputTokens: 100, OutputTokens: 50}},
|
||||
stream.Event{Type: stream.EventTextDelta, Text: "a"},
|
||||
),
|
||||
},
|
||||
}
|
||||
e, _ := New(Config{Provider: mp, Tools: tool.NewRegistry(), Context: ctxWindow})
|
||||
|
||||
e.Submit(context.Background(), "hi", nil)
|
||||
|
||||
// Tracker should be InputTokens + OutputTokens = 150, not more
|
||||
used := ctxWindow.Tracker().Used()
|
||||
if used != 150 {
|
||||
t.Errorf("tracker = %d, want 150 (InputTokens+OutputTokens, not cumulative)", used)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSubmit_CumulativeUsage(t *testing.T) {
|
||||
mp := &mockProvider{
|
||||
name: "test",
|
||||
|
||||
@@ -2,7 +2,6 @@ package engine
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"sync"
|
||||
@@ -20,8 +19,19 @@ import (
|
||||
// Submit sends a user message and runs the agentic loop to completion.
|
||||
// The callback receives real-time streaming events.
|
||||
func (e *Engine) Submit(ctx context.Context, input string, cb Callback) (*Turn, error) {
|
||||
return e.SubmitWithOptions(ctx, input, TurnOptions{}, cb)
|
||||
}
|
||||
|
||||
// SubmitWithOptions is like Submit but applies per-turn overrides (e.g. ToolChoice).
|
||||
func (e *Engine) SubmitWithOptions(ctx context.Context, input string, opts TurnOptions, cb Callback) (*Turn, error) {
|
||||
e.turnOpts = opts
|
||||
defer func() { e.turnOpts = TurnOptions{} }()
|
||||
|
||||
userMsg := message.NewUserText(input)
|
||||
e.history = append(e.history, userMsg)
|
||||
if e.cfg.Context != nil {
|
||||
e.cfg.Context.AppendMessage(userMsg)
|
||||
}
|
||||
|
||||
return e.runLoop(ctx, cb)
|
||||
}
|
||||
@@ -29,6 +39,11 @@ func (e *Engine) Submit(ctx context.Context, input string, cb Callback) (*Turn,
|
||||
// SubmitMessages is like Submit but accepts pre-built messages.
|
||||
func (e *Engine) SubmitMessages(ctx context.Context, msgs []message.Message, cb Callback) (*Turn, error) {
|
||||
e.history = append(e.history, msgs...)
|
||||
if e.cfg.Context != nil {
|
||||
for _, m := range msgs {
|
||||
e.cfg.Context.AppendMessage(m)
|
||||
}
|
||||
}
|
||||
|
||||
return e.runLoop(ctx, cb)
|
||||
}
|
||||
@@ -48,6 +63,7 @@ func (e *Engine) runLoop(ctx context.Context, cb Callback) (*Turn, error) {
|
||||
// Route and stream
|
||||
var s stream.Stream
|
||||
var err error
|
||||
var decision router.RoutingDecision
|
||||
|
||||
if e.cfg.Router != nil {
|
||||
// Classify task from the latest user message
|
||||
@@ -59,7 +75,7 @@ func (e *Engine) runLoop(ctx context.Context, cb Callback) (*Turn, error) {
|
||||
}
|
||||
}
|
||||
task := router.ClassifyTask(prompt)
|
||||
task.EstimatedTokens = 4000 // rough default
|
||||
task.EstimatedTokens = int(gnomactx.EstimateTokens(prompt))
|
||||
|
||||
e.logger.Debug("routing request",
|
||||
"task_type", task.Type,
|
||||
@@ -67,13 +83,12 @@ func (e *Engine) runLoop(ctx context.Context, cb Callback) (*Turn, error) {
|
||||
"round", turn.Rounds,
|
||||
)
|
||||
|
||||
var arm *router.Arm
|
||||
s, arm, err = e.cfg.Router.Stream(ctx, task, req)
|
||||
if arm != nil {
|
||||
s, decision, err = e.cfg.Router.Stream(ctx, task, req)
|
||||
if decision.Arm != nil {
|
||||
e.logger.Debug("streaming request",
|
||||
"provider", arm.Provider.Name(),
|
||||
"model", arm.ModelName,
|
||||
"arm", arm.ID,
|
||||
"provider", decision.Arm.Provider.Name(),
|
||||
"model", decision.Arm.ModelName,
|
||||
"arm", decision.Arm.ID,
|
||||
"messages", len(req.Messages),
|
||||
"tools", len(req.Tools),
|
||||
"round", turn.Rounds,
|
||||
@@ -101,9 +116,11 @@ func (e *Engine) runLoop(ctx context.Context, cb Callback) (*Turn, error) {
|
||||
}
|
||||
}
|
||||
task := router.ClassifyTask(prompt)
|
||||
task.EstimatedTokens = 4000
|
||||
s, _, retryErr := e.cfg.Router.Stream(ctx, task, req)
|
||||
return s, retryErr
|
||||
task.EstimatedTokens = int(gnomactx.EstimateTokens(prompt))
|
||||
var retryDecision router.RoutingDecision
|
||||
s, retryDecision, err = e.cfg.Router.Stream(ctx, task, req)
|
||||
decision = retryDecision // adopt new reservation on retry
|
||||
return s, err
|
||||
}
|
||||
return e.cfg.Provider.Stream(ctx, req)
|
||||
})
|
||||
@@ -111,20 +128,30 @@ func (e *Engine) runLoop(ctx context.Context, cb Callback) (*Turn, error) {
|
||||
// Try reactive compaction on 413 (request too large)
|
||||
s, err = e.handleRequestTooLarge(ctx, err, req)
|
||||
if err != nil {
|
||||
decision.Rollback()
|
||||
return nil, fmt.Errorf("provider stream: %w", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Consume stream, forwarding events to callback
|
||||
// Consume stream, forwarding events to callback.
|
||||
// Track TTFT and stream duration for arm performance metrics.
|
||||
acc := stream.NewAccumulator()
|
||||
var stopReason message.StopReason
|
||||
var model string
|
||||
|
||||
streamStart := time.Now()
|
||||
var firstTokenAt time.Time
|
||||
|
||||
for s.Next() {
|
||||
evt := s.Current()
|
||||
acc.Apply(evt)
|
||||
|
||||
// Record time of first text token for TTFT metric
|
||||
if firstTokenAt.IsZero() && evt.Type == stream.EventTextDelta && evt.Text != "" {
|
||||
firstTokenAt = time.Now()
|
||||
}
|
||||
|
||||
// Capture stop reason and model from events
|
||||
if evt.StopReason != "" {
|
||||
stopReason = evt.StopReason
|
||||
@@ -137,14 +164,28 @@ func (e *Engine) runLoop(ctx context.Context, cb Callback) (*Turn, error) {
|
||||
cb(evt)
|
||||
}
|
||||
}
|
||||
streamEnd := time.Now()
|
||||
if err := s.Err(); err != nil {
|
||||
s.Close()
|
||||
decision.Rollback()
|
||||
return nil, fmt.Errorf("stream error: %w", err)
|
||||
}
|
||||
s.Close()
|
||||
|
||||
// Build response
|
||||
resp := acc.Response(stopReason, model)
|
||||
|
||||
// Commit pool reservation and record perf metrics for this round.
|
||||
actualTokens := int(resp.Usage.InputTokens + resp.Usage.OutputTokens)
|
||||
decision.Commit(actualTokens)
|
||||
if decision.Arm != nil && !firstTokenAt.IsZero() {
|
||||
decision.Arm.Perf.Update(
|
||||
firstTokenAt.Sub(streamStart),
|
||||
int(resp.Usage.OutputTokens),
|
||||
streamEnd.Sub(streamStart),
|
||||
)
|
||||
}
|
||||
|
||||
turn.Usage.Add(resp.Usage)
|
||||
turn.Messages = append(turn.Messages, resp.Message)
|
||||
e.history = append(e.history, resp.Message)
|
||||
@@ -152,7 +193,14 @@ func (e *Engine) runLoop(ctx context.Context, cb Callback) (*Turn, error) {
|
||||
|
||||
// Track in context window and check for compaction
|
||||
if e.cfg.Context != nil {
|
||||
e.cfg.Context.Append(resp.Message, resp.Usage)
|
||||
e.cfg.Context.AppendMessage(resp.Message)
|
||||
// Set tracker to the provider-reported context size (InputTokens = full context
|
||||
// as sent this round). This avoids double-counting InputTokens across rounds.
|
||||
if resp.Usage.InputTokens > 0 {
|
||||
e.cfg.Context.Tracker().Set(resp.Usage.InputTokens + resp.Usage.OutputTokens)
|
||||
} else {
|
||||
e.cfg.Context.Tracker().Add(message.Usage{OutputTokens: resp.Usage.OutputTokens})
|
||||
}
|
||||
if compacted, err := e.cfg.Context.CompactIfNeeded(); err != nil {
|
||||
e.logger.Error("context compaction failed", "error", err)
|
||||
} else if compacted {
|
||||
@@ -169,9 +217,19 @@ func (e *Engine) runLoop(ctx context.Context, cb Callback) (*Turn, error) {
|
||||
|
||||
// Decide next action
|
||||
switch resp.StopReason {
|
||||
case message.StopEndTurn, message.StopMaxTokens, message.StopSequence:
|
||||
case message.StopEndTurn, message.StopSequence:
|
||||
return turn, nil
|
||||
|
||||
case message.StopMaxTokens:
|
||||
// Model hit its output token budget mid-response. Inject a continue prompt
|
||||
// and re-query so the response is completed rather than silently truncated.
|
||||
contMsg := message.NewUserText("Continue from where you left off.")
|
||||
e.history = append(e.history, contMsg)
|
||||
if e.cfg.Context != nil {
|
||||
e.cfg.Context.AppendMessage(contMsg)
|
||||
}
|
||||
// Continue loop — next round will resume generation
|
||||
|
||||
case message.StopToolUse:
|
||||
results, err := e.executeTools(ctx, resp.Message.ToolCalls(), cb)
|
||||
if err != nil {
|
||||
@@ -180,6 +238,9 @@ func (e *Engine) runLoop(ctx context.Context, cb Callback) (*Turn, error) {
|
||||
toolMsg := message.NewToolResults(results...)
|
||||
turn.Messages = append(turn.Messages, toolMsg)
|
||||
e.history = append(e.history, toolMsg)
|
||||
if e.cfg.Context != nil {
|
||||
e.cfg.Context.AppendMessage(toolMsg)
|
||||
}
|
||||
// Continue loop — re-query provider with tool results
|
||||
|
||||
default:
|
||||
@@ -205,12 +266,15 @@ func (e *Engine) buildRequest(ctx context.Context) provider.Request {
|
||||
Model: e.cfg.Model,
|
||||
SystemPrompt: systemPrompt,
|
||||
Messages: messages,
|
||||
ToolChoice: e.turnOpts.ToolChoice,
|
||||
}
|
||||
|
||||
// Only include tools if the model supports them
|
||||
// Only include tools if the model supports them.
|
||||
// When Router is active, skip capability gating — the router selects the arm
|
||||
// and already knows its capabilities. Gating here would use the wrong provider.
|
||||
caps := e.resolveCapabilities(ctx)
|
||||
if caps == nil || caps.ToolUse {
|
||||
// nil caps = unknown model, include tools optimistically
|
||||
if e.cfg.Router != nil || caps == nil || caps.ToolUse {
|
||||
// Router active, nil caps (unknown model), or model supports tools
|
||||
for _, t := range e.cfg.Tools.All() {
|
||||
// Skip deferred tools until the model requests them
|
||||
if dt, ok := t.(tool.DeferrableTool); ok && dt.ShouldDefer() && !e.activatedTools[t.Name()] {
|
||||
@@ -352,10 +416,11 @@ func (e *Engine) executeSingleTool(ctx context.Context, call message.ToolCall, t
|
||||
}
|
||||
|
||||
func truncate(s string, maxLen int) string {
|
||||
if len(s) <= maxLen {
|
||||
runes := []rune(s)
|
||||
if len(runes) <= maxLen {
|
||||
return s
|
||||
}
|
||||
return s[:maxLen] + "..."
|
||||
return string(runes[:maxLen]) + "..."
|
||||
}
|
||||
|
||||
// handleRequestTooLarge attempts compaction on 413 and retries once.
|
||||
@@ -387,7 +452,7 @@ func (e *Engine) handleRequestTooLarge(ctx context.Context, origErr error, req p
|
||||
}
|
||||
}
|
||||
task := router.ClassifyTask(prompt)
|
||||
task.EstimatedTokens = 4000
|
||||
task.EstimatedTokens = int(gnomactx.EstimateTokens(prompt))
|
||||
s, _, err := e.cfg.Router.Stream(ctx, task, req)
|
||||
return s, err
|
||||
}
|
||||
@@ -441,12 +506,3 @@ func (e *Engine) retryOnTransient(ctx context.Context, firstErr error, fn func()
|
||||
return nil, firstErr
|
||||
}
|
||||
|
||||
// toolDefFromTool converts a tool.Tool to provider.ToolDefinition.
|
||||
// Unused currently but kept for reference when building tool definitions dynamically.
|
||||
func toolDefFromJSON(name, description string, params json.RawMessage) provider.ToolDefinition {
|
||||
return provider.ToolDefinition{
|
||||
Name: name,
|
||||
Description: description,
|
||||
Parameters: params,
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user