Files
gnoma/internal/provider/openai/stream.go
vikingowl 3873f90f83 feat: local model reliability — SDK retries, capability probing, init skill, context compaction
Three compounding bugs prevented tool calling with llama.cpp:
- Stream parser set argsComplete on partial JSON (e.g. "{"), dropping
  subsequent argument deltas — fix: use json.Valid to detect completeness
- Missing tool_choice default — llama.cpp needs explicit "auto" to
  activate its GBNF grammar constraint; now set when tools are present
- Tool names in history used internal format (fs.ls) while definitions
  used API format (fs_ls) — now re-sanitized in translateMessage

Additional changes:
- Disable SDK retries for local providers (500s are deterministic)
- Dynamic capability probing via /props (llama.cpp) and /api/show
  (Ollama), replacing hardcoded model prefix list
- Engine respects forced arm ToolUse capability when router is active
- Bundled /init skill with Go template blocks, context-aware for local
  vs cloud models, deduplication rules against CLAUDE.md
- Tool result compaction for local models — previous round results
  replaced with size markers to stay within small context windows
- Text-only fallback when tool-parse errors occur on local models
- "text-only" TUI indicator when model lacks tool support
- Session ResetError for retry after stream failures
- AllowedTools per-turn filtering in engine buildRequest
2026-04-13 02:01:01 +02:00

212 lines
5.3 KiB
Go

package openai
import (
"encoding/json"
"errors"
"log/slog"
"somegit.dev/Owlibou/gnoma/internal/message"
"somegit.dev/Owlibou/gnoma/internal/provider"
"somegit.dev/Owlibou/gnoma/internal/stream"
oai "github.com/openai/openai-go"
"github.com/openai/openai-go/packages/ssestream"
)
// openaiStream adapts OpenAI's ssestream to gnoma's stream.Stream.
// Each call to Next consumes raw SDK chunks until one produces a gnoma
// event; Current then returns that event until the next advance.
type openaiStream struct {
	raw         *ssestream.Stream[oai.ChatCompletionChunk]
	cur         stream.Event       // last event produced by Next
	err         error              // terminal error, set (wrapped) when Next returns false
	model       string             // first non-empty model name seen in a chunk
	stopReason  message.StopReason // translated finish_reason; synthesized at end if absent
	emittedStop bool               // ensures the final stop event is emitted exactly once
	// Tool call tracking (OpenAI uses index-based accumulation)
	toolCalls    map[int64]*toolCallState // in-flight calls keyed by chunk index
	hadToolCalls bool                     // true once any tool call arrived; drives StopToolUse fallback
}
// toolCallState accumulates a single streamed tool call across chunks.
type toolCallState struct {
	id   string // tool call ID from the initial chunk
	name string // tool name as received from the API (unsanitized on emit)
	args string // accumulated JSON argument text
	// argsComplete is true only when the initial chunk's arguments already
	// form *valid* JSON (checked with json.Valid); subsequent argument
	// deltas are then skipped, so providers that repeat the full args as a
	// delta (e.g. Ollama) don't produce doubled JSON. A partial prefix
	// like "{" does not set this flag and keeps accepting deltas.
	argsComplete bool
}
// newOpenAIStream builds the stream adapter around a raw SDK SSE stream,
// with an empty tool-call accumulator ready for index-based chunks.
func newOpenAIStream(raw *ssestream.Stream[oai.ChatCompletionChunk]) *openaiStream {
	s := new(openaiStream)
	s.raw = raw
	s.toolCalls = map[int64]*toolCallState{}
	return s
}
// Next advances the stream, returning true when s.cur holds a new event
// and false when the stream is exhausted or failed (see Err). Raw SDK
// chunks are translated one at a time; chunks that produce no event
// (empty choices, suppressed duplicate tool args) are consumed silently.
// Precedence within a chunk: usage, finish reason (recorded, not
// emitted), tool calls, text content, then non-standard thinking fields.
func (s *openaiStream) Next() bool {
	for s.raw.Next() {
		chunk := s.raw.Current()
		// Remember the first model name the server reports.
		if s.model == "" && chunk.Model != "" {
			s.model = chunk.Model
		}
		// Usage (only present when StreamOptions.IncludeUsage is true)
		if chunk.Usage.PromptTokens > 0 || chunk.Usage.CompletionTokens > 0 {
			usage := translateUsage(chunk.Usage)
			s.cur = stream.Event{
				Type:  stream.EventUsage,
				Usage: usage,
			}
			return true
		}
		if len(chunk.Choices) == 0 {
			continue
		}
		choice := chunk.Choices[0]
		delta := choice.Delta
		// Finish reason — recorded here, emitted as part of the final
		// stop event once the raw stream ends.
		if choice.FinishReason != "" {
			s.stopReason = translateFinishReason(string(choice.FinishReason))
		}
		// Tool calls (index-based)
		if len(delta.ToolCalls) > 0 {
			for _, tc := range delta.ToolCalls {
				existing, ok := s.toolCalls[tc.Index]
				if !ok {
					// New tool call — capture initial arguments too.
					// argsComplete only when the initial args are already
					// valid JSON; a partial prefix like "{" must keep
					// accepting deltas (see toolCallState).
					existing = &toolCallState{
						id:           tc.ID,
						name:         tc.Function.Name,
						args:         tc.Function.Arguments,
						argsComplete: tc.Function.Arguments != "" && json.Valid([]byte(tc.Function.Arguments)),
					}
					s.toolCalls[tc.Index] = existing
					s.hadToolCalls = true
					if tc.Function.Name != "" {
						s.cur = stream.Event{
							Type:         stream.EventToolCallStart,
							ToolCallID:   tc.ID,
							ToolCallName: unsanitizeToolName(tc.Function.Name),
						}
						return true
					}
				}
				// Accumulate arguments (subsequent chunks).
				// Skip if args were already provided in the initial chunk — some providers
				// (e.g. Ollama) send complete args in the name chunk and then repeat them
				// as a delta, which would cause doubled JSON and unmarshal failures.
				// (ok is false on the chunk that created the state, so the
				// initial args are never re-emitted as a delta either.)
				if tc.Function.Arguments != "" && ok && !existing.argsComplete {
					existing.args += tc.Function.Arguments
					s.cur = stream.Event{
						Type:       stream.EventToolCallDelta,
						ToolCallID: existing.id,
						ArgDelta:   tc.Function.Arguments,
					}
					return true
				}
			}
			continue
		}
		// Text content
		if delta.Content != "" {
			s.cur = stream.Event{
				Type: stream.EventTextDelta,
				Text: delta.Content,
			}
			return true
		}
		// Ollama thinking content — non-standard "thinking" or "reasoning" field on the delta.
		// Ollama uses "reasoning"; some other servers use "thinking".
		// The openai-go struct drops unknown fields, so we read the raw JSON directly.
		if raw := delta.RawJSON(); raw != "" {
			var extra struct {
				Thinking  string `json:"thinking"`
				Reasoning string `json:"reasoning"`
			}
			if json.Unmarshal([]byte(raw), &extra) == nil {
				text := extra.Thinking
				if text == "" {
					text = extra.Reasoning
				}
				if text != "" {
					s.cur = stream.Event{
						Type: stream.EventThinkingDelta,
						Text: text,
					}
					return true
				}
			}
		}
	}
	// Stream ended — flush tool call Done events, then emit stop.
	// Each Next call emits one Done and deletes it, so this loop always
	// returns on its first iteration. Map iteration order is random, so
	// Done events for multiple parallel tool calls arrive in no
	// particular order.
	for idx, tc := range s.toolCalls {
		s.cur = stream.Event{
			Type:         stream.EventToolCallDone,
			ToolCallID:   tc.id,
			ToolCallName: unsanitizeToolName(tc.name),
			Args:         json.RawMessage(tc.args),
		}
		delete(s.toolCalls, idx)
		return true
	}
	if !s.emittedStop {
		s.emittedStop = true
		// Synthesize a stop reason when the server never sent finish_reason.
		if s.stopReason == "" {
			if s.hadToolCalls {
				s.stopReason = message.StopToolUse
			} else {
				s.stopReason = message.StopEndTurn
			}
		}
		// NOTE(review): the terminal event reuses EventTextDelta (with
		// empty Text) as the carrier for StopReason and Model —
		// presumably the engine keys off StopReason being set rather
		// than the event type; confirm there is no dedicated stop event
		// kind in the stream package.
		s.cur = stream.Event{
			Type:       stream.EventTextDelta,
			StopReason: s.stopReason,
			Model:      s.model,
		}
		return true
	}
	s.err = wrapSDKError(s.raw.Err())
	return false
}
// Current returns the event produced by the most recent successful Next.
func (s *openaiStream) Current() stream.Event {
	return s.cur
}
// Err reports the terminal stream error, if any, after Next returns false.
func (s *openaiStream) Err() error {
	return s.err
}
// Close releases the underlying SDK SSE stream.
func (s *openaiStream) Close() error {
	return s.raw.Close()
}
// wrapSDKError converts an OpenAI SDK apierror.Error into a ProviderError
// so the engine's retry logic can classify it properly. A nil error and
// any non-SDK error pass through unchanged.
func wrapSDKError(err error) error {
	if err == nil {
		return nil
	}
	var sdkErr *oai.Error
	if !errors.As(err, &sdkErr) {
		return err
	}
	// Classify by HTTP status/message to decide retryability upstream.
	kind, retryable := provider.ClassifyHTTPError(sdkErr.StatusCode, sdkErr.Message)
	slog.Debug("openai SDK error wrapped",
		"status", sdkErr.StatusCode,
		"kind", kind,
		"retryable", retryable,
		"message", sdkErr.Message,
	)
	wrapped := provider.ProviderError{
		Kind:       kind,
		Provider:   "openai",
		StatusCode: sdkErr.StatusCode,
		Message:    sdkErr.Message,
		Retryable:  retryable,
		Err:        err,
	}
	return &wrapped
}