diff --git a/internal/provider/errors.go b/internal/provider/errors.go index 4fb6b3c..b82c3d0 100644 --- a/internal/provider/errors.go +++ b/internal/provider/errors.go @@ -2,6 +2,7 @@ package provider import ( "fmt" + "strings" "time" ) @@ -55,6 +56,31 @@ func (e *ProviderError) Unwrap() error { return e.Err } +// nonRetryable500Substrings lists error messages from servers (e.g. llama.cpp) +// that return 500 for deterministic client-side failures. These should not be +// retried because the same request will always produce the same error. +var nonRetryable500Substrings = []string{ + "Failed to parse tool call", // llama.cpp: model output invalid tool call JSON + "failed to parse tool call", // lowercase variant + "tool_call_error", // some servers use this error type + "invalid_tool_call", // OpenAI-compat servers +} + +// ClassifyHTTPError classifies an HTTP error using both status code and the +// error message. This catches deterministic 500s (e.g. llama.cpp tool parse +// failures) that should not be retried. +func ClassifyHTTPError(status int, message string) (ErrorKind, bool) { + if status == 500 && message != "" { + lower := strings.ToLower(message) + for _, substr := range nonRetryable500Substrings { + if strings.Contains(lower, strings.ToLower(substr)) { + return ErrBadRequest, false + } + } + } + return ClassifyHTTPStatus(status) +} + // ClassifyHTTPStatus returns the ErrorKind and retryability for an HTTP status code. func ClassifyHTTPStatus(status int) (ErrorKind, bool) { switch { diff --git a/internal/provider/errors_test.go b/internal/provider/errors_test.go index 07f7c2c..76994e0 100644 --- a/internal/provider/errors_test.go +++ b/internal/provider/errors_test.go @@ -98,6 +98,48 @@ func TestClassifyHTTPStatus(t *testing.T) { } } +func TestClassifyHTTPError_DeterministicToolParse500(t *testing.T) { + kind, retry := ClassifyHTTPError(500, "Failed to parse tool call arguments as JSON") + if kind != ErrBadRequest { + t.Errorf("kind = %v, want %v", kind, ErrBadRequest) + } + if retry { + t.Error("deterministic tool parse failure should not be retryable") + } +} + +func TestClassifyHTTPError_Genuine500(t *testing.T) { + kind, retry := ClassifyHTTPError(500, "internal server error") + if kind != ErrTransient { + t.Errorf("kind = %v, want %v", kind, ErrTransient) + } + if !retry { + t.Error("genuine 500 should be retryable") + } +} + +func TestClassifyHTTPError_EmptyMessage(t *testing.T) { + kind, retry := ClassifyHTTPError(500, "") + if kind != ErrTransient { + t.Errorf("kind = %v, want %v (empty message should fallthrough)", kind, ErrTransient) + } + if !retry { + t.Error("500 with empty message should be retryable") + } +} + +func TestClassifyHTTPError_Non500(t *testing.T) { + // Non-500 status codes should pass through to ClassifyHTTPStatus unchanged + kind, retry := ClassifyHTTPError(429, "rate limited") + if kind != ErrTransient || !retry { + t.Errorf("429 should be transient+retryable, got %v/%v", kind, retry) + } + kind, retry = ClassifyHTTPError(400, "bad request") + if kind != ErrBadRequest || retry { + t.Errorf("400 should be bad_request+non-retryable, got %v/%v", kind, retry) + } +} + func TestErrorKind_String(t *testing.T) { tests := []struct { kind ErrorKind diff --git a/internal/provider/mistral/provider.go b/internal/provider/mistral/provider.go index d66db23..3a20128 100644 --- a/internal/provider/mistral/provider.go +++ b/internal/provider/mistral/provider.go @@ -105,7 +105,7 @@ func inferCapabilities(m model.ModelCard) provider.Capabilities { func (p *Provider) wrapError(err error) error { if apiErr, ok := err.(*mistralgo.APIError); ok { - kind, retryable := provider.ClassifyHTTPStatus(apiErr.StatusCode) + kind, retryable := provider.ClassifyHTTPError(apiErr.StatusCode, apiErr.Message) return &provider.ProviderError{ Kind: kind, Provider: p.name, diff --git a/internal/provider/openai/stream.go b/internal/provider/openai/stream.go index 1d340d0..3d27d5a 100644 --- a/internal/provider/openai/stream.go +++ b/internal/provider/openai/stream.go @@ -2,8 +2,10 @@ package openai import ( "encoding/json" + "errors" "somegit.dev/Owlibou/gnoma/internal/message" + "somegit.dev/Owlibou/gnoma/internal/provider" "somegit.dev/Owlibou/gnoma/internal/stream" oai "github.com/openai/openai-go" @@ -172,10 +174,31 @@ func (s *openaiStream) Next() bool { return true } - s.err = s.raw.Err() + s.err = wrapSDKError(s.raw.Err()) return false } func (s *openaiStream) Current() stream.Event { return s.cur } func (s *openaiStream) Err() error { return s.err } func (s *openaiStream) Close() error { return s.raw.Close() } + +// wrapSDKError converts an OpenAI SDK apierror.Error into a ProviderError +// so the engine's retry logic can classify it properly. +func wrapSDKError(err error) error { + if err == nil { + return nil + } + var apiErr *oai.Error + if !errors.As(err, &apiErr) { + return err + } + kind, retryable := provider.ClassifyHTTPError(apiErr.StatusCode, apiErr.Message) + return &provider.ProviderError{ + Kind: kind, + Provider: "openai", + StatusCode: apiErr.StatusCode, + Message: apiErr.Message, + Retryable: retryable, + Err: err, + } +} diff --git a/internal/tui/app.go b/internal/tui/app.go index 5b9422d..aa03987 100644 --- a/internal/tui/app.go +++ b/internal/tui/app.go @@ -500,6 +500,9 @@ func (m Model) Update(msg tea.Msg) (tea.Model, tea.Cmd) { m.config.Engine.Reset() } nudge := "Call spawn_elfs now. Spawn 3 elfs in parallel: (1) explore project structure, read go.mod/Makefile/existing AI config files; (2) find non-standard Go conventions and idioms; (3) check README/docs for env vars and setup requirements. Then write AGENTS.md using fs.write." + if retryStatus := m.session.Status(); isLocalProvider(retryStatus.Provider) { + nudge = "Call fs_ls on the project root now. Then fs_read go.mod and Makefile. Then fs_glob **/*.go to find source files. Finally fs_write AGENTS.md. Do not explain — call the tools." + } if err := m.session.Send(nudge); err != nil { m.messages = append(m.messages, chatMessage{role: "error", content: err.Error()}) m.streaming = false @@ -917,7 +920,15 @@ func (m Model) handleCommand(cmd string) (tea.Model, tea.Cmd) { existingPath = agentsPath } - prompt := initPrompt(root, existingPath) + status := m.session.Status() + local := isLocalProvider(status.Provider) + + var prompt string + if local { + prompt = localInitPrompt(root, existingPath) + } else { + prompt = initPrompt(root, existingPath) + } m.messages = append(m.messages, chatMessage{role: "user", content: "/init"}) m.streaming = true @@ -930,11 +941,12 @@ func (m Model) handleCommand(cmd string) (tea.Model, tea.Cmd) { m.initRetried = false m.initWriteNudged = false - // Local models (Ollama, llama.cpp) often narrate tool calls as text instead of - // invoking them. Force tool_choice: required so the API response includes actual - // function call JSON rather than a prose description. + // Cloud models: use spawn_elfs for parallel analysis. + // Local models: use the simplified prompt with sequential fs_* tools. + // Force tool_choice: required for local models so the API emits function + // call JSON rather than narrating the tool calls as text. opts := engine.TurnOptions{} - if status := m.session.Status(); isLocalProvider(status.Provider) { + if local { opts.ToolChoice = provider.ToolChoiceRequired } if err := m.session.SendWithOptions(prompt, opts); err != nil { diff --git a/internal/tui/init.go b/internal/tui/init.go index 2e31bf4..eadaf80 100644 --- a/internal/tui/init.go +++ b/internal/tui/init.go @@ -10,6 +10,36 @@ import ( "somegit.dev/Owlibou/gnoma/internal/message" ) +// localInitPrompt builds a simplified /init prompt for local models (Ollama, llama.cpp). +// Instead of spawn_elfs (complex nested JSON that local models can't produce reliably), +// this uses sequential simple tool calls: fs_ls, fs_read, fs_glob, fs_write. +func localInitPrompt(root, existingPath string) string { + existing := "" + if existingPath != "" { + existing = fmt.Sprintf("\n\nAn existing AGENTS.md exists at %s. Read it first, then update it — keep accurate sections, fix stale ones, remove bloat.", existingPath) + } + + return fmt.Sprintf(`You are creating an AGENTS.md project documentation file for the project at %s.%s + +Use ONLY these tools: fs_ls, fs_read, fs_glob, fs_grep, fs_write. +Do NOT use bash or spawn_elfs. + +Steps: +1. fs_ls on the project root to see the directory structure. +2. fs_read on go.mod (or package.json/Cargo.toml) to get the module path, runtime version, and dependencies. +3. fs_read on Makefile to find build/test commands. +4. fs_glob for **/*.go to discover source files, then fs_read 3-4 key files to understand code conventions and patterns. +5. fs_read any existing AI config files: CLAUDE.md, .cursor/rules, .cursorrules. + +Then fs_write AGENTS.md to %s/AGENTS.md. + +AGENTS.md must contain ONLY information not already in CLAUDE.md or other AI config files. +Include: module path, key dependencies with import paths, non-standard build targets, language-specific idioms with code examples, domain terminology, testing conventions, required env vars. +Exclude: anything already in CLAUDE.md, standard conventions, generic advice, file listings. +Format: terse directive-style bullets. Short code examples where non-obvious. +Do not fabricate. Only write what you observed.`, root, existing, root) +} + // initPrompt builds the prompt sent to the LLM for /init. // existingPath is the absolute path to an existing AGENTS.md, or "" if none exists. // The 3 base elfs always run. When existingPath is set, a 4th elf reads the current file.