fix: deterministic 500 retry, OpenAI error wrapping, local /init prompt
Stop retrying llama.cpp 500s that are deterministic tool-parse failures by inspecting the error message body (ClassifyHTTPError). Wrap OpenAI SDK errors as ProviderError so the engine's retry logic classifies them. Add localInitPrompt for local models that uses sequential fs_* calls instead of spawn_elfs (which local models can't produce reliably).
This commit is contained in:
@@ -2,6 +2,7 @@ package provider
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
@@ -55,6 +56,31 @@ func (e *ProviderError) Unwrap() error {
|
||||
return e.Err
|
||||
}
|
||||
|
||||
// nonRetryable500Substrings lists error messages from servers (e.g. llama.cpp)
|
||||
// that return 500 for deterministic client-side failures. These should not be
|
||||
// retried because the same request will always produce the same error.
|
||||
var nonRetryable500Substrings = []string{
|
||||
"Failed to parse tool call", // llama.cpp: model output invalid tool call JSON
|
||||
"failed to parse tool call", // lowercase variant
|
||||
"tool_call_error", // some servers use this error type
|
||||
"invalid_tool_call", // OpenAI-compat servers
|
||||
}
|
||||
|
||||
// ClassifyHTTPError classifies an HTTP error using both status code and the
|
||||
// error message. This catches deterministic 500s (e.g. llama.cpp tool parse
|
||||
// failures) that should not be retried.
|
||||
func ClassifyHTTPError(status int, message string) (ErrorKind, bool) {
|
||||
if status == 500 && message != "" {
|
||||
lower := strings.ToLower(message)
|
||||
for _, substr := range nonRetryable500Substrings {
|
||||
if strings.Contains(lower, strings.ToLower(substr)) {
|
||||
return ErrBadRequest, false
|
||||
}
|
||||
}
|
||||
}
|
||||
return ClassifyHTTPStatus(status)
|
||||
}
|
||||
|
||||
// ClassifyHTTPStatus returns the ErrorKind and retryability for an HTTP status code.
|
||||
func ClassifyHTTPStatus(status int) (ErrorKind, bool) {
|
||||
switch {
|
||||
|
||||
@@ -98,6 +98,48 @@ func TestClassifyHTTPStatus(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestClassifyHTTPError_DeterministicToolParse500(t *testing.T) {
|
||||
kind, retry := ClassifyHTTPError(500, "Failed to parse tool call arguments as JSON")
|
||||
if kind != ErrBadRequest {
|
||||
t.Errorf("kind = %v, want %v", kind, ErrBadRequest)
|
||||
}
|
||||
if retry {
|
||||
t.Error("deterministic tool parse failure should not be retryable")
|
||||
}
|
||||
}
|
||||
|
||||
func TestClassifyHTTPError_Genuine500(t *testing.T) {
|
||||
kind, retry := ClassifyHTTPError(500, "internal server error")
|
||||
if kind != ErrTransient {
|
||||
t.Errorf("kind = %v, want %v", kind, ErrTransient)
|
||||
}
|
||||
if !retry {
|
||||
t.Error("genuine 500 should be retryable")
|
||||
}
|
||||
}
|
||||
|
||||
func TestClassifyHTTPError_EmptyMessage(t *testing.T) {
|
||||
kind, retry := ClassifyHTTPError(500, "")
|
||||
if kind != ErrTransient {
|
||||
t.Errorf("kind = %v, want %v (empty message should fallthrough)", kind, ErrTransient)
|
||||
}
|
||||
if !retry {
|
||||
t.Error("500 with empty message should be retryable")
|
||||
}
|
||||
}
|
||||
|
||||
func TestClassifyHTTPError_Non500(t *testing.T) {
|
||||
// Non-500 status codes should pass through to ClassifyHTTPStatus unchanged
|
||||
kind, retry := ClassifyHTTPError(429, "rate limited")
|
||||
if kind != ErrTransient || !retry {
|
||||
t.Errorf("429 should be transient+retryable, got %v/%v", kind, retry)
|
||||
}
|
||||
kind, retry = ClassifyHTTPError(400, "bad request")
|
||||
if kind != ErrBadRequest || retry {
|
||||
t.Errorf("400 should be bad_request+non-retryable, got %v/%v", kind, retry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestErrorKind_String(t *testing.T) {
|
||||
tests := []struct {
|
||||
kind ErrorKind
|
||||
|
||||
@@ -105,7 +105,7 @@ func inferCapabilities(m model.ModelCard) provider.Capabilities {
|
||||
|
||||
func (p *Provider) wrapError(err error) error {
|
||||
if apiErr, ok := err.(*mistralgo.APIError); ok {
|
||||
kind, retryable := provider.ClassifyHTTPStatus(apiErr.StatusCode)
|
||||
kind, retryable := provider.ClassifyHTTPError(apiErr.StatusCode, apiErr.Message)
|
||||
return &provider.ProviderError{
|
||||
Kind: kind,
|
||||
Provider: p.name,
|
||||
|
||||
@@ -2,8 +2,10 @@ package openai
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
|
||||
"somegit.dev/Owlibou/gnoma/internal/message"
|
||||
"somegit.dev/Owlibou/gnoma/internal/provider"
|
||||
"somegit.dev/Owlibou/gnoma/internal/stream"
|
||||
|
||||
oai "github.com/openai/openai-go"
|
||||
@@ -172,10 +174,31 @@ func (s *openaiStream) Next() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
s.err = s.raw.Err()
|
||||
s.err = wrapSDKError(s.raw.Err())
|
||||
return false
|
||||
}
|
||||
|
||||
// Current returns the most recently decoded stream event (set by Next).
func (s *openaiStream) Current() stream.Event { return s.cur }

// Err returns the error, if any, recorded when the stream stopped; it is
// wrapped by wrapSDKError in Next, so callers may see a *provider.ProviderError.
func (s *openaiStream) Err() error { return s.err }

// Close closes the underlying SDK stream.
func (s *openaiStream) Close() error { return s.raw.Close() }
|
||||
|
||||
// wrapSDKError converts an OpenAI SDK apierror.Error into a ProviderError
|
||||
// so the engine's retry logic can classify it properly.
|
||||
func wrapSDKError(err error) error {
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
var apiErr *oai.Error
|
||||
if !errors.As(err, &apiErr) {
|
||||
return err
|
||||
}
|
||||
kind, retryable := provider.ClassifyHTTPError(apiErr.StatusCode, apiErr.Message)
|
||||
return &provider.ProviderError{
|
||||
Kind: kind,
|
||||
Provider: "openai",
|
||||
StatusCode: apiErr.StatusCode,
|
||||
Message: apiErr.Message,
|
||||
Retryable: retryable,
|
||||
Err: err,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -500,6 +500,9 @@ func (m Model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
m.config.Engine.Reset()
|
||||
}
|
||||
nudge := "Call spawn_elfs now. Spawn 3 elfs in parallel: (1) explore project structure, read go.mod/Makefile/existing AI config files; (2) find non-standard Go conventions and idioms; (3) check README/docs for env vars and setup requirements. Then write AGENTS.md using fs.write."
|
||||
if retryStatus := m.session.Status(); isLocalProvider(retryStatus.Provider) {
|
||||
nudge = "Call fs_ls on the project root now. Then fs_read go.mod and Makefile. Then fs_glob **/*.go to find source files. Finally fs_write AGENTS.md. Do not explain — call the tools."
|
||||
}
|
||||
if err := m.session.Send(nudge); err != nil {
|
||||
m.messages = append(m.messages, chatMessage{role: "error", content: err.Error()})
|
||||
m.streaming = false
|
||||
@@ -917,7 +920,15 @@ func (m Model) handleCommand(cmd string) (tea.Model, tea.Cmd) {
|
||||
existingPath = agentsPath
|
||||
}
|
||||
|
||||
prompt := initPrompt(root, existingPath)
|
||||
status := m.session.Status()
|
||||
local := isLocalProvider(status.Provider)
|
||||
|
||||
var prompt string
|
||||
if local {
|
||||
prompt = localInitPrompt(root, existingPath)
|
||||
} else {
|
||||
prompt = initPrompt(root, existingPath)
|
||||
}
|
||||
|
||||
m.messages = append(m.messages, chatMessage{role: "user", content: "/init"})
|
||||
m.streaming = true
|
||||
@@ -930,11 +941,12 @@ func (m Model) handleCommand(cmd string) (tea.Model, tea.Cmd) {
|
||||
m.initRetried = false
|
||||
m.initWriteNudged = false
|
||||
|
||||
// Local models (Ollama, llama.cpp) often narrate tool calls as text instead of
|
||||
// invoking them. Force tool_choice: required so the API response includes actual
|
||||
// function call JSON rather than a prose description.
|
||||
// Cloud models: use spawn_elfs for parallel analysis.
|
||||
// Local models: use the simplified prompt with sequential fs_* tools.
|
||||
// Force tool_choice: required for local models so the API emits function
|
||||
// call JSON rather than narrating the tool calls as text.
|
||||
opts := engine.TurnOptions{}
|
||||
if status := m.session.Status(); isLocalProvider(status.Provider) {
|
||||
if local {
|
||||
opts.ToolChoice = provider.ToolChoiceRequired
|
||||
}
|
||||
if err := m.session.SendWithOptions(prompt, opts); err != nil {
|
||||
|
||||
@@ -10,6 +10,36 @@ import (
|
||||
"somegit.dev/Owlibou/gnoma/internal/message"
|
||||
)
|
||||
|
||||
// localInitPrompt builds a simplified /init prompt for local models (Ollama, llama.cpp).
|
||||
// Instead of spawn_elfs (complex nested JSON that local models can't produce reliably),
|
||||
// this uses sequential simple tool calls: fs_ls, fs_read, fs_glob, fs_write.
|
||||
func localInitPrompt(root, existingPath string) string {
|
||||
existing := ""
|
||||
if existingPath != "" {
|
||||
existing = fmt.Sprintf("\n\nAn existing AGENTS.md exists at %s. Read it first, then update it — keep accurate sections, fix stale ones, remove bloat.", existingPath)
|
||||
}
|
||||
|
||||
return fmt.Sprintf(`You are creating an AGENTS.md project documentation file for the project at %s.%s
|
||||
|
||||
Use ONLY these tools: fs_ls, fs_read, fs_glob, fs_grep, fs_write.
|
||||
Do NOT use bash or spawn_elfs.
|
||||
|
||||
Steps:
|
||||
1. fs_ls on the project root to see the directory structure.
|
||||
2. fs_read on go.mod (or package.json/Cargo.toml) to get the module path, runtime version, and dependencies.
|
||||
3. fs_read on Makefile to find build/test commands.
|
||||
4. fs_glob for **/*.go to discover source files, then fs_read 3-4 key files to understand code conventions and patterns.
|
||||
5. fs_read any existing AI config files: CLAUDE.md, .cursor/rules, .cursorrules.
|
||||
|
||||
Then fs_write AGENTS.md to %s/AGENTS.md.
|
||||
|
||||
AGENTS.md must contain ONLY information not already in CLAUDE.md or other AI config files.
|
||||
Include: module path, key dependencies with import paths, non-standard build targets, language-specific idioms with code examples, domain terminology, testing conventions, required env vars.
|
||||
Exclude: anything already in CLAUDE.md, standard conventions, generic advice, file listings.
|
||||
Format: terse directive-style bullets. Short code examples where non-obvious.
|
||||
Do not fabricate. Only write what you observed.`, root, existing, root)
|
||||
}
|
||||
|
||||
// initPrompt builds the prompt sent to the LLM for /init.
|
||||
// existingPath is the absolute path to an existing AGENTS.md, or "" if none exists.
|
||||
// The 3 base elfs always run. When existingPath is set, a 4th elf reads the current file.
|
||||
|
||||
Reference in New Issue
Block a user