Files
gnoma/internal/router/probe_test.go
vikingowl 3873f90f83 feat: local model reliability — SDK retries, capability probing, init skill, context compaction
Three compounding bugs prevented tool calling with llama.cpp:
- Stream parser set argsComplete on partial JSON (e.g. "{"), dropping
  subsequent argument deltas — fix: use json.Valid to detect completeness
  (all three fixes are sketched after this list)
- Missing tool_choice default — llama.cpp needs explicit "auto" to
  activate its GBNF grammar constraint; now set when tools are present
- Tool names in history used internal format (fs.ls) while definitions
  used API format (fs_ls) — now re-sanitized in translateMessage
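
A minimal sketch of these three fixes; the package and identifier names
(toolCallState, defaultToolChoice, sanitizeToolName) are illustrative,
not the actual gnoma code:

package stream

import (
	"encoding/json"
	"strings"
)

// toolCallState accumulates streamed argument deltas for one tool call.
type toolCallState struct {
	name         string
	args         strings.Builder
	argsComplete bool
}

// appendArgsDelta applies the first fix: arguments are marked complete
// only once the accumulated buffer is valid JSON, so a bare "{" no
// longer ends accumulation and drops later deltas.
func (s *toolCallState) appendArgsDelta(delta string) {
	s.args.WriteString(delta)
	s.argsComplete = json.Valid([]byte(s.args.String()))
}

// defaultToolChoice applies the second fix: when tools are present and
// no explicit choice is set, send "auto" so llama.cpp activates its
// GBNF grammar constraint.
func defaultToolChoice(toolChoice string, haveTools bool) string {
	if haveTools && toolChoice == "" {
		return "auto"
	}
	return toolChoice
}

// sanitizeToolName applies the third fix: internal dotted names (fs.ls)
// are re-mapped to the API format (fs_ls) so names in replayed history
// match the tool definitions.
func sanitizeToolName(name string) string {
	return strings.ReplaceAll(name, ".", "_")
}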

Additional changes:
- Disable SDK retries for local providers (500s are deterministic)
- Dynamic capability probing via /props (llama.cpp) and /api/show
  (Ollama), replacing hardcoded model prefix list
- Engine respects forced arm ToolUse capability when router is active
- Bundled /init skill with Go template blocks, context-aware for local
  vs cloud models, deduplication rules against CLAUDE.md
- Tool result compaction for local models — results from previous rounds
  are replaced with size markers to stay within small context windows
  (see the sketch after this list)
- Text-only fallback when tool-parse errors occur on local models
- "text-only" TUI indicator when model lacks tool support
- Session ResetError for retry after stream failures
- AllowedTools per-turn filtering in engine buildRequest
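
The compaction rule lends itself to a short sketch; Message and
compactToolResults are illustrative stand-ins for the engine's actual
types, and "previous rounds" is read here as everything before the
last assistant turn:

package engine

import "fmt"

// Message is a simplified stand-in for the engine's message type.
type Message struct {
	Role    string // "user", "assistant", or "tool"
	Content string
}

// compactToolResults replaces tool results from earlier rounds with a
// size marker, keeping small-context local models within their window
// while preserving the shape of the conversation.
func compactToolResults(msgs []Message) []Message {
	// The latest round starts at the last assistant message; results
	// after it are still needed verbatim.
	last := -1
	for i, m := range msgs {
		if m.Role == "assistant" {
			last = i
		}
	}
	out := make([]Message, len(msgs))
	copy(out, msgs)
	for i := range out {
		if out[i].Role == "tool" && i < last {
			out[i].Content = fmt.Sprintf("[tool result elided: %d bytes]", len(out[i].Content))
		}
	}
	return out
}
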
2026-04-13 02:01:01 +02:00

148 lines
4.5 KiB
Go

package router

import (
	"context"
	"net/http"
	"net/http/httptest"
	"testing"
)

func TestProbeLlamaCppToolSupport_SupportsTools(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/props" {
			t.Errorf("unexpected path %q", r.URL.Path)
		}
		w.Header().Set("Content-Type", "application/json")
		w.Write([]byte(`{
			"chat_template": "...",
			"chat_template_caps": {
				"supports_tools": true,
				"supports_tool_calls": true,
				"supports_parallel_tool_calls": false,
				"supports_system_role": true
			}
		}`))
	}))
	defer srv.Close()

	got := probeLlamaCppToolSupport(context.Background(), srv.URL)
	if !got {
		t.Error("probeLlamaCppToolSupport() = false, want true for model with tool support")
	}
}

func TestProbeLlamaCppToolSupport_NoToolSupport(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		w.Write([]byte(`{
			"chat_template": "...",
			"chat_template_caps": {
				"supports_tools": false,
				"supports_tool_calls": false,
				"supports_system_role": true
			}
		}`))
	}))
	defer srv.Close()

	got := probeLlamaCppToolSupport(context.Background(), srv.URL)
	if got {
		t.Error("probeLlamaCppToolSupport() = true, want false for model without tool support")
	}
}

func TestProbeLlamaCppToolSupport_NoCaps(t *testing.T) {
	// Old llama.cpp version that doesn't return chat_template_caps.
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		w.Write([]byte(`{"chat_template": "...", "total_slots": 1}`))
	}))
	defer srv.Close()

	got := probeLlamaCppToolSupport(context.Background(), srv.URL)
	if got {
		t.Error("probeLlamaCppToolSupport() = true, want false when chat_template_caps is absent")
	}
}

func TestProbeLlamaCppToolSupport_ServerDown(t *testing.T) {
	// Nothing listens on 127.0.0.1:1, so the probe must fail closed.
	got := probeLlamaCppToolSupport(context.Background(), "http://127.0.0.1:1")
	if got {
		t.Error("probeLlamaCppToolSupport() = true, want false when server unreachable")
	}
}

func TestProbeLlamaCppToolSupport_ToolsWithoutToolCalls(t *testing.T) {
	// supports_tools alone is not enough; supports_tool_calls must also be true.
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		w.Write([]byte(`{
			"chat_template_caps": {
				"supports_tools": true,
				"supports_tool_calls": false
			}
		}`))
	}))
	defer srv.Close()

	got := probeLlamaCppToolSupport(context.Background(), srv.URL)
	if got {
		t.Error("probeLlamaCppToolSupport() = true, want false when supports_tool_calls is false")
	}
}

func TestProbeOllamaToolSupport_HasTools(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/api/show" || r.Method != http.MethodPost {
			t.Errorf("unexpected %s %s", r.Method, r.URL.Path)
		}
		w.Header().Set("Content-Type", "application/json")
		w.Write([]byte(`{
			"details": {"family": "qwen2", "parameter_size": "7B"},
			"capabilities": ["completion", "tools"]
		}`))
	}))
	defer srv.Close()

	got := probeOllamaToolSupport(context.Background(), srv.URL, "qwen2.5:7b")
	if !got {
		t.Error("probeOllamaToolSupport() = false, want true for model with tools capability")
	}
}

func TestProbeOllamaToolSupport_NoTools(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		w.Write([]byte(`{
			"details": {"family": "phi", "parameter_size": "3B"},
			"capabilities": ["completion"]
		}`))
	}))
	defer srv.Close()

	got := probeOllamaToolSupport(context.Background(), srv.URL, "phi3:3b")
	if got {
		t.Error("probeOllamaToolSupport() = true, want false for model without tools capability")
	}
}

func TestProbeOllamaToolSupport_NoCapsField(t *testing.T) {
	// Old Ollama version without a capabilities field.
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		w.Write([]byte(`{"details": {"family": "llama"}}`))
	}))
	defer srv.Close()

	got := probeOllamaToolSupport(context.Background(), srv.URL, "llama3:8b")
	if got {
		t.Error("probeOllamaToolSupport() = true, want false when capabilities field absent")
	}
}

func TestProbeOllamaToolSupport_ServerDown(t *testing.T) {
	// Nothing listens on 127.0.0.1:1, so the probe must fail closed.
	got := probeOllamaToolSupport(context.Background(), "http://127.0.0.1:1", "test")
	if got {
		t.Error("probeOllamaToolSupport() = true, want false when server unreachable")
	}
}
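
The probed functions themselves are not part of this file. For
reference, a sketch of implementations consistent with these tests; it
is not the actual gnoma code, and the {"model": ...} request body sent
to /api/show is an assumption based on Ollama's API that the tests
above do not check:

package router

import (
	"bytes"
	"context"
	"encoding/json"
	"net/http"
)

// probeLlamaCppToolSupport asks a llama.cpp server's /props endpoint
// whether its chat template can drive tool calls. Older servers omit
// chat_template_caps, and any transport or decode error reports false.
func probeLlamaCppToolSupport(ctx context.Context, baseURL string) bool {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, baseURL+"/props", nil)
	if err != nil {
		return false
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return false
	}
	defer resp.Body.Close()
	var props struct {
		Caps struct {
			SupportsTools     bool `json:"supports_tools"`
			SupportsToolCalls bool `json:"supports_tool_calls"`
		} `json:"chat_template_caps"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&props); err != nil {
		return false
	}
	// Both flags must be set; absent fields decode to false.
	return props.Caps.SupportsTools && props.Caps.SupportsToolCalls
}

// probeOllamaToolSupport POSTs the model name to /api/show and looks
// for "tools" in the capabilities list, which older Ollama versions
// do not return.
func probeOllamaToolSupport(ctx context.Context, baseURL, model string) bool {
	body, err := json.Marshal(map[string]string{"model": model})
	if err != nil {
		return false
	}
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, baseURL+"/api/show", bytes.NewReader(body))
	if err != nil {
		return false
	}
	req.Header.Set("Content-Type", "application/json")
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return false
	}
	defer resp.Body.Close()
	var show struct {
		Capabilities []string `json:"capabilities"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&show); err != nil {
		return false
	}
	for _, c := range show.Capabilities {
		if c == "tools" {
			return true
		}
	}
	return false
}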