Files
gnoma/internal/router/discovery.go
T
vikingowl d71bd942c4 feat: local model reliability — SDK retries, capability probing, init skill, context compaction
Three compounding bugs prevented tool calling with llama.cpp:
- Stream parser set argsComplete on partial JSON (e.g. "{"), dropping
  subsequent argument deltas — fix: use json.Valid to detect completeness
- Missing tool_choice default — llama.cpp needs explicit "auto" to
  activate its GBNF grammar constraint; now set when tools are present
- Tool names in history used internal format (fs.ls) while definitions
  used API format (fs_ls) — now re-sanitized in translateMessage

Additional changes:
- Disable SDK retries for local providers (500s are deterministic)
- Dynamic capability probing via /props (llama.cpp) and /api/show
  (Ollama), replacing hardcoded model prefix list
- Engine respects forced arm ToolUse capability when router is active
- Bundled /init skill with Go template blocks, context-aware for local
  vs cloud models, deduplication rules against CLAUDE.md
- Tool result compaction for local models — previous round results
  replaced with size markers to stay within small context windows
- Text-only fallback when tool-parse errors occur on local models
- "text-only" TUI indicator when model lacks tool support
- Session ResetError for retry after stream failures
- AllowedTools per-turn filtering in engine buildRequest
2026-04-13 02:01:01 +02:00

295 lines
8.7 KiB
Go

package router
import (
"context"
"encoding/json"
"fmt"
"log/slog"
"net/http"
"time"
"somegit.dev/Owlibou/gnoma/internal/provider"
)
// discoveryTimeout bounds one discovery call. DiscoverOllama and
// DiscoverLlamaCpp each derive a single context with this deadline and use it
// for the model-listing request as well as the follow-up tool-support
// probe(s), so the budget is shared rather than per request.
const discoveryTimeout = 5 * time.Second
// DiscoveredModel describes a single model reported by a local backend
// during discovery.
type DiscoveredModel struct {
	// ID is the model identifier as reported by the backend.
	ID string
	// Name is the human-readable model name.
	Name string
	// Provider names the backend the model was found on: "ollama" or "llamacpp".
	Provider string
	// Size is the model size in bytes, if available.
	Size int64
	// SupportsTools reports whether the model supports function/tool calling.
	SupportsTools bool
	// ContextSize is the context window in tokens (0 = unknown, use default).
	ContextSize int
}
// DiscoverOllama polls the local Ollama instance for available models.
//
// baseURL defaults to "http://localhost:11434" when empty. The model listing
// (GET /api/tags) and every per-model tool-support probe share one
// discoveryTimeout deadline derived from ctx.
//
// toolCache caches /api/show probe results per model name to avoid N requests
// per discovery cycle. Pass nil to probe every model unconditionally.
// The caller owns the cache and should pass the same map across cycles.
// Entries for models that are no longer listed are pruned on every successful
// call. A probe result obtained after the context expired or was cancelled is
// NOT cached, so the model is probed again on the next cycle instead of being
// stuck with a non-answer.
//
// It returns an error when the instance is unreachable, answers with a
// non-200 status, or sends a body that cannot be decoded.
func DiscoverOllama(ctx context.Context, baseURL string, toolCache map[string]bool) ([]DiscoveredModel, error) {
	if baseURL == "" {
		baseURL = "http://localhost:11434"
	}
	ctx, cancel := context.WithTimeout(ctx, discoveryTimeout)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, baseURL+"/api/tags", nil)
	if err != nil {
		return nil, err
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("ollama not reachable at %s: %w", baseURL, err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("ollama returned %d", resp.StatusCode)
	}

	// Only the fields consumed below are decoded; everything else in the
	// response is ignored by encoding/json.
	var result struct {
		Models []struct {
			Name string `json:"name"`
			Size int64  `json:"size"`
		} `json:"models"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return nil, fmt.Errorf("ollama response parse: %w", err)
	}

	currentModels := make(map[string]bool, len(result.Models))
	var models []DiscoveredModel
	for _, m := range result.Models {
		currentModels[m.Name] = true

		supportsTools, cached := false, false
		if toolCache != nil {
			supportsTools, cached = toolCache[m.Name]
		}
		if !cached {
			supportsTools = probeOllamaToolSupport(ctx, baseURL, m.Name)
			// The probe only yields a bool, so a probe that ran against a
			// dead context is indistinguishable from "no tool support".
			// Remember the answer only while the shared deadline is still
			// alive; otherwise leave the entry absent so the next cycle
			// probes this model again.
			if toolCache != nil && ctx.Err() == nil {
				toolCache[m.Name] = supportsTools
			}
		}

		models = append(models, DiscoveredModel{
			ID:            m.Name,
			Name:          m.Name,
			Provider:      "ollama",
			Size:          m.Size,
			SupportsTools: supportsTools,
			ContextSize:   32768, // conservative default; Ollama /api/show can refine this
		})
	}

	// Prune cache entries for disappeared models (may be a different quant next time).
	// Ranging over a nil map is a no-op, so no nil check is needed here.
	for name := range toolCache {
		if !currentModels[name] {
			delete(toolCache, name)
		}
	}
	return models, nil
}
// DiscoverLlamaCpp asks a local llama.cpp server which models it serves.
//
// An empty baseURL falls back to "http://localhost:8080". The listing request
// (GET /v1/models) and the tool-support probe run under one shared
// discoveryTimeout deadline derived from ctx. An error is returned when the
// server cannot be reached, replies with a non-200 status, or sends a body
// that does not decode.
func DiscoverLlamaCpp(ctx context.Context, baseURL string) ([]DiscoveredModel, error) {
	endpoint := baseURL
	if endpoint == "" {
		endpoint = "http://localhost:8080"
	}

	probeCtx, cancel := context.WithTimeout(ctx, discoveryTimeout)
	defer cancel()

	req, err := http.NewRequestWithContext(probeCtx, http.MethodGet, endpoint+"/v1/models", nil)
	if err != nil {
		return nil, err
	}

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("llama.cpp not reachable at %s: %w", endpoint, err)
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("llama.cpp returned %d", res.StatusCode)
	}

	var listing struct {
		Data []struct {
			ID string `json:"id"`
		} `json:"data"`
	}
	if err := json.NewDecoder(res.Body).Decode(&listing); err != nil {
		return nil, fmt.Errorf("llama.cpp response parse: %w", err)
	}

	// Tool support is probed once per call and the answer is applied to every
	// listed entry (llama.cpp loads one model server-wide).
	hasTools := probeLlamaCppToolSupport(probeCtx, endpoint)
	slog.Debug("llamacpp discovery probe complete",
		"models_found", len(listing.Data),
		"tool_support", hasTools,
	)

	var found []DiscoveredModel
	for i := range listing.Data {
		id := listing.Data[i].ID
		found = append(found, DiscoveredModel{
			ID:            id,
			Name:          id,
			Provider:      "llamacpp",
			SupportsTools: hasTools,
			// Fixed value: noted in the original as the llama.cpp default,
			// which the server can override via --ctx-size.
			ContextSize: 8192,
		})
	}
	return found, nil
}
// DiscoverLocalModels gathers the models of every local backend, querying
// Ollama first and llama.cpp second.
//
// Discovery is best-effort: when a backend fails, the error is logged at
// debug level through logger and that backend is skipped, so the result holds
// whatever the remaining backend reported (possibly nothing).
// ollamaToolCache is handed to DiscoverOllama unchanged; nil skips caching.
func DiscoverLocalModels(ctx context.Context, logger *slog.Logger, ollamaURL, llamacppURL string, ollamaToolCache map[string]bool) []DiscoveredModel {
	var found []DiscoveredModel

	ollamaModels, ollamaErr := DiscoverOllama(ctx, ollamaURL, ollamaToolCache)
	if ollamaErr == nil {
		logger.Debug("discovered ollama models", "count", len(ollamaModels))
		found = append(found, ollamaModels...)
	} else {
		logger.Debug("ollama discovery failed (non-fatal)", "error", ollamaErr)
	}

	llamaModels, llamaErr := DiscoverLlamaCpp(ctx, llamacppURL)
	if llamaErr == nil {
		logger.Debug("discovered llamacpp models", "count", len(llamaModels))
		found = append(found, llamaModels...)
	} else {
		logger.Debug("llamacpp discovery failed (non-fatal)", "error", llamaErr)
	}

	return found
}
// StartDiscoveryLoop periodically polls for local models and reconciles with the router.
//
// It spawns one background goroutine that, on every tick of interval, runs
// DiscoverLocalModels and feeds the result to reconcileArms. The first poll
// happens one interval after the call, not immediately. The goroutine exits
// when ctx is done. The Ollama tool-support cache is created once and reused
// for the lifetime of the loop.
//
// onReconcile is called when the forced arm identity changes (may be nil).
//
// A non-positive interval would make time.NewTicker panic inside the
// goroutine and take the whole process down; in that case a warning is logged
// and no loop is started.
func StartDiscoveryLoop(ctx context.Context, r *Router, logger *slog.Logger,
	ollamaURL, llamacppURL string,
	providerFactory func(name, model string) provider.Provider,
	interval time.Duration,
	onReconcile func(ArmID),
) {
	// Validate synchronously, before the goroutine exists, so a bad
	// configuration value cannot crash the program from the background.
	if interval <= 0 {
		if logger != nil {
			logger.Warn("local model discovery loop not started: non-positive interval", "interval", interval)
		}
		return
	}

	go func() {
		ollamaToolCache := make(map[string]bool)
		ticker := time.NewTicker(interval)
		defer ticker.Stop()
		for {
			select {
			case <-ctx.Done():
				return
			case <-ticker.C:
				models := DiscoverLocalModels(ctx, logger, ollamaURL, llamacppURL, ollamaToolCache)
				reconcileArms(r, models, providerFactory, logger, onReconcile)
			}
		}
	}()
}
// reconcileArms brings the router's local arms in line with one discovery
// result. In order, it:
//
//  1. resolves a forced arm whose model is the placeholder "default" to the
//     first discovered model of the same provider (invoking onReconcile, if
//     non-nil, with the new arm ID);
//  2. registers every discovered model via RegisterDiscoveredModels;
//  3. removes local arms that were not discovered, except the forced arm.
func reconcileArms(r *Router, discovered []DiscoveredModel, providerFactory func(name, model string) provider.Provider, logger *slog.Logger, onReconcile func(ArmID)) {
	// Index of everything seen in this cycle, keyed by arm ID.
	present := make(map[ArmID]bool, len(discovered))
	for i := range discovered {
		present[NewArmID(discovered[i].Provider, discovered[i].ID)] = true
	}

	// A forced arm named "<provider>/default" is a placeholder; swap it for
	// the real model name once discovery has found one for that provider.
	if forced := r.ForcedArm(); forced != "" && forced.Model() == "default" {
		provName := forced.Provider()

		var matches []DiscoveredModel
		for _, m := range discovered {
			if m.Provider == provName {
				matches = append(matches, m)
			}
		}

		if len(matches) > 0 {
			first := matches[0]
			resolved := NewArmID(provName, first.ID)
			if len(matches) > 1 {
				logger.Warn("multiple models discovered for forced provider, using first",
					"provider", provName, "chosen", first.ID, "total", len(matches))
			}
			logger.Debug("reconciling forced arm identity", "old", forced, "new", resolved)
			r.reconcileForcedArm(forced, resolved, first.ID)
			if onReconcile != nil {
				onReconcile(resolved)
			}
		}
	}

	// Add arms for models that are not registered yet.
	RegisterDiscoveredModels(r, discovered, providerFactory)

	// Drop local arms whose model vanished. The forced arm is re-read here
	// because the step above may have changed it, and it is never removed:
	// the user explicitly chose it.
	pinned := r.ForcedArm()
	for _, arm := range r.Arms() {
		if !arm.IsLocal || arm.ID == pinned || present[arm.ID] {
			continue
		}
		logger.Debug("removing disappeared local arm", "id", arm.ID)
		r.RemoveArm(arm.ID)
	}
}
// RegisterDiscoveredModels adds an arm to the router for every discovered
// local model that is not registered yet.
//
// A model is skipped when an arm with the same ID already exists or when
// providerFactory returns nil for it. New arms are marked local and carry the
// tool-use flag and context window reported by discovery.
func RegisterDiscoveredModels(r *Router, models []DiscoveredModel, providerFactory func(name, model string) provider.Provider) {
nextModel:
	for _, m := range models {
		id := NewArmID(m.Provider, m.ID)

		// Already known to the router? Then there is nothing to do. The arm
		// list is re-read per model so arms registered earlier in this loop
		// are seen as well.
		for _, known := range r.Arms() {
			if known.ID == id {
				continue nextModel
			}
		}

		p := providerFactory(m.Provider, m.ID)
		if p == nil {
			continue
		}

		// Capabilities come straight from discovery: ToolUse stays false
		// unless the probe reported tool support, so models without function
		// calling are not picked for tool-requiring tasks.
		caps := provider.Capabilities{
			ToolUse:       m.SupportsTools,
			ContextWindow: m.ContextSize,
		}
		r.RegisterArm(&Arm{
			ID:           id,
			Provider:     p,
			ModelName:    m.ID,
			IsLocal:      true,
			Capabilities: caps,
		})
	}
}