Files
gnoma/internal/session/local.go
vikingowl 3873f90f83 feat: local model reliability — SDK retries, capability probing, init skill, context compaction
Three compounding bugs prevented tool calling with llama.cpp:
- Stream parser set argsComplete on partial JSON (e.g. "{"), dropping
  subsequent argument deltas — fix: use json.Valid to detect completeness
- Missing tool_choice default — llama.cpp needs explicit "auto" to
  activate its GBNF grammar constraint; now set when tools are present
- Tool names in history used internal format (fs.ls) while definitions
  used API format (fs_ls) — now re-sanitized in translateMessage

Additional changes:
- Disable SDK retries for local providers (500s are deterministic)
- Dynamic capability probing via /props (llama.cpp) and /api/show
  (Ollama), replacing hardcoded model prefix list
- Engine respects forced arm ToolUse capability when router is active
- Bundled /init skill with Go template blocks, context-aware for local
  vs cloud models, deduplication rules against CLAUDE.md
- Tool result compaction for local models — previous round results
  replaced with size markers to stay within small context windows
- Text-only fallback when tool-parse errors occur on local models
- "text-only" TUI indicator when model lacks tool support
- Session ResetError for retry after stream failures
- AllowedTools per-turn filtering in engine buildRequest
2026-04-13 02:01:01 +02:00

245 lines
5.2 KiB
Go

package session
import (
"context"
"fmt"
"log/slog"
"strings"
"sync"
"time"
"somegit.dev/Owlibou/gnoma/internal/engine"
"somegit.dev/Owlibou/gnoma/internal/security"
"somegit.dev/Owlibou/gnoma/internal/stream"
)
// LocalConfig holds all configuration for a Local session.
// NewLocal copies each field into the session it constructs.
type LocalConfig struct {
// Engine is the engine that turns are submitted to.
Engine *engine.Engine
// Provider is the provider name reported by Status and written to snapshot metadata.
Provider string
// Model is the initial model name reported by Status and written to snapshot metadata.
Model string
SessionID string // identifies this session on disk
TurnCount int // seed from restored snapshot; 0 for new sessions
Store *SessionStore // nil = no persistence
Incognito *security.IncognitoMode // nil = always persist
Logger *slog.Logger // nil = slog.Default()
}
// Local implements Session using goroutines and channels within the same process.
type Local struct {
// mu is taken by the methods of Local before they read or update session state.
mu sync.Mutex
// eng is the engine that turns are submitted to and that usage/history are read from.
eng *engine.Engine
// state is the current lifecycle state; a new turn is only accepted in StateIdle.
state SessionState
// events carries the stream events of the most recent turn; a fresh channel is
// created for every accepted send.
events chan stream.Event
// Current turn context
// cancel cancels the context of the most recently started turn.
cancel context.CancelFunc
// turn and err hold the outcome of the last finished turn, as returned by TurnResult.
turn *engine.Turn
err error
// Stats
provider string
model string
// title is derived from the input of the first send that yields a non-empty title.
title string
// turnCount is incremented once per accepted send.
turnCount int
// Persistence
sessionID string
// store receives the auto-saved snapshot after a successful turn; nil disables saving.
store *SessionStore
// incognito, when non-nil, is asked via ShouldPersist before each auto-save.
incognito *security.IncognitoMode
// createdAt is the time NewLocal was called.
createdAt time.Time
logger *slog.Logger
}
// NewLocal builds a channel-based, in-process session from cfg.
// The returned session starts in StateIdle with its creation time set to now.
// When cfg.Logger is nil the session logs through slog.Default().
func NewLocal(cfg LocalConfig) *Local {
	sess := &Local{
		eng:       cfg.Engine,
		state:     StateIdle,
		provider:  cfg.Provider,
		model:     cfg.Model,
		turnCount: cfg.TurnCount,
		sessionID: cfg.SessionID,
		store:     cfg.Store,
		incognito: cfg.Incognito,
		createdAt: time.Now(),
		logger:    cfg.Logger,
	}
	if sess.logger == nil {
		sess.logger = slog.Default()
	}
	return sess
}
// SessionID reports the persistent identifier this session was created with.
func (s *Local) SessionID() string {
	id := s.sessionID
	return id
}
// Send submits input as a new turn using zero-valued turn options.
// It is shorthand for SendWithOptions(input, engine.TurnOptions{}).
func (s *Local) Send(input string) error {
	var defaults engine.TurnOptions
	return s.SendWithOptions(input, defaults)
}
// SendWithOptions is like Send but applies per-turn engine options.
//
// It returns an error, without starting anything, when the session is not in
// StateIdle. Otherwise it switches to StateStreaming, installs a fresh event
// channel (see Events), and runs the engine in a background goroutine.
//
// When the engine returns, the goroutine stores the turn and error for
// TurnResult and sets the state to StateCancelled (error after cancellation),
// StateError (any other error) or StateIdle (success). After a successful turn
// it auto-saves a snapshot if a store is configured and incognito mode allows
// it. The event channel is closed last, after the save attempt.
func (s *Local) SendWithOptions(input string, opts engine.TurnOptions) error {
	s.mu.Lock()
	if s.state != StateIdle {
		// Copy the state while still holding the lock so the message is race-free.
		state := s.state
		s.mu.Unlock()
		return fmt.Errorf("session not idle (state: %s)", state)
	}
	s.state = StateStreaming
	// Keep a local handle to this turn's channel. s.events is replaced by the
	// next send, which may start as soon as the state returns to idle; the
	// goroutine below must keep sending to, and finally close, its own channel.
	events := make(chan stream.Event, 64)
	s.events = events
	s.turn = nil
	s.err = nil
	ctx, cancel := context.WithCancel(context.Background())
	s.cancel = cancel
	s.turnCount++
	if s.title == "" {
		s.title = sessionTitle(input)
	}
	s.mu.Unlock()

	// Run engine in background goroutine
	go func() {
		// Deferred calls run in reverse order: the channel is closed first,
		// then the context is released so it does not leak after a normal finish.
		defer cancel()
		defer close(events)

		cb := func(evt stream.Event) {
			select {
			case events <- evt:
			case <-ctx.Done():
				// Nobody may be draining the channel after a cancel; drop the event.
			}
		}
		turn, err := s.eng.SubmitWithOptions(ctx, input, opts, cb)

		s.mu.Lock()
		s.turn = turn
		s.err = err
		var finalState SessionState
		switch {
		case err != nil && ctx.Err() != nil:
			finalState = StateCancelled
		case err != nil:
			finalState = StateError
		default:
			finalState = StateIdle
		}
		// A turn that finishes after Close must not bring the session back to life.
		if s.state != StateClosed {
			s.state = finalState
		}
		// Copy the mutable session fields under the lock; SetModel or a following
		// send may change them while the snapshot is being written.
		meta := Metadata{
			ID:        s.sessionID,
			Title:     s.title,
			Provider:  s.provider,
			Model:     s.model,
			TurnCount: s.turnCount,
			CreatedAt: s.createdAt,
		}
		s.mu.Unlock()

		// Auto-save after successful turn (outside lock to avoid holding it during I/O)
		if finalState != StateIdle || s.store == nil {
			return
		}
		if s.incognito != nil && !s.incognito.ShouldPersist() {
			return
		}
		meta.Usage = s.eng.Usage()
		meta.UpdatedAt = time.Now()
		// Fetch the history once so MessageCount always matches Messages.
		history := s.eng.History()
		meta.MessageCount = len(history)
		snap := Snapshot{
			ID:       s.sessionID,
			Metadata: meta,
			Messages: history,
		}
		if saveErr := s.store.Save(snap); saveErr != nil {
			s.logger.Warn("session auto-save failed", "error", saveErr)
		}
	}()
	return nil
}
// Events returns the receive side of the event channel created by the most
// recent accepted send. It is nil until the first send.
func (s *Local) Events() <-chan stream.Event {
	s.mu.Lock()
	ch := s.events
	s.mu.Unlock()
	return ch
}
// TurnResult returns the turn and error stored for the session.
// Both are reset to nil when a new send is accepted and filled in again once
// the engine returns.
func (s *Local) TurnResult() (*engine.Turn, error) {
	s.mu.Lock()
	turn, err := s.turn, s.err
	s.mu.Unlock()
	return turn, err
}
// Cancel cancels the context of the most recently started turn.
// It does nothing when no turn has been started yet.
func (s *Local) Cancel() {
	s.mu.Lock()
	defer s.mu.Unlock()

	if s.cancel == nil {
		return
	}
	s.cancel()
}
// ResetError returns a session in StateError to StateIdle and clears the
// stored error so another send can be attempted. In any other state it is a
// no-op.
func (s *Local) ResetError() {
	s.mu.Lock()
	defer s.mu.Unlock()

	if s.state != StateError {
		return
	}
	s.state = StateIdle
	s.err = nil
}
// Close cancels any started turn and marks the session as StateClosed.
// It always returns nil.
func (s *Local) Close() error {
	s.Cancel()

	s.mu.Lock()
	s.state = StateClosed
	s.mu.Unlock()
	return nil
}
// SetModel replaces the model name that Status reports and that snapshot
// metadata records.
func (s *Local) SetModel(model string) {
	s.mu.Lock()
	s.model = model
	s.mu.Unlock()
}
// Status returns a point-in-time summary of the session: state, provider,
// model, turn count, tokens used and whether tools are available. TokenState
// defaults to "ok"; when the engine exposes a context window, the token
// maximum, percentage and state are taken from its tracker.
func (s *Local) Status() Status {
	s.mu.Lock()
	defer s.mu.Unlock()

	var st Status
	st.State = s.state
	st.Provider = s.provider
	st.Model = s.model
	st.TokensUsed = s.eng.Usage().TotalTokens()
	st.TurnCount = s.turnCount
	st.TokenState = "ok"
	st.ToolsAvailable = s.eng.ToolsAvailable()

	window := s.eng.ContextWindow()
	if window == nil {
		return st
	}
	tracker := window.Tracker()
	st.TokensMax = tracker.MaxTokens()
	st.TokenPercent = tracker.PercentUsed()
	st.TokenState = tracker.State().String()
	return st
}
// sessionTitle derives a short title from the first user message.
//
// It takes the first line of input, trims surrounding whitespace and returns
// "" when nothing is left. A title longer than 60 characters is cut to its
// first 60 characters and suffixed with "…".
//
// The limit is counted in runes and the cut is made on a rune boundary, so a
// multi-byte character is never split and the result stays valid UTF-8 for
// valid input. For ASCII input this is identical to a 60-byte limit.
func sessionTitle(input string) string {
	line, _, _ := strings.Cut(input, "\n")
	line = strings.TrimSpace(line)

	const maxLen = 60
	seen := 0
	// Ranging over a string yields the byte offset of every rune start, so i is
	// always a safe place to slice.
	for i := range line {
		if seen == maxLen {
			return line[:i] + "…"
		}
		seen++
	}
	return line
}