Release v0.7.0 #8

Merged
vikingowl merged 4 commits from dev into main 2026-01-23 15:54:50 +01:00
58 changed files with 6461 additions and 249 deletions

View File

@@ -5,7 +5,7 @@
<h1 align="center">Vessel</h1>
<p align="center">
<strong>A modern, feature-rich web interface for Ollama</strong>
<strong>A modern, feature-rich web interface for local LLMs</strong>
</p>
<p align="center">
@@ -28,13 +28,14 @@
**Vessel** is intentionally focused on:
- A clean, local-first UI for **Ollama**
- A clean, local-first UI for **local LLMs**
- **Multiple backends**: Ollama, llama.cpp, LM Studio
- Minimal configuration
- Low visual and cognitive overhead
- Doing a small set of things well
If you want a **universal, highly configurable platform** → [open-webui](https://github.com/open-webui/open-webui) is a great choice.
If you want a **small, focused UI for local Ollama usage** → Vessel is built for that.
If you want a **small, focused UI for local LLM usage** → Vessel is built for that.
---
@@ -65,7 +66,13 @@ If you want a **small, focused UI for local Ollama usage** → Vessel is built f
- Agentic tool calling with chain-of-thought reasoning
- Test tools before saving with the built-in testing panel
### Models
### LLM Backends
- **Ollama** — Full model management, pull/delete/create custom models
- **llama.cpp** — High-performance inference with GGUF models
- **LM Studio** — Desktop app integration
- Switch backends without a restart; available backends are auto-detected
### Models (Ollama)
- Browse and pull models from ollama.com
- Create custom models with embedded system prompts
- **Per-model parameters** — customize temperature, context size, top_k/top_p
@@ -112,7 +119,10 @@ If you want a **small, focused UI for local Ollama usage** → Vessel is built f
### Prerequisites
- [Docker](https://docs.docker.com/get-docker/) and Docker Compose
- [Ollama](https://ollama.com/download) running locally
- An LLM backend (at least one):
- [Ollama](https://ollama.com/download) (recommended)
- [llama.cpp](https://github.com/ggerganov/llama.cpp)
- [LM Studio](https://lmstudio.ai/)
### Configure Ollama
@@ -160,6 +170,7 @@ Full documentation is available on the **[GitHub Wiki](https://github.com/Viking
| Guide | Description |
|-------|-------------|
| [Getting Started](https://github.com/VikingOwl91/vessel/wiki/Getting-Started) | Installation and configuration |
| [LLM Backends](https://github.com/VikingOwl91/vessel/wiki/LLM-Backends) | Configure Ollama, llama.cpp, or LM Studio |
| [Projects](https://github.com/VikingOwl91/vessel/wiki/Projects) | Organize conversations into projects |
| [Knowledge Base](https://github.com/VikingOwl91/vessel/wiki/Knowledge-Base) | RAG with document upload and semantic search |
| [Search](https://github.com/VikingOwl91/vessel/wiki/Search) | Semantic and content search across chats |
@@ -178,6 +189,7 @@ Full documentation is available on the **[GitHub Wiki](https://github.com/Viking
Vessel prioritizes **usability and simplicity** over feature breadth.
**Completed:**
- [x] Multi-backend support (Ollama, llama.cpp, LM Studio)
- [x] Model browser with filtering and update detection
- [x] Custom tools (JavaScript, Python, HTTP)
- [x] System prompt library with model-specific defaults
@@ -197,7 +209,7 @@ Vessel prioritizes **usability and simplicity** over feature breadth.
- Multi-user systems
- Cloud sync
- Plugin ecosystems
- Support for every LLM runtime
- Cloud/API-based LLM providers (OpenAI, Anthropic, etc.)
> *Do one thing well. Keep the UI out of the way.*
@@ -223,5 +235,5 @@ Contributions are welcome!
GPL-3.0 — See [LICENSE](LICENSE) for details.
<p align="center">
Made with <a href="https://ollama.com">Ollama</a> and <a href="https://svelte.dev">Svelte</a>
Made with <a href="https://svelte.dev">Svelte</a> • Supports <a href="https://ollama.com">Ollama</a>, <a href="https://github.com/ggerganov/llama.cpp">llama.cpp</a>, and <a href="https://lmstudio.ai/">LM Studio</a>
</p>

View File

@@ -14,6 +14,9 @@ import (
"github.com/gin-gonic/gin"
"vessel-backend/internal/api"
"vessel-backend/internal/backends"
"vessel-backend/internal/backends/ollama"
"vessel-backend/internal/backends/openai"
"vessel-backend/internal/database"
)
@@ -29,9 +32,11 @@ func getEnvOrDefault(key, defaultValue string) string {
func main() {
var (
port = flag.String("port", getEnvOrDefault("PORT", "8080"), "Server port")
dbPath = flag.String("db", getEnvOrDefault("DB_PATH", "./data/vessel.db"), "Database file path")
ollamaURL = flag.String("ollama-url", getEnvOrDefault("OLLAMA_URL", "http://localhost:11434"), "Ollama API URL")
port = flag.String("port", getEnvOrDefault("PORT", "8080"), "Server port")
dbPath = flag.String("db", getEnvOrDefault("DB_PATH", "./data/vessel.db"), "Database file path")
ollamaURL = flag.String("ollama-url", getEnvOrDefault("OLLAMA_URL", "http://localhost:11434"), "Ollama API URL")
llamacppURL = flag.String("llamacpp-url", getEnvOrDefault("LLAMACPP_URL", "http://localhost:8081"), "llama.cpp server URL")
lmstudioURL = flag.String("lmstudio-url", getEnvOrDefault("LMSTUDIO_URL", "http://localhost:1234"), "LM Studio server URL")
)
flag.Parse()
@@ -47,6 +52,52 @@ func main() {
log.Fatalf("Failed to run migrations: %v", err)
}
// Initialize backend registry
registry := backends.NewRegistry()
// Register Ollama backend
ollamaAdapter, err := ollama.NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeOllama,
BaseURL: *ollamaURL,
})
if err != nil {
log.Printf("Warning: Failed to create Ollama adapter: %v", err)
} else {
if err := registry.Register(ollamaAdapter); err != nil {
log.Printf("Warning: Failed to register Ollama backend: %v", err)
}
}
// Register llama.cpp backend (if URL is configured)
if *llamacppURL != "" {
llamacppAdapter, err := openai.NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeLlamaCpp,
BaseURL: *llamacppURL,
})
if err != nil {
log.Printf("Warning: Failed to create llama.cpp adapter: %v", err)
} else {
if err := registry.Register(llamacppAdapter); err != nil {
log.Printf("Warning: Failed to register llama.cpp backend: %v", err)
}
}
}
// Register LM Studio backend (if URL is configured)
if *lmstudioURL != "" {
lmstudioAdapter, err := openai.NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeLMStudio,
BaseURL: *lmstudioURL,
})
if err != nil {
log.Printf("Warning: Failed to create LM Studio adapter: %v", err)
} else {
if err := registry.Register(lmstudioAdapter); err != nil {
log.Printf("Warning: Failed to register LM Studio backend: %v", err)
}
}
}
// Setup Gin router
gin.SetMode(gin.ReleaseMode)
r := gin.New()
@@ -64,7 +115,7 @@ func main() {
}))
// Register routes
api.SetupRoutes(r, db, *ollamaURL, Version)
api.SetupRoutes(r, db, *ollamaURL, Version, registry)
// Create server
srv := &http.Server{
@@ -79,8 +130,12 @@ func main() {
// Graceful shutdown handling
go func() {
log.Printf("Server starting on port %s", *port)
log.Printf("Ollama URL: %s (using official Go client)", *ollamaURL)
log.Printf("Database: %s", *dbPath)
log.Printf("Backends configured:")
log.Printf(" - Ollama: %s", *ollamaURL)
log.Printf(" - llama.cpp: %s", *llamacppURL)
log.Printf(" - LM Studio: %s", *lmstudioURL)
log.Printf("Active backend: %s", registry.ActiveType().String())
if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
log.Fatalf("Failed to start server: %v", err)
}

View File

@@ -0,0 +1,275 @@
package api
import (
"encoding/json"
"net/http"
"github.com/gin-gonic/gin"
"vessel-backend/internal/backends"
)
// AIHandlers provides HTTP handlers for the unified AI API
type AIHandlers struct {
registry *backends.Registry
}
// NewAIHandlers creates a new AIHandlers instance
func NewAIHandlers(registry *backends.Registry) *AIHandlers {
return &AIHandlers{
registry: registry,
}
}
// ListBackendsHandler returns information about all configured backends
func (h *AIHandlers) ListBackendsHandler() gin.HandlerFunc {
return func(c *gin.Context) {
infos := h.registry.AllInfo(c.Request.Context())
c.JSON(http.StatusOK, gin.H{
"backends": infos,
"active": h.registry.ActiveType().String(),
})
}
}
// DiscoverBackendsHandler probes for available backends
func (h *AIHandlers) DiscoverBackendsHandler() gin.HandlerFunc {
return func(c *gin.Context) {
var req struct {
Endpoints []backends.DiscoveryEndpoint `json:"endpoints"`
}
if err := c.ShouldBindJSON(&req); err != nil {
// Use default endpoints if none provided
req.Endpoints = backends.DefaultDiscoveryEndpoints()
}
if len(req.Endpoints) == 0 {
req.Endpoints = backends.DefaultDiscoveryEndpoints()
}
results := h.registry.Discover(c.Request.Context(), req.Endpoints)
c.JSON(http.StatusOK, gin.H{
"results": results,
})
}
}
// SetActiveHandler sets the active backend
func (h *AIHandlers) SetActiveHandler() gin.HandlerFunc {
return func(c *gin.Context) {
var req struct {
Type string `json:"type" binding:"required"`
}
if err := c.ShouldBindJSON(&req); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": "type is required"})
return
}
backendType, err := backends.ParseBackendType(req.Type)
if err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}
if err := h.registry.SetActive(backendType); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}
c.JSON(http.StatusOK, gin.H{
"active": backendType.String(),
})
}
}
// HealthCheckHandler checks the health of a specific backend
func (h *AIHandlers) HealthCheckHandler() gin.HandlerFunc {
return func(c *gin.Context) {
typeParam := c.Param("type")
backendType, err := backends.ParseBackendType(typeParam)
if err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}
backend, ok := h.registry.Get(backendType)
if !ok {
c.JSON(http.StatusNotFound, gin.H{"error": "backend not registered"})
return
}
if err := backend.HealthCheck(c.Request.Context()); err != nil {
c.JSON(http.StatusServiceUnavailable, gin.H{
"status": "unhealthy",
"error": err.Error(),
})
return
}
c.JSON(http.StatusOK, gin.H{
"status": "healthy",
})
}
}
// ListModelsHandler returns models from the active backend
func (h *AIHandlers) ListModelsHandler() gin.HandlerFunc {
return func(c *gin.Context) {
active := h.registry.Active()
if active == nil {
c.JSON(http.StatusServiceUnavailable, gin.H{"error": "no active backend"})
return
}
models, err := active.ListModels(c.Request.Context())
if err != nil {
c.JSON(http.StatusBadGateway, gin.H{"error": err.Error()})
return
}
c.JSON(http.StatusOK, gin.H{
"models": models,
"backend": active.Type().String(),
})
}
}
// ChatHandler handles chat requests through the active backend
func (h *AIHandlers) ChatHandler() gin.HandlerFunc {
return func(c *gin.Context) {
active := h.registry.Active()
if active == nil {
c.JSON(http.StatusServiceUnavailable, gin.H{"error": "no active backend"})
return
}
var req backends.ChatRequest
if err := c.ShouldBindJSON(&req); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request: " + err.Error()})
return
}
if err := req.Validate(); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}
// Check if streaming is requested
streaming := req.Stream != nil && *req.Stream
if streaming {
h.handleStreamingChat(c, active, &req)
} else {
h.handleNonStreamingChat(c, active, &req)
}
}
}
// handleNonStreamingChat handles non-streaming chat requests
func (h *AIHandlers) handleNonStreamingChat(c *gin.Context, backend backends.LLMBackend, req *backends.ChatRequest) {
resp, err := backend.Chat(c.Request.Context(), req)
if err != nil {
c.JSON(http.StatusBadGateway, gin.H{"error": err.Error()})
return
}
c.JSON(http.StatusOK, resp)
}
// handleStreamingChat handles streaming chat requests
func (h *AIHandlers) handleStreamingChat(c *gin.Context, backend backends.LLMBackend, req *backends.ChatRequest) {
// Set headers for NDJSON streaming
c.Header("Content-Type", "application/x-ndjson")
c.Header("Cache-Control", "no-cache")
c.Header("Connection", "keep-alive")
c.Header("Transfer-Encoding", "chunked")
ctx := c.Request.Context()
flusher, ok := c.Writer.(http.Flusher)
if !ok {
c.JSON(http.StatusInternalServerError, gin.H{"error": "streaming not supported"})
return
}
chunkCh, err := backend.StreamChat(ctx, req)
if err != nil {
errResp := gin.H{"error": err.Error()}
data, _ := json.Marshal(errResp)
c.Writer.Write(append(data, '\n'))
flusher.Flush()
return
}
for chunk := range chunkCh {
select {
case <-ctx.Done():
return
default:
}
data, err := json.Marshal(chunk)
if err != nil {
continue
}
_, err = c.Writer.Write(append(data, '\n'))
if err != nil {
return
}
flusher.Flush()
}
}
// RegisterBackendHandler registers a new backend
func (h *AIHandlers) RegisterBackendHandler() gin.HandlerFunc {
return func(c *gin.Context) {
var req backends.BackendConfig
if err := c.ShouldBindJSON(&req); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request: " + err.Error()})
return
}
if err := req.Validate(); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}
// Create adapter based on type
var backend backends.LLMBackend
var err error
switch req.Type {
case backends.BackendTypeOllama:
// Would import ollama adapter
c.JSON(http.StatusNotImplemented, gin.H{"error": "use /api/v1/ai/backends/discover to register backends"})
return
case backends.BackendTypeLlamaCpp, backends.BackendTypeLMStudio:
// Would import openai adapter
c.JSON(http.StatusNotImplemented, gin.H{"error": "use /api/v1/ai/backends/discover to register backends"})
return
default:
c.JSON(http.StatusBadRequest, gin.H{"error": "unknown backend type"})
return
}
// Note: every branch of the switch above returns, so the registration path
// below is currently unreachable; it is kept for when adapter construction
// is wired into this endpoint.
if err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}
if err := h.registry.Register(backend); err != nil {
c.JSON(http.StatusConflict, gin.H{"error": err.Error()})
return
}
c.JSON(http.StatusCreated, gin.H{
"type": req.Type.String(),
"baseUrl": req.BaseURL,
})
}
}
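
For context, a minimal client-side sketch of consuming the NDJSON stream emitted by `handleStreamingChat` above. This is not part of the PR: it assumes the server's default port 8080 and that `ChatChunk` and `ChatRequest` marshal with lowercase, Ollama-style JSON tags (`model`, `messages`, `stream`, `message`, `done`, `error`).

```go
// Hypothetical standalone client for POST /api/v1/ai/chat (streaming).
package main

import (
	"bufio"
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// chatChunk decodes only the fields this sketch needs from each NDJSON line.
type chatChunk struct {
	Message *struct {
		Role    string `json:"role"`
		Content string `json:"content"`
	} `json:"message"`
	Done  bool   `json:"done"`
	Error string `json:"error,omitempty"`
}

func main() {
	body := []byte(`{"model":"llama3.2:8b","stream":true,"messages":[{"role":"user","content":"Hello"}]}`)
	resp, err := http.Post("http://localhost:8080/api/v1/ai/chat", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Each NDJSON line is one chunk; print content as it arrives.
	sc := bufio.NewScanner(resp.Body)
	for sc.Scan() {
		var c chatChunk
		if err := json.Unmarshal(sc.Bytes(), &c); err != nil {
			continue // skip malformed lines
		}
		if c.Error != "" {
			fmt.Println("\nstream error:", c.Error)
			return
		}
		if c.Message != nil {
			fmt.Print(c.Message.Content)
		}
		if c.Done {
			fmt.Println()
			return
		}
	}
}
```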

View File

@@ -0,0 +1,354 @@
package api
import (
"bytes"
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"github.com/gin-gonic/gin"
"vessel-backend/internal/backends"
)
func setupAITestRouter(registry *backends.Registry) *gin.Engine {
gin.SetMode(gin.TestMode)
r := gin.New()
handlers := NewAIHandlers(registry)
ai := r.Group("/api/v1/ai")
{
ai.GET("/backends", handlers.ListBackendsHandler())
ai.POST("/backends/discover", handlers.DiscoverBackendsHandler())
ai.POST("/backends/active", handlers.SetActiveHandler())
ai.GET("/backends/:type/health", handlers.HealthCheckHandler())
ai.POST("/chat", handlers.ChatHandler())
ai.GET("/models", handlers.ListModelsHandler())
}
return r
}
func TestAIHandlers_ListBackends(t *testing.T) {
registry := backends.NewRegistry()
mock := &mockAIBackend{
backendType: backends.BackendTypeOllama,
config: backends.BackendConfig{
Type: backends.BackendTypeOllama,
BaseURL: "http://localhost:11434",
},
info: backends.BackendInfo{
Type: backends.BackendTypeOllama,
BaseURL: "http://localhost:11434",
Status: backends.BackendStatusConnected,
Capabilities: backends.OllamaCapabilities(),
Version: "0.3.0",
},
}
registry.Register(mock)
registry.SetActive(backends.BackendTypeOllama)
router := setupAITestRouter(registry)
w := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/api/v1/ai/backends", nil)
router.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Errorf("ListBackends() status = %d, want %d", w.Code, http.StatusOK)
}
var resp struct {
Backends []backends.BackendInfo `json:"backends"`
Active string `json:"active"`
}
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("Failed to unmarshal response: %v", err)
}
if len(resp.Backends) != 1 {
t.Errorf("ListBackends() returned %d backends, want 1", len(resp.Backends))
}
if resp.Active != "ollama" {
t.Errorf("ListBackends() active = %q, want %q", resp.Active, "ollama")
}
}
func TestAIHandlers_SetActive(t *testing.T) {
registry := backends.NewRegistry()
mock := &mockAIBackend{
backendType: backends.BackendTypeOllama,
config: backends.BackendConfig{
Type: backends.BackendTypeOllama,
BaseURL: "http://localhost:11434",
},
}
registry.Register(mock)
router := setupAITestRouter(registry)
t.Run("set valid backend active", func(t *testing.T) {
body, _ := json.Marshal(map[string]string{"type": "ollama"})
w := httptest.NewRecorder()
req, _ := http.NewRequest("POST", "/api/v1/ai/backends/active", bytes.NewReader(body))
req.Header.Set("Content-Type", "application/json")
router.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Errorf("SetActive() status = %d, want %d", w.Code, http.StatusOK)
}
if registry.ActiveType() != backends.BackendTypeOllama {
t.Errorf("Active backend = %v, want %v", registry.ActiveType(), backends.BackendTypeOllama)
}
})
t.Run("set invalid backend active", func(t *testing.T) {
body, _ := json.Marshal(map[string]string{"type": "llamacpp"})
w := httptest.NewRecorder()
req, _ := http.NewRequest("POST", "/api/v1/ai/backends/active", bytes.NewReader(body))
req.Header.Set("Content-Type", "application/json")
router.ServeHTTP(w, req)
if w.Code != http.StatusBadRequest {
t.Errorf("SetActive() status = %d, want %d", w.Code, http.StatusBadRequest)
}
})
}
func TestAIHandlers_HealthCheck(t *testing.T) {
registry := backends.NewRegistry()
mock := &mockAIBackend{
backendType: backends.BackendTypeOllama,
config: backends.BackendConfig{
Type: backends.BackendTypeOllama,
BaseURL: "http://localhost:11434",
},
healthErr: nil,
}
registry.Register(mock)
router := setupAITestRouter(registry)
t.Run("healthy backend", func(t *testing.T) {
w := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/api/v1/ai/backends/ollama/health", nil)
router.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Errorf("HealthCheck() status = %d, want %d", w.Code, http.StatusOK)
}
})
t.Run("non-existent backend", func(t *testing.T) {
w := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/api/v1/ai/backends/llamacpp/health", nil)
router.ServeHTTP(w, req)
if w.Code != http.StatusNotFound {
t.Errorf("HealthCheck() status = %d, want %d", w.Code, http.StatusNotFound)
}
})
}
func TestAIHandlers_ListModels(t *testing.T) {
registry := backends.NewRegistry()
mock := &mockAIBackend{
backendType: backends.BackendTypeOllama,
config: backends.BackendConfig{
Type: backends.BackendTypeOllama,
BaseURL: "http://localhost:11434",
},
models: []backends.Model{
{ID: "llama3.2:8b", Name: "llama3.2:8b", Family: "llama"},
{ID: "mistral:7b", Name: "mistral:7b", Family: "mistral"},
},
}
registry.Register(mock)
registry.SetActive(backends.BackendTypeOllama)
router := setupAITestRouter(registry)
w := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/api/v1/ai/models", nil)
router.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Errorf("ListModels() status = %d, want %d", w.Code, http.StatusOK)
}
var resp struct {
Models []backends.Model `json:"models"`
}
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("Failed to unmarshal response: %v", err)
}
if len(resp.Models) != 2 {
t.Errorf("ListModels() returned %d models, want 2", len(resp.Models))
}
}
func TestAIHandlers_ListModels_NoActiveBackend(t *testing.T) {
registry := backends.NewRegistry()
router := setupAITestRouter(registry)
w := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/api/v1/ai/models", nil)
router.ServeHTTP(w, req)
if w.Code != http.StatusServiceUnavailable {
t.Errorf("ListModels() status = %d, want %d", w.Code, http.StatusServiceUnavailable)
}
}
func TestAIHandlers_Chat(t *testing.T) {
registry := backends.NewRegistry()
mock := &mockAIBackend{
backendType: backends.BackendTypeOllama,
config: backends.BackendConfig{
Type: backends.BackendTypeOllama,
BaseURL: "http://localhost:11434",
},
chatResponse: &backends.ChatChunk{
Model: "llama3.2:8b",
Message: &backends.ChatMessage{
Role: "assistant",
Content: "Hello! How can I help?",
},
Done: true,
},
}
registry.Register(mock)
registry.SetActive(backends.BackendTypeOllama)
router := setupAITestRouter(registry)
t.Run("non-streaming chat", func(t *testing.T) {
chatReq := backends.ChatRequest{
Model: "llama3.2:8b",
Messages: []backends.ChatMessage{
{Role: "user", Content: "Hello"},
},
}
body, _ := json.Marshal(chatReq)
w := httptest.NewRecorder()
req, _ := http.NewRequest("POST", "/api/v1/ai/chat", bytes.NewReader(body))
req.Header.Set("Content-Type", "application/json")
router.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Errorf("Chat() status = %d, want %d, body: %s", w.Code, http.StatusOK, w.Body.String())
}
var resp backends.ChatChunk
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("Failed to unmarshal response: %v", err)
}
if !resp.Done {
t.Error("Chat() response.Done = false, want true")
}
if resp.Message == nil || resp.Message.Content != "Hello! How can I help?" {
t.Errorf("Chat() unexpected response: %+v", resp)
}
})
}
func TestAIHandlers_Chat_InvalidRequest(t *testing.T) {
registry := backends.NewRegistry()
mock := &mockAIBackend{
backendType: backends.BackendTypeOllama,
}
registry.Register(mock)
registry.SetActive(backends.BackendTypeOllama)
router := setupAITestRouter(registry)
// Missing model
chatReq := map[string]interface{}{
"messages": []map[string]string{
{"role": "user", "content": "Hello"},
},
}
body, _ := json.Marshal(chatReq)
w := httptest.NewRecorder()
req, _ := http.NewRequest("POST", "/api/v1/ai/chat", bytes.NewReader(body))
req.Header.Set("Content-Type", "application/json")
router.ServeHTTP(w, req)
if w.Code != http.StatusBadRequest {
t.Errorf("Chat() status = %d, want %d", w.Code, http.StatusBadRequest)
}
}
// mockAIBackend implements backends.LLMBackend for testing
type mockAIBackend struct {
backendType backends.BackendType
config backends.BackendConfig
info backends.BackendInfo
healthErr error
models []backends.Model
chatResponse *backends.ChatChunk
}
func (m *mockAIBackend) Type() backends.BackendType {
return m.backendType
}
func (m *mockAIBackend) Config() backends.BackendConfig {
return m.config
}
func (m *mockAIBackend) HealthCheck(ctx context.Context) error {
return m.healthErr
}
func (m *mockAIBackend) ListModels(ctx context.Context) ([]backends.Model, error) {
return m.models, nil
}
func (m *mockAIBackend) StreamChat(ctx context.Context, req *backends.ChatRequest) (<-chan backends.ChatChunk, error) {
ch := make(chan backends.ChatChunk, 1)
if m.chatResponse != nil {
ch <- *m.chatResponse
}
close(ch)
return ch, nil
}
func (m *mockAIBackend) Chat(ctx context.Context, req *backends.ChatRequest) (*backends.ChatChunk, error) {
if m.chatResponse != nil {
return m.chatResponse, nil
}
return &backends.ChatChunk{Done: true}, nil
}
func (m *mockAIBackend) Capabilities() backends.BackendCapabilities {
return backends.OllamaCapabilities()
}
func (m *mockAIBackend) Info(ctx context.Context) backends.BackendInfo {
if m.info.Type != "" {
return m.info
}
return backends.BackendInfo{
Type: m.backendType,
BaseURL: m.config.BaseURL,
Status: backends.BackendStatusConnected,
Capabilities: m.Capabilities(),
}
}

View File

@@ -5,10 +5,12 @@ import (
"log"
"github.com/gin-gonic/gin"
"vessel-backend/internal/backends"
)
// SetupRoutes configures all API routes
func SetupRoutes(r *gin.Engine, db *sql.DB, ollamaURL string, appVersion string) {
func SetupRoutes(r *gin.Engine, db *sql.DB, ollamaURL string, appVersion string, registry *backends.Registry) {
// Initialize Ollama service with official client
ollamaService, err := NewOllamaService(ollamaURL)
if err != nil {
@@ -97,6 +99,24 @@ func SetupRoutes(r *gin.Engine, db *sql.DB, ollamaURL string, appVersion string)
models.GET("/remote/status", modelRegistry.SyncStatusHandler())
}
// Unified AI routes (multi-backend support)
if registry != nil {
aiHandlers := NewAIHandlers(registry)
ai := v1.Group("/ai")
{
// Backend management
ai.GET("/backends", aiHandlers.ListBackendsHandler())
ai.POST("/backends/discover", aiHandlers.DiscoverBackendsHandler())
ai.POST("/backends/active", aiHandlers.SetActiveHandler())
ai.GET("/backends/:type/health", aiHandlers.HealthCheckHandler())
ai.POST("/backends/register", aiHandlers.RegisterBackendHandler())
// Unified model and chat endpoints (route to active backend)
ai.GET("/models", aiHandlers.ListModelsHandler())
ai.POST("/chat", aiHandlers.ChatHandler())
}
}
// Ollama API routes (using official client)
if ollamaService != nil {
ollama := v1.Group("/ollama")
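
As a usage sketch (not part of the PR), the backend-management routes registered above could be exercised like this, assuming the default port 8080 and the lowercase backend type strings used in the tests ("ollama", "llamacpp"):

```go
// Hypothetical client: list configured backends, then switch the active one.
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// GET /api/v1/ai/backends returns {"backends": [...], "active": "<type>"}.
	resp, err := http.Get("http://localhost:8080/api/v1/ai/backends")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	var list struct {
		Active string `json:"active"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&list); err != nil {
		panic(err)
	}
	fmt.Println("active backend:", list.Active)

	// POST /api/v1/ai/backends/active switches the active backend.
	body, _ := json.Marshal(map[string]string{"type": "llamacpp"})
	r, err := http.Post("http://localhost:8080/api/v1/ai/backends/active", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer r.Body.Close()
	fmt.Println("switch status:", r.Status)
}
```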

View File

@@ -0,0 +1,98 @@
package backends
import (
"context"
)
// LLMBackend defines the interface for LLM backend implementations.
// All backends (Ollama, llama.cpp, LM Studio) must implement this interface.
type LLMBackend interface {
// Type returns the backend type identifier
Type() BackendType
// Config returns the backend configuration
Config() BackendConfig
// HealthCheck verifies the backend is reachable and operational
HealthCheck(ctx context.Context) error
// ListModels returns all models available from this backend
ListModels(ctx context.Context) ([]Model, error)
// StreamChat sends a chat request and returns a channel for streaming responses.
// The channel is closed when the stream completes or an error occurs.
// Callers should check ChatChunk.Error for stream errors.
StreamChat(ctx context.Context, req *ChatRequest) (<-chan ChatChunk, error)
// Chat sends a non-streaming chat request and returns the final response
Chat(ctx context.Context, req *ChatRequest) (*ChatChunk, error)
// Capabilities returns what features this backend supports
Capabilities() BackendCapabilities
// Info returns detailed information about the backend including status
Info(ctx context.Context) BackendInfo
}
// ModelManager extends LLMBackend with model management capabilities.
// Only Ollama implements this interface.
type ModelManager interface {
LLMBackend
// PullModel downloads a model from the registry.
// Returns a channel for progress updates.
PullModel(ctx context.Context, name string) (<-chan PullProgress, error)
// DeleteModel removes a model from local storage
DeleteModel(ctx context.Context, name string) error
// CreateModel creates a custom model with the given Modelfile content
CreateModel(ctx context.Context, name string, modelfile string) (<-chan CreateProgress, error)
// CopyModel creates a copy of an existing model
CopyModel(ctx context.Context, source, destination string) error
// ShowModel returns detailed information about a specific model
ShowModel(ctx context.Context, name string) (*ModelDetails, error)
}
// EmbeddingProvider extends LLMBackend with embedding capabilities.
type EmbeddingProvider interface {
LLMBackend
// Embed generates embeddings for the given input
Embed(ctx context.Context, model string, input []string) ([][]float64, error)
}
// PullProgress represents progress during model download
type PullProgress struct {
Status string `json:"status"`
Digest string `json:"digest,omitempty"`
Total int64 `json:"total,omitempty"`
Completed int64 `json:"completed,omitempty"`
Error string `json:"error,omitempty"`
}
// CreateProgress represents progress during model creation
type CreateProgress struct {
Status string `json:"status"`
Error string `json:"error,omitempty"`
}
// ModelDetails contains detailed information about a model
type ModelDetails struct {
Name string `json:"name"`
ModifiedAt string `json:"modified_at"`
Size int64 `json:"size"`
Digest string `json:"digest"`
Format string `json:"format"`
Family string `json:"family"`
Families []string `json:"families"`
ParamSize string `json:"parameter_size"`
QuantLevel string `json:"quantization_level"`
Template string `json:"template"`
System string `json:"system"`
License string `json:"license"`
Modelfile string `json:"modelfile"`
Parameters map[string]string `json:"parameters"`
}
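
One way to read these interfaces — a hedged sketch, not code from this PR — is that callers hold a plain `LLMBackend` and feature-detect the optional extensions with a type assertion, since only the Ollama adapter implements `ModelManager`:

```go
package backends

import (
	"context"
	"fmt"
)

// pullIfSupported is a hypothetical helper illustrating capability detection:
// it only proceeds when the backend also implements ModelManager.
func pullIfSupported(ctx context.Context, b LLMBackend, name string) error {
	mm, ok := b.(ModelManager)
	if !ok {
		return fmt.Errorf("backend %s cannot pull models", b.Type())
	}
	progress, err := mm.PullModel(ctx, name)
	if err != nil {
		return err
	}
	// Drain the progress channel and surface any reported error.
	for p := range progress {
		if p.Error != "" {
			return fmt.Errorf("pull failed: %s", p.Error)
		}
	}
	return nil
}
```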

View File

@@ -0,0 +1,624 @@
package ollama
import (
"bufio"
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"time"
"vessel-backend/internal/backends"
)
// Adapter implements the LLMBackend interface for Ollama.
// It also implements ModelManager and EmbeddingProvider.
type Adapter struct {
config backends.BackendConfig
httpClient *http.Client
baseURL *url.URL
}
// Ensure Adapter implements all required interfaces
var (
_ backends.LLMBackend = (*Adapter)(nil)
_ backends.ModelManager = (*Adapter)(nil)
_ backends.EmbeddingProvider = (*Adapter)(nil)
)
// NewAdapter creates a new Ollama backend adapter
func NewAdapter(config backends.BackendConfig) (*Adapter, error) {
if config.Type != backends.BackendTypeOllama {
return nil, fmt.Errorf("invalid backend type: expected %s, got %s", backends.BackendTypeOllama, config.Type)
}
if err := config.Validate(); err != nil {
return nil, fmt.Errorf("invalid config: %w", err)
}
baseURL, err := url.Parse(config.BaseURL)
if err != nil {
return nil, fmt.Errorf("invalid base URL: %w", err)
}
return &Adapter{
config: config,
baseURL: baseURL,
httpClient: &http.Client{
Timeout: 30 * time.Second,
},
}, nil
}
// Type returns the backend type
func (a *Adapter) Type() backends.BackendType {
return backends.BackendTypeOllama
}
// Config returns the backend configuration
func (a *Adapter) Config() backends.BackendConfig {
return a.config
}
// Capabilities returns what features this backend supports
func (a *Adapter) Capabilities() backends.BackendCapabilities {
return backends.OllamaCapabilities()
}
// HealthCheck verifies the backend is reachable
func (a *Adapter) HealthCheck(ctx context.Context) error {
req, err := http.NewRequestWithContext(ctx, "GET", a.baseURL.String()+"/api/version", nil)
if err != nil {
return fmt.Errorf("failed to create request: %w", err)
}
resp, err := a.httpClient.Do(req)
if err != nil {
return fmt.Errorf("failed to reach Ollama: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return fmt.Errorf("Ollama returned status %d", resp.StatusCode)
}
return nil
}
// ollamaListResponse represents the response from /api/tags
type ollamaListResponse struct {
Models []ollamaModel `json:"models"`
}
type ollamaModel struct {
Name string `json:"name"`
Size int64 `json:"size"`
ModifiedAt string `json:"modified_at"`
Details ollamaModelDetails `json:"details"`
}
type ollamaModelDetails struct {
Family string `json:"family"`
QuantLevel string `json:"quantization_level"`
ParamSize string `json:"parameter_size"`
}
// ListModels returns all models available from Ollama
func (a *Adapter) ListModels(ctx context.Context) ([]backends.Model, error) {
req, err := http.NewRequestWithContext(ctx, "GET", a.baseURL.String()+"/api/tags", nil)
if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err)
}
resp, err := a.httpClient.Do(req)
if err != nil {
return nil, fmt.Errorf("failed to list models: %w", err)
}
defer resp.Body.Close()
var listResp ollamaListResponse
if err := json.NewDecoder(resp.Body).Decode(&listResp); err != nil {
return nil, fmt.Errorf("failed to decode response: %w", err)
}
models := make([]backends.Model, len(listResp.Models))
for i, m := range listResp.Models {
models[i] = backends.Model{
ID: m.Name,
Name: m.Name,
Size: m.Size,
ModifiedAt: m.ModifiedAt,
Family: m.Details.Family,
QuantLevel: m.Details.QuantLevel,
}
}
return models, nil
}
// Chat sends a non-streaming chat request
func (a *Adapter) Chat(ctx context.Context, req *backends.ChatRequest) (*backends.ChatChunk, error) {
if err := req.Validate(); err != nil {
return nil, fmt.Errorf("invalid request: %w", err)
}
// Convert to Ollama format
ollamaReq := a.convertChatRequest(req)
ollamaReq["stream"] = false
body, err := json.Marshal(ollamaReq)
if err != nil {
return nil, fmt.Errorf("failed to marshal request: %w", err)
}
httpReq, err := http.NewRequestWithContext(ctx, "POST", a.baseURL.String()+"/api/chat", bytes.NewReader(body))
if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err)
}
httpReq.Header.Set("Content-Type", "application/json")
resp, err := a.httpClient.Do(httpReq)
if err != nil {
return nil, fmt.Errorf("chat request failed: %w", err)
}
defer resp.Body.Close()
var ollamaResp ollamaChatResponse
if err := json.NewDecoder(resp.Body).Decode(&ollamaResp); err != nil {
return nil, fmt.Errorf("failed to decode response: %w", err)
}
return a.convertChatResponse(&ollamaResp), nil
}
// StreamChat sends a streaming chat request
func (a *Adapter) StreamChat(ctx context.Context, req *backends.ChatRequest) (<-chan backends.ChatChunk, error) {
if err := req.Validate(); err != nil {
return nil, fmt.Errorf("invalid request: %w", err)
}
// Convert to Ollama format
ollamaReq := a.convertChatRequest(req)
ollamaReq["stream"] = true
body, err := json.Marshal(ollamaReq)
if err != nil {
return nil, fmt.Errorf("failed to marshal request: %w", err)
}
// Create HTTP request without timeout for streaming
httpReq, err := http.NewRequestWithContext(ctx, "POST", a.baseURL.String()+"/api/chat", bytes.NewReader(body))
if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err)
}
httpReq.Header.Set("Content-Type", "application/json")
// Use a client without timeout for streaming
client := &http.Client{}
resp, err := client.Do(httpReq)
if err != nil {
return nil, fmt.Errorf("chat request failed: %w", err)
}
chunkCh := make(chan backends.ChatChunk)
go func() {
defer close(chunkCh)
defer resp.Body.Close()
scanner := bufio.NewScanner(resp.Body)
for scanner.Scan() {
select {
case <-ctx.Done():
return
default:
}
line := scanner.Bytes()
if len(line) == 0 {
continue
}
var ollamaResp ollamaChatResponse
if err := json.Unmarshal(line, &ollamaResp); err != nil {
chunkCh <- backends.ChatChunk{Error: fmt.Sprintf("failed to parse response: %v", err)}
return
}
chunkCh <- *a.convertChatResponse(&ollamaResp)
if ollamaResp.Done {
return
}
}
if err := scanner.Err(); err != nil && ctx.Err() == nil {
chunkCh <- backends.ChatChunk{Error: fmt.Sprintf("stream error: %v", err)}
}
}()
return chunkCh, nil
}
// Info returns detailed information about the backend
func (a *Adapter) Info(ctx context.Context) backends.BackendInfo {
info := backends.BackendInfo{
Type: backends.BackendTypeOllama,
BaseURL: a.config.BaseURL,
Capabilities: a.Capabilities(),
}
// Try to get version
req, err := http.NewRequestWithContext(ctx, "GET", a.baseURL.String()+"/api/version", nil)
if err != nil {
info.Status = backends.BackendStatusDisconnected
info.Error = err.Error()
return info
}
resp, err := a.httpClient.Do(req)
if err != nil {
info.Status = backends.BackendStatusDisconnected
info.Error = err.Error()
return info
}
defer resp.Body.Close()
var versionResp struct {
Version string `json:"version"`
}
if err := json.NewDecoder(resp.Body).Decode(&versionResp); err != nil {
info.Status = backends.BackendStatusDisconnected
info.Error = err.Error()
return info
}
info.Status = backends.BackendStatusConnected
info.Version = versionResp.Version
return info
}
// ShowModel returns detailed information about a specific model
func (a *Adapter) ShowModel(ctx context.Context, name string) (*backends.ModelDetails, error) {
body, err := json.Marshal(map[string]string{"name": name})
if err != nil {
return nil, fmt.Errorf("failed to marshal request: %w", err)
}
req, err := http.NewRequestWithContext(ctx, "POST", a.baseURL.String()+"/api/show", bytes.NewReader(body))
if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
resp, err := a.httpClient.Do(req)
if err != nil {
return nil, fmt.Errorf("failed to show model: %w", err)
}
defer resp.Body.Close()
var showResp struct {
Modelfile string `json:"modelfile"`
Template string `json:"template"`
System string `json:"system"`
Details struct {
Family string `json:"family"`
ParamSize string `json:"parameter_size"`
QuantLevel string `json:"quantization_level"`
} `json:"details"`
}
if err := json.NewDecoder(resp.Body).Decode(&showResp); err != nil {
return nil, fmt.Errorf("failed to decode response: %w", err)
}
return &backends.ModelDetails{
Name: name,
Family: showResp.Details.Family,
ParamSize: showResp.Details.ParamSize,
QuantLevel: showResp.Details.QuantLevel,
Template: showResp.Template,
System: showResp.System,
Modelfile: showResp.Modelfile,
}, nil
}
// PullModel downloads a model from the registry
func (a *Adapter) PullModel(ctx context.Context, name string) (<-chan backends.PullProgress, error) {
body, err := json.Marshal(map[string]interface{}{"name": name, "stream": true})
if err != nil {
return nil, fmt.Errorf("failed to marshal request: %w", err)
}
req, err := http.NewRequestWithContext(ctx, "POST", a.baseURL.String()+"/api/pull", bytes.NewReader(body))
if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
return nil, fmt.Errorf("failed to pull model: %w", err)
}
progressCh := make(chan backends.PullProgress)
go func() {
defer close(progressCh)
defer resp.Body.Close()
scanner := bufio.NewScanner(resp.Body)
for scanner.Scan() {
select {
case <-ctx.Done():
return
default:
}
var progress struct {
Status string `json:"status"`
Digest string `json:"digest"`
Total int64 `json:"total"`
Completed int64 `json:"completed"`
}
if err := json.Unmarshal(scanner.Bytes(), &progress); err != nil {
progressCh <- backends.PullProgress{Error: err.Error()}
return
}
progressCh <- backends.PullProgress{
Status: progress.Status,
Digest: progress.Digest,
Total: progress.Total,
Completed: progress.Completed,
}
}
if err := scanner.Err(); err != nil && ctx.Err() == nil {
progressCh <- backends.PullProgress{Error: err.Error()}
}
}()
return progressCh, nil
}
// DeleteModel removes a model from local storage
func (a *Adapter) DeleteModel(ctx context.Context, name string) error {
body, err := json.Marshal(map[string]string{"name": name})
if err != nil {
return fmt.Errorf("failed to marshal request: %w", err)
}
req, err := http.NewRequestWithContext(ctx, "DELETE", a.baseURL.String()+"/api/delete", bytes.NewReader(body))
if err != nil {
return fmt.Errorf("failed to create request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
resp, err := a.httpClient.Do(req)
if err != nil {
return fmt.Errorf("failed to delete model: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
bodyBytes, _ := io.ReadAll(resp.Body)
return fmt.Errorf("delete failed: %s", string(bodyBytes))
}
return nil
}
// CreateModel creates a custom model with the given Modelfile content
func (a *Adapter) CreateModel(ctx context.Context, name string, modelfile string) (<-chan backends.CreateProgress, error) {
body, err := json.Marshal(map[string]interface{}{
"name": name,
"modelfile": modelfile,
"stream": true,
})
if err != nil {
return nil, fmt.Errorf("failed to marshal request: %w", err)
}
req, err := http.NewRequestWithContext(ctx, "POST", a.baseURL.String()+"/api/create", bytes.NewReader(body))
if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
return nil, fmt.Errorf("failed to create model: %w", err)
}
progressCh := make(chan backends.CreateProgress)
go func() {
defer close(progressCh)
defer resp.Body.Close()
scanner := bufio.NewScanner(resp.Body)
for scanner.Scan() {
select {
case <-ctx.Done():
return
default:
}
var progress struct {
Status string `json:"status"`
}
if err := json.Unmarshal(scanner.Bytes(), &progress); err != nil {
progressCh <- backends.CreateProgress{Error: err.Error()}
return
}
progressCh <- backends.CreateProgress{Status: progress.Status}
}
if err := scanner.Err(); err != nil && ctx.Err() == nil {
progressCh <- backends.CreateProgress{Error: err.Error()}
}
}()
return progressCh, nil
}
// CopyModel creates a copy of an existing model
func (a *Adapter) CopyModel(ctx context.Context, source, destination string) error {
body, err := json.Marshal(map[string]string{
"source": source,
"destination": destination,
})
if err != nil {
return fmt.Errorf("failed to marshal request: %w", err)
}
req, err := http.NewRequestWithContext(ctx, "POST", a.baseURL.String()+"/api/copy", bytes.NewReader(body))
if err != nil {
return fmt.Errorf("failed to create request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
resp, err := a.httpClient.Do(req)
if err != nil {
return fmt.Errorf("failed to copy model: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
bodyBytes, _ := io.ReadAll(resp.Body)
return fmt.Errorf("copy failed: %s", string(bodyBytes))
}
return nil
}
// Embed generates embeddings for the given input
func (a *Adapter) Embed(ctx context.Context, model string, input []string) ([][]float64, error) {
body, err := json.Marshal(map[string]interface{}{
"model": model,
"input": input,
})
if err != nil {
return nil, fmt.Errorf("failed to marshal request: %w", err)
}
req, err := http.NewRequestWithContext(ctx, "POST", a.baseURL.String()+"/api/embed", bytes.NewReader(body))
if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
resp, err := a.httpClient.Do(req)
if err != nil {
return nil, fmt.Errorf("embed request failed: %w", err)
}
defer resp.Body.Close()
var embedResp struct {
Embeddings [][]float64 `json:"embeddings"`
}
if err := json.NewDecoder(resp.Body).Decode(&embedResp); err != nil {
return nil, fmt.Errorf("failed to decode response: %w", err)
}
return embedResp.Embeddings, nil
}
// ollamaChatResponse represents the response from /api/chat
type ollamaChatResponse struct {
Model string `json:"model"`
CreatedAt string `json:"created_at"`
Message ollamaChatMessage `json:"message"`
Done bool `json:"done"`
DoneReason string `json:"done_reason,omitempty"`
PromptEvalCount int `json:"prompt_eval_count,omitempty"`
EvalCount int `json:"eval_count,omitempty"`
}
type ollamaChatMessage struct {
Role string `json:"role"`
Content string `json:"content"`
Images []string `json:"images,omitempty"`
ToolCalls []ollamaToolCall `json:"tool_calls,omitempty"`
}
type ollamaToolCall struct {
Function struct {
Name string `json:"name"`
Arguments json.RawMessage `json:"arguments"`
} `json:"function"`
}
// convertChatRequest converts a backends.ChatRequest to Ollama format
func (a *Adapter) convertChatRequest(req *backends.ChatRequest) map[string]interface{} {
messages := make([]map[string]interface{}, len(req.Messages))
for i, msg := range req.Messages {
m := map[string]interface{}{
"role": msg.Role,
"content": msg.Content,
}
if len(msg.Images) > 0 {
m["images"] = msg.Images
}
messages[i] = m
}
ollamaReq := map[string]interface{}{
"model": req.Model,
"messages": messages,
}
// Add optional parameters
if req.Options != nil {
ollamaReq["options"] = req.Options
}
if len(req.Tools) > 0 {
ollamaReq["tools"] = req.Tools
}
return ollamaReq
}
// convertChatResponse converts an Ollama response to backends.ChatChunk
func (a *Adapter) convertChatResponse(resp *ollamaChatResponse) *backends.ChatChunk {
chunk := &backends.ChatChunk{
Model: resp.Model,
CreatedAt: resp.CreatedAt,
Done: resp.Done,
DoneReason: resp.DoneReason,
PromptEvalCount: resp.PromptEvalCount,
EvalCount: resp.EvalCount,
}
if resp.Message.Role != "" || resp.Message.Content != "" {
msg := &backends.ChatMessage{
Role: resp.Message.Role,
Content: resp.Message.Content,
Images: resp.Message.Images,
}
// Convert tool calls
for _, tc := range resp.Message.ToolCalls {
msg.ToolCalls = append(msg.ToolCalls, backends.ToolCall{
Type: "function",
Function: struct {
Name string `json:"name"`
Arguments string `json:"arguments"`
}{
Name: tc.Function.Name,
Arguments: string(tc.Function.Arguments),
},
})
}
chunk.Message = msg
}
return chunk
}
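
For orientation, a hedged sketch of driving this adapter directly, outside the HTTP layer. It assumes Ollama on its default port; the model name is only a placeholder.

```go
package main

import (
	"context"
	"fmt"

	"vessel-backend/internal/backends"
	"vessel-backend/internal/backends/ollama"
)

func main() {
	adapter, err := ollama.NewAdapter(backends.BackendConfig{
		Type:    backends.BackendTypeOllama,
		BaseURL: "http://localhost:11434",
	})
	if err != nil {
		panic(err)
	}

	stream := true
	chunks, err := adapter.StreamChat(context.Background(), &backends.ChatRequest{
		Model:    "llama3.2:8b", // placeholder model name
		Messages: []backends.ChatMessage{{Role: "user", Content: "Hello"}},
		Stream:   &stream,
	})
	if err != nil {
		panic(err)
	}
	// Print streamed content; the channel is closed when the stream ends.
	for c := range chunks {
		if c.Error != "" {
			fmt.Println("\nstream error:", c.Error)
			return
		}
		if c.Message != nil {
			fmt.Print(c.Message.Content)
		}
	}
	fmt.Println()
}
```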

View File

@@ -0,0 +1,574 @@
package ollama
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"time"
"vessel-backend/internal/backends"
)
func TestAdapter_Type(t *testing.T) {
adapter, _ := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeOllama,
BaseURL: "http://localhost:11434",
})
if adapter.Type() != backends.BackendTypeOllama {
t.Errorf("Type() = %v, want %v", adapter.Type(), backends.BackendTypeOllama)
}
}
func TestAdapter_Config(t *testing.T) {
cfg := backends.BackendConfig{
Type: backends.BackendTypeOllama,
BaseURL: "http://localhost:11434",
Enabled: true,
}
adapter, _ := NewAdapter(cfg)
got := adapter.Config()
if got.Type != cfg.Type {
t.Errorf("Config().Type = %v, want %v", got.Type, cfg.Type)
}
if got.BaseURL != cfg.BaseURL {
t.Errorf("Config().BaseURL = %v, want %v", got.BaseURL, cfg.BaseURL)
}
}
func TestAdapter_Capabilities(t *testing.T) {
adapter, _ := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeOllama,
BaseURL: "http://localhost:11434",
})
caps := adapter.Capabilities()
if !caps.CanListModels {
t.Error("Ollama adapter should support listing models")
}
if !caps.CanPullModels {
t.Error("Ollama adapter should support pulling models")
}
if !caps.CanDeleteModels {
t.Error("Ollama adapter should support deleting models")
}
if !caps.CanCreateModels {
t.Error("Ollama adapter should support creating models")
}
if !caps.CanStreamChat {
t.Error("Ollama adapter should support streaming chat")
}
if !caps.CanEmbed {
t.Error("Ollama adapter should support embeddings")
}
}
func TestAdapter_HealthCheck(t *testing.T) {
t.Run("healthy server", func(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/" || r.URL.Path == "/api/version" {
w.WriteHeader(http.StatusOK)
json.NewEncoder(w).Encode(map[string]string{"version": "0.1.0"})
}
}))
defer server.Close()
adapter, err := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeOllama,
BaseURL: server.URL,
})
if err != nil {
t.Fatalf("Failed to create adapter: %v", err)
}
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
if err := adapter.HealthCheck(ctx); err != nil {
t.Errorf("HealthCheck() error = %v, want nil", err)
}
})
t.Run("unreachable server", func(t *testing.T) {
adapter, err := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeOllama,
BaseURL: "http://localhost:19999", // unlikely to be running
})
if err != nil {
t.Fatalf("Failed to create adapter: %v", err)
}
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
defer cancel()
if err := adapter.HealthCheck(ctx); err == nil {
t.Error("HealthCheck() expected error for unreachable server")
}
})
}
func TestAdapter_ListModels(t *testing.T) {
t.Run("returns model list", func(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/api/tags" {
resp := map[string]interface{}{
"models": []map[string]interface{}{
{
"name": "llama3.2:8b",
"size": int64(4700000000),
"modified_at": "2024-01-15T10:30:00Z",
"details": map[string]interface{}{
"family": "llama",
"quantization_level": "Q4_K_M",
},
},
{
"name": "mistral:7b",
"size": int64(4100000000),
"modified_at": "2024-01-14T08:00:00Z",
"details": map[string]interface{}{
"family": "mistral",
"quantization_level": "Q4_0",
},
},
},
}
json.NewEncoder(w).Encode(resp)
}
}))
defer server.Close()
adapter, _ := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeOllama,
BaseURL: server.URL,
})
ctx := context.Background()
models, err := adapter.ListModels(ctx)
if err != nil {
t.Fatalf("ListModels() error = %v", err)
}
if len(models) != 2 {
t.Errorf("ListModels() returned %d models, want 2", len(models))
}
if models[0].Name != "llama3.2:8b" {
t.Errorf("First model name = %q, want %q", models[0].Name, "llama3.2:8b")
}
if models[0].Family != "llama" {
t.Errorf("First model family = %q, want %q", models[0].Family, "llama")
}
})
t.Run("handles empty model list", func(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/api/tags" {
resp := map[string]interface{}{
"models": []map[string]interface{}{},
}
json.NewEncoder(w).Encode(resp)
}
}))
defer server.Close()
adapter, _ := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeOllama,
BaseURL: server.URL,
})
models, err := adapter.ListModels(context.Background())
if err != nil {
t.Fatalf("ListModels() error = %v", err)
}
if len(models) != 0 {
t.Errorf("ListModels() returned %d models, want 0", len(models))
}
})
}
func TestAdapter_Chat(t *testing.T) {
t.Run("non-streaming chat", func(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/api/chat" && r.Method == "POST" {
var req map[string]interface{}
json.NewDecoder(r.Body).Decode(&req)
// Check stream is false
if stream, ok := req["stream"].(bool); !ok || stream {
t.Error("Expected stream=false for non-streaming chat")
}
resp := map[string]interface{}{
"model": "llama3.2:8b",
"message": map[string]interface{}{"role": "assistant", "content": "Hello! How can I help you?"},
"done": true,
}
json.NewEncoder(w).Encode(resp)
}
}))
defer server.Close()
adapter, _ := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeOllama,
BaseURL: server.URL,
})
req := &backends.ChatRequest{
Model: "llama3.2:8b",
Messages: []backends.ChatMessage{
{Role: "user", Content: "Hello"},
},
}
resp, err := adapter.Chat(context.Background(), req)
if err != nil {
t.Fatalf("Chat() error = %v", err)
}
if !resp.Done {
t.Error("Chat() response.Done = false, want true")
}
if resp.Message == nil || resp.Message.Content != "Hello! How can I help you?" {
t.Errorf("Chat() response content unexpected: %+v", resp.Message)
}
})
}
func TestAdapter_StreamChat(t *testing.T) {
t.Run("streaming chat", func(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/api/chat" && r.Method == "POST" {
var req map[string]interface{}
json.NewDecoder(r.Body).Decode(&req)
// Check stream is true
if stream, ok := req["stream"].(bool); ok && !stream {
t.Error("Expected stream=true for streaming chat")
}
w.Header().Set("Content-Type", "application/x-ndjson")
flusher := w.(http.Flusher)
// Send streaming chunks
chunks := []map[string]interface{}{
{"model": "llama3.2:8b", "message": map[string]interface{}{"role": "assistant", "content": "Hello"}, "done": false},
{"model": "llama3.2:8b", "message": map[string]interface{}{"role": "assistant", "content": "!"}, "done": false},
{"model": "llama3.2:8b", "message": map[string]interface{}{"role": "assistant", "content": ""}, "done": true},
}
for _, chunk := range chunks {
data, _ := json.Marshal(chunk)
w.Write(append(data, '\n'))
flusher.Flush()
}
}
}))
defer server.Close()
adapter, _ := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeOllama,
BaseURL: server.URL,
})
streaming := true
req := &backends.ChatRequest{
Model: "llama3.2:8b",
Messages: []backends.ChatMessage{
{Role: "user", Content: "Hello"},
},
Stream: &streaming,
}
chunkCh, err := adapter.StreamChat(context.Background(), req)
if err != nil {
t.Fatalf("StreamChat() error = %v", err)
}
var chunks []backends.ChatChunk
for chunk := range chunkCh {
chunks = append(chunks, chunk)
}
if len(chunks) != 3 {
t.Errorf("StreamChat() received %d chunks, want 3", len(chunks))
}
// Last chunk should be done
if !chunks[len(chunks)-1].Done {
t.Error("Last chunk should have Done=true")
}
})
t.Run("handles context cancellation", func(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/api/chat" {
w.Header().Set("Content-Type", "application/x-ndjson")
flusher := w.(http.Flusher)
// Send first chunk then wait
chunk := map[string]interface{}{"model": "llama3.2:8b", "message": map[string]interface{}{"role": "assistant", "content": "Starting..."}, "done": false}
data, _ := json.Marshal(chunk)
w.Write(append(data, '\n'))
flusher.Flush()
// Wait long enough for context to be cancelled
time.Sleep(2 * time.Second)
}
}))
defer server.Close()
adapter, _ := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeOllama,
BaseURL: server.URL,
})
ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
defer cancel()
streaming := true
req := &backends.ChatRequest{
Model: "llama3.2:8b",
Messages: []backends.ChatMessage{
{Role: "user", Content: "Hello"},
},
Stream: &streaming,
}
chunkCh, err := adapter.StreamChat(ctx, req)
if err != nil {
t.Fatalf("StreamChat() error = %v", err)
}
// Should receive at least one chunk before timeout
receivedChunks := 0
for range chunkCh {
receivedChunks++
}
if receivedChunks == 0 {
t.Error("Expected to receive at least one chunk before cancellation")
}
})
}
func TestAdapter_Info(t *testing.T) {
t.Run("connected server", func(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/" || r.URL.Path == "/api/version" {
json.NewEncoder(w).Encode(map[string]string{"version": "0.3.0"})
}
}))
defer server.Close()
adapter, _ := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeOllama,
BaseURL: server.URL,
})
info := adapter.Info(context.Background())
if info.Type != backends.BackendTypeOllama {
t.Errorf("Info().Type = %v, want %v", info.Type, backends.BackendTypeOllama)
}
if info.Status != backends.BackendStatusConnected {
t.Errorf("Info().Status = %v, want %v", info.Status, backends.BackendStatusConnected)
}
if info.Version != "0.3.0" {
t.Errorf("Info().Version = %v, want %v", info.Version, "0.3.0")
}
})
t.Run("disconnected server", func(t *testing.T) {
adapter, _ := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeOllama,
BaseURL: "http://localhost:19999",
})
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
defer cancel()
info := adapter.Info(ctx)
if info.Status != backends.BackendStatusDisconnected {
t.Errorf("Info().Status = %v, want %v", info.Status, backends.BackendStatusDisconnected)
}
if info.Error == "" {
t.Error("Info().Error should be set for disconnected server")
}
})
}
func TestAdapter_ShowModel(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/api/show" && r.Method == "POST" {
var req map[string]string
json.NewDecoder(r.Body).Decode(&req)
resp := map[string]interface{}{
"modelfile": "FROM llama3.2:8b\nSYSTEM You are helpful.",
"template": "{{ .Prompt }}",
"system": "You are helpful.",
"details": map[string]interface{}{
"family": "llama",
"parameter_size": "8B",
"quantization_level": "Q4_K_M",
},
}
json.NewEncoder(w).Encode(resp)
}
}))
defer server.Close()
adapter, _ := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeOllama,
BaseURL: server.URL,
})
details, err := adapter.ShowModel(context.Background(), "llama3.2:8b")
if err != nil {
t.Fatalf("ShowModel() error = %v", err)
}
if details.Family != "llama" {
t.Errorf("ShowModel().Family = %q, want %q", details.Family, "llama")
}
if details.System != "You are helpful." {
t.Errorf("ShowModel().System = %q, want %q", details.System, "You are helpful.")
}
}
func TestAdapter_DeleteModel(t *testing.T) {
deleted := false
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/api/delete" && r.Method == "DELETE" {
deleted = true
w.WriteHeader(http.StatusOK)
}
}))
defer server.Close()
adapter, _ := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeOllama,
BaseURL: server.URL,
})
err := adapter.DeleteModel(context.Background(), "test-model")
if err != nil {
t.Fatalf("DeleteModel() error = %v", err)
}
if !deleted {
t.Error("DeleteModel() did not call the delete endpoint")
}
}
func TestAdapter_CopyModel(t *testing.T) {
copied := false
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/api/copy" && r.Method == "POST" {
var req map[string]string
json.NewDecoder(r.Body).Decode(&req)
if req["source"] == "source-model" && req["destination"] == "dest-model" {
copied = true
}
w.WriteHeader(http.StatusOK)
}
}))
defer server.Close()
adapter, _ := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeOllama,
BaseURL: server.URL,
})
err := adapter.CopyModel(context.Background(), "source-model", "dest-model")
if err != nil {
t.Fatalf("CopyModel() error = %v", err)
}
if !copied {
t.Error("CopyModel() did not call the copy endpoint with correct params")
}
}
func TestAdapter_Embed(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/api/embed" && r.Method == "POST" {
resp := map[string]interface{}{
"embeddings": [][]float64{
{0.1, 0.2, 0.3},
{0.4, 0.5, 0.6},
},
}
json.NewEncoder(w).Encode(resp)
}
}))
defer server.Close()
adapter, _ := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeOllama,
BaseURL: server.URL,
})
embeddings, err := adapter.Embed(context.Background(), "nomic-embed-text", []string{"hello", "world"})
if err != nil {
t.Fatalf("Embed() error = %v", err)
}
if len(embeddings) != 2 {
t.Errorf("Embed() returned %d embeddings, want 2", len(embeddings))
}
if len(embeddings[0]) != 3 {
t.Errorf("First embedding has %d dimensions, want 3", len(embeddings[0]))
}
}
func TestNewAdapter_Validation(t *testing.T) {
t.Run("invalid URL", func(t *testing.T) {
_, err := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeOllama,
BaseURL: "not-a-url",
})
if err == nil {
t.Error("NewAdapter() should fail with invalid URL")
}
})
t.Run("wrong backend type", func(t *testing.T) {
_, err := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeLlamaCpp,
BaseURL: "http://localhost:11434",
})
if err == nil {
t.Error("NewAdapter() should fail with wrong backend type")
}
})
t.Run("valid config", func(t *testing.T) {
adapter, err := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeOllama,
BaseURL: "http://localhost:11434",
})
if err != nil {
t.Errorf("NewAdapter() error = %v", err)
}
if adapter == nil {
t.Error("NewAdapter() returned nil adapter")
}
})
}

View File

@@ -0,0 +1,503 @@
package openai
import (
"bufio"
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
"vessel-backend/internal/backends"
)
// Adapter implements the LLMBackend interface for OpenAI-compatible APIs.
// This includes llama.cpp server and LM Studio.
type Adapter struct {
config backends.BackendConfig
httpClient *http.Client
baseURL *url.URL
}
// Ensure Adapter implements required interfaces
var (
_ backends.LLMBackend = (*Adapter)(nil)
_ backends.EmbeddingProvider = (*Adapter)(nil)
)
// NewAdapter creates a new OpenAI-compatible backend adapter
func NewAdapter(config backends.BackendConfig) (*Adapter, error) {
if config.Type != backends.BackendTypeLlamaCpp && config.Type != backends.BackendTypeLMStudio {
return nil, fmt.Errorf("invalid backend type: expected %s or %s, got %s",
backends.BackendTypeLlamaCpp, backends.BackendTypeLMStudio, config.Type)
}
if err := config.Validate(); err != nil {
return nil, fmt.Errorf("invalid config: %w", err)
}
baseURL, err := url.Parse(config.BaseURL)
if err != nil {
return nil, fmt.Errorf("invalid base URL: %w", err)
}
return &Adapter{
config: config,
baseURL: baseURL,
httpClient: &http.Client{
Timeout: 30 * time.Second,
},
}, nil
}
// Type returns the backend type
func (a *Adapter) Type() backends.BackendType {
return a.config.Type
}
// Config returns the backend configuration
func (a *Adapter) Config() backends.BackendConfig {
return a.config
}
// Capabilities returns what features this backend supports
func (a *Adapter) Capabilities() backends.BackendCapabilities {
if a.config.Type == backends.BackendTypeLlamaCpp {
return backends.LlamaCppCapabilities()
}
return backends.LMStudioCapabilities()
}
// HealthCheck verifies the backend is reachable
func (a *Adapter) HealthCheck(ctx context.Context) error {
req, err := http.NewRequestWithContext(ctx, "GET", a.baseURL.String()+"/v1/models", nil)
if err != nil {
return fmt.Errorf("failed to create request: %w", err)
}
resp, err := a.httpClient.Do(req)
if err != nil {
return fmt.Errorf("failed to reach backend: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return fmt.Errorf("backend returned status %d", resp.StatusCode)
}
return nil
}
// openaiModelsResponse represents the response from /v1/models
type openaiModelsResponse struct {
Data []openaiModel `json:"data"`
}
type openaiModel struct {
ID string `json:"id"`
Object string `json:"object"`
OwnedBy string `json:"owned_by"`
Created int64 `json:"created"`
}
// ListModels returns all models available from this backend
func (a *Adapter) ListModels(ctx context.Context) ([]backends.Model, error) {
req, err := http.NewRequestWithContext(ctx, "GET", a.baseURL.String()+"/v1/models", nil)
if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err)
}
resp, err := a.httpClient.Do(req)
if err != nil {
return nil, fmt.Errorf("failed to list models: %w", err)
}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("list models request returned status %d", resp.StatusCode)
	}
var listResp openaiModelsResponse
if err := json.NewDecoder(resp.Body).Decode(&listResp); err != nil {
return nil, fmt.Errorf("failed to decode response: %w", err)
}
models := make([]backends.Model, len(listResp.Data))
for i, m := range listResp.Data {
models[i] = backends.Model{
ID: m.ID,
Name: m.ID,
}
}
return models, nil
}
// Chat sends a non-streaming chat request
func (a *Adapter) Chat(ctx context.Context, req *backends.ChatRequest) (*backends.ChatChunk, error) {
if err := req.Validate(); err != nil {
return nil, fmt.Errorf("invalid request: %w", err)
}
openaiReq := a.convertChatRequest(req)
openaiReq["stream"] = false
body, err := json.Marshal(openaiReq)
if err != nil {
return nil, fmt.Errorf("failed to marshal request: %w", err)
}
httpReq, err := http.NewRequestWithContext(ctx, "POST", a.baseURL.String()+"/v1/chat/completions", bytes.NewReader(body))
if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err)
}
httpReq.Header.Set("Content-Type", "application/json")
resp, err := a.httpClient.Do(httpReq)
if err != nil {
return nil, fmt.Errorf("chat request failed: %w", err)
}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		body, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("chat request returned status %d: %s", resp.StatusCode, strings.TrimSpace(string(body)))
	}
var openaiResp openaiChatResponse
if err := json.NewDecoder(resp.Body).Decode(&openaiResp); err != nil {
return nil, fmt.Errorf("failed to decode response: %w", err)
}
return a.convertChatResponse(&openaiResp), nil
}
// StreamChat sends a streaming chat request
func (a *Adapter) StreamChat(ctx context.Context, req *backends.ChatRequest) (<-chan backends.ChatChunk, error) {
if err := req.Validate(); err != nil {
return nil, fmt.Errorf("invalid request: %w", err)
}
openaiReq := a.convertChatRequest(req)
openaiReq["stream"] = true
body, err := json.Marshal(openaiReq)
if err != nil {
return nil, fmt.Errorf("failed to marshal request: %w", err)
}
httpReq, err := http.NewRequestWithContext(ctx, "POST", a.baseURL.String()+"/v1/chat/completions", bytes.NewReader(body))
if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err)
}
httpReq.Header.Set("Content-Type", "application/json")
httpReq.Header.Set("Accept", "text/event-stream")
// Use a client without timeout for streaming
client := &http.Client{}
resp, err := client.Do(httpReq)
if err != nil {
return nil, fmt.Errorf("chat request failed: %w", err)
	}
	if resp.StatusCode != http.StatusOK {
		body, _ := io.ReadAll(resp.Body)
		resp.Body.Close()
		return nil, fmt.Errorf("chat request returned status %d: %s", resp.StatusCode, strings.TrimSpace(string(body)))
	}
chunkCh := make(chan backends.ChatChunk)
go func() {
defer close(chunkCh)
defer resp.Body.Close()
a.parseSSEStream(ctx, resp.Body, chunkCh)
}()
return chunkCh, nil
}
// parseSSEStream parses Server-Sent Events and emits ChatChunks
func (a *Adapter) parseSSEStream(ctx context.Context, body io.Reader, chunkCh chan<- backends.ChatChunk) {
scanner := bufio.NewScanner(body)
	// Accumulate streamed tool call fragments (id, name, arguments) per index
	pending := make(map[int]*pendingToolCall)
for scanner.Scan() {
select {
case <-ctx.Done():
return
default:
}
line := scanner.Text()
// Skip empty lines and comments
if line == "" || strings.HasPrefix(line, ":") {
continue
}
// Parse SSE data line
if !strings.HasPrefix(line, "data: ") {
continue
}
data := strings.TrimPrefix(line, "data: ")
// Check for stream end
if data == "[DONE]" {
chunkCh <- backends.ChatChunk{Done: true}
return
}
var streamResp openaiStreamResponse
if err := json.Unmarshal([]byte(data), &streamResp); err != nil {
chunkCh <- backends.ChatChunk{Error: fmt.Sprintf("failed to parse SSE data: %v", err)}
continue
}
		chunk := a.convertStreamResponse(&streamResp, pending)
chunkCh <- chunk
if chunk.Done {
return
}
}
if err := scanner.Err(); err != nil && ctx.Err() == nil {
chunkCh <- backends.ChatChunk{Error: fmt.Sprintf("stream error: %v", err)}
}
}
// Info returns detailed information about the backend
func (a *Adapter) Info(ctx context.Context) backends.BackendInfo {
info := backends.BackendInfo{
Type: a.config.Type,
BaseURL: a.config.BaseURL,
Capabilities: a.Capabilities(),
}
// Try to reach the models endpoint
if err := a.HealthCheck(ctx); err != nil {
info.Status = backends.BackendStatusDisconnected
info.Error = err.Error()
return info
}
info.Status = backends.BackendStatusConnected
return info
}
// Embed generates embeddings for the given input
func (a *Adapter) Embed(ctx context.Context, model string, input []string) ([][]float64, error) {
body, err := json.Marshal(map[string]interface{}{
"model": model,
"input": input,
})
if err != nil {
return nil, fmt.Errorf("failed to marshal request: %w", err)
}
req, err := http.NewRequestWithContext(ctx, "POST", a.baseURL.String()+"/v1/embeddings", bytes.NewReader(body))
if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
resp, err := a.httpClient.Do(req)
if err != nil {
return nil, fmt.Errorf("embed request failed: %w", err)
}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("embed request returned status %d", resp.StatusCode)
	}
var embedResp struct {
Data []struct {
Embedding []float64 `json:"embedding"`
Index int `json:"index"`
} `json:"data"`
}
if err := json.NewDecoder(resp.Body).Decode(&embedResp); err != nil {
return nil, fmt.Errorf("failed to decode response: %w", err)
}
embeddings := make([][]float64, len(embedResp.Data))
for _, d := range embedResp.Data {
embeddings[d.Index] = d.Embedding
}
return embeddings, nil
}
// OpenAI API response types
type openaiChatResponse struct {
ID string `json:"id"`
Object string `json:"object"`
Created int64 `json:"created"`
Model string `json:"model"`
Choices []openaiChoice `json:"choices"`
Usage *openaiUsage `json:"usage,omitempty"`
}
type openaiChoice struct {
Index int `json:"index"`
Message *openaiMessage `json:"message,omitempty"`
Delta *openaiMessage `json:"delta,omitempty"`
FinishReason string `json:"finish_reason,omitempty"`
}
type openaiMessage struct {
Role string `json:"role,omitempty"`
Content string `json:"content,omitempty"`
ToolCalls []openaiToolCall `json:"tool_calls,omitempty"`
}
type openaiToolCall struct {
ID string `json:"id,omitempty"`
Index int `json:"index,omitempty"`
Type string `json:"type,omitempty"`
Function struct {
Name string `json:"name,omitempty"`
Arguments string `json:"arguments,omitempty"`
} `json:"function"`
}
type openaiUsage struct {
PromptTokens int `json:"prompt_tokens"`
CompletionTokens int `json:"completion_tokens"`
TotalTokens int `json:"total_tokens"`
}
type openaiStreamResponse struct {
ID string `json:"id"`
Object string `json:"object"`
Created int64 `json:"created"`
Model string `json:"model"`
Choices []openaiChoice `json:"choices"`
}
// convertChatRequest converts a backends.ChatRequest to OpenAI format
func (a *Adapter) convertChatRequest(req *backends.ChatRequest) map[string]interface{} {
messages := make([]map[string]interface{}, len(req.Messages))
for i, msg := range req.Messages {
m := map[string]interface{}{
"role": msg.Role,
"content": msg.Content,
}
if msg.Name != "" {
m["name"] = msg.Name
}
if msg.ToolCallID != "" {
m["tool_call_id"] = msg.ToolCallID
}
messages[i] = m
}
openaiReq := map[string]interface{}{
"model": req.Model,
"messages": messages,
}
// Add optional parameters
if req.Temperature != nil {
openaiReq["temperature"] = *req.Temperature
}
if req.TopP != nil {
openaiReq["top_p"] = *req.TopP
}
if req.MaxTokens != nil {
openaiReq["max_tokens"] = *req.MaxTokens
}
if len(req.Tools) > 0 {
openaiReq["tools"] = req.Tools
}
return openaiReq
}
// convertChatResponse converts an OpenAI response to backends.ChatChunk
func (a *Adapter) convertChatResponse(resp *openaiChatResponse) *backends.ChatChunk {
chunk := &backends.ChatChunk{
Model: resp.Model,
Done: true,
}
if len(resp.Choices) > 0 {
choice := resp.Choices[0]
if choice.Message != nil {
msg := &backends.ChatMessage{
Role: choice.Message.Role,
Content: choice.Message.Content,
}
// Convert tool calls
for _, tc := range choice.Message.ToolCalls {
msg.ToolCalls = append(msg.ToolCalls, backends.ToolCall{
ID: tc.ID,
Type: tc.Type,
Function: struct {
Name string `json:"name"`
Arguments string `json:"arguments"`
}{
Name: tc.Function.Name,
Arguments: tc.Function.Arguments,
},
})
}
chunk.Message = msg
}
if choice.FinishReason != "" {
chunk.DoneReason = choice.FinishReason
}
}
if resp.Usage != nil {
chunk.PromptEvalCount = resp.Usage.PromptTokens
chunk.EvalCount = resp.Usage.CompletionTokens
}
return chunk
}
// pendingToolCall accumulates the fragments of a streamed tool call
// (OpenAI-compatible servers split the id/name and the arguments across deltas).
type pendingToolCall struct {
	ID   string
	Type string
	Name string
	Args string
}
// convertStreamResponse converts an OpenAI stream response to backends.ChatChunk,
// accumulating tool call fragments and attaching the completed calls to the final chunk
func (a *Adapter) convertStreamResponse(resp *openaiStreamResponse, pending map[int]*pendingToolCall) backends.ChatChunk {
	chunk := backends.ChatChunk{
		Model: resp.Model,
	}
	if len(resp.Choices) > 0 {
		choice := resp.Choices[0]
		if choice.FinishReason != "" {
			chunk.Done = true
			chunk.DoneReason = choice.FinishReason
		}
		if choice.Delta != nil {
			msg := &backends.ChatMessage{
				Role:    choice.Delta.Role,
				Content: choice.Delta.Content,
			}
			// Accumulate streaming tool call fragments keyed by index
			for _, tc := range choice.Delta.ToolCalls {
				p, ok := pending[tc.Index]
				if !ok {
					p = &pendingToolCall{}
					pending[tc.Index] = p
				}
				if tc.ID != "" {
					p.ID = tc.ID
				}
				if tc.Type != "" {
					p.Type = tc.Type
				}
				if tc.Function.Name != "" {
					p.Name = tc.Function.Name
				}
				p.Args += tc.Function.Arguments
			}
			chunk.Message = msg
		}
		// Once the stream signals completion, emit the fully accumulated tool calls
		if chunk.Done && len(pending) > 0 {
			if chunk.Message == nil {
				chunk.Message = &backends.ChatMessage{Role: "assistant"}
			}
			// Indices are assigned contiguously from 0 by OpenAI-compatible servers
			for idx := 0; idx < len(pending); idx++ {
				p, ok := pending[idx]
				if !ok {
					continue
				}
				chunk.Message.ToolCalls = append(chunk.Message.ToolCalls, backends.ToolCall{
					ID:   p.ID,
					Type: p.Type,
					Function: struct {
						Name      string `json:"name"`
						Arguments string `json:"arguments"`
					}{
						Name:      p.Name,
						Arguments: p.Args,
					},
				})
			}
		}
	}
	return chunk
}
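
A minimal sketch of how this adapter might be driven for a streaming chat against a local llama.cpp server; the `openai` import path, the port, and the model id below are assumptions, and error handling is trimmed:

package main

import (
	"context"
	"fmt"

	"vessel-backend/internal/backends"
	"vessel-backend/internal/backends/openai" // assumed package path for this adapter
)

func main() {
	adapter, err := openai.NewAdapter(backends.BackendConfig{
		Type:    backends.BackendTypeLlamaCpp,
		BaseURL: "http://localhost:8081", // assumed llama.cpp server address
		Enabled: true,
	})
	if err != nil {
		panic(err)
	}
	req := &backends.ChatRequest{
		Model: "llama3.2-8b-instruct", // assumed model id reported by /v1/models
		Messages: []backends.ChatMessage{
			{Role: "user", Content: "Hello"},
		},
	}
	chunks, err := adapter.StreamChat(context.Background(), req)
	if err != nil {
		panic(err)
	}
	for chunk := range chunks {
		if chunk.Message != nil {
			fmt.Print(chunk.Message.Content)
		}
		if chunk.Done {
			break
		}
	}
}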

View File

@@ -0,0 +1,594 @@
package openai
import (
"context"
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"testing"
"time"
"vessel-backend/internal/backends"
)
func TestAdapter_Type(t *testing.T) {
tests := []struct {
name string
backendType backends.BackendType
expectedType backends.BackendType
}{
{"llamacpp type", backends.BackendTypeLlamaCpp, backends.BackendTypeLlamaCpp},
{"lmstudio type", backends.BackendTypeLMStudio, backends.BackendTypeLMStudio},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
adapter, _ := NewAdapter(backends.BackendConfig{
Type: tt.backendType,
BaseURL: "http://localhost:8081",
})
if adapter.Type() != tt.expectedType {
t.Errorf("Type() = %v, want %v", adapter.Type(), tt.expectedType)
}
})
}
}
func TestAdapter_Config(t *testing.T) {
cfg := backends.BackendConfig{
Type: backends.BackendTypeLlamaCpp,
BaseURL: "http://localhost:8081",
Enabled: true,
}
adapter, _ := NewAdapter(cfg)
got := adapter.Config()
if got.Type != cfg.Type {
t.Errorf("Config().Type = %v, want %v", got.Type, cfg.Type)
}
if got.BaseURL != cfg.BaseURL {
t.Errorf("Config().BaseURL = %v, want %v", got.BaseURL, cfg.BaseURL)
}
}
func TestAdapter_Capabilities(t *testing.T) {
t.Run("llamacpp capabilities", func(t *testing.T) {
adapter, _ := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeLlamaCpp,
BaseURL: "http://localhost:8081",
})
caps := adapter.Capabilities()
if !caps.CanListModels {
t.Error("llama.cpp adapter should support listing models")
}
if caps.CanPullModels {
t.Error("llama.cpp adapter should NOT support pulling models")
}
if caps.CanDeleteModels {
t.Error("llama.cpp adapter should NOT support deleting models")
}
if caps.CanCreateModels {
t.Error("llama.cpp adapter should NOT support creating models")
}
if !caps.CanStreamChat {
t.Error("llama.cpp adapter should support streaming chat")
}
if !caps.CanEmbed {
t.Error("llama.cpp adapter should support embeddings")
}
})
t.Run("lmstudio capabilities", func(t *testing.T) {
adapter, _ := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeLMStudio,
BaseURL: "http://localhost:1234",
})
caps := adapter.Capabilities()
if !caps.CanListModels {
t.Error("LM Studio adapter should support listing models")
}
if caps.CanPullModels {
t.Error("LM Studio adapter should NOT support pulling models")
}
})
}
func TestAdapter_HealthCheck(t *testing.T) {
t.Run("healthy server", func(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/v1/models" {
json.NewEncoder(w).Encode(map[string]interface{}{
"data": []map[string]string{{"id": "llama3.2:8b"}},
})
}
}))
defer server.Close()
adapter, err := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeLlamaCpp,
BaseURL: server.URL,
})
if err != nil {
t.Fatalf("Failed to create adapter: %v", err)
}
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
if err := adapter.HealthCheck(ctx); err != nil {
t.Errorf("HealthCheck() error = %v, want nil", err)
}
})
t.Run("unreachable server", func(t *testing.T) {
adapter, _ := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeLlamaCpp,
BaseURL: "http://localhost:19999",
})
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
defer cancel()
if err := adapter.HealthCheck(ctx); err == nil {
t.Error("HealthCheck() expected error for unreachable server")
}
})
}
func TestAdapter_ListModels(t *testing.T) {
t.Run("returns model list", func(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/v1/models" {
resp := map[string]interface{}{
"data": []map[string]interface{}{
{
"id": "llama3.2-8b-instruct",
"object": "model",
"owned_by": "local",
"created": 1700000000,
},
{
"id": "mistral-7b-v0.2",
"object": "model",
"owned_by": "local",
"created": 1700000001,
},
},
}
json.NewEncoder(w).Encode(resp)
}
}))
defer server.Close()
adapter, _ := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeLlamaCpp,
BaseURL: server.URL,
})
ctx := context.Background()
models, err := adapter.ListModels(ctx)
if err != nil {
t.Fatalf("ListModels() error = %v", err)
}
if len(models) != 2 {
t.Errorf("ListModels() returned %d models, want 2", len(models))
}
if models[0].ID != "llama3.2-8b-instruct" {
t.Errorf("First model ID = %q, want %q", models[0].ID, "llama3.2-8b-instruct")
}
})
t.Run("handles empty model list", func(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/v1/models" {
resp := map[string]interface{}{
"data": []map[string]interface{}{},
}
json.NewEncoder(w).Encode(resp)
}
}))
defer server.Close()
adapter, _ := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeLlamaCpp,
BaseURL: server.URL,
})
models, err := adapter.ListModels(context.Background())
if err != nil {
t.Fatalf("ListModels() error = %v", err)
}
if len(models) != 0 {
t.Errorf("ListModels() returned %d models, want 0", len(models))
}
})
}
func TestAdapter_Chat(t *testing.T) {
t.Run("non-streaming chat", func(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/v1/chat/completions" && r.Method == "POST" {
var req map[string]interface{}
json.NewDecoder(r.Body).Decode(&req)
// Check stream is false
if stream, ok := req["stream"].(bool); ok && stream {
t.Error("Expected stream=false for non-streaming chat")
}
resp := map[string]interface{}{
"id": "chatcmpl-123",
"object": "chat.completion",
"created": 1700000000,
"model": "llama3.2:8b",
"choices": []map[string]interface{}{
{
"index": 0,
"message": map[string]interface{}{
"role": "assistant",
"content": "Hello! How can I help you?",
},
"finish_reason": "stop",
},
},
"usage": map[string]int{
"prompt_tokens": 10,
"completion_tokens": 8,
"total_tokens": 18,
},
}
json.NewEncoder(w).Encode(resp)
}
}))
defer server.Close()
adapter, _ := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeLlamaCpp,
BaseURL: server.URL,
})
req := &backends.ChatRequest{
Model: "llama3.2:8b",
Messages: []backends.ChatMessage{
{Role: "user", Content: "Hello"},
},
}
resp, err := adapter.Chat(context.Background(), req)
if err != nil {
t.Fatalf("Chat() error = %v", err)
}
if !resp.Done {
t.Error("Chat() response.Done = false, want true")
}
if resp.Message == nil || resp.Message.Content != "Hello! How can I help you?" {
t.Errorf("Chat() response content unexpected: %+v", resp.Message)
}
})
}
func TestAdapter_StreamChat(t *testing.T) {
t.Run("streaming chat with SSE", func(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/v1/chat/completions" && r.Method == "POST" {
var req map[string]interface{}
json.NewDecoder(r.Body).Decode(&req)
// Check stream is true
if stream, ok := req["stream"].(bool); !ok || !stream {
t.Error("Expected stream=true for streaming chat")
}
w.Header().Set("Content-Type", "text/event-stream")
w.Header().Set("Cache-Control", "no-cache")
flusher := w.(http.Flusher)
// Send SSE chunks
chunks := []string{
`{"id":"chatcmpl-1","choices":[{"delta":{"role":"assistant","content":"Hello"}}]}`,
`{"id":"chatcmpl-1","choices":[{"delta":{"content":"!"}}]}`,
`{"id":"chatcmpl-1","choices":[{"delta":{},"finish_reason":"stop"}]}`,
}
for _, chunk := range chunks {
fmt.Fprintf(w, "data: %s\n\n", chunk)
flusher.Flush()
}
fmt.Fprintf(w, "data: [DONE]\n\n")
flusher.Flush()
}
}))
defer server.Close()
adapter, _ := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeLlamaCpp,
BaseURL: server.URL,
})
streaming := true
req := &backends.ChatRequest{
Model: "llama3.2:8b",
Messages: []backends.ChatMessage{
{Role: "user", Content: "Hello"},
},
Stream: &streaming,
}
chunkCh, err := adapter.StreamChat(context.Background(), req)
if err != nil {
t.Fatalf("StreamChat() error = %v", err)
}
var chunks []backends.ChatChunk
for chunk := range chunkCh {
chunks = append(chunks, chunk)
}
if len(chunks) < 2 {
t.Errorf("StreamChat() received %d chunks, want at least 2", len(chunks))
}
// Last chunk should be done
if !chunks[len(chunks)-1].Done {
t.Error("Last chunk should have Done=true")
}
})
t.Run("handles context cancellation", func(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/v1/chat/completions" {
w.Header().Set("Content-Type", "text/event-stream")
flusher := w.(http.Flusher)
// Send first chunk then wait
fmt.Fprintf(w, "data: %s\n\n", `{"id":"chatcmpl-1","choices":[{"delta":{"role":"assistant","content":"Starting..."}}]}`)
flusher.Flush()
// Wait long enough for context to be cancelled
time.Sleep(2 * time.Second)
}
}))
defer server.Close()
adapter, _ := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeLlamaCpp,
BaseURL: server.URL,
})
ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
defer cancel()
streaming := true
req := &backends.ChatRequest{
Model: "llama3.2:8b",
Messages: []backends.ChatMessage{
{Role: "user", Content: "Hello"},
},
Stream: &streaming,
}
chunkCh, err := adapter.StreamChat(ctx, req)
if err != nil {
t.Fatalf("StreamChat() error = %v", err)
}
// Should receive at least one chunk before timeout
receivedChunks := 0
for range chunkCh {
receivedChunks++
}
if receivedChunks == 0 {
t.Error("Expected to receive at least one chunk before cancellation")
}
})
}
func TestAdapter_Info(t *testing.T) {
t.Run("connected server", func(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/v1/models" {
json.NewEncoder(w).Encode(map[string]interface{}{
"data": []map[string]string{{"id": "llama3.2:8b"}},
})
}
}))
defer server.Close()
adapter, _ := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeLlamaCpp,
BaseURL: server.URL,
})
info := adapter.Info(context.Background())
if info.Type != backends.BackendTypeLlamaCpp {
t.Errorf("Info().Type = %v, want %v", info.Type, backends.BackendTypeLlamaCpp)
}
if info.Status != backends.BackendStatusConnected {
t.Errorf("Info().Status = %v, want %v", info.Status, backends.BackendStatusConnected)
}
})
t.Run("disconnected server", func(t *testing.T) {
adapter, _ := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeLlamaCpp,
BaseURL: "http://localhost:19999",
})
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
defer cancel()
info := adapter.Info(ctx)
if info.Status != backends.BackendStatusDisconnected {
t.Errorf("Info().Status = %v, want %v", info.Status, backends.BackendStatusDisconnected)
}
if info.Error == "" {
t.Error("Info().Error should be set for disconnected server")
}
})
}
func TestAdapter_Embed(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/v1/embeddings" && r.Method == "POST" {
resp := map[string]interface{}{
"data": []map[string]interface{}{
{"embedding": []float64{0.1, 0.2, 0.3}, "index": 0},
{"embedding": []float64{0.4, 0.5, 0.6}, "index": 1},
},
}
json.NewEncoder(w).Encode(resp)
}
}))
defer server.Close()
adapter, _ := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeLlamaCpp,
BaseURL: server.URL,
})
embeddings, err := adapter.Embed(context.Background(), "nomic-embed-text", []string{"hello", "world"})
if err != nil {
t.Fatalf("Embed() error = %v", err)
}
if len(embeddings) != 2 {
t.Errorf("Embed() returned %d embeddings, want 2", len(embeddings))
}
if len(embeddings[0]) != 3 {
t.Errorf("First embedding has %d dimensions, want 3", len(embeddings[0]))
}
}
func TestNewAdapter_Validation(t *testing.T) {
t.Run("invalid URL", func(t *testing.T) {
_, err := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeLlamaCpp,
BaseURL: "not-a-url",
})
if err == nil {
t.Error("NewAdapter() should fail with invalid URL")
}
})
t.Run("wrong backend type", func(t *testing.T) {
_, err := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeOllama,
BaseURL: "http://localhost:8081",
})
if err == nil {
t.Error("NewAdapter() should fail with Ollama backend type")
}
})
t.Run("valid llamacpp config", func(t *testing.T) {
adapter, err := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeLlamaCpp,
BaseURL: "http://localhost:8081",
})
if err != nil {
t.Errorf("NewAdapter() error = %v", err)
}
if adapter == nil {
t.Error("NewAdapter() returned nil adapter")
}
})
t.Run("valid lmstudio config", func(t *testing.T) {
adapter, err := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeLMStudio,
BaseURL: "http://localhost:1234",
})
if err != nil {
t.Errorf("NewAdapter() error = %v", err)
}
if adapter == nil {
t.Error("NewAdapter() returned nil adapter")
}
})
}
func TestAdapter_ToolCalls(t *testing.T) {
t.Run("streaming with tool calls", func(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/v1/chat/completions" {
w.Header().Set("Content-Type", "text/event-stream")
flusher := w.(http.Flusher)
// Send tool call chunks
chunks := []string{
`{"id":"chatcmpl-1","choices":[{"delta":{"role":"assistant","tool_calls":[{"id":"call_1","type":"function","function":{"name":"get_weather","arguments":""}}]}}]}`,
`{"id":"chatcmpl-1","choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"location\":"}}]}}]}`,
`{"id":"chatcmpl-1","choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"Tokyo\"}"}}]}}]}`,
`{"id":"chatcmpl-1","choices":[{"delta":{},"finish_reason":"tool_calls"}]}`,
}
for _, chunk := range chunks {
fmt.Fprintf(w, "data: %s\n\n", chunk)
flusher.Flush()
}
fmt.Fprintf(w, "data: [DONE]\n\n")
flusher.Flush()
}
}))
defer server.Close()
adapter, _ := NewAdapter(backends.BackendConfig{
Type: backends.BackendTypeLlamaCpp,
BaseURL: server.URL,
})
streaming := true
req := &backends.ChatRequest{
Model: "llama3.2:8b",
Messages: []backends.ChatMessage{
{Role: "user", Content: "What's the weather in Tokyo?"},
},
Stream: &streaming,
Tools: []backends.Tool{
{
Type: "function",
Function: struct {
Name string `json:"name"`
Description string `json:"description"`
Parameters map[string]interface{} `json:"parameters"`
}{
Name: "get_weather",
Description: "Get weather for a location",
},
},
},
}
chunkCh, err := adapter.StreamChat(context.Background(), req)
if err != nil {
t.Fatalf("StreamChat() error = %v", err)
}
var lastChunk backends.ChatChunk
for chunk := range chunkCh {
lastChunk = chunk
}
if !lastChunk.Done {
t.Error("Last chunk should have Done=true")
}
})
}

View File

@@ -0,0 +1,242 @@
package backends
import (
"context"
"fmt"
"net/http"
"sync"
"time"
)
// Registry manages multiple LLM backend instances
type Registry struct {
mu sync.RWMutex
backends map[BackendType]LLMBackend
active BackendType
}
// NewRegistry creates a new backend registry
func NewRegistry() *Registry {
return &Registry{
backends: make(map[BackendType]LLMBackend),
}
}
// Register adds a backend to the registry
func (r *Registry) Register(backend LLMBackend) error {
r.mu.Lock()
defer r.mu.Unlock()
bt := backend.Type()
if _, exists := r.backends[bt]; exists {
return fmt.Errorf("backend %q already registered", bt)
}
r.backends[bt] = backend
return nil
}
// Unregister removes a backend from the registry
func (r *Registry) Unregister(backendType BackendType) error {
r.mu.Lock()
defer r.mu.Unlock()
if _, exists := r.backends[backendType]; !exists {
return fmt.Errorf("backend %q not registered", backendType)
}
delete(r.backends, backendType)
// Clear active if it was the unregistered backend
if r.active == backendType {
r.active = ""
}
return nil
}
// Get retrieves a backend by type
func (r *Registry) Get(backendType BackendType) (LLMBackend, bool) {
r.mu.RLock()
defer r.mu.RUnlock()
backend, ok := r.backends[backendType]
return backend, ok
}
// SetActive sets the active backend
func (r *Registry) SetActive(backendType BackendType) error {
r.mu.Lock()
defer r.mu.Unlock()
if _, exists := r.backends[backendType]; !exists {
return fmt.Errorf("backend %q not registered", backendType)
}
r.active = backendType
return nil
}
// Active returns the currently active backend
func (r *Registry) Active() LLMBackend {
r.mu.RLock()
defer r.mu.RUnlock()
if r.active == "" {
return nil
}
return r.backends[r.active]
}
// ActiveType returns the type of the currently active backend
func (r *Registry) ActiveType() BackendType {
r.mu.RLock()
defer r.mu.RUnlock()
return r.active
}
// Backends returns all registered backend types
func (r *Registry) Backends() []BackendType {
r.mu.RLock()
defer r.mu.RUnlock()
types := make([]BackendType, 0, len(r.backends))
for bt := range r.backends {
types = append(types, bt)
}
return types
}
// AllInfo returns information about all registered backends
func (r *Registry) AllInfo(ctx context.Context) []BackendInfo {
r.mu.RLock()
defer r.mu.RUnlock()
infos := make([]BackendInfo, 0, len(r.backends))
for _, backend := range r.backends {
infos = append(infos, backend.Info(ctx))
}
return infos
}
// DiscoveryEndpoint represents a potential backend endpoint to probe
type DiscoveryEndpoint struct {
Type BackendType
BaseURL string
}
// DiscoveryResult represents the result of probing an endpoint
type DiscoveryResult struct {
Type BackendType `json:"type"`
BaseURL string `json:"baseUrl"`
Available bool `json:"available"`
Version string `json:"version,omitempty"`
Error string `json:"error,omitempty"`
}
// Discover probes the given endpoints to find available backends
func (r *Registry) Discover(ctx context.Context, endpoints []DiscoveryEndpoint) []DiscoveryResult {
results := make([]DiscoveryResult, len(endpoints))
var wg sync.WaitGroup
for i, endpoint := range endpoints {
wg.Add(1)
go func(idx int, ep DiscoveryEndpoint) {
defer wg.Done()
results[idx] = probeEndpoint(ctx, ep)
}(i, endpoint)
}
wg.Wait()
return results
}
// probeEndpoint checks if a backend is available at the given endpoint
func probeEndpoint(ctx context.Context, endpoint DiscoveryEndpoint) DiscoveryResult {
result := DiscoveryResult{
Type: endpoint.Type,
BaseURL: endpoint.BaseURL,
}
client := &http.Client{
Timeout: 3 * time.Second,
}
// Determine probe path based on backend type
var probePath string
switch endpoint.Type {
case BackendTypeOllama:
probePath = "/api/version"
case BackendTypeLlamaCpp, BackendTypeLMStudio:
probePath = "/v1/models"
default:
probePath = "/health"
}
req, err := http.NewRequestWithContext(ctx, "GET", endpoint.BaseURL+probePath, nil)
if err != nil {
result.Error = err.Error()
return result
}
resp, err := client.Do(req)
if err != nil {
result.Error = err.Error()
return result
}
defer resp.Body.Close()
if resp.StatusCode == http.StatusOK {
result.Available = true
} else {
result.Error = fmt.Sprintf("HTTP %d", resp.StatusCode)
}
return result
}
// DefaultDiscoveryEndpoints returns the default endpoints to probe
func DefaultDiscoveryEndpoints() []DiscoveryEndpoint {
return []DiscoveryEndpoint{
{Type: BackendTypeOllama, BaseURL: "http://localhost:11434"},
{Type: BackendTypeLlamaCpp, BaseURL: "http://localhost:8081"},
{Type: BackendTypeLlamaCpp, BaseURL: "http://localhost:8080"},
{Type: BackendTypeLMStudio, BaseURL: "http://localhost:1234"},
}
}
// DiscoverAndRegister probes endpoints and registers available backends
func (r *Registry) DiscoverAndRegister(ctx context.Context, endpoints []DiscoveryEndpoint, adapterFactory AdapterFactory) []DiscoveryResult {
results := r.Discover(ctx, endpoints)
for _, result := range results {
if !result.Available {
continue
}
// Skip if already registered
if _, exists := r.Get(result.Type); exists {
continue
}
config := BackendConfig{
Type: result.Type,
BaseURL: result.BaseURL,
Enabled: true,
}
adapter, err := adapterFactory(config)
if err != nil {
continue
}
r.Register(adapter)
}
return results
}
// AdapterFactory creates an LLMBackend from a config
type AdapterFactory func(config BackendConfig) (LLMBackend, error)
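
A sketch of how the registry, discovery, and adapters above might be wired together at startup; the `ollama` and `openai` package paths are assumptions based on the adapters added in this PR:

package main

import (
	"context"
	"fmt"
	"time"

	"vessel-backend/internal/backends"
	"vessel-backend/internal/backends/ollama" // assumed package paths
	"vessel-backend/internal/backends/openai"
)

func main() {
	// Build the right adapter for each discovered backend type.
	factory := func(cfg backends.BackendConfig) (backends.LLMBackend, error) {
		switch cfg.Type {
		case backends.BackendTypeOllama:
			return ollama.NewAdapter(cfg)
		case backends.BackendTypeLlamaCpp, backends.BackendTypeLMStudio:
			return openai.NewAdapter(cfg)
		default:
			return nil, fmt.Errorf("unsupported backend type: %s", cfg.Type)
		}
	}

	registry := backends.NewRegistry()
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	// Probe the default local ports and register whatever responds.
	results := registry.DiscoverAndRegister(ctx, backends.DefaultDiscoveryEndpoints(), factory)
	for _, r := range results {
		// Activate the first backend that was actually registered.
		if _, ok := registry.Get(r.Type); ok && registry.ActiveType() == "" {
			_ = registry.SetActive(r.Type)
		}
	}
	fmt.Println("active backend:", registry.ActiveType())
}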

View File

@@ -0,0 +1,352 @@
package backends
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"time"
)
func TestNewRegistry(t *testing.T) {
registry := NewRegistry()
if registry == nil {
t.Fatal("NewRegistry() returned nil")
}
if len(registry.Backends()) != 0 {
t.Errorf("New registry should have no backends, got %d", len(registry.Backends()))
}
if registry.Active() != nil {
t.Error("New registry should have no active backend")
}
}
func TestRegistry_Register(t *testing.T) {
registry := NewRegistry()
// Create a mock backend
mock := &mockBackend{
backendType: BackendTypeOllama,
config: BackendConfig{
Type: BackendTypeOllama,
BaseURL: "http://localhost:11434",
},
}
err := registry.Register(mock)
if err != nil {
t.Fatalf("Register() error = %v", err)
}
if len(registry.Backends()) != 1 {
t.Errorf("Registry should have 1 backend, got %d", len(registry.Backends()))
}
// Should not allow duplicate registration
err = registry.Register(mock)
if err == nil {
t.Error("Register() should fail for duplicate backend type")
}
}
func TestRegistry_Get(t *testing.T) {
registry := NewRegistry()
mock := &mockBackend{
backendType: BackendTypeOllama,
config: BackendConfig{
Type: BackendTypeOllama,
BaseURL: "http://localhost:11434",
},
}
registry.Register(mock)
t.Run("existing backend", func(t *testing.T) {
backend, ok := registry.Get(BackendTypeOllama)
if !ok {
t.Error("Get() should return ok=true for registered backend")
}
if backend != mock {
t.Error("Get() returned wrong backend")
}
})
t.Run("non-existing backend", func(t *testing.T) {
_, ok := registry.Get(BackendTypeLlamaCpp)
if ok {
t.Error("Get() should return ok=false for unregistered backend")
}
})
}
func TestRegistry_SetActive(t *testing.T) {
registry := NewRegistry()
mock := &mockBackend{
backendType: BackendTypeOllama,
config: BackendConfig{
Type: BackendTypeOllama,
BaseURL: "http://localhost:11434",
},
}
registry.Register(mock)
t.Run("set registered backend as active", func(t *testing.T) {
err := registry.SetActive(BackendTypeOllama)
if err != nil {
t.Errorf("SetActive() error = %v", err)
}
active := registry.Active()
if active == nil {
t.Fatal("Active() returned nil after SetActive()")
}
if active.Type() != BackendTypeOllama {
t.Errorf("Active().Type() = %v, want %v", active.Type(), BackendTypeOllama)
}
})
t.Run("set unregistered backend as active", func(t *testing.T) {
err := registry.SetActive(BackendTypeLlamaCpp)
if err == nil {
t.Error("SetActive() should fail for unregistered backend")
}
})
}
func TestRegistry_ActiveType(t *testing.T) {
registry := NewRegistry()
t.Run("no active backend", func(t *testing.T) {
activeType := registry.ActiveType()
if activeType != "" {
t.Errorf("ActiveType() = %q, want empty string", activeType)
}
})
t.Run("with active backend", func(t *testing.T) {
mock := &mockBackend{backendType: BackendTypeOllama}
registry.Register(mock)
registry.SetActive(BackendTypeOllama)
activeType := registry.ActiveType()
if activeType != BackendTypeOllama {
t.Errorf("ActiveType() = %v, want %v", activeType, BackendTypeOllama)
}
})
}
func TestRegistry_Unregister(t *testing.T) {
registry := NewRegistry()
mock := &mockBackend{backendType: BackendTypeOllama}
registry.Register(mock)
registry.SetActive(BackendTypeOllama)
err := registry.Unregister(BackendTypeOllama)
if err != nil {
t.Errorf("Unregister() error = %v", err)
}
if len(registry.Backends()) != 0 {
t.Error("Registry should have no backends after unregister")
}
if registry.Active() != nil {
t.Error("Active backend should be nil after unregistering it")
}
}
func TestRegistry_AllInfo(t *testing.T) {
registry := NewRegistry()
mock1 := &mockBackend{
backendType: BackendTypeOllama,
config: BackendConfig{Type: BackendTypeOllama, BaseURL: "http://localhost:11434"},
info: BackendInfo{
Type: BackendTypeOllama,
Status: BackendStatusConnected,
Version: "0.1.0",
},
}
mock2 := &mockBackend{
backendType: BackendTypeLlamaCpp,
config: BackendConfig{Type: BackendTypeLlamaCpp, BaseURL: "http://localhost:8081"},
info: BackendInfo{
Type: BackendTypeLlamaCpp,
Status: BackendStatusDisconnected,
},
}
registry.Register(mock1)
registry.Register(mock2)
registry.SetActive(BackendTypeOllama)
infos := registry.AllInfo(context.Background())
if len(infos) != 2 {
t.Errorf("AllInfo() returned %d infos, want 2", len(infos))
}
	// Verify the Ollama backend's info is included
	var foundOllama bool
	for _, info := range infos {
		if info.Type == BackendTypeOllama {
			foundOllama = true
		}
	}
	if !foundOllama {
		t.Error("AllInfo() did not include ollama backend info")
	}
}
func TestRegistry_Discover(t *testing.T) {
// Create test servers for each backend type
ollamaServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/api/version" || r.URL.Path == "/" {
json.NewEncoder(w).Encode(map[string]string{"version": "0.3.0"})
}
}))
defer ollamaServer.Close()
llamacppServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/v1/models" {
json.NewEncoder(w).Encode(map[string]interface{}{
"data": []map[string]string{{"id": "llama3.2:8b"}},
})
}
if r.URL.Path == "/health" {
json.NewEncoder(w).Encode(map[string]string{"status": "ok"})
}
}))
defer llamacppServer.Close()
registry := NewRegistry()
// Configure discovery endpoints
endpoints := []DiscoveryEndpoint{
{Type: BackendTypeOllama, BaseURL: ollamaServer.URL},
{Type: BackendTypeLlamaCpp, BaseURL: llamacppServer.URL},
{Type: BackendTypeLMStudio, BaseURL: "http://localhost:19999"}, // Not running
}
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
results := registry.Discover(ctx, endpoints)
if len(results) != 3 {
t.Errorf("Discover() returned %d results, want 3", len(results))
}
// Check Ollama was discovered
var ollamaResult *DiscoveryResult
for i := range results {
if results[i].Type == BackendTypeOllama {
ollamaResult = &results[i]
break
}
}
if ollamaResult == nil {
t.Fatal("Ollama not found in discovery results")
}
if !ollamaResult.Available {
t.Errorf("Ollama should be available, error: %s", ollamaResult.Error)
}
// Check LM Studio was not discovered
var lmstudioResult *DiscoveryResult
for i := range results {
if results[i].Type == BackendTypeLMStudio {
lmstudioResult = &results[i]
break
}
}
if lmstudioResult == nil {
t.Fatal("LM Studio not found in discovery results")
}
if lmstudioResult.Available {
t.Error("LM Studio should NOT be available")
}
}
func TestRegistry_DefaultEndpoints(t *testing.T) {
endpoints := DefaultDiscoveryEndpoints()
if len(endpoints) < 3 {
t.Errorf("DefaultDiscoveryEndpoints() returned %d endpoints, want at least 3", len(endpoints))
}
// Check that all expected types are present
types := make(map[BackendType]bool)
for _, e := range endpoints {
types[e.Type] = true
}
if !types[BackendTypeOllama] {
t.Error("DefaultDiscoveryEndpoints() missing Ollama")
}
if !types[BackendTypeLlamaCpp] {
t.Error("DefaultDiscoveryEndpoints() missing llama.cpp")
}
if !types[BackendTypeLMStudio] {
t.Error("DefaultDiscoveryEndpoints() missing LM Studio")
}
}
// mockBackend implements LLMBackend for testing
type mockBackend struct {
backendType BackendType
config BackendConfig
info BackendInfo
healthErr error
models []Model
}
func (m *mockBackend) Type() BackendType {
return m.backendType
}
func (m *mockBackend) Config() BackendConfig {
return m.config
}
func (m *mockBackend) HealthCheck(ctx context.Context) error {
return m.healthErr
}
func (m *mockBackend) ListModels(ctx context.Context) ([]Model, error) {
return m.models, nil
}
func (m *mockBackend) StreamChat(ctx context.Context, req *ChatRequest) (<-chan ChatChunk, error) {
ch := make(chan ChatChunk)
close(ch)
return ch, nil
}
func (m *mockBackend) Chat(ctx context.Context, req *ChatRequest) (*ChatChunk, error) {
return &ChatChunk{Done: true}, nil
}
func (m *mockBackend) Capabilities() BackendCapabilities {
return OllamaCapabilities()
}
func (m *mockBackend) Info(ctx context.Context) BackendInfo {
if m.info.Type != "" {
return m.info
}
return BackendInfo{
Type: m.backendType,
BaseURL: m.config.BaseURL,
Status: BackendStatusConnected,
Capabilities: m.Capabilities(),
}
}

View File

@@ -0,0 +1,245 @@
package backends
import (
"errors"
"fmt"
"net/url"
"strings"
)
// BackendType identifies the type of LLM backend
type BackendType string
const (
BackendTypeOllama BackendType = "ollama"
BackendTypeLlamaCpp BackendType = "llamacpp"
BackendTypeLMStudio BackendType = "lmstudio"
)
// String returns the string representation of the backend type
func (bt BackendType) String() string {
return string(bt)
}
// ParseBackendType parses a string into a BackendType
func ParseBackendType(s string) (BackendType, error) {
switch strings.ToLower(s) {
case "ollama":
return BackendTypeOllama, nil
case "llamacpp", "llama.cpp", "llama-cpp":
return BackendTypeLlamaCpp, nil
case "lmstudio", "lm-studio", "lm_studio":
return BackendTypeLMStudio, nil
default:
return "", fmt.Errorf("unknown backend type: %q", s)
}
}
// BackendCapabilities describes what features a backend supports
type BackendCapabilities struct {
CanListModels bool `json:"canListModels"`
CanPullModels bool `json:"canPullModels"`
CanDeleteModels bool `json:"canDeleteModels"`
CanCreateModels bool `json:"canCreateModels"`
CanStreamChat bool `json:"canStreamChat"`
CanEmbed bool `json:"canEmbed"`
}
// OllamaCapabilities returns the capabilities for Ollama backend
func OllamaCapabilities() BackendCapabilities {
return BackendCapabilities{
CanListModels: true,
CanPullModels: true,
CanDeleteModels: true,
CanCreateModels: true,
CanStreamChat: true,
CanEmbed: true,
}
}
// LlamaCppCapabilities returns the capabilities for llama.cpp backend
func LlamaCppCapabilities() BackendCapabilities {
return BackendCapabilities{
CanListModels: true,
CanPullModels: false,
CanDeleteModels: false,
CanCreateModels: false,
CanStreamChat: true,
CanEmbed: true,
}
}
// LMStudioCapabilities returns the capabilities for LM Studio backend
func LMStudioCapabilities() BackendCapabilities {
return BackendCapabilities{
CanListModels: true,
CanPullModels: false,
CanDeleteModels: false,
CanCreateModels: false,
CanStreamChat: true,
CanEmbed: true,
}
}
// BackendStatus represents the connection status of a backend
type BackendStatus string
const (
BackendStatusConnected BackendStatus = "connected"
BackendStatusDisconnected BackendStatus = "disconnected"
BackendStatusUnknown BackendStatus = "unknown"
)
// BackendConfig holds configuration for a backend
type BackendConfig struct {
Type BackendType `json:"type"`
BaseURL string `json:"baseUrl"`
Enabled bool `json:"enabled"`
}
// Validate checks if the backend config is valid
func (c BackendConfig) Validate() error {
if c.BaseURL == "" {
return errors.New("base URL is required")
}
u, err := url.Parse(c.BaseURL)
if err != nil {
return fmt.Errorf("invalid base URL: %w", err)
}
if u.Scheme == "" || u.Host == "" {
return errors.New("invalid URL: missing scheme or host")
}
return nil
}
// BackendInfo describes a configured backend and its current state
type BackendInfo struct {
Type BackendType `json:"type"`
BaseURL string `json:"baseUrl"`
Status BackendStatus `json:"status"`
Capabilities BackendCapabilities `json:"capabilities"`
Version string `json:"version,omitempty"`
Error string `json:"error,omitempty"`
}
// IsConnected returns true if the backend is connected
func (bi BackendInfo) IsConnected() bool {
return bi.Status == BackendStatusConnected
}
// Model represents an LLM model available from a backend
type Model struct {
ID string `json:"id"`
Name string `json:"name"`
Size int64 `json:"size,omitempty"`
ModifiedAt string `json:"modifiedAt,omitempty"`
Family string `json:"family,omitempty"`
QuantLevel string `json:"quantLevel,omitempty"`
Capabilities []string `json:"capabilities,omitempty"`
Metadata map[string]string `json:"metadata,omitempty"`
}
// HasCapability checks if the model has a specific capability
func (m Model) HasCapability(cap string) bool {
for _, c := range m.Capabilities {
if c == cap {
return true
}
}
return false
}
// ChatMessage represents a message in a chat conversation
type ChatMessage struct {
Role string `json:"role"`
Content string `json:"content"`
Images []string `json:"images,omitempty"`
ToolCalls []ToolCall `json:"tool_calls,omitempty"`
ToolCallID string `json:"tool_call_id,omitempty"`
Name string `json:"name,omitempty"`
}
var validRoles = map[string]bool{
"user": true,
"assistant": true,
"system": true,
"tool": true,
}
// Validate checks if the chat message is valid
func (m ChatMessage) Validate() error {
if m.Role == "" {
return errors.New("role is required")
}
if !validRoles[m.Role] {
return fmt.Errorf("invalid role: %q", m.Role)
}
return nil
}
// ToolCall represents a tool invocation
type ToolCall struct {
ID string `json:"id"`
Type string `json:"type"`
Function struct {
Name string `json:"name"`
Arguments string `json:"arguments"`
} `json:"function"`
}
// Tool represents a tool definition
type Tool struct {
Type string `json:"type"`
Function struct {
Name string `json:"name"`
Description string `json:"description"`
Parameters map[string]interface{} `json:"parameters"`
} `json:"function"`
}
// ChatRequest represents a chat completion request
type ChatRequest struct {
Model string `json:"model"`
Messages []ChatMessage `json:"messages"`
Stream *bool `json:"stream,omitempty"`
Temperature *float64 `json:"temperature,omitempty"`
TopP *float64 `json:"top_p,omitempty"`
MaxTokens *int `json:"max_tokens,omitempty"`
Tools []Tool `json:"tools,omitempty"`
Options map[string]any `json:"options,omitempty"`
}
// Validate checks if the chat request is valid
func (r ChatRequest) Validate() error {
if r.Model == "" {
return errors.New("model is required")
}
if len(r.Messages) == 0 {
return errors.New("at least one message is required")
}
for i, msg := range r.Messages {
if err := msg.Validate(); err != nil {
return fmt.Errorf("message %d: %w", i, err)
}
}
return nil
}
// ChatChunk represents a streaming chat response chunk
type ChatChunk struct {
Model string `json:"model"`
CreatedAt string `json:"created_at,omitempty"`
Message *ChatMessage `json:"message,omitempty"`
Done bool `json:"done"`
DoneReason string `json:"done_reason,omitempty"`
// Token counts (final chunk only)
PromptEvalCount int `json:"prompt_eval_count,omitempty"`
EvalCount int `json:"eval_count,omitempty"`
// Error information
Error string `json:"error,omitempty"`
}
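
For reference, a short sketch of building a request with the optional pointer fields and validating it before handing it to a backend; the model name is illustrative:

package main

import (
	"fmt"

	"vessel-backend/internal/backends"
)

func main() {
	temp := 0.7
	stream := true
	req := backends.ChatRequest{
		Model: "llama3.2:8b", // illustrative model name
		Messages: []backends.ChatMessage{
			{Role: "system", Content: "You are helpful."},
			{Role: "user", Content: "Hello"},
		},
		Stream:      &stream,
		Temperature: &temp,
	}
	if err := req.Validate(); err != nil {
		fmt.Println("invalid request:", err)
		return
	}
	fmt.Println("request ok for model", req.Model)
}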

View File

@@ -0,0 +1,323 @@
package backends
import (
"testing"
)
func TestBackendType_String(t *testing.T) {
tests := []struct {
name string
bt BackendType
expected string
}{
{"ollama type", BackendTypeOllama, "ollama"},
{"llamacpp type", BackendTypeLlamaCpp, "llamacpp"},
{"lmstudio type", BackendTypeLMStudio, "lmstudio"},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := tt.bt.String(); got != tt.expected {
t.Errorf("BackendType.String() = %v, want %v", got, tt.expected)
}
})
}
}
func TestParseBackendType(t *testing.T) {
tests := []struct {
name string
input string
expected BackendType
expectErr bool
}{
{"parse ollama", "ollama", BackendTypeOllama, false},
{"parse llamacpp", "llamacpp", BackendTypeLlamaCpp, false},
{"parse lmstudio", "lmstudio", BackendTypeLMStudio, false},
{"parse llama.cpp alias", "llama.cpp", BackendTypeLlamaCpp, false},
{"parse llama-cpp alias", "llama-cpp", BackendTypeLlamaCpp, false},
{"parse unknown", "unknown", "", true},
{"parse empty", "", "", true},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := ParseBackendType(tt.input)
if (err != nil) != tt.expectErr {
t.Errorf("ParseBackendType() error = %v, expectErr %v", err, tt.expectErr)
return
}
if got != tt.expected {
t.Errorf("ParseBackendType() = %v, want %v", got, tt.expected)
}
})
}
}
func TestBackendCapabilities(t *testing.T) {
t.Run("ollama capabilities", func(t *testing.T) {
caps := OllamaCapabilities()
if !caps.CanListModels {
t.Error("Ollama should be able to list models")
}
if !caps.CanPullModels {
t.Error("Ollama should be able to pull models")
}
if !caps.CanDeleteModels {
t.Error("Ollama should be able to delete models")
}
if !caps.CanCreateModels {
t.Error("Ollama should be able to create models")
}
if !caps.CanStreamChat {
t.Error("Ollama should be able to stream chat")
}
if !caps.CanEmbed {
t.Error("Ollama should be able to embed")
}
})
t.Run("llamacpp capabilities", func(t *testing.T) {
caps := LlamaCppCapabilities()
if !caps.CanListModels {
t.Error("llama.cpp should be able to list models")
}
if caps.CanPullModels {
t.Error("llama.cpp should NOT be able to pull models")
}
if caps.CanDeleteModels {
t.Error("llama.cpp should NOT be able to delete models")
}
if caps.CanCreateModels {
t.Error("llama.cpp should NOT be able to create models")
}
if !caps.CanStreamChat {
t.Error("llama.cpp should be able to stream chat")
}
if !caps.CanEmbed {
t.Error("llama.cpp should be able to embed")
}
})
t.Run("lmstudio capabilities", func(t *testing.T) {
caps := LMStudioCapabilities()
if !caps.CanListModels {
t.Error("LM Studio should be able to list models")
}
if caps.CanPullModels {
t.Error("LM Studio should NOT be able to pull models")
}
if caps.CanDeleteModels {
t.Error("LM Studio should NOT be able to delete models")
}
if caps.CanCreateModels {
t.Error("LM Studio should NOT be able to create models")
}
if !caps.CanStreamChat {
t.Error("LM Studio should be able to stream chat")
}
if !caps.CanEmbed {
t.Error("LM Studio should be able to embed")
}
})
}
func TestBackendConfig_Validate(t *testing.T) {
tests := []struct {
name string
config BackendConfig
expectErr bool
}{
{
name: "valid ollama config",
config: BackendConfig{
Type: BackendTypeOllama,
BaseURL: "http://localhost:11434",
},
expectErr: false,
},
{
name: "valid llamacpp config",
config: BackendConfig{
Type: BackendTypeLlamaCpp,
BaseURL: "http://localhost:8081",
},
expectErr: false,
},
{
name: "empty base URL",
config: BackendConfig{
Type: BackendTypeOllama,
BaseURL: "",
},
expectErr: true,
},
{
name: "invalid URL",
config: BackendConfig{
Type: BackendTypeOllama,
BaseURL: "not-a-url",
},
expectErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
err := tt.config.Validate()
if (err != nil) != tt.expectErr {
t.Errorf("BackendConfig.Validate() error = %v, expectErr %v", err, tt.expectErr)
}
})
}
}
func TestModel_HasCapability(t *testing.T) {
model := Model{
ID: "llama3.2:8b",
Name: "llama3.2:8b",
Capabilities: []string{"chat", "vision", "tools"},
}
tests := []struct {
name string
capability string
expected bool
}{
{"has chat", "chat", true},
{"has vision", "vision", true},
{"has tools", "tools", true},
{"no thinking", "thinking", false},
{"no code", "code", false},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := model.HasCapability(tt.capability); got != tt.expected {
t.Errorf("Model.HasCapability(%q) = %v, want %v", tt.capability, got, tt.expected)
}
})
}
}
func TestChatMessage_Validation(t *testing.T) {
tests := []struct {
name string
msg ChatMessage
expectErr bool
}{
{
name: "valid user message",
msg: ChatMessage{Role: "user", Content: "Hello"},
expectErr: false,
},
{
name: "valid assistant message",
msg: ChatMessage{Role: "assistant", Content: "Hi there"},
expectErr: false,
},
{
name: "valid system message",
msg: ChatMessage{Role: "system", Content: "You are helpful"},
expectErr: false,
},
{
name: "invalid role",
msg: ChatMessage{Role: "invalid", Content: "Hello"},
expectErr: true,
},
{
name: "empty role",
msg: ChatMessage{Role: "", Content: "Hello"},
expectErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
err := tt.msg.Validate()
if (err != nil) != tt.expectErr {
t.Errorf("ChatMessage.Validate() error = %v, expectErr %v", err, tt.expectErr)
}
})
}
}
func TestChatRequest_Validation(t *testing.T) {
streaming := true
tests := []struct {
name string
req ChatRequest
expectErr bool
}{
{
name: "valid request",
req: ChatRequest{
Model: "llama3.2:8b",
Messages: []ChatMessage{
{Role: "user", Content: "Hello"},
},
Stream: &streaming,
},
expectErr: false,
},
{
name: "empty model",
req: ChatRequest{
Model: "",
Messages: []ChatMessage{
{Role: "user", Content: "Hello"},
},
},
expectErr: true,
},
{
name: "empty messages",
req: ChatRequest{
Model: "llama3.2:8b",
Messages: []ChatMessage{},
},
expectErr: true,
},
{
name: "nil messages",
req: ChatRequest{
Model: "llama3.2:8b",
Messages: nil,
},
expectErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
err := tt.req.Validate()
if (err != nil) != tt.expectErr {
t.Errorf("ChatRequest.Validate() error = %v, expectErr %v", err, tt.expectErr)
}
})
}
}
func TestBackendInfo(t *testing.T) {
info := BackendInfo{
Type: BackendTypeOllama,
BaseURL: "http://localhost:11434",
Status: BackendStatusConnected,
Capabilities: OllamaCapabilities(),
Version: "0.1.0",
}
if !info.IsConnected() {
t.Error("BackendInfo.IsConnected() should be true when status is connected")
}
info.Status = BackendStatusDisconnected
if info.IsConnected() {
t.Error("BackendInfo.IsConnected() should be false when status is disconnected")
}
}

View File

@@ -2,7 +2,6 @@
/**
* BranchNavigator - Navigate between message branches
* Shows "< 1/3 >" style navigation for sibling messages
* Supports keyboard navigation with arrow keys when focused
*/
import type { BranchInfo } from '$lib/types';
@@ -15,7 +14,7 @@
const { branchInfo, onSwitch }: Props = $props();
// Reference to the navigator container for focus management
let navigatorRef: HTMLDivElement | null = $state(null);
let navigatorRef: HTMLElement | null = $state(null);
// Track transition state for smooth animations
let isTransitioning = $state(false);
@@ -52,7 +51,7 @@
}
/**
* Handle keyboard navigation when the component is focused
* Handle keyboard navigation with arrow keys
*/
function handleKeydown(event: KeyboardEvent): void {
if (event.key === 'ArrowLeft' && canGoPrev) {
@@ -65,11 +64,10 @@
}
</script>
<div
<nav
bind:this={navigatorRef}
class="inline-flex items-center gap-1 rounded-full bg-gray-100 px-2 py-0.5 text-xs text-gray-600 transition-all duration-150 ease-out dark:bg-gray-700 dark:text-gray-300"
class:opacity-50={isTransitioning}
role="navigation"
aria-label="Message branch navigation - Use left/right arrow keys to navigate"
tabindex="0"
onkeydown={handleKeydown}
@@ -126,16 +124,16 @@
/>
</svg>
</button>
</div>
</nav>
<style>
/* Focus ring style for keyboard navigation */
div:focus {
nav:focus {
outline: none;
box-shadow: 0 0 0 2px rgba(59, 130, 246, 0.5);
}
div:focus-visible {
nav:focus-visible {
outline: none;
box-shadow: 0 0 0 2px rgba(59, 130, 246, 0.5);
}

View File

@@ -5,10 +5,12 @@
*/
import { chatState, modelsState, conversationsState, toolsState, promptsState, toastState, agentsState } from '$lib/stores';
import { backendsState } from '$lib/stores/backends.svelte';
import { resolveSystemPrompt } from '$lib/services/prompt-resolution.js';
import { serverConversationsState } from '$lib/stores/server-conversations.svelte';
import { streamingMetricsState } from '$lib/stores/streaming-metrics.svelte';
import { ollamaClient } from '$lib/ollama';
import { unifiedLLMClient, type ChatMessage as UnifiedChatMessage } from '$lib/llm';
import { addMessage as addStoredMessage, updateConversation, createConversation as createStoredConversation, saveAttachments } from '$lib/storage';
import type { FileAttachment } from '$lib/types/attachment.js';
import { fileAnalyzer, analyzeFilesInBatches, formatAnalyzedAttachment, type AnalysisResult } from '$lib/services/fileAnalyzer.js';
@@ -530,11 +532,33 @@
await sendMessageInternal(content, images, attachments);
}
/**
* Get current model name based on active backend
*/
async function getCurrentModelName(): Promise<string | null> {
if (backendsState.activeType === 'ollama') {
return modelsState.selectedId;
} else if (backendsState.activeType === 'llamacpp' || backendsState.activeType === 'lmstudio') {
try {
const response = await fetch('/api/v1/ai/models');
if (response.ok) {
const data = await response.json();
if (data.models && data.models.length > 0) {
return data.models[0].name;
}
}
} catch (err) {
console.error('Failed to get model from backend:', err);
}
}
return null;
}
/**
* Internal: Send message and stream response (bypasses context check)
*/
async function sendMessageInternal(content: string, images?: string[], attachments?: FileAttachment[]): Promise<void> {
const selectedModel = modelsState.selectedId;
const selectedModel = await getCurrentModelName();
if (!selectedModel) return;
// In 'new' mode with no messages yet, create conversation first
@@ -807,7 +831,91 @@
let streamingThinking = '';
let thinkingClosed = false;
await ollamaClient.streamChatWithCallbacks(
// Common completion handler for both clients
const handleStreamComplete = async () => {
// Close thinking block if it was opened but not closed (e.g., tool calls without content)
if (streamingThinking && !thinkingClosed) {
chatState.appendToStreaming('</think>\n\n');
thinkingClosed = true;
}
chatState.finishStreaming();
streamingMetricsState.endStream();
abortController = null;
// Handle native tool calls if received (Ollama only)
if (pendingToolCalls && pendingToolCalls.length > 0) {
await executeToolsAndContinue(
model,
assistantMessageId,
pendingToolCalls,
conversationId
);
return; // Tool continuation handles persistence
}
// Check for text-based tool calls (models without native tool calling)
const node = chatState.messageTree.get(assistantMessageId);
if (node && toolsState.toolsEnabled) {
const { toolCalls: textToolCalls, cleanContent } = parseTextToolCalls(node.message.content);
if (textToolCalls.length > 0) {
// Convert to OllamaToolCall format
const convertedCalls: OllamaToolCall[] = textToolCalls.map(tc => ({
function: {
name: tc.name,
arguments: tc.arguments
}
}));
// Update message content to remove the raw tool call text
if (cleanContent !== node.message.content) {
node.message.content = cleanContent || 'Using tool...';
}
await executeToolsAndContinue(
model,
assistantMessageId,
convertedCalls,
conversationId
);
return; // Tool continuation handles persistence
}
}
// Persist assistant message to IndexedDB with the SAME ID as chatState
if (conversationId) {
const nodeForPersist = chatState.messageTree.get(assistantMessageId);
if (nodeForPersist) {
await addStoredMessage(
conversationId,
{ role: 'assistant', content: nodeForPersist.message.content },
parentMessageId,
assistantMessageId
);
await updateConversation(conversationId, {});
conversationsState.update(conversationId, {});
}
}
// Check for auto-compact after response completes
await handleAutoCompact();
};
// Common error handler for both clients
const handleStreamError = (error: unknown) => {
console.error('Streaming error:', error);
// Show error to user instead of leaving "Processing..."
const errorMsg = error instanceof Error ? error.message : 'Unknown error';
chatState.setStreamContent(`⚠️ Error: ${errorMsg}`);
chatState.finishStreaming();
streamingMetricsState.endStream();
abortController = null;
};
// Use appropriate client based on active backend
if (backendsState.activeType === 'ollama') {
// Ollama - full feature support (thinking, native tool calls)
await ollamaClient.streamChatWithCallbacks(
{
model: chatModel,
messages,
@@ -851,86 +959,42 @@
// Store tool calls to process after streaming completes
pendingToolCalls = toolCalls;
},
onComplete: async () => {
// Close thinking block if it was opened but not closed (e.g., tool calls without content)
if (streamingThinking && !thinkingClosed) {
chatState.appendToStreaming('</think>\n\n');
thinkingClosed = true;
}
chatState.finishStreaming();
streamingMetricsState.endStream();
abortController = null;
// Handle native tool calls if received
if (pendingToolCalls && pendingToolCalls.length > 0) {
await executeToolsAndContinue(
model,
assistantMessageId,
pendingToolCalls,
conversationId
);
return; // Tool continuation handles persistence
}
// Check for text-based tool calls (models without native tool calling)
const node = chatState.messageTree.get(assistantMessageId);
if (node && toolsState.toolsEnabled) {
const { toolCalls: textToolCalls, cleanContent } = parseTextToolCalls(node.message.content);
if (textToolCalls.length > 0) {
// Convert to OllamaToolCall format
const convertedCalls: OllamaToolCall[] = textToolCalls.map(tc => ({
function: {
name: tc.name,
arguments: tc.arguments
}
}));
// Update message content to remove the raw tool call text
if (cleanContent !== node.message.content) {
node.message.content = cleanContent || 'Using tool...';
}
await executeToolsAndContinue(
model,
assistantMessageId,
convertedCalls,
conversationId
);
return; // Tool continuation handles persistence
}
}
// Persist assistant message to IndexedDB with the SAME ID as chatState
if (conversationId) {
const nodeForPersist = chatState.messageTree.get(assistantMessageId);
if (nodeForPersist) {
await addStoredMessage(
conversationId,
{ role: 'assistant', content: nodeForPersist.message.content },
parentMessageId,
assistantMessageId
);
await updateConversation(conversationId, {});
conversationsState.update(conversationId, {});
}
}
// Check for auto-compact after response completes
await handleAutoCompact();
},
onError: (error) => {
console.error('Streaming error:', error);
// Show error to user instead of leaving "Processing..."
const errorMsg = error instanceof Error ? error.message : 'Unknown error';
chatState.setStreamContent(`⚠️ Error: ${errorMsg}`);
chatState.finishStreaming();
streamingMetricsState.endStream();
abortController = null;
}
onComplete: handleStreamComplete,
onError: handleStreamError
},
abortController.signal
);
} else {
// llama.cpp / LM Studio - basic streaming via unified API
const unifiedMessages: UnifiedChatMessage[] = messages.map(m => ({
role: m.role as 'system' | 'user' | 'assistant' | 'tool',
content: m.content,
images: m.images
}));
await unifiedLLMClient.streamChatWithCallbacks(
{
model: chatModel,
messages: unifiedMessages,
options: settingsState.apiParameters
},
{
onToken: (token) => {
// Clear "Processing..." on first token
if (needsClearOnFirstToken) {
chatState.setStreamContent('');
needsClearOnFirstToken = false;
}
chatState.appendToStreaming(token);
// Track content tokens for metrics
streamingMetricsState.incrementTokens();
},
onComplete: handleStreamComplete,
onError: handleStreamError
},
abortController.signal
);
}
} catch (error) {
console.error('Failed to send message:', error);
// Show error to user
@@ -1346,6 +1410,7 @@
type="button"
role="switch"
aria-checked={thinkingEnabled}
aria-label="Toggle thinking mode"
onclick={() => (thinkingEnabled = !thinkingEnabled)}
class="relative inline-flex h-5 w-9 flex-shrink-0 cursor-pointer rounded-full border-2 border-transparent transition-colors duration-200 ease-in-out focus:outline-none focus:ring-2 focus:ring-amber-500 focus:ring-offset-2 focus:ring-offset-theme-primary {thinkingEnabled ? 'bg-amber-600' : 'bg-theme-tertiary'}"
>
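In summary, the refactor above extracts the shared `handleStreamComplete` / `handleStreamError` callbacks and then branches once on the active backend. A condensed sketch of that routing, using the names from the diff (thinking, tool-call, and metrics wiring omitted):

```ts
// Condensed illustration of the branch added above — not the full implementation.
if (backendsState.activeType === 'ollama') {
  // Ollama path: full feature set (thinking blocks, native tool calls).
  await ollamaClient.streamChatWithCallbacks(
    { model: chatModel, messages },
    { onComplete: handleStreamComplete, onError: handleStreamError },
    abortController.signal
  );
} else {
  // llama.cpp / LM Studio path: plain token streaming via the unified /api/v1/ai/* API.
  await unifiedLLMClient.streamChatWithCallbacks(
    { model: chatModel, messages: unifiedMessages, options: settingsState.apiParameters },
    { onComplete: handleStreamComplete, onError: handleStreamError },
    abortController.signal
  );
}
```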

View File

@@ -13,12 +13,25 @@
height?: number;
}
const { html, title = 'Preview', height = 300 }: Props = $props();
const props: Props = $props();
// Derive values from props
const html = $derived(props.html);
const title = $derived(props.title ?? 'Preview');
const height = $derived(props.height ?? 300);
// State
let iframeRef: HTMLIFrameElement | null = $state(null);
let isExpanded = $state(false);
let actualHeight = $state(height);
// actualHeight tracks the current display height, synced from prop when not expanded
let actualHeight = $state(props.height ?? 300);
// Sync actualHeight when height prop changes (only when not expanded)
$effect(() => {
if (!isExpanded) {
actualHeight = height;
}
});
// Generate a complete HTML document if the code is just a fragment
const fullHtml = $derived.by(() => {

View File

@@ -14,9 +14,15 @@
inProgress?: boolean;
}
const { content, defaultExpanded = false, inProgress = false }: Props = $props();
const props: Props = $props();
let isExpanded = $state(defaultExpanded);
// Initialize isExpanded from defaultExpanded prop
// This intentionally captures the initial value only - user controls expansion independently
let isExpanded = $state(props.defaultExpanded ?? false);
// Derived values from props for reactivity
const content = $derived(props.content);
const inProgress = $derived(props.inProgress ?? false);
// Keep collapsed during and after streaming - user can expand manually if desired

View File

@@ -109,9 +109,11 @@
<div
class="fixed inset-0 z-50 flex items-center justify-center bg-black/60 backdrop-blur-sm p-4"
onclick={handleBackdropClick}
onkeydown={handleKeydown}
role="dialog"
aria-modal="true"
aria-labelledby="model-editor-title"
tabindex="-1"
>
<!-- Dialog -->
<div class="w-full max-w-lg rounded-xl bg-theme-secondary shadow-xl">

View File

@@ -40,9 +40,11 @@
<div
class="fixed inset-0 z-50 flex items-center justify-center bg-black/60 backdrop-blur-sm"
onclick={handleBackdropClick}
onkeydown={handleKeydown}
role="dialog"
aria-modal="true"
aria-labelledby="pull-dialog-title"
tabindex="-1"
>
<!-- Dialog -->
<div class="w-full max-w-md rounded-xl bg-theme-secondary p-6 shadow-xl">

View File

@@ -71,9 +71,11 @@
<div
class="fixed inset-0 z-50 flex items-center justify-center bg-black/60 backdrop-blur-sm"
onclick={handleBackdropClick}
onkeydown={handleKeydown}
role="dialog"
aria-modal="true"
aria-labelledby="move-dialog-title"
tabindex="-1"
>
<!-- Dialog -->
<div class="mx-4 w-full max-w-sm rounded-xl border border-theme bg-theme-primary shadow-2xl">

View File

@@ -210,9 +210,11 @@
<div
class="fixed inset-0 z-50 flex items-center justify-center bg-black/60 backdrop-blur-sm"
onclick={handleBackdropClick}
onkeydown={handleKeydown}
role="dialog"
aria-modal="true"
aria-labelledby="project-dialog-title"
tabindex="-1"
>
<!-- Dialog -->
<div class="mx-4 w-full max-w-lg rounded-xl border border-theme bg-theme-primary shadow-2xl">
@@ -313,9 +315,9 @@
<!-- Color -->
<div>
<label class="mb-1.5 block text-sm font-medium text-theme-secondary">
<span class="mb-1.5 block text-sm font-medium text-theme-secondary">
Color
</label>
</span>
<div class="flex items-center gap-2">
{#each presetColors as presetColor}
<button

View File

@@ -0,0 +1,74 @@
<script lang="ts">
/**
* AIProvidersTab - Combined Backends and Models management
* Sub-tabs for backend configuration and model management
* Models sub-tab only available when Ollama is active
*/
import { backendsState } from '$lib/stores/backends.svelte';
import BackendsPanel from './BackendsPanel.svelte';
import ModelsTab from './ModelsTab.svelte';
type SubTab = 'backends' | 'models';
let activeSubTab = $state<SubTab>('backends');
// Models tab only available for Ollama
const isOllamaActive = $derived(backendsState.activeType === 'ollama');
// If Models tab is active but Ollama is no longer active, switch to Backends
$effect(() => {
if (activeSubTab === 'models' && !isOllamaActive) {
activeSubTab = 'backends';
}
});
</script>
<div class="space-y-6">
<!-- Sub-tab Navigation -->
<div class="flex gap-1 border-b border-theme">
<button
type="button"
onclick={() => (activeSubTab = 'backends')}
class="flex items-center gap-2 border-b-2 px-4 py-2 text-sm font-medium transition-colors {activeSubTab === 'backends'
? 'border-violet-500 text-violet-400'
: 'border-transparent text-theme-muted hover:border-theme hover:text-theme-primary'}"
>
<svg class="h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M5 12h14M5 12a2 2 0 0 1-2-2V6a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2v4a2 2 0 0 1-2 2M5 12a2 2 0 0 0-2 2v4a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-4a2 2 0 0 0-2-2m-2-4h.01M17 16h.01" />
</svg>
Backends
</button>
{#if isOllamaActive}
<button
type="button"
onclick={() => (activeSubTab = 'models')}
class="flex items-center gap-2 border-b-2 px-4 py-2 text-sm font-medium transition-colors {activeSubTab === 'models'
? 'border-violet-500 text-violet-400'
: 'border-transparent text-theme-muted hover:border-theme hover:text-theme-primary'}"
>
<svg class="h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M8.25 3v1.5M4.5 8.25H3m18 0h-1.5M4.5 12H3m18 0h-1.5m-15 3.75H3m18 0h-1.5M8.25 19.5V21M12 3v1.5m0 15V21m3.75-18v1.5m0 15V21m-9-1.5h10.5a2.25 2.25 0 0 0 2.25-2.25V6.75a2.25 2.25 0 0 0-2.25-2.25H6.75A2.25 2.25 0 0 0 4.5 6.75v10.5a2.25 2.25 0 0 0 2.25 2.25Zm.75-12h9v9h-9v-9Z" />
</svg>
Models
</button>
{:else}
<span
class="flex cursor-not-allowed items-center gap-2 border-b-2 border-transparent px-4 py-2 text-sm font-medium text-theme-muted/50"
title="Models tab only available when Ollama is the active backend"
>
<svg class="h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M8.25 3v1.5M4.5 8.25H3m18 0h-1.5M4.5 12H3m18 0h-1.5m-15 3.75H3m18 0h-1.5M8.25 19.5V21M12 3v1.5m0 15V21m3.75-18v1.5m0 15V21m-9-1.5h10.5a2.25 2.25 0 0 0 2.25-2.25V6.75a2.25 2.25 0 0 0-2.25-2.25H6.75A2.25 2.25 0 0 0 4.5 6.75v10.5a2.25 2.25 0 0 0 2.25 2.25Zm.75-12h9v9h-9v-9Z" />
</svg>
Models
<span class="text-xs">(Ollama only)</span>
</span>
{/if}
</div>
<!-- Sub-tab Content -->
{#if activeSubTab === 'backends'}
<BackendsPanel />
{:else if activeSubTab === 'models'}
<ModelsTab />
{/if}
</div>

View File

@@ -435,7 +435,7 @@
<!-- Tools Selection -->
<div class="mb-4">
<label class="mb-2 block text-sm font-medium text-theme-primary"> Allowed Tools </label>
<span class="mb-2 block text-sm font-medium text-theme-primary"> Allowed Tools </span>
<div class="max-h-48 overflow-y-auto rounded-lg border border-theme bg-theme-secondary p-2">
{#if availableTools.length === 0}
<p class="p-2 text-sm text-theme-muted">No tools available</p>

View File

@@ -0,0 +1,305 @@
<script lang="ts">
/**
* BackendsPanel - Multi-backend LLM management
* Configure and switch between Ollama, llama.cpp, and LM Studio
*/
import { onMount } from 'svelte';
import { backendsState, type BackendType, type BackendInfo, type DiscoveryResult } from '$lib/stores/backends.svelte';
let discovering = $state(false);
let discoveryResults = $state<DiscoveryResult[]>([]);
let showDiscoveryResults = $state(false);
async function handleDiscover(): Promise<void> {
discovering = true;
showDiscoveryResults = false;
try {
discoveryResults = await backendsState.discover();
showDiscoveryResults = true;
// Reload backends after discovery
await backendsState.load();
} finally {
discovering = false;
}
}
async function handleSetActive(type: BackendType): Promise<void> {
await backendsState.setActive(type);
}
function getBackendDisplayName(type: BackendType): string {
switch (type) {
case 'ollama':
return 'Ollama';
case 'llamacpp':
return 'llama.cpp';
case 'lmstudio':
return 'LM Studio';
default:
return type;
}
}
function getBackendDescription(type: BackendType): string {
switch (type) {
case 'ollama':
return 'Full model management - pull, delete, create custom models';
case 'llamacpp':
return 'OpenAI-compatible API - models loaded at server startup';
case 'lmstudio':
return 'OpenAI-compatible API - manage models via LM Studio app';
default:
return '';
}
}
function getDefaultPort(type: BackendType): string {
switch (type) {
case 'ollama':
return '11434';
case 'llamacpp':
return '8081';
case 'lmstudio':
return '1234';
default:
return '';
}
}
function getStatusColor(status: string): string {
switch (status) {
case 'connected':
return 'bg-green-500';
case 'disconnected':
return 'bg-red-500';
default:
return 'bg-yellow-500';
}
}
onMount(() => {
backendsState.load();
});
</script>
<div class="space-y-6">
<!-- Header -->
<div class="flex items-start justify-between gap-4">
<div>
<h2 class="text-xl font-bold text-theme-primary">AI Backends</h2>
<p class="mt-1 text-sm text-theme-muted">
Configure LLM backends: Ollama, llama.cpp server, or LM Studio
</p>
</div>
<button
type="button"
onclick={handleDiscover}
disabled={discovering}
class="flex items-center gap-2 rounded-lg bg-blue-600 px-4 py-2 text-sm font-medium text-white transition-colors hover:bg-blue-700 disabled:cursor-not-allowed disabled:opacity-50"
>
{#if discovering}
<svg class="h-4 w-4 animate-spin" viewBox="0 0 24 24">
<circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4" fill="none"></circle>
<path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
</svg>
<span>Discovering...</span>
{:else}
<svg xmlns="http://www.w3.org/2000/svg" class="h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z" />
</svg>
<span>Auto-Detect</span>
{/if}
</button>
</div>
<!-- Error Message -->
{#if backendsState.error}
<div class="rounded-lg border border-red-900/50 bg-red-900/20 p-4">
<div class="flex items-center gap-2 text-red-400">
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M12 8v4m0 4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
</svg>
<span>{backendsState.error}</span>
<button type="button" onclick={() => backendsState.clearError()} class="ml-auto text-red-400 hover:text-red-300" aria-label="Dismiss error">
<svg xmlns="http://www.w3.org/2000/svg" class="h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M6 18L18 6M6 6l12 12" />
</svg>
</button>
</div>
</div>
{/if}
<!-- Discovery Results -->
{#if showDiscoveryResults && discoveryResults.length > 0}
<div class="rounded-lg border border-theme bg-theme-secondary p-4">
<h3 class="mb-3 text-sm font-medium text-theme-secondary">Discovery Results</h3>
<div class="space-y-2">
{#each discoveryResults as result}
<div class="flex items-center justify-between rounded-lg bg-theme-tertiary/50 px-3 py-2">
<div class="flex items-center gap-3">
<span class="h-2 w-2 rounded-full {result.available ? 'bg-green-500' : 'bg-red-500'}"></span>
<span class="text-sm text-theme-primary">{getBackendDisplayName(result.type)}</span>
<span class="text-xs text-theme-muted">{result.baseUrl}</span>
</div>
<span class="text-xs {result.available ? 'text-green-400' : 'text-red-400'}">
{result.available ? 'Available' : result.error || 'Not found'}
</span>
</div>
{/each}
</div>
<button
type="button"
onclick={() => showDiscoveryResults = false}
class="mt-3 text-xs text-theme-muted hover:text-theme-primary"
>
Dismiss
</button>
</div>
{/if}
<!-- Active Backend Info -->
{#if backendsState.activeBackend}
<div class="rounded-lg border border-blue-900/50 bg-blue-900/20 p-4">
<div class="flex items-center gap-2 text-blue-400">
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z" />
</svg>
<span class="font-medium">Active: {getBackendDisplayName(backendsState.activeBackend.type)}</span>
{#if backendsState.activeBackend.version}
<span class="text-xs text-blue-300/70">v{backendsState.activeBackend.version}</span>
{/if}
</div>
<p class="mt-1 text-sm text-blue-300/70">{backendsState.activeBackend.baseUrl}</p>
<!-- Capabilities -->
<div class="mt-3 flex flex-wrap gap-2">
{#if backendsState.canPullModels}
<span class="rounded bg-green-900/30 px-2 py-1 text-xs text-green-400">Pull Models</span>
{/if}
{#if backendsState.canDeleteModels}
<span class="rounded bg-green-900/30 px-2 py-1 text-xs text-green-400">Delete Models</span>
{/if}
{#if backendsState.canCreateModels}
<span class="rounded bg-green-900/30 px-2 py-1 text-xs text-green-400">Create Custom</span>
{/if}
{#if backendsState.activeBackend.capabilities.canStreamChat}
<span class="rounded bg-blue-900/30 px-2 py-1 text-xs text-blue-400">Streaming</span>
{/if}
{#if backendsState.activeBackend.capabilities.canEmbed}
<span class="rounded bg-purple-900/30 px-2 py-1 text-xs text-purple-400">Embeddings</span>
{/if}
</div>
</div>
{:else if !backendsState.isLoading}
<div class="rounded-lg border border-amber-900/50 bg-amber-900/20 p-4">
<div class="flex items-center gap-2 text-amber-400">
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z" />
</svg>
<span>No active backend configured. Click "Auto-Detect" to find available backends.</span>
</div>
</div>
{/if}
<!-- Backend Cards -->
<div class="space-y-4">
<h3 class="text-sm font-medium text-theme-secondary">Available Backends</h3>
{#if backendsState.isLoading}
<div class="space-y-3">
{#each Array(3) as _}
<div class="animate-pulse rounded-lg border border-theme bg-theme-secondary p-4">
<div class="flex items-center gap-4">
<div class="h-10 w-10 rounded-lg bg-theme-tertiary"></div>
<div class="flex-1">
<div class="h-5 w-32 rounded bg-theme-tertiary"></div>
<div class="mt-2 h-4 w-48 rounded bg-theme-tertiary"></div>
</div>
</div>
</div>
{/each}
</div>
{:else if backendsState.backends.length === 0}
<div class="rounded-lg border border-dashed border-theme bg-theme-secondary/50 p-8 text-center">
<svg xmlns="http://www.w3.org/2000/svg" class="mx-auto h-12 w-12 text-theme-muted" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="1.5">
<path stroke-linecap="round" stroke-linejoin="round" d="M5 12h14M5 12a2 2 0 01-2-2V6a2 2 0 012-2h14a2 2 0 012 2v4a2 2 0 01-2 2M5 12a2 2 0 00-2 2v4a2 2 0 002 2h14a2 2 0 002-2v-4a2 2 0 00-2-2m-2-4h.01M17 16h.01" />
</svg>
<h3 class="mt-4 text-sm font-medium text-theme-muted">No backends configured</h3>
<p class="mt-1 text-sm text-theme-muted">
Click "Auto-Detect" to scan for available LLM backends
</p>
</div>
{:else}
{#each backendsState.backends as backend}
{@const isActive = backendsState.activeType === backend.type}
<div class="rounded-lg border transition-colors {isActive ? 'border-blue-500 bg-blue-900/10' : 'border-theme bg-theme-secondary hover:border-theme-subtle'}">
<div class="p-4">
<div class="flex items-start justify-between">
<div class="flex items-center gap-4">
<!-- Backend Icon -->
<div class="flex h-12 w-12 items-center justify-center rounded-lg bg-theme-tertiary">
{#if backend.type === 'ollama'}
<svg xmlns="http://www.w3.org/2000/svg" class="h-6 w-6 text-theme-primary" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M5 12h14M5 12a2 2 0 01-2-2V6a2 2 0 012-2h14a2 2 0 012 2v4a2 2 0 01-2 2M5 12a2 2 0 00-2 2v4a2 2 0 002 2h14a2 2 0 002-2v-4a2 2 0 00-2-2" />
</svg>
{:else if backend.type === 'llamacpp'}
<svg xmlns="http://www.w3.org/2000/svg" class="h-6 w-6 text-theme-primary" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M10 20l4-16m4 4l4 4-4 4M6 16l-4-4 4-4" />
</svg>
{:else}
<svg xmlns="http://www.w3.org/2000/svg" class="h-6 w-6 text-theme-primary" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M9.75 17L9 20l-1 1h8l-1-1-.75-3M3 13h18M5 17h14a2 2 0 002-2V5a2 2 0 00-2-2H5a2 2 0 00-2 2v10a2 2 0 002 2z" />
</svg>
{/if}
</div>
<div>
<div class="flex items-center gap-2">
<h4 class="font-medium text-theme-primary">{getBackendDisplayName(backend.type)}</h4>
<span class="flex items-center gap-1.5 rounded-full px-2 py-0.5 text-xs {backend.status === 'connected' ? 'bg-green-900/30 text-green-400' : 'bg-red-900/30 text-red-400'}">
<span class="h-1.5 w-1.5 rounded-full {getStatusColor(backend.status)}"></span>
{backend.status}
</span>
{#if isActive}
<span class="rounded bg-blue-600 px-2 py-0.5 text-xs font-medium text-white">Active</span>
{/if}
</div>
<p class="mt-1 text-sm text-theme-muted">{getBackendDescription(backend.type)}</p>
<p class="mt-1 text-xs text-theme-muted/70">{backend.baseUrl}</p>
</div>
</div>
<div class="flex items-center gap-2">
{#if !isActive && backend.status === 'connected'}
<button
type="button"
onclick={() => handleSetActive(backend.type)}
class="rounded-lg bg-blue-600 px-3 py-1.5 text-sm font-medium text-white transition-colors hover:bg-blue-700"
>
Set Active
</button>
{/if}
</div>
</div>
{#if backend.error}
<div class="mt-3 rounded bg-red-900/20 px-3 py-2 text-xs text-red-400">
{backend.error}
</div>
{/if}
</div>
</div>
{/each}
{/if}
</div>
<!-- Help Section -->
<div class="rounded-lg border border-theme bg-theme-secondary/50 p-4">
<h3 class="text-sm font-medium text-theme-secondary">Quick Start</h3>
<div class="mt-2 space-y-2 text-sm text-theme-muted">
<p><strong>Ollama:</strong> Run <code class="rounded bg-theme-tertiary px-1.5 py-0.5 text-xs">ollama serve</code> (default port 11434)</p>
<p><strong>llama.cpp:</strong> Run <code class="rounded bg-theme-tertiary px-1.5 py-0.5 text-xs">llama-server -m model.gguf --port 8081</code> (Vessel's default llama.cpp port is 8081)</p>
<p><strong>LM Studio:</strong> Start local server from the app (default port 1234)</p>
</div>
</div>
</div>

View File

@@ -41,6 +41,7 @@
class="relative inline-flex h-6 w-11 flex-shrink-0 cursor-pointer rounded-full border-2 border-transparent transition-colors duration-200 ease-in-out focus:outline-none focus:ring-2 focus:ring-purple-500 focus:ring-offset-2 focus:ring-offset-theme {uiState.darkMode ? 'bg-purple-600' : 'bg-theme-tertiary'}"
role="switch"
aria-checked={uiState.darkMode}
aria-label="Toggle dark mode"
>
<span
class="pointer-events-none inline-block h-5 w-5 transform rounded-full bg-white shadow ring-0 transition duration-200 ease-in-out {uiState.darkMode ? 'translate-x-5' : 'translate-x-0'}"

View File

@@ -25,7 +25,7 @@
let dragOver = $state(false);
let deleteConfirm = $state<{ show: boolean; doc: StoredDocument | null }>({ show: false, doc: null });
let fileInput: HTMLInputElement;
let fileInput = $state<HTMLInputElement | null>(null);
onMount(async () => {
await refreshData();

View File

@@ -108,6 +108,7 @@
class="relative inline-flex h-6 w-11 flex-shrink-0 cursor-pointer rounded-full border-2 border-transparent transition-colors duration-200 ease-in-out focus:outline-none focus:ring-2 focus:ring-emerald-500 focus:ring-offset-2 focus:ring-offset-theme {settingsState.autoCompactEnabled ? 'bg-emerald-600' : 'bg-theme-tertiary'}"
role="switch"
aria-checked={settingsState.autoCompactEnabled}
aria-label="Toggle auto-compact"
>
<span
class="pointer-events-none inline-block h-5 w-5 transform rounded-full bg-white shadow ring-0 transition duration-200 ease-in-out {settingsState.autoCompactEnabled ? 'translate-x-5' : 'translate-x-0'}"
@@ -192,6 +193,7 @@
class="relative inline-flex h-6 w-11 flex-shrink-0 cursor-pointer rounded-full border-2 border-transparent transition-colors duration-200 ease-in-out focus:outline-none focus:ring-2 focus:ring-orange-500 focus:ring-offset-2 focus:ring-offset-theme {settingsState.useCustomParameters ? 'bg-orange-600' : 'bg-theme-tertiary'}"
role="switch"
aria-checked={settingsState.useCustomParameters}
aria-label="Toggle custom model parameters"
>
<span
class="pointer-events-none inline-block h-5 w-5 transform rounded-full bg-white shadow ring-0 transition duration-200 ease-in-out {settingsState.useCustomParameters ? 'translate-x-5' : 'translate-x-0'}"

View File

@@ -93,13 +93,12 @@
<!-- Enable custom parameters toggle -->
<div class="mb-4 flex items-center justify-between">
<label class="flex items-center gap-2 text-sm text-theme-secondary">
<span>Use custom parameters</span>
</label>
<span class="text-sm text-theme-secondary">Use custom parameters</span>
<button
type="button"
role="switch"
aria-checked={settingsState.useCustomParameters}
aria-label="Toggle custom model parameters"
onclick={() => settingsState.toggleCustomParameters(modelDefaults)}
class="relative inline-flex h-5 w-9 flex-shrink-0 cursor-pointer rounded-full border-2 border-transparent transition-colors duration-200 ease-in-out focus:outline-none focus:ring-2 focus:ring-sky-500 focus:ring-offset-2 focus:ring-offset-theme-secondary {settingsState.useCustomParameters ? 'bg-sky-600' : 'bg-theme-tertiary'}"
>

View File

@@ -427,7 +427,7 @@
<path stroke-linecap="round" stroke-linejoin="round" d="M12 8v4m0 4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
</svg>
<span>{deleteError}</span>
<button type="button" onclick={() => deleteError = null} class="ml-auto text-red-400 hover:text-red-300">
<button type="button" onclick={() => deleteError = null} class="ml-auto text-red-400 hover:text-red-300" aria-label="Dismiss error">
<svg xmlns="http://www.w3.org/2000/svg" class="h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M6 18L18 6M6 6l12 12" />
</svg>
@@ -833,13 +833,13 @@
{#if modelRegistry.totalPages > 1}
<div class="mt-6 flex items-center justify-center gap-2">
<button type="button" onclick={() => modelRegistry.prevPage()} disabled={!modelRegistry.hasPrevPage} class="rounded-lg border border-theme bg-theme-secondary px-3 py-2 text-sm text-theme-muted transition-colors hover:bg-theme-tertiary hover:text-theme-primary disabled:cursor-not-allowed disabled:opacity-50">
<button type="button" onclick={() => modelRegistry.prevPage()} disabled={!modelRegistry.hasPrevPage} class="rounded-lg border border-theme bg-theme-secondary px-3 py-2 text-sm text-theme-muted transition-colors hover:bg-theme-tertiary hover:text-theme-primary disabled:cursor-not-allowed disabled:opacity-50" aria-label="Previous page">
<svg xmlns="http://www.w3.org/2000/svg" class="h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M15 19l-7-7 7-7" />
</svg>
</button>
<span class="text-sm text-theme-muted">Page {modelRegistry.currentPage + 1} of {modelRegistry.totalPages}</span>
<button type="button" onclick={() => modelRegistry.nextPage()} disabled={!modelRegistry.hasNextPage} class="rounded-lg border border-theme bg-theme-secondary px-3 py-2 text-sm text-theme-muted transition-colors hover:bg-theme-tertiary hover:text-theme-primary disabled:cursor-not-allowed disabled:opacity-50">
<button type="button" onclick={() => modelRegistry.nextPage()} disabled={!modelRegistry.hasNextPage} class="rounded-lg border border-theme bg-theme-secondary px-3 py-2 text-sm text-theme-muted transition-colors hover:bg-theme-tertiary hover:text-theme-primary disabled:cursor-not-allowed disabled:opacity-50" aria-label="Next page">
<svg xmlns="http://www.w3.org/2000/svg" class="h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M9 5l7 7-7 7" />
</svg>
@@ -855,7 +855,7 @@
<div class="w-80 flex-shrink-0 overflow-y-auto border-l border-theme bg-theme-secondary p-4">
<div class="mb-4 flex items-start justify-between">
<h3 class="text-lg font-semibold text-theme-primary">{selectedModel.name}</h3>
<button type="button" onclick={closeDetails} class="rounded p-1 text-theme-muted hover:bg-theme-tertiary hover:text-theme-primary">
<button type="button" onclick={closeDetails} class="rounded p-1 text-theme-muted hover:bg-theme-tertiary hover:text-theme-primary" aria-label="Close details">
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M6 18L18 6M6 6l12 12" />
</svg>

View File

@@ -358,11 +358,11 @@
<!-- Editor Modal -->
{#if showEditor}
<div class="fixed inset-0 z-50 flex items-center justify-center bg-black/50 p-4" onclick={(e) => { if (e.target === e.currentTarget) closeEditor(); }} role="dialog" aria-modal="true">
<div class="fixed inset-0 z-50 flex items-center justify-center bg-black/50 p-4" onclick={(e) => { if (e.target === e.currentTarget) closeEditor(); }} onkeydown={(e) => { if (e.key === 'Escape') closeEditor(); }} role="dialog" aria-modal="true" tabindex="-1">
<div class="w-full max-w-2xl rounded-xl bg-theme-secondary shadow-xl">
<div class="flex items-center justify-between border-b border-theme px-6 py-4">
<h3 class="text-lg font-semibold text-theme-primary">{editingPrompt ? 'Edit Prompt' : 'Create Prompt'}</h3>
<button type="button" onclick={closeEditor} class="rounded p-1 text-theme-muted hover:bg-theme-tertiary hover:text-theme-primary">
<button type="button" onclick={closeEditor} aria-label="Close dialog" class="rounded p-1 text-theme-muted hover:bg-theme-tertiary hover:text-theme-primary">
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M6 18L18 6M6 6l12 12" />
</svg>
@@ -392,8 +392,8 @@
<label for="prompt-default" class="text-sm text-theme-secondary">Set as default for new chats</label>
</div>
<div>
<label class="mb-2 block text-sm font-medium text-theme-secondary">Auto-use for model types</label>
<fieldset>
<legend class="mb-2 block text-sm font-medium text-theme-secondary">Auto-use for model types</legend>
<div class="flex flex-wrap gap-2">
{#each CAPABILITIES as cap (cap.id)}
<button type="button" onclick={() => toggleCapability(cap.id)} class="rounded-lg border px-3 py-1.5 text-sm transition-colors {formTargetCapabilities.includes(cap.id) ? 'border-blue-500 bg-blue-500/20 text-blue-300' : 'border-theme-subtle bg-theme-tertiary text-theme-muted hover:border-theme hover:text-theme-secondary'}" title={cap.description}>
@@ -401,7 +401,7 @@
</button>
{/each}
</div>
</div>
</fieldset>
</div>
<div class="mt-6 flex justify-end gap-3">
@@ -418,7 +418,7 @@
<!-- Template Preview Modal -->
{#if previewTemplate}
{@const info = categoryInfo[previewTemplate.category]}
<div class="fixed inset-0 z-50 flex items-center justify-center bg-black/50 p-4" onclick={(e) => { if (e.target === e.currentTarget) previewTemplate = null; }} role="dialog" aria-modal="true">
<div class="fixed inset-0 z-50 flex items-center justify-center bg-black/50 p-4" onclick={(e) => { if (e.target === e.currentTarget) previewTemplate = null; }} onkeydown={(e) => { if (e.key === 'Escape') previewTemplate = null; }} role="dialog" aria-modal="true" tabindex="-1">
<div class="w-full max-w-2xl max-h-[80vh] flex flex-col rounded-xl bg-theme-secondary shadow-xl">
<div class="flex items-center justify-between border-b border-theme px-6 py-4">
<div>
@@ -428,7 +428,7 @@
{info.label}
</span>
</div>
<button type="button" onclick={() => (previewTemplate = null)} class="rounded p-1 text-theme-muted hover:bg-theme-tertiary hover:text-theme-primary">
<button type="button" onclick={() => (previewTemplate = null)} aria-label="Close dialog" class="rounded p-1 text-theme-muted hover:bg-theme-tertiary hover:text-theme-primary">
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M6 18L18 6M6 6l12 12" />
</svg>

View File

@@ -2,7 +2,7 @@
/**
* SettingsTabs - Horizontal tab navigation for Settings Hub
*/
export type SettingsTab = 'general' | 'models' | 'prompts' | 'tools' | 'agents' | 'knowledge' | 'memory' | 'about';
export type SettingsTab = 'general' | 'ai' | 'prompts' | 'tools' | 'agents' | 'knowledge' | 'memory' | 'about';
</script>
<script lang="ts">
@@ -16,7 +16,7 @@
const tabs: Tab[] = [
{ id: 'general', label: 'General', icon: 'settings' },
{ id: 'models', label: 'Models', icon: 'cpu' },
{ id: 'ai', label: 'AI Providers', icon: 'server' },
{ id: 'prompts', label: 'Prompts', icon: 'message' },
{ id: 'tools', label: 'Tools', icon: 'wrench' },
{ id: 'agents', label: 'Agents', icon: 'robot' },
@@ -45,7 +45,11 @@
<path stroke-linecap="round" stroke-linejoin="round" d="M10.343 3.94c.09-.542.56-.94 1.11-.94h1.093c.55 0 1.02.398 1.11.94l.149.894c.07.424.384.764.78.93.398.164.855.142 1.205-.108l.737-.527a1.125 1.125 0 0 1 1.45.12l.773.774c.39.389.44 1.002.12 1.45l-.527.737c-.25.35-.272.806-.107 1.204.165.397.505.71.93.78l.893.15c.543.09.94.559.94 1.109v1.094c0 .55-.397 1.02-.94 1.11l-.894.149c-.424.07-.764.383-.929.78-.165.398-.143.854.107 1.204l.527.738c.32.447.269 1.06-.12 1.45l-.774.773a1.125 1.125 0 0 1-1.449.12l-.738-.527c-.35-.25-.806-.272-1.203-.107-.398.165-.71.505-.781.929l-.149.894c-.09.542-.56.94-1.11.94h-1.094c-.55 0-1.019-.398-1.11-.94l-.148-.894c-.071-.424-.384-.764-.781-.93-.398-.164-.854-.142-1.204.108l-.738.527c-.447.32-1.06.269-1.45-.12l-.773-.774a1.125 1.125 0 0 1-.12-1.45l.527-.737c.25-.35.272-.806.108-1.204-.165-.397-.506-.71-.93-.78l-.894-.15c-.542-.09-.94-.56-.94-1.109v-1.094c0-.55.398-1.02.94-1.11l.894-.149c.424-.07.765-.383.93-.78.165-.398.143-.854-.108-1.204l-.526-.738a1.125 1.125 0 0 1 .12-1.45l.773-.773a1.125 1.125 0 0 1 1.45-.12l.737.527c.35.25.807.272 1.204.107.397-.165.71-.505.78-.929l.15-.894Z" />
<path stroke-linecap="round" stroke-linejoin="round" d="M15 12a3 3 0 1 1-6 0 3 3 0 0 1 6 0Z" />
</svg>
{:else if tab.icon === 'cpu'}
{:else if tab.icon === 'server'}
<svg class="h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M5 12h14M5 12a2 2 0 0 1-2-2V6a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2v4a2 2 0 0 1-2 2M5 12a2 2 0 0 0-2 2v4a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-4a2 2 0 0 0-2-2m-2-4h.01M17 16h.01" />
</svg>
{:else if tab.icon === 'cpu'}
<svg class="h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M8.25 3v1.5M4.5 8.25H3m18 0h-1.5M4.5 12H3m18 0h-1.5m-15 3.75H3m18 0h-1.5M8.25 19.5V21M12 3v1.5m0 15V21m3.75-18v1.5m0 15V21m-9-1.5h10.5a2.25 2.25 0 0 0 2.25-2.25V6.75a2.25 2.25 0 0 0-2.25-2.25H6.75A2.25 2.25 0 0 0 4.5 6.75v10.5a2.25 2.25 0 0 0 2.25 2.25Zm.75-12h9v9h-9v-9Z" />
</svg>

View File

@@ -151,6 +151,7 @@
class="relative inline-flex h-6 w-11 flex-shrink-0 cursor-pointer rounded-full border-2 border-transparent transition-colors duration-200 ease-in-out focus:outline-none focus:ring-2 focus:ring-violet-500 focus:ring-offset-2 focus:ring-offset-theme-primary {toolsState.toolsEnabled ? 'bg-violet-600' : 'bg-theme-tertiary'}"
role="switch"
aria-checked={toolsState.toolsEnabled}
aria-label="Toggle all tools"
>
<span class="pointer-events-none inline-block h-5 w-5 transform rounded-full bg-white shadow ring-0 transition duration-200 ease-in-out {toolsState.toolsEnabled ? 'translate-x-5' : 'translate-x-0'}"></span>
</button>
@@ -194,6 +195,7 @@
type="button"
onclick={() => searchQuery = ''}
class="absolute right-3 top-1/2 -translate-y-1/2 text-theme-muted hover:text-theme-primary"
aria-label="Clear search"
>
<svg class="h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M6 18L18 6M6 6l12 12" />
@@ -289,6 +291,7 @@
class="relative inline-flex h-6 w-11 flex-shrink-0 cursor-pointer rounded-full border-2 border-transparent transition-colors duration-200 ease-in-out focus:outline-none focus:ring-2 focus:ring-blue-500 focus:ring-offset-2 focus:ring-offset-theme {tool.enabled ? 'bg-blue-600' : 'bg-theme-tertiary'}"
role="switch"
aria-checked={tool.enabled}
aria-label="Toggle {tool.definition.function.name} tool"
disabled={!toolsState.toolsEnabled}
>
<span class="pointer-events-none inline-block h-5 w-5 transform rounded-full bg-white shadow ring-0 transition duration-200 ease-in-out {tool.enabled ? 'translate-x-5' : 'translate-x-0'}"></span>
@@ -438,6 +441,7 @@
class="relative inline-flex h-6 w-11 flex-shrink-0 cursor-pointer rounded-full border-2 border-transparent transition-colors duration-200 ease-in-out focus:outline-none focus:ring-2 focus:ring-violet-500 focus:ring-offset-2 focus:ring-offset-theme {tool.enabled ? 'bg-violet-600' : 'bg-theme-tertiary'}"
role="switch"
aria-checked={tool.enabled}
aria-label="Toggle {tool.name} tool"
disabled={!toolsState.toolsEnabled}
>
<span class="pointer-events-none inline-block h-5 w-5 transform rounded-full bg-white shadow ring-0 transition duration-200 ease-in-out {tool.enabled ? 'translate-x-5' : 'translate-x-0'}"></span>

View File

@@ -3,7 +3,7 @@
*/
export { default as SettingsTabs } from './SettingsTabs.svelte';
export { default as GeneralTab } from './GeneralTab.svelte';
export { default as ModelsTab } from './ModelsTab.svelte';
export { default as AIProvidersTab } from './AIProvidersTab.svelte';
export { default as PromptsTab } from './PromptsTab.svelte';
export { default as ToolsTab } from './ToolsTab.svelte';
export { default as AgentsTab } from './AgentsTab.svelte';

View File

@@ -20,7 +20,7 @@
let { isOpen, onClose }: Props = $props();
let fileInput: HTMLInputElement;
let fileInput = $state<HTMLInputElement | null>(null);
let isDragOver = $state(false);
let selectedFile = $state<File | null>(null);
let validationResult = $state<ValidationResult | null>(null);
@@ -168,9 +168,11 @@
<div
class="fixed inset-0 z-50 flex items-center justify-center bg-black/60 backdrop-blur-sm"
onclick={handleBackdropClick}
onkeydown={handleKeydown}
role="dialog"
aria-modal="true"
aria-labelledby="import-dialog-title"
tabindex="-1"
>
<!-- Dialog -->
<div class="mx-4 w-full max-w-lg rounded-xl border border-theme bg-theme-primary shadow-2xl">

View File

@@ -163,9 +163,11 @@
<div
class="fixed inset-0 z-50 flex items-start justify-center bg-black/60 pt-[15vh] backdrop-blur-sm"
onclick={handleBackdropClick}
onkeydown={handleKeydown}
role="dialog"
aria-modal="true"
aria-labelledby="search-dialog-title"
tabindex="-1"
>
<!-- Dialog -->
<div class="mx-4 w-full max-w-2xl rounded-xl border border-theme bg-theme-primary shadow-2xl">

View File

@@ -61,9 +61,11 @@
<div
class="fixed inset-0 z-50 flex items-center justify-center bg-black/60 backdrop-blur-sm"
onclick={handleBackdropClick}
onkeydown={handleKeydown}
role="dialog"
aria-modal="true"
aria-labelledby="shortcuts-dialog-title"
tabindex="-1"
>
<!-- Dialog -->
<div class="mx-4 w-full max-w-md rounded-xl border border-theme bg-theme-primary shadow-2xl">

View File

@@ -248,6 +248,7 @@ print(json.dumps(result))`;
type="button"
onclick={onClose}
class="rounded-lg p-1.5 text-theme-muted hover:bg-theme-tertiary hover:text-theme-primary"
aria-label="Close dialog"
>
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5" viewBox="0 0 20 20" fill="currentColor">
<path fill-rule="evenodd" d="M4.293 4.293a1 1 0 011.414 0L10 8.586l4.293-4.293a1 1 0 111.414 1.414L11.414 10l4.293 4.293a1 1 0 01-1.414 1.414L10 11.414l-4.293 4.293a1 1 0 01-1.414-1.414L8.586 10 4.293 5.707a1 1 0 010-1.414z" clip-rule="evenodd" />
@@ -290,7 +291,7 @@ print(json.dumps(result))`;
<!-- Parameters -->
<div>
<div class="flex items-center justify-between">
<label class="block text-sm font-medium text-theme-secondary">Parameters</label>
<span class="block text-sm font-medium text-theme-secondary">Parameters</span>
<button
type="button"
onclick={addParameter}
@@ -335,6 +336,7 @@ print(json.dumps(result))`;
type="button"
onclick={() => removeParameter(index)}
class="text-theme-muted hover:text-red-400"
aria-label="Remove parameter"
>
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5" viewBox="0 0 20 20" fill="currentColor">
<path fill-rule="evenodd" d="M9 2a1 1 0 00-.894.553L7.382 4H4a1 1 0 000 2v10a2 2 0 002 2h8a2 2 0 002-2V6a1 1 0 100-2h-3.382l-.724-1.447A1 1 0 0011 2H9zM7 8a1 1 0 012 0v6a1 1 0 11-2 0V8zm5-1a1 1 0 00-1 1v6a1 1 0 102 0V8a1 1 0 00-1-1z" clip-rule="evenodd" />
@@ -352,8 +354,8 @@ print(json.dumps(result))`;
</div>
<!-- Implementation Type -->
<div>
<label class="block text-sm font-medium text-theme-secondary">Implementation</label>
<fieldset>
<legend class="block text-sm font-medium text-theme-secondary">Implementation</legend>
<div class="mt-2 flex flex-wrap gap-4">
<label class="flex items-center gap-2 text-theme-secondary">
<input
@@ -383,15 +385,15 @@ print(json.dumps(result))`;
HTTP Endpoint
</label>
</div>
</div>
</fieldset>
<!-- Code Editor (JavaScript or Python) -->
{#if implementation === 'javascript' || implementation === 'python'}
<div>
<div class="flex items-center justify-between mb-1">
<label class="block text-sm font-medium text-theme-secondary">
<span class="block text-sm font-medium text-theme-secondary">
{implementation === 'javascript' ? 'JavaScript' : 'Python'} Code
</label>
</span>
<div class="flex items-center gap-2">
<!-- Templates dropdown -->
<div class="relative">
@@ -500,8 +502,8 @@ print(json.dumps(result))`;
<p class="mt-1 text-sm text-red-400">{errors.endpoint}</p>
{/if}
</div>
<div>
<label class="block text-sm font-medium text-theme-secondary">HTTP Method</label>
<fieldset>
<legend class="block text-sm font-medium text-theme-secondary">HTTP Method</legend>
<div class="mt-2 flex gap-4">
<label class="flex items-center gap-2 text-theme-secondary">
<input type="radio" bind:group={httpMethod} value="GET" />
@@ -512,7 +514,7 @@ print(json.dumps(result))`;
POST
</label>
</div>
</div>
</fieldset>
<!-- Test button for HTTP -->
<button
@@ -548,6 +550,7 @@ print(json.dumps(result))`;
class="relative inline-flex h-6 w-11 cursor-pointer rounded-full transition-colors {enabled ? 'bg-blue-600' : 'bg-theme-tertiary'}"
role="switch"
aria-checked={enabled}
aria-label="Enable tool"
>
<span
class="pointer-events-none inline-block h-5 w-5 transform rounded-full bg-white shadow transition {enabled ? 'translate-x-5' : 'translate-x-0'}"

View File

@@ -209,7 +209,7 @@
<div class="space-y-4">
<!-- Input -->
<div>
<label class="block text-xs font-medium text-theme-secondary mb-1">Input Arguments (JSON)</label>
<span class="block text-xs font-medium text-theme-secondary mb-1">Input Arguments (JSON)</span>
<CodeEditor bind:value={testInput} language="json" minHeight="80px" />
</div>
@@ -237,7 +237,7 @@
<!-- Result -->
{#if testResult}
<div>
<label class="block text-xs font-medium text-theme-secondary mb-1">Result</label>
<span class="block text-xs font-medium text-theme-secondary mb-1">Result</span>
<div
class="rounded-lg p-3 text-sm font-mono overflow-x-auto {testResult.success
? 'bg-emerald-900/30 border border-emerald-500/30'

View File

@@ -0,0 +1,225 @@
/**
* Tests for Unified LLM Client
*/
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
// Types matching the backend response
interface ChatChunk {
model: string;
message?: {
role: string;
content: string;
};
done: boolean;
done_reason?: string;
total_duration?: number;
load_duration?: number;
prompt_eval_count?: number;
eval_count?: number;
}
interface Model {
name: string;
size: number;
digest: string;
modified_at: string;
}
describe('UnifiedLLMClient', () => {
let UnifiedLLMClient: typeof import('./client.js').UnifiedLLMClient;
let client: InstanceType<typeof UnifiedLLMClient>;
beforeEach(async () => {
vi.resetModules();
// Mock fetch
global.fetch = vi.fn();
const module = await import('./client.js');
UnifiedLLMClient = module.UnifiedLLMClient;
client = new UnifiedLLMClient();
});
afterEach(() => {
vi.restoreAllMocks();
});
describe('listModels', () => {
it('fetches models from unified API', async () => {
const mockModels: Model[] = [
{
name: 'llama3.2:8b',
size: 4500000000,
digest: 'abc123',
modified_at: '2024-01-15T10:00:00Z'
}
];
(global.fetch as ReturnType<typeof vi.fn>).mockResolvedValueOnce({
ok: true,
json: async () => ({ models: mockModels, backend: 'ollama' })
});
const result = await client.listModels();
expect(result.models).toEqual(mockModels);
expect(result.backend).toBe('ollama');
expect(global.fetch).toHaveBeenCalledWith(
expect.stringContaining('/api/v1/ai/models'),
expect.objectContaining({ method: 'GET' })
);
});
it('throws on API error', async () => {
(global.fetch as ReturnType<typeof vi.fn>).mockResolvedValueOnce({
ok: false,
status: 503,
statusText: 'Service Unavailable',
json: async () => ({ error: 'no active backend' })
});
await expect(client.listModels()).rejects.toThrow('no active backend');
});
});
describe('chat', () => {
it('sends chat request to unified API', async () => {
const mockResponse: ChatChunk = {
model: 'llama3.2:8b',
message: { role: 'assistant', content: 'Hello!' },
done: true,
total_duration: 1000000000,
eval_count: 10
};
(global.fetch as ReturnType<typeof vi.fn>).mockResolvedValueOnce({
ok: true,
json: async () => mockResponse
});
const result = await client.chat({
model: 'llama3.2:8b',
messages: [{ role: 'user', content: 'Hi' }]
});
expect(result.message?.content).toBe('Hello!');
expect(global.fetch).toHaveBeenCalledWith(
expect.stringContaining('/api/v1/ai/chat'),
expect.objectContaining({
method: 'POST',
body: expect.stringContaining('"model":"llama3.2:8b"')
})
);
});
});
describe('streamChat', () => {
it('streams chat responses as NDJSON', async () => {
const chunks: ChatChunk[] = [
{ model: 'llama3.2:8b', message: { role: 'assistant', content: 'Hello' }, done: false },
{ model: 'llama3.2:8b', message: { role: 'assistant', content: ' there' }, done: false },
{ model: 'llama3.2:8b', message: { role: 'assistant', content: '!' }, done: true }
];
// Create a mock readable stream
const mockBody = new ReadableStream({
start(controller) {
for (const chunk of chunks) {
controller.enqueue(new TextEncoder().encode(JSON.stringify(chunk) + '\n'));
}
controller.close();
}
});
(global.fetch as ReturnType<typeof vi.fn>).mockResolvedValueOnce({
ok: true,
body: mockBody
});
const receivedChunks: ChatChunk[] = [];
for await (const chunk of client.streamChat({
model: 'llama3.2:8b',
messages: [{ role: 'user', content: 'Hi' }]
})) {
receivedChunks.push(chunk);
}
expect(receivedChunks).toHaveLength(3);
expect(receivedChunks[0].message?.content).toBe('Hello');
expect(receivedChunks[2].done).toBe(true);
});
it('handles stream errors', async () => {
(global.fetch as ReturnType<typeof vi.fn>).mockResolvedValueOnce({
ok: false,
status: 500,
json: async () => ({ error: 'Internal Server Error' })
});
const generator = client.streamChat({
model: 'llama3.2:8b',
messages: [{ role: 'user', content: 'Hi' }]
});
await expect(generator.next()).rejects.toThrow('Internal Server Error');
});
});
describe('healthCheck', () => {
it('returns true when backend is healthy', async () => {
(global.fetch as ReturnType<typeof vi.fn>).mockResolvedValueOnce({
ok: true,
json: async () => ({ status: 'healthy' })
});
const result = await client.healthCheck('ollama');
expect(result).toBe(true);
expect(global.fetch).toHaveBeenCalledWith(
expect.stringContaining('/api/v1/ai/backends/ollama/health'),
expect.any(Object)
);
});
it('returns false when backend is unhealthy', async () => {
(global.fetch as ReturnType<typeof vi.fn>).mockResolvedValueOnce({
ok: false,
status: 503,
json: async () => ({ status: 'unhealthy', error: 'Connection refused' })
});
const result = await client.healthCheck('ollama');
expect(result).toBe(false);
});
});
describe('configuration', () => {
it('uses custom base URL', async () => {
const customClient = new UnifiedLLMClient({ baseUrl: 'http://custom:9090' });
(global.fetch as ReturnType<typeof vi.fn>).mockResolvedValueOnce({
ok: true,
json: async () => ({ models: [], backend: 'ollama' })
});
await customClient.listModels();
expect(global.fetch).toHaveBeenCalledWith(
'http://custom:9090/api/v1/ai/models',
expect.any(Object)
);
});
it('respects abort signal', async () => {
const controller = new AbortController();
controller.abort();
(global.fetch as ReturnType<typeof vi.fn>).mockRejectedValueOnce(
new DOMException('The user aborted a request.', 'AbortError')
);
await expect(client.listModels(controller.signal)).rejects.toThrow('aborted');
});
});
});

View File

@@ -0,0 +1,340 @@
/**
* Unified LLM Client
* Routes chat requests through the unified /api/v1/ai/* endpoints
* Supports Ollama, llama.cpp, and LM Studio backends transparently
*/
import type { BackendType } from '../stores/backends.svelte.js';
/** Message format (compatible with Ollama and OpenAI) */
export interface ChatMessage {
role: 'system' | 'user' | 'assistant' | 'tool';
content: string;
images?: string[];
tool_calls?: ToolCall[];
}
/** Tool call in assistant message */
export interface ToolCall {
function: {
name: string;
arguments: Record<string, unknown>;
};
}
/** Tool definition */
export interface ToolDefinition {
type: 'function';
function: {
name: string;
description: string;
parameters: {
type: 'object';
properties: Record<string, unknown>;
required?: string[];
};
};
}
/** Chat request options */
export interface ChatRequest {
model: string;
messages: ChatMessage[];
stream?: boolean;
format?: 'json' | object;
tools?: ToolDefinition[];
options?: ModelOptions;
keep_alive?: string;
}
/** Model-specific options */
export interface ModelOptions {
temperature?: number;
top_p?: number;
top_k?: number;
num_ctx?: number;
num_predict?: number;
stop?: string[];
seed?: number;
}
/** Chat response chunk (NDJSON streaming format) */
export interface ChatChunk {
model: string;
message?: ChatMessage;
done: boolean;
done_reason?: string;
total_duration?: number;
load_duration?: number;
prompt_eval_count?: number;
prompt_eval_duration?: number;
eval_count?: number;
eval_duration?: number;
error?: string;
}
/** Model information */
export interface Model {
name: string;
size: number;
digest: string;
modified_at: string;
details?: {
family?: string;
parameter_size?: string;
quantization_level?: string;
};
}
/** Models list response */
export interface ModelsResponse {
models: Model[];
backend: string;
}
/** Client configuration */
export interface UnifiedLLMClientConfig {
baseUrl?: string;
defaultTimeoutMs?: number;
fetchFn?: typeof fetch;
}
const DEFAULT_CONFIG = {
baseUrl: '',
defaultTimeoutMs: 120000
};
/**
* Unified LLM client that routes requests through the multi-backend API
*/
export class UnifiedLLMClient {
private readonly config: Required<Omit<UnifiedLLMClientConfig, 'fetchFn'>>;
private readonly fetchFn: typeof fetch;
constructor(config: UnifiedLLMClientConfig = {}) {
this.config = {
...DEFAULT_CONFIG,
...config
};
this.fetchFn = config.fetchFn ?? fetch;
}
/**
* Lists models from the active backend
*/
async listModels(signal?: AbortSignal): Promise<ModelsResponse> {
return this.request<ModelsResponse>('/api/v1/ai/models', {
method: 'GET',
signal
});
}
/**
* Non-streaming chat completion
*/
async chat(request: ChatRequest, signal?: AbortSignal): Promise<ChatChunk> {
return this.request<ChatChunk>('/api/v1/ai/chat', {
method: 'POST',
body: JSON.stringify({ ...request, stream: false }),
signal
});
}
/**
* Streaming chat completion (async generator)
* Yields NDJSON chunks as they arrive
*/
async *streamChat(
request: ChatRequest,
signal?: AbortSignal
): AsyncGenerator<ChatChunk, void, unknown> {
const url = `${this.config.baseUrl}/api/v1/ai/chat`;
const response = await this.fetchFn(url, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ ...request, stream: true }),
signal
});
if (!response.ok) {
const errorData = await response.json().catch(() => ({}));
throw new Error(errorData.error || `HTTP ${response.status}: ${response.statusText}`);
}
if (!response.body) {
throw new Error('No response body for streaming');
}
const reader = response.body.getReader();
const decoder = new TextDecoder();
let buffer = '';
try {
while (true) {
const { done, value } = await reader.read();
if (done) break;
buffer += decoder.decode(value, { stream: true });
// Process complete NDJSON lines
let newlineIndex: number;
while ((newlineIndex = buffer.indexOf('\n')) !== -1) {
const line = buffer.slice(0, newlineIndex).trim();
buffer = buffer.slice(newlineIndex + 1);
if (!line) continue;
try {
const chunk = JSON.parse(line) as ChatChunk;
// Check for error in chunk
if (chunk.error) {
throw new Error(chunk.error);
}
yield chunk;
// Stop if done
if (chunk.done) {
return;
}
} catch (e) {
if (e instanceof SyntaxError) {
console.warn('[UnifiedLLM] Failed to parse chunk:', line);
} else {
throw e;
}
}
}
}
} finally {
reader.releaseLock();
}
}
/**
* Streaming chat with callbacks (more ergonomic for UI)
*/
async streamChatWithCallbacks(
request: ChatRequest,
callbacks: {
onChunk?: (chunk: ChatChunk) => void;
onToken?: (token: string) => void;
onComplete?: (fullResponse: ChatChunk) => void;
onError?: (error: Error) => void;
},
signal?: AbortSignal
): Promise<string> {
let accumulatedContent = '';
let lastChunk: ChatChunk | null = null;
try {
for await (const chunk of this.streamChat(request, signal)) {
lastChunk = chunk;
callbacks.onChunk?.(chunk);
if (chunk.message?.content) {
accumulatedContent += chunk.message.content;
callbacks.onToken?.(chunk.message.content);
}
if (chunk.done && callbacks.onComplete) {
callbacks.onComplete(chunk);
}
}
} catch (error) {
if (callbacks.onError && error instanceof Error) {
callbacks.onError(error);
}
throw error;
}
return accumulatedContent;
}
/**
* Check health of a specific backend
*/
async healthCheck(type: BackendType, signal?: AbortSignal): Promise<boolean> {
try {
await this.request<{ status: string }>(`/api/v1/ai/backends/${type}/health`, {
method: 'GET',
signal,
timeoutMs: 5000
});
return true;
} catch {
return false;
}
}
/**
* Make an HTTP request to the unified API
*/
private async request<T>(
endpoint: string,
options: {
method: 'GET' | 'POST';
body?: string;
signal?: AbortSignal;
timeoutMs?: number;
}
): Promise<T> {
const { method, body, signal, timeoutMs = this.config.defaultTimeoutMs } = options;
const url = `${this.config.baseUrl}${endpoint}`;
// Create timeout controller
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
// Combine with external signal
const combinedSignal = signal ? this.combineSignals(signal, controller.signal) : controller.signal;
try {
const response = await this.fetchFn(url, {
method,
headers: body ? { 'Content-Type': 'application/json' } : undefined,
body,
signal: combinedSignal
});
clearTimeout(timeoutId);
if (!response.ok) {
const errorData = await response.json().catch(() => ({}));
throw new Error(errorData.error || `HTTP ${response.status}: ${response.statusText}`);
}
return (await response.json()) as T;
} catch (error) {
clearTimeout(timeoutId);
throw error;
}
}
/**
* Combines multiple AbortSignals into one
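* (Newer runtimes expose AbortSignal.any() for this; the manual combiner below keeps older environments working.)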
*/
private combineSignals(...signals: AbortSignal[]): AbortSignal {
const controller = new AbortController();
for (const signal of signals) {
if (signal.aborted) {
controller.abort(signal.reason);
break;
}
signal.addEventListener('abort', () => controller.abort(signal.reason), {
once: true,
signal: controller.signal
});
}
return controller.signal;
}
}
/** Default client instance */
export const unifiedLLMClient = new UnifiedLLMClient();
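//
// Minimal usage sketch (illustrative only; `messages` is assumed to come from the caller,
// and the model name from whatever listModels() reports for the active backend):
//
//   const { models } = await unifiedLLMClient.listModels();
//   const reply = await unifiedLLMClient.streamChatWithCallbacks(
//     { model: models[0].name, messages },
//     { onToken: (token) => console.log(token) }
//   );
//   const ollamaHealthy = await unifiedLLMClient.healthCheck('ollama');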

View File

@@ -0,0 +1,15 @@
/**
* Unified LLM Client exports
*/
export { UnifiedLLMClient, unifiedLLMClient } from './client.js';
export type {
ChatMessage,
ChatRequest,
ChatChunk,
Model,
ModelsResponse,
ModelOptions,
ToolCall,
ToolDefinition,
UnifiedLLMClientConfig
} from './client.js';

View File

@@ -18,7 +18,7 @@ import type { DocumentChunk } from './types';
let uuidCounter = 0;
beforeEach(() => {
uuidCounter = 0;
vi.spyOn(crypto, 'randomUUID').mockImplementation(() => `test-uuid-${++uuidCounter}`);
vi.spyOn(crypto, 'randomUUID').mockImplementation(() => `00000000-0000-0000-0000-00000000000${++uuidCounter}` as `${string}-${string}-${string}-${string}-${string}`);
});
afterEach(() => {

View File

@@ -23,11 +23,11 @@ function createMessageNode(
return {
id: id || crypto.randomUUID(),
parentId: null,
siblingIds: [],
childIds: [],
createdAt: new Date(),
message: {
role,
content,
timestamp: Date.now()
content
}
};
}

View File

@@ -41,7 +41,8 @@ function mockStreamResponse(chunks: unknown[]): Response {
}
describe('OllamaClient', () => {
let mockFetch: ReturnType<typeof vi.fn>;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
let mockFetch: any;
let client: OllamaClient;
beforeEach(() => {
@@ -228,7 +229,11 @@ describe('OllamaClient', () => {
tools: [
{
type: 'function',
function: { name: 'get_time', description: 'Get current time' }
function: {
name: 'get_time',
description: 'Get current time',
parameters: { type: 'object', properties: {} }
}
}
]
});

View File

@@ -15,8 +15,7 @@ function createMessage(
): Message {
return {
role,
content,
timestamp: Date.now()
content
};
}

View File

@@ -0,0 +1,301 @@
/**
* Backends state management using Svelte 5 runes
* Manages multiple LLM backend configurations (Ollama, llama.cpp, LM Studio)
*/
/** Backend type identifiers */
export type BackendType = 'ollama' | 'llamacpp' | 'lmstudio';
/** Backend connection status */
export type BackendStatus = 'connected' | 'disconnected' | 'unknown';
/** Backend capabilities */
export interface BackendCapabilities {
canListModels: boolean;
canPullModels: boolean;
canDeleteModels: boolean;
canCreateModels: boolean;
canStreamChat: boolean;
canEmbed: boolean;
}
/** Backend information */
export interface BackendInfo {
type: BackendType;
baseUrl: string;
status: BackendStatus;
capabilities: BackendCapabilities;
version?: string;
error?: string;
}
/** Discovery result for a backend endpoint */
export interface DiscoveryResult {
type: BackendType;
baseUrl: string;
available: boolean;
version?: string;
error?: string;
}
/** Health check result */
export interface HealthResult {
healthy: boolean;
error?: string;
}
/** API response wrapper */
interface ApiResponse<T> {
data?: T;
error?: string;
}
/** Get base URL for API calls */
function getApiBaseUrl(): string {
if (typeof window !== 'undefined') {
const envUrl = (import.meta.env as Record<string, string>)?.PUBLIC_BACKEND_URL;
if (envUrl) return envUrl;
}
return '';
}
/** Make an API request */
async function apiRequest<T>(
method: string,
path: string,
body?: unknown
): Promise<ApiResponse<T>> {
const baseUrl = getApiBaseUrl();
try {
const response = await fetch(`${baseUrl}${path}`, {
method,
headers: {
'Content-Type': 'application/json'
},
body: body ? JSON.stringify(body) : undefined
});
if (!response.ok) {
const errorData = await response.json().catch(() => ({}));
return { error: errorData.error || `HTTP ${response.status}: ${response.statusText}` };
}
const data = await response.json();
return { data };
} catch (err) {
if (err instanceof Error) {
return { error: err.message };
}
return { error: 'Unknown error occurred' };
}
}
/** Backends state class with reactive properties */
export class BackendsState {
/** All configured backends */
backends = $state<BackendInfo[]>([]);
/** Currently active backend type */
activeType = $state<BackendType | null>(null);
/** Loading state */
isLoading = $state(false);
/** Discovering state */
isDiscovering = $state(false);
/** Error state */
error = $state<string | null>(null);
/** Promise that resolves when initial load is complete */
private _readyPromise: Promise<void> | null = null;
private _readyResolve: (() => void) | null = null;
/** Derived: the currently active backend info */
get activeBackend(): BackendInfo | null {
if (!this.activeType) return null;
return this.backends.find((b) => b.type === this.activeType) ?? null;
}
/** Derived: whether the active backend can pull models (Ollama only) */
get canPullModels(): boolean {
return this.activeBackend?.capabilities.canPullModels ?? false;
}
/** Derived: whether the active backend can delete models (Ollama only) */
get canDeleteModels(): boolean {
return this.activeBackend?.capabilities.canDeleteModels ?? false;
}
/** Derived: whether the active backend can create custom models (Ollama only) */
get canCreateModels(): boolean {
return this.activeBackend?.capabilities.canCreateModels ?? false;
}
/** Derived: connected backends */
get connectedBackends(): BackendInfo[] {
return this.backends.filter((b) => b.status === 'connected');
}
constructor() {
// Create ready promise
this._readyPromise = new Promise((resolve) => {
this._readyResolve = resolve;
});
// Load backends on initialization (client-side only)
if (typeof window !== 'undefined') {
this.load();
} else {
// SSR: resolve immediately
this._readyResolve?.();
}
}
/** Wait for initial load to complete */
async ready(): Promise<void> {
return this._readyPromise ?? Promise.resolve();
}
/**
* Load backends from the API
*/
async load(): Promise<void> {
this.isLoading = true;
this.error = null;
try {
const result = await apiRequest<{ backends: BackendInfo[]; active: string }>(
'GET',
'/api/v1/ai/backends'
);
if (result.data) {
this.backends = result.data.backends || [];
this.activeType = (result.data.active as BackendType) || null;
} else if (result.error) {
this.error = result.error;
}
} catch (err) {
this.error = err instanceof Error ? err.message : 'Failed to load backends';
} finally {
this.isLoading = false;
this._readyResolve?.();
}
}
/**
* Discover available backends by probing default endpoints
*/
async discover(endpoints?: Array<{ type: BackendType; baseUrl: string }>): Promise<DiscoveryResult[]> {
this.isDiscovering = true;
this.error = null;
try {
const result = await apiRequest<{ results: DiscoveryResult[] }>(
'POST',
'/api/v1/ai/backends/discover',
endpoints ? { endpoints } : {}
);
if (result.data?.results) {
return result.data.results;
} else if (result.error) {
this.error = result.error;
}
return [];
} catch (err) {
this.error = err instanceof Error ? err.message : 'Failed to discover backends';
return [];
} finally {
this.isDiscovering = false;
}
}
/**
* Set the active backend
*/
async setActive(type: BackendType): Promise<boolean> {
this.error = null;
try {
const result = await apiRequest<{ active: string }>('POST', '/api/v1/ai/backends/active', {
type
});
if (result.data) {
this.activeType = result.data.active as BackendType;
return true;
} else if (result.error) {
this.error = result.error;
}
return false;
} catch (err) {
this.error = err instanceof Error ? err.message : 'Failed to set active backend';
return false;
}
}
/**
* Check the health of a specific backend
*/
async checkHealth(type: BackendType): Promise<HealthResult> {
try {
const result = await apiRequest<{ status: string; error?: string }>(
'GET',
`/api/v1/ai/backends/${type}/health`
);
if (result.data) {
return {
healthy: result.data.status === 'healthy',
error: result.data.error
};
} else {
return {
healthy: false,
error: result.error
};
}
} catch (err) {
return {
healthy: false,
error: err instanceof Error ? err.message : 'Health check failed'
};
}
}
/**
* Update local backend configuration (URL)
* Note: This updates local state only; backend registration happens via discovery
*/
updateConfig(type: BackendType, config: { baseUrl?: string }): void {
this.backends = this.backends.map((b) => {
if (b.type === type) {
return {
...b,
...config
};
}
return b;
});
}
/**
* Get a backend by type
*/
get(type: BackendType): BackendInfo | undefined {
return this.backends.find((b) => b.type === type);
}
/**
* Clear any error state
*/
clearError(): void {
this.error = null;
}
}
/** Singleton backends state instance */
export const backendsState = new BackendsState();
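//
// Usage sketch (illustrative only; assumes it runs client-side with the API routes above reachable):
//
//   await backendsState.ready();                   // wait for the initial load
//   const found = await backendsState.discover();  // probe default endpoints
//   const first = found.find((r) => r.available);
//   if (first) await backendsState.setActive(first.type);
//   const health = await backendsState.checkHealth('ollama');
//   console.log(backendsState.activeBackend?.type, health.healthy);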

View File

@@ -0,0 +1,386 @@
/**
* Tests for BackendsState store
*/
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
// Types for the backends API
interface BackendInfo {
type: 'ollama' | 'llamacpp' | 'lmstudio';
baseUrl: string;
status: 'connected' | 'disconnected' | 'unknown';
capabilities: BackendCapabilities;
version?: string;
error?: string;
}
interface BackendCapabilities {
canListModels: boolean;
canPullModels: boolean;
canDeleteModels: boolean;
canCreateModels: boolean;
canStreamChat: boolean;
canEmbed: boolean;
}
interface DiscoveryResult {
type: 'ollama' | 'llamacpp' | 'lmstudio';
baseUrl: string;
available: boolean;
version?: string;
error?: string;
}
describe('BackendsState', () => {
let BackendsState: typeof import('./backends.svelte.js').BackendsState;
let backendsState: InstanceType<typeof BackendsState>;
beforeEach(async () => {
// Reset modules for fresh state
vi.resetModules();
// Mock fetch globally with default empty response for initial load
global.fetch = vi.fn().mockResolvedValue({
ok: true,
json: async () => ({ backends: [], active: '' })
});
// Import fresh module
const module = await import('./backends.svelte.js');
BackendsState = module.BackendsState;
backendsState = new BackendsState();
// Wait for initial load to complete
await backendsState.ready();
});
afterEach(() => {
vi.restoreAllMocks();
});
describe('initialization', () => {
it('starts with empty backends array', () => {
expect(backendsState.backends).toEqual([]);
});
it('starts with no active backend', () => {
expect(backendsState.activeType).toBeNull();
});
it('starts with not loading', () => {
expect(backendsState.isLoading).toBe(false);
});
it('starts with no error', () => {
expect(backendsState.error).toBeNull();
});
});
describe('load', () => {
it('loads backends from API', async () => {
const mockBackends: BackendInfo[] = [
{
type: 'ollama',
baseUrl: 'http://localhost:11434',
status: 'connected',
capabilities: {
canListModels: true,
canPullModels: true,
canDeleteModels: true,
canCreateModels: true,
canStreamChat: true,
canEmbed: true
},
version: '0.3.0'
}
];
(global.fetch as ReturnType<typeof vi.fn>).mockResolvedValueOnce({
ok: true,
json: async () => ({ backends: mockBackends, active: 'ollama' })
});
await backendsState.load();
expect(backendsState.backends).toEqual(mockBackends);
expect(backendsState.activeType).toBe('ollama');
expect(backendsState.isLoading).toBe(false);
});
it('handles load error', async () => {
(global.fetch as ReturnType<typeof vi.fn>).mockResolvedValueOnce({
ok: false,
status: 500,
statusText: 'Internal Server Error',
json: async () => ({ error: 'Server error' })
});
await backendsState.load();
expect(backendsState.error).not.toBeNull();
expect(backendsState.isLoading).toBe(false);
});
it('handles network error', async () => {
(global.fetch as ReturnType<typeof vi.fn>).mockRejectedValueOnce(
new Error('Network error')
);
await backendsState.load();
expect(backendsState.error).toBe('Network error');
expect(backendsState.isLoading).toBe(false);
});
});
describe('discover', () => {
it('discovers available backends', async () => {
const mockResults: DiscoveryResult[] = [
{
type: 'ollama',
baseUrl: 'http://localhost:11434',
available: true,
version: '0.3.0'
},
{
type: 'llamacpp',
baseUrl: 'http://localhost:8081',
available: true
},
{
type: 'lmstudio',
baseUrl: 'http://localhost:1234',
available: false,
error: 'Connection refused'
}
];
(global.fetch as ReturnType<typeof vi.fn>).mockResolvedValueOnce({
ok: true,
json: async () => ({ results: mockResults })
});
const results = await backendsState.discover();
expect(results).toEqual(mockResults);
expect(global.fetch).toHaveBeenCalledWith(
expect.stringContaining('/api/v1/ai/backends/discover'),
expect.objectContaining({ method: 'POST' })
);
});
it('returns empty array on error', async () => {
(global.fetch as ReturnType<typeof vi.fn>).mockRejectedValueOnce(
new Error('Network error')
);
const results = await backendsState.discover();
expect(results).toEqual([]);
expect(backendsState.error).toBe('Network error');
});
});
describe('setActive', () => {
it('sets active backend', async () => {
// First load some backends
(global.fetch as ReturnType<typeof vi.fn>).mockResolvedValueOnce({
ok: true,
json: async () => ({
backends: [
{ type: 'ollama', baseUrl: 'http://localhost:11434', status: 'connected' }
],
active: ''
})
});
await backendsState.load();
// Then set active
(global.fetch as ReturnType<typeof vi.fn>).mockResolvedValueOnce({
ok: true,
json: async () => ({ active: 'ollama' })
});
const success = await backendsState.setActive('ollama');
expect(success).toBe(true);
expect(backendsState.activeType).toBe('ollama');
});
it('handles setActive error', async () => {
(global.fetch as ReturnType<typeof vi.fn>).mockResolvedValueOnce({
ok: false,
status: 400,
statusText: 'Bad Request',
json: async () => ({ error: 'Backend not registered' })
});
const success = await backendsState.setActive('llamacpp');
expect(success).toBe(false);
expect(backendsState.error).not.toBeNull();
});
});
describe('checkHealth', () => {
it('checks backend health', async () => {
(global.fetch as ReturnType<typeof vi.fn>).mockResolvedValueOnce({
ok: true,
json: async () => ({ status: 'healthy' })
});
const result = await backendsState.checkHealth('ollama');
expect(result.healthy).toBe(true);
expect(global.fetch).toHaveBeenCalledWith(
expect.stringContaining('/api/v1/ai/backends/ollama/health'),
expect.any(Object)
);
});
it('returns unhealthy on error response', async () => {
(global.fetch as ReturnType<typeof vi.fn>).mockResolvedValueOnce({
ok: false,
status: 503,
statusText: 'Service Unavailable',
json: async () => ({ status: 'unhealthy', error: 'Connection refused' })
});
const result = await backendsState.checkHealth('ollama');
expect(result.healthy).toBe(false);
expect(result.error).toBe('Connection refused');
});
});
describe('derived state', () => {
it('activeBackend returns the active backend info', async () => {
const mockBackends: BackendInfo[] = [
{
type: 'ollama',
baseUrl: 'http://localhost:11434',
status: 'connected',
capabilities: {
canListModels: true,
canPullModels: true,
canDeleteModels: true,
canCreateModels: true,
canStreamChat: true,
canEmbed: true
}
},
{
type: 'llamacpp',
baseUrl: 'http://localhost:8081',
status: 'connected',
capabilities: {
canListModels: true,
canPullModels: false,
canDeleteModels: false,
canCreateModels: false,
canStreamChat: true,
canEmbed: true
}
}
];
(global.fetch as ReturnType<typeof vi.fn>).mockResolvedValueOnce({
ok: true,
json: async () => ({ backends: mockBackends, active: 'llamacpp' })
});
await backendsState.load();
const active = backendsState.activeBackend;
expect(active?.type).toBe('llamacpp');
expect(active?.baseUrl).toBe('http://localhost:8081');
});
it('canPullModels is true only for Ollama', async () => {
const mockBackends: BackendInfo[] = [
{
type: 'ollama',
baseUrl: 'http://localhost:11434',
status: 'connected',
capabilities: {
canListModels: true,
canPullModels: true,
canDeleteModels: true,
canCreateModels: true,
canStreamChat: true,
canEmbed: true
}
}
];
(global.fetch as ReturnType<typeof vi.fn>).mockResolvedValueOnce({
ok: true,
json: async () => ({ backends: mockBackends, active: 'ollama' })
});
await backendsState.load();
expect(backendsState.canPullModels).toBe(true);
});
it('canPullModels is false for llama.cpp', async () => {
const mockBackends: BackendInfo[] = [
{
type: 'llamacpp',
baseUrl: 'http://localhost:8081',
status: 'connected',
capabilities: {
canListModels: true,
canPullModels: false,
canDeleteModels: false,
canCreateModels: false,
canStreamChat: true,
canEmbed: true
}
}
];
(global.fetch as ReturnType<typeof vi.fn>).mockResolvedValueOnce({
ok: true,
json: async () => ({ backends: mockBackends, active: 'llamacpp' })
});
await backendsState.load();
expect(backendsState.canPullModels).toBe(false);
});
});
describe('updateConfig', () => {
it('updates backend URL', async () => {
// Load initial backends
(global.fetch as ReturnType<typeof vi.fn>).mockResolvedValueOnce({
ok: true,
json: async () => ({
backends: [
{
type: 'ollama',
baseUrl: 'http://localhost:11434',
status: 'connected',
capabilities: {
canListModels: true,
canPullModels: true,
canDeleteModels: true,
canCreateModels: true,
canStreamChat: true,
canEmbed: true
}
}
],
active: 'ollama'
})
});
await backendsState.load();
// Update config
backendsState.updateConfig('ollama', { baseUrl: 'http://192.168.1.100:11434' });
const backend = backendsState.backends.find((b) => b.type === 'ollama');
expect(backend?.baseUrl).toBe('http://192.168.1.100:11434');
});
});
});

View File

@@ -8,6 +8,7 @@
import { onMount } from 'svelte';
import { goto } from '$app/navigation';
import { chatState, conversationsState, modelsState, uiState, promptsState, versionState, projectsState } from '$lib/stores';
import { backendsState, type BackendType } from '$lib/stores/backends.svelte';
import { getAllConversations } from '$lib/storage';
import { syncManager } from '$lib/backend';
import { keyboardShortcuts, getShortcuts } from '$lib/utils';
@@ -22,6 +23,12 @@
import type { LayoutData } from './$types';
import type { Snippet } from 'svelte';
// LocalStorage key for persisting backend selection
const BACKEND_STORAGE_KEY = 'vessel:selectedBackend';
// Flag to track if initial backend restoration is complete
let backendRestoreComplete = $state(false);
interface Props {
data: LayoutData;
children: Snippet;
@@ -35,6 +42,88 @@
// Shortcuts modal state
let showShortcutsModal = $state(false);
// Model name for non-Ollama backends
let nonOllamaModelName = $state<string | null>(null);
let modelFetchFailed = $state(false);
// Fetch model name when backend changes to non-Ollama
$effect(() => {
const backendType = backendsState.activeType;
if (backendType && backendType !== 'ollama') {
fetchNonOllamaModel();
} else {
nonOllamaModelName = null;
modelFetchFailed = false;
}
});
/**
* Fetch model name from unified API for non-Ollama backends
*/
async function fetchNonOllamaModel(): Promise<void> {
modelFetchFailed = false;
nonOllamaModelName = null;
try {
const response = await fetch('/api/v1/ai/models');
if (response.ok) {
const data = await response.json();
if (data.models && data.models.length > 0) {
// Extract just the model name (strip path/extension for cleaner display)
const fullName = data.models[0].name;
nonOllamaModelName = fullName.split(/[\\/]/).pop()?.replace(/\.gguf$/i, '') ?? fullName;
} else {
// No models loaded
modelFetchFailed = true;
}
} else {
modelFetchFailed = true;
}
} catch (err) {
console.error('Failed to fetch model from backend:', err);
modelFetchFailed = true;
}
}
/**
* Persist backend selection to localStorage
*/
function persistBackendSelection(type: BackendType): void {
try {
localStorage.setItem(BACKEND_STORAGE_KEY, type);
} catch (err) {
console.error('Failed to persist backend selection:', err);
}
}
/**
* Restore last selected backend if it's available
*/
async function restoreLastBackend(): Promise<void> {
try {
const lastBackend = localStorage.getItem(BACKEND_STORAGE_KEY) as BackendType | null;
if (lastBackend && lastBackend !== backendsState.activeType) {
// Check if the last backend is connected
const backend = backendsState.get(lastBackend);
if (backend?.status === 'connected') {
await backendsState.setActive(lastBackend);
}
}
} catch (err) {
console.error('Failed to restore backend selection:', err);
} finally {
// Mark restore as complete so persistence effect can start working
backendRestoreComplete = true;
}
}
// Watch for backend changes and persist (only after initial restore is complete)
$effect(() => {
const activeType = backendsState.activeType;
if (activeType && backendRestoreComplete) {
persistBackendSelection(activeType);
}
});
onMount(() => {
// Initialize UI state (handles responsive detection, theme, etc.)
uiState.initialize();
@@ -68,6 +157,9 @@
// Load projects from IndexedDB
projectsState.load();
// Restore last selected backend after backends finish loading
backendsState.ready().then(() => restoreLastBackend());
// Schedule background migration for chat indexing (runs after 5 seconds)
scheduleMigration(5000);
@@ -167,7 +259,30 @@
<header class="relative z-40 flex-shrink-0">
<TopNav onNavigateHome={handleNavigateHome}>
{#snippet modelSelect()}
<ModelSelect />
{#if backendsState.activeType === 'ollama'}
<ModelSelect />
{:else if backendsState.activeBackend}
<!-- Non-Ollama backend indicator with model name -->
<div class="flex items-center gap-2 rounded-lg border border-theme bg-theme-secondary/50 px-3 py-2 text-sm">
<svg class="h-4 w-4 text-blue-500" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M5 12h14M5 12a2 2 0 01-2-2V6a2 2 0 012-2h14a2 2 0 012 2v4a2 2 0 01-2 2M5 12a2 2 0 00-2 2v4a2 2 0 002 2h14a2 2 0 002-2v-4a2 2 0 00-2-2" />
</svg>
<div class="flex flex-col">
<span class="font-medium text-theme-primary">
{#if nonOllamaModelName}
{nonOllamaModelName}
{:else if modelFetchFailed}
<span class="text-amber-400">No model loaded</span>
{:else}
Loading...
{/if}
</span>
<span class="text-xs text-theme-muted">
{backendsState.activeType === 'llamacpp' ? 'llama.cpp' : 'LM Studio'}
</span>
</div>
</div>
{/if}
{/snippet}
</TopNav>
</header>

View File

@@ -7,11 +7,13 @@
import { onMount } from 'svelte';
import { chatState, conversationsState, modelsState, toolsState, promptsState } from '$lib/stores';
import { backendsState } from '$lib/stores/backends.svelte';
import { resolveSystemPrompt } from '$lib/services/prompt-resolution.js';
import { streamingMetricsState } from '$lib/stores/streaming-metrics.svelte';
import { settingsState } from '$lib/stores/settings.svelte';
import { createConversation as createStoredConversation, addMessage as addStoredMessage, updateConversation, saveAttachments } from '$lib/storage';
import { ollamaClient } from '$lib/ollama';
import { unifiedLLMClient } from '$lib/llm';
import type { OllamaMessage, OllamaToolDefinition, OllamaToolCall } from '$lib/ollama';
import { getFunctionModel, USE_FUNCTION_MODEL, runToolCalls, formatToolResultsForChat } from '$lib/tools';
import { searchSimilar, formatResultsAsContext, getKnowledgeBaseStats } from '$lib/memory';
@@ -80,9 +82,28 @@
* Creates a new conversation and starts streaming the response
*/
async function handleFirstMessage(content: string, images?: string[], attachments?: FileAttachment[]): Promise<void> {
const model = modelsState.selectedId;
// Get model name based on active backend
let model: string | null = null;
if (backendsState.activeType === 'ollama') {
model = modelsState.selectedId;
} else if (backendsState.activeType === 'llamacpp' || backendsState.activeType === 'lmstudio') {
// For OpenAI-compatible backends, fetch model from the unified API
try {
const response = await fetch('/api/v1/ai/models');
if (response.ok) {
const data = await response.json();
if (data.models && data.models.length > 0) {
model = data.models[0].name;
}
}
} catch (err) {
console.error('Failed to get model from backend:', err);
}
}
if (!model) {
console.error('No model selected');
console.error('No model available');
return;
}
@@ -298,92 +319,121 @@
let streamingThinking = '';
let thinkingClosed = false;
await ollamaClient.streamChatWithCallbacks(
{ model: chatModel, messages, tools, think: useNativeThinking, options: settingsState.apiParameters },
{
onThinkingToken: (token) => {
// Clear "Processing..." on first token
if (needsClearOnFirstToken) {
chatState.setStreamContent('');
needsClearOnFirstToken = false;
}
// Accumulate thinking and update the message
if (!streamingThinking) {
// Start the thinking block
chatState.appendToStreaming('<think>');
}
streamingThinking += token;
chatState.appendToStreaming(token);
streamingMetricsState.incrementTokens();
},
onToken: (token) => {
// Clear "Processing..." on first token
if (needsClearOnFirstToken) {
chatState.setStreamContent('');
needsClearOnFirstToken = false;
}
// Close thinking block when content starts
if (streamingThinking && !thinkingClosed) {
chatState.appendToStreaming('</think>\n\n');
thinkingClosed = true;
}
chatState.appendToStreaming(token);
streamingMetricsState.incrementTokens();
},
onToolCall: (toolCalls) => {
pendingToolCalls = toolCalls;
},
onComplete: async () => {
// Close thinking block if it was opened but not closed (e.g., tool calls without content)
if (streamingThinking && !thinkingClosed) {
chatState.appendToStreaming('</think>\n\n');
thinkingClosed = true;
}
chatState.finishStreaming();
streamingMetricsState.endStream();
// Handle tool calls if received
if (pendingToolCalls && pendingToolCalls.length > 0) {
await executeToolsAndContinue(
model,
assistantMessageId,
userMessageId,
pendingToolCalls,
conversationId
);
return;
}
// Persist assistant message with the SAME ID as chatState
const node = chatState.messageTree.get(assistantMessageId);
if (node) {
await addStoredMessage(
conversationId,
{ role: 'assistant', content: node.message.content },
userMessageId,
assistantMessageId
);
await updateConversation(conversationId, {});
conversationsState.update(conversationId, {});
// Generate a smarter title in the background (don't await)
generateSmartTitle(conversationId, content, node.message.content);
// Update URL now that streaming is complete
replaceState(`/chat/${conversationId}`, {});
}
},
onError: (error) => {
console.error('Streaming error:', error);
// Show error to user instead of leaving "Processing..."
const errorMsg = error instanceof Error ? error.message : 'Unknown error';
chatState.setStreamContent(`⚠️ Error: ${errorMsg}`);
chatState.finishStreaming();
streamingMetricsState.endStream();
}
// Helper to handle completion (shared by both backends)
const handleComplete = async () => {
// Close thinking block if it was opened but not closed
if (streamingThinking && !thinkingClosed) {
chatState.appendToStreaming('</think>\n\n');
thinkingClosed = true;
}
);
chatState.finishStreaming();
streamingMetricsState.endStream();
// Handle tool calls if received (Ollama only)
if (pendingToolCalls && pendingToolCalls.length > 0) {
await executeToolsAndContinue(
model,
assistantMessageId,
userMessageId,
pendingToolCalls,
conversationId
);
return;
}
// Persist assistant message with the SAME ID as chatState
const node = chatState.messageTree.get(assistantMessageId);
if (node) {
await addStoredMessage(
conversationId,
{ role: 'assistant', content: node.message.content },
userMessageId,
assistantMessageId
);
await updateConversation(conversationId, {});
conversationsState.update(conversationId, {});
// Generate a smarter title in the background (don't await)
generateSmartTitle(conversationId, content, node.message.content);
// Update URL now that streaming is complete
replaceState(`/chat/${conversationId}`, {});
}
};
// Helper to handle errors (shared by both backends)
const handleError = (error: unknown) => {
console.error('Streaming error:', error);
const errorMsg = error instanceof Error ? error.message : 'Unknown error';
chatState.setStreamContent(`⚠️ Error: ${errorMsg}`);
chatState.finishStreaming();
streamingMetricsState.endStream();
};
// Use appropriate client based on active backend
if (backendsState.activeType === 'ollama') {
// Ollama: full features including tools and thinking
await ollamaClient.streamChatWithCallbacks(
{ model: chatModel, messages, tools, think: useNativeThinking, options: settingsState.apiParameters },
{
onThinkingToken: (token) => {
if (needsClearOnFirstToken) {
chatState.setStreamContent('');
needsClearOnFirstToken = false;
}
if (!streamingThinking) {
chatState.appendToStreaming('<think>');
}
streamingThinking += token;
chatState.appendToStreaming(token);
streamingMetricsState.incrementTokens();
},
onToken: (token) => {
if (needsClearOnFirstToken) {
chatState.setStreamContent('');
needsClearOnFirstToken = false;
}
if (streamingThinking && !thinkingClosed) {
chatState.appendToStreaming('</think>\n\n');
thinkingClosed = true;
}
chatState.appendToStreaming(token);
streamingMetricsState.incrementTokens();
},
onToolCall: (toolCalls) => {
pendingToolCalls = toolCalls;
},
onComplete: handleComplete,
onError: handleError
}
);
} else {
// llama.cpp / LM Studio: use unified API (no tools/thinking support)
try {
await unifiedLLMClient.streamChatWithCallbacks(
{
model: chatModel,
messages: messages.map(m => ({ role: m.role, content: m.content })),
stream: true
},
{
onToken: (token) => {
if (needsClearOnFirstToken) {
chatState.setStreamContent('');
needsClearOnFirstToken = false;
}
chatState.appendToStreaming(token);
streamingMetricsState.incrementTokens();
},
onComplete: handleComplete,
onError: handleError
}
);
} catch (error) {
handleError(error);
}
}
} catch (error) {
console.error('Failed to send message:', error);
// Show error to user

View File

@@ -21,11 +21,11 @@
let currentConversationId = $state<string | null>(null);
let isLoading = $state(false);
// Extract first message from data and clear from URL
let initialMessage = $state<string | null>(data.firstMessage);
// Extract first message from data (captured once per page load)
const initialMessage = $derived(data.firstMessage);
$effect(() => {
// Clear firstMessage from URL to keep it clean
if (data.firstMessage && $page.url.searchParams.has('firstMessage')) {
if (initialMessage && $page.url.searchParams.has('firstMessage')) {
const url = new URL($page.url);
url.searchParams.delete('firstMessage');
replaceState(url, {});

View File

@@ -27,7 +27,7 @@
let dragOver = $state(false);
// File input reference
let fileInput: HTMLInputElement;
let fileInput = $state<HTMLInputElement | null>(null);
// Load documents on mount
onMount(async () => {

View File

@@ -466,7 +466,7 @@
<path stroke-linecap="round" stroke-linejoin="round" d="M12 8v4m0 4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
</svg>
<span>{deleteError}</span>
<button type="button" onclick={() => deleteError = null} class="ml-auto text-red-400 hover:text-red-300">
<button type="button" onclick={() => deleteError = null} class="ml-auto text-red-400 hover:text-red-300" aria-label="Dismiss error">
<svg xmlns="http://www.w3.org/2000/svg" class="h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M6 18L18 6M6 6l12 12" />
</svg>
@@ -987,6 +987,7 @@
onclick={() => modelRegistry.prevPage()}
disabled={!modelRegistry.hasPrevPage}
class="rounded-lg border border-theme bg-theme-secondary px-3 py-2 text-sm text-theme-muted transition-colors hover:bg-theme-tertiary hover:text-theme-primary disabled:cursor-not-allowed disabled:opacity-50"
aria-label="Previous page"
>
<svg xmlns="http://www.w3.org/2000/svg" class="h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M15 19l-7-7 7-7" />
@@ -1002,6 +1003,7 @@
onclick={() => modelRegistry.nextPage()}
disabled={!modelRegistry.hasNextPage}
class="rounded-lg border border-theme bg-theme-secondary px-3 py-2 text-sm text-theme-muted transition-colors hover:bg-theme-tertiary hover:text-theme-primary disabled:cursor-not-allowed disabled:opacity-50"
aria-label="Next page"
>
<svg xmlns="http://www.w3.org/2000/svg" class="h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M9 5l7 7-7 7" />
@@ -1024,6 +1026,7 @@
type="button"
onclick={closeDetails}
class="rounded p-1 text-theme-muted transition-colors hover:bg-theme-tertiary hover:text-theme-primary"
aria-label="Close model details"
>
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M6 18L18 6M6 6l12 12" />

View File

@@ -46,7 +46,7 @@
let isLoadingDocs = $state(false);
let selectedEmbeddingModel = $state(DEFAULT_EMBEDDING_MODEL);
let activeTab = $state<'chats' | 'files' | 'links'>('chats');
let fileInput: HTMLInputElement;
let fileInput = $state<HTMLInputElement | null>(null);
let dragOver = $state(false);
let isSearching = $state(false);
let searchResults = $state<ChatSearchResult[]>([]);
@@ -399,6 +399,7 @@
onclick={() => showProjectModal = true}
class="rounded-lg p-2 text-theme-muted transition-colors hover:bg-theme-secondary hover:text-theme-primary"
title="Project settings"
aria-label="Project settings"
>
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M9.594 3.94c.09-.542.56-.94 1.11-.94h2.593c.55 0 1.02.398 1.11.94l.213 1.281c.063.374.313.686.645.87.074.04.147.083.22.127.325.196.72.257 1.075.124l1.217-.456a1.125 1.125 0 0 1 1.37.49l1.296 2.247a1.125 1.125 0 0 1-.26 1.431l-1.003.827c-.293.241-.438.613-.43.992a7.723 7.723 0 0 1 0 .255c-.008.378.137.75.43.991l1.004.827c.424.35.534.955.26 1.43l-1.298 2.247a1.125 1.125 0 0 1-1.369.491l-1.217-.456c-.355-.133-.75-.072-1.076.124a6.47 6.47 0 0 1-.22.128c-.331.183-.581.495-.644.869l-.213 1.281c-.09.543-.56.94-1.11.94h-2.594c-.55 0-1.019-.398-1.11-.94l-.213-1.281c-.062-.374-.312-.686-.644-.87a6.52 6.52 0 0 1-.22-.127c-.325-.196-.72-.257-1.076-.124l-1.217.456a1.125 1.125 0 0 1-1.369-.49l-1.297-2.247a1.125 1.125 0 0 1 .26-1.431l1.004-.827c.292-.24.437-.613.43-.991a6.932 6.932 0 0 1 0-.255c.007-.38-.138-.751-.43-.992l-1.004-.827a1.125 1.125 0 0 1-.26-1.43l1.297-2.247a1.125 1.125 0 0 1 1.37-.491l1.216.456c.356.133.751.072 1.076-.124.072-.044.146-.086.22-.128.332-.183.582-.495.644-.869l.214-1.28Z" />
@@ -428,6 +429,7 @@
</div>
<button
type="button"
aria-label="Send message"
onclick={handleCreateChat}
disabled={!newChatMessage.trim() || isCreatingChat || !modelsState.selectedId}
class="rounded-full bg-emerald-600 p-2 text-white transition-colors hover:bg-emerald-500 disabled:opacity-50"
@@ -579,6 +581,8 @@
ondragover={(e) => { e.preventDefault(); dragOver = true; }}
ondragleave={() => dragOver = false}
ondrop={handleDrop}
role="region"
aria-label="File upload drop zone"
>
<input
bind:this={fileInput}
@@ -593,7 +597,7 @@
</svg>
<p class="text-sm text-theme-muted">
Drag & drop files here, or
<button type="button" onclick={() => fileInput.click()} class="text-emerald-500 hover:text-emerald-400">browse</button>
<button type="button" onclick={() => fileInput?.click()} class="text-emerald-500 hover:text-emerald-400">browse</button>
</p>
<p class="mt-1 text-xs text-theme-muted">
Text files, code, markdown, JSON, etc.
@@ -640,6 +644,7 @@
</div>
<button
type="button"
aria-label="Delete document"
onclick={() => handleDeleteDocumentClick(doc)}
class="rounded p-1.5 text-theme-muted transition-colors hover:bg-red-900/30 hover:text-red-400"
>

View File

@@ -646,6 +646,7 @@
role="dialog"
aria-modal="true"
aria-labelledby="editor-title"
tabindex="-1"
>
<div class="w-full max-w-2xl rounded-xl bg-theme-secondary shadow-xl">
<div class="flex items-center justify-between border-b border-theme px-6 py-4">
@@ -655,6 +656,7 @@
<button
type="button"
onclick={closeEditor}
aria-label="Close dialog"
class="rounded p-1 text-theme-muted transition-colors hover:bg-theme-tertiary hover:text-theme-primary"
>
<svg
@@ -742,10 +744,10 @@
</div>
<!-- Capability targeting -->
<div>
<label class="mb-2 block text-sm font-medium text-theme-secondary">
<fieldset>
<legend class="mb-2 block text-sm font-medium text-theme-secondary">
Auto-use for model types
</label>
</legend>
<p class="mb-3 text-xs text-theme-muted">
When a model has these capabilities and no other prompt is selected, this prompt will
be used automatically.
@@ -766,7 +768,7 @@
</button>
{/each}
</div>
</div>
</fieldset>
</div>
<!-- Actions -->
@@ -804,11 +806,13 @@
}}
role="dialog"
aria-modal="true"
aria-labelledby="preview-title"
tabindex="-1"
>
<div class="w-full max-w-2xl max-h-[80vh] flex flex-col rounded-xl bg-theme-secondary shadow-xl">
<div class="flex items-center justify-between border-b border-theme px-6 py-4">
<div>
<h2 class="text-lg font-semibold text-theme-primary">{previewTemplate.name}</h2>
<h2 id="preview-title" class="text-lg font-semibold text-theme-primary">{previewTemplate.name}</h2>
<div class="mt-1 flex items-center gap-2">
<span class="inline-flex items-center gap-1 rounded px-2 py-0.5 text-xs {info.color}">
<span>{info.icon}</span>
@@ -826,6 +830,7 @@
<button
type="button"
onclick={() => (previewTemplate = null)}
aria-label="Close dialog"
class="rounded p-1 text-theme-muted transition-colors hover:bg-theme-tertiary hover:text-theme-primary"
>
<svg

View File

@@ -211,6 +211,7 @@
{:else if searchQuery}
<button
type="button"
aria-label="Clear search"
onclick={() => { searchQuery = ''; titleResults = []; messageResults = []; semanticResults = []; updateUrl(''); }}
class="absolute right-4 top-1/2 -translate-y-1/2 rounded p-1 text-theme-muted hover:text-theme-primary"
>

View File

@@ -7,7 +7,7 @@
import {
SettingsTabs,
GeneralTab,
ModelsTab,
AIProvidersTab,
PromptsTab,
ToolsTab,
AgentsTab,
@@ -37,8 +37,8 @@
<div class="mx-auto max-w-5xl">
{#if activeTab === 'general'}
<GeneralTab />
{:else if activeTab === 'models'}
<ModelsTab />
{:else if activeTab === 'ai'}
<AIProvidersTab />
{:else if activeTab === 'prompts'}
<PromptsTab />
{:else if activeTab === 'tools'}

View File

@@ -97,6 +97,7 @@
class="relative inline-flex h-6 w-11 flex-shrink-0 cursor-pointer rounded-full border-2 border-transparent transition-colors duration-200 ease-in-out focus:outline-none focus:ring-2 focus:ring-blue-500 focus:ring-offset-2 focus:ring-offset-theme-primary {toolsState.toolsEnabled ? 'bg-blue-600' : 'bg-theme-tertiary'}"
role="switch"
aria-checked={toolsState.toolsEnabled}
aria-label="Toggle all tools"
>
<span
class="pointer-events-none inline-block h-5 w-5 transform rounded-full bg-white shadow ring-0 transition duration-200 ease-in-out {toolsState.toolsEnabled ? 'translate-x-5' : 'translate-x-0'}"
@@ -144,6 +145,7 @@
class="relative inline-flex h-6 w-11 flex-shrink-0 cursor-pointer rounded-full border-2 border-transparent transition-colors duration-200 ease-in-out focus:outline-none focus:ring-2 focus:ring-blue-500 focus:ring-offset-2 focus:ring-offset-theme {tool.enabled ? 'bg-blue-600' : 'bg-theme-tertiary'}"
role="switch"
aria-checked={tool.enabled}
aria-label="Toggle {tool.definition.function.name}"
disabled={!toolsState.toolsEnabled}
>
<span
@@ -246,6 +248,7 @@
class="relative inline-flex h-6 w-11 flex-shrink-0 cursor-pointer rounded-full border-2 border-transparent transition-colors duration-200 ease-in-out focus:outline-none focus:ring-2 focus:ring-emerald-500 focus:ring-offset-2 focus:ring-offset-theme {tool.enabled ? 'bg-emerald-600' : 'bg-theme-tertiary'}"
role="switch"
aria-checked={tool.enabled}
aria-label="Toggle {tool.name}"
disabled={!toolsState.toolsEnabled}
>
<span