diff --git a/README.md b/README.md
index e986512..d2633c8 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
 
 Vessel
 
-  A modern, feature-rich web interface for Ollama
+  A modern, feature-rich web interface for local LLMs
 
@@ -28,13 +28,14 @@
 **Vessel** is intentionally focused on:
 
-- A clean, local-first UI for **Ollama**
+- A clean, local-first UI for **local LLMs**
+- **Multiple backends**: Ollama, llama.cpp, LM Studio
 - Minimal configuration
 - Low visual and cognitive overhead
 - Doing a small set of things well
 
 If you want a **universal, highly configurable platform** → [open-webui](https://github.com/open-webui/open-webui) is a great choice.
-If you want a **small, focused UI for local Ollama usage** → Vessel is built for that.
+If you want a **small, focused UI for local LLM usage** → Vessel is built for that.
 
 ---
 
@@ -65,7 +66,13 @@ If you want a **small, focused UI for local Ollama usage** → Vessel is built f
 - Agentic tool calling with chain-of-thought reasoning
 - Test tools before saving with the built-in testing panel
 
-### Models
+### LLM Backends
+- **Ollama** — Full model management, pull/delete/create custom models
+- **llama.cpp** — High-performance inference with GGUF models
+- **LM Studio** — Desktop app integration
+- Switch backends without restart, auto-detection of available backends
+
+### Models (Ollama)
 - Browse and pull models from ollama.com
 - Create custom models with embedded system prompts
 - **Per-model parameters** — customize temperature, context size, top_k/top_p
@@ -112,7 +119,10 @@ If you want a **small, focused UI for local Ollama usage** → Vessel is built f
 ### Prerequisites
 
 - [Docker](https://docs.docker.com/get-docker/) and Docker Compose
-- [Ollama](https://ollama.com/download) running locally
+- An LLM backend (at least one):
+  - [Ollama](https://ollama.com/download) (recommended)
+  - [llama.cpp](https://github.com/ggerganov/llama.cpp)
+  - [LM Studio](https://lmstudio.ai/)
 
 ### Configure Ollama
 
@@ -160,6 +170,7 @@ Full documentation is available on the **[GitHub Wiki](https://github.com/Viking
 | Guide | Description |
 |-------|-------------|
 | [Getting Started](https://github.com/VikingOwl91/vessel/wiki/Getting-Started) | Installation and configuration |
+| [LLM Backends](https://github.com/VikingOwl91/vessel/wiki/LLM-Backends) | Configure Ollama, llama.cpp, or LM Studio |
 | [Projects](https://github.com/VikingOwl91/vessel/wiki/Projects) | Organize conversations into projects |
 | [Knowledge Base](https://github.com/VikingOwl91/vessel/wiki/Knowledge-Base) | RAG with document upload and semantic search |
 | [Search](https://github.com/VikingOwl91/vessel/wiki/Search) | Semantic and content search across chats |
@@ -178,6 +189,7 @@ Full documentation is available on the **[GitHub Wiki](https://github.com/Viking
 Vessel prioritizes **usability and simplicity** over feature breadth.
 
 **Completed:**
+- [x] Multi-backend support (Ollama, llama.cpp, LM Studio)
 - [x] Model browser with filtering and update detection
 - [x] Custom tools (JavaScript, Python, HTTP)
 - [x] System prompt library with model-specific defaults
@@ -197,7 +209,7 @@ Vessel prioritizes **usability and simplicity** over feature breadth.
 - Multi-user systems
 - Cloud sync
 - Plugin ecosystems
-- Support for every LLM runtime
+- Cloud/API-based LLM providers (OpenAI, Anthropic, etc.)
 
 > *Do one thing well. Keep the UI out of the way.*
 
@@ -223,5 +235,5 @@ Contributions are welcome!
 
 GPL-3.0 — See [LICENSE](LICENSE) for details.
 
-  Made with Ollama and Svelte
+  Made with Svelte • Supports Ollama, llama.cpp, and LM Studio
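The README hunks above describe switching backends at runtime through the unified AI API that this patch introduces. As a point of reference (not part of the patch itself), a minimal Go client sketch against the `/api/v1/ai` routes registered in `routes.go` below might look like the following; it assumes the server is running on the default port 8080, uses an illustrative model name, and keeps error handling short:

```go
// Sketch of a client for the unified AI API added in this patch.
// Assumes Vessel is running locally on its default port 8080; the model
// name is illustrative. Field names follow the request shapes exercised
// in ai_handlers_test.go.
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

const base = "http://localhost:8080/api/v1/ai"

func main() {
	// List configured backends and the currently active one.
	resp, err := http.Get(base + "/backends")
	if err != nil {
		panic(err)
	}
	body, _ := io.ReadAll(resp.Body)
	resp.Body.Close()
	fmt.Println("backends:", string(body))

	// Make Ollama the active backend.
	payload, _ := json.Marshal(map[string]string{"type": "ollama"})
	resp, err = http.Post(base+"/backends/active", "application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	resp.Body.Close()

	// Send a non-streaming chat request through whichever backend is active.
	chat, _ := json.Marshal(map[string]interface{}{
		"model": "llama3.2:8b",
		"messages": []map[string]string{
			{"role": "user", "content": "Hello"},
		},
	})
	resp, err = http.Post(base+"/chat", "application/json", bytes.NewReader(chat))
	if err != nil {
		panic(err)
	}
	answer, _ := io.ReadAll(resp.Body)
	resp.Body.Close()
	fmt.Println("chat response:", string(answer))
}
```

When `"stream": true` is set, `ChatHandler` below writes newline-delimited JSON chunks (`application/x-ndjson`), so a streaming client would read the response body line by line rather than all at once.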

diff --git a/backend/cmd/server/main.go b/backend/cmd/server/main.go index 9c0f94a..112e8a7 100644 --- a/backend/cmd/server/main.go +++ b/backend/cmd/server/main.go @@ -14,6 +14,9 @@ import ( "github.com/gin-gonic/gin" "vessel-backend/internal/api" + "vessel-backend/internal/backends" + "vessel-backend/internal/backends/ollama" + "vessel-backend/internal/backends/openai" "vessel-backend/internal/database" ) @@ -29,9 +32,11 @@ func getEnvOrDefault(key, defaultValue string) string { func main() { var ( - port = flag.String("port", getEnvOrDefault("PORT", "8080"), "Server port") - dbPath = flag.String("db", getEnvOrDefault("DB_PATH", "./data/vessel.db"), "Database file path") - ollamaURL = flag.String("ollama-url", getEnvOrDefault("OLLAMA_URL", "http://localhost:11434"), "Ollama API URL") + port = flag.String("port", getEnvOrDefault("PORT", "8080"), "Server port") + dbPath = flag.String("db", getEnvOrDefault("DB_PATH", "./data/vessel.db"), "Database file path") + ollamaURL = flag.String("ollama-url", getEnvOrDefault("OLLAMA_URL", "http://localhost:11434"), "Ollama API URL") + llamacppURL = flag.String("llamacpp-url", getEnvOrDefault("LLAMACPP_URL", "http://localhost:8081"), "llama.cpp server URL") + lmstudioURL = flag.String("lmstudio-url", getEnvOrDefault("LMSTUDIO_URL", "http://localhost:1234"), "LM Studio server URL") ) flag.Parse() @@ -47,6 +52,52 @@ func main() { log.Fatalf("Failed to run migrations: %v", err) } + // Initialize backend registry + registry := backends.NewRegistry() + + // Register Ollama backend + ollamaAdapter, err := ollama.NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeOllama, + BaseURL: *ollamaURL, + }) + if err != nil { + log.Printf("Warning: Failed to create Ollama adapter: %v", err) + } else { + if err := registry.Register(ollamaAdapter); err != nil { + log.Printf("Warning: Failed to register Ollama backend: %v", err) + } + } + + // Register llama.cpp backend (if URL is configured) + if *llamacppURL != "" { + llamacppAdapter, err := openai.NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeLlamaCpp, + BaseURL: *llamacppURL, + }) + if err != nil { + log.Printf("Warning: Failed to create llama.cpp adapter: %v", err) + } else { + if err := registry.Register(llamacppAdapter); err != nil { + log.Printf("Warning: Failed to register llama.cpp backend: %v", err) + } + } + } + + // Register LM Studio backend (if URL is configured) + if *lmstudioURL != "" { + lmstudioAdapter, err := openai.NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeLMStudio, + BaseURL: *lmstudioURL, + }) + if err != nil { + log.Printf("Warning: Failed to create LM Studio adapter: %v", err) + } else { + if err := registry.Register(lmstudioAdapter); err != nil { + log.Printf("Warning: Failed to register LM Studio backend: %v", err) + } + } + } + // Setup Gin router gin.SetMode(gin.ReleaseMode) r := gin.New() @@ -64,7 +115,7 @@ func main() { })) // Register routes - api.SetupRoutes(r, db, *ollamaURL, Version) + api.SetupRoutes(r, db, *ollamaURL, Version, registry) // Create server srv := &http.Server{ @@ -79,8 +130,12 @@ func main() { // Graceful shutdown handling go func() { log.Printf("Server starting on port %s", *port) - log.Printf("Ollama URL: %s (using official Go client)", *ollamaURL) log.Printf("Database: %s", *dbPath) + log.Printf("Backends configured:") + log.Printf(" - Ollama: %s", *ollamaURL) + log.Printf(" - llama.cpp: %s", *llamacppURL) + log.Printf(" - LM Studio: %s", *lmstudioURL) + log.Printf("Active backend: %s", registry.ActiveType().String()) 
if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed { log.Fatalf("Failed to start server: %v", err) } diff --git a/backend/internal/api/ai_handlers.go b/backend/internal/api/ai_handlers.go new file mode 100644 index 0000000..2fa4248 --- /dev/null +++ b/backend/internal/api/ai_handlers.go @@ -0,0 +1,275 @@ +package api + +import ( + "encoding/json" + "net/http" + + "github.com/gin-gonic/gin" + + "vessel-backend/internal/backends" +) + +// AIHandlers provides HTTP handlers for the unified AI API +type AIHandlers struct { + registry *backends.Registry +} + +// NewAIHandlers creates a new AIHandlers instance +func NewAIHandlers(registry *backends.Registry) *AIHandlers { + return &AIHandlers{ + registry: registry, + } +} + +// ListBackendsHandler returns information about all configured backends +func (h *AIHandlers) ListBackendsHandler() gin.HandlerFunc { + return func(c *gin.Context) { + infos := h.registry.AllInfo(c.Request.Context()) + + c.JSON(http.StatusOK, gin.H{ + "backends": infos, + "active": h.registry.ActiveType().String(), + }) + } +} + +// DiscoverBackendsHandler probes for available backends +func (h *AIHandlers) DiscoverBackendsHandler() gin.HandlerFunc { + return func(c *gin.Context) { + var req struct { + Endpoints []backends.DiscoveryEndpoint `json:"endpoints"` + } + + if err := c.ShouldBindJSON(&req); err != nil { + // Use default endpoints if none provided + req.Endpoints = backends.DefaultDiscoveryEndpoints() + } + + if len(req.Endpoints) == 0 { + req.Endpoints = backends.DefaultDiscoveryEndpoints() + } + + results := h.registry.Discover(c.Request.Context(), req.Endpoints) + + c.JSON(http.StatusOK, gin.H{ + "results": results, + }) + } +} + +// SetActiveHandler sets the active backend +func (h *AIHandlers) SetActiveHandler() gin.HandlerFunc { + return func(c *gin.Context) { + var req struct { + Type string `json:"type" binding:"required"` + } + + if err := c.ShouldBindJSON(&req); err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": "type is required"}) + return + } + + backendType, err := backends.ParseBackendType(req.Type) + if err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) + return + } + + if err := h.registry.SetActive(backendType); err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) + return + } + + c.JSON(http.StatusOK, gin.H{ + "active": backendType.String(), + }) + } +} + +// HealthCheckHandler checks the health of a specific backend +func (h *AIHandlers) HealthCheckHandler() gin.HandlerFunc { + return func(c *gin.Context) { + typeParam := c.Param("type") + + backendType, err := backends.ParseBackendType(typeParam) + if err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) + return + } + + backend, ok := h.registry.Get(backendType) + if !ok { + c.JSON(http.StatusNotFound, gin.H{"error": "backend not registered"}) + return + } + + if err := backend.HealthCheck(c.Request.Context()); err != nil { + c.JSON(http.StatusServiceUnavailable, gin.H{ + "status": "unhealthy", + "error": err.Error(), + }) + return + } + + c.JSON(http.StatusOK, gin.H{ + "status": "healthy", + }) + } +} + +// ListModelsHandler returns models from the active backend +func (h *AIHandlers) ListModelsHandler() gin.HandlerFunc { + return func(c *gin.Context) { + active := h.registry.Active() + if active == nil { + c.JSON(http.StatusServiceUnavailable, gin.H{"error": "no active backend"}) + return + } + + models, err := active.ListModels(c.Request.Context()) + if err != nil { + c.JSON(http.StatusBadGateway, 
gin.H{"error": err.Error()}) + return + } + + c.JSON(http.StatusOK, gin.H{ + "models": models, + "backend": active.Type().String(), + }) + } +} + +// ChatHandler handles chat requests through the active backend +func (h *AIHandlers) ChatHandler() gin.HandlerFunc { + return func(c *gin.Context) { + active := h.registry.Active() + if active == nil { + c.JSON(http.StatusServiceUnavailable, gin.H{"error": "no active backend"}) + return + } + + var req backends.ChatRequest + if err := c.ShouldBindJSON(&req); err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request: " + err.Error()}) + return + } + + if err := req.Validate(); err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) + return + } + + // Check if streaming is requested + streaming := req.Stream != nil && *req.Stream + + if streaming { + h.handleStreamingChat(c, active, &req) + } else { + h.handleNonStreamingChat(c, active, &req) + } + } +} + +// handleNonStreamingChat handles non-streaming chat requests +func (h *AIHandlers) handleNonStreamingChat(c *gin.Context, backend backends.LLMBackend, req *backends.ChatRequest) { + resp, err := backend.Chat(c.Request.Context(), req) + if err != nil { + c.JSON(http.StatusBadGateway, gin.H{"error": err.Error()}) + return + } + + c.JSON(http.StatusOK, resp) +} + +// handleStreamingChat handles streaming chat requests +func (h *AIHandlers) handleStreamingChat(c *gin.Context, backend backends.LLMBackend, req *backends.ChatRequest) { + // Set headers for NDJSON streaming + c.Header("Content-Type", "application/x-ndjson") + c.Header("Cache-Control", "no-cache") + c.Header("Connection", "keep-alive") + c.Header("Transfer-Encoding", "chunked") + + ctx := c.Request.Context() + flusher, ok := c.Writer.(http.Flusher) + if !ok { + c.JSON(http.StatusInternalServerError, gin.H{"error": "streaming not supported"}) + return + } + + chunkCh, err := backend.StreamChat(ctx, req) + if err != nil { + errResp := gin.H{"error": err.Error()} + data, _ := json.Marshal(errResp) + c.Writer.Write(append(data, '\n')) + flusher.Flush() + return + } + + for chunk := range chunkCh { + select { + case <-ctx.Done(): + return + default: + } + + data, err := json.Marshal(chunk) + if err != nil { + continue + } + + _, err = c.Writer.Write(append(data, '\n')) + if err != nil { + return + } + flusher.Flush() + } +} + +// RegisterBackendHandler registers a new backend +func (h *AIHandlers) RegisterBackendHandler() gin.HandlerFunc { + return func(c *gin.Context) { + var req backends.BackendConfig + if err := c.ShouldBindJSON(&req); err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request: " + err.Error()}) + return + } + + if err := req.Validate(); err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) + return + } + + // Create adapter based on type + var backend backends.LLMBackend + var err error + + switch req.Type { + case backends.BackendTypeOllama: + // Would import ollama adapter + c.JSON(http.StatusNotImplemented, gin.H{"error": "use /api/v1/ai/backends/discover to register backends"}) + return + case backends.BackendTypeLlamaCpp, backends.BackendTypeLMStudio: + // Would import openai adapter + c.JSON(http.StatusNotImplemented, gin.H{"error": "use /api/v1/ai/backends/discover to register backends"}) + return + default: + c.JSON(http.StatusBadRequest, gin.H{"error": "unknown backend type"}) + return + } + + if err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) + return + } + + if err := h.registry.Register(backend); err != nil 
{ + c.JSON(http.StatusConflict, gin.H{"error": err.Error()}) + return + } + + c.JSON(http.StatusCreated, gin.H{ + "type": req.Type.String(), + "baseUrl": req.BaseURL, + }) + } +} diff --git a/backend/internal/api/ai_handlers_test.go b/backend/internal/api/ai_handlers_test.go new file mode 100644 index 0000000..a70f14b --- /dev/null +++ b/backend/internal/api/ai_handlers_test.go @@ -0,0 +1,354 @@ +package api + +import ( + "bytes" + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + "github.com/gin-gonic/gin" + + "vessel-backend/internal/backends" +) + +func setupAITestRouter(registry *backends.Registry) *gin.Engine { + gin.SetMode(gin.TestMode) + r := gin.New() + + handlers := NewAIHandlers(registry) + + ai := r.Group("/api/v1/ai") + { + ai.GET("/backends", handlers.ListBackendsHandler()) + ai.POST("/backends/discover", handlers.DiscoverBackendsHandler()) + ai.POST("/backends/active", handlers.SetActiveHandler()) + ai.GET("/backends/:type/health", handlers.HealthCheckHandler()) + ai.POST("/chat", handlers.ChatHandler()) + ai.GET("/models", handlers.ListModelsHandler()) + } + + return r +} + +func TestAIHandlers_ListBackends(t *testing.T) { + registry := backends.NewRegistry() + + mock := &mockAIBackend{ + backendType: backends.BackendTypeOllama, + config: backends.BackendConfig{ + Type: backends.BackendTypeOllama, + BaseURL: "http://localhost:11434", + }, + info: backends.BackendInfo{ + Type: backends.BackendTypeOllama, + BaseURL: "http://localhost:11434", + Status: backends.BackendStatusConnected, + Capabilities: backends.OllamaCapabilities(), + Version: "0.3.0", + }, + } + registry.Register(mock) + registry.SetActive(backends.BackendTypeOllama) + + router := setupAITestRouter(registry) + + w := httptest.NewRecorder() + req, _ := http.NewRequest("GET", "/api/v1/ai/backends", nil) + router.ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Errorf("ListBackends() status = %d, want %d", w.Code, http.StatusOK) + } + + var resp struct { + Backends []backends.BackendInfo `json:"backends"` + Active string `json:"active"` + } + if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil { + t.Fatalf("Failed to unmarshal response: %v", err) + } + + if len(resp.Backends) != 1 { + t.Errorf("ListBackends() returned %d backends, want 1", len(resp.Backends)) + } + + if resp.Active != "ollama" { + t.Errorf("ListBackends() active = %q, want %q", resp.Active, "ollama") + } +} + +func TestAIHandlers_SetActive(t *testing.T) { + registry := backends.NewRegistry() + + mock := &mockAIBackend{ + backendType: backends.BackendTypeOllama, + config: backends.BackendConfig{ + Type: backends.BackendTypeOllama, + BaseURL: "http://localhost:11434", + }, + } + registry.Register(mock) + + router := setupAITestRouter(registry) + + t.Run("set valid backend active", func(t *testing.T) { + body, _ := json.Marshal(map[string]string{"type": "ollama"}) + w := httptest.NewRecorder() + req, _ := http.NewRequest("POST", "/api/v1/ai/backends/active", bytes.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + router.ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Errorf("SetActive() status = %d, want %d", w.Code, http.StatusOK) + } + + if registry.ActiveType() != backends.BackendTypeOllama { + t.Errorf("Active backend = %v, want %v", registry.ActiveType(), backends.BackendTypeOllama) + } + }) + + t.Run("set invalid backend active", func(t *testing.T) { + body, _ := json.Marshal(map[string]string{"type": "llamacpp"}) + w := httptest.NewRecorder() + req, _ := 
http.NewRequest("POST", "/api/v1/ai/backends/active", bytes.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + router.ServeHTTP(w, req) + + if w.Code != http.StatusBadRequest { + t.Errorf("SetActive() status = %d, want %d", w.Code, http.StatusBadRequest) + } + }) +} + +func TestAIHandlers_HealthCheck(t *testing.T) { + registry := backends.NewRegistry() + + mock := &mockAIBackend{ + backendType: backends.BackendTypeOllama, + config: backends.BackendConfig{ + Type: backends.BackendTypeOllama, + BaseURL: "http://localhost:11434", + }, + healthErr: nil, + } + registry.Register(mock) + + router := setupAITestRouter(registry) + + t.Run("healthy backend", func(t *testing.T) { + w := httptest.NewRecorder() + req, _ := http.NewRequest("GET", "/api/v1/ai/backends/ollama/health", nil) + router.ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Errorf("HealthCheck() status = %d, want %d", w.Code, http.StatusOK) + } + }) + + t.Run("non-existent backend", func(t *testing.T) { + w := httptest.NewRecorder() + req, _ := http.NewRequest("GET", "/api/v1/ai/backends/llamacpp/health", nil) + router.ServeHTTP(w, req) + + if w.Code != http.StatusNotFound { + t.Errorf("HealthCheck() status = %d, want %d", w.Code, http.StatusNotFound) + } + }) +} + +func TestAIHandlers_ListModels(t *testing.T) { + registry := backends.NewRegistry() + + mock := &mockAIBackend{ + backendType: backends.BackendTypeOllama, + config: backends.BackendConfig{ + Type: backends.BackendTypeOllama, + BaseURL: "http://localhost:11434", + }, + models: []backends.Model{ + {ID: "llama3.2:8b", Name: "llama3.2:8b", Family: "llama"}, + {ID: "mistral:7b", Name: "mistral:7b", Family: "mistral"}, + }, + } + registry.Register(mock) + registry.SetActive(backends.BackendTypeOllama) + + router := setupAITestRouter(registry) + + w := httptest.NewRecorder() + req, _ := http.NewRequest("GET", "/api/v1/ai/models", nil) + router.ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Errorf("ListModels() status = %d, want %d", w.Code, http.StatusOK) + } + + var resp struct { + Models []backends.Model `json:"models"` + } + if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil { + t.Fatalf("Failed to unmarshal response: %v", err) + } + + if len(resp.Models) != 2 { + t.Errorf("ListModels() returned %d models, want 2", len(resp.Models)) + } +} + +func TestAIHandlers_ListModels_NoActiveBackend(t *testing.T) { + registry := backends.NewRegistry() + router := setupAITestRouter(registry) + + w := httptest.NewRecorder() + req, _ := http.NewRequest("GET", "/api/v1/ai/models", nil) + router.ServeHTTP(w, req) + + if w.Code != http.StatusServiceUnavailable { + t.Errorf("ListModels() status = %d, want %d", w.Code, http.StatusServiceUnavailable) + } +} + +func TestAIHandlers_Chat(t *testing.T) { + registry := backends.NewRegistry() + + mock := &mockAIBackend{ + backendType: backends.BackendTypeOllama, + config: backends.BackendConfig{ + Type: backends.BackendTypeOllama, + BaseURL: "http://localhost:11434", + }, + chatResponse: &backends.ChatChunk{ + Model: "llama3.2:8b", + Message: &backends.ChatMessage{ + Role: "assistant", + Content: "Hello! 
How can I help?", + }, + Done: true, + }, + } + registry.Register(mock) + registry.SetActive(backends.BackendTypeOllama) + + router := setupAITestRouter(registry) + + t.Run("non-streaming chat", func(t *testing.T) { + chatReq := backends.ChatRequest{ + Model: "llama3.2:8b", + Messages: []backends.ChatMessage{ + {Role: "user", Content: "Hello"}, + }, + } + body, _ := json.Marshal(chatReq) + + w := httptest.NewRecorder() + req, _ := http.NewRequest("POST", "/api/v1/ai/chat", bytes.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + router.ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Errorf("Chat() status = %d, want %d, body: %s", w.Code, http.StatusOK, w.Body.String()) + } + + var resp backends.ChatChunk + if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil { + t.Fatalf("Failed to unmarshal response: %v", err) + } + + if !resp.Done { + t.Error("Chat() response.Done = false, want true") + } + + if resp.Message == nil || resp.Message.Content != "Hello! How can I help?" { + t.Errorf("Chat() unexpected response: %+v", resp) + } + }) +} + +func TestAIHandlers_Chat_InvalidRequest(t *testing.T) { + registry := backends.NewRegistry() + + mock := &mockAIBackend{ + backendType: backends.BackendTypeOllama, + } + registry.Register(mock) + registry.SetActive(backends.BackendTypeOllama) + + router := setupAITestRouter(registry) + + // Missing model + chatReq := map[string]interface{}{ + "messages": []map[string]string{ + {"role": "user", "content": "Hello"}, + }, + } + body, _ := json.Marshal(chatReq) + + w := httptest.NewRecorder() + req, _ := http.NewRequest("POST", "/api/v1/ai/chat", bytes.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + router.ServeHTTP(w, req) + + if w.Code != http.StatusBadRequest { + t.Errorf("Chat() status = %d, want %d", w.Code, http.StatusBadRequest) + } +} + +// mockAIBackend implements backends.LLMBackend for testing +type mockAIBackend struct { + backendType backends.BackendType + config backends.BackendConfig + info backends.BackendInfo + healthErr error + models []backends.Model + chatResponse *backends.ChatChunk +} + +func (m *mockAIBackend) Type() backends.BackendType { + return m.backendType +} + +func (m *mockAIBackend) Config() backends.BackendConfig { + return m.config +} + +func (m *mockAIBackend) HealthCheck(ctx context.Context) error { + return m.healthErr +} + +func (m *mockAIBackend) ListModels(ctx context.Context) ([]backends.Model, error) { + return m.models, nil +} + +func (m *mockAIBackend) StreamChat(ctx context.Context, req *backends.ChatRequest) (<-chan backends.ChatChunk, error) { + ch := make(chan backends.ChatChunk, 1) + if m.chatResponse != nil { + ch <- *m.chatResponse + } + close(ch) + return ch, nil +} + +func (m *mockAIBackend) Chat(ctx context.Context, req *backends.ChatRequest) (*backends.ChatChunk, error) { + if m.chatResponse != nil { + return m.chatResponse, nil + } + return &backends.ChatChunk{Done: true}, nil +} + +func (m *mockAIBackend) Capabilities() backends.BackendCapabilities { + return backends.OllamaCapabilities() +} + +func (m *mockAIBackend) Info(ctx context.Context) backends.BackendInfo { + if m.info.Type != "" { + return m.info + } + return backends.BackendInfo{ + Type: m.backendType, + BaseURL: m.config.BaseURL, + Status: backends.BackendStatusConnected, + Capabilities: m.Capabilities(), + } +} diff --git a/backend/internal/api/routes.go b/backend/internal/api/routes.go index 8e76f22..fb192b0 100644 --- a/backend/internal/api/routes.go +++ 
b/backend/internal/api/routes.go @@ -5,10 +5,12 @@ import ( "log" "github.com/gin-gonic/gin" + + "vessel-backend/internal/backends" ) // SetupRoutes configures all API routes -func SetupRoutes(r *gin.Engine, db *sql.DB, ollamaURL string, appVersion string) { +func SetupRoutes(r *gin.Engine, db *sql.DB, ollamaURL string, appVersion string, registry *backends.Registry) { // Initialize Ollama service with official client ollamaService, err := NewOllamaService(ollamaURL) if err != nil { @@ -97,6 +99,24 @@ func SetupRoutes(r *gin.Engine, db *sql.DB, ollamaURL string, appVersion string) models.GET("/remote/status", modelRegistry.SyncStatusHandler()) } + // Unified AI routes (multi-backend support) + if registry != nil { + aiHandlers := NewAIHandlers(registry) + ai := v1.Group("/ai") + { + // Backend management + ai.GET("/backends", aiHandlers.ListBackendsHandler()) + ai.POST("/backends/discover", aiHandlers.DiscoverBackendsHandler()) + ai.POST("/backends/active", aiHandlers.SetActiveHandler()) + ai.GET("/backends/:type/health", aiHandlers.HealthCheckHandler()) + ai.POST("/backends/register", aiHandlers.RegisterBackendHandler()) + + // Unified model and chat endpoints (route to active backend) + ai.GET("/models", aiHandlers.ListModelsHandler()) + ai.POST("/chat", aiHandlers.ChatHandler()) + } + } + // Ollama API routes (using official client) if ollamaService != nil { ollama := v1.Group("/ollama") diff --git a/backend/internal/backends/interface.go b/backend/internal/backends/interface.go new file mode 100644 index 0000000..70da0f0 --- /dev/null +++ b/backend/internal/backends/interface.go @@ -0,0 +1,98 @@ +package backends + +import ( + "context" +) + +// LLMBackend defines the interface for LLM backend implementations. +// All backends (Ollama, llama.cpp, LM Studio) must implement this interface. +type LLMBackend interface { + // Type returns the backend type identifier + Type() BackendType + + // Config returns the backend configuration + Config() BackendConfig + + // HealthCheck verifies the backend is reachable and operational + HealthCheck(ctx context.Context) error + + // ListModels returns all models available from this backend + ListModels(ctx context.Context) ([]Model, error) + + // StreamChat sends a chat request and returns a channel for streaming responses. + // The channel is closed when the stream completes or an error occurs. + // Callers should check ChatChunk.Error for stream errors. + StreamChat(ctx context.Context, req *ChatRequest) (<-chan ChatChunk, error) + + // Chat sends a non-streaming chat request and returns the final response + Chat(ctx context.Context, req *ChatRequest) (*ChatChunk, error) + + // Capabilities returns what features this backend supports + Capabilities() BackendCapabilities + + // Info returns detailed information about the backend including status + Info(ctx context.Context) BackendInfo +} + +// ModelManager extends LLMBackend with model management capabilities. +// Only Ollama implements this interface. +type ModelManager interface { + LLMBackend + + // PullModel downloads a model from the registry. + // Returns a channel for progress updates. 
+ PullModel(ctx context.Context, name string) (<-chan PullProgress, error) + + // DeleteModel removes a model from local storage + DeleteModel(ctx context.Context, name string) error + + // CreateModel creates a custom model with the given Modelfile content + CreateModel(ctx context.Context, name string, modelfile string) (<-chan CreateProgress, error) + + // CopyModel creates a copy of an existing model + CopyModel(ctx context.Context, source, destination string) error + + // ShowModel returns detailed information about a specific model + ShowModel(ctx context.Context, name string) (*ModelDetails, error) +} + +// EmbeddingProvider extends LLMBackend with embedding capabilities. +type EmbeddingProvider interface { + LLMBackend + + // Embed generates embeddings for the given input + Embed(ctx context.Context, model string, input []string) ([][]float64, error) +} + +// PullProgress represents progress during model download +type PullProgress struct { + Status string `json:"status"` + Digest string `json:"digest,omitempty"` + Total int64 `json:"total,omitempty"` + Completed int64 `json:"completed,omitempty"` + Error string `json:"error,omitempty"` +} + +// CreateProgress represents progress during model creation +type CreateProgress struct { + Status string `json:"status"` + Error string `json:"error,omitempty"` +} + +// ModelDetails contains detailed information about a model +type ModelDetails struct { + Name string `json:"name"` + ModifiedAt string `json:"modified_at"` + Size int64 `json:"size"` + Digest string `json:"digest"` + Format string `json:"format"` + Family string `json:"family"` + Families []string `json:"families"` + ParamSize string `json:"parameter_size"` + QuantLevel string `json:"quantization_level"` + Template string `json:"template"` + System string `json:"system"` + License string `json:"license"` + Modelfile string `json:"modelfile"` + Parameters map[string]string `json:"parameters"` +} diff --git a/backend/internal/backends/ollama/adapter.go b/backend/internal/backends/ollama/adapter.go new file mode 100644 index 0000000..b46eb00 --- /dev/null +++ b/backend/internal/backends/ollama/adapter.go @@ -0,0 +1,624 @@ +package ollama + +import ( + "bufio" + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "time" + + "vessel-backend/internal/backends" +) + +// Adapter implements the LLMBackend interface for Ollama. +// It also implements ModelManager and EmbeddingProvider. 
+type Adapter struct { + config backends.BackendConfig + httpClient *http.Client + baseURL *url.URL +} + +// Ensure Adapter implements all required interfaces +var ( + _ backends.LLMBackend = (*Adapter)(nil) + _ backends.ModelManager = (*Adapter)(nil) + _ backends.EmbeddingProvider = (*Adapter)(nil) +) + +// NewAdapter creates a new Ollama backend adapter +func NewAdapter(config backends.BackendConfig) (*Adapter, error) { + if config.Type != backends.BackendTypeOllama { + return nil, fmt.Errorf("invalid backend type: expected %s, got %s", backends.BackendTypeOllama, config.Type) + } + + if err := config.Validate(); err != nil { + return nil, fmt.Errorf("invalid config: %w", err) + } + + baseURL, err := url.Parse(config.BaseURL) + if err != nil { + return nil, fmt.Errorf("invalid base URL: %w", err) + } + + return &Adapter{ + config: config, + baseURL: baseURL, + httpClient: &http.Client{ + Timeout: 30 * time.Second, + }, + }, nil +} + +// Type returns the backend type +func (a *Adapter) Type() backends.BackendType { + return backends.BackendTypeOllama +} + +// Config returns the backend configuration +func (a *Adapter) Config() backends.BackendConfig { + return a.config +} + +// Capabilities returns what features this backend supports +func (a *Adapter) Capabilities() backends.BackendCapabilities { + return backends.OllamaCapabilities() +} + +// HealthCheck verifies the backend is reachable +func (a *Adapter) HealthCheck(ctx context.Context) error { + req, err := http.NewRequestWithContext(ctx, "GET", a.baseURL.String()+"/api/version", nil) + if err != nil { + return fmt.Errorf("failed to create request: %w", err) + } + + resp, err := a.httpClient.Do(req) + if err != nil { + return fmt.Errorf("failed to reach Ollama: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("Ollama returned status %d", resp.StatusCode) + } + + return nil +} + +// ollamaListResponse represents the response from /api/tags +type ollamaListResponse struct { + Models []ollamaModel `json:"models"` +} + +type ollamaModel struct { + Name string `json:"name"` + Size int64 `json:"size"` + ModifiedAt string `json:"modified_at"` + Details ollamaModelDetails `json:"details"` +} + +type ollamaModelDetails struct { + Family string `json:"family"` + QuantLevel string `json:"quantization_level"` + ParamSize string `json:"parameter_size"` +} + +// ListModels returns all models available from Ollama +func (a *Adapter) ListModels(ctx context.Context) ([]backends.Model, error) { + req, err := http.NewRequestWithContext(ctx, "GET", a.baseURL.String()+"/api/tags", nil) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + resp, err := a.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to list models: %w", err) + } + defer resp.Body.Close() + + var listResp ollamaListResponse + if err := json.NewDecoder(resp.Body).Decode(&listResp); err != nil { + return nil, fmt.Errorf("failed to decode response: %w", err) + } + + models := make([]backends.Model, len(listResp.Models)) + for i, m := range listResp.Models { + models[i] = backends.Model{ + ID: m.Name, + Name: m.Name, + Size: m.Size, + ModifiedAt: m.ModifiedAt, + Family: m.Details.Family, + QuantLevel: m.Details.QuantLevel, + } + } + + return models, nil +} + +// Chat sends a non-streaming chat request +func (a *Adapter) Chat(ctx context.Context, req *backends.ChatRequest) (*backends.ChatChunk, error) { + if err := req.Validate(); err != nil { + return nil, fmt.Errorf("invalid request: 
%w", err) + } + + // Convert to Ollama format + ollamaReq := a.convertChatRequest(req) + ollamaReq["stream"] = false + + body, err := json.Marshal(ollamaReq) + if err != nil { + return nil, fmt.Errorf("failed to marshal request: %w", err) + } + + httpReq, err := http.NewRequestWithContext(ctx, "POST", a.baseURL.String()+"/api/chat", bytes.NewReader(body)) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + httpReq.Header.Set("Content-Type", "application/json") + + resp, err := a.httpClient.Do(httpReq) + if err != nil { + return nil, fmt.Errorf("chat request failed: %w", err) + } + defer resp.Body.Close() + + var ollamaResp ollamaChatResponse + if err := json.NewDecoder(resp.Body).Decode(&ollamaResp); err != nil { + return nil, fmt.Errorf("failed to decode response: %w", err) + } + + return a.convertChatResponse(&ollamaResp), nil +} + +// StreamChat sends a streaming chat request +func (a *Adapter) StreamChat(ctx context.Context, req *backends.ChatRequest) (<-chan backends.ChatChunk, error) { + if err := req.Validate(); err != nil { + return nil, fmt.Errorf("invalid request: %w", err) + } + + // Convert to Ollama format + ollamaReq := a.convertChatRequest(req) + ollamaReq["stream"] = true + + body, err := json.Marshal(ollamaReq) + if err != nil { + return nil, fmt.Errorf("failed to marshal request: %w", err) + } + + // Create HTTP request without timeout for streaming + httpReq, err := http.NewRequestWithContext(ctx, "POST", a.baseURL.String()+"/api/chat", bytes.NewReader(body)) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + httpReq.Header.Set("Content-Type", "application/json") + + // Use a client without timeout for streaming + client := &http.Client{} + resp, err := client.Do(httpReq) + if err != nil { + return nil, fmt.Errorf("chat request failed: %w", err) + } + + chunkCh := make(chan backends.ChatChunk) + + go func() { + defer close(chunkCh) + defer resp.Body.Close() + + scanner := bufio.NewScanner(resp.Body) + for scanner.Scan() { + select { + case <-ctx.Done(): + return + default: + } + + line := scanner.Bytes() + if len(line) == 0 { + continue + } + + var ollamaResp ollamaChatResponse + if err := json.Unmarshal(line, &ollamaResp); err != nil { + chunkCh <- backends.ChatChunk{Error: fmt.Sprintf("failed to parse response: %v", err)} + return + } + + chunkCh <- *a.convertChatResponse(&ollamaResp) + + if ollamaResp.Done { + return + } + } + + if err := scanner.Err(); err != nil && ctx.Err() == nil { + chunkCh <- backends.ChatChunk{Error: fmt.Sprintf("stream error: %v", err)} + } + }() + + return chunkCh, nil +} + +// Info returns detailed information about the backend +func (a *Adapter) Info(ctx context.Context) backends.BackendInfo { + info := backends.BackendInfo{ + Type: backends.BackendTypeOllama, + BaseURL: a.config.BaseURL, + Capabilities: a.Capabilities(), + } + + // Try to get version + req, err := http.NewRequestWithContext(ctx, "GET", a.baseURL.String()+"/api/version", nil) + if err != nil { + info.Status = backends.BackendStatusDisconnected + info.Error = err.Error() + return info + } + + resp, err := a.httpClient.Do(req) + if err != nil { + info.Status = backends.BackendStatusDisconnected + info.Error = err.Error() + return info + } + defer resp.Body.Close() + + var versionResp struct { + Version string `json:"version"` + } + if err := json.NewDecoder(resp.Body).Decode(&versionResp); err != nil { + info.Status = backends.BackendStatusDisconnected + info.Error = err.Error() + return info + } + + 
info.Status = backends.BackendStatusConnected + info.Version = versionResp.Version + return info +} + +// ShowModel returns detailed information about a specific model +func (a *Adapter) ShowModel(ctx context.Context, name string) (*backends.ModelDetails, error) { + body, err := json.Marshal(map[string]string{"name": name}) + if err != nil { + return nil, fmt.Errorf("failed to marshal request: %w", err) + } + + req, err := http.NewRequestWithContext(ctx, "POST", a.baseURL.String()+"/api/show", bytes.NewReader(body)) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + req.Header.Set("Content-Type", "application/json") + + resp, err := a.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to show model: %w", err) + } + defer resp.Body.Close() + + var showResp struct { + Modelfile string `json:"modelfile"` + Template string `json:"template"` + System string `json:"system"` + Details struct { + Family string `json:"family"` + ParamSize string `json:"parameter_size"` + QuantLevel string `json:"quantization_level"` + } `json:"details"` + } + if err := json.NewDecoder(resp.Body).Decode(&showResp); err != nil { + return nil, fmt.Errorf("failed to decode response: %w", err) + } + + return &backends.ModelDetails{ + Name: name, + Family: showResp.Details.Family, + ParamSize: showResp.Details.ParamSize, + QuantLevel: showResp.Details.QuantLevel, + Template: showResp.Template, + System: showResp.System, + Modelfile: showResp.Modelfile, + }, nil +} + +// PullModel downloads a model from the registry +func (a *Adapter) PullModel(ctx context.Context, name string) (<-chan backends.PullProgress, error) { + body, err := json.Marshal(map[string]interface{}{"name": name, "stream": true}) + if err != nil { + return nil, fmt.Errorf("failed to marshal request: %w", err) + } + + req, err := http.NewRequestWithContext(ctx, "POST", a.baseURL.String()+"/api/pull", bytes.NewReader(body)) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + req.Header.Set("Content-Type", "application/json") + + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to pull model: %w", err) + } + + progressCh := make(chan backends.PullProgress) + + go func() { + defer close(progressCh) + defer resp.Body.Close() + + scanner := bufio.NewScanner(resp.Body) + for scanner.Scan() { + select { + case <-ctx.Done(): + return + default: + } + + var progress struct { + Status string `json:"status"` + Digest string `json:"digest"` + Total int64 `json:"total"` + Completed int64 `json:"completed"` + } + if err := json.Unmarshal(scanner.Bytes(), &progress); err != nil { + progressCh <- backends.PullProgress{Error: err.Error()} + return + } + + progressCh <- backends.PullProgress{ + Status: progress.Status, + Digest: progress.Digest, + Total: progress.Total, + Completed: progress.Completed, + } + } + + if err := scanner.Err(); err != nil && ctx.Err() == nil { + progressCh <- backends.PullProgress{Error: err.Error()} + } + }() + + return progressCh, nil +} + +// DeleteModel removes a model from local storage +func (a *Adapter) DeleteModel(ctx context.Context, name string) error { + body, err := json.Marshal(map[string]string{"name": name}) + if err != nil { + return fmt.Errorf("failed to marshal request: %w", err) + } + + req, err := http.NewRequestWithContext(ctx, "DELETE", a.baseURL.String()+"/api/delete", bytes.NewReader(body)) + if err != nil { + return fmt.Errorf("failed to create request: %w", err) + } + 
req.Header.Set("Content-Type", "application/json") + + resp, err := a.httpClient.Do(req) + if err != nil { + return fmt.Errorf("failed to delete model: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + bodyBytes, _ := io.ReadAll(resp.Body) + return fmt.Errorf("delete failed: %s", string(bodyBytes)) + } + + return nil +} + +// CreateModel creates a custom model with the given Modelfile content +func (a *Adapter) CreateModel(ctx context.Context, name string, modelfile string) (<-chan backends.CreateProgress, error) { + body, err := json.Marshal(map[string]interface{}{ + "name": name, + "modelfile": modelfile, + "stream": true, + }) + if err != nil { + return nil, fmt.Errorf("failed to marshal request: %w", err) + } + + req, err := http.NewRequestWithContext(ctx, "POST", a.baseURL.String()+"/api/create", bytes.NewReader(body)) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + req.Header.Set("Content-Type", "application/json") + + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to create model: %w", err) + } + + progressCh := make(chan backends.CreateProgress) + + go func() { + defer close(progressCh) + defer resp.Body.Close() + + scanner := bufio.NewScanner(resp.Body) + for scanner.Scan() { + select { + case <-ctx.Done(): + return + default: + } + + var progress struct { + Status string `json:"status"` + } + if err := json.Unmarshal(scanner.Bytes(), &progress); err != nil { + progressCh <- backends.CreateProgress{Error: err.Error()} + return + } + + progressCh <- backends.CreateProgress{Status: progress.Status} + } + + if err := scanner.Err(); err != nil && ctx.Err() == nil { + progressCh <- backends.CreateProgress{Error: err.Error()} + } + }() + + return progressCh, nil +} + +// CopyModel creates a copy of an existing model +func (a *Adapter) CopyModel(ctx context.Context, source, destination string) error { + body, err := json.Marshal(map[string]string{ + "source": source, + "destination": destination, + }) + if err != nil { + return fmt.Errorf("failed to marshal request: %w", err) + } + + req, err := http.NewRequestWithContext(ctx, "POST", a.baseURL.String()+"/api/copy", bytes.NewReader(body)) + if err != nil { + return fmt.Errorf("failed to create request: %w", err) + } + req.Header.Set("Content-Type", "application/json") + + resp, err := a.httpClient.Do(req) + if err != nil { + return fmt.Errorf("failed to copy model: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + bodyBytes, _ := io.ReadAll(resp.Body) + return fmt.Errorf("copy failed: %s", string(bodyBytes)) + } + + return nil +} + +// Embed generates embeddings for the given input +func (a *Adapter) Embed(ctx context.Context, model string, input []string) ([][]float64, error) { + body, err := json.Marshal(map[string]interface{}{ + "model": model, + "input": input, + }) + if err != nil { + return nil, fmt.Errorf("failed to marshal request: %w", err) + } + + req, err := http.NewRequestWithContext(ctx, "POST", a.baseURL.String()+"/api/embed", bytes.NewReader(body)) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + req.Header.Set("Content-Type", "application/json") + + resp, err := a.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("embed request failed: %w", err) + } + defer resp.Body.Close() + + var embedResp struct { + Embeddings [][]float64 `json:"embeddings"` + } + if err := json.NewDecoder(resp.Body).Decode(&embedResp); err 
!= nil { + return nil, fmt.Errorf("failed to decode response: %w", err) + } + + return embedResp.Embeddings, nil +} + +// ollamaChatResponse represents the response from /api/chat +type ollamaChatResponse struct { + Model string `json:"model"` + CreatedAt string `json:"created_at"` + Message ollamaChatMessage `json:"message"` + Done bool `json:"done"` + DoneReason string `json:"done_reason,omitempty"` + PromptEvalCount int `json:"prompt_eval_count,omitempty"` + EvalCount int `json:"eval_count,omitempty"` +} + +type ollamaChatMessage struct { + Role string `json:"role"` + Content string `json:"content"` + Images []string `json:"images,omitempty"` + ToolCalls []ollamaToolCall `json:"tool_calls,omitempty"` +} + +type ollamaToolCall struct { + Function struct { + Name string `json:"name"` + Arguments json.RawMessage `json:"arguments"` + } `json:"function"` +} + +// convertChatRequest converts a backends.ChatRequest to Ollama format +func (a *Adapter) convertChatRequest(req *backends.ChatRequest) map[string]interface{} { + messages := make([]map[string]interface{}, len(req.Messages)) + for i, msg := range req.Messages { + m := map[string]interface{}{ + "role": msg.Role, + "content": msg.Content, + } + if len(msg.Images) > 0 { + m["images"] = msg.Images + } + messages[i] = m + } + + ollamaReq := map[string]interface{}{ + "model": req.Model, + "messages": messages, + } + + // Add optional parameters + if req.Options != nil { + ollamaReq["options"] = req.Options + } + if len(req.Tools) > 0 { + ollamaReq["tools"] = req.Tools + } + + return ollamaReq +} + +// convertChatResponse converts an Ollama response to backends.ChatChunk +func (a *Adapter) convertChatResponse(resp *ollamaChatResponse) *backends.ChatChunk { + chunk := &backends.ChatChunk{ + Model: resp.Model, + CreatedAt: resp.CreatedAt, + Done: resp.Done, + DoneReason: resp.DoneReason, + PromptEvalCount: resp.PromptEvalCount, + EvalCount: resp.EvalCount, + } + + if resp.Message.Role != "" || resp.Message.Content != "" { + msg := &backends.ChatMessage{ + Role: resp.Message.Role, + Content: resp.Message.Content, + Images: resp.Message.Images, + } + + // Convert tool calls + for _, tc := range resp.Message.ToolCalls { + msg.ToolCalls = append(msg.ToolCalls, backends.ToolCall{ + Type: "function", + Function: struct { + Name string `json:"name"` + Arguments string `json:"arguments"` + }{ + Name: tc.Function.Name, + Arguments: string(tc.Function.Arguments), + }, + }) + } + + chunk.Message = msg + } + + return chunk +} diff --git a/backend/internal/backends/ollama/adapter_test.go b/backend/internal/backends/ollama/adapter_test.go new file mode 100644 index 0000000..3b6041f --- /dev/null +++ b/backend/internal/backends/ollama/adapter_test.go @@ -0,0 +1,574 @@ +package ollama + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + "time" + + "vessel-backend/internal/backends" +) + +func TestAdapter_Type(t *testing.T) { + adapter, _ := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeOllama, + BaseURL: "http://localhost:11434", + }) + + if adapter.Type() != backends.BackendTypeOllama { + t.Errorf("Type() = %v, want %v", adapter.Type(), backends.BackendTypeOllama) + } +} + +func TestAdapter_Config(t *testing.T) { + cfg := backends.BackendConfig{ + Type: backends.BackendTypeOllama, + BaseURL: "http://localhost:11434", + Enabled: true, + } + + adapter, _ := NewAdapter(cfg) + got := adapter.Config() + + if got.Type != cfg.Type { + t.Errorf("Config().Type = %v, want %v", got.Type, cfg.Type) + } + if 
got.BaseURL != cfg.BaseURL { + t.Errorf("Config().BaseURL = %v, want %v", got.BaseURL, cfg.BaseURL) + } +} + +func TestAdapter_Capabilities(t *testing.T) { + adapter, _ := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeOllama, + BaseURL: "http://localhost:11434", + }) + + caps := adapter.Capabilities() + + if !caps.CanListModels { + t.Error("Ollama adapter should support listing models") + } + if !caps.CanPullModels { + t.Error("Ollama adapter should support pulling models") + } + if !caps.CanDeleteModels { + t.Error("Ollama adapter should support deleting models") + } + if !caps.CanCreateModels { + t.Error("Ollama adapter should support creating models") + } + if !caps.CanStreamChat { + t.Error("Ollama adapter should support streaming chat") + } + if !caps.CanEmbed { + t.Error("Ollama adapter should support embeddings") + } +} + +func TestAdapter_HealthCheck(t *testing.T) { + t.Run("healthy server", func(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/" || r.URL.Path == "/api/version" { + w.WriteHeader(http.StatusOK) + json.NewEncoder(w).Encode(map[string]string{"version": "0.1.0"}) + } + })) + defer server.Close() + + adapter, err := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeOllama, + BaseURL: server.URL, + }) + if err != nil { + t.Fatalf("Failed to create adapter: %v", err) + } + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + if err := adapter.HealthCheck(ctx); err != nil { + t.Errorf("HealthCheck() error = %v, want nil", err) + } + }) + + t.Run("unreachable server", func(t *testing.T) { + adapter, err := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeOllama, + BaseURL: "http://localhost:19999", // unlikely to be running + }) + if err != nil { + t.Fatalf("Failed to create adapter: %v", err) + } + + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) + defer cancel() + + if err := adapter.HealthCheck(ctx); err == nil { + t.Error("HealthCheck() expected error for unreachable server") + } + }) +} + +func TestAdapter_ListModels(t *testing.T) { + t.Run("returns model list", func(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/api/tags" { + resp := map[string]interface{}{ + "models": []map[string]interface{}{ + { + "name": "llama3.2:8b", + "size": int64(4700000000), + "modified_at": "2024-01-15T10:30:00Z", + "details": map[string]interface{}{ + "family": "llama", + "quantization_level": "Q4_K_M", + }, + }, + { + "name": "mistral:7b", + "size": int64(4100000000), + "modified_at": "2024-01-14T08:00:00Z", + "details": map[string]interface{}{ + "family": "mistral", + "quantization_level": "Q4_0", + }, + }, + }, + } + json.NewEncoder(w).Encode(resp) + } + })) + defer server.Close() + + adapter, _ := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeOllama, + BaseURL: server.URL, + }) + + ctx := context.Background() + models, err := adapter.ListModels(ctx) + if err != nil { + t.Fatalf("ListModels() error = %v", err) + } + + if len(models) != 2 { + t.Errorf("ListModels() returned %d models, want 2", len(models)) + } + + if models[0].Name != "llama3.2:8b" { + t.Errorf("First model name = %q, want %q", models[0].Name, "llama3.2:8b") + } + + if models[0].Family != "llama" { + t.Errorf("First model family = %q, want %q", models[0].Family, "llama") + } + }) + + t.Run("handles empty model list", func(t 
*testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/api/tags" { + resp := map[string]interface{}{ + "models": []map[string]interface{}{}, + } + json.NewEncoder(w).Encode(resp) + } + })) + defer server.Close() + + adapter, _ := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeOllama, + BaseURL: server.URL, + }) + + models, err := adapter.ListModels(context.Background()) + if err != nil { + t.Fatalf("ListModels() error = %v", err) + } + + if len(models) != 0 { + t.Errorf("ListModels() returned %d models, want 0", len(models)) + } + }) +} + +func TestAdapter_Chat(t *testing.T) { + t.Run("non-streaming chat", func(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/api/chat" && r.Method == "POST" { + var req map[string]interface{} + json.NewDecoder(r.Body).Decode(&req) + + // Check stream is false + if stream, ok := req["stream"].(bool); !ok || stream { + t.Error("Expected stream=false for non-streaming chat") + } + + resp := map[string]interface{}{ + "model": "llama3.2:8b", + "message": map[string]interface{}{"role": "assistant", "content": "Hello! How can I help you?"}, + "done": true, + } + json.NewEncoder(w).Encode(resp) + } + })) + defer server.Close() + + adapter, _ := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeOllama, + BaseURL: server.URL, + }) + + req := &backends.ChatRequest{ + Model: "llama3.2:8b", + Messages: []backends.ChatMessage{ + {Role: "user", Content: "Hello"}, + }, + } + + resp, err := adapter.Chat(context.Background(), req) + if err != nil { + t.Fatalf("Chat() error = %v", err) + } + + if !resp.Done { + t.Error("Chat() response.Done = false, want true") + } + + if resp.Message == nil || resp.Message.Content != "Hello! How can I help you?" 
{ + t.Errorf("Chat() response content unexpected: %+v", resp.Message) + } + }) +} + +func TestAdapter_StreamChat(t *testing.T) { + t.Run("streaming chat", func(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/api/chat" && r.Method == "POST" { + var req map[string]interface{} + json.NewDecoder(r.Body).Decode(&req) + + // Check stream is true + if stream, ok := req["stream"].(bool); ok && !stream { + t.Error("Expected stream=true for streaming chat") + } + + w.Header().Set("Content-Type", "application/x-ndjson") + flusher := w.(http.Flusher) + + // Send streaming chunks + chunks := []map[string]interface{}{ + {"model": "llama3.2:8b", "message": map[string]interface{}{"role": "assistant", "content": "Hello"}, "done": false}, + {"model": "llama3.2:8b", "message": map[string]interface{}{"role": "assistant", "content": "!"}, "done": false}, + {"model": "llama3.2:8b", "message": map[string]interface{}{"role": "assistant", "content": ""}, "done": true}, + } + + for _, chunk := range chunks { + data, _ := json.Marshal(chunk) + w.Write(append(data, '\n')) + flusher.Flush() + } + } + })) + defer server.Close() + + adapter, _ := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeOllama, + BaseURL: server.URL, + }) + + streaming := true + req := &backends.ChatRequest{ + Model: "llama3.2:8b", + Messages: []backends.ChatMessage{ + {Role: "user", Content: "Hello"}, + }, + Stream: &streaming, + } + + chunkCh, err := adapter.StreamChat(context.Background(), req) + if err != nil { + t.Fatalf("StreamChat() error = %v", err) + } + + var chunks []backends.ChatChunk + for chunk := range chunkCh { + chunks = append(chunks, chunk) + } + + if len(chunks) != 3 { + t.Errorf("StreamChat() received %d chunks, want 3", len(chunks)) + } + + // Last chunk should be done + if !chunks[len(chunks)-1].Done { + t.Error("Last chunk should have Done=true") + } + }) + + t.Run("handles context cancellation", func(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/api/chat" { + w.Header().Set("Content-Type", "application/x-ndjson") + flusher := w.(http.Flusher) + + // Send first chunk then wait + chunk := map[string]interface{}{"model": "llama3.2:8b", "message": map[string]interface{}{"role": "assistant", "content": "Starting..."}, "done": false} + data, _ := json.Marshal(chunk) + w.Write(append(data, '\n')) + flusher.Flush() + + // Wait long enough for context to be cancelled + time.Sleep(2 * time.Second) + } + })) + defer server.Close() + + adapter, _ := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeOllama, + BaseURL: server.URL, + }) + + ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) + defer cancel() + + streaming := true + req := &backends.ChatRequest{ + Model: "llama3.2:8b", + Messages: []backends.ChatMessage{ + {Role: "user", Content: "Hello"}, + }, + Stream: &streaming, + } + + chunkCh, err := adapter.StreamChat(ctx, req) + if err != nil { + t.Fatalf("StreamChat() error = %v", err) + } + + // Should receive at least one chunk before timeout + receivedChunks := 0 + for range chunkCh { + receivedChunks++ + } + + if receivedChunks == 0 { + t.Error("Expected to receive at least one chunk before cancellation") + } + }) +} + +func TestAdapter_Info(t *testing.T) { + t.Run("connected server", func(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if 
r.URL.Path == "/" || r.URL.Path == "/api/version" { + json.NewEncoder(w).Encode(map[string]string{"version": "0.3.0"}) + } + })) + defer server.Close() + + adapter, _ := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeOllama, + BaseURL: server.URL, + }) + + info := adapter.Info(context.Background()) + + if info.Type != backends.BackendTypeOllama { + t.Errorf("Info().Type = %v, want %v", info.Type, backends.BackendTypeOllama) + } + + if info.Status != backends.BackendStatusConnected { + t.Errorf("Info().Status = %v, want %v", info.Status, backends.BackendStatusConnected) + } + + if info.Version != "0.3.0" { + t.Errorf("Info().Version = %v, want %v", info.Version, "0.3.0") + } + }) + + t.Run("disconnected server", func(t *testing.T) { + adapter, _ := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeOllama, + BaseURL: "http://localhost:19999", + }) + + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) + defer cancel() + + info := adapter.Info(ctx) + + if info.Status != backends.BackendStatusDisconnected { + t.Errorf("Info().Status = %v, want %v", info.Status, backends.BackendStatusDisconnected) + } + + if info.Error == "" { + t.Error("Info().Error should be set for disconnected server") + } + }) +} + +func TestAdapter_ShowModel(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/api/show" && r.Method == "POST" { + var req map[string]string + json.NewDecoder(r.Body).Decode(&req) + + resp := map[string]interface{}{ + "modelfile": "FROM llama3.2:8b\nSYSTEM You are helpful.", + "template": "{{ .Prompt }}", + "system": "You are helpful.", + "details": map[string]interface{}{ + "family": "llama", + "parameter_size": "8B", + "quantization_level": "Q4_K_M", + }, + } + json.NewEncoder(w).Encode(resp) + } + })) + defer server.Close() + + adapter, _ := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeOllama, + BaseURL: server.URL, + }) + + details, err := adapter.ShowModel(context.Background(), "llama3.2:8b") + if err != nil { + t.Fatalf("ShowModel() error = %v", err) + } + + if details.Family != "llama" { + t.Errorf("ShowModel().Family = %q, want %q", details.Family, "llama") + } + + if details.System != "You are helpful." 
{ + t.Errorf("ShowModel().System = %q, want %q", details.System, "You are helpful.") + } +} + +func TestAdapter_DeleteModel(t *testing.T) { + deleted := false + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/api/delete" && r.Method == "DELETE" { + deleted = true + w.WriteHeader(http.StatusOK) + } + })) + defer server.Close() + + adapter, _ := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeOllama, + BaseURL: server.URL, + }) + + err := adapter.DeleteModel(context.Background(), "test-model") + if err != nil { + t.Fatalf("DeleteModel() error = %v", err) + } + + if !deleted { + t.Error("DeleteModel() did not call the delete endpoint") + } +} + +func TestAdapter_CopyModel(t *testing.T) { + copied := false + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/api/copy" && r.Method == "POST" { + var req map[string]string + json.NewDecoder(r.Body).Decode(&req) + + if req["source"] == "source-model" && req["destination"] == "dest-model" { + copied = true + } + w.WriteHeader(http.StatusOK) + } + })) + defer server.Close() + + adapter, _ := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeOllama, + BaseURL: server.URL, + }) + + err := adapter.CopyModel(context.Background(), "source-model", "dest-model") + if err != nil { + t.Fatalf("CopyModel() error = %v", err) + } + + if !copied { + t.Error("CopyModel() did not call the copy endpoint with correct params") + } +} + +func TestAdapter_Embed(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/api/embed" && r.Method == "POST" { + resp := map[string]interface{}{ + "embeddings": [][]float64{ + {0.1, 0.2, 0.3}, + {0.4, 0.5, 0.6}, + }, + } + json.NewEncoder(w).Encode(resp) + } + })) + defer server.Close() + + adapter, _ := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeOllama, + BaseURL: server.URL, + }) + + embeddings, err := adapter.Embed(context.Background(), "nomic-embed-text", []string{"hello", "world"}) + if err != nil { + t.Fatalf("Embed() error = %v", err) + } + + if len(embeddings) != 2 { + t.Errorf("Embed() returned %d embeddings, want 2", len(embeddings)) + } + + if len(embeddings[0]) != 3 { + t.Errorf("First embedding has %d dimensions, want 3", len(embeddings[0])) + } +} + +func TestNewAdapter_Validation(t *testing.T) { + t.Run("invalid URL", func(t *testing.T) { + _, err := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeOllama, + BaseURL: "not-a-url", + }) + if err == nil { + t.Error("NewAdapter() should fail with invalid URL") + } + }) + + t.Run("wrong backend type", func(t *testing.T) { + _, err := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeLlamaCpp, + BaseURL: "http://localhost:11434", + }) + if err == nil { + t.Error("NewAdapter() should fail with wrong backend type") + } + }) + + t.Run("valid config", func(t *testing.T) { + adapter, err := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeOllama, + BaseURL: "http://localhost:11434", + }) + if err != nil { + t.Errorf("NewAdapter() error = %v", err) + } + if adapter == nil { + t.Error("NewAdapter() returned nil adapter") + } + }) +} diff --git a/backend/internal/backends/openai/adapter.go b/backend/internal/backends/openai/adapter.go new file mode 100644 index 0000000..a2908eb --- /dev/null +++ b/backend/internal/backends/openai/adapter.go @@ -0,0 +1,503 @@ +package openai + +import ( + 
"bufio" + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "strings" + "time" + + "vessel-backend/internal/backends" +) + +// Adapter implements the LLMBackend interface for OpenAI-compatible APIs. +// This includes llama.cpp server and LM Studio. +type Adapter struct { + config backends.BackendConfig + httpClient *http.Client + baseURL *url.URL +} + +// Ensure Adapter implements required interfaces +var ( + _ backends.LLMBackend = (*Adapter)(nil) + _ backends.EmbeddingProvider = (*Adapter)(nil) +) + +// NewAdapter creates a new OpenAI-compatible backend adapter +func NewAdapter(config backends.BackendConfig) (*Adapter, error) { + if config.Type != backends.BackendTypeLlamaCpp && config.Type != backends.BackendTypeLMStudio { + return nil, fmt.Errorf("invalid backend type: expected %s or %s, got %s", + backends.BackendTypeLlamaCpp, backends.BackendTypeLMStudio, config.Type) + } + + if err := config.Validate(); err != nil { + return nil, fmt.Errorf("invalid config: %w", err) + } + + baseURL, err := url.Parse(config.BaseURL) + if err != nil { + return nil, fmt.Errorf("invalid base URL: %w", err) + } + + return &Adapter{ + config: config, + baseURL: baseURL, + httpClient: &http.Client{ + Timeout: 30 * time.Second, + }, + }, nil +} + +// Type returns the backend type +func (a *Adapter) Type() backends.BackendType { + return a.config.Type +} + +// Config returns the backend configuration +func (a *Adapter) Config() backends.BackendConfig { + return a.config +} + +// Capabilities returns what features this backend supports +func (a *Adapter) Capabilities() backends.BackendCapabilities { + if a.config.Type == backends.BackendTypeLlamaCpp { + return backends.LlamaCppCapabilities() + } + return backends.LMStudioCapabilities() +} + +// HealthCheck verifies the backend is reachable +func (a *Adapter) HealthCheck(ctx context.Context) error { + req, err := http.NewRequestWithContext(ctx, "GET", a.baseURL.String()+"/v1/models", nil) + if err != nil { + return fmt.Errorf("failed to create request: %w", err) + } + + resp, err := a.httpClient.Do(req) + if err != nil { + return fmt.Errorf("failed to reach backend: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("backend returned status %d", resp.StatusCode) + } + + return nil +} + +// openaiModelsResponse represents the response from /v1/models +type openaiModelsResponse struct { + Data []openaiModel `json:"data"` +} + +type openaiModel struct { + ID string `json:"id"` + Object string `json:"object"` + OwnedBy string `json:"owned_by"` + Created int64 `json:"created"` +} + +// ListModels returns all models available from this backend +func (a *Adapter) ListModels(ctx context.Context) ([]backends.Model, error) { + req, err := http.NewRequestWithContext(ctx, "GET", a.baseURL.String()+"/v1/models", nil) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + resp, err := a.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to list models: %w", err) + } + defer resp.Body.Close() + + var listResp openaiModelsResponse + if err := json.NewDecoder(resp.Body).Decode(&listResp); err != nil { + return nil, fmt.Errorf("failed to decode response: %w", err) + } + + models := make([]backends.Model, len(listResp.Data)) + for i, m := range listResp.Data { + models[i] = backends.Model{ + ID: m.ID, + Name: m.ID, + } + } + + return models, nil +} + +// Chat sends a non-streaming chat request +func (a *Adapter) Chat(ctx context.Context, req 
*backends.ChatRequest) (*backends.ChatChunk, error) { + if err := req.Validate(); err != nil { + return nil, fmt.Errorf("invalid request: %w", err) + } + + openaiReq := a.convertChatRequest(req) + openaiReq["stream"] = false + + body, err := json.Marshal(openaiReq) + if err != nil { + return nil, fmt.Errorf("failed to marshal request: %w", err) + } + + httpReq, err := http.NewRequestWithContext(ctx, "POST", a.baseURL.String()+"/v1/chat/completions", bytes.NewReader(body)) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + httpReq.Header.Set("Content-Type", "application/json") + + resp, err := a.httpClient.Do(httpReq) + if err != nil { + return nil, fmt.Errorf("chat request failed: %w", err) + } + defer resp.Body.Close() + + var openaiResp openaiChatResponse + if err := json.NewDecoder(resp.Body).Decode(&openaiResp); err != nil { + return nil, fmt.Errorf("failed to decode response: %w", err) + } + + return a.convertChatResponse(&openaiResp), nil +} + +// StreamChat sends a streaming chat request +func (a *Adapter) StreamChat(ctx context.Context, req *backends.ChatRequest) (<-chan backends.ChatChunk, error) { + if err := req.Validate(); err != nil { + return nil, fmt.Errorf("invalid request: %w", err) + } + + openaiReq := a.convertChatRequest(req) + openaiReq["stream"] = true + + body, err := json.Marshal(openaiReq) + if err != nil { + return nil, fmt.Errorf("failed to marshal request: %w", err) + } + + httpReq, err := http.NewRequestWithContext(ctx, "POST", a.baseURL.String()+"/v1/chat/completions", bytes.NewReader(body)) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + httpReq.Header.Set("Content-Type", "application/json") + httpReq.Header.Set("Accept", "text/event-stream") + + // Use a client without timeout for streaming + client := &http.Client{} + resp, err := client.Do(httpReq) + if err != nil { + return nil, fmt.Errorf("chat request failed: %w", err) + } + + chunkCh := make(chan backends.ChatChunk) + + go func() { + defer close(chunkCh) + defer resp.Body.Close() + + a.parseSSEStream(ctx, resp.Body, chunkCh) + }() + + return chunkCh, nil +} + +// parseSSEStream parses Server-Sent Events and emits ChatChunks +func (a *Adapter) parseSSEStream(ctx context.Context, body io.Reader, chunkCh chan<- backends.ChatChunk) { + scanner := bufio.NewScanner(body) + + // Track accumulated tool call arguments + toolCallArgs := make(map[int]string) + + for scanner.Scan() { + select { + case <-ctx.Done(): + return + default: + } + + line := scanner.Text() + + // Skip empty lines and comments + if line == "" || strings.HasPrefix(line, ":") { + continue + } + + // Parse SSE data line + if !strings.HasPrefix(line, "data: ") { + continue + } + + data := strings.TrimPrefix(line, "data: ") + + // Check for stream end + if data == "[DONE]" { + chunkCh <- backends.ChatChunk{Done: true} + return + } + + var streamResp openaiStreamResponse + if err := json.Unmarshal([]byte(data), &streamResp); err != nil { + chunkCh <- backends.ChatChunk{Error: fmt.Sprintf("failed to parse SSE data: %v", err)} + continue + } + + chunk := a.convertStreamResponse(&streamResp, toolCallArgs) + chunkCh <- chunk + + if chunk.Done { + return + } + } + + if err := scanner.Err(); err != nil && ctx.Err() == nil { + chunkCh <- backends.ChatChunk{Error: fmt.Sprintf("stream error: %v", err)} + } +} + +// Info returns detailed information about the backend +func (a *Adapter) Info(ctx context.Context) backends.BackendInfo { + info := backends.BackendInfo{ + Type: 
a.config.Type, + BaseURL: a.config.BaseURL, + Capabilities: a.Capabilities(), + } + + // Try to reach the models endpoint + if err := a.HealthCheck(ctx); err != nil { + info.Status = backends.BackendStatusDisconnected + info.Error = err.Error() + return info + } + + info.Status = backends.BackendStatusConnected + return info +} + +// Embed generates embeddings for the given input +func (a *Adapter) Embed(ctx context.Context, model string, input []string) ([][]float64, error) { + body, err := json.Marshal(map[string]interface{}{ + "model": model, + "input": input, + }) + if err != nil { + return nil, fmt.Errorf("failed to marshal request: %w", err) + } + + req, err := http.NewRequestWithContext(ctx, "POST", a.baseURL.String()+"/v1/embeddings", bytes.NewReader(body)) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + req.Header.Set("Content-Type", "application/json") + + resp, err := a.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("embed request failed: %w", err) + } + defer resp.Body.Close() + + var embedResp struct { + Data []struct { + Embedding []float64 `json:"embedding"` + Index int `json:"index"` + } `json:"data"` + } + if err := json.NewDecoder(resp.Body).Decode(&embedResp); err != nil { + return nil, fmt.Errorf("failed to decode response: %w", err) + } + + embeddings := make([][]float64, len(embedResp.Data)) + for _, d := range embedResp.Data { + embeddings[d.Index] = d.Embedding + } + + return embeddings, nil +} + +// OpenAI API response types + +type openaiChatResponse struct { + ID string `json:"id"` + Object string `json:"object"` + Created int64 `json:"created"` + Model string `json:"model"` + Choices []openaiChoice `json:"choices"` + Usage *openaiUsage `json:"usage,omitempty"` +} + +type openaiChoice struct { + Index int `json:"index"` + Message *openaiMessage `json:"message,omitempty"` + Delta *openaiMessage `json:"delta,omitempty"` + FinishReason string `json:"finish_reason,omitempty"` +} + +type openaiMessage struct { + Role string `json:"role,omitempty"` + Content string `json:"content,omitempty"` + ToolCalls []openaiToolCall `json:"tool_calls,omitempty"` +} + +type openaiToolCall struct { + ID string `json:"id,omitempty"` + Index int `json:"index,omitempty"` + Type string `json:"type,omitempty"` + Function struct { + Name string `json:"name,omitempty"` + Arguments string `json:"arguments,omitempty"` + } `json:"function"` +} + +type openaiUsage struct { + PromptTokens int `json:"prompt_tokens"` + CompletionTokens int `json:"completion_tokens"` + TotalTokens int `json:"total_tokens"` +} + +type openaiStreamResponse struct { + ID string `json:"id"` + Object string `json:"object"` + Created int64 `json:"created"` + Model string `json:"model"` + Choices []openaiChoice `json:"choices"` +} + +// convertChatRequest converts a backends.ChatRequest to OpenAI format +func (a *Adapter) convertChatRequest(req *backends.ChatRequest) map[string]interface{} { + messages := make([]map[string]interface{}, len(req.Messages)) + for i, msg := range req.Messages { + m := map[string]interface{}{ + "role": msg.Role, + "content": msg.Content, + } + if msg.Name != "" { + m["name"] = msg.Name + } + if msg.ToolCallID != "" { + m["tool_call_id"] = msg.ToolCallID + } + messages[i] = m + } + + openaiReq := map[string]interface{}{ + "model": req.Model, + "messages": messages, + } + + // Add optional parameters + if req.Temperature != nil { + openaiReq["temperature"] = *req.Temperature + } + if req.TopP != nil { + openaiReq["top_p"] = *req.TopP + } + if 
req.MaxTokens != nil { + openaiReq["max_tokens"] = *req.MaxTokens + } + if len(req.Tools) > 0 { + openaiReq["tools"] = req.Tools + } + + return openaiReq +} + +// convertChatResponse converts an OpenAI response to backends.ChatChunk +func (a *Adapter) convertChatResponse(resp *openaiChatResponse) *backends.ChatChunk { + chunk := &backends.ChatChunk{ + Model: resp.Model, + Done: true, + } + + if len(resp.Choices) > 0 { + choice := resp.Choices[0] + if choice.Message != nil { + msg := &backends.ChatMessage{ + Role: choice.Message.Role, + Content: choice.Message.Content, + } + + // Convert tool calls + for _, tc := range choice.Message.ToolCalls { + msg.ToolCalls = append(msg.ToolCalls, backends.ToolCall{ + ID: tc.ID, + Type: tc.Type, + Function: struct { + Name string `json:"name"` + Arguments string `json:"arguments"` + }{ + Name: tc.Function.Name, + Arguments: tc.Function.Arguments, + }, + }) + } + + chunk.Message = msg + } + + if choice.FinishReason != "" { + chunk.DoneReason = choice.FinishReason + } + } + + if resp.Usage != nil { + chunk.PromptEvalCount = resp.Usage.PromptTokens + chunk.EvalCount = resp.Usage.CompletionTokens + } + + return chunk +} + +// convertStreamResponse converts an OpenAI stream response to backends.ChatChunk +func (a *Adapter) convertStreamResponse(resp *openaiStreamResponse, toolCallArgs map[int]string) backends.ChatChunk { + chunk := backends.ChatChunk{ + Model: resp.Model, + } + + if len(resp.Choices) > 0 { + choice := resp.Choices[0] + + if choice.FinishReason != "" { + chunk.Done = true + chunk.DoneReason = choice.FinishReason + } + + if choice.Delta != nil { + msg := &backends.ChatMessage{ + Role: choice.Delta.Role, + Content: choice.Delta.Content, + } + + // Handle streaming tool calls + for _, tc := range choice.Delta.ToolCalls { + // Accumulate arguments + if tc.Function.Arguments != "" { + toolCallArgs[tc.Index] += tc.Function.Arguments + } + + // Only add tool call when we have the initial info + if tc.ID != "" || tc.Function.Name != "" { + msg.ToolCalls = append(msg.ToolCalls, backends.ToolCall{ + ID: tc.ID, + Type: tc.Type, + Function: struct { + Name string `json:"name"` + Arguments string `json:"arguments"` + }{ + Name: tc.Function.Name, + Arguments: toolCallArgs[tc.Index], + }, + }) + } + } + + chunk.Message = msg + } + } + + return chunk +} diff --git a/backend/internal/backends/openai/adapter_test.go b/backend/internal/backends/openai/adapter_test.go new file mode 100644 index 0000000..4b70dc3 --- /dev/null +++ b/backend/internal/backends/openai/adapter_test.go @@ -0,0 +1,594 @@ +package openai + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "net/http/httptest" + "testing" + "time" + + "vessel-backend/internal/backends" +) + +func TestAdapter_Type(t *testing.T) { + tests := []struct { + name string + backendType backends.BackendType + expectedType backends.BackendType + }{ + {"llamacpp type", backends.BackendTypeLlamaCpp, backends.BackendTypeLlamaCpp}, + {"lmstudio type", backends.BackendTypeLMStudio, backends.BackendTypeLMStudio}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + adapter, _ := NewAdapter(backends.BackendConfig{ + Type: tt.backendType, + BaseURL: "http://localhost:8081", + }) + + if adapter.Type() != tt.expectedType { + t.Errorf("Type() = %v, want %v", adapter.Type(), tt.expectedType) + } + }) + } +} + +func TestAdapter_Config(t *testing.T) { + cfg := backends.BackendConfig{ + Type: backends.BackendTypeLlamaCpp, + BaseURL: "http://localhost:8081", + Enabled: true, + } + + adapter, _ := 
NewAdapter(cfg) + got := adapter.Config() + + if got.Type != cfg.Type { + t.Errorf("Config().Type = %v, want %v", got.Type, cfg.Type) + } + if got.BaseURL != cfg.BaseURL { + t.Errorf("Config().BaseURL = %v, want %v", got.BaseURL, cfg.BaseURL) + } +} + +func TestAdapter_Capabilities(t *testing.T) { + t.Run("llamacpp capabilities", func(t *testing.T) { + adapter, _ := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeLlamaCpp, + BaseURL: "http://localhost:8081", + }) + + caps := adapter.Capabilities() + + if !caps.CanListModels { + t.Error("llama.cpp adapter should support listing models") + } + if caps.CanPullModels { + t.Error("llama.cpp adapter should NOT support pulling models") + } + if caps.CanDeleteModels { + t.Error("llama.cpp adapter should NOT support deleting models") + } + if caps.CanCreateModels { + t.Error("llama.cpp adapter should NOT support creating models") + } + if !caps.CanStreamChat { + t.Error("llama.cpp adapter should support streaming chat") + } + if !caps.CanEmbed { + t.Error("llama.cpp adapter should support embeddings") + } + }) + + t.Run("lmstudio capabilities", func(t *testing.T) { + adapter, _ := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeLMStudio, + BaseURL: "http://localhost:1234", + }) + + caps := adapter.Capabilities() + + if !caps.CanListModels { + t.Error("LM Studio adapter should support listing models") + } + if caps.CanPullModels { + t.Error("LM Studio adapter should NOT support pulling models") + } + }) +} + +func TestAdapter_HealthCheck(t *testing.T) { + t.Run("healthy server", func(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/v1/models" { + json.NewEncoder(w).Encode(map[string]interface{}{ + "data": []map[string]string{{"id": "llama3.2:8b"}}, + }) + } + })) + defer server.Close() + + adapter, err := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeLlamaCpp, + BaseURL: server.URL, + }) + if err != nil { + t.Fatalf("Failed to create adapter: %v", err) + } + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + if err := adapter.HealthCheck(ctx); err != nil { + t.Errorf("HealthCheck() error = %v, want nil", err) + } + }) + + t.Run("unreachable server", func(t *testing.T) { + adapter, _ := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeLlamaCpp, + BaseURL: "http://localhost:19999", + }) + + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) + defer cancel() + + if err := adapter.HealthCheck(ctx); err == nil { + t.Error("HealthCheck() expected error for unreachable server") + } + }) +} + +func TestAdapter_ListModels(t *testing.T) { + t.Run("returns model list", func(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/v1/models" { + resp := map[string]interface{}{ + "data": []map[string]interface{}{ + { + "id": "llama3.2-8b-instruct", + "object": "model", + "owned_by": "local", + "created": 1700000000, + }, + { + "id": "mistral-7b-v0.2", + "object": "model", + "owned_by": "local", + "created": 1700000001, + }, + }, + } + json.NewEncoder(w).Encode(resp) + } + })) + defer server.Close() + + adapter, _ := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeLlamaCpp, + BaseURL: server.URL, + }) + + ctx := context.Background() + models, err := adapter.ListModels(ctx) + if err != nil { + t.Fatalf("ListModels() error = %v", err) + } + + if len(models) != 2 { + 
t.Errorf("ListModels() returned %d models, want 2", len(models)) + } + + if models[0].ID != "llama3.2-8b-instruct" { + t.Errorf("First model ID = %q, want %q", models[0].ID, "llama3.2-8b-instruct") + } + }) + + t.Run("handles empty model list", func(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/v1/models" { + resp := map[string]interface{}{ + "data": []map[string]interface{}{}, + } + json.NewEncoder(w).Encode(resp) + } + })) + defer server.Close() + + adapter, _ := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeLlamaCpp, + BaseURL: server.URL, + }) + + models, err := adapter.ListModels(context.Background()) + if err != nil { + t.Fatalf("ListModels() error = %v", err) + } + + if len(models) != 0 { + t.Errorf("ListModels() returned %d models, want 0", len(models)) + } + }) +} + +func TestAdapter_Chat(t *testing.T) { + t.Run("non-streaming chat", func(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/v1/chat/completions" && r.Method == "POST" { + var req map[string]interface{} + json.NewDecoder(r.Body).Decode(&req) + + // Check stream is false + if stream, ok := req["stream"].(bool); ok && stream { + t.Error("Expected stream=false for non-streaming chat") + } + + resp := map[string]interface{}{ + "id": "chatcmpl-123", + "object": "chat.completion", + "created": 1700000000, + "model": "llama3.2:8b", + "choices": []map[string]interface{}{ + { + "index": 0, + "message": map[string]interface{}{ + "role": "assistant", + "content": "Hello! How can I help you?", + }, + "finish_reason": "stop", + }, + }, + "usage": map[string]int{ + "prompt_tokens": 10, + "completion_tokens": 8, + "total_tokens": 18, + }, + } + json.NewEncoder(w).Encode(resp) + } + })) + defer server.Close() + + adapter, _ := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeLlamaCpp, + BaseURL: server.URL, + }) + + req := &backends.ChatRequest{ + Model: "llama3.2:8b", + Messages: []backends.ChatMessage{ + {Role: "user", Content: "Hello"}, + }, + } + + resp, err := adapter.Chat(context.Background(), req) + if err != nil { + t.Fatalf("Chat() error = %v", err) + } + + if !resp.Done { + t.Error("Chat() response.Done = false, want true") + } + + if resp.Message == nil || resp.Message.Content != "Hello! How can I help you?" 
{ + t.Errorf("Chat() response content unexpected: %+v", resp.Message) + } + }) +} + +func TestAdapter_StreamChat(t *testing.T) { + t.Run("streaming chat with SSE", func(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/v1/chat/completions" && r.Method == "POST" { + var req map[string]interface{} + json.NewDecoder(r.Body).Decode(&req) + + // Check stream is true + if stream, ok := req["stream"].(bool); !ok || !stream { + t.Error("Expected stream=true for streaming chat") + } + + w.Header().Set("Content-Type", "text/event-stream") + w.Header().Set("Cache-Control", "no-cache") + flusher := w.(http.Flusher) + + // Send SSE chunks + chunks := []string{ + `{"id":"chatcmpl-1","choices":[{"delta":{"role":"assistant","content":"Hello"}}]}`, + `{"id":"chatcmpl-1","choices":[{"delta":{"content":"!"}}]}`, + `{"id":"chatcmpl-1","choices":[{"delta":{},"finish_reason":"stop"}]}`, + } + + for _, chunk := range chunks { + fmt.Fprintf(w, "data: %s\n\n", chunk) + flusher.Flush() + } + fmt.Fprintf(w, "data: [DONE]\n\n") + flusher.Flush() + } + })) + defer server.Close() + + adapter, _ := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeLlamaCpp, + BaseURL: server.URL, + }) + + streaming := true + req := &backends.ChatRequest{ + Model: "llama3.2:8b", + Messages: []backends.ChatMessage{ + {Role: "user", Content: "Hello"}, + }, + Stream: &streaming, + } + + chunkCh, err := adapter.StreamChat(context.Background(), req) + if err != nil { + t.Fatalf("StreamChat() error = %v", err) + } + + var chunks []backends.ChatChunk + for chunk := range chunkCh { + chunks = append(chunks, chunk) + } + + if len(chunks) < 2 { + t.Errorf("StreamChat() received %d chunks, want at least 2", len(chunks)) + } + + // Last chunk should be done + if !chunks[len(chunks)-1].Done { + t.Error("Last chunk should have Done=true") + } + }) + + t.Run("handles context cancellation", func(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/v1/chat/completions" { + w.Header().Set("Content-Type", "text/event-stream") + flusher := w.(http.Flusher) + + // Send first chunk then wait + fmt.Fprintf(w, "data: %s\n\n", `{"id":"chatcmpl-1","choices":[{"delta":{"role":"assistant","content":"Starting..."}}]}`) + flusher.Flush() + + // Wait long enough for context to be cancelled + time.Sleep(2 * time.Second) + } + })) + defer server.Close() + + adapter, _ := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeLlamaCpp, + BaseURL: server.URL, + }) + + ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) + defer cancel() + + streaming := true + req := &backends.ChatRequest{ + Model: "llama3.2:8b", + Messages: []backends.ChatMessage{ + {Role: "user", Content: "Hello"}, + }, + Stream: &streaming, + } + + chunkCh, err := adapter.StreamChat(ctx, req) + if err != nil { + t.Fatalf("StreamChat() error = %v", err) + } + + // Should receive at least one chunk before timeout + receivedChunks := 0 + for range chunkCh { + receivedChunks++ + } + + if receivedChunks == 0 { + t.Error("Expected to receive at least one chunk before cancellation") + } + }) +} + +func TestAdapter_Info(t *testing.T) { + t.Run("connected server", func(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/v1/models" { + json.NewEncoder(w).Encode(map[string]interface{}{ + "data": []map[string]string{{"id": 
"llama3.2:8b"}}, + }) + } + })) + defer server.Close() + + adapter, _ := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeLlamaCpp, + BaseURL: server.URL, + }) + + info := adapter.Info(context.Background()) + + if info.Type != backends.BackendTypeLlamaCpp { + t.Errorf("Info().Type = %v, want %v", info.Type, backends.BackendTypeLlamaCpp) + } + + if info.Status != backends.BackendStatusConnected { + t.Errorf("Info().Status = %v, want %v", info.Status, backends.BackendStatusConnected) + } + }) + + t.Run("disconnected server", func(t *testing.T) { + adapter, _ := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeLlamaCpp, + BaseURL: "http://localhost:19999", + }) + + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) + defer cancel() + + info := adapter.Info(ctx) + + if info.Status != backends.BackendStatusDisconnected { + t.Errorf("Info().Status = %v, want %v", info.Status, backends.BackendStatusDisconnected) + } + + if info.Error == "" { + t.Error("Info().Error should be set for disconnected server") + } + }) +} + +func TestAdapter_Embed(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/v1/embeddings" && r.Method == "POST" { + resp := map[string]interface{}{ + "data": []map[string]interface{}{ + {"embedding": []float64{0.1, 0.2, 0.3}, "index": 0}, + {"embedding": []float64{0.4, 0.5, 0.6}, "index": 1}, + }, + } + json.NewEncoder(w).Encode(resp) + } + })) + defer server.Close() + + adapter, _ := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeLlamaCpp, + BaseURL: server.URL, + }) + + embeddings, err := adapter.Embed(context.Background(), "nomic-embed-text", []string{"hello", "world"}) + if err != nil { + t.Fatalf("Embed() error = %v", err) + } + + if len(embeddings) != 2 { + t.Errorf("Embed() returned %d embeddings, want 2", len(embeddings)) + } + + if len(embeddings[0]) != 3 { + t.Errorf("First embedding has %d dimensions, want 3", len(embeddings[0])) + } +} + +func TestNewAdapter_Validation(t *testing.T) { + t.Run("invalid URL", func(t *testing.T) { + _, err := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeLlamaCpp, + BaseURL: "not-a-url", + }) + if err == nil { + t.Error("NewAdapter() should fail with invalid URL") + } + }) + + t.Run("wrong backend type", func(t *testing.T) { + _, err := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeOllama, + BaseURL: "http://localhost:8081", + }) + if err == nil { + t.Error("NewAdapter() should fail with Ollama backend type") + } + }) + + t.Run("valid llamacpp config", func(t *testing.T) { + adapter, err := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeLlamaCpp, + BaseURL: "http://localhost:8081", + }) + if err != nil { + t.Errorf("NewAdapter() error = %v", err) + } + if adapter == nil { + t.Error("NewAdapter() returned nil adapter") + } + }) + + t.Run("valid lmstudio config", func(t *testing.T) { + adapter, err := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeLMStudio, + BaseURL: "http://localhost:1234", + }) + if err != nil { + t.Errorf("NewAdapter() error = %v", err) + } + if adapter == nil { + t.Error("NewAdapter() returned nil adapter") + } + }) +} + +func TestAdapter_ToolCalls(t *testing.T) { + t.Run("streaming with tool calls", func(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/v1/chat/completions" { + w.Header().Set("Content-Type", "text/event-stream") + 
flusher := w.(http.Flusher) + + // Send tool call chunks + chunks := []string{ + `{"id":"chatcmpl-1","choices":[{"delta":{"role":"assistant","tool_calls":[{"id":"call_1","type":"function","function":{"name":"get_weather","arguments":""}}]}}]}`, + `{"id":"chatcmpl-1","choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"location\":"}}]}}]}`, + `{"id":"chatcmpl-1","choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"Tokyo\"}"}}]}}]}`, + `{"id":"chatcmpl-1","choices":[{"delta":{},"finish_reason":"tool_calls"}]}`, + } + + for _, chunk := range chunks { + fmt.Fprintf(w, "data: %s\n\n", chunk) + flusher.Flush() + } + fmt.Fprintf(w, "data: [DONE]\n\n") + flusher.Flush() + } + })) + defer server.Close() + + adapter, _ := NewAdapter(backends.BackendConfig{ + Type: backends.BackendTypeLlamaCpp, + BaseURL: server.URL, + }) + + streaming := true + req := &backends.ChatRequest{ + Model: "llama3.2:8b", + Messages: []backends.ChatMessage{ + {Role: "user", Content: "What's the weather in Tokyo?"}, + }, + Stream: &streaming, + Tools: []backends.Tool{ + { + Type: "function", + Function: struct { + Name string `json:"name"` + Description string `json:"description"` + Parameters map[string]interface{} `json:"parameters"` + }{ + Name: "get_weather", + Description: "Get weather for a location", + }, + }, + }, + } + + chunkCh, err := adapter.StreamChat(context.Background(), req) + if err != nil { + t.Fatalf("StreamChat() error = %v", err) + } + + var lastChunk backends.ChatChunk + for chunk := range chunkCh { + lastChunk = chunk + } + + if !lastChunk.Done { + t.Error("Last chunk should have Done=true") + } + }) +} diff --git a/backend/internal/backends/registry.go b/backend/internal/backends/registry.go new file mode 100644 index 0000000..65bce06 --- /dev/null +++ b/backend/internal/backends/registry.go @@ -0,0 +1,242 @@ +package backends + +import ( + "context" + "fmt" + "net/http" + "sync" + "time" +) + +// Registry manages multiple LLM backend instances +type Registry struct { + mu sync.RWMutex + backends map[BackendType]LLMBackend + active BackendType +} + +// NewRegistry creates a new backend registry +func NewRegistry() *Registry { + return &Registry{ + backends: make(map[BackendType]LLMBackend), + } +} + +// Register adds a backend to the registry +func (r *Registry) Register(backend LLMBackend) error { + r.mu.Lock() + defer r.mu.Unlock() + + bt := backend.Type() + if _, exists := r.backends[bt]; exists { + return fmt.Errorf("backend %q already registered", bt) + } + + r.backends[bt] = backend + return nil +} + +// Unregister removes a backend from the registry +func (r *Registry) Unregister(backendType BackendType) error { + r.mu.Lock() + defer r.mu.Unlock() + + if _, exists := r.backends[backendType]; !exists { + return fmt.Errorf("backend %q not registered", backendType) + } + + delete(r.backends, backendType) + + // Clear active if it was the unregistered backend + if r.active == backendType { + r.active = "" + } + + return nil +} + +// Get retrieves a backend by type +func (r *Registry) Get(backendType BackendType) (LLMBackend, bool) { + r.mu.RLock() + defer r.mu.RUnlock() + + backend, ok := r.backends[backendType] + return backend, ok +} + +// SetActive sets the active backend +func (r *Registry) SetActive(backendType BackendType) error { + r.mu.Lock() + defer r.mu.Unlock() + + if _, exists := r.backends[backendType]; !exists { + return fmt.Errorf("backend %q not registered", backendType) + } + + r.active = backendType + return nil +} + +// Active returns the 
currently active backend +func (r *Registry) Active() LLMBackend { + r.mu.RLock() + defer r.mu.RUnlock() + + if r.active == "" { + return nil + } + + return r.backends[r.active] +} + +// ActiveType returns the type of the currently active backend +func (r *Registry) ActiveType() BackendType { + r.mu.RLock() + defer r.mu.RUnlock() + + return r.active +} + +// Backends returns all registered backend types +func (r *Registry) Backends() []BackendType { + r.mu.RLock() + defer r.mu.RUnlock() + + types := make([]BackendType, 0, len(r.backends)) + for bt := range r.backends { + types = append(types, bt) + } + return types +} + +// AllInfo returns information about all registered backends +func (r *Registry) AllInfo(ctx context.Context) []BackendInfo { + r.mu.RLock() + defer r.mu.RUnlock() + + infos := make([]BackendInfo, 0, len(r.backends)) + for _, backend := range r.backends { + infos = append(infos, backend.Info(ctx)) + } + return infos +} + +// DiscoveryEndpoint represents a potential backend endpoint to probe +type DiscoveryEndpoint struct { + Type BackendType + BaseURL string +} + +// DiscoveryResult represents the result of probing an endpoint +type DiscoveryResult struct { + Type BackendType `json:"type"` + BaseURL string `json:"baseUrl"` + Available bool `json:"available"` + Version string `json:"version,omitempty"` + Error string `json:"error,omitempty"` +} + +// Discover probes the given endpoints to find available backends +func (r *Registry) Discover(ctx context.Context, endpoints []DiscoveryEndpoint) []DiscoveryResult { + results := make([]DiscoveryResult, len(endpoints)) + var wg sync.WaitGroup + + for i, endpoint := range endpoints { + wg.Add(1) + go func(idx int, ep DiscoveryEndpoint) { + defer wg.Done() + results[idx] = probeEndpoint(ctx, ep) + }(i, endpoint) + } + + wg.Wait() + return results +} + +// probeEndpoint checks if a backend is available at the given endpoint +func probeEndpoint(ctx context.Context, endpoint DiscoveryEndpoint) DiscoveryResult { + result := DiscoveryResult{ + Type: endpoint.Type, + BaseURL: endpoint.BaseURL, + } + + client := &http.Client{ + Timeout: 3 * time.Second, + } + + // Determine probe path based on backend type + var probePath string + switch endpoint.Type { + case BackendTypeOllama: + probePath = "/api/version" + case BackendTypeLlamaCpp, BackendTypeLMStudio: + probePath = "/v1/models" + default: + probePath = "/health" + } + + req, err := http.NewRequestWithContext(ctx, "GET", endpoint.BaseURL+probePath, nil) + if err != nil { + result.Error = err.Error() + return result + } + + resp, err := client.Do(req) + if err != nil { + result.Error = err.Error() + return result + } + defer resp.Body.Close() + + if resp.StatusCode == http.StatusOK { + result.Available = true + } else { + result.Error = fmt.Sprintf("HTTP %d", resp.StatusCode) + } + + return result +} + +// DefaultDiscoveryEndpoints returns the default endpoints to probe +func DefaultDiscoveryEndpoints() []DiscoveryEndpoint { + return []DiscoveryEndpoint{ + {Type: BackendTypeOllama, BaseURL: "http://localhost:11434"}, + {Type: BackendTypeLlamaCpp, BaseURL: "http://localhost:8081"}, + {Type: BackendTypeLlamaCpp, BaseURL: "http://localhost:8080"}, + {Type: BackendTypeLMStudio, BaseURL: "http://localhost:1234"}, + } +} + +// DiscoverAndRegister probes endpoints and registers available backends +func (r *Registry) DiscoverAndRegister(ctx context.Context, endpoints []DiscoveryEndpoint, adapterFactory AdapterFactory) []DiscoveryResult { + results := r.Discover(ctx, endpoints) + + for _, result 
:= range results { + if !result.Available { + continue + } + + // Skip if already registered + if _, exists := r.Get(result.Type); exists { + continue + } + + config := BackendConfig{ + Type: result.Type, + BaseURL: result.BaseURL, + Enabled: true, + } + + adapter, err := adapterFactory(config) + if err != nil { + continue + } + + r.Register(adapter) + } + + return results +} + +// AdapterFactory creates an LLMBackend from a config +type AdapterFactory func(config BackendConfig) (LLMBackend, error) diff --git a/backend/internal/backends/registry_test.go b/backend/internal/backends/registry_test.go new file mode 100644 index 0000000..f23ebde --- /dev/null +++ b/backend/internal/backends/registry_test.go @@ -0,0 +1,352 @@ +package backends + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + "time" +) + +func TestNewRegistry(t *testing.T) { + registry := NewRegistry() + + if registry == nil { + t.Fatal("NewRegistry() returned nil") + } + + if len(registry.Backends()) != 0 { + t.Errorf("New registry should have no backends, got %d", len(registry.Backends())) + } + + if registry.Active() != nil { + t.Error("New registry should have no active backend") + } +} + +func TestRegistry_Register(t *testing.T) { + registry := NewRegistry() + + // Create a mock backend + mock := &mockBackend{ + backendType: BackendTypeOllama, + config: BackendConfig{ + Type: BackendTypeOllama, + BaseURL: "http://localhost:11434", + }, + } + + err := registry.Register(mock) + if err != nil { + t.Fatalf("Register() error = %v", err) + } + + if len(registry.Backends()) != 1 { + t.Errorf("Registry should have 1 backend, got %d", len(registry.Backends())) + } + + // Should not allow duplicate registration + err = registry.Register(mock) + if err == nil { + t.Error("Register() should fail for duplicate backend type") + } +} + +func TestRegistry_Get(t *testing.T) { + registry := NewRegistry() + + mock := &mockBackend{ + backendType: BackendTypeOllama, + config: BackendConfig{ + Type: BackendTypeOllama, + BaseURL: "http://localhost:11434", + }, + } + registry.Register(mock) + + t.Run("existing backend", func(t *testing.T) { + backend, ok := registry.Get(BackendTypeOllama) + if !ok { + t.Error("Get() should return ok=true for registered backend") + } + if backend != mock { + t.Error("Get() returned wrong backend") + } + }) + + t.Run("non-existing backend", func(t *testing.T) { + _, ok := registry.Get(BackendTypeLlamaCpp) + if ok { + t.Error("Get() should return ok=false for unregistered backend") + } + }) +} + +func TestRegistry_SetActive(t *testing.T) { + registry := NewRegistry() + + mock := &mockBackend{ + backendType: BackendTypeOllama, + config: BackendConfig{ + Type: BackendTypeOllama, + BaseURL: "http://localhost:11434", + }, + } + registry.Register(mock) + + t.Run("set registered backend as active", func(t *testing.T) { + err := registry.SetActive(BackendTypeOllama) + if err != nil { + t.Errorf("SetActive() error = %v", err) + } + + active := registry.Active() + if active == nil { + t.Fatal("Active() returned nil after SetActive()") + } + if active.Type() != BackendTypeOllama { + t.Errorf("Active().Type() = %v, want %v", active.Type(), BackendTypeOllama) + } + }) + + t.Run("set unregistered backend as active", func(t *testing.T) { + err := registry.SetActive(BackendTypeLlamaCpp) + if err == nil { + t.Error("SetActive() should fail for unregistered backend") + } + }) +} + +func TestRegistry_ActiveType(t *testing.T) { + registry := NewRegistry() + + t.Run("no active backend", func(t 
*testing.T) { + activeType := registry.ActiveType() + if activeType != "" { + t.Errorf("ActiveType() = %q, want empty string", activeType) + } + }) + + t.Run("with active backend", func(t *testing.T) { + mock := &mockBackend{backendType: BackendTypeOllama} + registry.Register(mock) + registry.SetActive(BackendTypeOllama) + + activeType := registry.ActiveType() + if activeType != BackendTypeOllama { + t.Errorf("ActiveType() = %v, want %v", activeType, BackendTypeOllama) + } + }) +} + +func TestRegistry_Unregister(t *testing.T) { + registry := NewRegistry() + + mock := &mockBackend{backendType: BackendTypeOllama} + registry.Register(mock) + registry.SetActive(BackendTypeOllama) + + err := registry.Unregister(BackendTypeOllama) + if err != nil { + t.Errorf("Unregister() error = %v", err) + } + + if len(registry.Backends()) != 0 { + t.Error("Registry should have no backends after unregister") + } + + if registry.Active() != nil { + t.Error("Active backend should be nil after unregistering it") + } +} + +func TestRegistry_AllInfo(t *testing.T) { + registry := NewRegistry() + + mock1 := &mockBackend{ + backendType: BackendTypeOllama, + config: BackendConfig{Type: BackendTypeOllama, BaseURL: "http://localhost:11434"}, + info: BackendInfo{ + Type: BackendTypeOllama, + Status: BackendStatusConnected, + Version: "0.1.0", + }, + } + mock2 := &mockBackend{ + backendType: BackendTypeLlamaCpp, + config: BackendConfig{Type: BackendTypeLlamaCpp, BaseURL: "http://localhost:8081"}, + info: BackendInfo{ + Type: BackendTypeLlamaCpp, + Status: BackendStatusDisconnected, + }, + } + + registry.Register(mock1) + registry.Register(mock2) + registry.SetActive(BackendTypeOllama) + + infos := registry.AllInfo(context.Background()) + + if len(infos) != 2 { + t.Errorf("AllInfo() returned %d infos, want 2", len(infos)) + } + + // Find the active one + var foundActive bool + for _, info := range infos { + if info.Type == BackendTypeOllama { + foundActive = true + } + } + if !foundActive { + t.Error("AllInfo() did not include ollama backend info") + } +} + +func TestRegistry_Discover(t *testing.T) { + // Create test servers for each backend type + ollamaServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/api/version" || r.URL.Path == "/" { + json.NewEncoder(w).Encode(map[string]string{"version": "0.3.0"}) + } + })) + defer ollamaServer.Close() + + llamacppServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/v1/models" { + json.NewEncoder(w).Encode(map[string]interface{}{ + "data": []map[string]string{{"id": "llama3.2:8b"}}, + }) + } + if r.URL.Path == "/health" { + json.NewEncoder(w).Encode(map[string]string{"status": "ok"}) + } + })) + defer llamacppServer.Close() + + registry := NewRegistry() + + // Configure discovery endpoints + endpoints := []DiscoveryEndpoint{ + {Type: BackendTypeOllama, BaseURL: ollamaServer.URL}, + {Type: BackendTypeLlamaCpp, BaseURL: llamacppServer.URL}, + {Type: BackendTypeLMStudio, BaseURL: "http://localhost:19999"}, // Not running + } + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + results := registry.Discover(ctx, endpoints) + + if len(results) != 3 { + t.Errorf("Discover() returned %d results, want 3", len(results)) + } + + // Check Ollama was discovered + var ollamaResult *DiscoveryResult + for i := range results { + if results[i].Type == BackendTypeOllama { + ollamaResult = &results[i] + break + } + } + + if ollamaResult 
== nil { + t.Fatal("Ollama not found in discovery results") + } + if !ollamaResult.Available { + t.Errorf("Ollama should be available, error: %s", ollamaResult.Error) + } + + // Check LM Studio was not discovered + var lmstudioResult *DiscoveryResult + for i := range results { + if results[i].Type == BackendTypeLMStudio { + lmstudioResult = &results[i] + break + } + } + + if lmstudioResult == nil { + t.Fatal("LM Studio not found in discovery results") + } + if lmstudioResult.Available { + t.Error("LM Studio should NOT be available") + } +} + +func TestRegistry_DefaultEndpoints(t *testing.T) { + endpoints := DefaultDiscoveryEndpoints() + + if len(endpoints) < 3 { + t.Errorf("DefaultDiscoveryEndpoints() returned %d endpoints, want at least 3", len(endpoints)) + } + + // Check that all expected types are present + types := make(map[BackendType]bool) + for _, e := range endpoints { + types[e.Type] = true + } + + if !types[BackendTypeOllama] { + t.Error("DefaultDiscoveryEndpoints() missing Ollama") + } + if !types[BackendTypeLlamaCpp] { + t.Error("DefaultDiscoveryEndpoints() missing llama.cpp") + } + if !types[BackendTypeLMStudio] { + t.Error("DefaultDiscoveryEndpoints() missing LM Studio") + } +} + +// mockBackend implements LLMBackend for testing +type mockBackend struct { + backendType BackendType + config BackendConfig + info BackendInfo + healthErr error + models []Model +} + +func (m *mockBackend) Type() BackendType { + return m.backendType +} + +func (m *mockBackend) Config() BackendConfig { + return m.config +} + +func (m *mockBackend) HealthCheck(ctx context.Context) error { + return m.healthErr +} + +func (m *mockBackend) ListModels(ctx context.Context) ([]Model, error) { + return m.models, nil +} + +func (m *mockBackend) StreamChat(ctx context.Context, req *ChatRequest) (<-chan ChatChunk, error) { + ch := make(chan ChatChunk) + close(ch) + return ch, nil +} + +func (m *mockBackend) Chat(ctx context.Context, req *ChatRequest) (*ChatChunk, error) { + return &ChatChunk{Done: true}, nil +} + +func (m *mockBackend) Capabilities() BackendCapabilities { + return OllamaCapabilities() +} + +func (m *mockBackend) Info(ctx context.Context) BackendInfo { + if m.info.Type != "" { + return m.info + } + return BackendInfo{ + Type: m.backendType, + BaseURL: m.config.BaseURL, + Status: BackendStatusConnected, + Capabilities: m.Capabilities(), + } +} diff --git a/backend/internal/backends/types.go b/backend/internal/backends/types.go new file mode 100644 index 0000000..f7b7fd4 --- /dev/null +++ b/backend/internal/backends/types.go @@ -0,0 +1,245 @@ +package backends + +import ( + "errors" + "fmt" + "net/url" + "strings" +) + +// BackendType identifies the type of LLM backend +type BackendType string + +const ( + BackendTypeOllama BackendType = "ollama" + BackendTypeLlamaCpp BackendType = "llamacpp" + BackendTypeLMStudio BackendType = "lmstudio" +) + +// String returns the string representation of the backend type +func (bt BackendType) String() string { + return string(bt) +} + +// ParseBackendType parses a string into a BackendType +func ParseBackendType(s string) (BackendType, error) { + switch strings.ToLower(s) { + case "ollama": + return BackendTypeOllama, nil + case "llamacpp", "llama.cpp", "llama-cpp": + return BackendTypeLlamaCpp, nil + case "lmstudio", "lm-studio", "lm_studio": + return BackendTypeLMStudio, nil + default: + return "", fmt.Errorf("unknown backend type: %q", s) + } +} + +// BackendCapabilities describes what features a backend supports +type BackendCapabilities struct { + 
CanListModels bool `json:"canListModels"` + CanPullModels bool `json:"canPullModels"` + CanDeleteModels bool `json:"canDeleteModels"` + CanCreateModels bool `json:"canCreateModels"` + CanStreamChat bool `json:"canStreamChat"` + CanEmbed bool `json:"canEmbed"` +} + +// OllamaCapabilities returns the capabilities for Ollama backend +func OllamaCapabilities() BackendCapabilities { + return BackendCapabilities{ + CanListModels: true, + CanPullModels: true, + CanDeleteModels: true, + CanCreateModels: true, + CanStreamChat: true, + CanEmbed: true, + } +} + +// LlamaCppCapabilities returns the capabilities for llama.cpp backend +func LlamaCppCapabilities() BackendCapabilities { + return BackendCapabilities{ + CanListModels: true, + CanPullModels: false, + CanDeleteModels: false, + CanCreateModels: false, + CanStreamChat: true, + CanEmbed: true, + } +} + +// LMStudioCapabilities returns the capabilities for LM Studio backend +func LMStudioCapabilities() BackendCapabilities { + return BackendCapabilities{ + CanListModels: true, + CanPullModels: false, + CanDeleteModels: false, + CanCreateModels: false, + CanStreamChat: true, + CanEmbed: true, + } +} + +// BackendStatus represents the connection status of a backend +type BackendStatus string + +const ( + BackendStatusConnected BackendStatus = "connected" + BackendStatusDisconnected BackendStatus = "disconnected" + BackendStatusUnknown BackendStatus = "unknown" +) + +// BackendConfig holds configuration for a backend +type BackendConfig struct { + Type BackendType `json:"type"` + BaseURL string `json:"baseUrl"` + Enabled bool `json:"enabled"` +} + +// Validate checks if the backend config is valid +func (c BackendConfig) Validate() error { + if c.BaseURL == "" { + return errors.New("base URL is required") + } + + u, err := url.Parse(c.BaseURL) + if err != nil { + return fmt.Errorf("invalid base URL: %w", err) + } + + if u.Scheme == "" || u.Host == "" { + return errors.New("invalid URL: missing scheme or host") + } + + return nil +} + +// BackendInfo describes a configured backend and its current state +type BackendInfo struct { + Type BackendType `json:"type"` + BaseURL string `json:"baseUrl"` + Status BackendStatus `json:"status"` + Capabilities BackendCapabilities `json:"capabilities"` + Version string `json:"version,omitempty"` + Error string `json:"error,omitempty"` +} + +// IsConnected returns true if the backend is connected +func (bi BackendInfo) IsConnected() bool { + return bi.Status == BackendStatusConnected +} + +// Model represents an LLM model available from a backend +type Model struct { + ID string `json:"id"` + Name string `json:"name"` + Size int64 `json:"size,omitempty"` + ModifiedAt string `json:"modifiedAt,omitempty"` + Family string `json:"family,omitempty"` + QuantLevel string `json:"quantLevel,omitempty"` + Capabilities []string `json:"capabilities,omitempty"` + Metadata map[string]string `json:"metadata,omitempty"` +} + +// HasCapability checks if the model has a specific capability +func (m Model) HasCapability(cap string) bool { + for _, c := range m.Capabilities { + if c == cap { + return true + } + } + return false +} + +// ChatMessage represents a message in a chat conversation +type ChatMessage struct { + Role string `json:"role"` + Content string `json:"content"` + Images []string `json:"images,omitempty"` + ToolCalls []ToolCall `json:"tool_calls,omitempty"` + ToolCallID string `json:"tool_call_id,omitempty"` + Name string `json:"name,omitempty"` +} + +var validRoles = map[string]bool{ + "user": true, + "assistant": 
true, + "system": true, + "tool": true, +} + +// Validate checks if the chat message is valid +func (m ChatMessage) Validate() error { + if m.Role == "" { + return errors.New("role is required") + } + if !validRoles[m.Role] { + return fmt.Errorf("invalid role: %q", m.Role) + } + return nil +} + +// ToolCall represents a tool invocation +type ToolCall struct { + ID string `json:"id"` + Type string `json:"type"` + Function struct { + Name string `json:"name"` + Arguments string `json:"arguments"` + } `json:"function"` +} + +// Tool represents a tool definition +type Tool struct { + Type string `json:"type"` + Function struct { + Name string `json:"name"` + Description string `json:"description"` + Parameters map[string]interface{} `json:"parameters"` + } `json:"function"` +} + +// ChatRequest represents a chat completion request +type ChatRequest struct { + Model string `json:"model"` + Messages []ChatMessage `json:"messages"` + Stream *bool `json:"stream,omitempty"` + Temperature *float64 `json:"temperature,omitempty"` + TopP *float64 `json:"top_p,omitempty"` + MaxTokens *int `json:"max_tokens,omitempty"` + Tools []Tool `json:"tools,omitempty"` + Options map[string]any `json:"options,omitempty"` +} + +// Validate checks if the chat request is valid +func (r ChatRequest) Validate() error { + if r.Model == "" { + return errors.New("model is required") + } + if len(r.Messages) == 0 { + return errors.New("at least one message is required") + } + for i, msg := range r.Messages { + if err := msg.Validate(); err != nil { + return fmt.Errorf("message %d: %w", i, err) + } + } + return nil +} + +// ChatChunk represents a streaming chat response chunk +type ChatChunk struct { + Model string `json:"model"` + CreatedAt string `json:"created_at,omitempty"` + Message *ChatMessage `json:"message,omitempty"` + Done bool `json:"done"` + DoneReason string `json:"done_reason,omitempty"` + + // Token counts (final chunk only) + PromptEvalCount int `json:"prompt_eval_count,omitempty"` + EvalCount int `json:"eval_count,omitempty"` + + // Error information + Error string `json:"error,omitempty"` +} diff --git a/backend/internal/backends/types_test.go b/backend/internal/backends/types_test.go new file mode 100644 index 0000000..bd37c2d --- /dev/null +++ b/backend/internal/backends/types_test.go @@ -0,0 +1,323 @@ +package backends + +import ( + "testing" +) + +func TestBackendType_String(t *testing.T) { + tests := []struct { + name string + bt BackendType + expected string + }{ + {"ollama type", BackendTypeOllama, "ollama"}, + {"llamacpp type", BackendTypeLlamaCpp, "llamacpp"}, + {"lmstudio type", BackendTypeLMStudio, "lmstudio"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := tt.bt.String(); got != tt.expected { + t.Errorf("BackendType.String() = %v, want %v", got, tt.expected) + } + }) + } +} + +func TestParseBackendType(t *testing.T) { + tests := []struct { + name string + input string + expected BackendType + expectErr bool + }{ + {"parse ollama", "ollama", BackendTypeOllama, false}, + {"parse llamacpp", "llamacpp", BackendTypeLlamaCpp, false}, + {"parse lmstudio", "lmstudio", BackendTypeLMStudio, false}, + {"parse llama.cpp alias", "llama.cpp", BackendTypeLlamaCpp, false}, + {"parse llama-cpp alias", "llama-cpp", BackendTypeLlamaCpp, false}, + {"parse unknown", "unknown", "", true}, + {"parse empty", "", "", true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := ParseBackendType(tt.input) + if (err != nil) != tt.expectErr { + 
t.Errorf("ParseBackendType() error = %v, expectErr %v", err, tt.expectErr) + return + } + if got != tt.expected { + t.Errorf("ParseBackendType() = %v, want %v", got, tt.expected) + } + }) + } +} + +func TestBackendCapabilities(t *testing.T) { + t.Run("ollama capabilities", func(t *testing.T) { + caps := OllamaCapabilities() + + if !caps.CanListModels { + t.Error("Ollama should be able to list models") + } + if !caps.CanPullModels { + t.Error("Ollama should be able to pull models") + } + if !caps.CanDeleteModels { + t.Error("Ollama should be able to delete models") + } + if !caps.CanCreateModels { + t.Error("Ollama should be able to create models") + } + if !caps.CanStreamChat { + t.Error("Ollama should be able to stream chat") + } + if !caps.CanEmbed { + t.Error("Ollama should be able to embed") + } + }) + + t.Run("llamacpp capabilities", func(t *testing.T) { + caps := LlamaCppCapabilities() + + if !caps.CanListModels { + t.Error("llama.cpp should be able to list models") + } + if caps.CanPullModels { + t.Error("llama.cpp should NOT be able to pull models") + } + if caps.CanDeleteModels { + t.Error("llama.cpp should NOT be able to delete models") + } + if caps.CanCreateModels { + t.Error("llama.cpp should NOT be able to create models") + } + if !caps.CanStreamChat { + t.Error("llama.cpp should be able to stream chat") + } + if !caps.CanEmbed { + t.Error("llama.cpp should be able to embed") + } + }) + + t.Run("lmstudio capabilities", func(t *testing.T) { + caps := LMStudioCapabilities() + + if !caps.CanListModels { + t.Error("LM Studio should be able to list models") + } + if caps.CanPullModels { + t.Error("LM Studio should NOT be able to pull models") + } + if caps.CanDeleteModels { + t.Error("LM Studio should NOT be able to delete models") + } + if caps.CanCreateModels { + t.Error("LM Studio should NOT be able to create models") + } + if !caps.CanStreamChat { + t.Error("LM Studio should be able to stream chat") + } + if !caps.CanEmbed { + t.Error("LM Studio should be able to embed") + } + }) +} + +func TestBackendConfig_Validate(t *testing.T) { + tests := []struct { + name string + config BackendConfig + expectErr bool + }{ + { + name: "valid ollama config", + config: BackendConfig{ + Type: BackendTypeOllama, + BaseURL: "http://localhost:11434", + }, + expectErr: false, + }, + { + name: "valid llamacpp config", + config: BackendConfig{ + Type: BackendTypeLlamaCpp, + BaseURL: "http://localhost:8081", + }, + expectErr: false, + }, + { + name: "empty base URL", + config: BackendConfig{ + Type: BackendTypeOllama, + BaseURL: "", + }, + expectErr: true, + }, + { + name: "invalid URL", + config: BackendConfig{ + Type: BackendTypeOllama, + BaseURL: "not-a-url", + }, + expectErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.config.Validate() + if (err != nil) != tt.expectErr { + t.Errorf("BackendConfig.Validate() error = %v, expectErr %v", err, tt.expectErr) + } + }) + } +} + +func TestModel_HasCapability(t *testing.T) { + model := Model{ + ID: "llama3.2:8b", + Name: "llama3.2:8b", + Capabilities: []string{"chat", "vision", "tools"}, + } + + tests := []struct { + name string + capability string + expected bool + }{ + {"has chat", "chat", true}, + {"has vision", "vision", true}, + {"has tools", "tools", true}, + {"no thinking", "thinking", false}, + {"no code", "code", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := model.HasCapability(tt.capability); got != tt.expected { + 
+				t.Errorf("Model.HasCapability(%q) = %v, want %v", tt.capability, got, tt.expected)
+			}
+		})
+	}
+}
+
+func TestChatMessage_Validation(t *testing.T) {
+	tests := []struct {
+		name      string
+		msg       ChatMessage
+		expectErr bool
+	}{
+		{
+			name:      "valid user message",
+			msg:       ChatMessage{Role: "user", Content: "Hello"},
+			expectErr: false,
+		},
+		{
+			name:      "valid assistant message",
+			msg:       ChatMessage{Role: "assistant", Content: "Hi there"},
+			expectErr: false,
+		},
+		{
+			name:      "valid system message",
+			msg:       ChatMessage{Role: "system", Content: "You are helpful"},
+			expectErr: false,
+		},
+		{
+			name:      "invalid role",
+			msg:       ChatMessage{Role: "invalid", Content: "Hello"},
+			expectErr: true,
+		},
+		{
+			name:      "empty role",
+			msg:       ChatMessage{Role: "", Content: "Hello"},
+			expectErr: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			err := tt.msg.Validate()
+			if (err != nil) != tt.expectErr {
+				t.Errorf("ChatMessage.Validate() error = %v, expectErr %v", err, tt.expectErr)
+			}
+		})
+	}
+}
+
+func TestChatRequest_Validation(t *testing.T) {
+	streaming := true
+
+	tests := []struct {
+		name      string
+		req       ChatRequest
+		expectErr bool
+	}{
+		{
+			name: "valid request",
+			req: ChatRequest{
+				Model: "llama3.2:8b",
+				Messages: []ChatMessage{
+					{Role: "user", Content: "Hello"},
+				},
+				Stream: &streaming,
+			},
+			expectErr: false,
+		},
+		{
+			name: "empty model",
+			req: ChatRequest{
+				Model: "",
+				Messages: []ChatMessage{
+					{Role: "user", Content: "Hello"},
+				},
+			},
+			expectErr: true,
+		},
+		{
+			name: "empty messages",
+			req: ChatRequest{
+				Model:    "llama3.2:8b",
+				Messages: []ChatMessage{},
+			},
+			expectErr: true,
+		},
+		{
+			name: "nil messages",
+			req: ChatRequest{
+				Model:    "llama3.2:8b",
+				Messages: nil,
+			},
+			expectErr: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			err := tt.req.Validate()
+			if (err != nil) != tt.expectErr {
+				t.Errorf("ChatRequest.Validate() error = %v, expectErr %v", err, tt.expectErr)
+			}
+		})
+	}
+}
+
+func TestBackendInfo(t *testing.T) {
+	info := BackendInfo{
+		Type:         BackendTypeOllama,
+		BaseURL:      "http://localhost:11434",
+		Status:       BackendStatusConnected,
+		Capabilities: OllamaCapabilities(),
+		Version:      "0.1.0",
+	}
+
+	if !info.IsConnected() {
+		t.Error("BackendInfo.IsConnected() should be true when status is connected")
+	}
+
+	info.Status = BackendStatusDisconnected
+	if info.IsConnected() {
+		t.Error("BackendInfo.IsConnected() should be false when status is disconnected")
+	}
+}
diff --git a/frontend/src/lib/components/chat/BranchNavigator.svelte b/frontend/src/lib/components/chat/BranchNavigator.svelte
index 291e83d..148e368 100644
--- a/frontend/src/lib/components/chat/BranchNavigator.svelte
+++ b/frontend/src/lib/components/chat/BranchNavigator.svelte
@@ -2,7 +2,6 @@
 	/**
 	 * BranchNavigator - Navigate between message branches
 	 * Shows "< 1/3 >" style navigation for sibling messages
-	 * Supports keyboard navigation with arrow keys when focused
 	 */
 
 	import type { BranchInfo } from '$lib/types';
@@ -15,7 +14,7 @@
 	const { branchInfo, onSwitch }: Props = $props();
 
 	// Reference to the navigator container for focus management
-	let navigatorRef: HTMLDivElement | null = $state(null);
+	let navigatorRef: HTMLElement | null = $state(null);
 
 	// Track transition state for smooth animations
 	let isTransitioning = $state(false);
@@ -52,7 +51,7 @@
 	}
 
 	/**
-	 * Handle keyboard navigation when the component is focused
+	 * Handle keyboard navigation with arrow keys
 	 */
 	function handleKeydown(event: KeyboardEvent): void {
 		if (event.key === 'ArrowLeft' && canGoPrev) {
@@ -65,11 +64,10 @@
 	}
-
+
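
Usage sketch (illustrative only, not part of the patch): given the ParseBackendType, ChatRequest, and ChatMessage definitions introduced in types.go above, a caller might validate input before dispatching to a backend roughly as follows. The import path and package location are assumptions; because the package lives under internal/, this snippet would have to sit inside the same Go module as backend/.

package main

import (
	"fmt"
	"log"

	// Import path assumed; adjust to the backend module's real name.
	"vessel/internal/backends"
)

func main() {
	// "llama.cpp" and "llama-cpp" are accepted aliases per TestParseBackendType.
	bt, err := backends.ParseBackendType("llama.cpp")
	if err != nil {
		log.Fatal(err)
	}

	req := backends.ChatRequest{
		Model: "llama3.2:8b",
		Messages: []backends.ChatMessage{
			{Role: "user", Content: "Hello"},
		},
	}
	// Validate rejects an empty model, an empty message list, and unknown roles.
	if err := req.Validate(); err != nil {
		log.Fatal(err)
	}

	fmt.Printf("dispatching request to %s backend\n", bt.String())
}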