feat(v2): complete multi-LLM providers, TUI redesign, and advanced agent features

Multi-LLM Provider Support:
- Add llm-core crate with LlmProvider trait abstraction (rough shape sketched after this list)
- Implement Anthropic Claude API client with streaming
- Implement OpenAI API client with streaming
- Add token counting with SimpleTokenCounter and ClaudeTokenCounter
- Add retry logic with exponential backoff and jitter
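
The provider abstraction is easiest to see from the Ollama implementation further down in this diff. Here is a rough sketch of the trait's shape as exercised there; the actual llm-core definition may carry additional methods and bounds:

```rust
// Sketch only: reconstructed from how OllamaClient implements the trait in
// this commit; the authoritative definition lives in the llm-core crate.
use async_trait::async_trait;
use llm_core::{ChatMessage, ChatOptions, ChunkStream, LlmError, Tool};

#[async_trait]
pub trait LlmProvider {
    /// Short provider identifier, e.g. "ollama" or "anthropic".
    fn name(&self) -> &str;
    /// Model this client is currently configured to use.
    fn model(&self) -> &str;
    /// Stream a chat completion as incremental chunks.
    async fn chat_stream(
        &self,
        messages: &[ChatMessage],
        options: &ChatOptions,
        tools: Option<&[Tool]>,
    ) -> Result<ChunkStream, LlmError>;
}
```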

Borderless TUI Redesign:
- Rewrite theme system with terminal capability detection (Full/Unicode256/Basic; sketch after this list)
- Add provider tabs component with keybind switching [1]/[2]/[3]
- Implement vim-modal input (Normal/Insert/Visual/Command modes)
- Redesign chat panel with timestamps and streaming indicators
- Add multi-provider status bar with cost tracking
- Add Nerd Font icons with graceful ASCII fallbacks
- Add syntax highlighting (syntect) and markdown rendering (pulldown-cmark)
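
The three capability tiers decide how much the UI is allowed to draw: truecolor plus Nerd Font glyphs, a 256-color palette with Unicode, or plain ASCII. The detection below is an illustrative sketch; the real theme code is not in this excerpt, and the env-var heuristics are assumptions:

```rust
// Illustrative sketch of the Full / Unicode256 / Basic split; the actual
// detection logic in the theme system is not shown in this commit excerpt.
use std::env;

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TermCapability {
    /// Truecolor, Unicode, and Nerd Font glyphs.
    Full,
    /// 256-color palette with Unicode, ASCII-safe icons.
    Unicode256,
    /// Plain ASCII, minimal color.
    Basic,
}

fn detect_capability() -> TermCapability {
    let colorterm = env::var("COLORTERM").unwrap_or_default();
    let term = env::var("TERM").unwrap_or_default();
    if colorterm.contains("truecolor") || colorterm.contains("24bit") {
        TermCapability::Full
    } else if term.contains("256color") {
        TermCapability::Unicode256
    } else {
        TermCapability::Basic
    }
}
```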

Advanced Agent Features:
- Add system prompt builder with configurable components
- Enhance subagent orchestration with parallel execution
- Add git integration module for safe command detection
- Add streaming tool results via channels (pattern sketched after this list)
- Expand tool set: AskUserQuestion, TodoWrite, LS, MultiEdit, BashOutput, KillShell
- Add WebSearch with provider abstraction
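
Streaming tool results over a channel lets a long-running tool report output while the agent loop keeps rendering. A minimal sketch of the pattern; `ToolEvent` and the fake output are illustrative, not the crate's actual types:

```rust
// Minimal sketch of streaming tool output over a tokio mpsc channel.
use tokio::sync::mpsc;

#[derive(Debug)]
enum ToolEvent {
    Chunk(String),
    Done { exit_ok: bool },
}

async fn run_tool_streaming(tx: mpsc::Sender<ToolEvent>) {
    // A real tool would forward stdout/stderr as it arrives; here we fake it.
    for part in ["line 1\n", "line 2\n"] {
        if tx.send(ToolEvent::Chunk(part.to_string())).await.is_err() {
            return; // receiver dropped, stop producing
        }
    }
    let _ = tx.send(ToolEvent::Done { exit_ok: true }).await;
}

#[tokio::main]
async fn main() {
    let (tx, mut rx) = mpsc::channel(32);
    tokio::spawn(run_tool_streaming(tx));
    while let Some(event) = rx.recv().await {
        println!("{event:?}"); // the TUI would render these incrementally
    }
}
```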

Plugin System Enhancement:
- Add full agent definition parsing from YAML frontmatter (parsing step sketched below)
- Add skill system with progressive disclosure
- Wire plugin hooks into HookManager
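
An agent definition here is the `---`-delimited YAML block at the top of an agent file. A sketch of splitting and parsing that frontmatter; the `AgentDef` fields are hypothetical, as the real schema is not shown in this excerpt:

```rust
// Sketch of frontmatter parsing; field names are illustrative only.
use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct AgentDef {
    name: String,
    description: Option<String>,
    #[serde(default)]
    tools: Vec<String>,
}

/// Split a `---`-delimited frontmatter block from the body and parse it.
fn parse_agent(source: &str) -> Option<(AgentDef, &str)> {
    let rest = source.strip_prefix("---")?;
    let (front, body) = rest.split_once("\n---")?;
    let def: AgentDef = serde_yaml::from_str(front).ok()?;
    Some((def, body.trim_start_matches('\n')))
}
```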

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-02 17:24:14 +01:00
parent 09c8c9d83e
commit 10c8e2baae
67 changed files with 11444 additions and 626 deletions

View File

@@ -6,11 +6,13 @@ license.workspace = true
rust-version.workspace = true
[dependencies]
llm-core = { path = "../core" }
reqwest = { version = "0.12", features = ["json", "stream"] }
tokio = { version = "1.39", features = ["rt-multi-thread"] }
tokio = { version = "1.39", features = ["rt-multi-thread", "macros"] }
futures = "0.3"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
thiserror = "1"
bytes = "1"
tokio-stream = "0.1.17"
async-trait = "0.1"

View File

@@ -1,14 +1,20 @@
use crate::types::{ChatMessage, ChatResponseChunk, Tool};
-use futures::{Stream, TryStreamExt};
+use futures::{Stream, StreamExt, TryStreamExt};
use reqwest::Client;
-use serde::Serialize;
+use serde::{Deserialize, Serialize};
use thiserror::Error;
use async_trait::async_trait;
use llm_core::{
LlmProvider, ProviderInfo, LlmError, ChatOptions, ChunkStream,
ProviderStatus, AccountInfo, UsageStats, ModelInfo,
};
#[derive(Debug, Clone)]
pub struct OllamaClient {
http: Client,
base_url: String, // e.g. "http://localhost:11434"
api_key: Option<String>, // For Ollama Cloud authentication
current_model: String, // Default model for this client
}
#[derive(Debug, Clone, Default)]
@@ -27,12 +33,24 @@ pub enum OllamaError {
Protocol(String),
}
// Convert OllamaError to LlmError
impl From<OllamaError> for LlmError {
fn from(err: OllamaError) -> Self {
match err {
OllamaError::Http(e) => LlmError::Http(e.to_string()),
OllamaError::Json(e) => LlmError::Json(e.to_string()),
OllamaError::Protocol(msg) => LlmError::Provider(msg),
}
}
}
impl OllamaClient {
pub fn new(base_url: impl Into<String>) -> Self {
Self {
http: Client::new(),
base_url: base_url.into().trim_end_matches('/').to_string(),
api_key: None,
current_model: "qwen3:8b".to_string(),
}
}
@@ -41,12 +59,17 @@ impl OllamaClient {
self
}
pub fn with_model(mut self, model: impl Into<String>) -> Self {
self.current_model = model.into();
self
}
pub fn with_cloud() -> Self {
// Same API, different base
Self::new("https://ollama.com")
}
-pub async fn chat_stream(
+pub async fn chat_stream_raw(
&self,
messages: &[ChatMessage],
opts: &OllamaOptions,
@@ -99,3 +122,208 @@ impl OllamaClient {
Ok(out)
}
}
// ============================================================================
// LlmProvider Trait Implementation
// ============================================================================
#[async_trait]
impl LlmProvider for OllamaClient {
fn name(&self) -> &str {
"ollama"
}
fn model(&self) -> &str {
&self.current_model
}
async fn chat_stream(
&self,
messages: &[llm_core::ChatMessage],
options: &ChatOptions,
tools: Option<&[llm_core::Tool]>,
) -> Result<ChunkStream, LlmError> {
// Convert llm_core messages to Ollama messages
let ollama_messages: Vec<ChatMessage> = messages.iter().map(|m| m.into()).collect();
// Convert llm_core tools to Ollama tools if present
let ollama_tools: Option<Vec<Tool>> = tools.map(|tools| {
tools.iter().map(|t| Tool {
tool_type: t.tool_type.clone(),
function: crate::types::ToolFunction {
name: t.function.name.clone(),
description: t.function.description.clone(),
parameters: crate::types::ToolParameters {
param_type: t.function.parameters.param_type.clone(),
properties: t.function.parameters.properties.clone(),
required: t.function.parameters.required.clone(),
},
},
}).collect()
});
let opts = OllamaOptions {
model: options.model.clone(),
stream: true,
};
// Make the request and build the body inline to avoid lifetime issues
#[derive(Serialize)]
struct Body<'a> {
model: &'a str,
messages: &'a [ChatMessage],
stream: bool,
#[serde(skip_serializing_if = "Option::is_none")]
tools: Option<&'a [Tool]>,
}
let url = format!("{}/api/chat", self.base_url);
let body = Body {
model: &opts.model,
messages: &ollama_messages,
stream: true,
tools: ollama_tools.as_deref(),
};
let mut req = self.http.post(url).json(&body);
// Add Authorization header if API key is present
if let Some(ref key) = self.api_key {
req = req.header("Authorization", format!("Bearer {}", key));
}
let resp = req.send().await
.map_err(|e| LlmError::Http(e.to_string()))?;
let bytes_stream = resp.bytes_stream();
// NDJSON parser: split by '\n', parse each as JSON and stream the results
let converted_stream = bytes_stream
.map(|result| {
result.map_err(|e| LlmError::Http(e.to_string()))
})
.map_ok(|bytes| {
// Convert the chunk to a UTF-8 string and own it
let txt = String::from_utf8_lossy(&bytes).into_owned();
// Parse each non-empty line into a ChatResponseChunk
let results: Vec<Result<llm_core::StreamChunk, LlmError>> = txt
.lines()
.filter_map(|line| {
let trimmed = line.trim();
if trimmed.is_empty() {
None
} else {
Some(
serde_json::from_str::<ChatResponseChunk>(trimmed)
.map(|chunk| llm_core::StreamChunk::from(chunk))
.map_err(|e| LlmError::Json(e.to_string())),
)
}
})
.collect();
futures::stream::iter(results)
})
.try_flatten();
Ok(Box::pin(converted_stream))
}
}
// ============================================================================
// ProviderInfo Trait Implementation
// ============================================================================
#[derive(Debug, Clone, Deserialize)]
struct OllamaModelList {
models: Vec<OllamaModel>,
}
#[derive(Debug, Clone, Deserialize)]
struct OllamaModel {
name: String,
#[serde(default)]
modified_at: Option<String>,
#[serde(default)]
size: Option<u64>,
#[serde(default)]
digest: Option<String>,
#[serde(default)]
details: Option<OllamaModelDetails>,
}
#[derive(Debug, Clone, Deserialize)]
struct OllamaModelDetails {
#[serde(default)]
format: Option<String>,
#[serde(default)]
family: Option<String>,
#[serde(default)]
parameter_size: Option<String>,
}
#[async_trait]
impl ProviderInfo for OllamaClient {
async fn status(&self) -> Result<ProviderStatus, LlmError> {
// Try to ping the Ollama server
let url = format!("{}/api/tags", self.base_url);
let reachable = self.http.get(&url).send().await.is_ok();
Ok(ProviderStatus {
provider: "ollama".to_string(),
authenticated: self.api_key.is_some(),
account: None, // Ollama is local, no account info
model: self.current_model.clone(),
endpoint: self.base_url.clone(),
reachable,
message: if reachable {
Some("Connected to Ollama".to_string())
} else {
Some("Cannot reach Ollama server".to_string())
},
})
}
async fn account_info(&self) -> Result<Option<AccountInfo>, LlmError> {
// Ollama is a local service, no account info
Ok(None)
}
async fn usage_stats(&self) -> Result<Option<UsageStats>, LlmError> {
// Ollama doesn't track usage statistics
Ok(None)
}
async fn list_models(&self) -> Result<Vec<ModelInfo>, LlmError> {
let url = format!("{}/api/tags", self.base_url);
let mut req = self.http.get(&url);
// Add Authorization header if API key is present
if let Some(ref key) = self.api_key {
req = req.header("Authorization", format!("Bearer {}", key));
}
let resp = req.send().await
.map_err(|e| LlmError::Http(e.to_string()))?;
let model_list: OllamaModelList = resp.json().await
.map_err(|e| LlmError::Json(e.to_string()))?;
// Convert Ollama models to ModelInfo
let models = model_list.models.into_iter().map(|m| {
ModelInfo {
id: m.name.clone(),
display_name: Some(m.name.clone()),
description: m.details.as_ref()
.and_then(|d| d.family.as_ref())
.map(|f| format!("{} model", f)),
context_window: None, // Ollama doesn't provide this in list
max_output_tokens: None,
supports_tools: true, // Most Ollama models support tools
supports_vision: false, // Would need to check model capabilities
input_price_per_mtok: None, // Local models are free
output_price_per_mtok: None,
}
}).collect();
Ok(models)
}
}

View File

@@ -1,5 +1,13 @@
pub mod client;
pub mod types;
-pub use client::{OllamaClient, OllamaOptions};
+pub use client::{OllamaClient, OllamaOptions, OllamaError};
pub use types::{ChatMessage, ChatResponseChunk, Tool, ToolCall, ToolFunction, ToolParameters, FunctionCall};
// Re-export llm-core traits and types for convenience
pub use llm_core::{
LlmProvider, ProviderInfo, LlmError,
ChatOptions, StreamChunk, ToolCallDelta, Usage,
ProviderStatus, AccountInfo, UsageStats, ModelInfo,
Role,
};

View File

@@ -1,5 +1,6 @@
use serde::{Deserialize, Serialize};
use serde_json::Value;
use llm_core::{StreamChunk, ToolCallDelta};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatMessage {
@@ -63,3 +64,67 @@ pub struct ChunkMessage {
#[serde(skip_serializing_if = "Option::is_none")]
pub tool_calls: Option<Vec<ToolCall>>,
}
// ============================================================================
// Conversions to/from llm-core types
// ============================================================================
/// Convert from llm_core::ChatMessage to Ollama's ChatMessage
impl From<&llm_core::ChatMessage> for ChatMessage {
fn from(msg: &llm_core::ChatMessage) -> Self {
let role = msg.role.as_str().to_string();
// Convert tool_calls if present
let tool_calls = msg.tool_calls.as_ref().map(|calls| {
calls.iter().map(|tc| ToolCall {
id: Some(tc.id.clone()),
call_type: Some(tc.call_type.clone()),
function: FunctionCall {
name: tc.function.name.clone(),
arguments: tc.function.arguments.clone(),
},
}).collect()
});
ChatMessage {
role,
content: msg.content.clone(),
tool_calls,
}
}
}
/// Convert from Ollama's ChatResponseChunk to llm_core::StreamChunk
impl From<ChatResponseChunk> for StreamChunk {
fn from(chunk: ChatResponseChunk) -> Self {
let done = chunk.done.unwrap_or(false);
let content = chunk.message.as_ref().and_then(|m| m.content.clone());
// Convert tool calls to deltas
let tool_calls = chunk.message.as_ref().and_then(|m| {
m.tool_calls.as_ref().map(|calls| {
calls.iter().enumerate().map(|(index, tc)| {
// Serialize arguments back to JSON string for delta
let arguments_delta = serde_json::to_string(&tc.function.arguments).ok();
ToolCallDelta {
index,
id: tc.id.clone(),
function_name: Some(tc.function.name.clone()),
arguments_delta,
}
}).collect()
})
});
// Ollama doesn't provide per-chunk usage stats, only in final chunk
let usage = None;
StreamChunk {
content,
tool_calls,
done,
usage,
}
}
}
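
Not part of the diff: a minimal usage sketch of the trait-based client built only from the constructors and trait methods shown above. It assumes the crate is published under the name `ollama` and that `LlmError` derives `Debug`:

```rust
// Usage sketch; crate name and error bounds are assumptions.
use ollama::{LlmError, LlmProvider, OllamaClient, ProviderInfo};

#[tokio::main]
async fn main() -> Result<(), LlmError> {
    // Builder-style construction, as exposed by OllamaClient above.
    let client = OllamaClient::new("http://localhost:11434").with_model("qwen3:8b");

    // Trait methods come from LlmProvider / ProviderInfo, re-exported by the crate.
    println!("provider = {}, model = {}", client.name(), client.model());

    let status = client.status().await?;
    println!("reachable = {}, note = {:?}", status.reachable, status.message);

    for model in client.list_models().await? {
        println!("installed model: {}", model.id);
    }
    Ok(())
}
```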