feat(v2): complete multi-LLM providers, TUI redesign, and advanced agent features

Multi-LLM Provider Support:
- Add llm-core crate with LlmProvider trait abstraction
- Implement Anthropic Claude API client with streaming
- Implement OpenAI API client with streaming
- Add token counting with SimpleTokenCounter and ClaudeTokenCounter
- Add retry logic with exponential backoff and jitter

Borderless TUI Redesign:
- Rewrite theme system with terminal capability detection (Full/Unicode256/Basic)
- Add provider tabs component with keybind switching [1]/[2]/[3]
- Implement vim-modal input (Normal/Insert/Visual/Command modes)
- Redesign chat panel with timestamps and streaming indicators
- Add multi-provider status bar with cost tracking
- Add Nerd Font icons with graceful ASCII fallbacks
- Add syntax highlighting (syntect) and markdown rendering (pulldown-cmark)

Advanced Agent Features:
- Add system prompt builder with configurable components
- Enhance subagent orchestration with parallel execution
- Add git integration module for safe command detection
- Add streaming tool results via channels
- Expand tool set: AskUserQuestion, TodoWrite, LS, MultiEdit, BashOutput, KillShell
- Add WebSearch with provider abstraction

Plugin System Enhancement:
- Add full agent definition parsing from YAML frontmatter
- Add skill system with progressive disclosure
- Wire plugin hooks into HookManager

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-02 17:24:14 +01:00
parent 09c8c9d83e
commit 10c8e2baae
67 changed files with 11444 additions and 626 deletions
--- a/crates/llm/core/examples/token_counting.rs
+++ b/crates/llm/core/examples/token_counting.rs
@@ -0,0 +1,195 @@
+//! Token counting example
+//!
+//! This example demonstrates how to use the token counting utilities
+//! to manage LLM context windows.
+//!
+//! Run with: cargo run --example token_counting -p llm-core
+
+use llm_core::{
+    ChatMessage, ClaudeTokenCounter, ContextWindow, SimpleTokenCounter, TokenCounter,
+};
+
+/// Runs seven numbered token-counting demonstrations in order and prints
+/// every result to stdout. All counters and context windows are created
+/// locally from hard-coded sizes.
+fn main() {
+    println!("=== Token Counting Example ===\n");
+
+    // Example 1: Basic token counting with SimpleTokenCounter
+    println!("1. Basic Token Counting");
+    println!("{}", "-".repeat(50));
+
+    let simple_counter = SimpleTokenCounter::new(8192);
+    let text = "The quick brown fox jumps over the lazy dog.";
+
+    let token_count = simple_counter.count(text);
+    println!("Text: \"{}\"", text);
+    println!("Estimated tokens: {}", token_count);
+    println!("Max context: {}\n", simple_counter.max_context());
+
+    // Example 2: Counting tokens in chat messages
+    println!("2. Counting Tokens in Chat Messages");
+    println!("{}", "-".repeat(50));
+
+    let messages = vec![
+        ChatMessage::system("You are a helpful assistant that provides concise answers."),
+        ChatMessage::user("What is the capital of France?"),
+        ChatMessage::assistant("The capital of France is Paris."),
+        ChatMessage::user("What is its population?"),
+    ];
+
+    let total_tokens = simple_counter.count_messages(&messages);
+    println!("Number of messages: {}", messages.len());
+    println!("Total tokens (with overhead): {}\n", total_tokens);
+
+    // Example 3: Using ClaudeTokenCounter for Claude models
+    println!("3. Claude-Specific Token Counting");
+    println!("{}", "-".repeat(50));
+
+    let claude_counter = ClaudeTokenCounter::new();
+    let claude_total = claude_counter.count_messages(&messages);
+
+    println!("Claude counter max context: {}", claude_counter.max_context());
+    println!("Claude estimated tokens: {}\n", claude_total);
+
+    // Example 4: Context window management
+    println!("4. Context Window Management");
+    println!("{}", "-".repeat(50));
+
+    let mut context = ContextWindow::new(8192);
+    println!("Created context window with max: {} tokens", context.max());
+
+    // Simulate adding messages
+    let conversation = vec![
+        ChatMessage::user("Tell me about Rust programming."),
+        ChatMessage::assistant(
+            "Rust is a systems programming language focused on safety, \
+             speed, and concurrency. It prevents common bugs like null pointer \
+             dereferences and data races through its ownership system.",
+        ),
+        ChatMessage::user("What are its main features?"),
+        ChatMessage::assistant(
+            "Rust's main features include: 1) Memory safety without garbage collection, \
+             2) Zero-cost abstractions, 3) Fearless concurrency, 4) Pattern matching, \
+             5) Type inference, and 6) A powerful macro system.",
+        ),
+    ];
+
+    for (i, msg) in conversation.iter().enumerate() {
+        // Borrow the message as a one-element slice instead of cloning it
+        // into a temporary array just to count its tokens.
+        let tokens = simple_counter.count_messages(std::slice::from_ref(msg));
+        context.add_tokens(tokens);
+
+        let role = msg.role.as_str();
+        // Cut after 50 characters, on a char boundary: slicing at a raw byte
+        // offset (`&c[..50]`) panics inside a multi-byte UTF-8 character.
+        let preview = msg
+            .content
+            .as_ref()
+            .map(|c| match c.char_indices().nth(50) {
+                Some((cut, _)) => format!("{}...", &c[..cut]),
+                None => c.clone(),
+            })
+            .unwrap_or_default();
+
+        println!("Message {}: [{}] \"{}\"", i + 1, role, preview);
+        println!("  Added {} tokens", tokens);
+        println!("  Total used: {} / {}", context.used(), context.max());
+        println!("  Usage: {:.1}%", context.usage_percent() * 100.0);
+        println!("  Progress: {}\n", context.progress_bar(30));
+    }
+
+    // Example 5: Checking context limits
+    println!("5. Checking Context Limits");
+    println!("{}", "-".repeat(50));
+
+    // 0.8 is the usage fraction behind the "80%" wording printed below.
+    if context.is_near_limit(0.8) {
+        println!("Warning: Context is over 80% full!");
+    } else {
+        println!("Context usage is below 80%");
+    }
+
+    let remaining = context.remaining();
+    println!("Remaining tokens: {}", remaining);
+
+    let new_message_tokens = 500;
+    if context.has_room_for(new_message_tokens) {
+        println!("Can fit a message of {} tokens", new_message_tokens);
+    } else {
+        println!(
+            "Cannot fit a message of {} tokens - would need to compact or start new context",
+            new_message_tokens
+        );
+    }
+
+    // Example 6: Different counter variants
+    println!("\n6. Using Different Counter Variants");
+    println!("{}", "-".repeat(50));
+
+    let counter_8k = SimpleTokenCounter::default_8k();
+    let counter_32k = SimpleTokenCounter::with_32k();
+    let counter_128k = SimpleTokenCounter::with_128k();
+
+    println!("8k context counter: {} tokens", counter_8k.max_context());
+    println!("32k context counter: {} tokens", counter_32k.max_context());
+    println!("128k context counter: {} tokens", counter_128k.max_context());
+
+    let haiku = ClaudeTokenCounter::haiku();
+    let sonnet = ClaudeTokenCounter::sonnet();
+    let opus = ClaudeTokenCounter::opus();
+
+    println!("\nClaude Haiku: {} tokens", haiku.max_context());
+    println!("Claude Sonnet: {} tokens", sonnet.max_context());
+    println!("Claude Opus: {} tokens", opus.max_context());
+
+    // Example 7: Managing context for a long conversation
+    println!("\n7. Long Conversation Simulation");
+    println!("{}", "-".repeat(50));
+
+    let mut long_context = ContextWindow::new(4096); // Smaller context for demo
+    let counter = SimpleTokenCounter::new(4096);
+
+    let mut message_count = 0;
+    let mut compaction_count = 0;
+
+    // Simulate 20 exchanges
+    for i in 0..20 {
+        let user_msg = ChatMessage::user(format!(
+            "This is user message number {} asking a question.",
+            i + 1
+        ));
+        let assistant_msg = ChatMessage::assistant(format!(
+            "This is assistant response number {} providing a detailed answer with multiple sentences to make it longer.",
+            i + 1
+        ));
+
+        // Token cost of one full exchange (user message + assistant reply).
+        let tokens_needed = counter.count_messages(&[user_msg, assistant_msg]);
+
+        if !long_context.has_room_for(tokens_needed) {
+            println!(
+                "After {} messages, context is full ({}%). Compacting...",
+                message_count,
+                (long_context.usage_percent() * 100.0) as u32
+            );
+            // In a real scenario, we would compact the conversation
+            // For now, just reset
+            long_context.reset();
+            compaction_count += 1;
+        }
+
+        // Record the exchange whether or not a reset happened first; each
+        // exchange contributes two messages.
+        long_context.add_tokens(tokens_needed);
+        message_count += 2;
+    }
+
+    println!("Total messages: {}", message_count);
+    println!("Compactions needed: {}", compaction_count);
+    println!("Final context usage: {:.1}%", long_context.usage_percent() * 100.0);
+    println!("Final progress: {}", long_context.progress_bar(40));
+
+    println!("\n=== Example Complete ===");
+}