feat(v2): complete multi-LLM providers, TUI redesign, and advanced agent features

Multi-LLM Provider Support:
- Add llm-core crate with LlmProvider trait abstraction
- Implement Anthropic Claude API client with streaming
- Implement OpenAI API client with streaming
- Add token counting with SimpleTokenCounter and ClaudeTokenCounter
- Add retry logic with exponential backoff and jitter

Borderless TUI Redesign:
- Rewrite theme system with terminal capability detection (Full/Unicode256/Basic)
- Add provider tabs component with keybind switching [1]/[2]/[3]
- Implement vim-modal input (Normal/Insert/Visual/Command modes)
- Redesign chat panel with timestamps and streaming indicators
- Add multi-provider status bar with cost tracking
- Add Nerd Font icons with graceful ASCII fallbacks
- Add syntax highlighting (syntect) and markdown rendering (pulldown-cmark)

Advanced Agent Features:
- Add system prompt builder with configurable components
- Enhance subagent orchestration with parallel execution
- Add git integration module for safe command detection
- Add streaming tool results via channels
- Expand tool set: AskUserQuestion, TodoWrite, LS, MultiEdit, BashOutput, KillShell
- Add WebSearch with provider abstraction

Plugin System Enhancement:
- Add full agent definition parsing from YAML frontmatter
- Add skill system with progressive disclosure
- Wire plugin hooks into HookManager

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-12-02 17:24:14 +01:00
parent 09c8c9d83e
commit 10c8e2baae
67 changed files with 11444 additions and 626 deletions

View File

@@ -0,0 +1,195 @@
//! Token counting example
//!
//! This example demonstrates how to use the token counting utilities
//! to manage LLM context windows.
//!
//! Run with: cargo run --example token_counting -p llm-core
use llm_core::{
ChatMessage, ClaudeTokenCounter, ContextWindow, SimpleTokenCounter, TokenCounter,
};
fn main() {
println!("=== Token Counting Example ===\n");
// Example 1: Basic token counting with SimpleTokenCounter
println!("1. Basic Token Counting");
println!("{}", "-".repeat(50));
let simple_counter = SimpleTokenCounter::new(8192);
let text = "The quick brown fox jumps over the lazy dog.";
let token_count = simple_counter.count(text);
println!("Text: \"{}\"", text);
println!("Estimated tokens: {}", token_count);
println!("Max context: {}\n", simple_counter.max_context());
// Example 2: Counting tokens in chat messages
println!("2. Counting Tokens in Chat Messages");
println!("{}", "-".repeat(50));
let messages = vec![
ChatMessage::system("You are a helpful assistant that provides concise answers."),
ChatMessage::user("What is the capital of France?"),
ChatMessage::assistant("The capital of France is Paris."),
ChatMessage::user("What is its population?"),
];
let total_tokens = simple_counter.count_messages(&messages);
println!("Number of messages: {}", messages.len());
println!("Total tokens (with overhead): {}\n", total_tokens);
// Example 3: Using ClaudeTokenCounter for Claude models
println!("3. Claude-Specific Token Counting");
println!("{}", "-".repeat(50));
let claude_counter = ClaudeTokenCounter::new();
let claude_total = claude_counter.count_messages(&messages);
println!("Claude counter max context: {}", claude_counter.max_context());
println!("Claude estimated tokens: {}\n", claude_total);
// Example 4: Context window management
println!("4. Context Window Management");
println!("{}", "-".repeat(50));
let mut context = ContextWindow::new(8192);
println!("Created context window with max: {} tokens", context.max());
// Simulate adding messages
let conversation = vec![
ChatMessage::user("Tell me about Rust programming."),
ChatMessage::assistant(
"Rust is a systems programming language focused on safety, \
speed, and concurrency. It prevents common bugs like null pointer \
dereferences and data races through its ownership system.",
),
ChatMessage::user("What are its main features?"),
ChatMessage::assistant(
"Rust's main features include: 1) Memory safety without garbage collection, \
2) Zero-cost abstractions, 3) Fearless concurrency, 4) Pattern matching, \
5) Type inference, and 6) A powerful macro system.",
),
];
for (i, msg) in conversation.iter().enumerate() {
let tokens = simple_counter.count_messages(&[msg.clone()]);
context.add_tokens(tokens);
let role = msg.role.as_str();
let preview = msg
.content
.as_ref()
.map(|c| {
if c.len() > 50 {
format!("{}...", &c[..50])
} else {
c.clone()
}
})
.unwrap_or_default();
println!(
"Message {}: [{}] \"{}\"",
i + 1,
role,
preview
);
println!(" Added {} tokens", tokens);
println!(" Total used: {} / {}", context.used(), context.max());
println!(" Usage: {:.1}%", context.usage_percent() * 100.0);
println!(" Progress: {}\n", context.progress_bar(30));
}
// Example 5: Checking context limits
println!("5. Checking Context Limits");
println!("{}", "-".repeat(50));
if context.is_near_limit(0.8) {
println!("Warning: Context is over 80% full!");
} else {
println!("Context usage is below 80%");
}
let remaining = context.remaining();
println!("Remaining tokens: {}", remaining);
let new_message_tokens = 500;
if context.has_room_for(new_message_tokens) {
println!(
"Can fit a message of {} tokens",
new_message_tokens
);
} else {
println!(
"Cannot fit a message of {} tokens - would need to compact or start new context",
new_message_tokens
);
}
// Example 6: Different counter variants
println!("\n6. Using Different Counter Variants");
println!("{}", "-".repeat(50));
let counter_8k = SimpleTokenCounter::default_8k();
let counter_32k = SimpleTokenCounter::with_32k();
let counter_128k = SimpleTokenCounter::with_128k();
println!("8k context counter: {} tokens", counter_8k.max_context());
println!("32k context counter: {} tokens", counter_32k.max_context());
println!("128k context counter: {} tokens", counter_128k.max_context());
let haiku = ClaudeTokenCounter::haiku();
let sonnet = ClaudeTokenCounter::sonnet();
let opus = ClaudeTokenCounter::opus();
println!("\nClaude Haiku: {} tokens", haiku.max_context());
println!("Claude Sonnet: {} tokens", sonnet.max_context());
println!("Claude Opus: {} tokens", opus.max_context());
// Example 7: Managing context for a long conversation
println!("\n7. Long Conversation Simulation");
println!("{}", "-".repeat(50));
let mut long_context = ContextWindow::new(4096); // Smaller context for demo
let counter = SimpleTokenCounter::new(4096);
let mut message_count = 0;
let mut compaction_count = 0;
// Simulate 20 exchanges
for i in 0..20 {
let user_msg = ChatMessage::user(format!(
"This is user message number {} asking a question.",
i + 1
));
let assistant_msg = ChatMessage::assistant(format!(
"This is assistant response number {} providing a detailed answer with multiple sentences to make it longer.",
i + 1
));
let tokens_needed = counter.count_messages(&[user_msg, assistant_msg]);
if !long_context.has_room_for(tokens_needed) {
println!(
"After {} messages, context is full ({}%). Compacting...",
message_count,
(long_context.usage_percent() * 100.0) as u32
);
// In a real scenario, we would compact the conversation
// For now, just reset
long_context.reset();
compaction_count += 1;
}
long_context.add_tokens(tokens_needed);
message_count += 2;
}
println!("Total messages: {}", message_count);
println!("Compactions needed: {}", compaction_count);
println!("Final context usage: {:.1}%", long_context.usage_percent() * 100.0);
println!("Final progress: {}", long_context.progress_bar(40));
println!("\n=== Example Complete ===");
}