feat(v2): complete multi-LLM providers, TUI redesign, and advanced agent features
Multi-LLM Provider Support: - Add llm-core crate with LlmProvider trait abstraction - Implement Anthropic Claude API client with streaming - Implement OpenAI API client with streaming - Add token counting with SimpleTokenCounter and ClaudeTokenCounter - Add retry logic with exponential backoff and jitter Borderless TUI Redesign: - Rewrite theme system with terminal capability detection (Full/Unicode256/Basic) - Add provider tabs component with keybind switching [1]/[2]/[3] - Implement vim-modal input (Normal/Insert/Visual/Command modes) - Redesign chat panel with timestamps and streaming indicators - Add multi-provider status bar with cost tracking - Add Nerd Font icons with graceful ASCII fallbacks - Add syntax highlighting (syntect) and markdown rendering (pulldown-cmark) Advanced Agent Features: - Add system prompt builder with configurable components - Enhance subagent orchestration with parallel execution - Add git integration module for safe command detection - Add streaming tool results via channels - Expand tool set: AskUserQuestion, TodoWrite, LS, MultiEdit, BashOutput, KillShell - Add WebSearch with provider abstraction Plugin System Enhancement: - Add full agent definition parsing from YAML frontmatter - Add skill system with progressive disclosure - Wire plugin hooks into HookManager 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
195
crates/llm/core/examples/token_counting.rs
Normal file
195
crates/llm/core/examples/token_counting.rs
Normal file
@@ -0,0 +1,195 @@
|
||||
//! Token counting example
|
||||
//!
|
||||
//! This example demonstrates how to use the token counting utilities
|
||||
//! to manage LLM context windows.
|
||||
//!
|
||||
//! Run with: cargo run --example token_counting -p llm-core
|
||||
|
||||
use llm_core::{
|
||||
ChatMessage, ClaudeTokenCounter, ContextWindow, SimpleTokenCounter, TokenCounter,
|
||||
};
|
||||
|
||||
fn main() {
|
||||
println!("=== Token Counting Example ===\n");
|
||||
|
||||
// Example 1: Basic token counting with SimpleTokenCounter
|
||||
println!("1. Basic Token Counting");
|
||||
println!("{}", "-".repeat(50));
|
||||
|
||||
let simple_counter = SimpleTokenCounter::new(8192);
|
||||
let text = "The quick brown fox jumps over the lazy dog.";
|
||||
|
||||
let token_count = simple_counter.count(text);
|
||||
println!("Text: \"{}\"", text);
|
||||
println!("Estimated tokens: {}", token_count);
|
||||
println!("Max context: {}\n", simple_counter.max_context());
|
||||
|
||||
// Example 2: Counting tokens in chat messages
|
||||
println!("2. Counting Tokens in Chat Messages");
|
||||
println!("{}", "-".repeat(50));
|
||||
|
||||
let messages = vec![
|
||||
ChatMessage::system("You are a helpful assistant that provides concise answers."),
|
||||
ChatMessage::user("What is the capital of France?"),
|
||||
ChatMessage::assistant("The capital of France is Paris."),
|
||||
ChatMessage::user("What is its population?"),
|
||||
];
|
||||
|
||||
let total_tokens = simple_counter.count_messages(&messages);
|
||||
println!("Number of messages: {}", messages.len());
|
||||
println!("Total tokens (with overhead): {}\n", total_tokens);
|
||||
|
||||
// Example 3: Using ClaudeTokenCounter for Claude models
|
||||
println!("3. Claude-Specific Token Counting");
|
||||
println!("{}", "-".repeat(50));
|
||||
|
||||
let claude_counter = ClaudeTokenCounter::new();
|
||||
let claude_total = claude_counter.count_messages(&messages);
|
||||
|
||||
println!("Claude counter max context: {}", claude_counter.max_context());
|
||||
println!("Claude estimated tokens: {}\n", claude_total);
|
||||
|
||||
// Example 4: Context window management
|
||||
println!("4. Context Window Management");
|
||||
println!("{}", "-".repeat(50));
|
||||
|
||||
let mut context = ContextWindow::new(8192);
|
||||
println!("Created context window with max: {} tokens", context.max());
|
||||
|
||||
// Simulate adding messages
|
||||
let conversation = vec![
|
||||
ChatMessage::user("Tell me about Rust programming."),
|
||||
ChatMessage::assistant(
|
||||
"Rust is a systems programming language focused on safety, \
|
||||
speed, and concurrency. It prevents common bugs like null pointer \
|
||||
dereferences and data races through its ownership system.",
|
||||
),
|
||||
ChatMessage::user("What are its main features?"),
|
||||
ChatMessage::assistant(
|
||||
"Rust's main features include: 1) Memory safety without garbage collection, \
|
||||
2) Zero-cost abstractions, 3) Fearless concurrency, 4) Pattern matching, \
|
||||
5) Type inference, and 6) A powerful macro system.",
|
||||
),
|
||||
];
|
||||
|
||||
for (i, msg) in conversation.iter().enumerate() {
|
||||
let tokens = simple_counter.count_messages(&[msg.clone()]);
|
||||
context.add_tokens(tokens);
|
||||
|
||||
let role = msg.role.as_str();
|
||||
let preview = msg
|
||||
.content
|
||||
.as_ref()
|
||||
.map(|c| {
|
||||
if c.len() > 50 {
|
||||
format!("{}...", &c[..50])
|
||||
} else {
|
||||
c.clone()
|
||||
}
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
println!(
|
||||
"Message {}: [{}] \"{}\"",
|
||||
i + 1,
|
||||
role,
|
||||
preview
|
||||
);
|
||||
println!(" Added {} tokens", tokens);
|
||||
println!(" Total used: {} / {}", context.used(), context.max());
|
||||
println!(" Usage: {:.1}%", context.usage_percent() * 100.0);
|
||||
println!(" Progress: {}\n", context.progress_bar(30));
|
||||
}
|
||||
|
||||
// Example 5: Checking context limits
|
||||
println!("5. Checking Context Limits");
|
||||
println!("{}", "-".repeat(50));
|
||||
|
||||
if context.is_near_limit(0.8) {
|
||||
println!("Warning: Context is over 80% full!");
|
||||
} else {
|
||||
println!("Context usage is below 80%");
|
||||
}
|
||||
|
||||
let remaining = context.remaining();
|
||||
println!("Remaining tokens: {}", remaining);
|
||||
|
||||
let new_message_tokens = 500;
|
||||
if context.has_room_for(new_message_tokens) {
|
||||
println!(
|
||||
"Can fit a message of {} tokens",
|
||||
new_message_tokens
|
||||
);
|
||||
} else {
|
||||
println!(
|
||||
"Cannot fit a message of {} tokens - would need to compact or start new context",
|
||||
new_message_tokens
|
||||
);
|
||||
}
|
||||
|
||||
// Example 6: Different counter variants
|
||||
println!("\n6. Using Different Counter Variants");
|
||||
println!("{}", "-".repeat(50));
|
||||
|
||||
let counter_8k = SimpleTokenCounter::default_8k();
|
||||
let counter_32k = SimpleTokenCounter::with_32k();
|
||||
let counter_128k = SimpleTokenCounter::with_128k();
|
||||
|
||||
println!("8k context counter: {} tokens", counter_8k.max_context());
|
||||
println!("32k context counter: {} tokens", counter_32k.max_context());
|
||||
println!("128k context counter: {} tokens", counter_128k.max_context());
|
||||
|
||||
let haiku = ClaudeTokenCounter::haiku();
|
||||
let sonnet = ClaudeTokenCounter::sonnet();
|
||||
let opus = ClaudeTokenCounter::opus();
|
||||
|
||||
println!("\nClaude Haiku: {} tokens", haiku.max_context());
|
||||
println!("Claude Sonnet: {} tokens", sonnet.max_context());
|
||||
println!("Claude Opus: {} tokens", opus.max_context());
|
||||
|
||||
// Example 7: Managing context for a long conversation
|
||||
println!("\n7. Long Conversation Simulation");
|
||||
println!("{}", "-".repeat(50));
|
||||
|
||||
let mut long_context = ContextWindow::new(4096); // Smaller context for demo
|
||||
let counter = SimpleTokenCounter::new(4096);
|
||||
|
||||
let mut message_count = 0;
|
||||
let mut compaction_count = 0;
|
||||
|
||||
// Simulate 20 exchanges
|
||||
for i in 0..20 {
|
||||
let user_msg = ChatMessage::user(format!(
|
||||
"This is user message number {} asking a question.",
|
||||
i + 1
|
||||
));
|
||||
let assistant_msg = ChatMessage::assistant(format!(
|
||||
"This is assistant response number {} providing a detailed answer with multiple sentences to make it longer.",
|
||||
i + 1
|
||||
));
|
||||
|
||||
let tokens_needed = counter.count_messages(&[user_msg, assistant_msg]);
|
||||
|
||||
if !long_context.has_room_for(tokens_needed) {
|
||||
println!(
|
||||
"After {} messages, context is full ({}%). Compacting...",
|
||||
message_count,
|
||||
(long_context.usage_percent() * 100.0) as u32
|
||||
);
|
||||
// In a real scenario, we would compact the conversation
|
||||
// For now, just reset
|
||||
long_context.reset();
|
||||
compaction_count += 1;
|
||||
}
|
||||
|
||||
long_context.add_tokens(tokens_needed);
|
||||
message_count += 2;
|
||||
}
|
||||
|
||||
println!("Total messages: {}", message_count);
|
||||
println!("Compactions needed: {}", compaction_count);
|
||||
println!("Final context usage: {:.1}%", long_context.usage_percent() * 100.0);
|
||||
println!("Final progress: {}", long_context.progress_bar(40));
|
||||
|
||||
println!("\n=== Example Complete ===");
|
||||
}
|
||||
Reference in New Issue
Block a user