Multi-LLM Provider Support: - Add llm-core crate with LlmProvider trait abstraction - Implement Anthropic Claude API client with streaming - Implement OpenAI API client with streaming - Add token counting with SimpleTokenCounter and ClaudeTokenCounter - Add retry logic with exponential backoff and jitter Borderless TUI Redesign: - Rewrite theme system with terminal capability detection (Full/Unicode256/Basic) - Add provider tabs component with keybind switching [1]/[2]/[3] - Implement vim-modal input (Normal/Insert/Visual/Command modes) - Redesign chat panel with timestamps and streaming indicators - Add multi-provider status bar with cost tracking - Add Nerd Font icons with graceful ASCII fallbacks - Add syntax highlighting (syntect) and markdown rendering (pulldown-cmark) Advanced Agent Features: - Add system prompt builder with configurable components - Enhance subagent orchestration with parallel execution - Add git integration module for safe command detection - Add streaming tool results via channels - Expand tool set: AskUserQuestion, TodoWrite, LS, MultiEdit, BashOutput, KillShell - Add WebSearch with provider abstraction Plugin System Enhancement: - Add full agent definition parsing from YAML frontmatter - Add skill system with progressive disclosure - Wire plugin hooks into HookManager 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
196 lines
6.8 KiB
Rust
196 lines
6.8 KiB
Rust
//! Token counting example
//!
//! This example demonstrates how to use the token counting utilities
//! to manage LLM context windows.
//!
//! Run with: cargo run --example token_counting -p llm-core
use llm_core::{
    ChatMessage, ClaudeTokenCounter, ContextWindow, SimpleTokenCounter, TokenCounter,
};
|
fn main() {
|
|
println!("=== Token Counting Example ===\n");
|
|
|
|
// Example 1: Basic token counting with SimpleTokenCounter
|
|
println!("1. Basic Token Counting");
|
|
println!("{}", "-".repeat(50));
|
|
|
|
let simple_counter = SimpleTokenCounter::new(8192);
|
|
let text = "The quick brown fox jumps over the lazy dog.";
|
|
|
|
let token_count = simple_counter.count(text);
|
|
println!("Text: \"{}\"", text);
|
|
println!("Estimated tokens: {}", token_count);
|
|
println!("Max context: {}\n", simple_counter.max_context());
|
|
|
|
// Example 2: Counting tokens in chat messages
|
|
println!("2. Counting Tokens in Chat Messages");
|
|
println!("{}", "-".repeat(50));
|
|
|
|
let messages = vec![
|
|
ChatMessage::system("You are a helpful assistant that provides concise answers."),
|
|
ChatMessage::user("What is the capital of France?"),
|
|
ChatMessage::assistant("The capital of France is Paris."),
|
|
ChatMessage::user("What is its population?"),
|
|
];
|
|
|
|
let total_tokens = simple_counter.count_messages(&messages);
|
|
println!("Number of messages: {}", messages.len());
|
|
println!("Total tokens (with overhead): {}\n", total_tokens);
|
|
|
|
// Example 3: Using ClaudeTokenCounter for Claude models
|
|
println!("3. Claude-Specific Token Counting");
|
|
println!("{}", "-".repeat(50));
|
|
|
|
let claude_counter = ClaudeTokenCounter::new();
|
|
let claude_total = claude_counter.count_messages(&messages);
|
|
|
|
println!("Claude counter max context: {}", claude_counter.max_context());
|
|
println!("Claude estimated tokens: {}\n", claude_total);
|
|
|
|
// Example 4: Context window management
|
|
println!("4. Context Window Management");
|
|
println!("{}", "-".repeat(50));
|
|
|
|
let mut context = ContextWindow::new(8192);
|
|
println!("Created context window with max: {} tokens", context.max());
|
|
|
|
// Simulate adding messages
|
|
let conversation = vec![
|
|
ChatMessage::user("Tell me about Rust programming."),
|
|
ChatMessage::assistant(
|
|
"Rust is a systems programming language focused on safety, \
|
|
speed, and concurrency. It prevents common bugs like null pointer \
|
|
dereferences and data races through its ownership system.",
|
|
),
|
|
ChatMessage::user("What are its main features?"),
|
|
ChatMessage::assistant(
|
|
"Rust's main features include: 1) Memory safety without garbage collection, \
|
|
2) Zero-cost abstractions, 3) Fearless concurrency, 4) Pattern matching, \
|
|
5) Type inference, and 6) A powerful macro system.",
|
|
),
|
|
];
|
|
|
|
for (i, msg) in conversation.iter().enumerate() {
|
|
let tokens = simple_counter.count_messages(&[msg.clone()]);
|
|
context.add_tokens(tokens);
|
|
|
|
let role = msg.role.as_str();
|
|
let preview = msg
|
|
.content
|
|
.as_ref()
|
|
.map(|c| {
|
|
if c.len() > 50 {
|
|
format!("{}...", &c[..50])
|
|
} else {
|
|
c.clone()
|
|
}
|
|
})
|
|
.unwrap_or_default();
|
|
|
|
println!(
|
|
"Message {}: [{}] \"{}\"",
|
|
i + 1,
|
|
role,
|
|
preview
|
|
);
|
|
println!(" Added {} tokens", tokens);
|
|
println!(" Total used: {} / {}", context.used(), context.max());
|
|
println!(" Usage: {:.1}%", context.usage_percent() * 100.0);
|
|
println!(" Progress: {}\n", context.progress_bar(30));
|
|
}
|
|
|
|
// Example 5: Checking context limits
|
|
println!("5. Checking Context Limits");
|
|
println!("{}", "-".repeat(50));
|
|
|
|
if context.is_near_limit(0.8) {
|
|
println!("Warning: Context is over 80% full!");
|
|
} else {
|
|
println!("Context usage is below 80%");
|
|
}
|
|
|
|
let remaining = context.remaining();
|
|
println!("Remaining tokens: {}", remaining);
|
|
|
|
let new_message_tokens = 500;
|
|
if context.has_room_for(new_message_tokens) {
|
|
println!(
|
|
"Can fit a message of {} tokens",
|
|
new_message_tokens
|
|
);
|
|
} else {
|
|
println!(
|
|
"Cannot fit a message of {} tokens - would need to compact or start new context",
|
|
new_message_tokens
|
|
);
|
|
}
|
|
|
|
// Example 6: Different counter variants
|
|
println!("\n6. Using Different Counter Variants");
|
|
println!("{}", "-".repeat(50));
|
|
|
|
let counter_8k = SimpleTokenCounter::default_8k();
|
|
let counter_32k = SimpleTokenCounter::with_32k();
|
|
let counter_128k = SimpleTokenCounter::with_128k();
|
|
|
|
println!("8k context counter: {} tokens", counter_8k.max_context());
|
|
println!("32k context counter: {} tokens", counter_32k.max_context());
|
|
println!("128k context counter: {} tokens", counter_128k.max_context());
|
|
|
|
let haiku = ClaudeTokenCounter::haiku();
|
|
let sonnet = ClaudeTokenCounter::sonnet();
|
|
let opus = ClaudeTokenCounter::opus();
|
|
|
|
println!("\nClaude Haiku: {} tokens", haiku.max_context());
|
|
println!("Claude Sonnet: {} tokens", sonnet.max_context());
|
|
println!("Claude Opus: {} tokens", opus.max_context());
|
|
|
|
// Example 7: Managing context for a long conversation
|
|
println!("\n7. Long Conversation Simulation");
|
|
println!("{}", "-".repeat(50));
|
|
|
|
let mut long_context = ContextWindow::new(4096); // Smaller context for demo
|
|
let counter = SimpleTokenCounter::new(4096);
|
|
|
|
let mut message_count = 0;
|
|
let mut compaction_count = 0;
|
|
|
|
// Simulate 20 exchanges
|
|
for i in 0..20 {
|
|
let user_msg = ChatMessage::user(format!(
|
|
"This is user message number {} asking a question.",
|
|
i + 1
|
|
));
|
|
let assistant_msg = ChatMessage::assistant(format!(
|
|
"This is assistant response number {} providing a detailed answer with multiple sentences to make it longer.",
|
|
i + 1
|
|
));
|
|
|
|
let tokens_needed = counter.count_messages(&[user_msg, assistant_msg]);
|
|
|
|
if !long_context.has_room_for(tokens_needed) {
|
|
println!(
|
|
"After {} messages, context is full ({}%). Compacting...",
|
|
message_count,
|
|
(long_context.usage_percent() * 100.0) as u32
|
|
);
|
|
// In a real scenario, we would compact the conversation
|
|
// For now, just reset
|
|
long_context.reset();
|
|
compaction_count += 1;
|
|
}
|
|
|
|
long_context.add_tokens(tokens_needed);
|
|
message_count += 2;
|
|
}
|
|
|
|
println!("Total messages: {}", message_count);
|
|
println!("Compactions needed: {}", compaction_count);
|
|
println!("Final context usage: {:.1}%", long_context.usage_percent() * 100.0);
|
|
println!("Final progress: {}", long_context.progress_bar(40));
|
|
|
|
println!("\n=== Example Complete ===");
|
|
}
|