//! Token counting example //! //! This example demonstrates how to use the token counting utilities //! to manage LLM context windows. //! //! Run with: cargo run --example token_counting -p llm-core use llm_core::{ ChatMessage, ClaudeTokenCounter, ContextWindow, SimpleTokenCounter, TokenCounter, }; fn main() { println!("=== Token Counting Example ===\n"); // Example 1: Basic token counting with SimpleTokenCounter println!("1. Basic Token Counting"); println!("{}", "-".repeat(50)); let simple_counter = SimpleTokenCounter::new(8192); let text = "The quick brown fox jumps over the lazy dog."; let token_count = simple_counter.count(text); println!("Text: \"{}\"", text); println!("Estimated tokens: {}", token_count); println!("Max context: {}\n", simple_counter.max_context()); // Example 2: Counting tokens in chat messages println!("2. Counting Tokens in Chat Messages"); println!("{}", "-".repeat(50)); let messages = vec![ ChatMessage::system("You are a helpful assistant that provides concise answers."), ChatMessage::user("What is the capital of France?"), ChatMessage::assistant("The capital of France is Paris."), ChatMessage::user("What is its population?"), ]; let total_tokens = simple_counter.count_messages(&messages); println!("Number of messages: {}", messages.len()); println!("Total tokens (with overhead): {}\n", total_tokens); // Example 3: Using ClaudeTokenCounter for Claude models println!("3. Claude-Specific Token Counting"); println!("{}", "-".repeat(50)); let claude_counter = ClaudeTokenCounter::new(); let claude_total = claude_counter.count_messages(&messages); println!("Claude counter max context: {}", claude_counter.max_context()); println!("Claude estimated tokens: {}\n", claude_total); // Example 4: Context window management println!("4. Context Window Management"); println!("{}", "-".repeat(50)); let mut context = ContextWindow::new(8192); println!("Created context window with max: {} tokens", context.max()); // Simulate adding messages let conversation = vec![ ChatMessage::user("Tell me about Rust programming."), ChatMessage::assistant( "Rust is a systems programming language focused on safety, \ speed, and concurrency. It prevents common bugs like null pointer \ dereferences and data races through its ownership system.", ), ChatMessage::user("What are its main features?"), ChatMessage::assistant( "Rust's main features include: 1) Memory safety without garbage collection, \ 2) Zero-cost abstractions, 3) Fearless concurrency, 4) Pattern matching, \ 5) Type inference, and 6) A powerful macro system.", ), ]; for (i, msg) in conversation.iter().enumerate() { let tokens = simple_counter.count_messages(&[msg.clone()]); context.add_tokens(tokens); let role = msg.role.as_str(); let preview = msg .content .as_ref() .map(|c| { if c.len() > 50 { format!("{}...", &c[..50]) } else { c.clone() } }) .unwrap_or_default(); println!( "Message {}: [{}] \"{}\"", i + 1, role, preview ); println!(" Added {} tokens", tokens); println!(" Total used: {} / {}", context.used(), context.max()); println!(" Usage: {:.1}%", context.usage_percent() * 100.0); println!(" Progress: {}\n", context.progress_bar(30)); } // Example 5: Checking context limits println!("5. Checking Context Limits"); println!("{}", "-".repeat(50)); if context.is_near_limit(0.8) { println!("Warning: Context is over 80% full!"); } else { println!("Context usage is below 80%"); } let remaining = context.remaining(); println!("Remaining tokens: {}", remaining); let new_message_tokens = 500; if context.has_room_for(new_message_tokens) { println!( "Can fit a message of {} tokens", new_message_tokens ); } else { println!( "Cannot fit a message of {} tokens - would need to compact or start new context", new_message_tokens ); } // Example 6: Different counter variants println!("\n6. Using Different Counter Variants"); println!("{}", "-".repeat(50)); let counter_8k = SimpleTokenCounter::default_8k(); let counter_32k = SimpleTokenCounter::with_32k(); let counter_128k = SimpleTokenCounter::with_128k(); println!("8k context counter: {} tokens", counter_8k.max_context()); println!("32k context counter: {} tokens", counter_32k.max_context()); println!("128k context counter: {} tokens", counter_128k.max_context()); let haiku = ClaudeTokenCounter::haiku(); let sonnet = ClaudeTokenCounter::sonnet(); let opus = ClaudeTokenCounter::opus(); println!("\nClaude Haiku: {} tokens", haiku.max_context()); println!("Claude Sonnet: {} tokens", sonnet.max_context()); println!("Claude Opus: {} tokens", opus.max_context()); // Example 7: Managing context for a long conversation println!("\n7. Long Conversation Simulation"); println!("{}", "-".repeat(50)); let mut long_context = ContextWindow::new(4096); // Smaller context for demo let counter = SimpleTokenCounter::new(4096); let mut message_count = 0; let mut compaction_count = 0; // Simulate 20 exchanges for i in 0..20 { let user_msg = ChatMessage::user(format!( "This is user message number {} asking a question.", i + 1 )); let assistant_msg = ChatMessage::assistant(format!( "This is assistant response number {} providing a detailed answer with multiple sentences to make it longer.", i + 1 )); let tokens_needed = counter.count_messages(&[user_msg, assistant_msg]); if !long_context.has_room_for(tokens_needed) { println!( "After {} messages, context is full ({}%). Compacting...", message_count, (long_context.usage_percent() * 100.0) as u32 ); // In a real scenario, we would compact the conversation // For now, just reset long_context.reset(); compaction_count += 1; } long_context.add_tokens(tokens_needed); message_count += 2; } println!("Total messages: {}", message_count); println!("Compactions needed: {}", compaction_count); println!("Final context usage: {:.1}%", long_context.usage_percent() * 100.0); println!("Final progress: {}", long_context.progress_bar(40)); println!("\n=== Example Complete ==="); }