diff --git a/CHANGELOG.md b/CHANGELOG.md index a4882ee..8d0e436 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Input panel respects a new `ui.input_max_rows` setting so long prompts expand predictably before scrolling kicks in. - Adaptive TUI layout with responsive 80/120-column breakpoints, refreshed glass/neon theming, and animated focus rings for pane transitions. - Configurable `ui.layers` and `ui.animations` settings to tune glass elevation, neon intensity, and opt-in micro-animations. +- Adaptive transcript compactor with configurable auto mode, CLI opt-out (`--no-auto-compress`), and `:compress` commands for manual runs and toggling. - Command palette offers fuzzy `:model` filtering and `:provider` completions for fast switching. - Inline guidance overlay adds a three-step onboarding tour, keymap-aware cheat sheets (F1 / `?`), and persists completion state via `ui.guidance`. - Status surface renders a layered HUD with streaming/tool indicators, contextual gauges, and redesigned toast cards featuring icons, countdown timers, and a compact history log. diff --git a/README.md b/README.md index 4d2d3b6..9784022 100644 --- a/README.md +++ b/README.md @@ -166,6 +166,18 @@ For more detailed information, please refer to the following documents: `chafa` is available, PNG renders) for the documentation gallery. Use `--no-png` to skip the PNG step or `--output ` to redirect the output. +## Conversation Compression + +Owlen automatically compacts older turns once a chat crosses the configured +token threshold. The behaviour is controlled by the `[chat]` section in +`config.toml` (enabled by default via `chat.auto_compress = true`). + +- Launch the TUI with `--no-auto-compress` to opt out for a single run. +- Inside the app, `:compress now` generates an on-demand summary, while + `:compress auto on|off` flips the automatic mode and persists the change. 
+- Each compression pass emits a system summary that carries metadata about the + retained messages, strategy, and estimated token savings. + ## Configuration OWLEN stores its configuration in the standard platform-specific config directory: diff --git a/crates/owlen-cli/src/bootstrap.rs b/crates/owlen-cli/src/bootstrap.rs index feb6f15..694ee4b 100644 --- a/crates/owlen-cli/src/bootstrap.rs +++ b/crates/owlen-cli/src/bootstrap.rs @@ -33,13 +33,22 @@ use tokio::sync::mpsc; use crate::commands::cloud::{load_runtime_credentials, set_env_var}; -pub async fn launch(initial_mode: Mode) -> Result<()> { +#[derive(Debug, Clone, Copy, Default)] +pub struct LaunchOptions { + pub disable_auto_compress: bool, +} + +pub async fn launch(initial_mode: Mode, options: LaunchOptions) -> Result<()> { set_env_var("OWLEN_AUTO_CONSENT", "1"); let color_support = detect_terminal_color_support(); let mut cfg = config::try_load_config().unwrap_or_default(); let _ = cfg.refresh_mcp_servers(None); + if options.disable_auto_compress { + cfg.chat.auto_compress = false; + } + if let Some(previous_theme) = apply_terminal_theme(&mut cfg, &color_support) { let term_label = match &color_support { TerminalColorSupport::Limited { term } => Cow::from(term.as_str()), @@ -107,6 +116,10 @@ pub async fn launch(initial_mode: Mode) -> Result<()> { app.set_system_status(notice); } + if options.disable_auto_compress { + app.append_system_status("Auto compression off"); + } + app.set_mode(initial_mode).await; enable_raw_mode()?; diff --git a/crates/owlen-cli/src/code_main.rs b/crates/owlen-cli/src/code_main.rs index 184e4e2..f0d9cc6 100644 --- a/crates/owlen-cli/src/code_main.rs +++ b/crates/owlen-cli/src/code_main.rs @@ -12,5 +12,5 @@ use owlen_tui::config; #[tokio::main(flavor = "multi_thread")] async fn main() -> Result<()> { - bootstrap::launch(Mode::Code).await + bootstrap::launch(Mode::Code, bootstrap::LaunchOptions::default()).await } diff --git a/crates/owlen-cli/src/main.rs 
b/crates/owlen-cli/src/main.rs index 61c303a..ff03503 100644 --- a/crates/owlen-cli/src/main.rs +++ b/crates/owlen-cli/src/main.rs @@ -34,6 +34,9 @@ struct Args { /// Start in code mode (enables all tools) #[arg(long, short = 'c')] code: bool, + /// Disable automatic transcript compression for this session + #[arg(long)] + no_auto_compress: bool, #[command(subcommand)] command: Option, } @@ -462,10 +465,20 @@ fn ensure_string_extra_with_change( #[tokio::main(flavor = "multi_thread")] async fn main() -> Result<()> { // Parse command-line arguments - let Args { code, command } = Args::parse(); + let Args { + code, + command, + no_auto_compress, + } = Args::parse(); if let Some(command) = command { return run_command(command).await; } let initial_mode = if code { Mode::Code } else { Mode::Chat }; - bootstrap::launch(initial_mode).await + bootstrap::launch( + initial_mode, + bootstrap::LaunchOptions { + disable_auto_compress: no_auto_compress, + }, + ) + .await } diff --git a/crates/owlen-core/src/config.rs b/crates/owlen-core/src/config.rs index 6e2447c..97066aa 100644 --- a/crates/owlen-core/src/config.rs +++ b/crates/owlen-core/src/config.rs @@ -17,7 +17,7 @@ use std::time::Duration; pub const DEFAULT_CONFIG_PATH: &str = "~/.config/owlen/config.toml"; /// Current schema version written to `config.toml`. -pub const CONFIG_SCHEMA_VERSION: &str = "1.8.0"; +pub const CONFIG_SCHEMA_VERSION: &str = "1.9.0"; /// Provider config key for forcing Ollama provider mode. pub const OLLAMA_MODE_KEY: &str = "ollama_mode"; @@ -63,6 +63,9 @@ pub struct Config { /// MCP (Multi-Client-Provider) settings #[serde(default)] pub mcp: McpSettings, + /// Chat-specific behaviour (history compression, etc.) 
+ #[serde(default)] + pub chat: ChatSettings, /// Provider specific configuration keyed by provider name #[serde(default)] pub providers: HashMap, @@ -115,6 +118,7 @@ impl Default for Config { schema_version: Self::default_schema_version(), general: GeneralSettings::default(), mcp: McpSettings::default(), + chat: ChatSettings::default(), providers, ui: UiSettings::default(), storage: StorageSettings::default(), @@ -616,6 +620,7 @@ impl Config { self.validate_mcp_settings()?; self.validate_mcp_servers()?; self.validate_providers()?; + self.chat.validate()?; Ok(()) } @@ -1536,6 +1541,72 @@ impl Default for GeneralSettings { } } +/// Strategy used for compressing historical conversation turns. +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)] +#[serde(rename_all = "snake_case")] +pub enum CompressionStrategy { + /// Use the active (or override) model to generate a summary. + #[default] + Provider, + /// Use Owlen's built-in heuristic summariser without model calls. + Local, +} + +/// Chat-specific configuration (history compression, retention, etc.) 
+#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ChatSettings { + #[serde(default = "ChatSettings::default_auto_compress")] + pub auto_compress: bool, + #[serde(default = "ChatSettings::default_trigger_tokens")] + pub trigger_tokens: u32, + #[serde(default = "ChatSettings::default_retain_recent")] + pub retain_recent_messages: usize, + #[serde(default)] + pub model_override: Option, + #[serde(default)] + pub strategy: CompressionStrategy, +} + +impl ChatSettings { + const fn default_auto_compress() -> bool { + true + } + + const fn default_trigger_tokens() -> u32 { + 6_000 + } + + const fn default_retain_recent() -> usize { + 8 + } + + pub fn validate(&self) -> Result<()> { + if self.trigger_tokens < 64 { + return Err(crate::Error::Config( + "chat.trigger_tokens must be at least 64".to_string(), + )); + } + if self.retain_recent_messages < 2 { + return Err(crate::Error::Config( + "chat.retain_recent_messages must be at least 2".to_string(), + )); + } + Ok(()) + } +} + +impl Default for ChatSettings { + fn default() -> Self { + Self { + auto_compress: Self::default_auto_compress(), + trigger_tokens: Self::default_trigger_tokens(), + retain_recent_messages: Self::default_retain_recent(), + model_override: None, + strategy: CompressionStrategy::default(), + } + } +} + /// Operating modes for the MCP subsystem. #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "snake_case")] diff --git a/crates/owlen-core/src/conversation.rs b/crates/owlen-core/src/conversation.rs index 3768678..f38fd77 100644 --- a/crates/owlen-core/src/conversation.rs +++ b/crates/owlen-core/src/conversation.rs @@ -353,6 +353,26 @@ impl ConversationManager { id } + /// Replace the active conversation messages and rebuild internal indexes. + pub fn replace_active_messages(&mut self, mut messages: Vec) { + let now = std::time::SystemTime::now(); + for message in &mut messages { + // Ensure message timestamps are not in the far past when rewired. 
+ message.timestamp = now; + } + self.active.messages = messages; + self.active.updated_at = now; + self.rebuild_index(); + self.stream_reset(); + } + + fn rebuild_index(&mut self) { + self.message_index.clear(); + for (idx, message) in self.active.messages.iter().enumerate() { + self.message_index.insert(message.id, idx); + } + } + fn stream_reset(&mut self) { self.streaming.clear(); } diff --git a/crates/owlen-core/src/session.rs b/crates/owlen-core/src/session.rs index 8eadd5b..d324d69 100644 --- a/crates/owlen-core/src/session.rs +++ b/crates/owlen-core/src/session.rs @@ -1,6 +1,7 @@ use crate::config::{ - Config, LEGACY_OLLAMA_CLOUD_API_KEY_ENV, LEGACY_OWLEN_OLLAMA_CLOUD_API_KEY_ENV, - McpResourceConfig, McpServerConfig, OLLAMA_API_KEY_ENV, OLLAMA_CLOUD_BASE_URL, + ChatSettings, CompressionStrategy, Config, LEGACY_OLLAMA_CLOUD_API_KEY_ENV, + LEGACY_OWLEN_OLLAMA_CLOUD_API_KEY_ENV, McpResourceConfig, McpServerConfig, OLLAMA_API_KEY_ENV, + OLLAMA_CLOUD_BASE_URL, }; use crate::consent::{ConsentManager, ConsentScope}; use crate::conversation::ConversationManager; @@ -21,7 +22,7 @@ use crate::providers::OllamaProvider; use crate::storage::{SessionMeta, StorageManager}; use crate::tools::{WEB_SEARCH_TOOL_NAME, canonical_tool_name, tool_name_matches}; use crate::types::{ - ChatParameters, ChatRequest, ChatResponse, Conversation, Message, ModelInfo, ToolCall, + ChatParameters, ChatRequest, ChatResponse, Conversation, Message, ModelInfo, Role, ToolCall, }; use crate::ui::{RoleLabelDisplay, UiController}; use crate::usage::{UsageLedger, UsageQuota, UsageSnapshot}; @@ -32,10 +33,11 @@ use crate::{ ToolRegistry, WebScrapeTool, WebSearchSettings, WebSearchTool, }; use crate::{Error, Result}; -use chrono::Utc; -use log::warn; +use chrono::{DateTime, Utc}; +use log::{info, warn}; use reqwest::Url; use serde_json::{Value, json}; +use std::cmp::{max, min}; use std::collections::{HashMap, HashSet}; use std::env; use std::path::PathBuf; @@ -53,6 +55,107 @@ fn 
env_var_non_empty(name: &str) -> Option { .filter(|value| !value.is_empty()) } +fn estimate_tokens(messages: &[Message]) -> u32 { + messages + .iter() + .map(estimate_message_tokens) + .fold(0u32, |acc, value| acc.saturating_add(value)) +} + +fn estimate_message_tokens(message: &Message) -> u32 { + let content = message.content.trim(); + if content.is_empty() { + return 4; + } + let approx = max(4, content.chars().count() / 4 + 1); + (approx + 4) as u32 +} + +fn build_transcript(messages: &[Message]) -> String { + let mut transcript = String::new(); + let take = min(messages.len(), MAX_TRANSCRIPT_MESSAGES); + for message in messages.iter().take(take) { + let role = match message.role { + Role::User => "User", + Role::Assistant => "Assistant", + Role::System => "System", + Role::Tool => "Tool", + }; + let snippet = sanitize_snippet(&message.content); + if snippet.is_empty() { + continue; + } + transcript.push_str(&format!("{role}: {snippet}\n\n")); + } + if messages.len() > take { + transcript.push_str(&format!( + "... 
({} additional messages omitted for brevity)\n", + messages.len() - take + )); + } + transcript +} + +fn local_summary(messages: &[Message]) -> String { + if messages.is_empty() { + return "(no content to summarize)".to_string(); + } + let total = messages.len(); + let mut summary = String::from("Summary (local heuristic)\n\n"); + summary.push_str(&format!("- Compressed {total} prior messages.\n")); + + let recent_users = collect_recent_by_role(messages, Role::User, 3); + if !recent_users.is_empty() { + summary.push_str("- Recent user intents:\n"); + for intent in recent_users { + summary.push_str(&format!(" - {intent}\n")); + } + } + + let recent_assistant = collect_recent_by_role(messages, Role::Assistant, 3); + if !recent_assistant.is_empty() { + summary.push_str("- Recent assistant responses:\n"); + for reply in recent_assistant { + summary.push_str(&format!(" - {reply}\n")); + } + } + + summary.trim_end().to_string() +} + +fn collect_recent_by_role(messages: &[Message], role: Role, limit: usize) -> Vec { + if limit == 0 { + return Vec::new(); + } + let mut results = Vec::new(); + for message in messages.iter().rev() { + if message.role == role { + let snippet = sanitize_snippet(&message.content); + if !snippet.is_empty() { + results.push(snippet); + if results.len() == limit { + break; + } + } + } + } + results.reverse(); + results +} + +/// Normalises message content for inclusion in a summary or transcript. +/// +/// Trims surrounding whitespace, strips carriage returns, and caps the result at +/// `MAX_TRANSCRIPT_MESSAGE_CHARS` characters, appending `...` when it was shortened. +/// Returns an empty string when the content is blank. +fn sanitize_snippet(content: &str) -> String { + let trimmed = content.trim(); + if trimmed.is_empty() { + return String::new(); + } + let mut snippet = trimmed.replace('\r', ""); + // `String::truncate` panics when the cut is not on a char boundary, so locate the + // byte offset of the first character past the limit instead of using a raw byte length. + if let Some((cut, _)) = snippet.char_indices().nth(MAX_TRANSCRIPT_MESSAGE_CHARS) { + snippet.truncate(cut); + snippet.push_str("..."); + } + snippet +} + fn compute_web_search_settings( config: &Config, provider_id: &str, @@ -195,6 +298,9 @@ pub enum ControllerEvent { endpoints: Vec, tool_calls: Vec, }, + CompressionCompleted { + report: CompressionReport, + }, } #[derive(Clone, Debug)] @@ -206,6 +312,53 @@ struct PendingToolRequest { tool_calls: Vec, } 
+#[derive(Debug, Clone)] +pub struct CompressionReport { + pub summary_message_id: Uuid, + pub compressed_messages: usize, + pub estimated_tokens_before: u32, + pub estimated_tokens_after: u32, + pub strategy: CompressionStrategy, + pub model_used: String, + pub retained_recent: usize, + pub automated: bool, + pub timestamp: DateTime, +} + +#[derive(Debug, Clone)] +struct CompressionOptions { + trigger_tokens: u32, + retain_recent: usize, + strategy: CompressionStrategy, + model_override: Option, +} + +impl CompressionOptions { + fn from_settings(settings: &ChatSettings) -> Self { + Self { + trigger_tokens: settings.trigger_tokens.max(64), + retain_recent: settings.retain_recent_messages.max(2), + strategy: settings.strategy, + model_override: settings.model_override.clone(), + } + } + + fn min_chunk_messages(&self) -> usize { + self.retain_recent.saturating_add(2).max(4) + } + + fn resolve_model<'a>(&'a self, active_model: &'a str) -> String { + self.model_override + .clone() + .filter(|model| !model.trim().is_empty()) + .unwrap_or_else(|| active_model.to_string()) + } +} + +const MAX_TRANSCRIPT_MESSAGE_CHARS: usize = 1024; +const MAX_TRANSCRIPT_MESSAGES: usize = 32; +const COMPRESSION_METADATA_KEY: &str = "compression"; + #[derive(Debug, Default)] struct StreamingMessageState { full_text: String, @@ -381,6 +534,7 @@ pub struct SessionController { pending_tool_requests: HashMap, stream_states: HashMap, usage_ledger: Arc>, + last_compression: Option, } async fn build_tools( @@ -723,6 +877,7 @@ impl SessionController { pending_tool_requests: HashMap::new(), stream_states: HashMap::new(), usage_ledger, + last_compression: None, }) } @@ -734,6 +889,10 @@ impl SessionController { &mut self.conversation } + pub fn last_compression(&self) -> Option { + self.last_compression.clone() + } + pub fn input_buffer(&self) -> &InputBuffer { &self.input_buffer } @@ -956,6 +1115,210 @@ impl SessionController { self.config.clone() } + pub async fn compress_now(&mut self) -> Result> 
{ + let settings = { + let guard = self.config.lock().await; + guard.chat.clone() + }; + let options = CompressionOptions::from_settings(&settings); + self.perform_compression(options, false).await + } + + pub async fn maybe_auto_compress(&mut self) -> Result> { + let settings = { + let guard = self.config.lock().await; + if !guard.chat.auto_compress { + return Ok(None); + } + guard.chat.clone() + }; + let options = CompressionOptions::from_settings(&settings); + self.perform_compression(options, true).await + } + + async fn perform_compression( + &mut self, + options: CompressionOptions, + automated: bool, + ) -> Result> { + let mut final_report = None; + let mut iterations = 0usize; + + loop { + iterations += 1; + if iterations > 4 { + break; + } + + let snapshot = self.conversation.active().clone(); + let total_tokens = estimate_tokens(&snapshot.messages); + if total_tokens <= options.trigger_tokens { + break; + } + + if snapshot.messages.len() <= options.retain_recent + 1 { + break; + } + + let split_index = snapshot + .messages + .len() + .saturating_sub(options.retain_recent); + if split_index == 0 { + break; + } + + let older_messages = &snapshot.messages[..split_index]; + if older_messages.len() < options.min_chunk_messages() { + break; + } + + if older_messages + .iter() + .all(|msg| msg.metadata.contains_key(COMPRESSION_METADATA_KEY)) + { + break; + } + + let model_used = options.resolve_model(&snapshot.model); + let summary = self + .generate_summary(older_messages, &options, &model_used) + .await; + + let summary_body = summary.trim(); + let intro = "### Conversation summary"; + let footer = if automated { + "_This summary was generated automatically to preserve context._" + } else { + "_Manual compression complete._" + }; + let content = if summary_body.is_empty() { + format!( + "{intro}\n\n_Compressed {} prior messages._\n\n{footer}", + older_messages.len() + ) + } else { + format!( + "{intro}\n\n{summary_body}\n\n_Compressed {} prior 
messages._\n\n{footer}", + older_messages.len() + ) + }; + + let mut summary_message = Message::system(content); + let compressed_ids: Vec = older_messages + .iter() + .map(|msg| msg.id.to_string()) + .collect(); + let summary_tokens = estimate_message_tokens(&summary_message); + let retained_tokens = estimate_tokens(&snapshot.messages[split_index..]); + let updated_tokens = summary_tokens.saturating_add(retained_tokens); + let timestamp = Utc::now(); + let metadata = json!({ + "strategy": match options.strategy { + CompressionStrategy::Provider => "provider", + CompressionStrategy::Local => "local", + }, + "automated": automated, + "compressed_message_ids": compressed_ids, + "compressed_count": older_messages.len(), + "retain_recent": options.retain_recent, + "trigger_tokens": options.trigger_tokens, + "estimated_tokens_before": total_tokens, + "model": model_used, + "estimated_tokens_after": updated_tokens, + "timestamp": timestamp.to_rfc3339(), + }); + summary_message + .metadata + .insert(COMPRESSION_METADATA_KEY.to_string(), metadata); + + let mut new_messages = + Vec::with_capacity(snapshot.messages.len() - older_messages.len() + 1); + new_messages.push(summary_message.clone()); + new_messages.extend_from_slice(&snapshot.messages[split_index..]); + self.conversation.replace_active_messages(new_messages); + let report = CompressionReport { + summary_message_id: summary_message.id, + compressed_messages: older_messages.len(), + estimated_tokens_before: total_tokens, + estimated_tokens_after: updated_tokens, + strategy: options.strategy, + model_used: model_used.clone(), + retained_recent: options.retain_recent, + automated, + timestamp, + }; + + self.last_compression = Some(report.clone()); + if automated { + info!( + "auto compression reduced transcript from {} to {} tokens (compressed {} messages)", + total_tokens, updated_tokens, report.compressed_messages + ); + } + self.emit_compression_event(report.clone()); + final_report = Some(report.clone()); + + if 
updated_tokens >= total_tokens { + break; + } + if updated_tokens <= options.trigger_tokens { + break; + } + + // Continue loop to attempt further reduction if needed. + } + + Ok(final_report) + } + + async fn generate_summary( + &self, + slice: &[Message], + options: &CompressionOptions, + model: &str, + ) -> String { + match options.strategy { + CompressionStrategy::Provider => { + match self.generate_provider_summary(slice, model).await { + Ok(content) if !content.trim().is_empty() => content, + Ok(_) => local_summary(slice), + Err(err) => { + warn!( + "Falling back to local compression: provider summary failed ({})", + err + ); + local_summary(slice) + } + } + } + CompressionStrategy::Local => local_summary(slice), + } + } + + async fn generate_provider_summary(&self, slice: &[Message], model: &str) -> Result { + let mut prompt_messages = Vec::new(); + prompt_messages.push(Message::system("You are Owlen's transcript compactor. Summarize the provided conversation excerpt into concise markdown with sections for context, decisions, outstanding tasks, and facts that must be preserved. 
Avoid referring to removed content explicitly.".to_string())); + let transcript = build_transcript(slice); + prompt_messages.push(Message::user(transcript)); + + let request = ChatRequest { + model: model.to_string(), + messages: prompt_messages, + parameters: ChatParameters::default(), + tools: None, + }; + + let response = self.provider.send_prompt(request).await?; + Ok(response.message.content) + } + + fn emit_compression_event(&self, report: CompressionReport) { + if let Some(tx) = &self.event_tx { + let _ = tx.send(ControllerEvent::CompressionCompleted { report }); + } + } + pub async fn reload_mcp_clients(&mut self) -> Result<()> { let (primary, named, missing) = Self::create_mcp_clients( self.config.clone(), @@ -1518,6 +1881,7 @@ impl SessionController { let streaming = { self.config.lock().await.general.enable_streaming || parameters.stream }; parameters.stream = streaming; self.conversation.push_user_message(content); + let _ = self.maybe_auto_compress().await?; self.send_request_with_current_conversation(parameters) .await } @@ -1588,6 +1952,7 @@ impl SessionController { let _ = self.record_usage_sample(usage).await; } self.conversation.push_message(response.message.clone()); + let _ = self.maybe_auto_compress().await?; return Ok(SessionOutcome::Complete(response)); } } diff --git a/crates/owlen-core/tests/agent_tool_flow.rs b/crates/owlen-core/tests/agent_tool_flow.rs index aa05bba..f2f2b1c 100644 --- a/crates/owlen-core/tests/agent_tool_flow.rs +++ b/crates/owlen-core/tests/agent_tool_flow.rs @@ -231,19 +231,21 @@ async fn streaming_file_write_consent_denied_returns_resolution() { assert_eq!(tool_calls.len(), 1); assert_eq!(tool_calls[0].name, "resources_write"); - let event = event_rx.recv().await.expect("controller event"); - let request_id = match event { - ControllerEvent::ToolRequested { - request_id, - tool_name, - data_types, - endpoints, - .. 
- } => { - assert_eq!(tool_name, "resources_write"); - assert!(data_types.iter().any(|t| t.contains("file"))); - assert!(endpoints.iter().any(|e| e.contains("filesystem"))); - request_id + let request_id = loop { + match event_rx.recv().await.expect("controller event") { + ControllerEvent::ToolRequested { + request_id, + tool_name, + data_types, + endpoints, + .. + } => { + assert_eq!(tool_name, "resources_write"); + assert!(data_types.iter().any(|t| t.contains("file"))); + assert!(endpoints.iter().any(|e| e.contains("filesystem"))); + break request_id; + } + ControllerEvent::CompressionCompleted { .. } => continue, } }; diff --git a/crates/owlen-core/tests/compression.rs b/crates/owlen-core/tests/compression.rs new file mode 100644 index 0000000..b191542 --- /dev/null +++ b/crates/owlen-core/tests/compression.rs @@ -0,0 +1,146 @@ +use std::sync::Arc; + +use anyhow::{Result, anyhow}; +use async_trait::async_trait; +use futures::stream; +use owlen_core::config::{CompressionStrategy, Config}; +use owlen_core::session::SessionController; +use owlen_core::storage::StorageManager; +use owlen_core::types::{ChatRequest, ChatResponse, Message, ModelInfo, Role}; +use owlen_core::ui::NoOpUiController; +use owlen_core::{ChatStream, Provider, Result as CoreResult}; +use tempfile::tempdir; + +fn make_session_config(strategy: CompressionStrategy, auto: bool) -> Config { + let mut config = Config::default(); + config.general.default_model = Some("stub-model".into()); + config.general.enable_streaming = false; + config.chat.strategy = strategy; + config.chat.auto_compress = auto; + config.chat.trigger_tokens = 64; + config.chat.retain_recent_messages = 2; + config +} + +async fn build_session(config: Config) -> Result { + let temp_dir = tempdir().expect("temp dir"); + let storage = Arc::new( + StorageManager::with_database_path(temp_dir.path().join("owlen-compression-tests.db")) + .await + .expect("storage"), + ); + let provider: Arc = Arc::new(StubProvider); + let ui = 
Arc::new(NoOpUiController); + SessionController::new(provider, config, storage, ui, false, None) + .await + .map_err(|err| anyhow!(err)) +} + +struct StubProvider; + +#[async_trait] +impl Provider for StubProvider { + fn name(&self) -> &str { + "stub-provider" + } + + async fn list_models(&self) -> CoreResult> { + Ok(vec![ModelInfo { + id: "stub-model".into(), + name: "Stub Model".into(), + description: Some("Stub provider model".into()), + provider: "stub-provider".into(), + context_window: Some(8_192), + capabilities: vec!["chat".into()], + supports_tools: false, + }]) + } + + async fn send_prompt(&self, _request: ChatRequest) -> CoreResult { + Ok(ChatResponse { + message: Message::assistant("stub completion".into()), + usage: None, + is_streaming: false, + is_final: true, + }) + } + + async fn stream_prompt(&self, _request: ChatRequest) -> CoreResult { + Ok(Box::pin(stream::empty())) + } + + async fn health_check(&self) -> CoreResult<()> { + Ok(()) + } + + fn as_any(&self) -> &(dyn std::any::Any + Send + Sync) { + self + } +} + +#[tokio::test(flavor = "multi_thread")] +async fn compression_compacts_history() -> Result<()> { + let mut session = build_session(make_session_config(CompressionStrategy::Local, true)).await?; + + for idx in 0..6 { + session.conversation_mut().push_user_message(format!( + "User request #{idx}: Explain the subsystem in detail." + )); + session.conversation_mut().push_assistant_message(format!( + "Assistant reply #{idx}: Provided detailed explanation with follow-up tasks." + )); + } + + let before_len = session.conversation().messages.len(); + assert!( + before_len > 6, + "expected longer transcript before compression" + ); + + let report = session + .compress_now() + .await? 
+ .expect("compression should trigger"); + assert!( + !report.automated, + "manual compression should flag automated = false" + ); + assert!(report.compressed_messages > 0); + assert!(report.estimated_tokens_after < report.estimated_tokens_before); + + let after = session.conversation(); + assert!(after.messages.len() < before_len); + let first = after + .messages + .first() + .expect("summary message should exist after compression"); + assert_eq!(first.role, Role::System); + assert!( + first.metadata.contains_key("compression"), + "summary message must include metadata" + ); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +async fn auto_compress_respects_toggle() -> Result<()> { + let mut session = build_session(make_session_config(CompressionStrategy::Local, false)).await?; + + for idx in 0..5 { + session + .conversation_mut() + .push_user_message(format!("Message {idx} from user.")); + session + .conversation_mut() + .push_assistant_message(format!("Assistant reply {idx}.")); + } + + let result = session.maybe_auto_compress().await?; + assert!( + result.is_none(), + "auto compression should skip when disabled" + ); + + Ok(()) +} diff --git a/crates/owlen-tui/src/chat_app.rs b/crates/owlen-tui/src/chat_app.rs index 3a95327..78bf71c 100644 --- a/crates/owlen-tui/src/chat_app.rs +++ b/crates/owlen-tui/src/chat_app.rs @@ -21,7 +21,10 @@ use owlen_core::{ config::McpResourceConfig, model::DetailedModelInfo, oauth::{DeviceAuthorization, DevicePollState}, - session::{ControllerEvent, SessionController, SessionOutcome, ToolConsentResolution}, + session::{ + CompressionReport, ControllerEvent, SessionController, SessionOutcome, + ToolConsentResolution, + }, storage::SessionMeta, theme::Theme, types::{ChatParameters, ChatResponse, Conversation, ModelInfo, Role, TokenUsage}, @@ -1207,10 +1210,41 @@ impl ChatApp { tool_calls, }); } + ControllerEvent::CompressionCompleted { report } => { + self.handle_compression_report(&report); + } } Ok(()) } + fn 
handle_compression_report(&mut self, report: &CompressionReport) { + let saved_tokens = report + .estimated_tokens_before + .saturating_sub(report.estimated_tokens_after); + let saved_fmt = format_token_short(saved_tokens as u64); + let before_fmt = format_token_short(report.estimated_tokens_before as u64); + let after_fmt = format_token_short(report.estimated_tokens_after as u64); + let mode_label = if report.automated { "Auto" } else { "Manual" }; + + self.status = format!( + "{mode_label} compression archived {} messages ({} → {}, saved {}).", + report.compressed_messages, before_fmt, after_fmt, saved_fmt + ); + self.error = None; + let toast_level = if report.automated { + ToastLevel::Info + } else { + ToastLevel::Success + }; + self.push_toast( + toast_level, + format!( + "{mode_label} compression saved {saved_fmt} tokens ({} messages).", + report.compressed_messages + ), + ); + } + fn apply_tool_consent_resolution(&mut self, resolution: ToolConsentResolution) -> Result<()> { let ToolConsentResolution { message_id, @@ -7481,6 +7515,131 @@ impl ChatApp { self.command_palette.clear(); return Ok(AppState::Running); } + "compress" => { + let subcommand = + args.first().map(|arg| arg.to_ascii_lowercase()); + + match subcommand.as_deref() { + None => { + let auto_enabled = { + let guard = self.controller.config(); + guard.chat.auto_compress + }; + if let Some(report) = self.controller.last_compression() + { + let saved = report + .estimated_tokens_before + .saturating_sub(report.estimated_tokens_after); + let saved_fmt = format_token_short(saved as u64); + let before_fmt = format_token_short( + report.estimated_tokens_before as u64, + ); + let after_fmt = format_token_short( + report.estimated_tokens_after as u64, + ); + self.status = format!( + "Auto compression is {}. 
Last run saved {} tokens ({} → {}).", + if auto_enabled { + "enabled" + } else { + "disabled" + }, + saved_fmt, + before_fmt, + after_fmt + ); + } else { + self.status = format!( + "Auto compression is {}. No compression has run yet.", + if auto_enabled { + "enabled" + } else { + "disabled" + } + ); + } + self.error = None; + } + Some("now") | Some("run") => { + match self.controller.compress_now().await? { + Some(report) => { + self.handle_compression_report(&report); + } + None => { + self.status = "Conversation is below the compression threshold.".to_string(); + self.error = None; + } + } + } + Some("auto") => { + if args.len() < 2 { + self.error = Some( + "Usage: :compress auto " + .to_string(), + ); + } else { + let mode = args[1].to_ascii_lowercase(); + let current = { + let guard = self.controller.config(); + guard.chat.auto_compress + }; + let desired = match mode.as_str() { + "on" | "enable" | "enabled" | "true" => { + Some(true) + } + "off" | "disable" | "disabled" | "false" => { + Some(false) + } + "toggle" => Some(!current), + other => { + self.error = Some(format!( + "Unknown auto setting '{}'. Use on, off, or toggle.", + other + )); + None + } + }; + + if let Some(desired) = desired { + { + let mut guard = + self.controller.config_mut(); + guard.chat.auto_compress = desired; + } + if let Err(err) = config::save_config( + &self.controller.config(), + ) { + self.error = Some(format!( + "Failed to save config: {}", + err + )); + } else { + self.error = None; + if desired { + self.status = + "Auto compression enabled" + .to_string(); + } else { + self.status = + "Auto compression disabled" + .to_string(); + } + } + } + } + } + Some(other) => { + self.error = Some(format!( + "Unknown compress option '{}'. 
Use :compress, :compress now, or :compress auto .", + other + )); + } + } + + self.set_input_mode(InputMode::Normal); + self.command_palette.clear(); + return Ok(AppState::Running); + } "c" | "clear" => { self.controller.clear(); self.chat_line_offset = 0; diff --git a/crates/owlen-tui/src/commands/mod.rs b/crates/owlen-tui/src/commands/mod.rs index c03a730..f697b87 100644 --- a/crates/owlen-tui/src/commands/mod.rs +++ b/crates/owlen-tui/src/commands/mod.rs @@ -154,6 +154,15 @@ const COMMANDS: &[CommandDescriptor] = &[ keybinding: None, preview: None, }, + CommandDescriptor { + keywords: &["compress", "compress now", "compress auto"], + description: "Manage transcript compression (run now or toggle auto mode)", + category: CommandCategory::Conversation, + modes: &["Command"], + tags: &["conversation", "compression", "history"], + keybinding: None, + preview: None, + }, CommandDescriptor { keywords: &["session save"], description: "Save the current conversation", diff --git a/docs/tui-ux-playbook.md b/docs/tui-ux-playbook.md index 0bdd51e..3251ad1 100644 --- a/docs/tui-ux-playbook.md +++ b/docs/tui-ux-playbook.md @@ -189,3 +189,18 @@ illustrations: - The pipeline reuses the same stub provider harness as the snapshot tests, so new scenes should be added in tandem with `chat_snapshots.rs` to keep visual regression coverage and documentation imagery aligned. + +## 9. Transcript Compression + +- The compactor lives under `[chat]` in `config.toml`. Defaults keep + `auto_compress = true`, `trigger_tokens = 6000`, and retain the last eight + messages verbatim (`retain_recent_messages = 8`). +- Strategy is configurable: `provider` summaries call back through the active + model (or `chat.model_override`), while `local` uses a heuristic bullet list + for fully offline runs. +- Users can disable the feature per session with `owlen --no-auto-compress`, or + at runtime via `:compress auto on|off|toggle`. `:compress now` runs a compaction + immediately, even when auto mode is disabled, provided the transcript exceeds `trigger_tokens`. 
+- Each compression pass replaces older turns with a system summary annotated by + `message.metadata.compression` (strategy, timestamps, token deltas, and the + archived message ids) to support audits and future rehydration tools.