feat(compression): adaptive auto transcript compactor

This commit is contained in:
2025-10-26 00:25:23 +02:00
parent 877ece07be
commit a0868a9b49
13 changed files with 850 additions and 24 deletions

View File

@@ -29,6 +29,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Input panel respects a new `ui.input_max_rows` setting so long prompts expand predictably before scrolling kicks in. - Input panel respects a new `ui.input_max_rows` setting so long prompts expand predictably before scrolling kicks in.
- Adaptive TUI layout with responsive 80/120-column breakpoints, refreshed glass/neon theming, and animated focus rings for pane transitions. - Adaptive TUI layout with responsive 80/120-column breakpoints, refreshed glass/neon theming, and animated focus rings for pane transitions.
- Configurable `ui.layers` and `ui.animations` settings to tune glass elevation, neon intensity, and opt-in micro-animations. - Configurable `ui.layers` and `ui.animations` settings to tune glass elevation, neon intensity, and opt-in micro-animations.
- Adaptive transcript compactor with configurable auto mode, CLI opt-out (`--no-auto-compress`), and `:compress` commands for manual runs and toggling.
- Command palette offers fuzzy `:model` filtering and `:provider` completions for fast switching. - Command palette offers fuzzy `:model` filtering and `:provider` completions for fast switching.
- Inline guidance overlay adds a three-step onboarding tour, keymap-aware cheat sheets (F1 / `?`), and persists completion state via `ui.guidance`. - Inline guidance overlay adds a three-step onboarding tour, keymap-aware cheat sheets (F1 / `?`), and persists completion state via `ui.guidance`.
- Status surface renders a layered HUD with streaming/tool indicators, contextual gauges, and redesigned toast cards featuring icons, countdown timers, and a compact history log. - Status surface renders a layered HUD with streaming/tool indicators, contextual gauges, and redesigned toast cards featuring icons, countdown timers, and a compact history log.

View File

@@ -166,6 +166,18 @@ For more detailed information, please refer to the following documents:
`chafa` is available, PNG renders) for the documentation gallery. Use `chafa` is available, PNG renders) for the documentation gallery. Use
`--no-png` to skip the PNG step or `--output <dir>` to redirect the output. `--no-png` to skip the PNG step or `--output <dir>` to redirect the output.
## Conversation Compression
Owlen automatically compacts older turns once a chat crosses the configured
token threshold. The behaviour is controlled by the `[chat]` section in
`config.toml` (enabled by default via `chat.auto_compress = true`).
- Launch the TUI with `--no-auto-compress` to opt out for a single run.
- Inside the app, `:compress now` generates an on-demand summary, while
`:compress auto on|off` flips the automatic mode and persists the change.
- Each compression pass emits a system summary that carries metadata about the
retained messages, strategy, and estimated token savings.
## Configuration
OWLEN stores its configuration in the standard platform-specific config directory: OWLEN stores its configuration in the standard platform-specific config directory:

View File

@@ -33,13 +33,22 @@ use tokio::sync::mpsc;
use crate::commands::cloud::{load_runtime_credentials, set_env_var}; use crate::commands::cloud::{load_runtime_credentials, set_env_var};
pub async fn launch(initial_mode: Mode) -> Result<()> { #[derive(Debug, Clone, Copy, Default)]
pub struct LaunchOptions {
pub disable_auto_compress: bool,
}
pub async fn launch(initial_mode: Mode, options: LaunchOptions) -> Result<()> {
set_env_var("OWLEN_AUTO_CONSENT", "1"); set_env_var("OWLEN_AUTO_CONSENT", "1");
let color_support = detect_terminal_color_support(); let color_support = detect_terminal_color_support();
let mut cfg = config::try_load_config().unwrap_or_default(); let mut cfg = config::try_load_config().unwrap_or_default();
let _ = cfg.refresh_mcp_servers(None); let _ = cfg.refresh_mcp_servers(None);
if options.disable_auto_compress {
cfg.chat.auto_compress = false;
}
if let Some(previous_theme) = apply_terminal_theme(&mut cfg, &color_support) { if let Some(previous_theme) = apply_terminal_theme(&mut cfg, &color_support) {
let term_label = match &color_support { let term_label = match &color_support {
TerminalColorSupport::Limited { term } => Cow::from(term.as_str()), TerminalColorSupport::Limited { term } => Cow::from(term.as_str()),
@@ -107,6 +116,10 @@ pub async fn launch(initial_mode: Mode) -> Result<()> {
app.set_system_status(notice); app.set_system_status(notice);
} }
if options.disable_auto_compress {
app.append_system_status("Auto compression off");
}
app.set_mode(initial_mode).await; app.set_mode(initial_mode).await;
enable_raw_mode()?; enable_raw_mode()?;

View File

@@ -12,5 +12,5 @@ use owlen_tui::config;
#[tokio::main(flavor = "multi_thread")] #[tokio::main(flavor = "multi_thread")]
async fn main() -> Result<()> { async fn main() -> Result<()> {
bootstrap::launch(Mode::Code).await bootstrap::launch(Mode::Code, bootstrap::LaunchOptions::default()).await
} }

View File

@@ -34,6 +34,9 @@ struct Args {
/// Start in code mode (enables all tools) /// Start in code mode (enables all tools)
#[arg(long, short = 'c')] #[arg(long, short = 'c')]
code: bool, code: bool,
/// Disable automatic transcript compression for this session
#[arg(long)]
no_auto_compress: bool,
#[command(subcommand)] #[command(subcommand)]
command: Option<OwlenCommand>, command: Option<OwlenCommand>,
} }
@@ -462,10 +465,20 @@ fn ensure_string_extra_with_change(
#[tokio::main(flavor = "multi_thread")] #[tokio::main(flavor = "multi_thread")]
async fn main() -> Result<()> { async fn main() -> Result<()> {
// Parse command-line arguments // Parse command-line arguments
let Args { code, command } = Args::parse(); let Args {
code,
command,
no_auto_compress,
} = Args::parse();
if let Some(command) = command { if let Some(command) = command {
return run_command(command).await; return run_command(command).await;
} }
let initial_mode = if code { Mode::Code } else { Mode::Chat }; let initial_mode = if code { Mode::Code } else { Mode::Chat };
bootstrap::launch(initial_mode).await bootstrap::launch(
initial_mode,
bootstrap::LaunchOptions {
disable_auto_compress: no_auto_compress,
},
)
.await
} }

View File

@@ -17,7 +17,7 @@ use std::time::Duration;
pub const DEFAULT_CONFIG_PATH: &str = "~/.config/owlen/config.toml"; pub const DEFAULT_CONFIG_PATH: &str = "~/.config/owlen/config.toml";
/// Current schema version written to `config.toml`. /// Current schema version written to `config.toml`.
pub const CONFIG_SCHEMA_VERSION: &str = "1.8.0"; pub const CONFIG_SCHEMA_VERSION: &str = "1.9.0";
/// Provider config key for forcing Ollama provider mode. /// Provider config key for forcing Ollama provider mode.
pub const OLLAMA_MODE_KEY: &str = "ollama_mode"; pub const OLLAMA_MODE_KEY: &str = "ollama_mode";
@@ -63,6 +63,9 @@ pub struct Config {
/// MCP (Multi-Client-Provider) settings /// MCP (Multi-Client-Provider) settings
#[serde(default)] #[serde(default)]
pub mcp: McpSettings, pub mcp: McpSettings,
/// Chat-specific behaviour (history compression, etc.)
#[serde(default)]
pub chat: ChatSettings,
/// Provider specific configuration keyed by provider name /// Provider specific configuration keyed by provider name
#[serde(default)] #[serde(default)]
pub providers: HashMap<String, ProviderConfig>, pub providers: HashMap<String, ProviderConfig>,
@@ -115,6 +118,7 @@ impl Default for Config {
schema_version: Self::default_schema_version(), schema_version: Self::default_schema_version(),
general: GeneralSettings::default(), general: GeneralSettings::default(),
mcp: McpSettings::default(), mcp: McpSettings::default(),
chat: ChatSettings::default(),
providers, providers,
ui: UiSettings::default(), ui: UiSettings::default(),
storage: StorageSettings::default(), storage: StorageSettings::default(),
@@ -616,6 +620,7 @@ impl Config {
self.validate_mcp_settings()?; self.validate_mcp_settings()?;
self.validate_mcp_servers()?; self.validate_mcp_servers()?;
self.validate_providers()?; self.validate_providers()?;
self.chat.validate()?;
Ok(()) Ok(())
} }
@@ -1536,6 +1541,72 @@ impl Default for GeneralSettings {
} }
} }
/// Strategy used for compressing historical conversation turns.
///
/// Selected via `chat.strategy` in `config.toml`; serialized in snake_case
/// ("provider" / "local").
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum CompressionStrategy {
    /// Use the active (or override) model to generate a summary.
    #[default]
    Provider,
    /// Use Owlen's built-in heuristic summariser without model calls.
    Local,
}
/// Chat-specific configuration (history compression, retention, etc.)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatSettings {
    /// Compact older turns automatically once the transcript crosses
    /// `trigger_tokens`. Can be disabled per-run via `--no-auto-compress`.
    #[serde(default = "ChatSettings::default_auto_compress")]
    pub auto_compress: bool,
    /// Estimated-token threshold above which a compression pass runs.
    #[serde(default = "ChatSettings::default_trigger_tokens")]
    pub trigger_tokens: u32,
    /// Number of most-recent messages that are never compressed away.
    #[serde(default = "ChatSettings::default_retain_recent")]
    pub retain_recent_messages: usize,
    /// Optional model id used for summarisation instead of the active model.
    #[serde(default)]
    pub model_override: Option<String>,
    /// Summarisation strategy: provider model vs. local heuristic.
    #[serde(default)]
    pub strategy: CompressionStrategy,
}
impl ChatSettings {
const fn default_auto_compress() -> bool {
true
}
const fn default_trigger_tokens() -> u32 {
6_000
}
const fn default_retain_recent() -> usize {
8
}
pub fn validate(&self) -> Result<()> {
if self.trigger_tokens < 64 {
return Err(crate::Error::Config(
"chat.trigger_tokens must be at least 64".to_string(),
));
}
if self.retain_recent_messages < 2 {
return Err(crate::Error::Config(
"chat.retain_recent_messages must be at least 2".to_string(),
));
}
Ok(())
}
}
// Mirrors the serde field defaults so a missing `[chat]` section and an
// explicitly-empty one deserialize to identical settings.
impl Default for ChatSettings {
    fn default() -> Self {
        Self {
            auto_compress: Self::default_auto_compress(),
            trigger_tokens: Self::default_trigger_tokens(),
            retain_recent_messages: Self::default_retain_recent(),
            model_override: None,
            strategy: CompressionStrategy::default(),
        }
    }
}
/// Operating modes for the MCP subsystem. /// Operating modes for the MCP subsystem.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")] #[serde(rename_all = "snake_case")]

View File

@@ -353,6 +353,26 @@ impl ConversationManager {
id id
} }
    /// Replace the active conversation messages and rebuild internal indexes.
    ///
    /// Used by the transcript compactor after swapping a run of older turns
    /// for a single summary message. Every timestamp is reset to "now" so the
    /// rewired history sorts after previously persisted entries; streaming
    /// state is cleared because a replaced message may have been in flight.
    pub fn replace_active_messages(&mut self, mut messages: Vec<Message>) {
        let now = std::time::SystemTime::now();
        for message in &mut messages {
            // Ensure message timestamps are not in the far past when rewired.
            message.timestamp = now;
        }
        self.active.messages = messages;
        self.active.updated_at = now;
        self.rebuild_index();
        self.stream_reset();
    }
fn rebuild_index(&mut self) {
self.message_index.clear();
for (idx, message) in self.active.messages.iter().enumerate() {
self.message_index.insert(message.id, idx);
}
}
fn stream_reset(&mut self) { fn stream_reset(&mut self) {
self.streaming.clear(); self.streaming.clear();
} }

View File

@@ -1,6 +1,7 @@
use crate::config::{ use crate::config::{
Config, LEGACY_OLLAMA_CLOUD_API_KEY_ENV, LEGACY_OWLEN_OLLAMA_CLOUD_API_KEY_ENV, ChatSettings, CompressionStrategy, Config, LEGACY_OLLAMA_CLOUD_API_KEY_ENV,
McpResourceConfig, McpServerConfig, OLLAMA_API_KEY_ENV, OLLAMA_CLOUD_BASE_URL, LEGACY_OWLEN_OLLAMA_CLOUD_API_KEY_ENV, McpResourceConfig, McpServerConfig, OLLAMA_API_KEY_ENV,
OLLAMA_CLOUD_BASE_URL,
}; };
use crate::consent::{ConsentManager, ConsentScope}; use crate::consent::{ConsentManager, ConsentScope};
use crate::conversation::ConversationManager; use crate::conversation::ConversationManager;
@@ -21,7 +22,7 @@ use crate::providers::OllamaProvider;
use crate::storage::{SessionMeta, StorageManager}; use crate::storage::{SessionMeta, StorageManager};
use crate::tools::{WEB_SEARCH_TOOL_NAME, canonical_tool_name, tool_name_matches}; use crate::tools::{WEB_SEARCH_TOOL_NAME, canonical_tool_name, tool_name_matches};
use crate::types::{ use crate::types::{
ChatParameters, ChatRequest, ChatResponse, Conversation, Message, ModelInfo, ToolCall, ChatParameters, ChatRequest, ChatResponse, Conversation, Message, ModelInfo, Role, ToolCall,
}; };
use crate::ui::{RoleLabelDisplay, UiController}; use crate::ui::{RoleLabelDisplay, UiController};
use crate::usage::{UsageLedger, UsageQuota, UsageSnapshot}; use crate::usage::{UsageLedger, UsageQuota, UsageSnapshot};
@@ -32,10 +33,11 @@ use crate::{
ToolRegistry, WebScrapeTool, WebSearchSettings, WebSearchTool, ToolRegistry, WebScrapeTool, WebSearchSettings, WebSearchTool,
}; };
use crate::{Error, Result}; use crate::{Error, Result};
use chrono::Utc; use chrono::{DateTime, Utc};
use log::warn; use log::{info, warn};
use reqwest::Url; use reqwest::Url;
use serde_json::{Value, json}; use serde_json::{Value, json};
use std::cmp::{max, min};
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
use std::env; use std::env;
use std::path::PathBuf; use std::path::PathBuf;
@@ -53,6 +55,107 @@ fn env_var_non_empty(name: &str) -> Option<String> {
.filter(|value| !value.is_empty()) .filter(|value| !value.is_empty())
} }
fn estimate_tokens(messages: &[Message]) -> u32 {
messages
.iter()
.map(estimate_message_tokens)
.fold(0u32, |acc, value| acc.saturating_add(value))
}
fn estimate_message_tokens(message: &Message) -> u32 {
let content = message.content.trim();
if content.is_empty() {
return 4;
}
let approx = max(4, content.chars().count() / 4 + 1);
(approx + 4) as u32
}
/// Render up to `MAX_TRANSCRIPT_MESSAGES` messages as a plain "Role: text"
/// transcript for the summarisation prompt, noting how many were omitted.
fn build_transcript(messages: &[Message]) -> String {
    let take = min(messages.len(), MAX_TRANSCRIPT_MESSAGES);
    let mut transcript = String::new();
    for message in &messages[..take] {
        let snippet = sanitize_snippet(&message.content);
        // Blank messages contribute nothing useful to the prompt.
        if snippet.is_empty() {
            continue;
        }
        let role = match message.role {
            Role::User => "User",
            Role::Assistant => "Assistant",
            Role::System => "System",
            Role::Tool => "Tool",
        };
        transcript.push_str(&format!("{role}: {snippet}\n\n"));
    }
    let omitted = messages.len() - take;
    if omitted > 0 {
        transcript.push_str(&format!(
            "... ({} additional messages omitted for brevity)\n",
            omitted
        ));
    }
    transcript
}
/// Heuristic fallback summary used when no provider call is made, or when
/// the provider summary fails or comes back empty.
fn local_summary(messages: &[Message]) -> String {
    if messages.is_empty() {
        return "(no content to summarize)".to_string();
    }
    let total = messages.len();
    let mut summary = String::from("Summary (local heuristic)\n\n");
    summary.push_str(&format!("- Compressed {total} prior messages.\n"));
    // Surface the last few turns per role so the gist of the exchange
    // survives compaction.
    let sections = [
        (Role::User, "- Recent user intents:\n"),
        (Role::Assistant, "- Recent assistant responses:\n"),
    ];
    for (role, heading) in sections {
        let entries = collect_recent_by_role(messages, role, 3);
        if entries.is_empty() {
            continue;
        }
        summary.push_str(heading);
        for entry in entries {
            summary.push_str(&format!("  - {entry}\n"));
        }
    }
    summary.trim_end().to_string()
}
/// Collect up to `limit` sanitized snippets authored by `role`, preferring
/// the most recent messages but returning them in chronological order.
fn collect_recent_by_role(messages: &[Message], role: Role, limit: usize) -> Vec<String> {
    let mut results: Vec<String> = messages
        .iter()
        .rev()
        .filter(|message| message.role == role)
        .map(|message| sanitize_snippet(&message.content))
        .filter(|snippet| !snippet.is_empty())
        .take(limit)
        .collect();
    // Restore oldest-first ordering after the reverse scan; `take(0)` also
    // covers the limit == 0 case with an empty result.
    results.reverse();
    results
}
/// Normalise a message body for inclusion in a transcript or summary:
/// trim, strip carriage returns, and cap the length with an ellipsis.
fn sanitize_snippet(content: &str) -> String {
    let trimmed = content.trim();
    if trimmed.is_empty() {
        return String::new();
    }
    let mut snippet = trimmed.replace('\r', "");
    if snippet.len() > MAX_TRANSCRIPT_MESSAGE_CHARS {
        // `String::truncate` panics when the cut index is not a UTF-8 char
        // boundary, which the raw byte limit can easily hit on multi-byte
        // content. Walk back to the nearest boundary first.
        let mut cut = MAX_TRANSCRIPT_MESSAGE_CHARS;
        while !snippet.is_char_boundary(cut) {
            cut -= 1;
        }
        snippet.truncate(cut);
        snippet.push_str("...");
    }
    snippet
}
fn compute_web_search_settings( fn compute_web_search_settings(
config: &Config, config: &Config,
provider_id: &str, provider_id: &str,
@@ -195,6 +298,9 @@ pub enum ControllerEvent {
endpoints: Vec<String>, endpoints: Vec<String>,
tool_calls: Vec<ToolCall>, tool_calls: Vec<ToolCall>,
}, },
CompressionCompleted {
report: CompressionReport,
},
} }
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
@@ -206,6 +312,53 @@ struct PendingToolRequest {
tool_calls: Vec<ToolCall>, tool_calls: Vec<ToolCall>,
} }
/// Outcome of a single compression pass, surfaced to the UI via
/// `ControllerEvent::CompressionCompleted` and `last_compression()`.
#[derive(Debug, Clone)]
pub struct CompressionReport {
    /// Id of the system summary message that replaced the compacted turns.
    pub summary_message_id: Uuid,
    /// Number of messages folded into the summary.
    pub compressed_messages: usize,
    /// Estimated transcript size before the pass (heuristic tokens).
    pub estimated_tokens_before: u32,
    /// Estimated transcript size after the pass (summary + retained tail).
    pub estimated_tokens_after: u32,
    /// Strategy that produced the summary text.
    pub strategy: CompressionStrategy,
    /// Model actually used (override or the conversation's active model).
    pub model_used: String,
    /// Number of trailing messages kept verbatim.
    pub retained_recent: usize,
    /// True when triggered automatically rather than via `:compress now`.
    pub automated: bool,
    /// When the pass completed (UTC).
    pub timestamp: DateTime<Utc>,
}
/// Normalised per-run view of `ChatSettings` used while compressing.
#[derive(Debug, Clone)]
struct CompressionOptions {
    // Token threshold above which compression runs (clamped to >= 64).
    trigger_tokens: u32,
    // Trailing messages kept verbatim (clamped to >= 2).
    retain_recent: usize,
    // Provider-model vs. local-heuristic summarisation.
    strategy: CompressionStrategy,
    // Optional model id overriding the conversation's active model.
    model_override: Option<String>,
}
impl CompressionOptions {
    /// Build options from user settings, clamping values to safe minima so a
    /// hand-edited config cannot drive the compactor into a degenerate loop.
    fn from_settings(settings: &ChatSettings) -> Self {
        Self {
            trigger_tokens: settings.trigger_tokens.max(64),
            retain_recent: settings.retain_recent_messages.max(2),
            strategy: settings.strategy,
            model_override: settings.model_override.clone(),
        }
    }

    /// Smallest chunk of older messages worth summarising; below this the
    /// summary's own overhead outweighs the savings.
    fn min_chunk_messages(&self) -> usize {
        self.retain_recent.saturating_add(2).max(4)
    }

    /// Model to use for summarisation: the configured override when it is
    /// non-blank, otherwise the conversation's active model.
    fn resolve_model(&self, active_model: &str) -> String {
        // Borrow and filter first, allocating only for the chosen value; the
        // original cloned the override unconditionally and carried spurious
        // lifetime parameters despite returning an owned String.
        self.model_override
            .as_deref()
            .filter(|model| !model.trim().is_empty())
            .map(str::to_string)
            .unwrap_or_else(|| active_model.to_string())
    }
}
const MAX_TRANSCRIPT_MESSAGE_CHARS: usize = 1024;
const MAX_TRANSCRIPT_MESSAGES: usize = 32;
const COMPRESSION_METADATA_KEY: &str = "compression";
#[derive(Debug, Default)] #[derive(Debug, Default)]
struct StreamingMessageState { struct StreamingMessageState {
full_text: String, full_text: String,
@@ -381,6 +534,7 @@ pub struct SessionController {
pending_tool_requests: HashMap<Uuid, PendingToolRequest>, pending_tool_requests: HashMap<Uuid, PendingToolRequest>,
stream_states: HashMap<Uuid, StreamingMessageState>, stream_states: HashMap<Uuid, StreamingMessageState>,
usage_ledger: Arc<TokioMutex<UsageLedger>>, usage_ledger: Arc<TokioMutex<UsageLedger>>,
last_compression: Option<CompressionReport>,
} }
async fn build_tools( async fn build_tools(
@@ -723,6 +877,7 @@ impl SessionController {
pending_tool_requests: HashMap::new(), pending_tool_requests: HashMap::new(),
stream_states: HashMap::new(), stream_states: HashMap::new(),
usage_ledger, usage_ledger,
last_compression: None,
}) })
} }
@@ -734,6 +889,10 @@ impl SessionController {
&mut self.conversation &mut self.conversation
} }
pub fn last_compression(&self) -> Option<CompressionReport> {
self.last_compression.clone()
}
pub fn input_buffer(&self) -> &InputBuffer { pub fn input_buffer(&self) -> &InputBuffer {
&self.input_buffer &self.input_buffer
} }
@@ -956,6 +1115,210 @@ impl SessionController {
self.config.clone() self.config.clone()
} }
pub async fn compress_now(&mut self) -> Result<Option<CompressionReport>> {
let settings = {
let guard = self.config.lock().await;
guard.chat.clone()
};
let options = CompressionOptions::from_settings(&settings);
self.perform_compression(options, false).await
}
pub async fn maybe_auto_compress(&mut self) -> Result<Option<CompressionReport>> {
let settings = {
let guard = self.config.lock().await;
if !guard.chat.auto_compress {
return Ok(None);
}
guard.chat.clone()
};
let options = CompressionOptions::from_settings(&settings);
self.perform_compression(options, true).await
}
    /// Core compression loop shared by manual and automatic runs.
    ///
    /// Each pass splits the transcript into an "older" head and a retained
    /// tail, replaces the head with one system summary message carrying
    /// machine-readable metadata, and emits a `CompressionCompleted` event.
    /// Up to four passes run until the estimate drops under the trigger.
    /// Returns the report of the last pass, or `None` if nothing happened.
    async fn perform_compression(
        &mut self,
        options: CompressionOptions,
        automated: bool,
    ) -> Result<Option<CompressionReport>> {
        let mut final_report = None;
        let mut iterations = 0usize;
        loop {
            // Hard cap so a summary that fails to shrink the transcript can
            // never loop forever.
            iterations += 1;
            if iterations > 4 {
                break;
            }
            let snapshot = self.conversation.active().clone();
            let total_tokens = estimate_tokens(&snapshot.messages);
            // Nothing to do while under the configured threshold.
            if total_tokens <= options.trigger_tokens {
                break;
            }
            // Not enough history beyond the retained tail to compress.
            if snapshot.messages.len() <= options.retain_recent + 1 {
                break;
            }
            let split_index = snapshot
                .messages
                .len()
                .saturating_sub(options.retain_recent);
            if split_index == 0 {
                break;
            }
            let older_messages = &snapshot.messages[..split_index];
            // Skip chunks too small for the summary overhead to pay off.
            if older_messages.len() < options.min_chunk_messages() {
                break;
            }
            // If every message in the head is itself a compression summary,
            // another pass would only re-summarise summaries.
            if older_messages
                .iter()
                .all(|msg| msg.metadata.contains_key(COMPRESSION_METADATA_KEY))
            {
                break;
            }
            let model_used = options.resolve_model(&snapshot.model);
            let summary = self
                .generate_summary(older_messages, &options, &model_used)
                .await;
            let summary_body = summary.trim();
            let intro = "### Conversation summary";
            let footer = if automated {
                "_This summary was generated automatically to preserve context._"
            } else {
                "_Manual compression complete._"
            };
            // Even an empty summary still records how much was compacted.
            let content = if summary_body.is_empty() {
                format!(
                    "{intro}\n\n_Compressed {} prior messages._\n\n{footer}",
                    older_messages.len()
                )
            } else {
                format!(
                    "{intro}\n\n{summary_body}\n\n_Compressed {} prior messages._\n\n{footer}",
                    older_messages.len()
                )
            };
            let mut summary_message = Message::system(content);
            let compressed_ids: Vec<String> = older_messages
                .iter()
                .map(|msg| msg.id.to_string())
                .collect();
            let summary_tokens = estimate_message_tokens(&summary_message);
            let retained_tokens = estimate_tokens(&snapshot.messages[split_index..]);
            let updated_tokens = summary_tokens.saturating_add(retained_tokens);
            let timestamp = Utc::now();
            // Machine-readable audit trail stored on the summary message; the
            // UI and tests key off COMPRESSION_METADATA_KEY.
            let metadata = json!({
                "strategy": match options.strategy {
                    CompressionStrategy::Provider => "provider",
                    CompressionStrategy::Local => "local",
                },
                "automated": automated,
                "compressed_message_ids": compressed_ids,
                "compressed_count": older_messages.len(),
                "retain_recent": options.retain_recent,
                "trigger_tokens": options.trigger_tokens,
                "estimated_tokens_before": total_tokens,
                "model": model_used,
                "estimated_tokens_after": updated_tokens,
                "timestamp": timestamp.to_rfc3339(),
            });
            summary_message
                .metadata
                .insert(COMPRESSION_METADATA_KEY.to_string(), metadata);
            // Rebuild the transcript: summary first, then the retained tail.
            let mut new_messages =
                Vec::with_capacity(snapshot.messages.len() - older_messages.len() + 1);
            new_messages.push(summary_message.clone());
            new_messages.extend_from_slice(&snapshot.messages[split_index..]);
            self.conversation.replace_active_messages(new_messages);
            let report = CompressionReport {
                summary_message_id: summary_message.id,
                compressed_messages: older_messages.len(),
                estimated_tokens_before: total_tokens,
                estimated_tokens_after: updated_tokens,
                strategy: options.strategy,
                model_used: model_used.clone(),
                retained_recent: options.retain_recent,
                automated,
                timestamp,
            };
            self.last_compression = Some(report.clone());
            if automated {
                info!(
                    "auto compression reduced transcript from {} to {} tokens (compressed {} messages)",
                    total_tokens, updated_tokens, report.compressed_messages
                );
            }
            self.emit_compression_event(report.clone());
            final_report = Some(report.clone());
            // Stop if this pass made no progress or we are now under budget.
            if updated_tokens >= total_tokens {
                break;
            }
            if updated_tokens <= options.trigger_tokens {
                break;
            }
            // Continue loop to attempt further reduction if needed.
        }
        Ok(final_report)
    }
async fn generate_summary(
&self,
slice: &[Message],
options: &CompressionOptions,
model: &str,
) -> String {
match options.strategy {
CompressionStrategy::Provider => {
match self.generate_provider_summary(slice, model).await {
Ok(content) if !content.trim().is_empty() => content,
Ok(_) => local_summary(slice),
Err(err) => {
warn!(
"Falling back to local compression: provider summary failed ({})",
err
);
local_summary(slice)
}
}
}
CompressionStrategy::Local => local_summary(slice),
}
}
async fn generate_provider_summary(&self, slice: &[Message], model: &str) -> Result<String> {
let mut prompt_messages = Vec::new();
prompt_messages.push(Message::system("You are Owlen's transcript compactor. Summarize the provided conversation excerpt into concise markdown with sections for context, decisions, outstanding tasks, and facts that must be preserved. Avoid referring to removed content explicitly.".to_string()));
let transcript = build_transcript(slice);
prompt_messages.push(Message::user(transcript));
let request = ChatRequest {
model: model.to_string(),
messages: prompt_messages,
parameters: ChatParameters::default(),
tools: None,
};
let response = self.provider.send_prompt(request).await?;
Ok(response.message.content)
}
fn emit_compression_event(&self, report: CompressionReport) {
if let Some(tx) = &self.event_tx {
let _ = tx.send(ControllerEvent::CompressionCompleted { report });
}
}
pub async fn reload_mcp_clients(&mut self) -> Result<()> { pub async fn reload_mcp_clients(&mut self) -> Result<()> {
let (primary, named, missing) = Self::create_mcp_clients( let (primary, named, missing) = Self::create_mcp_clients(
self.config.clone(), self.config.clone(),
@@ -1518,6 +1881,7 @@ impl SessionController {
let streaming = { self.config.lock().await.general.enable_streaming || parameters.stream }; let streaming = { self.config.lock().await.general.enable_streaming || parameters.stream };
parameters.stream = streaming; parameters.stream = streaming;
self.conversation.push_user_message(content); self.conversation.push_user_message(content);
let _ = self.maybe_auto_compress().await?;
self.send_request_with_current_conversation(parameters) self.send_request_with_current_conversation(parameters)
.await .await
} }
@@ -1588,6 +1952,7 @@ impl SessionController {
let _ = self.record_usage_sample(usage).await; let _ = self.record_usage_sample(usage).await;
} }
self.conversation.push_message(response.message.clone()); self.conversation.push_message(response.message.clone());
let _ = self.maybe_auto_compress().await?;
return Ok(SessionOutcome::Complete(response)); return Ok(SessionOutcome::Complete(response));
} }
} }

View File

@@ -231,8 +231,8 @@ async fn streaming_file_write_consent_denied_returns_resolution() {
assert_eq!(tool_calls.len(), 1); assert_eq!(tool_calls.len(), 1);
assert_eq!(tool_calls[0].name, "resources_write"); assert_eq!(tool_calls[0].name, "resources_write");
let event = event_rx.recv().await.expect("controller event"); let request_id = loop {
let request_id = match event { match event_rx.recv().await.expect("controller event") {
ControllerEvent::ToolRequested { ControllerEvent::ToolRequested {
request_id, request_id,
tool_name, tool_name,
@@ -243,7 +243,9 @@ async fn streaming_file_write_consent_denied_returns_resolution() {
assert_eq!(tool_name, "resources_write"); assert_eq!(tool_name, "resources_write");
assert!(data_types.iter().any(|t| t.contains("file"))); assert!(data_types.iter().any(|t| t.contains("file")));
assert!(endpoints.iter().any(|e| e.contains("filesystem"))); assert!(endpoints.iter().any(|e| e.contains("filesystem")));
request_id break request_id;
}
ControllerEvent::CompressionCompleted { .. } => continue,
} }
}; };

View File

@@ -0,0 +1,146 @@
use std::sync::Arc;
use anyhow::{Result, anyhow};
use async_trait::async_trait;
use futures::stream;
use owlen_core::config::{CompressionStrategy, Config};
use owlen_core::session::SessionController;
use owlen_core::storage::StorageManager;
use owlen_core::types::{ChatRequest, ChatResponse, Message, ModelInfo, Role};
use owlen_core::ui::NoOpUiController;
use owlen_core::{ChatStream, Provider, Result as CoreResult};
use tempfile::tempdir;
/// Build a config wired to the stub provider with aggressive compression
/// thresholds so the small test transcripts cross the trigger.
fn make_session_config(strategy: CompressionStrategy, auto: bool) -> Config {
    let mut config = Config::default();
    config.general.default_model = Some("stub-model".into());
    config.general.enable_streaming = false;
    let chat = &mut config.chat;
    chat.strategy = strategy;
    chat.auto_compress = auto;
    chat.trigger_tokens = 64;
    chat.retain_recent_messages = 2;
    config
}
/// Construct a `SessionController` backed by the stub provider and an
/// on-disk database in a fresh temp directory.
async fn build_session(config: Config) -> Result<SessionController> {
    // `TempDir` deletes its directory on drop; the original bound it to a
    // local that dropped when this function returned, removing the database's
    // directory out from under the still-open session. Detach the directory
    // so it outlives the test (the OS temp cleaner reclaims it later).
    let db_dir = tempdir().expect("temp dir").into_path();
    let storage = Arc::new(
        StorageManager::with_database_path(db_dir.join("owlen-compression-tests.db"))
            .await
            .expect("storage"),
    );
    let provider: Arc<dyn Provider> = Arc::new(StubProvider);
    let ui = Arc::new(NoOpUiController);
    SessionController::new(provider, config, storage, ui, false, None)
        .await
        .map_err(|err| anyhow!(err))
}
/// Minimal in-memory provider: one fake model, a canned completion, and an
/// empty stream. Lets compression tests run without a live backend.
struct StubProvider;

#[async_trait]
impl Provider for StubProvider {
    fn name(&self) -> &str {
        "stub-provider"
    }

    /// Advertise a single chat-capable model matching the configured default.
    async fn list_models(&self) -> CoreResult<Vec<ModelInfo>> {
        Ok(vec![ModelInfo {
            id: "stub-model".into(),
            name: "Stub Model".into(),
            description: Some("Stub provider model".into()),
            provider: "stub-provider".into(),
            context_window: Some(8_192),
            capabilities: vec!["chat".into()],
            supports_tools: false,
        }])
    }

    /// Always answer with the same canned, non-streaming completion.
    async fn send_prompt(&self, _request: ChatRequest) -> CoreResult<ChatResponse> {
        Ok(ChatResponse {
            message: Message::assistant("stub completion".into()),
            usage: None,
            is_streaming: false,
            is_final: true,
        })
    }

    /// Streaming is unsupported here; yield an immediately-finished stream.
    async fn stream_prompt(&self, _request: ChatRequest) -> CoreResult<ChatStream> {
        Ok(Box::pin(stream::empty()))
    }

    async fn health_check(&self) -> CoreResult<()> {
        Ok(())
    }

    fn as_any(&self) -> &(dyn std::any::Any + Send + Sync) {
        self
    }
}
/// End-to-end check of a manual compression run with the local strategy:
/// the transcript shrinks, a metadata-tagged system summary leads the
/// conversation, and the report records the pass as manual.
#[tokio::test(flavor = "multi_thread")]
async fn compression_compacts_history() -> Result<()> {
    let mut session = build_session(make_session_config(CompressionStrategy::Local, true)).await?;
    // Seed twelve alternating turns so the 64-token trigger is exceeded.
    for idx in 0..6 {
        session.conversation_mut().push_user_message(format!(
            "User request #{idx}: Explain the subsystem in detail."
        ));
        session.conversation_mut().push_assistant_message(format!(
            "Assistant reply #{idx}: Provided detailed explanation with follow-up tasks."
        ));
    }
    let before_len = session.conversation().messages.len();
    assert!(
        before_len > 6,
        "expected longer transcript before compression"
    );
    let report = session
        .compress_now()
        .await?
        .expect("compression should trigger");
    assert!(
        !report.automated,
        "manual compression should flag automated = false"
    );
    assert!(report.compressed_messages > 0);
    assert!(report.estimated_tokens_after < report.estimated_tokens_before);
    let after = session.conversation();
    assert!(after.messages.len() < before_len);
    // The summary replaces the compacted head, so it must be the first
    // message, a system turn, and carry the compactor's metadata.
    let first = after
        .messages
        .first()
        .expect("summary message should exist after compression");
    assert_eq!(first.role, Role::System);
    assert!(
        first.metadata.contains_key("compression"),
        "summary message must include metadata"
    );
    Ok(())
}
/// With `chat.auto_compress = false`, `maybe_auto_compress` must be a no-op
/// even though the transcript could be over the configured trigger.
#[tokio::test(flavor = "multi_thread")]
async fn auto_compress_respects_toggle() -> Result<()> {
    let mut session = build_session(make_session_config(CompressionStrategy::Local, false)).await?;
    for idx in 0..5 {
        session
            .conversation_mut()
            .push_user_message(format!("Message {idx} from user."));
        session
            .conversation_mut()
            .push_assistant_message(format!("Assistant reply {idx}."));
    }
    let result = session.maybe_auto_compress().await?;
    assert!(
        result.is_none(),
        "auto compression should skip when disabled"
    );
    Ok(())
}

View File

@@ -21,7 +21,10 @@ use owlen_core::{
config::McpResourceConfig, config::McpResourceConfig,
model::DetailedModelInfo, model::DetailedModelInfo,
oauth::{DeviceAuthorization, DevicePollState}, oauth::{DeviceAuthorization, DevicePollState},
session::{ControllerEvent, SessionController, SessionOutcome, ToolConsentResolution}, session::{
CompressionReport, ControllerEvent, SessionController, SessionOutcome,
ToolConsentResolution,
},
storage::SessionMeta, storage::SessionMeta,
theme::Theme, theme::Theme,
types::{ChatParameters, ChatResponse, Conversation, ModelInfo, Role, TokenUsage}, types::{ChatParameters, ChatResponse, Conversation, ModelInfo, Role, TokenUsage},
@@ -1207,10 +1210,41 @@ impl ChatApp {
tool_calls, tool_calls,
}); });
} }
ControllerEvent::CompressionCompleted { report } => {
self.handle_compression_report(&report);
}
} }
Ok(()) Ok(())
} }
fn handle_compression_report(&mut self, report: &CompressionReport) {
let saved_tokens = report
.estimated_tokens_before
.saturating_sub(report.estimated_tokens_after);
let saved_fmt = format_token_short(saved_tokens as u64);
let before_fmt = format_token_short(report.estimated_tokens_before as u64);
let after_fmt = format_token_short(report.estimated_tokens_after as u64);
let mode_label = if report.automated { "Auto" } else { "Manual" };
self.status = format!(
"{mode_label} compression archived {} messages ({}{}, saved {}).",
report.compressed_messages, before_fmt, after_fmt, saved_fmt
);
self.error = None;
let toast_level = if report.automated {
ToastLevel::Info
} else {
ToastLevel::Success
};
self.push_toast(
toast_level,
format!(
"{mode_label} compression saved {saved_fmt} tokens ({} messages).",
report.compressed_messages
),
);
}
fn apply_tool_consent_resolution(&mut self, resolution: ToolConsentResolution) -> Result<()> { fn apply_tool_consent_resolution(&mut self, resolution: ToolConsentResolution) -> Result<()> {
let ToolConsentResolution { let ToolConsentResolution {
message_id, message_id,
@@ -7481,6 +7515,131 @@ impl ChatApp {
self.command_palette.clear(); self.command_palette.clear();
return Ok(AppState::Running); return Ok(AppState::Running);
} }
"compress" => {
    // Transcript-compression command family:
    //   :compress                — report auto-mode state + last-run stats
    //   :compress now | run      — force an immediate compaction
    //   :compress auto <mode>    — enable/disable/toggle auto compression
    let subcommand =
        args.first().map(|arg| arg.to_ascii_lowercase());
    match subcommand.as_deref() {
        None => {
            // Status query: read the current auto-compress flag from the
            // shared config (guard scoped to a block so it drops promptly).
            let auto_enabled = {
                let guard = self.controller.config();
                guard.chat.auto_compress
            };
            if let Some(report) = self.controller.last_compression()
            {
                // Saturating subtraction guards against a (pathological)
                // case where the "after" estimate exceeds "before".
                let saved = report
                    .estimated_tokens_before
                    .saturating_sub(report.estimated_tokens_after);
                let saved_fmt = format_token_short(saved as u64);
                let before_fmt = format_token_short(
                    report.estimated_tokens_before as u64,
                );
                let after_fmt = format_token_short(
                    report.estimated_tokens_after as u64,
                );
                // NOTE(review): "({}{})" prints before/after with no
                // separator (e.g. "6.0k4.2k"); likely meant "{} → {}" —
                // confirm intended format.
                self.status = format!(
                    "Auto compression is {}. Last run saved {} tokens ({}{}).",
                    if auto_enabled {
                        "enabled"
                    } else {
                        "disabled"
                    },
                    saved_fmt,
                    before_fmt,
                    after_fmt
                );
            } else {
                // No compression has happened yet — report mode only.
                self.status = format!(
                    "Auto compression is {}. No compression has run yet.",
                    if auto_enabled {
                        "enabled"
                    } else {
                        "disabled"
                    }
                );
            }
            self.error = None;
        }
        Some("now") | Some("run") => {
            // Manual run: compress_now() returns None when the
            // conversation is still below the compression threshold.
            match self.controller.compress_now().await? {
                Some(report) => {
                    self.handle_compression_report(&report);
                }
                None => {
                    self.status = "Conversation is below the compression threshold.".to_string();
                    self.error = None;
                }
            }
        }
        Some("auto") => {
            if args.len() < 2 {
                self.error = Some(
                    "Usage: :compress auto <on|off|toggle>"
                        .to_string(),
                );
            } else {
                let mode = args[1].to_ascii_lowercase();
                // Read the current flag first so "toggle" has a baseline.
                let current = {
                    let guard = self.controller.config();
                    guard.chat.auto_compress
                };
                // Map the user's word to the desired flag; None means the
                // word was unrecognized and an error was already set.
                let desired = match mode.as_str() {
                    "on" | "enable" | "enabled" | "true" => {
                        Some(true)
                    }
                    "off" | "disable" | "disabled" | "false" => {
                        Some(false)
                    }
                    "toggle" => Some(!current),
                    other => {
                        self.error = Some(format!(
                            "Unknown auto setting '{}'. Use on, off, or toggle.",
                            other
                        ));
                        None
                    }
                };
                if let Some(desired) = desired {
                    // Mutation is scoped to its own block so the write
                    // guard is dropped before config() is re-acquired
                    // for saving below.
                    {
                        let mut guard =
                            self.controller.config_mut();
                        guard.chat.auto_compress = desired;
                    }
                    // Persist the change; on failure the in-memory flag
                    // is already flipped but the user is told the save
                    // did not stick.
                    if let Err(err) = config::save_config(
                        &self.controller.config(),
                    ) {
                        self.error = Some(format!(
                            "Failed to save config: {}",
                            err
                        ));
                    } else {
                        self.error = None;
                        if desired {
                            self.status =
                                "Auto compression enabled"
                                    .to_string();
                        } else {
                            self.status =
                                "Auto compression disabled"
                                    .to_string();
                        }
                    }
                }
            }
        }
        Some(other) => {
            self.error = Some(format!(
                "Unknown compress option '{}'. Use :compress, :compress now, or :compress auto <on|off|toggle>.",
                other
            ));
        }
    }
    // Common epilogue for every subcommand: leave command mode, clear
    // the palette input, and keep the app running.
    self.set_input_mode(InputMode::Normal);
    self.command_palette.clear();
    return Ok(AppState::Running);
}
"c" | "clear" => { "c" | "clear" => {
self.controller.clear(); self.controller.clear();
self.chat_line_offset = 0; self.chat_line_offset = 0;

View File

@@ -154,6 +154,15 @@ const COMMANDS: &[CommandDescriptor] = &[
keybinding: None, keybinding: None,
preview: None, preview: None,
}, },
CommandDescriptor {
keywords: &["compress", "compress now", "compress auto"],
description: "Manage transcript compression (run now or toggle auto mode)",
category: CommandCategory::Conversation,
modes: &["Command"],
tags: &["conversation", "compression", "history"],
keybinding: None,
preview: None,
},
CommandDescriptor { CommandDescriptor {
keywords: &["session save"], keywords: &["session save"],
description: "Save the current conversation", description: "Save the current conversation",

View File

@@ -189,3 +189,18 @@ illustrations:
- The pipeline reuses the same stub provider harness as the snapshot tests, so - The pipeline reuses the same stub provider harness as the snapshot tests, so
new scenes should be added in tandem with `chat_snapshots.rs` to keep visual new scenes should be added in tandem with `chat_snapshots.rs` to keep visual
regression coverage and documentation imagery aligned. regression coverage and documentation imagery aligned.
## 9. Transcript Compression
- The compactor lives under `[chat]` in `config.toml`. Defaults keep
`auto_compress = true`, `trigger_tokens = 6000`, and retain the last eight
turns verbatim.
- Strategy is configurable: `provider` summaries call back through the active
model (or `chat.model_override`), while `local` uses a heuristic bullet list
for fully offline runs.
- Users can disable the feature per session with `owlen --no-auto-compress`, or
at runtime via `:compress auto on|off`. `:compress now` triggers an immediate
compaction even when auto mode is disabled.
- Each compression pass replaces older turns with a system summary annotated by
`message.metadata.compression` (strategy, timestamps, token deltas, and the
archived message ids) to support audits and future rehydration tools.