diff --git a/CHANGELOG.md b/CHANGELOG.md
index a4882ee..8d0e436 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Input panel respects a new `ui.input_max_rows` setting so long prompts expand predictably before scrolling kicks in.
- Adaptive TUI layout with responsive 80/120-column breakpoints, refreshed glass/neon theming, and animated focus rings for pane transitions.
- Configurable `ui.layers` and `ui.animations` settings to tune glass elevation, neon intensity, and opt-in micro-animations.
+- Adaptive transcript compactor with configurable auto mode, CLI opt-out (`--no-auto-compress`), and `:compress` commands for manual runs and toggling.
- Command palette offers fuzzy `:model` filtering and `:provider` completions for fast switching.
- Inline guidance overlay adds a three-step onboarding tour, keymap-aware cheat sheets (F1 / `?`), and persists completion state via `ui.guidance`.
- Status surface renders a layered HUD with streaming/tool indicators, contextual gauges, and redesigned toast cards featuring icons, countdown timers, and a compact history log.
diff --git a/README.md b/README.md
index 4d2d3b6..9784022 100644
--- a/README.md
+++ b/README.md
@@ -166,6 +166,18 @@ For more detailed information, please refer to the following documents:
`chafa` is available, PNG renders) for the documentation gallery. Use
`--no-png` to skip the PNG step or `--output
<path>` to redirect the output.
+## Conversation Compression
+
+Owlen automatically compacts older turns once a chat crosses the configured
+token threshold. The behaviour is controlled by the `[chat]` section in
+`config.toml` (enabled by default via `chat.auto_compress = true`).
+
+- Launch the TUI with `--no-auto-compress` to opt out for a single run.
+- Inside the app, `:compress now` generates an on-demand summary, while
+ `:compress auto on|off` flips the automatic mode and persists the change.
+- Each compression pass emits a system summary that carries metadata about the
+ retained messages, strategy, and estimated token savings.
+
## Configuration
OWLEN stores its configuration in the standard platform-specific config directory:
diff --git a/crates/owlen-cli/src/bootstrap.rs b/crates/owlen-cli/src/bootstrap.rs
index feb6f15..694ee4b 100644
--- a/crates/owlen-cli/src/bootstrap.rs
+++ b/crates/owlen-cli/src/bootstrap.rs
@@ -33,13 +33,22 @@ use tokio::sync::mpsc;
use crate::commands::cloud::{load_runtime_credentials, set_env_var};
-pub async fn launch(initial_mode: Mode) -> Result<()> {
+#[derive(Debug, Clone, Copy, Default)]
+pub struct LaunchOptions {
+ pub disable_auto_compress: bool,
+}
+
+pub async fn launch(initial_mode: Mode, options: LaunchOptions) -> Result<()> {
set_env_var("OWLEN_AUTO_CONSENT", "1");
let color_support = detect_terminal_color_support();
let mut cfg = config::try_load_config().unwrap_or_default();
let _ = cfg.refresh_mcp_servers(None);
+ if options.disable_auto_compress {
+ cfg.chat.auto_compress = false;
+ }
+
if let Some(previous_theme) = apply_terminal_theme(&mut cfg, &color_support) {
let term_label = match &color_support {
TerminalColorSupport::Limited { term } => Cow::from(term.as_str()),
@@ -107,6 +116,10 @@ pub async fn launch(initial_mode: Mode) -> Result<()> {
app.set_system_status(notice);
}
+ if options.disable_auto_compress {
+ app.append_system_status("Auto compression off");
+ }
+
app.set_mode(initial_mode).await;
enable_raw_mode()?;
diff --git a/crates/owlen-cli/src/code_main.rs b/crates/owlen-cli/src/code_main.rs
index 184e4e2..f0d9cc6 100644
--- a/crates/owlen-cli/src/code_main.rs
+++ b/crates/owlen-cli/src/code_main.rs
@@ -12,5 +12,5 @@ use owlen_tui::config;
#[tokio::main(flavor = "multi_thread")]
async fn main() -> Result<()> {
- bootstrap::launch(Mode::Code).await
+ bootstrap::launch(Mode::Code, bootstrap::LaunchOptions::default()).await
}
diff --git a/crates/owlen-cli/src/main.rs b/crates/owlen-cli/src/main.rs
index 61c303a..ff03503 100644
--- a/crates/owlen-cli/src/main.rs
+++ b/crates/owlen-cli/src/main.rs
@@ -34,6 +34,9 @@ struct Args {
/// Start in code mode (enables all tools)
#[arg(long, short = 'c')]
code: bool,
+ /// Disable automatic transcript compression for this session
+ #[arg(long)]
+ no_auto_compress: bool,
#[command(subcommand)]
command: Option,
}
@@ -462,10 +465,20 @@ fn ensure_string_extra_with_change(
#[tokio::main(flavor = "multi_thread")]
async fn main() -> Result<()> {
// Parse command-line arguments
- let Args { code, command } = Args::parse();
+ let Args {
+ code,
+ command,
+ no_auto_compress,
+ } = Args::parse();
if let Some(command) = command {
return run_command(command).await;
}
let initial_mode = if code { Mode::Code } else { Mode::Chat };
- bootstrap::launch(initial_mode).await
+ bootstrap::launch(
+ initial_mode,
+ bootstrap::LaunchOptions {
+ disable_auto_compress: no_auto_compress,
+ },
+ )
+ .await
}
diff --git a/crates/owlen-core/src/config.rs b/crates/owlen-core/src/config.rs
index 6e2447c..97066aa 100644
--- a/crates/owlen-core/src/config.rs
+++ b/crates/owlen-core/src/config.rs
@@ -17,7 +17,7 @@ use std::time::Duration;
pub const DEFAULT_CONFIG_PATH: &str = "~/.config/owlen/config.toml";
/// Current schema version written to `config.toml`.
-pub const CONFIG_SCHEMA_VERSION: &str = "1.8.0";
+pub const CONFIG_SCHEMA_VERSION: &str = "1.9.0";
/// Provider config key for forcing Ollama provider mode.
pub const OLLAMA_MODE_KEY: &str = "ollama_mode";
@@ -63,6 +63,9 @@ pub struct Config {
/// MCP (Multi-Client-Provider) settings
#[serde(default)]
pub mcp: McpSettings,
+ /// Chat-specific behaviour (history compression, etc.)
+ #[serde(default)]
+ pub chat: ChatSettings,
/// Provider specific configuration keyed by provider name
#[serde(default)]
pub providers: HashMap,
@@ -115,6 +118,7 @@ impl Default for Config {
schema_version: Self::default_schema_version(),
general: GeneralSettings::default(),
mcp: McpSettings::default(),
+ chat: ChatSettings::default(),
providers,
ui: UiSettings::default(),
storage: StorageSettings::default(),
@@ -616,6 +620,7 @@ impl Config {
self.validate_mcp_settings()?;
self.validate_mcp_servers()?;
self.validate_providers()?;
+ self.chat.validate()?;
Ok(())
}
@@ -1536,6 +1541,72 @@ impl Default for GeneralSettings {
}
}
+/// Strategy used for compressing historical conversation turns.
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
+#[serde(rename_all = "snake_case")]
+pub enum CompressionStrategy {
+ /// Use the active (or override) model to generate a summary.
+ #[default]
+ Provider,
+ /// Use Owlen's built-in heuristic summariser without model calls.
+ Local,
+}
+
+/// Chat-specific configuration (history compression, retention, etc.)
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ChatSettings {
+ #[serde(default = "ChatSettings::default_auto_compress")]
+ pub auto_compress: bool,
+ #[serde(default = "ChatSettings::default_trigger_tokens")]
+ pub trigger_tokens: u32,
+ #[serde(default = "ChatSettings::default_retain_recent")]
+ pub retain_recent_messages: usize,
+ #[serde(default)]
+ pub model_override: Option<String>,
+ #[serde(default)]
+ pub strategy: CompressionStrategy,
+}
+
+impl ChatSettings {
+ const fn default_auto_compress() -> bool {
+ true
+ }
+
+ const fn default_trigger_tokens() -> u32 {
+ 6_000
+ }
+
+ const fn default_retain_recent() -> usize {
+ 8
+ }
+
+ pub fn validate(&self) -> Result<()> {
+ if self.trigger_tokens < 64 {
+ return Err(crate::Error::Config(
+ "chat.trigger_tokens must be at least 64".to_string(),
+ ));
+ }
+ if self.retain_recent_messages < 2 {
+ return Err(crate::Error::Config(
+ "chat.retain_recent_messages must be at least 2".to_string(),
+ ));
+ }
+ Ok(())
+ }
+}
+
+impl Default for ChatSettings {
+ fn default() -> Self {
+ Self {
+ auto_compress: Self::default_auto_compress(),
+ trigger_tokens: Self::default_trigger_tokens(),
+ retain_recent_messages: Self::default_retain_recent(),
+ model_override: None,
+ strategy: CompressionStrategy::default(),
+ }
+ }
+}
+
/// Operating modes for the MCP subsystem.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
diff --git a/crates/owlen-core/src/conversation.rs b/crates/owlen-core/src/conversation.rs
index 3768678..f38fd77 100644
--- a/crates/owlen-core/src/conversation.rs
+++ b/crates/owlen-core/src/conversation.rs
@@ -353,6 +353,26 @@ impl ConversationManager {
id
}
+ /// Replace the active conversation messages and rebuild internal indexes.
+ pub fn replace_active_messages(&mut self, mut messages: Vec<Message>) {
+ let now = std::time::SystemTime::now();
+ for message in &mut messages {
+ // Ensure message timestamps are not in the far past when rewired.
+ message.timestamp = now;
+ }
+ self.active.messages = messages;
+ self.active.updated_at = now;
+ self.rebuild_index();
+ self.stream_reset();
+ }
+
+ fn rebuild_index(&mut self) {
+ self.message_index.clear();
+ for (idx, message) in self.active.messages.iter().enumerate() {
+ self.message_index.insert(message.id, idx);
+ }
+ }
+
fn stream_reset(&mut self) {
self.streaming.clear();
}
diff --git a/crates/owlen-core/src/session.rs b/crates/owlen-core/src/session.rs
index 8eadd5b..d324d69 100644
--- a/crates/owlen-core/src/session.rs
+++ b/crates/owlen-core/src/session.rs
@@ -1,6 +1,7 @@
use crate::config::{
- Config, LEGACY_OLLAMA_CLOUD_API_KEY_ENV, LEGACY_OWLEN_OLLAMA_CLOUD_API_KEY_ENV,
- McpResourceConfig, McpServerConfig, OLLAMA_API_KEY_ENV, OLLAMA_CLOUD_BASE_URL,
+ ChatSettings, CompressionStrategy, Config, LEGACY_OLLAMA_CLOUD_API_KEY_ENV,
+ LEGACY_OWLEN_OLLAMA_CLOUD_API_KEY_ENV, McpResourceConfig, McpServerConfig, OLLAMA_API_KEY_ENV,
+ OLLAMA_CLOUD_BASE_URL,
};
use crate::consent::{ConsentManager, ConsentScope};
use crate::conversation::ConversationManager;
@@ -21,7 +22,7 @@ use crate::providers::OllamaProvider;
use crate::storage::{SessionMeta, StorageManager};
use crate::tools::{WEB_SEARCH_TOOL_NAME, canonical_tool_name, tool_name_matches};
use crate::types::{
- ChatParameters, ChatRequest, ChatResponse, Conversation, Message, ModelInfo, ToolCall,
+ ChatParameters, ChatRequest, ChatResponse, Conversation, Message, ModelInfo, Role, ToolCall,
};
use crate::ui::{RoleLabelDisplay, UiController};
use crate::usage::{UsageLedger, UsageQuota, UsageSnapshot};
@@ -32,10 +33,11 @@ use crate::{
ToolRegistry, WebScrapeTool, WebSearchSettings, WebSearchTool,
};
use crate::{Error, Result};
-use chrono::Utc;
-use log::warn;
+use chrono::{DateTime, Utc};
+use log::{info, warn};
use reqwest::Url;
use serde_json::{Value, json};
+use std::cmp::{max, min};
use std::collections::{HashMap, HashSet};
use std::env;
use std::path::PathBuf;
@@ -53,6 +55,107 @@ fn env_var_non_empty(name: &str) -> Option<String>
.filter(|value| !value.is_empty())
}
+fn estimate_tokens(messages: &[Message]) -> u32 {
+ messages
+ .iter()
+ .map(estimate_message_tokens)
+ .fold(0u32, |acc, value| acc.saturating_add(value))
+}
+
+fn estimate_message_tokens(message: &Message) -> u32 {
+ let content = message.content.trim();
+ if content.is_empty() {
+ return 4;
+ }
+ let approx = max(4, content.chars().count() / 4 + 1);
+ (approx + 4) as u32
+}
+
+fn build_transcript(messages: &[Message]) -> String {
+ let mut transcript = String::new();
+ let take = min(messages.len(), MAX_TRANSCRIPT_MESSAGES);
+ for message in messages.iter().take(take) {
+ let role = match message.role {
+ Role::User => "User",
+ Role::Assistant => "Assistant",
+ Role::System => "System",
+ Role::Tool => "Tool",
+ };
+ let snippet = sanitize_snippet(&message.content);
+ if snippet.is_empty() {
+ continue;
+ }
+ transcript.push_str(&format!("{role}: {snippet}\n\n"));
+ }
+ if messages.len() > take {
+ transcript.push_str(&format!(
+ "... ({} additional messages omitted for brevity)\n",
+ messages.len() - take
+ ));
+ }
+ transcript
+}
+
+fn local_summary(messages: &[Message]) -> String {
+ if messages.is_empty() {
+ return "(no content to summarize)".to_string();
+ }
+ let total = messages.len();
+ let mut summary = String::from("Summary (local heuristic)\n\n");
+ summary.push_str(&format!("- Compressed {total} prior messages.\n"));
+
+ let recent_users = collect_recent_by_role(messages, Role::User, 3);
+ if !recent_users.is_empty() {
+ summary.push_str("- Recent user intents:\n");
+ for intent in recent_users {
+ summary.push_str(&format!(" - {intent}\n"));
+ }
+ }
+
+ let recent_assistant = collect_recent_by_role(messages, Role::Assistant, 3);
+ if !recent_assistant.is_empty() {
+ summary.push_str("- Recent assistant responses:\n");
+ for reply in recent_assistant {
+ summary.push_str(&format!(" - {reply}\n"));
+ }
+ }
+
+ summary.trim_end().to_string()
+}
+
+fn collect_recent_by_role(messages: &[Message], role: Role, limit: usize) -> Vec<String> {
+ if limit == 0 {
+ return Vec::new();
+ }
+ let mut results = Vec::new();
+ for message in messages.iter().rev() {
+ if message.role == role {
+ let snippet = sanitize_snippet(&message.content);
+ if !snippet.is_empty() {
+ results.push(snippet);
+ if results.len() == limit {
+ break;
+ }
+ }
+ }
+ }
+ results.reverse();
+ results
+}
+
+fn sanitize_snippet(content: &str) -> String {
+ let trimmed = content.trim();
+ if trimmed.is_empty() {
+ return String::new();
+ }
+ let mut snippet = trimmed.replace('\r', "");
+ if snippet.len() > MAX_TRANSCRIPT_MESSAGE_CHARS {
+ snippet.truncate(MAX_TRANSCRIPT_MESSAGE_CHARS);
+ snippet.push_str("...");
+ }
+ snippet
+}
+
fn compute_web_search_settings(
config: &Config,
provider_id: &str,
@@ -195,6 +298,9 @@ pub enum ControllerEvent {
endpoints: Vec<String>,
tool_calls: Vec<ToolCall>,
},
+ CompressionCompleted {
+ report: CompressionReport,
+ },
}
#[derive(Clone, Debug)]
@@ -206,6 +312,53 @@ struct PendingToolRequest {
tool_calls: Vec<ToolCall>,
}
+#[derive(Debug, Clone)]
+pub struct CompressionReport {
+ pub summary_message_id: Uuid,
+ pub compressed_messages: usize,
+ pub estimated_tokens_before: u32,
+ pub estimated_tokens_after: u32,
+ pub strategy: CompressionStrategy,
+ pub model_used: String,
+ pub retained_recent: usize,
+ pub automated: bool,
+ pub timestamp: DateTime<Utc>,
+}
+
+#[derive(Debug, Clone)]
+struct CompressionOptions {
+ trigger_tokens: u32,
+ retain_recent: usize,
+ strategy: CompressionStrategy,
+ model_override: Option<String>,
+}
+
+impl CompressionOptions {
+ fn from_settings(settings: &ChatSettings) -> Self {
+ Self {
+ trigger_tokens: settings.trigger_tokens.max(64),
+ retain_recent: settings.retain_recent_messages.max(2),
+ strategy: settings.strategy,
+ model_override: settings.model_override.clone(),
+ }
+ }
+
+ fn min_chunk_messages(&self) -> usize {
+ self.retain_recent.saturating_add(2).max(4)
+ }
+
+ fn resolve_model<'a>(&'a self, active_model: &'a str) -> String {
+ self.model_override
+ .clone()
+ .filter(|model| !model.trim().is_empty())
+ .unwrap_or_else(|| active_model.to_string())
+ }
+}
+
+const MAX_TRANSCRIPT_MESSAGE_CHARS: usize = 1024;
+const MAX_TRANSCRIPT_MESSAGES: usize = 32;
+const COMPRESSION_METADATA_KEY: &str = "compression";
+
#[derive(Debug, Default)]
struct StreamingMessageState {
full_text: String,
@@ -381,6 +534,7 @@ pub struct SessionController {
pending_tool_requests: HashMap,
stream_states: HashMap,
usage_ledger: Arc>,
+ last_compression: Option<CompressionReport>,
}
async fn build_tools(
@@ -723,6 +877,7 @@ impl SessionController {
pending_tool_requests: HashMap::new(),
stream_states: HashMap::new(),
usage_ledger,
+ last_compression: None,
})
}
@@ -734,6 +889,10 @@ impl SessionController {
&mut self.conversation
}
+ pub fn last_compression(&self) -> Option<CompressionReport> {
+ self.last_compression.clone()
+ }
+
pub fn input_buffer(&self) -> &InputBuffer {
&self.input_buffer
}
@@ -956,6 +1115,210 @@ impl SessionController {
self.config.clone()
}
+ pub async fn compress_now(&mut self) -> Result