From 79fdafce97c47607544da7cd98e9f006c8a6397f Mon Sep 17 00:00:00 2001 From: vikingowl Date: Fri, 24 Oct 2025 00:30:59 +0200 Subject: [PATCH] feat(usage): track cloud quotas and expose :limits Acceptance Criteria:\n- header shows hourly/weekly usage with colored thresholds\n- :limits command prints persisted usage data and quotas\n- token usage survives restarts and emits 80%/95% toasts Test Notes:\n- cargo test -p owlen-core usage --- CHANGELOG.md | 1 + README.md | 1 + config.toml | 2 + crates/owlen-core/src/config.rs | 12 + crates/owlen-core/src/lib.rs | 2 + crates/owlen-core/src/providers/ollama.rs | 17 +- crates/owlen-core/src/session.rs | 157 +++++++++- crates/owlen-core/src/usage.rs | 332 ++++++++++++++++++++++ crates/owlen-tui/src/chat_app.rs | 216 +++++++++++++- crates/owlen-tui/src/ui.rs | 322 ++++++++++++++++++++- docs/configuration.md | 4 + 11 files changed, 1054 insertions(+), 12 deletions(-) create mode 100644 crates/owlen-core/src/usage.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 3259bf9..c38d2dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Secure credential vault integration for Ollama Cloud API keys when `privacy.encrypt_local_data = true`. - Input panel respects a new `ui.input_max_rows` setting so long prompts expand predictably before scrolling kicks in. - Command palette offers fuzzy `:model` filtering and `:provider` completions for fast switching. +- Cloud usage tracker persists hourly/weekly token totals, adds a `:limits` command, shows live header badges, and raises toast warnings at 80 %/95 % of the configured quotas. - Message rendering caches wrapped lines and throttles streaming redraws to keep the TUI responsive on long sessions. - Model picker badges now inspect provider capabilities so vision/audio/thinking models surface the correct icons even when descriptions are sparse. 
- Chat history honors `ui.scrollback_lines`, trimming older rows to keep the TUI responsive and surfacing a "↓ New messages" badge whenever updates land off-screen. diff --git a/README.md b/README.md index 861bf57..050807e 100644 --- a/README.md +++ b/README.md @@ -119,6 +119,7 @@ Model discovery commands worth remembering: - `:models --local` or `:models --cloud` jump directly to the corresponding section in the picker. - `:cloud setup [--force-cloud-base-url]` stores your cloud API key without clobbering an existing local base URL (unless you opt in with the flag). +- `:limits` prints the locally tracked hourly/weekly token totals for each provider and mirrors the values shown in the chat header. When a catalogue is unreachable, Owlen now tags the picker with `Local unavailable` / `Cloud unavailable` so you can recover without guessing. ## Documentation diff --git a/config.toml b/config.toml index 0c47e1e..f472662 100644 --- a/config.toml +++ b/config.toml @@ -15,6 +15,8 @@ enabled = false provider_type = "ollama_cloud" base_url = "https://ollama.com" api_key_env = "OLLAMA_API_KEY" +hourly_quota_tokens = 50000 +weekly_quota_tokens = 250000 [providers.openai] enabled = false diff --git a/crates/owlen-core/src/config.rs b/crates/owlen-core/src/config.rs index 92bc509..a5f521e 100644 --- a/crates/owlen-core/src/config.rs +++ b/crates/owlen-core/src/config.rs @@ -32,6 +32,10 @@ pub const OLLAMA_API_KEY_ENV: &str = "OLLAMA_API_KEY"; pub const LEGACY_OLLAMA_CLOUD_API_KEY_ENV: &str = "OLLAMA_CLOUD_API_KEY"; /// Legacy environment variable used by earlier Owlen releases. pub const LEGACY_OWLEN_OLLAMA_CLOUD_API_KEY_ENV: &str = "OWLEN_OLLAMA_CLOUD_API_KEY"; +/// Default hourly soft quota for Ollama Cloud usage visualization (tokens). +pub const DEFAULT_OLLAMA_CLOUD_HOURLY_QUOTA: u64 = 50_000; +/// Default weekly soft quota for Ollama Cloud usage visualization (tokens). 
+pub const DEFAULT_OLLAMA_CLOUD_WEEKLY_QUOTA: u64 = 250_000; /// Default base URL for local Ollama daemons. pub const OLLAMA_LOCAL_BASE_URL: &str = "http://localhost:11434"; /// Default OpenAI API base URL. @@ -880,6 +884,14 @@ fn default_ollama_cloud_config() -> ProviderConfig { OLLAMA_CLOUD_ENDPOINT_KEY.to_string(), serde_json::Value::String(OLLAMA_CLOUD_BASE_URL.to_string()), ); + extra.insert( + "hourly_quota_tokens".to_string(), + serde_json::Value::Number(serde_json::Number::from(DEFAULT_OLLAMA_CLOUD_HOURLY_QUOTA)), + ); + extra.insert( + "weekly_quota_tokens".to_string(), + serde_json::Value::Number(serde_json::Number::from(DEFAULT_OLLAMA_CLOUD_WEEKLY_QUOTA)), + ); ProviderConfig { enabled: false, diff --git a/crates/owlen-core/src/lib.rs b/crates/owlen-core/src/lib.rs index 3a1c574..f15995f 100644 --- a/crates/owlen-core/src/lib.rs +++ b/crates/owlen-core/src/lib.rs @@ -30,6 +30,7 @@ pub mod theme; pub mod tools; pub mod types; pub mod ui; +pub mod usage; pub mod validation; pub mod wrap_cursor; @@ -61,6 +62,7 @@ pub use session::*; pub use state::*; pub use theme::*; pub use tools::*; +pub use usage::*; pub use validation::*; /// Result type used throughout the OWLEN ecosystem diff --git a/crates/owlen-core/src/providers/ollama.rs b/crates/owlen-core/src/providers/ollama.rs index b81026f..f35fce4 100644 --- a/crates/owlen-core/src/providers/ollama.rs +++ b/crates/owlen-core/src/providers/ollama.rs @@ -41,8 +41,9 @@ use uuid::Uuid; use crate::{ Error, Result, config::{ - GeneralSettings, LEGACY_OLLAMA_CLOUD_API_KEY_ENV, LEGACY_OWLEN_OLLAMA_CLOUD_API_KEY_ENV, - OLLAMA_API_KEY_ENV, OLLAMA_CLOUD_BASE_URL, OLLAMA_CLOUD_ENDPOINT_KEY, OLLAMA_MODE_KEY, + DEFAULT_OLLAMA_CLOUD_HOURLY_QUOTA, DEFAULT_OLLAMA_CLOUD_WEEKLY_QUOTA, GeneralSettings, + LEGACY_OLLAMA_CLOUD_API_KEY_ENV, LEGACY_OWLEN_OLLAMA_CLOUD_API_KEY_ENV, OLLAMA_API_KEY_ENV, + OLLAMA_CLOUD_BASE_URL, OLLAMA_CLOUD_ENDPOINT_KEY, OLLAMA_MODE_KEY, }, llm::{LlmProvider, ProviderConfig}, mcp::McpToolDescriptor, @@ 
-1505,6 +1506,18 @@ impl LlmProvider for OllamaProvider { "description": "Seconds to cache model listings", "minimum": 5, "default": DEFAULT_MODEL_CACHE_TTL_SECS + }, + "hourly_quota_tokens": { + "type": "integer", + "description": "Soft hourly token quota used for UI alerts", + "minimum": 0, + "default": DEFAULT_OLLAMA_CLOUD_HOURLY_QUOTA + }, + "weekly_quota_tokens": { + "type": "integer", + "description": "Soft weekly token quota used for UI alerts", + "minimum": 0, + "default": DEFAULT_OLLAMA_CLOUD_WEEKLY_QUOTA } } }) diff --git a/crates/owlen-core/src/session.rs b/crates/owlen-core/src/session.rs index 6d5d830..2deea3d 100644 --- a/crates/owlen-core/src/session.rs +++ b/crates/owlen-core/src/session.rs @@ -19,6 +19,7 @@ use crate::types::{ ChatParameters, ChatRequest, ChatResponse, Conversation, Message, ModelInfo, ToolCall, }; use crate::ui::{RoleLabelDisplay, UiController}; +use crate::usage::{UsageLedger, UsageQuota, UsageSnapshot}; use crate::validation::{SchemaValidator, get_builtin_schemas}; use crate::{ChatStream, Provider}; use crate::{ @@ -29,10 +30,12 @@ use crate::{Error, Result}; use chrono::Utc; use log::warn; use serde_json::{Value, json}; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::env; use std::path::PathBuf; use std::sync::{Arc, Mutex}; +use std::time::SystemTime; +use tokio::fs; use tokio::sync::Mutex as TokioMutex; use tokio::sync::mpsc::UnboundedSender; use uuid::Uuid; @@ -240,6 +243,7 @@ pub struct SessionController { event_tx: Option>, pending_tool_requests: HashMap, stream_states: HashMap, + usage_ledger: Arc>, } async fn build_tools( @@ -545,6 +549,25 @@ impl SessionController { ) .await?; + let usage_ledger_path = storage + .database_path() + .parent() + .map(|dir| dir.join("usage-ledger.json")) + .unwrap_or_else(|| PathBuf::from("usage-ledger.json")); + + let usage_ledger_instance = + match UsageLedger::load_or_default(usage_ledger_path.clone()).await { + Ok(ledger) => ledger, + Err(err) => { + 
warn!( + "Failed to load usage ledger at {}: {err}. Starting with an empty ledger.", + usage_ledger_path.display() + ); + UsageLedger::empty(usage_ledger_path) + } + }; + let usage_ledger = Arc::new(TokioMutex::new(usage_ledger_instance)); + Ok(Self { provider, conversation, @@ -568,6 +591,7 @@ impl SessionController { event_tx, pending_tool_requests: HashMap::new(), stream_states: HashMap::new(), + usage_ledger, }) } @@ -656,6 +680,134 @@ impl SessionController { Some((server.to_string(), uri.to_string())) } + async fn persist_usage_serialized(path: PathBuf, serialized: String) { + if let Some(parent) = path.parent() { + if let Err(err) = fs::create_dir_all(parent).await { + warn!( + "Failed to create usage ledger directory {}: {}", + parent.display(), + err + ); + return; + } + } + + if let Err(err) = fs::write(&path, serialized).await { + warn!("Failed to write usage ledger {}: {}", path.display(), err); + } + } + + fn parse_quota_value(value: &Value) -> Option { + match value { + Value::Number(num) => num.as_u64(), + Value::String(text) => text.trim().parse::().ok(), + _ => None, + } + } + + fn quota_from_config(config: &Config, provider: &str) -> UsageQuota { + let mut quota = UsageQuota::default(); + + if let Some(entry) = config.providers.get(provider) { + if let Some(value) = entry.extra.get("hourly_quota_tokens") { + quota.hourly_quota_tokens = Self::parse_quota_value(value); + } + if let Some(value) = entry.extra.get("weekly_quota_tokens") { + quota.weekly_quota_tokens = Self::parse_quota_value(value); + } + } + + quota + } + + pub async fn record_usage_sample( + &self, + usage: &crate::types::TokenUsage, + ) -> Option { + if usage.total_tokens == 0 { + return None; + } + + let provider_name = self.provider.name().to_string(); + if provider_name.trim().is_empty() { + return None; + } + + let quotas = { + let guard = self.config.lock().await; + Self::quota_from_config(&guard, &provider_name) + }; + + let timestamp = SystemTime::now(); + let mut 
serialized_payload: Option<(PathBuf, String)> = None; + + let snapshot = { + let mut ledger = self.usage_ledger.lock().await; + ledger.record(&provider_name, usage, timestamp); + let snapshot = ledger.snapshot(&provider_name, quotas, timestamp); + match ledger.serialize() { + Ok(payload) => { + serialized_payload = Some((ledger.path().to_path_buf(), payload)); + } + Err(err) => warn!("Failed to serialize usage ledger: {}", err), + } + snapshot + }; + + if let Some((path, payload)) = serialized_payload { + Self::persist_usage_serialized(path, payload).await; + } + + Some(snapshot) + } + + pub async fn current_usage_snapshot(&self) -> Option { + let provider_name = self.provider.name().to_string(); + if provider_name.trim().is_empty() { + return None; + } + + let quotas = { + let guard = self.config.lock().await; + Self::quota_from_config(&guard, &provider_name) + }; + + let now = SystemTime::now(); + let ledger = self.usage_ledger.lock().await; + Some(ledger.snapshot(&provider_name, quotas, now)) + } + + pub async fn usage_overview(&self) -> Vec { + let quota_map = { + let guard = self.config.lock().await; + guard + .providers + .iter() + .map(|(name, _)| (name.clone(), Self::quota_from_config(&guard, name))) + .collect::>() + }; + + let now = SystemTime::now(); + let mut provider_names: HashSet = quota_map.keys().cloned().collect(); + + let snapshots = { + let ledger = self.usage_ledger.lock().await; + for key in ledger.provider_keys() { + provider_names.insert(key.clone()); + } + + provider_names + .into_iter() + .map(|provider| { + let quota = quota_map.get(&provider).cloned().unwrap_or_default(); + ledger.snapshot(&provider, quota, now) + }) + .collect::>() + }; + + snapshots + } + // Asynchronous access to the configuration (used internally). 
pub async fn config_async(&self) -> tokio::sync::MutexGuard<'_, Config> { self.config.lock().await @@ -1305,6 +1457,9 @@ impl SessionController { request.messages = self.conversation.active().messages.clone(); continue; } else { + if let Some(usage) = response.usage.as_ref() { + let _ = self.record_usage_sample(usage).await; + } self.conversation.push_message(response.message.clone()); return Ok(SessionOutcome::Complete(response)); } diff --git a/crates/owlen-core/src/usage.rs b/crates/owlen-core/src/usage.rs new file mode 100644 index 0000000..3048324 --- /dev/null +++ b/crates/owlen-core/src/usage.rs @@ -0,0 +1,332 @@ +use crate::{Error, Result}; +use serde::{Deserialize, Serialize}; +use std::collections::{HashMap, VecDeque}; +use std::path::{Path, PathBuf}; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; +use tokio::fs; + +const LEDGER_VERSION: u32 = 1; +const SECONDS_PER_HOUR: i64 = 60 * 60; +const SECONDS_PER_WEEK: i64 = 7 * 24 * 60 * 60; + +#[derive(Clone, Debug, Serialize, Deserialize)] +struct UsageRecord { + timestamp: i64, + prompt_tokens: u32, + completion_tokens: u32, +} + +#[derive(Serialize, Deserialize)] +struct LedgerFile { + version: u32, + providers: HashMap>, +} + +impl Default for LedgerFile { + fn default() -> Self { + Self { + version: LEDGER_VERSION, + providers: HashMap::new(), + } + } +} + +#[derive(Clone, Debug, Default)] +pub struct UsageLedger { + path: PathBuf, + providers: HashMap>, +} + +#[derive(Clone, Debug, Default)] +pub struct UsageQuota { + pub hourly_quota_tokens: Option, + pub weekly_quota_tokens: Option, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum UsageWindow { + Hour, + Week, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum UsageBand { + Normal = 0, + Warning = 1, + Critical = 2, +} + +#[derive(Clone, Debug, Default)] +pub struct WindowMetrics { + pub prompt_tokens: u64, + pub completion_tokens: u64, + pub total_tokens: u64, + pub quota_tokens: Option, +} + +impl 
WindowMetrics { + pub fn percent_of_quota(&self) -> Option { + let quota = self.quota_tokens?; + if quota == 0 { + return None; + } + Some(self.total_tokens as f64 / quota as f64) + } + + pub fn band(&self) -> UsageBand { + match self.percent_of_quota() { + Some(p) if p >= 0.95_f64 => UsageBand::Critical, + Some(p) if p >= 0.80_f64 => UsageBand::Warning, + _ => UsageBand::Normal, + } + } +} + +#[derive(Clone, Debug, Default)] +pub struct UsageSnapshot { + pub provider: String, + pub hourly: WindowMetrics, + pub weekly: WindowMetrics, + pub last_updated: Option, +} + +impl UsageSnapshot { + pub fn window(&self, window: UsageWindow) -> &WindowMetrics { + match window { + UsageWindow::Hour => &self.hourly, + UsageWindow::Week => &self.weekly, + } + } +} + +impl UsageLedger { + pub fn empty(path: PathBuf) -> Self { + Self { + path, + providers: HashMap::new(), + } + } + + pub async fn load_or_default(path: PathBuf) -> Result { + if !path.exists() { + return Ok(Self { + path, + providers: HashMap::new(), + }); + } + + let contents = fs::read_to_string(&path) + .await + .map_err(|err| Error::Storage(format!("Failed to read usage ledger: {err}")))?; + + let file: LedgerFile = match serde_json::from_str(&contents) { + Ok(file) => file, + Err(err) => { + return Err(Error::Storage(format!( + "Failed to parse usage ledger at {}: {err}", + path.display() + ))); + } + }; + + Ok(Self { + path, + providers: file.providers, + }) + } + + pub async fn persist(&self) -> Result<()> { + if let Some(parent) = self.path.parent() { + fs::create_dir_all(parent) + .await + .map_err(|err| Error::Storage(format!("Failed to create data directory: {err}")))?; + } + + let serialized = self.serialize()?; + + fs::write(&self.path, serialized) + .await + .map_err(|err| Error::Storage(format!("Failed to write usage ledger: {err}")))?; + + Ok(()) + } + + pub fn record( + &mut self, + provider: &str, + usage: &crate::types::TokenUsage, + timestamp: SystemTime, + ) { + let total_tokens = 
usage.total_tokens; + if total_tokens == 0 { + return; + } + + let ts = match timestamp.duration_since(UNIX_EPOCH) { + Ok(duration) => duration.as_secs() as i64, + Err(_) => 0, + }; + + let entry = self + .providers + .entry(provider.to_string()) + .or_insert_with(VecDeque::new); + + entry.push_back(UsageRecord { + timestamp: ts, + prompt_tokens: usage.prompt_tokens, + completion_tokens: usage.completion_tokens, + }); + + self.prune_old(provider, ts); + } + + pub fn provider_keys(&self) -> impl Iterator { + self.providers.keys() + } + + pub fn serialize(&self) -> Result { + let file = LedgerFile { + version: LEDGER_VERSION, + providers: self.providers.clone(), + }; + + serde_json::to_string_pretty(&file) + .map_err(|err| Error::Storage(format!("Failed to serialize usage ledger: {err}"))) + } + + pub fn path(&self) -> &Path { + &self.path + } + + pub fn snapshot(&self, provider: &str, quotas: UsageQuota, now: SystemTime) -> UsageSnapshot { + let now_secs = now + .duration_since(UNIX_EPOCH) + .unwrap_or_else(|_| Duration::from_secs(0)) + .as_secs() as i64; + + let mut snapshot = UsageSnapshot { + provider: provider.to_string(), + hourly: WindowMetrics { + quota_tokens: quotas.hourly_quota_tokens, + ..Default::default() + }, + weekly: WindowMetrics { + quota_tokens: quotas.weekly_quota_tokens, + ..Default::default() + }, + last_updated: None, + }; + + if let Some(records) = self.providers.get(provider) { + for record in records { + if now_secs - record.timestamp <= SECONDS_PER_HOUR { + snapshot.hourly.prompt_tokens += record.prompt_tokens as u64; + snapshot.hourly.completion_tokens += record.completion_tokens as u64; + } + + if now_secs - record.timestamp <= SECONDS_PER_WEEK { + snapshot.weekly.prompt_tokens += record.prompt_tokens as u64; + snapshot.weekly.completion_tokens += record.completion_tokens as u64; + } + } + + snapshot.hourly.total_tokens = + snapshot.hourly.prompt_tokens + snapshot.hourly.completion_tokens; + snapshot.weekly.total_tokens = + 
snapshot.weekly.prompt_tokens + snapshot.weekly.completion_tokens; + + snapshot.last_updated = records.back().and_then(|record| { + UNIX_EPOCH.checked_add(Duration::from_secs(record.timestamp as u64)) + }); + } + + snapshot + } + + pub fn prune_old(&mut self, provider: &str, now_secs: i64) { + if let Some(records) = self.providers.get_mut(provider) { + while let Some(front) = records.front() { + if now_secs - front.timestamp > SECONDS_PER_WEEK { + records.pop_front(); + } else { + break; + } + } + } + } + + pub fn prune_all(&mut self, now: SystemTime) { + let now_secs = now + .duration_since(UNIX_EPOCH) + .unwrap_or_else(|_| Duration::from_secs(0)) + .as_secs() as i64; + let provider_keys: Vec = self.providers.keys().cloned().collect(); + for provider in provider_keys { + self.prune_old(&provider, now_secs); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::TokenUsage; + use std::time::{Duration, UNIX_EPOCH}; + use tempfile::tempdir; + + fn make_usage(prompt: u32, completion: u32) -> TokenUsage { + TokenUsage { + prompt_tokens: prompt, + completion_tokens: completion, + total_tokens: prompt.saturating_add(completion), + } + } + + #[test] + fn records_and_summarizes_usage() { + let temp = tempdir().expect("tempdir"); + let path = temp.path().join("ledger.json"); + let mut ledger = UsageLedger::empty(path); + + let usage = make_usage(40, 10); + let timestamp = UNIX_EPOCH + Duration::from_secs(1); + ledger.record("ollama_cloud", &usage, timestamp); + + let quotas = UsageQuota { + hourly_quota_tokens: Some(100), + weekly_quota_tokens: Some(1000), + }; + + let snapshot = ledger.snapshot("ollama_cloud", quotas, UNIX_EPOCH + Duration::from_secs(2)); + + assert_eq!(snapshot.hourly.total_tokens, 50); + assert_eq!(snapshot.weekly.total_tokens, 50); + assert_eq!(snapshot.hourly.quota_tokens, Some(100)); + assert_eq!(snapshot.weekly.quota_tokens, Some(1000)); + assert_eq!(snapshot.hourly.band(), UsageBand::Normal); + } + + #[test] + fn 
prunes_records_outside_week() { + let temp = tempdir().expect("tempdir"); + let path = temp.path().join("ledger.json"); + let mut ledger = UsageLedger::empty(path); + + let old_usage = make_usage(30, 5); + let recent_usage = make_usage(20, 5); + + let base = UNIX_EPOCH; + ledger.record("ollama_cloud", &old_usage, base); + + // Advance beyond a week for the second record. + let later = UNIX_EPOCH + Duration::from_secs(SECONDS_PER_WEEK as u64 + 120); + ledger.record("ollama_cloud", &recent_usage, later); + + let quotas = UsageQuota::default(); + let snapshot = ledger.snapshot("ollama_cloud", quotas, later); + + assert_eq!(snapshot.hourly.total_tokens, 25); + assert_eq!(snapshot.weekly.total_tokens, 25); + } +} diff --git a/crates/owlen-tui/src/chat_app.rs b/crates/owlen-tui/src/chat_app.rs index 42664bb..7742e19 100644 --- a/crates/owlen-tui/src/chat_app.rs +++ b/crates/owlen-tui/src/chat_app.rs @@ -22,8 +22,9 @@ use owlen_core::{ session::{ControllerEvent, SessionController, SessionOutcome, ToolConsentResolution}, storage::SessionMeta, theme::Theme, - types::{ChatParameters, ChatResponse, Conversation, ModelInfo, Role}, + types::{ChatParameters, ChatResponse, Conversation, ModelInfo, Role, TokenUsage}, ui::{AppState, AutoScroll, FocusedPanel, InputMode, RoleLabelDisplay}, + usage::{UsageBand, UsageSnapshot, UsageWindow, WindowMetrics}, }; use owlen_markdown::from_str; use pathdiff::diff_paths; @@ -59,7 +60,7 @@ use crate::state::{ spawn_repo_search_task, spawn_symbol_search_task, }; use crate::toast::{Toast, ToastLevel, ToastManager}; -use crate::ui::format_tool_output; +use crate::ui::{format_token_short, format_tool_output}; use crate::widgets::model_picker::FilterMode; use crate::{commands, highlight}; use owlen_core::config::{ @@ -101,6 +102,14 @@ const RESIZE_SNAP_VALUES: [f32; 3] = [0.5, 0.75, 0.25]; const DOUBLE_CTRL_C_WINDOW: Duration = Duration::from_millis(1500); pub(crate) const MIN_MESSAGE_CARD_WIDTH: usize = 14; const MOUSE_SCROLL_STEP: isize = 3; 
+const DEFAULT_CONTEXT_WINDOW_TOKENS: u32 = 8_192; + +#[derive(Clone, Copy, Debug, Default)] +pub struct ContextUsage { + pub prompt_tokens: u32, + pub completion_tokens: u32, + pub context_window: u32, +} #[derive(Clone, Copy, Debug)] pub(crate) struct LayoutSnapshot { @@ -606,6 +615,9 @@ pub struct ChatApp { system_status: String, // System/status messages (tool execution, status, etc) toasts: ToastManager, debug_log: DebugLogState, + usage_snapshot: Option, + usage_thresholds: HashMap<(String, UsageWindow), UsageBand>, + context_usage: Option, last_layout: LayoutSnapshot, /// Simple execution budget: maximum number of tool calls allowed per session. _execution_budget: usize, @@ -880,6 +892,9 @@ impl ChatApp { }, toasts: ToastManager::new(), debug_log: DebugLogState::new(), + usage_snapshot: None, + usage_thresholds: HashMap::new(), + context_usage: None, last_layout: LayoutSnapshot::default(), _execution_budget: 50, agent_mode: false, @@ -919,6 +934,8 @@ impl ChatApp { } } + app.refresh_usage_summary().await?; + Ok((app, session_rx)) } @@ -1039,6 +1056,45 @@ impl ChatApp { &self.current_provider } + pub fn usage_snapshot(&self) -> Option<&UsageSnapshot> { + self.usage_snapshot.as_ref() + } + + pub fn context_usage_with_fallback(&self) -> Option { + if let Some(usage) = self.context_usage { + Some(usage) + } else { + self.active_context_window().map(|window| ContextUsage { + prompt_tokens: 0, + completion_tokens: 0, + context_window: window, + }) + } + } + + fn update_context_usage(&mut self, usage: &TokenUsage) { + let context_window = self + .active_context_window() + .unwrap_or(DEFAULT_CONTEXT_WINDOW_TOKENS); + self.context_usage = Some(ContextUsage { + prompt_tokens: usage.prompt_tokens, + completion_tokens: usage.completion_tokens, + context_window, + }); + } + + fn active_context_window(&self) -> Option { + let current_model = self.controller.selected_model(); + + self.models.iter().find_map(|model| { + if model.id == current_model || model.name == 
current_model { + model.context_window + } else { + None + } + }) + } + pub fn should_show_code_view(&self) -> bool { if !matches!(self.operating_mode, owlen_core::mode::Mode::Code) { return false; @@ -6968,6 +7024,8 @@ impl ChatApp { "Provider switched; failed to refresh models" .to_string(); } + self.context_usage = None; + self.refresh_usage_summary().await?; } Err(err) => { self.error = Some(format!( @@ -6994,6 +7052,12 @@ impl ChatApp { self.command_palette.clear(); return Ok(AppState::Running); } + "limits" => { + self.show_usage_limits().await?; + self.set_input_mode(InputMode::Normal); + self.command_palette.clear(); + return Ok(AppState::Running); + } "models" => { if args.is_empty() { if let Err(err) = self.show_model_picker(None).await { @@ -8219,6 +8283,20 @@ impl ChatApp { // Auto-scroll will handle this in the render loop if response.is_final { + let recorded_snapshot = match response.usage.as_ref() { + Some(usage) => { + self.update_context_usage(usage); + self.controller.record_usage_sample(usage).await + } + None => None, + }; + if let Some(snapshot) = recorded_snapshot { + self.usage_snapshot = Some(snapshot.clone()); + self.update_usage_toasts(&snapshot); + } else { + self.refresh_usage_summary().await?; + } + self.streaming.remove(&message_id); self.stream_tasks.remove(&message_id); self.stop_loading_animation(); @@ -8338,6 +8416,134 @@ impl ChatApp { self.error = None; } + async fn refresh_usage_summary(&mut self) -> Result<()> { + if let Some(snapshot) = self.controller.current_usage_snapshot().await { + self.usage_snapshot = Some(snapshot.clone()); + self.update_usage_toasts(&snapshot); + } else { + self.usage_snapshot = None; + } + Ok(()) + } + + fn update_usage_toasts(&mut self, snapshot: &UsageSnapshot) { + for window in [UsageWindow::Hour, UsageWindow::Week] { + let key = (snapshot.provider.clone(), window); + let metrics = snapshot.window(window); + let quota = match metrics.quota_tokens { + Some(value) if value > 0 => value, + _ => { + 
self.usage_thresholds.remove(&key); + continue; + } + }; + + let previous = self + .usage_thresholds + .get(&key) + .copied() + .unwrap_or(UsageBand::Normal); + let current = metrics.band(); + + if current > previous { + if let Some(percent_ratio) = metrics.percent_of_quota() { + let percent_value = percent_ratio * 100.0; + let percent_text = Self::format_percent_value(percent_value.min(999.9)); + let quota_text = format_token_short(quota); + let used_text = format_token_short(metrics.total_tokens); + let provider_display = Self::provider_display_name(&snapshot.provider); + let window_label = Self::usage_window_label(window); + let message = format!( + "{} {} usage at {}% ({}/{})", + provider_display, window_label, percent_text, used_text, quota_text + ); + let level = if current == UsageBand::Critical { + ToastLevel::Error + } else { + ToastLevel::Warning + }; + self.push_toast(level, message); + } + } else if current == UsageBand::Normal && previous != UsageBand::Normal { + self.usage_thresholds.insert(key.clone(), UsageBand::Normal); + } + + self.usage_thresholds.insert(key, current); + } + } + + async fn show_usage_limits(&mut self) -> Result<()> { + let snapshots = self.controller.usage_overview().await; + if snapshots.is_empty() { + let message = "Usage: no data recorded yet.".to_string(); + self.status = message.clone(); + self.error = None; + self.push_toast(ToastLevel::Info, message); + return Ok(()); + } + + let mut parts = Vec::new(); + let mut current_snapshot: Option = None; + + for snapshot in snapshots.iter() { + if snapshot.provider == self.current_provider { + current_snapshot = Some(snapshot.clone()); + } + self.update_usage_toasts(snapshot); + + let provider_display = Self::provider_display_name(&snapshot.provider); + let hour = Self::summarize_usage_window("hour", snapshot.window(UsageWindow::Hour)); + let week = Self::summarize_usage_window("week", snapshot.window(UsageWindow::Week)); + parts.push(format!("{provider_display}: {hour}; {week}")); 
+ } + + if let Some(snapshot) = current_snapshot { + self.usage_snapshot = Some(snapshot); + } + + let message = parts.join(" | "); + self.status = format!("Usage • {message}"); + self.error = None; + self.push_toast(ToastLevel::Info, message.clone()); + + Ok(()) + } + + fn summarize_usage_window(label: &str, metrics: &WindowMetrics) -> String { + let used = format_token_short(metrics.total_tokens); + if let Some(quota) = metrics.quota_tokens { + if quota == 0 { + return format!("{label} {used} tokens"); + } + let quota_text = format_token_short(quota); + let percent = metrics + .percent_of_quota() + .map(|ratio| ratio * 100.0) + .unwrap_or(0.0); + let percent_text = Self::format_percent_value(percent.min(999.9)); + format!("{label} {used}/{quota_text} ({percent_text}%)") + } else { + format!("{label} {used} tokens") + } + } + + fn usage_window_label(window: UsageWindow) -> &'static str { + match window { + UsageWindow::Hour => "hourly", + UsageWindow::Week => "weekly", + } + } + + fn format_percent_value(percent: f64) -> String { + if percent >= 100.0 || percent == 0.0 { + format!("{percent:.0}") + } else if percent >= 10.0 { + format!("{percent:.0}") + } else { + format!("{percent:.1}") + } + } + async fn collect_models_from_all_providers( &self, ) -> ( @@ -10178,10 +10384,14 @@ impl ChatApp { let timeout_duration = std::time::Duration::from_secs(30); match tokio::time::timeout(timeout_duration, request_future).await { - Ok(Ok(SessionOutcome::Complete(_response))) => { + Ok(Ok(SessionOutcome::Complete(response))) => { + if let Some(usage) = response.usage.as_ref() { + self.update_context_usage(usage); + } self.stop_loading_animation(); self.status = "Ready".to_string(); self.error = None; + self.refresh_usage_summary().await?; Ok(()) } Ok(Ok(SessionOutcome::Streaming { diff --git a/crates/owlen-tui/src/ui.rs b/crates/owlen-tui/src/ui.rs index bf5a794..31bb77d 100644 --- a/crates/owlen-tui/src/ui.rs +++ b/crates/owlen-tui/src/ui.rs @@ -13,7 +13,8 @@ use 
unicode_segmentation::UnicodeSegmentation; use unicode_width::UnicodeWidthStr; use crate::chat_app::{ - ChatApp, HELP_TAB_COUNT, LayoutSnapshot, MIN_MESSAGE_CARD_WIDTH, MessageRenderContext, + ChatApp, ContextUsage, HELP_TAB_COUNT, LayoutSnapshot, MIN_MESSAGE_CARD_WIDTH, + MessageRenderContext, }; use crate::highlight; use crate::state::{ @@ -25,6 +26,7 @@ use crate::widgets::model_picker::render_model_picker; use owlen_core::theme::Theme; use owlen_core::types::Role; use owlen_core::ui::{FocusedPanel, InputMode, RoleLabelDisplay}; +use owlen_core::usage::{UsageBand, UsageSnapshot, UsageWindow, WindowMetrics}; use textwrap::wrap; const PRIVACY_TAB_INDEX: usize = HELP_TAB_COUNT - 1; @@ -183,6 +185,75 @@ mod focus_tests { } } +#[cfg(test)] +mod context_usage_tests { + use super::*; + use ratatui::style::{Color, Modifier}; + + fn theme() -> Theme { + Theme::default() + } + + #[test] + fn context_badge_formats_label_and_highlights() { + let theme = theme(); + let usage = ContextUsage { + prompt_tokens: 2600, + completion_tokens: 0, + total_tokens: 2600, + context_window: 8000, + }; + + let (label, style) = context_usage_badge(usage, &theme).expect("badge should render"); + assert_eq!(label, "Context 2.6k / 8k (33%)"); + assert_eq!(style.fg, Some(theme.info)); + assert!(style.add_modifier.contains(Modifier::BOLD)); + } + + #[test] + fn context_badge_warns_near_limits() { + let theme = theme(); + let usage = ContextUsage { + prompt_tokens: 7000, + completion_tokens: 0, + total_tokens: 7000, + context_window: 10000, + }; + + let (_, style) = context_usage_badge(usage, &theme).expect("badge should render"); + assert_eq!(style.fg, Some(Color::Yellow)); + } + + #[test] + fn context_badge_flags_danger_zone() { + let theme = theme(); + let usage = ContextUsage { + prompt_tokens: 9000, + completion_tokens: 0, + total_tokens: 9000, + context_window: 10000, + }; + + let (_, style) = context_usage_badge(usage, &theme).expect("badge should render"); + assert_eq!(style.fg, 
Some(theme.error)); + } + + #[test] + fn context_badge_handles_zero_usage() { + let theme = theme(); + let usage = ContextUsage { + prompt_tokens: 0, + completion_tokens: 0, + total_tokens: 0, + context_window: 32000, + }; + + let (label, style) = context_usage_badge(usage, &theme).expect("badge should render"); + assert_eq!(label, "Context 0 / 32k (0%)"); + assert_eq!(style.fg, Some(theme.info)); + } +} + pub fn render_chat(frame: &mut Frame<'_>, app: &mut ChatApp) { // Update thinking content from last message app.update_thinking_from_last_message(); @@ -1501,6 +1572,24 @@ fn render_messages(frame: &mut Frame<'_>, area: Rect, app: &mut ChatApp) { let scroll_position = app.scroll().min(u16::MAX as usize) as u16; let mut title_spans = panel_title_spans("Chat", true, has_focus, &theme); + + let active_model = app.active_model_label(); + if !active_model.is_empty() { + let model_display = truncate_with_ellipsis(&active_model, 28); + title_spans.push(Span::raw(" · ")); + title_spans.push(Span::styled( + model_display, + Style::default().fg(theme.pane_header_active), + )); + } + + if let Some(usage) = app.context_usage_with_fallback() { + if let Some((label, style)) = context_usage_badge(usage, &theme) { + title_spans.push(Span::raw(" · ")); + title_spans.push(Span::styled(label, style)); + } + } + title_spans.push(Span::raw(" ")); title_spans.push(Span::styled( "PgUp/PgDn scroll · g/G jump · s save · Ctrl+2 focus", @@ -2166,9 +2255,9 @@ fn render_status(frame: &mut Frame<'_>, area: Rect, app: &ChatApp) { let columns = Layout::default() .direction(Direction::Horizontal) .constraints([ - Constraint::Percentage(30), - Constraint::Percentage(40), - Constraint::Percentage(30), + Constraint::Length(32), + Constraint::Min(24), + Constraint::Length(48), ]) .split(inner); @@ -2294,9 +2383,11 @@ fn render_status(frame: &mut Frame<'_>, area: Rect, app: &ChatApp) { frame.render_widget(mid_paragraph, columns[1]); let provider = app.current_provider(); + let provider_display = 
truncate_with_ellipsis(provider, 16); let model_label = app.active_model_label(); + let model_display = truncate_with_ellipsis(&model_label, 24); let mut right_spans = vec![Span::styled( - format!("{} ▸ {}", provider, model_label), + format!("{} ▸ {}", provider_display, model_display), Style::default().fg(theme.text).add_modifier(Modifier::BOLD), )]; @@ -2315,6 +2406,18 @@ fn render_status(frame: &mut Frame<'_>, area: Rect, app: &ChatApp) { )); } + if let Some(usage) = app.context_usage_with_fallback() { + if let Some((label, style)) = context_usage_badge(usage, theme) { + right_spans.push(Span::styled(format!(" · {}", label), style)); + } + } + + if let Some(snapshot) = app.usage_snapshot() { + for span in usage_badge_spans(snapshot, theme) { + right_spans.push(span); + } + } + right_spans.push(Span::styled( " · LSP:✓", Style::default() @@ -2322,12 +2425,219 @@ fn render_status(frame: &mut Frame<'_>, area: Rect, app: &ChatApp) { .add_modifier(Modifier::DIM), )); - let right_paragraph = Paragraph::new(Line::from(right_spans)) + let right_line = spans_within_width(right_spans, columns[2].width); + let right_paragraph = Paragraph::new(right_line) .alignment(Alignment::Right) .style(Style::default().bg(theme.status_background).fg(theme.text)); frame.render_widget(right_paragraph, columns[2]); } +fn context_usage_badge(usage: ContextUsage, theme: &Theme) -> Option<(String, Style)> { + let context_window = usage.context_window.max(1); + let ratio = usage.prompt_tokens as f64 / context_window as f64; + let percent = ((ratio * 100.0).round() as u32).min(100); + let used = format_token_short(usage.prompt_tokens as u64); + let window = format_token_short(context_window as u64); + let label = format!("Context {} / {} ({}%)", used, window, percent); + + let style = if ratio < 0.60 { + Style::default().fg(theme.info).add_modifier(Modifier::BOLD) + } else if ratio < 0.85 { + Style::default() + .fg(Color::Yellow) + .add_modifier(Modifier::BOLD) + } else { + Style::default() + 
.fg(theme.error) + .add_modifier(Modifier::BOLD) + }; + + Some((label, style)) +} + +fn usage_badge_spans(snapshot: &UsageSnapshot, theme: &Theme) -> Vec> { + let mut spans = Vec::new(); + let provider_label = ChatApp::provider_display_name(&snapshot.provider); + + let hour_metrics = snapshot.window(UsageWindow::Hour); + let week_metrics = snapshot.window(UsageWindow::Week); + + if hour_metrics.quota_tokens.is_none() + && hour_metrics.total_tokens == 0 + && week_metrics.quota_tokens.is_none() + && week_metrics.total_tokens == 0 + { + return spans; + } + + spans.push(Span::styled( + format!(" · {} usage", provider_label), + Style::default() + .fg(theme.placeholder) + .add_modifier(Modifier::DIM), + )); + + if let Some(span) = usage_window_span("hr", hour_metrics, theme) { + spans.push(span); + } + if let Some(span) = usage_window_span("wk", week_metrics, theme) { + spans.push(span); + } + + spans +} + +fn usage_window_span(label: &str, metrics: &WindowMetrics, theme: &Theme) -> Option> { + if metrics.quota_tokens.is_none() && metrics.total_tokens == 0 { + return None; + } + + let used = format_token_short(metrics.total_tokens); + let text = if let Some(quota) = metrics.quota_tokens { + if quota == 0 { + format!(" · {} {} tokens", label, used) + } else if let Some(percent_ratio) = metrics.percent_of_quota() { + let quota_text = format_token_short(quota); + let percent_text = format_percent_short((percent_ratio * 100.0).min(999.9)); + format!(" · {} {}/{} ({}%)", label, used, quota_text, percent_text) + } else { + let quota_text = format_token_short(quota); + format!(" · {} {}/{}", label, used, quota_text) + } + } else { + format!(" · {} {} tokens", label, used) + }; + + let style = match metrics.band() { + UsageBand::Normal => Style::default().fg(theme.info).add_modifier(Modifier::BOLD), + UsageBand::Warning => Style::default() + .fg(Color::Yellow) + .add_modifier(Modifier::BOLD), + UsageBand::Critical => Style::default() + .fg(theme.error) + 
/// Formats a percentage for the compact usage badges.
///
/// Negative inputs are clamped to zero. Zero, and any value that rounds to 10 or more,
/// is printed without decimals; smaller non-zero values keep one decimal place so that
/// low usage stays visible (for example `4.3`).
fn format_percent_short(percent: f64) -> String {
    let clamped = percent.max(0.0);
    // Pick the precision from the value rounded to one decimal, so 9.95..10.0 renders
    // as "10" (like 10.2 does) instead of the inconsistent "10.0".
    let rounded_tenths = (clamped * 10.0).round() / 10.0;
    if clamped == 0.0 || rounded_tenths >= 10.0 {
        format!("{clamped:.0}")
    } else {
        format!("{clamped:.1}")
    }
}

/// Abbreviates a token count with a `k`, `M` or `B` suffix.
///
/// Counts below 1000 are printed verbatim. Larger counts are scaled to the biggest unit
/// that does not exceed them and rendered by `format_compact`, e.g. `2600` -> `2.6k`,
/// `250000` -> `250k`, `1500000` -> `1.5M`.
pub(crate) fn format_token_short(value: u64) -> String {
    // Ordered from largest to smallest so `position` finds the biggest fitting unit.
    const UNITS: [(u64, &str); 3] = [(1_000_000_000, "B"), (1_000_000, "M"), (1_000, "k")];

    let index = match UNITS.iter().position(|&(scale, _)| value >= scale) {
        Some(index) => index,
        None => return value.to_string(),
    };

    // The u64 -> f64 conversion may lose precision on huge counts; that is acceptable
    // because at most three significant digits are displayed.
    let (scale, suffix) = UNITS[index];
    let scaled = value as f64 / scale as f64;

    // Values that would round up to "1000" of this unit are promoted to the next larger
    // unit (999_950 -> "1M" rather than "1000k"). The largest unit has nowhere to go.
    if index > 0 && scaled >= 999.5 {
        let (larger_scale, larger_suffix) = UNITS[index - 1];
        return format_compact(value as f64 / larger_scale as f64, larger_suffix);
    }

    format_compact(scaled, suffix)
}

/// Renders `value` followed by `suffix`, using no decimals from 100 upwards and at most
/// one decimal below that. A redundant `.0` is dropped (`8.0` -> `8`).
fn format_compact(value: f64, suffix: &str) -> String {
    let mut result = if value >= 100.0 {
        format!("{value:.0}")
    } else {
        format!("{value:.1}")
    };

    // Only a fractional ".0" may be removed. Stripping bare trailing zeros would turn a
    // whole number such as "100" into "1" and misreport the count by orders of magnitude.
    if result.ends_with(".0") {
        result.truncate(result.len() - 2);
    }

    result.push_str(suffix);
    result
}
+ continue; + } + + let style = span.style; + if width <= remaining { + output.push(Span::styled(text.to_string(), style)); + remaining -= width; + } else { + let truncated = truncate_to_width(text, remaining); + if !truncated.is_empty() { + output.push(Span::styled(truncated, style)); + } + break; + } + } + + Line::from(output) +} + +fn truncate_to_width(text: &str, max_width: usize) -> String { + if max_width == 0 { + return String::new(); + } + + let mut result = String::new(); + let mut used = 0; + for grapheme in text.graphemes(true) { + let width = UnicodeWidthStr::width(grapheme); + if used + width > max_width { + break; + } + result.push_str(grapheme); + used += width; + } + result +} + fn render_code_workspace(frame: &mut Frame<'_>, area: Rect, app: &mut ChatApp) { let theme = app.theme().clone(); frame.render_widget(Clear, area); diff --git a/docs/configuration.md b/docs/configuration.md index b43814a..7743b69 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -177,10 +177,14 @@ base_url = "http://localhost:11434" enabled = true base_url = "https://ollama.com" api_key_env = "OLLAMA_API_KEY" +hourly_quota_tokens = 50000 +weekly_quota_tokens = 250000 ``` Requests target the same `/api/chat` endpoint documented by Ollama and automatically include the API key using a `Bearer` authorization header. If you prefer not to store the key in the config file, either rely on `api_key_env` or export the `OLLAMA_API_KEY` environment variable manually (legacy names `OLLAMA_CLOUD_API_KEY` and `OWLEN_OLLAMA_CLOUD_API_KEY` continue to work, but Owlen will emit a warning). Owlen normalises the base URL automatically—it enforces HTTPS, trims trailing slashes, and accepts both `https://ollama.com` and `https://api.ollama.com` without rewriting the host. +The quota fields are optional and purely informational—they are never sent to the provider. 
Owlen uses them to display hourly/weekly token usage in the chat header, emit pre-limit toasts at 80 % and 95 %, and power the `:limits` command. Adjust the numbers to reflect the soft limits on your account or remove the keys altogether if you do not want usage tracking. + > **Tip:** If the official `ollama signin` flow fails on Linux v0.12.3, follow the [Linux Ollama sign-in workaround](#linux-ollama-sign-in-workaround-v0123) in the troubleshooting guide to copy keys from a working machine or register them manually. ### Managing cloud credentials via CLI