vessel/frontend/src/lib/memory/context-manager.svelte.ts
vikingowl cf4981f3b2 feat: add auto-compact, settings page, and message virtualization
- Add auto-compact feature with configurable threshold (50-90%)
- Convert settings modal to full /settings page with organized sections
- Add Memory Management settings (auto-compact toggle, threshold, preserve count)
- Add inline SummarizationIndicator shown where compaction occurred
- Add VirtualMessageList with fallback for long conversation performance
- Trigger auto-compact after assistant responses when threshold reached
2026-01-03 18:26:11 +01:00

/**
 * Context window management with reactive state
 *
 * Tracks token usage across the conversation and provides
 * warnings when approaching context limits.
 */
import type { MessageNode } from '$lib/types/chat.js';
import type { ContextUsage, TokenEstimate, MessageWithTokens } from './types.js';
import { estimateMessageTokens, estimateFormatOverhead, formatTokenCount } from './tokenizer.js';
import { getModelContextLimit, formatContextSize } from './model-limits.js';
import { settingsState } from '$lib/stores/settings.svelte.js';

/** Warning threshold as a fraction of the context window (0.85 = 85%) */
const WARNING_THRESHOLD = 0.85;
/** Critical threshold (context almost full) */
const CRITICAL_THRESHOLD = 0.95;
/** Throttle interval for updates during streaming (ms) */
const STREAMING_THROTTLE_MS = 500;

/** Context manager with reactive state */
class ContextManager {
  /** Current model name */
  currentModel = $state<string>('');

  /** Maximum context length for current model (from model lookup) */
  modelMaxTokens = $state<number>(4096);

  /** Custom context limit override (from user settings) */
  customMaxTokens = $state<number | null>(null);

  /** Effective max tokens (custom override or model default) */
  maxTokens = $derived(this.customMaxTokens ?? this.modelMaxTokens);

  /**
   * Cached token estimates for messages (id -> estimate)
   * Non-reactive to avoid cascading updates during streaming
   */
  private tokenCache: Map<string, TokenEstimate> = new Map();

  /** Current conversation messages with token counts */
  messagesWithTokens = $state<MessageWithTokens[]>([]);

  /** Last update timestamp for throttling */
  private lastUpdateTime = 0;

  /** Pending update for throttled calls */
  private pendingUpdate: MessageNode[] | null = null;

  /** Timeout handle for pending updates */
  private updateTimeout: ReturnType<typeof setTimeout> | null = null;

  /** Total estimated tokens used */
  usedTokens = $derived.by(() => {
    let total = 0;
    for (const msg of this.messagesWithTokens) {
      total += msg.estimatedTokens.totalTokens;
    }
    // Add format overhead
    total += estimateFormatOverhead(this.messagesWithTokens.length);
    return total;
  });

  /** Context usage info */
  contextUsage = $derived.by((): ContextUsage => {
    const used = this.usedTokens;
    const max = this.maxTokens;
    return {
      usedTokens: used,
      maxTokens: max,
      percentage: max > 0 ? (used / max) * 100 : 0,
      remainingTokens: Math.max(0, max - used)
    };
  });

  /** Whether we're approaching the context limit */
  isNearLimit = $derived(this.contextUsage.percentage >= WARNING_THRESHOLD * 100);

  /** Whether context is critically full */
  isCritical = $derived(this.contextUsage.percentage >= CRITICAL_THRESHOLD * 100);

  /** Human-readable status message */
  statusMessage = $derived.by(() => {
    const { percentage, usedTokens, maxTokens } = this.contextUsage;
    const used = formatTokenCount(usedTokens);
    const max = formatContextSize(maxTokens);
    if (this.isCritical) {
      return `Context almost full: ${used} / ${max} (${percentage.toFixed(0)}%)`;
    }
    if (this.isNearLimit) {
      return `Approaching context limit: ${used} / ${max} (${percentage.toFixed(0)}%)`;
    }
    return `${used} / ${max} tokens (${percentage.toFixed(0)}%)`;
  });

  /**
   * Set the current model and update context limit
   */
  setModel(modelName: string): void {
    this.currentModel = modelName;
    this.modelMaxTokens = getModelContextLimit(modelName);
  }

  /**
   * Set custom context limit override
   * Pass null to clear and use model default
   */
  setCustomContextLimit(tokens: number | null): void {
    this.customMaxTokens = tokens;
  }

  /**
   * Update messages and recalculate token estimates
   * Throttles updates during streaming to prevent performance issues
   */
  updateMessages(messages: MessageNode[], force = false): void {
    const now = Date.now();
    const timeSinceLastUpdate = now - this.lastUpdateTime;
    // If we're within the throttle window and not forcing, schedule for later
    if (!force && timeSinceLastUpdate < STREAMING_THROTTLE_MS) {
      this.pendingUpdate = messages;
      // Schedule update if not already scheduled
      if (!this.updateTimeout) {
        this.updateTimeout = setTimeout(() => {
          this.updateTimeout = null;
          if (this.pendingUpdate) {
            this.updateMessages(this.pendingUpdate, true);
            this.pendingUpdate = null;
          }
        }, STREAMING_THROTTLE_MS - timeSinceLastUpdate);
      }
      return;
    }
    this.lastUpdateTime = now;
    this.performUpdate(messages);
  }
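
  // Design note (editorial): the timer above fires on the trailing edge,
  // STREAMING_THROTTLE_MS after the last applied update, so at most one
  // recalculation runs per throttle window even while tokens stream in
  // continuously, and the newest pending snapshot always wins.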

  /**
   * Actually perform the message update (internal)
   */
  private performUpdate(messages: MessageNode[]): void {
    const newMessagesWithTokens: MessageWithTokens[] = [];
    for (const node of messages) {
      // Check cache first
      let estimate = this.tokenCache.get(node.id);
      if (!estimate) {
        // Calculate and cache (non-reactive mutation)
        estimate = estimateMessageTokens(node.message.content, node.message.images);
        this.tokenCache.set(node.id, estimate);
      }
      newMessagesWithTokens.push({
        id: node.id,
        role: node.message.role,
        content: node.message.content,
        images: node.message.images,
        estimatedTokens: estimate
      });
    }
    this.messagesWithTokens = newMessagesWithTokens;
  }

  /**
   * Invalidate cache for a specific message (e.g., after streaming update)
   * Non-reactive to avoid cascading updates during streaming
   */
  invalidateMessage(messageId: string): void {
    // Non-reactive deletion - just mutate the cache directly
    this.tokenCache.delete(messageId);
  }

  /**
   * Flush any pending updates immediately
   * Call this when streaming ends to ensure final state is accurate
   */
  flushPendingUpdate(): void {
    if (this.updateTimeout) {
      clearTimeout(this.updateTimeout);
      this.updateTimeout = null;
    }
    if (this.pendingUpdate) {
      this.performUpdate(this.pendingUpdate);
      this.pendingUpdate = null;
    }
  }

  /**
   * Get token estimate for a specific message
   */
  getMessageTokens(messageId: string): TokenEstimate | null {
    return this.tokenCache.get(messageId) ?? null;
  }

  /**
   * Estimate tokens for new content (before sending)
   */
  estimateNewMessage(content: string, images?: string[]): TokenEstimate {
    return estimateMessageTokens(content, images);
  }

  /**
   * Check if adding a message would exceed context
   */
  wouldExceedContext(newTokens: number): boolean {
    return (this.usedTokens + newTokens) > this.maxTokens;
  }

  /**
   * Find the messages that could be trimmed to free the requested space
   * Returns indices of messages to remove (oldest first, excluding system)
   */
  getMessagesToTrim(targetFreeTokens: number): number[] {
    const indicesToRemove: number[] = [];
    let freedTokens = 0;
    // Start from the oldest messages (index 0), skipping system messages
    for (let i = 0; i < this.messagesWithTokens.length && freedTokens < targetFreeTokens; i++) {
      const msg = this.messagesWithTokens[i];
      if (msg.role === 'system') continue;
      indicesToRemove.push(i);
      freedTokens += msg.estimatedTokens.totalTokens;
    }
    return indicesToRemove;
  }

  /**
   * Clear the cache and messages
   */
  reset(): void {
    // Clear pending updates
    if (this.updateTimeout) {
      clearTimeout(this.updateTimeout);
      this.updateTimeout = null;
    }
    this.pendingUpdate = null;
    this.lastUpdateTime = 0;
    // Clear cache and messages
    this.tokenCache.clear();
    this.messagesWithTokens = [];
  }

  /**
   * Check if auto-compact should be triggered
   * Returns true if:
   * - Auto-compact is enabled in settings
   * - Context usage exceeds the configured threshold
   * - There are enough messages to summarize
   */
  shouldAutoCompact(): boolean {
    // Check if auto-compact is enabled
    if (!settingsState.autoCompactEnabled) {
      return false;
    }
    // Check context usage against threshold
    const threshold = settingsState.autoCompactThreshold;
    if (this.contextUsage.percentage < threshold) {
      return false;
    }
    // Check if there are enough messages to summarize
    // Need at least preserveCount + 2 messages to have anything to summarize
    const preserveCount = settingsState.autoCompactPreserveCount;
    const minMessages = preserveCount + 2;
    if (this.messagesWithTokens.length < minMessages) {
      return false;
    }
    return true;
  }

  /**
   * Get the number of recent messages to preserve during auto-compact
   */
  getAutoCompactPreserveCount(): number {
    return settingsState.autoCompactPreserveCount;
  }
}

/** Singleton context manager instance */
export const contextManager = new ContextManager();
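
/*
 * Illustrative usage sketch (editorial). The call sequence and the model
 * name 'llama3.2' are hypothetical; only the contextManager API itself
 * comes from this module:
 *
 *   contextManager.setModel('llama3.2');     // resolve the context limit
 *   contextManager.updateMessages(nodes);    // throttled while streaming
 *   contextManager.flushPendingUpdate();     // on stream end
 *   if (contextManager.shouldAutoCompact()) {
 *     const keep = contextManager.getAutoCompactPreserveCount();
 *     // e.g. summarize everything except the last `keep` messages
 *   }
 */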

/**
 * Get color class for context usage percentage
 */
export function getContextUsageColor(percentage: number): string {
  if (percentage >= CRITICAL_THRESHOLD * 100) {
    return 'text-red-500';
  }
  if (percentage >= WARNING_THRESHOLD * 100) {
    return 'text-yellow-500';
  }
  return 'text-theme-muted';
}

/**
 * Get progress bar color class
 */
export function getProgressBarColor(percentage: number): string {
  if (percentage >= CRITICAL_THRESHOLD * 100) {
    return 'bg-red-500';
  }
  if (percentage >= WARNING_THRESHOLD * 100) {
    return 'bg-yellow-500';
  }
  return 'bg-blue-500';
}
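
/*
 * The two helpers above are intended for class bindings in markup; a
 * hypothetical Svelte usage (editorial, not from this repo):
 *
 *   <span class={getContextUsageColor(contextManager.contextUsage.percentage)}>
 *     {contextManager.statusMessage}
 *   </span>
 */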