Files
vessel/frontend/src/lib/memory/summarizer.ts
vikingowl cf4981f3b2 feat: add auto-compact, settings page, and message virtualization
- Add auto-compact feature with configurable threshold (50-90%)
- Convert settings modal to full /settings page with organized sections
- Add Memory Management settings (auto-compact toggle, threshold, preserve count)
- Add inline SummarizationIndicator shown where compaction occurred
- Add VirtualMessageList with fallback for long conversation performance
- Trigger auto-compact after assistant responses when threshold reached
2026-01-03 18:26:11 +01:00

184 lines
5.1 KiB
TypeScript

/**
* Conversation summarization service
*
* Generates summaries of conversation history to compress context
* when approaching context window limits.
*/
import { ollamaClient } from '$lib/ollama';
import type { MessageNode } from '$lib/types/chat.js';
import type { ConversationSummary } from './types.js';
import { estimateMessageTokens, estimateConversationTokens } from './tokenizer.js';
/** Default prompt for generating summaries */
const SUMMARIZATION_PROMPT = `Summarize the following conversation concisely, capturing key points, decisions, and context that would be needed to continue the conversation. Focus on:
- Main topics discussed
- Important facts or information shared
- Decisions or conclusions reached
- Any pending questions or tasks
Keep the summary brief but complete. Write in third person.
Conversation:
`;
/** Minimum messages required in the toSummarize array (after filtering) */
const MIN_MESSAGES_TO_SUMMARIZE = 2;
/** Minimum total messages before showing summarization option */
const MIN_TOTAL_MESSAGES_FOR_SUMMARY = 6;
/** How many recent messages to always preserve */
const PRESERVE_RECENT_MESSAGES = 4;
/** Target reduction ratio (aim to reduce to this fraction of original) */
const TARGET_REDUCTION_RATIO = 0.25;
/**
* Format messages for summarization prompt
*/
function formatMessagesForSummary(messages: MessageNode[]): string {
return messages
.map((node) => {
const role = node.message.role === 'user' ? 'User' : 'Assistant';
const content = node.message.content;
const images = node.message.images?.length
? ` [${node.message.images.length} image(s)]`
: '';
return `${role}:${images} ${content}`;
})
.join('\n\n');
}
/**
* Generate a summary of messages using Ollama
*/
export async function generateSummary(
messages: MessageNode[],
model: string
): Promise<string> {
if (messages.length < MIN_MESSAGES_TO_SUMMARIZE) {
throw new Error('Not enough messages to summarize');
}
const formattedConversation = formatMessagesForSummary(messages);
const prompt = SUMMARIZATION_PROMPT + formattedConversation;
const response = await ollamaClient.generate({
model,
prompt,
options: {
temperature: 0.3, // Lower temperature for more consistent summaries
num_predict: 500 // Limit summary length
}
});
return response.response;
}
/**
* Determine which messages should be summarized
* Returns indices of messages to summarize (older messages) and messages to keep
* @param messages - All messages in the conversation
* @param targetFreeTokens - Not currently used (preserved for API compatibility)
* @param preserveCount - Number of recent messages to keep (defaults to PRESERVE_RECENT_MESSAGES)
*/
export function selectMessagesForSummarization(
messages: MessageNode[],
targetFreeTokens: number,
preserveCount: number = PRESERVE_RECENT_MESSAGES
): { toSummarize: MessageNode[]; toKeep: MessageNode[] } {
if (messages.length <= preserveCount) {
return { toSummarize: [], toKeep: messages };
}
// Calculate how many messages to summarize
// Keep the recent ones, summarize the rest
const cutoffIndex = Math.max(0, messages.length - preserveCount);
// Filter out system messages from summarization (they should stay)
const toSummarize: MessageNode[] = [];
const toKeep: MessageNode[] = [];
for (let i = 0; i < messages.length; i++) {
const msg = messages[i];
if (i < cutoffIndex && msg.message.role !== 'system') {
toSummarize.push(msg);
} else {
toKeep.push(msg);
}
}
return { toSummarize, toKeep };
}
/**
* Calculate token savings from summarization
*/
export function calculateTokenSavings(
originalMessages: MessageNode[],
summaryText: string
): number {
const originalTokens = estimateConversationTokens(
originalMessages.map((m) => ({
content: m.message.content,
images: m.message.images
}))
);
const summaryTokens = estimateMessageTokens(summaryText).totalTokens;
return Math.max(0, originalTokens - summaryTokens);
}
/**
* Create a summary record
*/
export function createSummaryRecord(
conversationId: string,
summary: string,
originalMessageCount: number,
tokensSaved: number
): ConversationSummary {
return {
id: crypto.randomUUID(),
conversationId,
summary,
originalMessageCount,
summarizedAt: new Date(),
tokensSaved
};
}
/**
* Check if conversation should be summarized based on context usage
*/
export function shouldSummarize(
usedTokens: number,
maxTokens: number,
messageCount: number
): boolean {
// Don't show summarization if not enough total messages
if (messageCount < MIN_TOTAL_MESSAGES_FOR_SUMMARY) {
return false;
}
// Check if there would actually be messages to summarize after filtering
// (messageCount - PRESERVE_RECENT_MESSAGES = messages available for summarization)
const summarizableCount = messageCount - PRESERVE_RECENT_MESSAGES;
if (summarizableCount < MIN_MESSAGES_TO_SUMMARIZE) {
return false;
}
// Summarize if we're using more than 80% of context
const usageRatio = usedTokens / maxTokens;
return usageRatio >= 0.8;
}
/**
* Format a summary as a system message prefix
*/
export function formatSummaryAsContext(summary: string): string {
return `[Previous conversation summary: ${summary}]`;
}