vessel/frontend/src/lib/memory/vector-store.ts
vikingowl ddce578833 feat: implement cross-chat RAG for project conversations
- Add embedding-based chat indexing for project conversations
- Chunk long messages (1500 chars with 200 overlap) for better coverage
- Index messages when leaving a conversation (background)
- Search indexed chat history with semantic similarity
- Show other project conversations with message count and summary status
- Include relevant chat snippets in project context for LLM
- Fix chunker infinite loop bug near end of text
- Fix curl encoding error with explicit Accept-Encoding header
- Add document previews to project knowledge base context
- Lower RAG threshold to 0.2 and increase topK to 10 for better recall
2026-01-07 18:06:49 +01:00

/**
 * Vector store for knowledge base documents
 *
 * Stores document chunks with embeddings in IndexedDB
 * and provides similarity search for RAG retrieval.
 */
import { db, type StoredDocument, type StoredChunk } from '$lib/storage/db.js';
import { generateEmbedding, generateEmbeddings, cosineSimilarity, DEFAULT_EMBEDDING_MODEL } from './embeddings.js';
import { chunkText, chunkTextAsync, estimateChunkTokens, type ChunkOptions } from './chunker.js';
/** Result of a similarity search */
export interface SearchResult {
  chunk: StoredChunk;
  document: StoredDocument;
  similarity: number;
}

/** Options for adding a document */
export interface AddDocumentOptions {
  /** Chunking options */
  chunkOptions?: ChunkOptions;
  /** Embedding model to use */
  embeddingModel?: string;
  /** Callback for progress updates */
  onProgress?: (current: number, total: number) => void;
  /** Project ID if document belongs to a project */
  projectId?: string;
}

/**
 * Add a document to the knowledge base
 * Chunks the content and generates embeddings for each chunk
 */
export async function addDocument(
  name: string,
  content: string,
  mimeType: string,
  options: AddDocumentOptions = {}
): Promise<StoredDocument> {
  const {
    chunkOptions,
    embeddingModel = DEFAULT_EMBEDDING_MODEL,
    onProgress,
    projectId
  } = options;

  const documentId = crypto.randomUUID();
  const now = Date.now();

  // Chunk the content
  const textChunks = chunkText(content, documentId, chunkOptions);
  if (textChunks.length === 0) {
    throw new Error('Document produced no chunks');
  }

  // Generate embeddings for all chunks
  const chunkContents = textChunks.map(c => c.content);
  const embeddings: number[][] = [];

  // Process in batches with progress
  const BATCH_SIZE = 5;
  for (let i = 0; i < chunkContents.length; i += BATCH_SIZE) {
    const batch = chunkContents.slice(i, i + BATCH_SIZE);
    const batchEmbeddings = await generateEmbeddings(batch, embeddingModel);
    embeddings.push(...batchEmbeddings);
    if (onProgress) {
      onProgress(Math.min(i + BATCH_SIZE, chunkContents.length), chunkContents.length);
    }
  }

  // Create stored chunks with embeddings
  const storedChunks: StoredChunk[] = textChunks.map((chunk, index) => ({
    id: chunk.id,
    documentId,
    content: chunk.content,
    embedding: embeddings[index],
    startIndex: chunk.startIndex,
    endIndex: chunk.endIndex,
    tokenCount: estimateChunkTokens(chunk.content)
  }));

  // Create document record
  const document: StoredDocument = {
    id: documentId,
    name,
    mimeType,
    size: content.length,
    createdAt: now,
    updatedAt: now,
    chunkCount: storedChunks.length,
    embeddingModel,
    projectId: projectId ?? null,
    embeddingStatus: 'ready'
  };

  // Store in database
  await db.transaction('rw', [db.documents, db.chunks], async () => {
    await db.documents.add(document);
    await db.chunks.bulkAdd(storedChunks);
  });

  return document;
}
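
// Usage sketch (illustrative only, not part of the module): adding a project
// document with upload progress. The calling context, fileText and
// currentProjectId are assumptions; addDocument and its options come from this file.
//
//   import { addDocument } from '$lib/memory/vector-store.js';
//
//   const doc = await addDocument('notes.md', fileText, 'text/markdown', {
//     projectId: currentProjectId,
//     onProgress: (done, total) => console.log(`Embedded ${done}/${total} chunks`)
//   });
//   console.log(`Stored ${doc.chunkCount} chunks for ${doc.name}`);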

/** Options for async document upload */
export interface AddDocumentAsyncOptions extends AddDocumentOptions {
  /** Callback when embedding generation completes */
  onComplete?: (doc: StoredDocument) => void;
  /** Callback when embedding generation fails */
  onError?: (error: Error) => void;
}

/**
 * Add a document asynchronously: stores the record immediately and generates
 * embeddings in the background
 * Returns immediately with the document in 'pending' state
 */
export async function addDocumentAsync(
  name: string,
  content: string,
  mimeType: string,
  options: AddDocumentAsyncOptions = {}
): Promise<StoredDocument> {
  const {
    chunkOptions,
    embeddingModel = DEFAULT_EMBEDDING_MODEL,
    onProgress,
    onComplete,
    onError,
    projectId
  } = options;

  const documentId = crypto.randomUUID();
  const now = Date.now();

  // Create document record immediately (without knowing chunk count yet)
  // We'll update it after chunking in the background
  const document: StoredDocument = {
    id: documentId,
    name,
    mimeType,
    size: content.length,
    createdAt: now,
    updatedAt: now,
    chunkCount: 0, // Will be updated after chunking
    embeddingModel,
    projectId: projectId ?? null,
    embeddingStatus: 'pending'
  };

  // Store document immediately
  await db.documents.add(document);

  // Process everything in the background (non-blocking), including chunking
  setTimeout(async () => {
    console.log('[Embedding] Starting for:', name, 'content length:', content.length);
    try {
      // Update status to processing
      await db.documents.update(documentId, { embeddingStatus: 'processing' });
      console.log('[Embedding] Status updated, starting chunking...');

      // Chunk the content using the async version (yields periodically)
      let textChunks;
      try {
        textChunks = await chunkTextAsync(content, documentId, chunkOptions);
      } catch (chunkError) {
        console.error('[Embedding] Chunking failed:', chunkError);
        throw chunkError;
      }
      console.log('[Embedding] Chunked into', textChunks.length, 'chunks');

      if (textChunks.length === 0) {
        throw new Error('Document produced no chunks');
      }

      // Update chunk count
      await db.documents.update(documentId, { chunkCount: textChunks.length });

      const chunkContents = textChunks.map(c => c.content);
      const embeddings: number[][] = [];

      // Process embeddings in batches with progress
      const BATCH_SIZE = 5;
      const totalBatches = Math.ceil(chunkContents.length / BATCH_SIZE);
      for (let i = 0; i < chunkContents.length; i += BATCH_SIZE) {
        const batchNum = Math.floor(i / BATCH_SIZE) + 1;
        console.log(`[Embedding] Batch ${batchNum}/${totalBatches}...`);
        const batch = chunkContents.slice(i, i + BATCH_SIZE);
        const batchEmbeddings = await generateEmbeddings(batch, embeddingModel);
        embeddings.push(...batchEmbeddings);
        if (onProgress) {
          onProgress(Math.min(i + BATCH_SIZE, chunkContents.length), chunkContents.length);
        }
      }

      // Create stored chunks with embeddings
      const storedChunks: StoredChunk[] = textChunks.map((chunk, index) => ({
        id: chunk.id,
        documentId,
        content: chunk.content,
        embedding: embeddings[index],
        startIndex: chunk.startIndex,
        endIndex: chunk.endIndex,
        tokenCount: estimateChunkTokens(chunk.content)
      }));

      // Store chunks and update document status
      await db.transaction('rw', [db.documents, db.chunks], async () => {
        await db.chunks.bulkAdd(storedChunks);
        await db.documents.update(documentId, {
          embeddingStatus: 'ready',
          updatedAt: Date.now()
        });
      });

      console.log('[Embedding] Complete for:', name);
      const updatedDoc = await db.documents.get(documentId);
      if (updatedDoc && onComplete) {
        onComplete(updatedDoc);
      }
    } catch (error) {
      console.error('[Embedding] Failed:', error);
      await db.documents.update(documentId, { embeddingStatus: 'failed' });
      if (onError) {
        onError(error instanceof Error ? error : new Error(String(error)));
      }
    }
  }, 0);

  return document;
}
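
// Usage sketch (illustrative only, not part of the module): non-blocking upload
// from a UI component. The component state (progress, errorMessage) and the
// refreshDocuments() helper are assumptions; addDocumentAsync and its callbacks
// come from this file.
//
//   import { addDocumentAsync } from '$lib/memory/vector-store.js';
//
//   const pendingDoc = await addDocumentAsync(file.name, fileText, file.type, {
//     projectId: currentProjectId,
//     onProgress: (done, total) => (progress = done / total),
//     onComplete: () => refreshDocuments(),
//     onError: (err) => (errorMessage = err.message)
//   });
//   // pendingDoc.embeddingStatus stays 'pending' until the background work finishes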

/** Options for similarity search */
export interface SearchOptions {
  /** Maximum number of results to return */
  topK?: number;
  /** Minimum similarity threshold (0-1) */
  threshold?: number;
  /** Embedding model to use */
  embeddingModel?: string;
  /** Filter to documents in this project only (null = global only, undefined = all) */
  projectId?: string | null;
}
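
// Sketch of the projectId filter semantics (illustrative; the 0.2 threshold and
// topK of 10 echo the commit message above, not the defaults in searchSimilar):
//
//   await searchSimilar(query, { projectId: 'proj-123', topK: 10, threshold: 0.2 }); // one project's docs
//   await searchSimilar(query, { projectId: null }); // global (non-project) docs only
//   await searchSimilar(query, {});                  // all documents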

/**
 * Search for similar chunks across documents
 * @param query - The search query
 * @param options - Search options including projectId filter
 */
export async function searchSimilar(
  query: string,
  options: SearchOptions = {}
): Promise<SearchResult[]> {
  const {
    topK = 5,
    threshold = 0.5,
    embeddingModel = DEFAULT_EMBEDDING_MODEL,
    projectId
  } = options;

  // Generate embedding for query
  const queryEmbedding = await generateEmbedding(query, embeddingModel);

  // Load all chunks; a full scan is fine for small collections.
  // Larger collections would need approximate nearest-neighbor search.
  const allChunks = await db.chunks.toArray();
  if (allChunks.length === 0) {
    return [];
  }

  // Get document IDs that match the project filter
  let allowedDocumentIds: Set<string> | null = null;
  if (projectId !== undefined) {
    const docs = await db.documents.toArray();
    const filteredDocs = docs.filter((d) =>
      projectId === null ? !d.projectId : d.projectId === projectId
    );
    allowedDocumentIds = new Set(filteredDocs.map((d) => d.id));
  }

  // Restrict chunks to the allowed documents
  const relevantChunks = allowedDocumentIds
    ? allChunks.filter((c) => allowedDocumentIds!.has(c.documentId))
    : allChunks;
  if (relevantChunks.length === 0) {
    return [];
  }

  // Calculate similarities
  const scored = relevantChunks.map((chunk) => ({
    chunk,
    similarity: cosineSimilarity(queryEmbedding, chunk.embedding)
  }));

  // Filter and sort
  const filtered = scored
    .filter((item) => item.similarity >= threshold)
    .sort((a, b) => b.similarity - a.similarity)
    .slice(0, topK);

  // Fetch document info for results
  const documentIds = [...new Set(filtered.map((r) => r.chunk.documentId))];
  const documents = await db.documents.bulkGet(documentIds);
  const documentMap = new Map(documents.filter(Boolean).map((d) => [d!.id, d!]));

  // Build results
  return filtered
    .map((item) => ({
      chunk: item.chunk,
      document: documentMap.get(item.chunk.documentId)!,
      similarity: item.similarity
    }))
    .filter((r) => r.document !== undefined);
}

/**
 * Get all documents in the knowledge base
 */
export async function listDocuments(): Promise<StoredDocument[]> {
  return db.documents.orderBy('updatedAt').reverse().toArray();
}

/**
 * Get a document by ID
 */
export async function getDocument(id: string): Promise<StoredDocument | undefined> {
  return db.documents.get(id);
}

/**
 * Get all chunks for a document
 */
export async function getDocumentChunks(documentId: string): Promise<StoredChunk[]> {
  return db.chunks.where('documentId').equals(documentId).toArray();
}

/**
 * Delete a document and its chunks
 */
export async function deleteDocument(id: string): Promise<void> {
  await db.transaction('rw', [db.documents, db.chunks], async () => {
    await db.chunks.where('documentId').equals(id).delete();
    await db.documents.delete(id);
  });
}

/**
 * Retry embedding for a stuck document
 * Useful when HMR or a page refresh interrupts background processing
 *
 * The original content is not persisted, so a true retry is not possible yet:
 * existing chunks are removed, the document is marked 'failed', and the caller
 * is asked to re-upload the file.
 */
export async function retryDocumentEmbedding(
  documentId: string,
  onComplete?: (doc: StoredDocument) => void,
  onError?: (error: Error) => void
): Promise<void> {
  const doc = await db.documents.get(documentId);
  if (!doc) {
    throw new Error('Document not found');
  }

  // Nothing to do if embeddings already exist
  if (doc.embeddingStatus === 'ready') {
    console.log('Document already has embeddings');
    return;
  }

  // Delete any existing chunks for this document
  await db.chunks.where('documentId').equals(documentId).delete();

  // The original content is not stored, so embeddings cannot be regenerated here
  // (caching the content temporarily would be a future option).
  // For now, mark the document as failed so the user knows to re-upload it.
  await db.documents.update(documentId, { embeddingStatus: 'failed' });
  if (onError) {
    onError(new Error('Cannot retry - document content not cached. Please re-upload the file.'));
  }
}

/**
 * Reset stuck documents (pending/processing) to failed state
 * Call this on app startup to clean up interrupted uploads
 */
export async function resetStuckDocuments(): Promise<number> {
  // Get all documents and filter in memory (no index required)
  const allDocs = await db.documents.toArray();
  const stuckDocs = allDocs.filter(
    doc => doc.embeddingStatus === 'pending' || doc.embeddingStatus === 'processing'
  );
  for (const doc of stuckDocs) {
    await db.documents.update(doc.id, { embeddingStatus: 'failed' });
  }
  return stuckDocs.length;
}
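
// Usage sketch (illustrative only): cleaning up interrupted uploads at startup.
// Calling this from a root layout's onMount is an assumption about the app,
// not something this file prescribes.
//
//   import { onMount } from 'svelte';
//   import { resetStuckDocuments } from '$lib/memory/vector-store.js';
//
//   onMount(async () => {
//     const reset = await resetStuckDocuments();
//     if (reset > 0) console.log(`Marked ${reset} interrupted document(s) as failed`);
//   });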

/**
 * Get total statistics for the knowledge base
 */
export async function getKnowledgeBaseStats(): Promise<{
  documentCount: number;
  chunkCount: number;
  totalTokens: number;
}> {
  const documents = await db.documents.count();
  const chunks = await db.chunks.toArray();
  return {
    documentCount: documents,
    chunkCount: chunks.length,
    totalTokens: chunks.reduce((sum, c) => sum + c.tokenCount, 0)
  };
}

/**
 * Format search results as context for the LLM
 */
export function formatResultsAsContext(results: SearchResult[]): string {
  if (results.length === 0) {
    return '';
  }
  const sections = results.map((r, i) => {
    const source = r.document.name;
    return `[Source ${i + 1}: ${source}]\n${r.chunk.content}`;
  });
  return `Relevant context from knowledge base:\n\n${sections.join('\n\n---\n\n')}`;
}
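
// End-to-end sketch (illustrative only): retrieving project context before a chat
// request. buildSystemPrompt, userMessage and currentProjectId are hypothetical
// names; searchSimilar and formatResultsAsContext are the functions defined above.
//
//   import { searchSimilar, formatResultsAsContext } from '$lib/memory/vector-store.js';
//
//   const results = await searchSimilar(userMessage, { projectId: currentProjectId });
//   const ragContext = formatResultsAsContext(results);
//   const systemPrompt = ragContext
//     ? `${buildSystemPrompt()}\n\n${ragContext}`
//     : buildSystemPrompt();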