Files
vessel/frontend/src/lib/memory/embeddings.ts
vikingowl 949802e935 feat: add global search page with semantic search and embedding model settings
- Add dedicated /search page with semantic, titles, and messages tabs
- Add embedding model selector in Settings > Memory Management
- Add background migration service to index existing conversations
- Fix sidebar search to navigate on Enter only (local filtering while typing)
- Fix search page input race condition with isTyping flag
- Update chat-indexer to use configured embedding model
2026-01-07 19:27:08 +01:00

175 lines
4.7 KiB
TypeScript

/**
* Embeddings service for RAG
*
* Generates embeddings using Ollama's /api/embed endpoint
* and provides vector similarity search capabilities.
*/
import { ollamaClient } from '$lib/ollama';
/** Preferred embedding model, tried first when resolving the default. */
export const PREFERRED_EMBEDDING_MODEL = 'embeddinggemma:latest';
/**
 * Fallback embedding model.
 * NOTE(review): not referenced in this file — presumably callers switch to it
 * when the preferred model is unavailable; confirm against call sites.
 */
export const FALLBACK_EMBEDDING_MODEL = 'nomic-embed-text';
/** Default embedding model (prefer embeddinggemma, fallback to nomic) */
export const DEFAULT_EMBEDDING_MODEL = PREFERRED_EMBEDDING_MODEL;
/**
 * Embedding models the UI offers; `as const` keeps the entries as a readonly
 * tuple of string literals so they can double as a union type.
 * Dimension notes below are per-model approximations (see getEmbeddingDimension).
 */
export const EMBEDDING_MODELS = [
  'embeddinggemma:latest', // Preferred model
  'nomic-embed-text', // Good general-purpose, 768 dimensions
  'mxbai-embed-large', // High quality, 1024 dimensions
  'all-minilm', // Smaller, faster, 384 dimensions
  'snowflake-arctic-embed' // Good for retrieval, 1024 dimensions
] as const;
/**
* Generate embeddings for a text string
*/
/**
 * Generate an embedding vector for a single text string.
 *
 * Sends one input to Ollama's /api/embed endpoint and returns the first
 * (and only) embedding from the response. The request is aborted after
 * 30 seconds.
 *
 * @param text - Text to embed
 * @param model - Embedding model name (defaults to DEFAULT_EMBEDDING_MODEL)
 * @returns The embedding vector for `text`
 * @throws Error if the request times out or the model returns no embeddings
 */
export async function generateEmbedding(
  text: string,
  model: string = DEFAULT_EMBEDDING_MODEL
): Promise<number[]> {
  const TIMEOUT_MS = 30000; // 30 second timeout
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), TIMEOUT_MS);
  try {
    const response = await ollamaClient.embed({ model, input: text }, controller.signal);
    // One embedding comes back per input; we sent exactly one.
    const [embedding] = response.embeddings ?? [];
    if (embedding === undefined) {
      throw new Error(`No embeddings returned from model "${model}". Is the model available?`);
    }
    return embedding;
  } catch (error) {
    // Our timeout abort surfaces as an AbortError; translate it into a
    // friendlier message. Everything else propagates unchanged.
    if (error instanceof Error && error.name === 'AbortError') {
      throw new Error(`Embedding generation timed out. Is the model "${model}" available?`);
    }
    throw error;
  } finally {
    clearTimeout(timer);
  }
}
/**
* Generate embeddings for multiple texts in a batch
*/
/**
 * Generate embeddings for multiple texts.
 *
 * Texts are sent to Ollama's /api/embed endpoint in sequential batches of 10
 * to avoid overwhelming the API; each batch gets its own 30-second timeout.
 * Results are returned in the same order as `texts`.
 *
 * Fixes vs. previous version:
 * - validates each response the same way generateEmbedding does — a model
 *   returning no (or too few) vectors now throws instead of silently
 *   producing a short, misaligned result array;
 * - drops the redundant clearTimeout in the catch branch (finally already
 *   clears the timer on every path).
 *
 * @param texts - Texts to embed (an empty array yields an empty result)
 * @param model - Embedding model name (defaults to DEFAULT_EMBEDDING_MODEL)
 * @returns One embedding per input text, in input order
 * @throws Error if a batch times out or returns the wrong number of embeddings
 */
export async function generateEmbeddings(
  texts: string[],
  model: string = DEFAULT_EMBEDDING_MODEL
): Promise<number[][]> {
  // Process in batches to avoid overwhelming the API
  const BATCH_SIZE = 10;
  const TIMEOUT_MS = 30000; // 30 second timeout per batch
  const results: number[][] = [];
  for (let i = 0; i < texts.length; i += BATCH_SIZE) {
    const batch = texts.slice(i, i + BATCH_SIZE);
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), TIMEOUT_MS);
    try {
      const response = await ollamaClient.embed({
        model,
        input: batch
      }, controller.signal);
      // The endpoint returns one embedding per input; anything else would
      // misalign results with texts, so fail loudly.
      const embeddings = response.embeddings ?? [];
      if (embeddings.length !== batch.length) {
        throw new Error(
          `Expected ${batch.length} embedding(s) from model "${model}" but got ${embeddings.length}. Is the model available?`
        );
      }
      results.push(...embeddings);
    } catch (error) {
      if (error instanceof Error && error.name === 'AbortError') {
        throw new Error(`Embedding generation timed out. Is the model "${model}" available?`);
      }
      throw error;
    } finally {
      clearTimeout(timeoutId);
    }
  }
  return results;
}
/**
* Calculate cosine similarity between two vectors
*/
/**
 * Cosine similarity between two vectors of equal length.
 *
 * @param a - First vector
 * @param b - Second vector (must match a's length)
 * @returns A value in [-1, 1]; 0 when either vector has zero magnitude
 * @throws Error when the vectors have different lengths
 */
export function cosineSimilarity(a: number[], b: number[]): number {
  if (a.length !== b.length) {
    throw new Error(`Vector dimensions don't match: ${a.length} vs ${b.length}`);
  }
  let dot = 0;
  let sumSqA = 0;
  let sumSqB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    sumSqA += a[i] ** 2;
    sumSqB += b[i] ** 2;
  }
  const magA = Math.sqrt(sumSqA);
  const magB = Math.sqrt(sumSqB);
  // Guard against division by zero for degenerate (all-zero) vectors.
  return magA === 0 || magB === 0 ? 0 : dot / (magA * magB);
}
/**
* Find the most similar vectors to a query vector
*/
/**
 * Rank candidates by cosine similarity to a query vector.
 *
 * @param query - Query embedding
 * @param candidates - Items carrying an `embedding` to compare against
 * @param topK - Maximum number of results to return (default 5)
 * @param threshold - Minimum similarity to be included (default 0.5)
 * @returns Up to topK candidates at or above the threshold, most similar
 *   first, each augmented with its `similarity` score
 */
export function findSimilar<T extends { embedding: number[] }>(
  query: number[],
  candidates: T[],
  topK: number = 5,
  threshold: number = 0.5
): Array<T & { similarity: number }> {
  const ranked: Array<T & { similarity: number }> = [];
  for (const candidate of candidates) {
    const similarity = cosineSimilarity(query, candidate.embedding);
    // Drop anything below the cutoff before sorting.
    if (similarity >= threshold) {
      ranked.push({ ...candidate, similarity });
    }
  }
  ranked.sort((x, y) => y.similarity - x.similarity);
  return ranked.slice(0, topK);
}
/**
* Normalize a vector (convert to unit vector)
*/
/**
 * Scale a vector to unit length.
 *
 * @param vector - Vector to normalize
 * @returns A new unit-length array, or the input itself (unchanged) when its
 *   magnitude is zero — including the empty vector
 */
export function normalizeVector(vector: number[]): number[] {
  let sumOfSquares = 0;
  for (const component of vector) {
    sumOfSquares += component * component;
  }
  const magnitude = Math.sqrt(sumOfSquares);
  // A zero vector cannot be normalized; return it as-is.
  return magnitude === 0 ? vector : vector.map((component) => component / magnitude);
}
/**
* Calculate the dimension of embeddings for a given model
* These are approximations - actual dimensions depend on the model
*/
/**
 * Look up the (approximate) embedding dimension for a given model.
 *
 * Tries an exact match first, then retries with any Ollama tag suffix
 * stripped (e.g. "mxbai-embed-large:latest" -> "mxbai-embed-large"), so
 * tagged model names resolve to the same dimension as their base name.
 * Previously only "embeddinggemma:latest" was handled; every other tagged
 * name silently fell through to the 768 default.
 *
 * @param model - Model name, optionally with an Ollama ":tag" suffix
 * @returns The model's embedding dimension, or 768 for unknown models
 */
export function getEmbeddingDimension(model: string): number {
  const dimensions: Record<string, number> = {
    'embeddinggemma:latest': 768, // Gemma-based embedding model
    'embeddinggemma': 768,
    'nomic-embed-text': 768,
    'mxbai-embed-large': 1024,
    'all-minilm': 384,
    'snowflake-arctic-embed': 1024
  };
  const exact = dimensions[model];
  if (exact !== undefined) {
    return exact;
  }
  // Strip the ":tag" portion and retry with the base model name.
  const base = model.split(':')[0];
  return dimensions[base] ?? 768; // Default to 768
}