feat(ai): migrate to Google Gemini 2.5 Flash-Lite, drop Mistral/Ollama
Replace the Mistral + Ollama AI stack with a single Google Gemini provider backed by google.golang.org/genai. API key moves from env/Helm to the DB (AES-256-GCM, key derived from JWT_SECRET via HKDF) so it can be rotated via the admin UI without a pod restart. New: - pkg/crypto/secretbox — AES-256-GCM encrypt/decrypt for secrets at rest - pkg/ai/gemini — GeminiProvider with grounding, structured output, usage recording, and hot-reload (Reinitialize swaps client under mutex) - pkg/ai/usage — UsageRecorder interface + UsageEvent struct - domain/settings/store — DB-backed settings (model, grounding toggle, key) - domain/settings/usage — UsageRepo implementing UsageRecorder; ai_usage table - migrations 000021 (system_settings) + 000022 (ai_usage) - settings API: GET /ai, POST /ai/key, POST /ai/model, POST /ai/grounding, GET /ai/usage - admin UI: 4-card settings page — provider status, model selector, grounding toggle with quota, usage rollups + recent-calls table Removed: - pkg/ai/ollama, mistral_provider, ratelimiter (+ tests) - Helm AI_API_KEY, AI_PROVIDER, AI_MODEL_COMPLEX, AI_AGENT_DISCOVERY, AI_RATE_LIMIT_RPS env vars Call sites set Grounded+CallType: research (true/"research"), enrich Pass B (true/"enrich_b"), similarity (false/"similarity"). Integration test updated to use a stub ai.Provider instead of a fake Ollama HTTP server.
This commit is contained in:
@@ -1,15 +1,15 @@
 // discovery-eval measures discovery's AI-backed components against labelled
 // fixtures. Two modes:
 //
-// -mode similarity (default) — grades MistralSimilarityClassifier on
+// -mode similarity (default) — grades SimilarityClassifier on
 //	pair-labelled fixtures. Precision/recall/F1/accuracy
 //	+ confidence calibration.
-// -mode category — grades MistralLLMEnricher's `category` output on
+// -mode category — grades LLMEnricher's `category` output on
 //	row-labelled fixtures. Accuracy + per-label confusion.
 //
 // Usage:
 //
-//	AI_API_KEY=... AI_MODEL_COMPLEX=mistral-large-latest \
+//	GEMINI_API_KEY=... \
 //	discovery-eval \
 //	-mode similarity \
 //	-fixture backend/cmd/discovery-eval/fixtures/similarity.json \
@@ -18,7 +18,7 @@
 //	-report eval-report.json
 //
 // Each mode has its own cache key so switching modes doesn't churn entries.
-// Bump AI_MODEL_COMPLEX or edit a fixture to force a refresh.
+// Set GEMINI_MODEL to override the model (default: gemini-2.5-flash-lite).
 package main

 import (
@@ -29,7 +29,6 @@ import (
 	"os"
 	"time"

-	"marktvogt.de/backend/internal/config"
 	"marktvogt.de/backend/internal/domain/discovery/enrich"
 	"marktvogt.de/backend/internal/pkg/ai"
 	"marktvogt.de/backend/internal/pkg/scrape"
@@ -66,35 +65,29 @@ func realMain() int {
 	)
 	flag.Parse()

-	apiKey := os.Getenv("AI_MISTRAL_API_KEY")
+	apiKey := os.Getenv("GEMINI_API_KEY")
 	if apiKey == "" {
-		apiKey = os.Getenv("AI_API_KEY") // legacy fallback
+		slog.Error("GEMINI_API_KEY is required for eval")
+		return 2
 	}
-	model := os.Getenv("AI_MISTRAL_MODEL")
+	model := os.Getenv("GEMINI_MODEL")
 	if model == "" {
-		model = os.Getenv("AI_MODEL_COMPLEX") // legacy fallback
-	}
-	if model == "" {
-		model = "mistral-large-latest"
+		model = "gemini-2.5-flash-lite"
 	}
 	userAgent := os.Getenv("AI_USER_AGENT")
 	if userAgent == "" {
 		userAgent = "marktvogt-eval/1.0 (+https://marktvogt.de)"
 	}
-	client, err := ai.NewFromConfig(config.AIConfig{
-		Provider:      "mistral",
-		MistralAPIKey: apiKey,
-		MistralModel:  model,
-		RateLimitRPS:  1.0,
-	})
-	if err != nil {
-		slog.Error("AI client not configured", "error", err)
-		return 2
-	}
-
 	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Minute)
 	defer cancel()

+	client, err := ai.NewGeminiProvider(ctx, apiKey, model, nil)
+	if err != nil {
+		slog.Error("AI client init failed", "error", err)
+		return 2
+	}
+
 	switch *mode {
 	case modeSimilarity:
 		cfg := evalConfig{
Reference in New Issue
Block a user