feat(ai): migrate to Google Gemini 2.5 Flash-Lite, drop Mistral/Ollama

Replace the Mistral + Ollama AI stack with a single Google Gemini provider
backed by google.golang.org/genai. API key moves from env/Helm to the DB
(AES-256-GCM, key derived from JWT_SECRET via HKDF) so it can be rotated
via the admin UI without a pod restart.

New:
- pkg/crypto/secretbox — AES-256-GCM encrypt/decrypt for secrets at rest
- pkg/ai/gemini — GeminiProvider with grounding, structured output, usage
  recording, and hot-reload (Reinitialize swaps client under mutex)
- pkg/ai/usage — UsageRecorder interface + UsageEvent struct
- domain/settings/store — DB-backed settings (model, grounding toggle, key)
- domain/settings/usage — UsageRepo implementing UsageRecorder; ai_usage table
- migrations 000021 (system_settings) + 000022 (ai_usage)
- settings API: GET /ai, POST /ai/key, POST /ai/model, POST /ai/grounding,
  GET /ai/usage
- admin UI: 4-card settings page — provider status, model selector, grounding
  toggle with quota, usage rollups + recent-calls table

Removed:
- pkg/ai/ollama, mistral_provider, ratelimiter (+ tests)
- Helm AI_API_KEY, AI_PROVIDER, AI_MODEL_COMPLEX, AI_AGENT_DISCOVERY,
  AI_RATE_LIMIT_RPS env vars

Call sites set Grounded+CallType: research (true/"research"), enrich Pass B
(true/"enrich_b"), similarity (false/"similarity"). Integration test updated
to use a stub ai.Provider instead of a fake Ollama HTTP server.
This commit is contained in:
2026-04-25 09:54:49 +02:00
parent 80149de317
commit 3ddfd87408
40 changed files with 1392 additions and 897 deletions

View File

@@ -1,15 +1,15 @@
// discovery-eval measures discovery's AI-backed components against labelled
// fixtures. Two modes:
//
// -mode similarity (default) — grades MistralSimilarityClassifier on
// -mode similarity (default) — grades SimilarityClassifier on
// pair-labelled fixtures. Precision/recall/F1/accuracy
// + confidence calibration.
// -mode category — grades MistralLLMEnricher's `category` output on
// -mode category — grades LLMEnricher's `category` output on
// row-labelled fixtures. Accuracy + per-label confusion.
//
// Usage:
//
// AI_API_KEY=... AI_MODEL_COMPLEX=mistral-large-latest \
// GEMINI_API_KEY=... \
// discovery-eval \
// -mode similarity \
// -fixture backend/cmd/discovery-eval/fixtures/similarity.json \
@@ -18,7 +18,7 @@
// -report eval-report.json
//
// Each mode has its own cache key so switching modes doesn't churn entries.
// Bump AI_MODEL_COMPLEX or edit a fixture to force a refresh.
// Set GEMINI_MODEL to override the model (default: gemini-2.5-flash-lite).
package main
import (
@@ -29,7 +29,6 @@ import (
"os"
"time"
"marktvogt.de/backend/internal/config"
"marktvogt.de/backend/internal/domain/discovery/enrich"
"marktvogt.de/backend/internal/pkg/ai"
"marktvogt.de/backend/internal/pkg/scrape"
@@ -66,35 +65,29 @@ func realMain() int {
)
flag.Parse()
apiKey := os.Getenv("AI_MISTRAL_API_KEY")
apiKey := os.Getenv("GEMINI_API_KEY")
if apiKey == "" {
apiKey = os.Getenv("AI_API_KEY") // legacy fallback
slog.Error("GEMINI_API_KEY is required for eval")
return 2
}
model := os.Getenv("AI_MISTRAL_MODEL")
model := os.Getenv("GEMINI_MODEL")
if model == "" {
model = os.Getenv("AI_MODEL_COMPLEX") // legacy fallback
}
if model == "" {
model = "mistral-large-latest"
model = "gemini-2.5-flash-lite"
}
userAgent := os.Getenv("AI_USER_AGENT")
if userAgent == "" {
userAgent = "marktvogt-eval/1.0 (+https://marktvogt.de)"
}
client, err := ai.NewFromConfig(config.AIConfig{
Provider: "mistral",
MistralAPIKey: apiKey,
MistralModel: model,
RateLimitRPS: 1.0,
})
if err != nil {
slog.Error("AI client not configured", "error", err)
return 2
}
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Minute)
defer cancel()
client, err := ai.NewGeminiProvider(ctx, apiKey, model, nil)
if err != nil {
slog.Error("AI client init failed", "error", err)
return 2
}
switch *mode {
case modeSimilarity:
cfg := evalConfig{

View File

@@ -1,4 +1,4 @@
{{- if or .Values.ai.apiKey .Values.turnstile.secretKey .Values.discovery.token }}
{{- if or .Values.turnstile.secretKey .Values.discovery.token }}
apiVersion: v1
kind: Secret
metadata:
@@ -8,9 +8,6 @@ metadata:
{{- include "marktvogt-backend.labels" . | nindent 4 }}
type: Opaque
stringData:
{{- if .Values.ai.apiKey }}
AI_API_KEY: {{ .Values.ai.apiKey | quote }}
{{- end }}
{{- if .Values.turnstile.secretKey }}
TURNSTILE_SECRET_KEY: {{ .Values.turnstile.secretKey | quote }}
{{- end }}

View File

@@ -51,7 +51,7 @@ spec:
- secretRef:
name: {{ include "marktvogt-backend.fullname" . }}-smtp
{{- end }}
{{- if or .Values.ai.apiKey .Values.turnstile.secretKey .Values.discovery.token }}
{{- if or .Values.turnstile.secretKey .Values.discovery.token }}
# Turnstile + Discovery credentials (Helm-managed, passed via CI)
- secretRef:
name: {{ include "marktvogt-backend.fullname" . }}-ci-secrets
@@ -97,10 +97,6 @@ spec:
secretKeyRef:
name: {{ include "marktvogt-backend.fullname" . }}-ci-secrets
key: DISCOVERY_TOKEN
- name: AI_AGENT_DISCOVERY
value: {{ .Values.ai.agentDiscovery | quote }}
- name: AI_RATE_LIMIT_RPS
value: {{ .Values.ai.rateLimitRps | default 1 | quote }}
- name: DISCOVERY_BATCH_SIZE
value: {{ .Values.discovery.batchSize | default 4 | quote }}
- name: DISCOVERY_FORWARD_MONTHS

View File

@@ -81,8 +81,6 @@ config:
SMTP_FROM: "noreply@marktvogt.de"
ADMIN_EMAIL: "contact@marktvogt.de"
FRONTEND_URL: "https://marktvogt.de"
AI_MODEL_COMPLEX: "mistral-large-latest"
# Name of the manually-created Secret containing:
# JWT_SECRET, SENTRY_DSN,
# OAUTH_{GOOGLE,APPLE,FACEBOOK,GITHUB}_{CLIENT_ID,CLIENT_SECRET}
@@ -95,12 +93,6 @@ smtp:
user: ""
password: ""
# AI research credentials — passed via Woodpecker secrets during deploy.
ai:
apiKey: ""
agentDiscovery: "" # set via CI secret in production
rateLimitRps: 1
# Discovery cron — token passed via CI secrets during deploy.
discovery:
enabled: true

View File

@@ -4,7 +4,6 @@ go 1.26
require (
github.com/PuerkitoBio/goquery v1.12.0
github.com/VikingOwl91/mistral-go-sdk v1.3.0
github.com/gin-gonic/gin v1.11.0
github.com/go-playground/validator/v10 v10.30.1
github.com/golang-jwt/jwt/v5 v5.3.1
@@ -13,12 +12,17 @@ require (
github.com/pquerna/otp v1.5.0
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1
github.com/valkey-io/valkey-go v1.0.72
golang.org/x/crypto v0.49.0
golang.org/x/crypto v0.50.0
golang.org/x/oauth2 v0.35.0
golang.org/x/sync v0.20.0
golang.org/x/time v0.14.0
google.golang.org/genai v1.54.0
)
require (
cloud.google.com/go v0.116.0 // indirect
cloud.google.com/go/auth v0.9.3 // indirect
cloud.google.com/go/compute/metadata v0.5.0 // indirect
github.com/andybalholm/cascadia v1.3.3 // indirect
github.com/boombuler/barcode v1.0.1-0.20190219062509-6c824513bacc // indirect
github.com/bytedance/sonic v1.14.0 // indirect
@@ -30,6 +34,11 @@ require (
github.com/go-playground/universal-translator v0.18.1 // indirect
github.com/goccy/go-json v0.10.2 // indirect
github.com/goccy/go-yaml v1.18.0 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/google/go-cmp v0.7.0 // indirect
github.com/google/s2a-go v0.1.8 // indirect
github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect
github.com/gorilla/websocket v1.5.3 // indirect
github.com/jackc/pgpassfile v1.0.0 // indirect
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
github.com/jackc/puddle/v2 v2.2.2 // indirect
@@ -44,10 +53,12 @@ require (
github.com/quic-go/quic-go v0.57.0 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.3.0 // indirect
go.opencensus.io v0.24.0 // indirect
golang.org/x/arch v0.20.0 // indirect
golang.org/x/net v0.52.0 // indirect
golang.org/x/sync v0.20.0 // indirect
golang.org/x/sys v0.42.0 // indirect
golang.org/x/text v0.35.0 // indirect
golang.org/x/sys v0.43.0 // indirect
golang.org/x/text v0.36.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect
google.golang.org/grpc v1.66.2 // indirect
google.golang.org/protobuf v1.36.9 // indirect
)

View File

@@ -1,7 +1,13 @@
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.116.0 h1:B3fRrSDkLRt5qSHWe40ERJvhvnQwdZiHu0bJOpldweE=
cloud.google.com/go v0.116.0/go.mod h1:cEPSRWPzZEswwdr9BxE6ChEn01dWlTaF05LiC2Xs70U=
cloud.google.com/go/auth v0.9.3 h1:VOEUIAADkkLtyfr3BLa3R8Ed/j6w1jTBmARx+wb5w5U=
cloud.google.com/go/auth v0.9.3/go.mod h1:7z6VY+7h3KUdRov5F1i8NDP5ZzWKYmEPO842BgCsmTk=
cloud.google.com/go/compute/metadata v0.5.0 h1:Zr0eK8JbFv6+Wi4ilXAR8FJ3wyNdpxHKJNPos6LTZOY=
cloud.google.com/go/compute/metadata v0.5.0/go.mod h1:aHnloV2TPI38yx4s9+wAZhHykWvVCfu7hQbF+9CWoiY=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/PuerkitoBio/goquery v1.12.0 h1:pAcL4g3WRXekcB9AU/y1mbKez2dbY2AajVhtkO8RIBo=
github.com/PuerkitoBio/goquery v1.12.0/go.mod h1:802ej+gV2y7bbIhOIoPY5sT183ZW0YFofScC4q/hIpQ=
github.com/VikingOwl91/mistral-go-sdk v1.3.0 h1:OkTsodDE5lmdf7p2cwScqD2vIk8sScQ2IGk65dUjuz0=
github.com/VikingOwl91/mistral-go-sdk v1.3.0/go.mod h1:f4emNtHUx2zSqY3V0LBz6lNI1jE6q/zh+SEU+/hJ0i4=
github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM=
github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA=
github.com/boombuler/barcode v1.0.1-0.20190219062509-6c824513bacc h1:biVzkmvwrH8WK8raXaxBx6fRVTlJILwEwQGL1I/ByEI=
@@ -10,11 +16,18 @@ github.com/bytedance/sonic v1.14.0 h1:/OfKt8HFw0kh2rj8N0F6C/qPGRESq0BbaNZgcNXXzQ
github.com/bytedance/sonic v1.14.0/go.mod h1:WoEbx8WTcFJfzCe0hbmyTGrfjt8PzNEBdxlNUO24NhA=
github.com/bytedance/sonic/loader v0.3.0 h1:dskwH8edlzNMctoruo8FPTJDF3vLtDT0sXZwvZJyqeA=
github.com/bytedance/sonic/loader v0.3.0/go.mod h1:N8A3vUdtUebEY2/VQC0MyhYeKUFosQU6FxH2JmUe6VI=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/cloudwego/base64x v0.1.6 h1:t11wG9AECkCDk5fMSoxmufanudBtJ+/HemLstXDLI2M=
github.com/cloudwego/base64x v0.1.6/go.mod h1:OFcloc187FXDaYHvrNIjxSe8ncn0OOM8gEHfghB2IPU=
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/gabriel-vasile/mimetype v1.4.12 h1:e9hWvmLYvtp846tLHam2o++qitpguFiYCKbn0w9jyqw=
github.com/gabriel-vasile/mimetype v1.4.12/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s=
github.com/gin-contrib/sse v1.1.0 h1:n0w2GMuUpWDVp7qSpvze6fAu9iRxJY4Hmj6AmBOU05w=
@@ -35,12 +48,39 @@ github.com/goccy/go-yaml v1.18.0 h1:8W7wMFS12Pcas7KU+VVkaiCng+kG8QiFeFwzFb+rwuw=
github.com/goccy/go-yaml v1.18.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA=
github.com/golang-jwt/jwt/v5 v5.3.1 h1:kYf81DTWFe7t+1VvL7eS+jKFVWaUnK9cB1qbwn63YCY=
github.com/golang-jwt/jwt/v5 v5.3.1/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE=
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/s2a-go v0.1.8 h1:zZDs9gcbt9ZPLV0ndSyQk6Kacx2g/X+SKYovpnz3SMM=
github.com/google/s2a-go v0.1.8/go.mod h1:6iNWHTpQ+nfNRN5E00MSdfDwVesa8hhS32PhPO8deJA=
github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/googleapis/enterprise-certificate-proxy v0.3.4 h1:XYIDZApgAnrN1c855gTgghdIA6Stxb52D5RnLI1SLyw=
github.com/googleapis/enterprise-certificate-proxy v0.3.4/go.mod h1:YKe7cfqYXjKGpGvmSg28/fFvhNzinZQm8DGnaburhGA=
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo=
@@ -69,6 +109,7 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pquerna/otp v1.5.0 h1:NMMR+WrmaqXU4EzdGJEE1aUUI0AMRzsp96fFFWNPwxs=
github.com/pquerna/otp v1.5.0/go.mod h1:dkJfzwRKNiegxyNb54X/3fLwhCynbMspSyWKnvi1AEg=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/quic-go/qpack v0.6.0 h1:g7W+BMYynC1LbYLSqRt8PBg5Tgwxn214ZZR34VIOjz8=
github.com/quic-go/qpack v0.6.0/go.mod h1:lUpLKChi8njB4ty2bFLX2x4gzDqXwUpaO1DP9qMDZII=
github.com/quic-go/quic-go v0.57.0 h1:AsSSrrMs4qI/hLrKlTH/TGQeTMY0ib1pAOX7vA3AdqE=
@@ -92,6 +133,8 @@ github.com/ugorji/go/codec v1.3.0/go.mod h1:pRBVtBSKl77K30Bv8R2P+cLSGaTtex6fsA2W
github.com/valkey-io/valkey-go v1.0.72 h1:iRWt1hJyOchcEgbHSkRY3aKkcBudxvMaVMsmxuYxuxE=
github.com/valkey-io/valkey-go v1.0.72/go.mod h1:VGhZ6fs68Qrn2+OhH+6waZH27bjpgQOiLyUQyXuYK5k=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0=
go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo=
go.uber.org/mock v0.5.2 h1:LbtPTcP8A5k9WPXj54PPPbjcI4Y6lhyOZXn+VS7wNko=
go.uber.org/mock v0.5.2/go.mod h1:wLlUxC2vVTPTaE3UD51E0BGOAElKrILxhVSDYQLld5o=
go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
@@ -99,19 +142,30 @@ go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
golang.org/x/arch v0.20.0 h1:dx1zTU0MAE98U+TQ8BLl7XsJbgze2WnNKF/8tGp/Q6c=
golang.org/x/arch v0.20.0/go.mod h1:bdwinDaKcfZUGpH09BB7ZmOfhalA8lQdzl62l8gGWsk=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc=
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4=
golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA=
golang.org/x/crypto v0.50.0 h1:zO47/JPrL6vsNkINmLoo/PH1gcxpls50DNogFvB5ZGI=
golang.org/x/crypto v0.50.0/go.mod h1:3muZ7vA7PBCE6xgPX7nkzzjiUq87kRItoJQM1Yo8S+Q=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
@@ -122,8 +176,11 @@ golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0=
golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.35.0 h1:Mv2mzuHuZuY2+bkyWXIHMfhNdJAdwW3FuWeCPYN5GVQ=
golang.org/x/oauth2 v0.35.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -133,7 +190,10 @@ golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
@@ -145,8 +205,8 @@ golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI=
golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
@@ -165,20 +225,52 @@ golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8=
golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA=
golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg=
golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164=
golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI=
golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58=
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/genai v1.54.0 h1:ZQCa70WMTJDI11FdqWCzGvZ5PanpcpfoO6jl/lrSnGU=
google.golang.org/genai v1.54.0/go.mod h1:A3kkl0nyBjyFlNjgxIwKq70julKbIxpSxqKO5gw/gmk=
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 h1:pPJltXNxVzT4pK9yD8vR9X75DaWYYmLGMsEvBfFQZzQ=
google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU=
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=
google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc=
google.golang.org/grpc v1.66.2 h1:3QdXkuq3Bkh7w+ywLdLvM56cmGvQHUMZpiCzt6Rqaoo=
google.golang.org/grpc v1.66.2/go.mod h1:s3/l6xSSCURdVfAnL+TqCNMyTDAGN6+lZeVxnZR128Y=
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
google.golang.org/protobuf v1.36.9 h1:w2gp2mA27hUeUzj9Ex9FBjsBm40zfaDtEWow293U7Iw=
google.golang.org/protobuf v1.36.9/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=

View File

@@ -33,14 +33,14 @@ type DiscoveryConfig struct {
}
type AIConfig struct {
Provider string // "ollama" or "mistral"; default "ollama"
RateLimitRPS float64 // Max requests per second to upstream; 0 = disabled (Mistral only)
// GeminiAPIKey is the bootstrap API key from env (GEMINI_API_KEY).
// Used only on first startup when the DB has no key yet.
// Afterwards the key lives encrypted in system_settings.
GeminiAPIKey string
OllamaURL string // default "http://localhost:11434"
OllamaModel string // default "qwen2.5:14b-instruct"
MistralAPIKey string
MistralModel string // default "mistral-large-latest"
// GroundingDailyQuota is the number of free grounding requests per day.
// Default 1500. Used for cost estimation in the UI.
GroundingDailyQuota int
}
type SearchConfig struct {
@@ -188,11 +188,6 @@ func Load() (*Config, error) {
return nil, fmt.Errorf("SMTP_PORT: %w", err)
}
rpsAI, err := envFloat("AI_RATE_LIMIT_RPS", 1.0)
if err != nil {
return nil, fmt.Errorf("AI_RATE_LIMIT_RPS: %w", err)
}
discoveryToken := envStr("DISCOVERY_TOKEN", "")
if discoveryToken == "" {
slog.Warn("DISCOVERY_TOKEN is empty; /api/v1/admin/discovery/crawl is disabled")
@@ -282,12 +277,8 @@ func Load() (*Config, error) {
FrontendURL: envStr("FRONTEND_URL", "http://localhost:5173"),
},
AI: AIConfig{
Provider: envStr("AI_PROVIDER", "ollama"),
RateLimitRPS: rpsAI,
OllamaURL: envStr("AI_OLLAMA_URL", "http://localhost:11434"),
OllamaModel: envStr("AI_OLLAMA_MODEL", "qwen2.5:14b-instruct"),
MistralAPIKey: envStr("AI_MISTRAL_API_KEY", envStr("AI_API_KEY", "")),
MistralModel: envStr("AI_MISTRAL_MODEL", envStr("AI_MODEL_COMPLEX", "mistral-large-latest")),
GeminiAPIKey: envStr("GEMINI_API_KEY", ""),
GroundingDailyQuota: 1500,
},
Search: SearchConfig{
Provider: envStr("SEARCH_PROVIDER", "searxng"),

View File

@@ -43,7 +43,7 @@ func NewLLMEnricher(provider ai.Provider, scraper Scraper) *ProviderLLMEnricher
return &ProviderLLMEnricher{AI: provider, Scraper: scraper}
}
// llmResponse is the JSON shape we instruct Mistral to return. Any field may
// llmResponse is the JSON shape we instruct the LLM to return. Any field may
// be absent if the content doesn't support it — the enricher only writes
// what the model actually produced.
type llmResponse struct {
@@ -58,7 +58,7 @@ type llmResponse struct {
// text — empty-context LLM calls hallucinate.
func (e *ProviderLLMEnricher) EnrichMissing(ctx context.Context, req LLMRequest) (Enrichment, error) {
if e.AI == nil || e.Scraper == nil {
return Enrichment{}, errors.New("mistral enricher not configured")
return Enrichment{}, errors.New("LLM enricher not configured")
}
urls := req.Quellen
@@ -89,6 +89,8 @@ func (e *ProviderLLMEnricher) EnrichMissing(ctx context.Context, req LLMRequest)
SystemPrompt: systemPrompt,
UserMessage: userPrompt,
JSONMode: true,
Grounded: true,
CallType: "enrich_b",
})
if err != nil {
return Enrichment{}, fmt.Errorf("chat: %w", err)
@@ -120,8 +122,8 @@ func (e *ProviderLLMEnricher) EnrichMissing(ctx context.Context, req LLMRequest)
return out, nil
}
// buildSystemPrompt returns the English instruction block. Mistral follows
// English instructions more reliably; only the *output* is German.
// buildSystemPrompt returns the English instruction block sent to the LLM.
// Only the *output* is in German.
func buildSystemPrompt() string {
return strings.TrimSpace(`
You are enriching metadata for a medieval market (Mittelaltermarkt) in the

View File

@@ -94,7 +94,7 @@ func NewSimilarityClassifier(provider ai.Provider) *SimilarityClassifierLLM {
return &SimilarityClassifierLLM{AI: provider}
}
// simResponse is the JSON shape we instruct Mistral to return. Confidence
// simResponse is the JSON shape we instruct the LLM to return. Confidence
// must be parseable as a float 0..1; anything outside that range is clamped.
type simResponse struct {
SameMarket bool `json:"same_market"`
@@ -117,6 +117,8 @@ func (c *SimilarityClassifierLLM) Classify(ctx context.Context, a, b SimilarityR
SystemPrompt: systemPrompt,
UserMessage: userPrompt,
JSONMode: true,
Grounded: false,
CallType: "similarity",
})
if err != nil {
return Verdict{}, fmt.Errorf("chat: %w", err)

View File

@@ -120,7 +120,7 @@ const (
)
// AgentStatus constants.
// Mistral Pass 0 produces: bestaetigt | unklar | vorjahr_unbestaetigt | abgesagt.
// Pass 0 (LLM-enriched) produces: bestaetigt | unklar | vorjahr_unbestaetigt | abgesagt.
// The crawler uses its own sentinel value so the validator's agent-specific
// rules (e.g. bestaetigt+vorjahr_hinweis inconsistency) don't fire on crawler-
// produced rows, and so operators can filter the queue by origin.

View File

@@ -55,7 +55,7 @@ type Service struct {
// server/routes.go using the shared Nominatim client (1 rps limited).
geocoder Geocoder
// llmEnricher is the AI-backed fallback pass. Nil-safe via NoopLLMEnricher
// in test wiring; production code passes a real MistralLLMEnricher.
// in test wiring; production code passes a real ProviderLLMEnricher.
llmEnricher enrich.LLMEnricher
// simClassifier is the AI-backed duplicate tiebreaker. Nil-safe via
// NoopSimilarityClassifier.
@@ -168,8 +168,7 @@ func (s *Service) Crawl(ctx context.Context) (CrawlSummary, error) {
defer cancel()
for _, m := range merged {
// Link verification was needed for Mistral's web_search output (often
// hallucinated URLs). Crawler URLs are parsed from actual HTML of
// Link verification is skipped here: crawler URLs come from actual HTML of
// trusted sources; they've been implicitly verified at parse time.
// Skipping the check makes the crawl complete in <2 minutes even for
// 1500+ events and avoids timing-related false positives where the

View File

@@ -49,32 +49,15 @@ const validResearchJSON = `{
}
}`
// fakeOllamaHandler returns a valid Ollama non-streaming chat response whose
// content field contains validResearchJSON.
func fakeOllamaHandler(t *testing.T) http.Handler {
t.Helper()
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodPost || r.URL.Path != "/api/chat" {
http.NotFound(w, r)
return
}
resp := map[string]any{
"model": "test",
"created_at": "2026-04-24T00:00:00Z",
"message": map[string]string{
"role": "assistant",
"content": validResearchJSON,
},
"done": true,
"prompt_eval_count": 10,
"eval_count": 20,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(resp); err != nil {
t.Errorf("fakeOllamaHandler: encode response: %v", err)
}
})
// fakeProvider is a stub ai.Provider that returns validResearchJSON for any Chat call.
type fakeProvider struct{}

// Chat ignores its inputs and always yields the canned research payload.
func (f *fakeProvider) Chat(_ context.Context, _ *ai.ChatRequest) (*ai.ChatResponse, error) {
	return &ai.ChatResponse{Content: validResearchJSON}, nil
}

// Capability probes: advertise full structured-output support so the
// orchestrator exercises its schema path in this test.
func (f *fakeProvider) SupportsJSONMode() bool   { return true }
func (f *fakeProvider) SupportsJSONSchema() bool { return true }

// Name labels the provider in logs and usage records.
func (f *fakeProvider) Name() string { return "fake" }
// fakeSearxngHandler returns a SearxNG JSON response whose result URLs point
// at the provided page server.
@@ -120,14 +103,11 @@ func TestIntegrationOrchestratorFullPipeline(t *testing.T) {
fakePage := httptest.NewServer(fakePageHandler(t, &pageHits))
defer fakePage.Close()
fakeOllama := httptest.NewServer(fakeOllamaHandler(t))
defer fakeOllama.Close()
fakeSearxng := httptest.NewServer(fakeSearxngHandler(t, fakePage.URL))
defer fakeSearxng.Close()
orch := &research.Orchestrator{
AI: ai.NewOllamaProvider(ai.OllamaConfig{BaseURL: fakeOllama.URL, Model: "test"}),
AI: &fakeProvider{},
Search: search.NewSearxng(search.SearxngConfig{BaseURL: fakeSearxng.URL}),
Scraper: scrape.New("test-agent/1.0"),
MaxPages: 4,

View File

@@ -76,21 +76,11 @@ func (o *Orchestrator) Run(ctx context.Context, in Input) (Output, error) {
}
// 4. LLM call with one retry on schema violation
// Providers with constrained decoding (Ollama) use a simplified schema
// without $defs, union types, or patterns — and are validated against that
// same simplified schema. Providers that embed the schema in the prompt
// (Mistral) get the full schema for both generation and validation.
constraintSchema := SchemaJSON
validationSchema := SchemaJSON
if o.AI.SupportsJSONSchema() {
constraintSchema = ConstraintSchemaJSON
validationSchema = ConstraintSchemaJSON
}
// Simplified schema for constrained decoding (no $defs, no union types)
validate := func(content string) error {
normalized := normalizeNullStrings(content)
return ai.ValidateSchema(validationSchema, []byte(normalized))
return ai.ValidateSchema(ConstraintSchemaJSON, []byte(normalizeNullStrings(content)))
}
resp, err := callLLM(ctx, o.AI, userPrompt, constraintSchema)
resp, err := callLLM(ctx, o.AI, userPrompt, ConstraintSchemaJSON)
if err == nil {
if verr := validate(resp.Content); verr != nil {
err = &ai.ProviderError{Code: ai.ErrSchemaViolation, Retryable: true, RawOutput: resp.Content, Inner: verr}
@@ -101,7 +91,7 @@ func (o *Orchestrator) Run(ctx context.Context, in Input) (Output, error) {
if err != nil {
var pe *ai.ProviderError
if errors.As(err, &pe) && pe.Code == ai.ErrSchemaViolation {
resp, err = callLLM(ctx, o.AI, userPrompt+"\n\nYour previous response failed schema validation. Re-emit the JSON strictly matching the schema.", constraintSchema)
resp, err = callLLM(ctx, o.AI, userPrompt+"\n\nYour previous response failed schema validation. Re-emit the JSON strictly matching the schema.", ConstraintSchemaJSON)
if err == nil {
if verr := validate(resp.Content); verr != nil {
err = &ai.ProviderError{Code: ai.ErrSchemaViolation, Retryable: false, RawOutput: resp.Content, Inner: verr}
@@ -128,6 +118,8 @@ func callLLM(ctx context.Context, p ai.Provider, userPrompt string, schema []byt
SystemPrompt: SystemPrompt,
UserMessage: userPrompt,
JSONSchema: schema,
Grounded: true,
CallType: "research",
})
}

View File

@@ -7,8 +7,7 @@ var SchemaJSON []byte
// ConstraintSchemaJSON is a simplified, flat JSON Schema for providers that
// support constrained decoding but cannot handle $defs, union types, or
// pattern constraints (e.g. Ollama with llama.cpp grammar generation).
// Post-hoc validation always uses SchemaJSON regardless.
// pattern constraints. Post-hoc validation always uses SchemaJSON regardless.
//
//go:embed assets/researcher_schema_ollama.json
//go:embed assets/researcher_schema_simple.json
var ConstraintSchemaJSON []byte

View File

@@ -2,67 +2,82 @@ package settings
import (
"net/http"
"strconv"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
"marktvogt.de/backend/internal/pkg/ai"
)
// AIStatus is the response payload for GET /admin/settings/ai.
type AIStatus struct {
Provider string `json:"provider"`
Connected bool `json:"connected"`
BaseURL string `json:"base_url,omitempty"`
Model string `json:"model"`
Models []string `json:"models"`
Provider string `json:"provider"`
Connected bool `json:"connected"`
Model string `json:"model"`
Models []string `json:"models"`
APIKeyFingerprint string `json:"api_key_fingerprint,omitempty"`
GroundingEnabled bool `json:"grounding_enabled"`
GroundingQuota int `json:"grounding_quota"`
Usage UsageSummary `json:"usage"`
}
// Handler serves AI settings endpoints. ollama is nil when the active
// provider is not Ollama.
type UsageSummary struct {
Today UsageStats `json:"today"`
Month UsageStats `json:"month"`
GroundingUsedToday int `json:"grounding_used_today"`
}
// Handler serves AI settings endpoints.
type Handler struct {
ollama *ai.OllamaProvider
provider string
provider *ai.GeminiProvider
store *Store
usageRepo *UsageRepo
}
func NewHandler(provider ai.Provider) *Handler {
ollama, _ := provider.(*ai.OllamaProvider)
return &Handler{ollama: ollama, provider: provider.Name()}
func NewHandler(provider *ai.GeminiProvider, store *Store, usageRepo *UsageRepo) *Handler {
return &Handler{provider: provider, store: store, usageRepo: usageRepo}
}
func (h *Handler) GetAI(c *gin.Context) {
if h.ollama == nil {
c.JSON(http.StatusOK, gin.H{"data": AIStatus{
Provider: h.provider,
Connected: true,
Model: "",
Models: []string{},
}})
return
ctx := c.Request.Context()
models, err := h.provider.ListModelNames(ctx)
connected := err == nil
if models == nil {
models = []string{}
}
models, err := h.ollama.ListModels(c.Request.Context())
status := AIStatus{
Provider: "ollama",
BaseURL: h.ollama.BaseURL(),
Model: h.ollama.Model(),
Models: []string{},
// Fingerprint: last 4 chars of stored key (if any)
fingerprint := ""
if key, kerr := h.store.GetGeminiAPIKey(ctx); kerr == nil && len(key) >= 4 {
fingerprint = "•••" + key[len(key)-4:]
}
if err != nil {
status.Connected = false
} else {
status.Connected = true
for _, m := range models {
status.Models = append(status.Models, m.Name)
}
}
c.JSON(http.StatusOK, gin.H{"data": status})
grounding, _ := h.store.GetGroundingEnabled(ctx)
today, _ := h.usageRepo.Today(ctx)
month, _ := h.usageRepo.Month(ctx)
groundingToday, _ := h.usageRepo.GroundingToday(ctx)
c.JSON(http.StatusOK, gin.H{"data": AIStatus{
Provider: "gemini",
Connected: connected,
Model: h.provider.Model(),
Models: models,
APIKeyFingerprint: fingerprint,
GroundingEnabled: grounding,
GroundingQuota: 1500,
Usage: UsageSummary{
Today: today,
Month: month,
GroundingUsedToday: groundingToday,
},
}})
}
func (h *Handler) SetModel(c *gin.Context) {
if h.ollama == nil {
c.JSON(http.StatusBadRequest, gin.H{"error": "model switching only supported for Ollama provider"})
return
}
ctx := c.Request.Context()
var req struct {
Model string `json:"model" binding:"required"`
}
@@ -70,6 +85,83 @@ func (h *Handler) SetModel(c *gin.Context) {
c.JSON(http.StatusBadRequest, gin.H{"error": "model is required"})
return
}
h.ollama.SetModel(req.Model)
userID := callerID(c)
if err := h.store.SetModel(ctx, req.Model, userID); err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to save model"})
return
}
h.provider.SetModel(req.Model)
c.JSON(http.StatusOK, gin.H{"data": gin.H{"model": req.Model}})
}
// SetAPIKey handles POST /admin/settings/ai/key: persists the Gemini API key
// (encrypted at rest by the Store) and hot-swaps the provider's client so the
// new key takes effect without a restart.
func (h *Handler) SetAPIKey(c *gin.Context) {
	var body struct {
		APIKey string `json:"api_key" binding:"required"`
	}
	if err := c.ShouldBindJSON(&body); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "api_key is required"})
		return
	}
	ctx := c.Request.Context()
	// Persist first so the key survives a restart even if client init fails below.
	if err := h.store.SetGeminiAPIKey(ctx, body.APIKey, callerID(c)); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to save api key"})
		return
	}
	if err := h.provider.Reinitialize(ctx, body.APIKey); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "key saved but provider init failed: " + err.Error()})
		return
	}
	// Echo back only a masked fingerprint (last 4 chars) for the UI.
	var fingerprint string
	if n := len(body.APIKey); n >= 4 {
		fingerprint = "•••" + body.APIKey[n-4:]
	}
	c.JSON(http.StatusOK, gin.H{"data": gin.H{"api_key_fingerprint": fingerprint}})
}
// SetGrounding handles POST /admin/settings/ai/grounding: persists the Google
// Search grounding toggle. The new value is read by call sites on their next
// AI request; no provider restart is needed.
func (h *Handler) SetGrounding(c *gin.Context) {
	ctx := c.Request.Context()
	var req struct {
		Enabled bool `json:"enabled"`
	}
	// Note: `enabled` deliberately has no binding:"required" — a bool zero
	// value is a legitimate payload ({"enabled": false}), so binding can only
	// fail on malformed JSON. The previous "enabled is required" message was
	// misleading for that case.
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"})
		return
	}
	userID := callerID(c)
	if err := h.store.SetGroundingEnabled(ctx, req.Enabled, userID); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to save grounding setting"})
		return
	}
	c.JSON(http.StatusOK, gin.H{"data": gin.H{"grounding_enabled": req.Enabled}})
}
// GetUsage handles GET /admin/settings/ai/usage: returns the most recent AI
// calls, newest first. ?limit=N caps the page size (default 50, max 200);
// out-of-range or non-numeric values silently fall back to the default.
func (h *Handler) GetUsage(c *gin.Context) {
	limit := 50
	if raw := c.Query("limit"); raw != "" {
		n, err := strconv.Atoi(raw)
		if err == nil && n > 0 && n <= 200 {
			limit = n
		}
	}
	events, err := h.usageRepo.Recent(c.Request.Context(), limit)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to load usage"})
		return
	}
	// Ensure an empty result encodes as [] rather than null.
	if events == nil {
		events = []UsageEvent{}
	}
	c.JSON(http.StatusOK, gin.H{"data": events})
}
// callerID extracts the authenticated user's UUID from gin context.
// Returns uuid.Nil if the key is absent or of the wrong type (shouldn't
// happen behind requireAuth).
func callerID(c *gin.Context) uuid.UUID {
	v, ok := c.Get("user_id")
	if !ok {
		return uuid.Nil
	}
	id, ok := v.(uuid.UUID)
	if !ok {
		return uuid.Nil
	}
	return id
}

View File

@@ -6,4 +6,7 @@ func RegisterRoutes(rg *gin.RouterGroup, h *Handler, requireAuth, requireAdmin g
admin := rg.Group("/admin", requireAuth, requireAdmin)
admin.GET("/settings/ai", h.GetAI)
admin.POST("/settings/ai/model", h.SetModel)
admin.POST("/settings/ai/key", h.SetAPIKey)
admin.POST("/settings/ai/grounding", h.SetGrounding)
admin.GET("/settings/ai/usage", h.GetUsage)
}

View File

@@ -0,0 +1,112 @@
package settings
import (
"context"
"errors"
"fmt"
"github.com/google/uuid"
"github.com/jackc/pgx/v5"
"github.com/jackc/pgx/v5/pgxpool"
apicrypto "marktvogt.de/backend/internal/pkg/crypto"
)
const (
keyAPIKey = "gemini.api_key"
keyModel = "gemini.model"
keyGroundingEnabled = "gemini.grounding_enabled"
)
// Store persists AI provider configuration in system_settings.
// Sensitive values (API key) are stored AES-256-GCM encrypted.
type Store struct {
db *pgxpool.Pool
enc [32]byte // derived from JWT_SECRET
}
func NewStore(db *pgxpool.Pool, encKey [32]byte) *Store {
return &Store{db: db, enc: encKey}
}
// GetGeminiAPIKey loads and decrypts the stored Gemini API key.
// A missing row is not an error: it returns ("", nil) so the caller can fall
// back to the env bootstrap key.
func (s *Store) GetGeminiAPIKey(ctx context.Context) (string, error) {
	var enc []byte
	err := s.db.QueryRow(ctx,
		`SELECT value_encrypted FROM system_settings WHERE key = $1`, keyAPIKey).Scan(&enc)
	switch {
	case errors.Is(err, pgx.ErrNoRows):
		return "", nil
	case err != nil:
		return "", fmt.Errorf("settings: get api key: %w", err)
	}
	plain, err := apicrypto.Open(s.enc, enc)
	if err != nil {
		return "", fmt.Errorf("settings: decrypt api key: %w", err)
	}
	return string(plain), nil
}
// SetGeminiAPIKey encrypts apiKey with AES-256-GCM and upserts it into
// system_settings, recording who changed it and when.
func (s *Store) SetGeminiAPIKey(ctx context.Context, apiKey string, updatedBy uuid.UUID) error {
	enc, err := apicrypto.Seal(s.enc, []byte(apiKey))
	if err != nil {
		return fmt.Errorf("settings: encrypt api key: %w", err)
	}
	_, err = s.db.Exec(ctx, `
		INSERT INTO system_settings (key, value_encrypted, updated_by)
		VALUES ($1, $2, $3)
		ON CONFLICT (key) DO UPDATE
		SET value_encrypted = EXCLUDED.value_encrypted,
		    updated_at = now(),
		    updated_by = EXCLUDED.updated_by
	`, keyAPIKey, enc, updatedBy)
	if err != nil {
		// Wrap like every other Store method so the caller sees which setting failed.
		return fmt.Errorf("settings: set api key: %w", err)
	}
	return nil
}
// GetModel returns the configured Gemini model id, defaulting to
// gemini-2.5-flash-lite when no row exists.
func (s *Store) GetModel(ctx context.Context) (string, error) {
	return s.getText(ctx, keyModel, "gemini-2.5-flash-lite")
}

// SetModel persists the model id chosen in the admin UI.
func (s *Store) SetModel(ctx context.Context, model string, updatedBy uuid.UUID) error {
	return s.setText(ctx, keyModel, model, updatedBy)
}

// GetGroundingEnabled returns the Google Search grounding toggle.
// Defaults to true (grounding on) when unset; any stored value other than
// the literal "false" also counts as enabled.
func (s *Store) GetGroundingEnabled(ctx context.Context) (bool, error) {
	v, err := s.getText(ctx, keyGroundingEnabled, "true")
	if err != nil {
		// Return the safe default alongside the error; callers typically
		// ignore the error and use the value.
		return true, err
	}
	return v != "false", nil
}

// SetGroundingEnabled persists the grounding toggle as "true"/"false" text.
func (s *Store) SetGroundingEnabled(ctx context.Context, enabled bool, updatedBy uuid.UUID) error {
	v := "false"
	if enabled {
		v = "true"
	}
	return s.setText(ctx, keyGroundingEnabled, v, updatedBy)
}
// getText reads a plaintext setting, returning fallback when the key was
// never written. On query errors the fallback is returned alongside the
// error so callers can degrade gracefully.
func (s *Store) getText(ctx context.Context, key, fallback string) (string, error) {
	var v string
	err := s.db.QueryRow(ctx,
		`SELECT value_text FROM system_settings WHERE key = $1`, key).Scan(&v)
	if err == nil {
		return v, nil
	}
	if errors.Is(err, pgx.ErrNoRows) {
		return fallback, nil
	}
	return fallback, fmt.Errorf("settings: get %s: %w", key, err)
}
// setText upserts a plaintext setting, recording who changed it and when.
func (s *Store) setText(ctx context.Context, key, value string, updatedBy uuid.UUID) error {
	_, err := s.db.Exec(ctx, `
		INSERT INTO system_settings (key, value_text, updated_by)
		VALUES ($1, $2, $3)
		ON CONFLICT (key) DO UPDATE
		SET value_text = EXCLUDED.value_text,
		    updated_at = now(),
		    updated_by = EXCLUDED.updated_by
	`, key, value, updatedBy)
	if err != nil {
		// Wrap with the setting key, consistent with getText / the getters.
		return fmt.Errorf("settings: set %s: %w", key, err)
	}
	return nil
}

View File

@@ -0,0 +1,125 @@
package settings
import (
"context"
"fmt"
"time"
"github.com/jackc/pgx/v5/pgxpool"
"marktvogt.de/backend/internal/pkg/ai"
)
// UsageRepo persists and queries AI call records.
type UsageRepo struct {
db *pgxpool.Pool
}
func NewUsageRepo(db *pgxpool.Pool) *UsageRepo {
return &UsageRepo{db: db}
}
// Record writes a single usage event — implements ai.UsageRecorder.
// An empty Error field is stored as SQL NULL so "error IS NULL" means success.
func (r *UsageRepo) Record(ctx context.Context, e ai.UsageEvent) error {
	var callErr *string
	if msg := e.Error; msg != "" {
		callErr = &msg
	}
	_, err := r.db.Exec(ctx, `
		INSERT INTO ai_usage
		  (provider, model, call_type, input_tokens, output_tokens,
		   grounded, duration_ms, estimated_cost_usd, error)
		VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9)
	`, e.Provider, e.Model, e.CallType, e.InputTokens, e.OutputTokens,
		e.Grounded, e.DurationMs, e.EstimatedCostUSD, callErr)
	if err != nil {
		return fmt.Errorf("usage: record: %w", err)
	}
	return nil
}
// UsageStats is a rollup over a time window.
type UsageStats struct {
Calls int `json:"calls"`
InputTokens int `json:"input_tokens"`
OutputTokens int `json:"output_tokens"`
GroundingCalls int `json:"grounding_calls"`
EstimatedCostUSD float64 `json:"estimated_cost_usd"`
}
// Today returns a rollup over the trailing 24 hours (rolling window, not a
// calendar day).
func (r *UsageRepo) Today(ctx context.Context) (UsageStats, error) {
	return r.statsWindow(ctx, "1 day")
}

// Month returns a rollup over the trailing 30 days (rolling window, not a
// calendar month).
func (r *UsageRepo) Month(ctx context.Context) (UsageStats, error) {
	return r.statsWindow(ctx, "30 days")
}
// GroundingToday counts grounded calls in the trailing 24 hours.
// NOTE(review): this is a rolling 24h window, not a calendar day. Google's
// free grounding tier presumably resets per calendar day, so the quota shown
// in the UI can diverge slightly — confirm which semantics are intended.
func (r *UsageRepo) GroundingToday(ctx context.Context) (int, error) {
	row := r.db.QueryRow(ctx, `
		SELECT COUNT(*) FROM ai_usage
		WHERE grounded AND created_at >= now() - INTERVAL '1 day'
	`)
	var n int
	return n, row.Scan(&n)
}
// statsWindow aggregates ai_usage rows newer than now()-interval.
// interval is a Postgres interval literal such as "1 day" or "30 days".
// It is passed as a bind parameter cast to ::interval rather than
// fmt.Sprintf-interpolated into the SQL text — the previous interpolation was
// safe only because all callers pass constants, and was an injection footgun.
func (r *UsageRepo) statsWindow(ctx context.Context, interval string) (UsageStats, error) {
	row := r.db.QueryRow(ctx, `
		SELECT
		  COUNT(*) AS calls,
		  COALESCE(SUM(input_tokens),0) AS input_tokens,
		  COALESCE(SUM(output_tokens),0) AS output_tokens,
		  COALESCE(SUM(CASE WHEN grounded THEN 1 ELSE 0 END),0) AS grounding_calls,
		  COALESCE(SUM(estimated_cost_usd),0) AS cost
		FROM ai_usage
		WHERE created_at >= now() - $1::interval
	`, interval)
	var s UsageStats
	if err := row.Scan(&s.Calls, &s.InputTokens, &s.OutputTokens, &s.GroundingCalls, &s.EstimatedCostUSD); err != nil {
		return s, fmt.Errorf("usage: stats(%s): %w", interval, err)
	}
	return s, nil
}
// UsageEvent is a single entry from ai_usage.
type UsageEvent struct {
ID int64 `json:"id"`
CreatedAt time.Time `json:"created_at"`
Provider string `json:"provider"`
Model string `json:"model"`
CallType string `json:"call_type"`
InputTokens int `json:"input_tokens"`
OutputTokens int `json:"output_tokens"`
Grounded bool `json:"grounded"`
DurationMs int `json:"duration_ms"`
EstimatedCostUSD float64 `json:"estimated_cost_usd"`
Error *string `json:"error,omitempty"`
}
// Recent returns up to limit usage events, newest first. The slice is nil
// when the table is empty; callers that need [] in JSON must convert.
func (r *UsageRepo) Recent(ctx context.Context, limit int) ([]UsageEvent, error) {
	rows, err := r.db.Query(ctx, `
		SELECT id, created_at, provider, model, call_type,
		       input_tokens, output_tokens, grounded, duration_ms,
		       estimated_cost_usd, error
		FROM ai_usage
		ORDER BY created_at DESC
		LIMIT $1
	`, limit)
	if err != nil {
		return nil, fmt.Errorf("usage: recent: %w", err)
	}
	defer rows.Close()
	var events []UsageEvent
	for rows.Next() {
		var ev UsageEvent
		scanErr := rows.Scan(&ev.ID, &ev.CreatedAt, &ev.Provider, &ev.Model, &ev.CallType,
			&ev.InputTokens, &ev.OutputTokens, &ev.Grounded, &ev.DurationMs,
			&ev.EstimatedCostUSD, &ev.Error)
		if scanErr != nil {
			return nil, fmt.Errorf("usage: scan: %w", scanErr)
		}
		events = append(events, ev)
	}
	return events, rows.Err()
}

View File

@@ -3,39 +3,37 @@ package ai
import (
"context"
"fmt"
"time"
mistral "github.com/VikingOwl91/mistral-go-sdk"
"github.com/VikingOwl91/mistral-go-sdk/chat"
"marktvogt.de/backend/internal/config"
)
const (
providerOllama = "ollama"
providerMistral = "mistral"
)
func NewFromConfig(cfg config.AIConfig) (Provider, error) {
switch cfg.Provider {
case "", providerOllama:
return NewOllamaProvider(OllamaConfig{
BaseURL: cfg.OllamaURL,
Model: cfg.OllamaModel,
}), nil
case providerMistral:
if cfg.MistralAPIKey == "" {
return nil, fmt.Errorf("ai: provider=%s requires AI_MISTRAL_API_KEY", providerMistral)
}
sdk := mistral.NewClient(
cfg.MistralAPIKey,
mistral.WithTimeout(120*time.Second),
mistral.WithRetry(2, 1*time.Second),
)
chatFn := func(ctx context.Context, req *chat.CompletionRequest) (*chat.CompletionResponse, error) {
return sdk.ChatComplete(ctx, req)
}
return newMistralProviderWithChat(cfg.MistralModel, chatFn, newRateLimiter(cfg.RateLimitRPS)), nil
default:
return nil, fmt.Errorf("ai: unknown provider %q (want %s|%s)", cfg.Provider, providerOllama, providerMistral)
}
// KeySource provides the current Gemini API key. Implemented by settings.Store.
type KeySource interface {
	GetGeminiAPIKey(ctx context.Context) (string, error)
	GetModel(ctx context.Context) (string, error)
}

// NewFromConfig creates a GeminiProvider. The DB-stored API key wins; when
// the store has none, cfg.GeminiAPIKey (env bootstrap) is used. With no key
// at all, an unconfigured provider is returned so the server can still boot
// and the operator can set the key through the admin UI.
func NewFromConfig(ctx context.Context, cfg config.AIConfig, store KeySource, recorder UsageRecorder) (*GeminiProvider, error) {
	apiKey, err := store.GetGeminiAPIKey(ctx)
	if err != nil {
		return nil, fmt.Errorf("ai: read api key from store: %w", err)
	}
	if apiKey == "" {
		apiKey = cfg.GeminiAPIKey
	}
	// Model: stored value wins; a store error or empty value falls back to
	// the default model.
	model, merr := store.GetModel(ctx)
	if merr != nil || model == "" {
		model = "gemini-2.5-flash-lite"
	}
	if apiKey == "" {
		return newUnconfiguredGeminiProvider(model, recorder), nil
	}
	return NewGeminiProvider(ctx, apiKey, model, recorder)
}

View File

@@ -1,29 +0,0 @@
package ai
import (
"testing"
"marktvogt.de/backend/internal/config"
)
func TestNewFromConfig_Ollama(t *testing.T) {
p, err := NewFromConfig(config.AIConfig{Provider: providerOllama, OllamaURL: "http://x:11434", OllamaModel: "m"})
if err != nil {
t.Fatalf("NewFromConfig: %v", err)
}
if p.Name() != providerOllama {
t.Fatalf("Name: %q", p.Name())
}
}
func TestNewFromConfig_MistralRequiresKey(t *testing.T) {
if _, err := NewFromConfig(config.AIConfig{Provider: providerMistral}); err == nil {
t.Fatal("want error when MistralAPIKey is empty")
}
}
func TestNewFromConfig_UnknownProvider(t *testing.T) {
if _, err := NewFromConfig(config.AIConfig{Provider: "llama-cpp"}); err == nil {
t.Fatal("want error for unknown provider")
}
}

View File

@@ -0,0 +1,295 @@
package ai
import (
"context"
"encoding/json"
"fmt"
"sync"
"time"
"google.golang.org/genai"
)
// Gemini API pricing (as of 2026-04). Refresh constants when pricing changes.
// https://ai.google.dev/gemini-api/docs/pricing
const (
	geminiInputCostPerToken  = 0.10 / 1_000_000 // $0.10 / 1M tokens
	geminiOutputCostPerToken = 0.40 / 1_000_000 // $0.40 / 1M tokens
	// Despite the "Per1k" name, the value is the cost of ONE grounded prompt
	// above the free tier ($35 per 1,000 prompts = $0.035); estimateCost adds
	// it once per above-quota grounded call.
	geminiGroundingCostPer1k = 35.0 / 1_000 // $35 / 1k grounded prompts (above free tier)
	geminiGroundingFreeDaily = 1_500 // daily free grounding requests
)
// GeminiProvider implements the ai.Provider interface on top of
// google.golang.org/genai. The client pointer can be swapped at runtime when
// the API key is rotated (Reinitialize), so client and model are read/written
// only under mu.
type GeminiProvider struct {
	mu       sync.RWMutex
	client   *genai.Client
	model    string
	recorder UsageRecorder
	// groundingCallsToday is an in-process counter used for cost estimation only.
	// It is not persisted and resets on restart. The authoritative count lives in ai_usage.
	groundingCallsToday int
	groundingDate       time.Time
}

// newUnconfiguredGeminiProvider returns a provider with no client set.
// All Chat calls return ErrInternal until Reinitialize is called.
// Used at boot when neither the DB nor the environment supplies an API key.
func newUnconfiguredGeminiProvider(model string, recorder UsageRecorder) *GeminiProvider {
	return &GeminiProvider{
		model:         model,
		recorder:      recorder,
		groundingDate: time.Now().UTC().Truncate(24 * time.Hour),
	}
}
// Reinitialize swaps the underlying genai.Client for a new API key.
// Safe to call concurrently; the client is constructed outside the lock so
// callers block only briefly on the pointer swap.
func (p *GeminiProvider) Reinitialize(ctx context.Context, apiKey string) error {
	cc := &genai.ClientConfig{
		APIKey:  apiKey,
		Backend: genai.BackendGeminiAPI,
	}
	client, err := genai.NewClient(ctx, cc)
	if err != nil {
		return fmt.Errorf("gemini: reinitialize client: %w", err)
	}
	p.mu.Lock()
	defer p.mu.Unlock()
	p.client = client
	return nil
}
// NewGeminiProvider builds a ready-to-use provider for the given key and
// model. recorder may be nil, in which case usage events are dropped.
func NewGeminiProvider(ctx context.Context, apiKey, model string, recorder UsageRecorder) (*GeminiProvider, error) {
	c, err := genai.NewClient(ctx, &genai.ClientConfig{
		APIKey:  apiKey,
		Backend: genai.BackendGeminiAPI,
	})
	if err != nil {
		return nil, fmt.Errorf("gemini: new client: %w", err)
	}
	p := &GeminiProvider{
		client:        c,
		model:         model,
		recorder:      recorder,
		groundingDate: time.Now().UTC().Truncate(24 * time.Hour),
	}
	return p, nil
}
// Name identifies this provider in logs and usage records.
func (p *GeminiProvider) Name() string { return "gemini" }

// BaseURL is empty: the hosted Gemini API has no configurable endpoint.
func (p *GeminiProvider) BaseURL() string { return "" }

// SupportsJSONMode reports that Gemini can emit bare JSON output.
func (p *GeminiProvider) SupportsJSONMode() bool { return true }

// SupportsJSONSchema reports that Gemini supports constrained decoding
// against a response schema.
func (p *GeminiProvider) SupportsJSONSchema() bool { return true }

// Model returns the default model id, used when ChatRequest.Model is empty.
// Mutex-guarded because SetModel may change it at runtime.
func (p *GeminiProvider) Model() string {
	p.mu.RLock()
	defer p.mu.RUnlock()
	return p.model
}

// SetModel swaps the default model id (hot-reload from the admin UI).
func (p *GeminiProvider) SetModel(model string) {
	p.mu.Lock()
	defer p.mu.Unlock()
	p.model = model
}
// ListModelNames returns the ids of available models that support
// generateContent. It returns an error when no API key is configured yet, so
// callers (the settings UI) can distinguish "not connected" from
// "connected, zero models" — previously a nil client returned (nil, nil) and
// the UI reported Connected=true with no key set.
func (p *GeminiProvider) ListModelNames(ctx context.Context) ([]string, error) {
	p.mu.RLock()
	client := p.client
	p.mu.RUnlock()
	if client == nil {
		return nil, fmt.Errorf("gemini: api key not configured")
	}
	resp, err := client.Models.List(ctx, nil)
	if err != nil {
		return nil, fmt.Errorf("gemini: list models: %w", err)
	}
	var names []string
	for _, m := range resp.Items {
		for _, action := range m.SupportedActions {
			if action == "generateContent" {
				names = append(names, m.Name)
				break
			}
		}
	}
	return names, nil
}
// Chat sends one prompt to the Gemini API and returns the assistant text.
// Every call — success or failure — is recorded via the UsageRecorder.
// When req.JSONSchema is set, the schema drives both constrained decoding
// (ResponseSchema) and post-hoc validation; a mismatch yields a retryable
// ErrSchemaViolation carrying the raw output.
func (p *GeminiProvider) Chat(ctx context.Context, req *ChatRequest) (*ChatResponse, error) {
	p.mu.RLock()
	client := p.client
	p.mu.RUnlock()
	if client == nil {
		return nil, &ProviderError{Code: ErrInternal, Message: "gemini api key not configured — set it in admin settings", Retryable: false}
	}
	start := time.Now()
	model := req.Model
	if model == "" {
		model = p.Model()
	}
	cfg := &genai.GenerateContentConfig{}
	// System instruction
	if req.SystemPrompt != "" {
		cfg.SystemInstruction = genai.NewContentFromText(req.SystemPrompt, genai.RoleUser)
	}
	// Structured output: a full schema takes precedence over bare JSON mode.
	switch {
	case len(req.JSONSchema) > 0:
		cfg.ResponseMIMEType = "application/json"
		var schema map[string]any
		if err := json.Unmarshal(req.JSONSchema, &schema); err != nil {
			return nil, &ProviderError{Code: ErrInvalidRequest, Message: "invalid JSON schema", Retryable: false, Inner: err}
		}
		cfg.ResponseSchema = schemaFromMap(schema)
	case req.JSONMode:
		cfg.ResponseMIMEType = "application/json"
	}
	if req.Temperature != 0 {
		t := float32(req.Temperature)
		cfg.Temperature = &t
	}
	if req.MaxTokens != 0 {
		cfg.MaxOutputTokens = int32(req.MaxTokens)
	}
	// Google Search grounding. Gemini 2.x models take the GoogleSearch tool;
	// GoogleSearchRetrieval is the legacy Gemini 1.5 variant and is rejected
	// by gemini-2.5-* models with an invalid-argument error.
	if req.Grounded {
		cfg.Tools = []*genai.Tool{
			{GoogleSearch: &genai.GoogleSearch{}},
		}
	}
	resp, err := client.Models.GenerateContent(ctx, model,
		genai.Text(req.UserMessage), cfg)
	durationMs := int(time.Since(start).Milliseconds())
	// Record before error handling so failed calls show up in ai_usage too.
	event := p.buildUsageEvent(model, req, resp, err, durationMs)
	p.record(ctx, event)
	if err != nil {
		return nil, ClassifyError(err)
	}
	if len(resp.Candidates) == 0 {
		return nil, &ProviderError{Code: ErrInternal, Message: "no candidates in response", Retryable: false}
	}
	text := resp.Text()
	// Constrained decoding is best-effort; verify the output post-hoc.
	if len(req.JSONSchema) > 0 {
		if verr := ValidateSchema(req.JSONSchema, []byte(text)); verr != nil {
			return nil, &ProviderError{
				Code:      ErrSchemaViolation,
				Message:   fmt.Sprintf("response does not match schema: %v", verr),
				Retryable: true,
				Inner:     verr,
				RawOutput: text,
			}
		}
	}
	out := &ChatResponse{
		Content: text,
		Model:   resp.ModelVersion,
	}
	if resp.UsageMetadata != nil {
		out.PromptTokens = int(resp.UsageMetadata.PromptTokenCount)
		out.OutputTokens = int(resp.UsageMetadata.CandidatesTokenCount)
		out.TotalTokens = int(resp.UsageMetadata.TotalTokenCount)
	}
	if c := resp.Candidates[0]; c.GroundingMetadata != nil {
		out.SearchQueries = c.GroundingMetadata.WebSearchQueries
	}
	return out, nil
}
// buildUsageEvent assembles the ai_usage record for one Chat call. It covers
// both success and failure: token counts stay zero when the response carries
// no usage metadata, and a non-nil callErr is stored as its message.
func (p *GeminiProvider) buildUsageEvent(model string, req *ChatRequest, resp *genai.GenerateContentResponse, callErr error, durationMs int) UsageEvent {
	ev := UsageEvent{
		Provider:   "gemini",
		Model:      model,
		CallType:   req.CallType,
		Grounded:   req.Grounded,
		DurationMs: durationMs,
	}
	if resp != nil && resp.UsageMetadata != nil {
		ev.InputTokens = int(resp.UsageMetadata.PromptTokenCount)
		ev.OutputTokens = int(resp.UsageMetadata.CandidatesTokenCount)
	}
	if callErr != nil {
		ev.Error = callErr.Error()
	}
	ev.EstimatedCostUSD = p.estimateCost(ev.InputTokens, ev.OutputTokens, req.Grounded)
	return ev
}
// estimateCost approximates the USD cost of one call from token counts.
// A grounded call also consumes one unit of the daily free grounding quota;
// once the in-process counter exceeds geminiGroundingFreeDaily, each further
// grounded call adds the per-prompt grounding price.
// NOTE(review): the counter resets on process restart (see struct comment),
// so the above-free-tier surcharge can be under-estimated after a restart.
func (p *GeminiProvider) estimateCost(inputTokens, outputTokens int, grounded bool) float64 {
	cost := float64(inputTokens)*geminiInputCostPerToken +
		float64(outputTokens)*geminiOutputCostPerToken
	if grounded {
		p.mu.Lock()
		// Roll the counter over at UTC midnight.
		today := time.Now().UTC().Truncate(24 * time.Hour)
		if !today.Equal(p.groundingDate) {
			p.groundingCallsToday = 0
			p.groundingDate = today
		}
		p.groundingCallsToday++
		aboveFree := p.groundingCallsToday - geminiGroundingFreeDaily
		p.mu.Unlock()
		if aboveFree > 0 {
			// $0.035 per grounded prompt beyond the free daily requests.
			cost += geminiGroundingCostPer1k
		}
	}
	return cost
}
// record forwards the event to the configured UsageRecorder, if any.
// Recording is best-effort: a failed insert must never fail the AI call,
// so the error is deliberately discarded.
func (p *GeminiProvider) record(ctx context.Context, e UsageEvent) {
	if p.recorder == nil {
		return
	}
	_ = p.recorder.Record(ctx, e)
}
// schemaFromMap converts a raw JSON-schema map to genai.Schema for structured
// output. Only the subset Gemini's controlled generation understands is
// mapped: type, properties, required, description, enum, items, anyOf.
func schemaFromMap(m map[string]any) *genai.Schema {
	s := &genai.Schema{}
	if t, ok := m["type"].(string); ok {
		// JSON Schema uses lowercase type names ("object", "string", ...) while
		// the Gemini API's Type enum is uppercase ("OBJECT", "STRING", ...).
		// Map explicitly — passing the lowercase value through verbatim is not
		// a valid enum value for the API.
		switch t {
		case "object":
			s.Type = genai.TypeObject
		case "array":
			s.Type = genai.TypeArray
		case "string":
			s.Type = genai.TypeString
		case "number":
			s.Type = genai.TypeNumber
		case "integer":
			s.Type = genai.TypeInteger
		case "boolean":
			s.Type = genai.TypeBoolean
		default:
			s.Type = genai.Type(t) // unknown: pass through unchanged
		}
	}
	if props, ok := m["properties"].(map[string]any); ok {
		s.Properties = make(map[string]*genai.Schema, len(props))
		for k, v := range props {
			if vm, ok := v.(map[string]any); ok {
				s.Properties[k] = schemaFromMap(vm)
			}
		}
	}
	if req, ok := m["required"].([]any); ok {
		for _, r := range req {
			if rs, ok := r.(string); ok {
				s.Required = append(s.Required, rs)
			}
		}
	}
	if desc, ok := m["description"].(string); ok {
		s.Description = desc
	}
	if enum, ok := m["enum"].([]any); ok {
		for _, e := range enum {
			if es, ok := e.(string); ok {
				s.Enum = append(s.Enum, es)
			}
		}
	}
	if items, ok := m["items"].(map[string]any); ok {
		s.Items = schemaFromMap(items)
	}
	if anyOf, ok := m["anyOf"].([]any); ok {
		for _, a := range anyOf {
			if am, ok := a.(map[string]any); ok {
				s.AnyOf = append(s.AnyOf, schemaFromMap(am))
			}
		}
	}
	return s
}

View File

@@ -1,99 +0,0 @@
package ai
import (
"context"
"fmt"
"github.com/VikingOwl91/mistral-go-sdk/chat"
)
type chatFunc func(ctx context.Context, req *chat.CompletionRequest) (*chat.CompletionResponse, error)
type MistralProvider struct {
model string
chatFn chatFunc
limiter *rateLimiter // from ratelimit.go; nil disables
}
func newMistralProviderWithChat(model string, fn chatFunc, limiter *rateLimiter) *MistralProvider {
return &MistralProvider{model: model, chatFn: fn, limiter: limiter}
}
func (p *MistralProvider) Name() string { return "mistral" }
func (p *MistralProvider) SupportsJSONMode() bool { return true }
func (p *MistralProvider) SupportsJSONSchema() bool { return false }
func (p *MistralProvider) Chat(ctx context.Context, req *ChatRequest) (*ChatResponse, error) {
if p.chatFn == nil {
return nil, &ProviderError{Code: ErrInternal, Message: "mistral provider not configured", Retryable: false}
}
if p.limiter != nil {
p.limiter.wait()
}
systemContent := req.SystemPrompt
if len(req.JSONSchema) > 0 {
if systemContent != "" {
systemContent += "\n\n"
}
systemContent += "Respond with a JSON object that conforms to the following JSON Schema. " +
"Do not output anything outside the JSON. Schema:\n" + string(req.JSONSchema)
}
msgs := []chat.Message{}
if systemContent != "" {
msgs = append(msgs, &chat.SystemMessage{Content: chat.TextContent(systemContent)})
}
msgs = append(msgs, &chat.UserMessage{Content: chat.TextContent(req.UserMessage)})
creq := &chat.CompletionRequest{
Model: firstNonEmpty(req.Model, p.model),
Messages: msgs,
}
if req.JSONMode || len(req.JSONSchema) > 0 {
creq.ResponseFormat = &chat.ResponseFormat{Type: "json_object"}
}
if req.Temperature != 0 {
temp := float64(req.Temperature)
creq.Temperature = &temp
}
if req.MaxTokens != 0 {
creq.MaxTokens = &req.MaxTokens
}
resp, err := p.chatFn(ctx, creq)
if err != nil {
return nil, ClassifyError(err)
}
if len(resp.Choices) == 0 {
return nil, &ProviderError{Code: ErrInternal, Message: "no choices in response", Retryable: false}
}
content := resp.Choices[0].Message.Content.String()
if len(req.JSONSchema) > 0 {
if err := ValidateSchema(req.JSONSchema, []byte(content)); err != nil {
return nil, &ProviderError{
Code: ErrSchemaViolation,
Message: fmt.Sprintf("response does not match schema: %v", err),
Retryable: true,
Inner: err,
RawOutput: content,
}
}
}
return &ChatResponse{
Content: content,
Model: resp.Model,
PromptTokens: resp.Usage.PromptTokens,
OutputTokens: resp.Usage.CompletionTokens,
TotalTokens: resp.Usage.TotalTokens,
}, nil
}
// firstNonEmpty returns a unless it is the empty string, in which case b
// is returned (b may itself be empty).
func firstNonEmpty(a, b string) string {
	if a == "" {
		return b
	}
	return a
}

View File

@@ -1,123 +0,0 @@
package ai
import (
	"context"
	"errors"
	"strings"
	"testing"

	"github.com/VikingOwl91/mistral-go-sdk/chat"
)
// TestMistral_Chat_PassesThroughContent verifies that Chat forwards the first
// choice's text and the SDK usage counters into ChatResponse unchanged.
func TestMistral_Chat_PassesThroughContent(t *testing.T) {
	fakeChat := func(ctx context.Context, req *chat.CompletionRequest) (*chat.CompletionResponse, error) {
		return &chat.CompletionResponse{
			Model: "mistral-large-latest",
			Choices: []chat.CompletionChoice{
				{Message: chat.AssistantMessage{Content: chat.TextContent("ok")}},
			},
			Usage: chat.UsageInfo{PromptTokens: 3, CompletionTokens: 1, TotalTokens: 4},
		}, nil
	}
	p := newMistralProviderWithChat("mistral-large-latest", fakeChat, nil)
	resp, err := p.Chat(context.Background(), &ChatRequest{SystemPrompt: "s", UserMessage: "u"})
	if err != nil {
		t.Fatalf("Chat: %v", err)
	}
	if resp.Content != "ok" || resp.TotalTokens != 4 {
		t.Fatalf("unexpected: %+v", resp)
	}
}
// TestMistral_Chat_JSONModeSetsResponseFormat asserts that JSONMode=true maps
// to Mistral's "json_object" response format on the outgoing request.
func TestMistral_Chat_JSONModeSetsResponseFormat(t *testing.T) {
	// Capture the request the provider actually builds.
	var seen *chat.CompletionRequest
	fakeChat := func(ctx context.Context, req *chat.CompletionRequest) (*chat.CompletionResponse, error) {
		seen = req
		return &chat.CompletionResponse{Choices: []chat.CompletionChoice{{Message: chat.AssistantMessage{Content: chat.TextContent("{}")}}}}, nil
	}
	p := newMistralProviderWithChat("m", fakeChat, nil)
	_, err := p.Chat(context.Background(), &ChatRequest{UserMessage: "x", JSONMode: true})
	if err != nil {
		t.Fatalf("Chat: %v", err)
	}
	if seen == nil || seen.ResponseFormat == nil || seen.ResponseFormat.Type != "json_object" {
		t.Fatalf("ResponseFormat not set: %+v", seen)
	}
}
// TestMistral_Chat_SchemaEmbeddedInSystemPromptAndValidated checks that a
// JSONSchema request keeps the caller's system prompt, appends the schema
// instruction to it, and passes a schema-conforming response through unchanged.
func TestMistral_Chat_SchemaEmbeddedInSystemPromptAndValidated(t *testing.T) {
	schema := []byte(`{"type":"object","required":["foo"],"properties":{"foo":{"type":"string"}},"additionalProperties":false}`)
	var seen *chat.CompletionRequest
	fakeChat := func(ctx context.Context, req *chat.CompletionRequest) (*chat.CompletionResponse, error) {
		seen = req
		return &chat.CompletionResponse{Choices: []chat.CompletionChoice{{Message: chat.AssistantMessage{Content: chat.TextContent(`{"foo":"bar"}`)}}}}, nil
	}
	p := newMistralProviderWithChat("m", fakeChat, nil)
	resp, err := p.Chat(context.Background(), &ChatRequest{SystemPrompt: "base system", UserMessage: "x", JSONSchema: schema})
	if err != nil {
		t.Fatalf("Chat: %v", err)
	}
	if resp.Content != `{"foo":"bar"}` {
		t.Fatalf("content: %q", resp.Content)
	}
	// The schema instruction must be merged into the system message, not sent
	// as a separate message.
	sysMsg, ok := seen.Messages[0].(*chat.SystemMessage)
	if !ok {
		t.Fatalf("first message must be system: %T", seen.Messages[0])
	}
	sys := sysMsg.Content.String()
	if !containsAll(sys, []string{"base system", "JSON Schema"}) {
		t.Fatalf("system prompt missing expected fragments: %q", sys)
	}
}
// TestMistral_Chat_SchemaViolationReturnsRetryableError checks that output
// failing schema validation surfaces as a retryable ErrSchemaViolation that
// carries the raw model output for diagnostics.
func TestMistral_Chat_SchemaViolationReturnsRetryableError(t *testing.T) {
	schema := []byte(`{"type":"object","required":["foo"],"properties":{"foo":{"type":"string"}},"additionalProperties":false}`)
	fakeChat := func(ctx context.Context, req *chat.CompletionRequest) (*chat.CompletionResponse, error) {
		// Deliberately violates the schema: "foo" missing, "bar" not allowed.
		return &chat.CompletionResponse{Choices: []chat.CompletionChoice{{Message: chat.AssistantMessage{Content: chat.TextContent(`{"bar":1}`)}}}}, nil
	}
	p := newMistralProviderWithChat("m", fakeChat, nil)
	_, err := p.Chat(context.Background(), &ChatRequest{UserMessage: "x", JSONSchema: schema})
	if err == nil {
		t.Fatal("want error")
	}
	var pe *ProviderError
	if !errors.As(err, &pe) {
		t.Fatalf("want *ProviderError, got %T", err)
	}
	if pe.Code != ErrSchemaViolation || !pe.Retryable || pe.RawOutput != `{"bar":1}` {
		t.Fatalf("unexpected: %+v", pe)
	}
}
// TestMistral_Supports pins the provider's capability flags and name.
func TestMistral_Supports(t *testing.T) {
	p := newMistralProviderWithChat("m", nil, nil)
	if !p.SupportsJSONMode() {
		t.Fatal("Mistral supports JSON mode")
	}
	if p.SupportsJSONSchema() {
		t.Fatal("Mistral does NOT natively support JSON schema (prompt-based only)")
	}
	if p.Name() != "mistral" {
		t.Fatalf("Name: %q", p.Name())
	}
}
// containsAll reports whether every string in parts occurs in s.
func containsAll(s string, parts []string) bool {
	for _, p := range parts {
		if !contains(s, p) {
			return false
		}
	}
	return true
}

// contains reports whether sub occurs in s. Delegates to the standard
// library instead of the previous hand-rolled byte scan; semantics are
// identical (an empty sub always matches).
func contains(s, sub string) bool {
	return strings.Contains(s, sub)
}

// indexOf returns the byte index of the first occurrence of sub in s,
// or -1 when absent. Kept for callers of the old helper; now a thin
// wrapper over strings.Index.
func indexOf(s, sub string) int {
	return strings.Index(s, sub)
}

View File

@@ -1,184 +0,0 @@
package ai
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"sync"
"time"
)
// OllamaConfig configures an OllamaProvider.
type OllamaConfig struct {
	BaseURL string        // base URL of the Ollama HTTP API (no trailing slash)
	Model   string        // initially active model name
	Timeout time.Duration // HTTP client timeout; zero selects the 300s default
}

// OllamaProvider talks to an Ollama server over HTTP. The active model can
// be swapped at runtime, so it is guarded by a read/write mutex.
type OllamaProvider struct {
	cfg    OllamaConfig
	client *http.Client

	mu          sync.RWMutex
	activeModel string
}

// NewOllamaProvider builds a provider from cfg, applying a 300-second
// default timeout when none is given.
func NewOllamaProvider(cfg OllamaConfig) *OllamaProvider {
	const defaultTimeout = 300 * time.Second
	if cfg.Timeout == 0 {
		cfg.Timeout = defaultTimeout
	}
	return &OllamaProvider{
		cfg:         cfg,
		client:      &http.Client{Timeout: cfg.Timeout},
		activeModel: cfg.Model,
	}
}
// Name identifies this provider in logs and usage records.
func (p *OllamaProvider) Name() string { return "ollama" }

// SupportsJSONMode reports native support for the generic "json" format.
func (p *OllamaProvider) SupportsJSONMode() bool { return true }

// SupportsJSONSchema reports native support for schema-constrained output
// (the schema is forwarded as the request's "format" field).
func (p *OllamaProvider) SupportsJSONSchema() bool { return true }
// BaseURL returns the configured Ollama endpoint.
func (p *OllamaProvider) BaseURL() string {
	return p.cfg.BaseURL
}

// Model returns the currently active model. Safe for concurrent use.
func (p *OllamaProvider) Model() string {
	p.mu.RLock()
	defer p.mu.RUnlock()
	return p.activeModel
}

// SetModel switches the active model used by subsequent Chat calls.
func (p *OllamaProvider) SetModel(model string) {
	p.mu.Lock()
	defer p.mu.Unlock()
	p.activeModel = model
}
// OllamaModelInfo is a model entry from Ollama's /api/tags response.
type OllamaModelInfo struct {
	Name string `json:"name"` // model name including tag
	Size int64  `json:"size"` // size as reported by Ollama — presumably bytes on disk; confirm against API docs
}
// ListModels calls Ollama's /api/tags endpoint and returns available models.
func (p *OllamaProvider) ListModels(ctx context.Context) ([]OllamaModelInfo, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, p.cfg.BaseURL+"/api/tags", nil)
	if err != nil {
		return nil, err
	}
	resp, err := p.client.Do(req)
	if err != nil {
		return nil, err
	}
	defer func() { _ = resp.Body.Close() }()

	if resp.StatusCode != http.StatusOK {
		b, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("ollama /api/tags: status %d: %s", resp.StatusCode, b)
	}

	// Decode only the fields we care about.
	var payload struct {
		Models []struct {
			Name string `json:"name"`
			Size int64  `json:"size"`
		} `json:"models"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
		return nil, err
	}

	models := make([]OllamaModelInfo, 0, len(payload.Models))
	for _, m := range payload.Models {
		models = append(models, OllamaModelInfo{Name: m.Name, Size: m.Size})
	}
	return models, nil
}
// ollamaChatReq is the request body for POST /api/chat.
type ollamaChatReq struct {
	Model    string          `json:"model"`
	Messages []ollamaMessage `json:"messages"`
	Stream   bool            `json:"stream"` // always false here: one complete JSON response
	// Format is either the literal string "json" (generic JSON mode) or an
	// inline JSON schema object that Ollama enforces natively.
	Format  json.RawMessage `json:"format,omitempty"`
	Options *ollamaOptions  `json:"options,omitempty"`
}

// ollamaMessage is a single chat turn on the wire.
type ollamaMessage struct {
	Role    string `json:"role"` // "system", "user", or "assistant"
	Content string `json:"content"`
}

// ollamaOptions carries optional sampling parameters; zero values are omitted.
type ollamaOptions struct {
	Temperature float32 `json:"temperature,omitempty"`
	NumPredict  int     `json:"num_predict,omitempty"` // max tokens to generate
}

// ollamaChatResp is the subset of Ollama's /api/chat response we consume.
type ollamaChatResp struct {
	Model           string        `json:"model"`
	Message         ollamaMessage `json:"message"`
	Done            bool          `json:"done"`
	PromptEvalCount int           `json:"prompt_eval_count"` // input token count
	EvalCount       int           `json:"eval_count"`        // output token count
}
// Chat implements the Provider interface against Ollama's non-streaming
// /api/chat endpoint.
//
// Format selection: a JSONSchema takes precedence and is forwarded verbatim
// as the "format" field; otherwise JSONMode requests the generic "json"
// format. Temperature/MaxTokens are only sent when non-zero. Transport
// errors and HTTP >=400 responses are mapped through ClassifyError so
// callers get retryability information.
func (p *OllamaProvider) Chat(ctx context.Context, req *ChatRequest) (*ChatResponse, error) {
	// Per-request model override falls back to the provider's active model.
	model := req.Model
	if model == "" {
		model = p.Model()
	}
	body := ollamaChatReq{
		Model:    model,
		Messages: buildOllamaMessages(req),
		Stream:   false,
	}
	switch {
	case len(req.JSONSchema) > 0:
		body.Format = req.JSONSchema
	case req.JSONMode:
		body.Format = json.RawMessage(`"json"`)
	}
	if req.Temperature != 0 || req.MaxTokens != 0 {
		body.Options = &ollamaOptions{Temperature: req.Temperature, NumPredict: req.MaxTokens}
	}
	buf, err := json.Marshal(body)
	if err != nil {
		return nil, &ProviderError{Code: ErrInvalidRequest, Message: "marshal request", Retryable: false, Inner: err}
	}
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, p.cfg.BaseURL+"/api/chat", bytes.NewReader(buf))
	if err != nil {
		return nil, &ProviderError{Code: ErrInternal, Message: "new request", Retryable: false, Inner: err}
	}
	httpReq.Header.Set("Content-Type", "application/json")
	resp, err := p.client.Do(httpReq)
	if err != nil {
		return nil, ClassifyError(err)
	}
	defer func() { _ = resp.Body.Close() }()
	if resp.StatusCode >= 400 {
		// Include the response body in the classified error for diagnostics.
		b, _ := io.ReadAll(resp.Body)
		pe := ClassifyError(fmt.Errorf("ollama status %d: %s", resp.StatusCode, string(b)))
		return nil, pe
	}
	var out ollamaChatResp
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		return nil, &ProviderError{Code: ErrInternal, Message: "decode response", Retryable: false, Inner: err}
	}
	return &ChatResponse{
		Content:      out.Message.Content,
		Model:        out.Model,
		PromptTokens: out.PromptEvalCount,
		OutputTokens: out.EvalCount,
		TotalTokens:  out.PromptEvalCount + out.EvalCount,
	}, nil
}
// buildOllamaMessages converts a ChatRequest into Ollama wire messages,
// skipping empty prompts. The result is never nil (an empty slice encodes
// as [] rather than null in JSON).
func buildOllamaMessages(req *ChatRequest) []ollamaMessage {
	turns := []struct {
		role    string
		content string
	}{
		{"system", req.SystemPrompt},
		{"user", req.UserMessage},
	}
	out := make([]ollamaMessage, 0, 2)
	for _, tr := range turns {
		if tr.content != "" {
			out = append(out, ollamaMessage{Role: tr.role, Content: tr.content})
		}
	}
	return out
}

View File

@@ -1,98 +0,0 @@
package ai
import (
"context"
"encoding/json"
"io"
"net/http"
"net/http/httptest"
"testing"
"time"
)
// TestOllama_Chat_SendsRequestAndParsesResponse drives Chat against a stub
// HTTP server and checks both the outgoing wire format (path, stream=false,
// format="json") and the parsed ChatResponse fields.
func TestOllama_Chat_SendsRequestAndParsesResponse(t *testing.T) {
	var captured map[string]any
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/api/chat" {
			t.Errorf("path: got %s, want /api/chat", r.URL.Path)
		}
		body, _ := io.ReadAll(r.Body)
		_ = json.Unmarshal(body, &captured)
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{
			"model":"qwen2.5:14b-instruct",
			"message":{"role":"assistant","content":"hello"},
			"done":true,
			"prompt_eval_count":10,
			"eval_count":5
		}`))
	}))
	defer srv.Close()
	p := NewOllamaProvider(OllamaConfig{BaseURL: srv.URL, Model: "qwen2.5:14b-instruct", Timeout: 5 * time.Second})
	resp, err := p.Chat(context.Background(), &ChatRequest{SystemPrompt: "be brief", UserMessage: "hi", JSONMode: true})
	if err != nil {
		t.Fatalf("Chat: %v", err)
	}
	if resp.Content != "hello" {
		t.Fatalf("content: got %q", resp.Content)
	}
	if resp.PromptTokens != 10 || resp.OutputTokens != 5 || resp.TotalTokens != 15 {
		t.Fatalf("tokens: %+v", resp)
	}
	if captured["stream"] != false {
		t.Fatalf("stream must be false: %v", captured["stream"])
	}
	if captured["format"] != "json" {
		t.Fatalf("format for JSONMode=true must be \"json\", got %v", captured["format"])
	}
}
// TestOllama_Chat_ForwardsSchema asserts a JSONSchema is forwarded verbatim
// as a structured "format" object rather than the generic "json" string.
func TestOllama_Chat_ForwardsSchema(t *testing.T) {
	var captured map[string]any
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		body, _ := io.ReadAll(r.Body)
		_ = json.Unmarshal(body, &captured)
		_, _ = w.Write([]byte(`{"model":"m","message":{"role":"assistant","content":"{}"},"done":true}`))
	}))
	defer srv.Close()
	p := NewOllamaProvider(OllamaConfig{BaseURL: srv.URL, Model: "m", Timeout: time.Second})
	schema := []byte(`{"type":"object"}`)
	_, err := p.Chat(context.Background(), &ChatRequest{UserMessage: "x", JSONSchema: schema})
	if err != nil {
		t.Fatalf("Chat: %v", err)
	}
	fmtField, ok := captured["format"].(map[string]any)
	if !ok {
		t.Fatalf("format must be an object when JSONSchema set: %v", captured["format"])
	}
	if fmtField["type"] != "object" {
		t.Fatalf("schema not forwarded: %v", fmtField)
	}
}
// TestOllama_Chat_Unavailable points the provider at an unroutable address
// and checks the failure classifies as a retryable Unavailable/Timeout.
func TestOllama_Chat_Unavailable(t *testing.T) {
	// Port 1 is effectively never listening; connect fails fast.
	p := NewOllamaProvider(OllamaConfig{BaseURL: "http://127.0.0.1:1", Timeout: 100 * time.Millisecond})
	_, err := p.Chat(context.Background(), &ChatRequest{UserMessage: "x"})
	if err == nil {
		t.Fatal("want error, got nil")
	}
	pe := ClassifyError(err)
	if pe.Code != ErrUnavailable && pe.Code != ErrTimeout {
		t.Fatalf("expected Unavailable or Timeout, got %v", pe.Code)
	}
	if !pe.Retryable {
		t.Fatal("must be retryable")
	}
}
// TestOllama_Supports pins the provider's capability flags and name.
func TestOllama_Supports(t *testing.T) {
	p := NewOllamaProvider(OllamaConfig{BaseURL: "x"})
	if !p.SupportsJSONMode() || !p.SupportsJSONSchema() {
		t.Fatal("Ollama supports both")
	}
	if p.Name() != "ollama" {
		t.Fatalf("Name: %q", p.Name())
	}
}

View File

@@ -12,6 +12,14 @@ type Provider interface {
SupportsJSONSchema() bool
}
// ModelSelector is implemented by providers that support runtime model switching.
type ModelSelector interface {
	// Model returns the currently active model name.
	Model() string
	// SetModel switches subsequent calls to the named model.
	SetModel(string)
	// ListModelNames returns the model names the backend has available.
	ListModelNames(ctx context.Context) ([]string, error)
	// BaseURL reports the backend endpoint the provider talks to.
	BaseURL() string
}
type ChatRequest struct {
SystemPrompt string
UserMessage string
@@ -20,12 +28,15 @@ type ChatRequest struct {
Temperature float32
JSONMode bool
JSONSchema json.RawMessage
Grounded bool // request Google Search grounding (Gemini only)
CallType string // e.g. "research", "enrich_b", "similarity" — for usage tracking
}
type ChatResponse struct {
Content string
Model string
PromptTokens int
OutputTokens int
TotalTokens int
Content string
Model string
PromptTokens int
OutputTokens int
TotalTokens int
SearchQueries []string // populated when grounding was used
}

View File

@@ -1,48 +0,0 @@
package ai
import (
"sort"
"sync"
"testing"
"time"
)
// TestRateLimiterSerializesCalls launches three concurrent waiters on a
// 2 req/s limiter and asserts consecutive completion times are spaced by
// roughly the 500ms minimum interval (400ms lower bound tolerates
// scheduling jitter).
func TestRateLimiterSerializesCalls(t *testing.T) {
	rl := newRateLimiter(2.0) // 2 req/s → minInterval 500ms
	var (
		mu    sync.Mutex
		times []time.Time
	)
	var wg sync.WaitGroup
	for i := 0; i < 3; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			rl.wait()
			mu.Lock()
			times = append(times, time.Now())
			mu.Unlock()
		}()
	}
	wg.Wait()
	// Sort times; gaps between consecutive must be >= 500ms - small tolerance.
	sort.Slice(times, func(i, j int) bool { return times[i].Before(times[j]) })
	if gap := times[1].Sub(times[0]); gap < 400*time.Millisecond {
		t.Errorf("gap[0->1] = %v, want >= 400ms", gap)
	}
	if gap := times[2].Sub(times[1]); gap < 400*time.Millisecond {
		t.Errorf("gap[1->2] = %v, want >= 400ms", gap)
	}
}
// TestRateLimiterDisabledWhenRPSZero checks that rps=0 builds a no-op
// limiter: five back-to-back waits must not add measurable delay.
func TestRateLimiterDisabledWhenRPSZero(t *testing.T) {
	rl := newRateLimiter(0) // disabled
	start := time.Now()
	for i := 0; i < 5; i++ {
		rl.wait()
	}
	if elapsed := time.Since(start); elapsed > 50*time.Millisecond {
		t.Errorf("expected no throttling when rps=0, elapsed %v", elapsed)
	}
}

View File

@@ -1,33 +0,0 @@
package ai
import (
"sync"
"time"
)
// rateLimiter enforces a minimum interval between calls. Set rps<=0 to disable.
type rateLimiter struct {
	mu          sync.Mutex
	lastReq     time.Time
	minInterval time.Duration
}

// newRateLimiter builds a limiter spacing calls 1/rps seconds apart.
// A non-positive rps yields a no-op limiter (minInterval 0).
func newRateLimiter(rps float64) *rateLimiter {
	rl := &rateLimiter{}
	if rps > 0 {
		rl.minInterval = time.Duration(float64(time.Second) / rps)
	}
	return rl
}

// wait blocks until at least minInterval has elapsed since the previous
// call. Sleeping while holding the mutex is intentional: it serializes
// concurrent callers so they are released one interval apart.
func (rl *rateLimiter) wait() {
	if rl.minInterval == 0 {
		return
	}
	rl.mu.Lock()
	defer rl.mu.Unlock()
	if pause := rl.minInterval - time.Since(rl.lastReq); pause > 0 {
		time.Sleep(pause)
	}
	rl.lastReq = time.Now()
}

View File

@@ -0,0 +1,22 @@
package ai
import "context"
// UsageEvent holds per-call telemetry recorded after each LLM call.
type UsageEvent struct {
	Provider         string  // provider name as reported by Provider.Name()
	Model            string  // model that served the call
	CallType         string  // caller-supplied label, e.g. "research", "enrich_b", "similarity"
	InputTokens      int     // prompt tokens consumed
	OutputTokens     int     // completion tokens produced
	Grounded         bool    // whether search grounding was requested
	DurationMs       int     // call duration in milliseconds
	EstimatedCostUSD float64 // best-effort cost estimate in USD
	Error            string  // empty on success
}

// UsageRecorder persists a UsageEvent. Implementations must be safe for
// concurrent use. A nil UsageRecorder is valid and silently discards events.
type UsageRecorder interface {
	Record(ctx context.Context, e UsageEvent) error
}

View File

@@ -0,0 +1,63 @@
package crypto
import (
"crypto/aes"
"crypto/cipher"
"crypto/rand"
"crypto/sha256"
"errors"
"io"
"golang.org/x/crypto/hkdf"
)
// ErrDecryptFailed is returned for any ciphertext that cannot be
// authenticated and decrypted (truncated, tampered, or wrong key).
var ErrDecryptFailed = errors.New("secretbox: decryption failed")

// DeriveKey derives a 32-byte AES key from an arbitrary secret using
// HKDF-SHA256 with a fixed application-specific info string.
func DeriveKey(secret []byte) ([32]byte, error) {
	var key [32]byte
	kdf := hkdf.New(sha256.New, secret, nil, []byte("marktvogt:settings:v1"))
	_, err := io.ReadFull(kdf, key[:])
	return key, err
}
// Seal encrypts plaintext with AES-256-GCM under a freshly generated
// random nonce. Output format: nonce(12) || ciphertext+tag.
func Seal(key [32]byte, plaintext []byte) ([]byte, error) {
	blk, err := aes.NewCipher(key[:])
	if err != nil {
		return nil, err
	}
	aead, err := cipher.NewGCM(blk)
	if err != nil {
		return nil, err
	}
	// Standard GCM nonce size is 12 bytes; a fresh nonce per call keeps
	// identical plaintexts from producing identical ciphertexts.
	nonce := make([]byte, aead.NonceSize())
	if _, err := io.ReadFull(rand.Reader, nonce); err != nil {
		return nil, err
	}
	// Prepend the nonce so Open can recover it from the blob itself.
	return aead.Seal(nonce, nonce, plaintext, nil), nil
}
// Open decrypts a ciphertext produced by Seal. All authentication and
// length failures collapse into ErrDecryptFailed so callers cannot
// distinguish tampering from a wrong key.
func Open(key [32]byte, ciphertext []byte) ([]byte, error) {
	blk, err := aes.NewCipher(key[:])
	if err != nil {
		return nil, err
	}
	aead, err := cipher.NewGCM(blk)
	if err != nil {
		return nil, err
	}
	n := aead.NonceSize()
	if len(ciphertext) < n {
		return nil, ErrDecryptFailed
	}
	nonce, sealed := ciphertext[:n], ciphertext[n:]
	plaintext, err := aead.Open(nil, nonce, sealed, nil)
	if err != nil {
		return nil, ErrDecryptFailed
	}
	return plaintext, nil
}

View File

@@ -0,0 +1,61 @@
package crypto
import (
"bytes"
"errors"
"testing"
)
// TestRoundTrip checks that Seal→Open restores the plaintext under the
// same derived key.
func TestRoundTrip(t *testing.T) {
	key, err := DeriveKey([]byte("test-secret"))
	if err != nil {
		t.Fatal(err)
	}
	plaintext := []byte("super-secret-api-key-AIzaSyXXXX")
	ct, err := Seal(key, plaintext)
	if err != nil {
		t.Fatal(err)
	}
	got, err := Open(key, ct)
	if err != nil {
		t.Fatal(err)
	}
	if !bytes.Equal(got, plaintext) {
		t.Fatalf("got %q, want %q", got, plaintext)
	}
}
// TestTamperedCiphertext flips the last ciphertext byte and expects GCM
// authentication to fail with ErrDecryptFailed.
func TestTamperedCiphertext(t *testing.T) {
	key, err := DeriveKey([]byte("test-secret"))
	if err != nil {
		t.Fatal(err)
	}
	ct, err := Seal(key, []byte("value"))
	if err != nil {
		t.Fatal(err)
	}
	ct[len(ct)-1] ^= 0xFF
	_, err = Open(key, ct)
	if !errors.Is(err, ErrDecryptFailed) {
		t.Fatalf("want ErrDecryptFailed, got %v", err)
	}
}
// TestWrongKey ensures a key derived from a different secret cannot open
// the box.
func TestWrongKey(t *testing.T) {
	k1, _ := DeriveKey([]byte("key-one"))
	k2, _ := DeriveKey([]byte("key-two"))
	ct, _ := Seal(k1, []byte("value"))
	_, err := Open(k2, ct)
	if !errors.Is(err, ErrDecryptFailed) {
		t.Fatalf("want ErrDecryptFailed, got %v", err)
	}
}
// TestSealProducesUniqueNonces seals the same plaintext twice and expects
// differing ciphertexts, since Seal must draw a fresh random nonce per call.
func TestSealProducesUniqueNonces(t *testing.T) {
	key, _ := DeriveKey([]byte("k"))
	ct1, _ := Seal(key, []byte("x"))
	ct2, _ := Seal(key, []byte("x"))
	if bytes.Equal(ct1, ct2) {
		t.Fatal("same nonce produced twice — randomness broken")
	}
}

View File

@@ -1,6 +1,7 @@
package server
import (
"context"
"fmt"
"net/http"
@@ -15,6 +16,7 @@ import (
"marktvogt.de/backend/internal/domain/user"
"marktvogt.de/backend/internal/middleware"
"marktvogt.de/backend/internal/pkg/ai"
apicrypto "marktvogt.de/backend/internal/pkg/crypto"
"marktvogt.de/backend/internal/pkg/email"
"marktvogt.de/backend/internal/pkg/geocode"
"marktvogt.de/backend/internal/pkg/scrape"
@@ -69,12 +71,23 @@ func (s *Server) registerRoutes() {
geocodeLimit := middleware.RateLimit(10.0/60.0, 10) // 10 per minute per IP
market.RegisterRoutes(v1, marketHandler, submissionHandler, geocodeHandler, submitLimit, geocodeLimit)
// Admin market routes
adminMarketHandler := market.NewAdminHandler(marketSvc)
aiProvider, err := ai.NewFromConfig(s.cfg.AI)
// AI settings store + usage repo — used by AI provider and settings handler
encKey, err := apicrypto.DeriveKey([]byte(s.cfg.JWT.Secret))
if err != nil {
panic(fmt.Errorf("derive settings encryption key: %w", err))
}
settingsStore := settings.NewStore(s.db, encKey)
usageRepo := settings.NewUsageRepo(s.db)
// AI provider — reads key from DB, falls back to GEMINI_API_KEY env bootstrap
ctx := context.Background()
aiProvider, err := ai.NewFromConfig(ctx, s.cfg.AI, settingsStore, usageRepo)
if err != nil {
panic(fmt.Errorf("init ai provider: %w", err))
}
// Admin market routes
adminMarketHandler := market.NewAdminHandler(marketSvc)
scraper := scrape.New(s.cfg.Discovery.CrawlerUserAgent)
searchClient := search.NewSearxng(search.SearxngConfig{BaseURL: s.cfg.Search.SearxngURL})
researchHandler := market.NewResearchHandler(marketSvc, aiProvider, searchClient, scraper)
@@ -92,7 +105,7 @@ func (s *Server) registerRoutes() {
discovery.RegisterRoutes(v1, discoveryHandler, requireAuth, requireAdmin, requireTickToken)
// AI settings routes
settingsHandler := settings.NewHandler(aiProvider)
settingsHandler := settings.NewHandler(aiProvider, settingsStore, usageRepo)
settings.RegisterRoutes(v1, settingsHandler, requireAuth, requireAdmin)
}

View File

@@ -0,0 +1 @@
-- Down migration: drop the settings table. This discards the stored model
-- choice, grounding toggle, and the encrypted API key.
DROP TABLE IF EXISTS system_settings;

View File

@@ -0,0 +1,11 @@
-- Key/value settings store. Plaintext values go in value_text; secrets
-- (e.g. the Gemini API key) are stored encrypted in value_encrypted.
CREATE TABLE system_settings (
    key TEXT PRIMARY KEY,
    value_text TEXT,
    value_encrypted BYTEA,
    updated_at TIMESTAMPTZ NOT NULL DEFAULT now(),
    -- Last admin who changed the row; nulled out if that user is deleted.
    updated_by UUID REFERENCES users (id) ON DELETE SET NULL
);

-- Seed defaults so the application can read settings before any admin edit.
INSERT INTO system_settings (key, value_text) VALUES
('gemini.model', 'gemini-2.5-flash-lite'),
('gemini.grounding_enabled', 'true');

View File

@@ -0,0 +1 @@
-- Down migration: drop the AI usage telemetry table (history is lost).
DROP TABLE IF EXISTS ai_usage;

View File

@@ -0,0 +1,16 @@
-- Per-call LLM telemetry: one row per provider invocation.
CREATE TABLE ai_usage (
    id BIGSERIAL PRIMARY KEY,
    created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
    provider TEXT NOT NULL,
    model TEXT NOT NULL,
    call_type TEXT NOT NULL, -- caller label, e.g. 'research', 'enrich_b', 'similarity'
    input_tokens INT NOT NULL DEFAULT 0,
    output_tokens INT NOT NULL DEFAULT 0,
    grounded BOOLEAN NOT NULL DEFAULT false, -- search grounding was requested
    duration_ms INT NOT NULL DEFAULT 0,
    estimated_cost_usd NUMERIC(10,6) NOT NULL DEFAULT 0,
    error TEXT -- NULL on success
);

-- Newest-first listing for the recent-calls table.
CREATE INDEX ai_usage_created_at_desc_idx ON ai_usage (created_at DESC);
-- Partial index to count grounded calls per day against the free quota.
CREATE INDEX ai_usage_grounded_today_idx ON ai_usage (created_at) WHERE grounded;

View File

@@ -184,12 +184,41 @@ export interface SubmitMarketRequest {
}
// AI settings
/** Aggregated usage counters for one rollup window (e.g. today or 30 days). */
export interface AIUsageStats {
	calls: number;
	input_tokens: number;
	output_tokens: number;
	/** Number of calls in the window that used search grounding. */
	grounding_calls: number;
	estimated_cost_usd: number;
}
/** One recorded LLM call, as returned by the AI usage listing endpoint. */
export interface AIUsageEvent {
	id: number;
	/** ISO timestamp of the call. */
	created_at: string;
	provider: string;
	model: string;
	/** Caller-supplied label, e.g. "research", "enrich_b", "similarity". */
	call_type: string;
	input_tokens: number;
	output_tokens: number;
	grounded: boolean;
	duration_ms: number;
	estimated_cost_usd: number;
	/** Set when the call failed; absent on success. */
	error?: string;
}
/** AI provider status + usage rollups; drives the admin settings page. */
export interface AIStatus {
	provider: string;
	connected: boolean;
	base_url?: string;
	model: string;
	/** Models selectable in the UI. */
	models: string[];
	/** Short fingerprint of the stored API key; absent when none is set. */
	api_key_fingerprint?: string;
	grounding_enabled: boolean;
	/** Daily grounding request quota shown next to the toggle. */
	grounding_quota: number;
	usage: {
		today: AIUsageStats;
		month: AIUsageStats;
		grounding_used_today: number;
	};
}
// AI Research types

View File

@@ -1,11 +1,14 @@
import { fail } from '@sveltejs/kit';
import { serverFetch } from '$lib/api/client.server.js';
import type { AIStatus } from '$lib/api/types.js';
import type { AIStatus, AIUsageEvent } from '$lib/api/types.js';
import type { Actions, PageServerLoad } from './$types.js';
export const load: PageServerLoad = async ({ cookies, fetch }) => {
const res = await serverFetch<AIStatus>('/admin/settings/ai', cookies, { fetch });
return { ai: res.data };
const [statusRes, usageRes] = await Promise.all([
serverFetch<AIStatus>('/admin/settings/ai', cookies, { fetch }),
serverFetch<AIUsageEvent[]>('/admin/settings/ai/usage?limit=20', cookies, { fetch })
]);
return { ai: statusRes.data, recentUsage: usageRes.data ?? [] };
};
export const actions: Actions = {
@@ -21,10 +24,46 @@ export const actions: Actions = {
body: JSON.stringify({ model }),
fetch
});
return { success: true, model };
return { success: true, action: 'model', model };
} catch (err) {
const message = err instanceof Error ? err.message : 'Modell konnte nicht gesetzt werden.';
return fail(500, { error: message });
return fail(500, { error: err instanceof Error ? err.message : 'Fehler beim Speichern.' });
}
},
	/** Stores a new API key; the backend persists it and returns a fingerprint. */
	setApiKey: async ({ cookies, fetch, request }) => {
		const data = await request.formData();
		const api_key = data.get('api_key');
		if (!api_key || typeof api_key !== 'string') {
			return fail(400, { error: 'API-Key fehlt.' });
		}
		try {
			const res = await serverFetch<{ api_key_fingerprint: string }>(
				'/admin/settings/ai/key',
				cookies,
				{
					method: 'POST',
					body: JSON.stringify({ api_key }),
					fetch
				}
			);
			// `action` lets the page discriminate which card's success message to show.
			return { success: true, action: 'apiKey', fingerprint: res.data.api_key_fingerprint };
		} catch (err) {
			return fail(500, { error: err instanceof Error ? err.message : 'Fehler beim Speichern.' });
		}
	},
	/** Toggles search grounding on or off via the settings endpoint. */
	setGrounding: async ({ cookies, fetch, request }) => {
		const data = await request.formData();
		// The form posts the literal string 'true'/'false' in a hidden input.
		const enabled = data.get('enabled') === 'true';
		try {
			await serverFetch('/admin/settings/ai/grounding', cookies, {
				method: 'POST',
				body: JSON.stringify({ enabled }),
				fetch
			});
			return { success: true, action: 'grounding', enabled };
		} catch (err) {
			return fail(500, { error: err instanceof Error ? err.message : 'Fehler beim Speichern.' });
		}
	}
};

View File

@@ -12,8 +12,22 @@
let selectedModel = $state(untrack(() => data.ai.model));
let saving = $state(false);
let showKeyInput = $state(!data.ai.api_key_fingerprint);
let groundingEnabled = $state(untrack(() => data.ai.grounding_enabled));
let activeModel = $derived(form?.success && form.model ? form.model : data.ai.model);
let activeModel = $derived(
form?.success && form.action === 'model' && form.model ? form.model : data.ai.model
);
function formatCost(usd: number): string {
if (usd === 0) return '$0.00';
if (usd < 0.001) return '<$0.001';
return `$${usd.toFixed(3)}`;
}
	/** Format an ISO timestamp for the recent-calls table (German short form). */
	function formatDate(iso: string): string {
		return new Date(iso).toLocaleString('de-DE', { dateStyle: 'short', timeStyle: 'short' });
	}
</script>
<div class="space-y-6">
@@ -24,13 +38,13 @@
</p>
</div>
<!-- Card 1: Provider & API Key -->
<div class="rounded-lg border border-stone-200 bg-white dark:border-stone-700 dark:bg-stone-900">
<div class="border-b border-stone-200 px-6 py-4 dark:border-stone-700">
<h2 class="text-base font-semibold text-stone-900 dark:text-stone-100">KI-Provider</h2>
</div>
<div class="space-y-4 px-6 py-4">
<!-- Provider + status row -->
<!-- Provider + status -->
<div class="flex items-center gap-4">
<div>
<span class="text-xs font-medium tracking-wide text-stone-400 uppercase">Provider</span>
@@ -38,16 +52,6 @@
{data.ai.provider}
</p>
</div>
{#if data.ai.base_url}
<div>
<span class="text-xs font-medium tracking-wide text-stone-400 uppercase">URL</span>
<p class="mt-0.5 font-mono text-sm text-stone-600 dark:text-stone-400">
{data.ai.base_url}
</p>
</div>
{/if}
<div class="ml-auto">
{#if data.ai.connected}
<span
@@ -67,69 +71,236 @@
</div>
</div>
<!-- Model selector (Ollama only) -->
{#if data.ai.provider === 'ollama'}
<div class="border-t border-stone-100 pt-4 dark:border-stone-800">
<span class="text-xs font-medium tracking-wide text-stone-400 uppercase"
>Aktives Modell</span
>
{#if data.ai.connected && data.ai.models.length > 0}
<form
method="POST"
action="?/setModel"
use:enhance={() => {
saving = true;
return async ({ update }) => {
await update();
saving = false;
};
}}
class="mt-2 flex items-center gap-3"
<!-- API Key -->
<div class="border-t border-stone-100 pt-4 dark:border-stone-800">
<span class="text-xs font-medium tracking-wide text-stone-400 uppercase">API-Key</span>
{#if !showKeyInput && data.ai.api_key_fingerprint}
<div class="mt-1 flex items-center gap-3">
<p class="font-mono text-sm text-stone-600 dark:text-stone-400">
{data.ai.api_key_fingerprint}
</p>
<button
onclick={() => (showKeyInput = true)}
class="text-xs text-stone-400 underline hover:text-stone-600 dark:hover:text-stone-300"
>
<select
name="model"
bind:value={selectedModel}
class="focus:border-primary-500 focus:ring-primary-500 rounded-md border border-stone-300 bg-white px-3 py-2 text-sm text-stone-900 shadow-sm focus:ring-1 focus:outline-none dark:border-stone-600 dark:bg-stone-800 dark:text-stone-100"
Ersetzen
</button>
</div>
{#if form?.success && form.action === 'apiKey'}
<p class="mt-1 text-xs text-green-600 dark:text-green-400">
API-Key gespeichert. Neuer Fingerprint: <span class="font-mono"
>{form.fingerprint}</span
>
{#each data.ai.models as model}
<option value={model}>{model}</option>
{/each}
</select>
<button
type="submit"
disabled={saving || selectedModel === activeModel}
class="bg-primary-600 hover:bg-primary-700 rounded-md px-4 py-2 text-sm font-medium text-white shadow-sm disabled:cursor-not-allowed disabled:opacity-50"
>
{saving ? 'Speichert…' : 'Übernehmen'}
</button>
</form>
{#if form?.success}
<p class="mt-2 text-xs text-green-600 dark:text-green-400">
Modell auf <span class="font-mono">{form.model}</span> gesetzt.
</p>
{/if}
{#if form?.error}
<p class="mt-2 text-xs text-red-600 dark:text-red-400">{form.error}</p>
{/if}
{:else if data.ai.connected}
<p class="mt-1 text-sm text-stone-500 dark:text-stone-400">
Keine Modelle gefunden. Installiere ein Modell mit
<code class="rounded bg-stone-100 px-1 py-0.5 font-mono text-xs dark:bg-stone-800">
ollama pull &lt;model&gt;
</code>
</p>
{:else}
<p class="mt-1 font-mono text-sm text-stone-500 dark:text-stone-400">
{data.ai.model || '—'}
</p>
<p class="mt-1 text-xs text-red-500">
Ollama nicht erreichbar — prüfe ob der Dienst läuft.
</p>
{/if}
{:else}
<form
method="POST"
action="?/setApiKey"
use:enhance={() => {
saving = true;
return async ({ update }) => {
await update();
saving = false;
showKeyInput = false;
};
}}
class="mt-2 flex items-center gap-3"
>
<input
type="password"
name="api_key"
placeholder="AIzaSy…"
required
autocomplete="off"
class="focus:border-primary-500 focus:ring-primary-500 flex-1 rounded-md border border-stone-300 bg-white px-3 py-2 text-sm text-stone-900 shadow-sm focus:ring-1 focus:outline-none dark:border-stone-600 dark:bg-stone-800 dark:text-stone-100"
/>
<button
type="submit"
disabled={saving}
class="bg-primary-600 hover:bg-primary-700 rounded-md px-4 py-2 text-sm font-medium text-white shadow-sm disabled:opacity-50"
>
{saving ? 'Speichert…' : 'Speichern'}
</button>
{#if data.ai.api_key_fingerprint}
<button
type="button"
onclick={() => (showKeyInput = false)}
class="text-xs text-stone-400 underline"
>
Abbrechen
</button>
{/if}
</form>
{#if form?.error}
<p class="mt-1 text-xs text-red-500">{form.error}</p>
{/if}
{#if !data.ai.api_key_fingerprint}
<p class="mt-1 text-xs text-amber-600 dark:text-amber-400">
Kein API-Key konfiguriert — KI-Funktionen sind deaktiviert.
</p>
{/if}
{/if}
</div>
</div>
</div>
<!-- Card 2: Model Selector -->
<div class="rounded-lg border border-stone-200 bg-white dark:border-stone-700 dark:bg-stone-900">
<div class="border-b border-stone-200 px-6 py-4 dark:border-stone-700">
<h2 class="text-base font-semibold text-stone-900 dark:text-stone-100">Aktives Modell</h2>
</div>
<div class="px-6 py-4">
{#if data.ai.connected && data.ai.models.length > 0}
<form
method="POST"
action="?/setModel"
use:enhance={() => {
saving = true;
return async ({ update }) => {
await update();
saving = false;
};
}}
class="flex items-center gap-3"
>
<select
name="model"
bind:value={selectedModel}
class="focus:border-primary-500 focus:ring-primary-500 rounded-md border border-stone-300 bg-white px-3 py-2 text-sm text-stone-900 shadow-sm focus:ring-1 focus:outline-none dark:border-stone-600 dark:bg-stone-800 dark:text-stone-100"
>
{#each data.ai.models as model}
<option value={model}>{model}</option>
{/each}
</select>
<button
type="submit"
disabled={saving || selectedModel === activeModel}
class="bg-primary-600 hover:bg-primary-700 rounded-md px-4 py-2 text-sm font-medium text-white shadow-sm disabled:cursor-not-allowed disabled:opacity-50"
>
{saving ? 'Speichert…' : 'Übernehmen'}
</button>
</form>
{#if form?.success && form.action === 'model'}
<p class="mt-2 text-xs text-green-600 dark:text-green-400">
Modell auf <span class="font-mono">{form.model}</span> gesetzt.
</p>
{/if}
{:else if !data.ai.connected}
<p class="font-mono text-sm text-stone-500 dark:text-stone-400">{data.ai.model || '—'}</p>
<p class="mt-1 text-xs text-stone-400">Verbinde zuerst den API-Key.</p>
{:else}
<p class="font-mono text-sm text-stone-500 dark:text-stone-400">{data.ai.model || '—'}</p>
<p class="mt-1 text-xs text-stone-400">Keine Modelle geladen.</p>
{/if}
</div>
</div>
<!-- Card 3: Grounding — toggle for mixing Google-Search results into AI requests,
     with the free-tier quota counter (used today / daily quota) from the load data. -->
<div class="rounded-lg border border-stone-200 bg-white dark:border-stone-700 dark:bg-stone-900">
<div class="border-b border-stone-200 px-6 py-4 dark:border-stone-700">
<h2 class="text-base font-semibold text-stone-900 dark:text-stone-100">
Google Suche (Grounding)
</h2>
</div>
<div class="px-6 py-4">
<div class="flex items-center justify-between">
<div>
<p class="text-sm text-stone-700 dark:text-stone-300">
Google-Suchergebnisse in KI-Anfragen einbeziehen
</p>
<p class="mt-0.5 text-xs text-stone-400">
{data.ai.usage.grounding_used_today} / {data.ai.grounding_quota} freie Anfragen heute
</p>
</div>
<form
method="POST"
action="?/setGrounding"
use:enhance={() => {
// SvelteKit builds the FormData (including the hidden `enabled` field)
// before this callback runs, so the field is read while
// `groundingEnabled` still holds the OLD value. Flipping the optimistic
// state here — rather than in an onclick handler on the submit button —
// cannot race Svelte's async DOM flush and invert the submitted value.
groundingEnabled = !groundingEnabled;
return async ({ update }) => {
await update();
};
}}
>
<!-- Submits the DESIRED state: the opposite of the currently rendered toggle. -->
<input type="hidden" name="enabled" value={groundingEnabled ? 'false' : 'true'} />
<button
type="submit"
aria-label={groundingEnabled ? 'Grounding deaktivieren' : 'Grounding aktivieren'}
class="relative inline-flex h-6 w-11 items-center rounded-full transition-colors {groundingEnabled
? 'bg-primary-600'
: 'bg-stone-300 dark:bg-stone-600'}"
>
<span
class="inline-block h-4 w-4 transform rounded-full bg-white shadow transition-transform {groundingEnabled
? 'translate-x-6'
: 'translate-x-1'}"
></span>
</button>
</form>
</div>
<!-- Confirmation shown only for the grounding action's own form result. -->
{#if form?.success && form.action === 'grounding'}
<p class="mt-2 text-xs text-green-600 dark:text-green-400">
Grounding {form.enabled ? 'aktiviert' : 'deaktiviert'}.
</p>
{/if}
</div>
</div>
<!-- Card 4: Usage — aggregated cost/token rollups (today + 30 days) and a
     recent-calls table fed by `data.ai.usage` / `data.recentUsage` from load. -->
<div class="rounded-lg border border-stone-200 bg-white dark:border-stone-700 dark:bg-stone-900">
<div class="border-b border-stone-200 px-6 py-4 dark:border-stone-700">
<h2 class="text-base font-semibold text-stone-900 dark:text-stone-100">Verbrauch</h2>
</div>
<div class="space-y-4 px-6 py-4">
<!-- Rollup stats -->
<!-- The four stat tiles are driven by an inline array literal so label/value
     pairs stay together; tokens today = input + output, costs via formatCost. -->
<div class="grid grid-cols-2 gap-4 sm:grid-cols-4">
{#each [{ label: 'Anfragen heute', value: data.ai.usage.today.calls.toString() }, { label: 'Tokens heute', value: (data.ai.usage.today.input_tokens + data.ai.usage.today.output_tokens).toLocaleString('de-DE') }, { label: 'Kosten heute', value: formatCost(data.ai.usage.today.estimated_cost_usd) }, { label: 'Kosten (30 Tage)', value: formatCost(data.ai.usage.month.estimated_cost_usd) }] as stat}
<div class="rounded-md bg-stone-50 px-3 py-2 dark:bg-stone-800">
<p class="text-xs text-stone-400">{stat.label}</p>
<p class="mt-0.5 text-sm font-semibold text-stone-800 dark:text-stone-200">
{stat.value}
</p>
</div>
{/each}
</div>
<!-- Recent calls -->
<!-- One row per recorded AI call; rows for failed calls (event.error set) are
     tinted red, and grounded calls are marked with a check in the table. -->
{#if data.recentUsage.length > 0}
<div class="overflow-x-auto">
<table class="w-full text-xs">
<thead>
<tr class="text-left text-stone-400">
<th class="pr-4 pb-2 font-medium">Zeit</th>
<th class="pr-4 pb-2 font-medium">Typ</th>
<th class="pr-4 pb-2 font-medium">Modell</th>
<th class="pr-4 pb-2 font-medium">Tokens</th>
<th class="pr-4 pb-2 font-medium">Grounding</th>
<th class="pb-2 font-medium">Kosten</th>
</tr>
</thead>
<tbody>
{#each data.recentUsage as event}
<tr
class="border-t border-stone-100 dark:border-stone-800 {event.error
? 'text-red-500'
: 'text-stone-600 dark:text-stone-400'}"
>
<td class="py-1.5 pr-4 font-mono">{formatDate(event.created_at)}</td>
<td class="py-1.5 pr-4">{event.call_type}</td>
<!-- Long model IDs are truncated so the table keeps its width. -->
<td class="max-w-32 truncate py-1.5 pr-4 font-mono">{event.model}</td>
<td class="py-1.5 pr-4"
>{(event.input_tokens + event.output_tokens).toLocaleString('de-DE')}</td
>
<td class="py-1.5 pr-4">{event.grounded ? '✓' : '—'}</td>
<td class="py-1.5">{formatCost(event.estimated_cost_usd)}</td>
</tr>
{/each}
</tbody>
</table>
</div>
{:else}
<!-- Empty state before any AI call has been recorded in ai_usage. -->
<p class="text-xs text-stone-400">Noch keine KI-Anfragen aufgezeichnet.</p>
{/if}
</div>
</div>