feat(ai): migrate to Google Gemini 2.5 Flash-Lite, drop Mistral/Ollama

Replace the Mistral + Ollama AI stack with a single Google Gemini provider
backed by google.golang.org/genai. API key moves from env/Helm to the DB
(AES-256-GCM, key derived from JWT_SECRET via HKDF) so it can be rotated
via the admin UI without a pod restart.

New:
- pkg/crypto/secretbox — AES-256-GCM encrypt/decrypt for secrets at rest
- pkg/ai/gemini — GeminiProvider with grounding, structured output, usage
  recording, and hot-reload (Reinitialize swaps client under mutex)
- pkg/ai/usage — UsageRecorder interface + UsageEvent struct
- domain/settings/store — DB-backed settings (model, grounding toggle, key)
- domain/settings/usage — UsageRepo implementing UsageRecorder; ai_usage table
- migrations 000021 (system_settings) + 000022 (ai_usage)
- settings API: GET /ai, POST /ai/key, POST /ai/model, POST /ai/grounding,
  GET /ai/usage
- admin UI: 4-card settings page — provider status, model selector, grounding
  toggle with quota, usage rollups + recent-calls table

Removed:
- pkg/ai/ollama, mistral_provider, ratelimiter (+ tests)
- Helm AI_API_KEY, AI_PROVIDER, AI_MODEL_COMPLEX, AI_AGENT_DISCOVERY,
  AI_RATE_LIMIT_RPS env vars

Call sites set Grounded+CallType: research (true/"research"), enrich Pass B
(true/"enrich_b"), similarity (false/"similarity"). Integration test updated
to use a stub ai.Provider instead of a fake Ollama HTTP server.
This commit is contained in:
2026-04-25 09:54:49 +02:00
parent 80149de317
commit 3ddfd87408
40 changed files with 1392 additions and 897 deletions

View File

@@ -1,15 +1,15 @@
// discovery-eval measures discovery's AI-backed components against labelled
// fixtures. Two modes:
//
// -mode similarity (default) — grades MistralSimilarityClassifier on
// -mode similarity (default) — grades SimilarityClassifier on
// pair-labelled fixtures. Precision/recall/F1/accuracy
// + confidence calibration.
// -mode category — grades MistralLLMEnricher's `category` output on
// -mode category — grades LLMEnricher's `category` output on
// row-labelled fixtures. Accuracy + per-label confusion.
//
// Usage:
//
// AI_API_KEY=... AI_MODEL_COMPLEX=mistral-large-latest \
// GEMINI_API_KEY=... \
// discovery-eval \
// -mode similarity \
// -fixture backend/cmd/discovery-eval/fixtures/similarity.json \
@@ -18,7 +18,7 @@
// -report eval-report.json
//
// Each mode has its own cache key so switching modes doesn't churn entries.
// Bump AI_MODEL_COMPLEX or edit a fixture to force a refresh.
// Set GEMINI_MODEL to override the model (default: gemini-2.5-flash-lite).
package main
import (
@@ -29,7 +29,6 @@ import (
"os"
"time"
"marktvogt.de/backend/internal/config"
"marktvogt.de/backend/internal/domain/discovery/enrich"
"marktvogt.de/backend/internal/pkg/ai"
"marktvogt.de/backend/internal/pkg/scrape"
@@ -66,35 +65,29 @@ func realMain() int {
)
flag.Parse()
apiKey := os.Getenv("AI_MISTRAL_API_KEY")
apiKey := os.Getenv("GEMINI_API_KEY")
if apiKey == "" {
apiKey = os.Getenv("AI_API_KEY") // legacy fallback
slog.Error("GEMINI_API_KEY is required for eval")
return 2
}
model := os.Getenv("AI_MISTRAL_MODEL")
model := os.Getenv("GEMINI_MODEL")
if model == "" {
model = os.Getenv("AI_MODEL_COMPLEX") // legacy fallback
}
if model == "" {
model = "mistral-large-latest"
model = "gemini-2.5-flash-lite"
}
userAgent := os.Getenv("AI_USER_AGENT")
if userAgent == "" {
userAgent = "marktvogt-eval/1.0 (+https://marktvogt.de)"
}
client, err := ai.NewFromConfig(config.AIConfig{
Provider: "mistral",
MistralAPIKey: apiKey,
MistralModel: model,
RateLimitRPS: 1.0,
})
if err != nil {
slog.Error("AI client not configured", "error", err)
return 2
}
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Minute)
defer cancel()
client, err := ai.NewGeminiProvider(ctx, apiKey, model, nil)
if err != nil {
slog.Error("AI client init failed", "error", err)
return 2
}
switch *mode {
case modeSimilarity:
cfg := evalConfig{

View File

@@ -1,4 +1,4 @@
{{- if or .Values.ai.apiKey .Values.turnstile.secretKey .Values.discovery.token }}
{{- if or .Values.turnstile.secretKey .Values.discovery.token }}
apiVersion: v1
kind: Secret
metadata:
@@ -8,9 +8,6 @@ metadata:
{{- include "marktvogt-backend.labels" . | nindent 4 }}
type: Opaque
stringData:
{{- if .Values.ai.apiKey }}
AI_API_KEY: {{ .Values.ai.apiKey | quote }}
{{- end }}
{{- if .Values.turnstile.secretKey }}
TURNSTILE_SECRET_KEY: {{ .Values.turnstile.secretKey | quote }}
{{- end }}

View File

@@ -51,7 +51,7 @@ spec:
- secretRef:
name: {{ include "marktvogt-backend.fullname" . }}-smtp
{{- end }}
{{- if or .Values.ai.apiKey .Values.turnstile.secretKey .Values.discovery.token }}
{{- if or .Values.turnstile.secretKey .Values.discovery.token }}
# Turnstile + Discovery credentials (Helm-managed, passed via CI)
- secretRef:
name: {{ include "marktvogt-backend.fullname" . }}-ci-secrets
@@ -97,10 +97,6 @@ spec:
secretKeyRef:
name: {{ include "marktvogt-backend.fullname" . }}-ci-secrets
key: DISCOVERY_TOKEN
- name: AI_AGENT_DISCOVERY
value: {{ .Values.ai.agentDiscovery | quote }}
- name: AI_RATE_LIMIT_RPS
value: {{ .Values.ai.rateLimitRps | default 1 | quote }}
- name: DISCOVERY_BATCH_SIZE
value: {{ .Values.discovery.batchSize | default 4 | quote }}
- name: DISCOVERY_FORWARD_MONTHS

View File

@@ -81,8 +81,6 @@ config:
SMTP_FROM: "noreply@marktvogt.de"
ADMIN_EMAIL: "contact@marktvogt.de"
FRONTEND_URL: "https://marktvogt.de"
AI_MODEL_COMPLEX: "mistral-large-latest"
# Name of the manually-created Secret containing:
# JWT_SECRET, SENTRY_DSN,
# OAUTH_{GOOGLE,APPLE,FACEBOOK,GITHUB}_{CLIENT_ID,CLIENT_SECRET}
@@ -95,12 +93,6 @@ smtp:
user: ""
password: ""
# AI research credentials — passed via Woodpecker secrets during deploy.
ai:
apiKey: ""
agentDiscovery: "" # set via CI secret in production
rateLimitRps: 1
# Discovery cron — token passed via CI secrets during deploy.
discovery:
enabled: true

View File

@@ -4,7 +4,6 @@ go 1.26
require (
github.com/PuerkitoBio/goquery v1.12.0
github.com/VikingOwl91/mistral-go-sdk v1.3.0
github.com/gin-gonic/gin v1.11.0
github.com/go-playground/validator/v10 v10.30.1
github.com/golang-jwt/jwt/v5 v5.3.1
@@ -13,12 +12,17 @@ require (
github.com/pquerna/otp v1.5.0
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1
github.com/valkey-io/valkey-go v1.0.72
golang.org/x/crypto v0.49.0
golang.org/x/crypto v0.50.0
golang.org/x/oauth2 v0.35.0
golang.org/x/sync v0.20.0
golang.org/x/time v0.14.0
google.golang.org/genai v1.54.0
)
require (
cloud.google.com/go v0.116.0 // indirect
cloud.google.com/go/auth v0.9.3 // indirect
cloud.google.com/go/compute/metadata v0.5.0 // indirect
github.com/andybalholm/cascadia v1.3.3 // indirect
github.com/boombuler/barcode v1.0.1-0.20190219062509-6c824513bacc // indirect
github.com/bytedance/sonic v1.14.0 // indirect
@@ -30,6 +34,11 @@ require (
github.com/go-playground/universal-translator v0.18.1 // indirect
github.com/goccy/go-json v0.10.2 // indirect
github.com/goccy/go-yaml v1.18.0 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/google/go-cmp v0.7.0 // indirect
github.com/google/s2a-go v0.1.8 // indirect
github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect
github.com/gorilla/websocket v1.5.3 // indirect
github.com/jackc/pgpassfile v1.0.0 // indirect
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
github.com/jackc/puddle/v2 v2.2.2 // indirect
@@ -44,10 +53,12 @@ require (
github.com/quic-go/quic-go v0.57.0 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.3.0 // indirect
go.opencensus.io v0.24.0 // indirect
golang.org/x/arch v0.20.0 // indirect
golang.org/x/net v0.52.0 // indirect
golang.org/x/sync v0.20.0 // indirect
golang.org/x/sys v0.42.0 // indirect
golang.org/x/text v0.35.0 // indirect
golang.org/x/sys v0.43.0 // indirect
golang.org/x/text v0.36.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect
google.golang.org/grpc v1.66.2 // indirect
google.golang.org/protobuf v1.36.9 // indirect
)

View File

@@ -1,7 +1,13 @@
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.116.0 h1:B3fRrSDkLRt5qSHWe40ERJvhvnQwdZiHu0bJOpldweE=
cloud.google.com/go v0.116.0/go.mod h1:cEPSRWPzZEswwdr9BxE6ChEn01dWlTaF05LiC2Xs70U=
cloud.google.com/go/auth v0.9.3 h1:VOEUIAADkkLtyfr3BLa3R8Ed/j6w1jTBmARx+wb5w5U=
cloud.google.com/go/auth v0.9.3/go.mod h1:7z6VY+7h3KUdRov5F1i8NDP5ZzWKYmEPO842BgCsmTk=
cloud.google.com/go/compute/metadata v0.5.0 h1:Zr0eK8JbFv6+Wi4ilXAR8FJ3wyNdpxHKJNPos6LTZOY=
cloud.google.com/go/compute/metadata v0.5.0/go.mod h1:aHnloV2TPI38yx4s9+wAZhHykWvVCfu7hQbF+9CWoiY=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/PuerkitoBio/goquery v1.12.0 h1:pAcL4g3WRXekcB9AU/y1mbKez2dbY2AajVhtkO8RIBo=
github.com/PuerkitoBio/goquery v1.12.0/go.mod h1:802ej+gV2y7bbIhOIoPY5sT183ZW0YFofScC4q/hIpQ=
github.com/VikingOwl91/mistral-go-sdk v1.3.0 h1:OkTsodDE5lmdf7p2cwScqD2vIk8sScQ2IGk65dUjuz0=
github.com/VikingOwl91/mistral-go-sdk v1.3.0/go.mod h1:f4emNtHUx2zSqY3V0LBz6lNI1jE6q/zh+SEU+/hJ0i4=
github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM=
github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA=
github.com/boombuler/barcode v1.0.1-0.20190219062509-6c824513bacc h1:biVzkmvwrH8WK8raXaxBx6fRVTlJILwEwQGL1I/ByEI=
@@ -10,11 +16,18 @@ github.com/bytedance/sonic v1.14.0 h1:/OfKt8HFw0kh2rj8N0F6C/qPGRESq0BbaNZgcNXXzQ
github.com/bytedance/sonic v1.14.0/go.mod h1:WoEbx8WTcFJfzCe0hbmyTGrfjt8PzNEBdxlNUO24NhA=
github.com/bytedance/sonic/loader v0.3.0 h1:dskwH8edlzNMctoruo8FPTJDF3vLtDT0sXZwvZJyqeA=
github.com/bytedance/sonic/loader v0.3.0/go.mod h1:N8A3vUdtUebEY2/VQC0MyhYeKUFosQU6FxH2JmUe6VI=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/cloudwego/base64x v0.1.6 h1:t11wG9AECkCDk5fMSoxmufanudBtJ+/HemLstXDLI2M=
github.com/cloudwego/base64x v0.1.6/go.mod h1:OFcloc187FXDaYHvrNIjxSe8ncn0OOM8gEHfghB2IPU=
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/gabriel-vasile/mimetype v1.4.12 h1:e9hWvmLYvtp846tLHam2o++qitpguFiYCKbn0w9jyqw=
github.com/gabriel-vasile/mimetype v1.4.12/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s=
github.com/gin-contrib/sse v1.1.0 h1:n0w2GMuUpWDVp7qSpvze6fAu9iRxJY4Hmj6AmBOU05w=
@@ -35,12 +48,39 @@ github.com/goccy/go-yaml v1.18.0 h1:8W7wMFS12Pcas7KU+VVkaiCng+kG8QiFeFwzFb+rwuw=
github.com/goccy/go-yaml v1.18.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA=
github.com/golang-jwt/jwt/v5 v5.3.1 h1:kYf81DTWFe7t+1VvL7eS+jKFVWaUnK9cB1qbwn63YCY=
github.com/golang-jwt/jwt/v5 v5.3.1/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE=
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/s2a-go v0.1.8 h1:zZDs9gcbt9ZPLV0ndSyQk6Kacx2g/X+SKYovpnz3SMM=
github.com/google/s2a-go v0.1.8/go.mod h1:6iNWHTpQ+nfNRN5E00MSdfDwVesa8hhS32PhPO8deJA=
github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/googleapis/enterprise-certificate-proxy v0.3.4 h1:XYIDZApgAnrN1c855gTgghdIA6Stxb52D5RnLI1SLyw=
github.com/googleapis/enterprise-certificate-proxy v0.3.4/go.mod h1:YKe7cfqYXjKGpGvmSg28/fFvhNzinZQm8DGnaburhGA=
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo=
@@ -69,6 +109,7 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pquerna/otp v1.5.0 h1:NMMR+WrmaqXU4EzdGJEE1aUUI0AMRzsp96fFFWNPwxs=
github.com/pquerna/otp v1.5.0/go.mod h1:dkJfzwRKNiegxyNb54X/3fLwhCynbMspSyWKnvi1AEg=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/quic-go/qpack v0.6.0 h1:g7W+BMYynC1LbYLSqRt8PBg5Tgwxn214ZZR34VIOjz8=
github.com/quic-go/qpack v0.6.0/go.mod h1:lUpLKChi8njB4ty2bFLX2x4gzDqXwUpaO1DP9qMDZII=
github.com/quic-go/quic-go v0.57.0 h1:AsSSrrMs4qI/hLrKlTH/TGQeTMY0ib1pAOX7vA3AdqE=
@@ -92,6 +133,8 @@ github.com/ugorji/go/codec v1.3.0/go.mod h1:pRBVtBSKl77K30Bv8R2P+cLSGaTtex6fsA2W
github.com/valkey-io/valkey-go v1.0.72 h1:iRWt1hJyOchcEgbHSkRY3aKkcBudxvMaVMsmxuYxuxE=
github.com/valkey-io/valkey-go v1.0.72/go.mod h1:VGhZ6fs68Qrn2+OhH+6waZH27bjpgQOiLyUQyXuYK5k=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0=
go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo=
go.uber.org/mock v0.5.2 h1:LbtPTcP8A5k9WPXj54PPPbjcI4Y6lhyOZXn+VS7wNko=
go.uber.org/mock v0.5.2/go.mod h1:wLlUxC2vVTPTaE3UD51E0BGOAElKrILxhVSDYQLld5o=
go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
@@ -99,19 +142,30 @@ go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
golang.org/x/arch v0.20.0 h1:dx1zTU0MAE98U+TQ8BLl7XsJbgze2WnNKF/8tGp/Q6c=
golang.org/x/arch v0.20.0/go.mod h1:bdwinDaKcfZUGpH09BB7ZmOfhalA8lQdzl62l8gGWsk=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc=
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4=
golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA=
golang.org/x/crypto v0.50.0 h1:zO47/JPrL6vsNkINmLoo/PH1gcxpls50DNogFvB5ZGI=
golang.org/x/crypto v0.50.0/go.mod h1:3muZ7vA7PBCE6xgPX7nkzzjiUq87kRItoJQM1Yo8S+Q=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
@@ -122,8 +176,11 @@ golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0=
golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.35.0 h1:Mv2mzuHuZuY2+bkyWXIHMfhNdJAdwW3FuWeCPYN5GVQ=
golang.org/x/oauth2 v0.35.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -133,7 +190,10 @@ golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
@@ -145,8 +205,8 @@ golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI=
golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
@@ -165,20 +225,52 @@ golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8=
golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA=
golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg=
golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164=
golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI=
golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58=
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/genai v1.54.0 h1:ZQCa70WMTJDI11FdqWCzGvZ5PanpcpfoO6jl/lrSnGU=
google.golang.org/genai v1.54.0/go.mod h1:A3kkl0nyBjyFlNjgxIwKq70julKbIxpSxqKO5gw/gmk=
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 h1:pPJltXNxVzT4pK9yD8vR9X75DaWYYmLGMsEvBfFQZzQ=
google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU=
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=
google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc=
google.golang.org/grpc v1.66.2 h1:3QdXkuq3Bkh7w+ywLdLvM56cmGvQHUMZpiCzt6Rqaoo=
google.golang.org/grpc v1.66.2/go.mod h1:s3/l6xSSCURdVfAnL+TqCNMyTDAGN6+lZeVxnZR128Y=
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
google.golang.org/protobuf v1.36.9 h1:w2gp2mA27hUeUzj9Ex9FBjsBm40zfaDtEWow293U7Iw=
google.golang.org/protobuf v1.36.9/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=

View File

@@ -33,14 +33,14 @@ type DiscoveryConfig struct {
}
type AIConfig struct {
Provider string // "ollama" or "mistral"; default "ollama"
RateLimitRPS float64 // Max requests per second to upstream; 0 = disabled (Mistral only)
// GeminiAPIKey is the bootstrap API key from env (GEMINI_API_KEY).
// Used only on first startup when the DB has no key yet.
// Afterwards the key lives encrypted in system_settings.
GeminiAPIKey string
OllamaURL string // default "http://localhost:11434"
OllamaModel string // default "qwen2.5:14b-instruct"
MistralAPIKey string
MistralModel string // default "mistral-large-latest"
// GroundingDailyQuota is the number of free grounding requests per day.
// Default 1500. Used for cost estimation in the UI.
GroundingDailyQuota int
}
type SearchConfig struct {
@@ -188,11 +188,6 @@ func Load() (*Config, error) {
return nil, fmt.Errorf("SMTP_PORT: %w", err)
}
rpsAI, err := envFloat("AI_RATE_LIMIT_RPS", 1.0)
if err != nil {
return nil, fmt.Errorf("AI_RATE_LIMIT_RPS: %w", err)
}
discoveryToken := envStr("DISCOVERY_TOKEN", "")
if discoveryToken == "" {
slog.Warn("DISCOVERY_TOKEN is empty; /api/v1/admin/discovery/crawl is disabled")
@@ -282,12 +277,8 @@ func Load() (*Config, error) {
FrontendURL: envStr("FRONTEND_URL", "http://localhost:5173"),
},
AI: AIConfig{
Provider: envStr("AI_PROVIDER", "ollama"),
RateLimitRPS: rpsAI,
OllamaURL: envStr("AI_OLLAMA_URL", "http://localhost:11434"),
OllamaModel: envStr("AI_OLLAMA_MODEL", "qwen2.5:14b-instruct"),
MistralAPIKey: envStr("AI_MISTRAL_API_KEY", envStr("AI_API_KEY", "")),
MistralModel: envStr("AI_MISTRAL_MODEL", envStr("AI_MODEL_COMPLEX", "mistral-large-latest")),
GeminiAPIKey: envStr("GEMINI_API_KEY", ""),
GroundingDailyQuota: 1500,
},
Search: SearchConfig{
Provider: envStr("SEARCH_PROVIDER", "searxng"),

View File

@@ -43,7 +43,7 @@ func NewLLMEnricher(provider ai.Provider, scraper Scraper) *ProviderLLMEnricher
return &ProviderLLMEnricher{AI: provider, Scraper: scraper}
}
// llmResponse is the JSON shape we instruct Mistral to return. Any field may
// llmResponse is the JSON shape we instruct the LLM to return. Any field may
// be absent if the content doesn't support it — the enricher only writes
// what the model actually produced.
type llmResponse struct {
@@ -58,7 +58,7 @@ type llmResponse struct {
// text — empty-context LLM calls hallucinate.
func (e *ProviderLLMEnricher) EnrichMissing(ctx context.Context, req LLMRequest) (Enrichment, error) {
if e.AI == nil || e.Scraper == nil {
return Enrichment{}, errors.New("mistral enricher not configured")
return Enrichment{}, errors.New("LLM enricher not configured")
}
urls := req.Quellen
@@ -89,6 +89,8 @@ func (e *ProviderLLMEnricher) EnrichMissing(ctx context.Context, req LLMRequest)
SystemPrompt: systemPrompt,
UserMessage: userPrompt,
JSONMode: true,
Grounded: true,
CallType: "enrich_b",
})
if err != nil {
return Enrichment{}, fmt.Errorf("chat: %w", err)
@@ -120,8 +122,8 @@ func (e *ProviderLLMEnricher) EnrichMissing(ctx context.Context, req LLMRequest)
return out, nil
}
// buildSystemPrompt returns the English instruction block. Mistral follows
// English instructions more reliably; only the *output* is German.
// buildSystemPrompt returns the English instruction block sent to the LLM.
// Only the *output* is in German.
func buildSystemPrompt() string {
return strings.TrimSpace(`
You are enriching metadata for a medieval market (Mittelaltermarkt) in the

View File

@@ -94,7 +94,7 @@ func NewSimilarityClassifier(provider ai.Provider) *SimilarityClassifierLLM {
return &SimilarityClassifierLLM{AI: provider}
}
// simResponse is the JSON shape we instruct Mistral to return. Confidence
// simResponse is the JSON shape we instruct the LLM to return. Confidence
// must be parseable as a float 0..1; anything outside that range is clamped.
type simResponse struct {
SameMarket bool `json:"same_market"`
@@ -117,6 +117,8 @@ func (c *SimilarityClassifierLLM) Classify(ctx context.Context, a, b SimilarityR
SystemPrompt: systemPrompt,
UserMessage: userPrompt,
JSONMode: true,
Grounded: false,
CallType: "similarity",
})
if err != nil {
return Verdict{}, fmt.Errorf("chat: %w", err)

View File

@@ -120,7 +120,7 @@ const (
)
// AgentStatus constants.
// Mistral Pass 0 produces: bestaetigt | unklar | vorjahr_unbestaetigt | abgesagt.
// Pass 0 (LLM-enriched) produces: bestaetigt | unklar | vorjahr_unbestaetigt | abgesagt.
// The crawler uses its own sentinel value so the validator's agent-specific
// rules (e.g. bestaetigt+vorjahr_hinweis inconsistency) don't fire on crawler-
// produced rows, and so operators can filter the queue by origin.

View File

@@ -55,7 +55,7 @@ type Service struct {
// server/routes.go using the shared Nominatim client (1 rps limited).
geocoder Geocoder
// llmEnricher is the AI-backed fallback pass. Nil-safe via NoopLLMEnricher
// in test wiring; production code passes a real MistralLLMEnricher.
// in test wiring; production code passes a real ProviderLLMEnricher.
llmEnricher enrich.LLMEnricher
// simClassifier is the AI-backed duplicate tiebreaker. Nil-safe via
// NoopSimilarityClassifier.
@@ -168,8 +168,7 @@ func (s *Service) Crawl(ctx context.Context) (CrawlSummary, error) {
defer cancel()
for _, m := range merged {
// Link verification was needed for Mistral's web_search output (often
// hallucinated URLs). Crawler URLs are parsed from actual HTML of
// Link verification is skipped here: crawler URLs come from actual HTML of
// trusted sources; they've been implicitly verified at parse time.
// Skipping the check makes the crawl complete in <2 minutes even for
// 1500+ events and avoids timing-related false positives where the

View File

@@ -49,32 +49,15 @@ const validResearchJSON = `{
}
}`
// fakeOllamaHandler returns a valid Ollama non-streaming chat response whose
// content field contains validResearchJSON.
func fakeOllamaHandler(t *testing.T) http.Handler {
t.Helper()
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodPost || r.URL.Path != "/api/chat" {
http.NotFound(w, r)
return
}
resp := map[string]any{
"model": "test",
"created_at": "2026-04-24T00:00:00Z",
"message": map[string]string{
"role": "assistant",
"content": validResearchJSON,
},
"done": true,
"prompt_eval_count": 10,
"eval_count": 20,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(resp); err != nil {
t.Errorf("fakeOllamaHandler: encode response: %v", err)
}
})
// fakeProvider is a stub ai.Provider that returns validResearchJSON for any Chat call.
type fakeProvider struct{}

// Chat ignores its inputs and always yields the canned research payload.
func (f *fakeProvider) Chat(_ context.Context, _ *ai.ChatRequest) (*ai.ChatResponse, error) {
	return &ai.ChatResponse{Content: validResearchJSON}, nil
}

// Capability probes: advertise full structured-output support so the
// orchestrator exercises its schema path in this test.
func (f *fakeProvider) SupportsJSONMode() bool   { return true }
func (f *fakeProvider) SupportsJSONSchema() bool { return true }

// Name labels the provider in logs and usage records.
func (f *fakeProvider) Name() string { return "fake" }
// fakeSearxngHandler returns a SearxNG JSON response whose result URLs point
// at the provided page server.
@@ -120,14 +103,11 @@ func TestIntegrationOrchestratorFullPipeline(t *testing.T) {
fakePage := httptest.NewServer(fakePageHandler(t, &pageHits))
defer fakePage.Close()
fakeOllama := httptest.NewServer(fakeOllamaHandler(t))
defer fakeOllama.Close()
fakeSearxng := httptest.NewServer(fakeSearxngHandler(t, fakePage.URL))
defer fakeSearxng.Close()
orch := &research.Orchestrator{
AI: ai.NewOllamaProvider(ai.OllamaConfig{BaseURL: fakeOllama.URL, Model: "test"}),
AI: &fakeProvider{},
Search: search.NewSearxng(search.SearxngConfig{BaseURL: fakeSearxng.URL}),
Scraper: scrape.New("test-agent/1.0"),
MaxPages: 4,

View File

@@ -76,21 +76,11 @@ func (o *Orchestrator) Run(ctx context.Context, in Input) (Output, error) {
}
// 4. LLM call with one retry on schema violation
// Providers with constrained decoding (Ollama) use a simplified schema
// without $defs, union types, or patterns — and are validated against that
// same simplified schema. Providers that embed the schema in the prompt
// (Mistral) get the full schema for both generation and validation.
constraintSchema := SchemaJSON
validationSchema := SchemaJSON
if o.AI.SupportsJSONSchema() {
constraintSchema = ConstraintSchemaJSON
validationSchema = ConstraintSchemaJSON
}
// Simplified schema for constrained decoding (no $defs, no union types)
validate := func(content string) error {
normalized := normalizeNullStrings(content)
return ai.ValidateSchema(validationSchema, []byte(normalized))
return ai.ValidateSchema(ConstraintSchemaJSON, []byte(normalizeNullStrings(content)))
}
resp, err := callLLM(ctx, o.AI, userPrompt, constraintSchema)
resp, err := callLLM(ctx, o.AI, userPrompt, ConstraintSchemaJSON)
if err == nil {
if verr := validate(resp.Content); verr != nil {
err = &ai.ProviderError{Code: ai.ErrSchemaViolation, Retryable: true, RawOutput: resp.Content, Inner: verr}
@@ -101,7 +91,7 @@ func (o *Orchestrator) Run(ctx context.Context, in Input) (Output, error) {
if err != nil {
var pe *ai.ProviderError
if errors.As(err, &pe) && pe.Code == ai.ErrSchemaViolation {
resp, err = callLLM(ctx, o.AI, userPrompt+"\n\nYour previous response failed schema validation. Re-emit the JSON strictly matching the schema.", constraintSchema)
resp, err = callLLM(ctx, o.AI, userPrompt+"\n\nYour previous response failed schema validation. Re-emit the JSON strictly matching the schema.", ConstraintSchemaJSON)
if err == nil {
if verr := validate(resp.Content); verr != nil {
err = &ai.ProviderError{Code: ai.ErrSchemaViolation, Retryable: false, RawOutput: resp.Content, Inner: verr}
@@ -128,6 +118,8 @@ func callLLM(ctx context.Context, p ai.Provider, userPrompt string, schema []byt
SystemPrompt: SystemPrompt,
UserMessage: userPrompt,
JSONSchema: schema,
Grounded: true,
CallType: "research",
})
}

View File

@@ -7,8 +7,7 @@ var SchemaJSON []byte
// ConstraintSchemaJSON is a simplified, flat JSON Schema for providers that
// support constrained decoding but cannot handle $defs, union types, or
// pattern constraints (e.g. Ollama with llama.cpp grammar generation).
// Post-hoc validation always uses SchemaJSON regardless.
// pattern constraints. Post-hoc validation always uses SchemaJSON regardless.
//
//go:embed assets/researcher_schema_ollama.json
//go:embed assets/researcher_schema_simple.json
var ConstraintSchemaJSON []byte

View File

@@ -2,67 +2,82 @@ package settings
import (
"net/http"
"strconv"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
"marktvogt.de/backend/internal/pkg/ai"
)
// AIStatus is the response payload for GET /admin/settings/ai.
type AIStatus struct {
Provider string `json:"provider"`
Connected bool `json:"connected"`
BaseURL string `json:"base_url,omitempty"`
Model string `json:"model"`
Models []string `json:"models"`
Provider string `json:"provider"`
Connected bool `json:"connected"`
Model string `json:"model"`
Models []string `json:"models"`
APIKeyFingerprint string `json:"api_key_fingerprint,omitempty"`
GroundingEnabled bool `json:"grounding_enabled"`
GroundingQuota int `json:"grounding_quota"`
Usage UsageSummary `json:"usage"`
}
// Handler serves AI settings endpoints. ollama is nil when the active
// provider is not Ollama.
type UsageSummary struct {
Today UsageStats `json:"today"`
Month UsageStats `json:"month"`
GroundingUsedToday int `json:"grounding_used_today"`
}
// Handler serves AI settings endpoints.
type Handler struct {
ollama *ai.OllamaProvider
provider string
provider *ai.GeminiProvider
store *Store
usageRepo *UsageRepo
}
func NewHandler(provider ai.Provider) *Handler {
ollama, _ := provider.(*ai.OllamaProvider)
return &Handler{ollama: ollama, provider: provider.Name()}
func NewHandler(provider *ai.GeminiProvider, store *Store, usageRepo *UsageRepo) *Handler {
return &Handler{provider: provider, store: store, usageRepo: usageRepo}
}
func (h *Handler) GetAI(c *gin.Context) {
if h.ollama == nil {
c.JSON(http.StatusOK, gin.H{"data": AIStatus{
Provider: h.provider,
Connected: true,
Model: "",
Models: []string{},
}})
return
ctx := c.Request.Context()
models, err := h.provider.ListModelNames(ctx)
connected := err == nil
if models == nil {
models = []string{}
}
models, err := h.ollama.ListModels(c.Request.Context())
status := AIStatus{
Provider: "ollama",
BaseURL: h.ollama.BaseURL(),
Model: h.ollama.Model(),
Models: []string{},
// Fingerprint: last 4 chars of stored key (if any)
fingerprint := ""
if key, kerr := h.store.GetGeminiAPIKey(ctx); kerr == nil && len(key) >= 4 {
fingerprint = "•••" + key[len(key)-4:]
}
if err != nil {
status.Connected = false
} else {
status.Connected = true
for _, m := range models {
status.Models = append(status.Models, m.Name)
}
}
c.JSON(http.StatusOK, gin.H{"data": status})
grounding, _ := h.store.GetGroundingEnabled(ctx)
today, _ := h.usageRepo.Today(ctx)
month, _ := h.usageRepo.Month(ctx)
groundingToday, _ := h.usageRepo.GroundingToday(ctx)
c.JSON(http.StatusOK, gin.H{"data": AIStatus{
Provider: "gemini",
Connected: connected,
Model: h.provider.Model(),
Models: models,
APIKeyFingerprint: fingerprint,
GroundingEnabled: grounding,
GroundingQuota: 1500,
Usage: UsageSummary{
Today: today,
Month: month,
GroundingUsedToday: groundingToday,
},
}})
}
func (h *Handler) SetModel(c *gin.Context) {
if h.ollama == nil {
c.JSON(http.StatusBadRequest, gin.H{"error": "model switching only supported for Ollama provider"})
return
}
ctx := c.Request.Context()
var req struct {
Model string `json:"model" binding:"required"`
}
@@ -70,6 +85,83 @@ func (h *Handler) SetModel(c *gin.Context) {
c.JSON(http.StatusBadRequest, gin.H{"error": "model is required"})
return
}
h.ollama.SetModel(req.Model)
userID := callerID(c)
if err := h.store.SetModel(ctx, req.Model, userID); err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to save model"})
return
}
h.provider.SetModel(req.Model)
c.JSON(http.StatusOK, gin.H{"data": gin.H{"model": req.Model}})
}
// SetAPIKey handles POST /admin/settings/ai/key: persists the Gemini API key
// (encrypted at rest by the Store) and hot-swaps the provider's client so the
// new key takes effect without a restart.
func (h *Handler) SetAPIKey(c *gin.Context) {
	var body struct {
		APIKey string `json:"api_key" binding:"required"`
	}
	if err := c.ShouldBindJSON(&body); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "api_key is required"})
		return
	}
	ctx := c.Request.Context()
	// Persist first so the key survives a restart even if client init fails below.
	if err := h.store.SetGeminiAPIKey(ctx, body.APIKey, callerID(c)); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to save api key"})
		return
	}
	if err := h.provider.Reinitialize(ctx, body.APIKey); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "key saved but provider init failed: " + err.Error()})
		return
	}
	// Echo back only a masked fingerprint (last 4 chars) for the UI.
	var fingerprint string
	if n := len(body.APIKey); n >= 4 {
		fingerprint = "•••" + body.APIKey[n-4:]
	}
	c.JSON(http.StatusOK, gin.H{"data": gin.H{"api_key_fingerprint": fingerprint}})
}
// SetGrounding handles POST /admin/settings/ai/grounding: persists the Google
// Search grounding toggle. The new value is read by call sites on their next
// AI request; no provider restart is needed.
func (h *Handler) SetGrounding(c *gin.Context) {
	ctx := c.Request.Context()
	var req struct {
		Enabled bool `json:"enabled"`
	}
	// Note: `enabled` deliberately has no binding:"required" — a bool zero
	// value is a legitimate payload ({"enabled": false}), so binding can only
	// fail on malformed JSON. The previous "enabled is required" message was
	// misleading for that case.
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"})
		return
	}
	userID := callerID(c)
	if err := h.store.SetGroundingEnabled(ctx, req.Enabled, userID); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to save grounding setting"})
		return
	}
	c.JSON(http.StatusOK, gin.H{"data": gin.H{"grounding_enabled": req.Enabled}})
}
// GetUsage handles GET /admin/settings/ai/usage: returns the most recent AI
// calls, newest first. ?limit=N caps the page size (default 50, max 200);
// out-of-range or non-numeric values silently fall back to the default.
func (h *Handler) GetUsage(c *gin.Context) {
	limit := 50
	if raw := c.Query("limit"); raw != "" {
		n, err := strconv.Atoi(raw)
		if err == nil && n > 0 && n <= 200 {
			limit = n
		}
	}
	events, err := h.usageRepo.Recent(c.Request.Context(), limit)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to load usage"})
		return
	}
	// Ensure an empty result encodes as [] rather than null.
	if events == nil {
		events = []UsageEvent{}
	}
	c.JSON(http.StatusOK, gin.H{"data": events})
}
// callerID extracts the authenticated user's UUID from gin context.
// Returns uuid.Nil if the key is absent or of the wrong type (shouldn't
// happen behind requireAuth).
func callerID(c *gin.Context) uuid.UUID {
	v, ok := c.Get("user_id")
	if !ok {
		return uuid.Nil
	}
	id, ok := v.(uuid.UUID)
	if !ok {
		return uuid.Nil
	}
	return id
}

View File

@@ -6,4 +6,7 @@ func RegisterRoutes(rg *gin.RouterGroup, h *Handler, requireAuth, requireAdmin g
admin := rg.Group("/admin", requireAuth, requireAdmin)
admin.GET("/settings/ai", h.GetAI)
admin.POST("/settings/ai/model", h.SetModel)
admin.POST("/settings/ai/key", h.SetAPIKey)
admin.POST("/settings/ai/grounding", h.SetGrounding)
admin.GET("/settings/ai/usage", h.GetUsage)
}

View File

@@ -0,0 +1,112 @@
package settings
import (
"context"
"errors"
"fmt"
"github.com/google/uuid"
"github.com/jackc/pgx/v5"
"github.com/jackc/pgx/v5/pgxpool"
apicrypto "marktvogt.de/backend/internal/pkg/crypto"
)
const (
keyAPIKey = "gemini.api_key"
keyModel = "gemini.model"
keyGroundingEnabled = "gemini.grounding_enabled"
)
// Store persists AI provider configuration in system_settings.
// Sensitive values (API key) are stored AES-256-GCM encrypted.
type Store struct {
db *pgxpool.Pool
enc [32]byte // derived from JWT_SECRET
}
func NewStore(db *pgxpool.Pool, encKey [32]byte) *Store {
return &Store{db: db, enc: encKey}
}
// GetGeminiAPIKey loads and decrypts the stored Gemini API key.
// A missing row is not an error: it returns ("", nil) so the caller can fall
// back to the env bootstrap key.
func (s *Store) GetGeminiAPIKey(ctx context.Context) (string, error) {
	var enc []byte
	err := s.db.QueryRow(ctx,
		`SELECT value_encrypted FROM system_settings WHERE key = $1`, keyAPIKey).Scan(&enc)
	switch {
	case errors.Is(err, pgx.ErrNoRows):
		return "", nil
	case err != nil:
		return "", fmt.Errorf("settings: get api key: %w", err)
	}
	plain, err := apicrypto.Open(s.enc, enc)
	if err != nil {
		return "", fmt.Errorf("settings: decrypt api key: %w", err)
	}
	return string(plain), nil
}
// SetGeminiAPIKey encrypts apiKey with AES-256-GCM and upserts it into
// system_settings, recording who changed it and when.
func (s *Store) SetGeminiAPIKey(ctx context.Context, apiKey string, updatedBy uuid.UUID) error {
	enc, err := apicrypto.Seal(s.enc, []byte(apiKey))
	if err != nil {
		return fmt.Errorf("settings: encrypt api key: %w", err)
	}
	_, err = s.db.Exec(ctx, `
		INSERT INTO system_settings (key, value_encrypted, updated_by)
		VALUES ($1, $2, $3)
		ON CONFLICT (key) DO UPDATE
		SET value_encrypted = EXCLUDED.value_encrypted,
		    updated_at = now(),
		    updated_by = EXCLUDED.updated_by
	`, keyAPIKey, enc, updatedBy)
	if err != nil {
		// Wrap like every other Store method so the caller sees which setting failed.
		return fmt.Errorf("settings: set api key: %w", err)
	}
	return nil
}
// GetModel returns the configured Gemini model id, defaulting to
// gemini-2.5-flash-lite when no row exists.
func (s *Store) GetModel(ctx context.Context) (string, error) {
	return s.getText(ctx, keyModel, "gemini-2.5-flash-lite")
}

// SetModel persists the model id chosen in the admin UI.
func (s *Store) SetModel(ctx context.Context, model string, updatedBy uuid.UUID) error {
	return s.setText(ctx, keyModel, model, updatedBy)
}

// GetGroundingEnabled returns the Google Search grounding toggle.
// Defaults to true (grounding on) when unset; any stored value other than
// the literal "false" also counts as enabled.
func (s *Store) GetGroundingEnabled(ctx context.Context) (bool, error) {
	v, err := s.getText(ctx, keyGroundingEnabled, "true")
	if err != nil {
		// Return the safe default alongside the error; callers typically
		// ignore the error and use the value.
		return true, err
	}
	return v != "false", nil
}

// SetGroundingEnabled persists the grounding toggle as "true"/"false" text.
func (s *Store) SetGroundingEnabled(ctx context.Context, enabled bool, updatedBy uuid.UUID) error {
	v := "false"
	if enabled {
		v = "true"
	}
	return s.setText(ctx, keyGroundingEnabled, v, updatedBy)
}
// getText reads a plaintext setting, returning fallback when the key was
// never written. On query errors the fallback is returned alongside the
// error so callers can degrade gracefully.
func (s *Store) getText(ctx context.Context, key, fallback string) (string, error) {
	var v string
	err := s.db.QueryRow(ctx,
		`SELECT value_text FROM system_settings WHERE key = $1`, key).Scan(&v)
	if err == nil {
		return v, nil
	}
	if errors.Is(err, pgx.ErrNoRows) {
		return fallback, nil
	}
	return fallback, fmt.Errorf("settings: get %s: %w", key, err)
}
// setText upserts a plaintext setting, recording who changed it and when.
func (s *Store) setText(ctx context.Context, key, value string, updatedBy uuid.UUID) error {
	_, err := s.db.Exec(ctx, `
		INSERT INTO system_settings (key, value_text, updated_by)
		VALUES ($1, $2, $3)
		ON CONFLICT (key) DO UPDATE
		SET value_text = EXCLUDED.value_text,
		    updated_at = now(),
		    updated_by = EXCLUDED.updated_by
	`, key, value, updatedBy)
	if err != nil {
		// Wrap with the setting key, consistent with getText / the getters.
		return fmt.Errorf("settings: set %s: %w", key, err)
	}
	return nil
}

View File

@@ -0,0 +1,125 @@
package settings
import (
"context"
"fmt"
"time"
"github.com/jackc/pgx/v5/pgxpool"
"marktvogt.de/backend/internal/pkg/ai"
)
// UsageRepo persists and queries AI call records.
type UsageRepo struct {
db *pgxpool.Pool
}
func NewUsageRepo(db *pgxpool.Pool) *UsageRepo {
return &UsageRepo{db: db}
}
// Record writes a single usage event — implements ai.UsageRecorder.
// An empty Error field is stored as SQL NULL so "error IS NULL" means success.
func (r *UsageRepo) Record(ctx context.Context, e ai.UsageEvent) error {
	var callErr *string
	if msg := e.Error; msg != "" {
		callErr = &msg
	}
	_, err := r.db.Exec(ctx, `
		INSERT INTO ai_usage
		  (provider, model, call_type, input_tokens, output_tokens,
		   grounded, duration_ms, estimated_cost_usd, error)
		VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9)
	`, e.Provider, e.Model, e.CallType, e.InputTokens, e.OutputTokens,
		e.Grounded, e.DurationMs, e.EstimatedCostUSD, callErr)
	if err != nil {
		return fmt.Errorf("usage: record: %w", err)
	}
	return nil
}
// UsageStats is a rollup over a time window.
type UsageStats struct {
Calls int `json:"calls"`
InputTokens int `json:"input_tokens"`
OutputTokens int `json:"output_tokens"`
GroundingCalls int `json:"grounding_calls"`
EstimatedCostUSD float64 `json:"estimated_cost_usd"`
}
// Today returns a rollup over the trailing 24 hours (rolling window, not a
// calendar day).
func (r *UsageRepo) Today(ctx context.Context) (UsageStats, error) {
	return r.statsWindow(ctx, "1 day")
}

// Month returns a rollup over the trailing 30 days (rolling window, not a
// calendar month).
func (r *UsageRepo) Month(ctx context.Context) (UsageStats, error) {
	return r.statsWindow(ctx, "30 days")
}
// GroundingToday counts grounded calls in the trailing 24 hours.
// NOTE(review): this is a rolling 24h window, not a calendar day. Google's
// free grounding tier presumably resets per calendar day, so the quota shown
// in the UI can diverge slightly — confirm which semantics are intended.
func (r *UsageRepo) GroundingToday(ctx context.Context) (int, error) {
	row := r.db.QueryRow(ctx, `
		SELECT COUNT(*) FROM ai_usage
		WHERE grounded AND created_at >= now() - INTERVAL '1 day'
	`)
	var n int
	return n, row.Scan(&n)
}
// statsWindow aggregates ai_usage rows newer than now()-interval.
// interval is a Postgres interval literal such as "1 day" or "30 days".
// It is passed as a bind parameter cast to ::interval rather than
// fmt.Sprintf-interpolated into the SQL text — the previous interpolation was
// safe only because all callers pass constants, and was an injection footgun.
func (r *UsageRepo) statsWindow(ctx context.Context, interval string) (UsageStats, error) {
	row := r.db.QueryRow(ctx, `
		SELECT
		  COUNT(*) AS calls,
		  COALESCE(SUM(input_tokens),0) AS input_tokens,
		  COALESCE(SUM(output_tokens),0) AS output_tokens,
		  COALESCE(SUM(CASE WHEN grounded THEN 1 ELSE 0 END),0) AS grounding_calls,
		  COALESCE(SUM(estimated_cost_usd),0) AS cost
		FROM ai_usage
		WHERE created_at >= now() - $1::interval
	`, interval)
	var s UsageStats
	if err := row.Scan(&s.Calls, &s.InputTokens, &s.OutputTokens, &s.GroundingCalls, &s.EstimatedCostUSD); err != nil {
		return s, fmt.Errorf("usage: stats(%s): %w", interval, err)
	}
	return s, nil
}
// UsageEvent is a single entry from ai_usage.
type UsageEvent struct {
ID int64 `json:"id"`
CreatedAt time.Time `json:"created_at"`
Provider string `json:"provider"`
Model string `json:"model"`
CallType string `json:"call_type"`
InputTokens int `json:"input_tokens"`
OutputTokens int `json:"output_tokens"`
Grounded bool `json:"grounded"`
DurationMs int `json:"duration_ms"`
EstimatedCostUSD float64 `json:"estimated_cost_usd"`
Error *string `json:"error,omitempty"`
}
// Recent returns up to limit usage events, newest first. The slice is nil
// when the table is empty; callers that need [] in JSON must convert.
func (r *UsageRepo) Recent(ctx context.Context, limit int) ([]UsageEvent, error) {
	rows, err := r.db.Query(ctx, `
		SELECT id, created_at, provider, model, call_type,
		       input_tokens, output_tokens, grounded, duration_ms,
		       estimated_cost_usd, error
		FROM ai_usage
		ORDER BY created_at DESC
		LIMIT $1
	`, limit)
	if err != nil {
		return nil, fmt.Errorf("usage: recent: %w", err)
	}
	defer rows.Close()
	var events []UsageEvent
	for rows.Next() {
		var ev UsageEvent
		scanErr := rows.Scan(&ev.ID, &ev.CreatedAt, &ev.Provider, &ev.Model, &ev.CallType,
			&ev.InputTokens, &ev.OutputTokens, &ev.Grounded, &ev.DurationMs,
			&ev.EstimatedCostUSD, &ev.Error)
		if scanErr != nil {
			return nil, fmt.Errorf("usage: scan: %w", scanErr)
		}
		events = append(events, ev)
	}
	return events, rows.Err()
}

View File

@@ -3,39 +3,37 @@ package ai
import (
"context"
"fmt"
"time"
mistral "github.com/VikingOwl91/mistral-go-sdk"
"github.com/VikingOwl91/mistral-go-sdk/chat"
"marktvogt.de/backend/internal/config"
)
const (
providerOllama = "ollama"
providerMistral = "mistral"
)
func NewFromConfig(cfg config.AIConfig) (Provider, error) {
switch cfg.Provider {
case "", providerOllama:
return NewOllamaProvider(OllamaConfig{
BaseURL: cfg.OllamaURL,
Model: cfg.OllamaModel,
}), nil
case providerMistral:
if cfg.MistralAPIKey == "" {
return nil, fmt.Errorf("ai: provider=%s requires AI_MISTRAL_API_KEY", providerMistral)
}
sdk := mistral.NewClient(
cfg.MistralAPIKey,
mistral.WithTimeout(120*time.Second),
mistral.WithRetry(2, 1*time.Second),
)
chatFn := func(ctx context.Context, req *chat.CompletionRequest) (*chat.CompletionResponse, error) {
return sdk.ChatComplete(ctx, req)
}
return newMistralProviderWithChat(cfg.MistralModel, chatFn, newRateLimiter(cfg.RateLimitRPS)), nil
default:
return nil, fmt.Errorf("ai: unknown provider %q (want %s|%s)", cfg.Provider, providerOllama, providerMistral)
}
// KeySource provides the current Gemini API key. Implemented by settings.Store.
type KeySource interface {
	GetGeminiAPIKey(ctx context.Context) (string, error)
	GetModel(ctx context.Context) (string, error)
}

// NewFromConfig creates a GeminiProvider. The DB-stored API key wins; when
// the store has none, cfg.GeminiAPIKey (env bootstrap) is used. With no key
// at all, an unconfigured provider is returned so the server can still boot
// and the operator can set the key through the admin UI.
func NewFromConfig(ctx context.Context, cfg config.AIConfig, store KeySource, recorder UsageRecorder) (*GeminiProvider, error) {
	apiKey, err := store.GetGeminiAPIKey(ctx)
	if err != nil {
		return nil, fmt.Errorf("ai: read api key from store: %w", err)
	}
	if apiKey == "" {
		apiKey = cfg.GeminiAPIKey
	}
	// Model: stored value wins; a store error or empty value falls back to
	// the default model.
	model, merr := store.GetModel(ctx)
	if merr != nil || model == "" {
		model = "gemini-2.5-flash-lite"
	}
	if apiKey == "" {
		return newUnconfiguredGeminiProvider(model, recorder), nil
	}
	return NewGeminiProvider(ctx, apiKey, model, recorder)
}

View File

@@ -1,29 +0,0 @@
package ai
import (
"testing"
"marktvogt.de/backend/internal/config"
)
func TestNewFromConfig_Ollama(t *testing.T) {
p, err := NewFromConfig(config.AIConfig{Provider: providerOllama, OllamaURL: "http://x:11434", OllamaModel: "m"})
if err != nil {
t.Fatalf("NewFromConfig: %v", err)
}
if p.Name() != providerOllama {
t.Fatalf("Name: %q", p.Name())
}
}
func TestNewFromConfig_MistralRequiresKey(t *testing.T) {
if _, err := NewFromConfig(config.AIConfig{Provider: providerMistral}); err == nil {
t.Fatal("want error when MistralAPIKey is empty")
}
}
func TestNewFromConfig_UnknownProvider(t *testing.T) {
if _, err := NewFromConfig(config.AIConfig{Provider: "llama-cpp"}); err == nil {
t.Fatal("want error for unknown provider")
}
}

View File

@@ -0,0 +1,295 @@
package ai
import (
"context"
"encoding/json"
"fmt"
"sync"
"time"
"google.golang.org/genai"
)
// Gemini API pricing (as of 2026-04). Refresh constants when pricing changes.
// https://ai.google.dev/gemini-api/docs/pricing
const (
	geminiInputCostPerToken  = 0.10 / 1_000_000 // $0.10 / 1M tokens
	geminiOutputCostPerToken = 0.40 / 1_000_000 // $0.40 / 1M tokens
	// Despite the "Per1k" name, the value is the cost of ONE grounded prompt
	// above the free tier ($35 per 1,000 prompts = $0.035); estimateCost adds
	// it once per above-quota grounded call.
	geminiGroundingCostPer1k = 35.0 / 1_000 // $35 / 1k grounded prompts (above free tier)
	geminiGroundingFreeDaily = 1_500 // daily free grounding requests
)
// GeminiProvider implements the ai.Provider interface on top of
// google.golang.org/genai. The client pointer can be swapped at runtime when
// the API key is rotated (Reinitialize), so client and model are read/written
// only under mu.
type GeminiProvider struct {
	mu       sync.RWMutex
	client   *genai.Client
	model    string
	recorder UsageRecorder
	// groundingCallsToday is an in-process counter used for cost estimation only.
	// It is not persisted and resets on restart. The authoritative count lives in ai_usage.
	groundingCallsToday int
	groundingDate       time.Time
}

// newUnconfiguredGeminiProvider returns a provider with no client set.
// All Chat calls return ErrInternal until Reinitialize is called.
// Used at boot when neither the DB nor the environment supplies an API key.
func newUnconfiguredGeminiProvider(model string, recorder UsageRecorder) *GeminiProvider {
	return &GeminiProvider{
		model:         model,
		recorder:      recorder,
		groundingDate: time.Now().UTC().Truncate(24 * time.Hour),
	}
}
// Reinitialize swaps the underlying genai.Client for a new API key.
// Safe to call concurrently; the client is constructed outside the lock so
// callers block only briefly on the pointer swap.
func (p *GeminiProvider) Reinitialize(ctx context.Context, apiKey string) error {
	cc := &genai.ClientConfig{
		APIKey:  apiKey,
		Backend: genai.BackendGeminiAPI,
	}
	client, err := genai.NewClient(ctx, cc)
	if err != nil {
		return fmt.Errorf("gemini: reinitialize client: %w", err)
	}
	p.mu.Lock()
	defer p.mu.Unlock()
	p.client = client
	return nil
}
// NewGeminiProvider builds a ready-to-use provider for the given key and
// model. recorder may be nil, in which case usage events are dropped.
func NewGeminiProvider(ctx context.Context, apiKey, model string, recorder UsageRecorder) (*GeminiProvider, error) {
	c, err := genai.NewClient(ctx, &genai.ClientConfig{
		APIKey:  apiKey,
		Backend: genai.BackendGeminiAPI,
	})
	if err != nil {
		return nil, fmt.Errorf("gemini: new client: %w", err)
	}
	p := &GeminiProvider{
		client:        c,
		model:         model,
		recorder:      recorder,
		groundingDate: time.Now().UTC().Truncate(24 * time.Hour),
	}
	return p, nil
}
// Name identifies this provider in logs and usage records.
func (p *GeminiProvider) Name() string { return "gemini" }

// BaseURL is empty: the hosted Gemini API has no configurable endpoint.
func (p *GeminiProvider) BaseURL() string { return "" }

// SupportsJSONMode reports that Gemini can emit bare JSON output.
func (p *GeminiProvider) SupportsJSONMode() bool { return true }

// SupportsJSONSchema reports that Gemini supports constrained decoding
// against a response schema.
func (p *GeminiProvider) SupportsJSONSchema() bool { return true }

// Model returns the default model id, used when ChatRequest.Model is empty.
// Mutex-guarded because SetModel may change it at runtime.
func (p *GeminiProvider) Model() string {
	p.mu.RLock()
	defer p.mu.RUnlock()
	return p.model
}

// SetModel swaps the default model id (hot-reload from the admin UI).
func (p *GeminiProvider) SetModel(model string) {
	p.mu.Lock()
	defer p.mu.Unlock()
	p.model = model
}
// ListModelNames returns the ids of available models that support
// generateContent. It returns an error when no API key is configured yet, so
// callers (the settings UI) can distinguish "not connected" from
// "connected, zero models" — previously a nil client returned (nil, nil) and
// the UI reported Connected=true with no key set.
func (p *GeminiProvider) ListModelNames(ctx context.Context) ([]string, error) {
	p.mu.RLock()
	client := p.client
	p.mu.RUnlock()
	if client == nil {
		return nil, fmt.Errorf("gemini: api key not configured")
	}
	resp, err := client.Models.List(ctx, nil)
	if err != nil {
		return nil, fmt.Errorf("gemini: list models: %w", err)
	}
	var names []string
	for _, m := range resp.Items {
		for _, action := range m.SupportedActions {
			if action == "generateContent" {
				names = append(names, m.Name)
				break
			}
		}
	}
	return names, nil
}
// Chat sends one prompt to the Gemini API and returns the assistant text.
// Every call — success or failure — is recorded via the UsageRecorder.
// When req.JSONSchema is set, the schema drives both constrained decoding
// (ResponseSchema) and post-hoc validation; a mismatch yields a retryable
// ErrSchemaViolation carrying the raw output.
func (p *GeminiProvider) Chat(ctx context.Context, req *ChatRequest) (*ChatResponse, error) {
	p.mu.RLock()
	client := p.client
	p.mu.RUnlock()
	if client == nil {
		return nil, &ProviderError{Code: ErrInternal, Message: "gemini api key not configured — set it in admin settings", Retryable: false}
	}
	start := time.Now()
	model := req.Model
	if model == "" {
		model = p.Model()
	}
	cfg := &genai.GenerateContentConfig{}
	// System instruction
	if req.SystemPrompt != "" {
		cfg.SystemInstruction = genai.NewContentFromText(req.SystemPrompt, genai.RoleUser)
	}
	// Structured output: a full schema takes precedence over bare JSON mode.
	switch {
	case len(req.JSONSchema) > 0:
		cfg.ResponseMIMEType = "application/json"
		var schema map[string]any
		if err := json.Unmarshal(req.JSONSchema, &schema); err != nil {
			return nil, &ProviderError{Code: ErrInvalidRequest, Message: "invalid JSON schema", Retryable: false, Inner: err}
		}
		cfg.ResponseSchema = schemaFromMap(schema)
	case req.JSONMode:
		cfg.ResponseMIMEType = "application/json"
	}
	if req.Temperature != 0 {
		t := float32(req.Temperature)
		cfg.Temperature = &t
	}
	if req.MaxTokens != 0 {
		cfg.MaxOutputTokens = int32(req.MaxTokens)
	}
	// Google Search grounding. Gemini 2.x models take the GoogleSearch tool;
	// GoogleSearchRetrieval is the legacy Gemini 1.5 variant and is rejected
	// by gemini-2.5-* models with an invalid-argument error.
	if req.Grounded {
		cfg.Tools = []*genai.Tool{
			{GoogleSearch: &genai.GoogleSearch{}},
		}
	}
	resp, err := client.Models.GenerateContent(ctx, model,
		genai.Text(req.UserMessage), cfg)
	durationMs := int(time.Since(start).Milliseconds())
	// Record before error handling so failed calls show up in ai_usage too.
	event := p.buildUsageEvent(model, req, resp, err, durationMs)
	p.record(ctx, event)
	if err != nil {
		return nil, ClassifyError(err)
	}
	if len(resp.Candidates) == 0 {
		return nil, &ProviderError{Code: ErrInternal, Message: "no candidates in response", Retryable: false}
	}
	text := resp.Text()
	// Constrained decoding is best-effort; verify the output post-hoc.
	if len(req.JSONSchema) > 0 {
		if verr := ValidateSchema(req.JSONSchema, []byte(text)); verr != nil {
			return nil, &ProviderError{
				Code:      ErrSchemaViolation,
				Message:   fmt.Sprintf("response does not match schema: %v", verr),
				Retryable: true,
				Inner:     verr,
				RawOutput: text,
			}
		}
	}
	out := &ChatResponse{
		Content: text,
		Model:   resp.ModelVersion,
	}
	if resp.UsageMetadata != nil {
		out.PromptTokens = int(resp.UsageMetadata.PromptTokenCount)
		out.OutputTokens = int(resp.UsageMetadata.CandidatesTokenCount)
		out.TotalTokens = int(resp.UsageMetadata.TotalTokenCount)
	}
	if c := resp.Candidates[0]; c.GroundingMetadata != nil {
		out.SearchQueries = c.GroundingMetadata.WebSearchQueries
	}
	return out, nil
}
// buildUsageEvent assembles the ai_usage record for one Chat call. It covers
// both success and failure: token counts stay zero when the response carries
// no usage metadata, and a non-nil callErr is stored as its message.
func (p *GeminiProvider) buildUsageEvent(model string, req *ChatRequest, resp *genai.GenerateContentResponse, callErr error, durationMs int) UsageEvent {
	ev := UsageEvent{
		Provider:   "gemini",
		Model:      model,
		CallType:   req.CallType,
		Grounded:   req.Grounded,
		DurationMs: durationMs,
	}
	if resp != nil && resp.UsageMetadata != nil {
		ev.InputTokens = int(resp.UsageMetadata.PromptTokenCount)
		ev.OutputTokens = int(resp.UsageMetadata.CandidatesTokenCount)
	}
	if callErr != nil {
		ev.Error = callErr.Error()
	}
	ev.EstimatedCostUSD = p.estimateCost(ev.InputTokens, ev.OutputTokens, req.Grounded)
	return ev
}
// estimateCost approximates the USD cost of one call from token counts.
// A grounded call also consumes one unit of the daily free grounding quota;
// once the in-process counter exceeds geminiGroundingFreeDaily, each further
// grounded call adds the per-prompt grounding price.
// NOTE(review): the counter resets on process restart (see struct comment),
// so the above-free-tier surcharge can be under-estimated after a restart.
func (p *GeminiProvider) estimateCost(inputTokens, outputTokens int, grounded bool) float64 {
	cost := float64(inputTokens)*geminiInputCostPerToken +
		float64(outputTokens)*geminiOutputCostPerToken
	if grounded {
		p.mu.Lock()
		// Roll the counter over at UTC midnight.
		today := time.Now().UTC().Truncate(24 * time.Hour)
		if !today.Equal(p.groundingDate) {
			p.groundingCallsToday = 0
			p.groundingDate = today
		}
		p.groundingCallsToday++
		aboveFree := p.groundingCallsToday - geminiGroundingFreeDaily
		p.mu.Unlock()
		if aboveFree > 0 {
			// $0.035 per grounded prompt beyond the free daily requests.
			cost += geminiGroundingCostPer1k
		}
	}
	return cost
}
// record forwards the event to the configured UsageRecorder, if any.
// Recording is best-effort: a failed insert must never fail the AI call,
// so the error is deliberately discarded.
func (p *GeminiProvider) record(ctx context.Context, e UsageEvent) {
	if p.recorder == nil {
		return
	}
	_ = p.recorder.Record(ctx, e)
}
// schemaFromMap converts a raw JSON-schema map to genai.Schema for structured
// output. Only the subset Gemini's controlled generation understands is
// mapped: type, properties, required, description, enum, items, anyOf.
func schemaFromMap(m map[string]any) *genai.Schema {
	s := &genai.Schema{}
	if t, ok := m["type"].(string); ok {
		// JSON Schema uses lowercase type names ("object", "string", ...) while
		// the Gemini API's Type enum is uppercase ("OBJECT", "STRING", ...).
		// Map explicitly — passing the lowercase value through verbatim is not
		// a valid enum value for the API.
		switch t {
		case "object":
			s.Type = genai.TypeObject
		case "array":
			s.Type = genai.TypeArray
		case "string":
			s.Type = genai.TypeString
		case "number":
			s.Type = genai.TypeNumber
		case "integer":
			s.Type = genai.TypeInteger
		case "boolean":
			s.Type = genai.TypeBoolean
		default:
			s.Type = genai.Type(t) // unknown: pass through unchanged
		}
	}
	if props, ok := m["properties"].(map[string]any); ok {
		s.Properties = make(map[string]*genai.Schema, len(props))
		for k, v := range props {
			if vm, ok := v.(map[string]any); ok {
				s.Properties[k] = schemaFromMap(vm)
			}
		}
	}
	if req, ok := m["required"].([]any); ok {
		for _, r := range req {
			if rs, ok := r.(string); ok {
				s.Required = append(s.Required, rs)
			}
		}
	}
	if desc, ok := m["description"].(string); ok {
		s.Description = desc
	}
	if enum, ok := m["enum"].([]any); ok {
		for _, e := range enum {
			if es, ok := e.(string); ok {
				s.Enum = append(s.Enum, es)
			}
		}
	}
	if items, ok := m["items"].(map[string]any); ok {
		s.Items = schemaFromMap(items)
	}
	if anyOf, ok := m["anyOf"].([]any); ok {
		for _, a := range anyOf {
			if am, ok := a.(map[string]any); ok {
				s.AnyOf = append(s.AnyOf, schemaFromMap(am))
			}
		}
	}
	return s
}

View File

@@ -1,99 +0,0 @@
package ai
import (
"context"
"fmt"
"github.com/VikingOwl91/mistral-go-sdk/chat"
)
type chatFunc func(ctx context.Context, req *chat.CompletionRequest) (*chat.CompletionResponse, error)
type MistralProvider struct {
model string
chatFn chatFunc
limiter *rateLimiter // from ratelimit.go; nil disables
}
func newMistralProviderWithChat(model string, fn chatFunc, limiter *rateLimiter) *MistralProvider {
return &MistralProvider{model: model, chatFn: fn, limiter: limiter}
}
func (p *MistralProvider) Name() string { return "mistral" }
func (p *MistralProvider) SupportsJSONMode() bool { return true }
func (p *MistralProvider) SupportsJSONSchema() bool { return false }
func (p *MistralProvider) Chat(ctx context.Context, req *ChatRequest) (*ChatResponse, error) {
if p.chatFn == nil {
return nil, &ProviderError{Code: ErrInternal, Message: "mistral provider not configured", Retryable: false}
}
if p.limiter != nil {
p.limiter.wait()
}
systemContent := req.SystemPrompt
if len(req.JSONSchema) > 0 {
if systemContent != "" {
systemContent += "\n\n"
}
systemContent += "Respond with a JSON object that conforms to the following JSON Schema. " +
"Do not output anything outside the JSON. Schema:\n" + string(req.JSONSchema)
}
msgs := []chat.Message{}
if systemContent != "" {
msgs = append(msgs, &chat.SystemMessage{Content: chat.TextContent(systemContent)})
}
msgs = append(msgs, &chat.UserMessage{Content: chat.TextContent(req.UserMessage)})
creq := &chat.CompletionRequest{
Model: firstNonEmpty(req.Model, p.model),
Messages: msgs,
}
if req.JSONMode || len(req.JSONSchema) > 0 {
creq.ResponseFormat = &chat.ResponseFormat{Type: "json_object"}
}
if req.Temperature != 0 {
temp := float64(req.Temperature)
creq.Temperature = &temp
}
if req.MaxTokens != 0 {
creq.MaxTokens = &req.MaxTokens
}
resp, err := p.chatFn(ctx, creq)
if err != nil {
return nil, ClassifyError(err)
}
if len(resp.Choices) == 0 {
return nil, &ProviderError{Code: ErrInternal, Message: "no choices in response", Retryable: false}
}
content := resp.Choices[0].Message.Content.String()
if len(req.JSONSchema) > 0 {
if err := ValidateSchema(req.JSONSchema, []byte(content)); err != nil {
return nil, &ProviderError{
Code: ErrSchemaViolation,
Message: fmt.Sprintf("response does not match schema: %v", err),
Retryable: true,
Inner: err,
RawOutput: content,
}
}
}
return &ChatResponse{
Content: content,
Model: resp.Model,
PromptTokens: resp.Usage.PromptTokens,
OutputTokens: resp.Usage.CompletionTokens,
TotalTokens: resp.Usage.TotalTokens,
}, nil
}
// firstNonEmpty returns a unless it is the empty string, in which case b
// is returned (b may itself be empty).
func firstNonEmpty(a, b string) string {
	if a == "" {
		return b
	}
	return a
}

View File

@@ -1,123 +0,0 @@
package ai
import (
	"context"
	"errors"
	"strings"
	"testing"

	"github.com/VikingOwl91/mistral-go-sdk/chat"
)
// TestMistral_Chat_PassesThroughContent verifies that Chat forwards the first
// choice's text and the SDK usage counters into ChatResponse unchanged.
func TestMistral_Chat_PassesThroughContent(t *testing.T) {
	fakeChat := func(ctx context.Context, req *chat.CompletionRequest) (*chat.CompletionResponse, error) {
		return &chat.CompletionResponse{
			Model: "mistral-large-latest",
			Choices: []chat.CompletionChoice{
				{Message: chat.AssistantMessage{Content: chat.TextContent("ok")}},
			},
			Usage: chat.UsageInfo{PromptTokens: 3, CompletionTokens: 1, TotalTokens: 4},
		}, nil
	}
	p := newMistralProviderWithChat("mistral-large-latest", fakeChat, nil)
	resp, err := p.Chat(context.Background(), &ChatRequest{SystemPrompt: "s", UserMessage: "u"})
	if err != nil {
		t.Fatalf("Chat: %v", err)
	}
	if resp.Content != "ok" || resp.TotalTokens != 4 {
		t.Fatalf("unexpected: %+v", resp)
	}
}
// TestMistral_Chat_JSONModeSetsResponseFormat asserts that JSONMode=true maps
// to Mistral's "json_object" response format on the outgoing request.
func TestMistral_Chat_JSONModeSetsResponseFormat(t *testing.T) {
	// Capture the request the provider actually builds.
	var seen *chat.CompletionRequest
	fakeChat := func(ctx context.Context, req *chat.CompletionRequest) (*chat.CompletionResponse, error) {
		seen = req
		return &chat.CompletionResponse{Choices: []chat.CompletionChoice{{Message: chat.AssistantMessage{Content: chat.TextContent("{}")}}}}, nil
	}
	p := newMistralProviderWithChat("m", fakeChat, nil)
	_, err := p.Chat(context.Background(), &ChatRequest{UserMessage: "x", JSONMode: true})
	if err != nil {
		t.Fatalf("Chat: %v", err)
	}
	if seen == nil || seen.ResponseFormat == nil || seen.ResponseFormat.Type != "json_object" {
		t.Fatalf("ResponseFormat not set: %+v", seen)
	}
}
// TestMistral_Chat_SchemaEmbeddedInSystemPromptAndValidated checks that a
// JSONSchema request keeps the caller's system prompt, appends the schema
// instruction to it, and passes a schema-conforming response through unchanged.
func TestMistral_Chat_SchemaEmbeddedInSystemPromptAndValidated(t *testing.T) {
	schema := []byte(`{"type":"object","required":["foo"],"properties":{"foo":{"type":"string"}},"additionalProperties":false}`)
	var seen *chat.CompletionRequest
	fakeChat := func(ctx context.Context, req *chat.CompletionRequest) (*chat.CompletionResponse, error) {
		seen = req
		return &chat.CompletionResponse{Choices: []chat.CompletionChoice{{Message: chat.AssistantMessage{Content: chat.TextContent(`{"foo":"bar"}`)}}}}, nil
	}
	p := newMistralProviderWithChat("m", fakeChat, nil)
	resp, err := p.Chat(context.Background(), &ChatRequest{SystemPrompt: "base system", UserMessage: "x", JSONSchema: schema})
	if err != nil {
		t.Fatalf("Chat: %v", err)
	}
	if resp.Content != `{"foo":"bar"}` {
		t.Fatalf("content: %q", resp.Content)
	}
	// The schema instruction must be merged into the system message, not sent
	// as a separate message.
	sysMsg, ok := seen.Messages[0].(*chat.SystemMessage)
	if !ok {
		t.Fatalf("first message must be system: %T", seen.Messages[0])
	}
	sys := sysMsg.Content.String()
	if !containsAll(sys, []string{"base system", "JSON Schema"}) {
		t.Fatalf("system prompt missing expected fragments: %q", sys)
	}
}
// TestMistral_Chat_SchemaViolationReturnsRetryableError checks that output
// failing schema validation surfaces as a retryable ErrSchemaViolation that
// carries the raw model output for diagnostics.
func TestMistral_Chat_SchemaViolationReturnsRetryableError(t *testing.T) {
	schema := []byte(`{"type":"object","required":["foo"],"properties":{"foo":{"type":"string"}},"additionalProperties":false}`)
	fakeChat := func(ctx context.Context, req *chat.CompletionRequest) (*chat.CompletionResponse, error) {
		// Deliberately violates the schema: "foo" missing, "bar" not allowed.
		return &chat.CompletionResponse{Choices: []chat.CompletionChoice{{Message: chat.AssistantMessage{Content: chat.TextContent(`{"bar":1}`)}}}}, nil
	}
	p := newMistralProviderWithChat("m", fakeChat, nil)
	_, err := p.Chat(context.Background(), &ChatRequest{UserMessage: "x", JSONSchema: schema})
	if err == nil {
		t.Fatal("want error")
	}
	var pe *ProviderError
	if !errors.As(err, &pe) {
		t.Fatalf("want *ProviderError, got %T", err)
	}
	if pe.Code != ErrSchemaViolation || !pe.Retryable || pe.RawOutput != `{"bar":1}` {
		t.Fatalf("unexpected: %+v", pe)
	}
}
// TestMistral_Supports pins the provider's capability flags and name.
func TestMistral_Supports(t *testing.T) {
	p := newMistralProviderWithChat("m", nil, nil)
	if !p.SupportsJSONMode() {
		t.Fatal("Mistral supports JSON mode")
	}
	if p.SupportsJSONSchema() {
		t.Fatal("Mistral does NOT natively support JSON schema (prompt-based only)")
	}
	if p.Name() != "mistral" {
		t.Fatalf("Name: %q", p.Name())
	}
}
// containsAll reports whether every string in parts occurs in s.
func containsAll(s string, parts []string) bool {
	for _, p := range parts {
		if !contains(s, p) {
			return false
		}
	}
	return true
}

// contains reports whether sub occurs in s. Delegates to the standard
// library instead of the previous hand-rolled byte scan; semantics are
// identical (an empty sub always matches).
func contains(s, sub string) bool {
	return strings.Contains(s, sub)
}

// indexOf returns the byte index of the first occurrence of sub in s,
// or -1 when absent. Kept for callers of the old helper; now a thin
// wrapper over strings.Index.
func indexOf(s, sub string) int {
	return strings.Index(s, sub)
}

View File

@@ -1,184 +0,0 @@
package ai
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"sync"
"time"
)
// OllamaConfig configures an OllamaProvider.
type OllamaConfig struct {
	BaseURL string        // base URL of the Ollama HTTP API (no trailing slash)
	Model   string        // initially active model name
	Timeout time.Duration // HTTP client timeout; zero selects the 300s default
}

// OllamaProvider talks to an Ollama server over HTTP. The active model can
// be swapped at runtime, so it is guarded by a read/write mutex.
type OllamaProvider struct {
	cfg    OllamaConfig
	client *http.Client

	mu          sync.RWMutex
	activeModel string
}

// NewOllamaProvider builds a provider from cfg, applying a 300-second
// default timeout when none is given.
func NewOllamaProvider(cfg OllamaConfig) *OllamaProvider {
	const defaultTimeout = 300 * time.Second
	if cfg.Timeout == 0 {
		cfg.Timeout = defaultTimeout
	}
	return &OllamaProvider{
		cfg:         cfg,
		client:      &http.Client{Timeout: cfg.Timeout},
		activeModel: cfg.Model,
	}
}
// Name identifies this provider in logs and usage records.
func (p *OllamaProvider) Name() string { return "ollama" }

// SupportsJSONMode reports native support for the generic "json" format.
func (p *OllamaProvider) SupportsJSONMode() bool { return true }

// SupportsJSONSchema reports native support for schema-constrained output
// (the schema is forwarded as the request's "format" field).
func (p *OllamaProvider) SupportsJSONSchema() bool { return true }
// BaseURL returns the configured Ollama endpoint.
func (p *OllamaProvider) BaseURL() string {
	return p.cfg.BaseURL
}

// Model returns the currently active model. Safe for concurrent use.
func (p *OllamaProvider) Model() string {
	p.mu.RLock()
	defer p.mu.RUnlock()
	return p.activeModel
}

// SetModel switches the active model used by subsequent Chat calls.
func (p *OllamaProvider) SetModel(model string) {
	p.mu.Lock()
	defer p.mu.Unlock()
	p.activeModel = model
}
// OllamaModelInfo is a model entry from Ollama's /api/tags response.
type OllamaModelInfo struct {
	Name string `json:"name"` // model name including tag
	Size int64  `json:"size"` // size as reported by Ollama — presumably bytes on disk; confirm against API docs
}
// ListModels calls Ollama's /api/tags endpoint and returns available models.
func (p *OllamaProvider) ListModels(ctx context.Context) ([]OllamaModelInfo, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, p.cfg.BaseURL+"/api/tags", nil)
	if err != nil {
		return nil, err
	}
	resp, err := p.client.Do(req)
	if err != nil {
		return nil, err
	}
	defer func() { _ = resp.Body.Close() }()

	if resp.StatusCode != http.StatusOK {
		b, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("ollama /api/tags: status %d: %s", resp.StatusCode, b)
	}

	// Decode only the fields we care about.
	var payload struct {
		Models []struct {
			Name string `json:"name"`
			Size int64  `json:"size"`
		} `json:"models"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
		return nil, err
	}

	models := make([]OllamaModelInfo, 0, len(payload.Models))
	for _, m := range payload.Models {
		models = append(models, OllamaModelInfo{Name: m.Name, Size: m.Size})
	}
	return models, nil
}
// ollamaChatReq is the request body for POST /api/chat.
type ollamaChatReq struct {
	Model    string          `json:"model"`
	Messages []ollamaMessage `json:"messages"`
	Stream   bool            `json:"stream"` // always false here: one complete JSON response
	// Format is either the literal string "json" (generic JSON mode) or an
	// inline JSON schema object that Ollama enforces natively.
	Format  json.RawMessage `json:"format,omitempty"`
	Options *ollamaOptions  `json:"options,omitempty"`
}

// ollamaMessage is a single chat turn on the wire.
type ollamaMessage struct {
	Role    string `json:"role"` // "system", "user", or "assistant"
	Content string `json:"content"`
}

// ollamaOptions carries optional sampling parameters; zero values are omitted.
type ollamaOptions struct {
	Temperature float32 `json:"temperature,omitempty"`
	NumPredict  int     `json:"num_predict,omitempty"` // max tokens to generate
}

// ollamaChatResp is the subset of Ollama's /api/chat response we consume.
type ollamaChatResp struct {
	Model           string        `json:"model"`
	Message         ollamaMessage `json:"message"`
	Done            bool          `json:"done"`
	PromptEvalCount int           `json:"prompt_eval_count"` // input token count
	EvalCount       int           `json:"eval_count"`        // output token count
}
// Chat implements the Provider interface against Ollama's non-streaming
// /api/chat endpoint.
//
// Format selection: a JSONSchema takes precedence and is forwarded verbatim
// as the "format" field; otherwise JSONMode requests the generic "json"
// format. Temperature/MaxTokens are only sent when non-zero. Transport
// errors and HTTP >=400 responses are mapped through ClassifyError so
// callers get retryability information.
func (p *OllamaProvider) Chat(ctx context.Context, req *ChatRequest) (*ChatResponse, error) {
	// Per-request model override falls back to the provider's active model.
	model := req.Model
	if model == "" {
		model = p.Model()
	}
	body := ollamaChatReq{
		Model:    model,
		Messages: buildOllamaMessages(req),
		Stream:   false,
	}
	switch {
	case len(req.JSONSchema) > 0:
		body.Format = req.JSONSchema
	case req.JSONMode:
		body.Format = json.RawMessage(`"json"`)
	}
	if req.Temperature != 0 || req.MaxTokens != 0 {
		body.Options = &ollamaOptions{Temperature: req.Temperature, NumPredict: req.MaxTokens}
	}
	buf, err := json.Marshal(body)
	if err != nil {
		return nil, &ProviderError{Code: ErrInvalidRequest, Message: "marshal request", Retryable: false, Inner: err}
	}
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, p.cfg.BaseURL+"/api/chat", bytes.NewReader(buf))
	if err != nil {
		return nil, &ProviderError{Code: ErrInternal, Message: "new request", Retryable: false, Inner: err}
	}
	httpReq.Header.Set("Content-Type", "application/json")
	resp, err := p.client.Do(httpReq)
	if err != nil {
		return nil, ClassifyError(err)
	}
	defer func() { _ = resp.Body.Close() }()
	if resp.StatusCode >= 400 {
		// Include the response body in the classified error for diagnostics.
		b, _ := io.ReadAll(resp.Body)
		pe := ClassifyError(fmt.Errorf("ollama status %d: %s", resp.StatusCode, string(b)))
		return nil, pe
	}
	var out ollamaChatResp
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		return nil, &ProviderError{Code: ErrInternal, Message: "decode response", Retryable: false, Inner: err}
	}
	return &ChatResponse{
		Content:      out.Message.Content,
		Model:        out.Model,
		PromptTokens: out.PromptEvalCount,
		OutputTokens: out.EvalCount,
		TotalTokens:  out.PromptEvalCount + out.EvalCount,
	}, nil
}
// buildOllamaMessages converts a ChatRequest into Ollama wire messages,
// skipping empty prompts. The result is never nil (an empty slice encodes
// as [] rather than null in JSON).
func buildOllamaMessages(req *ChatRequest) []ollamaMessage {
	turns := []struct {
		role    string
		content string
	}{
		{"system", req.SystemPrompt},
		{"user", req.UserMessage},
	}
	out := make([]ollamaMessage, 0, 2)
	for _, tr := range turns {
		if tr.content != "" {
			out = append(out, ollamaMessage{Role: tr.role, Content: tr.content})
		}
	}
	return out
}

View File

@@ -1,98 +0,0 @@
package ai
import (
"context"
"encoding/json"
"io"
"net/http"
"net/http/httptest"
"testing"
"time"
)
// TestOllama_Chat_SendsRequestAndParsesResponse drives Chat against a stub
// HTTP server and checks both the outgoing wire format (path, stream=false,
// format="json") and the parsed ChatResponse fields.
func TestOllama_Chat_SendsRequestAndParsesResponse(t *testing.T) {
	var captured map[string]any
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/api/chat" {
			t.Errorf("path: got %s, want /api/chat", r.URL.Path)
		}
		body, _ := io.ReadAll(r.Body)
		_ = json.Unmarshal(body, &captured)
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{
			"model":"qwen2.5:14b-instruct",
			"message":{"role":"assistant","content":"hello"},
			"done":true,
			"prompt_eval_count":10,
			"eval_count":5
		}`))
	}))
	defer srv.Close()
	p := NewOllamaProvider(OllamaConfig{BaseURL: srv.URL, Model: "qwen2.5:14b-instruct", Timeout: 5 * time.Second})
	resp, err := p.Chat(context.Background(), &ChatRequest{SystemPrompt: "be brief", UserMessage: "hi", JSONMode: true})
	if err != nil {
		t.Fatalf("Chat: %v", err)
	}
	if resp.Content != "hello" {
		t.Fatalf("content: got %q", resp.Content)
	}
	if resp.PromptTokens != 10 || resp.OutputTokens != 5 || resp.TotalTokens != 15 {
		t.Fatalf("tokens: %+v", resp)
	}
	if captured["stream"] != false {
		t.Fatalf("stream must be false: %v", captured["stream"])
	}
	if captured["format"] != "json" {
		t.Fatalf("format for JSONMode=true must be \"json\", got %v", captured["format"])
	}
}
// TestOllama_Chat_ForwardsSchema asserts a JSONSchema is forwarded verbatim
// as a structured "format" object rather than the generic "json" string.
func TestOllama_Chat_ForwardsSchema(t *testing.T) {
	var captured map[string]any
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		body, _ := io.ReadAll(r.Body)
		_ = json.Unmarshal(body, &captured)
		_, _ = w.Write([]byte(`{"model":"m","message":{"role":"assistant","content":"{}"},"done":true}`))
	}))
	defer srv.Close()
	p := NewOllamaProvider(OllamaConfig{BaseURL: srv.URL, Model: "m", Timeout: time.Second})
	schema := []byte(`{"type":"object"}`)
	_, err := p.Chat(context.Background(), &ChatRequest{UserMessage: "x", JSONSchema: schema})
	if err != nil {
		t.Fatalf("Chat: %v", err)
	}
	fmtField, ok := captured["format"].(map[string]any)
	if !ok {
		t.Fatalf("format must be an object when JSONSchema set: %v", captured["format"])
	}
	if fmtField["type"] != "object" {
		t.Fatalf("schema not forwarded: %v", fmtField)
	}
}
// TestOllama_Chat_Unavailable points the provider at an unroutable address
// and checks the failure classifies as a retryable Unavailable/Timeout.
func TestOllama_Chat_Unavailable(t *testing.T) {
	// Port 1 is effectively never listening; connect fails fast.
	p := NewOllamaProvider(OllamaConfig{BaseURL: "http://127.0.0.1:1", Timeout: 100 * time.Millisecond})
	_, err := p.Chat(context.Background(), &ChatRequest{UserMessage: "x"})
	if err == nil {
		t.Fatal("want error, got nil")
	}
	pe := ClassifyError(err)
	if pe.Code != ErrUnavailable && pe.Code != ErrTimeout {
		t.Fatalf("expected Unavailable or Timeout, got %v", pe.Code)
	}
	if !pe.Retryable {
		t.Fatal("must be retryable")
	}
}
// TestOllama_Supports pins the provider's capability flags and name.
func TestOllama_Supports(t *testing.T) {
	p := NewOllamaProvider(OllamaConfig{BaseURL: "x"})
	if !p.SupportsJSONMode() || !p.SupportsJSONSchema() {
		t.Fatal("Ollama supports both")
	}
	if p.Name() != "ollama" {
		t.Fatalf("Name: %q", p.Name())
	}
}

View File

@@ -12,6 +12,14 @@ type Provider interface {
SupportsJSONSchema() bool
}
// ModelSelector is implemented by providers that support runtime model switching.
type ModelSelector interface {
	// Model returns the currently active model name.
	Model() string
	// SetModel switches subsequent calls to the named model.
	SetModel(string)
	// ListModelNames returns the model names the backend has available.
	ListModelNames(ctx context.Context) ([]string, error)
	// BaseURL reports the backend endpoint the provider talks to.
	BaseURL() string
}
type ChatRequest struct {
SystemPrompt string
UserMessage string
@@ -20,12 +28,15 @@ type ChatRequest struct {
Temperature float32
JSONMode bool
JSONSchema json.RawMessage
Grounded bool // request Google Search grounding (Gemini only)
CallType string // e.g. "research", "enrich_b", "similarity" — for usage tracking
}
type ChatResponse struct {
Content string
Model string
PromptTokens int
OutputTokens int
TotalTokens int
Content string
Model string
PromptTokens int
OutputTokens int
TotalTokens int
SearchQueries []string // populated when grounding was used
}

View File

@@ -1,48 +0,0 @@
package ai
import (
"sort"
"sync"
"testing"
"time"
)
// TestRateLimiterSerializesCalls launches three concurrent waiters on a
// 2 req/s limiter and asserts consecutive completion times are spaced by
// roughly the 500ms minimum interval (400ms lower bound tolerates
// scheduling jitter).
func TestRateLimiterSerializesCalls(t *testing.T) {
	rl := newRateLimiter(2.0) // 2 req/s → minInterval 500ms
	var (
		mu    sync.Mutex
		times []time.Time
	)
	var wg sync.WaitGroup
	for i := 0; i < 3; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			rl.wait()
			mu.Lock()
			times = append(times, time.Now())
			mu.Unlock()
		}()
	}
	wg.Wait()
	// Sort times; gaps between consecutive must be >= 500ms - small tolerance.
	sort.Slice(times, func(i, j int) bool { return times[i].Before(times[j]) })
	if gap := times[1].Sub(times[0]); gap < 400*time.Millisecond {
		t.Errorf("gap[0->1] = %v, want >= 400ms", gap)
	}
	if gap := times[2].Sub(times[1]); gap < 400*time.Millisecond {
		t.Errorf("gap[1->2] = %v, want >= 400ms", gap)
	}
}
// TestRateLimiterDisabledWhenRPSZero checks that rps=0 builds a no-op
// limiter: five back-to-back waits must not add measurable delay.
func TestRateLimiterDisabledWhenRPSZero(t *testing.T) {
	rl := newRateLimiter(0) // disabled
	start := time.Now()
	for i := 0; i < 5; i++ {
		rl.wait()
	}
	if elapsed := time.Since(start); elapsed > 50*time.Millisecond {
		t.Errorf("expected no throttling when rps=0, elapsed %v", elapsed)
	}
}

View File

@@ -1,33 +0,0 @@
package ai
import (
"sync"
"time"
)
// rateLimiter enforces a minimum interval between calls. Set rps<=0 to disable.
type rateLimiter struct {
	mu          sync.Mutex
	lastReq     time.Time
	minInterval time.Duration
}

// newRateLimiter builds a limiter spacing calls 1/rps seconds apart.
// A non-positive rps yields a no-op limiter (minInterval 0).
func newRateLimiter(rps float64) *rateLimiter {
	rl := &rateLimiter{}
	if rps > 0 {
		rl.minInterval = time.Duration(float64(time.Second) / rps)
	}
	return rl
}

// wait blocks until at least minInterval has elapsed since the previous
// call. Sleeping while holding the mutex is intentional: it serializes
// concurrent callers so they are released one interval apart.
func (rl *rateLimiter) wait() {
	if rl.minInterval == 0 {
		return
	}
	rl.mu.Lock()
	defer rl.mu.Unlock()
	if pause := rl.minInterval - time.Since(rl.lastReq); pause > 0 {
		time.Sleep(pause)
	}
	rl.lastReq = time.Now()
}

View File

@@ -0,0 +1,22 @@
package ai
import "context"
// UsageEvent holds per-call telemetry recorded after each LLM call.
type UsageEvent struct {
	Provider         string  // provider name as reported by Provider.Name()
	Model            string  // model that served the call
	CallType         string  // caller-supplied label, e.g. "research", "enrich_b", "similarity"
	InputTokens      int     // prompt tokens consumed
	OutputTokens     int     // completion tokens produced
	Grounded         bool    // whether search grounding was requested
	DurationMs       int     // call duration in milliseconds
	EstimatedCostUSD float64 // best-effort cost estimate in USD
	Error            string  // empty on success
}

// UsageRecorder persists a UsageEvent. Implementations must be safe for
// concurrent use. A nil UsageRecorder is valid and silently discards events.
type UsageRecorder interface {
	Record(ctx context.Context, e UsageEvent) error
}

View File

@@ -0,0 +1,63 @@
package crypto
import (
"crypto/aes"
"crypto/cipher"
"crypto/rand"
"crypto/sha256"
"errors"
"io"
"golang.org/x/crypto/hkdf"
)
// ErrDecryptFailed is returned for any ciphertext that cannot be
// authenticated and decrypted (truncated, tampered, or wrong key).
var ErrDecryptFailed = errors.New("secretbox: decryption failed")

// DeriveKey derives a 32-byte AES key from an arbitrary secret using
// HKDF-SHA256 with a fixed application-specific info string.
func DeriveKey(secret []byte) ([32]byte, error) {
	var key [32]byte
	kdf := hkdf.New(sha256.New, secret, nil, []byte("marktvogt:settings:v1"))
	_, err := io.ReadFull(kdf, key[:])
	return key, err
}
// Seal encrypts plaintext with AES-256-GCM under a freshly generated
// random nonce. Output format: nonce(12) || ciphertext+tag.
func Seal(key [32]byte, plaintext []byte) ([]byte, error) {
	blk, err := aes.NewCipher(key[:])
	if err != nil {
		return nil, err
	}
	aead, err := cipher.NewGCM(blk)
	if err != nil {
		return nil, err
	}
	// Standard GCM nonce size is 12 bytes; a fresh nonce per call keeps
	// identical plaintexts from producing identical ciphertexts.
	nonce := make([]byte, aead.NonceSize())
	if _, err := io.ReadFull(rand.Reader, nonce); err != nil {
		return nil, err
	}
	// Prepend the nonce so Open can recover it from the blob itself.
	return aead.Seal(nonce, nonce, plaintext, nil), nil
}
// Open decrypts a ciphertext produced by Seal. All authentication and
// length failures collapse into ErrDecryptFailed so callers cannot
// distinguish tampering from a wrong key.
func Open(key [32]byte, ciphertext []byte) ([]byte, error) {
	blk, err := aes.NewCipher(key[:])
	if err != nil {
		return nil, err
	}
	aead, err := cipher.NewGCM(blk)
	if err != nil {
		return nil, err
	}
	n := aead.NonceSize()
	if len(ciphertext) < n {
		return nil, ErrDecryptFailed
	}
	nonce, sealed := ciphertext[:n], ciphertext[n:]
	plaintext, err := aead.Open(nil, nonce, sealed, nil)
	if err != nil {
		return nil, ErrDecryptFailed
	}
	return plaintext, nil
}

View File

@@ -0,0 +1,61 @@
package crypto
import (
"bytes"
"errors"
"testing"
)
// TestRoundTrip checks that Seal→Open restores the plaintext under the
// same derived key.
func TestRoundTrip(t *testing.T) {
	key, err := DeriveKey([]byte("test-secret"))
	if err != nil {
		t.Fatal(err)
	}
	plaintext := []byte("super-secret-api-key-AIzaSyXXXX")
	ct, err := Seal(key, plaintext)
	if err != nil {
		t.Fatal(err)
	}
	got, err := Open(key, ct)
	if err != nil {
		t.Fatal(err)
	}
	if !bytes.Equal(got, plaintext) {
		t.Fatalf("got %q, want %q", got, plaintext)
	}
}
// TestTamperedCiphertext flips the last ciphertext byte and expects GCM
// authentication to fail with ErrDecryptFailed.
func TestTamperedCiphertext(t *testing.T) {
	key, err := DeriveKey([]byte("test-secret"))
	if err != nil {
		t.Fatal(err)
	}
	ct, err := Seal(key, []byte("value"))
	if err != nil {
		t.Fatal(err)
	}
	ct[len(ct)-1] ^= 0xFF
	_, err = Open(key, ct)
	if !errors.Is(err, ErrDecryptFailed) {
		t.Fatalf("want ErrDecryptFailed, got %v", err)
	}
}
// TestWrongKey ensures a key derived from a different secret cannot open
// the box.
func TestWrongKey(t *testing.T) {
	k1, _ := DeriveKey([]byte("key-one"))
	k2, _ := DeriveKey([]byte("key-two"))
	ct, _ := Seal(k1, []byte("value"))
	_, err := Open(k2, ct)
	if !errors.Is(err, ErrDecryptFailed) {
		t.Fatalf("want ErrDecryptFailed, got %v", err)
	}
}
// TestSealProducesUniqueNonces seals the same plaintext twice and expects
// differing ciphertexts, since Seal must draw a fresh random nonce per call.
func TestSealProducesUniqueNonces(t *testing.T) {
	key, _ := DeriveKey([]byte("k"))
	ct1, _ := Seal(key, []byte("x"))
	ct2, _ := Seal(key, []byte("x"))
	if bytes.Equal(ct1, ct2) {
		t.Fatal("same nonce produced twice — randomness broken")
	}
}

View File

@@ -1,6 +1,7 @@
package server
import (
"context"
"fmt"
"net/http"
@@ -15,6 +16,7 @@ import (
"marktvogt.de/backend/internal/domain/user"
"marktvogt.de/backend/internal/middleware"
"marktvogt.de/backend/internal/pkg/ai"
apicrypto "marktvogt.de/backend/internal/pkg/crypto"
"marktvogt.de/backend/internal/pkg/email"
"marktvogt.de/backend/internal/pkg/geocode"
"marktvogt.de/backend/internal/pkg/scrape"
@@ -69,12 +71,23 @@ func (s *Server) registerRoutes() {
geocodeLimit := middleware.RateLimit(10.0/60.0, 10) // 10 per minute per IP
market.RegisterRoutes(v1, marketHandler, submissionHandler, geocodeHandler, submitLimit, geocodeLimit)
// Admin market routes
adminMarketHandler := market.NewAdminHandler(marketSvc)
aiProvider, err := ai.NewFromConfig(s.cfg.AI)
// AI settings store + usage repo — used by AI provider and settings handler
encKey, err := apicrypto.DeriveKey([]byte(s.cfg.JWT.Secret))
if err != nil {
panic(fmt.Errorf("derive settings encryption key: %w", err))
}
settingsStore := settings.NewStore(s.db, encKey)
usageRepo := settings.NewUsageRepo(s.db)
// AI provider — reads key from DB, falls back to GEMINI_API_KEY env bootstrap
ctx := context.Background()
aiProvider, err := ai.NewFromConfig(ctx, s.cfg.AI, settingsStore, usageRepo)
if err != nil {
panic(fmt.Errorf("init ai provider: %w", err))
}
// Admin market routes
adminMarketHandler := market.NewAdminHandler(marketSvc)
scraper := scrape.New(s.cfg.Discovery.CrawlerUserAgent)
searchClient := search.NewSearxng(search.SearxngConfig{BaseURL: s.cfg.Search.SearxngURL})
researchHandler := market.NewResearchHandler(marketSvc, aiProvider, searchClient, scraper)
@@ -92,7 +105,7 @@ func (s *Server) registerRoutes() {
discovery.RegisterRoutes(v1, discoveryHandler, requireAuth, requireAdmin, requireTickToken)
// AI settings routes
settingsHandler := settings.NewHandler(aiProvider)
settingsHandler := settings.NewHandler(aiProvider, settingsStore, usageRepo)
settings.RegisterRoutes(v1, settingsHandler, requireAuth, requireAdmin)
}

View File

@@ -0,0 +1 @@
-- Down migration: drop the settings table. This discards the stored model
-- choice, grounding toggle, and the encrypted API key.
DROP TABLE IF EXISTS system_settings;

View File

@@ -0,0 +1,11 @@
-- Key/value settings store. Plaintext values go in value_text; secrets
-- (e.g. the Gemini API key) are stored encrypted in value_encrypted.
CREATE TABLE system_settings (
    key TEXT PRIMARY KEY,
    value_text TEXT,
    value_encrypted BYTEA,
    updated_at TIMESTAMPTZ NOT NULL DEFAULT now(),
    -- Last admin who changed the row; nulled out if that user is deleted.
    updated_by UUID REFERENCES users (id) ON DELETE SET NULL
);

-- Seed defaults so the application can read settings before any admin edit.
INSERT INTO system_settings (key, value_text) VALUES
('gemini.model', 'gemini-2.5-flash-lite'),
('gemini.grounding_enabled', 'true');

View File

@@ -0,0 +1 @@
-- Down migration: drop the AI usage telemetry table (history is lost).
DROP TABLE IF EXISTS ai_usage;

View File

@@ -0,0 +1,16 @@
-- Per-call LLM telemetry: one row per provider invocation.
CREATE TABLE ai_usage (
    id BIGSERIAL PRIMARY KEY,
    created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
    provider TEXT NOT NULL,
    model TEXT NOT NULL,
    call_type TEXT NOT NULL, -- caller label, e.g. 'research', 'enrich_b', 'similarity'
    input_tokens INT NOT NULL DEFAULT 0,
    output_tokens INT NOT NULL DEFAULT 0,
    grounded BOOLEAN NOT NULL DEFAULT false, -- search grounding was requested
    duration_ms INT NOT NULL DEFAULT 0,
    estimated_cost_usd NUMERIC(10,6) NOT NULL DEFAULT 0,
    error TEXT -- NULL on success
);

-- Newest-first listing for the recent-calls table.
CREATE INDEX ai_usage_created_at_desc_idx ON ai_usage (created_at DESC);
-- Partial index to count grounded calls per day against the free quota.
CREATE INDEX ai_usage_grounded_today_idx ON ai_usage (created_at) WHERE grounded;

View File

@@ -184,12 +184,41 @@ export interface SubmitMarketRequest {
}
// AI settings
/** Aggregated usage counters for one rollup window (e.g. today or 30 days). */
export interface AIUsageStats {
	calls: number;
	input_tokens: number;
	output_tokens: number;
	/** Number of calls in the window that used search grounding. */
	grounding_calls: number;
	estimated_cost_usd: number;
}
/** One recorded LLM call, as returned by the AI usage listing endpoint. */
export interface AIUsageEvent {
	id: number;
	/** ISO timestamp of the call. */
	created_at: string;
	provider: string;
	model: string;
	/** Caller-supplied label, e.g. "research", "enrich_b", "similarity". */
	call_type: string;
	input_tokens: number;
	output_tokens: number;
	grounded: boolean;
	duration_ms: number;
	estimated_cost_usd: number;
	/** Set when the call failed; absent on success. */
	error?: string;
}
/** AI provider status + usage rollups; drives the admin settings page. */
export interface AIStatus {
	provider: string;
	connected: boolean;
	base_url?: string;
	model: string;
	/** Models selectable in the UI. */
	models: string[];
	/** Short fingerprint of the stored API key; absent when none is set. */
	api_key_fingerprint?: string;
	grounding_enabled: boolean;
	/** Daily grounding request quota shown next to the toggle. */
	grounding_quota: number;
	usage: {
		today: AIUsageStats;
		month: AIUsageStats;
		grounding_used_today: number;
	};
}
// AI Research types

View File

@@ -1,11 +1,14 @@
import { fail } from '@sveltejs/kit';
import { serverFetch } from '$lib/api/client.server.js';
import type { AIStatus } from '$lib/api/types.js';
import type { AIStatus, AIUsageEvent } from '$lib/api/types.js';
import type { Actions, PageServerLoad } from './$types.js';
export const load: PageServerLoad = async ({ cookies, fetch }) => {
const res = await serverFetch<AIStatus>('/admin/settings/ai', cookies, { fetch });
return { ai: res.data };
const [statusRes, usageRes] = await Promise.all([
serverFetch<AIStatus>('/admin/settings/ai', cookies, { fetch }),
serverFetch<AIUsageEvent[]>('/admin/settings/ai/usage?limit=20', cookies, { fetch })
]);
return { ai: statusRes.data, recentUsage: usageRes.data ?? [] };
};
export const actions: Actions = {
@@ -21,10 +24,46 @@ export const actions: Actions = {
body: JSON.stringify({ model }),
fetch
});
return { success: true, model };
return { success: true, action: 'model', model };
} catch (err) {
const message = err instanceof Error ? err.message : 'Modell konnte nicht gesetzt werden.';
return fail(500, { error: message });
return fail(500, { error: err instanceof Error ? err.message : 'Fehler beim Speichern.' });
}
},
	/** Stores a new API key; the backend persists it and returns a fingerprint. */
	setApiKey: async ({ cookies, fetch, request }) => {
		const data = await request.formData();
		const api_key = data.get('api_key');
		if (!api_key || typeof api_key !== 'string') {
			return fail(400, { error: 'API-Key fehlt.' });
		}
		try {
			const res = await serverFetch<{ api_key_fingerprint: string }>(
				'/admin/settings/ai/key',
				cookies,
				{
					method: 'POST',
					body: JSON.stringify({ api_key }),
					fetch
				}
			);
			// `action` lets the page discriminate which card's success message to show.
			return { success: true, action: 'apiKey', fingerprint: res.data.api_key_fingerprint };
		} catch (err) {
			return fail(500, { error: err instanceof Error ? err.message : 'Fehler beim Speichern.' });
		}
	},
	/** Toggles search grounding on or off via the settings endpoint. */
	setGrounding: async ({ cookies, fetch, request }) => {
		const data = await request.formData();
		// The form posts the literal string 'true'/'false' in a hidden input.
		const enabled = data.get('enabled') === 'true';
		try {
			await serverFetch('/admin/settings/ai/grounding', cookies, {
				method: 'POST',
				body: JSON.stringify({ enabled }),
				fetch
			});
			return { success: true, action: 'grounding', enabled };
		} catch (err) {
			return fail(500, { error: err instanceof Error ? err.message : 'Fehler beim Speichern.' });
		}
	}
};

View File

@@ -12,8 +12,22 @@
let selectedModel = $state(untrack(() => data.ai.model));
let saving = $state(false);
let showKeyInput = $state(!data.ai.api_key_fingerprint);
let groundingEnabled = $state(untrack(() => data.ai.grounding_enabled));
let activeModel = $derived(form?.success && form.model ? form.model : data.ai.model);
let activeModel = $derived(
form?.success && form.action === 'model' && form.model ? form.model : data.ai.model
);
function formatCost(usd: number): string {
if (usd === 0) return '$0.00';
if (usd < 0.001) return '<$0.001';
return `$${usd.toFixed(3)}`;
}
	/** Format an ISO timestamp for the recent-calls table (German short form). */
	function formatDate(iso: string): string {
		return new Date(iso).toLocaleString('de-DE', { dateStyle: 'short', timeStyle: 'short' });
	}
</script>
<div class="space-y-6">
@@ -24,13 +38,13 @@
</p>
</div>
<!-- Card 1: Provider & API Key -->
<div class="rounded-lg border border-stone-200 bg-white dark:border-stone-700 dark:bg-stone-900">
<div class="border-b border-stone-200 px-6 py-4 dark:border-stone-700">
<h2 class="text-base font-semibold text-stone-900 dark:text-stone-100">KI-Provider</h2>
</div>
<div class="space-y-4 px-6 py-4">
<!-- Provider + status row -->
<!-- Provider + status -->
<div class="flex items-center gap-4">
<div>
<span class="text-xs font-medium tracking-wide text-stone-400 uppercase">Provider</span>
@@ -38,16 +52,6 @@
{data.ai.provider}
</p>
</div>
{#if data.ai.base_url}
<div>
<span class="text-xs font-medium tracking-wide text-stone-400 uppercase">URL</span>
<p class="mt-0.5 font-mono text-sm text-stone-600 dark:text-stone-400">
{data.ai.base_url}
</p>
</div>
{/if}
<div class="ml-auto">
{#if data.ai.connected}
<span
@@ -67,69 +71,236 @@
</div>
</div>
<!-- Model selector (Ollama only) -->
{#if data.ai.provider === 'ollama'}
<div class="border-t border-stone-100 pt-4 dark:border-stone-800">
<span class="text-xs font-medium tracking-wide text-stone-400 uppercase"
>Aktives Modell</span
>
{#if data.ai.connected && data.ai.models.length > 0}
<form
method="POST"
action="?/setModel"
use:enhance={() => {
saving = true;
return async ({ update }) => {
await update();
saving = false;
};
}}
class="mt-2 flex items-center gap-3"
<!-- API Key -->
<div class="border-t border-stone-100 pt-4 dark:border-stone-800">
<span class="text-xs font-medium tracking-wide text-stone-400 uppercase">API-Key</span>
{#if !showKeyInput && data.ai.api_key_fingerprint}
<div class="mt-1 flex items-center gap-3">
<p class="font-mono text-sm text-stone-600 dark:text-stone-400">
{data.ai.api_key_fingerprint}
</p>
<button
onclick={() => (showKeyInput = true)}
class="text-xs text-stone-400 underline hover:text-stone-600 dark:hover:text-stone-300"
>
<select
name="model"
bind:value={selectedModel}
class="focus:border-primary-500 focus:ring-primary-500 rounded-md border border-stone-300 bg-white px-3 py-2 text-sm text-stone-900 shadow-sm focus:ring-1 focus:outline-none dark:border-stone-600 dark:bg-stone-800 dark:text-stone-100"
Ersetzen
</button>
</div>
{#if form?.success && form.action === 'apiKey'}
<p class="mt-1 text-xs text-green-600 dark:text-green-400">
API-Key gespeichert. Neuer Fingerprint: <span class="font-mono"
>{form.fingerprint}</span
>
{#each data.ai.models as model}
<option value={model}>{model}</option>
{/each}
</select>
<button
type="submit"
disabled={saving || selectedModel === activeModel}
class="bg-primary-600 hover:bg-primary-700 rounded-md px-4 py-2 text-sm font-medium text-white shadow-sm disabled:cursor-not-allowed disabled:opacity-50"
>
{saving ? 'Speichert…' : 'Übernehmen'}
</button>
</form>
{#if form?.success}
<p class="mt-2 text-xs text-green-600 dark:text-green-400">
Modell auf <span class="font-mono">{form.model}</span> gesetzt.
</p>
{/if}
{#if form?.error}
<p class="mt-2 text-xs text-red-600 dark:text-red-400">{form.error}</p>
{/if}
{:else if data.ai.connected}
<p class="mt-1 text-sm text-stone-500 dark:text-stone-400">
Keine Modelle gefunden. Installiere ein Modell mit
<code class="rounded bg-stone-100 px-1 py-0.5 font-mono text-xs dark:bg-stone-800">
ollama pull &lt;model&gt;
</code>
</p>
{:else}
<p class="mt-1 font-mono text-sm text-stone-500 dark:text-stone-400">
{data.ai.model || '—'}
</p>
<p class="mt-1 text-xs text-red-500">
Ollama nicht erreichbar — prüfe ob der Dienst läuft.
</p>
{/if}
{:else}
<form
method="POST"
action="?/setApiKey"
use:enhance={() => {
saving = true;
return async ({ update }) => {
await update();
saving = false;
showKeyInput = false;
};
}}
class="mt-2 flex items-center gap-3"
>
<input
type="password"
name="api_key"
placeholder="AIzaSy…"
required
autocomplete="off"
class="focus:border-primary-500 focus:ring-primary-500 flex-1 rounded-md border border-stone-300 bg-white px-3 py-2 text-sm text-stone-900 shadow-sm focus:ring-1 focus:outline-none dark:border-stone-600 dark:bg-stone-800 dark:text-stone-100"
/>
<button
type="submit"
disabled={saving}
class="bg-primary-600 hover:bg-primary-700 rounded-md px-4 py-2 text-sm font-medium text-white shadow-sm disabled:opacity-50"
>
{saving ? 'Speichert…' : 'Speichern'}
</button>
{#if data.ai.api_key_fingerprint}
<button
type="button"
onclick={() => (showKeyInput = false)}
class="text-xs text-stone-400 underline"
>
Abbrechen
</button>
{/if}
</form>
{#if form?.error}
<p class="mt-1 text-xs text-red-500">{form.error}</p>
{/if}
{#if !data.ai.api_key_fingerprint}
<p class="mt-1 text-xs text-amber-600 dark:text-amber-400">
Kein API-Key konfiguriert — KI-Funktionen sind deaktiviert.
</p>
{/if}
{/if}
</div>
</div>
</div>
<!-- Card 2: Model Selector -->
<div class="rounded-lg border border-stone-200 bg-white dark:border-stone-700 dark:bg-stone-900">
<div class="border-b border-stone-200 px-6 py-4 dark:border-stone-700">
<h2 class="text-base font-semibold text-stone-900 dark:text-stone-100">Aktives Modell</h2>
</div>
<div class="px-6 py-4">
{#if data.ai.connected && data.ai.models.length > 0}
<form
method="POST"
action="?/setModel"
use:enhance={() => {
saving = true;
return async ({ update }) => {
await update();
saving = false;
};
}}
class="flex items-center gap-3"
>
<select
name="model"
bind:value={selectedModel}
class="focus:border-primary-500 focus:ring-primary-500 rounded-md border border-stone-300 bg-white px-3 py-2 text-sm text-stone-900 shadow-sm focus:ring-1 focus:outline-none dark:border-stone-600 dark:bg-stone-800 dark:text-stone-100"
>
{#each data.ai.models as model}
<option value={model}>{model}</option>
{/each}
</select>
<button
type="submit"
disabled={saving || selectedModel === activeModel}
class="bg-primary-600 hover:bg-primary-700 rounded-md px-4 py-2 text-sm font-medium text-white shadow-sm disabled:cursor-not-allowed disabled:opacity-50"
>
{saving ? 'Speichert…' : 'Übernehmen'}
</button>
</form>
{#if form?.success && form.action === 'model'}
<p class="mt-2 text-xs text-green-600 dark:text-green-400">
Modell auf <span class="font-mono">{form.model}</span> gesetzt.
</p>
{/if}
{:else if !data.ai.connected}
<p class="font-mono text-sm text-stone-500 dark:text-stone-400">{data.ai.model || '—'}</p>
<p class="mt-1 text-xs text-stone-400">Verbinde zuerst den API-Key.</p>
{:else}
<p class="font-mono text-sm text-stone-500 dark:text-stone-400">{data.ai.model || '—'}</p>
<p class="mt-1 text-xs text-stone-400">Keine Modelle geladen.</p>
{/if}
</div>
</div>
<!-- Card 3: Grounding — toggle for mixing Google-Search results into AI requests,
     with the free-tier quota counter (used today / daily quota) from the load data. -->
<div class="rounded-lg border border-stone-200 bg-white dark:border-stone-700 dark:bg-stone-900">
<div class="border-b border-stone-200 px-6 py-4 dark:border-stone-700">
<h2 class="text-base font-semibold text-stone-900 dark:text-stone-100">
Google Suche (Grounding)
</h2>
</div>
<div class="px-6 py-4">
<div class="flex items-center justify-between">
<div>
<p class="text-sm text-stone-700 dark:text-stone-300">
Google-Suchergebnisse in KI-Anfragen einbeziehen
</p>
<p class="mt-0.5 text-xs text-stone-400">
{data.ai.usage.grounding_used_today} / {data.ai.grounding_quota} freie Anfragen heute
</p>
</div>
<form
method="POST"
action="?/setGrounding"
use:enhance={() => {
// SvelteKit builds the FormData (including the hidden `enabled` field)
// before this callback runs, so the field is read while
// `groundingEnabled` still holds the OLD value. Flipping the optimistic
// state here — rather than in an onclick handler on the submit button —
// cannot race Svelte's async DOM flush and invert the submitted value.
groundingEnabled = !groundingEnabled;
return async ({ update }) => {
await update();
};
}}
>
<!-- Submits the DESIRED state: the opposite of the currently rendered toggle. -->
<input type="hidden" name="enabled" value={groundingEnabled ? 'false' : 'true'} />
<button
type="submit"
aria-label={groundingEnabled ? 'Grounding deaktivieren' : 'Grounding aktivieren'}
class="relative inline-flex h-6 w-11 items-center rounded-full transition-colors {groundingEnabled
? 'bg-primary-600'
: 'bg-stone-300 dark:bg-stone-600'}"
>
<span
class="inline-block h-4 w-4 transform rounded-full bg-white shadow transition-transform {groundingEnabled
? 'translate-x-6'
: 'translate-x-1'}"
></span>
</button>
</form>
</div>
<!-- Confirmation shown only for the grounding action's own form result. -->
{#if form?.success && form.action === 'grounding'}
<p class="mt-2 text-xs text-green-600 dark:text-green-400">
Grounding {form.enabled ? 'aktiviert' : 'deaktiviert'}.
</p>
{/if}
</div>
</div>
<!-- Card 4: Usage — aggregated cost/token rollups (today + 30 days) and a
     recent-calls table fed by `data.ai.usage` / `data.recentUsage` from load. -->
<div class="rounded-lg border border-stone-200 bg-white dark:border-stone-700 dark:bg-stone-900">
<div class="border-b border-stone-200 px-6 py-4 dark:border-stone-700">
<h2 class="text-base font-semibold text-stone-900 dark:text-stone-100">Verbrauch</h2>
</div>
<div class="space-y-4 px-6 py-4">
<!-- Rollup stats -->
<!-- The four stat tiles are driven by an inline array literal so label/value
     pairs stay together; tokens today = input + output, costs via formatCost. -->
<div class="grid grid-cols-2 gap-4 sm:grid-cols-4">
{#each [{ label: 'Anfragen heute', value: data.ai.usage.today.calls.toString() }, { label: 'Tokens heute', value: (data.ai.usage.today.input_tokens + data.ai.usage.today.output_tokens).toLocaleString('de-DE') }, { label: 'Kosten heute', value: formatCost(data.ai.usage.today.estimated_cost_usd) }, { label: 'Kosten (30 Tage)', value: formatCost(data.ai.usage.month.estimated_cost_usd) }] as stat}
<div class="rounded-md bg-stone-50 px-3 py-2 dark:bg-stone-800">
<p class="text-xs text-stone-400">{stat.label}</p>
<p class="mt-0.5 text-sm font-semibold text-stone-800 dark:text-stone-200">
{stat.value}
</p>
</div>
{/each}
</div>
<!-- Recent calls -->
<!-- One row per recorded AI call; rows for failed calls (event.error set) are
     tinted red, and grounded calls are marked with a check in the table. -->
{#if data.recentUsage.length > 0}
<div class="overflow-x-auto">
<table class="w-full text-xs">
<thead>
<tr class="text-left text-stone-400">
<th class="pr-4 pb-2 font-medium">Zeit</th>
<th class="pr-4 pb-2 font-medium">Typ</th>
<th class="pr-4 pb-2 font-medium">Modell</th>
<th class="pr-4 pb-2 font-medium">Tokens</th>
<th class="pr-4 pb-2 font-medium">Grounding</th>
<th class="pb-2 font-medium">Kosten</th>
</tr>
</thead>
<tbody>
{#each data.recentUsage as event}
<tr
class="border-t border-stone-100 dark:border-stone-800 {event.error
? 'text-red-500'
: 'text-stone-600 dark:text-stone-400'}"
>
<td class="py-1.5 pr-4 font-mono">{formatDate(event.created_at)}</td>
<td class="py-1.5 pr-4">{event.call_type}</td>
<!-- Long model IDs are truncated so the table keeps its width. -->
<td class="max-w-32 truncate py-1.5 pr-4 font-mono">{event.model}</td>
<td class="py-1.5 pr-4"
>{(event.input_tokens + event.output_tokens).toLocaleString('de-DE')}</td
>
<td class="py-1.5 pr-4">{event.grounded ? '✓' : '—'}</td>
<td class="py-1.5">{formatCost(event.estimated_cost_usd)}</td>
</tr>
{/each}
</tbody>
</table>
</div>
{:else}
<!-- Empty state before any AI call has been recorded in ai_usage. -->
<p class="text-xs text-stone-400">Noch keine KI-Anfragen aufgezeichnet.</p>
{/if}
</div>
</div>