diff --git a/CLAUDE.md b/CLAUDE.md index 9fb4373..d99c2d5 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -26,7 +26,7 @@ Monorepo at `gitlab.com/vikingowl/marktvogt.de`. Components are regular director | Mobile | Flutter | | Auth | Custom (Go libs), E-Mail+PW / Magic Link / OAuth / 2FA | | Payments | Stripe Connect | -| LLM | OpenRouter | +| LLM | Google Gemini | | CI/CD | GitLab CI (gitlab.com) — evaluation vs Woodpecker; sister project infinity-tales still on Woodpecker | | Hosting | Kubernetes (itsh.dev) | | Monitoring | Prometheus, Loki, Grafana, Sentry | diff --git a/backend/cmd/discovery-eval/README.md b/backend/cmd/discovery-eval/README.md index 0345b30..8aba51d 100644 --- a/backend/cmd/discovery-eval/README.md +++ b/backend/cmd/discovery-eval/README.md @@ -3,10 +3,10 @@ CLI that grades discovery's AI-backed components against labelled fixtures. Two modes: -- `-mode similarity` (default) — `MistralSimilarityClassifier` on pair- +- `-mode similarity` (default) — `SimilarityClassifier` on pair- labelled fixtures. Reports precision / recall / F1 / accuracy + a confidence calibration table. -- `-mode category` — `MistralLLMEnricher`'s `category` output on row- +- `-mode category` — `ProviderLLMEnricher`'s `category` output on row- labelled fixtures. Reports accuracy + a per-label confusion matrix. File-based cache keeps reruns free. Each mode has its own cache key shape, @@ -18,7 +18,7 @@ so switching modes doesn't churn entries. ``` export AI_API_KEY=... -export AI_MODEL_COMPLEX=mistral-large-latest +export AI_MODEL_COMPLEX=gemini-2.5-flash-lite go run ./backend/cmd/discovery-eval \ -mode similarity \ @@ -34,7 +34,7 @@ Exit code is 1 when `F1 < threshold` (0 = gating disabled). ``` export AI_API_KEY=... -export AI_MODEL_COMPLEX=mistral-large-latest +export AI_MODEL_COMPLEX=gemini-2.5-flash-lite go run ./backend/cmd/discovery-eval \ -mode category \ diff --git a/backend/internal/domain/discovery/enrich/crawl_test.go b/backend/internal/domain/discovery/enrich/crawl_test.go index 77b6ea9..7a20697 100644 --- a/backend/internal/domain/discovery/enrich/crawl_test.go +++ b/backend/internal/domain/discovery/enrich/crawl_test.go @@ -166,7 +166,7 @@ func TestMerge_BaseWinsOverOverlay(t *testing.T) { Description: "Ein großer Markt.", InputTokens: 100, OutputTokens: 50, - Model: "mistral-large-latest", + Model: "gemini-2.5-flash-lite", Sources: Sources{ "plz": ProvenanceLLM, "venue": ProvenanceLLM, diff --git a/backend/internal/domain/discovery/enrich/mistral.go b/backend/internal/domain/discovery/enrich/llm_enricher.go similarity index 100% rename from backend/internal/domain/discovery/enrich/mistral.go rename to backend/internal/domain/discovery/enrich/llm_enricher.go diff --git a/backend/internal/domain/discovery/enrich/mistral_test.go b/backend/internal/domain/discovery/enrich/llm_enricher_test.go similarity index 95% rename from backend/internal/domain/discovery/enrich/mistral_test.go rename to backend/internal/domain/discovery/enrich/llm_enricher_test.go index 7e70318..18b7d71 100644 --- a/backend/internal/domain/discovery/enrich/mistral_test.go +++ b/backend/internal/domain/discovery/enrich/llm_enricher_test.go @@ -23,7 +23,7 @@ func (s *stubScraper) Fetch(_ context.Context, url string) (string, error) { return s.responses[url], nil } -func TestMistralEnrich_HappyPath(t *testing.T) { +func TestLLMEnricher_HappyPath(t *testing.T) { scraper := &stubScraper{responses: map[string]string{ "https://a.example/markt": "Ein Mittelaltermarkt mit Ritterspielen und Markttreiben.", "https://b.example/info": "Sa-So jeweils 10-18 Uhr.", @@ -79,7 +79,7 @@ func TestMistralEnrich_HappyPath(t *testing.T) { } } -func TestMistralEnrich_AllScrapesFail(t *testing.T) { +func TestLLMEnricher_AllScrapesFail(t *testing.T) { scraper := &stubScraper{errs: map[string]error{ "https://a.example": errors.New("timeout"), "https://b.example": errors.New("404"), @@ -99,7 +99,7 @@ func TestMistralEnrich_AllScrapesFail(t *testing.T) { } } -func TestMistralEnrich_SomeScrapesFailStillCallsLLM(t *testing.T) { +func TestLLMEnricher_SomeScrapesFailStillCallsLLM(t *testing.T) { // One URL fails, one succeeds — the LLM should still run with partial // context, not fail because of the one bad source. scraper := &stubScraper{ @@ -124,7 +124,7 @@ func TestMistralEnrich_SomeScrapesFailStillCallsLLM(t *testing.T) { } } -func TestMistralEnrich_EmptyFieldsNoProvenance(t *testing.T) { +func TestLLMEnricher_EmptyFieldsNoProvenance(t *testing.T) { // LLM returns empty strings for fields it can't support. Those fields // must NOT appear in Sources — an empty provenance is misleading. scraper := &stubScraper{responses: map[string]string{"https://a.example": "Content."}} @@ -148,7 +148,7 @@ func TestMistralEnrich_EmptyFieldsNoProvenance(t *testing.T) { } } -func TestMistralEnrich_CapsURLsAtFive(t *testing.T) { +func TestLLMEnricher_CapsURLsAtFive(t *testing.T) { // Supply 7 URLs; only the first 5 should be fetched. urls := []string{"u1", "u2", "u3", "u4", "u5", "u6", "u7"} responses := map[string]string{} diff --git a/backend/internal/domain/discovery/enrich/similarity_test.go b/backend/internal/domain/discovery/enrich/similarity_test.go index 51550d9..2298936 100644 --- a/backend/internal/domain/discovery/enrich/similarity_test.go +++ b/backend/internal/domain/discovery/enrich/similarity_test.go @@ -31,7 +31,7 @@ func TestSimilarityPairKey_DifferentInputsDifferentKeys(t *testing.T) { } } -func TestMistralSimilarity_HappyPath(t *testing.T) { +func TestSimilarityClassifier_HappyPath(t *testing.T) { stub := &stubProvider{ content: `{"same_market":true,"confidence":0.82,"reason":"Gleicher Name, gleiche Stadt, gleiches Jahr."}`, } @@ -65,7 +65,7 @@ func TestMistralSimilarity_HappyPath(t *testing.T) { } } -func TestMistralSimilarity_ClampsConfidence(t *testing.T) { +func TestSimilarityClassifier_ClampsConfidence(t *testing.T) { tests := []struct { name string raw string @@ -89,15 +89,15 @@ func TestMistralSimilarity_ClampsConfidence(t *testing.T) { } } -func TestMistralSimilarity_PropagatesPass2Error(t *testing.T) { - c := NewSimilarityClassifier(&stubProvider{err: errors.New("mistral down")}) +func TestSimilarityClassifier_PropagatesPass2Error(t *testing.T) { + c := NewSimilarityClassifier(&stubProvider{err: errors.New("provider down")}) _, err := c.Classify(context.Background(), SimilarityRow{}, SimilarityRow{}) if err == nil { t.Fatal("expected error; got nil") } } -func TestMistralSimilarity_RejectsBadJSON(t *testing.T) { +func TestSimilarityClassifier_RejectsBadJSON(t *testing.T) { c := NewSimilarityClassifier(&stubProvider{content: "not json at all"}) _, err := c.Classify(context.Background(), SimilarityRow{}, SimilarityRow{}) if err == nil { diff --git a/backend/internal/domain/discovery/service_test.go b/backend/internal/domain/discovery/service_test.go index 69123dc..b18a422 100644 --- a/backend/internal/domain/discovery/service_test.go +++ b/backend/internal/domain/discovery/service_test.go @@ -696,7 +696,7 @@ func TestRunLLMEnrichOne_HappyPath(t *testing.T) { Category: catMittelaltermarkt, Description: "Ein großer Markt in der Altstadt.", Sources: enrich.Sources{"category": enrich.ProvenanceLLM, "description": enrich.ProvenanceLLM}, - Model: "mistral-large-latest", + Model: "gemini-2.5-flash-lite", InputTokens: 500, OutputTokens: 80, } @@ -841,7 +841,7 @@ func TestRunLLMEnrichOne_LLMErrorMarksFailed(t *testing.T) { return nil }, } - llm := &stubLLMEnricher{err: errors.New("mistral down")} + llm := &stubLLMEnricher{err: errors.New("provider down")} svc := NewService(repo, nil, noopLinkVerifier{}, noopMarketCreator{}, nil, llm, nil) _, err := svc.RunLLMEnrichOne(context.Background(), rowID) @@ -993,7 +993,7 @@ func TestClassifySimilarPair_LLMErrorPropagates(t *testing.T) { return nil }, } - sim := &stubSimilarityClassifier{err: errors.New("mistral 500")} + sim := &stubSimilarityClassifier{err: errors.New("provider 500")} svc := NewService(repo, nil, noopLinkVerifier{}, noopMarketCreator{}, nil, nil, sim) _, err := svc.ClassifySimilarPair(context.Background(), aID, bID) diff --git a/planning/13-programm.md b/planning/13-programm.md index ca7438f..ba4650d 100644 --- a/planning/13-programm.md +++ b/planning/13-programm.md @@ -37,7 +37,7 @@ Workflow: ## Entscheidungen -- [x] **LLM-Provider**: OpenRouter (viele Modelle verfuegbar, einfach austauschbar) +- [x] **LLM-Provider**: Google Gemini (gemini-2.5-flash-lite default) - [x] **Bild-Upload**: Ja, auch Bilder und Flyer-Scans werden per LLM geparsed - [x] **Push bei Aenderungen**: Ja, User mit Erinnerung fuer diesen Markt werden benachrichtigt - [x] **PDF-Export**: Ja, Programm als PDF exportierbar (fuer Aushang vor Ort) diff --git a/planning/16-admin.md b/planning/16-admin.md index 6baa262..8d5adcb 100644 --- a/planning/16-admin.md +++ b/planning/16-admin.md @@ -45,7 +45,7 @@ Internes Dashboard fuer Plattform-Betreiber. Nicht oeffentlich zugaenglich. - Programm-Parsing (PDF/Bild) - Bild-Moderation - Natuerliche Sprache Suche (spaeter) -- Provider: OpenRouter (Modell-Dropdown) +- Provider: Google Gemini (gemini-2.5-flash-lite default) - Fallback-Modell konfigurierbar - Token-Verbrauch / Kosten-Uebersicht pro Bereich - API-Key-Verwaltung diff --git a/planning/17-roadmap.md b/planning/17-roadmap.md index cafeeb2..89ea7c5 100644 --- a/planning/17-roadmap.md +++ b/planning/17-roadmap.md @@ -38,7 +38,7 @@ Ziel: Veranstalter uebernehmen ihre Eintraege, erstellen eigene Maerkte. - Markt beanspruchen ("Das ist mein Markt") - Markt anlegen / bearbeiten - Kontaktdaten hinterlegen -- Programm-Verwaltung (manuell + LLM-Parsing via OpenRouter) +- Programm-Verwaltung (manuell + LLM-Parsing via Google Gemini) - Mitarbeiter-Sub-Rolle (granulare Berechtigungen) - Wunschliste (gesuchte Kategorien/Epochen) @@ -125,7 +125,7 @@ Ziel: Interne Verwaltung und Moderation. - Admin-Rollen (Super-Admin, Moderator, Support) - Role-Assume (Impersonate) - Moderation (Bilder, Bewertungen, Chat) -- LLM-Konfiguration (Modell pro Bereich, OpenRouter) +- LLM-Konfiguration (Modell pro Bereich, Google Gemini) - Monitoring (Grafana-Einbettung, Plattform-Statistiken) - Audit Log - Admin-Benachrichtigungen (Push, E-Mail, Matrix Webhook) diff --git a/planning/plans/2026-04-24-pluggable-ai-and-local-research.md b/planning/plans/2026-04-24-pluggable-ai-and-local-research.md index 02a2d51..7ad3e5f 100644 --- a/planning/plans/2026-04-24-pluggable-ai-and-local-research.md +++ b/planning/plans/2026-04-24-pluggable-ai-and-local-research.md @@ -1,3 +1,5 @@ +> **SUPERSEDED 2026-04-25**: Replaced by Gemini-only migration (commit 3ddfd87). Kept for historical context. See `backend/internal/pkg/ai/gemini.go` for the implementation. + # Pluggable AI Provider + Local Research Orchestrator — Implementation Plan > **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. diff --git a/planning/specs/2026-04-24-pluggable-ai-and-local-research.md b/planning/specs/2026-04-24-pluggable-ai-and-local-research.md index ef780b7..092931b 100644 --- a/planning/specs/2026-04-24-pluggable-ai-and-local-research.md +++ b/planning/specs/2026-04-24-pluggable-ai-and-local-research.md @@ -1,3 +1,5 @@ +> **SUPERSEDED 2026-04-25**: Replaced by Gemini-only migration (commit 3ddfd87). Kept for historical context. See `backend/internal/pkg/ai/gemini.go` for the implementation. + # Spec: Pluggable AI Provider + Local Research Orchestrator - **Date:** 2026-04-24