fix(discovery): use JSON schema instead of JSONMode for LLM enricher

Replaces JSONMode:true with an embedded enricher_schema.json so Gemini
returns structured output against a typed schema, preventing empty {} responses.
Adds an all-empty warning when the LLM returns a valid but blank payload.
This commit is contained in:
2026-04-25 17:48:48 +02:00
parent eb169689d5
commit f151c0865e
3 changed files with 58 additions and 3 deletions

View File

@@ -0,0 +1,21 @@
{
"type": "object",
"required": ["category", "opening_hours", "description"],
"properties": {
"category": {
"type": "string",
"description": "Market category. Use empty string if unclear.",
"enum": ["mittelaltermarkt", "weihnachtsmarkt", "ritterfest",
"handwerkermarkt", "schlossfest", "ritterturnier",
"kirchweih", ""]
},
"opening_hours": {
"type": "string",
"description": "Brief German summary, e.g. Sa-So 10:00-18:00. Empty if unclear."
},
"description": {
"type": "string",
"description": "1-3 sentences in German. Empty if insufficient source info."
}
}
}

View File

@@ -2,6 +2,7 @@ package enrich
import (
"context"
_ "embed"
"encoding/json"
"errors"
"fmt"
@@ -12,6 +13,9 @@ import (
"marktvogt.de/backend/internal/pkg/ai"
)
// enricherSchemaJSON holds the raw bytes of assets/enricher_schema.json,
// embedded at build time. EnrichMissing passes it as ChatRequest.JSONSchema.
//
//go:embed assets/enricher_schema.json
var enricherSchemaJSON []byte
// maxScrapeURLs limits how many quellen we pull per enrichment call.
// Real-world queue rows rarely have more than 2-3 sources; 5 is a safe cap
// that prevents a worst-case row from burning minutes on fetches.
@@ -88,7 +92,7 @@ func (e *ProviderLLMEnricher) EnrichMissing(ctx context.Context, req LLMRequest)
resp, err := e.AI.Chat(ctx, &ai.ChatRequest{
SystemPrompt: systemPrompt,
UserMessage: userPrompt,
JSONMode: true,
JSONSchema: enricherSchemaJSON,
Grounded: true,
CallType: "enrich_b",
Temperature: 0.1,
@@ -102,6 +106,11 @@ func (e *ProviderLLMEnricher) EnrichMissing(ctx context.Context, req LLMRequest)
return Enrichment{}, fmt.Errorf("parse llm response: %w (content=%q)", err, resp.Content)
}
if parsed.Category == "" && parsed.OpeningHours == "" && parsed.Description == "" {
slog.WarnContext(ctx, "llm enrich: all fields empty despite successful call",
"markt", req.MarktName, "stadt", req.Stadt)
}
// Build the Enrichment payload with only the fields the model produced.
now := time.Now().UTC()
out := Enrichment{

View File

@@ -44,8 +44,11 @@ func TestLLMEnricher_HappyPath(t *testing.T) {
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if stub.seen.JSONMode != true {
t.Fatalf("JSONMode must be set")
if stub.seen.JSONMode {
t.Fatal("JSONMode must not be set when JSONSchema is used")
}
if len(stub.seen.JSONSchema) == 0 {
t.Fatal("JSONSchema must be set")
}
// Result carries LLM fields and provenance llm.
@@ -148,6 +151,28 @@ func TestLLMEnricher_EmptyFieldsNoProvenance(t *testing.T) {
}
}
// TestLLMEnricher_SendsJSONSchema checks that a successful EnrichMissing call
// reaches the provider with a non-empty JSONSchema and with JSONMode unset.
func TestLLMEnricher_SendsJSONSchema(t *testing.T) {
	const sourceURL = "https://example.com"

	// Canned provider reply with all three enrichment fields populated.
	provider := &stubProvider{
		content: `{"category":"mittelaltermarkt","opening_hours":"Sa-So 10-18","description":"Markt."}`,
	}
	pages := &stubScraper{
		responses: map[string]string{sourceURL: "some content"},
	}
	request := LLMRequest{
		MarktName: "Testmarkt",
		Stadt:     "Dresden",
		Quellen:   []string{sourceURL},
	}

	enricher := NewLLMEnricher(provider, pages)
	if _, err := enricher.EnrichMissing(context.Background(), request); err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	// provider.seen is presumably the request the stub recorded — confirm
	// against stubProvider's definition.
	if provider.seen.JSONMode {
		t.Error("expected JSONMode to be false when schema is used")
	}
	if len(provider.seen.JSONSchema) == 0 {
		t.Error("expected JSONSchema to be set")
	}
}
func TestLLMEnricher_CapsURLsAtFive(t *testing.T) {
// Supply 7 URLs; only the first 5 should be fetched.
urls := []string{"u1", "u2", "u3", "u4", "u5", "u6", "u7"}