fix(discovery): skip enrichment cache for date-less rows (year=0)

Rows without start_datum all hash to year=0, causing cache collisions
across unrelated markets. Gate both cache reads and writes on year!=0.
This commit is contained in:
2026-04-25 17:50:27 +02:00
parent f151c0865e
commit 2e3141aaeb
2 changed files with 54 additions and 12 deletions

View File

@@ -717,20 +717,25 @@ func (s *Service) RunLLMEnrichOne(ctx context.Context, queueID uuid.UUID) (enric
if row.StartDatum != nil {
year = row.StartDatum.Year()
}
// Rows without a start_datum all hash to year=0, which would cause cache
// collisions across unrelated markets. Skip the cache entirely for these rows.
useCache := year != 0
cacheKey := enrich.CacheKey(row.NameNormalized, row.Stadt, year)
// Cache lookup — if we have a fresh LLM payload for this (name, city,
// year) tuple, skip the call. The merge still runs so a newly-populated
// crawl-enrich base gets its provenance preserved.
if cached, hit, err := s.repo.GetEnrichmentCache(ctx, cacheKey); err != nil {
slog.WarnContext(ctx, "enrichment cache get failed; continuing",
"cache_key", cacheKey, "error", err)
} else if hit {
merged := enrich.Merge(row.Enrichment, cached)
if err := s.repo.SetEnrichment(ctx, row.ID, merged, EnrichmentStatusDone); err != nil {
return enrich.Enrichment{}, fmt.Errorf("persist merged (cache hit): %w", err)
if useCache {
if cached, hit, err := s.repo.GetEnrichmentCache(ctx, cacheKey); err != nil {
slog.WarnContext(ctx, "enrichment cache get failed; continuing",
"cache_key", cacheKey, "error", err)
} else if hit {
merged := enrich.Merge(row.Enrichment, cached)
if err := s.repo.SetEnrichment(ctx, row.ID, merged, EnrichmentStatusDone); err != nil {
return enrich.Enrichment{}, fmt.Errorf("persist merged (cache hit): %w", err)
}
return merged, nil
}
return merged, nil
}
llmReq := enrich.LLMRequest{
@@ -753,10 +758,13 @@ func (s *Service) RunLLMEnrichOne(ctx context.Context, queueID uuid.UUID) (enric
// Cache the raw LLM output (not the merged result). A later re-crawl
// might change crawl-enrich fields; the cached answer should layer on
// top of whatever the current base is.
if err := s.repo.SetEnrichmentCache(ctx, cacheKey, llmPayload, enrich.DefaultCacheTTL); err != nil {
slog.WarnContext(ctx, "enrichment cache set failed; continuing",
"cache_key", cacheKey, "error", err)
// top of whatever the current base is. Skip for date-less rows (year=0)
// to avoid cross-row cache collisions.
if useCache {
if err := s.repo.SetEnrichmentCache(ctx, cacheKey, llmPayload, enrich.DefaultCacheTTL); err != nil {
slog.WarnContext(ctx, "enrichment cache set failed; continuing",
"cache_key", cacheKey, "error", err)
}
}
merged := enrich.Merge(row.Enrichment, llmPayload)

View File

@@ -853,6 +853,40 @@ func TestRunLLMEnrichOne_LLMErrorMarksFailed(t *testing.T) {
}
}
// TestRunLLMEnrichOne_SkipsCacheWhenYearZero checks that enriching a row with
// a nil StartDatum (the date-less, year=0 case) never consults the enrichment
// cache. RunLLMEnrichOne is invoked twice for the same row and the test fails
// if the mock's cache-get hook was called at all.
//
// Only the read side is asserted here: the test does not verify that the
// cache write is skipped, nor does it count LLM invocations.
func TestRunLLMEnrichOne_SkipsCacheWhenYearZero(t *testing.T) {
	rowID := uuid.New()

	// The queued row deliberately carries no start date.
	datelessRow := DiscoveredMarket{
		ID:             rowID,
		MarktName:      "Testmarkt",
		Stadt:          "Dresden",
		NameNormalized: "testmarkt",
		StartDatum:     nil,
	}

	// Counts every cache-get call, regardless of whether it would be a hit.
	var cacheLookups int
	repo := &mockRepo{
		getDiscoveredFn: func(context.Context, uuid.UUID) (DiscoveredMarket, error) {
			return datelessRow, nil
		},
		getCacheFn: func(string) (enrich.Enrichment, bool, error) {
			cacheLookups++
			return enrich.Enrichment{}, false, nil
		},
	}
	llm := &stubLLMEnricher{result: enrich.Enrichment{Category: catMittelaltermarkt}}
	svc := NewService(repo, nil, noopLinkVerifier{}, noopMarketCreator{}, nil, llm, nil)

	// Run the enrichment twice; results and errors are intentionally ignored
	// because only the cache-lookup side effect is under test.
	ctx := context.Background()
	for i := 0; i < 2; i++ {
		_, _ = svc.RunLLMEnrichOne(ctx, rowID)
	}

	// Any lookup at all means the year=0 gate on cache reads did not hold.
	if cacheLookups != 0 {
		t.Errorf("expected 0 cache hits for date-less row, got %d", cacheLookups)
	}
}
// TestRunCrawlEnrichAll_EmptyQueueNoOp: nothing pending, zero summary, no writes.
func TestRunCrawlEnrichAll_EmptyQueueNoOp(t *testing.T) {
var writes int