fix(discovery): skip enrichment cache for date-less rows (year=0)
Rows without a start_datum all get year=0 in the enrichment cache key, so date-less rows that share a normalized name and city collide in the cache even when they are unrelated markets. Gate both cache reads and writes on year != 0.
This commit is contained in:
@@ -717,20 +717,25 @@ func (s *Service) RunLLMEnrichOne(ctx context.Context, queueID uuid.UUID) (enric
|
||||
if row.StartDatum != nil {
|
||||
year = row.StartDatum.Year()
|
||||
}
|
||||
// Rows without a start_datum all hash to year=0, which would cause cache
|
||||
// collisions across unrelated markets. Skip the cache entirely for these rows.
|
||||
useCache := year != 0
|
||||
cacheKey := enrich.CacheKey(row.NameNormalized, row.Stadt, year)
|
||||
|
||||
// Cache lookup — if we have a fresh LLM payload for this (name, city,
|
||||
// year) tuple, skip the call. The merge still runs so a newly-populated
|
||||
// crawl-enrich base gets its provenance preserved.
|
||||
if cached, hit, err := s.repo.GetEnrichmentCache(ctx, cacheKey); err != nil {
|
||||
slog.WarnContext(ctx, "enrichment cache get failed; continuing",
|
||||
"cache_key", cacheKey, "error", err)
|
||||
} else if hit {
|
||||
merged := enrich.Merge(row.Enrichment, cached)
|
||||
if err := s.repo.SetEnrichment(ctx, row.ID, merged, EnrichmentStatusDone); err != nil {
|
||||
return enrich.Enrichment{}, fmt.Errorf("persist merged (cache hit): %w", err)
|
||||
if useCache {
|
||||
if cached, hit, err := s.repo.GetEnrichmentCache(ctx, cacheKey); err != nil {
|
||||
slog.WarnContext(ctx, "enrichment cache get failed; continuing",
|
||||
"cache_key", cacheKey, "error", err)
|
||||
} else if hit {
|
||||
merged := enrich.Merge(row.Enrichment, cached)
|
||||
if err := s.repo.SetEnrichment(ctx, row.ID, merged, EnrichmentStatusDone); err != nil {
|
||||
return enrich.Enrichment{}, fmt.Errorf("persist merged (cache hit): %w", err)
|
||||
}
|
||||
return merged, nil
|
||||
}
|
||||
return merged, nil
|
||||
}
|
||||
|
||||
llmReq := enrich.LLMRequest{
|
||||
@@ -753,10 +758,13 @@ func (s *Service) RunLLMEnrichOne(ctx context.Context, queueID uuid.UUID) (enric
|
||||
|
||||
// Cache the raw LLM output (not the merged result). A later re-crawl
|
||||
// might change crawl-enrich fields; the cached answer should layer on
|
||||
// top of whatever the current base is.
|
||||
if err := s.repo.SetEnrichmentCache(ctx, cacheKey, llmPayload, enrich.DefaultCacheTTL); err != nil {
|
||||
slog.WarnContext(ctx, "enrichment cache set failed; continuing",
|
||||
"cache_key", cacheKey, "error", err)
|
||||
// top of whatever the current base is. Skip for date-less rows (year=0)
|
||||
// to avoid cross-row cache collisions.
|
||||
if useCache {
|
||||
if err := s.repo.SetEnrichmentCache(ctx, cacheKey, llmPayload, enrich.DefaultCacheTTL); err != nil {
|
||||
slog.WarnContext(ctx, "enrichment cache set failed; continuing",
|
||||
"cache_key", cacheKey, "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
merged := enrich.Merge(row.Enrichment, llmPayload)
|
||||
|
||||
@@ -853,6 +853,40 @@ func TestRunLLMEnrichOne_LLMErrorMarksFailed(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestRunLLMEnrichOne_SkipsCacheWhenYearZero: a row with no start_datum (year=0)
|
||||
// must bypass the enrichment cache entirely — both reads and writes. Two calls
|
||||
// for the same row must both hit the LLM.
|
||||
func TestRunLLMEnrichOne_SkipsCacheWhenYearZero(t *testing.T) {
|
||||
rowID := uuid.New()
|
||||
cacheHits := 0
|
||||
|
||||
repo := &mockRepo{
|
||||
getDiscoveredFn: func(_ context.Context, _ uuid.UUID) (DiscoveredMarket, error) {
|
||||
return DiscoveredMarket{
|
||||
ID: rowID,
|
||||
MarktName: "Testmarkt",
|
||||
Stadt: "Dresden",
|
||||
NameNormalized: "testmarkt",
|
||||
StartDatum: nil, // no date → year = 0
|
||||
}, nil
|
||||
},
|
||||
getCacheFn: func(_ string) (enrich.Enrichment, bool, error) {
|
||||
cacheHits++
|
||||
return enrich.Enrichment{}, false, nil
|
||||
},
|
||||
}
|
||||
llm := &stubLLMEnricher{result: enrich.Enrichment{Category: catMittelaltermarkt}}
|
||||
svc := NewService(repo, nil, noopLinkVerifier{}, noopMarketCreator{}, nil, llm, nil)
|
||||
|
||||
_, _ = svc.RunLLMEnrichOne(context.Background(), rowID)
|
||||
_, _ = svc.RunLLMEnrichOne(context.Background(), rowID)
|
||||
|
||||
// With year=0, cache must be skipped — GetEnrichmentCache must never be called.
|
||||
if cacheHits != 0 {
|
||||
t.Errorf("expected 0 cache hits for date-less row, got %d", cacheHits)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRunCrawlEnrichAll_EmptyQueueNoOp: nothing pending, zero summary, no writes.
|
||||
func TestRunCrawlEnrichAll_EmptyQueueNoOp(t *testing.T) {
|
||||
var writes int
|
||||
|
||||
Reference in New Issue
Block a user