// marktvogt.de/backend/cmd/discovery-eval/cache_test.go

package main

import (
	"os"
	"path/filepath"
	"testing"

	"marktvogt.de/backend/internal/domain/discovery/enrich"
)

func TestCache_RoundTrip(t *testing.T) {
	dir := t.TempDir()
	path := filepath.Join(dir, "cache.json")

	c := newCache()
	a := enrich.SimilarityRow{NameNormalized: "ritterfest dresden", Stadt: "Dresden", Year: 2026}
	b := enrich.SimilarityRow{NameNormalized: "mittelaltermarkt dresden", Stadt: "Dresden", Year: 2026}
	want := CachedVerdict{Same: false, Confidence: 0.72, Reason: "distinct events", Model: "m"}
	c.Put(a, b, "m", want)

	if err := saveCache(path, c); err != nil {
		t.Fatalf("save: %v", err)
	}

	loaded, err := loadCache(path)
	if err != nil {
		t.Fatalf("load: %v", err)
	}
	got, ok := loaded.Get(a, b, "m")
	if !ok {
		t.Fatalf("cache miss after round-trip")
	}
	if got.Same != want.Same || got.Confidence != want.Confidence || got.Reason != want.Reason {
		t.Errorf("verdict changed across round-trip: got=%+v want=%+v", got, want)
	}
}

func TestCache_SymmetricKey(t *testing.T) {
	// (a, b) and (b, a) must hit the same entry — classifier is symmetric.
	c := newCache()
	a := enrich.SimilarityRow{NameNormalized: "a", Stadt: "A", Year: 2026}
	b := enrich.SimilarityRow{NameNormalized: "b", Stadt: "B", Year: 2026}
	c.Put(a, b, "m", CachedVerdict{Same: true, Confidence: 0.9})
	if _, ok := c.Get(b, a, "m"); !ok {
		t.Error("reversed lookup should hit the same entry (SimilarityPairKey is symmetric)")
	}
}

func TestCache_ModelScopesEntries(t *testing.T) {
	// Changing the model string must invalidate — two different model names
	// can produce different verdicts on the same pair.
	c := newCache()
	a := enrich.SimilarityRow{NameNormalized: "x", Stadt: "X", Year: 2026}
	b := enrich.SimilarityRow{NameNormalized: "y", Stadt: "Y", Year: 2026}
	c.Put(a, b, "m1", CachedVerdict{Same: true, Confidence: 0.9})
	if _, ok := c.Get(a, b, "m2"); ok {
		t.Error("cache hit under a different model; should be a miss")
	}
}

// TestLoadCache_MissingFile checks that loading from a path that does not
// exist reports no error and still returns a cache with a non-nil Entries
// field.
func TestLoadCache_MissingFile(t *testing.T) {
	missing := filepath.Join(t.TempDir(), "does-not-exist.json")

	cache, err := loadCache(missing)
	if err != nil {
		t.Errorf("missing file should not error; got %v", err)
	}

	usable := cache != nil && cache.Entries != nil
	if !usable {
		t.Error("missing-file load should return an empty usable cache")
	}
}

// TestLoadCache_CorruptFileStartsEmpty writes invalid JSON to the cache path
// and checks that loading it reports an error while still returning a cache
// with a non-nil Entries field.
func TestLoadCache_CorruptFileStartsEmpty(t *testing.T) {
	cachePath := filepath.Join(t.TempDir(), "cache.json")
	garbage := []byte("{not valid json")
	if err := os.WriteFile(cachePath, garbage, 0o644); err != nil {
		t.Fatal(err)
	}

	cache, loadErr := loadCache(cachePath)
	if loadErr == nil {
		t.Error("expected a parse error to be reported (so the operator can investigate)")
	}

	if usable := cache != nil && cache.Entries != nil; !usable {
		t.Error("corrupt file should still return a usable empty cache")
	}
}

// TestSaveCache_AtomicWrite saves an empty cache into a fresh temporary
// directory and checks that the directory then contains exactly one entry,
// named after the target file.
//
// This is not a full test of atomicity (that would need an injected crash),
// but it catches two common failures: a temporary file left behind next to
// the target, and a save that returns nil while only the temporary file
// exists.
func TestSaveCache_AtomicWrite(t *testing.T) {
	dir := t.TempDir()
	path := filepath.Join(dir, "cache.json")
	if err := saveCache(path, newCache()); err != nil {
		t.Fatal(err)
	}
	entries, err := os.ReadDir(dir)
	if err != nil {
		t.Fatal(err)
	}
	// Check the name as well as the count: a lone leftover temp file would
	// otherwise satisfy a count-only assertion.
	if len(entries) != 1 || entries[0].Name() != filepath.Base(path) {
		names := make([]string, 0, len(entries))
		for _, e := range entries {
			names = append(names, e.Name())
		}
		t.Errorf("expected exactly cache.json in dir; got %v", names)
	}
}