- Extract readJSONFile + writeJSONAtomic in cache.go; category cache reuses them (saveCategoryCache is one line, loadCategoryCache uses the standard load-or-empty shape). - Drop dead errMsg param from scoreCategoryResult (always ""). - Wrap writeCategoryReport errors with context for consistency. - Wrap runSimilarityMode / runCategoryMode's 5 per-mode flags into an evalConfig struct so params don't drift. - Promote validModes to a package-level var. - Remove redundant cache = new...() fallback after load* (both load helpers already return a non-nil empty cache on error). - Strip narrating / diff-referencing comments per CLAUDE.md; keep the one genuine WHY on normalizeCategory (divergence from normalize.Name). Net -54 lines across 4 files; go build + go vet + tests green.
109 lines
2.6 KiB
Go
109 lines
2.6 KiB
Go
package main
|
|
|
|
import (
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io/fs"
|
|
"os"
|
|
"path/filepath"
|
|
|
|
"marktvogt.de/backend/internal/domain/discovery/enrich"
|
|
)
|
|
|
|
type Cache struct {
|
|
Entries map[string]CachedVerdict `json:"entries"`
|
|
}
|
|
|
|
// CachedVerdict is the value stored for one cache entry.
//
// NOTE(review): field meanings below are inferred from their names; confirm
// against the code that produces verdicts.
type CachedVerdict struct {
	// Same is presumably the yes/no similarity decision for the pair.
	Same bool `json:"same"`
	// Confidence is presumably the score attached to that decision.
	Confidence float64 `json:"confidence"`
	// Reason is presumably a free-text justification for the decision.
	Reason string `json:"reason"`
	// Model is the model identifier recorded with the verdict.
	Model string `json:"model"`
}
|
|
|
|
func newCache() *Cache {
|
|
return &Cache{Entries: map[string]CachedVerdict{}}
|
|
}
|
|
|
|
func keyFor(a, b enrich.SimilarityRow, model string) string {
|
|
return enrich.SimilarityPairKey(a, b) + "|" + model
|
|
}
|
|
|
|
func (c *Cache) Get(a, b enrich.SimilarityRow, model string) (CachedVerdict, bool) {
|
|
v, ok := c.Entries[keyFor(a, b, model)]
|
|
return v, ok
|
|
}
|
|
|
|
func (c *Cache) Put(a, b enrich.SimilarityRow, model string, v CachedVerdict) {
|
|
c.Entries[keyFor(a, b, model)] = v
|
|
}
|
|
|
|
func loadCache(path string) (*Cache, error) {
|
|
c := newCache()
|
|
exists, err := readJSONFile(path, c)
|
|
if err != nil {
|
|
return newCache(), fmt.Errorf("parse cache (starting empty): %w", err)
|
|
}
|
|
if !exists {
|
|
return c, nil
|
|
}
|
|
if c.Entries == nil {
|
|
c.Entries = map[string]CachedVerdict{}
|
|
}
|
|
return c, nil
|
|
}
|
|
|
|
func saveCache(path string, c *Cache) error {
|
|
return writeJSONAtomic(path, ".eval-cache-*.tmp", c)
|
|
}
|
|
|
|
// readJSONFile reads the file at path and unmarshals its JSON content into v
// (which must be a pointer, as for json.Unmarshal).
//
// Results:
//   - (false, nil): path does not exist; v is left untouched.
//   - (false, err): the file could not be read for any other reason.
//   - (true, err):  the file was read but is not valid JSON for v; v may have
//     been partially populated and should be discarded by the caller.
//   - (true, nil):  v was populated from the file.
//
// Both error cases name the path and wrap the underlying error, so
// errors.Is / errors.As still see the original cause. The exists flag lets
// callers tell "new file" apart from "corrupt file".
func readJSONFile(path string, v any) (exists bool, err error) {
	data, err := os.ReadFile(path)
	if err != nil {
		if errors.Is(err, fs.ErrNotExist) {
			return false, nil
		}
		return false, fmt.Errorf("read %s: %w", path, err)
	}
	if err := json.Unmarshal(data, v); err != nil {
		return true, fmt.Errorf("parse %s: %w", path, err)
	}
	return true, nil
}
|
|
|
|
// writeJSONAtomic marshals v as indented JSON and installs it at path by
// writing a temp file in the same directory and renaming it over the target,
// so a crash mid-write can't truncate the target to zero bytes.
//
// tmpPattern is the os.CreateTemp name pattern for the temp file. On any
// failure the temp file is removed, the existing target is left as it was,
// and an error naming the failed step is returned.
func writeJSONAtomic(path, tmpPattern string, v any) error {
	data, err := json.MarshalIndent(v, "", " ")
	if err != nil {
		return fmt.Errorf("marshal: %w", err)
	}

	// The temp file must live next to the target so the rename stays on one
	// filesystem. filepath.Dir never returns "" (it yields "." instead), so
	// no fallback is needed.
	tmp, err := os.CreateTemp(filepath.Dir(path), tmpPattern)
	if err != nil {
		return fmt.Errorf("create tmp: %w", err)
	}
	tmpPath := tmp.Name()

	if _, err := tmp.Write(data); err != nil {
		_ = tmp.Close()       // best effort; the write error is what matters
		_ = os.Remove(tmpPath) // best effort cleanup
		return fmt.Errorf("write tmp: %w", err)
	}
	// Flush the data to stable storage before the rename publishes the file;
	// otherwise a crash shortly after the rename could expose a target whose
	// contents never reached disk.
	if err := tmp.Sync(); err != nil {
		_ = tmp.Close()       // best effort; the sync error is what matters
		_ = os.Remove(tmpPath) // best effort cleanup
		return fmt.Errorf("sync tmp: %w", err)
	}
	if err := tmp.Close(); err != nil {
		_ = os.Remove(tmpPath) // best effort cleanup
		return fmt.Errorf("close tmp: %w", err)
	}
	if err := os.Rename(tmpPath, path); err != nil {
		_ = os.Remove(tmpPath) // best effort cleanup
		return fmt.Errorf("rename tmp: %w", err)
	}
	return nil
}
|