From 627030e3733751d3399ddb8f33a748d4731c7a3f Mon Sep 17 00:00:00 2001
From: vikingowl
Date: Fri, 3 Apr 2026 11:37:01 +0200
Subject: [PATCH] feat(filter): keyword/regex pre-filter and LLM relevance scorer

---
Reviewer notes (this section is ignored by git am):
- keyword.go compiles each regex pattern once and caches the result instead
  of recompiling it for every post.
- The blob ids on the "index" lines predate these edits; regenerate the
  patch with git format-patch if exact ids are needed.

 internal/filter/keyword.go      | 61 ++++++++++++++++++++++++++
 internal/filter/keyword_test.go | 76 +++++++++++++++++++++++++++++++++
 internal/filter/scorer.go       | 32 ++++++++++++++
 internal/filter/scorer_test.go  | 62 +++++++++++++++++++++++++++
 4 files changed, 231 insertions(+)
 create mode 100644 internal/filter/keyword.go
 create mode 100644 internal/filter/keyword_test.go
 create mode 100644 internal/filter/scorer.go
 create mode 100644 internal/filter/scorer_test.go

diff --git a/internal/filter/keyword.go b/internal/filter/keyword.go
new file mode 100644
index 0000000..b92bbac
--- /dev/null
+++ b/internal/filter/keyword.go
@@ -0,0 +1,61 @@
+// Package filter provides a keyword/regex pre-filter and an LLM relevance
+// scorer for posts.
+package filter
+
+import (
+	"regexp"
+	"strings"
+	"sync"
+
+	"somegit.dev/vikingowl/reddit-reader/internal/domain"
+)
+
+// regexCache memoizes compiled filter patterns, keyed by the raw pattern
+// string. The same filter is usually applied to many posts, so compiling
+// once per pattern instead of once per call keeps matching cheap. A pattern
+// that fails to compile is stored as a nil *regexp.Regexp so the failure is
+// remembered as well. Entries are never evicted, which is fine as long as
+// the number of distinct patterns stays small.
+var regexCache sync.Map // map[string]*regexp.Regexp
+
+// compilePattern returns the case-insensitive regexp for pattern, or nil if
+// pattern is not a valid regular expression.
+func compilePattern(pattern string) *regexp.Regexp {
+	if cached, ok := regexCache.Load(pattern); ok {
+		return cached.(*regexp.Regexp)
+	}
+	// On error Compile returns a nil regexp, which is exactly what gets
+	// cached for invalid patterns, so the error value itself is not needed.
+	re, _ := regexp.Compile("(?i)" + pattern)
+	actual, _ := regexCache.LoadOrStore(pattern, re)
+	return actual.(*regexp.Regexp)
+}
+
+// MatchesKeyword reports whether the post's title or self text matches f.
+//
+// Matching is case-insensitive. A plain filter matches when its pattern
+// occurs as a substring of the title and self text joined by a space; a
+// regex filter matches when the pattern is found anywhere in that text. A
+// regex filter whose pattern does not compile never matches.
+func MatchesKeyword(post domain.Post, f domain.Filter) bool {
+	text := strings.ToLower(post.Title + " " + post.SelfText)
+	if !f.IsRegex {
+		return strings.Contains(text, strings.ToLower(f.Pattern))
+	}
+	re := compilePattern(f.Pattern)
+	return re != nil && re.MatchString(text)
+}
+
+// MatchesAny reports whether post matches at least one of filters. An empty
+// or nil filter list matches every post.
+func MatchesAny(post domain.Post, filters []domain.Filter) bool {
+	if len(filters) == 0 {
+		return true
+	}
+	for _, f := range filters {
+		if MatchesKeyword(post, f) {
+			return true
+		}
+	}
+	return false
+}
diff --git a/internal/filter/keyword_test.go b/internal/filter/keyword_test.go
new file mode 100644
index 0000000..326842e
--- /dev/null
+++ b/internal/filter/keyword_test.go
@@ -0,0 +1,76 @@
+package filter_test
+
+import (
+	"testing"
+
+	"somegit.dev/vikingowl/reddit-reader/internal/domain"
+	"somegit.dev/vikingowl/reddit-reader/internal/filter"
+)
+
+func TestKeywordMatchPlain(t *testing.T) {
+	f := domain.Filter{Pattern: "golang", IsRegex: false}
+	if !filter.MatchesKeyword(domain.Post{Title: "Learning Golang the hard way"}, f) {
+		t.Error("expected match for 'golang' in title")
+	}
+}
+
+func TestKeywordMatchCaseInsensitive(t *testing.T) {
+	f := domain.Filter{Pattern: "NixOS", IsRegex: false}
+	if !filter.MatchesKeyword(domain.Post{Title: "My new nixos setup"}, f) {
+		t.Error("expected case-insensitive match")
+	}
+}
+
+func TestKeywordNoMatch(t *testing.T) {
+	f := domain.Filter{Pattern: "rust", IsRegex: false}
+	if filter.MatchesKeyword(domain.Post{Title: "Go 1.26 released", SelfText: "New features in Go"}, f) {
+		t.Error("expected no match")
+	}
+}
+
+func TestKeywordMatchInSelfText(t *testing.T) {
+	f := domain.Filter{Pattern: "iterator", IsRegex: false}
+	if !filter.MatchesKeyword(domain.Post{Title: "New patterns", SelfText: "The iterator protocol is great"}, f) {
+		t.Error("expected match in selftext")
+	}
+}
+
+func TestRegexMatch(t *testing.T) {
+	f := domain.Filter{Pattern: `go\s*1\.2[56]`, IsRegex: true}
+	if !filter.MatchesKeyword(domain.Post{Title: "Go 1.26 iterator changes"}, f) {
+		t.Error("expected regex match")
+	}
+}
+
+func TestRegexNoMatch(t *testing.T) {
+	f := domain.Filter{Pattern: `go\s*1\.24`, IsRegex: true}
+	if filter.MatchesKeyword(domain.Post{Title: "Go 1.26 iterator changes"}, f) {
+		t.Error("expected no regex match")
+	}
+}
+
+func TestRegexInvalidPattern(t *testing.T) {
+	f := domain.Filter{Pattern: `go(`, IsRegex: true}
+	post := domain.Post{Title: "go( is not a valid pattern"}
+	// Check twice: the second call takes the cached-failure path.
+	for i := 0; i < 2; i++ {
+		if filter.MatchesKeyword(post, f) {
+			t.Errorf("call %d: invalid regex must not match", i+1)
+		}
+	}
+}
+
+func TestMatchesAnyFilter(t *testing.T) {
+	filters := []domain.Filter{
+		{Pattern: "python"}, {Pattern: "golang"},
+	}
+	if !filter.MatchesAny(domain.Post{Title: "Golang tips"}, filters) {
+		t.Error("expected match on second filter")
+	}
+}
+
+func TestMatchesAnyFilterEmpty(t *testing.T) {
+	if !filter.MatchesAny(domain.Post{Title: "Anything"}, nil) {
+		t.Error("empty filters should match everything")
+	}
+}
diff --git a/internal/filter/scorer.go b/internal/filter/scorer.go
new file mode 100644
index 0000000..a801e2d
--- /dev/null
+++ b/internal/filter/scorer.go
@@ -0,0 +1,32 @@
+package filter
+
+import (
+	"context"
+
+	"somegit.dev/vikingowl/reddit-reader/internal/domain"
+	"somegit.dev/vikingowl/reddit-reader/internal/llm"
+)
+
+// Scorer obtains a score for a post from an llm.Summarizer and compares it
+// to a fixed threshold.
+type Scorer struct {
+	llm       llm.Summarizer
+	threshold float64
+}
+
+// NewScorer returns a Scorer that asks l for scores and treats any score
+// greater than or equal to threshold as passing.
+func NewScorer(l llm.Summarizer, threshold float64) *Scorer {
+	return &Scorer{llm: l, threshold: threshold}
+}
+
+// ScorePost asks the summarizer to score post against interests. It returns
+// the score and whether it reaches the threshold. If scoring fails, the
+// error is returned unchanged together with a zero score and false.
+func (s *Scorer) ScorePost(ctx context.Context, post domain.Post, interests domain.Interests) (float64, bool, error) {
+	score, err := s.llm.Score(ctx, post, interests)
+	if err != nil {
+		return 0, false, err
+	}
+	return score, score >= s.threshold, nil
+}
diff --git a/internal/filter/scorer_test.go b/internal/filter/scorer_test.go
new file mode 100644
index 0000000..49272c5
--- /dev/null
+++ b/internal/filter/scorer_test.go
@@ -0,0 +1,62 @@
+package filter_test
+
+import (
+	"context"
+	"errors"
+	"testing"
+
+	"somegit.dev/vikingowl/reddit-reader/internal/domain"
+	"somegit.dev/vikingowl/reddit-reader/internal/filter"
+)
+
+type mockSummarizer struct {
+	scoreVal   float64
+	scoreErr   error
+	summaryVal string
+	summaryErr error
+}
+
+func (m *mockSummarizer) Score(_ context.Context, _ domain.Post, _ domain.Interests) (float64, error) {
+	return m.scoreVal, m.scoreErr
+}
+
+func (m *mockSummarizer) Summarize(_ context.Context, _ domain.Post) (string, error) {
+	return m.summaryVal, m.summaryErr
+}
+
+func TestScorerAboveThreshold(t *testing.T) {
+	scorer := filter.NewScorer(&mockSummarizer{scoreVal: 0.8}, 0.6)
+	score, pass, err := scorer.ScorePost(context.Background(), domain.Post{Title: "Test"}, domain.Interests{})
+	if err != nil {
+		t.Fatalf("ScorePost: %v", err)
+	}
+	if !pass {
+		t.Error("expected pass for score 0.8 with threshold 0.6")
+	}
+	if score != 0.8 {
+		t.Errorf("score = %f, want 0.8", score)
+	}
+}
+
+func TestScorerBelowThreshold(t *testing.T) {
+	scorer := filter.NewScorer(&mockSummarizer{scoreVal: 0.3}, 0.6)
+	_, pass, err := scorer.ScorePost(context.Background(), domain.Post{Title: "Test"}, domain.Interests{})
+	if err != nil {
+		t.Fatalf("ScorePost: %v", err)
+	}
+	if pass {
+		t.Error("expected fail for score 0.3 with threshold 0.6")
+	}
+}
+
+func TestScorerError(t *testing.T) {
+	wantErr := errors.New("llm unavailable")
+	scorer := filter.NewScorer(&mockSummarizer{scoreVal: 0.9, scoreErr: wantErr}, 0.6)
+	score, pass, err := scorer.ScorePost(context.Background(), domain.Post{Title: "Test"}, domain.Interests{})
+	if !errors.Is(err, wantErr) {
+		t.Fatalf("err = %v, want %v", err, wantErr)
+	}
+	if pass || score != 0 {
+		t.Errorf("score = %f, pass = %v, want 0 and false on error", score, pass)
+	}
+}