feat(filter): keyword/regex pre-filter and LLM relevance scorer

This commit is contained in:
2026-04-03 11:37:01 +02:00
parent 67c9ce3edd
commit 627030e373
4 changed files with 171 additions and 0 deletions

View File

@@ -0,0 +1,32 @@
package filter
import (
"regexp"
"strings"
"somegit.dev/vikingowl/reddit-reader/internal/domain"
)
// MatchesKeyword reports whether the post's title or self text matches the
// filter's pattern.
//
// The title and self text are joined with a single space and searched as one
// string. When f.IsRegex is set, f.Pattern is compiled as a regular expression
// with the case-insensitive flag enabled by default and matched against the
// original, unmodified text; a pattern that fails to compile never matches.
// Otherwise f.Pattern is treated as a plain substring and compared
// case-insensitively.
func MatchesKeyword(post domain.Post, f domain.Filter) bool {
	text := post.Title + " " + post.SelfText

	if f.IsRegex {
		// The pattern is compiled on every call; an invalid pattern is
		// treated as "no match" rather than surfaced as an error.
		re, err := regexp.Compile("(?i)" + f.Pattern)
		if err != nil {
			return false
		}
		// Match the original text, not a lowercased copy: the (?i) prefix
		// already makes matching case-insensitive, and lowercasing first
		// would make it impossible for a pattern to opt out via (?-i) or to
		// match upper-case letters at all.
		return re.MatchString(text)
	}

	return strings.Contains(strings.ToLower(text), strings.ToLower(f.Pattern))
}
// MatchesAny reports whether the post matches at least one of the given
// filters, as decided by MatchesKeyword. An empty (or nil) filter list is
// treated as "no restriction" and matches every post.
func MatchesAny(post domain.Post, filters []domain.Filter) bool {
	if len(filters) == 0 {
		// Nothing to filter on: let everything through.
		return true
	}

	for i := range filters {
		if !MatchesKeyword(post, filters[i]) {
			continue
		}
		return true
	}
	return false
}

View File

@@ -0,0 +1,65 @@
package filter_test
import (
"testing"
"somegit.dev/vikingowl/reddit-reader/internal/domain"
"somegit.dev/vikingowl/reddit-reader/internal/filter"
)
// TestKeywordMatchPlain verifies that a non-regex pattern is found inside the
// post title.
func TestKeywordMatchPlain(t *testing.T) {
	post := domain.Post{Title: "Learning Golang the hard way"}
	keyword := domain.Filter{Pattern: "golang", IsRegex: false}

	if matched := filter.MatchesKeyword(post, keyword); !matched {
		t.Error("expected match for 'golang' in title")
	}
}
// TestKeywordMatchCaseInsensitive verifies that a mixed-case plain pattern
// matches a title that spells the same word in lower case.
func TestKeywordMatchCaseInsensitive(t *testing.T) {
	post := domain.Post{Title: "My new nixos setup"}
	keyword := domain.Filter{Pattern: "NixOS", IsRegex: false}

	if matched := filter.MatchesKeyword(post, keyword); !matched {
		t.Error("expected case-insensitive match")
	}
}
// TestKeywordNoMatch verifies that a plain pattern absent from both the title
// and the self text does not match.
func TestKeywordNoMatch(t *testing.T) {
	post := domain.Post{
		Title:    "Go 1.26 released",
		SelfText: "New features in Go",
	}
	keyword := domain.Filter{Pattern: "rust", IsRegex: false}

	matched := filter.MatchesKeyword(post, keyword)
	if matched {
		t.Error("expected no match")
	}
}
// TestKeywordMatchInSelfText verifies that a plain pattern appearing only in
// the self text (not the title) still matches.
func TestKeywordMatchInSelfText(t *testing.T) {
	post := domain.Post{
		Title:    "New patterns",
		SelfText: "The iterator protocol is great",
	}
	keyword := domain.Filter{Pattern: "iterator", IsRegex: false}

	if matched := filter.MatchesKeyword(post, keyword); !matched {
		t.Error("expected match in selftext")
	}
}
// TestRegexMatch verifies that a regex filter matches a title containing a
// version string the pattern accepts.
func TestRegexMatch(t *testing.T) {
	post := domain.Post{Title: "Go 1.26 iterator changes"}
	rx := domain.Filter{Pattern: `go\s*1\.2[56]`, IsRegex: true}

	if matched := filter.MatchesKeyword(post, rx); !matched {
		t.Error("expected regex match")
	}
}
// TestRegexNoMatch verifies that a regex filter for a different version
// string does not match the title.
func TestRegexNoMatch(t *testing.T) {
	post := domain.Post{Title: "Go 1.26 iterator changes"}
	rx := domain.Filter{Pattern: `go\s*1\.24`, IsRegex: true}

	matched := filter.MatchesKeyword(post, rx)
	if matched {
		t.Error("expected no regex match")
	}
}
// TestMatchesAnyFilter verifies that MatchesAny succeeds when only a later
// filter in the list matches the post.
func TestMatchesAnyFilter(t *testing.T) {
	candidates := []domain.Filter{
		{Pattern: "python"},
		{Pattern: "golang"},
	}
	post := domain.Post{Title: "Golang tips"}

	if ok := filter.MatchesAny(post, candidates); !ok {
		t.Error("expected match on second filter")
	}
}
func TestMatchesAnyFilterEmpty(t *testing.T) {
if !filter.MatchesAny(domain.Post{Title: "Anything"}, nil) {
t.Error("empty filters should match everything")
}
}

25
internal/filter/scorer.go Normal file
View File

@@ -0,0 +1,25 @@
package filter
import (
"context"
"somegit.dev/vikingowl/reddit-reader/internal/domain"
"somegit.dev/vikingowl/reddit-reader/internal/llm"
)
// Scorer pairs an llm.Summarizer with a score threshold and uses them to
// decide whether a post is relevant enough to pass.
type Scorer struct {
// llm is the backend asked to score each post.
llm llm.Summarizer
// threshold is the minimum score (inclusive) a post needs in order to pass.
threshold float64
}
// NewScorer builds a Scorer that obtains scores from l and treats any score
// greater than or equal to threshold as passing.
func NewScorer(l llm.Summarizer, threshold float64) *Scorer {
	sc := new(Scorer)
	sc.llm = l
	sc.threshold = threshold
	return sc
}
// ScorePost asks the configured summarizer to score post against interests.
//
// It returns the raw score, whether that score meets the Scorer's threshold
// (score >= threshold), and any error from the summarizer. On error the score
// is 0 and the pass flag is false; the error is returned unchanged.
func (s *Scorer) ScorePost(ctx context.Context, post domain.Post, interests domain.Interests) (float64, bool, error) {
	relevance, scoreErr := s.llm.Score(ctx, post, interests)
	if scoreErr != nil {
		return 0, false, scoreErr
	}

	passed := relevance >= s.threshold
	return relevance, passed, nil
}

View File

@@ -0,0 +1,49 @@
package filter_test
import (
"context"
"testing"
"somegit.dev/vikingowl/reddit-reader/internal/domain"
"somegit.dev/vikingowl/reddit-reader/internal/filter"
)
// mockSummarizer is a test double whose Score and Summarize methods return
// the canned values stored in its fields.
type mockSummarizer struct {
// scoreVal and scoreErr are returned verbatim by Score.
scoreVal float64
scoreErr error
// summaryVal and summaryErr are returned verbatim by Summarize.
summaryVal string
summaryErr error
}
// Score ignores all of its arguments and returns the canned score and error
// configured on the mock.
func (m *mockSummarizer) Score(_ context.Context, _ domain.Post, _ domain.Interests) (float64, error) {
	val, err := m.scoreVal, m.scoreErr
	return val, err
}
// Summarize ignores all of its arguments and returns the canned summary and
// error configured on the mock.
func (m *mockSummarizer) Summarize(_ context.Context, _ domain.Post) (string, error) {
	text, err := m.summaryVal, m.summaryErr
	return text, err
}
func TestScorerAboveThreshold(t *testing.T) {
scorer := filter.NewScorer(&mockSummarizer{scoreVal: 0.8}, 0.6)
score, pass, err := scorer.ScorePost(context.Background(), domain.Post{Title: "Test"}, domain.Interests{})
if err != nil {
t.Fatalf("ScorePost: %v", err)
}
if !pass {
t.Error("expected pass for score 0.8 with threshold 0.6")
}
if score != 0.8 {
t.Errorf("score = %f, want 0.8", score)
}
}
func TestScorerBelowThreshold(t *testing.T) {
scorer := filter.NewScorer(&mockSummarizer{scoreVal: 0.3}, 0.6)
_, pass, err := scorer.ScorePost(context.Background(), domain.Post{Title: "Test"}, domain.Interests{})
if err != nil {
t.Fatalf("ScorePost: %v", err)
}
if pass {
t.Error("expected fail for score 0.3 with threshold 0.6")
}
}