feat(filter): keyword/regex pre-filter and LLM relevance scorer
This commit is contained in:
32
internal/filter/keyword.go
Normal file
32
internal/filter/keyword.go
Normal file
@@ -0,0 +1,32 @@
|
||||
package filter
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"somegit.dev/vikingowl/reddit-reader/internal/domain"
|
||||
)
|
||||
|
||||
func MatchesKeyword(post domain.Post, f domain.Filter) bool {
|
||||
text := strings.ToLower(post.Title + " " + post.SelfText)
|
||||
if f.IsRegex {
|
||||
re, err := regexp.Compile("(?i)" + f.Pattern)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return re.MatchString(text)
|
||||
}
|
||||
return strings.Contains(text, strings.ToLower(f.Pattern))
|
||||
}
|
||||
|
||||
func MatchesAny(post domain.Post, filters []domain.Filter) bool {
|
||||
if len(filters) == 0 {
|
||||
return true
|
||||
}
|
||||
for _, f := range filters {
|
||||
if MatchesKeyword(post, f) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
65
internal/filter/keyword_test.go
Normal file
65
internal/filter/keyword_test.go
Normal file
@@ -0,0 +1,65 @@
|
||||
package filter_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"somegit.dev/vikingowl/reddit-reader/internal/domain"
|
||||
"somegit.dev/vikingowl/reddit-reader/internal/filter"
|
||||
)
|
||||
|
||||
func TestKeywordMatchPlain(t *testing.T) {
|
||||
f := domain.Filter{Pattern: "golang", IsRegex: false}
|
||||
if !filter.MatchesKeyword(domain.Post{Title: "Learning Golang the hard way"}, f) {
|
||||
t.Error("expected match for 'golang' in title")
|
||||
}
|
||||
}
|
||||
|
||||
func TestKeywordMatchCaseInsensitive(t *testing.T) {
|
||||
f := domain.Filter{Pattern: "NixOS", IsRegex: false}
|
||||
if !filter.MatchesKeyword(domain.Post{Title: "My new nixos setup"}, f) {
|
||||
t.Error("expected case-insensitive match")
|
||||
}
|
||||
}
|
||||
|
||||
func TestKeywordNoMatch(t *testing.T) {
|
||||
f := domain.Filter{Pattern: "rust", IsRegex: false}
|
||||
if filter.MatchesKeyword(domain.Post{Title: "Go 1.26 released", SelfText: "New features in Go"}, f) {
|
||||
t.Error("expected no match")
|
||||
}
|
||||
}
|
||||
|
||||
func TestKeywordMatchInSelfText(t *testing.T) {
|
||||
f := domain.Filter{Pattern: "iterator", IsRegex: false}
|
||||
if !filter.MatchesKeyword(domain.Post{Title: "New patterns", SelfText: "The iterator protocol is great"}, f) {
|
||||
t.Error("expected match in selftext")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegexMatch(t *testing.T) {
|
||||
f := domain.Filter{Pattern: `go\s*1\.2[56]`, IsRegex: true}
|
||||
if !filter.MatchesKeyword(domain.Post{Title: "Go 1.26 iterator changes"}, f) {
|
||||
t.Error("expected regex match")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegexNoMatch(t *testing.T) {
|
||||
f := domain.Filter{Pattern: `go\s*1\.24`, IsRegex: true}
|
||||
if filter.MatchesKeyword(domain.Post{Title: "Go 1.26 iterator changes"}, f) {
|
||||
t.Error("expected no regex match")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchesAnyFilter(t *testing.T) {
|
||||
filters := []domain.Filter{
|
||||
{Pattern: "python"}, {Pattern: "golang"},
|
||||
}
|
||||
if !filter.MatchesAny(domain.Post{Title: "Golang tips"}, filters) {
|
||||
t.Error("expected match on second filter")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchesAnyFilterEmpty(t *testing.T) {
|
||||
if !filter.MatchesAny(domain.Post{Title: "Anything"}, nil) {
|
||||
t.Error("empty filters should match everything")
|
||||
}
|
||||
}
|
||||
25
internal/filter/scorer.go
Normal file
25
internal/filter/scorer.go
Normal file
@@ -0,0 +1,25 @@
|
||||
package filter
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"somegit.dev/vikingowl/reddit-reader/internal/domain"
|
||||
"somegit.dev/vikingowl/reddit-reader/internal/llm"
|
||||
)
|
||||
|
||||
type Scorer struct {
|
||||
llm llm.Summarizer
|
||||
threshold float64
|
||||
}
|
||||
|
||||
func NewScorer(l llm.Summarizer, threshold float64) *Scorer {
|
||||
return &Scorer{llm: l, threshold: threshold}
|
||||
}
|
||||
|
||||
func (s *Scorer) ScorePost(ctx context.Context, post domain.Post, interests domain.Interests) (float64, bool, error) {
|
||||
score, err := s.llm.Score(ctx, post, interests)
|
||||
if err != nil {
|
||||
return 0, false, err
|
||||
}
|
||||
return score, score >= s.threshold, nil
|
||||
}
|
||||
49
internal/filter/scorer_test.go
Normal file
49
internal/filter/scorer_test.go
Normal file
@@ -0,0 +1,49 @@
|
||||
package filter_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"somegit.dev/vikingowl/reddit-reader/internal/domain"
|
||||
"somegit.dev/vikingowl/reddit-reader/internal/filter"
|
||||
)
|
||||
|
||||
// mockSummarizer is a test double passed to filter.NewScorer in place of a
// real summarizer. Its methods return the canned values stored in its fields.
type mockSummarizer struct {
	// Canned result returned by Score.
	scoreVal float64
	scoreErr error

	// Canned result returned by Summarize.
	summaryVal string
	summaryErr error
}
|
||||
|
||||
func (m *mockSummarizer) Score(_ context.Context, _ domain.Post, _ domain.Interests) (float64, error) {
|
||||
return m.scoreVal, m.scoreErr
|
||||
}
|
||||
|
||||
func (m *mockSummarizer) Summarize(_ context.Context, _ domain.Post) (string, error) {
|
||||
return m.summaryVal, m.summaryErr
|
||||
}
|
||||
|
||||
func TestScorerAboveThreshold(t *testing.T) {
|
||||
scorer := filter.NewScorer(&mockSummarizer{scoreVal: 0.8}, 0.6)
|
||||
score, pass, err := scorer.ScorePost(context.Background(), domain.Post{Title: "Test"}, domain.Interests{})
|
||||
if err != nil {
|
||||
t.Fatalf("ScorePost: %v", err)
|
||||
}
|
||||
if !pass {
|
||||
t.Error("expected pass for score 0.8 with threshold 0.6")
|
||||
}
|
||||
if score != 0.8 {
|
||||
t.Errorf("score = %f, want 0.8", score)
|
||||
}
|
||||
}
|
||||
|
||||
func TestScorerBelowThreshold(t *testing.T) {
|
||||
scorer := filter.NewScorer(&mockSummarizer{scoreVal: 0.3}, 0.6)
|
||||
_, pass, err := scorer.ScorePost(context.Background(), domain.Post{Title: "Test"}, domain.Interests{})
|
||||
if err != nil {
|
||||
t.Fatalf("ScorePost: %v", err)
|
||||
}
|
||||
if pass {
|
||||
t.Error("expected fail for score 0.3 with threshold 0.6")
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user