package api
import (
"testing"
)
func TestCleanHTML(t *testing.T) {
tests := []struct {
name string
input string
expected string
}{
{
name: "removes simple tags",
input: "bold text",
expected: "bold text",
},
{
name: "removes nested tags",
input: "
nested
",
expected: "nested",
},
{
name: "decodes html entities",
input: "& < > "",
expected: "& < > \"",
},
{
name: "decodes apostrophe",
input: "it's working",
expected: "it's working",
},
{
name: "replaces nbsp with space",
input: "word word",
expected: "word word",
},
{
name: "normalizes whitespace",
input: " multiple spaces ",
expected: "multiple spaces",
},
{
name: "handles empty string",
input: "",
expected: "",
},
{
name: "handles plain text",
input: "no html here",
expected: "no html here",
},
{
name: "handles complex html",
input: "Link & Text",
expected: "Link & Text",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := cleanHTML(tt.input)
if result != tt.expected {
t.Errorf("cleanHTML(%q) = %q, want %q", tt.input, result, tt.expected)
}
})
}
}
// TestDecodeURL is a table-driven check of decodeURL. Each row gives a subtest
// label, the raw URL handed to decodeURL, and the string it must return. The
// rows cover DuckDuckGo redirect links carrying a percent-encoded uddg
// parameter, protocol-relative URLs, absolute http/https URLs, and the empty
// string.
func TestDecodeURL(t *testing.T) {
	type row struct {
		label string
		raw   string
		want  string
	}
	cases := []row{
		{"extracts url from uddg parameter", "//duckduckgo.com/l/?uddg=https%3A%2F%2Fexample.com%2Fpath&rut=abc", "https://example.com/path"},
		{"adds https to protocol-relative urls", "//example.com/path", "https://example.com/path"},
		{"returns normal urls unchanged", "https://example.com/page", "https://example.com/page"},
		{"handles http urls", "http://example.com", "http://example.com"},
		{"handles empty string", "", ""},
		{"handles uddg with special chars", "//duckduckgo.com/l/?uddg=https%3A%2F%2Fexample.com%2Fsearch%3Fq%3Dtest", "https://example.com/search?q=test"},
	}

	for idx := range cases {
		tc := cases[idx]
		t.Run(tc.label, func(t *testing.T) {
			if got := decodeURL(tc.raw); got != tc.want {
				t.Errorf("decodeURL(%q) = %q, want %q", tc.raw, got, tc.want)
			}
		})
	}
}
// TestParseDuckDuckGoResults passes a small results page to
// parseDuckDuckGoResults (with a limit of 10) and checks the Title and URL of
// the first result it returns.
//
// NOTE(review): the previous fixture contained no markup at all, so there was
// nothing for the parser to find. The markup below is modelled on
// DuckDuckGo's HTML results page (result__a title links pointing at uddg
// redirects, result__snippet snippet links); confirm it matches the patterns
// parseDuckDuckGoResults actually looks for.
func TestParseDuckDuckGoResults(t *testing.T) {
	// Test with realistic DuckDuckGo HTML structure
	html := `
<div class="result results_links results_links_deep web-result">
  <div class="links_main links_deep result__body">
    <h2 class="result__title">
      <a rel="nofollow" class="result__a" href="//duckduckgo.com/l/?uddg=https%3A%2F%2Fexample.com%2Fpage1">Example Page 1</a>
    </h2>
    <a class="result__snippet" href="//duckduckgo.com/l/?uddg=https%3A%2F%2Fexample.com%2Fpage1">Snippet for the first example page.</a>
  </div>
</div>
<div class="result results_links results_links_deep web-result">
  <div class="links_main links_deep result__body">
    <h2 class="result__title">
      <a rel="nofollow" class="result__a" href="//duckduckgo.com/l/?uddg=https%3A%2F%2Fexample.com%2Fpage2">Example Page 2</a>
    </h2>
    <a class="result__snippet" href="//duckduckgo.com/l/?uddg=https%3A%2F%2Fexample.com%2Fpage2">Snippet for the second example page.</a>
  </div>
</div>
`
	results := parseDuckDuckGoResults(html, 10)
	// Fatalf rather than Errorf: the checks below index results[0].
	if len(results) < 1 {
		t.Fatalf("expected at least 1 result, got %d", len(results))
	}
	// Check first result
	if results[0].Title != "Example Page 1" {
		t.Errorf("first result title = %q, want %q", results[0].Title, "Example Page 1")
	}
	if results[0].URL != "https://example.com/page1" {
		t.Errorf("first result URL = %q, want %q", results[0].URL, "https://example.com/page1")
	}
}
// TestParseDuckDuckGoResultsMaxResults builds a page holding 20 result entries
// and checks that parseDuckDuckGoResults, called with a limit of 5, returns no
// more than 5 results.
//
// NOTE(review): the loop previously appended an empty string, so the parser
// was given no results and the limit was never exercised. The entry markup is
// modelled on DuckDuckGo's HTML results page; confirm it matches the patterns
// parseDuckDuckGoResults actually looks for.
func TestParseDuckDuckGoResultsMaxResults(t *testing.T) {
	// Each entry is entryHead + a one-letter suffix + entryTail, so all 20
	// links are distinct without needing a formatting package.
	const entryHead = `<div class="result results_links results_links_deep web-result">` +
		`<h2 class="result__title">` +
		`<a rel="nofollow" class="result__a" href="https://example.com/page-`
	const entryTail = `">Example Page</a></h2>` +
		`<a class="result__snippet" href="https://example.com/">Example snippet text.</a>` +
		`</div>` + "\n"

	// Create HTML with many results
	html := ""
	for i := 0; i < 20; i++ {
		html += entryHead + string(rune('a'+i)) + entryTail
	}
	results := parseDuckDuckGoResults(html, 5)
	if len(results) > 5 {
		t.Errorf("expected max 5 results, got %d", len(results))
	}
}
// TestParseDuckDuckGoResultsSkipsDuckDuckGoLinks passes a page containing one
// duckduckgo.com link and one external link to parseDuckDuckGoResults and
// fails if any returned result has the duckduckgo.com URL.
//
// NOTE(review): the previous fixture contained no markup, so there was no
// duckduckgo.com link for the parser to skip. The markup below is modelled on
// DuckDuckGo's HTML results page; confirm it matches the patterns
// parseDuckDuckGoResults actually looks for.
func TestParseDuckDuckGoResultsSkipsDuckDuckGoLinks(t *testing.T) {
	html := `
<div class="result results_links results_links_deep web-result">
  <h2 class="result__title">
    <a rel="nofollow" class="result__a" href="https://duckduckgo.com/something">DuckDuckGo Internal Page</a>
  </h2>
  <a class="result__snippet" href="https://duckduckgo.com/something">Internal link that should be skipped.</a>
</div>
<div class="result results_links results_links_deep web-result">
  <h2 class="result__title">
    <a rel="nofollow" class="result__a" href="https://example.com/page">Example Page</a>
  </h2>
  <a class="result__snippet" href="https://example.com/page">External link that may be returned.</a>
</div>
`
	results := parseDuckDuckGoResults(html, 10)
	for _, r := range results {
		if r.URL == "https://duckduckgo.com/something" {
			t.Error("should have filtered out duckduckgo.com link")
		}
	}
}