feat(discovery/crawler): scaffold subpackage with Source interface and RawEvent types

This commit is contained in:
2026-04-18 11:36:07 +02:00
parent adf417b731
commit 5135f0a3be

View File

@@ -0,0 +1,63 @@
// Package crawler scrapes five DACH Mittelalter event calendars and emits
// RawEvent rows. The merger deduplicates across sources before the events
// flow into the existing discovery pipeline (link verify, validate, dedup).
package crawler
import (
"context"
"time"
)
// Source is the contract that every calendar adapter fulfils.
type Source interface {
	// Name returns the adapter's name.
	// NOTE(review): presumably the same value that ends up in
	// RawEvent.SourceName — confirm against the adapters.
	Name() string

	// Fetch returns the events obtained from the source as RawEvent values,
	// or an error. ctx is passed through to the adapter.
	// NOTE(review): whether partial results may accompany a non-nil error is
	// not visible here — confirm against the adapters.
	Fetch(ctx context.Context) ([]RawEvent, error)
}
// RawEvent is a single event exactly as one source reported it, i.e. before
// any cross-source merging has happened.
type RawEvent struct {
	// Provenance of the row.
	// NOTE(review): presumably SourceName is the adapter name, SourceURL the
	// calendar page and DetailURL the per-event page — confirm.
	SourceName, SourceURL, DetailURL string

	// Event title and location.
	// NOTE(review): PLZ looks like the postal code (Postleitzahl), Land the
	// country and Bundesland the federal state — confirm.
	Name, City, PLZ, Land, Bundesland string

	// Event dates, held as pointers.
	// NOTE(review): presumably nil means the source gave no date — confirm.
	StartDate, EndDate *time.Time

	// Additional descriptive details reported by the source.
	Website, Venue, Organizer string
}
// MergedEvent is a single event after the cross-source merge step.
type MergedEvent struct {
	// Event title and location.
	// NOTE(review): PLZ looks like the postal code (Postleitzahl), Land the
	// country and Bundesland the federal state — confirm.
	Name, City, PLZ, Land, Bundesland string

	// Event dates, held as pointers.
	// NOTE(review): presumably nil means no date is known — confirm.
	StartDate, EndDate *time.Time

	// Additional descriptive details.
	Website, Venue, Organizer string

	// Quellen holds the union of source URLs.
	Quellen []string

	// Hinweis carries merger-generated notes (date_conflict, etc.).
	Hinweis string

	// NOTE(review): Sources is not described alongside Quellen; presumably it
	// lists the names of the contributing sources — confirm.
	Sources []string
}
// SourceError records a per-source failure without stopping the whole crawl.
// It satisfies the error interface and unwraps to Err, so a recorded failure
// can also be returned as an error or inspected with errors.Is / errors.As.
type SourceError struct {
	// Name identifies the source that failed.
	// NOTE(review): presumably the value of Source.Name() — confirm.
	Name string

	// Err is the underlying failure.
	Err error
}

// Error formats the failure as "<Name>: <cause>". A nil Err is reported as
// "<Name>: unknown error" instead of panicking on the nil interface.
func (e SourceError) Error() string {
	if e.Err == nil {
		return e.Name + ": unknown error"
	}
	return e.Name + ": " + e.Err.Error()
}

// Unwrap returns the underlying error so errors.Is and errors.As can see
// through the SourceError wrapper.
func (e SourceError) Unwrap() error {
	return e.Err
}
// CrawlResult is the value the orchestrator hands back to Service.Crawl.
type CrawlResult struct {
	// PerSource maps a string key to the raw events collected under it.
	// NOTE(review): the key is presumably the source name — confirm.
	PerSource map[string][]RawEvent

	// SourceErrors lists the per-source failures recorded during the crawl.
	SourceErrors []SourceError

	// PerSourceMS maps a string key to an int64.
	// NOTE(review): presumably the per-source fetch duration in milliseconds,
	// keyed by source name — confirm with the orchestrator.
	PerSourceMS map[string]int64
}