diff --git a/backend/internal/config/config.go b/backend/internal/config/config.go
index a5e4a7b..ed97540 100644
--- a/backend/internal/config/config.go
+++ b/backend/internal/config/config.go
@@ -26,9 +26,11 @@ type Config struct {
 }
 
 type DiscoveryConfig struct {
-	Token         string // bearer token for /tick endpoint
-	BatchSize     int    // buckets per tick (default 4)
-	ForwardMonths int    // forward window in months (default 12)
+	Token                         string // bearer token for /tick endpoint
+	BatchSize                     int    // buckets per tick (default 4)
+	ForwardMonths                 int    // forward window in months (default 12)
+	CrawlerUserAgent              string // user-agent for crawler HTTP requests
+	CrawlerManualRateLimitPerHour int    // max manual crawl requests per hour (1-3600, default 1)
 }
 
 type AIConfig struct {
@@ -199,6 +201,15 @@ func Load() (*Config, error) {
 		slog.Warn("DISCOVERY_TOKEN is empty; /api/v1/admin/discovery/tick is disabled")
 	}
 
+	crawlerRateLimit, err := envInt("DISCOVERY_CRAWLER_MANUAL_RATE_LIMIT_PER_HOUR", 1)
+	if err != nil {
+		return nil, fmt.Errorf("DISCOVERY_CRAWLER_MANUAL_RATE_LIMIT_PER_HOUR: %w", err)
+	}
+	// Reject values outside the documented 1-3600 range instead of passing them to the handler.
+	if crawlerRateLimit < 1 || crawlerRateLimit > 3600 {
+		return nil, fmt.Errorf("DISCOVERY_CRAWLER_MANUAL_RATE_LIMIT_PER_HOUR must be between 1 and 3600, got %d", crawlerRateLimit)
+	}
+
 	jwtSecret := envStr("JWT_SECRET", "")
 	if jwtSecret == "" {
 		return nil, fmt.Errorf("JWT_SECRET is required")
@@ -285,9 +296,11 @@ func Load() (*Config, error) {
 			RateLimitRPS: rpsAI,
 		},
 		Discovery: DiscoveryConfig{
-			Token:         discoveryToken,
-			BatchSize:     batchSize,
-			ForwardMonths: forwardMonths,
+			Token:                         discoveryToken,
+			BatchSize:                     batchSize,
+			ForwardMonths:                 forwardMonths,
+			CrawlerUserAgent:              envStr("DISCOVERY_CRAWLER_USER_AGENT", "Mozilla/5.0 (X11; Linux x86_64; rv:135.0) Gecko/20100101 Firefox/135.0"),
+			CrawlerManualRateLimitPerHour: crawlerRateLimit,
 		},
 	}, nil
 }
diff --git a/backend/internal/domain/discovery/service.go b/backend/internal/domain/discovery/service.go
index ed668a9..683adf5 100644
--- a/backend/internal/domain/discovery/service.go
+++ b/backend/internal/domain/discovery/service.go
@@ -56,6 +56,13 @@ func NewService(repo Repository, agent *AgentClient, mc marketCreator, batchSize
 	}
 }
 
+// SetCrawler attaches a crawler instance post-construction. For MR 1, this allows
+// wiring both Tick (agent-driven) and Crawl (crawler-driven) paths on the same Service.
+// The assignment is not synchronized, so call it during wiring, before the Service is shared.
+func (s *Service) SetCrawler(cr crawlerRunner) {
+	s.crawler = cr
+}
+
 // NewServiceWithCrawler constructs a Service wired for the crawler-driven
 // Crawl method. The existing Pass 0 Tick path is not wired here (no agent).
 // MR 2 will consolidate this with NewService once the Mistral path is removed.
diff --git a/backend/internal/server/routes.go b/backend/internal/server/routes.go
index 4989fd7..d6f4652 100644
--- a/backend/internal/server/routes.go
+++ b/backend/internal/server/routes.go
@@ -7,6 +7,7 @@ import (
 
 	"marktvogt.de/backend/internal/domain/auth"
 	"marktvogt.de/backend/internal/domain/discovery"
+	"marktvogt.de/backend/internal/domain/discovery/crawler"
 	"marktvogt.de/backend/internal/domain/market"
 	"marktvogt.de/backend/internal/domain/user"
 	"marktvogt.de/backend/internal/middleware"
@@ -80,7 +81,10 @@ func (s *Server) registerRoutes() {
 		s.cfg.Discovery.BatchSize,
 		s.cfg.Discovery.ForwardMonths,
 	)
-	discoveryHandler := discovery.NewHandler(discoveryService, 1)
+	// Wire the crawler for the Crawl path (MR 1 keeps both Tick and Crawl paths)
+	crawlerInstance := crawler.NewCrawler(s.cfg.Discovery.CrawlerUserAgent, crawler.DefaultSourceConfigs())
+	discoveryService.SetCrawler(crawlerInstance)
+	discoveryHandler := discovery.NewHandler(discoveryService, s.cfg.Discovery.CrawlerManualRateLimitPerHour)
 	requireTickToken := middleware.RequireBearerToken(s.cfg.Discovery.Token)
 	discovery.RegisterRoutes(v1, discoveryHandler, requireAuth, requireAdmin, requireTickToken)
 }