feat(config): crawler user-agent and manual rate-limit knobs

This commit is contained in:
2026-04-18 15:50:21 +02:00
parent 91cd4d89b3
commit 0bed4401fe
3 changed files with 26 additions and 7 deletions

View File

@@ -26,9 +26,11 @@ type Config struct {
}
// DiscoveryConfig groups the settings for the discovery subsystem: the
// credentials and batching window used by the /tick endpoint, and the knobs
// for the crawler-driven path.
type DiscoveryConfig struct {
Token string // bearer token for /tick endpoint
BatchSize int // buckets per tick (default 4)
ForwardMonths int // forward window in months (default 12)
Token string // bearer token for /tick endpoint; Load warns that the tick endpoint is disabled when it is empty
BatchSize int // buckets per tick (default 4)
ForwardMonths int // forward window in months (default 12)
CrawlerUserAgent string // user-agent for crawler HTTP requests; read from DISCOVERY_CRAWLER_USER_AGENT, defaulting to a desktop Firefox user-agent string
CrawlerManualRateLimitPerHour int // max manual crawl requests per hour (1-3600, default 1); read from DISCOVERY_CRAWLER_MANUAL_RATE_LIMIT_PER_HOUR. NOTE(review): Load only parses the integer as shown here; confirm where the 1-3600 range is enforced
}
type AIConfig struct {
@@ -199,6 +201,11 @@ func Load() (*Config, error) {
slog.Warn("DISCOVERY_TOKEN is empty; /api/v1/admin/discovery/tick is disabled")
}
crawlerRateLimit, err := envInt("DISCOVERY_CRAWLER_MANUAL_RATE_LIMIT_PER_HOUR", 1)
if err != nil {
return nil, fmt.Errorf("DISCOVERY_CRAWLER_MANUAL_RATE_LIMIT_PER_HOUR: %w", err)
}
jwtSecret := envStr("JWT_SECRET", "")
if jwtSecret == "" {
return nil, fmt.Errorf("JWT_SECRET is required")
@@ -285,9 +292,11 @@ func Load() (*Config, error) {
RateLimitRPS: rpsAI,
},
Discovery: DiscoveryConfig{
Token: discoveryToken,
BatchSize: batchSize,
ForwardMonths: forwardMonths,
Token: discoveryToken,
BatchSize: batchSize,
ForwardMonths: forwardMonths,
CrawlerUserAgent: envStr("DISCOVERY_CRAWLER_USER_AGENT", "Mozilla/5.0 (X11; Linux x86_64; rv:135.0) Gecko/20100101 Firefox/135.0"),
CrawlerManualRateLimitPerHour: crawlerRateLimit,
},
}, nil
}

View File

@@ -56,6 +56,12 @@ func NewService(repo Repository, agent *AgentClient, mc marketCreator, batchSize
}
}
// SetCrawler attaches a crawler instance post-construction, replacing whatever
// crawler was previously stored on the Service. For MR 1, this allows wiring
// both Tick (agent-driven) and Crawl (crawler-driven) paths on the same Service.
//
// NOTE(review): the field is assigned without any synchronization here;
// presumably this is only called during startup wiring, before the Service
// handles requests — confirm against callers.
func (s *Service) SetCrawler(cr crawlerRunner) {
s.crawler = cr
}
// NewServiceWithCrawler constructs a Service wired for the crawler-driven
// Crawl method. The existing Pass 0 Tick path is not wired here (no agent).
// MR 2 will consolidate this with NewService once the Mistral path is removed.

View File

@@ -7,6 +7,7 @@ import (
"marktvogt.de/backend/internal/domain/auth"
"marktvogt.de/backend/internal/domain/discovery"
"marktvogt.de/backend/internal/domain/discovery/crawler"
"marktvogt.de/backend/internal/domain/market"
"marktvogt.de/backend/internal/domain/user"
"marktvogt.de/backend/internal/middleware"
@@ -80,7 +81,10 @@ func (s *Server) registerRoutes() {
s.cfg.Discovery.BatchSize,
s.cfg.Discovery.ForwardMonths,
)
discoveryHandler := discovery.NewHandler(discoveryService, 1)
// Wire the crawler for the Crawl path (MR 1 keeps both Tick and Crawl paths)
crawlerInstance := crawler.NewCrawler(s.cfg.Discovery.CrawlerUserAgent, crawler.DefaultSourceConfigs())
discoveryService.SetCrawler(crawlerInstance)
discoveryHandler := discovery.NewHandler(discoveryService, s.cfg.Discovery.CrawlerManualRateLimitPerHour)
requireTickToken := middleware.RequireBearerToken(s.cfg.Discovery.Token)
discovery.RegisterRoutes(v1, discoveryHandler, requireAuth, requireAdmin, requireTickToken)
}