feat(config): crawler user-agent and manual rate-limit knobs
This commit is contained in:
@@ -26,9 +26,11 @@ type Config struct {
|
||||
}
|
||||
|
||||
// DiscoveryConfig holds the settings for the discovery subsystem: the bearer
// token guarding the tick endpoint, the tick batch size and forward window, and
// the crawler's user-agent and manual rate limit. Load fills it from the
// DISCOVERY_* environment variables.
type DiscoveryConfig struct {
|
||||
Token string // bearer token for /tick endpoint
|
||||
BatchSize int // buckets per tick (default 4)
|
||||
ForwardMonths int // forward window in months (default 12)
|
||||
Token string // bearer token for /tick endpoint
|
||||
BatchSize int // buckets per tick (default 4)
|
||||
ForwardMonths int // forward window in months (default 12)
|
||||
// Read from DISCOVERY_CRAWLER_USER_AGENT; Load supplies a Firefox-on-Linux
// string as the fallback value.
CrawlerUserAgent string // user-agent for crawler HTTP requests
|
||||
// Read from DISCOVERY_CRAWLER_MANUAL_RATE_LIMIT_PER_HOUR with a fallback of 1.
// NOTE(review): Load only checks the parse error from envInt; the 1-3600 range
// stated below is presumably enforced elsewhere — confirm, since 0 or a
// negative value would otherwise be passed straight to discovery.NewHandler.
CrawlerManualRateLimitPerHour int // max manual crawl requests per hour (1-3600, default 1)
|
||||
}
|
||||
|
||||
type AIConfig struct {
|
||||
@@ -199,6 +201,11 @@ func Load() (*Config, error) {
|
||||
slog.Warn("DISCOVERY_TOKEN is empty; /api/v1/admin/discovery/tick is disabled")
|
||||
}
|
||||
|
||||
crawlerRateLimit, err := envInt("DISCOVERY_CRAWLER_MANUAL_RATE_LIMIT_PER_HOUR", 1)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("DISCOVERY_CRAWLER_MANUAL_RATE_LIMIT_PER_HOUR: %w", err)
|
||||
}
|
||||
|
||||
jwtSecret := envStr("JWT_SECRET", "")
|
||||
if jwtSecret == "" {
|
||||
return nil, fmt.Errorf("JWT_SECRET is required")
|
||||
@@ -285,9 +292,11 @@ func Load() (*Config, error) {
|
||||
RateLimitRPS: rpsAI,
|
||||
},
|
||||
Discovery: DiscoveryConfig{
|
||||
Token: discoveryToken,
|
||||
BatchSize: batchSize,
|
||||
ForwardMonths: forwardMonths,
|
||||
Token: discoveryToken,
|
||||
BatchSize: batchSize,
|
||||
ForwardMonths: forwardMonths,
|
||||
CrawlerUserAgent: envStr("DISCOVERY_CRAWLER_USER_AGENT", "Mozilla/5.0 (X11; Linux x86_64; rv:135.0) Gecko/20100101 Firefox/135.0"),
|
||||
CrawlerManualRateLimitPerHour: crawlerRateLimit,
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -56,6 +56,12 @@ func NewService(repo Repository, agent *AgentClient, mc marketCreator, batchSize
|
||||
}
|
||||
}
|
||||
|
||||
// SetCrawler attaches a crawler instance post-construction by storing cr in s.crawler.
|
||||
// A later call overwrites the previously stored crawler. For MR 1, this allows
// wiring both Tick (agent-driven) and Crawl (crawler-driven) paths on the same Service.
// NOTE(review): the field is written without synchronization — presumably this is
// only called during startup wiring, before any request is served; confirm.
|
||||
func (s *Service) SetCrawler(cr crawlerRunner) {
|
||||
s.crawler = cr
|
||||
}
|
||||
|
||||
// NewServiceWithCrawler constructs a Service wired for the crawler-driven
|
||||
// Crawl method. The existing Pass 0 Tick path is not wired here (no agent).
|
||||
// MR 2 will consolidate this with NewService once the Mistral path is removed.
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
|
||||
"marktvogt.de/backend/internal/domain/auth"
|
||||
"marktvogt.de/backend/internal/domain/discovery"
|
||||
"marktvogt.de/backend/internal/domain/discovery/crawler"
|
||||
"marktvogt.de/backend/internal/domain/market"
|
||||
"marktvogt.de/backend/internal/domain/user"
|
||||
"marktvogt.de/backend/internal/middleware"
|
||||
@@ -80,7 +81,10 @@ func (s *Server) registerRoutes() {
|
||||
s.cfg.Discovery.BatchSize,
|
||||
s.cfg.Discovery.ForwardMonths,
|
||||
)
|
||||
discoveryHandler := discovery.NewHandler(discoveryService, 1)
|
||||
// Wire the crawler for the Crawl path (MR 1 keeps both Tick and Crawl paths)
|
||||
crawlerInstance := crawler.NewCrawler(s.cfg.Discovery.CrawlerUserAgent, crawler.DefaultSourceConfigs())
|
||||
discoveryService.SetCrawler(crawlerInstance)
|
||||
discoveryHandler := discovery.NewHandler(discoveryService, s.cfg.Discovery.CrawlerManualRateLimitPerHour)
|
||||
requireTickToken := middleware.RequireBearerToken(s.cfg.Discovery.Token)
|
||||
discovery.RegisterRoutes(v1, discoveryHandler, requireAuth, requireAdmin, requireTickToken)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user