From 9cbe654d5521d3c3c3af6bce37a1b8efeaf90ebb Mon Sep 17 00:00:00 2001 From: vikingowl Date: Fri, 24 Apr 2026 14:11:38 +0200 Subject: [PATCH] fix(discovery): raise enrich-all timeout + surface partial progress Pain: a 1400+ row pending queue can't finish crawl-enrich inside the old 10-minute cap (Nominatim's 1 rps means ~23m minimum). Operators saw a scary red "Crawl-enrich fehlgeschlagen: context deadline exceeded" banner even though the pipeline is resumable. - Introduce enrichAllTimeout constant (45m) sized for ~2700 rows per press; the original 10m assumed 600 rows worst-case. - On context.DeadlineExceeded, translate to a user-facing message ("Zeitlimit (45m0s) erreicht nach N von M Zeilen. Erneut starten, um die verbleibenden Zeilen zu bearbeiten.") instead of the raw Go error. - Always stash the summary in handler state, even on error, so the UI can show partial progress (N/M processed) alongside the message. - Service: populate DurationMs on early-return too, so the status endpoint's duration reflects the partial run instead of zero. Behavior unchanged when a run finishes cleanly; the queue remains resumable across presses as before. --- backend/internal/domain/discovery/handler.go | 39 ++++++++++++++++---- backend/internal/domain/discovery/service.go | 1 + 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/backend/internal/domain/discovery/handler.go b/backend/internal/domain/discovery/handler.go index b2a9e9e..5bff84d 100644 --- a/backend/internal/domain/discovery/handler.go +++ b/backend/internal/domain/discovery/handler.go @@ -2,6 +2,7 @@ package discovery import ( "context" + "errors" "fmt" "log/slog" "net/http" @@ -16,6 +17,15 @@ import ( "marktvogt.de/backend/internal/pkg/apierror" ) +// enrichAllTimeout bounds a single RunCrawlEnrichAll goroutine. 
The pipeline +// is resumable — rows that don't finish stay enrichment_status='pending' and +// the next run picks them up — so hitting this cap is not a failure, just a +// signal to press the button again. +// +// Sized against Nominatim's 1 rps ceiling: 45m handles ~2700 rows per press. +// Larger queues simply need multiple presses. +const enrichAllTimeout = 45 * time.Minute + type Handler struct { service *Service @@ -354,24 +364,37 @@ func (h *Handler) RunCrawlEnrichAll(c *gin.Context) { }) } -// runEnrichAsync runs RunCrawlEnrichAll with a detached context. 10m cap is -// generous for Nominatim's 1rps: a 600-row queue is the worst case we expect. +// runEnrichAsync runs RunCrawlEnrichAll with a detached context. See +// enrichAllTimeout for the cap rationale. func (h *Handler) runEnrichAsync() { defer h.enrichRunning.Store(false) - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) + ctx, cancel := context.WithTimeout(context.Background(), enrichAllTimeout) defer cancel() summary, err := h.service.RunCrawlEnrichAll(ctx) h.enrichMu.Lock() h.enrichFinishedAt = time.Now().UTC() + // Always stash the summary — partial progress is meaningful even when the + // run didn't complete (e.g. deadline exceeded mid-loop). The UI can then + // show "N / Total processed" alongside any message. + sCopy := summary + h.enrichSummary = &sCopy if err != nil { - h.enrichError = err.Error() - slog.ErrorContext(ctx, "async crawl-enrich failed", "error", err) - } else { - sCopy := summary - h.enrichSummary = &sCopy + if errors.Is(err, context.DeadlineExceeded) { + // Resumable by design: remaining rows stay pending. Don't shout + // "failed" — this is a "press again" situation. + h.enrichError = fmt.Sprintf( + "Zeitlimit (%s) erreicht nach %d von %d Zeilen. 
Erneut starten, um die verbleibenden Zeilen zu bearbeiten.", + enrichAllTimeout, summary.Succeeded+summary.Failed, summary.Total, + ) + slog.WarnContext(ctx, "async crawl-enrich hit timeout", + "processed", summary.Succeeded+summary.Failed, "total", summary.Total) + } else { + h.enrichError = err.Error() + slog.ErrorContext(ctx, "async crawl-enrich failed", "error", err) + } } h.enrichMu.Unlock() } diff --git a/backend/internal/domain/discovery/service.go b/backend/internal/domain/discovery/service.go index 9f5fe3a..e92a352 100644 --- a/backend/internal/domain/discovery/service.go +++ b/backend/internal/domain/discovery/service.go @@ -856,6 +856,7 @@ func (s *Service) RunCrawlEnrichAll(ctx context.Context) (CrawlEnrichSummary, er // Caller cancelled — stop cleanly. Summary reflects partial // progress; the remaining rows stay in enrichment_status='pending' // and will be picked up by the next run. + summary.DurationMs = time.Since(summary.StartedAt).Milliseconds() return summary, err } in := enrich.Input{