Merge branch 'fix/discovery-enrich-timeout' into 'main'
fix(discovery): enrich-all timeout + partial progress
See merge request vikingowl/marktvogt.de!21
This commit is contained in:
@@ -2,6 +2,7 @@ package discovery
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
@@ -16,6 +17,15 @@ import (
|
||||
"marktvogt.de/backend/internal/pkg/apierror"
|
||||
)
|
||||
|
||||
// enrichAllTimeout bounds a single RunCrawlEnrichAll goroutine. The pipeline
// is resumable — rows that don't finish stay enrichment_status='pending' and
// the next run picks them up — so hitting this cap is not a failure, just a
// signal to press the button again.
//
// Sized against Nominatim's 1 rps ceiling: 45m (45 × 60 s at one request per
// second) handles ~2700 rows per press. Larger queues simply need multiple
// presses.
const enrichAllTimeout = 45 * time.Minute
|
||||
|
||||
type Handler struct {
|
||||
service *Service
|
||||
|
||||
@@ -354,24 +364,37 @@ func (h *Handler) RunCrawlEnrichAll(c *gin.Context) {
|
||||
})
|
||||
}
|
||||
|
||||
// runEnrichAsync runs RunCrawlEnrichAll with a detached context. 10m cap is
|
||||
// generous for Nominatim's 1rps: a 600-row queue is the worst case we expect.
|
||||
// runEnrichAsync runs RunCrawlEnrichAll with a detached context. See
|
||||
// enrichAllTimeout for the cap rationale.
|
||||
func (h *Handler) runEnrichAsync() {
|
||||
defer h.enrichRunning.Store(false)
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), enrichAllTimeout)
|
||||
defer cancel()
|
||||
|
||||
summary, err := h.service.RunCrawlEnrichAll(ctx)
|
||||
|
||||
h.enrichMu.Lock()
|
||||
h.enrichFinishedAt = time.Now().UTC()
|
||||
// Always stash the summary — partial progress is meaningful even when the
|
||||
// run didn't complete (e.g. deadline exceeded mid-loop). The UI can then
|
||||
// show "N / Total processed" alongside any message.
|
||||
sCopy := summary
|
||||
h.enrichSummary = &sCopy
|
||||
if err != nil {
|
||||
h.enrichError = err.Error()
|
||||
slog.ErrorContext(ctx, "async crawl-enrich failed", "error", err)
|
||||
} else {
|
||||
sCopy := summary
|
||||
h.enrichSummary = &sCopy
|
||||
if errors.Is(err, context.DeadlineExceeded) {
|
||||
// Resumable by design: remaining rows stay pending. Don't shout
|
||||
// "failed" — this is a "press again" situation.
|
||||
h.enrichError = fmt.Sprintf(
|
||||
"Zeitlimit (%s) erreicht nach %d von %d Zeilen. Erneut starten, um die verbleibenden Zeilen zu bearbeiten.",
|
||||
enrichAllTimeout, summary.Succeeded+summary.Failed, summary.Total,
|
||||
)
|
||||
slog.WarnContext(ctx, "async crawl-enrich hit timeout",
|
||||
"processed", summary.Succeeded+summary.Failed, "total", summary.Total)
|
||||
} else {
|
||||
h.enrichError = err.Error()
|
||||
slog.ErrorContext(ctx, "async crawl-enrich failed", "error", err)
|
||||
}
|
||||
}
|
||||
h.enrichMu.Unlock()
|
||||
}
|
||||
|
||||
@@ -856,6 +856,7 @@ func (s *Service) RunCrawlEnrichAll(ctx context.Context) (CrawlEnrichSummary, er
|
||||
// Caller cancelled — stop cleanly. Summary reflects partial
|
||||
// progress; the remaining rows stay in enrichment_status='pending'
|
||||
// and will be picked up by the next run.
|
||||
summary.DurationMs = time.Since(summary.StartedAt).Milliseconds()
|
||||
return summary, err
|
||||
}
|
||||
in := enrich.Input{
|
||||
|
||||
Reference in New Issue
Block a user