fix(discovery): defer rate-limited buckets + polish queue table

Rate limits (Mistral web_search 429) used to get counted as hard errors,
marking the bucket as queried and bumping the Errors(24h) strip — even
though the right behavior is to wait and try again later.

Backend:
- isRateLimit() matches "rate limit" / "status 429" in the error string.
- On persistent rate-limit after one 10s retry: leave last_queried_at
  unchanged (bucket stays eligible for next tick) and abort the
  remainder of this tick — Mistral's web_search budget is shared, no
  point hammering more buckets in the same batch.
- TickSummary gains rate_limited counter; Errors stays for real failures.

Frontend:
- Dates: RFC3339 → 'DD.MM.YYYY' German format, range rendered as
  'DD.MM.YYYY – DD.MM.YYYY'.
- Queue table: cell horizontal padding, uppercase compact headers,
  scrollable on narrow viewports, dark-mode variants on every color
  (emerald/amber badges, link color, reject button), Region folds
  bundesland||land into a single column (Land was always 'Deutschland'
  for DACH anyway).
This commit is contained in:
2026-04-18 09:21:05 +02:00
parent e4ef4adad6
commit 98eae40755
2 changed files with 136 additions and 79 deletions

View File

@@ -5,6 +5,7 @@ import (
"errors"
"fmt"
"log/slog"
"strings"
"time"
"github.com/google/uuid"
@@ -50,9 +51,13 @@ type TickSummary struct {
DedupedRejected int `json:"deduped_rejected"`
DedupedQueue int `json:"deduped_queue"`
Errors int `json:"errors"`
RateLimited int `json:"rate_limited"`
}
// Tick picks N stale buckets and runs Pass 0 for each, writing net-new discoveries.
// On a rate-limit hit, aborts the remainder of the tick: subsequent buckets in the
// same batch would almost certainly hit the same limit, and we want to give Mistral's
// web_search budget time to refill before trying again.
func (s *Service) Tick(ctx context.Context) (TickSummary, error) {
if s.agent == nil || !s.agent.Enabled() {
return TickSummary{}, errors.New("discovery agent not configured")
@@ -64,26 +69,42 @@ func (s *Service) Tick(ctx context.Context) (TickSummary, error) {
var summary TickSummary
summary.BucketsProcessed = len(buckets)
for _, b := range buckets {
s.processOneBucket(ctx, b, &summary)
if stop := s.processOneBucket(ctx, b, &summary); stop {
break
}
}
return summary, nil
}
func (s *Service) processOneBucket(ctx context.Context, b Bucket, summary *TickSummary) {
// processOneBucket runs Pass 0 for a single bucket. Returns stop=true when the tick
// should abort early (currently only on persistent rate limits).
func (s *Service) processOneBucket(ctx context.Context, b Bucket, summary *TickSummary) (stop bool) {
resp, err := s.agent.Discover(ctx, b)
if err != nil {
// Retry once: after 2s per spec §9, or after 10s when the first failure was a rate limit.
wait := 2 * time.Second
if isRateLimit(err) {
wait = 10 * time.Second
}
select {
case <-ctx.Done():
return
case <-time.After(2 * time.Second):
return false
case <-time.After(wait):
}
resp, err = s.agent.Discover(ctx, b)
if err != nil {
if isRateLimit(err) {
// Leave last_queried_at unchanged so the bucket is re-picked on the
// next tick, and abort the rest of this tick — Mistral's web_search
// budget is shared across buckets, no point hammering it further.
slog.InfoContext(ctx, "pass0 rate-limited; deferring bucket + aborting tick",
"bucket_id", b.ID, "region", b.Region, "year_month", b.YearMonth)
summary.RateLimited++
return true
}
slog.WarnContext(ctx, "pass0 failed twice", "bucket_id", b.ID, "error", err)
_ = s.repo.UpdateBucketQueried(ctx, b.ID, err.Error())
summary.Errors++
return
return false
}
}
sub := s.processBucketResponse(ctx, b, resp)
@@ -94,6 +115,19 @@ func (s *Service) processOneBucket(ctx context.Context, b Bucket, summary *TickS
if err := s.repo.UpdateBucketQueried(ctx, b.ID, ""); err != nil {
slog.ErrorContext(ctx, "update bucket queried", "bucket_id", b.ID, "error", err)
}
return false
}
// isRateLimit detects Mistral 429 / web_search rate-limit errors. Matches the
// error string shape surfaced by the SDK: '... rate limit reached. (status 429)'.
func isRateLimit(err error) bool {
if err == nil {
return false
}
msg := err.Error()
return strings.Contains(msg, "rate limit") ||
strings.Contains(msg, "status 429") ||
strings.Contains(msg, "429")
}
func (s *Service) processBucketResponse(ctx context.Context, b Bucket, resp Pass0Response) TickSummary {

View File

@@ -6,6 +6,21 @@
const queue = $derived(data.queue ?? []);
const recentErrors = $derived(data.stats.recent_errors ?? []);
/**
 * Formats the date part of an ISO-like timestamp for display:
 * 'YYYY-MM-DDTHH:mm:ssZ' → 'DD.MM.YYYY' (German).
 *
 * Only the first ten characters are used, so any time or offset suffix is
 * ignored rather than converted.
 *
 * @param iso Timestamp string, or null when no date is available.
 * @returns The formatted date; '' for a null/empty input; the raw ten-character
 *   prefix when it does not split into three non-empty '-'-separated parts.
 */
function fmtDate(iso: string | null): string {
  if (!iso) return '';
  const d = iso.slice(0, 10); // 'YYYY-MM-DD'
  const [y, m, day] = d.split('-');
  return y && m && day ? `${day}.${m}.${y}` : d;
}

/**
 * Formats a start/end pair as 'DD.MM.YYYY – DD.MM.YYYY'.
 *
 * @param start Start timestamp, or null.
 * @param end End timestamp, or null.
 * @returns Both dates joined by an en dash when they are present and differ;
 *   otherwise whichever single date is available, or '' when neither is.
 */
function fmtDateRange(start: string | null, end: string | null): string {
  const s = fmtDate(start);
  const e = fmtDate(end);
  // A bare space between the two dates reads as one run-on value; use an
  // explicit en dash separator for the range.
  if (s && e && s !== e) return `${s} – ${e}`;
  return s || e || '';
}
const lastTickLabel = $derived.by(() => {
if (!data.stats.last_tick_at) return 'nie';
const ts = new Date(data.stats.last_tick_at).getTime();
@@ -100,79 +115,87 @@
Keine Einträge in der Warteschlange.
</p>
{:else}
<table class="mt-6 w-full text-left text-sm">
<thead class="border-b border-stone-200 text-stone-500">
<tr>
<th class="py-2">Land</th>
<th>Region</th>
<th>Markt</th>
<th>Stadt</th>
<th>Datum</th>
<th>Website</th>
<th>Quellen</th>
<th>Extraktion</th>
<th class="text-right">Aktion</th>
</tr>
</thead>
<tbody>
{#each queue as row (row.id)}
<tr class="border-b border-stone-100">
<td class="py-2">{row.land}</td>
<td>{row.bundesland}</td>
<td class="font-medium">{row.markt_name}</td>
<td>{row.stadt}</td>
<td>
{#if row.start_datum}
{row.start_datum}{row.end_datum ? ` ${row.end_datum}` : ''}
{:else}
<span class="text-stone-400"></span>
{/if}
</td>
<td>
{#if row.website}
<a
href={row.website}
target="_blank"
rel="noreferrer noopener"
class="text-blue-600 underline">link</a
>
{:else}
<span class="text-stone-400"></span>
{/if}
</td>
<td>{row.quellen?.length ?? 0}</td>
<td>
<span
class="inline-block rounded px-2 py-0.5 text-xs {row.extraktion === 'verbatim'
? 'bg-emerald-100 text-emerald-700'
: 'bg-amber-100 text-amber-700'}"
>
{row.extraktion || '—'}
</span>
</td>
<td class="text-right">
<form method="POST" action="?/accept" use:enhance class="inline">
<input type="hidden" name="id" value={row.id} />
<button
type="submit"
class="rounded bg-emerald-600 px-2 py-1 text-xs text-white hover:bg-emerald-700"
>
Accept
</button>
</form>
<form method="POST" action="?/reject" use:enhance class="inline">
<input type="hidden" name="id" value={row.id} />
<button
type="submit"
class="ml-1 rounded bg-stone-200 px-2 py-1 text-xs text-stone-700 hover:bg-stone-300"
>
Reject
</button>
</form>
</td>
<div class="mt-6 overflow-x-auto">
<table class="w-full text-left text-sm">
<thead
class="border-b border-stone-200 text-xs tracking-wider text-stone-500 uppercase dark:border-stone-700 dark:text-stone-400"
>
<tr>
<th class="py-2 pr-4 font-medium">Region</th>
<th class="py-2 pr-4 font-medium">Markt</th>
<th class="py-2 pr-4 font-medium">Stadt</th>
<th class="py-2 pr-4 font-medium">Datum</th>
<th class="py-2 pr-4 font-medium">Website</th>
<th class="py-2 pr-4 font-medium">Quellen</th>
<th class="py-2 pr-4 font-medium">Extraktion</th>
<th class="py-2 pl-4 text-right font-medium">Aktion</th>
</tr>
{/each}
</tbody>
</table>
</thead>
<tbody>
{#each queue as row (row.id)}
<tr class="border-b border-stone-100 align-top dark:border-stone-800">
<td class="py-3 pr-4 whitespace-nowrap text-stone-600 dark:text-stone-400">
{row.bundesland || row.land}
</td>
<td class="py-3 pr-4 font-medium">{row.markt_name}</td>
<td class="py-3 pr-4">{row.stadt}</td>
<td class="py-3 pr-4 whitespace-nowrap">
{#if row.start_datum}
{fmtDateRange(row.start_datum, row.end_datum)}
{:else}
<span class="text-stone-400"></span>
{/if}
</td>
<td class="py-3 pr-4">
{#if row.website}
<a
href={row.website}
target="_blank"
rel="noreferrer noopener"
class="text-blue-600 underline hover:text-blue-500 dark:text-blue-400"
>
link
</a>
{:else}
<span class="text-stone-400"></span>
{/if}
</td>
<td class="py-3 pr-4 text-center tabular-nums">
{row.quellen?.length ?? 0}
</td>
<td class="py-3 pr-4">
<span
class="inline-block rounded px-2 py-0.5 text-xs {row.extraktion === 'verbatim'
? 'bg-emerald-100 text-emerald-700 dark:bg-emerald-900/50 dark:text-emerald-300'
: 'bg-amber-100 text-amber-700 dark:bg-amber-900/50 dark:text-amber-300'}"
>
{row.extraktion || '—'}
</span>
</td>
<td class="py-3 pl-4 text-right whitespace-nowrap">
<form method="POST" action="?/accept" use:enhance class="inline">
<input type="hidden" name="id" value={row.id} />
<button
type="submit"
class="rounded bg-emerald-600 px-2 py-1 text-xs text-white hover:bg-emerald-700"
>
Accept
</button>
</form>
<form method="POST" action="?/reject" use:enhance class="inline">
<input type="hidden" name="id" value={row.id} />
<button
type="submit"
class="ml-1 rounded bg-stone-200 px-2 py-1 text-xs text-stone-700 hover:bg-stone-300 dark:bg-stone-700 dark:text-stone-200 dark:hover:bg-stone-600"
>
Reject
</button>
</form>
</td>
</tr>
{/each}
</tbody>
</table>
</div>
{/if}
</div>