diff --git a/backend/internal/domain/discovery/handler.go b/backend/internal/domain/discovery/handler.go index 0f7141a..883be5c 100644 --- a/backend/internal/domain/discovery/handler.go +++ b/backend/internal/domain/discovery/handler.go @@ -174,7 +174,9 @@ func (h *Handler) ListQueue(c *gin.Context) { if offset < 0 { offset = 0 } - rows, total, err := h.service.ListPendingQueuePaged(c.Request.Context(), limit, offset) + sortBy := normalizeQueueSort(c.Query("sort")) + order := normalizeQueueOrder(c.Query("order")) + rows, total, err := h.service.ListPendingQueuePaged(c.Request.Context(), sortBy, order, limit, offset) if err != nil { apiErr := apierror.Internal("list queue failed") c.JSON(apiErr.Status, apierror.NewResponse(apiErr)) @@ -188,10 +190,37 @@ func (h *Handler) ListQueue(c *gin.Context) { "total": total, "limit": limit, "offset": offset, + "sort": sortBy, + "order": order, }, }) } +// normalizeQueueSort maps a user-supplied sort key to a whitelisted value. +// Unknown or empty input resolves to the default (konfidenz). +func normalizeQueueSort(s string) string { + switch s { + case QueueSortKonfidenz, + QueueSortDiscoveredAt, + QueueSortStartDatum, + QueueSortStadt, + QueueSortMarktName, + QueueSortQuellenCount: + return s + default: + return QueueSortDefault + } +} + +// normalizeQueueOrder accepts only "asc" or "desc"; everything else becomes +// the default ("desc"). +func normalizeQueueOrder(s string) string { + if s == QueueOrderAsc { + return QueueOrderAsc + } + return QueueOrderDefault +} + func (h *Handler) Accept(c *gin.Context) { id, err := uuid.Parse(c.Param("id")) if err != nil { diff --git a/backend/internal/domain/discovery/handler_test.go b/backend/internal/domain/discovery/handler_test.go index 721acc0..4a87ce2 100644 --- a/backend/internal/domain/discovery/handler_test.go +++ b/backend/internal/domain/discovery/handler_test.go @@ -172,6 +172,78 @@ func TestCrawlHandlerConcurrentReturnsTooManyRequests(t *testing.T) { waitFor(t, 2*time.Second, func() bool { return !h.crawlRunning.Load() }) } +// TestListQueueSortParamWhitelist verifies that ListQueue parses sort/order +// query params, falls back to defaults on unknown input, and surfaces the +// effective values in the response envelope so the UI can reflect them. +func TestListQueueSortParamWhitelist(t *testing.T) { + tests := []struct { + name string + query string + wantSort string + wantOrder string + wantRepoSort string + wantRepoOrder string + }{ + {"empty defaults to konfidenz desc", "", QueueSortKonfidenz, QueueOrderDesc, QueueSortKonfidenz, QueueOrderDesc}, + {"explicit stadt asc", "sort=stadt&order=asc", QueueSortStadt, QueueOrderAsc, QueueSortStadt, QueueOrderAsc}, + {"explicit discovered_at desc", "sort=discovered_at&order=desc", QueueSortDiscoveredAt, QueueOrderDesc, QueueSortDiscoveredAt, QueueOrderDesc}, + {"quellen_count asc", "sort=quellen_count&order=asc", QueueSortQuellenCount, QueueOrderAsc, QueueSortQuellenCount, QueueOrderAsc}, + {"unknown sort falls back", "sort=DROP+TABLE&order=asc", QueueSortKonfidenz, QueueOrderAsc, QueueSortKonfidenz, QueueOrderAsc}, + {"unknown order falls back", "sort=stadt&order=sideways", QueueSortStadt, QueueOrderDesc, QueueSortStadt, QueueOrderDesc}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + repo := newMockRepo() + svc := NewService(repo, &stubCrawlerRunner{}, noopLinkVerifier{}, noopMarketCreator{}) + h := NewHandler(svc, 0) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + url := "/admin/discovery/queue" + if tc.query != "" { + url += "?" + tc.query + } + c.Request = httptest.NewRequest(http.MethodGet, url, nil) + + h.ListQueue(c) + + if w.Code != http.StatusOK { + t.Fatalf("expected 200, got %d body=%s", w.Code, w.Body.String()) + } + var resp struct { + Meta struct { + Sort string `json:"sort"` + Order string `json:"order"` + Limit int `json:"limit"` + Offset int `json:"offset"` + } `json:"meta"` + } + if err := json.NewDecoder(w.Body).Decode(&resp); err != nil { + t.Fatalf("decode response: %v", err) + } + if resp.Meta.Sort != tc.wantSort { + t.Errorf("meta.sort: got %q, want %q", resp.Meta.Sort, tc.wantSort) + } + if resp.Meta.Order != tc.wantOrder { + t.Errorf("meta.order: got %q, want %q", resp.Meta.Order, tc.wantOrder) + } + if len(repo.listQueueCalls) != 1 { + t.Fatalf("expected 1 ListQueue call, got %d", len(repo.listQueueCalls)) + } + call := repo.listQueueCalls[0] + if call.sortBy != tc.wantRepoSort { + t.Errorf("repo sortBy: got %q, want %q", call.sortBy, tc.wantRepoSort) + } + if call.order != tc.wantRepoOrder { + t.Errorf("repo order: got %q, want %q", call.order, tc.wantRepoOrder) + } + if call.status != StatusPending { + t.Errorf("repo status: got %q, want %q", call.status, StatusPending) + } + }) + } +} + // TestCrawlHandlerRateLimit verifies that consecutive manual requests within // the rate-limit window get HTTP 429 with Retry-After after the first finishes. func TestCrawlHandlerRateLimit(t *testing.T) { diff --git a/backend/internal/domain/discovery/mock_repo_test.go b/backend/internal/domain/discovery/mock_repo_test.go index ce7206e..ec571b5 100644 --- a/backend/internal/domain/discovery/mock_repo_test.go +++ b/backend/internal/domain/discovery/mock_repo_test.go @@ -27,6 +27,18 @@ type mockRepo struct { // inserted captures every DiscoveredMarket passed to InsertDiscovered. // Populated by newMockRepo(); nil when insertDiscFn is set externally. inserted []DiscoveredMarket + + // ListQueue capture + canned response for tests that care about sort plumbing. + listQueueCalls []listQueueCall + listQueueRows []DiscoveredMarket + listQueueErr error +} + +// listQueueCall records arguments passed to mockRepo.ListQueue so tests can +// assert the handler/service forwarded the expected sort/order/pagination. +type listQueueCall struct { + status, sortBy, order string + limit, offset int } func (m *mockRepo) ListSeriesByCity(ctx context.Context, c string) ([]SeriesCandidate, error) { @@ -44,8 +56,11 @@ func (m *mockRepo) IsRejected(ctx context.Context, n, s string, y int) (bool, er func (m *mockRepo) QueueHasPending(ctx context.Context, n, s string, sd *time.Time) (bool, error) { return m.queuePendingFn(ctx, n, s, sd) } -func (m *mockRepo) ListQueue(ctx context.Context, status string, l, o int) ([]DiscoveredMarket, error) { - return nil, nil +func (m *mockRepo) ListQueue(ctx context.Context, status, sortBy, order string, l, o int) ([]DiscoveredMarket, error) { + m.listQueueCalls = append(m.listQueueCalls, listQueueCall{ + status: status, sortBy: sortBy, order: order, limit: l, offset: o, + }) + return m.listQueueRows, m.listQueueErr } func (m *mockRepo) CountQueue(ctx context.Context, status string) (int, error) { if m.countQueueFn != nil { diff --git a/backend/internal/domain/discovery/model.go b/backend/internal/domain/discovery/model.go index 4398d60..5a61689 100644 --- a/backend/internal/domain/discovery/model.go +++ b/backend/internal/domain/discovery/model.go @@ -121,6 +121,25 @@ const ( KonfidenzNiedrig = "niedrig" ) +// Queue sort keys. Whitelist used by both handler (query-param validation) +// and repository (ORDER BY builder). Never concatenate user input into SQL. +const ( + QueueSortKonfidenz = "konfidenz" + QueueSortDiscoveredAt = "discovered_at" + QueueSortStartDatum = "start_datum" + QueueSortStadt = "stadt" + QueueSortMarktName = "markt_name" + QueueSortQuellenCount = "quellen_count" + + QueueOrderAsc = "asc" + QueueOrderDesc = "desc" + + // Defaults: highest-confidence first; tiebreak by soonest upcoming + // (handled in repository ORDER BY). + QueueSortDefault = QueueSortKonfidenz + QueueOrderDefault = QueueOrderDesc +) + // Stats is the discovery health snapshot used by the admin dashboard strip. type Stats struct { LastTickAt *time.Time `json:"last_tick_at"` diff --git a/backend/internal/domain/discovery/repository.go b/backend/internal/domain/discovery/repository.go index 752dc79..29e02dd 100644 --- a/backend/internal/domain/discovery/repository.go +++ b/backend/internal/domain/discovery/repository.go @@ -18,7 +18,7 @@ type Repository interface { InsertDiscovered(ctx context.Context, d DiscoveredMarket) (uuid.UUID, error) IsRejected(ctx context.Context, nameNormalized, stadt string, year int) (bool, error) QueueHasPending(ctx context.Context, nameNormalized, stadt string, startDatum *time.Time) (bool, error) - ListQueue(ctx context.Context, status string, limit, offset int) ([]DiscoveredMarket, error) + ListQueue(ctx context.Context, status, sortBy, order string, limit, offset int) ([]DiscoveredMarket, error) CountQueue(ctx context.Context, status string) (int, error) GetDiscovered(ctx context.Context, id uuid.UUID) (DiscoveredMarket, error) MarkAccepted(ctx context.Context, tx pgx.Tx, id, editionID, reviewer uuid.UUID) error @@ -118,8 +118,9 @@ SELECT EXISTS( return exists, err } -func (r *pgRepository) ListQueue(ctx context.Context, status string, limit, offset int) ([]DiscoveredMarket, error) { - rows, err := r.pool.Query(ctx, ` +func (r *pgRepository) ListQueue(ctx context.Context, status, sortBy, order string, limit, offset int) ([]DiscoveredMarket, error) { + orderBy := queueOrderByClause(sortBy, order) + query := ` SELECT id, bucket_id, markt_name, stadt, coalesce(bundesland,''), land, start_datum, end_datum, coalesce(website,''), quellen, coalesce(konfidenz,''), coalesce(agent_status,''), coalesce(hinweis,''), name_normalized, matched_series_id, status, @@ -127,8 +128,9 @@ SELECT id, bucket_id, markt_name, stadt, coalesce(bundesland,''), land, sources, source_contributions FROM discovered_markets WHERE status = $1 -ORDER BY discovered_at DESC -LIMIT $2 OFFSET $3`, status, limit, offset) +` + orderBy + ` +LIMIT $2 OFFSET $3` + rows, err := r.pool.Query(ctx, query, status, limit, offset) if err != nil { return nil, err } @@ -144,6 +146,41 @@ LIMIT $2 OFFSET $3`, status, limit, offset) return out, rows.Err() } +// queueOrderByClause builds an ORDER BY fragment from whitelisted keys only. +// Caller is responsible for passing pre-validated values; unknown sortBy falls +// back to the default (konfidenz desc). Never concatenate raw user input here — +// query parameters are not usable for identifiers or direction. +// +// Tiebreakers: +// - konfidenz: start_datum ASC NULLS LAST (soonest upcoming within tier), id +// - all others: id ASC (stable pagination) +func queueOrderByClause(sortBy, order string) string { + dir := "DESC" + if order == QueueOrderAsc { + dir = "ASC" + } + // konfidenz is stored as text; sort by its ordinal rank, not alphabetical. + const konfidenzRank = "CASE konfidenz WHEN 'hoch' THEN 3 WHEN 'mittel' THEN 2 WHEN 'niedrig' THEN 1 ELSE 0 END" + switch sortBy { + case QueueSortKonfidenz: + return fmt.Sprintf("ORDER BY %s %s, start_datum ASC NULLS LAST, id ASC", konfidenzRank, dir) + case QueueSortDiscoveredAt: + return fmt.Sprintf("ORDER BY discovered_at %s, id ASC", dir) + case QueueSortStartDatum: + // NULLS LAST regardless of direction — null dates belong at the bottom + // for review workflows. + return fmt.Sprintf("ORDER BY start_datum %s NULLS LAST, id ASC", dir) + case QueueSortStadt: + return fmt.Sprintf("ORDER BY stadt %s, id ASC", dir) + case QueueSortMarktName: + return fmt.Sprintf("ORDER BY markt_name %s, id ASC", dir) + case QueueSortQuellenCount: + return fmt.Sprintf("ORDER BY coalesce(cardinality(quellen),0) %s, id ASC", dir) + default: + return fmt.Sprintf("ORDER BY %s DESC, start_datum ASC NULLS LAST, id ASC", konfidenzRank) + } +} + func (r *pgRepository) CountQueue(ctx context.Context, status string) (int, error) { var n int err := r.pool.QueryRow(ctx, diff --git a/backend/internal/domain/discovery/repository_test.go b/backend/internal/domain/discovery/repository_test.go new file mode 100644 index 0000000..1ac996c --- /dev/null +++ b/backend/internal/domain/discovery/repository_test.go @@ -0,0 +1,59 @@ +package discovery + +import ( + "strings" + "testing" +) + +// TestQueueOrderByClauseWhitelist locks the SQL shape we emit for each +// whitelisted (sortBy, order) combination. The function is the only place +// user-influenced values touch raw SQL strings; unknown values must fall back +// to the default (konfidenz desc) rather than leak through. +func TestQueueOrderByClauseWhitelist(t *testing.T) { + konfidenzRank := "CASE konfidenz WHEN 'hoch' THEN 3 WHEN 'mittel' THEN 2 WHEN 'niedrig' THEN 1 ELSE 0 END" + tests := []struct { + name string + sort string + order string + wantAll []string // substrings that MUST appear + }{ + {"konfidenz desc (default)", QueueSortKonfidenz, QueueOrderDesc, + []string{konfidenzRank + " DESC", "start_datum ASC NULLS LAST", "id ASC"}}, + {"konfidenz asc still soonest-first tiebreak", QueueSortKonfidenz, QueueOrderAsc, + []string{konfidenzRank + " ASC", "start_datum ASC NULLS LAST", "id ASC"}}, + {"discovered_at desc", QueueSortDiscoveredAt, QueueOrderDesc, + []string{"ORDER BY discovered_at DESC", "id ASC"}}, + {"start_datum asc nulls last", QueueSortStartDatum, QueueOrderAsc, + []string{"ORDER BY start_datum ASC NULLS LAST", "id ASC"}}, + {"start_datum desc nulls last", QueueSortStartDatum, QueueOrderDesc, + []string{"ORDER BY start_datum DESC NULLS LAST", "id ASC"}}, + {"stadt asc", QueueSortStadt, QueueOrderAsc, + []string{"ORDER BY stadt ASC", "id ASC"}}, + {"markt_name asc", QueueSortMarktName, QueueOrderAsc, + []string{"ORDER BY markt_name ASC", "id ASC"}}, + {"quellen_count desc uses cardinality", QueueSortQuellenCount, QueueOrderDesc, + []string{"ORDER BY coalesce(cardinality(quellen),0) DESC", "id ASC"}}, + {"unknown sort falls back to konfidenz desc", "'; DROP TABLE--", "desc", + []string{konfidenzRank + " DESC", "start_datum ASC NULLS LAST", "id ASC"}}, + {"unknown order becomes desc", QueueSortStadt, "sideways", + []string{"ORDER BY stadt DESC", "id ASC"}}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := queueOrderByClause(tc.sort, tc.order) + for _, want := range tc.wantAll { + if !strings.Contains(got, want) { + t.Errorf("clause %q missing expected substring %q", got, want) + } + } + // Safety: the raw sortBy string must never end up in the SQL. + if tc.sort != QueueSortKonfidenz && tc.sort != QueueSortDiscoveredAt && + tc.sort != QueueSortStartDatum && tc.sort != QueueSortStadt && + tc.sort != QueueSortMarktName && tc.sort != QueueSortQuellenCount { + if strings.Contains(got, tc.sort) { + t.Errorf("whitelist leak: raw sortBy %q appears in %q", tc.sort, got) + } + } + }) + } +} diff --git a/backend/internal/domain/discovery/service.go b/backend/internal/domain/discovery/service.go index 9000eb0..2a257f0 100644 --- a/backend/internal/domain/discovery/service.go +++ b/backend/internal/domain/discovery/service.go @@ -387,9 +387,11 @@ func landToISO(land string) string { } // ListPendingQueuePaged returns pending queue rows with the total count so -// the admin UI can paginate. limit/offset are passed through to the repo. -func (s *Service) ListPendingQueuePaged(ctx context.Context, limit, offset int) ([]DiscoveredMarket, int, error) { - rows, err := s.repo.ListQueue(ctx, StatusPending, limit, offset) +// the admin UI can paginate. sortBy/order are whitelisted by the repository; +// invalid values fall back to defaults (konfidenz desc). limit/offset are +// passed through. +func (s *Service) ListPendingQueuePaged(ctx context.Context, sortBy, order string, limit, offset int) ([]DiscoveredMarket, int, error) { + rows, err := s.repo.ListQueue(ctx, StatusPending, sortBy, order, limit, offset) if err != nil { return nil, 0, fmt.Errorf("list queue: %w", err) } @@ -439,7 +441,8 @@ func (s *Service) FindSimilarToQueueEntry(ctx context.Context, id uuid.UUID) ([] return nil, fmt.Errorf("load target: %w", err) } // Pull all pending queue rows; small DB in practice (< 1000 rows). - candidates, err := s.repo.ListQueue(ctx, StatusPending, 2000, 0) + // Sort order is irrelevant to similarity computation — use defaults. + candidates, err := s.repo.ListQueue(ctx, StatusPending, QueueSortDefault, QueueOrderDefault, 2000, 0) if err != nil { return nil, fmt.Errorf("list pending: %w", err) } diff --git a/backend/internal/domain/discovery/service_test.go b/backend/internal/domain/discovery/service_test.go index d54ae44..52ddbbf 100644 --- a/backend/internal/domain/discovery/service_test.go +++ b/backend/internal/domain/discovery/service_test.go @@ -328,7 +328,7 @@ func TestListPendingQueuePaged_ReturnsBothRowsAndTotal(t *testing.T) { countQueueFn: func(_ context.Context, _ string) (int, error) { return 42, nil }, } svc := NewService(m, nil, noopLinkVerifier{}, noopMarketCreator{}) - rows, total, err := svc.ListPendingQueuePaged(context.Background(), 50, 0) + rows, total, err := svc.ListPendingQueuePaged(context.Background(), QueueSortDefault, QueueOrderDefault, 50, 0) if err != nil { t.Fatalf("unexpected error: %v", err) } diff --git a/web/src/routes/admin/discovery/+page.server.ts b/web/src/routes/admin/discovery/+page.server.ts index aa2c831..8d792a1 100644 --- a/web/src/routes/admin/discovery/+page.server.ts +++ b/web/src/routes/admin/discovery/+page.server.ts @@ -62,22 +62,51 @@ function parseLimit(raw: string | null): ValidLimit { return (VALID_LIMITS as readonly number[]).includes(n) ? (n as ValidLimit) : 50; } +// Sort whitelist mirrors backend/internal/domain/discovery/model.go. +// Backend re-validates; client-side filtering just avoids round-tripping +// obviously bad values. +const VALID_SORTS = [ + 'konfidenz', + 'discovered_at', + 'start_datum', + 'stadt', + 'markt_name', + 'quellen_count' +] as const; +type SortKey = (typeof VALID_SORTS)[number]; +type SortOrder = 'asc' | 'desc'; + +function parseSort(raw: string | null): SortKey { + return (VALID_SORTS as readonly string[]).includes(raw ?? '') ? (raw as SortKey) : 'konfidenz'; +} + +function parseOrder(raw: string | null): SortOrder { + return raw === 'asc' ? 'asc' : 'desc'; +} + type QueueMeta = { total: number; limit: number; offset: number; + sort?: SortKey; + order?: SortOrder; }; export const load: PageServerLoad = async ({ cookies, url }) => { const limit = parseLimit(url.searchParams.get('limit')); const page = Math.max(1, Number(url.searchParams.get('page') ?? 1)); const offset = (page - 1) * limit; + const sort = parseSort(url.searchParams.get('sort')); + const order = parseOrder(url.searchParams.get('order')); + const qs = new URLSearchParams({ + limit: String(limit), + offset: String(offset), + sort, + order + }); const [queueRes, statsRes] = await Promise.all([ - serverFetch( - `/admin/discovery/queue?limit=${limit}&offset=${offset}`, - cookies - ), + serverFetch(`/admin/discovery/queue?${qs.toString()}`, cookies), serverFetch(`/admin/discovery/stats`, cookies) ]); // Pagination info lives on the envelope's `meta` field alongside `data`. @@ -93,7 +122,9 @@ export const load: PageServerLoad = async ({ cookies, url }) => { stats: statsRes.data, limit, offset, - page + page, + sort: meta.sort ?? sort, + order: meta.order ?? order }; }; diff --git a/web/src/routes/admin/discovery/+page.svelte b/web/src/routes/admin/discovery/+page.svelte index 3467100..3acadb6 100644 --- a/web/src/routes/admin/discovery/+page.svelte +++ b/web/src/routes/admin/discovery/+page.svelte @@ -91,12 +91,57 @@ // Pagination helpers. const totalPages = $derived(Math.ceil((data.total ?? 0) / data.limit)); + type SortKey = + | 'konfidenz' + | 'discovered_at' + | 'start_datum' + | 'stadt' + | 'markt_name' + | 'quellen_count'; + type SortOrder = 'asc' | 'desc'; + + // Default direction per column. Picked for the most useful "first click" + // behaviour: highest confidence / newest / soonest-upcoming / A–Z. + const columnDefaultOrder: Record = { + konfidenz: 'desc', + discovered_at: 'desc', + start_datum: 'asc', + stadt: 'asc', + markt_name: 'asc', + quellen_count: 'desc' + }; + + function buildQuery( + overrides: Partial<{ page: number; limit: number; sort: SortKey; order: SortOrder }> + ): string { + const params = new URLSearchParams({ + page: String(overrides.page ?? data.page), + limit: String(overrides.limit ?? data.limit), + sort: overrides.sort ?? (data.sort as SortKey), + order: overrides.order ?? (data.order as SortOrder) + }); + return `?${params.toString()}`; + } + function navigatePage(page: number) { - goto(`?page=${page}&limit=${data.limit}`); + goto(buildQuery({ page })); } function navigateLimit(newLimit: number) { - goto(`?page=1&limit=${newLimit}`); + goto(buildQuery({ page: 1, limit: newLimit })); + } + + function navigateSort(key: SortKey) { + const nextOrder: SortOrder = + data.sort === key ? (data.order === 'asc' ? 'desc' : 'asc') : columnDefaultOrder[key]; + // Sort change resets to page 1 — otherwise row at the top of page 2 under + // the old sort becomes a confusing "where did my sort go?" moment. + goto(buildQuery({ page: 1, sort: key, order: nextOrder })); + } + + function sortIndicator(key: SortKey): string { + if (data.sort !== key) return ''; + return data.order === 'asc' ? '▲' : '▼'; } const showingFrom = $derived(data.offset + 1); @@ -423,12 +468,72 @@ Region - Markt - Stadt - Datum + + + + + + + + + Website - Quellen - Konfidenz + + + + + + Aktion