feat(discovery): auto-accept high-confidence crawl rows during crawl
When a freshly-inserted discovered_market has a matched series, konfidenz "hoch" (≥2 sources), and both start/end dates present, Accept() is called inline with a nil reviewer (mapped to NULL reviewed_by) so the row goes straight to accepted without manual review. CrawlSummary gains auto_accepted counter; slog summary logs it. MarkAccepted / Service.Accept now take *uuid.UUID for reviewer so nil cleanly maps to NULL in the DB column (already nullable).
This commit is contained in:
@@ -256,7 +256,7 @@ func (h *Handler) Accept(c *gin.Context) {
|
||||
c.JSON(apiErr.Status, apierror.NewResponse(apiErr))
|
||||
return
|
||||
}
|
||||
seriesID, editionID, err := h.service.Accept(c.Request.Context(), id, reviewer)
|
||||
seriesID, editionID, err := h.service.Accept(c.Request.Context(), id, &reviewer)
|
||||
if err != nil {
|
||||
slog.WarnContext(c.Request.Context(), "accept failed", "queue_id", id, "error", err)
|
||||
apiErr := apierror.Internal("accept failed")
|
||||
|
||||
@@ -23,7 +23,7 @@ type mockRepo struct {
|
||||
countQueueFn func(ctx context.Context, status string) (int, error)
|
||||
getDiscoveredFn func(ctx context.Context, id uuid.UUID) (DiscoveredMarket, error)
|
||||
beginTxFn func(ctx context.Context) (pgx.Tx, error)
|
||||
markAcceptedFn func(ctx context.Context, tx pgx.Tx, id, eid, r uuid.UUID) error
|
||||
markAcceptedFn func(ctx context.Context, tx pgx.Tx, id, eid uuid.UUID, r *uuid.UUID) error
|
||||
markRejectedFn func(ctx context.Context, tx pgx.Tx, id, r uuid.UUID) error
|
||||
insertRejFn func(ctx context.Context, tx pgx.Tx, rej RejectedDiscovery) error
|
||||
|
||||
@@ -103,7 +103,7 @@ func (m *mockRepo) BeginTx(ctx context.Context) (pgx.Tx, error) {
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
func (m *mockRepo) MarkAccepted(ctx context.Context, tx pgx.Tx, id, eid, r uuid.UUID) error {
|
||||
func (m *mockRepo) MarkAccepted(ctx context.Context, tx pgx.Tx, id, eid uuid.UUID, r *uuid.UUID) error {
|
||||
if m.markAcceptedFn != nil {
|
||||
return m.markAcceptedFn(ctx, tx, id, eid, r)
|
||||
}
|
||||
|
||||
@@ -35,7 +35,7 @@ type Repository interface {
|
||||
ListQueue(ctx context.Context, status, sortBy, order string, limit, offset int) ([]DiscoveredMarket, error)
|
||||
CountQueue(ctx context.Context, status string) (int, error)
|
||||
GetDiscovered(ctx context.Context, id uuid.UUID) (DiscoveredMarket, error)
|
||||
MarkAccepted(ctx context.Context, tx pgx.Tx, id, editionID, reviewer uuid.UUID) error
|
||||
MarkAccepted(ctx context.Context, tx pgx.Tx, id, editionID uuid.UUID, reviewer *uuid.UUID) error
|
||||
MarkRejected(ctx context.Context, tx pgx.Tx, id uuid.UUID, reviewer uuid.UUID) error
|
||||
InsertRejection(ctx context.Context, tx pgx.Tx, r RejectedDiscovery) error
|
||||
BeginTx(ctx context.Context) (pgx.Tx, error)
|
||||
@@ -363,7 +363,7 @@ func scanDiscoveredMarket(s scanner) (DiscoveredMarket, error) {
|
||||
return d, nil
|
||||
}
|
||||
|
||||
func (r *pgRepository) MarkAccepted(ctx context.Context, tx pgx.Tx, id, editionID, reviewer uuid.UUID) error {
|
||||
func (r *pgRepository) MarkAccepted(ctx context.Context, tx pgx.Tx, id, editionID uuid.UUID, reviewer *uuid.UUID) error {
|
||||
_, err := tx.Exec(ctx, `
|
||||
UPDATE discovered_markets
|
||||
SET status='accepted', reviewed_at=now(), reviewed_by=$2, created_edition_id=$3
|
||||
|
||||
@@ -94,6 +94,8 @@ type CrawlSummary struct {
|
||||
DedupedExisting int `json:"deduped_existing"`
|
||||
DedupedRejected int `json:"deduped_rejected"`
|
||||
DedupedQueue int `json:"deduped_queue"`
|
||||
// AutoAccepted counts queue rows auto-accepted during this crawl (high-confidence, series match, dates set).
|
||||
AutoAccepted int `json:"auto_accepted"`
|
||||
// LinkCheckFailed is retained for JSON compatibility with the admin UI;
|
||||
// no longer populated since the crawler pipeline skips link verification.
|
||||
// Consider removing in a future schema version.
|
||||
@@ -286,11 +288,20 @@ func (s *Service) Crawl(ctx context.Context) (CrawlSummary, error) {
|
||||
continue
|
||||
}
|
||||
|
||||
if _, err := s.repo.InsertDiscovered(insertCtx, dm); err != nil {
|
||||
insertedID, err := s.repo.InsertDiscovered(insertCtx, dm)
|
||||
if err != nil {
|
||||
slog.WarnContext(ctx, "insert discovered", "error", err)
|
||||
continue
|
||||
}
|
||||
summary.Discovered++
|
||||
|
||||
if matchedSeriesID != nil && dm.Konfidenz == KonfidenzHoch && dm.StartDatum != nil && dm.EndDatum != nil {
|
||||
if _, _, err := s.Accept(insertCtx, insertedID, nil); err != nil {
|
||||
slog.WarnContext(ctx, "auto-accept failed", "id", insertedID, "error", err)
|
||||
} else {
|
||||
summary.AutoAccepted++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Automatically kick off Pass A enrichment (CrawlEnrich + Nominatim) for
|
||||
@@ -313,6 +324,7 @@ func (s *Service) Crawl(ctx context.Context) (CrawlSummary, error) {
|
||||
"merged", summary.Merged,
|
||||
"merged_across_sites", summary.MergedAcrossSites,
|
||||
"discovered", summary.Discovered,
|
||||
"auto_accepted", summary.AutoAccepted,
|
||||
"auto_merged", summary.AutoMerged,
|
||||
"deduped_existing", summary.DedupedExisting,
|
||||
"deduped_rejected", summary.DedupedRejected,
|
||||
@@ -462,7 +474,7 @@ func formatIssues(issues []Issue) string {
|
||||
|
||||
// Accept transitions a pending queue entry into a market edition.
|
||||
// Returns (seriesID, editionID, error).
|
||||
func (s *Service) Accept(ctx context.Context, queueID, reviewerID uuid.UUID) (uuid.UUID, uuid.UUID, error) {
|
||||
func (s *Service) Accept(ctx context.Context, queueID uuid.UUID, reviewerID *uuid.UUID) (uuid.UUID, uuid.UUID, error) {
|
||||
d, err := s.repo.GetDiscovered(ctx, queueID)
|
||||
if err != nil {
|
||||
return uuid.Nil, uuid.Nil, fmt.Errorf("load queue entry: %w", err)
|
||||
|
||||
@@ -144,11 +144,12 @@ func TestAccept_NewSeries_CallsCreate(t *testing.T) {
|
||||
return DiscoveredMarket{ID: qID, Status: StatusPending, MarktName: "X", Stadt: "Y", Land: "Deutschland", StartDatum: &start, EndDatum: &end}, nil
|
||||
},
|
||||
beginTxFn: func(_ context.Context) (pgx.Tx, error) { return noopTx{}, nil },
|
||||
markAcceptedFn: func(_ context.Context, _ pgx.Tx, _, _, _ uuid.UUID) error { return nil },
|
||||
markAcceptedFn: func(_ context.Context, _ pgx.Tx, _, _ uuid.UUID, _ *uuid.UUID) error { return nil },
|
||||
}
|
||||
mc := &stubCreator{}
|
||||
svc := NewService(m, nil, noopLinkVerifier{}, mc, nil, nil, nil)
|
||||
_, _, err := svc.Accept(context.Background(), qID, uuid.New())
|
||||
reviewerID := uuid.New()
|
||||
_, _, err := svc.Accept(context.Background(), qID, &reviewerID)
|
||||
if err != nil {
|
||||
t.Fatalf("accept err: %v", err)
|
||||
}
|
||||
@@ -166,11 +167,12 @@ func TestAccept_ExistingSeries_CallsCreateEditionForSeries(t *testing.T) {
|
||||
return DiscoveredMarket{Status: StatusPending, MarktName: "X", Stadt: "Y", Land: "Deutschland", StartDatum: &start, EndDatum: &end, MatchedSeriesID: &sid}, nil
|
||||
},
|
||||
beginTxFn: func(_ context.Context) (pgx.Tx, error) { return noopTx{}, nil },
|
||||
markAcceptedFn: func(_ context.Context, _ pgx.Tx, _, _, _ uuid.UUID) error { return nil },
|
||||
markAcceptedFn: func(_ context.Context, _ pgx.Tx, _, _ uuid.UUID, _ *uuid.UUID) error { return nil },
|
||||
}
|
||||
mc := &stubCreator{}
|
||||
svc := NewService(m, nil, noopLinkVerifier{}, mc, nil, nil, nil)
|
||||
_, _, err := svc.Accept(context.Background(), uuid.New(), uuid.New())
|
||||
reviewerID := uuid.New()
|
||||
_, _, err := svc.Accept(context.Background(), uuid.New(), &reviewerID)
|
||||
if err != nil {
|
||||
t.Fatalf("accept err: %v", err)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user