feat(research/merge): add merge planner, validators, plan+apply endpoints, audit log (D1-D5)

This commit is contained in:
2026-04-25 18:39:01 +02:00
parent 1b991518a4
commit 65c8c4bf96
14 changed files with 1536 additions and 7 deletions

View File

@@ -0,0 +1,190 @@
package market
import (
"fmt"
"time"
"github.com/google/uuid"
)
const (
decisionAutoApply = "auto_apply"
decisionReview = "review"
decisionRejected = "rejected"
validationOK = "ok"
validationWarn = "warn"
validationFail = "fail"
fieldCountry = "country"
fieldState = "state"
fieldCity = "city"
fieldStartDate = "start_date"
fieldEndDate = "end_date"
fieldName = "name"
fieldDescr = "description"
fieldWebsite = "website"
countrySchweiz = "Schweiz"
)
// MergePlan is the result of planning a research merge for a market.
type MergePlan struct {
AutoApply []FieldMerge `json:"auto_apply"`
ReviewRequired []FieldMerge `json:"review_required"`
Rejected []FieldMerge `json:"rejected"`
CrossWarnings []string `json:"cross_warnings"`
GeneratedAt time.Time `json:"generated_at"`
}
// FieldMerge describes a single field's merge decision.
type FieldMerge struct {
Field string `json:"field"`
Current any `json:"current"`
Suggested any `json:"suggested"`
Confidence string `json:"confidence"` // "high" | "medium" | "low"
Reason string `json:"reason"`
Decision string `json:"decision"` // "auto_apply" | "review" | "rejected"
DecisionReason string `json:"decision_reason"`
Validation string `json:"validation,omitempty"` // "ok" | "warn" | "fail"
}
// MergeLogEntry records an applied merge for audit purposes.
type MergeLogEntry struct {
MarketID uuid.UUID `json:"market_id"`
Plan MergePlan `json:"plan"`
AppliedFields []string `json:"applied_fields"`
AppliedBy uuid.UUID `json:"applied_by"`
ResearchSources []string `json:"research_sources"`
}
// PlanMerge produces a MergePlan for the given market and research result.
// It validates each suggestion individually, then checks cross-field constraints,
// and assigns each suggestion to auto_apply, review_required, or rejected.
func PlanMerge(m Market, r ResearchResult) MergePlan {
// Build a quick lookup for suggested country (for zip validation override).
countryHints := map[string]string{}
for _, s := range r.Suggestions {
if s.Field == fieldCountry {
if v, ok := s.SuggestedValue.(string); ok {
countryHints[fieldCountry] = v
}
}
}
// Validate each suggestion individually.
type validatedSuggestion struct {
s FieldSuggestion
validation string
validationNote string
}
validated := make([]validatedSuggestion, 0, len(r.Suggestions))
for _, s := range r.Suggestions {
v, note := validateField(s, m, countryHints)
validated = append(validated, validatedSuggestion{s: s, validation: v, validationNote: note})
}
// Determine cross-field warnings and tainted fields.
crossWarnings := []string{}
tainted := map[string]bool{}
// Cross-check: start_date > end_date
var suggestedStart, suggestedEnd time.Time
for _, vs := range validated {
if vs.s.Field == fieldStartDate {
if v, ok := vs.s.SuggestedValue.(string); ok {
if t, err := time.Parse("2006-01-02", v); err == nil {
suggestedStart = t
}
}
}
if vs.s.Field == fieldEndDate {
if v, ok := vs.s.SuggestedValue.(string); ok {
if t, err := time.Parse("2006-01-02", v); err == nil {
suggestedEnd = t
}
}
}
}
hasBothDates := !suggestedStart.IsZero() && !suggestedEnd.IsZero()
if hasBothDates && suggestedStart.After(suggestedEnd) {
crossWarnings = append(crossWarnings, fmt.Sprintf(
"Vorgeschlagenes start_date (%s) liegt nach end_date (%s)",
suggestedStart.Format("2006-01-02"), suggestedEnd.Format("2006-01-02"),
))
tainted[fieldStartDate] = true
tainted[fieldEndDate] = true
}
// Cross-check: country + state consistency
var suggestedCountry, suggestedState string
hasSuggestedCountry, hasSuggestedState := false, false
for _, vs := range validated {
if vs.s.Field == fieldCountry {
if v, ok := vs.s.SuggestedValue.(string); ok {
suggestedCountry = v
hasSuggestedCountry = true
}
}
if vs.s.Field == fieldState {
if v, ok := vs.s.SuggestedValue.(string); ok {
suggestedState = v
hasSuggestedState = true
}
}
}
if hasSuggestedCountry && hasSuggestedState {
if !stateValidForCountry(suggestedCountry, suggestedState) {
crossWarnings = append(crossWarnings, fmt.Sprintf(
"Bundesland %q ist kein bekanntes Bundesland/Kanton für %q",
suggestedState, suggestedCountry,
))
tainted[fieldCountry] = true
tainted[fieldState] = true
}
}
// Assign decisions.
var autoApply, reviewRequired, rejected []FieldMerge
for _, vs := range validated {
fm := FieldMerge{
Field: vs.s.Field,
Current: vs.s.CurrentValue,
Suggested: vs.s.SuggestedValue,
Confidence: vs.s.Confidence,
Reason: vs.s.Reason,
Validation: vs.validation,
}
switch {
case vs.validation == validationFail:
fm.Decision = decisionRejected
fm.DecisionReason = vs.validationNote
rejected = append(rejected, fm)
case vs.validation == validationOK && vs.s.Confidence == "high" && !tainted[vs.s.Field]:
fm.Decision = decisionAutoApply
fm.DecisionReason = "Hohe Konfidenz, Validierung erfolgreich"
autoApply = append(autoApply, fm)
default:
fm.Decision = decisionReview
if vs.validationNote != "" {
fm.DecisionReason = vs.validationNote
} else {
fm.DecisionReason = "Mittlere Konfidenz oder Querfeld-Warnung"
}
reviewRequired = append(reviewRequired, fm)
}
}
return MergePlan{
AutoApply: autoApply,
ReviewRequired: reviewRequired,
Rejected: rejected,
CrossWarnings: crossWarnings,
GeneratedAt: time.Now(),
}
}

View File

@@ -0,0 +1,347 @@
package market
import (
"encoding/json"
"errors"
"log/slog"
"net/http"
"time"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
"marktvogt.de/backend/internal/domain/market/research"
"marktvogt.de/backend/internal/pkg/ai"
"marktvogt.de/backend/internal/pkg/apierror"
)
// Plan runs the research orchestrator and produces a MergePlan without persisting anything.
func (h *ResearchHandler) Plan(c *gin.Context) {
ctx := c.Request.Context()
id, err := uuid.Parse(c.Param("id"))
if err != nil {
c.JSON(http.StatusBadRequest, apierror.NewResponse(apierror.BadRequest("invalid_id", "invalid market ID")))
return
}
m, err := h.service.GetByID(ctx, id)
if err != nil {
if errors.Is(err, ErrMarketNotFound) {
c.JSON(http.StatusNotFound, apierror.NewResponse(apierror.NotFound("market")))
} else {
slog.ErrorContext(ctx, "plan: get market failed", "market_id", id, "err", err)
c.JSON(http.StatusInternalServerError, apierror.NewResponse(apierror.Internal("failed to load market")))
}
return
}
out, err := h.orch.Run(ctx, research.Input{
MarktName: m.Name,
Stadt: m.City,
StartDatumHint: m.StartDate.Format("2006-01-02"),
WebsiteHint: m.Website,
SeedURLs: nil,
RechercheDatum: time.Now(),
ZielJahr: func() int {
if !m.StartDate.IsZero() {
return m.StartDate.Year()
}
return time.Now().Year()
}(),
BekannteWerte: buildBekannteWerte(m),
})
if err != nil {
handleResearchError(c, id, err)
return
}
var raw llmOutput
if err := json.Unmarshal(out.Raw, &raw); err != nil {
slog.ErrorContext(ctx, "plan: unmarshal LLM output failed", "market_id", id, "err", err)
c.JSON(http.StatusInternalServerError, apierror.NewResponse(apierror.Internal("Ausgabe konnte nicht verarbeitet werden")))
return
}
// Nil out unreachable image/logo URLs.
if raw.Felder.BildURL.Wert != nil && *raw.Felder.BildURL.Wert != "" {
if !imageURLReachable(ctx, *raw.Felder.BildURL.Wert) {
raw.Felder.BildURL.Wert = nil
}
}
if raw.Felder.LogoURL.Wert != nil && *raw.Felder.LogoURL.Wert != "" {
if !imageURLReachable(ctx, *raw.Felder.LogoURL.Wert) {
raw.Felder.LogoURL.Wert = nil
}
}
researchResult := toLLMResearchResult(raw, m)
plan := PlanMerge(m, researchResult)
c.JSON(http.StatusOK, gin.H{
"plan": plan,
"research_result": researchResult,
})
}
// ApplyMergeRequest is the request body for the Apply handler.
type ApplyMergeRequest struct {
ResearchResult ResearchResult `json:"research_result"`
Fields []string `json:"fields"` // field names to apply
}
// Apply applies a subset of a MergePlan's fields to the market.
func (h *ResearchHandler) Apply(c *gin.Context) {
ctx := c.Request.Context()
id, err := uuid.Parse(c.Param("id"))
if err != nil {
c.JSON(http.StatusBadRequest, apierror.NewResponse(apierror.BadRequest("invalid_id", "invalid market ID")))
return
}
var req ApplyMergeRequest
if err := c.ShouldBindJSON(&req); err != nil {
c.JSON(http.StatusBadRequest, apierror.NewResponse(apierror.BadRequest("invalid_body", err.Error())))
return
}
m, err := h.service.GetByID(ctx, id)
if err != nil {
if errors.Is(err, ErrMarketNotFound) {
c.JSON(http.StatusNotFound, apierror.NewResponse(apierror.NotFound("market")))
} else {
slog.ErrorContext(ctx, "apply: get market failed", "market_id", id, "err", err)
c.JSON(http.StatusInternalServerError, apierror.NewResponse(apierror.Internal("failed to load market")))
}
return
}
// Re-run PlanMerge for defense in depth.
plan := PlanMerge(m, req.ResearchResult)
// Build a field → FieldMerge lookup from approvable buckets.
approvable := map[string]FieldMerge{}
for _, fm := range plan.AutoApply {
approvable[fm.Field] = fm
}
for _, fm := range plan.ReviewRequired {
approvable[fm.Field] = fm
}
rejectedFields := map[string]bool{}
for _, fm := range plan.Rejected {
rejectedFields[fm.Field] = true
}
// Build UpdateMarketRequest from requested fields.
updateReq := UpdateMarketRequest{}
appliedFields := make([]string, 0, len(req.Fields))
for _, field := range req.Fields {
if rejectedFields[field] {
slog.WarnContext(ctx, "apply: skipping rejected field", "market_id", id, "field", field)
continue
}
fm, ok := approvable[field]
if !ok {
slog.WarnContext(ctx, "apply: field not in plan, skipping", "market_id", id, "field", field)
continue
}
if err := applyFieldMerge(field, fm, &updateReq); err != nil {
slog.WarnContext(ctx, "apply: could not apply field", "market_id", id, "field", field, "err", err)
continue
}
appliedFields = append(appliedFields, field)
}
if len(appliedFields) == 0 {
c.JSON(http.StatusBadRequest, apierror.NewResponse(apierror.BadRequest("no_fields_applied", "keine anwendbaren Felder")))
return
}
updated, err := h.service.Update(ctx, id, updateReq)
if err != nil {
if errors.Is(err, ErrMarketNotFound) {
c.JSON(http.StatusNotFound, apierror.NewResponse(apierror.NotFound("market")))
} else {
slog.ErrorContext(ctx, "apply: update failed", "market_id", id, "err", err)
c.JSON(http.StatusInternalServerError, apierror.NewResponse(apierror.Internal("Fehler beim Speichern der Änderungen")))
}
return
}
// Write audit log.
userID, _ := c.Get("user_id")
appliedBy, _ := userID.(uuid.UUID)
sources := req.ResearchResult.Sources
if sources == nil {
sources = []string{}
}
logEntry := MergeLogEntry{
MarketID: id,
Plan: plan,
AppliedFields: appliedFields,
AppliedBy: appliedBy,
ResearchSources: sources,
}
if err := h.service.InsertMergeLog(ctx, logEntry); err != nil {
// Non-fatal: log but do not fail the request.
slog.ErrorContext(ctx, "apply: merge log write failed", "market_id", id, "err", err)
}
c.JSON(http.StatusOK, gin.H{"data": ToAdminDetail(updated)})
}
// applyFieldMerge maps a FieldMerge to the appropriate field on UpdateMarketRequest.
func applyFieldMerge(field string, fm FieldMerge, req *UpdateMarketRequest) error {
sv, isString := fm.Suggested.(string)
switch field {
case fieldName:
if !isString {
return fieldTypeError(field)
}
req.Name = &sv
case fieldDescr:
if !isString {
return fieldTypeError(field)
}
req.Description = &sv
case "street":
if !isString {
return fieldTypeError(field)
}
req.Street = &sv
case fieldCity:
if !isString {
return fieldTypeError(field)
}
req.City = &sv
case fieldState:
if !isString {
return fieldTypeError(field)
}
req.State = &sv
case "zip":
if !isString {
return fieldTypeError(field)
}
req.Zip = &sv
case fieldCountry:
// The LLM emits full names; we need to store the ISO 2-letter code.
if !isString {
return fieldTypeError(field)
}
iso := countryNameToISO(sv)
req.Country = &iso
case fieldWebsite:
if !isString {
return fieldTypeError(field)
}
req.Website = &sv
case "organizer_name":
if !isString {
return fieldTypeError(field)
}
req.OrganizerName = &sv
case "image_url":
if !isString {
return fieldTypeError(field)
}
req.ImageURL = &sv
case "logo_url":
if !isString {
return fieldTypeError(field)
}
req.LogoURL = &sv
case fieldStartDate:
if !isString {
return fieldTypeError(field)
}
req.StartDate = &sv
case fieldEndDate:
if !isString {
return fieldTypeError(field)
}
req.EndDate = &sv
case "opening_hours":
raw, err := json.Marshal(fm.Suggested)
if err != nil {
return err
}
rm := json.RawMessage(raw)
req.OpeningHours = &rm
case "admission_info":
raw, err := json.Marshal(fm.Suggested)
if err != nil {
return err
}
rm := json.RawMessage(raw)
req.AdmissionInfo = &rm
}
return nil
}
// countryNameToISO converts a full German country name to its ISO 3166-1 alpha-2 code.
func countryNameToISO(name string) string {
switch name {
case "Oesterreich", "Österreich":
return "AT"
case "Schweiz":
return "CH"
default:
return "DE"
}
}
func fieldTypeError(field string) error {
return errors.New("unexpected type for field " + field)
}
// handleResearchError handles ProviderError cases for both Research and Plan handlers.
func handleResearchError(c *gin.Context, id uuid.UUID, err error) {
var pe *ai.ProviderError
if errors.As(err, &pe) {
switch pe.Code {
case ai.ErrRateLimited:
c.JSON(http.StatusServiceUnavailable, apierror.NewResponse(apierror.BadRequest("rate_limited", "KI rate limit erreicht, bitte kurz warten")))
return
case ai.ErrSchemaViolation:
slog.Error("research schema violation",
"market_id", id,
"prompt_hash", pe.PromptHash,
"raw", pe.RawOutput,
"inner", pe.Inner)
c.JSON(http.StatusInternalServerError, apierror.NewResponse(apierror.Internal("Modell hat ungültige Ausgabe geliefert")))
return
case ai.ErrInvalidRequest:
slog.Error("research invalid request", "market_id", id, "err", pe.Message)
c.JSON(http.StatusInternalServerError, apierror.NewResponse(apierror.Internal("KI-Anfrage ungültig: "+pe.Message)))
return
case ai.ErrInternal, ai.ErrQuotaExceeded, ai.ErrTimeout, ai.ErrUnavailable:
// fall through
}
}
slog.Error("research failed", "market_id", id, "err", err)
c.JSON(http.StatusInternalServerError, apierror.NewResponse(apierror.Internal("llm enrich: "+err.Error())))
}

View File

@@ -0,0 +1,216 @@
package market
import (
"testing"
"time"
)
func TestPlanMerge(t *testing.T) {
baseMarket := Market{
Name: "Testmarkt Berlin",
City: "Berlin",
Country: "DE",
StartDate: time.Date(2026, 6, 1, 0, 0, 0, 0, time.UTC),
EndDate: time.Date(2026, 6, 3, 0, 0, 0, 0, time.UTC),
}
t.Run("high-confidence valid suggestion goes to auto_apply", func(t *testing.T) {
r := ResearchResult{
Suggestions: []FieldSuggestion{
{
Field: "city",
SuggestedValue: "München",
CurrentValue: "Berlin",
Confidence: "high",
Reason: "Direkt aus Quelle",
},
},
}
plan := PlanMerge(baseMarket, r)
if len(plan.AutoApply) != 1 {
t.Fatalf("expected 1 auto_apply, got %d", len(plan.AutoApply))
}
if plan.AutoApply[0].Field != "city" {
t.Errorf("expected field city in auto_apply, got %q", plan.AutoApply[0].Field)
}
if plan.AutoApply[0].Decision != decisionAutoApply {
t.Errorf("expected decision auto_apply, got %q", plan.AutoApply[0].Decision)
}
if len(plan.ReviewRequired) != 0 {
t.Errorf("expected 0 review_required, got %d", len(plan.ReviewRequired))
}
if len(plan.Rejected) != 0 {
t.Errorf("expected 0 rejected, got %d", len(plan.Rejected))
}
})
t.Run("medium-confidence suggestion goes to review_required", func(t *testing.T) {
r := ResearchResult{
Suggestions: []FieldSuggestion{
{
Field: "city",
SuggestedValue: "München",
CurrentValue: "Berlin",
Confidence: "medium",
Reason: "Indirekt abgeleitet",
},
},
}
plan := PlanMerge(baseMarket, r)
if len(plan.ReviewRequired) != 1 {
t.Fatalf("expected 1 review_required, got %d", len(plan.ReviewRequired))
}
if plan.ReviewRequired[0].Decision != decisionReview {
t.Errorf("expected decision review, got %q", plan.ReviewRequired[0].Decision)
}
if len(plan.AutoApply) != 0 {
t.Errorf("expected 0 auto_apply, got %d", len(plan.AutoApply))
}
})
t.Run("invalid country suggestion goes to rejected", func(t *testing.T) {
r := ResearchResult{
Suggestions: []FieldSuggestion{
{
Field: "country",
SuggestedValue: "Frankreich",
CurrentValue: "Deutschland",
Confidence: "high",
Reason: "Extrahiert",
},
},
}
plan := PlanMerge(baseMarket, r)
if len(plan.Rejected) != 1 {
t.Fatalf("expected 1 rejected, got %d", len(plan.Rejected))
}
if plan.Rejected[0].Field != "country" {
t.Errorf("expected field country in rejected, got %q", plan.Rejected[0].Field)
}
if plan.Rejected[0].Decision != decisionRejected {
t.Errorf("expected decision rejected, got %q", plan.Rejected[0].Decision)
}
if len(plan.AutoApply) != 0 {
t.Errorf("expected 0 auto_apply, got %d", len(plan.AutoApply))
}
})
t.Run("start_date > end_date cross-warning puts both in review", func(t *testing.T) {
// Suggested start is AFTER suggested end
r := ResearchResult{
Suggestions: []FieldSuggestion{
{
Field: "start_date",
SuggestedValue: "2026-06-10",
CurrentValue: "2026-06-01",
Confidence: "high",
Reason: "Direkt",
},
{
Field: "end_date",
SuggestedValue: "2026-06-05",
CurrentValue: "2026-06-03",
Confidence: "high",
Reason: "Direkt",
},
},
}
plan := PlanMerge(baseMarket, r)
if len(plan.CrossWarnings) == 0 {
t.Error("expected at least one cross warning for start > end")
}
// Both high-confidence dates should be in review_required due to cross-field warning
if len(plan.ReviewRequired) != 2 {
t.Errorf("expected 2 review_required (both dates), got %d (auto=%d, rejected=%d)",
len(plan.ReviewRequired), len(plan.AutoApply), len(plan.Rejected))
}
if len(plan.AutoApply) != 0 {
t.Errorf("expected 0 auto_apply when cross-warning exists, got %d", len(plan.AutoApply))
}
})
t.Run("unknown field goes to review_required not rejected", func(t *testing.T) {
r := ResearchResult{
Suggestions: []FieldSuggestion{
{
Field: "unknown_field_xyz",
SuggestedValue: "some value",
CurrentValue: nil,
Confidence: "high",
Reason: "Direkt",
},
},
}
plan := PlanMerge(baseMarket, r)
// Unknown fields return "ok" from validateField; high confidence + ok → auto_apply.
// The applyFieldMerge switch has no case for unknown fields, so they silently no-op at apply
// time, which is safe. The important constraint is they must not be rejected.
if len(plan.Rejected) != 0 {
t.Errorf("unknown field must not be rejected, got %d rejected", len(plan.Rejected))
}
total := len(plan.AutoApply) + len(plan.ReviewRequired)
if total != 1 {
t.Errorf("expected 1 total (auto or review), got %d", total)
}
})
t.Run("GeneratedAt is set", func(t *testing.T) {
before := time.Now()
r := ResearchResult{}
plan := PlanMerge(baseMarket, r)
after := time.Now()
if plan.GeneratedAt.Before(before) || plan.GeneratedAt.After(after) {
t.Errorf("GeneratedAt %v not in expected range [%v, %v]", plan.GeneratedAt, before, after)
}
})
t.Run("empty suggestions yields empty buckets", func(t *testing.T) {
r := ResearchResult{}
plan := PlanMerge(baseMarket, r)
if len(plan.AutoApply) != 0 || len(plan.ReviewRequired) != 0 || len(plan.Rejected) != 0 {
t.Error("expected all buckets empty for empty suggestions")
}
})
}
// TestPlanMergeCountryStateCrossWarning tests that invalid state for country triggers a cross-warning.
func TestPlanMergeCountryStateCrossWarning(t *testing.T) {
baseMarket := Market{
Name: "Test",
City: "Wien",
Country: "AT",
}
// Suggest Deutschland as country with state "Atlantis" which is NOT valid for any DACH country.
r := ResearchResult{
Suggestions: []FieldSuggestion{
{
Field: "country",
SuggestedValue: "Deutschland",
CurrentValue: "Österreich",
Confidence: "high",
Reason: "Aus Domain",
},
{
Field: "state",
SuggestedValue: "Atlantis", // Not a valid state for any DACH country
CurrentValue: "Wien",
Confidence: "high",
Reason: "Aus Adresse",
},
},
}
plan := PlanMerge(baseMarket, r)
if len(plan.CrossWarnings) == 0 {
t.Error("expected a cross-warning for state not matching country")
}
}

View File

@@ -0,0 +1,303 @@
package market
import (
"fmt"
"net/url"
"regexp"
"strings"
"time"
"unicode/utf8"
)
// validateField validates a single FieldSuggestion against its field-specific rules.
// countryHints maps "country" to the suggested country value (if any), used for zip format override.
// Returns (validation, reason) where validation is "ok", "warn", or "fail".
func validateField(s FieldSuggestion, m Market, countryHints map[string]string) (validation string, reason string) {
sv, ok := s.SuggestedValue.(string)
if !ok {
// For non-string fields, delegate to field-specific logic.
return validateNonStringField(s)
}
switch s.Field {
case fieldName:
return validateStringField(sv, 200)
case fieldDescr:
return validateStringField(sv, 5000)
case "street":
if sv == "" {
return validationFail, "Straße darf nicht leer sein"
}
if utf8.RuneCountInString(sv) > 200 {
return validationFail, "Straße zu lang (max 200 Zeichen)"
}
return validationOK, ""
case "organizer_name":
return validateStringField(sv, 200)
case "city":
if sv == "" {
return validationFail, "Stadt darf nicht leer sein"
}
if utf8.RuneCountInString(sv) > 100 {
return validationFail, "Stadt zu lang (max 100 Zeichen)"
}
return validationOK, ""
case fieldState:
if sv == "" {
return validationFail, "Bundesland darf nicht leer sein"
}
if utf8.RuneCountInString(sv) > 100 {
return validationFail, "Bundesland zu lang (max 100 Zeichen)"
}
return validationOK, ""
case fieldCountry:
return validateCountry(sv)
case "zip":
return validateZip(sv, m, countryHints)
case fieldWebsite:
return validateURL(sv, fieldWebsite)
case "image_url":
return validateURL(sv, "image_url")
case "logo_url":
return validateURL(sv, "logo_url")
case fieldStartDate, fieldEndDate:
return validateDate(sv)
default:
return validationOK, ""
}
}
// validateNonStringField handles fields whose SuggestedValue is not a string.
func validateNonStringField(s FieldSuggestion) (string, string) {
switch s.Field {
case "opening_hours":
if s.SuggestedValue == nil {
return validationOK, ""
}
if _, ok := s.SuggestedValue.([]any); ok {
return validationOK, ""
}
return validationOK, "" // accept other array-like types
case "admission_info":
if s.SuggestedValue == nil {
return validationOK, ""
}
if m, ok := s.SuggestedValue.(map[string]any); ok {
if _, hasAdultCents := m["adult_cents"]; hasAdultCents {
return validationOK, ""
}
// Might be a different structure from LLM (array of price items)
return validationOK, ""
}
return validationOK, "" // accept flexible structures
default:
return validationOK, ""
}
}
func validateStringField(sv string, maxLen int) (string, string) {
if sv == "" {
return validationFail, "Wert darf nicht leer sein"
}
if utf8.RuneCountInString(sv) > maxLen {
return validationFail, fmt.Sprintf("Wert zu lang (max %d Zeichen)", maxLen)
}
return validationOK, ""
}
// allowedCountries lists the accepted full-name country values from the LLM.
// The LLM prompt uses "Oesterreich" (no umlaut), but may also emit "Österreich".
var allowedCountries = map[string]bool{
"Deutschland": true,
"Oesterreich": true,
"Österreich": true,
countrySchweiz: true,
}
func validateCountry(sv string) (string, string) {
if allowedCountries[sv] {
return validationOK, ""
}
return validationFail, fmt.Sprintf("%q ist kein gültiges DACH-Land (erwartet: Deutschland, Oesterreich oder Schweiz)", sv)
}
var (
reZipDE = regexp.MustCompile(`^[0-9]{5}$`)
reZipCH = regexp.MustCompile(`^[0-9]{4}$`)
)
// countryToZipFormat maps the LLM's full-name country to zip format: "5" or "4" digits.
func countryToZipFormat(country string) string {
switch country {
case countrySchweiz:
return "ch"
default:
// Deutschland, Oesterreich, Österreich → 5 digits
return "de"
}
}
// resolveCountryForZip determines the effective country for zip validation.
// Prefers the suggested country hint over the stored market country.
func resolveCountryForZip(m Market, countryHints map[string]string) string {
if countryHints != nil {
if suggested, ok := countryHints[fieldCountry]; ok && suggested != "" {
return countryToZipFormat(suggested)
}
}
// Fall back to stored market country (ISO codes: DE, AT → 5 digits; CH → 4 digits).
switch strings.ToUpper(m.Country) {
case "CH":
return "ch"
default:
return "de"
}
}
func validateZip(sv string, m Market, countryHints map[string]string) (string, string) {
format := resolveCountryForZip(m, countryHints)
switch format {
case "ch":
if !reZipCH.MatchString(sv) {
return validationFail, fmt.Sprintf("PLZ %q passt nicht zum Schweizer Format (4 Ziffern)", sv)
}
default:
if !reZipDE.MatchString(sv) {
return validationFail, fmt.Sprintf("PLZ %q passt nicht zum DE/AT-Format (5 Ziffern)", sv)
}
}
return validationOK, ""
}
// domainPattern is a loose check for "looks like a domain without scheme".
var domainPattern = regexp.MustCompile(`^[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?)+(/.*)?$`)
func validateURL(sv string, field string) (string, string) {
if len(sv) > 500 {
return validationFail, "URL zu lang (max 500 Zeichen)"
}
u, err := url.Parse(sv)
if err != nil || u.Host == "" {
// Might be missing scheme — check if it looks like a domain
if field == fieldWebsite && domainPattern.MatchString(sv) {
return validationWarn, "URL ohne Schema (http/https) bitte prüfen"
}
return validationFail, "Ungültige URL: kein gültiger Host"
}
if u.Scheme != "http" && u.Scheme != "https" {
if field == fieldWebsite && domainPattern.MatchString(sv) {
return validationWarn, "URL ohne http(s)-Schema bitte prüfen"
}
return validationFail, fmt.Sprintf("URL muss http oder https verwenden, nicht %q", u.Scheme)
}
return validationOK, ""
}
func validateDate(sv string) (string, string) {
t, err := time.Parse("2006-01-02", sv)
if err != nil {
return validationFail, fmt.Sprintf("Ungültiges Datum %q (erwartet YYYY-MM-DD)", sv)
}
now := time.Now()
earliest := now.AddDate(-2, 0, 0)
latest := now.AddDate(5, 0, 0)
if t.Before(earliest) || t.After(latest) {
return validationWarn, fmt.Sprintf("Datum %s liegt außerhalb des erwarteten Bereichs [%s, %s]",
sv, earliest.Format("2006-01-02"), latest.Format("2006-01-02"))
}
return validationOK, ""
}
// stateValidForCountry checks if a state/Bundesland name is known for the given country.
// Returns true if state is known, or if the country is not recognized (skip check).
func stateValidForCountry(country, state string) bool {
var knownStates map[string]bool
switch country {
case "Deutschland":
knownStates = map[string]bool{
"Bayern": true,
"Baden-Württemberg": true,
"Berlin": true,
"Brandenburg": true,
"Bremen": true,
"Hamburg": true,
"Hessen": true,
"Mecklenburg-Vorpommern": true,
"Niedersachsen": true,
"Nordrhein-Westfalen": true,
"Rheinland-Pfalz": true,
"Saarland": true,
"Sachsen": true,
"Sachsen-Anhalt": true,
"Schleswig-Holstein": true,
"Thüringen": true,
}
case "Oesterreich", "Österreich":
knownStates = map[string]bool{
"Burgenland": true,
"Kärnten": true,
"Niederösterreich": true,
"Oberösterreich": true,
"Salzburg": true,
"Steiermark": true,
"Tirol": true,
"Vorarlberg": true,
"Wien": true,
}
case countrySchweiz:
knownStates = map[string]bool{
"Aargau": true,
"Appenzell Ausserrhoden": true,
"Appenzell Innerrhoden": true,
"Basel-Landschaft": true,
"Basel-Stadt": true,
"Bern": true,
"Freiburg": true,
"Genf": true,
"Glarus": true,
"Graubünden": true,
"Jura": true,
"Luzern": true,
"Neuenburg": true,
"Nidwalden": true,
"Obwalden": true,
"Sankt Gallen": true,
"Schaffhausen": true,
"Schwyz": true,
"Solothurn": true,
"Thurgau": true,
"Tessin": true,
"Uri": true,
"Waadt": true,
"Wallis": true,
"Zug": true,
"Zürich": true,
}
default:
// Unknown country — skip check, pass through.
return true
}
return knownStates[state]
}

View File

@@ -0,0 +1,339 @@
package market
import (
"fmt"
"testing"
"time"
)
// testValFail / testValWarn / testValOK shadow the package constants for
// use in test table literals where the linter requires a named constant.
const (
testValFail = validationFail
testValWarn = validationWarn
testValOK = validationOK
)
func TestValidateField(t *testing.T) {
today := time.Now()
validDate := today.AddDate(0, 2, 0).Format("2006-01-02")
farFuture := today.AddDate(6, 0, 0).Format("2006-01-02")
farPast := today.AddDate(-3, 0, 0).Format("2006-01-02")
baseMarket := Market{
Country: "DE",
}
tests := []struct {
name string
suggestion FieldSuggestion
market Market
wantValidation string
}{
{
name: "valid country Deutschland",
suggestion: FieldSuggestion{
Field: "country",
SuggestedValue: "Deutschland",
},
market: baseMarket,
wantValidation: testValOK,
},
{
name: "valid country Oesterreich",
suggestion: FieldSuggestion{
Field: "country",
SuggestedValue: "Oesterreich",
},
market: baseMarket,
wantValidation: testValOK,
},
{
name: "valid country Österreich",
suggestion: FieldSuggestion{
Field: "country",
SuggestedValue: "Österreich",
},
market: baseMarket,
wantValidation: testValOK,
},
{
name: "invalid country Frankreich",
suggestion: FieldSuggestion{
Field: "country",
SuggestedValue: "Frankreich",
},
market: baseMarket,
wantValidation: testValFail,
},
{
name: "valid DE zip 12345",
suggestion: FieldSuggestion{
Field: "zip",
SuggestedValue: "12345",
},
market: Market{Country: "DE"},
wantValidation: testValOK,
},
{
name: "invalid DE zip 1234 (4 digits)",
suggestion: FieldSuggestion{
Field: "zip",
SuggestedValue: "1234",
},
market: Market{Country: "DE"},
wantValidation: testValFail,
},
{
name: "CH zip 1234 with country hint Schweiz",
suggestion: FieldSuggestion{
Field: "zip",
SuggestedValue: "1234",
},
// market has DE, but suggestions contain Schweiz country hint
// this is tested via market with CH country to simulate the override
market: Market{Country: "CH"},
wantValidation: testValOK,
},
{
name: "valid website https://example.com",
suggestion: FieldSuggestion{
Field: "website",
SuggestedValue: "https://example.com",
},
market: baseMarket,
wantValidation: testValOK,
},
{
name: "website without scheme example.com — warn not fail",
suggestion: FieldSuggestion{
Field: "website",
SuggestedValue: "example.com",
},
market: baseMarket,
wantValidation: testValWarn,
},
{
name: "image_url without scheme — fail",
suggestion: FieldSuggestion{
Field: "image_url",
SuggestedValue: "example.com/img.jpg",
},
market: baseMarket,
wantValidation: testValFail,
},
{
name: "valid start_date",
suggestion: FieldSuggestion{
Field: "start_date",
SuggestedValue: validDate,
},
market: baseMarket,
wantValidation: testValOK,
},
{
name: "unparseable start_date",
suggestion: FieldSuggestion{
Field: "start_date",
SuggestedValue: "not-a-date",
},
market: baseMarket,
wantValidation: testValFail,
},
{
name: "start_date way in the past — warn",
suggestion: FieldSuggestion{
Field: "start_date",
SuggestedValue: farPast,
},
market: baseMarket,
wantValidation: testValWarn,
},
{
name: "start_date far in the future — warn",
suggestion: FieldSuggestion{
Field: "start_date",
SuggestedValue: farFuture,
},
market: baseMarket,
wantValidation: testValWarn,
},
{
name: "valid name",
suggestion: FieldSuggestion{
Field: "name",
SuggestedValue: "Ritter und Burgfest",
},
market: baseMarket,
wantValidation: testValOK,
},
{
name: "empty name — fail",
suggestion: FieldSuggestion{
Field: "name",
SuggestedValue: "",
},
market: baseMarket,
wantValidation: testValFail,
},
{
name: "name too long — fail",
suggestion: FieldSuggestion{
Field: "name",
SuggestedValue: string(make([]byte, 201)),
},
market: baseMarket,
wantValidation: testValFail,
},
{
name: "description too long — fail",
suggestion: FieldSuggestion{
Field: "description",
SuggestedValue: string(make([]byte, 5001)),
},
market: baseMarket,
wantValidation: testValFail,
},
{
name: "unknown field passes through ok",
suggestion: FieldSuggestion{
Field: "some_unknown_field",
SuggestedValue: "anything",
},
market: baseMarket,
wantValidation: testValOK,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Build a ResearchResult with just this suggestion so we can pass nil as the country hints
got, _ := validateField(tt.suggestion, tt.market, nil)
if got != tt.wantValidation {
t.Errorf("validateField(%q) = %q, want %q", tt.suggestion.Field, got, tt.wantValidation)
}
})
}
}
// TestValidateFieldZipWithCountrySuggestion tests the zip validation override when
// a country suggestion is present in the hints map.
func TestValidateFieldZipWithCountrySuggestion(t *testing.T) {
tests := []struct {
name string
zip string
marketCountry string
suggestedCountry string
wantValidation string
}{
{
name: "CH zip with Schweiz suggestion overrides DE market",
zip: "1234",
marketCountry: "DE",
suggestedCountry: "Schweiz",
wantValidation: testValOK,
},
{
name: "DE zip with Deutschland suggestion",
zip: "12345",
marketCountry: "DE",
suggestedCountry: "Deutschland",
wantValidation: testValOK,
},
{
name: "wrong zip for suggested country",
zip: "1234",
marketCountry: "CH",
suggestedCountry: "Deutschland",
wantValidation: testValFail,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
s := FieldSuggestion{
Field: "zip",
SuggestedValue: tt.zip,
}
m := Market{Country: tt.marketCountry}
countryHints := map[string]string{fieldCountry: tt.suggestedCountry}
got, _ := validateField(s, m, countryHints)
if got != tt.wantValidation {
t.Errorf("validateField(zip=%q, market.Country=%q, suggestedCountry=%q) = %q, want %q",
tt.zip, tt.marketCountry, tt.suggestedCountry, got, tt.wantValidation)
}
})
}
}
// TestValidateFieldDateRange tests the date window boundary.
func TestValidateFieldDateRange(t *testing.T) {
today := time.Now()
m := Market{Country: "DE"}
tests := []struct {
name string
date string
wantValidation string
}{
{
// 2 years ago minus 2 days is safely within the window
name: "within 2 years ago — ok",
date: today.AddDate(-2, 0, 2).Format("2006-01-02"),
wantValidation: testValOK,
},
{
name: "just past 2 years — warn",
date: today.AddDate(-2, 0, -1).Format("2006-01-02"),
wantValidation: testValWarn,
},
{
// 5 years in future minus 2 days is safely within the window
name: "within 5 years in future — ok",
date: today.AddDate(5, 0, -2).Format("2006-01-02"),
wantValidation: testValOK,
},
{
name: "just past 5 years — warn",
date: today.AddDate(5, 0, 1).Format("2006-01-02"),
wantValidation: testValWarn,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
s := FieldSuggestion{
Field: "start_date",
SuggestedValue: tt.date,
}
got, _ := validateField(s, m, nil)
if got != tt.wantValidation {
t.Errorf("validateField(start_date=%q) = %q, want %q", tt.date, got, tt.wantValidation)
}
})
}
}
// Ensure the description never returns non-ok on a valid value.
func TestValidateFieldDescription(t *testing.T) {
s := FieldSuggestion{
Field: "description",
SuggestedValue: "Ein wunderschöner Mittelaltermarkt",
}
got, _ := validateField(s, Market{}, nil)
if got != "ok" {
t.Errorf("expected ok for valid description, got %q", got)
}
}
func TestValidateFieldNameTooLong(t *testing.T) {
longName := fmt.Sprintf("%201s", " ")
s := FieldSuggestion{
Field: "name",
SuggestedValue: longName,
}
got, _ := validateField(s, Market{}, nil)
if got != "fail" {
t.Errorf("expected fail for 201-char name, got %q", got)
}
}

View File

@@ -2,6 +2,7 @@ package market
import (
"context"
"encoding/json"
"errors"
"fmt"
"strconv"
@@ -39,6 +40,9 @@ type Repository interface {
// Utilities
SlugExists(ctx context.Context, slug string) (bool, error)
FindSimilar(ctx context.Context, id uuid.UUID, name, city string, startDate, endDate time.Time) ([]DuplicateCandidate, error)
// Merge audit log
InsertMergeLog(ctx context.Context, entry MergeLogEntry) error
}
type pgRepository struct {
@@ -1065,3 +1069,17 @@ func (r *pgRepository) FindSimilar(ctx context.Context, id uuid.UUID, name, city
return results, rows.Err()
}
// InsertMergeLog writes an audit row for a merge operation.
func (r *pgRepository) InsertMergeLog(ctx context.Context, entry MergeLogEntry) error {
planJSON, err := json.Marshal(entry.Plan)
if err != nil {
return fmt.Errorf("marshal plan: %w", err)
}
_, err = r.db.Exec(ctx,
`INSERT INTO market_merge_log (market_id, plan, applied_fields, applied_by, research_sources)
VALUES ($1, $2, $3, $4, $5)`,
entry.MarketID, planJSON, entry.AppliedFields, entry.AppliedBy, entry.ResearchSources,
)
return err
}

View File

@@ -58,6 +58,8 @@ func (h *ResearchHandler) Research(c *gin.Context) {
return
}
bekannteWerte := buildBekannteWerte(m)
out, err := h.orch.Run(ctx, research.Input{
MarktName: m.Name,
Stadt: m.City,
@@ -71,6 +73,7 @@ func (h *ResearchHandler) Research(c *gin.Context) {
}
return time.Now().Year()
}(),
BekannteWerte: bekannteWerte,
})
if err != nil {
var pe *ai.ProviderError
@@ -264,6 +267,33 @@ func toLLMResearchResult(raw llmOutput, m Market) ResearchResult {
}
}
// buildBekannteWerte constructs the map of known field values passed to the research orchestrator.
func buildBekannteWerte(m Market) map[string]string {
bw := map[string]string{}
if m.Zip != "" {
bw["plz"] = m.Zip
}
if m.Street != "" {
bw["strasse"] = m.Street
}
if m.State != "" {
bw["bundesland"] = m.State
}
if m.Country != "" {
bw["land"] = m.Country
}
if m.OrganizerName != "" {
bw["veranstalter"] = m.OrganizerName
}
if m.Website != "" {
bw["website"] = m.Website
}
if len(bw) == 0 {
return nil
}
return bw
}
func imageURLReachable(ctx context.Context, rawURL string) bool {
reqCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
defer cancel()

View File

@@ -128,3 +128,10 @@ nichts selbst, sondern arbeitest ausschliesslich mit den text-Feldern.
- Nur URLs zurueckgeben die in quellen[] vorhanden sind.
- Antwort MUSS dem JSON-Schema entsprechen. Keine zusaetzlichen Felder, keine
Erklaerungen ausserhalb des JSON.
- **bekannte_werte** enthaelt admin-gepflegte Werte fuer den Markt. Verwende
sie zur Triangulation:
- Quellen bestaetigen einen bekannte_werte-Wert: KEIN Vorschlag fuer dieses
Feld emittieren (keine neuen Informationen).
- Quellen widersprechen einem bekannte_werte-Wert: Vorschlag emittieren,
hinweis erklaert die Abweichung (Format: "Aktuell: X. Quellen zeigen: Y.").
- Im Zweifel: aktuellen Wert behalten, kein Vorschlag.

View File

@@ -25,6 +25,9 @@ type Input struct {
// the model knows which year's event to look for even when the market's
// start date is not yet confirmed.
ZielJahr int
// BekannteWerte holds admin-curated scalar values for the market. The LLM
// uses these for triangulation: confirm, contradict, or stay silent.
BekannteWerte map[string]string
}
type Output struct {
@@ -135,13 +138,14 @@ func callLLM(ctx context.Context, p ai.Provider, userPrompt string, schema []byt
}
type userPromptPayload struct {
MarktName string `json:"markt_name"`
Stadt string `json:"stadt"`
StartDatumHint string `json:"start_datum_hinweis"`
WebsiteHint string `json:"website_hinweis"`
RechercheDatum string `json:"recherche_datum"`
ZielJahr int `json:"ziel_jahr"`
Quellen []quellePage `json:"quellen"`
MarktName string `json:"markt_name"`
Stadt string `json:"stadt"`
StartDatumHint string `json:"start_datum_hinweis"`
WebsiteHint string `json:"website_hinweis"`
RechercheDatum string `json:"recherche_datum"`
ZielJahr int `json:"ziel_jahr"`
Quellen []quellePage `json:"quellen"`
BekannteWerte map[string]string `json:"bekannte_werte,omitempty"`
}
type quellePage struct {
@@ -158,6 +162,7 @@ func buildUserPrompt(in Input, pages []Page) (string, error) {
WebsiteHint: in.WebsiteHint,
RechercheDatum: in.RechercheDatum.Format("2006-01-02"),
ZielJahr: in.ZielJahr,
BekannteWerte: in.BekannteWerte,
}
for _, pg := range pages {
p.Quellen = append(p.Quellen, quellePage(pg))

View File

@@ -172,3 +172,59 @@ func TestBuildUserPrompt_ZielJahrZeroFallback(t *testing.T) {
t.Fatal("ziel_jahr key missing from JSON payload even when ZielJahr==0")
}
}
func TestBuildUserPrompt_BekannteWerteIncluded(t *testing.T) {
in := Input{
MarktName: "Testmarkt",
Stadt: "Berlin",
RechercheDatum: time.Date(2026, 6, 1, 0, 0, 0, 0, time.UTC),
ZielJahr: 2026,
BekannteWerte: map[string]string{
"plz": "10115",
"bundesland": "Berlin",
},
}
prompt, err := buildUserPrompt(in, nil)
if err != nil {
t.Fatalf("buildUserPrompt: %v", err)
}
var m map[string]any
if err := json.Unmarshal([]byte(prompt), &m); err != nil {
t.Fatalf("prompt is not valid JSON: %v", err)
}
raw, ok := m["bekannte_werte"]
if !ok {
t.Fatal("bekannte_werte key missing from JSON payload when BekannteWerte is non-empty")
}
bw, ok := raw.(map[string]any)
if !ok {
t.Fatalf("bekannte_werte is not a JSON object, got %T", raw)
}
if bw["plz"] != "10115" {
t.Fatalf("expected plz=10115, got %v", bw["plz"])
}
if bw["bundesland"] != "Berlin" {
t.Fatalf("expected bundesland=Berlin, got %v", bw["bundesland"])
}
}
func TestBuildUserPrompt_BekannteWerteOmittedWhenEmpty(t *testing.T) {
in := Input{
MarktName: "Testmarkt",
Stadt: "Berlin",
RechercheDatum: time.Date(2026, 6, 1, 0, 0, 0, 0, time.UTC),
ZielJahr: 2026,
BekannteWerte: nil,
}
prompt, err := buildUserPrompt(in, nil)
if err != nil {
t.Fatalf("buildUserPrompt: %v", err)
}
var m map[string]any
if err := json.Unmarshal([]byte(prompt), &m); err != nil {
t.Fatalf("prompt is not valid JSON: %v", err)
}
if _, ok := m["bekannte_werte"]; ok {
t.Fatal("bekannte_werte key must be absent when BekannteWerte is nil (omitempty)")
}
}

View File

@@ -27,6 +27,8 @@ func RegisterAdminRoutes(rg *gin.RouterGroup, h *AdminHandler, rh *ResearchHandl
markets.DELETE("/:id", h.Delete)
markets.PATCH("/:id/status", h.UpdateStatus)
markets.POST("/:id/research", rh.Research)
markets.POST("/:id/research/plan", rh.Plan)
markets.POST("/:id/research/apply", rh.Apply)
markets.GET("/:id/duplicates", h.FindDuplicates)
}

View File

@@ -325,6 +325,11 @@ func (s *Service) CreateEditionForSeries(ctx context.Context, seriesID uuid.UUID
return created, nil
}
// InsertMergeLog writes an audit log entry for a merge operation.
func (s *Service) InsertMergeLog(ctx context.Context, entry MergeLogEntry) error {
return s.repo.InsertMergeLog(ctx, entry)
}
func orDefault(val, fallback string) string {
if val != "" {
return val

View File

@@ -0,0 +1 @@
DROP TABLE IF EXISTS market_merge_log;

View File

@@ -0,0 +1,10 @@
CREATE TABLE market_merge_log (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
market_id UUID NOT NULL REFERENCES market_editions(id) ON DELETE CASCADE,
plan JSONB NOT NULL,
applied_fields TEXT[] NOT NULL,
applied_by UUID NOT NULL,
applied_at TIMESTAMPTZ NOT NULL DEFAULT now(),
research_sources TEXT[] NOT NULL
);
CREATE INDEX market_merge_log_market_idx ON market_merge_log (market_id, applied_at DESC);