fix(dedup): wire merge advisor JSON schema + flexible field_merges parser
Gemini returned field_merges as an array without structure constraint, causing json.Unmarshal to fail with "cannot unmarshal array into Go struct field of type map[string]mergeFieldDecision". - Pass merge_advisor_schema.json via JSONSchema instead of bare JSONMode - Add parseFieldMerges() that accepts both object and array LLM formats - Validate target_id is one of the two input market IDs after parsing - Fix schemaFromMap: minimum/maximum are supported by genai.Schema v1.54
This commit is contained in:
@@ -17,6 +17,9 @@ import (
|
||||
//go:embed assets/merge_advisor_prompt.de.md
|
||||
var mergeAdvisorPromptDE string
|
||||
|
||||
//go:embed assets/merge_advisor_schema.json
|
||||
var mergeAdvisorSchemaJSON []byte
|
||||
|
||||
// ErrNotDuplicate is returned by MergeAdvisor.Propose when the similarity
|
||||
// verdict confidently says the two markets are not the same.
|
||||
var ErrNotDuplicate = errors.New("markets are not duplicates")
|
||||
@@ -41,12 +44,12 @@ type mergeFieldDecision struct {
|
||||
|
||||
// mergeAdvisorResponse is the raw JSON shape the LLM returns.
|
||||
type mergeAdvisorResponse struct {
|
||||
TargetID string `json:"target_id"`
|
||||
TargetReason string `json:"target_reason"`
|
||||
FieldMerges map[string]mergeFieldDecision `json:"field_merges"`
|
||||
Flags []string `json:"flags"`
|
||||
Confidence float64 `json:"confidence"`
|
||||
Summary string `json:"summary"`
|
||||
TargetID string `json:"target_id"`
|
||||
TargetReason string `json:"target_reason"`
|
||||
FieldMerges json.RawMessage `json:"field_merges"`
|
||||
Flags []string `json:"flags"`
|
||||
Confidence float64 `json:"confidence"`
|
||||
Summary string `json:"summary"`
|
||||
}
|
||||
|
||||
// MergeAdvisor calls the LLM to propose how to merge two market editions.
|
||||
@@ -74,7 +77,7 @@ func (a *MergeAdvisor) Propose(ctx context.Context, mA, mB Market, verdict enric
|
||||
resp, err := a.ai.Chat(ctx, &ai.ChatRequest{
|
||||
SystemPrompt: mergeAdvisorPromptDE,
|
||||
UserMessage: userMsg,
|
||||
JSONMode: true,
|
||||
JSONSchema: json.RawMessage(mergeAdvisorSchemaJSON),
|
||||
Grounded: false,
|
||||
Temperature: 0.1,
|
||||
CallType: "merge_advisor",
|
||||
@@ -93,6 +96,14 @@ func (a *MergeAdvisor) Propose(ctx context.Context, mA, mB Market, verdict enric
|
||||
if err != nil {
|
||||
return MarketMergeProposal{}, fmt.Errorf("invalid target_id %q: %w", raw.TargetID, err)
|
||||
}
|
||||
if targetID != mA.ID && targetID != mB.ID {
|
||||
return MarketMergeProposal{}, fmt.Errorf("target_id %q is not one of the two input markets", raw.TargetID)
|
||||
}
|
||||
|
||||
fieldMerges, err := parseFieldMerges(raw.FieldMerges)
|
||||
if err != nil {
|
||||
return MarketMergeProposal{}, fmt.Errorf("parse field_merges: %w", err)
|
||||
}
|
||||
|
||||
conf := raw.Confidence
|
||||
if conf < 0 {
|
||||
@@ -110,7 +121,7 @@ func (a *MergeAdvisor) Propose(ctx context.Context, mA, mB Market, verdict enric
|
||||
return MarketMergeProposal{
|
||||
TargetID: targetID,
|
||||
TargetReason: raw.TargetReason,
|
||||
FieldMerges: raw.FieldMerges,
|
||||
FieldMerges: fieldMerges,
|
||||
Flags: flags,
|
||||
Confidence: conf,
|
||||
Summary: raw.Summary,
|
||||
@@ -118,6 +129,40 @@ func (a *MergeAdvisor) Propose(ctx context.Context, mA, mB Market, verdict enric
|
||||
}, nil
|
||||
}
|
||||
|
||||
// parseFieldMerges handles two LLM output formats for field_merges:
|
||||
// - object: {"name": {"source":"a","value":"...","reason":"..."}, ...}
|
||||
// - array: [{"field":"name","source":"a","value":"...","reason":"..."}, ...]
|
||||
func parseFieldMerges(raw json.RawMessage) (map[string]mergeFieldDecision, error) {
|
||||
if len(raw) == 0 {
|
||||
return map[string]mergeFieldDecision{}, nil
|
||||
}
|
||||
var m map[string]mergeFieldDecision
|
||||
if err := json.Unmarshal(raw, &m); err == nil {
|
||||
return m, nil
|
||||
}
|
||||
var arr []struct {
|
||||
Field string `json:"field"`
|
||||
Source string `json:"source"`
|
||||
Value json.RawMessage `json:"value"`
|
||||
Reason string `json:"reason"`
|
||||
}
|
||||
if err := json.Unmarshal(raw, &arr); err != nil {
|
||||
return nil, fmt.Errorf("cannot parse as object or array: %w", err)
|
||||
}
|
||||
m = make(map[string]mergeFieldDecision, len(arr))
|
||||
for _, e := range arr {
|
||||
if e.Field == "" {
|
||||
continue
|
||||
}
|
||||
var val any
|
||||
if len(e.Value) > 0 {
|
||||
_ = json.Unmarshal(e.Value, &val)
|
||||
}
|
||||
m[e.Field] = mergeFieldDecision{Source: e.Source, Value: val, Reason: e.Reason}
|
||||
}
|
||||
return m, nil
|
||||
}
|
||||
|
||||
type advisorMarketInput struct {
|
||||
ID string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
|
||||
@@ -29,15 +29,16 @@ func (p *stubAIProvider) Chat(_ context.Context, _ *ai.ChatRequest) (*ai.ChatRes
|
||||
}
|
||||
|
||||
func validAdvisorResponse(targetID string) string {
|
||||
fm, _ := json.Marshal(map[string]mergeFieldDecision{
|
||||
"name": {Source: "a", Value: "Testmarkt Berlin", Reason: "established name"},
|
||||
})
|
||||
resp := mergeAdvisorResponse{
|
||||
TargetID: targetID,
|
||||
TargetReason: "Market A has more complete data",
|
||||
FieldMerges: map[string]mergeFieldDecision{
|
||||
"name": {Source: "a", Value: "Testmarkt Berlin", Reason: "established name"},
|
||||
},
|
||||
Flags: []string{},
|
||||
Confidence: 0.9,
|
||||
Summary: "Beide Eintraege bezeichnen denselben Markt. Daten von A werden uebernommen.",
|
||||
FieldMerges: json.RawMessage(fm),
|
||||
Flags: []string{},
|
||||
Confidence: 0.9,
|
||||
Summary: "Beide Eintraege bezeichnen denselben Markt. Daten von A werden uebernommen.",
|
||||
}
|
||||
b, _ := json.Marshal(resp)
|
||||
return string(b)
|
||||
@@ -118,6 +119,38 @@ func TestMergeAdvisor_ProviderErrorPropagates(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestMergeAdvisor_ParsesArrayFieldMerges(t *testing.T) {
|
||||
a := Market{Name: "Markt A", City: "Berlin"}
|
||||
b := Market{Name: "Markt B", City: "Berlin"}
|
||||
// LLM returns field_merges as array (Gemini's unguided default format)
|
||||
response := `{
|
||||
"target_id": "` + a.ID.String() + `",
|
||||
"target_reason": "More complete",
|
||||
"field_merges": [
|
||||
{"field": "name", "source": "a", "value": "Testmarkt", "reason": "shorter"},
|
||||
{"field": "stadt", "source": "b", "value": "Hamburg", "reason": "correct"}
|
||||
],
|
||||
"flags": [],
|
||||
"confidence": 0.85,
|
||||
"summary": "Same market."
|
||||
}`
|
||||
p := &stubAIProvider{response: response}
|
||||
v := enrich.Verdict{Same: true, Confidence: 0.9}
|
||||
|
||||
advisor := NewMergeAdvisor(p)
|
||||
proposal, err := advisor.Propose(context.Background(), a, b, v)
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if len(proposal.FieldMerges) != 2 {
|
||||
t.Errorf("expected 2 field merges, got %d", len(proposal.FieldMerges))
|
||||
}
|
||||
if proposal.FieldMerges["name"].Source != "a" {
|
||||
t.Errorf("expected name.source=a, got %q", proposal.FieldMerges["name"].Source)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMergeAdvisor_InvalidJSONErrors(t *testing.T) {
|
||||
p := &stubAIProvider{response: "not json at all"}
|
||||
a := Market{Name: "Markt A"}
|
||||
|
||||
@@ -346,7 +346,7 @@ func promptHashShort(system, user string) string {
|
||||
func schemaFromMap(m map[string]any) *genai.Schema {
|
||||
// Keys that genai.Schema does not support — log a warning so the workaround
|
||||
// (researcher_schema_simple.json) stays visible rather than silently disappearing.
|
||||
unsupported := []string{"pattern", "minLength", "maxLength", "minimum", "maximum",
|
||||
unsupported := []string{"pattern", "minLength", "maxLength",
|
||||
"additionalProperties", "$ref", "$defs", "if", "then", "else"}
|
||||
var dropped []string
|
||||
for _, k := range unsupported {
|
||||
@@ -398,5 +398,11 @@ func schemaFromMap(m map[string]any) *genai.Schema {
|
||||
}
|
||||
}
|
||||
}
|
||||
if minVal, ok := m["minimum"].(float64); ok {
|
||||
s.Minimum = &minVal
|
||||
}
|
||||
if maxVal, ok := m["maximum"].(float64); ok {
|
||||
s.Maximum = &maxVal
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user