feat(discovery/crawler): marktkalendarium.de parser
This commit is contained in:
@@ -3,6 +3,7 @@ module marktvogt.de/backend
|
||||
go 1.26
|
||||
|
||||
require (
|
||||
github.com/PuerkitoBio/goquery v1.12.0
|
||||
github.com/VikingOwl91/mistral-go-sdk v1.3.0
|
||||
github.com/gin-gonic/gin v1.11.0
|
||||
github.com/go-playground/validator/v10 v10.30.1
|
||||
@@ -11,12 +12,13 @@ require (
|
||||
github.com/jackc/pgx/v5 v5.8.0
|
||||
github.com/pquerna/otp v1.5.0
|
||||
github.com/valkey-io/valkey-go v1.0.72
|
||||
golang.org/x/crypto v0.48.0
|
||||
golang.org/x/crypto v0.49.0
|
||||
golang.org/x/oauth2 v0.35.0
|
||||
golang.org/x/time v0.14.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/andybalholm/cascadia v1.3.3 // indirect
|
||||
github.com/boombuler/barcode v1.0.1-0.20190219062509-6c824513bacc // indirect
|
||||
github.com/bytedance/sonic v1.14.0 // indirect
|
||||
github.com/bytedance/sonic/loader v0.3.0 // indirect
|
||||
@@ -42,9 +44,9 @@ require (
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
|
||||
github.com/ugorji/go/codec v1.3.0 // indirect
|
||||
golang.org/x/arch v0.20.0 // indirect
|
||||
golang.org/x/net v0.49.0 // indirect
|
||||
golang.org/x/sync v0.19.0 // indirect
|
||||
golang.org/x/sys v0.41.0 // indirect
|
||||
golang.org/x/text v0.34.0 // indirect
|
||||
golang.org/x/net v0.52.0 // indirect
|
||||
golang.org/x/sync v0.20.0 // indirect
|
||||
golang.org/x/sys v0.42.0 // indirect
|
||||
golang.org/x/text v0.35.0 // indirect
|
||||
google.golang.org/protobuf v1.36.9 // indirect
|
||||
)
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
github.com/PuerkitoBio/goquery v1.12.0 h1:pAcL4g3WRXekcB9AU/y1mbKez2dbY2AajVhtkO8RIBo=
|
||||
github.com/PuerkitoBio/goquery v1.12.0/go.mod h1:802ej+gV2y7bbIhOIoPY5sT183ZW0YFofScC4q/hIpQ=
|
||||
github.com/VikingOwl91/mistral-go-sdk v1.3.0 h1:OkTsodDE5lmdf7p2cwScqD2vIk8sScQ2IGk65dUjuz0=
|
||||
github.com/VikingOwl91/mistral-go-sdk v1.3.0/go.mod h1:f4emNtHUx2zSqY3V0LBz6lNI1jE6q/zh+SEU+/hJ0i4=
|
||||
github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM=
|
||||
github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA=
|
||||
github.com/boombuler/barcode v1.0.1-0.20190219062509-6c824513bacc h1:biVzkmvwrH8WK8raXaxBx6fRVTlJILwEwQGL1I/ByEI=
|
||||
github.com/boombuler/barcode v1.0.1-0.20190219062509-6c824513bacc/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8=
|
||||
github.com/bytedance/sonic v1.14.0 h1:/OfKt8HFw0kh2rj8N0F6C/qPGRESq0BbaNZgcNXXzQQ=
|
||||
@@ -31,6 +35,7 @@ github.com/goccy/go-yaml v1.18.0 h1:8W7wMFS12Pcas7KU+VVkaiCng+kG8QiFeFwzFb+rwuw=
|
||||
github.com/goccy/go-yaml v1.18.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA=
|
||||
github.com/golang-jwt/jwt/v5 v5.3.1 h1:kYf81DTWFe7t+1VvL7eS+jKFVWaUnK9cB1qbwn63YCY=
|
||||
github.com/golang-jwt/jwt/v5 v5.3.1/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE=
|
||||
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
||||
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
|
||||
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||
@@ -84,27 +89,91 @@ github.com/ugorji/go/codec v1.3.0 h1:Qd2W2sQawAfG8XSvzwhBeoGq71zXOC/Q1E9y/wUcsUA
|
||||
github.com/ugorji/go/codec v1.3.0/go.mod h1:pRBVtBSKl77K30Bv8R2P+cLSGaTtex6fsA2Wjqmfxj4=
|
||||
github.com/valkey-io/valkey-go v1.0.72 h1:iRWt1hJyOchcEgbHSkRY3aKkcBudxvMaVMsmxuYxuxE=
|
||||
github.com/valkey-io/valkey-go v1.0.72/go.mod h1:VGhZ6fs68Qrn2+OhH+6waZH27bjpgQOiLyUQyXuYK5k=
|
||||
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
||||
go.uber.org/mock v0.5.2 h1:LbtPTcP8A5k9WPXj54PPPbjcI4Y6lhyOZXn+VS7wNko=
|
||||
go.uber.org/mock v0.5.2/go.mod h1:wLlUxC2vVTPTaE3UD51E0BGOAElKrILxhVSDYQLld5o=
|
||||
go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
|
||||
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
|
||||
golang.org/x/arch v0.20.0 h1:dx1zTU0MAE98U+TQ8BLl7XsJbgze2WnNKF/8tGp/Q6c=
|
||||
golang.org/x/arch v0.20.0/go.mod h1:bdwinDaKcfZUGpH09BB7ZmOfhalA8lQdzl62l8gGWsk=
|
||||
golang.org/x/crypto v0.48.0 h1:/VRzVqiRSggnhY7gNRxPauEQ5Drw9haKdM0jqfcCFts=
|
||||
golang.org/x/crypto v0.48.0/go.mod h1:r0kV5h3qnFPlQnBSrULhlsRfryS2pmewsg+XfMgkVos=
|
||||
golang.org/x/net v0.49.0 h1:eeHFmOGUTtaaPSGNmjBKpbng9MulQsJURQUAfUwY++o=
|
||||
golang.org/x/net v0.49.0/go.mod h1:/ysNB2EvaqvesRkuLAyjI1ycPZlQHM3q01F02UY/MV8=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
||||
golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc=
|
||||
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
|
||||
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
|
||||
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
|
||||
golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4=
|
||||
golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA=
|
||||
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
|
||||
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
|
||||
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
|
||||
golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
||||
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
|
||||
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
|
||||
golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
|
||||
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
|
||||
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
|
||||
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
|
||||
golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0=
|
||||
golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw=
|
||||
golang.org/x/oauth2 v0.35.0 h1:Mv2mzuHuZuY2+bkyWXIHMfhNdJAdwW3FuWeCPYN5GVQ=
|
||||
golang.org/x/oauth2 v0.35.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
|
||||
golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4=
|
||||
golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
|
||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
|
||||
golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||
golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||
golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||
golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
|
||||
golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k=
|
||||
golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
|
||||
golang.org/x/text v0.34.0 h1:oL/Qq0Kdaqxa1KbNeMKwQq0reLCCaFtqu2eNuSeNHbk=
|
||||
golang.org/x/text v0.34.0/go.mod h1:homfLqTYRFyVYemLBFl5GgL/DWEiH5wcsQ5gSh1yziA=
|
||||
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
|
||||
golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
|
||||
golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
|
||||
golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
|
||||
golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU=
|
||||
golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
|
||||
golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY=
|
||||
golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
|
||||
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
|
||||
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
|
||||
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
|
||||
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
|
||||
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
|
||||
golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8=
|
||||
golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA=
|
||||
golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI=
|
||||
golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
|
||||
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
|
||||
golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58=
|
||||
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
google.golang.org/protobuf v1.36.9 h1:w2gp2mA27hUeUzj9Ex9FBjsBm40zfaDtEWow293U7Iw=
|
||||
google.golang.org/protobuf v1.36.9/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
|
||||
157
backend/internal/domain/discovery/crawler/marktkalendarium.go
Normal file
157
backend/internal/domain/discovery/crawler/marktkalendarium.go
Normal file
@@ -0,0 +1,157 @@
|
||||
package crawler
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
const (
|
||||
landDeutschland = "Deutschland"
|
||||
landOesterreich = "Oesterreich"
|
||||
landSchweiz = "Schweiz"
|
||||
)
|
||||
|
||||
// MarktkalendariumSource scrapes www.marktkalendarium.de. Table rows:
|
||||
// Von | Bis | Veranstaltung | Ort | Platz | Webseite | Veranstalter
|
||||
type MarktkalendariumSource struct {
|
||||
fetcher *Fetcher
|
||||
urls []string
|
||||
}
|
||||
|
||||
func NewMarktkalendarium(f *Fetcher, urls []string) *MarktkalendariumSource {
|
||||
return &MarktkalendariumSource{fetcher: f, urls: urls}
|
||||
}
|
||||
|
||||
func (s *MarktkalendariumSource) Name() string { return "marktkalendarium" }
|
||||
|
||||
func (s *MarktkalendariumSource) Fetch(ctx context.Context) ([]RawEvent, error) {
|
||||
var all []RawEvent
|
||||
for i, url := range s.urls {
|
||||
if i > 0 {
|
||||
if err := sleepCtx(ctx, 2*time.Second); err != nil {
|
||||
return all, err
|
||||
}
|
||||
}
|
||||
body, err := s.fetcher.Get(ctx, url, "")
|
||||
if err != nil {
|
||||
return all, fmt.Errorf("marktkalendarium %s: %w", url, err)
|
||||
}
|
||||
events, err := parseMarktkalendarium(body, url)
|
||||
if err != nil {
|
||||
return all, fmt.Errorf("marktkalendarium parse %s: %w", url, err)
|
||||
}
|
||||
all = append(all, events...)
|
||||
}
|
||||
return all, nil
|
||||
}
|
||||
|
||||
func parseMarktkalendarium(data []byte, sourceURL string) ([]RawEvent, error) {
|
||||
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(data))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var events []RawEvent
|
||||
doc.Find("table tr").Each(func(_ int, tr *goquery.Selection) {
|
||||
cells := tr.Find("td")
|
||||
if cells.Length() < 4 {
|
||||
return // header row or layout
|
||||
}
|
||||
von := strings.TrimSpace(cells.Eq(0).Text())
|
||||
bis := strings.TrimSpace(cells.Eq(1).Text())
|
||||
name := strings.TrimSpace(cells.Eq(2).Text())
|
||||
ort := strings.TrimSpace(cells.Eq(3).Text())
|
||||
platz := ""
|
||||
website := ""
|
||||
organizer := ""
|
||||
if cells.Length() >= 5 {
|
||||
platz = strings.TrimSpace(cells.Eq(4).Text())
|
||||
}
|
||||
if cells.Length() >= 6 {
|
||||
// First anchor href, not the text — text may be "[Facebook link]".
|
||||
href, _ := cells.Eq(5).Find("a").First().Attr("href")
|
||||
website = strings.TrimSpace(href)
|
||||
if website == "" {
|
||||
website = strings.TrimSpace(cells.Eq(5).Text())
|
||||
}
|
||||
}
|
||||
if cells.Length() >= 7 {
|
||||
organizer = strings.TrimSpace(cells.Eq(6).Text())
|
||||
}
|
||||
|
||||
if name == "" || von == "" {
|
||||
return
|
||||
}
|
||||
|
||||
start := parseDEDate(von)
|
||||
end := parseDEDate(bis)
|
||||
if start == nil {
|
||||
return
|
||||
}
|
||||
|
||||
land, plz, city := splitMarktkalendariumOrt(ort)
|
||||
|
||||
events = append(events, RawEvent{
|
||||
SourceName: "marktkalendarium",
|
||||
SourceURL: sourceURL,
|
||||
Name: name,
|
||||
City: city,
|
||||
PLZ: plz,
|
||||
Land: land,
|
||||
StartDate: start,
|
||||
EndDate: end,
|
||||
Website: website,
|
||||
Venue: platz,
|
||||
Organizer: organizer,
|
||||
})
|
||||
})
|
||||
return events, nil
|
||||
}
|
||||
|
||||
// splitMarktkalendariumOrt parses "D-49186 Bad Iburg" into (land, PLZ, city).
|
||||
// Returns ("", "", raw) when the prefix or PLZ doesn't match the expected shape.
|
||||
func splitMarktkalendariumOrt(ort string) (land, plz, city string) {
|
||||
idx := strings.Index(ort, "-")
|
||||
if idx < 1 || idx > 3 {
|
||||
return "", "", ort
|
||||
}
|
||||
prefix := ort[:idx]
|
||||
rest := ort[idx+1:]
|
||||
|
||||
switch prefix {
|
||||
case "D":
|
||||
land = landDeutschland
|
||||
case "A":
|
||||
land = landOesterreich
|
||||
case "CH":
|
||||
land = landSchweiz
|
||||
default:
|
||||
return "", "", ort
|
||||
}
|
||||
|
||||
sp := strings.IndexByte(rest, ' ')
|
||||
if sp < 0 {
|
||||
return land, "", strings.TrimSpace(rest)
|
||||
}
|
||||
plz = strings.TrimSpace(rest[:sp])
|
||||
city = strings.TrimSpace(rest[sp+1:])
|
||||
return land, plz, city
|
||||
}
|
||||
|
||||
// parseDEDate parses "3.4.2026" and returns nil on any failure.
|
||||
func parseDEDate(s string) *time.Time {
|
||||
s = strings.TrimSpace(s)
|
||||
if s == "" {
|
||||
return nil
|
||||
}
|
||||
for _, layout := range []string{"2.1.2006", "02.01.2006"} {
|
||||
if t, err := time.Parse(layout, s); err == nil {
|
||||
return &t
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
package crawler
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestMarktkalendariumParse(t *testing.T) {
|
||||
data, err := os.ReadFile("testdata/marktkalendarium.html")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
events, err := parseMarktkalendarium(data, "https://www.marktkalendarium.de/maerkte2026.php")
|
||||
if err != nil {
|
||||
t.Fatalf("parse: %v", err)
|
||||
}
|
||||
if len(events) < 10 {
|
||||
t.Fatalf("got %d events; expected at least 10", len(events))
|
||||
}
|
||||
|
||||
// Basic shape checks on the first event — tighten once fixture is stable.
|
||||
e := events[0]
|
||||
if e.SourceName != "marktkalendarium" {
|
||||
t.Errorf("SourceName = %q; want marktkalendarium", e.SourceName)
|
||||
}
|
||||
if e.Name == "" {
|
||||
t.Error("Name empty")
|
||||
}
|
||||
if e.City == "" {
|
||||
t.Error("City empty")
|
||||
}
|
||||
if e.Land != "Deutschland" && e.Land != "Oesterreich" && e.Land != "Schweiz" {
|
||||
t.Errorf("Land = %q; want DACH country", e.Land)
|
||||
}
|
||||
if e.StartDate == nil {
|
||||
t.Error("StartDate nil")
|
||||
}
|
||||
if e.SourceURL != "https://www.marktkalendarium.de/maerkte2026.php" {
|
||||
t.Errorf("SourceURL = %q", e.SourceURL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseMarktkalendariumOrtField(t *testing.T) {
|
||||
tests := []struct {
|
||||
in string
|
||||
wantLand, wantPLZ, wantCity string
|
||||
}{
|
||||
{"D-49186 Bad Iburg", "Deutschland", "49186", "Bad Iburg"},
|
||||
{"A-1010 Wien", "Oesterreich", "1010", "Wien"},
|
||||
{"CH-8001 Zuerich", "Schweiz", "8001", "Zuerich"},
|
||||
{"D-94152 Neuhaus am Inn", "Deutschland", "94152", "Neuhaus am Inn"},
|
||||
{"garbage", "", "", "garbage"},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.in, func(t *testing.T) {
|
||||
land, plz, city := splitMarktkalendariumOrt(tc.in)
|
||||
if land != tc.wantLand || plz != tc.wantPLZ || city != tc.wantCity {
|
||||
t.Errorf("splitMarktkalendariumOrt(%q) = (%q, %q, %q); want (%q, %q, %q)",
|
||||
tc.in, land, plz, city, tc.wantLand, tc.wantPLZ, tc.wantCity)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -17,7 +17,7 @@ func InferLand(plz string) string {
|
||||
if !isAllDigits(plz) {
|
||||
return ""
|
||||
}
|
||||
return "Deutschland"
|
||||
return landDeutschland
|
||||
case 4:
|
||||
if !isAllDigits(plz) {
|
||||
return ""
|
||||
@@ -25,13 +25,13 @@ func InferLand(plz string) string {
|
||||
switch plz[0] {
|
||||
case '1':
|
||||
if plz >= "1200" && plz <= "1299" {
|
||||
return "Schweiz"
|
||||
return landSchweiz
|
||||
}
|
||||
return "Oesterreich"
|
||||
return landOesterreich
|
||||
case '3', '4', '6', '8', '9':
|
||||
return "Schweiz"
|
||||
return landSchweiz
|
||||
default:
|
||||
return "Oesterreich"
|
||||
return landOesterreich
|
||||
}
|
||||
}
|
||||
return ""
|
||||
|
||||
Reference in New Issue
Block a user