Skip to content

Commit

Permalink
refactor: or-1972 extract methods for duplicates
Browse files Browse the repository at this point in the history
  • Loading branch information
koenmetsu committed Nov 16, 2023
1 parent 218712a commit e6aac38
Showing 1 changed file with 57 additions and 60 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
using DuplicateVerenigingDetection;
using Nest;
using Schema.Search;
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Linq;
using System.Linq.Expressions;
using System.Threading.Tasks;
using Vereniging;

Expand All @@ -24,74 +26,69 @@ public async Task<IReadOnlyCollection<DuplicaatVereniging>> GetDuplicates(Vereni
var postcodes = locatiesMetAdres.Select(l => l.Adres!.Postcode).ToArray();
var gemeentes = locatiesMetAdres.Select(l => l.Adres!.Gemeente).ToArray();

var propereNaam = naam
.ToString()
.Trim()
.Normalize();

var searchResponse =
await _client
.SearchAsync<DuplicateDetectionDocument>(
s => s
.Query(
q => q.Bool(
b => b.Must(must => must
.Match(m => m
.Field(f => f.Naam)
.Query(naam)
.Analyzer(DuplicateDetectionDocumentMapping
.DuplicateAnalyzer)
.Fuzziness(Fuzziness.Auto) // Assumes this analyzer applies lowercase and asciifolding
.MinimumShouldMatch("90%") // You can adjust this percentage as needed
))
.Filter(f => f
.Bool(fb => fb
.Should( // Use should within a filter context for municipalities and postal codes
gemeentesQuery => gemeentesQuery
.Nested(n => n
.Path(p => p.Locaties)
.Query(nq => nq
.Match(m => m
.Field(f => f.Locaties
.First()
.Gemeente)
.Query(
string.Join(
separator: " ",
gemeentes))
.Fuzziness(
Fuzziness.Auto)
.Analyzer(
DuplicateDetectionDocumentMapping
.DuplicateAnalyzer)
)
)
),
postalCodesQuery => postalCodesQuery
.Nested(n => n
.Path(p => p.Locaties)
.Query(nq => nq
.Terms(t => t
.Field(f => f.Locaties
.First()
.Postcode)
.Terms(postcodes)
)
)
)
)
.MinimumShouldMatch(
1) // At least one of the location conditions must match
)
)
)
)
);
s => s.Query(
q => q.Bool(
b => b.Must(must => must.Match(m => FuzzyMatchOpNaam(m, f => f.Naam, naam)))
.Filter(f => f.Bool(
fb => fb.Should(MatchGemeente(gemeentes),
MatchPostcode(postcodes))
.MinimumShouldMatch(1))))));

return searchResponse.Documents.Select(ToDuplicateVereniging)
.ToArray();
}

/// <summary>
/// Builds a nested query on <c>Locaties</c> that uses a terms query to match the
/// <c>Postcode</c> of a location against the given postal codes.
/// </summary>
/// <param name="postcodes">Postal codes passed to the terms query.</param>
/// <returns>A query selector usable as a clause of a bool query.</returns>
private static Func<QueryContainerDescriptor<DuplicateDetectionDocument>, QueryContainer> MatchPostcode(string[] postcodes)
    => query => query.Nested(
        nested => nested
            .Path(document => document.Locaties)
            .Query(locatieQuery => locatieQuery.Terms(
                terms => terms
                    .Field(document => document.Locaties.First().Postcode)
                    .Terms(postcodes))));

/// <summary>
/// Builds a nested query on <c>Locaties</c> that fuzzily matches the <c>Gemeente</c>
/// of a location against the given municipality names.
/// </summary>
/// <remarks>
/// All names are joined with spaces into the text of a single match query.
/// No <c>MinimumShouldMatch</c> is applied here on purpose: the query text holds the
/// terms of every municipality, so requiring a high percentage of them to match
/// within one location would reject locations that lie in just one of the municipalities.
/// </remarks>
/// <param name="gemeentes">Municipality names to match against.</param>
/// <returns>A query selector usable as a clause of a bool query.</returns>
private static Func<QueryContainerDescriptor<DuplicateDetectionDocument>, QueryContainer> MatchGemeente(string[] gemeentes)
{
    var gemeentesQuery = string.Join(separator: " ", gemeentes);

    return query => query
        .Nested(nested => nested
            .Path(document => document.Locaties)
            .Query(locatieQuery => locatieQuery
                .Match(match => match
                    .Field(document => document.Locaties.First().Gemeente)
                    .Query(gemeentesQuery)
                    .Analyzer(DuplicateDetectionDocumentMapping.DuplicateAnalyzer)
                    .Fuzziness(Fuzziness.Auto))));
}

/// <summary>
/// Configures a match query on the given field: sets the query text, the
/// duplicate-detection analyzer, automatic fuzziness and a minimum-should-match of 90%.
/// </summary>
/// <param name="m">The match query descriptor to configure.</param>
/// <param name="path">Expression selecting the document field to match on.</param>
/// <param name="query">The text to search for.</param>
/// <returns>The descriptor returned by the chained configuration calls.</returns>
private static MatchQueryDescriptor<DuplicateDetectionDocument> FuzzyMatchOpNaam(
    MatchQueryDescriptor<DuplicateDetectionDocument> m,
    Expression<Func<DuplicateDetectionDocument, string>> path,
    string query)
    => m.Field(path)
        .Query(query)
        // Presumably this analyzer applies lowercase and asciifolding; confirm against the mapping.
        .Analyzer(DuplicateDetectionDocumentMapping.DuplicateAnalyzer)
        .Fuzziness(Fuzziness.Auto)
        // This percentage can be adjusted as needed.
        .MinimumShouldMatch("90%");

private static DuplicaatVereniging ToDuplicateVereniging(DuplicateDetectionDocument document)
=> new(
document.VCode,
Expand Down

0 comments on commit e6aac38

Please sign in to comment.