From e6aac38a1ab88c3bcdbc64724eb386e22a3f1c45 Mon Sep 17 00:00:00 2001 From: Koen Metsu Date: Thu, 16 Nov 2023 16:00:09 +0100 Subject: [PATCH] refactor: or-1972 extract methods for duplicates --- Review notes (text between the three-dash separator and the diff is ignored by git am). Summary: the inline NEST query in GetDuplicates is split into three private static helpers: MatchPostcode (nested terms query on Locaties.Postcode), MatchGemeente (nested match query on Locaties.Gemeente using the space-joined gemeentes) and FuzzyMatchOpNaam (sets Field, Query, the DuplicateAnalyzer, Fuzziness.Auto and MinimumShouldMatch("90%")); the propereNaam local, which the removed query did not reference, is deleted. NOTE(review): this does not look like a pure refactor. The removed gemeente match set only Field, Query, Fuzziness.Auto and the analyzer, whereas the extracted MatchGemeente goes through FuzzyMatchOpNaam and therefore also applies MinimumShouldMatch("90%") to the joined gemeentes string; confirm this is intended, or keep the 90% threshold on the Naam match only. NOTE(review): FuzzyMatchOpNaam is also used for the Gemeente field, so its name is narrower than its use. NOTE(review): the comment about lowercase and asciifolding trails the Fuzziness call although it describes the analyzer. NOTE(review): in this copy of the patch the line breaks are collapsed and generic type arguments appear to be missing (for example after Task, SearchAsync, Func, MatchQueryDescriptor and Expression); regenerate the patch from the commit before applying. ...archDuplicateVerenigingDetectionService.cs | 117 +++++++++--------- 1 file changed, 57 insertions(+), 60 deletions(-) diff --git a/src/AssociationRegistry.Admin.Api/DuplicateDetection/SearchDuplicateVerenigingDetectionService.cs b/src/AssociationRegistry.Admin.Api/DuplicateDetection/SearchDuplicateVerenigingDetectionService.cs index 164c73ca5..d3dad2529 100644 --- a/src/AssociationRegistry.Admin.Api/DuplicateDetection/SearchDuplicateVerenigingDetectionService.cs +++ b/src/AssociationRegistry.Admin.Api/DuplicateDetection/SearchDuplicateVerenigingDetectionService.cs @@ -3,9 +3,11 @@ using DuplicateVerenigingDetection; using Nest; using Schema.Search; +using System; using System.Collections.Generic; using System.Collections.Immutable; using System.Linq; +using System.Linq.Expressions; using System.Threading.Tasks; using Vereniging; @@ -24,74 +26,69 @@ public async Task> GetDuplicates(Vereni var postcodes = locatiesMetAdres.Select(l => l.Adres!.Postcode).ToArray(); var gemeentes = locatiesMetAdres.Select(l => l.Adres!.Gemeente).ToArray(); - var propereNaam = naam - .ToString() - .Trim() - .Normalize(); - var searchResponse = await _client .SearchAsync( - s => s - .Query( - q => q.Bool( - b => b.Must(must => must - .Match(m => m - .Field(f => f.Naam) - .Query(naam) - .Analyzer(DuplicateDetectionDocumentMapping - .DuplicateAnalyzer) - .Fuzziness(Fuzziness.Auto) // Assumes this analyzer applies lowercase and asciifolding - .MinimumShouldMatch("90%") // You can adjust this percentage as needed - )) - .Filter(f => f - .Bool(fb => fb - .Should( // Use should within a filter context for municipalities and postal codes - gemeentesQuery => gemeentesQuery - .Nested(n => n - .Path(p => p.Locaties) - .Query(nq => nq - .Match(m => m - .Field(f => f.Locaties - .First() - .Gemeente) - 
.Query( - string.Join( - separator: " ", - gemeentes)) - .Fuzziness( - Fuzziness.Auto) - .Analyzer( - DuplicateDetectionDocumentMapping - .DuplicateAnalyzer) - ) - ) - ), - postalCodesQuery => postalCodesQuery - .Nested(n => n - .Path(p => p.Locaties) - .Query(nq => nq - .Terms(t => t - .Field(f => f.Locaties - .First() - .Postcode) - .Terms(postcodes) - ) - ) - ) - ) - .MinimumShouldMatch( - 1) // At least one of the location conditions must match - ) - ) - ) - ) - ); + s => s.Query( + q => q.Bool( + b => b.Must(must => must.Match(m => FuzzyMatchOpNaam(m, f => f.Naam, naam))) + .Filter(f => f.Bool( + fb => fb.Should(MatchGemeente(gemeentes), + MatchPostcode(postcodes)) + .MinimumShouldMatch(1)))))); return searchResponse.Documents.Select(ToDuplicateVereniging) .ToArray(); } + private static Func, QueryContainer> MatchPostcode(string[] postcodes) + { + return postalCodesQuery => postalCodesQuery + .Nested(n => n + .Path(p => p.Locaties) + .Query(nq => nq + .Terms(t => t + .Field(f => f.Locaties + .First() + .Postcode) + .Terms(postcodes) + ) + ) + ); + } + + private static Func, QueryContainer> MatchGemeente(string[] gemeentes) + { + return gemeentesQuery => gemeentesQuery + .Nested(n => n + .Path(p => p.Locaties) + .Query(nq => nq + .Match(m => + FuzzyMatchOpNaam(m, + f => f.Locaties + .First() + .Gemeente, string.Join( + separator: " ", + gemeentes)) + ) + ) + ); + } + + private static MatchQueryDescriptor FuzzyMatchOpNaam( + MatchQueryDescriptor m, + Expression> path, + string query) + { + return m + .Field(path) + .Query(query) + .Analyzer(DuplicateDetectionDocumentMapping + .DuplicateAnalyzer) + .Fuzziness(Fuzziness.Auto) // Assumes this analyzer applies lowercase and asciifolding + .MinimumShouldMatch("90%"); + } + private static DuplicaatVereniging ToDuplicateVereniging(DuplicateDetectionDocument document) => new( document.VCode,