Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Or 1972 underscores #566

Merged
merged 5 commits into from
Nov 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -29,59 +29,103 @@ public async Task<IReadOnlyCollection<DuplicaatVereniging>> GetDuplicates(Vereni
var postcodes = locatiesMetAdres.Select(l => l.Adres!.Postcode).ToArray();
var gemeentes = locatiesMetAdres.Select(l => l.Adres!.Gemeente).ToArray();

_client.Indices.Refresh(new RefreshRequest());
var searchResponse = await _client.SearchAsync<DuplicateDetectionDocument>(
s => s
.Query(q => q
.Bool(b => b
.Should(
sh => sh.Bool(sb => sb
.Must(
mu => MatchNaam(mu, naam),
mu => MatchGemeente(mu, gemeentes)
)
),
sh => sh.Bool(sb => sb
.Must(
mu => MatchNaam(mu, naam),
mu => MatchPostcode(mu, postcodes)
)
)
)
)
)
);

var searchResponse =
await _client
.SearchAsync<DuplicateDetectionDocument>(
s => s
.Size(50)
.Query(
q => q.Bool(
b => b.Must(must => must
.Match(m => FuzzyMatchOpNaam(m, path: f => f.Naam, naam))
)
.Filter(f => f.Bool(
fb => fb.Should(
MatchGemeente(gemeentes),
MatchPostcode(postcodes)
)
.MinimumShouldMatch(1))))));
return searchResponse.Documents.Select(ToDuplicateVereniging)
.ToArray();
}

return searchResponse.Documents.Select(ToDuplicateVereniging).ToArray();
/// <summary>
/// Builds a fuzzy <c>match</c> query on the <c>Naam</c> field of a
/// <see cref="DuplicateDetectionDocument"/>.
/// </summary>
/// <param name="mu">Query descriptor the match clause is added to.</param>
/// <param name="naam">Name to search for.</param>
/// <returns>The query container produced by the match clause.</returns>
private static QueryContainer MatchNaam(QueryContainerDescriptor<DuplicateDetectionDocument> mu, VerenigingsNaam naam)
    => mu.Match(match => match
        .Field(document => document.Naam)
        .Query(naam)
        // Analyse the search text with the duplicate-detection analyzer and allow automatic fuzziness.
        .Analyzer(DuplicateDetectionDocumentMapping.DuplicateAnalyzer)
        .Fuzziness(Fuzziness.Auto));

private static Func<QueryContainerDescriptor<DuplicateDetectionDocument>, QueryContainer> MatchPostcode(string[] postcodes)
private static QueryContainer MatchGemeente(QueryContainerDescriptor<DuplicateDetectionDocument> mu, string[] gemeentes)
{
return postalCodesQuery => postalCodesQuery
.Nested(n => n
.Path(p => p.Locaties)
.Query(nq => nq
.Terms(t => t
.Field(f => f.Locaties
.First()
.Postcode)
.Terms(postcodes)
)
)
return mu
.Nested(
n => n
.Path(
p => p
.Locaties)
.Query(
nq
=> nq
.Match(
m =>
FuzzyMatchOpNaam(
m,
path
: f
=> f
.Locaties
.First()
.Gemeente,
string
.Join(
separator
: " ",
gemeentes))
)
)
);
}

private static Func<QueryContainerDescriptor<DuplicateDetectionDocument>, QueryContainer> MatchGemeente(string[] gemeentes)
private static QueryContainer MatchPostcode(QueryContainerDescriptor<DuplicateDetectionDocument> mu, string[] postcodes)
{
return gemeentesQuery => gemeentesQuery
.Nested(n => n
.Path(p => p.Locaties)
.Query(nq => nq
.Match(m =>
FuzzyMatchOpNaam(m,
path: f => f.Locaties
.First()
.Gemeente, string.Join(
separator: " ",
gemeentes))
)
)
return mu
.Nested(
n => n
.Path(
p => p
.Locaties)
.Query(
nq
=> nq
.Terms(
t => t
.Field(
f => f
.Locaties
.First()
.Postcode)
.Terms(
postcodes)
)
)
);
}

Expand All @@ -92,9 +136,10 @@ private static MatchQueryDescriptor<DuplicateDetectionDocument> FuzzyMatchOpNaam
=> m
.Field(path)
.Query(query)
.Analyzer(DuplicateDetectionDocumentMapping.DuplicateAnalyzer)
.Analyzer(DuplicateDetectionDocumentMapping
.DuplicateAnalyzer)
.Fuzziness(Fuzziness.Auto) // Assumes this analyzer applies lowercase and asciifolding
.MinimumShouldMatch("70%");
.MinimumShouldMatch("90%");

private static DuplicaatVereniging ToDuplicateVereniging(DuplicateDetectionDocument document)
=> new(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,26 @@ public static void CreateVerenigingIndex(this IndicesNamespace indicesNamespace,
descriptor.Map<VerenigingZoekDocument>(VerenigingZoekDocumentMapping.Get));

/// <summary>
/// Creates the duplicate-detection index with its analysis settings and the
/// <see cref="DuplicateDetectionDocument"/> mapping.
/// </summary>
/// <remarks>
/// Two pattern-replace char filters are registered: <c>dot_replace</c> removes dots and
/// <c>underscore_replace</c> turns underscores into spaces. Analyzers and token filters are
/// added through <c>AddDuplicateDetectionAnalyzer</c> and <c>AddDutchStopWordsFilter</c>.
/// </remarks>
/// <param name="indicesNamespace">Indices API the create call is issued on.</param>
/// <param name="index">Name of the index to create.</param>
/// <exception cref="System.InvalidOperationException">
/// The response is invalid and carries no original exception.
/// </exception>
public static void CreateDuplicateDetectionIndex(this IndicesNamespace indicesNamespace, IndexName index)
{
    var createIndexResponse = indicesNamespace.Create(
        index,
        selector: c => c
            .Settings(s => s
                .Analysis(a => a
                    .CharFilters(cf => cf
                        .PatternReplace(name: "dot_replace",
                                        selector: prcf => prcf.Pattern("\\.").Replacement(""))
                        .PatternReplace(name: "underscore_replace",
                                        selector: prcf => prcf.Pattern("_").Replacement(" ")))
                    .Analyzers(AddDuplicateDetectionAnalyzer)
                    .TokenFilters(AddDutchStopWordsFilter)))
            .Map<DuplicateDetectionDocument>(DuplicateDetectionDocumentMapping.Get));

    if (createIndexResponse.IsValid)
        return;

    // An invalid response does not always carry a client-side exception; throwing a null
    // reference would surface as a NullReferenceException and hide the real failure.
    throw createIndexResponse.OriginalException
       ?? new System.InvalidOperationException(
           $"Creating the duplicate detection index failed: {createIndexResponse.DebugInformation}");
}

private static TokenFiltersDescriptor AddDutchStopWordsFilter(TokenFiltersDescriptor tf)
=> tf.Stop(name: "dutch_stop", selector: st => st
.StopWords("_dutch_") // Or provide your custom list
Expand All @@ -31,7 +42,8 @@ private static AnalyzersDescriptor AddDuplicateDetectionAnalyzer(AnalyzersDescri
=> ad.Custom(DuplicateDetectionDocumentMapping.DuplicateAnalyzer,
selector: ca
=> ca
.Tokenizer("lowercase")
.Filters("asciifolding", "dutch_stop")
.Tokenizer("standard")
.CharFilters("underscore_replace", "dot_replace")
.Filters("lowercase", "asciifolding", "dutch_stop")
);
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ namespace AssociationRegistry.Public.Api.Verenigingen.Search;
using FluentValidation;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Extensions.Logging;
using Nest;
using RequestModels;
using ResponseModels;
Expand Down Expand Up @@ -132,6 +133,7 @@ public async Task<IActionResult> Zoeken(
string? hoofdactiviteitenVerenigingsloket,
[FromQuery] PaginationQueryParams paginationQueryParams,
[FromServices] IValidator<PaginationQueryParams> validator,
[FromServices] ILogger<SearchVerenigingenController> logger,
CancellationToken cancellationToken)
{
await validator.ValidateAndThrowAsync(paginationQueryParams, cancellationToken);
Expand All @@ -142,19 +144,20 @@ public async Task<IActionResult> Zoeken(
var searchResponse = await Search(_elasticClient, q, sort, hoofdActiviteitenArray, paginationQueryParams, _typeMapping);

if (searchResponse.ApiCall.HttpStatusCode == 400)
return MapBadRequest(searchResponse);
return MapBadRequest(logger, searchResponse);

var response = _responseMapper.ToSearchVereningenResponse(searchResponse, paginationQueryParams, q, hoofdActiviteitenArray);

return Ok(response);
}

private IActionResult MapBadRequest(
ISearchResponse<VerenigingZoekDocument> searchResponse)
private IActionResult MapBadRequest(ILogger logger, ISearchResponse<VerenigingZoekDocument> searchResponse)
{
var match = Regex.Match(searchResponse.ServerError.Error.RootCause.First().Reason,
pattern: @"No mapping found for \[(.*).keyword\] in order to sort on");

logger.LogError(searchResponse.OriginalException, "Fout bij het aanroepen van ElasticSearch");

if (match.Success)
throw new ZoekOpdrachtBevatOnbekendeSorteerVelden(match.Groups[1].Value);

Expand Down
13 changes: 0 additions & 13 deletions test/AssociationRegistry.Test.Public.Api/appsettings.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,6 @@
}
},
"BaseUrl": "https://127.0.0.1:11003/",
"S3BlobClientOptions": {
"Buckets": {
"Verenigingen": {
"Name": "verenigingen",
"Blobs": {
"Data": "data.json",
"AlleVerenigingen": "alleVerenigingen.json",
"ListVerenigingenContext": "list-verenigingen-context.json",
"DetailVerenigingContext": "detail-vereniging-context.json"
}
}
}
},
"OrganisationRegistryUri": "https://organisation.dev-vlaanderen.be/",
"OAuth2IntrospectionOptions": {
"ClientId": "association-registry-local-dev",
Expand Down
Loading