Skip to content

Commit

Permalink
test: or-2350 wip but better than before
Browse files Browse the repository at this point in the history
  • Loading branch information
koenmetsu committed Jan 10, 2025
1 parent 8a80506 commit bbea99a
Show file tree
Hide file tree
Showing 7 changed files with 102 additions and 42 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,34 +34,26 @@ await _client
.SearchAsync<DuplicateDetectionDocument>(
s => s
.TrackScores()
//.MinScore(1)
.MinScore(1)
.Explain()
.Query(
q => q.Bool(
b => b
.Should(
// Original must query
s1 => s1.Bool(

b => b.Must(
MatchOpNaam(naam)
)),
s2 => s2.Bool(b => b.Must(m => m.Match(ma => ma
.Field(f => f.Naam)
.Query(naam)
.Analyzer(DuplicateDetectionDocumentMapping
.DuplicateFullNameAnalyzer)
.Fuzziness(Fuzziness.Auto)
.Boost(0.5)
.MinimumShouldMatch(2)
)
)
))
.MinimumShouldMatch(1) // At least one of the clauses must match
.Filter(MatchOpPostcodeOfGemeente(gemeentes, postcodes),
IsNietGestopt,
IsNietDubbel,
IsNietVerwijderd)
s2 => s2.Bool(
b => b.Must(
MatchOpFullNaam(naam))
))
.MinimumShouldMatch(1) // At least one of the clauses must match
.Filter(MatchOpPostcodeOfGemeente(gemeentes, postcodes),
IsNietGestopt,
IsNietDubbel,
IsNietVerwijderd)

)
));
Expand Down Expand Up @@ -157,14 +149,26 @@ private static IEnumerable<Func<QueryContainerDescriptor<DuplicateDetectionDocum
}

/// <summary>
/// Builds the fuzzy, per-token match clause on <c>Naam</c> used by the duplicate search.
/// </summary>
/// <param name="naam">The association name being checked for duplicates.</param>
/// <returns>A query selector that can be passed to a bool query's must/should clause.</returns>
/// <remarks>
/// The query text is analyzed with <c>DuplicateDetectionDocumentMapping.DuplicateAnalyzer</c>.
/// Per the Elasticsearch documentation, <c>AutoLength(2, 3)</c> (AUTO:2,3) allows no edits for
/// terms shorter than 2 characters, one edit for terms of length 2 and two edits from length 3 on;
/// <c>"3&lt;75%"</c> requires every term when there are at most 3 of them and 75% of them otherwise.
/// </remarks>
private static Func<QueryContainerDescriptor<DuplicateDetectionDocument>, QueryContainer> MatchOpNaam(VerenigingsNaam naam)
    => query => query.Match(match => match
        .Field(document => document.Naam)
        .Query(naam)
        .Analyzer(DuplicateDetectionDocumentMapping.DuplicateAnalyzer)
        .Fuzziness(Fuzziness.AutoLength(2, 3))
        .MinimumShouldMatch("3<75%"));

/// <summary>
/// Builds the fuzzy match clause on the <c>naam.naamFull</c> sub-field used by the duplicate search.
/// </summary>
/// <param name="naam">The association name being checked for duplicates.</param>
/// <returns>A query selector that can be passed to a bool query's must/should clause.</returns>
/// <remarks>
/// The query text is analyzed with <c>DuplicateDetectionDocumentMapping.DuplicateFullNameAnalyzer</c>,
/// the same analyzer the <c>naamFull</c> sub-field is mapped with. Per the Elasticsearch
/// documentation, <c>AutoLength(3, 3)</c> (AUTO:3,3) allows no edits for terms shorter than
/// 3 characters and two edits from length 3 on.
/// </remarks>
private static Func<QueryContainerDescriptor<DuplicateDetectionDocument>, QueryContainer> MatchOpFullNaam(VerenigingsNaam naam)
{
    // A single match query targets exactly one field/analyzer pair. The stale chain on
    // f.Naam with DuplicateAnalyzer has been removed so only the full-name query remains.
    return must => must
        .Match(m => m
            .Field("naam.naamFull")
            .Query(naam)
            .Analyzer(DuplicateDetectionDocumentMapping.DuplicateFullNameAnalyzer)
            .Fuzziness(Fuzziness.AutoLength(3, 3))
            // Tunable threshold: share of analyzed query terms that must match.
            .MinimumShouldMatch("75%"));
}

private static DuplicaatVereniging ToDuplicateVereniging(DuplicateDetectionDocument document)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,17 +76,12 @@ public static async Task<CreateIndexResponse> CreateDuplicateDetectionIndexAsync
selector: prcf
=> prcf.Pattern("_").Replacement(" ")))
.Analyzers(AddDuplicateDetectionAnalyzer)
.TokenFilters(AddDutchStopWordsFilter) ))
.TokenFilters(AddDutchStopWordsFilter)))
.Map<DuplicateDetectionDocument>(DuplicateDetectionDocumentMapping.Get));

/// <summary>
/// Registers the custom token filters for the duplicate-detection index:
/// a stop-word filter named <c>dutch_stop</c> and an n-gram filter named <c>ngram_filter</c>.
/// </summary>
/// <param name="tf">The token filters descriptor to extend.</param>
/// <returns>The same descriptor with both filters added.</returns>
private static TokenFiltersDescriptor AddDutchStopWordsFilter(TokenFiltersDescriptor tf)
=> tf.Stop(name: "dutch_stop", selector: st => st
.StopWords("_dutch_") // "_dutch_" selects Elasticsearch's predefined Dutch stop-word list; a custom list can be supplied instead
)
// Emits fixed-size 5-character grams and also keeps the original token.
// NOTE(review): confirm that an analyzer still references "ngram_filter"; if none does, this definition is unused.
.NGram("ngram_filter", ng => ng
.MinGram(5)
.MaxGram(5)
.PreserveOriginal()
);

private static AnalyzersDescriptor AddDuplicateDetectionAnalyzer(AnalyzersDescriptor ad)
Expand All @@ -99,9 +94,9 @@ private static AnalyzersDescriptor AddDuplicateDetectionAnalyzer(AnalyzersDescri
).Custom(DuplicateDetectionDocumentMapping.DuplicateFullNameAnalyzer,
selector: ca
=> ca
.Tokenizer("standard")
.Tokenizer("keyword")
.CharFilters("underscore_replace", "dot_replace")
.Filters("lowercase", "asciifolding", "dutch_stop", "ngram_filter")
.Filters("lowercase", "asciifolding", "dutch_stop")
);

private static NormalizersDescriptor AddVerenigingZoekNormalizer(NormalizersDescriptor ad)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,16 @@ public static TypeMappingDescriptor<DuplicateDetectionDocument> Get(TypeMappingD
.Name(document => document.VCode))
.Text(
propertyDescriptor => propertyDescriptor
.Name(document => document.Naam)
.Analyzer(DuplicateAnalyzer)
.Analyzer(DuplicateFullNameAnalyzer))
.Name(document => document.Naam)
.Fields(fields => fields
.Text(subField => subField
.Name(x => x.Naam)
.Analyzer(DuplicateAnalyzer)
)
.Text(subField => subField
.Name("naamFull")
.Analyzer(DuplicateFullNameAnalyzer)
)))
.Text(propertyDescriptor => propertyDescriptor
.Name(document => document.KorteNaam)
)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,18 +1,23 @@
namespace AssociationRegistry.Test.E2E.Framework.ApiSetup;

using Admin.Api;
using Admin.Api.Infrastructure.Extensions;
using Alba;
using AlbaHost;
using Amazon.SQS;
using AssociationRegistry.Framework;
using Common.Clients;
using Hosts.Configuration;
using Hosts.Configuration.ConfigurationBindings;
using IdentityModel.AspNetCore.OAuth2Introspection;
using Marten;
using Marten.Events;
using Microsoft.AspNetCore.Hosting;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
using Nest;
using NodaTime;
using NodaTime.Text;
using Oakton;
Expand Down Expand Up @@ -46,6 +51,10 @@ public async Task InitializeAsync()
var clients = new Clients(adminApiHost.Services.GetRequiredService<OAuth2IntrospectionOptions>(),
createClientFunc: () => new HttpClient());

// Create an Elasticsearch client from the Admin API configuration and delete the
// duplicate-detection index before the rest of the setup continues.
var elasticSearchOptions = AdminApiConfiguration.GetElasticSearchOptionsSection();
ElasticClient = ElasticSearchExtensions.CreateElasticClient(elasticSearchOptions, NullLogger.Instance);
// InitializeAsync is already async: await the call instead of blocking a thread on GetAwaiter().GetResult().
await ElasticClient.Indices.DeleteAsync(elasticSearchOptions.Indices.DuplicateDetection);

SuperAdminHttpClient = clients.SuperAdmin.HttpClient;

AdminApiHost = adminApiHost.EnsureEachCallIsAuthenticated(clients.Authenticated.HttpClient);
Expand All @@ -70,9 +79,14 @@ public async Task InitializeAsync()
SqsClientWrapper = AdminApiHost.Services.GetRequiredService<ISqsClientWrapper>();
AmazonSqs = AdminApiHost.Services.GetRequiredService<IAmazonSQS>();

ElasticClient = AdminApiHost.Services.GetRequiredService<IElasticClient>();



await AdminApiHost.DocumentStore().Storage.ApplyAllConfiguredChangesToDatabaseAsync();
}

public IElasticClient ElasticClient { get; set; }
public HttpClient SuperAdminHttpClient { get; private set; }

private void SetUpAdminApiConfiguration()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ public async Task<KeyValuePair<string, IEvent[]>[]> GivenEvents(IVCodeService se

Metadata = fixture.Create<CommandMetadata>() with { ExpectedVersion = null };

var events = fixture.CreateMany<FeitelijkeVerenigingWerdGeregistreerd>(12).ToArray();
var events = fixture.CreateMany<FeitelijkeVerenigingWerdGeregistreerd>(17).ToArray();

events[0] = events[0] with { Naam = "KORTRIJK SPURS" };
events[1] = events[1] with { Naam = "JUDOSCHOOL KORTRIJK" };
Expand All @@ -37,6 +37,12 @@ public async Task<KeyValuePair<string, IEvent[]>[]> GivenEvents(IVCodeService se
events[9] = events[9] with { Naam = "Kortrijkse Ultimate Frisbee Club" };
events[10] = events[10] with { Naam = "Ruygi KORTRIJK" };
events[11] = events[11] with { Naam = "Ruygo Judoschool KORTRIJK" };
events[12] = events[12] with { Naam = "Schaakclub Kortrijk" };
events[13] = events[13] with { Naam = "Wielerclub FC De ratjes" };
events[14] = events[14] with { Naam = "Club Kortrijk" };
events[15] = events[15] with { Naam = "Kortrijkse C# fanclub" };
events[16] = events[16] with { Naam = "Clubben met de vrienden" };


events = events.Select(
@event => @event with
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ public async Task WithDuplicateVerenigingen(RegistreerFeitelijkeVerenigingReques
s.Header(WellknownHeaderNames.Sequence).ShouldNotBeWritten();
})).ReadAsTextAsync();

ExtractDuplicateVerenigingsnamen(response).Should().BeEquivalentTo(expectedDuplicateVerenigingen);
ExtractDuplicateVerenigingsnamen(response).Should().BeEquivalentTo(expectedDuplicateVerenigingen,
because: $"'{request.Naam}' did not expect these duplicates");
}


Expand All @@ -48,13 +49,48 @@ public static IEnumerable<object[]> Scenarios()
yield return
[
RegistreerFeitelijkeVerenigingRequest(autoFixture, "Ultimate Frisbee club"),
new[] { "Ultimate Frisbee club Kortrijk" },
new[]
{
"Kortrijkse Ultimate Frisbee Club",
},
];

yield return
[
RegistreerFeitelijkeVerenigingRequest(autoFixture, "Ryugi Kortrijk"),
new[] { "Ruygo Kortrijk" },
new[]
{
"Ruygi KORTRIJK",
"Ruygo Judoschool KORTRIJK"
},
];

yield return
[
RegistreerFeitelijkeVerenigingRequest(autoFixture, "Judo School Kortrijk"),
new[]
{
"JUDOSCHOOL KORTRIJK",
},
];

yield return
[
RegistreerFeitelijkeVerenigingRequest(autoFixture, "Ryugi"),
new[]
{
"Ruygi KORTRIJK",
"Ruygo Judoschool KORTRIJK"
},
];

yield return
[
RegistreerFeitelijkeVerenigingRequest(autoFixture, "Osu Judoschool Kortrijk"),
new[]
{
"JUDOSCHOOL KORTRIJK",
},
];
}

Expand All @@ -63,7 +99,7 @@ private static RegistreerFeitelijkeVerenigingRequest RegistreerFeitelijkeVerenig
var request = autoFixture.Create<RegistreerFeitelijkeVerenigingRequest>();
request.Locaties = autoFixture.CreateMany<ToeTeVoegenLocatie>().ToArray();
request.Naam = verenigingsnaam;
request.Locaties[0].Adres.Postcode = "AAAA";
request.Locaties[0].Adres.Postcode = "8500";
request.Locaties[0].Adres.Gemeente = "FictieveGemeentenaam";

return request;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ public class TestData

public List<FeitelijkeVerenigingWerdGeregistreerd> Events { get; set; }
public static string FictieveGemeentenaam = "FictieveGemeentenaam";
public static string FictievePostcode = "AAAA";
public static string FictievePostcode = "8500";

public TestData()
{
Expand Down Expand Up @@ -87,9 +87,7 @@ public override async Task InitializeAsync()
await ApiSetup.ExecuteGiven(_scenario);

await ApiSetup.AdminApiHost.WaitForNonStaleProjectionDataAsync(TimeSpan.FromSeconds(60));

await ApiSetup.AdminProjectionHost.WaitForNonStaleProjectionDataAsync(TimeSpan.FromSeconds(10));
await ApiSetup.AdminProjectionHost.Services.GetRequiredService<IElasticClient>().Indices.RefreshAsync(Indices.AllIndices);
}

}

0 comments on commit bbea99a

Please sign in to comment.