From c8117105ac917e1b1eb63e574597c9c4b51b950e Mon Sep 17 00:00:00 2001 From: marko-bekhta Date: Mon, 13 Jan 2025 10:07:19 +0100 Subject: [PATCH] Add possible suggestion to the empty results response --- .../io/quarkus/search/app/SearchService.java | 92 +++++++++++++++---- .../quarkus/search/app/dto/SearchResult.java | 6 +- .../io/quarkus/search/app/entity/Guide.java | 3 + .../app/hibernate/AnalysisConfigurer.java | 35 +++++++ src/main/resources/web/app/qs-form.ts | 1 + src/main/resources/web/app/qs-target.ts | 4 +- 6 files changed, 118 insertions(+), 23 deletions(-) diff --git a/src/main/java/io/quarkus/search/app/SearchService.java b/src/main/java/io/quarkus/search/app/SearchService.java index 1c70084c..1714fb15 100644 --- a/src/main/java/io/quarkus/search/app/SearchService.java +++ b/src/main/java/io/quarkus/search/app/SearchService.java @@ -1,7 +1,9 @@ package io.quarkus.search.app; +import java.lang.reflect.Field; import java.util.List; +import com.google.gson.JsonArray; import jakarta.enterprise.context.ApplicationScoped; import jakarta.enterprise.event.Observes; import jakarta.inject.Inject; @@ -22,14 +24,20 @@ import io.quarkus.runtime.LaunchMode; +import org.hibernate.search.backend.elasticsearch.ElasticsearchExtension; +import org.hibernate.search.backend.elasticsearch.search.query.ElasticsearchSearchResult; +import org.hibernate.search.backend.elasticsearch.search.query.impl.ElasticsearchSearchQueryImpl; import org.hibernate.search.engine.search.common.BooleanOperator; import org.hibernate.search.engine.search.common.ValueModel; import org.hibernate.search.engine.search.predicate.dsl.SimpleQueryFlag; +import org.hibernate.search.engine.search.query.SearchQuery; import org.hibernate.search.mapper.pojo.standalone.mapping.SearchMapping; import org.eclipse.microprofile.openapi.annotations.Operation; import org.jboss.resteasy.reactive.RestQuery; +import com.google.gson.JsonObject; + import io.vertx.ext.web.Router; @ApplicationScoped @@ -68,7 +76,7 @@ public SearchResult search(@RestQuery @DefaultValue(QuarkusVersi @RestQuery @DefaultValue("1") @Min(0) @Max(value = 10, message = MAX_FOR_PERF_MESSAGE) int contentSnippets, @RestQuery @DefaultValue("100") @Min(0) @Max(value = 200, message = MAX_FOR_PERF_MESSAGE) int contentSnippetsLength) { try (var session = searchMapping.createSession()) { - var result = session.search(Guide.class) + SearchQuery query = session.search(Guide.class) .select(f -> f.composite().from( f.id(), f.field("type"), @@ -111,28 +119,74 @@ public SearchResult search(@RestQuery @DefaultValue(QuarkusVersi .highlighter(f -> f.fastVector() // Highlighters are going to use spans-with-classes so that we will have more control over styling the visual on the search results screen. .tag("", "")) - .highlighter("highlighter_title_or_summary", f -> f.fastVector() - // We want the whole text of the field, regardless of whether it has a match or not. - .noMatchSize(TITLE_OR_SUMMARY_MAX_SIZE) - .fragmentSize(TITLE_OR_SUMMARY_MAX_SIZE) - // We want the whole text as a single fragment - .numberOfFragments(1)) - .highlighter("highlighter_content", f -> f.fastVector() - // If there's no match in the full content we don't want to return anything. - .noMatchSize(0) - // Content is really huge, so we want to only get small parts of the sentences. - // We give control to the caller on the content snippet length and the number of these fragments - .numberOfFragments(contentSnippets) - .fragmentSize(contentSnippetsLength) - // The rest of fragment configuration is static - .orderByScore(true) - // We don't use sentence boundaries because those can result in huge fragments - .boundaryScanner().chars().boundaryMaxScan(10).end()) + .highlighter( + "highlighter_title_or_summary", f -> f.fastVector() + // We want the whole text of the field, regardless of whether it has a match or not. + .noMatchSize(TITLE_OR_SUMMARY_MAX_SIZE) + .fragmentSize(TITLE_OR_SUMMARY_MAX_SIZE) + // We want the whole text as a single fragment + .numberOfFragments(1)) + .highlighter( + "highlighter_content", f -> f.fastVector() + // If there's no match in the full content we don't want to return anything. + .noMatchSize(0) + // Content is really huge, so we want to only get small parts of the sentences. + // We give control to the caller on the content snippet length and the number of these fragments + .numberOfFragments(contentSnippets) + .fragmentSize(contentSnippetsLength) + // The rest of fragment configuration is static + .orderByScore(true) + // We don't use sentence boundaries because those can result in huge fragments + .boundaryScanner().chars().boundaryMaxScan(10).end()) .sort(f -> f.score().then().field(language.addSuffix("title_sort"))) .routing(QuarkusVersionAndLanguageRoutingBinder.searchKeys(version, language)) .totalHitCountThreshold(TOTAL_HIT_COUNT_THRESHOLD + (page + 1) * PAGE_SIZE) + .toQuery(); + + hackQuery(query, q, language, highlightCssClass); + + var result = query + .extension(ElasticsearchExtension.get()) + .fetch(page * PAGE_SIZE, PAGE_SIZE); - return new SearchResult<>(result); + + return new SearchResult<>(result, extractSuggestion(result)); + } + } + + private String extractSuggestion(ElasticsearchSearchResult result) { + JsonArray options = result.responseBody().getAsJsonObject("suggest") + .getAsJsonArray("didYouMean") + .get(0).getAsJsonObject() + .getAsJsonArray("options"); + if (options != null && !options.isEmpty()) { + return options.get(0).getAsJsonObject().get("highlighted").getAsString(); + } else { + return null; + } + } + + private void hackQuery(SearchQuery query, String q, Language language, String highlightCssClass) { + try { + Field payloadField = ElasticsearchSearchQueryImpl.class.getDeclaredField("payload"); + payloadField.setAccessible(true); + JsonObject payload = (JsonObject) payloadField.get(query); + JsonObject suggest = new JsonObject(); + payload.add("suggest", suggest); + suggest.addProperty("text", q); + JsonObject suggestion = new JsonObject(); + suggest.add("didYouMean", suggestion); + JsonObject phrase = new JsonObject(); + suggestion.add("phrase", phrase); + phrase.addProperty("field", language.addSuffix("fullContent_suggestion")); + phrase.addProperty("size", 1); + phrase.addProperty("gram_size", 1); + JsonObject highlight = new JsonObject(); + phrase.add("highlight", highlight); + highlight.addProperty("pre_tag", ""); + highlight.addProperty("post_tag", ""); + } catch (NoSuchFieldException | IllegalAccessException e) { + throw new RuntimeException(e); } } diff --git a/src/main/java/io/quarkus/search/app/dto/SearchResult.java b/src/main/java/io/quarkus/search/app/dto/SearchResult.java index 11f0be13..9a0a9480 100644 --- a/src/main/java/io/quarkus/search/app/dto/SearchResult.java +++ b/src/main/java/io/quarkus/search/app/dto/SearchResult.java @@ -2,12 +2,12 @@ import java.util.List; -public record SearchResult(Total total, List hits) { +public record SearchResult(Total total, List hits, String suggestion) { - public SearchResult(org.hibernate.search.engine.search.query.SearchResult result) { + public SearchResult(org.hibernate.search.engine.search.query.SearchResult result, String suggestion) { this(new Total(result.total().isHitCountExact() ? result.total().hitCount() : null, result.total().hitCountLowerBound()), - result.hits()); + result.hits(), suggestion); } public record Total(Long exact, Long lowerBound) { diff --git a/src/main/java/io/quarkus/search/app/entity/Guide.java b/src/main/java/io/quarkus/search/app/entity/Guide.java index 4206777b..33cf1a3d 100644 --- a/src/main/java/io/quarkus/search/app/entity/Guide.java +++ b/src/main/java/io/quarkus/search/app/entity/Guide.java @@ -48,11 +48,13 @@ public class Guide { @I18nFullTextField(highlightable = Highlightable.FAST_VECTOR, termVector = TermVector.WITH_POSITIONS_OFFSETS, analyzerPrefix = AnalysisConfigurer.DEFAULT, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH) @I18nFullTextField(name = "title_autocomplete", analyzerPrefix = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH) + @I18nFullTextField(name = "title_suggestion", analyzerPrefix = AnalysisConfigurer.SUGGESTION, searchAnalyzerPrefix = AnalysisConfigurer.SUGGESTION) @I18nKeywordField(name = "title_sort", normalizerPrefix = AnalysisConfigurer.SORT, searchable = Searchable.NO, sortable = Sortable.YES) public I18nData title = new I18nData<>(); @I18nFullTextField(highlightable = Highlightable.FAST_VECTOR, termVector = TermVector.WITH_POSITIONS_OFFSETS, analyzerPrefix = AnalysisConfigurer.DEFAULT, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH) @I18nFullTextField(name = "summary_autocomplete", analyzerPrefix = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH) + @I18nFullTextField(name = "summary_suggestion", analyzerPrefix = AnalysisConfigurer.SUGGESTION, searchAnalyzerPrefix = AnalysisConfigurer.SUGGESTION) public I18nData summary = new I18nData<>(); @I18nFullTextField(analyzerPrefix = AnalysisConfigurer.DEFAULT, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH) @@ -61,6 +63,7 @@ public class Guide { @I18nFullTextField(name = "fullContent", valueBridge = @ValueBridgeRef(type = InputProviderHtmlBodyTextBridge.class), highlightable = Highlightable.FAST_VECTOR, termVector = TermVector.WITH_POSITIONS_OFFSETS, analyzerPrefix = AnalysisConfigurer.DEFAULT, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH) @I18nFullTextField(name = "fullContent_autocomplete", valueBridge = @ValueBridgeRef(type = InputProviderHtmlBodyTextBridge.class), analyzerPrefix = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH) + @I18nFullTextField(name = "fullContent_suggestion", valueBridge = @ValueBridgeRef(type = InputProviderHtmlBodyTextBridge.class), analyzerPrefix = AnalysisConfigurer.SUGGESTION, searchAnalyzerPrefix = AnalysisConfigurer.SUGGESTION) @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.NO) public I18nData htmlFullContentProvider = new I18nData<>(); diff --git a/src/main/java/io/quarkus/search/app/hibernate/AnalysisConfigurer.java b/src/main/java/io/quarkus/search/app/hibernate/AnalysisConfigurer.java index 896d8142..d00e7872 100644 --- a/src/main/java/io/quarkus/search/app/hibernate/AnalysisConfigurer.java +++ b/src/main/java/io/quarkus/search/app/hibernate/AnalysisConfigurer.java @@ -25,6 +25,7 @@ public class AnalysisConfigurer implements ElasticsearchAnalysisConfigurer { public static final String DEFAULT = "basic_analyzer"; public static final String DEFAULT_SEARCH = DEFAULT + "_search"; + public static final String SUGGESTION = "suggestion"; public static final String AUTOCOMPLETE = "autocomplete"; public static final String SORT = "sort"; // This is simplified by assuming no default package, lowercase package names and capitalized class name, @@ -36,6 +37,10 @@ public static String defaultAnalyzer(Language language) { return language.addSuffix(DEFAULT); } + public static String suggestionAnalyzer(Language language) { + return language.addSuffix(SUGGESTION); + } + public static String defaultSearchAnalyzer(Language language) { return language.addSuffix(DEFAULT_SEARCH); } @@ -101,6 +106,16 @@ void configureEnglishLikeLanguage(ElasticsearchAnalysisConfigurationContext cont "asciifolding") .charFilters("html_strip"); + context.analyzer(suggestionAnalyzer(language)).custom() + .tokenizer("standard") + .tokenFilters( + // To make all words in lowercase. + "lowercase", + // To convert characters into ascii ones, e.g. à to a or ę to e etc. + "asciifolding", + "shingle") + .charFilters("html_strip"); + // The analyzer to be applied to the user-input text. context.analyzer(defaultSearchAnalyzer(language)).custom() .tokenizer("standard") @@ -158,6 +173,16 @@ void configureJapanese(ElasticsearchAnalysisConfigurationContext context) { "icu_normalizer", "html_strip"); + context.analyzer(suggestionAnalyzer(language)).custom() + .tokenizer("kuromoji_tokenizer") + .tokenFilters( + // To make all words in lowercase. + "lowercase", + // To convert characters into ascii ones, e.g. à to a or ę to e etc. + "asciifolding", + "shingle") + .charFilters("html_strip"); + context.analyzer(defaultSearchAnalyzer(language)).custom() .tokenizer("kuromoji_tokenizer") .tokenFilters( @@ -211,6 +236,16 @@ void configureChinese(ElasticsearchAnalysisConfigurationContext context) { "asciifolding") .charFilters("html_strip"); + context.analyzer(suggestionAnalyzer(language)).custom() + .tokenizer("smartcn_tokenizer") + .tokenFilters( + // To make all words in lowercase. + "lowercase", + // To convert characters into ascii ones, e.g. à to a or ę to e etc. + "asciifolding", + "shingle") + .charFilters("html_strip"); + // The analyzer to be applied to the user-input text. context.analyzer(defaultSearchAnalyzer(language)).custom() .tokenizer("smartcn_tokenizer") diff --git a/src/main/resources/web/app/qs-form.ts b/src/main/resources/web/app/qs-form.ts index e7f301e6..90253eb1 100644 --- a/src/main/resources/web/app/qs-form.ts +++ b/src/main/resources/web/app/qs-form.ts @@ -10,6 +10,7 @@ export const QS_NEXT_PAGE_EVENT = 'qs-next-page'; export interface QsResult { hits: QsHit[]; hasMoreHits: boolean; + suggestion: string; } export interface QsHit { diff --git a/src/main/resources/web/app/qs-target.ts b/src/main/resources/web/app/qs-target.ts index da752ed0..8dd5a4f6 100644 --- a/src/main/resources/web/app/qs-target.ts +++ b/src/main/resources/web/app/qs-target.ts @@ -4,6 +4,7 @@ import './qs-guide' import {QS_NEXT_PAGE_EVENT, QS_RESULT_EVENT, QS_START_EVENT, QsResult} from "./qs-form"; import debounce from 'lodash/debounce'; import icons from "./assets/icons"; +import { unsafeHTML } from 'lit/directives/unsafe-html.js'; /** @@ -92,7 +93,8 @@ export class QsTarget extends LitElement { if (this._result.hits.length === 0) { return html`
-

Sorry, no ${this.type}s matched your search. Please try again.

+

Sorry, no ${this.type}s matched your search. + ${unsafeHTML(this._result.suggestion ? `Did you mean ${this._result.suggestion}` : 'Please try again.')}

`; }