From 575baa1da9307578c8f560b39624949420574887 Mon Sep 17 00:00:00 2001 From: Ziqi Wang Date: Tue, 10 Dec 2024 10:26:12 -0800 Subject: [PATCH 1/3] fvh adaptor to highlight the top boost only. --- build.gradle | 2 +- .../main/proto/yelp/nrtsearch/search.proto | 2 + grpc-gateway/luceneserver.swagger.json | 4 + grpc-gateway/search.pb.go | 179 ++++++++++-------- .../highlights/HighlightSettings.java | 21 +- .../highlights/HighlightUtils.java | 9 + .../highlights/NRTFastVectorHighlighter.java | 13 +- .../TopBoostOnlyFragmentsBuilderAdaptor.java | 89 +++++++++ .../NRTFastVectorHighlighterTest.java | 74 +++++++- 9 files changed, 300 insertions(+), 93 deletions(-) create mode 100644 src/main/java/com/yelp/nrtsearch/server/luceneserver/highlights/TopBoostOnlyFragmentsBuilderAdaptor.java diff --git a/build.gradle b/build.gradle index d30895bce..1713520d7 100644 --- a/build.gradle +++ b/build.gradle @@ -26,7 +26,7 @@ java { } allprojects { - version = '0.42.0' + version = '0.42.1' group = 'com.yelp.nrtsearch' } diff --git a/clientlib/src/main/proto/yelp/nrtsearch/search.proto b/clientlib/src/main/proto/yelp/nrtsearch/search.proto index ee03bce97..50ec89150 100644 --- a/clientlib/src/main/proto/yelp/nrtsearch/search.proto +++ b/clientlib/src/main/proto/yelp/nrtsearch/search.proto @@ -1135,6 +1135,8 @@ message Highlight { google.protobuf.UInt32Value boundary_max_scan = 15; // Locale used in boundary scanner when using "word" or "sentence" boundary_scanner. Examples: "en-US", "ch-ZH". google.protobuf.StringValue boundary_scanner_locale = 16; + // Only highlight the top matched phrases (with the highest boost value) per fragment. By default, it is false. 
+ google.protobuf.BoolValue top_boost_only = 17; } // Highlight settings diff --git a/grpc-gateway/luceneserver.swagger.json b/grpc-gateway/luceneserver.swagger.json index 1664c9bc6..43912fa44 100644 --- a/grpc-gateway/luceneserver.swagger.json +++ b/grpc-gateway/luceneserver.swagger.json @@ -1765,6 +1765,10 @@ "boundaryScannerLocale": { "type": "string", "description": "Locale used in boundary scanner when using \"word\" or \"sentence\" boundary_scanner. Examples: \"en-US\", \"ch-ZH\"." + }, + "topBoostOnly": { + "type": "boolean", + "description": "Only highlight the top matched phrases (with the highest boost value) per fragment. By default, it is false." } } }, diff --git a/grpc-gateway/search.pb.go b/grpc-gateway/search.pb.go index cf6175c5e..4cbcbc2f5 100644 --- a/grpc-gateway/search.pb.go +++ b/grpc-gateway/search.pb.go @@ -8670,6 +8670,8 @@ type Highlight_Settings struct { BoundaryMaxScan *wrapperspb.UInt32Value `protobuf:"bytes,15,opt,name=boundary_max_scan,json=boundaryMaxScan,proto3" json:"boundary_max_scan,omitempty"` // Locale used in boundary scanner when using "word" or "sentence" boundary_scanner. Examples: "en-US", "ch-ZH". BoundaryScannerLocale *wrapperspb.StringValue `protobuf:"bytes,16,opt,name=boundary_scanner_locale,json=boundaryScannerLocale,proto3" json:"boundary_scanner_locale,omitempty"` + // Only highlight the top matched phrases (with the highest boost value) per fragment. By default, it is false. 
+ TopBoostOnly *wrapperspb.BoolValue `protobuf:"bytes,17,opt,name=top_boost_only,json=topBoostOnly,proto3" json:"top_boost_only,omitempty"` } func (x *Highlight_Settings) Reset() { @@ -8816,6 +8818,13 @@ func (x *Highlight_Settings) GetBoundaryScannerLocale() *wrapperspb.StringValue return nil } +func (x *Highlight_Settings) GetTopBoostOnly() *wrapperspb.BoolValue { + if x != nil { + return x.TopBoostOnly + } + return nil +} + var File_yelp_nrtsearch_search_proto protoreflect.FileDescriptor var file_yelp_nrtsearch_search_proto_rawDesc = []byte{ @@ -10194,7 +10203,7 @@ var file_yelp_nrtsearch_search_proto_rawDesc = []byte{ 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x52, 0x06, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x68, 0x69, 0x74, 0x73, 0x54, 0x6f, 0x4c, 0x6f, 0x67, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, - 0x52, 0x09, 0x68, 0x69, 0x74, 0x73, 0x54, 0x6f, 0x4c, 0x6f, 0x67, 0x22, 0xfd, 0x0a, 0x0a, 0x09, + 0x52, 0x09, 0x68, 0x69, 0x74, 0x73, 0x54, 0x6f, 0x4c, 0x6f, 0x67, 0x22, 0xbf, 0x0b, 0x0a, 0x09, 0x48, 0x69, 0x67, 0x68, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x12, 0x3c, 0x0a, 0x08, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x20, 0x2e, 0x6c, 0x75, 0x63, 0x65, 0x6e, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x48, 0x69, 0x67, 0x68, 0x6c, @@ -10206,7 +10215,7 @@ var file_yelp_nrtsearch_search_proto_rawDesc = []byte{ 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x48, 0x69, 0x67, 0x68, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x53, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0d, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x53, 0x65, 0x74, 0x74, 0x69, 0x6e, - 0x67, 0x73, 0x1a, 0xa5, 0x08, 0x0a, 0x08, 0x53, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x12, + 0x67, 0x73, 0x1a, 0xe7, 0x08, 0x0a, 0x08, 0x53, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x12, 0x47, 0x0a, 0x10, 0x68, 0x69, 
0x67, 0x68, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x65, 0x72, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x1c, 0x2e, 0x6c, 0x75, 0x63, 0x65, 0x6e, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x48, 0x69, 0x67, 0x68, 0x6c, 0x69, 0x67, @@ -10272,80 +10281,85 @@ var file_yelp_nrtsearch_search_proto_rawDesc = []byte{ 0x10, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x15, 0x62, 0x6f, 0x75, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x53, 0x63, 0x61, - 0x6e, 0x6e, 0x65, 0x72, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x65, 0x1a, 0x62, 0x0a, 0x12, 0x46, 0x69, - 0x65, 0x6c, 0x64, 0x53, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, - 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, - 0x65, 0x79, 0x12, 0x36, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, - 0x0b, 0x32, 0x20, 0x2e, 0x6c, 0x75, 0x63, 0x65, 0x6e, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, - 0x2e, 0x48, 0x69, 0x67, 0x68, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x53, 0x65, 0x74, 0x74, 0x69, - 0x6e, 0x67, 0x73, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x3b, - 0x0a, 0x04, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x44, 0x45, 0x46, 0x41, 0x55, 0x4c, - 0x54, 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x46, 0x41, 0x53, 0x54, 0x5f, 0x56, 0x45, 0x43, 0x54, - 0x4f, 0x52, 0x10, 0x01, 0x12, 0x09, 0x0a, 0x05, 0x50, 0x4c, 0x41, 0x49, 0x4e, 0x10, 0x02, 0x12, - 0x0a, 0x0a, 0x06, 0x43, 0x55, 0x53, 0x54, 0x4f, 0x4d, 0x10, 0x03, 0x2a, 0x25, 0x0a, 0x0d, 0x4d, - 0x61, 0x74, 0x63, 0x68, 0x4f, 0x70, 0x65, 0x72, 0x61, 0x74, 0x6f, 0x72, 0x12, 0x0a, 0x0a, 0x06, - 0x53, 0x48, 0x4f, 0x55, 0x4c, 0x44, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x4d, 0x55, 0x53, 0x54, - 0x10, 0x01, 0x2a, 0x95, 0x01, 0x0a, 0x0d, 0x52, 0x65, 0x77, 0x72, 0x69, 0x74, 0x65, 0x4d, 0x65, 
- 0x74, 0x68, 0x6f, 0x64, 0x12, 0x12, 0x0a, 0x0e, 0x43, 0x4f, 0x4e, 0x53, 0x54, 0x41, 0x4e, 0x54, - 0x5f, 0x53, 0x43, 0x4f, 0x52, 0x45, 0x10, 0x00, 0x12, 0x1a, 0x0a, 0x16, 0x43, 0x4f, 0x4e, 0x53, - 0x54, 0x41, 0x4e, 0x54, 0x5f, 0x53, 0x43, 0x4f, 0x52, 0x45, 0x5f, 0x42, 0x4f, 0x4f, 0x4c, 0x45, - 0x41, 0x4e, 0x10, 0x01, 0x12, 0x13, 0x0a, 0x0f, 0x53, 0x43, 0x4f, 0x52, 0x49, 0x4e, 0x47, 0x5f, - 0x42, 0x4f, 0x4f, 0x4c, 0x45, 0x41, 0x4e, 0x10, 0x02, 0x12, 0x1b, 0x0a, 0x17, 0x54, 0x4f, 0x50, - 0x5f, 0x54, 0x45, 0x52, 0x4d, 0x53, 0x5f, 0x42, 0x4c, 0x45, 0x4e, 0x44, 0x45, 0x44, 0x5f, 0x46, - 0x52, 0x45, 0x51, 0x53, 0x10, 0x03, 0x12, 0x13, 0x0a, 0x0f, 0x54, 0x4f, 0x50, 0x5f, 0x54, 0x45, - 0x52, 0x4d, 0x53, 0x5f, 0x42, 0x4f, 0x4f, 0x53, 0x54, 0x10, 0x04, 0x12, 0x0d, 0x0a, 0x09, 0x54, - 0x4f, 0x50, 0x5f, 0x54, 0x45, 0x52, 0x4d, 0x53, 0x10, 0x05, 0x2a, 0x38, 0x0a, 0x13, 0x43, 0x6f, - 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x79, 0x70, - 0x65, 0x12, 0x10, 0x0a, 0x0c, 0x50, 0x52, 0x45, 0x46, 0x49, 0x58, 0x5f, 0x51, 0x55, 0x45, 0x52, - 0x59, 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x46, 0x55, 0x5a, 0x5a, 0x59, 0x5f, 0x51, 0x55, 0x45, - 0x52, 0x59, 0x10, 0x01, 0x2a, 0xb0, 0x01, 0x0a, 0x0a, 0x52, 0x65, 0x67, 0x65, 0x78, 0x70, 0x46, - 0x6c, 0x61, 0x67, 0x12, 0x0e, 0x0a, 0x0a, 0x52, 0x45, 0x47, 0x45, 0x58, 0x50, 0x5f, 0x41, 0x4c, - 0x4c, 0x10, 0x00, 0x12, 0x14, 0x0a, 0x10, 0x52, 0x45, 0x47, 0x45, 0x58, 0x50, 0x5f, 0x41, 0x4e, - 0x59, 0x53, 0x54, 0x52, 0x49, 0x4e, 0x47, 0x10, 0x01, 0x12, 0x14, 0x0a, 0x10, 0x52, 0x45, 0x47, - 0x45, 0x58, 0x50, 0x5f, 0x41, 0x55, 0x54, 0x4f, 0x4d, 0x41, 0x54, 0x4f, 0x4e, 0x10, 0x02, 0x12, - 0x15, 0x0a, 0x11, 0x52, 0x45, 0x47, 0x45, 0x58, 0x50, 0x5f, 0x43, 0x4f, 0x4d, 0x50, 0x4c, 0x45, - 0x4d, 0x45, 0x4e, 0x54, 0x10, 0x03, 0x12, 0x10, 0x0a, 0x0c, 0x52, 0x45, 0x47, 0x45, 0x58, 0x50, - 0x5f, 0x45, 0x4d, 0x50, 0x54, 0x59, 0x10, 0x04, 0x12, 0x17, 0x0a, 0x13, 0x52, 0x45, 0x47, 0x45, - 0x58, 0x50, 0x5f, 0x49, 0x4e, 0x54, 
0x45, 0x52, 0x53, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x10, - 0x05, 0x12, 0x13, 0x0a, 0x0f, 0x52, 0x45, 0x47, 0x45, 0x58, 0x50, 0x5f, 0x49, 0x4e, 0x54, 0x45, - 0x52, 0x56, 0x41, 0x4c, 0x10, 0x06, 0x12, 0x0f, 0x0a, 0x0b, 0x52, 0x45, 0x47, 0x45, 0x58, 0x50, - 0x5f, 0x4e, 0x4f, 0x4e, 0x45, 0x10, 0x07, 0x2a, 0xa6, 0x03, 0x0a, 0x09, 0x51, 0x75, 0x65, 0x72, - 0x79, 0x54, 0x79, 0x70, 0x65, 0x12, 0x08, 0x0a, 0x04, 0x4e, 0x4f, 0x4e, 0x45, 0x10, 0x00, 0x12, - 0x11, 0x0a, 0x0d, 0x42, 0x4f, 0x4f, 0x4c, 0x45, 0x41, 0x4e, 0x5f, 0x51, 0x55, 0x45, 0x52, 0x59, - 0x10, 0x01, 0x12, 0x10, 0x0a, 0x0c, 0x50, 0x48, 0x52, 0x41, 0x53, 0x45, 0x5f, 0x51, 0x55, 0x45, - 0x52, 0x59, 0x10, 0x02, 0x12, 0x18, 0x0a, 0x14, 0x46, 0x55, 0x4e, 0x43, 0x54, 0x49, 0x4f, 0x4e, - 0x5f, 0x53, 0x43, 0x4f, 0x52, 0x45, 0x5f, 0x51, 0x55, 0x45, 0x52, 0x59, 0x10, 0x03, 0x12, 0x0e, - 0x0a, 0x0a, 0x54, 0x45, 0x52, 0x4d, 0x5f, 0x51, 0x55, 0x45, 0x52, 0x59, 0x10, 0x04, 0x12, 0x15, - 0x0a, 0x11, 0x54, 0x45, 0x52, 0x4d, 0x5f, 0x49, 0x4e, 0x5f, 0x53, 0x45, 0x54, 0x5f, 0x51, 0x55, - 0x45, 0x52, 0x59, 0x10, 0x05, 0x12, 0x13, 0x0a, 0x0f, 0x44, 0x49, 0x53, 0x4a, 0x55, 0x4e, 0x43, - 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x4d, 0x41, 0x58, 0x10, 0x06, 0x12, 0x09, 0x0a, 0x05, 0x4d, 0x41, - 0x54, 0x43, 0x48, 0x10, 0x07, 0x12, 0x10, 0x0a, 0x0c, 0x4d, 0x41, 0x54, 0x43, 0x48, 0x5f, 0x50, - 0x48, 0x52, 0x41, 0x53, 0x45, 0x10, 0x08, 0x12, 0x0f, 0x0a, 0x0b, 0x4d, 0x55, 0x4c, 0x54, 0x49, - 0x5f, 0x4d, 0x41, 0x54, 0x43, 0x48, 0x10, 0x09, 0x12, 0x09, 0x0a, 0x05, 0x52, 0x41, 0x4e, 0x47, - 0x45, 0x10, 0x0a, 0x12, 0x14, 0x0a, 0x10, 0x47, 0x45, 0x4f, 0x5f, 0x42, 0x4f, 0x55, 0x4e, 0x44, - 0x49, 0x4e, 0x47, 0x5f, 0x42, 0x4f, 0x58, 0x10, 0x0b, 0x12, 0x0d, 0x0a, 0x09, 0x47, 0x45, 0x4f, - 0x5f, 0x50, 0x4f, 0x49, 0x4e, 0x54, 0x10, 0x0c, 0x12, 0x0a, 0x0a, 0x06, 0x4e, 0x45, 0x53, 0x54, - 0x45, 0x44, 0x10, 0x0d, 0x12, 0x0a, 0x0a, 0x06, 0x45, 0x58, 0x49, 0x53, 0x54, 0x53, 0x10, 0x0e, - 0x12, 0x0e, 0x0a, 0x0a, 0x47, 0x45, 0x4f, 0x5f, 0x52, 0x41, 0x44, 0x49, 
0x55, 0x53, 0x10, 0x0f, - 0x12, 0x0e, 0x0a, 0x0a, 0x43, 0x4f, 0x4d, 0x50, 0x4c, 0x45, 0x54, 0x49, 0x4f, 0x4e, 0x10, 0x10, - 0x12, 0x1e, 0x0a, 0x1a, 0x4d, 0x55, 0x4c, 0x54, 0x49, 0x5f, 0x46, 0x55, 0x4e, 0x43, 0x54, 0x49, - 0x4f, 0x4e, 0x5f, 0x53, 0x43, 0x4f, 0x52, 0x45, 0x5f, 0x51, 0x55, 0x45, 0x52, 0x59, 0x10, 0x11, - 0x12, 0x17, 0x0a, 0x13, 0x4d, 0x41, 0x54, 0x43, 0x48, 0x5f, 0x50, 0x48, 0x52, 0x41, 0x53, 0x45, - 0x5f, 0x50, 0x52, 0x45, 0x46, 0x49, 0x58, 0x10, 0x12, 0x12, 0x0a, 0x0a, 0x06, 0x50, 0x52, 0x45, - 0x46, 0x49, 0x58, 0x10, 0x13, 0x12, 0x18, 0x0a, 0x14, 0x43, 0x4f, 0x4e, 0x53, 0x54, 0x41, 0x4e, - 0x54, 0x5f, 0x53, 0x43, 0x4f, 0x52, 0x45, 0x5f, 0x51, 0x55, 0x45, 0x52, 0x59, 0x10, 0x14, 0x12, - 0x0f, 0x0a, 0x0b, 0x47, 0x45, 0x4f, 0x5f, 0x50, 0x4f, 0x4c, 0x59, 0x47, 0x4f, 0x4e, 0x10, 0x15, - 0x12, 0x0e, 0x0a, 0x0a, 0x53, 0x50, 0x41, 0x4e, 0x5f, 0x51, 0x55, 0x45, 0x52, 0x59, 0x10, 0x16, - 0x2a, 0x3c, 0x0a, 0x08, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x12, 0x07, 0x0a, 0x03, - 0x4d, 0x49, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x4d, 0x41, 0x58, 0x10, 0x01, 0x12, 0x0e, - 0x0a, 0x0a, 0x4d, 0x49, 0x44, 0x44, 0x4c, 0x45, 0x5f, 0x4d, 0x49, 0x4e, 0x10, 0x02, 0x12, 0x0e, - 0x0a, 0x0a, 0x4d, 0x49, 0x44, 0x44, 0x4c, 0x45, 0x5f, 0x4d, 0x41, 0x58, 0x10, 0x03, 0x42, 0x58, - 0x0a, 0x1e, 0x63, 0x6f, 0x6d, 0x2e, 0x79, 0x65, 0x6c, 0x70, 0x2e, 0x6e, 0x72, 0x74, 0x73, 0x65, - 0x61, 0x72, 0x63, 0x68, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x67, 0x72, 0x70, 0x63, - 0x42, 0x13, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, - 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x50, 0x01, 0x5a, 0x19, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, - 0x63, 0x6f, 0x6d, 0x2f, 0x59, 0x65, 0x6c, 0x70, 0x2f, 0x6e, 0x72, 0x74, 0x73, 0x65, 0x61, 0x72, - 0x63, 0x68, 0xa2, 0x02, 0x03, 0x48, 0x4c, 0x57, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x6e, 0x6e, 0x65, 0x72, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x40, 0x0a, 0x0e, 0x74, 0x6f, + 0x70, 0x5f, 
0x62, 0x6f, 0x6f, 0x73, 0x74, 0x5f, 0x6f, 0x6e, 0x6c, 0x79, 0x18, 0x11, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x42, 0x6f, 0x6f, 0x6c, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x0c, + 0x74, 0x6f, 0x70, 0x42, 0x6f, 0x6f, 0x73, 0x74, 0x4f, 0x6e, 0x6c, 0x79, 0x1a, 0x62, 0x0a, 0x12, + 0x46, 0x69, 0x65, 0x6c, 0x64, 0x53, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x45, 0x6e, 0x74, + 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x03, 0x6b, 0x65, 0x79, 0x12, 0x36, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x0b, 0x32, 0x20, 0x2e, 0x6c, 0x75, 0x63, 0x65, 0x6e, 0x65, 0x73, 0x65, 0x72, 0x76, + 0x65, 0x72, 0x2e, 0x48, 0x69, 0x67, 0x68, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x53, 0x65, 0x74, + 0x74, 0x69, 0x6e, 0x67, 0x73, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, + 0x22, 0x3b, 0x0a, 0x04, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x44, 0x45, 0x46, 0x41, + 0x55, 0x4c, 0x54, 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x46, 0x41, 0x53, 0x54, 0x5f, 0x56, 0x45, + 0x43, 0x54, 0x4f, 0x52, 0x10, 0x01, 0x12, 0x09, 0x0a, 0x05, 0x50, 0x4c, 0x41, 0x49, 0x4e, 0x10, + 0x02, 0x12, 0x0a, 0x0a, 0x06, 0x43, 0x55, 0x53, 0x54, 0x4f, 0x4d, 0x10, 0x03, 0x2a, 0x25, 0x0a, + 0x0d, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x4f, 0x70, 0x65, 0x72, 0x61, 0x74, 0x6f, 0x72, 0x12, 0x0a, + 0x0a, 0x06, 0x53, 0x48, 0x4f, 0x55, 0x4c, 0x44, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x4d, 0x55, + 0x53, 0x54, 0x10, 0x01, 0x2a, 0x95, 0x01, 0x0a, 0x0d, 0x52, 0x65, 0x77, 0x72, 0x69, 0x74, 0x65, + 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x12, 0x12, 0x0a, 0x0e, 0x43, 0x4f, 0x4e, 0x53, 0x54, 0x41, + 0x4e, 0x54, 0x5f, 0x53, 0x43, 0x4f, 0x52, 0x45, 0x10, 0x00, 0x12, 0x1a, 0x0a, 0x16, 0x43, 0x4f, + 0x4e, 0x53, 0x54, 0x41, 0x4e, 0x54, 0x5f, 0x53, 0x43, 0x4f, 0x52, 0x45, 0x5f, 0x42, 0x4f, 0x4f, + 0x4c, 0x45, 0x41, 0x4e, 0x10, 0x01, 0x12, 0x13, 
0x0a, 0x0f, 0x53, 0x43, 0x4f, 0x52, 0x49, 0x4e, + 0x47, 0x5f, 0x42, 0x4f, 0x4f, 0x4c, 0x45, 0x41, 0x4e, 0x10, 0x02, 0x12, 0x1b, 0x0a, 0x17, 0x54, + 0x4f, 0x50, 0x5f, 0x54, 0x45, 0x52, 0x4d, 0x53, 0x5f, 0x42, 0x4c, 0x45, 0x4e, 0x44, 0x45, 0x44, + 0x5f, 0x46, 0x52, 0x45, 0x51, 0x53, 0x10, 0x03, 0x12, 0x13, 0x0a, 0x0f, 0x54, 0x4f, 0x50, 0x5f, + 0x54, 0x45, 0x52, 0x4d, 0x53, 0x5f, 0x42, 0x4f, 0x4f, 0x53, 0x54, 0x10, 0x04, 0x12, 0x0d, 0x0a, + 0x09, 0x54, 0x4f, 0x50, 0x5f, 0x54, 0x45, 0x52, 0x4d, 0x53, 0x10, 0x05, 0x2a, 0x38, 0x0a, 0x13, + 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, + 0x79, 0x70, 0x65, 0x12, 0x10, 0x0a, 0x0c, 0x50, 0x52, 0x45, 0x46, 0x49, 0x58, 0x5f, 0x51, 0x55, + 0x45, 0x52, 0x59, 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x46, 0x55, 0x5a, 0x5a, 0x59, 0x5f, 0x51, + 0x55, 0x45, 0x52, 0x59, 0x10, 0x01, 0x2a, 0xb0, 0x01, 0x0a, 0x0a, 0x52, 0x65, 0x67, 0x65, 0x78, + 0x70, 0x46, 0x6c, 0x61, 0x67, 0x12, 0x0e, 0x0a, 0x0a, 0x52, 0x45, 0x47, 0x45, 0x58, 0x50, 0x5f, + 0x41, 0x4c, 0x4c, 0x10, 0x00, 0x12, 0x14, 0x0a, 0x10, 0x52, 0x45, 0x47, 0x45, 0x58, 0x50, 0x5f, + 0x41, 0x4e, 0x59, 0x53, 0x54, 0x52, 0x49, 0x4e, 0x47, 0x10, 0x01, 0x12, 0x14, 0x0a, 0x10, 0x52, + 0x45, 0x47, 0x45, 0x58, 0x50, 0x5f, 0x41, 0x55, 0x54, 0x4f, 0x4d, 0x41, 0x54, 0x4f, 0x4e, 0x10, + 0x02, 0x12, 0x15, 0x0a, 0x11, 0x52, 0x45, 0x47, 0x45, 0x58, 0x50, 0x5f, 0x43, 0x4f, 0x4d, 0x50, + 0x4c, 0x45, 0x4d, 0x45, 0x4e, 0x54, 0x10, 0x03, 0x12, 0x10, 0x0a, 0x0c, 0x52, 0x45, 0x47, 0x45, + 0x58, 0x50, 0x5f, 0x45, 0x4d, 0x50, 0x54, 0x59, 0x10, 0x04, 0x12, 0x17, 0x0a, 0x13, 0x52, 0x45, + 0x47, 0x45, 0x58, 0x50, 0x5f, 0x49, 0x4e, 0x54, 0x45, 0x52, 0x53, 0x45, 0x43, 0x54, 0x49, 0x4f, + 0x4e, 0x10, 0x05, 0x12, 0x13, 0x0a, 0x0f, 0x52, 0x45, 0x47, 0x45, 0x58, 0x50, 0x5f, 0x49, 0x4e, + 0x54, 0x45, 0x52, 0x56, 0x41, 0x4c, 0x10, 0x06, 0x12, 0x0f, 0x0a, 0x0b, 0x52, 0x45, 0x47, 0x45, + 0x58, 0x50, 0x5f, 0x4e, 0x4f, 0x4e, 0x45, 0x10, 0x07, 0x2a, 0xa6, 0x03, 0x0a, 0x09, 
0x51, 0x75, + 0x65, 0x72, 0x79, 0x54, 0x79, 0x70, 0x65, 0x12, 0x08, 0x0a, 0x04, 0x4e, 0x4f, 0x4e, 0x45, 0x10, + 0x00, 0x12, 0x11, 0x0a, 0x0d, 0x42, 0x4f, 0x4f, 0x4c, 0x45, 0x41, 0x4e, 0x5f, 0x51, 0x55, 0x45, + 0x52, 0x59, 0x10, 0x01, 0x12, 0x10, 0x0a, 0x0c, 0x50, 0x48, 0x52, 0x41, 0x53, 0x45, 0x5f, 0x51, + 0x55, 0x45, 0x52, 0x59, 0x10, 0x02, 0x12, 0x18, 0x0a, 0x14, 0x46, 0x55, 0x4e, 0x43, 0x54, 0x49, + 0x4f, 0x4e, 0x5f, 0x53, 0x43, 0x4f, 0x52, 0x45, 0x5f, 0x51, 0x55, 0x45, 0x52, 0x59, 0x10, 0x03, + 0x12, 0x0e, 0x0a, 0x0a, 0x54, 0x45, 0x52, 0x4d, 0x5f, 0x51, 0x55, 0x45, 0x52, 0x59, 0x10, 0x04, + 0x12, 0x15, 0x0a, 0x11, 0x54, 0x45, 0x52, 0x4d, 0x5f, 0x49, 0x4e, 0x5f, 0x53, 0x45, 0x54, 0x5f, + 0x51, 0x55, 0x45, 0x52, 0x59, 0x10, 0x05, 0x12, 0x13, 0x0a, 0x0f, 0x44, 0x49, 0x53, 0x4a, 0x55, + 0x4e, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x4d, 0x41, 0x58, 0x10, 0x06, 0x12, 0x09, 0x0a, 0x05, + 0x4d, 0x41, 0x54, 0x43, 0x48, 0x10, 0x07, 0x12, 0x10, 0x0a, 0x0c, 0x4d, 0x41, 0x54, 0x43, 0x48, + 0x5f, 0x50, 0x48, 0x52, 0x41, 0x53, 0x45, 0x10, 0x08, 0x12, 0x0f, 0x0a, 0x0b, 0x4d, 0x55, 0x4c, + 0x54, 0x49, 0x5f, 0x4d, 0x41, 0x54, 0x43, 0x48, 0x10, 0x09, 0x12, 0x09, 0x0a, 0x05, 0x52, 0x41, + 0x4e, 0x47, 0x45, 0x10, 0x0a, 0x12, 0x14, 0x0a, 0x10, 0x47, 0x45, 0x4f, 0x5f, 0x42, 0x4f, 0x55, + 0x4e, 0x44, 0x49, 0x4e, 0x47, 0x5f, 0x42, 0x4f, 0x58, 0x10, 0x0b, 0x12, 0x0d, 0x0a, 0x09, 0x47, + 0x45, 0x4f, 0x5f, 0x50, 0x4f, 0x49, 0x4e, 0x54, 0x10, 0x0c, 0x12, 0x0a, 0x0a, 0x06, 0x4e, 0x45, + 0x53, 0x54, 0x45, 0x44, 0x10, 0x0d, 0x12, 0x0a, 0x0a, 0x06, 0x45, 0x58, 0x49, 0x53, 0x54, 0x53, + 0x10, 0x0e, 0x12, 0x0e, 0x0a, 0x0a, 0x47, 0x45, 0x4f, 0x5f, 0x52, 0x41, 0x44, 0x49, 0x55, 0x53, + 0x10, 0x0f, 0x12, 0x0e, 0x0a, 0x0a, 0x43, 0x4f, 0x4d, 0x50, 0x4c, 0x45, 0x54, 0x49, 0x4f, 0x4e, + 0x10, 0x10, 0x12, 0x1e, 0x0a, 0x1a, 0x4d, 0x55, 0x4c, 0x54, 0x49, 0x5f, 0x46, 0x55, 0x4e, 0x43, + 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x53, 0x43, 0x4f, 0x52, 0x45, 0x5f, 0x51, 0x55, 0x45, 0x52, 0x59, + 0x10, 0x11, 0x12, 0x17, 
0x0a, 0x13, 0x4d, 0x41, 0x54, 0x43, 0x48, 0x5f, 0x50, 0x48, 0x52, 0x41, + 0x53, 0x45, 0x5f, 0x50, 0x52, 0x45, 0x46, 0x49, 0x58, 0x10, 0x12, 0x12, 0x0a, 0x0a, 0x06, 0x50, + 0x52, 0x45, 0x46, 0x49, 0x58, 0x10, 0x13, 0x12, 0x18, 0x0a, 0x14, 0x43, 0x4f, 0x4e, 0x53, 0x54, + 0x41, 0x4e, 0x54, 0x5f, 0x53, 0x43, 0x4f, 0x52, 0x45, 0x5f, 0x51, 0x55, 0x45, 0x52, 0x59, 0x10, + 0x14, 0x12, 0x0f, 0x0a, 0x0b, 0x47, 0x45, 0x4f, 0x5f, 0x50, 0x4f, 0x4c, 0x59, 0x47, 0x4f, 0x4e, + 0x10, 0x15, 0x12, 0x0e, 0x0a, 0x0a, 0x53, 0x50, 0x41, 0x4e, 0x5f, 0x51, 0x55, 0x45, 0x52, 0x59, + 0x10, 0x16, 0x2a, 0x3c, 0x0a, 0x08, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x12, 0x07, + 0x0a, 0x03, 0x4d, 0x49, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x4d, 0x41, 0x58, 0x10, 0x01, + 0x12, 0x0e, 0x0a, 0x0a, 0x4d, 0x49, 0x44, 0x44, 0x4c, 0x45, 0x5f, 0x4d, 0x49, 0x4e, 0x10, 0x02, + 0x12, 0x0e, 0x0a, 0x0a, 0x4d, 0x49, 0x44, 0x44, 0x4c, 0x45, 0x5f, 0x4d, 0x41, 0x58, 0x10, 0x03, + 0x42, 0x58, 0x0a, 0x1e, 0x63, 0x6f, 0x6d, 0x2e, 0x79, 0x65, 0x6c, 0x70, 0x2e, 0x6e, 0x72, 0x74, + 0x73, 0x65, 0x61, 0x72, 0x63, 0x68, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x67, 0x72, + 0x70, 0x63, 0x42, 0x13, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, + 0x73, 0x65, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x50, 0x01, 0x5a, 0x19, 0x67, 0x69, 0x74, 0x68, 0x75, + 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x59, 0x65, 0x6c, 0x70, 0x2f, 0x6e, 0x72, 0x74, 0x73, 0x65, + 0x61, 0x72, 0x63, 0x68, 0xa2, 0x02, 0x03, 0x48, 0x4c, 0x57, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x33, } var ( @@ -10701,12 +10715,13 @@ var file_yelp_nrtsearch_search_proto_depIdxs = []int32{ 135, // 197: luceneserver.Highlight.Settings.boundary_chars:type_name -> google.protobuf.StringValue 133, // 198: luceneserver.Highlight.Settings.boundary_max_scan:type_name -> google.protobuf.UInt32Value 135, // 199: luceneserver.Highlight.Settings.boundary_scanner_locale:type_name -> google.protobuf.StringValue - 125, // 200: 
luceneserver.Highlight.FieldSettingsEntry.value:type_name -> luceneserver.Highlight.Settings - 201, // [201:201] is the sub-list for method output_type - 201, // [201:201] is the sub-list for method input_type - 201, // [201:201] is the sub-list for extension type_name - 201, // [201:201] is the sub-list for extension extendee - 0, // [0:201] is the sub-list for field type_name + 134, // 200: luceneserver.Highlight.Settings.top_boost_only:type_name -> google.protobuf.BoolValue + 125, // 201: luceneserver.Highlight.FieldSettingsEntry.value:type_name -> luceneserver.Highlight.Settings + 202, // [202:202] is the sub-list for method output_type + 202, // [202:202] is the sub-list for method input_type + 202, // [202:202] is the sub-list for extension type_name + 202, // [202:202] is the sub-list for extension extendee + 0, // [0:202] is the sub-list for field type_name } func init() { file_yelp_nrtsearch_search_proto_init() } diff --git a/src/main/java/com/yelp/nrtsearch/server/luceneserver/highlights/HighlightSettings.java b/src/main/java/com/yelp/nrtsearch/server/luceneserver/highlights/HighlightSettings.java index eafd4dea6..ad86b70dd 100644 --- a/src/main/java/com/yelp/nrtsearch/server/luceneserver/highlights/HighlightSettings.java +++ b/src/main/java/com/yelp/nrtsearch/server/luceneserver/highlights/HighlightSettings.java @@ -38,6 +38,7 @@ public class HighlightSettings { private final Character[] boundaryChars; private final int boundaryMaxScan; private final Locale boundaryScannerLocale; + private final boolean topBoostOnly; public HighlightSettings( Highlighter highlighter, @@ -54,6 +55,7 @@ public HighlightSettings( Character[] boundaryChars, int boundaryMaxScan, Locale boundaryScannerLocale, + boolean topBoostOnly, Map customHighlighterParams) { this.highlighter = highlighter; this.preTags = preTags; @@ -69,6 +71,7 @@ public HighlightSettings( this.boundaryChars = boundaryChars; this.boundaryMaxScan = boundaryMaxScan; this.boundaryScannerLocale = 
boundaryScannerLocale; + this.topBoostOnly = topBoostOnly; this.customHighlighterParams = customHighlighterParams; } @@ -88,6 +91,7 @@ public Builder toBuilder() { .withBoundaryChars(this.boundaryChars) .withBoundaryMaxScan(this.boundaryMaxScan) .withBoundaryScannerLocale(this.boundaryScannerLocale) + .withTopBoostOnly(this.topBoostOnly) .withCustomHighlighterParams(this.customHighlighterParams); } @@ -147,6 +151,10 @@ public Locale getBoundaryScannerLocale() { return boundaryScannerLocale; } + public boolean getTopBoostOnly() { + return topBoostOnly; + } + public Map getCustomHighlighterParams() { return customHighlighterParams; } @@ -182,10 +190,12 @@ public String toString() { + '\'' + ", boundaryChars=" + Arrays.toString(boundaryChars) - + ", boundaryCharsMaxScan=" + + ", boundaryMaxScan=" + boundaryMaxScan + ", boundaryScannerLocale=" - + boundaryScannerLocale.toLanguageTag() + + boundaryScannerLocale + + ", topBoostOnly=" + + topBoostOnly + '}'; } @@ -205,6 +215,7 @@ public static final class Builder { private Character[] boundaryChars; private int boundaryMaxScan; private Locale boundaryScannerLocale; + private boolean topBoostOnly; private Map customHighlighterParams; public Builder() {} @@ -279,6 +290,11 @@ public Builder withBoundaryScannerLocale(Locale boundaryScannerLocale) { return this; } + public Builder withTopBoostOnly(boolean topBoostOnly) { + this.topBoostOnly = topBoostOnly; + return this; + } + public Builder withCustomHighlighterParams(Map customHighlighterParams) { this.customHighlighterParams = customHighlighterParams; return this; @@ -300,6 +316,7 @@ public HighlightSettings build() { boundaryChars, boundaryMaxScan, boundaryScannerLocale, + topBoostOnly, customHighlighterParams); } } diff --git a/src/main/java/com/yelp/nrtsearch/server/luceneserver/highlights/HighlightUtils.java b/src/main/java/com/yelp/nrtsearch/server/luceneserver/highlights/HighlightUtils.java index 51d50e894..63c9c39f7 100644 --- 
a/src/main/java/com/yelp/nrtsearch/server/luceneserver/highlights/HighlightUtils.java +++ b/src/main/java/com/yelp/nrtsearch/server/luceneserver/highlights/HighlightUtils.java @@ -44,6 +44,7 @@ public class HighlightUtils { private static final boolean DEFAULT_DISCRETE_MULTIVALUE = false; private static final Character[] DEFAULT_BOUNDARY_CHARS = SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS; + private static final boolean DEFAULT_TOP_PHRASE_ONCE = false; private static final int DEFAULT_BOUNDARY_MAX_SCAN = SimpleBoundaryScanner.DEFAULT_MAX_SCAN; private static final Locale DEFAULT_BOUNDARY_SCANNER_LOCALE = Locale.ROOT; private static final QueryNodeMapper QUERY_NODE_MAPPER = QueryNodeMapper.getInstance(); @@ -132,6 +133,10 @@ static Map createPerFieldSettings( settings.hasBoundaryScannerLocale() ? Locale.forLanguageTag(settings.getBoundaryScannerLocale().getValue()) : globalSettings.getBoundaryScannerLocale()) + .withTopBoostOnly( + settings.hasTopBoostOnly() + ? settings.getTopBoostOnly().getValue() + : globalSettings.getTopBoostOnly()) .withCustomHighlighterParams( settings.hasCustomHighlighterParams() ? StructValueTransformer.transformStruct( @@ -212,6 +217,10 @@ private static HighlightSettings createGlobalFieldSettings( settings.hasBoundaryScannerLocale() ? Locale.forLanguageTag(settings.getBoundaryScannerLocale().getValue()) : DEFAULT_BOUNDARY_SCANNER_LOCALE) + .withTopBoostOnly( + settings.hasTopBoostOnly() + ? settings.getTopBoostOnly().getValue() + : DEFAULT_TOP_PHRASE_ONCE) .withCustomHighlighterParams( settings.hasCustomHighlighterParams() ? 
StructValueTransformer.transformStruct(settings.getCustomHighlighterParams()) diff --git a/src/main/java/com/yelp/nrtsearch/server/luceneserver/highlights/NRTFastVectorHighlighter.java b/src/main/java/com/yelp/nrtsearch/server/luceneserver/highlights/NRTFastVectorHighlighter.java index 7a5f9b6e1..632616d56 100644 --- a/src/main/java/com/yelp/nrtsearch/server/luceneserver/highlights/NRTFastVectorHighlighter.java +++ b/src/main/java/com/yelp/nrtsearch/server/luceneserver/highlights/NRTFastVectorHighlighter.java @@ -119,12 +119,13 @@ public String[] getHighlights( "Unknown boundary scanner: " + settings.getBoundaryScanner()); } - BaseFragmentsBuilder fragmentsBuilder; - if (settings.isScoreOrdered()) { - fragmentsBuilder = new ScoreOrderFragmentsBuilder(boundaryScanner); - } else { - fragmentsBuilder = new SimpleFragmentsBuilder(boundaryScanner); - } + BaseFragmentsBuilder fragmentsBuilder = + new TopBoostOnlyFragmentsBuilderAdaptor( + settings.isScoreOrdered() + ? new ScoreOrderFragmentsBuilder() + : new SimpleFragmentsBuilder(), + boundaryScanner, + settings.getTopBoostOnly()); fragmentsBuilder.setDiscreteMultiValueHighlighting(settings.getDiscreteMultivalue()); try { diff --git a/src/main/java/com/yelp/nrtsearch/server/luceneserver/highlights/TopBoostOnlyFragmentsBuilderAdaptor.java b/src/main/java/com/yelp/nrtsearch/server/luceneserver/highlights/TopBoostOnlyFragmentsBuilderAdaptor.java new file mode 100644 index 000000000..ba8206ccd --- /dev/null +++ b/src/main/java/com/yelp/nrtsearch/server/luceneserver/highlights/TopBoostOnlyFragmentsBuilderAdaptor.java @@ -0,0 +1,89 @@ +/* + * Copyright 2024 Yelp Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.yelp.nrtsearch.server.luceneserver.highlights; + +import java.util.List; +import org.apache.lucene.document.Field; +import org.apache.lucene.search.highlight.Encoder; +import org.apache.lucene.search.vectorhighlight.BaseFragmentsBuilder; +import org.apache.lucene.search.vectorhighlight.BoundaryScanner; +import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo; +import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo.SubInfo; +import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo.Toffs; + +public class TopBoostOnlyFragmentsBuilderAdaptor extends BaseFragmentsBuilder { + private final BaseFragmentsBuilder innerBaseFragmentsBuilder; + private final boolean topBoostOnly; + + /** Wraps the given builder; when topBoostOnly is set, only the highest-boost phrases are tagged. 
*/ + public TopBoostOnlyFragmentsBuilderAdaptor( + BaseFragmentsBuilder baseFragmentsBuilder, + BoundaryScanner boundaryScanner, + boolean topBoostOnly) { + super(boundaryScanner); + this.innerBaseFragmentsBuilder = baseFragmentsBuilder; + this.topBoostOnly = topBoostOnly; + } + + @Override + public List getWeightedFragInfoList(List src) { + return innerBaseFragmentsBuilder.getWeightedFragInfoList(src); + } + + @Override + protected String makeFragment( + StringBuilder buffer, + int[] index, + Field[] values, + WeightedFragInfo fragInfo, + String[] preTags, + String[] postTags, + Encoder encoder) { + if (!topBoostOnly) { + return super.makeFragment(buffer, index, values, fragInfo, preTags, postTags, encoder); + } + StringBuilder fragment = new StringBuilder(); + final int s = fragInfo.getStartOffset(); + int[] modifiedStartOffset = {s}; + String src = + getFragmentSourceMSO( + buffer, index, values, s, fragInfo.getEndOffset(), modifiedStartOffset); + int srcIndex = 0; + double topBoostValue = + fragInfo.getSubInfos().stream().map(SubInfo::getBoost).max(Float::compare).orElse(0f); + for (SubInfo subInfo : fragInfo.getSubInfos()) { + if (subInfo.getBoost() < topBoostValue) { + continue; + } + for (Toffs to : subInfo.getTermsOffsets()) { + fragment + .append( + encoder.encodeText( + src.substring(srcIndex, to.getStartOffset() - modifiedStartOffset[0]))) + .append(getPreTag(preTags, subInfo.getSeqnum())) + .append( + encoder.encodeText( + src.substring( + to.getStartOffset() - modifiedStartOffset[0], + to.getEndOffset() - modifiedStartOffset[0]))) + .append(getPostTag(postTags, subInfo.getSeqnum())); + srcIndex = to.getEndOffset() - modifiedStartOffset[0]; + } + } + fragment.append(encoder.encodeText(src.substring(srcIndex))); + return fragment.toString(); + } +} diff --git a/src/test/java/com/yelp/nrtsearch/server/luceneserver/highlights/NRTFastVectorHighlighterTest.java 
b/src/test/java/com/yelp/nrtsearch/server/luceneserver/highlights/NRTFastVectorHighlighterTest.java index de02c5fb1..ea119d1be 100644 --- a/src/test/java/com/yelp/nrtsearch/server/luceneserver/highlights/NRTFastVectorHighlighterTest.java +++ b/src/test/java/com/yelp/nrtsearch/server/luceneserver/highlights/NRTFastVectorHighlighterTest.java @@ -24,6 +24,8 @@ import com.google.protobuf.UInt32Value; import com.yelp.nrtsearch.server.grpc.AddDocumentRequest; import com.yelp.nrtsearch.server.grpc.AddDocumentRequest.MultiValuedField; +import com.yelp.nrtsearch.server.grpc.BooleanClause; +import com.yelp.nrtsearch.server.grpc.BooleanQuery; import com.yelp.nrtsearch.server.grpc.FieldDefRequest; import com.yelp.nrtsearch.server.grpc.Highlight; import com.yelp.nrtsearch.server.grpc.Highlight.Settings; @@ -78,8 +80,9 @@ protected void initIndex(String name) throws Exception { .addAllValue( List.of( "The food is good there, but the service is terrible.", - "I personally don't like the staff at this place", - "Not all food are good.")) + "I personally don't like the staff at this place.", + "Not all food are good.", + "The margarita pizza and the marinara pizza in this pizzeria are yummy and inexpensive.")) .build()) .putFields( "boundary_scanner_field", @@ -190,6 +193,73 @@ public void testHighlightMultivalueField() { assertThat(response.getDiagnostics().getHighlightTimeMs()).isGreaterThan(0); } + @Test + public void testHighlightMultivalueFieldWithTopBoostOnly() { + Highlight highlight = + Highlight.newBuilder() + .addFields("comment_multivalue") + .setSettings( + Settings.newBuilder() + .setHighlightQuery( + Query.newBuilder() + .setBooleanQuery( + BooleanQuery.newBuilder() + .addClauses( + BooleanClause.newBuilder() + .setQuery( + Query.newBuilder() + .setPhraseQuery( + PhraseQuery.newBuilder() + .setField("comment_multivalue") + .addAllTerms( + List.of("margarita", "pizza"))) + .setBoost(3)) + .setOccurValue(BooleanClause.Occur.SHOULD_VALUE)) + .addClauses( + 
BooleanClause.newBuilder() + .setQuery( + Query.newBuilder() + .setPhraseQuery( + PhraseQuery.newBuilder() + .setField("comment_multivalue") + .addAllTerms( + List.of("marinara", "pizza"))) + .setBoost(3)) + .setOccurValue(BooleanClause.Occur.SHOULD_VALUE)) + .addClauses( + BooleanClause.newBuilder() + .setQuery( + Query.newBuilder() + .setTermQuery( + TermQuery.newBuilder() + .setField("comment_multivalue") + .setTextValue("delicious")) + .setBoost(4))) + .addClauses( + BooleanClause.newBuilder() + .setQuery( + Query.newBuilder() + .setTermQuery( + TermQuery.newBuilder() + .setField("comment_multivalue") + .setTextValue("yummy")) + .setBoost(2))))) + .setMaxNumberOfFragments(UInt32Value.of(1)) + .setFragmentSize(UInt32Value.of(250)) + .setTopBoostOnly(BoolValue.of(true)) + .setScoreOrdered(BoolValue.of(true)) + .setDiscreteMultivalue(BoolValue.of(true))) + .build(); + SearchResponse response = doHighlightQuery(highlight); + + assertFields(response); + + assertThat(response.getHits(0).getHighlightsMap().get("comment_multivalue").getFragmentsList()) + .containsExactly( + "The margarita pizza and the marinara pizza in this pizzeria are yummy and inexpensive."); + assertThat(response.getDiagnostics().getHighlightTimeMs()).isGreaterThan(0); + } + @Test public void testHighlightGlobalSettings() { Settings settings = From 1e3ff4db558fc8f5fa9f3e04cd4953b0cb929550 Mon Sep 17 00:00:00 2001 From: Ziqi Wang Date: Wed, 15 Jan 2025 12:16:58 -0800 Subject: [PATCH 2/3] resolve comments --- .../TopBoostOnlyFragmentsBuilderAdaptor.java | 29 +++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/yelp/nrtsearch/server/luceneserver/highlights/TopBoostOnlyFragmentsBuilderAdaptor.java b/src/main/java/com/yelp/nrtsearch/server/luceneserver/highlights/TopBoostOnlyFragmentsBuilderAdaptor.java index ba8206ccd..310d32749 100644 --- a/src/main/java/com/yelp/nrtsearch/server/luceneserver/highlights/TopBoostOnlyFragmentsBuilderAdaptor.java +++ 
b/src/main/java/com/yelp/nrtsearch/server/luceneserver/highlights/TopBoostOnlyFragmentsBuilderAdaptor.java @@ -15,6 +15,7 @@ */ package com.yelp.nrtsearch.server.luceneserver.highlights; +import java.util.ArrayList; import java.util.List; import org.apache.lucene.document.Field; import org.apache.lucene.search.highlight.Encoder; @@ -24,6 +25,13 @@ import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo.SubInfo; import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo.Toffs; +/** + * Adapter for {@link org.apache.lucene.search.vectorhighlight.FragmentsBuilder} that highlights + * only the top matched phrases based on the boost value in the query. This adapter does not alter + * the order or score of the generated fragments. All phrases contribute to scoring if the + * innerBaseFragmentsBuilder is a {@link + * org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder}. + */ public class TopBoostOnlyFragmentsBuilderAdaptor extends BaseFragmentsBuilder { private final BaseFragmentsBuilder innerBaseFragmentsBuilder; private final boolean topBoostOnly; @@ -38,11 +46,16 @@ public TopBoostOnlyFragmentsBuilderAdaptor( this.topBoostOnly = topBoostOnly; } + /** Delegates to the inner FragmentsBuilder to determine the fragment order. */ @Override public List getWeightedFragInfoList(List src) { return innerBaseFragmentsBuilder.getWeightedFragInfoList(src); } + /** + * Creates a fragment that highlights only the top boost phrases if `topBoostOnly` is set. + * Otherwise, it delegates to the base implementation. 
+ */ @Override protected String makeFragment( StringBuilder buffer, @@ -62,9 +75,21 @@ protected String makeFragment( getFragmentSourceMSO( buffer, index, values, s, fragInfo.getEndOffset(), modifiedStartOffset); int srcIndex = 0; - double topBoostValue = - fragInfo.getSubInfos().stream().map(SubInfo::getBoost).max(Float::compare).orElse(0f); + // filter out the phrases with lower boost at the fragment creation time only + float topBoostValue = 0; + List topSubInfoList = new ArrayList<>(); for (SubInfo subInfo : fragInfo.getSubInfos()) { + float boost = subInfo.getBoost(); + if (boost > topBoostValue) { + topBoostValue = boost; + topSubInfoList.clear(); + topSubInfoList.add(subInfo); + } else if (boost == topBoostValue) { + topSubInfoList.add(subInfo); + } + } + + for (SubInfo subInfo : topSubInfoList) { if (subInfo.getBoost() < topBoostValue) { continue; } From 531edbf715bd20e16c6cf3d8c66f5124d18efbcc Mon Sep 17 00:00:00 2001 From: Ziqi Wang Date: Wed, 15 Jan 2025 12:27:39 -0800 Subject: [PATCH 3/3] update rst --- docs/highlighting.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/highlighting.rst b/docs/highlighting.rst index b4f0f3fba..c9434a3d3 100644 --- a/docs/highlighting.rst +++ b/docs/highlighting.rst @@ -74,6 +74,8 @@ This is the proto definition for Highlight message which can be specified in Sea google.protobuf.UInt32Value boundary_max_scan = 15; // Locale used in boundary scanner when using "word" or "sentence" boundary_scanner. Examples: "en-US", "ch-ZH". google.protobuf.StringValue boundary_scanner_locale = 16; + // Only highlight the top matched phrases (with the highest boost value) per fragment. By default, it is false. + google.protobuf.BoolValue top_boost_only = 17; } // Highlight settings