From 5a0619f2668b61c6e6edfb6ef43ef65008d60a70 Mon Sep 17 00:00:00 2001 From: waziqi89 <89210409+waziqi89@users.noreply.github.com> Date: Fri, 17 Jan 2025 12:44:51 -0500 Subject: [PATCH] fvh adaptor to highlight the top boost only. (#815) --- build.gradle | 2 +- .../main/proto/yelp/nrtsearch/search.proto | 2 + docs/highlighting.rst | 2 + grpc-gateway/luceneserver.swagger.json | 4 + grpc-gateway/search.pb.go | 208 ++++++++++-------- .../server/highlights/HighlightSettings.java | 21 +- .../server/highlights/HighlightUtils.java | 9 + .../highlights/NRTFastVectorHighlighter.java | 13 +- .../TopBoostOnlyFragmentsBuilderAdaptor.java | 115 ++++++++++ .../NRTFastVectorHighlighterTest.java | 74 ++++++- 10 files changed, 342 insertions(+), 108 deletions(-) create mode 100644 src/main/java/com/yelp/nrtsearch/server/highlights/TopBoostOnlyFragmentsBuilderAdaptor.java diff --git a/build.gradle b/build.gradle index 1ece92999..26e37903d 100644 --- a/build.gradle +++ b/build.gradle @@ -26,7 +26,7 @@ java { } allprojects { - version = '1.0.0-beta.2' + version = '1.0.0-beta.3' group = 'com.yelp.nrtsearch' } diff --git a/clientlib/src/main/proto/yelp/nrtsearch/search.proto b/clientlib/src/main/proto/yelp/nrtsearch/search.proto index d3ffe0622..473eba4b8 100644 --- a/clientlib/src/main/proto/yelp/nrtsearch/search.proto +++ b/clientlib/src/main/proto/yelp/nrtsearch/search.proto @@ -1457,6 +1457,8 @@ message Highlight { google.protobuf.UInt32Value boundary_max_scan = 15; // Locale used in boundary scanner when using "word" or "sentence" boundary_scanner. Examples: "en-US", "ch-ZH". google.protobuf.StringValue boundary_scanner_locale = 16; + // Only highlight the top matched phrases (with the highest boost value) per fragment. By default, it is false. + google.protobuf.BoolValue top_boost_only = 17; } // Highlight settings diff --git a/docs/highlighting.rst b/docs/highlighting.rst index 1d5a60c9a..df10fb949 100644 --- a/docs/highlighting.rst +++ b/docs/highlighting.rst @@ -73,6 +73,8 @@ This is the proto definition for Highlight message which can be specified in Sea google.protobuf.UInt32Value boundary_max_scan = 15; // Locale used in boundary scanner when using "word" or "sentence" boundary_scanner. Examples: "en-US", "ch-ZH". google.protobuf.StringValue boundary_scanner_locale = 16; + // Only highlight the top matched phrases (with the highest boost value) per fragment. By default, it is false. + google.protobuf.BoolValue top_boost_only = 17; } // Highlight settings diff --git a/grpc-gateway/luceneserver.swagger.json b/grpc-gateway/luceneserver.swagger.json index a33338eac..fbaa28ef2 100644 --- a/grpc-gateway/luceneserver.swagger.json +++ b/grpc-gateway/luceneserver.swagger.json @@ -1771,6 +1771,10 @@ "boundaryScannerLocale": { "type": "string", "description": "Locale used in boundary scanner when using \"word\" or \"sentence\" boundary_scanner. Examples: \"en-US\", \"ch-ZH\"." + }, + "topBoostOnly": { + "type": "boolean", + "description": "Only highlight the top matched phrases (with the highest boost value) per fragment. By default, it is false." } } }, diff --git a/grpc-gateway/search.pb.go b/grpc-gateway/search.pb.go index d74d78042..1050fe498 100644 --- a/grpc-gateway/search.pb.go +++ b/grpc-gateway/search.pb.go @@ -9359,6 +9359,8 @@ type Highlight_Settings struct { BoundaryMaxScan *wrapperspb.UInt32Value `protobuf:"bytes,15,opt,name=boundary_max_scan,json=boundaryMaxScan,proto3" json:"boundary_max_scan,omitempty"` // Locale used in boundary scanner when using "word" or "sentence" boundary_scanner. Examples: "en-US", "ch-ZH". BoundaryScannerLocale *wrapperspb.StringValue `protobuf:"bytes,16,opt,name=boundary_scanner_locale,json=boundaryScannerLocale,proto3" json:"boundary_scanner_locale,omitempty"` + // Only highlight the top matched phrases (with the highest boost value) per fragment. By default, it is false. + TopBoostOnly *wrapperspb.BoolValue `protobuf:"bytes,17,opt,name=top_boost_only,json=topBoostOnly,proto3" json:"top_boost_only,omitempty"` } func (x *Highlight_Settings) Reset() { @@ -9505,6 +9507,13 @@ func (x *Highlight_Settings) GetBoundaryScannerLocale() *wrapperspb.StringValue return nil } +func (x *Highlight_Settings) GetTopBoostOnly() *wrapperspb.BoolValue { + if x != nil { + return x.TopBoostOnly + } + return nil +} + var File_yelp_nrtsearch_search_proto protoreflect.FileDescriptor var file_yelp_nrtsearch_search_proto_rawDesc = []byte{ @@ -10932,7 +10941,7 @@ var file_yelp_nrtsearch_search_proto_rawDesc = []byte{ 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x52, 0x06, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x68, 0x69, 0x74, 0x73, 0x54, 0x6f, 0x4c, 0x6f, 0x67, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, 0x68, - 0x69, 0x74, 0x73, 0x54, 0x6f, 0x4c, 0x6f, 0x67, 0x22, 0xfd, 0x0a, 0x0a, 0x09, 0x48, 0x69, 0x67, + 0x69, 0x74, 0x73, 0x54, 0x6f, 0x4c, 0x6f, 0x67, 0x22, 0xbf, 0x0b, 0x0a, 0x09, 0x48, 0x69, 0x67, 0x68, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x12, 0x3c, 0x0a, 0x08, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x20, 0x2e, 0x6c, 0x75, 0x63, 0x65, 0x6e, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x48, 0x69, 0x67, 0x68, 0x6c, 0x69, 0x67, 0x68, @@ -10944,7 +10953,7 @@ var file_yelp_nrtsearch_search_proto_rawDesc = []byte{ 0x76, 0x65, 0x72, 0x2e, 0x48, 0x69, 0x67, 0x68, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x53, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0d, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x53, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x1a, - 0xa5, 0x08, 0x0a, 0x08, 0x53, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x12, 0x47, 0x0a, 0x10, + 0xe7, 0x08, 0x0a, 0x08, 0x53, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x12, 0x47, 0x0a, 0x10, 0x68, 0x69, 0x67, 0x68, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x65, 0x72, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x1c, 0x2e, 0x6c, 0x75, 0x63, 0x65, 0x6e, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x48, 0x69, 0x67, 0x68, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, @@ -11010,95 +11019,99 @@ var file_yelp_nrtsearch_search_proto_rawDesc = []byte{ 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x15, 0x62, 0x6f, 0x75, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x53, 0x63, 0x61, 0x6e, 0x6e, 0x65, - 0x72, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x65, 0x1a, 0x62, 0x0a, 0x12, 0x46, 0x69, 0x65, 0x6c, 0x64, - 0x53, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, - 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, - 0x36, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x20, - 0x2e, 0x6c, 0x75, 0x63, 0x65, 0x6e, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x48, 0x69, - 0x67, 0x68, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x53, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, - 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x3b, 0x0a, 0x04, 0x54, - 0x79, 0x70, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x44, 0x45, 0x46, 0x41, 0x55, 0x4c, 0x54, 0x10, 0x00, - 0x12, 0x0f, 0x0a, 0x0b, 0x46, 0x41, 0x53, 0x54, 0x5f, 0x56, 0x45, 0x43, 0x54, 0x4f, 0x52, 0x10, - 0x01, 0x12, 0x09, 0x0a, 0x05, 0x50, 0x4c, 0x41, 0x49, 0x4e, 0x10, 0x02, 0x12, 0x0a, 0x0a, 0x06, - 0x43, 0x55, 0x53, 0x54, 0x4f, 0x4d, 0x10, 0x03, 0x22, 0xe7, 0x01, 0x0a, 0x08, 0x4b, 0x6e, 0x6e, - 0x51, 0x75, 0x65, 0x72, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x12, 0x2b, 0x0a, 0x06, 0x66, - 0x69, 0x6c, 0x74, 0x65, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x6c, 0x75, - 0x63, 0x65, 0x6e, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x51, 0x75, 0x65, 0x72, 0x79, - 0x52, 0x06, 0x66, 0x69, 0x6c, 0x74, 0x65, 0x72, 0x12, 0x0c, 0x0a, 0x01, 0x6b, 0x18, 0x03, 0x20, - 0x01, 0x28, 0x05, 0x52, 0x01, 0x6b, 0x12, 0x25, 0x0a, 0x0e, 0x6e, 0x75, 0x6d, 0x5f, 0x63, 0x61, - 0x6e, 0x64, 0x69, 0x64, 0x61, 0x74, 0x65, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0d, - 0x6e, 0x75, 0x6d, 0x43, 0x61, 0x6e, 0x64, 0x69, 0x64, 0x61, 0x74, 0x65, 0x73, 0x12, 0x21, 0x0a, - 0x0c, 0x71, 0x75, 0x65, 0x72, 0x79, 0x5f, 0x76, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x05, 0x20, - 0x03, 0x28, 0x02, 0x52, 0x0b, 0x71, 0x75, 0x65, 0x72, 0x79, 0x56, 0x65, 0x63, 0x74, 0x6f, 0x72, - 0x12, 0x2a, 0x0a, 0x11, 0x71, 0x75, 0x65, 0x72, 0x79, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x76, - 0x65, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x0f, 0x71, 0x75, 0x65, - 0x72, 0x79, 0x42, 0x79, 0x74, 0x65, 0x56, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x12, 0x14, 0x0a, 0x05, - 0x62, 0x6f, 0x6f, 0x73, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x02, 0x52, 0x05, 0x62, 0x6f, 0x6f, - 0x73, 0x74, 0x2a, 0x25, 0x0a, 0x0d, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x4f, 0x70, 0x65, 0x72, 0x61, - 0x74, 0x6f, 0x72, 0x12, 0x0a, 0x0a, 0x06, 0x53, 0x48, 0x4f, 0x55, 0x4c, 0x44, 0x10, 0x00, 0x12, - 0x08, 0x0a, 0x04, 0x4d, 0x55, 0x53, 0x54, 0x10, 0x01, 0x2a, 0x95, 0x01, 0x0a, 0x0d, 0x52, 0x65, - 0x77, 0x72, 0x69, 0x74, 0x65, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x12, 0x12, 0x0a, 0x0e, 0x43, - 0x4f, 0x4e, 0x53, 0x54, 0x41, 0x4e, 0x54, 0x5f, 0x53, 0x43, 0x4f, 0x52, 0x45, 0x10, 0x00, 0x12, - 0x1a, 0x0a, 0x16, 0x43, 0x4f, 0x4e, 0x53, 0x54, 0x41, 0x4e, 0x54, 0x5f, 0x53, 0x43, 0x4f, 0x52, - 0x45, 0x5f, 0x42, 0x4f, 0x4f, 0x4c, 0x45, 0x41, 0x4e, 0x10, 0x01, 0x12, 0x13, 0x0a, 0x0f, 0x53, - 0x43, 0x4f, 0x52, 0x49, 0x4e, 0x47, 0x5f, 0x42, 0x4f, 0x4f, 0x4c, 0x45, 0x41, 0x4e, 0x10, 0x02, - 0x12, 0x1b, 0x0a, 0x17, 0x54, 0x4f, 0x50, 0x5f, 0x54, 0x45, 0x52, 0x4d, 0x53, 0x5f, 0x42, 0x4c, - 0x45, 0x4e, 0x44, 0x45, 0x44, 0x5f, 0x46, 0x52, 0x45, 0x51, 0x53, 0x10, 0x03, 0x12, 0x13, 0x0a, - 0x0f, 0x54, 0x4f, 0x50, 0x5f, 0x54, 0x45, 0x52, 0x4d, 0x53, 0x5f, 0x42, 0x4f, 0x4f, 0x53, 0x54, - 0x10, 0x04, 0x12, 0x0d, 0x0a, 0x09, 0x54, 0x4f, 0x50, 0x5f, 0x54, 0x45, 0x52, 0x4d, 0x53, 0x10, - 0x05, 0x2a, 0x38, 0x0a, 0x13, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x51, - 0x75, 0x65, 0x72, 0x79, 0x54, 0x79, 0x70, 0x65, 0x12, 0x10, 0x0a, 0x0c, 0x50, 0x52, 0x45, 0x46, - 0x49, 0x58, 0x5f, 0x51, 0x55, 0x45, 0x52, 0x59, 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x46, 0x55, - 0x5a, 0x5a, 0x59, 0x5f, 0x51, 0x55, 0x45, 0x52, 0x59, 0x10, 0x01, 0x2a, 0xb0, 0x01, 0x0a, 0x0a, - 0x52, 0x65, 0x67, 0x65, 0x78, 0x70, 0x46, 0x6c, 0x61, 0x67, 0x12, 0x0e, 0x0a, 0x0a, 0x52, 0x45, - 0x47, 0x45, 0x58, 0x50, 0x5f, 0x41, 0x4c, 0x4c, 0x10, 0x00, 0x12, 0x14, 0x0a, 0x10, 0x52, 0x45, - 0x47, 0x45, 0x58, 0x50, 0x5f, 0x41, 0x4e, 0x59, 0x53, 0x54, 0x52, 0x49, 0x4e, 0x47, 0x10, 0x01, - 0x12, 0x14, 0x0a, 0x10, 0x52, 0x45, 0x47, 0x45, 0x58, 0x50, 0x5f, 0x41, 0x55, 0x54, 0x4f, 0x4d, - 0x41, 0x54, 0x4f, 0x4e, 0x10, 0x02, 0x12, 0x15, 0x0a, 0x11, 0x52, 0x45, 0x47, 0x45, 0x58, 0x50, - 0x5f, 0x43, 0x4f, 0x4d, 0x50, 0x4c, 0x45, 0x4d, 0x45, 0x4e, 0x54, 0x10, 0x03, 0x12, 0x10, 0x0a, - 0x0c, 0x52, 0x45, 0x47, 0x45, 0x58, 0x50, 0x5f, 0x45, 0x4d, 0x50, 0x54, 0x59, 0x10, 0x04, 0x12, - 0x17, 0x0a, 0x13, 0x52, 0x45, 0x47, 0x45, 0x58, 0x50, 0x5f, 0x49, 0x4e, 0x54, 0x45, 0x52, 0x53, - 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x10, 0x05, 0x12, 0x13, 0x0a, 0x0f, 0x52, 0x45, 0x47, 0x45, - 0x58, 0x50, 0x5f, 0x49, 0x4e, 0x54, 0x45, 0x52, 0x56, 0x41, 0x4c, 0x10, 0x06, 0x12, 0x0f, 0x0a, - 0x0b, 0x52, 0x45, 0x47, 0x45, 0x58, 0x50, 0x5f, 0x4e, 0x4f, 0x4e, 0x45, 0x10, 0x07, 0x2a, 0xa6, - 0x03, 0x0a, 0x09, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x79, 0x70, 0x65, 0x12, 0x08, 0x0a, 0x04, - 0x4e, 0x4f, 0x4e, 0x45, 0x10, 0x00, 0x12, 0x11, 0x0a, 0x0d, 0x42, 0x4f, 0x4f, 0x4c, 0x45, 0x41, - 0x4e, 0x5f, 0x51, 0x55, 0x45, 0x52, 0x59, 0x10, 0x01, 0x12, 0x10, 0x0a, 0x0c, 0x50, 0x48, 0x52, - 0x41, 0x53, 0x45, 0x5f, 0x51, 0x55, 0x45, 0x52, 0x59, 0x10, 0x02, 0x12, 0x18, 0x0a, 0x14, 0x46, - 0x55, 0x4e, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x53, 0x43, 0x4f, 0x52, 0x45, 0x5f, 0x51, 0x55, - 0x45, 0x52, 0x59, 0x10, 0x03, 0x12, 0x0e, 0x0a, 0x0a, 0x54, 0x45, 0x52, 0x4d, 0x5f, 0x51, 0x55, - 0x45, 0x52, 0x59, 0x10, 0x04, 0x12, 0x15, 0x0a, 0x11, 0x54, 0x45, 0x52, 0x4d, 0x5f, 0x49, 0x4e, - 0x5f, 0x53, 0x45, 0x54, 0x5f, 0x51, 0x55, 0x45, 0x52, 0x59, 0x10, 0x05, 0x12, 0x13, 0x0a, 0x0f, - 0x44, 0x49, 0x53, 0x4a, 0x55, 0x4e, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x4d, 0x41, 0x58, 0x10, - 0x06, 0x12, 0x09, 0x0a, 0x05, 0x4d, 0x41, 0x54, 0x43, 0x48, 0x10, 0x07, 0x12, 0x10, 0x0a, 0x0c, - 0x4d, 0x41, 0x54, 0x43, 0x48, 0x5f, 0x50, 0x48, 0x52, 0x41, 0x53, 0x45, 0x10, 0x08, 0x12, 0x0f, - 0x0a, 0x0b, 0x4d, 0x55, 0x4c, 0x54, 0x49, 0x5f, 0x4d, 0x41, 0x54, 0x43, 0x48, 0x10, 0x09, 0x12, - 0x09, 0x0a, 0x05, 0x52, 0x41, 0x4e, 0x47, 0x45, 0x10, 0x0a, 0x12, 0x14, 0x0a, 0x10, 0x47, 0x45, - 0x4f, 0x5f, 0x42, 0x4f, 0x55, 0x4e, 0x44, 0x49, 0x4e, 0x47, 0x5f, 0x42, 0x4f, 0x58, 0x10, 0x0b, - 0x12, 0x0d, 0x0a, 0x09, 0x47, 0x45, 0x4f, 0x5f, 0x50, 0x4f, 0x49, 0x4e, 0x54, 0x10, 0x0c, 0x12, - 0x0a, 0x0a, 0x06, 0x4e, 0x45, 0x53, 0x54, 0x45, 0x44, 0x10, 0x0d, 0x12, 0x0a, 0x0a, 0x06, 0x45, - 0x58, 0x49, 0x53, 0x54, 0x53, 0x10, 0x0e, 0x12, 0x0e, 0x0a, 0x0a, 0x47, 0x45, 0x4f, 0x5f, 0x52, - 0x41, 0x44, 0x49, 0x55, 0x53, 0x10, 0x0f, 0x12, 0x0e, 0x0a, 0x0a, 0x43, 0x4f, 0x4d, 0x50, 0x4c, - 0x45, 0x54, 0x49, 0x4f, 0x4e, 0x10, 0x10, 0x12, 0x1e, 0x0a, 0x1a, 0x4d, 0x55, 0x4c, 0x54, 0x49, - 0x5f, 0x46, 0x55, 0x4e, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x53, 0x43, 0x4f, 0x52, 0x45, 0x5f, - 0x51, 0x55, 0x45, 0x52, 0x59, 0x10, 0x11, 0x12, 0x17, 0x0a, 0x13, 0x4d, 0x41, 0x54, 0x43, 0x48, - 0x5f, 0x50, 0x48, 0x52, 0x41, 0x53, 0x45, 0x5f, 0x50, 0x52, 0x45, 0x46, 0x49, 0x58, 0x10, 0x12, - 0x12, 0x0a, 0x0a, 0x06, 0x50, 0x52, 0x45, 0x46, 0x49, 0x58, 0x10, 0x13, 0x12, 0x18, 0x0a, 0x14, - 0x43, 0x4f, 0x4e, 0x53, 0x54, 0x41, 0x4e, 0x54, 0x5f, 0x53, 0x43, 0x4f, 0x52, 0x45, 0x5f, 0x51, - 0x55, 0x45, 0x52, 0x59, 0x10, 0x14, 0x12, 0x0f, 0x0a, 0x0b, 0x47, 0x45, 0x4f, 0x5f, 0x50, 0x4f, - 0x4c, 0x59, 0x47, 0x4f, 0x4e, 0x10, 0x15, 0x12, 0x0e, 0x0a, 0x0a, 0x53, 0x50, 0x41, 0x4e, 0x5f, - 0x51, 0x55, 0x45, 0x52, 0x59, 0x10, 0x16, 0x2a, 0x3c, 0x0a, 0x08, 0x53, 0x65, 0x6c, 0x65, 0x63, - 0x74, 0x6f, 0x72, 0x12, 0x07, 0x0a, 0x03, 0x4d, 0x49, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, - 0x4d, 0x41, 0x58, 0x10, 0x01, 0x12, 0x0e, 0x0a, 0x0a, 0x4d, 0x49, 0x44, 0x44, 0x4c, 0x45, 0x5f, - 0x4d, 0x49, 0x4e, 0x10, 0x02, 0x12, 0x0e, 0x0a, 0x0a, 0x4d, 0x49, 0x44, 0x44, 0x4c, 0x45, 0x5f, - 0x4d, 0x41, 0x58, 0x10, 0x03, 0x42, 0x58, 0x0a, 0x1e, 0x63, 0x6f, 0x6d, 0x2e, 0x79, 0x65, 0x6c, - 0x70, 0x2e, 0x6e, 0x72, 0x74, 0x73, 0x65, 0x61, 0x72, 0x63, 0x68, 0x2e, 0x73, 0x65, 0x72, 0x76, - 0x65, 0x72, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x42, 0x13, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x52, - 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x50, 0x01, 0x5a, 0x19, - 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x59, 0x65, 0x6c, 0x70, 0x2f, - 0x6e, 0x72, 0x74, 0x73, 0x65, 0x61, 0x72, 0x63, 0x68, 0xa2, 0x02, 0x03, 0x48, 0x4c, 0x57, 0x62, - 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x72, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x40, 0x0a, 0x0e, 0x74, 0x6f, 0x70, 0x5f, 0x62, + 0x6f, 0x6f, 0x73, 0x74, 0x5f, 0x6f, 0x6e, 0x6c, 0x79, 0x18, 0x11, 0x20, 0x01, 0x28, 0x0b, 0x32, + 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, + 0x66, 0x2e, 0x42, 0x6f, 0x6f, 0x6c, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x0c, 0x74, 0x6f, 0x70, + 0x42, 0x6f, 0x6f, 0x73, 0x74, 0x4f, 0x6e, 0x6c, 0x79, 0x1a, 0x62, 0x0a, 0x12, 0x46, 0x69, 0x65, + 0x6c, 0x64, 0x53, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, + 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, + 0x79, 0x12, 0x36, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, + 0x32, 0x20, 0x2e, 0x6c, 0x75, 0x63, 0x65, 0x6e, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, + 0x48, 0x69, 0x67, 0x68, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x53, 0x65, 0x74, 0x74, 0x69, 0x6e, + 0x67, 0x73, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x3b, 0x0a, + 0x04, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x44, 0x45, 0x46, 0x41, 0x55, 0x4c, 0x54, + 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x46, 0x41, 0x53, 0x54, 0x5f, 0x56, 0x45, 0x43, 0x54, 0x4f, + 0x52, 0x10, 0x01, 0x12, 0x09, 0x0a, 0x05, 0x50, 0x4c, 0x41, 0x49, 0x4e, 0x10, 0x02, 0x12, 0x0a, + 0x0a, 0x06, 0x43, 0x55, 0x53, 0x54, 0x4f, 0x4d, 0x10, 0x03, 0x22, 0xe7, 0x01, 0x0a, 0x08, 0x4b, + 0x6e, 0x6e, 0x51, 0x75, 0x65, 0x72, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x66, 0x69, 0x65, 0x6c, 0x64, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x12, 0x2b, 0x0a, + 0x06, 0x66, 0x69, 0x6c, 0x74, 0x65, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x13, 0x2e, + 0x6c, 0x75, 0x63, 0x65, 0x6e, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x51, 0x75, 0x65, + 0x72, 0x79, 0x52, 0x06, 0x66, 0x69, 0x6c, 0x74, 0x65, 0x72, 0x12, 0x0c, 0x0a, 0x01, 0x6b, 0x18, + 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x01, 0x6b, 0x12, 0x25, 0x0a, 0x0e, 0x6e, 0x75, 0x6d, 0x5f, + 0x63, 0x61, 0x6e, 0x64, 0x69, 0x64, 0x61, 0x74, 0x65, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, + 0x52, 0x0d, 0x6e, 0x75, 0x6d, 0x43, 0x61, 0x6e, 0x64, 0x69, 0x64, 0x61, 0x74, 0x65, 0x73, 0x12, + 0x21, 0x0a, 0x0c, 0x71, 0x75, 0x65, 0x72, 0x79, 0x5f, 0x76, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x18, + 0x05, 0x20, 0x03, 0x28, 0x02, 0x52, 0x0b, 0x71, 0x75, 0x65, 0x72, 0x79, 0x56, 0x65, 0x63, 0x74, + 0x6f, 0x72, 0x12, 0x2a, 0x0a, 0x11, 0x71, 0x75, 0x65, 0x72, 0x79, 0x5f, 0x62, 0x79, 0x74, 0x65, + 0x5f, 0x76, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x0f, 0x71, + 0x75, 0x65, 0x72, 0x79, 0x42, 0x79, 0x74, 0x65, 0x56, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x12, 0x14, + 0x0a, 0x05, 0x62, 0x6f, 0x6f, 0x73, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x02, 0x52, 0x05, 0x62, + 0x6f, 0x6f, 0x73, 0x74, 0x2a, 0x25, 0x0a, 0x0d, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x4f, 0x70, 0x65, + 0x72, 0x61, 0x74, 0x6f, 0x72, 0x12, 0x0a, 0x0a, 0x06, 0x53, 0x48, 0x4f, 0x55, 0x4c, 0x44, 0x10, + 0x00, 0x12, 0x08, 0x0a, 0x04, 0x4d, 0x55, 0x53, 0x54, 0x10, 0x01, 0x2a, 0x95, 0x01, 0x0a, 0x0d, + 0x52, 0x65, 0x77, 0x72, 0x69, 0x74, 0x65, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x12, 0x12, 0x0a, + 0x0e, 0x43, 0x4f, 0x4e, 0x53, 0x54, 0x41, 0x4e, 0x54, 0x5f, 0x53, 0x43, 0x4f, 0x52, 0x45, 0x10, + 0x00, 0x12, 0x1a, 0x0a, 0x16, 0x43, 0x4f, 0x4e, 0x53, 0x54, 0x41, 0x4e, 0x54, 0x5f, 0x53, 0x43, + 0x4f, 0x52, 0x45, 0x5f, 0x42, 0x4f, 0x4f, 0x4c, 0x45, 0x41, 0x4e, 0x10, 0x01, 0x12, 0x13, 0x0a, + 0x0f, 0x53, 0x43, 0x4f, 0x52, 0x49, 0x4e, 0x47, 0x5f, 0x42, 0x4f, 0x4f, 0x4c, 0x45, 0x41, 0x4e, + 0x10, 0x02, 0x12, 0x1b, 0x0a, 0x17, 0x54, 0x4f, 0x50, 0x5f, 0x54, 0x45, 0x52, 0x4d, 0x53, 0x5f, + 0x42, 0x4c, 0x45, 0x4e, 0x44, 0x45, 0x44, 0x5f, 0x46, 0x52, 0x45, 0x51, 0x53, 0x10, 0x03, 0x12, + 0x13, 0x0a, 0x0f, 0x54, 0x4f, 0x50, 0x5f, 0x54, 0x45, 0x52, 0x4d, 0x53, 0x5f, 0x42, 0x4f, 0x4f, + 0x53, 0x54, 0x10, 0x04, 0x12, 0x0d, 0x0a, 0x09, 0x54, 0x4f, 0x50, 0x5f, 0x54, 0x45, 0x52, 0x4d, + 0x53, 0x10, 0x05, 0x2a, 0x38, 0x0a, 0x13, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f, + 0x6e, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x79, 0x70, 0x65, 0x12, 0x10, 0x0a, 0x0c, 0x50, 0x52, + 0x45, 0x46, 0x49, 0x58, 0x5f, 0x51, 0x55, 0x45, 0x52, 0x59, 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, + 0x46, 0x55, 0x5a, 0x5a, 0x59, 0x5f, 0x51, 0x55, 0x45, 0x52, 0x59, 0x10, 0x01, 0x2a, 0xb0, 0x01, + 0x0a, 0x0a, 0x52, 0x65, 0x67, 0x65, 0x78, 0x70, 0x46, 0x6c, 0x61, 0x67, 0x12, 0x0e, 0x0a, 0x0a, + 0x52, 0x45, 0x47, 0x45, 0x58, 0x50, 0x5f, 0x41, 0x4c, 0x4c, 0x10, 0x00, 0x12, 0x14, 0x0a, 0x10, + 0x52, 0x45, 0x47, 0x45, 0x58, 0x50, 0x5f, 0x41, 0x4e, 0x59, 0x53, 0x54, 0x52, 0x49, 0x4e, 0x47, + 0x10, 0x01, 0x12, 0x14, 0x0a, 0x10, 0x52, 0x45, 0x47, 0x45, 0x58, 0x50, 0x5f, 0x41, 0x55, 0x54, + 0x4f, 0x4d, 0x41, 0x54, 0x4f, 0x4e, 0x10, 0x02, 0x12, 0x15, 0x0a, 0x11, 0x52, 0x45, 0x47, 0x45, + 0x58, 0x50, 0x5f, 0x43, 0x4f, 0x4d, 0x50, 0x4c, 0x45, 0x4d, 0x45, 0x4e, 0x54, 0x10, 0x03, 0x12, + 0x10, 0x0a, 0x0c, 0x52, 0x45, 0x47, 0x45, 0x58, 0x50, 0x5f, 0x45, 0x4d, 0x50, 0x54, 0x59, 0x10, + 0x04, 0x12, 0x17, 0x0a, 0x13, 0x52, 0x45, 0x47, 0x45, 0x58, 0x50, 0x5f, 0x49, 0x4e, 0x54, 0x45, + 0x52, 0x53, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x10, 0x05, 0x12, 0x13, 0x0a, 0x0f, 0x52, 0x45, + 0x47, 0x45, 0x58, 0x50, 0x5f, 0x49, 0x4e, 0x54, 0x45, 0x52, 0x56, 0x41, 0x4c, 0x10, 0x06, 0x12, + 0x0f, 0x0a, 0x0b, 0x52, 0x45, 0x47, 0x45, 0x58, 0x50, 0x5f, 0x4e, 0x4f, 0x4e, 0x45, 0x10, 0x07, + 0x2a, 0xa6, 0x03, 0x0a, 0x09, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x79, 0x70, 0x65, 0x12, 0x08, + 0x0a, 0x04, 0x4e, 0x4f, 0x4e, 0x45, 0x10, 0x00, 0x12, 0x11, 0x0a, 0x0d, 0x42, 0x4f, 0x4f, 0x4c, + 0x45, 0x41, 0x4e, 0x5f, 0x51, 0x55, 0x45, 0x52, 0x59, 0x10, 0x01, 0x12, 0x10, 0x0a, 0x0c, 0x50, + 0x48, 0x52, 0x41, 0x53, 0x45, 0x5f, 0x51, 0x55, 0x45, 0x52, 0x59, 0x10, 0x02, 0x12, 0x18, 0x0a, + 0x14, 0x46, 0x55, 0x4e, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x53, 0x43, 0x4f, 0x52, 0x45, 0x5f, + 0x51, 0x55, 0x45, 0x52, 0x59, 0x10, 0x03, 0x12, 0x0e, 0x0a, 0x0a, 0x54, 0x45, 0x52, 0x4d, 0x5f, + 0x51, 0x55, 0x45, 0x52, 0x59, 0x10, 0x04, 0x12, 0x15, 0x0a, 0x11, 0x54, 0x45, 0x52, 0x4d, 0x5f, + 0x49, 0x4e, 0x5f, 0x53, 0x45, 0x54, 0x5f, 0x51, 0x55, 0x45, 0x52, 0x59, 0x10, 0x05, 0x12, 0x13, + 0x0a, 0x0f, 0x44, 0x49, 0x53, 0x4a, 0x55, 0x4e, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x4d, 0x41, + 0x58, 0x10, 0x06, 0x12, 0x09, 0x0a, 0x05, 0x4d, 0x41, 0x54, 0x43, 0x48, 0x10, 0x07, 0x12, 0x10, + 0x0a, 0x0c, 0x4d, 0x41, 0x54, 0x43, 0x48, 0x5f, 0x50, 0x48, 0x52, 0x41, 0x53, 0x45, 0x10, 0x08, + 0x12, 0x0f, 0x0a, 0x0b, 0x4d, 0x55, 0x4c, 0x54, 0x49, 0x5f, 0x4d, 0x41, 0x54, 0x43, 0x48, 0x10, + 0x09, 0x12, 0x09, 0x0a, 0x05, 0x52, 0x41, 0x4e, 0x47, 0x45, 0x10, 0x0a, 0x12, 0x14, 0x0a, 0x10, + 0x47, 0x45, 0x4f, 0x5f, 0x42, 0x4f, 0x55, 0x4e, 0x44, 0x49, 0x4e, 0x47, 0x5f, 0x42, 0x4f, 0x58, + 0x10, 0x0b, 0x12, 0x0d, 0x0a, 0x09, 0x47, 0x45, 0x4f, 0x5f, 0x50, 0x4f, 0x49, 0x4e, 0x54, 0x10, + 0x0c, 0x12, 0x0a, 0x0a, 0x06, 0x4e, 0x45, 0x53, 0x54, 0x45, 0x44, 0x10, 0x0d, 0x12, 0x0a, 0x0a, + 0x06, 0x45, 0x58, 0x49, 0x53, 0x54, 0x53, 0x10, 0x0e, 0x12, 0x0e, 0x0a, 0x0a, 0x47, 0x45, 0x4f, + 0x5f, 0x52, 0x41, 0x44, 0x49, 0x55, 0x53, 0x10, 0x0f, 0x12, 0x0e, 0x0a, 0x0a, 0x43, 0x4f, 0x4d, + 0x50, 0x4c, 0x45, 0x54, 0x49, 0x4f, 0x4e, 0x10, 0x10, 0x12, 0x1e, 0x0a, 0x1a, 0x4d, 0x55, 0x4c, + 0x54, 0x49, 0x5f, 0x46, 0x55, 0x4e, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x53, 0x43, 0x4f, 0x52, + 0x45, 0x5f, 0x51, 0x55, 0x45, 0x52, 0x59, 0x10, 0x11, 0x12, 0x17, 0x0a, 0x13, 0x4d, 0x41, 0x54, + 0x43, 0x48, 0x5f, 0x50, 0x48, 0x52, 0x41, 0x53, 0x45, 0x5f, 0x50, 0x52, 0x45, 0x46, 0x49, 0x58, + 0x10, 0x12, 0x12, 0x0a, 0x0a, 0x06, 0x50, 0x52, 0x45, 0x46, 0x49, 0x58, 0x10, 0x13, 0x12, 0x18, + 0x0a, 0x14, 0x43, 0x4f, 0x4e, 0x53, 0x54, 0x41, 0x4e, 0x54, 0x5f, 0x53, 0x43, 0x4f, 0x52, 0x45, + 0x5f, 0x51, 0x55, 0x45, 0x52, 0x59, 0x10, 0x14, 0x12, 0x0f, 0x0a, 0x0b, 0x47, 0x45, 0x4f, 0x5f, + 0x50, 0x4f, 0x4c, 0x59, 0x47, 0x4f, 0x4e, 0x10, 0x15, 0x12, 0x0e, 0x0a, 0x0a, 0x53, 0x50, 0x41, + 0x4e, 0x5f, 0x51, 0x55, 0x45, 0x52, 0x59, 0x10, 0x16, 0x2a, 0x3c, 0x0a, 0x08, 0x53, 0x65, 0x6c, + 0x65, 0x63, 0x74, 0x6f, 0x72, 0x12, 0x07, 0x0a, 0x03, 0x4d, 0x49, 0x4e, 0x10, 0x00, 0x12, 0x07, + 0x0a, 0x03, 0x4d, 0x41, 0x58, 0x10, 0x01, 0x12, 0x0e, 0x0a, 0x0a, 0x4d, 0x49, 0x44, 0x44, 0x4c, + 0x45, 0x5f, 0x4d, 0x49, 0x4e, 0x10, 0x02, 0x12, 0x0e, 0x0a, 0x0a, 0x4d, 0x49, 0x44, 0x44, 0x4c, + 0x45, 0x5f, 0x4d, 0x41, 0x58, 0x10, 0x03, 0x42, 0x58, 0x0a, 0x1e, 0x63, 0x6f, 0x6d, 0x2e, 0x79, + 0x65, 0x6c, 0x70, 0x2e, 0x6e, 0x72, 0x74, 0x73, 0x65, 0x61, 0x72, 0x63, 0x68, 0x2e, 0x73, 0x65, + 0x72, 0x76, 0x65, 0x72, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x42, 0x13, 0x53, 0x65, 0x61, 0x72, 0x63, + 0x68, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x50, 0x01, + 0x5a, 0x19, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x59, 0x65, 0x6c, + 0x70, 0x2f, 0x6e, 0x72, 0x74, 0x73, 0x65, 0x61, 0x72, 0x63, 0x68, 0xa2, 0x02, 0x03, 0x48, 0x4c, + 0x57, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -11466,12 +11479,13 @@ var file_yelp_nrtsearch_search_proto_depIdxs = []int32{ 140, // 204: luceneserver.Highlight.Settings.boundary_chars:type_name -> google.protobuf.StringValue 138, // 205: luceneserver.Highlight.Settings.boundary_max_scan:type_name -> google.protobuf.UInt32Value 140, // 206: luceneserver.Highlight.Settings.boundary_scanner_locale:type_name -> google.protobuf.StringValue - 130, // 207: luceneserver.Highlight.FieldSettingsEntry.value:type_name -> luceneserver.Highlight.Settings - 208, // [208:208] is the sub-list for method output_type - 208, // [208:208] is the sub-list for method input_type - 208, // [208:208] is the sub-list for extension type_name - 208, // [208:208] is the sub-list for extension extendee - 0, // [0:208] is the sub-list for field type_name + 139, // 207: luceneserver.Highlight.Settings.top_boost_only:type_name -> google.protobuf.BoolValue + 130, // 208: luceneserver.Highlight.FieldSettingsEntry.value:type_name -> luceneserver.Highlight.Settings + 209, // [209:209] is the sub-list for method output_type + 209, // [209:209] is the sub-list for method input_type + 209, // [209:209] is the sub-list for extension type_name + 209, // [209:209] is the sub-list for extension extendee + 0, // [0:209] is the sub-list for field type_name } func init() { file_yelp_nrtsearch_search_proto_init() } diff --git a/src/main/java/com/yelp/nrtsearch/server/highlights/HighlightSettings.java b/src/main/java/com/yelp/nrtsearch/server/highlights/HighlightSettings.java index b763d35cb..cc2bced56 100644 --- a/src/main/java/com/yelp/nrtsearch/server/highlights/HighlightSettings.java +++ b/src/main/java/com/yelp/nrtsearch/server/highlights/HighlightSettings.java @@ -38,6 +38,7 @@ public class HighlightSettings { private final char[] boundaryChars; private final int boundaryMaxScan; private final Locale boundaryScannerLocale; + private final boolean topBoostOnly; public HighlightSettings( Highlighter highlighter, @@ -54,6 +55,7 @@ public HighlightSettings( char[] boundaryChars, int boundaryMaxScan, Locale boundaryScannerLocale, + boolean topBoostOnly, Map customHighlighterParams) { this.highlighter = highlighter; this.preTags = preTags; @@ -69,6 +71,7 @@ public HighlightSettings( this.boundaryChars = boundaryChars; this.boundaryMaxScan = boundaryMaxScan; this.boundaryScannerLocale = boundaryScannerLocale; + this.topBoostOnly = topBoostOnly; this.customHighlighterParams = customHighlighterParams; } @@ -88,6 +91,7 @@ public Builder toBuilder() { .withBoundaryChars(this.boundaryChars) .withBoundaryMaxScan(this.boundaryMaxScan) .withBoundaryScannerLocale(this.boundaryScannerLocale) + .withTopBoostOnly(this.topBoostOnly) .withCustomHighlighterParams(this.customHighlighterParams); } @@ -147,6 +151,10 @@ public Locale getBoundaryScannerLocale() { return boundaryScannerLocale; } + public boolean getTopBoostOnly() { + return topBoostOnly; + } + public Map getCustomHighlighterParams() { return customHighlighterParams; } @@ -182,10 +190,12 @@ public String toString() { + '\'' + ", boundaryChars=" + Arrays.toString(boundaryChars) - + ", boundaryCharsMaxScan=" + + ", boundaryMaxScan=" + boundaryMaxScan + ", boundaryScannerLocale=" - + boundaryScannerLocale.toLanguageTag() + + boundaryScannerLocale + + ", topBoostOnly=" + + topBoostOnly + '}'; } @@ -205,6 +215,7 @@ public static final class Builder { private char[] boundaryChars; private int boundaryMaxScan; private Locale boundaryScannerLocale; + private boolean topBoostOnly; private Map customHighlighterParams; public Builder() {} @@ -279,6 +290,11 @@ public Builder withBoundaryScannerLocale(Locale boundaryScannerLocale) { return this; } + public Builder withTopBoostOnly(boolean topBoostOnly) { + this.topBoostOnly = topBoostOnly; + return this; + } + public Builder withCustomHighlighterParams(Map customHighlighterParams) { this.customHighlighterParams = customHighlighterParams; return this; @@ -300,6 +316,7 @@ public HighlightSettings build() { boundaryChars, boundaryMaxScan, boundaryScannerLocale, + topBoostOnly, customHighlighterParams); } } diff --git a/src/main/java/com/yelp/nrtsearch/server/highlights/HighlightUtils.java b/src/main/java/com/yelp/nrtsearch/server/highlights/HighlightUtils.java index 2b70194df..75fe92243 100644 --- a/src/main/java/com/yelp/nrtsearch/server/highlights/HighlightUtils.java +++ b/src/main/java/com/yelp/nrtsearch/server/highlights/HighlightUtils.java @@ -42,6 +42,7 @@ public class HighlightUtils { private static final boolean DEFAULT_FIELD_MATCH = false; private static final boolean DEFAULT_DISCRETE_MULTIVALUE = false; private static final char[] DEFAULT_BOUNDARY_CHARS = SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS; + private static final boolean DEFAULT_TOP_PHRASE_ONCE = false; private static final int DEFAULT_BOUNDARY_MAX_SCAN = SimpleBoundaryScanner.DEFAULT_MAX_SCAN; private static final Locale DEFAULT_BOUNDARY_SCANNER_LOCALE = Locale.ROOT; private static final QueryNodeMapper QUERY_NODE_MAPPER = QueryNodeMapper.getInstance(); @@ -125,6 +126,10 @@ static Map createPerFieldSettings( settings.hasBoundaryScannerLocale() ? Locale.forLanguageTag(settings.getBoundaryScannerLocale().getValue()) : globalSettings.getBoundaryScannerLocale()) + .withTopBoostOnly( + settings.hasTopBoostOnly() + ? settings.getTopBoostOnly().getValue() + : globalSettings.getTopBoostOnly()) .withCustomHighlighterParams( settings.hasCustomHighlighterParams() ? StructValueTransformer.transformStruct( @@ -200,6 +205,10 @@ private static HighlightSettings createGlobalFieldSettings( settings.hasBoundaryScannerLocale() ? Locale.forLanguageTag(settings.getBoundaryScannerLocale().getValue()) : DEFAULT_BOUNDARY_SCANNER_LOCALE) + .withTopBoostOnly( + settings.hasTopBoostOnly() + ? settings.getTopBoostOnly().getValue() + : DEFAULT_TOP_PHRASE_ONCE) .withCustomHighlighterParams( settings.hasCustomHighlighterParams() ? StructValueTransformer.transformStruct(settings.getCustomHighlighterParams()) diff --git a/src/main/java/com/yelp/nrtsearch/server/highlights/NRTFastVectorHighlighter.java b/src/main/java/com/yelp/nrtsearch/server/highlights/NRTFastVectorHighlighter.java index 878cb668d..9a6a29828 100644 --- a/src/main/java/com/yelp/nrtsearch/server/highlights/NRTFastVectorHighlighter.java +++ b/src/main/java/com/yelp/nrtsearch/server/highlights/NRTFastVectorHighlighter.java @@ -119,12 +119,13 @@ public String[] getHighlights( "Unknown boundary scanner: " + settings.getBoundaryScanner()); } - BaseFragmentsBuilder fragmentsBuilder; - if (settings.isScoreOrdered()) { - fragmentsBuilder = new ScoreOrderFragmentsBuilder(boundaryScanner); - } else { - fragmentsBuilder = new SimpleFragmentsBuilder(boundaryScanner); - } + BaseFragmentsBuilder fragmentsBuilder = + new TopBoostOnlyFragmentsBuilderAdaptor( + settings.isScoreOrdered() + ? new ScoreOrderFragmentsBuilder() + : new SimpleFragmentsBuilder(), + boundaryScanner, + settings.getTopBoostOnly()); fragmentsBuilder.setDiscreteMultiValueHighlighting(settings.getDiscreteMultivalue()); try { diff --git a/src/main/java/com/yelp/nrtsearch/server/highlights/TopBoostOnlyFragmentsBuilderAdaptor.java b/src/main/java/com/yelp/nrtsearch/server/highlights/TopBoostOnlyFragmentsBuilderAdaptor.java new file mode 100644 index 000000000..ca1e28f2b --- /dev/null +++ b/src/main/java/com/yelp/nrtsearch/server/highlights/TopBoostOnlyFragmentsBuilderAdaptor.java @@ -0,0 +1,115 @@ +/* + * Copyright 2024 Yelp Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.yelp.nrtsearch.server.highlights; + +import java.util.ArrayList; +import java.util.List; +import org.apache.lucene.document.Field; +import org.apache.lucene.search.highlight.Encoder; +import org.apache.lucene.search.vectorhighlight.BaseFragmentsBuilder; +import org.apache.lucene.search.vectorhighlight.BoundaryScanner; +import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo; +import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo.SubInfo; +import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo.Toffs; + +/** + * Adapter for {@link org.apache.lucene.search.vectorhighlight.FragmentsBuilder} that highlights + * only the top matched phrases based on the boost value in the query. This adapter does not alter + * the order or score of the generated fragments. All phrases contribute to scoring if the + * innerBaseFragmentsBuilder is a {@link + * org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder}. + */ +public class TopBoostOnlyFragmentsBuilderAdaptor extends BaseFragmentsBuilder { + private final BaseFragmentsBuilder innerBaseFragmentsBuilder; + private final boolean topBoostOnly; + + /** a constructor. */ + public TopBoostOnlyFragmentsBuilderAdaptor( + BaseFragmentsBuilder baseFragmentsBuilder, + BoundaryScanner boundaryScanner, + boolean topBoostOnly) { + super(boundaryScanner); + this.innerBaseFragmentsBuilder = baseFragmentsBuilder; + this.topBoostOnly = topBoostOnly; + } + + /** Delegates the inner FragmentsBuilder to determine the fragment order. */ + @Override + public List getWeightedFragInfoList(List src) { + return innerBaseFragmentsBuilder.getWeightedFragInfoList(src); + } + + /** + * Creates a fragment containing only the top boost phrase if the `topBoostOnly` flag is set. + * Otherwise, it delegates to the base implementation. + */ + @Override + protected String makeFragment( + StringBuilder buffer, + int[] index, + Field[] values, + WeightedFragInfo fragInfo, + String[] preTags, + String[] postTags, + Encoder encoder) { + if (!topBoostOnly) { + return super.makeFragment(buffer, index, values, fragInfo, preTags, postTags, encoder); + } + StringBuilder fragment = new StringBuilder(); + final int s = fragInfo.getStartOffset(); + int[] modifiedStartOffset = {s}; + String src = + getFragmentSourceMSO( + buffer, index, values, s, fragInfo.getEndOffset(), modifiedStartOffset); + int srcIndex = 0; + + // filter out the phrases with lower boost at the fragment creation time only + float topBoostValue = 0; + List topSubInfoList = new ArrayList<>(); + for (SubInfo subInfo : fragInfo.getSubInfos()) { + float boost = subInfo.boost(); + if (boost > topBoostValue) { + topBoostValue = boost; + topSubInfoList.clear(); + topSubInfoList.add(subInfo); + } else if (boost == topBoostValue) { + topSubInfoList.add(subInfo); + } + } + + for (SubInfo subInfo : topSubInfoList) { + if (subInfo.boost() < topBoostValue) { + continue; + } + for (Toffs to : subInfo.termsOffsets()) { + fragment + .append( + encoder.encodeText( + src.substring(srcIndex, to.getStartOffset() - modifiedStartOffset[0]))) + .append(getPreTag(preTags, subInfo.seqnum())) + .append( + encoder.encodeText( + src.substring( + to.getStartOffset() - modifiedStartOffset[0], + to.getEndOffset() - modifiedStartOffset[0]))) + .append(getPostTag(postTags, subInfo.seqnum())); + srcIndex = to.getEndOffset() - modifiedStartOffset[0]; + } + } + fragment.append(encoder.encodeText(src.substring(srcIndex))); + return fragment.toString(); + } +} diff --git a/src/test/java/com/yelp/nrtsearch/server/highlights/NRTFastVectorHighlighterTest.java b/src/test/java/com/yelp/nrtsearch/server/highlights/NRTFastVectorHighlighterTest.java index 2aee727ed..b1cb9933b 100644 --- a/src/test/java/com/yelp/nrtsearch/server/highlights/NRTFastVectorHighlighterTest.java +++ b/src/test/java/com/yelp/nrtsearch/server/highlights/NRTFastVectorHighlighterTest.java @@ -25,6 +25,8 @@ import com.yelp.nrtsearch.server.ServerTestCase; import com.yelp.nrtsearch.server.grpc.AddDocumentRequest; import com.yelp.nrtsearch.server.grpc.AddDocumentRequest.MultiValuedField; +import com.yelp.nrtsearch.server.grpc.BooleanClause; +import com.yelp.nrtsearch.server.grpc.BooleanQuery; import com.yelp.nrtsearch.server.grpc.FieldDefRequest; import com.yelp.nrtsearch.server.grpc.Highlight; import com.yelp.nrtsearch.server.grpc.Highlight.Settings; @@ -78,8 +80,9 @@ protected void initIndex(String name) throws Exception { .addAllValue( List.of( "The food is good there, but the service is terrible.", - "I personally don't like the staff at this place", - "Not all food are good.")) + "I personally don't like the staff at this place.", + "Not all food are good.", + "The margarita pizza and the marinara pizza in this pizzeria are yummy and inexpensive.")) .build()) .putFields( "boundary_scanner_field", @@ -190,6 +193,73 @@ public void testHighlightMultivalueField() { assertThat(response.getDiagnostics().getHighlightTimeMs()).isGreaterThan(0); } + @Test + public void testHighlightMultivalueFieldWithTopBoostOnly() { + Highlight highlight = + Highlight.newBuilder() + .addFields("comment_multivalue") + .setSettings( + Settings.newBuilder() + .setHighlightQuery( + Query.newBuilder() + .setBooleanQuery( + BooleanQuery.newBuilder() + .addClauses( + BooleanClause.newBuilder() + .setQuery( + Query.newBuilder() + .setPhraseQuery( + PhraseQuery.newBuilder() + .setField("comment_multivalue") + .addAllTerms( + List.of("margarita", "pizza"))) + .setBoost(3)) + .setOccurValue(BooleanClause.Occur.SHOULD_VALUE)) + .addClauses( + BooleanClause.newBuilder() + .setQuery( + Query.newBuilder() + .setPhraseQuery( + PhraseQuery.newBuilder() + .setField("comment_multivalue") + .addAllTerms( + List.of("marinara", "pizza"))) + .setBoost(3)) + .setOccurValue(BooleanClause.Occur.SHOULD_VALUE)) + .addClauses( + BooleanClause.newBuilder() + .setQuery( + Query.newBuilder() + .setTermQuery( + TermQuery.newBuilder() + .setField("comment_multivalue") + .setTextValue("delicious")) + .setBoost(4))) + .addClauses( + BooleanClause.newBuilder() + .setQuery( + Query.newBuilder() + .setTermQuery( + TermQuery.newBuilder() + .setField("comment_multivalue") + .setTextValue("yummy")) + .setBoost(2))))) + .setMaxNumberOfFragments(UInt32Value.of(1)) + .setFragmentSize(UInt32Value.of(250)) + .setTopBoostOnly(BoolValue.of(true)) + .setScoreOrdered(BoolValue.of(true)) + .setDiscreteMultivalue(BoolValue.of(true))) + .build(); + SearchResponse response = doHighlightQuery(highlight); + + assertFields(response); + + assertThat(response.getHits(0).getHighlightsMap().get("comment_multivalue").getFragmentsList()) + .containsExactly( + "The margarita pizza and the marinara pizza in this pizzeria are yummy and inexpensive."); + assertThat(response.getDiagnostics().getHighlightTimeMs()).isGreaterThan(0); + } + @Test public void testHighlightGlobalSettings() { Settings settings =