Skip to content

Commit

Permalink
Add highlighting for match_only_text type (#17101) (#17214)
Browse files Browse the repository at this point in the history
  • Loading branch information
msfroh authored Jan 31, 2025
1 parent 93c072b commit 8c9554d
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Fix GRPC AUX_TRANSPORT_PORT and SETTING_GRPC_PORT settings and remove lingering HTTP terminology ([#17037](https://github.com/opensearch-project/OpenSearch/pull/17037))
- [WLM] Fix the QueryGroupTask logging bug ([#17169](https://github.com/opensearch-project/OpenSearch/pull/17169))
- Use OpenSearch version to deserialize remote custom metadata ([#16494](https://github.com/opensearch-project/OpenSearch/pull/16494))
- Add highlighting for wildcard search on `match_only_text` field ([#17101](https://github.com/opensearch-project/OpenSearch/pull/17101))
- Fix the failing CI runs with the `Failed to load eclipse jdt formatter` error ([#17172](https://github.com/opensearch-project/OpenSearch/pull/17172))
- Fix AutoDateHistogramAggregator rounding assertion failure ([#17023](https://github.com/opensearch-project/OpenSearch/pull/17023))

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# integration tests for queries with specific analysis chains

---
"match query with stacked stems":
- skip:
version: " - 2.11.99"
Expand Down Expand Up @@ -68,3 +68,80 @@
query: fox runs
operator: AND
- match: {hits.total: 2}

---
# REST test: creates an index whose `text` field is of type `match_only_text`, indexes two
# documents one at a time, and after each one runs an AND match query for "fox runs" with
# highlighting requested on `text`, asserting the hit count and the highlighted fragments.
"wildcard highlighting on match_only_text":
- skip:
# NOTE(review): " - 2.99.99" skips this test on every 2.x version, while the reason below
# says the feature was added in 2.19 — confirm the intended upper bound (e.g. " - 2.18.99").
version: " - 2.99.99"
reason: "wildcard highlighting on match_only_text type was added in 2.19"
# Create the index with two custom analyzers: `index` (standard tokenizer + lowercase) and
# `search` (standard tokenizer + lowercase, keyword_repeat, porter_stem, unique_stem).
- do:
indices.create:
index: test
body:
settings:
number_of_shards: 1
number_of_replicas: 1
analysis:
analyzer:
index:
tokenizer: standard
filter: [lowercase]
search:
# NOTE(review): rest_total_hits_as_int is used as a search request parameter further down;
# it looks out of place inside an analyzer definition — confirm whether it is intentional.
rest_total_hits_as_int: true
tokenizer: standard
filter: [lowercase, keyword_repeat, porter_stem, unique_stem]
filter:
# Custom `unique` token filter referenced by the `search` analyzer above.
unique_stem:
type: unique
only_on_same_position: true
mappings:
properties:
text:
# The field under test: match_only_text with separate index-time and search-time analyzers.
type: match_only_text
analyzer: index
search_analyzer: search

# Index the first document and refresh so it is visible to the following search.
- do:
index:
index: test
id: 1
body: { "text": "the fox runs across the street" }
refresh: true

# Search for both terms (operator AND) and request highlighting on `text`.
- do:
search:
rest_total_hits_as_int: true
body:
query:
match:
text:
query: fox runs
operator: AND
highlight:
fields:
# NOTE(review): the field is requested by its exact name rather than by a wildcard pattern,
# although the test title mentions wildcard highlighting — confirm this covers the intended path.
- text: {}
# Only document 1 exists at this point; both query terms are expected to be wrapped in <em>.
- match: {hits.total: 1}
- match: {hits.hits.0.highlight.text.0: "the <em>fox</em> <em>runs</em> across the street"}

# Index a second document that contains "run" rather than "runs".
- do:
index:
index: test
id: 2
body: { "text": "run fox run" }
refresh: true

# Repeat the same query; both documents are now expected to match.
- do:
search:
rest_total_hits_as_int: true
body:
query:
match:
text:
query: fox runs
operator: AND
highlight:
fields:
- text: {}
- match: {hits.total: 2}
- match: {hits.hits.0.highlight.text.0: "the <em>fox</em> <em>runs</em> across the street"}
# The query term "runs" is expected to highlight "run" in document 2 (presumably via
# porter_stem in the search analyzer — verify).
- match: {hits.hits.1.highlight.text.0: "<em>run</em> <em>fox</em> <em>run</em>"}
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import org.opensearch.common.regex.Regex;
import org.opensearch.index.mapper.KeywordFieldMapper;
import org.opensearch.index.mapper.MappedFieldType;
import org.opensearch.index.mapper.MatchOnlyTextFieldMapper;
import org.opensearch.index.mapper.SourceFieldMapper;
import org.opensearch.index.mapper.TextFieldMapper;
import org.opensearch.search.fetch.FetchContext;
Expand Down Expand Up @@ -152,7 +153,8 @@ private Map<String, Function<HitContext, FieldHighlightContext>> contextBuilders
continue;
}

// We should prevent highlighting if a field is anything but a text or keyword field.
// We should prevent highlighting if a field is anything but a text, match_only_text
// or keyword field.
// However, someone might implement a custom field type that has text and still want to
// highlight on that. We cannot know in advance if the highlighter will be able to
// highlight such a field and so we do the following:
Expand All @@ -162,7 +164,8 @@ private Map<String, Function<HitContext, FieldHighlightContext>> contextBuilders
// what they were doing and try to highlight anyway.
if (fieldNameContainsWildcards) {
if (fieldType.typeName().equals(TextFieldMapper.CONTENT_TYPE) == false
&& fieldType.typeName().equals(KeywordFieldMapper.CONTENT_TYPE) == false) {
&& fieldType.typeName().equals(KeywordFieldMapper.CONTENT_TYPE) == false
&& fieldType.typeName().equals(MatchOnlyTextFieldMapper.CONTENT_TYPE) == false) {
continue;
}
if (highlighter.canHighlight(fieldType) == false) {
Expand Down

0 comments on commit 8c9554d

Please sign in to comment.