forked from opensearch-project/OpenSearch
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implementation for match_only_text field (opensearch-project#11039)
* Implementation for match_only_text field Signed-off-by: Rishabh Maurya <rishabhmaurya05@gmail.com> * Fix build failures Signed-off-by: Rishabh Maurya <rishabhmaurya05@gmail.com> * Fix bugs Signed-off-by: Rishabh Maurya <rishabhmaurya05@gmail.com> * Added mapper tests, still failing on prefix and phrase tests Signed-off-by: Rishabh Maurya <rishabhmaurya05@gmail.com> * Disable index prefix and phrase mapper Signed-off-by: Rishabh Maurya <rishabhmaurya05@gmail.com> * Added unit tests for phrase and multiphrase query validation Signed-off-by: Rishabh Maurya <rishabhmaurya05@gmail.com> * Add unit tests for prefix and prefix phrase queries Signed-off-by: Rishabh Maurya <rishabhmaurya05@gmail.com> * Add a test to cover 3 word with synonym match phrase prefix query Signed-off-by: Rishabh Maurya <rishabhmaurya05@gmail.com> * Add unit test for SourceFieldMatchQuery Signed-off-by: Rishabh Maurya <rishabhmaurya05@gmail.com> * Added test for _source disabled case Signed-off-by: Rishabh Maurya <rishabhmaurya05@gmail.com> * Add unit test for missing field Signed-off-by: Rishabh Maurya <rishabhmaurya05@gmail.com> * more validation tests and changelog update Signed-off-by: Rishabh Maurya <rishabhmaurya05@gmail.com> * Added integration tests for match_only_text replicating text field integ tests Signed-off-by: Rishabh Maurya <rishabhmaurya05@gmail.com> * Added skip section in integ test to fix mixed cluster failures Signed-off-by: Rishabh Maurya <rishabhmaurya05@gmail.com> * remove unused import Signed-off-by: Rishabh Maurya <rishabhmaurya05@gmail.com> * Address PR comments Signed-off-by: Rishabh Maurya <rishabhmaurya05@gmail.com> * fix integ tests Signed-off-by: Rishabh Maurya <rishabhmaurya05@gmail.com> * Fix flaky test due to random indexwriter Signed-off-by: Rishabh Maurya <rishabhmaurya05@gmail.com> * pr comment: header modification Signed-off-by: Rishabh Maurya <rishabhmaurya05@gmail.com> * Address PR comments Signed-off-by: Rishabh Maurya <rishabhmaurya05@gmail.com> * added 
change to the right section of CHANGELOG Signed-off-by: Rishabh Maurya <rishabhmaurya05@gmail.com> * overriding the textFieldType before every test Signed-off-by: Rishabh Maurya <rishabhmaurya05@gmail.com> * rename @before method Signed-off-by: Rishabh Maurya <rishabhmaurya05@gmail.com> * update changelog description Signed-off-by: Rishabh Maurya <rishabhmaurya05@gmail.com> --------- Signed-off-by: Rishabh Maurya <rishabhmaurya05@gmail.com>
- Loading branch information
1 parent
c52d4a3
commit 7b1c2c7
Showing
36 changed files
with
3,959 additions
and
169 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
70 changes: 70 additions & 0 deletions
70
...yamlRestTest/resources/rest-api-spec/test/search.query/11_match_field_match_only_text.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
# integration tests for queries with specific analysis chains | ||
|
||
"match query with stacked stems": | ||
- skip: | ||
version: " - 2.99.99" | ||
reason: "match_only_text was added in 3.0" | ||
# Tests that the match query's stemmed tokens are "stacked" on top of the unstemmed | |
# versions in the same position. | |
- do: | ||
indices.create: | ||
index: test | ||
body: | ||
settings: | ||
number_of_shards: 1 | ||
number_of_replicas: 1 | ||
analysis: | ||
analyzer: | ||
index: | ||
tokenizer: standard | ||
filter: [lowercase] | ||
search: | ||
rest_total_hits_as_int: true | ||
tokenizer: standard | ||
filter: [lowercase, keyword_repeat, porter_stem, unique_stem] | ||
filter: | ||
unique_stem: | ||
type: unique | ||
only_on_same_position: true | ||
mappings: | ||
properties: | ||
text: | ||
type: match_only_text | ||
analyzer: index | ||
search_analyzer: search | ||
|
||
- do: | ||
index: | ||
index: test | ||
id: 1 | ||
body: { "text": "the fox runs across the street" } | ||
refresh: true | ||
|
||
- do: | ||
search: | ||
rest_total_hits_as_int: true | ||
body: | ||
query: | ||
match: | ||
text: | ||
query: fox runs | ||
operator: AND | ||
- match: {hits.total: 1} | ||
|
||
- do: | ||
index: | ||
index: test | ||
id: 2 | ||
body: { "text": "run fox run" } | ||
refresh: true | ||
|
||
- do: | ||
search: | ||
rest_total_hits_as_int: true | ||
body: | ||
query: | ||
match: | ||
text: | ||
query: fox runs | ||
operator: AND | ||
- match: {hits.total: 2} |
144 changes: 144 additions & 0 deletions
144
...tTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only_text.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
"ngram search": | ||
- skip: | ||
version: " - 2.99.99" | ||
reason: "match_only_text was added in 3.0" | ||
- do: | ||
indices.create: | ||
index: test | ||
body: | ||
settings: | ||
number_of_shards: 1 | ||
number_of_replicas: 0 | ||
analysis: | ||
analyzer: | ||
my_analyzer: | ||
tokenizer: standard | ||
filter: [my_ngram] | ||
filter: | ||
my_ngram: | ||
type: ngram | ||
min: 2, | ||
max: 2 | ||
mappings: | ||
properties: | ||
text: | ||
type: match_only_text | ||
analyzer: my_analyzer | ||
|
||
- do: | ||
index: | ||
index: test | ||
id: 1 | ||
body: { "text": "foo bar baz" } | ||
refresh: true | ||
|
||
- do: | ||
search: | ||
rest_total_hits_as_int: true | ||
body: | ||
query: | ||
match: | ||
text: | ||
query: foa | ||
- match: {hits.total: 1} | ||
|
||
--- | ||
"testNGramCopyField": | ||
- skip: | ||
version: " - 2.99.99" | ||
reason: "match_only_text was added in 3.0" | ||
- do: | ||
indices.create: | ||
index: test | ||
body: | ||
settings: | ||
number_of_shards: 1 | ||
number_of_replicas: 0 | ||
max_ngram_diff: 9 | ||
analysis: | ||
analyzer: | ||
my_ngram_analyzer: | ||
tokenizer: my_ngram_tokenizer | ||
tokenizer: | ||
my_ngram_tokenizer: | ||
type: ngram | ||
min: 1, | ||
max: 10 | ||
token_chars: [] | ||
mappings: | ||
properties: | ||
origin: | ||
type: match_only_text | ||
copy_to: meta | ||
meta: | ||
type: match_only_text | ||
analyzer: my_ngram_analyzer | ||
|
||
- do: | ||
index: | ||
index: test | ||
id: 1 | ||
body: { "origin": "C.A1234.5678" } | ||
refresh: true | ||
|
||
- do: | ||
search: | ||
rest_total_hits_as_int: true | ||
body: | ||
query: | ||
match: | ||
meta: | ||
query: 1234 | ||
- match: {hits.total: 1} | ||
|
||
- do: | ||
search: | ||
rest_total_hits_as_int: true | ||
body: | ||
query: | ||
match: | ||
meta: | ||
query: 1234.56 | ||
- match: {hits.total: 1} | ||
|
||
- do: | ||
search: | ||
rest_total_hits_as_int: true | ||
body: | ||
query: | ||
match: | ||
meta: | ||
query: A1234 | ||
- match: {hits.total: 1} | ||
|
||
- do: | ||
search: | ||
rest_total_hits_as_int: true | ||
body: | ||
query: | ||
term: | ||
meta: | ||
value: a1234 | ||
- match: {hits.total: 0} | ||
|
||
- do: | ||
search: | ||
rest_total_hits_as_int: true | ||
body: | ||
query: | ||
match: | ||
meta: | ||
query: A1234 | ||
analyzer: my_ngram_analyzer | ||
- match: {hits.total: 1} | ||
|
||
- do: | ||
search: | ||
rest_total_hits_as_int: true | ||
body: | ||
query: | ||
match: | ||
meta: | ||
query: a1234 | ||
analyzer: my_ngram_analyzer | ||
- match: {hits.total: 1} |
137 changes: 137 additions & 0 deletions
137
...resources/rest-api-spec/test/search.query/30_ngram_highligthing_field_match_only_text.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
"ngram highlighting": | ||
- skip: | ||
version: " - 2.99.99" | ||
reason: "match_only_text was added in 3.0" | ||
- do: | ||
indices.create: | ||
index: test | ||
body: | ||
settings: | ||
number_of_shards: 1 | ||
number_of_replicas: 0 | ||
index.max_ngram_diff: 19 | ||
analysis: | ||
tokenizer: | ||
my_ngramt: | ||
type: ngram | ||
min_gram: 1 | ||
max_gram: 20 | ||
token_chars: letter,digit | ||
filter: | ||
my_ngram: | ||
type: ngram | ||
min_gram: 1 | ||
max_gram: 20 | ||
analyzer: | ||
name2_index_analyzer: | ||
tokenizer: whitespace | ||
filter: [my_ngram] | ||
name_index_analyzer: | ||
tokenizer: my_ngramt | ||
name_search_analyzer: | ||
tokenizer: whitespace | ||
mappings: | ||
properties: | ||
name: | ||
type: match_only_text | ||
term_vector: with_positions_offsets | ||
analyzer: name_index_analyzer | ||
search_analyzer: name_search_analyzer | ||
name2: | ||
type: match_only_text | ||
term_vector: with_positions_offsets | ||
analyzer: name2_index_analyzer | ||
search_analyzer: name_search_analyzer | ||
|
||
- do: | ||
index: | ||
index: test | ||
id: 1 | ||
refresh: true | ||
body: | ||
name: logicacmg ehemals avinci - the know how company | ||
name2: logicacmg ehemals avinci - the know how company | ||
|
||
- do: | ||
search: | ||
rest_total_hits_as_int: true | ||
body: | ||
query: | ||
match: | ||
name: | ||
query: logica m | ||
highlight: | ||
fields: | ||
- name: {} | ||
- match: {hits.total: 1} | ||
- match: {hits.hits.0.highlight.name.0: "<em>logica</em>c<em>m</em>g ehe<em>m</em>als avinci - the know how co<em>m</em>pany"} | ||
|
||
- do: | ||
search: | ||
rest_total_hits_as_int: true | ||
body: | ||
query: | ||
match: | ||
name: | ||
query: logica ma | ||
highlight: | ||
fields: | ||
- name: {} | ||
- match: {hits.total: 1} | ||
- match: {hits.hits.0.highlight.name.0: "<em>logica</em>cmg ehe<em>ma</em>ls avinci - the know how company"} | ||
|
||
- do: | ||
search: | ||
rest_total_hits_as_int: true | ||
body: | ||
query: | ||
match: | ||
name: | ||
query: logica | ||
highlight: | ||
fields: | ||
- name: {} | ||
- match: {hits.total: 1} | ||
- match: {hits.hits.0.highlight.name.0: "<em>logica</em>cmg ehemals avinci - the know how company"} | ||
|
||
- do: | ||
search: | ||
rest_total_hits_as_int: true | ||
body: | ||
query: | ||
match: | ||
name2: | ||
query: logica m | ||
highlight: | ||
fields: | ||
- name2: {} | ||
- match: {hits.total: 1} | ||
- match: {hits.hits.0.highlight.name2.0: "<em>logicacmg</em> <em>ehemals</em> avinci - the know how <em>company</em>"} | ||
|
||
- do: | ||
search: | ||
rest_total_hits_as_int: true | ||
body: | ||
query: | ||
match: | ||
name2: | ||
query: logica ma | ||
highlight: | ||
fields: | ||
- name2: {} | ||
- match: {hits.total: 1} | ||
- match: {hits.hits.0.highlight.name2.0: "<em>logicacmg</em> <em>ehemals</em> avinci - the know how company"} | ||
|
||
- do: | ||
search: | ||
rest_total_hits_as_int: true | ||
body: | ||
query: | ||
match: | ||
name2: | ||
query: logica | ||
highlight: | ||
fields: | ||
- name2: {} | ||
- match: {hits.total: 1} | ||
- match: {hits.hits.0.highlight.name2.0: "<em>logicacmg</em> ehemals avinci - the know how company"} |
Oops, something went wrong.