Skip to content

Commit

Permalink
support termQueryCaseInsensitive/termQuery can search from doc_value …
Browse files Browse the repository at this point in the history
…in flat_object/keyword field

Signed-off-by: kkewwei <kewei.11@bytedance.com>
Signed-off-by: kkewwei <kkewwei@163.com>
  • Loading branch information
kkewwei committed Jan 10, 2025
1 parent 486f392 commit 637141b
Show file tree
Hide file tree
Showing 7 changed files with 436 additions and 42 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Added a precaution to handle extreme date values during sorting to prevent `arithmetic_exception: long overflow` ([#16812](https://github.com/opensearch-project/OpenSearch/pull/16812)).
- Add search replica stats to segment replication stats API ([#16678](https://github.com/opensearch-project/OpenSearch/pull/16678))
- Introduce framework for auxiliary transports and an experimental gRPC transport plugin ([#16534](https://github.com/opensearch-project/OpenSearch/pull/16534))
- Support searching from doc_value using termQueryCaseInsensitive/termQuery in flat_object/keyword field ([#16974](https://github.com/opensearch-project/OpenSearch/pull/16974))

### Dependencies
- Bump `com.google.cloud:google-cloud-core-http` from 2.23.0 to 2.47.0 ([#16504](https://github.com/opensearch-project/OpenSearch/pull/16504))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ setup:
{"order":"order7","issue":{"labels":{"number":7,"name":"abc7","status":1}}}
{"index":{"_index":"flat_object_doc_values_test","_id":"8"}}
{"order":"order8","issue":{"labels":{"number":8,"name":"abc8","status":1}}}
{"index":{"_index":"flat_object_doc_values_test","_id":"9"}}
{"order":"order9","issue":{"labels":{"number":9,"name":"abC8","status":1}}}
---
# Delete Index when connection is teardown
Expand All @@ -67,7 +69,53 @@ teardown:
}
}

- length: { hits.hits: 9 }
- length: { hits.hits: 10 }

# Case Insensitive Term Query with exact dot path.
- do:
search:
body: {
_source: true,
query: {
bool: {
must: [
{
term: {
issue.labels.name: {
value: "abc8",
case_insensitive: "true"
}
}
}
]
}
}
}

- length: { hits.hits: 2 }

# Case Insensitive Term Query with no path.
- do:
search:
body: {
_source: true,
query: {
bool: {
must: [
{
term: {
issue.labels: {
value: "abc8",
case_insensitive: "true"
}
}
}
]
}
}
}

- length: { hits.hits: 2 }

# Term Query with exact dot path.
- do:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1126,8 +1126,8 @@ setup:
"search on fields with only doc_values enabled":
- skip:
features: [ "headers" ]
version: " - 2.18.99"
reason: "searching with only doc_values was finally added in 2.19.0"
version: " - 2.99.99"
reason: "searching with only doc_values was finally added in 3.0.0"
- do:
indices.create:
index: test-doc-values
Expand Down Expand Up @@ -1198,6 +1198,37 @@ setup:
- '{ "some_keyword": "400", "byte": 121, "double": 101.0, "float": "801.0", "half_float": "401.0", "integer": 1291, "long": 13457, "short": 151, "unsigned_long": 10223372036854775801, "ip_field": "192.168.0.2", "boolean": true, "date_nanos": "2020-10-29T12:12:12.123456789Z", "date": "2020-10-29T12:12:12.987Z" }'
- '{ "index": { "_index": "test-doc-values", "_id": "3" } }'
- '{ "some_keyword": "5", "byte": 122, "double": 102.0, "float": "802.0", "half_float": "402.0", "integer": 1292, "long": 13458, "short": 152, "unsigned_long": 10223372036854775802, "ip_field": "192.168.0.3", "boolean": false, "date_nanos": "2024-10-29T12:12:12.123456789Z", "date": "2024-10-29T12:12:12.987Z" }'
- '{ "index": { "_index": "test-doc-values", "_id": "4" } }'
- '{ "some_keyword": "Keyword1" }'
- '{ "index": { "_index": "test-doc-values", "_id": "5" } }'
- '{ "some_keyword": "keyword1" }'

- do:
search:
rest_total_hits_as_int: true
index: test-doc-values
body:
query:
term: {
"some_keyword": {
"value": "Keyword1"
} }

- match: { hits.total: 1 }

- do:
search:
rest_total_hits_as_int: true
index: test-doc-values
body:
query:
term: {
"some_keyword": {
"value": "keyword1",
"case_insensitive": "true"
} }

- match: { hits.total: 2 }

- do:
search:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.FieldExistsQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
Expand Down Expand Up @@ -336,23 +335,17 @@ private KeywordFieldType valueFieldType() {
return (mappedFieldTypeName == null) ? valueFieldType : valueAndPathFieldType;
}

/**
 * Builds a case-insensitive term query by delegating to the keyword field type chosen by
 * {@code valueFieldType()}.
 * <p>
 * The incoming value is converted with {@code inputToString} and then passed through
 * {@code rewriteValue} before it is handed to the delegate.
 *
 * @param value   the term to search for
 * @param context the shard context, forwarded unchanged to the delegate
 * @return the query produced by the delegate keyword field type
 */
@Override
public Query termQueryCaseInsensitive(Object value, QueryShardContext context) {
    final KeywordFieldType delegate = valueFieldType();
    final String rewrittenValue = rewriteValue(inputToString(value));
    return delegate.termQueryCaseInsensitive(rewrittenValue, context);
}

/**
 * Builds a term query by delegating to the keyword field type chosen by
 * {@code valueFieldType()}.
 * <p>
 * The incoming value is converted with {@code inputToString} and then passed through
 * {@code rewriteValue} before it is handed to the delegate. Index/doc_values checks and
 * boosting are no longer done here; they are left to the delegate's {@code termQuery}.
 *
 * @param value   the term to search for
 * @param context the shard context, may be {@code null}; forwarded unchanged to the delegate
 * @return the query produced by the delegate keyword field type
 */
@Override
public Query termQuery(Object value, @Nullable QueryShardContext context) {
    return valueFieldType().termQuery(rewriteValue(inputToString(value)), context);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.MultiTermQuery;
Expand Down Expand Up @@ -391,6 +392,46 @@ protected Object rewriteForDocValue(Object value) {
return value;
}

@Override
public Query termQueryCaseInsensitive(Object value, QueryShardContext context) {
failIfNotIndexedAndNoDocValues();
if (isSearchable()) {
return super.termQueryCaseInsensitive(value, context);
} else {
BytesRef bytesRef = indexedValueForSearch(rewriteForDocValue(value));
Term term = new Term(name(), bytesRef);
Query query = AutomatonQueries.createAutomatonQuery(
term,
AutomatonQueries.toCaseInsensitiveString(bytesRef.utf8ToString(), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT),
MultiTermQuery.DOC_VALUES_REWRITE
);
if (boost() != 1f) {
query = new BoostQuery(query, boost());

Check warning on line 409 in server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java#L409

Added line #L409 was not covered by tests
}
return query;
}
}

@Override
public Query termQuery(Object value, QueryShardContext context) {
failIfNotIndexedAndNoDocValues();
if (isSearchable()) {
return super.termQuery(value, context);
} else {
Query query = SortedSetDocValuesField.newSlowRangeQuery(
name(),
indexedValueForSearch(rewriteForDocValue(value)),
indexedValueForSearch(rewriteForDocValue(value)),
true,
true
);
if (boost() != 1f) {
query = new BoostQuery(query, boost());

Check warning on line 429 in server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java#L429

Added line #L429 was not covered by tests
}
return query;
}
}

@Override
public Query termsQuery(List<?> values, QueryShardContext context) {
failIfNotIndexedAndNoDocValues();
Expand Down
Loading

0 comments on commit 637141b

Please sign in to comment.