Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for wildcard_query function to the new engine (#156) #1108

Merged
merged 4 commits into from
Dec 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions core/src/main/java/org/opensearch/sql/expression/DSL.java
Original file line number Diff line number Diff line change
Expand Up @@ -715,6 +715,10 @@ public static FunctionExpression match_bool_prefix(Expression... args) {
return compile(FunctionProperties.None, BuiltinFunctionName.MATCH_BOOL_PREFIX, args);
}

/**
 * Builds the function expression for {@code wildcard_query}.
 *
 * @param args arguments forwarded unchanged to {@code compile}
 * @return the expression {@code compile} produces for
 *     {@link BuiltinFunctionName#WILDCARD_QUERY} with {@code FunctionProperties.None}
 */
public static FunctionExpression wildcard_query(Expression... args) {
  return compile(FunctionProperties.None, BuiltinFunctionName.WILDCARD_QUERY, args);
}

public static FunctionExpression now(FunctionProperties functionProperties,
Expression... args) {
return compile(functionProperties, BuiltinFunctionName.NOW, args);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,9 @@ public enum BuiltinFunctionName {
MATCHQUERY(FunctionName.of("matchquery")),
MULTI_MATCH(FunctionName.of("multi_match")),
MULTIMATCH(FunctionName.of("multimatch")),
MULTIMATCHQUERY(FunctionName.of("multimatchquery"));
MULTIMATCHQUERY(FunctionName.of("multimatchquery")),
WILDCARDQUERY(FunctionName.of("wildcardquery")),
WILDCARD_QUERY(FunctionName.of("wildcard_query"));

private final FunctionName name;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ public void register(BuiltinFunctionRepository repository) {
repository.register(match_phrase(BuiltinFunctionName.MATCHPHRASE));
repository.register(match_phrase(BuiltinFunctionName.MATCHPHRASEQUERY));
repository.register(match_phrase_prefix());
repository.register(wildcard_query(BuiltinFunctionName.WILDCARD_QUERY));
repository.register(wildcard_query(BuiltinFunctionName.WILDCARDQUERY));
}

private static FunctionResolver match_bool_prefix() {
Expand Down Expand Up @@ -83,6 +85,11 @@ private static FunctionResolver query_string() {
return new RelevanceFunctionResolver(funcName, STRUCT);
}

/**
 * Creates the resolver for a wildcard query function under the given built-in name.
 *
 * @param wildcardQuery built-in function whose {@code getName()} value is handed to the resolver
 * @return a {@code RelevanceFunctionResolver} constructed from that name and {@code STRING}
 */
private static FunctionResolver wildcard_query(BuiltinFunctionName wildcardQuery) {
  return new RelevanceFunctionResolver(wildcardQuery.getName(), STRING);
}

public static class OpenSearchFunction extends FunctionExpression {
private final FunctionName functionName;
private final List<Expression> arguments;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,34 @@ void query_string_expression_two_fields() {
AstDSL.unresolvedArg("query", stringLiteral("query_value"))));
}

/**
 * Checks {@code wildcard_query} with only the {@code field} and {@code query} arguments.
 *
 * <p>NOTE(review): {@code assertAnalyzeEqual} is defined outside this view; presumably it
 * analyzes the AST passed second and compares the result with the expression passed first.
 */
@Test
void wildcard_query_expression() {
assertAnalyzeEqual(
// First argument: DSL expression built from named string-literal arguments.
DSL.wildcard_query(
DSL.namedArgument("field", DSL.literal("test")),
DSL.namedArgument("query", DSL.literal("query_value*"))),
// Second argument: AST function with the same two arguments in unresolved form.
AstDSL.function("wildcard_query",
unresolvedArg("field", stringLiteral("test")),
unresolvedArg("query", stringLiteral("query_value*"))));
}

/**
 * Checks {@code wildcard_query} with {@code field}, {@code query} and the optional
 * {@code boost}, {@code case_insensitive} and {@code rewrite} arguments.
 *
 * <p>NOTE(review): {@code assertAnalyzeEqual} is defined outside this view; presumably it
 * analyzes the AST passed second and compares the result with the expression passed first.
 */
@Test
void wildcard_query_expression_all_params() {
assertAnalyzeEqual(
// First argument: DSL expression. Every value, including the numeric boost and the
// boolean case_insensitive flag, is supplied as a string literal.
DSL.wildcard_query(
DSL.namedArgument("field", DSL.literal("test")),
DSL.namedArgument("query", DSL.literal("query_value*")),
DSL.namedArgument("boost", DSL.literal("1.5")),
DSL.namedArgument("case_insensitive", DSL.literal("true")),
DSL.namedArgument("rewrite", DSL.literal("scoring_boolean"))),
// Second argument: AST function carrying the same five arguments, in the same order,
// as unresolved string literals.
AstDSL.function("wildcard_query",
unresolvedArg("field", stringLiteral("test")),
unresolvedArg("query", stringLiteral("query_value*")),
unresolvedArg("boost", stringLiteral("1.5")),
unresolvedArg("case_insensitive", stringLiteral("true")),
unresolvedArg("rewrite", stringLiteral("scoring_boolean"))));
}

@Test
public void match_phrase_prefix_all_params() {
assertAnalyzeEqual(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,4 +197,12 @@ void query_string() {
fields.getValue(), query.getValue()),
expr.toString());
}

/**
 * Verifies that a {@code wildcard_query} expression built from {@code field} and
 * {@code query} renders as {@code wildcard_query(field=..., query=...)}.
 */
@Test
void wildcard_query() {
  String expected = String.format("wildcard_query(field=%s, query=%s)",
      field.getValue(), query.getValue());
  assertEquals(expected, DSL.wildcard_query(field, query).toString());
}
}
55 changes: 55 additions & 0 deletions docs/user/dql/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3299,6 +3299,59 @@ Example searching for field Tags::
| [Winnie-the-<em>Pooh</em>] |
+----------------------------------------------+

WILDCARD_QUERY
--------------

Description
>>>>>>>>>>>

``wildcard_query(field_expression, query_expression[, option=<option_value>]*)``

The ``wildcard_query`` function maps to the ``wildcard`` query used in the search engine. It returns documents whose specified field matches the provided wildcard pattern.
OpenSearch supports wildcard characters ``*`` and ``?``. See the full description here: https://opensearch.org/docs/latest/opensearch/query-dsl/term/#wildcards.
Use a backslash ``\`` to escape the SQL wildcard characters ``%`` and ``_``, writing them as ``\%`` and ``\_``.

Available parameters include:

- boost
- case_insensitive
- rewrite

For backward compatibility, ``wildcardquery`` is also supported as an alias of ``wildcard_query``.

Example with only the ``field`` and ``query`` expressions; all other parameters are set to their default values::

os> select Body from wildcard where wildcard_query(Body, 'test wildcard*');
fetched rows / total rows = 7/7
+-------------------------------------------+
| Body |
|-------------------------------------------|
| test wildcard |
| test wildcard in the end of the text% |
| test wildcard in % the middle of the text |
| test wildcard %% beside each other |
| test wildcard in the end of the text_ |
| test wildcard in _ the middle of the text |
| test wildcard __ beside each other |
+-------------------------------------------+

Another example to show how to set custom values for the optional parameters::

os> select Body from wildcard where wildcard_query(Body, 'test wildcard*', boost=0.7, case_insensitive=true, rewrite='constant_score');
fetched rows / total rows = 8/8
+-------------------------------------------+
| Body |
|-------------------------------------------|
| test wildcard |
| test wildcard in the end of the text% |
| test wildcard in % the middle of the text |
| test wildcard %% beside each other |
| test wildcard in the end of the text_ |
| test wildcard in _ the middle of the text |
| test wildcard __ beside each other |
| tEsT wIlDcArD sensitive cases |
+-------------------------------------------+

System Functions
================

Expand All @@ -3323,3 +3376,5 @@ Example::
|----------------+---------------+-----------------+------------------|
| DATE | INTEGER | DATETIME | STRUCT |
+----------------+---------------+-----------------+------------------+


22 changes: 22 additions & 0 deletions doctest/test_data/wildcard.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{"index":{"_id":"0"}}
{"Body":"test wildcard"}
{"index":{"_id":"1"}}
{"Body":"test wildcard in the end of the text%"}
{"index":{"_id":"2"}}
{"Body":"%test wildcard in the beginning of the text"}
{"index":{"_id":"3"}}
{"Body":"test wildcard in % the middle of the text"}
{"index":{"_id":"4"}}
{"Body":"test wildcard %% beside each other"}
{"index":{"_id":"5"}}
{"Body":"test wildcard in the end of the text_"}
{"index":{"_id":"6"}}
{"Body":"_test wildcard in the beginning of the text"}
{"index":{"_id":"7"}}
{"Body":"test wildcard in _ the middle of the text"}
{"index":{"_id":"8"}}
{"Body":"test wildcard __ beside each other"}
{"index":{"_id":"9"}}
{"Body":"test backslash wildcard \\_"}
{"index":{"_id":"10"}}
{"Body":"tEsT wIlDcArD sensitive cases"}
4 changes: 3 additions & 1 deletion doctest/test_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
NYC_TAXI = "nyc_taxi"
BOOKS = "books"
APACHE = "apache"
WILDCARD = "wildcard"


class DocTestConnection(OpenSearchConnection):
Expand Down Expand Up @@ -92,6 +93,7 @@ def set_up_test_indices(test):
load_file("nyc_taxi.json", index_name=NYC_TAXI)
load_file("books.json", index_name=BOOKS)
load_file("apache.json", index_name=APACHE)
load_file("wildcard.json", index_name=WILDCARD)


def load_file(filename, index_name):
Expand Down Expand Up @@ -120,7 +122,7 @@ def set_up(test):

def tear_down(test):
# drop leftover tables after each test
test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE], ignore_unavailable=True)
test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE, WILDCARD], ignore_unavailable=True)


docsuite = partial(doctest.DocFileSuite,
Expand Down
9 changes: 9 additions & 0 deletions doctest/test_mapping/wildcard.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"mappings" : {
"properties" : {
"Body" : {
"type" : "keyword"
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,11 @@ public enum Index {
CALCS(TestsConstants.TEST_INDEX_CALCS,
"calcs",
getMappingFile("calcs_index_mappings.json"),
"src/test/resources/calcs.json"),;
"src/test/resources/calcs.json"),
WILDCARD(TestsConstants.TEST_INDEX_WILDCARD,
"wildcard",
getMappingFile("wildcard_index_mappings.json"),
"src/test/resources/wildcard.json"),;

private final String name;
private final String type;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ public class TestsConstants {
public final static String TEST_INDEX_BEER = TEST_INDEX + "_beer";
public final static String TEST_INDEX_NULL_MISSING = TEST_INDEX + "_null_missing";
public final static String TEST_INDEX_CALCS = TEST_INDEX + "_calcs";
public final static String TEST_INDEX_WILDCARD = TEST_INDEX + "_wildcard";

public final static String DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";
public final static String TS_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss.SSS";
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/


package org.opensearch.sql.ppl;

import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WILDCARD;
import static org.opensearch.sql.util.MatcherUtils.rows;
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;

import java.io.IOException;
import org.json.JSONObject;
import org.junit.Test;

/**
 * Integration tests for the PPL {@code Like} function, run against the index loaded from
 * {@code Index.WILDCARD}.
 */
public class LikeQueryIT extends PPLIntegTestCase {

  /** Loads the wildcard test index used by every test in this class. */
  @Override
  public void init() throws IOException {
    loadIndex(Index.WILDCARD);
  }

  /** An unescaped trailing {@code %} on a keyword field returns the seven rows listed below. */
  @Test
  public void test_like_with_percent() throws IOException {
    JSONObject response = executeQuery(
        "source=" + TEST_INDEX_WILDCARD
            + " | WHERE Like(KeywordBody, 'test wildcard%') | fields KeywordBody");
    verifyDataRows(
        response,
        rows("test wildcard"),
        rows("test wildcard in the end of the text%"),
        rows("test wildcard in % the middle of the text"),
        rows("test wildcard %% beside each other"),
        rows("test wildcard in the end of the text_"),
        rows("test wildcard in _ the middle of the text"),
        rows("test wildcard __ beside each other"));
  }

  /** A backslash-escaped leading {@code %} returns only the row that starts with {@code %}. */
  @Test
  public void test_like_with_escaped_percent() throws IOException {
    JSONObject response = executeQuery(
        "source=" + TEST_INDEX_WILDCARD
            + " | WHERE Like(KeywordBody, '\\\\%test wildcard%') | fields KeywordBody");
    verifyDataRows(
        response,
        rows("%test wildcard in the beginning of the text"));
  }

  /** A backslash-escaped leading {@code _} returns only the row that starts with {@code _}. */
  @Test
  public void test_like_in_where_with_escaped_underscore() throws IOException {
    JSONObject response = executeQuery(
        "source=" + TEST_INDEX_WILDCARD
            + " | WHERE Like(KeywordBody, '\\\\_test wildcard%') | fields KeywordBody");
    verifyDataRows(
        response,
        rows("_test wildcard in the beginning of the text"));
  }

  /** A single-word pattern on {@code TextBody} is expected to report a total of 9. */
  @Test
  public void test_like_on_text_field_with_one_word() throws IOException {
    assertEquals(
        9,
        executeQuery(
            "source=" + TEST_INDEX_WILDCARD
                + " | WHERE Like(TextBody, 'test*') | fields TextBody")
            .getInt("total"));
  }

  /** A single-word pattern on {@code TextKeywordBody} is expected to report a total of 8. */
  @Test
  public void test_like_on_text_keyword_field_with_one_word() throws IOException {
    assertEquals(
        8,
        executeQuery(
            "source=" + TEST_INDEX_WILDCARD
                + " | WHERE Like(TextKeywordBody, 'test*') | fields TextKeywordBody")
            .getInt("total"));
  }

  /** A two-word pattern on {@code TextKeywordBody} is expected to report a total of 7. */
  @Test
  public void test_like_on_text_keyword_field_with_greater_than_one_word() throws IOException {
    assertEquals(
        7,
        executeQuery(
            "source=" + TEST_INDEX_WILDCARD
                + " | WHERE Like(TextKeywordBody, 'test wild*') | fields TextKeywordBody")
            .getInt("total"));
  }

  /**
   * A two-word pattern on {@code TextBody} is expected to report a total of 0.
   *
   * <p>NOTE(review): presumably because the text field is matched per analyzed term, so a
   * pattern containing a space cannot match; the index mapping is not visible here, confirm.
   */
  @Test
  public void test_like_on_text_field_with_greater_than_one_word() throws IOException {
    assertEquals(
        0,
        executeQuery(
            "source=" + TEST_INDEX_WILDCARD
                + " | WHERE Like(TextBody, 'test wild*') | fields TextBody")
            .getInt("total"));
  }

  /** The explain output for a {@code TextKeywordBody} query must mention its keyword subfield. */
  @Test
  public void test_convert_field_text_to_keyword() throws IOException {
    String explained = explainQueryToString(
        "source=" + TEST_INDEX_WILDCARD
            + " | WHERE Like(TextKeywordBody, '*') | fields TextKeywordBody");
    assertTrue(explained.contains("TextKeywordBody.keyword"));
  }
}
Loading