Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update tests and test data for relevancy search functions #707

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
01af4b6
bump ml-client to 2.0 (#568) (#569)
opensearch-trigger-bot[bot] Apr 20, 2022
40f881e
2.0.0.0-rc1 release notes (#582) (#585)
opensearch-trigger-bot[bot] Apr 21, 2022
eb65d2d
Remove explicit node version for workbench (#588) (#589)
opensearch-trigger-bot[bot] Apr 25, 2022
6b37929
Remove rc1 qualifier for 2.0 (#600)
joshuali925 May 13, 2022
7e70f0b
Release Notes 2.0.0.0
vamsimanohar May 18, 2022
1317394
Merge branch 'main' of https://github.com/opensearch-project/sql into…
MitchellGale Jul 11, 2022
5432ba3
Added test cases for using pipe in flags argument inside Simple_Query…
MitchellGale Jul 13, 2022
69bf216
Added test cases for using pipe in flags argument inside Simple_Query…
MitchellGale Jul 13, 2022
475946f
Removed extra new line.
MitchellGale Jul 14, 2022
ad999c6
removed un-used imports in SimpleQueryStringQuery.java.
MitchellGale Jul 14, 2022
922471f
Removed extra spaces in import group.
MitchellGale Jul 14, 2022
478c27c
Merge pull request #89 from Bit-Quill/dev-enumlikevaluessupportforfla…
MitchellGale Jul 15, 2022
8b7e93c
Merge branch 'main' of https://github.com/opensearch-project/sql into…
MitchellGale Jul 15, 2022
4cacdb1
Some renamings to put tests under same order.
Yury-Fridlyand Jul 18, 2022
17d8f87
Rework on test: reduce data set, add new tests, etc.
Yury-Fridlyand Jul 18, 2022
00ae2c3
Update `match_phrase` and `match_phrase_prefix` tests.
Yury-Fridlyand Jul 18, 2022
6e1cd62
Merge remote-tracking branch 'origin/Integ-enumLikeValuesdSupprtForFa…
Yury-Fridlyand Jul 21, 2022
951a717
Update `simple_query_string` tests by adding complex cases for flags.
Yury-Fridlyand Jul 21, 2022
2dcb002
Merge pull request #90 from Bit-Quill/dev-update-test-data-relevancy-…
Yury-Fridlyand Jul 22, 2022
f638c40
Merge remote-tracking branch 'upstream/main' into integ-update-test-d…
Yury-Fridlyand Jul 22, 2022
109afe9
Merge remote-tracking branch 'upstream/main' into integ-update-test-d…
Yury-Fridlyand Jul 22, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions integ-test/src/test/java/org/opensearch/sql/ppl/MatchIT.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/


package org.opensearch.sql.ppl;

import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK;
import static org.opensearch.sql.util.MatcherUtils.rows;
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;

import java.io.IOException;
import org.json.JSONObject;
import org.junit.jupiter.api.Test;

/** Integration test for the PPL {@code match} function, run against the bank test index. */
public class MatchIT extends PPLIntegTestCase {

  /** Loads the bank index queried by the test below. */
  @Override
  public void init() throws IOException {
    loadIndex(Index.BANK);
  }

  /**
   * Filters on {@code firstname} with {@code match} and checks the returned data rows
   * against the single expected row {@code "Hattie"}.
   */
  @Test
  public void test_match_function() throws IOException {
    String ppl =
        String.format(
            "source=%s | where match(firstname, 'Hattie') | fields firstname", TEST_INDEX_BANK);
    JSONObject response = executeQuery(ppl);
    verifyDataRows(response, rows("Hattie"));
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/


package org.opensearch.sql.ppl;

import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_PHRASE;
import static org.opensearch.sql.util.MatcherUtils.rows;
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;

import java.io.IOException;
import org.json.JSONObject;
import org.junit.Ignore;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;

/** Integration tests for the PPL {@code match_phrase} function, run against the phrase index. */
public class MatchPhraseIT extends PPLIntegTestCase {

  /** Loads the phrase index queried by every test in this class. */
  @Override
  public void init() throws IOException {
    loadIndex(Index.PHRASE);
  }

  /** Plain {@code match_phrase} call with only the field and the phrase. */
  @Test
  public void test_match_phrase_function() throws IOException {
    String ppl =
        String.format(
            "source=%s | where match_phrase(phrase, 'quick fox') | fields phrase",
            TEST_INDEX_PHRASE);
    JSONObject response = executeQuery(ppl);
    verifyDataRows(response, rows("quick fox"), rows("quick fox here"));
  }

  /** Same check through the legacy {@code matchphrase} spelling; kept but ignored. */
  @Test
  @Ignore("Not supported actually in PPL")
  public void test_matchphrase_legacy_function() throws IOException {
    String ppl =
        String.format(
            "source=%s | where matchphrase(phrase, 'quick fox') | fields phrase",
            TEST_INDEX_PHRASE);
    JSONObject response = executeQuery(ppl);
    verifyDataRows(response, rows("quick fox"), rows("quick fox here"));
  }

  /** {@code match_phrase} with {@code slop = 2}; both word orders of the phrase are expected. */
  @Test
  public void test_match_phrase_with_slop() throws IOException {
    String ppl =
        String.format(
            "source=%s | where match_phrase(phrase, 'brown fox', slop = 2) | fields phrase",
            TEST_INDEX_PHRASE);
    JSONObject response = executeQuery(ppl);
    verifyDataRows(response, rows("brown fox"), rows("fox brown"));
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import org.json.JSONObject;
import org.junit.Test;

public class MatchPhrasePrefixWhereCommandIT extends PPLIntegTestCase {
public class MatchPhrasePrefixIT extends PPLIntegTestCase {

@Override
public void init() throws IOException {
Expand Down Expand Up @@ -93,7 +93,7 @@ public void zero_term_query_all() throws IOException {

@Test
public void slop_is_2() throws IOException {
// When slop is 0, the terms are matched exactly in the order specified.
// When slop is 2, the terms are matched exactly in the order specified.
// 'open' is used to match prefix of the next term.
String query = "source = %s" +
"| where match_phrase_prefix(Tags, 'gas ta', slop=2) " +
Expand All @@ -104,7 +104,7 @@ public void slop_is_2() throws IOException {

@Test
public void slop_is_3() throws IOException {
// When slop is 2, results will include phrases where the query terms are transposed.
// When slop is 3, results will include phrases where the query terms are transposed.
String query = "source = %s" +
"| where match_phrase_prefix(Tags, 'gas ta', slop=3)" +
"| fields Tags";
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/


package org.opensearch.sql.ppl;

import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BEER;

import java.io.IOException;

import org.json.JSONObject;
import org.junit.Test;

/** Integration tests for the PPL {@code multi_match} function, run against the beer index. */
public class MultiMatchIT extends PPLIntegTestCase {

  /** Loads the beer index queried by every test in this class. */
  @Override
  public void init() throws IOException {
    loadIndex(Index.BEER);
  }

  /** Mandatory arguments only: a field list (with and without boosts) and the search text. */
  @Test
  public void test_multi_match() throws IOException {
    String query = "SOURCE=" + TEST_INDEX_BEER
        + " | WHERE multi_match([\\\"Tags\\\" ^ 1.5, Title, `Body` 4.2], 'taste') | fields Id";
    var result = executeQuery(query);
    assertEquals(16, result.getInt("total"));
  }

  /** Passes a long list of optional arguments in one call and checks the total hit count. */
  @Test
  public void test_multi_match_all_params() throws IOException {
    String query = "SOURCE=" + TEST_INDEX_BEER
        + " | WHERE multi_match(['Body', Tags], 'taste beer', operator='and', analyzer=english,"
        + "auto_generate_synonyms_phrase_query=true, boost = 0.77, cutoff_frequency=0.33,"
        + "fuzziness = 'AUTO:1,5', fuzzy_transpositions = false, lenient = true, max_expansions = 25,"
        + "minimum_should_match = '2<-25% 9<-3', prefix_length = 7, tie_breaker = 0.3,"
        + "type = most_fields, slop = 2, zero_terms_query = 'ALL') | fields Id";
    var result = executeQuery(query);
    assertEquals(10, result.getInt("total"));
  }

  /**
   * Wildcard field names: {@code 'T*'} must give a different total than the single field
   * {@code 'Tags'}, and {@code '*Date'} must be accepted as a field pattern.
   */
  @Test
  public void test_wildcard_multi_match() throws IOException {
    String query1 = "SOURCE=" + TEST_INDEX_BEER
        + " | WHERE multi_match(['Tags'], 'taste') | fields Id";
    var result1 = executeQuery(query1);
    String query2 = "SOURCE=" + TEST_INDEX_BEER
        + " | WHERE multi_match(['T*'], 'taste') | fields Id";
    var result2 = executeQuery(query2);
    assertNotEquals(result2.getInt("total"), result1.getInt("total"));

    // This check previously called simple_query_string, so the date-wildcard case never
    // exercised multi_match, the function this class is meant to cover.
    // NOTE(review): the expected total of 10 is carried over from that earlier form of the
    // check; confirm it against the beer test data.
    String query3 = "source=" + TEST_INDEX_BEER
        + " | where multi_match(['*Date'], '2014-01-22')";
    JSONObject result3 = executeQuery(query3);
    assertEquals(10, result3.getInt("total"));
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@ public void init() throws IOException {
public void all_fields_test() throws IOException {
String query = "source=" + TEST_INDEX_BEER + " | where query_string([`*`], 'taste')";
JSONObject result = executeQuery(query);
assertEquals(713, result.getInt("total"));
assertEquals(16, result.getInt("total"));
}

@Test
public void mandatory_params_test() throws IOException {
String query = "source=" + TEST_INDEX_BEER + " | where query_string([\\\"Tags\\\" ^ 1.5, Title, `Body` 4.2], 'taste')";
JSONObject result = executeQuery(query);
assertEquals(713, result.getInt("total"));
assertEquals(16, result.getInt("total"));
}

@Test
Expand All @@ -44,7 +44,7 @@ public void all_params_test() throws IOException {
+ "fuzzy_transpositions = false, lenient = true, fuzzy_max_expansions = 25,"
+ "minimum_should_match = '2<-25% 9<-3', fuzzy_prefix_length = 7)";
JSONObject result = executeQuery(query);
assertEquals(1990, result.getInt("total"));
assertEquals(49, result.getInt("total"));
}

@Test
Expand All @@ -57,8 +57,8 @@ public void wildcard_test() throws IOException {
JSONObject result2 = executeQuery(query2);
assertNotEquals(result1.getInt("total"), result2.getInt("total"));

String query3 = "source=" + TEST_INDEX_BEER + " | where query_string(['*Date'], '2015-01-29')";
String query3 = "source=" + TEST_INDEX_BEER + " | where query_string(['*Date'], '2014-01-22')";
JSONObject result3 = executeQuery(query3);
assertEquals(5, result3.getInt("total"));
assertEquals(10, result3.getInt("total"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
import static org.opensearch.sql.util.MatcherUtils.verifySome;

import java.io.IOException;

import org.json.JSONObject;
import org.junit.Test;

public class RelevanceFunctionIT extends PPLIntegTestCase {
Expand All @@ -22,64 +24,102 @@ public void init() throws IOException {
}

@Test
public void test_multi_match() throws IOException {
String query = "SOURCE=" + TEST_INDEX_BEER
+ " | WHERE multi_match([\\\"Tags\\\" ^ 1.5, Title, `Body` 4.2], 'taste') | fields Id";
var result = executeQuery(query);
assertEquals(713, result.getInt("total"));
public void test_wildcard_simple_query_string() throws IOException {
String query1 = "SOURCE=" + TEST_INDEX_BEER
+ " | WHERE simple_query_string(['Tags'], 'taste') | fields Id";
var result1 = executeQuery(query1);
String query2 = "SOURCE=" + TEST_INDEX_BEER
+ " | WHERE simple_query_string(['T*'], 'taste') | fields Id";
var result2 = executeQuery(query2);
assertNotEquals(result2.getInt("total"), result1.getInt("total"));
}

/*
Dash/minus ('-') character is interpreted as NOT flag if it is activated by NOT or ALL `flags` value
`query1` searches for entries with '-free' in `Body`, `query2` - for entries without 'free' in `Body`
*/
@Test
public void test_simple_query_string() throws IOException {
String query = "SOURCE=" + TEST_INDEX_BEER
+ " | WHERE simple_query_string([\\\"Tags\\\" ^ 1.5, Title, `Body` 4.2], 'taste') | fields Id";
public void verify_flags_in_simple_query_string() throws IOException {
String query1 = "SOURCE="
+ TEST_INDEX_BEER + " | WHERE simple_query_string(['Body'], '-free', flags='NONE|PREFIX|ESCAPE')";
var result1 = executeQuery(query1);
String query2 = "SOURCE="
+ TEST_INDEX_BEER + " | WHERE simple_query_string([Body], '-free', flags='NOT|AND|OR')";
var result2 = executeQuery(query2);
assertNotEquals(result2.getInt("total"), result1.getInt("total"));

String query = "SOURCE=" + TEST_INDEX_BEER;
var result = executeQuery(query);
assertEquals(713, result.getInt("total"));
assertEquals(result2.getInt("total") + result1.getInt("total"), result.getInt("total"));
}

/*
`escape` parameter switches regex-specific character escaping.
`query1` searches for entries with "\\?" in `Title`, `query2` - for "?"
Ref: QueryParserBase::escape in lucene code.
*/
@Test
public void test_multi_match_all_params() throws IOException {
String query = "SOURCE=" + TEST_INDEX_BEER
+ " | WHERE multi_match(['Body', Tags], 'taste beer', operator='and', analyzer=english,"
+ "auto_generate_synonyms_phrase_query=true, boost = 0.77, cutoff_frequency=0.33,"
+ "fuzziness = 'AUTO:1,5', fuzzy_transpositions = false, lenient = true, max_expansions = 25,"
+ "minimum_should_match = '2<-25% 9<-3', prefix_length = 7, tie_breaker = 0.3,"
+ "type = most_fields, slop = 2, zero_terms_query = 'ALL') | fields Id";
var result = executeQuery(query);
assertEquals(424, result.getInt("total"));
public void verify_escape_in_query_string() throws IOException {
String query1 = "SOURCE="
+ TEST_INDEX_BEER + " | WHERE query_string([Title], '?', escape=true);";
var result1 = executeQuery(query1);
String query2 = "SOURCE="
+ TEST_INDEX_BEER + " | WHERE query_string([Title], '?', escape=false);";
var result2 = executeQuery(query2);
assertEquals(0, result1.getInt("total"));
assertEquals(8, result2.getInt("total"));
}

/*
`default_operator`/`operator` in relevance search functions defines whether to search for all or for any words given.
`query1` returns matches with 'beer' and matches with 'taste',
`query2` returns matches with 'beer' and with 'taste' together.
*/
@Test
public void test_simple_query_string_all_params() throws IOException {
String query = "SOURCE=" + TEST_INDEX_BEER
+ " | WHERE simple_query_string(['Body', Tags, Title], 'taste beer', default_operator='or',"
+ "analyzer=english, analyze_wildcard = false, quote_field_suffix = '.exact',"
+ "auto_generate_synonyms_phrase_query=true, boost = 0.77, flags='PREFIX',"
+ "fuzzy_transpositions = false, lenient = true, fuzzy_max_expansions = 25,"
+ "minimum_should_match = '2<-25% 9<-3', fuzzy_prefix_length = 7) | fields Id";
var result = executeQuery(query);
assertEquals(1990, result.getInt("total"));
public void verify_default_operator_in_query_string() throws IOException {
String query1 = "SOURCE="
+ TEST_INDEX_BEER + " | WHERE query_string([Title], 'beer taste', default_operator='OR')";
var result1 = executeQuery(query1);
String query2 = "SOURCE="
+ TEST_INDEX_BEER + " | WHERE query_string([Title], 'beer taste', default_operator='AND')";
var result2 = executeQuery(query2);
assertEquals(16, result1.getInt("total"));
assertEquals(4, result2.getInt("total"));
}

@Test
public void test_wildcard_multi_match() throws IOException {
String query1 = "SOURCE=" + TEST_INDEX_BEER
+ " | WHERE multi_match(['Tags'], 'taste') | fields Id";
public void verify_default_operator_in_simple_query_string() throws IOException {
String query1 = "SOURCE="
+ TEST_INDEX_BEER + " | WHERE simple_query_string([Title], 'beer taste', default_operator='OR')";
var result1 = executeQuery(query1);
String query2 = "SOURCE=" + TEST_INDEX_BEER
+ " | WHERE multi_match(['T*'], 'taste') | fields Id";
String query2 = "SOURCE="
+ TEST_INDEX_BEER + " | WHERE simple_query_string([Title], 'beer taste', default_operator='AND')";
var result2 = executeQuery(query2);
assertNotEquals(result2.getInt("total"), result1.getInt("total"));
assertEquals(16, result1.getInt("total"));
assertEquals(4, result2.getInt("total"));
}

@Test
public void test_wildcard_simple_query_string() throws IOException {
String query1 = "SOURCE=" + TEST_INDEX_BEER
+ " | WHERE simple_query_string(['Tags'], 'taste') | fields Id";
public void verify_default_operator_in_multi_match() throws IOException {
String query1 = "SOURCE="
+ TEST_INDEX_BEER + " | WHERE multi_match([Title], 'beer taste', operator='OR')";
var result1 = executeQuery(query1);
String query2 = "SOURCE=" + TEST_INDEX_BEER
+ " | WHERE simple_query_string(['T*'], 'taste') | fields Id";
String query2 = "SOURCE="
+ TEST_INDEX_BEER + " | WHERE multi_match([Title], 'beer taste', operator='AND')";
var result2 = executeQuery(query2);
assertNotEquals(result2.getInt("total"), result1.getInt("total"));
assertEquals(16, result1.getInt("total"));
assertEquals(4, result2.getInt("total"));
}

@Test
public void verify_operator_in_match() throws IOException {
  // Both queries use the same field and search text; only the `operator` argument differs.
  String anyTermQuery = "SOURCE="
      + TEST_INDEX_BEER + " | WHERE match(Title, 'beer taste', operator='OR')";
  String allTermsQuery = "SOURCE="
      + TEST_INDEX_BEER + " | WHERE match(Title, 'beer taste', operator='AND')";

  var anyTermResult = executeQuery(anyTermQuery);
  var allTermsResult = executeQuery(allTermsQuery);

  assertEquals(16, anyTermResult.getInt("total"));
  assertEquals(4, allTermsResult.getInt("total"));
}
}
Loading