From 231421869895f54b33cdfaabaf1b38d505b30202 Mon Sep 17 00:00:00 2001 From: Niyati Aggarwal Date: Fri, 5 Apr 2024 09:09:39 -0700 Subject: [PATCH 1/4] Refactoring globMatch using simpleMatchWithNormalizedStrings from Regex Signed-off-by: Niyati Aggarwal --- .../main/java/org/opensearch/common/Glob.java | 56 +++++++++---------- .../org/opensearch/common/regex/Regex.java | 35 +----------- 2 files changed, 29 insertions(+), 62 deletions(-) diff --git a/libs/common/src/main/java/org/opensearch/common/Glob.java b/libs/common/src/main/java/org/opensearch/common/Glob.java index daf045dd49e3a..509f9391a9cdd 100644 --- a/libs/common/src/main/java/org/opensearch/common/Glob.java +++ b/libs/common/src/main/java/org/opensearch/common/Glob.java @@ -49,37 +49,35 @@ public class Glob { * @return whether the String matches the given pattern */ public static boolean globMatch(String pattern, String str) { - if (pattern == null || str == null) { - return false; - } - int firstIndex = pattern.indexOf('*'); - if (firstIndex == -1) { - return pattern.equals(str); - } - if (firstIndex == 0) { - if (pattern.length() == 1) { - return true; - } - int nextIndex = pattern.indexOf('*', firstIndex + 1); - if (nextIndex == -1) { - return str.endsWith(pattern.substring(1)); - } else if (nextIndex == 1) { - // Double wildcard "**" - skipping the first "*" - return globMatch(pattern.substring(1), str); - } - String part = pattern.substring(1, nextIndex); - int partIndex = str.indexOf(part); - while (partIndex != -1) { - if (globMatch(pattern.substring(nextIndex), str.substring(partIndex + part.length()))) { - return true; - } - partIndex = str.indexOf(part, partIndex + 1); + int sIdx = 0, pIdx = 0, match = 0, wildcardIdx = -1; + while (sIdx < str.length()) { + // both chars matching, incrementing both pointers + if (pIdx < pattern.length() && str.charAt(sIdx) == pattern.charAt(pIdx)) { + sIdx++; + pIdx++; + } else if (pIdx < pattern.length() && pattern.charAt(pIdx) == '*') { + // wildcard found, only incrementing pattern pointer + wildcardIdx = pIdx; + match = sIdx; + pIdx++; + } else if (wildcardIdx != -1) { + // last pattern pointer was a wildcard, incrementing string pointer + pIdx = wildcardIdx + 1; + match++; + sIdx = match; + } else { + // current pattern pointer is not a wildcard, last pattern pointer was also not a wildcard + // characters do not match + return false; } - return false; } - return (str.length() >= firstIndex - && pattern.substring(0, firstIndex).equals(str.substring(0, firstIndex)) - && globMatch(pattern.substring(firstIndex), str.substring(firstIndex))); + + // check for remaining characters in pattern + while (pIdx < pattern.length() && pattern.charAt(pIdx) == '*') { + pIdx++; + } + + return pIdx == pattern.length(); } } diff --git a/server/src/main/java/org/opensearch/common/regex/Regex.java b/server/src/main/java/org/opensearch/common/regex/Regex.java index 323b460af62df..6d8b5c3585c4c 100644 --- a/server/src/main/java/org/opensearch/common/regex/Regex.java +++ b/server/src/main/java/org/opensearch/common/regex/Regex.java @@ -35,6 +35,7 @@ import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.Operations; +import org.opensearch.common.Glob; import org.opensearch.core.common.Strings; import java.util.ArrayList; @@ -125,39 +126,7 @@ public static boolean simpleMatch(String pattern, String str, boolean caseInsens pattern = Strings.toLowercaseAscii(pattern); str = Strings.toLowercaseAscii(str); } - return simpleMatchWithNormalizedStrings(pattern, str); - } - - private static boolean simpleMatchWithNormalizedStrings(String pattern, String str) { - int sIdx = 0, pIdx = 0, match = 0, wildcardIdx = -1; - while (sIdx < str.length()) { - // both chars matching, incrementing both pointers - if (pIdx < pattern.length() && str.charAt(sIdx) == pattern.charAt(pIdx)) { - sIdx++; - pIdx++; - } else if (pIdx < pattern.length() && pattern.charAt(pIdx) == '*') { - // wildcard found, only incrementing pattern pointer - wildcardIdx = pIdx; - match = sIdx; - pIdx++; - } else if (wildcardIdx != -1) { - // last pattern pointer was a wildcard, incrementing string pointer - pIdx = wildcardIdx + 1; - match++; - sIdx = match; - } else { - // current pattern pointer is not a wildcard, last pattern pointer was also not a wildcard - // characters do not match - return false; - } - } - - // check for remaining characters in pattern - while (pIdx < pattern.length() && pattern.charAt(pIdx) == '*') { - pIdx++; - } - - return pIdx == pattern.length(); + return Glob.globMatch(pattern, str); } /** From fb3f588af9ea10a981c5e56913d1670656d3efc3 Mon Sep 17 00:00:00 2001 From: Niyati Aggarwal Date: Fri, 5 Apr 2024 09:31:35 -0700 Subject: [PATCH 2/4] Adding entry to CHANGELOG.md Signed-off-by: Niyati Aggarwal --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 06ad1f38c4872..a05a59c5d8604 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -110,6 +110,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Concurrent Segment Search] Perform buildAggregation concurrently and support Composite Aggregations ([#12697](https://github.com/opensearch-project/OpenSearch/pull/12697)) - [Concurrent Segment Search] Disable concurrent segment search for system indices and throttled requests ([#12954](https://github.com/opensearch-project/OpenSearch/pull/12954)) - Derived fields support to derive field values at query time without indexing ([#12569](https://github.com/opensearch-project/OpenSearch/pull/12569)) +- Refactoring globMatch using simpleMatchWithNormalizedStrings from Regex ([#13104](https://github.com/opensearch-project/OpenSearch/pull/13104)) - Detect breaking changes on pull requests ([#9044](https://github.com/opensearch-project/OpenSearch/pull/9044)) - Add cluster primary balance contraint for rebalancing with buffer ([#12656](https://github.com/opensearch-project/OpenSearch/pull/12656)) - [Remote Store] Make translog transfer timeout configurable ([#12704](https://github.com/opensearch-project/OpenSearch/pull/12704)) From ab6c653bc823d87425fb5b138c24ba9c1db0048d Mon Sep 17 00:00:00 2001 From: Niyati Aggarwal Date: Fri, 5 Apr 2024 13:35:59 -0700 Subject: [PATCH 3/4] Adding tests for GlobMatch Signed-off-by: Niyati Aggarwal --- .../main/java/org/opensearch/common/Glob.java | 3 + .../java/org/opensearch/common/GlobTests.java | 67 +++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 server/src/test/java/org/opensearch/common/GlobTests.java diff --git a/libs/common/src/main/java/org/opensearch/common/Glob.java b/libs/common/src/main/java/org/opensearch/common/Glob.java index 509f9391a9cdd..b390a3ca84182 100644 --- a/libs/common/src/main/java/org/opensearch/common/Glob.java +++ b/libs/common/src/main/java/org/opensearch/common/Glob.java @@ -49,6 +49,9 @@ public class Glob { * @return whether the String matches the given pattern */ public static boolean globMatch(String pattern, String str) { + if (pattern == null || str == null) { + return false; + } int sIdx = 0, pIdx = 0, match = 0, wildcardIdx = -1; while (sIdx < str.length()) { // both chars matching, incrementing both pointers diff --git a/server/src/test/java/org/opensearch/common/GlobTests.java b/server/src/test/java/org/opensearch/common/GlobTests.java new file mode 100644 index 0000000000000..2bbe157be43cc --- /dev/null +++ b/server/src/test/java/org/opensearch/common/GlobTests.java @@ -0,0 +1,67 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.common; + +import org.opensearch.test.OpenSearchTestCase; + +public class GlobTests extends OpenSearchTestCase { + + public void testGlobMatchForNull() { + assertFalse(Glob.globMatch(null, "test")); + assertFalse(Glob.globMatch("test", null)); + assertFalse(Glob.globMatch(null, null)); + } + + public void testGlobMatchNoWildcard() { + assertTrue(Glob.globMatch("abcd", "abcd")); + assertFalse(Glob.globMatch("abcd", "foobar")); + } + + public void testGlobMatchSingleWildcard() { + assertTrue(Glob.globMatch("*foo", "barfoo")); + assertFalse(Glob.globMatch("*foo", "foobar")); + assertTrue(Glob.globMatch("foo*", "foobarfoo")); + assertFalse(Glob.globMatch("foo*", "barfoobar")); + assertTrue(Glob.globMatch("foo*bar", "foobarnfoosbar")); + } + + public void testGlobMatchMultipleWildcards() { + assertTrue(Glob.globMatch("*foo*", "barfoobar")); + assertFalse(Glob.globMatch("*foo*", "baroofbar")); + assertTrue(Glob.globMatch("*foo*bar", "abcdfooefghbar")); + assertFalse(Glob.globMatch("*foo*bar", "foonotbars")); + } + + public void testGlobalMatchDoubleWildcard() { + assertTrue(Glob.globMatch("**foo", "barbarfoo")); + assertFalse(Glob.globMatch("**foo", "barbarfoowoof")); + assertTrue(Glob.globMatch("**bar**", "foobarfoo")); + assertFalse(Glob.globMatch("**bar**", "foobanfoo")); + } + + public void testGlobMatchMultipleCharactersWithSingleWildcard() { + assertTrue(Glob.globMatch("a*b", "acb")); + assertTrue(Glob.globMatch("f*oo", "foo")); + assertTrue(Glob.globMatch("a*b", "aab")); + assertTrue(Glob.globMatch("a*b", "aaab")); + } + + public void testGlobMatchWildcardWithEmptyString() { + assertTrue(Glob.globMatch("*", "")); + assertTrue(Glob.globMatch("a*", "a")); + assertFalse(Glob.globMatch("a*", "")); + } + + public void testGlobMatchMultipleWildcardsWithMultipleCharacters() { + assertTrue(Glob.globMatch("a*b*c", "abc")); + assertTrue(Glob.globMatch("a*b*c", "axxxbxbc")); + assertFalse(Glob.globMatch("a*b*c", "abca")); + assertFalse(Glob.globMatch("a*b*c", "ac")); + } +} From e9fd70118e2be690223847ddd36d51fabdecf138 Mon Sep 17 00:00:00 2001 From: Niyati Aggarwal Date: Thu, 11 Apr 2024 13:35:03 -0700 Subject: [PATCH 4/4] Moving entry to Changed section in CHANGELOG.md Signed-off-by: Niyati Aggarwal --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a05a59c5d8604..fa806541aea42 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -110,7 +110,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Concurrent Segment Search] Perform buildAggregation concurrently and support Composite Aggregations ([#12697](https://github.com/opensearch-project/OpenSearch/pull/12697)) - [Concurrent Segment Search] Disable concurrent segment search for system indices and throttled requests ([#12954](https://github.com/opensearch-project/OpenSearch/pull/12954)) - Derived fields support to derive field values at query time without indexing ([#12569](https://github.com/opensearch-project/OpenSearch/pull/12569)) -- Refactoring globMatch using simpleMatchWithNormalizedStrings from Regex ([#13104](https://github.com/opensearch-project/OpenSearch/pull/13104)) - Detect breaking changes on pull requests ([#9044](https://github.com/opensearch-project/OpenSearch/pull/9044)) - Add cluster primary balance contraint for rebalancing with buffer ([#12656](https://github.com/opensearch-project/OpenSearch/pull/12656)) - [Remote Store] Make translog transfer timeout configurable ([#12704](https://github.com/opensearch-project/OpenSearch/pull/12704)) @@ -131,6 +130,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [BWC and API enforcement] Enforcing the presence of API annotations at build time ([#12872](https://github.com/opensearch-project/OpenSearch/pull/12872)) - Improve built-in secure transports support ([#12907](https://github.com/opensearch-project/OpenSearch/pull/12907)) - Update links to documentation in rest-api-spec ([#13043](https://github.com/opensearch-project/OpenSearch/pull/13043)) +- Refactoring globMatch using simpleMatchWithNormalizedStrings from Regex ([#13104](https://github.com/opensearch-project/OpenSearch/pull/13104)) ### Deprecated