-
Notifications
You must be signed in to change notification settings - Fork 33
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
unquote text and identifiers in PPL parsing
Signed-off-by: Sean Kao <seankao@amazon.com>
- Loading branch information
1 parent
0f53448
commit f73afbd
Showing
4 changed files
with
167 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
100 changes: 100 additions & 0 deletions
100
ppl-spark-integration/src/main/java/org/opensearch/sql/common/utils/StringUtils.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.sql.common.utils; | ||
|
||
import com.google.common.base.Strings; | ||
|
||
import java.util.IllegalFormatException; | ||
import java.util.Locale; | ||
|
||
public class StringUtils {
    /**
     * Removes the outer pair of quotes from a quoted text literal and resolves the escape
     * sequences inside it.
     *
     * <ul>
     *   <li>Text enclosed in back ticks has the back ticks stripped; the content is returned
     *       verbatim.
     *   <li>Text enclosed in single or double quotes has the quotes stripped. Inside the content,
     *       two consecutive occurrences of the enclosing quote collapse into one (for example
     *       {@code 'Test''s'} becomes {@code Test's}), and a backslash followed by a single quote,
     *       a double quote or another backslash yields just the escaped character.
     *   <li>Anything else (shorter than two characters, or not starting and ending with the same
     *       supported quote character) is returned unchanged.
     * </ul>
     *
     * @param text the possibly quoted string; may be {@code null}
     * @return the text without its outer quotes and with escapes resolved, the input itself when it
     *     is not quoted, or {@code null} when {@code text} is {@code null}
     */
    public static String unquoteText(String text) {
        if (text == null || text.length() < 2) {
            return text;
        }

        final int closingIndex = text.length() - 1;
        final char enclosingQuote = text.charAt(0);

        if (enclosingQuote != text.charAt(closingIndex)) {
            return text;
        }

        if (enclosingQuote == '`') {
            return text.substring(1, closingIndex);
        }

        if (enclosingQuote != '\'' && enclosingQuote != '"') {
            return text;
        }

        StringBuilder textSB = new StringBuilder(closingIndex - 1);

        // Walk only the content between the enclosing quotes.
        for (int chIndex = 1; chIndex < closingIndex; chIndex++) {
            char currentChar = text.charAt(chIndex);

            // An escape needs a following character that is still part of the content; the
            // closing quote itself must never be consumed as the target of an escape.
            if (chIndex + 1 < closingIndex) {
                char nextChar = text.charAt(chIndex + 1);
                boolean backslashEscape =
                    currentChar == '\\'
                        && (nextChar == '"' || nextChar == '\\' || nextChar == '\'');
                boolean doubledQuote =
                    currentChar == enclosingQuote && nextChar == enclosingQuote;

                if (backslashEscape || doubledQuote) {
                    // Skip the escaping character and keep the escaped one.
                    chIndex++;
                    currentChar = nextChar;
                }
            }
            textSB.append(currentChar);
        }
        return textSB.toString();
    }

    /**
     * Removes the enclosing back ticks from an identifier.
     *
     * <p>Only back ticks are treated as identifier quotes. An identifier that is not wrapped in a
     * pair of back ticks (including a lone back tick, an empty string or {@code null}) is returned
     * unchanged.
     *
     * @param identifier identifier that is possibly enclosed by back ticks; may be {@code null}
     * @return the identifier without its outer pair of back ticks, or the input itself when it is
     *     not enclosed by back ticks
     */
    public static String unquoteIdentifier(String identifier) {
        if (isQuoted(identifier, "`")) {
            return identifier.substring(1, identifier.length() - 1);
        } else {
            return identifier;
        }
    }

    /**
     * Returns a formatted string using the specified format string and arguments, as well as the
     * {@link Locale#ROOT} locale.
     *
     * @param format format string
     * @param args arguments referenced by the format specifiers in the format string
     * @return A formatted string
     * @throws IllegalFormatException If a format string contains an illegal syntax, a format
     *     specifier that is incompatible with the given arguments, insufficient arguments given the
     *     format string, or other illegal conditions.
     * @see String#format(Locale, String, Object...)
     */
    public static String format(final String format, Object... args) {
        return String.format(Locale.ROOT, format, args);
    }

    /**
     * Tells whether {@code text} starts and ends with {@code mark}, using two distinct occurrences
     * of it. The length check rejects inputs such as a single back tick, where the opening and
     * closing mark would be the same character.
     */
    private static boolean isQuoted(String text, String mark) {
        return text != null
            && text.length() >= 2 * mark.length()
            && text.startsWith(mark)
            && text.endsWith(mark);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters