Skip to content

Commit

Permalink
Fix not escaping special characters in search pattern (#5938)
Browse files Browse the repository at this point in the history
* Fix not escaping special characters in search pattern

fixes #5892

* add method to get search pattern for searched words with escaped
 javascript regexp special characters (for search without regular
 expressions)

* in preview viewer use search pattern with escaped javascript regexp
 special characters

* Refactoring and performance improvement

* use enum to specify special characters escape mode

* use compiled regex pattern instead of string

* Refactoring: braces in if..else

* Refactoring, minor changes: names, comments

* Add tests of escaping special characters in search patterns
  • Loading branch information
dawidm authored Feb 14, 2020
1 parent 741630b commit ec93ad3
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 3 deletions.
4 changes: 2 additions & 2 deletions src/main/java/org/jabref/gui/preview/PreviewViewer.java
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ public class PreviewViewer extends ScrollPane implements InvalidationListener {
private boolean registered;

private ChangeListener<Optional<SearchQuery>> listener = (queryObservable, queryOldValue, queryNewValue) -> {
searchHighlightPattern = queryNewValue.flatMap(SearchQuery::getPatternForWords);
searchHighlightPattern = queryNewValue.flatMap(SearchQuery::getJavaScriptPatternForWords);
highlightSearchPattern();
};

Expand Down Expand Up @@ -131,7 +131,7 @@ public void setTheme(String theme) {

private void highlightSearchPattern() {
if (searchHighlightPattern.isPresent()) {
String pattern = searchHighlightPattern.get().pattern().replace("\\Q", "").replace("\\E", "");
String pattern = searchHighlightPattern.get().pattern();

previewView.getEngine().executeScript(
"var markInstance = new Mark(document.getElementById(\"content\"));" +
Expand Down
46 changes: 45 additions & 1 deletion src/main/java/org/jabref/logic/search/SearchQuery.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,25 @@

public class SearchQuery implements SearchMatcher {

/**
* Regex pattern for escaping special characters in javascript regular expressions
*/
public static final Pattern JAVASCRIPT_ESCAPED_CHARS_PATTERN = Pattern.compile("[\\.\\*\\+\\?\\^\\$\\{\\}\\(\\)\\|\\[\\]\\\\/]");

/**
* The mode of escaping special characters in regular expressions
*/
private enum EscapeMode {
/**
* using \Q and \E marks
*/
JAVA,
/**
* escaping all javascript regex special characters separately
*/
JAVASCRIPT
}

private final String query;
private final boolean caseSensitive;
private final boolean regularExpression;
Expand Down Expand Up @@ -124,6 +143,18 @@ public List<String> getSearchWords() {

// Returns a regular expression pattern in the form (w1)|(w2)| ... wi are escaped if no regular expression search is enabled
public Optional<Pattern> getPatternForWords() {
return joinWordsToPattern(EscapeMode.JAVA);
}

// Returns a regular expression pattern in the form (w1)|(w2)| ... wi are escaped for javascript if no regular expression search is enabled
public Optional<Pattern> getJavaScriptPatternForWords() {
return joinWordsToPattern(EscapeMode.JAVASCRIPT);
}

/** Returns a regular expression pattern in the form (w1)|(w2)| ... wi are escaped if no regular expression search is enabled
* @param escapeMode the mode of escaping special characters in wi
*/
private Optional<Pattern> joinWordsToPattern(EscapeMode escapeMode) {
List<String> words = getSearchWords();

if ((words == null) || words.isEmpty() || words.get(0).isEmpty()) {
Expand All @@ -133,7 +164,20 @@ public Optional<Pattern> getPatternForWords() {
// compile the words to a regular expression in the form (w1)|(w2)|(w3)
StringJoiner joiner = new StringJoiner(")|(", "(", ")");
for (String word : words) {
joiner.add(regularExpression ? word : Pattern.quote(word));
if (regularExpression) {
joiner.add(word);
} else {
switch (escapeMode) {
case JAVA:
joiner.add(Pattern.quote(word));
break;
case JAVASCRIPT:
joiner.add(JAVASCRIPT_ESCAPED_CHARS_PATTERN.matcher(word).replaceAll("\\\\$0"));
break;
default:
throw new IllegalArgumentException("Unknown special characters escape mode: " + escapeMode);
}
}
}
String searchPattern = joiner.toString();

Expand Down
34 changes: 34 additions & 0 deletions src/test/java/org/jabref/logic/search/SearchQueryTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -203,4 +203,38 @@ public void testGetPattern() {
//We can't directly compare the pattern objects
assertEquals(Optional.of(pattern.toString()), result.getPatternForWords().map(Pattern::toString));
}

@Test
public void testGetRegexpPattern() {
String queryText = "[a-c]\\d* \\d*";
SearchQuery regexQuery = new SearchQuery(queryText, false, true);
Pattern pattern = Pattern.compile("([a-c]\\d* \\d*)");
assertEquals(Optional.of(pattern.toString()), regexQuery.getPatternForWords().map(Pattern::toString));
}

@Test
public void testGetRegexpJavascriptPattern() {
String queryText = "[a-c]\\d* \\d*";
SearchQuery regexQuery = new SearchQuery(queryText, false, true);
Pattern pattern = Pattern.compile("([a-c]\\d* \\d*)");
assertEquals(Optional.of(pattern.toString()), regexQuery.getJavaScriptPatternForWords().map(Pattern::toString));
}

@Test
public void testEscapingInPattern() {
//first word contain all java special regex characters
String queryText = "<([{\\\\^-=$!|]})?*+.> word1 word2.";
SearchQuery textQueryWithSpecialChars = new SearchQuery(queryText, false, false);
String pattern = "(\\Q<([{\\^-=$!|]})?*+.>\\E)|(\\Qword1\\E)|(\\Qword2.\\E)";
assertEquals(Optional.of(pattern), textQueryWithSpecialChars.getPatternForWords().map(Pattern::toString));
}

@Test
public void testEscapingInJavascriptPattern() {
//first word contain all javascript special regex characters that should be escaped individually in text based search
String queryText = "([{\\\\^$|]})?*+./ word1 word2.";
SearchQuery textQueryWithSpecialChars = new SearchQuery(queryText, false, false);
String pattern = "(\\(\\[\\{\\\\\\^\\$\\|\\]\\}\\)\\?\\*\\+\\.\\/)|(word1)|(word2\\.)";
assertEquals(Optional.of(pattern), textQueryWithSpecialChars.getJavaScriptPatternForWords().map(Pattern::toString));
}
}

0 comments on commit ec93ad3

Please sign in to comment.