Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix instances of exponential backtracking found by recheck #5144

Merged
merged 1 commit into from
May 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion demo/kitchen-sink/docs/clojure.clj
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,8 @@
(println (parting)) ; -> Goodbye, World
(println (parting "Mark")) ; -> Goodbye, Mark
(println (parting "Mark" "es")) ; -> Adios, Mark
(println (parting "Mark", "xy")) ; -> java.lang.IllegalArgumentException: unsupported language xy
(println (parting "Mark", "xy")) ; -> java.lang.IllegalArgumentException: unsupported language xy

(print (re-matches #"abc(.*)
(r)" "abcxyz
r") )
59 changes: 19 additions & 40 deletions src/mode/_test/highlight_rules_test.js
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,8 @@ function testMode(modeName, i) {
var Mode = require("../" + modeName).Mode;
var tokenizer = new Mode().getTokenizer();

checkBacktracking(tokenizer);
// TODO this is too slow to run in regular tests
if (RECHECK) checkBacktracking(tokenizer);

var state = "start";
data.forEach(function(lineData) {
Expand Down Expand Up @@ -323,51 +324,29 @@ function padNumber(num, digits) {
return (" " + num).slice(-digits);
}

/**
 * Heuristic scan for a repeating quantifier nested inside a group that is
 * itself followed by a repeating quantifier (e.g. the shape of `(a+)+`).
 * The "?" quantifier is never treated as repeating.
 *
 * @param {{source: string}} regex - object whose `source` is handed to
 *     `regexpTokenizer.tokenize`.
 * @returns {?number} the value of a counter that is bumped every time a
 *     "group.end" token is processed while no group remains open, taken at
 *     the moment the first suspicious quantifier is met; `null` when no
 *     such quantifier is found.
 */
function maybeCatastrophicBacktracking(regex) {
    var tokenList = regexpTokenizer.tokenize(regex.source);
    // End tokens of the currently open groups that are followed by a repeating quantifier.
    var openRepeated = [];
    // End tokens of every currently open group.
    var openGroups = [];
    var closedTopLevel = 0;

    // True-ish for a quantifier token other than "?".
    function isRepeating(candidate) {
        return !!candidate && candidate.type == "quantifier" && candidate.value != "?";
    }

    function top(stack) {
        return stack[stack.length - 1];
    }

    for (var pos = 0; pos < tokenList.length; pos++) {
        var current = tokenList[pos];

        if (current.type == "group.start") {
            var closer = current.end;
            // Token right after the matching group end decides whether the group repeats.
            var follower = tokenList[tokenList.indexOf(closer, pos) + 1];
            if (isRepeating(follower)) {
                openRepeated.push(closer);
            }
            openGroups.push(closer);
        } else if (current.type == "group.end") {
            if (top(openRepeated) == current) {
                openRepeated.pop();
            }
            if (top(openGroups) == current) {
                openGroups.pop();
            }
            if (!openGroups.length) {
                closedTopLevel++;
            }
        } else if (isRepeating(current) && openRepeated.length > 0) {
            // Repeating quantifier while inside a repeated group.
            return closedTopLevel;
        }
    }

    return null;
}
function checkBacktracking(tokenizer) {
var regExps = tokenizer.regExps;
Object.keys(regExps || {}).forEach(function(state) {
var i = maybeCatastrophicBacktracking(regExps[state]);
if (i != null) {
i = tokenizer.matchMappings[state][i];
var rule = tokenizer.states[state][i];
console.log("\tPossible error in", state, rule && rule.token, i);
}
var states = tokenizer.states;
Object.keys(states || {}).forEach(function(state) {
states[state].forEach(function(rule) {
var regex = rule.regex;
if (regex && typeof regex != "string") regex = regex.source;
if (!regex) return;
var result = require("recheck").checkSync(regex, "gmi", {
checker: "automaton",
timeout: 100000
});
if (result.status != "safe") {
if (result.attack && result.attack.string) delete result.attack.string;

console.log("\tPossible error in", state, rule, result);
}
});
});
}



// cli
var arg = process.argv[2];
var RECHECK = process.argv.indexOf("--recheck") !== -1;
if (!arg) {
test();
checkModes();
Expand Down
27 changes: 27 additions & 0 deletions src/mode/_test/tokens_clojure.json
Original file line number Diff line number Diff line change
Expand Up @@ -159,4 +159,31 @@
["keyword","))"],
["text"," "],
["comment","; -> java.lang.IllegalArgumentException: unsupported language xy"]
],[
"start"
],[
"regex",
["keyword","("],
["support.function","print"],
["text"," "],
["keyword","("],
["support.function","re-matches"],
["text"," "],
["string.regexp","#\"abc"],
["constant.language.escape","(.*)"]
],[
"string",
["string.regexp"," "],
["constant.language.escape","("],
["string.regexp","r"],
["constant.language.escape",")"],
["string.regexp","\""],
["text"," "],
["string","\"abcxyz"]
],[
"start",
["string"," r\""],
["keyword",")"],
["text"," "],
["keyword",")"]
]]
52 changes: 46 additions & 6 deletions src/mode/clojure_highlight_rules.js
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,10 @@ var ClojureHighlightRules = function() {
}, {
token : "keyword", //vectors
regex : "[\\[|\\]]"
}, {
token : "string.regexp", //Regular Expressions
regex : '#"',
next: "regex"
}, {
token : "keyword", //sets and maps
regex : "[\\{|\\}|\\#\\{|\\#\\}]"
Expand Down Expand Up @@ -141,23 +145,59 @@ var ClojureHighlightRules = function() {
}, {
token : "constant", // symbol
regex : /:[^()\[\]{}'"\^%`,;\s]+/
}, {
token : "string.regexp", //Regular Expressions
regex : '/#"(?:\\.|(?:\\")|[^""\n])*"/g'
}

],
"string" : [
{
token : "constant.language.escape",
regex : "\\\\.|\\\\$"
}, {
token : "string",
regex : '[^"\\\\]+'
}, {
token : "string",
regex : '"',
next : "start"
}, {
defaultToken: "string"
}
],
"regex": [
{
// escapes
token: "regexp.keyword.operator",
regex: "\\\\(?:u[\\da-fA-F]{4}|x[\\da-fA-F]{2}|.)"
}, {
// flag
token: "string.regexp",
regex: '"',
next: "start"
}, {
// operators
token : "constant.language.escape",
regex: /\(\?[:=!]|\)|\{\d+\b,?\d*\}|[+*]\?|[()$^+*?.]/
}, {
token : "constant.language.delimiter",
regex: /\|/
}, {
token: "constant.language.escape",
regex: /\[\^?/,
next: "regex_character_class"
}, {
defaultToken: "string.regexp"
}
],
"regex_character_class": [
{
token: "regexp.charclass.keyword.operator",
regex: "\\\\(?:u[\\da-fA-F]{4}|x[\\da-fA-F]{2}|.)"
}, {
token: "constant.language.escape",
regex: "]",
next: "regex"
}, {
token: "constant.language.escape",
regex: "-"
}, {
defaultToken: "string.regexp.charachterclass"
}
]
};
Expand Down
2 changes: 1 addition & 1 deletion src/mode/ion_highlight_rules.js
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@
"variable.language.annotation.ion",
"punctuation.definition.annotation.ion"
],
"regex": "('(?:[^']|\\\\\\\\|\\\\')*')\\s*(::)"
"regex": /('(?:[^'\\]|\\.)*')\s*(::)/
},
{
"token": [
Expand Down
12 changes: 1 addition & 11 deletions src/mode/raku_highlight_rules.js
Original file line number Diff line number Diff line change
Expand Up @@ -173,11 +173,7 @@ var RakuHighlightRules = function() {
// Numbers - Hexadecimal
var hex = { token : "constant.numeric", regex : "0x[0-9a-fA-F]+\\b" };
// Numbers - Num & Rat
var num_rat = { token : "constant.numeric", regex : "[+-.]?\\d+(?:(?:\\.\\d*)?(?:[eE][+-]?\\d+)?)?\\b" };
// Numbers - With _
var num_with_ = { token : "constant.numeric", regex : "(?:\\d+_?\\d+)+\\b" };
// Numbers - Complex
var complex_numbers = { token : "constant.numeric", regex : "\\+?\\d+i\\b" };
var num_rat = { token : "constant.numeric", regex : "[+-.]?\\d[\\d_]*(?:(?:\\.\\d[\\d_]*)?(?:[eE][+-]?\\d[\\d_]*)?)?i?\\b" };
// Booleans
var booleans = { token : "constant.language.boolean", regex : "(?:True|False)\\b" };
// Versions
Expand Down Expand Up @@ -238,8 +234,6 @@ var RakuHighlightRules = function() {
},
hex,
num_rat,
num_with_,
complex_numbers,
booleans,
versions,
lang_keywords,
Expand Down Expand Up @@ -283,8 +277,6 @@ var RakuHighlightRules = function() {
"qqinterpolation" : [
hex,
num_rat,
num_with_,
complex_numbers,
booleans,
versions,
lang_keywords,
Expand Down Expand Up @@ -338,8 +330,6 @@ var RakuHighlightRules = function() {
"qqheredocinterpolation" : [
hex,
num_rat,
num_with_,
complex_numbers,
booleans,
versions,
lang_keywords,
Expand Down