Skip to content

Commit

Permalink
Merge pull request #5144 from ajaxorg/recheck
Browse files Browse the repository at this point in the history
fix instances of exponential backtracking found by recheck
  • Loading branch information
nightwing authored May 8, 2023
2 parents 3d99f13 + 948d9a5 commit c6ec82c
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 59 deletions.
6 changes: 5 additions & 1 deletion demo/kitchen-sink/docs/clojure.clj
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,8 @@
(println (parting)) ; -> Goodbye, World
(println (parting "Mark")) ; -> Goodbye, Mark
(println (parting "Mark" "es")) ; -> Adios, Mark
(println (parting "Mark", "xy")) ; -> java.lang.IllegalArgumentException: unsupported language xy
(println (parting "Mark", "xy")) ; -> java.lang.IllegalArgumentException: unsupported language xy

(print (re-matches #"abc(.*)
(r)" "abcxyz
r") )
59 changes: 19 additions & 40 deletions src/mode/_test/highlight_rules_test.js
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,8 @@ function testMode(modeName, i) {
var Mode = require("../" + modeName).Mode;
var tokenizer = new Mode().getTokenizer();

checkBacktracking(tokenizer);
// TODO: checkBacktracking is too slow for regular test runs, so it only runs when --recheck is passed
if (RECHECK) checkBacktracking(tokenizer);

var state = "start";
data.forEach(function(lineData) {
Expand Down Expand Up @@ -323,51 +324,29 @@ function padNumber(num, digits) {
return (" " + num).slice(-digits);
}

/**
 * Heuristic scan for a repeating quantifier that sits inside a group which is
 * itself followed by a repeating quantifier (shapes such as `(a+)+`).
 *
 * A "?" quantifier is never treated as repeating, neither on the group nor
 * inside it.
 *
 * @param {RegExp} regex - expression whose `source` is tokenized and scanned.
 * @returns {?number} the number of group ends seen so far that left no group
 *     open, at the point where the first suspicious quantifier is met; null
 *     when no such quantifier exists.
 */
function maybeCatastrophicBacktracking(regex) {
    var tokens = regexpTokenizer.tokenize(regex.source);
    // End tokens of the enclosing groups that are followed by a repeating quantifier.
    var repeatedGroupEnds = [];
    // End tokens of every group that is currently open.
    var openGroupEnds = [];
    var closedTopLevelGroups = 0;

    // True for any quantifier token other than "?".
    function repeats(tok) {
        return tok.type == "quantifier" && tok.value != "?";
    }

    // Pops the stack only when its top entry is the given end token.
    function dropIfOnTop(stack, endToken) {
        if (stack[stack.length - 1] == endToken)
            stack.pop();
    }

    for (var pos = 0; pos < tokens.length; pos++) {
        var current = tokens[pos];
        if (current.type == "group.start") {
            // Look at the token right after the matching group end.
            var follower = tokens[tokens.indexOf(current.end, pos) + 1];
            if (follower && repeats(follower))
                repeatedGroupEnds.push(current.end);
            openGroupEnds.push(current.end);
        } else if (current.type == "group.end") {
            dropIfOnTop(repeatedGroupEnds, current);
            dropIfOnTop(openGroupEnds, current);
            if (openGroupEnds.length == 0)
                closedTopLevelGroups++;
        } else if (repeatedGroupEnds.length >= 1 && repeats(current)) {
            return closedTopLevelGroups;
        }
    }
    return null;
}
// Logs tokenizer rules whose regular expressions may backtrack badly.
// NOTE(review): this span looks like a diff rendering that lists two bodies for
// the function back to back without +/- markers; as written the braces do not
// balance. Confirm against the real file before relying on either body.
function checkBacktracking(tokenizer) {
// First body: scan each state's entry in tokenizer.regExps with
// maybeCatastrophicBacktracking.
var regExps = tokenizer.regExps;
Object.keys(regExps || {}).forEach(function(state) {
var i = maybeCatastrophicBacktracking(regExps[state]);
if (i != null) {
// Translate the returned index through matchMappings to find the rule in
// tokenizer.states[state].
i = tokenizer.matchMappings[state][i];
var rule = tokenizer.states[state][i];
console.log("\tPossible error in", state, rule && rule.token, i);
}
// Second body: check every rule of every state individually with recheck.
var states = tokenizer.states;
Object.keys(states || {}).forEach(function(state) {
states[state].forEach(function(rule) {
var regex = rule.regex;
// Non-string regex values are reduced to their `source`; rules without a regex are skipped.
if (regex && typeof regex != "string") regex = regex.source;
if (!regex) return;
var result = require("recheck").checkSync(regex, "gmi", {
checker: "automaton",
timeout: 100000
});
// Anything other than a "safe" status is reported.
if (result.status != "safe") {
// Drop the attack string before logging — presumably to keep the output short; confirm.
if (result.attack && result.attack.string) delete result.attack.string;

console.log("\tPossible error in", state, rule, result);
}
});
});
}



// cli
var arg = process.argv[2];
var RECHECK = process.argv.indexOf("--recheck") !== -1;
if (!arg) {
test();
checkModes();
Expand Down
27 changes: 27 additions & 0 deletions src/mode/_test/tokens_clojure.json
Original file line number Diff line number Diff line change
Expand Up @@ -159,4 +159,31 @@
["keyword","))"],
["text"," "],
["comment","; -> java.lang.IllegalArgumentException: unsupported language xy"]
],[
"start"
],[
"regex",
["keyword","("],
["support.function","print"],
["text"," "],
["keyword","("],
["support.function","re-matches"],
["text"," "],
["string.regexp","#\"abc"],
["constant.language.escape","(.*)"]
],[
"string",
["string.regexp"," "],
["constant.language.escape","("],
["string.regexp","r"],
["constant.language.escape",")"],
["string.regexp","\""],
["text"," "],
["string","\"abcxyz"]
],[
"start",
["string"," r\""],
["keyword",")"],
["text"," "],
["keyword",")"]
]]
52 changes: 46 additions & 6 deletions src/mode/clojure_highlight_rules.js
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,10 @@ var ClojureHighlightRules = function() {
}, {
token : "keyword", //vectors
regex : "[\\[|\\]]"
}, {
token : "string.regexp", //Regular Expressions
regex : '#"',
next: "regex"
}, {
token : "keyword", //sets and maps
regex : "[\\{|\\}|\\#\\{|\\#\\}]"
Expand Down Expand Up @@ -141,23 +145,59 @@ var ClojureHighlightRules = function() {
}, {
token : "constant", // symbol
regex : /:[^()\[\]{}'"\^%`,;\s]+/
}, {
token : "string.regexp", //Regular Expressions
regex : '/#"(?:\\.|(?:\\")|[^""\n])*"/g'
}

],
"string" : [
{
token : "constant.language.escape",
regex : "\\\\.|\\\\$"
}, {
token : "string",
regex : '[^"\\\\]+'
}, {
token : "string",
regex : '"',
next : "start"
}, {
defaultToken: "string"
}
],
"regex": [
{
// escapes
token: "regexp.keyword.operator",
regex: "\\\\(?:u[\\da-fA-F]{4}|x[\\da-fA-F]{2}|.)"
}, {
// closing quote: ends the regex literal and returns to "start"
token: "string.regexp",
regex: '"',
next: "start"
}, {
// operators
token : "constant.language.escape",
regex: /\(\?[:=!]|\)|\{\d+\b,?\d*\}|[+*]\?|[()$^+*?.]/
}, {
token : "constant.language.delimiter",
regex: /\|/
}, {
token: "constant.language.escape",
regex: /\[\^?/,
next: "regex_character_class"
}, {
defaultToken: "string.regexp"
}
],
"regex_character_class": [
{
token: "regexp.charclass.keyword.operator",
regex: "\\\\(?:u[\\da-fA-F]{4}|x[\\da-fA-F]{2}|.)"
}, {
token: "constant.language.escape",
regex: "]",
next: "regex"
}, {
token: "constant.language.escape",
regex: "-"
}, {
defaultToken: "string.regexp.charachterclass"
}
]
};
Expand Down
2 changes: 1 addition & 1 deletion src/mode/ion_highlight_rules.js
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@
"variable.language.annotation.ion",
"punctuation.definition.annotation.ion"
],
"regex": "('(?:[^']|\\\\\\\\|\\\\')*')\\s*(::)"
"regex": /('(?:[^'\\]|\\.)*')\s*(::)/
},
{
"token": [
Expand Down
12 changes: 1 addition & 11 deletions src/mode/raku_highlight_rules.js
Original file line number Diff line number Diff line change
Expand Up @@ -173,11 +173,7 @@ var RakuHighlightRules = function() {
// Numbers - Hexadecimal
var hex = { token : "constant.numeric", regex : "0x[0-9a-fA-F]+\\b" };
// Numbers - Num & Rat
var num_rat = { token : "constant.numeric", regex : "[+-.]?\\d+(?:(?:\\.\\d*)?(?:[eE][+-]?\\d+)?)?\\b" };
// Numbers - With _
var num_with_ = { token : "constant.numeric", regex : "(?:\\d+_?\\d+)+\\b" };
// Numbers - Complex
var complex_numbers = { token : "constant.numeric", regex : "\\+?\\d+i\\b" };
var num_rat = { token : "constant.numeric", regex : "[+-.]?\\d[\\d_]*(?:(?:\\.\\d[\\d_]*)?(?:[eE][+-]?\\d[\\d_]*)?)?i?\\b" };
// Booleans
var booleans = { token : "constant.language.boolean", regex : "(?:True|False)\\b" };
// Versions
Expand Down Expand Up @@ -238,8 +234,6 @@ var RakuHighlightRules = function() {
},
hex,
num_rat,
num_with_,
complex_numbers,
booleans,
versions,
lang_keywords,
Expand Down Expand Up @@ -283,8 +277,6 @@ var RakuHighlightRules = function() {
"qqinterpolation" : [
hex,
num_rat,
num_with_,
complex_numbers,
booleans,
versions,
lang_keywords,
Expand Down Expand Up @@ -338,8 +330,6 @@ var RakuHighlightRules = function() {
"qqheredocinterpolation" : [
hex,
num_rat,
num_with_,
complex_numbers,
booleans,
versions,
lang_keywords,
Expand Down

0 comments on commit c6ec82c

Please sign in to comment.