diff --git a/packages/next/compiled/regexr-lexer/lexer.js b/packages/next/compiled/regexr-lexer/lexer.js new file mode 100644 index 0000000000000..b94a783e13a57 --- /dev/null +++ b/packages/next/compiled/regexr-lexer/lexer.js @@ -0,0 +1,935 @@ +'use strict'; + +/* +RegExr: Learn, Build, & Test RegEx +Copyright (C) 2017 gskinner.com, inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . +*/ +/** + * Tokenizes a "/pattern/flags" expression string into a linked list of tokens (joined via prev/next), driven by the feature tables of the assigned profile. + * Parse results are exposed on the instance: token (first token), errors, captureGroups and namedGroups. + */ +class ExpressionLexer { + /** Starts without a profile; parse() returns null until one is assigned. */ + constructor() { + this.profile = null; + } + + /** Stores the profile and clears the cached string, token, errors and group lists so the next parse() starts fresh. */ + set profile(profile) { + this._profile = profile; + this.string = this.token = this.errors = this.captureGroups = this.namedGroups = null; + } + + /** + * Lexes str and returns the first token of the resulting linked list. + * Returns null when no profile is set, and the cached first token when str equals the previously parsed string. + * Index 0 and everything from the last "/" onward are lexed by parseFlag; the characters after the last "/" are recorded as keys of this._modes. + * Tokens that end up with an error are collected in this.errors; groups and a set that are still open after the main loop are reported as "groupopen" / "setopen". + */ + parse(str) { + if (!this._profile) { + return null; + } + + if (str === this.string) { + return this.token; + } + + this.token = null; + this._modes = {}; + this.string = str; + this.errors = []; + let capgroups = this.captureGroups = []; + let namedgroups = this.namedGroups = {}; + let brgroups = this.branchResetGroups = []; + let groups = [], + refs = [], + i = 0, + l = str.length; + let o, + c, + token, + charset = null; // previous is the previous token, prv is the previous "active" token (!ignore) + + let prev = null, + prv = null; + let profile = this._profile, + unquantifiable = profile.unquantifiable; + let charTypes = profile.charTypes; + let closeIndex = str.lastIndexOf("/"); + + for (let i = closeIndex + 1; i < l; i++) { + this._modes[str[i]] = true; + } + + while (i < l) { + c = str[i]; + token = { + i: i, + l: 1, + 
prev: prev, + prv: prv, + modes: this._modes + }; + + if (prev) { + prev.next = token; + } else { + this.token = token; + } + + if (i === 0 || i >= closeIndex) { + this.parseFlag(str, token); + } else if (c === "(" && !charset) { + this.parseParen(str, token); + + if (token.close === null) { + token.depth = groups.length; + groups.push(token); + } + + if (token.capture) { + this.addCaptureGroup(token, groups); + } + } else if (c === ")" && !charset) { + token.type = "groupclose"; + + if (groups.length) { + o = token.open = groups.pop(); + o.close = token; + + if (o.type === "branchreset") { + brgroups.pop(); + } + } else { + token.error = { + id: "groupclose" + }; + } + } else if (c === "[") { + charset = this.parseSquareBracket(str, token, charset); + } else if (c === "]" && charset) { + token.type = "setclose"; + token.open = charset; + charset.close = token; + charset = null; + } else if (c === "+" && prv && prv.clss === "quant" && profile.tokens.possessive) { + token.type = "possessive"; + token.related = [prv]; + } else if ((c === "+" || c === "*") && !charset) { + token.type = charTypes[c]; + token.clss = "quant"; + token.min = c === "+" ? 1 : 0; + token.max = -1; + } else if (c === "{" && !charset && str.substr(i).search(/^{\d+,?\d*}/) !== -1) { + this.parseQuant(str, token); + } else if (c === "\\") { + this.parseBackSlash(str, token, charset, closeIndex); + } else if (c === "?" && !charset) { + if (!prv || prv.clss !== "quant") { + token.type = charTypes[c]; + token.clss = "quant"; + token.min = 0; + token.max = 1; + } else { + token.type = "lazy"; + token.related = [prv]; + } + } else if (c === "-" && charset && prv.code !== undefined && prv.prv && prv.prv.type !== "range") { + // this may be the start of a range, but we'll need to validate after the next token. 
+ token.type = "range"; + } else { + this.parseChar(str, token, charset); + + if (!charset && this._modes.x && /\s/.test(c)) { + token.ignore = true; + token.type = "ignorews"; + } + } // post process token: + // quantifier: + + + if (token.clss === "quant") { + if (!prv || prv.close !== undefined || unquantifiable[prv.type] || prv.open && unquantifiable[prv.open.type]) { + token.error = { + id: "quanttarg" + }; + } else { + token.related = [prv.open || prv]; + } + } // reference: + + + if (token.group === true) { + refs.push(token); + } // conditional: + + + let curGroup = groups.length ? groups[groups.length - 1] : null; + + if (curGroup && (curGroup.type === "conditional" || curGroup.type === "conditionalgroup") && token.type === "alt") { + if (!curGroup.alt) { + curGroup.alt = token; + } else { + token.error = { + id: "extraelse" + }; + } + + token.related = [curGroup]; + token.type = "conditionalelse"; + token.clss = "special"; + } else if (curGroup && curGroup.type === "branchreset") { + // reset group + curGroup.curGroupNum = curGroup.inGroupNum; + } // range: + + + if (prv && prv.type === "range" && prv.l === 1) { + this.validateRange(str, token); + } // js warnings: + // TODO: this isn't ideal, but I'm hesitant to write a more robust solution for a couple of edge cases. 
+ + + if (profile.id === "js") { + this.addJSWarnings(token); + } // general: + + + if (token.open && !token.clss) { + token.clss = token.open.clss; + } + + if (token.error) { + this.addError(token); + } + + i += token.l; + prev = token; + + if (!token.ignore) { + prv = token; + } + } // post processing: + + + while (groups.length) { + this.addError(groups.pop(), { + id: "groupopen" + }); + } + + this.matchRefs(refs, capgroups, namedgroups); + + if (charset) { + this.addError(charset, { + id: "setopen" + }); + } + + return this.token; + } + + /** Sets token.error to error (defaults to the error already on the token) and appends the token itself to this.errors. */ + addError(token, error = token.error) { + token.error = error; + this.errors.push(token); + } + + /** Attaches a "jsfuture" warning (an error object with warning: true) to the token types listed in the condition; tokens that already carry an error are left untouched. */ + addJSWarnings(token) { + if (token.error) { + return; + } + + if (token.type === "neglookbehind" || token.type === "poslookbehind" || token.type === "sticky" || token.type === "unicode" || token.type == "dotall" || token.type === "unicodecat" || token.type === "unicodescript" || token.type === "namedgroup") { + token.error = { + id: "jsfuture", + warning: true + }; + } + } + + /** + * Numbers a capturing group token and registers it. + * Inside a branch reset group the number comes from that group's running counter (curGroupNum), otherwise it is captureGroups.length + 1; the token is appended to captureGroups only when that slot is still empty. + * For a named token without an error: a name starting with a digit yields "badname", a name that is already registered yields "dupname" (related to the first holder), otherwise the name is stored in namedGroups. + */ + addCaptureGroup(token, groups) { + // it would be nice to make branch reset groups actually highlight all of the groups that share the same number + // that would require switching to arrays of groups for each group num - requires rearchitecture throughout the app. + let capgroups = this.captureGroups, + brgroups = this.branchResetGroups, + namedgroups = this.namedGroups; + let curGroup = groups.length ? 
groups[groups.length - 1] : null; + + if (brgroups.length) { + let brgroup = brgroups[brgroups.length - 1]; + token.num = ++brgroup.curGroupNum; + } else { + token.num = capgroups.length + 1; + } + + if (!capgroups[token.num - 1]) { + capgroups.push(token); + } + + if (token.name && !token.error) { + if (/\d/.test(token.name[0])) { + token.error = { + id: "badname" + }; + } else if (namedgroups[token.name]) { + token.error = { + id: "dupname" + }; + token.related = [namedgroups[token.name]]; + } else { + namedgroups[token.name] = token; + } + } + } + + /** Marks token as a reference (clss "ref", group = true) to the group called str, and stores in relIndex how many capture groups exist at this point; matchRefs uses relIndex to resolve signed (relative) numbers. */ + getRef(token, str) { + token.clss = "ref"; + token.group = true; + token.relIndex = this.captureGroups.length; + token.name = str; + } + + /** + * Resolves every pending reference token, first against the named groups (names) and, for numeric names, against the capture group list (indexes); signed numbers are offset by the token's relIndex. + * A resolved token gets group, related and dir: 1 when the reference starts before the group, 0 when it sits inside the group or the group has no close token, -1 when it follows the group. + * An unresolved token loses group / relIndex and is handed to refToOctal. + * NOTE(review): on failure this pushes token.error (the error object) onto this.errors, whereas addError pushes the token itself - confirm which shape consumers of errors expect. + */ + matchRefs(refs, indexes, names) { + while (refs.length) { + let token = refs.pop(), + name = token.name, + group = names[name]; + + if (!group && !isNaN(name)) { + let sign = name[0], + index = parseInt(name) + (sign === "+" || sign === "-" ? token.relIndex : 0); + + if (sign === "-") { + index++; + } + + group = indexes[index - 1]; + } + + if (group) { + token.group = group; + token.related = [group]; + token.dir = token.i < group.i ? 1 : !group.close || token.i < group.close.i ? 0 : -1; + } else { + delete token.group; + delete token.relIndex; + this.refToOctal(token); + + if (token.error) { + this.errors.push(token.error); + } + } + } + } + + /** + * Rewrites a reference token that matched no group. + * A token whose type is not "numref" gets an "unmatchedref" error. A name of two octal digits (or one, when the profile sets config.reftooctalalways) becomes an "escoctal" token, absorbing a following octal digit char while the value stays <= 255. "8" and "9" are passed to parseEscChar. Everything else is "unmatchedref". + */ + refToOctal(token) { + // PCRE: \# unmatched, \0 \00 \## = octal + // JS: \# \0 \00 \## = octal + // PCRE matches \8 \9 to "8" "9" + // JS: without the u flag \8 \9 match "8" "9" in IE, FF & Chrome, and "\8" "\9" in Safari. We support the former. + // JS: with the u flag, Chrome & FF throw an esc error, Safari does not. + // TODO: handle \0 for PCRE? Would need more testing. + // TODO: this doesn't handle two digit refs with 8/9 in them. Ex. \18 - not even sure what this is interpreted as. 
+ let name = token.name, + profile = this._profile; + + if (token.type !== "numref") { + // not a simple \4 style reference, so can't decompose into an octal. + token.error = { + id: "unmatchedref" + }; + } else if (/^[0-7]{2}$/.test(name) || profile.config.reftooctalalways && /^[0-7]$/.test(name)) { + // octal + let next = token.next, + char = String.fromCharCode(next.code); + + if (next.type === "char" && char >= "0" && char <= "7" && parseInt(name + char, 8) <= 255) { + name += char; + this.mergeNext(token); + } + + token.code = parseInt(name, 8); + token.clss = "esc"; + token.type = "escoctal"; + delete token.name; + } else if (name === "8" || name === "9") { + this.parseEscChar(token, name); + delete token.name; + } else { + token.error = { + id: "unmatchedref" + }; + } + } + + /** Unlinks token.next from the list (token.next becomes the node after it, whose prev is pointed back at token) and grows token.l by one. Dereferences the node after token.next, so one must exist. */ + mergeNext(token) { + let next = token.next; + token.next = next.next; + token.next.prev = token; + token.l++; + } + + /** Types the character at token.i: a "/" becomes "open" at index 0 and "close" elsewhere (the close token and the first token are cross-linked via related); any other character is looked up in the profile's flags table. */ + parseFlag(str, token) { + // note that this doesn't deal with misformed patterns or incorrect flags. + let i = token.i, + c = str[i]; + + if (str[i] === "/") { + token.type = i === 0 ? "open" : "close"; + + if (i !== 0) { + token.related = [this.token]; + this.token.related = [token]; + } + } else { + token.type = this._profile.flags[c]; + } //token.clear = true; + + } + + /** + * Types a single character. Outside a set the profile's charTypes entry is used when there is one; otherwise the type is "char". + * A "/" outside a set gets a "fwdslash" error. "char" tokens receive their char code, types listed in ANCHOR_TYPES get clss "anchor" and "dot" gets clss "charclass". Returns token. + */ + parseChar(str, token, charset) { + let c = str[token.i]; + token.type = !charset && this._profile.charTypes[c] || "char"; + + if (!charset && c === "/") { + token.error = { + id: "fwdslash" + }; + } + + if (token.type === "char") { + token.code = c.charCodeAt(0); + } else if (ExpressionLexer.ANCHOR_TYPES[token.type]) { + token.clss = "anchor"; + } else if (token.type === "dot") { + token.clss = "charclass"; + } + + return token; + } + + /** + * Handles "[". When the profile enables posixcharclass and the text matches, the token is a POSIX class or collating sequence; otherwise, outside a set, it opens a "set" / "setnot"; inside a set it is lexed as a plain character. + * Returns the set token that is open after this token (the newly opened set, or charset unchanged). + */ + parseSquareBracket(str, token, charset) { + let match; + + if (this._profile.tokens.posixcharclass && (match = str.substr(token.i).match(/^\[(:|\.)([^\]]*?)\1]/))) { + // posixcharclass: [:alpha:] + // posixcollseq: [.ch.] 
+ // currently neither flavor supports posixcollseq, but PCRE does flag as an error: + // TODO: the expression above currently does not catch [.\].] + token.l = match[0].length; + token.value = match[2]; + token.clss = "charclass"; + + if (match[1] === ":") { + token.type = "posixcharclass"; + + if (!this._profile.posixCharClasses[match[2]]) { + token.error = { + id: "posixcharclassbad" + }; + } else if (!charset) { + token.error = { + id: "posixcharclassnoset" + }; + } + } else { + token.type = "posixcollseq"; // TODO: can this be generalized? Right now, no, because we assign ids that aren't in the profile. + + token.error = { + id: "notsupported" + }; + } + } else if (!charset) { + // set [a-z] [aeiou] + // setnot [^a-z] + token.type = token.clss = "set"; + + if (str[token.i + 1] === "^") { + token.l++; + token.type += "not"; + } + + charset = token; + } else { + // [[] (square bracket inside a set) + this.parseChar(str, token, charset); + } + + return charset; + } + + /** + * Classifies the token that starts at "(". A "(" not followed by "?" is a capturing group; "(?..." forms are matched against the constructs listed in the comment below. + * close = null marks a token that still needs a matching ")" (parse pushes such tokens on its group stack); capture = true marks a capturing group. + * NOTE(review): the text of the later branches is damaged in this copy (for example the named-group test reads sub.match(/^/) and the method breaks off at "255)."), apparently because angle-bracket content was stripped. Restore it from the upstream file before relying on it. + */ + parseParen(str, token) { + /* + core: + . group: + . lookahead: ?= ?! + . noncap: ?: + PCRE: + . lookbehind: ?<= ? ?'name' ? + . namedref: ?P=name Also: \g'name' \k'name' etc + . comment: ?# + . atomic: ?> + . recursion: ?0 ?R Also: \g<0> + . define: ?(DEFINE) + . subroutine: ?1 ?-1 ?&name ?P>name + conditionalgroup: ?(1)a|b ?(-1)a|b ?(name)a|b + conditional: ?(?=if)then|else + mode: ?c-i + branchreset: ?| + */ + token.clss = token.type = "group"; + + if (str[token.i + 1] !== "?") { + token.close = null; // indicates that it needs a close token. 
+ + token.capture = true; + return token; + } + + let sub = str.substr(token.i + 2), + match, + s = sub[0]; + + if (s === ":") { + // (?:foo) + token.type = "noncapgroup"; + token.close = null; + token.l = 3; + } else if (s === ">") { + // (?>foo) + token.type = "atomic"; + token.close = null; + token.l = 3; + } else if (s === "|") { + // (?|(a)|(b)) + token.type = "branchreset"; + token.close = null; + token.l = 3; + token.inGroupNum = token.curGroupNum = this.captureGroups.length; + this.branchResetGroups.push(token); + } else if (s === "#" && (match = sub.match(/[^)]*\)/))) { + // (?#foo) + token.clss = token.type = "comment"; + token.ignore = true; + token.l = 2 + match[0].length; + } else if (/^(R|0)\)/.test(sub)) { + // (?R) (?0) + token.clss = "ref"; + token.type = "recursion"; + token.l = 4; + } else if (match = sub.match(/^P=(\w+)\)/i)) { + // (?P=name) + token.type = "namedref"; + this.getRef(token, match[1]); + token.l = match[0].length + 2; + } else if (/^\(DEFINE\)/.test(sub)) { + // (?(DEFINE)foo) + token.type = "define"; + token.close = null; + token.l = 10; + } else if (match = sub.match(/^/)) || this._profile.config.namedgroupalt && ((match = sub.match(/^'(\w+)'/)) || (match = sub.match(/^P<(\w+)>/)))) { + // (?foo) (?'name'foo) (?Pfoo) + token.type = "namedgroup"; + token.close = null; + token.name = match[1]; + token.capture = true; + token.l = match[0].length + 2; + } else if ((match = sub.match(/^([-+]?\d\d?)\)/)) || (match = sub.match(/^(?:&|P>)(\w+)\)/))) { + // (?1) (?-1) (?&name) (?P>name) + token.type = (isNaN(match[1]) ? "named" : "num") + "subroutine"; + this.getRef(token, match[1]); + token.l = match[0].length + 2; + } else if ((match = sub.match(/^\(([-+]?\d\d?)\)/)) || (match = sub.match(/^\((\w+)\)/))) { + // (?(1)a|b) (?(-1)a|b) (?(name)a|b) + this.getRef(token, match[1]); + token.clss = "special"; + token.type = "conditionalgroup"; + token.close = null; + token.l = match[0].length + 2; + } else if (/^\(\?255). 
In theory it should allow 4? + + if (isNaN(val) || val > 255 || /[^\da-f]/i.test(match[1])) { + token.error = { + id: "esccharbad" + }; + } else { + token.code = val; + } + } else if (match = sub.match(/^x([\da-fA-F]{0,2})/)) { + // hex ascii: \xFF + token.type = "eschexadecimal"; + token.l += match[0].length; + token.code = parseInt(match[1] || 0, 16); + } else if (match = sub.match(/^c([a-zA-Z])?/)) { + // control char: \cA \cz + // also handles: \c + // not supported in JS strings + token.type = "esccontrolchar"; + + if (match[1]) { + token.code = match[1].toUpperCase().charCodeAt(0) - 64; // A=65 + + token.l += 2; + } else if (profile.config.ctrlcodeerr) { + token.l++; + token.error = { + id: "esccharbad" + }; + } else { + return this.parseChar(str, token, charset); // this builds the "/" token + } + } else if (match = sub.match(/^[0-7]{1,3}/)) { + // octal ascii: \011 + token.type = "escoctal"; + sub = match[0]; + + if (parseInt(sub, 8) > 255) { + sub = sub.substr(0, 2); + } + + token.l += sub.length; + token.code = parseInt(sub, 8); + } else if (profile.tokens.escoctalo && (match = sub.match(/^o\{(.*?)}/i))) { + // \o{377} + token.type = "escoctal"; + token.l += match[0].length; + val = parseInt(match[1], 8); + + if (isNaN(val) || val > 255 || /[^0-7]/.test(match[1])) { + token.error = { + id: "esccharbad" + }; + } else { + token.code = val; + } + } else { + // single char + if (token.type = profile.escCharTypes[c]) { + token.l++; + token.clss = ExpressionLexer.ANCHOR_TYPES[token.type] ? "anchor" : "charclass"; + return token; + } + + token.code = profile.escCharCodes[c]; + + if (token.code === undefined || token.code === false) { + // unrecognized. 
+ return this.parseEscChar(token, c); + } // update SubstLexer if this changes: + + + token.l++; + token.type = "esc_" + token.code; + } + + token.clss = "esc"; + return token; + } + + /** + * Handles a backslash followed by an otherwise unrecognized character c; always sets token.l to 2. + * The token becomes "escchar" (clss "esc", code = char code of c) when c is in the profile's escChars, or when the profile enables the escchar token, c is not in badEscChars and the u mode is off. Otherwise the token gets an "esccharbad" error. Has no return value. + */ + parseEscChar(token, c) { + // unrecognized escchar: \u \a \8, etc + // JS: allowed except if u flag set, Safari still allows \8 \9 + // PCRE: allows \8 \9 but not others // TODO: support? + let profile = this._profile; + token.l = 2; + + if (!profile.badEscChars[c] && profile.tokens.escchar && !this._modes.u || profile.escChars[c]) { + token.type = "escchar"; + token.code = c.charCodeAt(0); + token.clss = "esc"; + } else { + token.error = { + id: "esccharbad" + }; + } + } + + /** + * Parses the \g and \k reference / subroutine forms at the start of sub (the text following the backslash). + * \g forms delimited by quotes or angle brackets are subroutines; everything else, including every \k form, is a reference. A subroutine whose target is empty or 0 is typed "recursion"; all other cases are registered through getRef. + * token.l grows by the matched length, or by 1 when neither pattern matched. + */ + parseRef(token, sub) { + // namedref: \k \k'name' \k{name} \g{name} + // namedsubroutine: \g \g'name' + // numref: \g1 \g+2 \g{2} + // numsubroutine: \g<-1> \g'1' + // recursion: \g<0> \g'0' + let c = sub[0], + s = "", + match; + + if (match = sub.match(/^[gk](?:'\w*'|<\w*>|{\w*})/)) { + s = match[0].substr(2, match[0].length - 3); + + if (c === "k" && !isNaN(s)) { + s = ""; + } // TODO: specific error for numeric \k? + + } else if (match = sub.match(/^g(?:({[-+]?\d+}|<[-+]?\d+>|'[-+]?\d+')|([-+]?\d+))/)) { + s = match[2] !== undefined ? match[2] : match[1].substr(1, match[1].length - 2); + } + + let isRef = c === "k" || !(sub[1] === "'" || sub[1] === "<"); + + if (!isRef && s == 0) { + token.type = "recursion"; + token.clss = "ref"; + } else { + // namedref, extnumref, namedsubroutine, numsubroutine + token.type = (isNaN(s) ? "named" : (isRef ? "ext" : "") + "num") + (isRef ? "ref" : "subroutine"); + this.getRef(token, s); + } + + token.l += match ? 
match[0].length : 1; + } + + /** + * Parses a \p / \P unicode property escape at the start of sub. + * The value is looked up in the profile's unicodeScripts (type "unicodescript") and unicodeCategories (type "unicodecat"); negated forms get a "not" prefix on the type. + * An unknown value, or a "^" negation while config.unicodenegated is off, yields a "unicodebad" error. Returns token with clss "charclass" and value set to the property name (null when unknown). + */ + parseUnicode(token, sub) { + // unicodescript: \p{Cherokee} + // unicodecat: \p{Ll} \pL + // not: \P{Ll} \p{^Lu} + let match = sub.match(/p\{\^?([^}]*)}/i), + val = match && match[1], + not = sub[0] === "P"; + + if (!match && (match = sub.match(/[pP]([LMZSNPC])/))) { + val = match[1]; + } else { + not = not !== (sub[2] === "^"); + } + + token.l += match ? match[0].length : 1; + token.type = "unicodecat"; + + if (this._profile.unicodeScripts[val]) { + token.type = "unicodescript"; + } else if (!this._profile.unicodeCategories[val]) { + val = null; + } + + if (not) { + token.type = "not" + token.type; + } + + if (!this._profile.config.unicodenegated && sub[2] === "^" || !val) { + token.error = { + id: "unicodebad" + }; + } + + token.value = val; + token.clss = "charclass"; + return token; + } + + /** + * Parses an inline mode modifier such as "i-x)" at the start of sub. + * Returns undefined when sub does not start with letters / "-" followed by ")". Otherwise the token becomes a "mode" token (clss "special") whose on / off strings list the flags switched on and off. + * A flag missing from the profile's modes table sets a "modebad" error (errmode = the offending character) and leaves this._modes untouched; otherwise the updated copy replaces this._modes. Returns token. + */ + parseMode(token, sub) { + // (?i-x) + // supported modes in PCRE: i-caseinsens, x-freespacing, s-dotall, m-multiline, U-switchlazy, [J-samename] + let match = sub.match(/^[-a-z]+\)/i); + + if (!match) { + return; + } + + let supModes = this._profile.modes; + let modes = Object.assign({}, this._modes), + bad = false, + not = false, + s = match[0], + c; + token.on = token.off = ""; + + for (let i = 0, l = s.length - 1; i < l; i++) { + c = s[i]; + + if (c === "-") { + not = true; + continue; + } + + if (!supModes[c]) { + bad = true; + break; + } + + modes[c] = !not; + token.on = token.on.replace(c, ""); + + if (not) { + token.off = token.off.replace(c, ""); + token.off += c; + } else { + token.on += c; + } + } + + token.clss = "special"; + token.type = "mode"; + token.l = match[0].length + 2; + + if (bad) { + token.error = { + id: "modebad" + }; + token.errmode = c; + } else { + this._modes = modes; + } + + return token; + } + + /** Parses a braced quantifier ({n}, {n,} or {n,m}) that starts at token.i: extends token.l up to the closing brace, sets min / max (max = -1 when the upper bound is left empty, max = min when there is no comma) and reports "quantrev" when min exceeds a bounded max. Returns token. */ + parseQuant(str, token) { + // quantifier: {0,3} {3} {1,} + token.type = token.clss = "quant"; + let i = token.i; + let end = str.indexOf("}", i + 1); + token.l += end - i; + let arr = str.substring(i + 1, end).split(","); + 
token.min = parseInt(arr[0]); + token.max = arr[1] === undefined ? token.min : arr[1] === "" ? -1 : parseInt(arr[1]); + + if (token.max !== -1 && token.min > token.max) { + token.error = { + id: "quantrev" + }; + } + + return token; + } + + /** + * Called by parse with the token that follows a "-" range token inside a set: end.prv is the range token and its prv is the range start. + * When either endpoint has no char code, the "-" is re-lexed as a plain character. Otherwise the range token gets clss "set", both endpoints point at it through proxy, and token.set holds [start, range, end]; a start code greater than the end code is reported as "rangerev". + * NOTE(review): the "rangerev" error object (not the token) is pushed onto this.errors, unlike addError - confirm consumers of errors accept that shape. + */ + validateRange(str, end) { + // char range: [a-z] [\11-\n] + let next = end, + token = end.prv, + prv = token.prv; + + if (prv.code === undefined || next.code === undefined) { + // not a range, rewrite as a char: + this.parseChar(str, token); + } else { + token.clss = "set"; + + if (prv.code > next.code) { + // this gets added here because parse has already moved to the next token: + this.errors.push(token.error = { + id: "rangerev" + }); + } // preserve as separate tokens, but treat as one in the UI: + + + next.proxy = prv.proxy = token; + token.set = [prv, token, next]; + } + } + +} +/** Token types that parseChar and the single-character escape handling tag with clss "anchor". */ +ExpressionLexer.ANCHOR_TYPES = { + "bof": true, + "eof": true, + "bos": true, + "eos": true, + "abseos": true, + "wordboundary": true, + "notwordboundary": true, + "prevmatchend": true +}; + +module.exports = ExpressionLexer; +//# sourceMappingURL=lexer.js.map diff --git a/packages/next/compiled/regexr-lexer/profiles.js b/packages/next/compiled/regexr-lexer/profiles.js new file mode 100644 index 0000000000000..73f49837158bf --- /dev/null +++ b/packages/next/compiled/regexr-lexer/profiles.js @@ -0,0 +1,838 @@ +'use strict'; + +/* +RegExr: Learn, Build, & Test RegEx +Copyright (C) 2017 gskinner.com, inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program. If not, see . +*/ + +/* +The core profile essentially defines every feature we support, and is then pared down by other profiles. All values should be y (true). + +It also acts in part as pseudo documentation for all of the "type" values. + */ +/* y and n are shorthands for true and false used throughout the tables below. */ +let y = true, + n = false; +/* The lexer reads these tables by key: flags, escChars, badEscChars, escCharCodes, escCharTypes, charTypes, unquantifiable, unicodeScripts, unicodeCategories, posixCharClasses, modes, tokens and config. */ +let core = { + id: "core", + flags: { + "g": "global", + // note that this is not a real flag in some flavors, but a different method call + "i": "caseinsensitive", + "m": "multiline", + "s": "dotall", + "u": "unicode", + "y": "sticky", + "x": "extended", + "U": "ungreedy" + }, + // reserved characters that need to be escaped: + escChars: "+*?^$\\.[]{}()|/".split("").reduce((o, c) => { + o[c] = y; + return o; + }, {}), + // escape chars that are specifically not supported by the flavor: + badEscChars: n, + escCharCodes: { + "0": 0, + // null + "a": 7, + // bell + "t": 9, + // tab + "n": 10, + // lf + "v": 11, + // vertical tab + "f": 12, + // form feed + "r": 13, + // cr + "e": 27 // escape + + }, + escCharTypes: { + "A": "bos", + "b": "wordboundary", + "B": "notwordboundary", + "d": "digit", + "D": "notdigit", + "G": "prevmatchend", + "h": "hwhitespace", + "H": "nothwhitespace", + "K": "keepout", + "N": "notlinebreak", + "R": "linebreak", + "s": "whitespace", + "S": "notwhitespace", + "v": "vwhitespace", + "V": "notvwhitespace", + "w": "word", + "W": "notword", + "X": "unicodegrapheme", + "Z": "eos", + "z": "abseos" + }, + charTypes: { + ".": "dot", + "|": "alt", + "$": "eof", + "^": "bof", + "?": "opt", + // also: "lazy" + "+": "plus", + // also: "possessive" + "*": "star" + }, + unquantifiable: { + // all group/set open tokens are unquantifiable by default (ie. 
tokens with a .close value) + "quant": y, + "plus": y, + "star": y, + "opt": y, + "lazy": y, + "possessive": y, + "eof": y, + "bof": y, + "eos": y, + "abseos": y, + "alt": y, + "open": y, + "mode": y, + "comment": y, + // TODO: this should actually be ignored by quantifiers. + "condition": y + }, + unicodeScripts: { + // from: http://www.pcre.org/original/doc/html/pcrepattern.html + "Arabic": y, + "Armenian": y, + "Avestan": y, + "Balinese": y, + "Bamum": y, + "Bassa_Vah": y, + "Batak": y, + "Bengali": y, + "Bopomofo": y, + "Brahmi": y, + "Braille": y, + "Buginese": y, + "Buhid": y, + "Canadian_Aboriginal": y, + "Carian": y, + "Caucasian_Albanian": y, + "Chakma": y, + "Cham": y, + "Cherokee": y, + "Common": y, + "Coptic": y, + "Cuneiform": y, + "Cypriot": y, + "Cyrillic": y, + "Deseret": y, + "Devanagari": y, + "Duployan": y, + "Egyptian_Hieroglyphs": y, + "Elbasan": y, + "Ethiopic": y, + "Georgian": y, + "Glagolitic": y, + "Gothic": y, + "Grantha": y, + "Greek": y, + "Gujarati": y, + "Gurmukhi": y, + "Han": y, + "Hangul": y, + "Hanunoo": y, + "Hebrew": y, + "Hiragana": y, + "Imperial_Aramaic": y, + "Inherited": y, + "Inscriptional_Pahlavi": y, + "Inscriptional_Parthian": y, + "Javanese": y, + "Kaithi": y, + "Kannada": y, + "Katakana": y, + "Kayah_Li": y, + "Kharoshthi": y, + "Khmer": y, + "Khojki": y, + "Khudawadi": y, + "Lao": y, + "Latin": y, + "Lepcha": y, + "Limbu": y, + "Linear_A": y, + "Linear_B": y, + "Lisu": y, + "Lycian": y, + "Lydian": y, + "Mahajani": y, + "Malayalam": y, + "Mandaic": y, + "Manichaean": y, + "Meetei_Mayek": y, + "Mende_Kikakui": y, + "Meroitic_Cursive": y, + "Meroitic_Hieroglyphs": y, + "Miao": y, + "Modi": y, + "Mongolian": y, + "Mro": y, + "Myanmar": y, + "Nabataean": y, + "New_Tai_Lue": y, + "Nko": y, + "Ogham": y, + "Ol_Chiki": y, + "Old_Italic": y, + "Old_North_Arabian": y, + "Old_Permic": y, + "Old_Persian": y, + "Old_South_Arabian": y, + "Old_Turkic": y, + "Oriya": y, + "Osmanya": y, + "Pahawh_Hmong": y, + "Palmyrene": y, + 
"Pau_Cin_Hau": y, + "Phags_Pa": y, + "Phoenician": y, + "Psalter_Pahlavi": y, + "Rejang": y, + "Runic": y, + "Samaritan": y, + "Saurashtra": y, + "Sharada": y, + "Shavian": y, + "Siddham": y, + "Sinhala": y, + "Sora_Sompeng": y, + "Sundanese": y, + "Syloti_Nagri": y, + "Syriac": y, + "Tagalog": y, + "Tagbanwa": y, + "Tai_Le": y, + "Tai_Tham": y, + "Tai_Viet": y, + "Takri": y, + "Tamil": y, + "Telugu": y, + "Thaana": y, + "Thai": y, + "Tibetan": y, + "Tifinagh": y, + "Tirhuta": y, + "Ugaritic": y, + "Vai": y, + "Warang_Citi": y, + "Yi": y + }, + unicodeCategories: { + // from: http://www.pcre.org/original/doc/html/pcrepattern.html + "C": y, + // Other + "Cc": y, + // Control + "Cf": y, + // Format + "Cn": y, + // Unassigned + "Co": y, + // Private use + "Cs": y, + // Surrogate + "L": y, + // Letter + "L&": y, + // Any letter + "Ll": y, + // Lower case letter + "Lm": y, + // Modifier letter + "Lo": y, + // Other letter + "Lt": y, + // Title case letter + "Lu": y, + // Upper case letter + "M": y, + // Mark + "Mc": y, + // Spacing mark + "Me": y, + // Enclosing mark + "Mn": y, + // Non-spacing mark + "N": y, + // Number + "Nd": y, + // Decimal number + "Nl": y, + // Letter number + "No": y, + // Other number + "P": y, + // Punctuation + "Pc": y, + // Connector punctuation + "Pd": y, + // Dash punctuation + "Pe": y, + // Close punctuation + "Pf": y, + // Final punctuation + "Pi": y, + // Initial punctuation + "Po": y, + // Other punctuation + "Ps": y, + // Open punctuation + "S": y, + // Symbol + "Sc": y, + // Currency symbol + "Sk": y, + // Modifier symbol + "Sm": y, + // Mathematical symbol + "So": y, + // Other symbol + "Z": y, + // Separator + "Zl": y, + // Line separator + "Zp": y, + // Paragraph separator + "Zs": y // Space separator + + }, + posixCharClasses: { + // from: http://www.pcre.org/original/doc/html/pcrepattern.html + "alnum": y, + // letters and digits + "alpha": y, + // letters + "ascii": y, + // character codes 0 - 127 + "blank": y, + // space or tab 
only + "cntrl": y, + // control characters + "digit": y, + // decimal digits (same as \d) + "graph": y, + // printing characters, excluding space + "lower": y, + // lower case letters + "print": y, + // printing characters, including space + "punct": y, + // printing characters, excluding letters and digits and space + "space": y, + // white space (the same as \s from PCRE 8.34) + "upper": y, + // upper case letters + "word": y, + // "word" characters (same as \w) + "xdigit": y // hexadecimal digits + + }, + modes: { + "i": "caseinsensitive", + "s": "dotall", + "m": "multiline", + "x": "freespacing", + "J": "samename", + "U": "switchlazy" + }, + tokens: { + // note that not all of these are actively used in the lexer, but are included for completeness. + "open": y, + // opening / + "close": y, + // closing / + "char": y, + // abc + // classes: + // also in escCharTypes and charTypes + "set": y, + // [a-z] + "setnot": y, + // [^a-z] + "setclose": y, + // ] + "range": y, + // [a-z] + "unicodecat": y, + // \p{Ll} \P{^Ll} \pL + "notunicodecat": y, + // \P{Ll} \p{^Ll} \PL + "unicodescript": y, + // \p{Cherokee} \P{^Cherokee} + "notunicodescript": y, + // \P{Cherokee} \p{^Cherokee} + "posixcharclass": y, + // [[:alpha:]] + // not in supported flavors: "posixcollseq": y, // [[.foo.]] // this is recognized by the lexer, currently returns "notsupported" error + // not in supported flavors: "unicodeblock": y, // \p{InThai} \p{IsThai} and NOT \P + // not in supported flavors: "subtract": y, // [base-[subtract]] + // not in supported flavors: "intersect": y, // [base&&[intersect]] + // esc: + // also in escCharCodes and escCharTypes + "escoctal": y, + // \11 + "escunicodeu": y, + // \uFFFF + "escunicodeub": y, + // \u{00A9} + "escunicodexb": y, + // \x{00A9} + "escsequence": y, + // \Q...\E + "eschexadecimal": y, + // \xFF + "esccontrolchar": y, + // \cA + "escoctalo": y, + // \o{377} // resolved to escoctal in lexer, no docs required + "escchar": y, + // \m (unrecognized 
escapes) // no reference documentation required + // group: + "group": y, + // (foo) + "groupclose": y, + // ) + "noncapgroup": y, + // (?:foo) + "namedgroup": y, + // (?Pfoo) (?foo) (?'name'foo) + "atomic": y, + // (?>foo|bar) + "define": y, + // (?(DEFINE)foo) + "branchreset": y, + // (?|(a)|(b)) + // lookaround: + "poslookbehind": y, + // (?<=foo) + "neglookbehind": y, + // (? \k'name' \k{name} (?P=name) \g{name} + "numref": y, + // \1 + "extnumref": y, + // \g{-1} \g{+1} \g{1} \g1 \g-1 + "recursion": y, + // (?R) (?0) \g<0> \g'0' + "numsubroutine": y, + // \g<1> \g'-1' (?1) (?-1) + "namedsubroutine": y, + // \g \g'name' (?&name) (?P>name) + // quantifiers: + // also in specialChars + "quant": y, + // {1,2} + "possessive": y, + // ++ + "lazy": y, + // ? + // special: + "conditional": y, + // (?(?=if)then|else) + "condition": y, + // (?=if) any lookaround + "conditionalelse": y, + // | + "conditionalgroup": y, + // (?(1)a|b) (?(-1)a|b) (?(name)a|b) + "mode": y, + // (?i-x) see modes above + "comment": y, + // (?#comment) + // meta: + "matchanyset": y // [\s\S] + + }, + substTokens: { + // named references aren't supported in JS or PCRE / PHP + "subst_$esc": y, + // $$ + "subst_$&match": y, + // $& + "subst_$before": y, + // $` + "subst_$after": y, + // $' + "subst_$group": y, + // $1 $99 // resolved to subst_group in lexer, no docs required + "subst_$bgroup": y, + // ${1} ${99} // resolved to subst_group in lexer, no docs required + "subst_bsgroup": y, + // \1 \99 // resolved to subst_group in lexer, no docs required + "subst_group": y, + // $1 \1 \{1} // combined in docs, not used by lexer + "subst_0match": y, + // $0 \0 \{0} + // this isn't a feature of the engine, but of RegExr: + "subst_esc": y // \n \r \u1234 + + }, + config: { + "forwardref": y, + // \1(a) + "nestedref": y, + // (\1a|b)+ + "ctrlcodeerr": y, + // does \c error? (vs decompose) + "reftooctalalways": y, + // does a single digit reference \1 become an octal? 
(vs remain an unmatched ref) + "substdecomposeref": y, + // will a subst reference decompose? (ex. \3 becomes "\" & "3" if < 3 groups) + "looseesc": y, + // should unrecognized escape sequences match the character (ex. \u could match "u") // disabled when `u` flag is set + "unicodenegated": y, + // \p{^etc}" + "namedgroupalt": y // if false, only support (?foo) + + }, + docs: {// for example: + //possessive: {desc: "+This will be appended to the existing entry." }, + //namedgroup: {tip: "This will overwrite the existing entry." } + } +}; + +/* +RegExr: Learn, Build, & Test RegEx +Copyright (C) 2017 gskinner.com, inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . +*/ + +/* +The PCRE profile is almost a straight copy of the core profile. +*/ +/* Only the entries that differ from core are listed here. NOTE(review): presumably this object is merged over core by code outside this view - confirm. */ +let y$1 = true, + n$1 = false; +let pcre = { + id: "pcre", + label: "PCRE", + browser: false, + flags: { + "u": n$1, + "y": n$1 + }, + badEscChars: "uUlLN".split("").reduce((o, c) => { + o[c] = y$1; + return o; + }, {}), + escCharCodes: { + "v": n$1 // vertical tab // PCRE support \v as vertical whitespace + + }, + tokens: { + "escunicodeu": n$1, + // \uFFFF + "escunicodeub": n$1 // \u{00A9} + // octalo PCRE 8.34+ + + }, + substTokens: { + "subst_$esc": n$1, + // $$ + "subst_$&match": n$1, + // $& + "subst_$before": n$1, + // $` + "subst_$after": n$1 // $' + + }, + config: { + "reftooctalalways": n$1, + // does a single digit reference \1 become an octal? 
(vs remain an unmatched ref) + "substdecomposeref": n$1, + // will a subst reference decompose? (ex. \3 becomes "\" & "3" if < 3 groups) + "looseesc": n$1 // should unrecognized escape sequences match the character (ex. \u could match "u") // disabled when `u` flag is set + + }, + docs: { + "escoctal": { + ext: "+

The syntax \\o{FFF} is also supported.

" + }, + "numref": { + ext: "

There are multiple syntaxes for this feature: \\1 \\g1 \\g{1}.

" + "

The latter syntaxes support relative values preceded by + or -. For example \\g-1 would match the group preceding the reference.

" + }, + "lazy": { + ext: "+

This behaviour is reversed by the ungreedy (U) flag/modifier.

" + } + } +}; + +/* +RegExr: Learn, Build, & Test RegEx +Copyright (C) 2017 gskinner.com, inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . +*/ + +/* +The javascript profile disables a large number of features. + +Note that JS warnings are currently added in addJSWarnings in the ExpresssionLexer. +*/ +let n$2 = false; + +function test(expr, flag) { + try { + return new RegExp(expr, flag) && undefined; + } catch (e) { + return n$2; + } +} + +function testFlag(flag) { + return test(".", flag); +} + +let unicodeFlag = testFlag("u"); +let stickyFlag = testFlag("y"); +let dotallFlag = testFlag("s"); +let lookbehind = test("(?<=A)"); +let namedgroup = test("(?B)"); +let unicodecat = test("\\p{Ll}", "u"); // disabled when `u` flag is not set + +let javascript = { + id: "js", + label: "JavaScript", + browser: true, + flags: { + "s": dotallFlag, + // warning + "x": n$2, + "u": unicodeFlag, + // warning + "y": stickyFlag, + // warning + "U": n$2 + }, + escCharCodes: { + "a": n$2, + // bell + "e": n$2 // escape + + }, + escCharTypes: { + "A": n$2, + // bos + "G": n$2, + // prevmatchend + "h": n$2, + // hwhitespace + "H": n$2, + // nothwhitespace + "K": n$2, + // keepout + "N": n$2, + // notlinebreak + "R": n$2, + // newline + "v": n$2, + // vwhitespace + "V": n$2, + // notvwhitespace + "X": n$2, + // unicodegrapheme + "Z": n$2, + // eos + "z": n$2 // abseos + + }, + unicodeScripts: unicodecat, + unicodeCategories: unicodecat, + 
/*
The javascript profile disables a large number of features.

Note that JS warnings are currently added in addJSWarnings in the ExpresssionLexer.
*/
const n$2 = false;

/**
 * Probes whether the host RegExp engine accepts a pattern / flag combination.
 *
 * @param {string} expr - Pattern source handed to the RegExp constructor.
 * @param {string} [flag] - Flags handed to the RegExp constructor.
 * @returns {undefined|false} `undefined` when the RegExp can be constructed,
 *   so that `merge` fills the entry in from the core profile; `false` when
 *   construction throws, which keeps the feature disabled after the merge.
 */
function test(expr, flag) {
  try {
    new RegExp(expr, flag);
    return undefined;
  } catch (e) {
    return n$2;
  }
}

/**
 * Probes whether the host RegExp engine accepts a flag.
 *
 * @param {string} flag - Single flag character to try.
 * @returns {undefined|false} Same contract as `test`.
 */
function testFlag(flag) {
  return test(".", flag);
}

const unicodeFlag = testFlag("u");
const stickyFlag = testFlag("y");
const dotallFlag = testFlag("s");
const lookbehind = test("(?<=A)");
// The probe has to be a syntactically valid named group; a malformed pattern
// such as "(?B)" always throws and would disable named groups everywhere.
const namedgroup = test("(?<A>B)");
const unicodecat = test("\\p{Ll}", "u"); // disabled when `u` flag is not set

/**
 * JavaScript-specific overrides that are layered on top of the core profile by
 * `merge(core, javascript)`. `false` disables an entry; `undefined` (a
 * successful probe) lets the core value through.
 *
 * NOTE(review): the `docs.unicodecat.ext` strings keep exactly the text that
 * is visible in this file, with paragraph breaks written as `\n` escapes. The
 * breaks (and the "MDN page" link target) look like markup that was lost
 * upstream of this file - confirm against the original regexr sources.
 */
const javascript = {
  id: "js",
  label: "JavaScript",
  browser: true,
  flags: {
    "s": dotallFlag, // warning
    "x": n$2,
    "u": unicodeFlag, // warning
    "y": stickyFlag, // warning
    "U": n$2
  },
  escCharCodes: {
    "a": n$2, // bell
    "e": n$2 // escape
  },
  escCharTypes: {
    "A": n$2, // bos
    "G": n$2, // prevmatchend
    "h": n$2, // hwhitespace
    "H": n$2, // nothwhitespace
    "K": n$2, // keepout
    "N": n$2, // notlinebreak
    "R": n$2, // newline
    "v": n$2, // vwhitespace
    "V": n$2, // notvwhitespace
    "X": n$2, // unicodegrapheme
    "Z": n$2, // eos
    "z": n$2 // abseos
  },
  unicodeScripts: unicodecat,
  unicodeCategories: unicodecat,
  posixCharClasses: n$2,
  modes: n$2,
  tokens: {
    // classes:
    // also in escCharSpecials and specialChars
    "unicodecat": unicodecat, // \p{Ll} \P{^Ll} \pL
    "notunicodecat": unicodecat, // \P{Ll} \p{^Ll} \PL
    "unicodescript": unicodecat, // \p{Cherokee} \P{^Cherokee}
    "notunicodescript": unicodecat, // \P{Cherokee} \p{^Cherokee}
    "posixcharclass": n$2, // [[:alpha:]]
    // esc:
    // also in escCharCodes and escCharSpecials
    "escunicodeub": unicodeFlag, // \u{00A9}
    "escunicodexb": n$2, // \x{00A9}
    "escsequence": n$2, // \Q...\E
    "escoctalo": n$2, // \o{377}
    // group:
    "namedgroup": namedgroup, // (?P<name>foo) (?<name>foo) (?'name'foo)
    "atomic": n$2, // (?>foo|bar)
    "define": n$2, // (?(DEFINE)foo)
    "branchreset": n$2, // (?|(a)|(b))
    // lookaround:
    "poslookbehind": lookbehind, // (?<=foo) // warning
    "neglookbehind": lookbehind, // (?<!foo) // warning
    // NOTE(review): the comment that followed "neglookbehind" was truncated
    // and ran straight into named-reference syntax (\k'name' \k{name}
    // (?P=name) \g{name}). A named-reference token entry may have been lost
    // here - confirm against the upstream profile before relying on the
    // value inherited from core.
    "extnumref": n$2, // \g{-1} \g{+1} \g{1} \g1 \g-1
    "recursion": n$2, // (?R) (?0) \g<0> \g'0'
    "numsubroutine": n$2, // \g<1> \g'-1' (?1) (?-1)
    "namedsubroutine": n$2, // \g<name> \g'name' (?&name) (?P>name)
    // quantifiers:
    // also in specialChars
    "possessive": n$2,
    // special:
    "conditional": n$2, // (?(?=if)then|else)
    // NOTE(review): the core profile names this token "condition"; the key
    // below does not match it, so the core value is what survives the merge.
    // Verify which name the lexer actually reads.
    "conditionalif": n$2, // (?=if) any lookaround
    "conditionalelse": n$2, // |
    "conditionalgroup": n$2, // (?(1)a|b) (?(-1)a|b) (?(name)a|b)
    "mode": n$2, // (?i-x) see modes above
    "comment": n$2 // (?#comment)
  },
  config: {
    "forwardref": n$2, // \1(a)
    "nestedref": n$2, // (\1a|b)+
    "ctrlcodeerr": n$2, // does \c error, or decompose?
    "unicodenegated": n$2, // \p{^etc}
    "namedgroupalt": n$2 // if false, only support (?<name>foo)
  },
  substTokens: {
    "subst_0match": n$2, // $0 \0 \{0}
    "subst_$bgroup": n$2, // ${1} ${99}
    "subst_bsgroup": n$2 // \1 \99
  },
  docs: {
    "subst_group": {
      ext: ""
    }, // remove other syntaxes.
    "namedgroup": {
      ext: ""
    }, // remove other syntaxes.
    "unicodecat": {
      ext: "\n\nRequires the u flag.\n\n" +
        "\n\nFor a list of values, see this MDN page.\n\n"
    } // notunicodecat, unicodescript, notunicodescript are copied from unicodecat below.
  }
};

// Share one docs entry between all four unicode category / script tokens.
javascript.docs.notunicodecat = javascript.docs.unicodescript = javascript.docs.notunicodescript = javascript.docs.unicodecat;
/**
 * Merges the defaults in `p1` into the overrides in `p2` (mutating and
 * returning `p2`). Essentially a simple deep copy without array support.
 *
 * Rules, applied per own enumerable key of `p1`:
 * - `p2[key] === false`: the override disables the entry; it is left as-is.
 * - `p1[key]` is a non-null object: recurse, creating `{}` in `p2` if needed.
 *   A truthy primitive already stored in `p2` wins and is left untouched
 *   (assigning properties onto a primitive would throw in strict mode).
 * - otherwise: copy the default only when `p2[key]` is `undefined`.
 *
 * @param {Object} p1 - Source of default values; never mutated.
 * @param {Object} p2 - Overrides; receives the missing defaults.
 * @returns {Object} `p2`.
 */
function merge(p1, p2) {
  if (p1 == null) {
    return p2;
  }

  // Object.keys skips inherited properties, which must not leak into profiles.
  for (const key of Object.keys(p1)) {
    const inherited = p1[key];
    const override = p2[key];

    if (override === false) {
      continue;
    }

    // `typeof null` is "object"; null is a plain value, not a nested profile.
    if (inherited !== null && typeof inherited === "object") {
      const target = override || {};

      if (typeof target !== "object") {
        continue;
      }

      p2[key] = merge(inherited, target);
    } else if (override === undefined) {
      p2[key] = inherited;
    }
  }

  return p2;
}
// Copies the prebuilt lexer files shipped in the `regexr` package's
// `lexer-dist` folder into `compiled/regexr-lexer`.
export async function copy_regexr_lexer(task, opts) {
  // Locate the installed package through its package.json, then express the
  // folder relative to this taskfile before appending the glob.
  const packageDir = dirname(require.resolve('regexr/package.json'))
  const lexerGlob = join(relative(__dirname, packageDir), 'lexer-dist/**/*')

  await task.source(lexerGlob).target('compiled/regexr-lexer')
}

// Runs every precompile step in parallel, forwarding `opts` to each of them.
export async function precompile(task, opts) {
  const steps = [
    'browser_polyfills',
    'path_to_regexp',
    'copy_ncced',
    'copy_regexr_lexer',
  ]

  await task.parallel(steps, opts)
}