From 7d836ba2637095bf13d794a10efa27d692f09ac2 Mon Sep 17 00:00:00 2001 From: JJ Kasper Date: Thu, 1 Apr 2021 21:48:25 -0500 Subject: [PATCH 1/3] Use regex lexar for gathering named groups from has --- .eslintignore | 3 +- docs/api-reference/next.config.js/headers.md | 3 + .../api-reference/next.config.js/redirects.md | 3 + docs/api-reference/next.config.js/rewrites.md | 5 +- packages/next/lib/load-custom-routes.ts | 26 +- packages/next/lib/regexr/expression-lexer.js | 951 ++++++++++++++++++ packages/next/lib/regexr/profile/core.js | 420 ++++++++ packages/next/lib/regexr/profile/index.js | 15 + .../next/lib/regexr/profile/javascript.js | 160 +++ .../lib/router/utils/prepare-destination.ts | 10 +- test/integration/custom-routes/next.config.js | 24 +- .../custom-routes/test/index.test.js | 57 +- 12 files changed, 1651 insertions(+), 26 deletions(-) create mode 100644 packages/next/lib/regexr/expression-lexer.js create mode 100644 packages/next/lib/regexr/profile/core.js create mode 100644 packages/next/lib/regexr/profile/index.js create mode 100644 packages/next/lib/regexr/profile/javascript.js diff --git a/.eslintignore b/.eslintignore index 37e0a229afb6d..48f68189799e1 100644 --- a/.eslintignore +++ b/.eslintignore @@ -16,4 +16,5 @@ packages/next-codemod/**/*.js packages/next-codemod/**/*.d.ts packages/next-env/**/*.d.ts test/integration/async-modules/** -test-timings.json \ No newline at end of file +test-timings.json +packages/next/lib/regexr/**/* \ No newline at end of file diff --git a/docs/api-reference/next.config.js/headers.md b/docs/api-reference/next.config.js/headers.md index 0e56ce85a1235..07fedce813117 100644 --- a/docs/api-reference/next.config.js/headers.md +++ b/docs/api-reference/next.config.js/headers.md @@ -193,6 +193,9 @@ module.exports = { { type: 'query', key: 'page', + // the page value will not be available in the + // header key/values since value is provided and + // doesn't use a named capture group e.g. 
(?<page>home) value: 'home', }, { diff --git a/docs/api-reference/next.config.js/redirects.md b/docs/api-reference/next.config.js/redirects.md index fe946b4260f1f..6fbc094556f1b 100644 --- a/docs/api-reference/next.config.js/redirects.md +++ b/docs/api-reference/next.config.js/redirects.md @@ -133,6 +133,9 @@ module.exports = { { type: 'query', key: 'page', + // the page value will not be available in the + // destination since value is provided and doesn't + // use a named capture group e.g. (?<page>home) value: 'home', }, { diff --git a/docs/api-reference/next.config.js/rewrites.md b/docs/api-reference/next.config.js/rewrites.md index 4db8b04b5199c..6e69e0b037081 100644 --- a/docs/api-reference/next.config.js/rewrites.md +++ b/docs/api-reference/next.config.js/rewrites.md @@ -212,6 +212,9 @@ module.exports = { { type: 'query', key: 'page', + // the page value will not be available in the + // destination since value is provided and doesn't + // use a named capture group e.g. (?<page>home) value: 'home', }, { @@ -220,7 +223,7 @@ module.exports = { value: 'true', }, ], - destination: '/:path*/:page', + destination: '/:path*/home', }, // if the header `x-authorized` is present and // contains a matching value, this rewrite will be applied diff --git a/packages/next/lib/load-custom-routes.ts b/packages/next/lib/load-custom-routes.ts index 45ec7d22fe36a..e66f6885be44a 100644 --- a/packages/next/lib/load-custom-routes.ts +++ b/packages/next/lib/load-custom-routes.ts @@ -8,7 +8,8 @@ import { } from '../next-server/lib/constants' import { execOnce } from '../next-server/lib/utils' import * as Log from '../build/output/log' -import { getSafeParamName } from '../next-server/lib/router/utils/prepare-destination' +// @ts-ignore +import Lexer from './regexr/expression-lexer' export type RouteHas = | { @@ -336,20 +337,15 @@ function checkCustomRoutes( if (hasItem.value) { const matcher = new RegExp(`^${hasItem.value}$`) - const matches = matcher.exec('') - - if (matches) { - if (matches.groups) 
{ - Object.keys(matches.groups).forEach((groupKey) => { - const safeKey = getSafeParamName(groupKey) - - if (safeKey && matches.groups![groupKey]) { - hasSegments.add(safeKey) - } - }) - } else { - hasSegments.add(hasItem.key || 'host') - } + const lexer = new Lexer() + lexer.parse(`/${matcher.source}/`) + + Object.keys(lexer.namedGroups).forEach((groupKey) => { + hasSegments.add(groupKey) + }) + + if (hasItem.type === 'host') { + hasSegments.add('host') } } } diff --git a/packages/next/lib/regexr/expression-lexer.js b/packages/next/lib/regexr/expression-lexer.js new file mode 100644 index 0000000000000..2beccd460473a --- /dev/null +++ b/packages/next/lib/regexr/expression-lexer.js @@ -0,0 +1,951 @@ +/* +RegExr: Learn, Build, & Test RegEx +Copyright (C) 2017 gskinner.com, inc. +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. +You should have received a copy of the GNU General Public License +along with this program. If not, see <https://www.gnu.org/licenses/>. 
+*/ + +const profile = require('./profile') + +const Utils = { + copy: Object.assign, +} + +const SUPPORT_MAP_PROPS = { + // 1 = reverse, 0 - normal + flags: 1, + // escape is handled separately + // escCharCodes is handled separately + escCharTypes: 1, + charTypes: 1, + // unquantifiables not included + // unicodeScripts not included + // unicodeCategories not included + // posixCharClasses not included + // modes not included + tokens: 0, + substTokens: 0, + // config not included + // docs not included +} + +class ExpressionLexer { + constructor() { + this._profile = profile + this.string = this.token = this.errors = this.captureGroups = this.namedGroups = null + this._buildSupportMap(this._profile) + } + + parse(str) { + if (!this._profile) { + return null + } + if (str === this.string) { + return this.token + } + + this.token = null + this._modes = {} + this.string = str + this.errors = [] + let capgroups = (this.captureGroups = []) + let namedgroups = (this.namedGroups = {}) + let brgroups = (this.branchResetGroups = []) + let groups = [], + refs = [], + i = 0, + l = str.length + let o, + c, + token, + charset = null + // previous is the previous token, prv is the previous "active" token (!ignore) + let prev = null, + prv = null + let profile = this._profile, + unquantifiable = profile.unquantifiable + let charTypes = profile.charTypes + let closeIndex = str.lastIndexOf('/') + + for (let i = closeIndex + 1; i < l; i++) { + this._modes[str[i]] = true + } + + while (i < l) { + c = str[i] + + token = { i: i, l: 1, prev: prev, prv: prv, modes: this._modes } + if (prev) { + prev.next = token + } else { + this.token = token + } + + if (i === 0 || i >= closeIndex) { + this.parseFlag(str, token) + } else if (c === '(' && !charset) { + this.parseParen(str, token) + if (token.close === null) { + token.depth = groups.length + groups.push(token) + } + if (token.capture) { + this.addCaptureGroup(token, groups) + } + } else if (c === ')' && !charset) { + token.type = 
'groupclose' + if (groups.length) { + o = token.open = groups.pop() + o.close = token + if (o.type === 'branchreset') { + brgroups.pop() + } + } else { + token.error = { id: 'groupclose' } + } + } else if (c === '[') { + charset = this.parseSquareBracket(str, token, charset) + } else if (c === ']' && charset) { + token.type = 'setclose' + token.open = charset + charset.close = token + charset = null + } else if ( + c === '+' && + prv && + prv.clss === 'quant' && + profile.tokens.possessive + ) { + token.type = 'possessive' + token.related = [prv] + } else if ((c === '+' || c === '*') && !charset) { + token.type = charTypes[c] + token.clss = 'quant' + token.min = c === '+' ? 1 : 0 + token.max = -1 + } else if ( + c === '{' && + !charset && + str.substr(i).search(/^{\d+,?\d*}/) !== -1 + ) { + this.parseQuant(str, token) + } else if (c === '\\') { + this.parseBackSlash(str, token, charset, closeIndex) + } else if (c === '?' && !charset) { + if (!prv || prv.clss !== 'quant') { + token.type = charTypes[c] + token.clss = 'quant' + token.min = 0 + token.max = 1 + } else { + token.type = 'lazy' + token.related = [prv] + } + } else if ( + c === '-' && + charset && + prv.code !== undefined && + prv.prv && + prv.prv.type !== 'range' + ) { + // this may be the start of a range, but we'll need to validate after the next token. + token.type = 'range' + } else { + this.parseChar(str, token, charset) + if (!charset && this._modes.x && /\s/.test(c)) { + token.ignore = true + token.type = 'ignorews' + } + } + + // post process token: + // quantifier: + if (token.clss === 'quant') { + if ( + !prv || + prv.close !== undefined || + unquantifiable[prv.type] || + (prv.open && unquantifiable[prv.open.type]) + ) { + token.error = { id: 'quanttarg' } + } else { + token.related = [prv.open || prv] + } + } + + // reference: + if (token.group === true) { + refs.push(token) + } + + // conditional: + let curGroup = groups.length ? 
groups[groups.length - 1] : null + if ( + curGroup && + (curGroup.type === 'conditional' || + curGroup.type === 'conditionalgroup') && + token.type === 'alt' + ) { + if (!curGroup.alt) { + curGroup.alt = token + } else { + token.error = { id: 'extraelse' } + } + token.related = [curGroup] + token.type = 'conditionalelse' + token.clss = 'special' + } else if (curGroup && curGroup.type === 'branchreset') { + // reset group + curGroup.curGroupNum = curGroup.inGroupNum + } + + // range: + if (prv && prv.type === 'range' && prv.l === 1) { + this.validateRange(str, token) + } + + // js warnings: + // TODO: this isn't ideal, but I'm hesitant to write a more robust solution for a couple of edge cases. + if (profile.id === 'js') { + this.addJSWarnings(token) + } + + // general: + if (token.open && !token.clss) { + token.clss = token.open.clss + } + if (token.error) { + this.addError(token) + } + i += token.l + prev = token + if (!token.ignore) { + prv = token + } + } + + // post processing: + while (groups.length) { + this.addError(groups.pop(), { id: 'groupopen' }) + } + this.matchRefs(refs, capgroups, namedgroups) + if (charset) { + this.addError(charset, { id: 'setopen' }) + } + + return this.token + } + + _buildSupportMap(profile) { + if (profile._supportMap) { + return + } + let map = (profile._supportMap = {}), + props = SUPPORT_MAP_PROPS, + n + for (n in props) { + this._addToSupportMap(map, profile[n], !!props[n]) + } + let o = profile.escCharCodes, + esc = profile.escChars + for (n in o) { + map['esc_' + o[n]] = true + } + for (n in esc) { + map['esc_' + esc[n]] = true + } + } + + _addToSupportMap(map, o, rev) { + if (rev) { + for (let n in o) { + map[o[n]] = true + } + } else { + for (let n in o) { + map[n] = o[n] + } + } + } + + addError(token, error = token.error) { + token.error = error + this.errors.push(token) + } + + addJSWarnings(token) { + if (token.error) { + return + } + if ( + token.type === 'neglookbehind' || + token.type === 'poslookbehind' || + 
token.type === 'sticky' || + token.type === 'unicode' || + token.type == 'dotall' || + token.type === 'unicodecat' || + token.type === 'unicodescript' || + token.type === 'namedgroup' + ) { + token.error = { id: 'jsfuture', warning: true } + } + } + + addCaptureGroup(token, groups) { + // it would be nice to make branch reset groups actually highlight all of the groups that share the same number + // that would require switching to arrays of groups for each group num - requires rearchitecture throughout the app. + let capgroups = this.captureGroups, + brgroups = this.branchResetGroups, + namedgroups = this.namedGroups + let curGroup = groups.length ? groups[groups.length - 1] : null + if (brgroups.length) { + let brgroup = brgroups[brgroups.length - 1] + token.num = ++brgroup.curGroupNum + } else { + token.num = capgroups.length + 1 + } + if (!capgroups[token.num - 1]) { + capgroups.push(token) + } + if (token.name && !token.error) { + if (/\d/.test(token.name[0])) { + token.error = { id: 'badname' } + } else if (namedgroups[token.name]) { + token.error = { id: 'dupname' } + token.related = [namedgroups[token.name]] + } else { + namedgroups[token.name] = token + } + } + } + + getRef(token, str) { + token.clss = 'ref' + token.group = true + token.relIndex = this.captureGroups.length + token.name = str + } + + matchRefs(refs, indexes, names) { + while (refs.length) { + let token = refs.pop(), + name = token.name, + group = names[name] + + if (!group && !isNaN(name)) { + let sign = name[0], + index = + parseInt(name) + (sign === '+' || sign === '-' ? token.relIndex : 0) + if (sign === '-') { + index++ + } + group = indexes[index - 1] + } + if (group) { + token.group = group + token.related = [group] + token.dir = + token.i < group.i + ? 1 + : !group.close || token.i < group.close.i + ? 
0 + : -1 + } else { + delete token.group + delete token.relIndex + this.refToOctal(token) + if (token.error) { + this.errors.push(token.error) + } + } + } + } + + refToOctal(token) { + // PCRE: \# unmatched, \0 \00 \## = octal + // JS: \# \0 \00 \## = octal + // PCRE matches \8 \9 to "8" "9" + // JS: without the u flag \8 \9 match "8" "9" in IE, FF & Chrome, and "\8" "\9" in Safari. We support the former. + // JS: with the u flag, Chrome & FF throw an esc error, Safari does not. + + // TODO: handle \0 for PCRE? Would need more testing. + // TODO: this doesn't handle two digit refs with 8/9 in them. Ex. \18 - not even sure what this is interpreted as. + let name = token.name, + profile = this._profile + if (token.type !== 'numref') { + // not a simple \4 style reference, so can't decompose into an octal. + token.error = { id: 'unmatchedref' } + } else if ( + /^[0-7]{2}$/.test(name) || + (profile.config.reftooctalalways && /^[0-7]$/.test(name)) + ) { + // octal + let next = token.next, + char = String.fromCharCode(next.code) + if ( + next.type === 'char' && + char >= '0' && + char <= '7' && + parseInt(name + char, 8) <= 255 + ) { + name += char + this.mergeNext(token) + } + token.code = parseInt(name, 8) + token.clss = 'esc' + token.type = 'escoctal' + delete token.name + } else if (name === '8' || name === '9') { + this.parseEscChar(token, name) + delete token.name + } else { + token.error = { id: 'unmatchedref' } + } + } + + mergeNext(token) { + let next = token.next + token.next = next.next + token.next.prev = token + token.l++ + } + + parseFlag(str, token) { + // note that this doesn't deal with misformed patterns or incorrect flags. + let i = token.i, + c = str[i] + if (str[i] === '/') { + token.type = i === 0 ? 
'open' : 'close' + if (i !== 0) { + token.related = [this.token] + this.token.related = [token] + } + } else { + token.type = this._profile.flags[c] + } + //token.clear = true; + } + + parseChar(str, token, charset) { + let c = str[token.i] + token.type = (!charset && this._profile.charTypes[c]) || 'char' + if (!charset && c === '/') { + token.error = { id: 'fwdslash' } + } + if (token.type === 'char') { + token.code = c.charCodeAt(0) + } else if (ExpressionLexer.ANCHOR_TYPES[token.type]) { + token.clss = 'anchor' + } else if (token.type === 'dot') { + token.clss = 'charclass' + } + return token + } + + parseSquareBracket(str, token, charset) { + let match + if ( + this._profile.tokens.posixcharclass && + (match = str.substr(token.i).match(/^\[(:|\.)([^\]]*?)\1]/)) + ) { + // posixcharclass: [:alpha:] + // posixcollseq: [.ch.] + // currently neither flavor supports posixcollseq, but PCRE does flag as an error: + // TODO: the expression above currently does not catch [.\].] + token.l = match[0].length + token.value = match[2] + token.clss = 'charclass' + if (match[1] === ':') { + token.type = 'posixcharclass' + if (!this._profile.posixCharClasses[match[2]]) { + token.error = { id: 'posixcharclassbad' } + } else if (!charset) { + token.error = { id: 'posixcharclassnoset' } + } + } else { + token.type = 'posixcollseq' + // TODO: can this be generalized? Right now, no, because we assign ids that aren't in the profile. + token.error = { id: 'notsupported' } + } + } else if (!charset) { + // set [a-z] [aeiou] + // setnot [^a-z] + token.type = token.clss = 'set' + if (str[token.i + 1] === '^') { + token.l++ + token.type += 'not' + } + charset = token + } else { + // [[] (square bracket inside a set) + this.parseChar(str, token, charset) + } + return charset + } + + parseParen(str, token) { + /* + core: + . group: + . lookahead: ?= ?! + . noncap: ?: + PCRE: + . lookbehind: ?<= ? ?'name' ? + . namedref: ?P=name Also: \g'name' \k'name' etc + . comment: ?# + . atomic: ?> + . 
recursion: ?0 ?R Also: \g<0> + . define: ?(DEFINE) + . subroutine: ?1 ?-1 ?&name ?P>name + conditionalgroup: ?(1)a|b ?(-1)a|b ?(name)a|b + conditional: ?(?=if)then|else + mode: ?c-i + branchreset: ?| + */ + + token.clss = token.type = 'group' + if (str[token.i + 1] !== '?') { + token.close = null // indicates that it needs a close token. + token.capture = true + return token + } + + let sub = str.substr(token.i + 2), + match, + s = sub[0] + + if (s === ':') { + // (?:foo) + token.type = 'noncapgroup' + token.close = null + token.l = 3 + } else if (s === '>') { + // (?>foo) + token.type = 'atomic' + token.close = null + token.l = 3 + } else if (s === '|') { + // (?|(a)|(b)) + token.type = 'branchreset' + token.close = null + token.l = 3 + token.inGroupNum = token.curGroupNum = this.captureGroups.length + this.branchResetGroups.push(token) + } else if (s === '#' && (match = sub.match(/[^)]*\)/))) { + // (?#foo) + token.clss = token.type = 'comment' + token.ignore = true + token.l = 2 + match[0].length + } else if (/^(R|0)\)/.test(sub)) { + // (?R) (?0) + token.clss = 'ref' + token.type = 'recursion' + token.l = 4 + } else if ((match = sub.match(/^P=(\w+)\)/i))) { + // (?P=name) + token.type = 'namedref' + this.getRef(token, match[1]) + token.l = match[0].length + 2 + } else if (/^\(DEFINE\)/.test(sub)) { + // (?(DEFINE)foo) + token.type = 'define' + token.close = null + token.l = 10 + } else if ((match = sub.match(/^/)) || + (this._profile.config.namedgroupalt && + ((match = sub.match(/^'(\w+)'/)) || (match = sub.match(/^P<(\w+)>/)))) + ) { + // (?foo) (?'name'foo) (?Pfoo) + token.type = 'namedgroup' + token.close = null + token.name = match[1] + token.capture = true + token.l = match[0].length + 2 + } else if ( + (match = sub.match(/^([-+]?\d\d?)\)/)) || + (match = sub.match(/^(?:&|P>)(\w+)\)/)) + ) { + // (?1) (?-1) (?&name) (?P>name) + token.type = (isNaN(match[1]) ? 
'named' : 'num') + 'subroutine' + this.getRef(token, match[1]) + token.l = match[0].length + 2 + } else if ( + (match = sub.match(/^\(([-+]?\d\d?)\)/)) || + (match = sub.match(/^\((\w+)\)/)) + ) { + // (?(1)a|b) (?(-1)a|b) (?(name)a|b) + this.getRef(token, match[1]) + token.clss = 'special' + token.type = 'conditionalgroup' + token.close = null + token.l = match[0].length + 2 + } else if (/^\(\?255). In theory it should allow 4? + if (isNaN(val) || val > 255 || /[^\da-f]/i.test(match[1])) { + token.error = { id: 'esccharbad' } + } else { + token.code = val + } + } else if ((match = sub.match(/^x([\da-fA-F]{0,2})/))) { + // hex ascii: \xFF + token.type = 'eschexadecimal' + token.l += match[0].length + token.code = parseInt(match[1] || 0, 16) + } else if ((match = sub.match(/^c([a-zA-Z])?/))) { + // control char: \cA \cz + // also handles: \c + // not supported in JS strings + token.type = 'esccontrolchar' + if (match[1]) { + token.code = match[1].toUpperCase().charCodeAt(0) - 64 // A=65 + token.l += 2 + } else if (profile.config.ctrlcodeerr) { + token.l++ + token.error = { id: 'esccharbad' } + } else { + return this.parseChar(str, token, charset) // this builds the "/" token + } + } else if ((match = sub.match(/^[0-7]{1,3}/))) { + // octal ascii: \011 + token.type = 'escoctal' + sub = match[0] + if (parseInt(sub, 8) > 255) { + sub = sub.substr(0, 2) + } + token.l += sub.length + token.code = parseInt(sub, 8) + } else if (profile.tokens.escoctalo && (match = sub.match(/^o\{(.*?)}/i))) { + // \o{377} + token.type = 'escoctal' + token.l += match[0].length + val = parseInt(match[1], 8) + if (isNaN(val) || val > 255 || /[^0-7]/.test(match[1])) { + token.error = { id: 'esccharbad' } + } else { + token.code = val + } + } else { + // single char + if ((token.type = profile.escCharTypes[c])) { + token.l++ + token.clss = ExpressionLexer.ANCHOR_TYPES[token.type] + ? 
'anchor' + : 'charclass' + return token + } + + token.code = profile.escCharCodes[c] + if (token.code === undefined || token.code === false) { + // unrecognized. + return this.parseEscChar(token, c) + } + + // update SubstLexer if this changes: + token.l++ + token.type = 'esc_' + token.code + } + token.clss = 'esc' + return token + } + + parseEscChar(token, c) { + // unrecognized escchar: \u \a \8, etc + // JS: allowed except if u flag set, Safari still allows \8 \9 + // PCRE: allows \8 \9 but not others // TODO: support? + let profile = this._profile + token.l = 2 + if ( + (!profile.badEscChars[c] && profile.tokens.escchar && !this._modes.u) || + profile.escChars[c] + ) { + token.type = 'escchar' + token.code = c.charCodeAt(0) + token.clss = 'esc' + } else { + token.error = { id: 'esccharbad' } + } + } + + parseRef(token, sub) { + // namedref: \k \k'name' \k{name} \g{name} + // namedsubroutine: \g \g'name' + // numref: \g1 \g+2 \g{2} + // numsubroutine: \g<-1> \g'1' + // recursion: \g<0> \g'0' + let c = sub[0], + s = '', + match + if ((match = sub.match(/^[gk](?:'\w*'|<\w*>|{\w*})/))) { + s = match[0].substr(2, match[0].length - 3) + if (c === 'k' && !isNaN(s)) { + s = '' + } // TODO: specific error for numeric \k? + } else if ( + (match = sub.match(/^g(?:({[-+]?\d+}|<[-+]?\d+>|'[-+]?\d+')|([-+]?\d+))/)) + ) { + s = + match[2] !== undefined + ? match[2] + : match[1].substr(1, match[1].length - 2) + } + let isRef = c === 'k' || !(sub[1] === "'" || sub[1] === '<') + if (!isRef && s == 0) { + token.type = 'recursion' + token.clss = 'ref' + } else { + // namedref, extnumref, namedsubroutine, numsubroutine + token.type = + (isNaN(s) ? 'named' : (isRef ? 'ext' : '') + 'num') + + (isRef ? 'ref' : 'subroutine') + this.getRef(token, s) + } + token.l += match ? 
match[0].length : 1 + } + + parseUnicode(token, sub) { + // unicodescript: \p{Cherokee} + // unicodecat: \p{Ll} \pL + // not: \P{Ll} \p{^Lu} + let match = sub.match(/p\{\^?([^}]*)}/i), + val = match && match[1], + not = sub[0] === 'P' + if (!match && (match = sub.match(/[pP]([LMZSNPC])/))) { + val = match[1] + } else { + not = not !== (sub[2] === '^') + } + token.l += match ? match[0].length : 1 + token.type = 'unicodecat' + if (this._profile.unicodeScripts[val]) { + token.type = 'unicodescript' + } else if (!this._profile.unicodeCategories[val]) { + val = null + } + if (not) { + token.type = 'not' + token.type + } + if ((!this._profile.config.unicodenegated && sub[2] === '^') || !val) { + token.error = { id: 'unicodebad' } + } + token.value = val + token.clss = 'charclass' + return token + } + + parseMode(token, sub) { + // (?i-x) + // supported modes in PCRE: i-caseinsens, x-freespacing, s-dotall, m-multiline, U-switchlazy, [J-samename] + let match = sub.match(/^[-a-z]+\)/i) + if (!match) { + return + } + let supModes = this._profile.modes + let modes = Utils.copy({}, this._modes), + bad = false, + not = false, + s = match[0], + c + token.on = token.off = '' + + for (let i = 0, l = s.length - 1; i < l; i++) { + c = s[i] + if (c === '-') { + not = true + continue + } + if (!supModes[c]) { + bad = true + break + } + modes[c] = !not + + token.on = token.on.replace(c, '') + if (not) { + token.off = token.off.replace(c, '') + token.off += c + } else { + token.on += c + } + } + + token.clss = 'special' + token.type = 'mode' + token.l = match[0].length + 2 + + if (bad) { + token.error = { id: 'modebad' } + token.errmode = c + } else { + this._modes = modes + } + return token + } + + parseQuant(str, token) { + // quantifier: {0,3} {3} {1,} + token.type = token.clss = 'quant' + let i = token.i + let end = str.indexOf('}', i + 1) + token.l += end - i + let arr = str.substring(i + 1, end).split(',') + token.min = parseInt(arr[0]) + token.max = + arr[1] === undefined ? 
token.min : arr[1] === '' ? -1 : parseInt(arr[1]) + if (token.max !== -1 && token.min > token.max) { + token.error = { id: 'quantrev' } + } + return token + } + + validateRange(str, end) { + // char range: [a-z] [\11-\n] + let next = end, + token = end.prv, + prv = token.prv + if (prv.code === undefined || next.code === undefined) { + // not a range, rewrite as a char: + this.parseChar(str, token) + } else { + token.clss = 'set' + if (prv.code > next.code) { + // this gets added here because parse has already moved to the next token: + this.errors.push((token.error = { id: 'rangerev' })) + } + // preserve as separate tokens, but treat as one in the UI: + next.proxy = prv.proxy = token + token.set = [prv, token, next] + } + } +} + +ExpressionLexer.ANCHOR_TYPES = { + bof: true, + eof: true, + bos: true, + eos: true, + abseos: true, + wordboundary: true, + notwordboundary: true, + prevmatchend: true, +} + +module.exports = ExpressionLexer diff --git a/packages/next/lib/regexr/profile/core.js b/packages/next/lib/regexr/profile/core.js new file mode 100644 index 0000000000000..cd2ada92addff --- /dev/null +++ b/packages/next/lib/regexr/profile/core.js @@ -0,0 +1,420 @@ +/* +RegExr: Learn, Build, & Test RegEx +Copyright (C) 2017 gskinner.com, inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <https://www.gnu.org/licenses/>. +*/ + +/* +The core profile essentially defines every feature we support, and is then pared down by other profiles. 
All values should be y (true). + +It also acts in part as pseudo documentation for all of the "type" values. + */ +let y = true, + n = false + +let core = { + id: 'core', + + flags: { + g: 'global', // note that this is not a real flag in some flavors, but a different method call + i: 'caseinsensitive', + m: 'multiline', + s: 'dotall', + u: 'unicode', + y: 'sticky', + x: 'extended', + U: 'ungreedy', + }, + + // reserved characters that need to be escaped: + escChars: '+*?^$\\.[]{}()|/'.split('').reduce((o, c) => { + o[c] = y + return o + }, {}), + + // escape chars that are specifically not supported by the flavor: + badEscChars: n, + + escCharCodes: { + '0': 0, // null + a: 7, // bell + t: 9, // tab + n: 10, // lf + v: 11, // vertical tab + f: 12, // form feed + r: 13, // cr + e: 27, // escape + }, + + escCharTypes: { + A: 'bos', + b: 'wordboundary', + B: 'notwordboundary', + d: 'digit', + D: 'notdigit', + G: 'prevmatchend', + h: 'hwhitespace', + H: 'nothwhitespace', + K: 'keepout', + N: 'notlinebreak', + R: 'linebreak', + s: 'whitespace', + S: 'notwhitespace', + v: 'vwhitespace', + V: 'notvwhitespace', + w: 'word', + W: 'notword', + X: 'unicodegrapheme', + Z: 'eos', + z: 'abseos', + }, + + charTypes: { + '.': 'dot', + '|': 'alt', + $: 'eof', + '^': 'bof', + '?': 'opt', // also: "lazy" + '+': 'plus', // also: "possessive" + '*': 'star', + }, + + unquantifiable: { + // all group/set open tokens are unquantifiable by default (ie. tokens with a .close value) + quant: y, + plus: y, + star: y, + opt: y, + lazy: y, + possessive: y, + eof: y, + bof: y, + eos: y, + abseos: y, + alt: y, + open: y, + mode: y, + comment: y, // TODO: this should actually be ignored by quantifiers. 
+ condition: y, + }, + + unicodeScripts: { + // from: http://www.pcre.org/original/doc/html/pcrepattern.html + Arabic: y, + Armenian: y, + Avestan: y, + Balinese: y, + Bamum: y, + Bassa_Vah: y, + Batak: y, + Bengali: y, + Bopomofo: y, + Brahmi: y, + Braille: y, + Buginese: y, + Buhid: y, + Canadian_Aboriginal: y, + Carian: y, + Caucasian_Albanian: y, + Chakma: y, + Cham: y, + Cherokee: y, + Common: y, + Coptic: y, + Cuneiform: y, + Cypriot: y, + Cyrillic: y, + Deseret: y, + Devanagari: y, + Duployan: y, + Egyptian_Hieroglyphs: y, + Elbasan: y, + Ethiopic: y, + Georgian: y, + Glagolitic: y, + Gothic: y, + Grantha: y, + Greek: y, + Gujarati: y, + Gurmukhi: y, + Han: y, + Hangul: y, + Hanunoo: y, + Hebrew: y, + Hiragana: y, + Imperial_Aramaic: y, + Inherited: y, + Inscriptional_Pahlavi: y, + Inscriptional_Parthian: y, + Javanese: y, + Kaithi: y, + Kannada: y, + Katakana: y, + Kayah_Li: y, + Kharoshthi: y, + Khmer: y, + Khojki: y, + Khudawadi: y, + Lao: y, + Latin: y, + Lepcha: y, + Limbu: y, + Linear_A: y, + Linear_B: y, + Lisu: y, + Lycian: y, + Lydian: y, + Mahajani: y, + Malayalam: y, + Mandaic: y, + Manichaean: y, + Meetei_Mayek: y, + Mende_Kikakui: y, + Meroitic_Cursive: y, + Meroitic_Hieroglyphs: y, + Miao: y, + Modi: y, + Mongolian: y, + Mro: y, + Myanmar: y, + Nabataean: y, + New_Tai_Lue: y, + Nko: y, + Ogham: y, + Ol_Chiki: y, + Old_Italic: y, + Old_North_Arabian: y, + Old_Permic: y, + Old_Persian: y, + Old_South_Arabian: y, + Old_Turkic: y, + Oriya: y, + Osmanya: y, + Pahawh_Hmong: y, + Palmyrene: y, + Pau_Cin_Hau: y, + Phags_Pa: y, + Phoenician: y, + Psalter_Pahlavi: y, + Rejang: y, + Runic: y, + Samaritan: y, + Saurashtra: y, + Sharada: y, + Shavian: y, + Siddham: y, + Sinhala: y, + Sora_Sompeng: y, + Sundanese: y, + Syloti_Nagri: y, + Syriac: y, + Tagalog: y, + Tagbanwa: y, + Tai_Le: y, + Tai_Tham: y, + Tai_Viet: y, + Takri: y, + Tamil: y, + Telugu: y, + Thaana: y, + Thai: y, + Tibetan: y, + Tifinagh: y, + Tirhuta: y, + Ugaritic: y, + Vai: y, + 
Warang_Citi: y, + Yi: y, + }, + + unicodeCategories: { + // from: http://www.pcre.org/original/doc/html/pcrepattern.html + C: y, // Other + Cc: y, // Control + Cf: y, // Format + Cn: y, // Unassigned + Co: y, // Private use + Cs: y, // Surrogate + L: y, // Letter + 'L&': y, // Any letter + Ll: y, // Lower case letter + Lm: y, // Modifier letter + Lo: y, // Other letter + Lt: y, // Title case letter + Lu: y, // Upper case letter + M: y, // Mark + Mc: y, // Spacing mark + Me: y, // Enclosing mark + Mn: y, // Non-spacing mark + N: y, // Number + Nd: y, // Decimal number + Nl: y, // Letter number + No: y, // Other number + P: y, // Punctuation + Pc: y, // Connector punctuation + Pd: y, // Dash punctuation + Pe: y, // Close punctuation + Pf: y, // Final punctuation + Pi: y, // Initial punctuation + Po: y, // Other punctuation + Ps: y, // Open punctuation + S: y, // Symbol + Sc: y, // Currency symbol + Sk: y, // Modifier symbol + Sm: y, // Mathematical symbol + So: y, // Other symbol + Z: y, // Separator + Zl: y, // Line separator + Zp: y, // Paragraph separator + Zs: y, // Space separator + }, + + posixCharClasses: { + // from: http://www.pcre.org/original/doc/html/pcrepattern.html + alnum: y, // letters and digits + alpha: y, // letters + ascii: y, // character codes 0 - 127 + blank: y, // space or tab only + cntrl: y, // control characters + digit: y, // decimal digits (same as \d) + graph: y, // printing characters, excluding space + lower: y, // lower case letters + print: y, // printing characters, including space + punct: y, // printing characters, excluding letters and digits and space + space: y, // white space (the same as \s from PCRE 8.34) + upper: y, // upper case letters + word: y, // "word" characters (same as \w) + xdigit: y, // hexadecimal digits + }, + + modes: { + i: 'caseinsensitive', + s: 'dotall', + m: 'multiline', + x: 'freespacing', + J: 'samename', + U: 'switchlazy', + }, + + tokens: { + // note that not all of these are actively used in the 
lexer, but are included for completeness. + open: y, // opening / + close: y, // closing / + char: y, // abc + + // classes: + // also in escCharTypes and charTypes + set: y, // [a-z] + setnot: y, // [^a-z] + setclose: y, // ] + range: y, // [a-z] + unicodecat: y, // \p{Ll} \P{^Ll} \pL + notunicodecat: y, // \P{Ll} \p{^Ll} \PL + unicodescript: y, // \p{Cherokee} \P{^Cherokee} + notunicodescript: y, // \P{Cherokee} \p{^Cherokee} + posixcharclass: y, // [[:alpha:]] + // not in supported flavors: "posixcollseq": y, // [[.foo.]] // this is recognized by the lexer, currently returns "notsupported" error + // not in supported flavors: "unicodeblock": y, // \p{InThai} \p{IsThai} and NOT \P + // not in supported flavors: "subtract": y, // [base-[subtract]] + // not in supported flavors: "intersect": y, // [base&&[intersect]] + + // esc: + // also in escCharCodes and escCharTypes + escoctal: y, // \11 + escunicodeu: y, // \uFFFF + escunicodeub: y, // \u{00A9} + escunicodexb: y, // \x{00A9} + escsequence: y, // \Q...\E + eschexadecimal: y, // \xFF + esccontrolchar: y, // \cA + escoctalo: y, // \o{377} // resolved to escoctal in lexer, no docs required + escchar: y, // \m (unrecognized escapes) // no reference documentation required + + // group: + group: y, // (foo) + groupclose: y, // ) + noncapgroup: y, // (?:foo) + namedgroup: y, // (?Pfoo) (?foo) (?'name'foo) + atomic: y, // (?>foo|bar) + define: y, // (?(DEFINE)foo) + branchreset: y, // (?|(a)|(b)) + + // lookaround: + poslookbehind: y, // (?<=foo) + neglookbehind: y, // (? \k'name' \k{name} (?P=name) \g{name} + numref: y, // \1 + extnumref: y, // \g{-1} \g{+1} \g{1} \g1 \g-1 + recursion: y, // (?R) (?0) \g<0> \g'0' + numsubroutine: y, // \g<1> \g'-1' (?1) (?-1) + namedsubroutine: y, // \g \g'name' (?&name) (?P>name) + + // quantifiers: + // also in specialChars + quant: y, // {1,2} + possessive: y, // ++ + lazy: y, // ? 
+ + // special: + conditional: y, // (?(?=if)then|else) + condition: y, // (?=if) any lookaround + conditionalelse: y, // | + conditionalgroup: y, // (?(1)a|b) (?(-1)a|b) (?(name)a|b) + mode: y, // (?i-x) see modes above + comment: y, // (?#comment) + + // meta: + matchanyset: y, // [\s\S] + }, + + substTokens: { + // named references aren't supported in JS or PCRE / PHP + subst_$esc: y, // $$ + 'subst_$&match': y, // $& + subst_$before: y, // $` + subst_$after: y, // $' + subst_$group: y, // $1 $99 // resolved to subst_group in lexer, no docs required + subst_$bgroup: y, // ${1} ${99} // resolved to subst_group in lexer, no docs required + subst_bsgroup: y, // \1 \99 // resolved to subst_group in lexer, no docs required + subst_group: y, // $1 \1 \{1} // combined in docs, not used by lexer + subst_0match: y, // $0 \0 \{0} + + // this isn't a feature of the engine, but of RegExr: + subst_esc: y, // \n \r \u1234 + }, + + config: { + forwardref: y, // \1(a) + nestedref: y, // (\1a|b)+ + ctrlcodeerr: y, // does \c error? (vs decompose) + reftooctalalways: y, // does a single digit reference \1 become an octal? (vs remain an unmatched ref) + substdecomposeref: y, // will a subst reference decompose? (ex. \3 becomes "\" & "3" if < 3 groups) + looseesc: y, // should unrecognized escape sequences match the character (ex. \u could match "u") // disabled when `u` flag is set + unicodenegated: y, // \p{^etc}" + namedgroupalt: y, // if false, only support (?foo) + }, + + docs: { + // for example: + //possessive: {desc: "+This will be appended to the existing entry." }, + //namedgroup: {tip: "This will overwrite the existing entry." 
} + }, +} + +module.exports = core diff --git a/packages/next/lib/regexr/profile/index.js b/packages/next/lib/regexr/profile/index.js new file mode 100644 index 0000000000000..70614719269bb --- /dev/null +++ b/packages/next/lib/regexr/profile/index.js @@ -0,0 +1,15 @@ +function merge(p1, p2) { + // merges p1 into p2, essentially just a simple deep copy without array support. + for (let n in p1) { + if (p2[n] === false) { + continue + } else if (typeof p1[n] === 'object') { + p2[n] = merge(p1[n], p2[n] || {}) + } else if (p2[n] === undefined) { + p2[n] = p1[n] + } + } + return p2 +} + +module.exports = merge(require('./core'), require('./javascript')) diff --git a/packages/next/lib/regexr/profile/javascript.js b/packages/next/lib/regexr/profile/javascript.js new file mode 100644 index 0000000000000..89fd01490cd35 --- /dev/null +++ b/packages/next/lib/regexr/profile/javascript.js @@ -0,0 +1,160 @@ +/* +RegExr: Learn, Build, & Test RegEx +Copyright (C) 2017 gskinner.com, inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . +*/ + +/* +The javascript profile disables a large number of features. + +Note that JS warnings are currently added in addJSWarnings in the ExpresssionLexer. 
+*/ + +let y = true, + n = false +function test(expr, flag) { + try { + return new RegExp(expr, flag) && undefined + } catch (e) { + return n + } +} +function testFlag(flag) { + return test('.', flag) +} +let unicodeFlag = testFlag('u') +let stickyFlag = testFlag('y') +let dotallFlag = testFlag('s') +let lookbehind = test('(?<=A)') +let namedgroup = test('(?B)') +let unicodecat = test('\\p{Ll}', 'u') // disabled when `u` flag is not set + +let javascript = { + id: 'js', + label: 'JavaScript', + browser: true, + + flags: { + s: dotallFlag, // warning + x: n, + u: unicodeFlag, // warning + y: stickyFlag, // warning + U: n, + }, + + escCharCodes: { + a: n, // bell + e: n, // escape + }, + + escCharTypes: { + A: n, // bos + G: n, // prevmatchend + h: n, // hwhitespace + H: n, // nothwhitespace + K: n, // keepout + N: n, // notlinebreak + R: n, // newline + v: n, // vwhitespace + V: n, // notvwhitespace + X: n, // unicodegrapheme + Z: n, // eos + z: n, // abseos + }, + + unicodeScripts: unicodecat, + + unicodeCategories: unicodecat, + + posixCharClasses: n, + + modes: n, + + tokens: { + // classes: + // also in escCharSpecials and specialChars + unicodecat: unicodecat, // \p{Ll} \P{^Ll} \pL + notunicodecat: unicodecat, // \P{Ll} \p{^Ll} \PL + unicodescript: unicodecat, // \p{Cherokee} \P{^Cherokee} + notunicodescript: unicodecat, // \P{Cherokee} \p{^Cherokee} + posixcharclass: n, // [[:alpha:]] + + // esc: + // also in escCharCodes and escCharSpecials + escunicodeub: unicodeFlag, // \u{00A9} + escunicodexb: n, // \x{00A9} + escsequence: n, // \Q...\E + escoctalo: n, // \o{377} + + // group: + namedgroup: namedgroup, // (?Pfoo) (?foo) (?'name'foo) + atomic: n, // (?>foo|bar) + define: n, // (?(DEFINE)foo) + branchreset: n, // (?|(a)|(b)) + + // lookaround: + poslookbehind: lookbehind, // (?<=foo) // warning + neglookbehind: lookbehind, // (? 
\k'name' \k{name} (?P=name) \g{name} + extnumref: n, // \g{-1} \g{+1} \g{1} \g1 \g-1 + recursion: n, // (?R) (?0) \g<0> \g'0' + numsubroutine: n, // \g<1> \g'-1' (?1) (?-1) + namedsubroutine: n, // \g \g'name' (?&name) (?P>name) + + // quantifiers: + // also in specialChars + possessive: n, + + // special: + conditional: n, // (?(?=if)then|else) + conditionalif: n, // (?=if) any lookaround + conditionalelse: n, // | + conditionalgroup: n, // (?(1)a|b) (?(-1)a|b) (?(name)a|b) + mode: n, // (?i-x) see modes above + comment: n, // (?#comment) + }, + + config: { + forwardref: n, // \1(a) + nestedref: n, // (\1a|b)+ + ctrlcodeerr: n, // does \c error, or decompose? + unicodenegated: n, // \p{^etc} + namedgroupalt: n, // if false, only support (?foo) + }, + + substTokens: { + subst_0match: n, // $0 \0 \{0} + subst_$bgroup: n, // ${1} ${99} + subst_bsgroup: n, // \1 \99 + }, + + docs: { + subst_group: { ext: '' }, // remove other syntaxes. + namedgroup: { ext: '' }, // remove other syntaxes. + unicodecat: { + ext: + '

<p>Requires the <code>u</code> flag.</p>' + + "<p>For a list of values, see this <a href='https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Unicode_Property_Escapes'>MDN page</a>.</p>", + }, + // notunicodecat, unicodescript, notunicodescript are copied from unicodecat below. + }, +} + +javascript.docs.notunicodecat = javascript.docs.unicodescript = javascript.docs.notunicodescript = + javascript.docs.unicodecat + +module.exports = javascript diff --git a/packages/next/next-server/lib/router/utils/prepare-destination.ts b/packages/next/next-server/lib/router/utils/prepare-destination.ts index 3de8bd4d0f112..1cf7272ca9a03 100644 --- a/packages/next/next-server/lib/router/utils/prepare-destination.ts +++ b/packages/next/next-server/lib/router/utils/prepare-destination.ts @@ -71,14 +71,10 @@ export function matchHas( if (matches) { if (matches.groups) { Object.keys(matches.groups).forEach((groupKey) => { - const safeKey = getSafeParamName(groupKey) - - if (safeKey && matches.groups![groupKey]) { - params[safeKey] = matches.groups![groupKey] - } + params[groupKey] = matches.groups![groupKey] }) - } else { - params[getSafeParamName(key || 'host')] = matches[0] + } else if (hasItem.type === 'host' && matches[0]) { + params.host = matches[0] } return true } diff --git a/test/integration/custom-routes/next.config.js b/test/integration/custom-routes/next.config.js index 769dfa1fbe0bc..047b59c659f0d 100644 --- a/test/integration/custom-routes/next.config.js +++ b/test/integration/custom-routes/next.config.js @@ -134,7 +134,7 @@ module.exports = { { type: 'cookie', key: 'loggedIn', - value: 'true', + value: '(?<loggedIn>true)', }, ], destination: '/with-params?authorized=1', @@ -159,6 +159,28 @@ module.exports = { ], destination: '/:hasParam', }, + { + source: '/has-rewrite-6', + has: [ + { + type: 'header', + key: 'hasParam', + value: 'with-params', + }, + ], + destination: '/with-params', + }, + { + source: '/has-rewrite-7', + has: [ + { + type: 'query', + key: 'hasParam', + value: '(?<idk>with-params|hello)', + }, + ], + destination: '/with-params?idk=:idk', + }, ], beforeFiles: [ { diff --git a/test/integration/custom-routes/test/index.test.js 
b/test/integration/custom-routes/test/index.test.js index 3fe63aa024ab5..d5bb4ed476695 100644 --- a/test/integration/custom-routes/test/index.test.js +++ b/test/integration/custom-routes/test/index.test.js @@ -708,6 +708,37 @@ const runTests = (isDev = false) => { }) }) + it('should not pass non captured has value for rewrite correctly', async () => { + const res1 = await fetchViaHTTP(appPort, '/has-rewrite-6') + expect(res1.status).toBe(404) + + const res = await fetchViaHTTP(appPort, '/has-rewrite-6', undefined, { + headers: { + hasParam: 'with-params', + }, + }) + expect(res.status).toBe(200) + + const $ = cheerio.load(await res.text()) + expect(JSON.parse($('#query').text())).toEqual({}) + }) + + it('should pass captured has value for rewrite correctly', async () => { + const res1 = await fetchViaHTTP(appPort, '/has-rewrite-7') + expect(res1.status).toBe(404) + + const res = await fetchViaHTTP(appPort, '/has-rewrite-7', { + hasParam: 'with-params', + }) + expect(res.status).toBe(200) + + const $ = cheerio.load(await res.text()) + expect(JSON.parse($('#query').text())).toEqual({ + hasParam: 'with-params', + idk: 'with-params', + }) + }) + it('should match has rewrite correctly before files', async () => { const res1 = await fetchViaHTTP(appPort, '/hello') expect(res1.status).toBe(200) @@ -1508,7 +1539,7 @@ const runTests = (isDev = false) => { { key: 'loggedIn', type: 'cookie', - value: '(?true)', + value: '(?<loggedIn>true)', }, ], regex: normalizeRegEx('^\\/has-rewrite-3$'), @@ -1536,6 +1567,30 @@ const runTests = (isDev = false) => { regex: normalizeRegEx('^\\/has-rewrite-5$'), source: '/has-rewrite-5', }, + { + destination: '/with-params', + has: [ + { + key: 'hasParam', + type: 'header', + value: 'with-params', + }, + ], + regex: normalizeRegEx('^\\/has-rewrite-6$'), + source: '/has-rewrite-6', + }, + { + destination: '/with-params?idk=:idk', + has: [ + { + key: 'hasParam', + type: 'query', + value: '(?<idk>with-params|hello)', + }, + ], + regex: 
normalizeRegEx('^\\/has-rewrite-7$'), + source: '/has-rewrite-7', + }, ], fallback: [], }, From e591f927a9459be884b62fe53d33677e3ce40d9a Mon Sep 17 00:00:00 2001 From: JJ Kasper Date: Mon, 12 Apr 2021 16:29:45 -0500 Subject: [PATCH 2/3] Update to use shared repo for regex lexer --- packages/next/compiled/regexr-lexer/lexer.js | 935 ++++++++++++++++++ .../next/compiled/regexr-lexer/profiles.js | 838 ++++++++++++++++ packages/next/lib/load-custom-routes.ts | 5 +- packages/next/package.json | 1 + packages/next/taskfile.js | 15 +- yarn.lock | 4 + 6 files changed, 1795 insertions(+), 3 deletions(-) create mode 100644 packages/next/compiled/regexr-lexer/lexer.js create mode 100644 packages/next/compiled/regexr-lexer/profiles.js diff --git a/packages/next/compiled/regexr-lexer/lexer.js b/packages/next/compiled/regexr-lexer/lexer.js new file mode 100644 index 0000000000000..b94a783e13a57 --- /dev/null +++ b/packages/next/compiled/regexr-lexer/lexer.js @@ -0,0 +1,935 @@ +'use strict'; + +/* +RegExr: Learn, Build, & Test RegEx +Copyright (C) 2017 gskinner.com, inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+*/ +class ExpressionLexer { + constructor() { + this.profile = null; + } + + set profile(profile) { + this._profile = profile; + this.string = this.token = this.errors = this.captureGroups = this.namedGroups = null; + } + + parse(str) { + if (!this._profile) { + return null; + } + + if (str === this.string) { + return this.token; + } + + this.token = null; + this._modes = {}; + this.string = str; + this.errors = []; + let capgroups = this.captureGroups = []; + let namedgroups = this.namedGroups = {}; + let brgroups = this.branchResetGroups = []; + let groups = [], + refs = [], + i = 0, + l = str.length; + let o, + c, + token, + charset = null; // previous is the previous token, prv is the previous "active" token (!ignore) + + let prev = null, + prv = null; + let profile = this._profile, + unquantifiable = profile.unquantifiable; + let charTypes = profile.charTypes; + let closeIndex = str.lastIndexOf("/"); + + for (let i = closeIndex + 1; i < l; i++) { + this._modes[str[i]] = true; + } + + while (i < l) { + c = str[i]; + token = { + i: i, + l: 1, + prev: prev, + prv: prv, + modes: this._modes + }; + + if (prev) { + prev.next = token; + } else { + this.token = token; + } + + if (i === 0 || i >= closeIndex) { + this.parseFlag(str, token); + } else if (c === "(" && !charset) { + this.parseParen(str, token); + + if (token.close === null) { + token.depth = groups.length; + groups.push(token); + } + + if (token.capture) { + this.addCaptureGroup(token, groups); + } + } else if (c === ")" && !charset) { + token.type = "groupclose"; + + if (groups.length) { + o = token.open = groups.pop(); + o.close = token; + + if (o.type === "branchreset") { + brgroups.pop(); + } + } else { + token.error = { + id: "groupclose" + }; + } + } else if (c === "[") { + charset = this.parseSquareBracket(str, token, charset); + } else if (c === "]" && charset) { + token.type = "setclose"; + token.open = charset; + charset.close = token; + charset = null; + } else if (c === "+" && prv && prv.clss 
=== "quant" && profile.tokens.possessive) { + token.type = "possessive"; + token.related = [prv]; + } else if ((c === "+" || c === "*") && !charset) { + token.type = charTypes[c]; + token.clss = "quant"; + token.min = c === "+" ? 1 : 0; + token.max = -1; + } else if (c === "{" && !charset && str.substr(i).search(/^{\d+,?\d*}/) !== -1) { + this.parseQuant(str, token); + } else if (c === "\\") { + this.parseBackSlash(str, token, charset, closeIndex); + } else if (c === "?" && !charset) { + if (!prv || prv.clss !== "quant") { + token.type = charTypes[c]; + token.clss = "quant"; + token.min = 0; + token.max = 1; + } else { + token.type = "lazy"; + token.related = [prv]; + } + } else if (c === "-" && charset && prv.code !== undefined && prv.prv && prv.prv.type !== "range") { + // this may be the start of a range, but we'll need to validate after the next token. + token.type = "range"; + } else { + this.parseChar(str, token, charset); + + if (!charset && this._modes.x && /\s/.test(c)) { + token.ignore = true; + token.type = "ignorews"; + } + } // post process token: + // quantifier: + + + if (token.clss === "quant") { + if (!prv || prv.close !== undefined || unquantifiable[prv.type] || prv.open && unquantifiable[prv.open.type]) { + token.error = { + id: "quanttarg" + }; + } else { + token.related = [prv.open || prv]; + } + } // reference: + + + if (token.group === true) { + refs.push(token); + } // conditional: + + + let curGroup = groups.length ? 
groups[groups.length - 1] : null; + + if (curGroup && (curGroup.type === "conditional" || curGroup.type === "conditionalgroup") && token.type === "alt") { + if (!curGroup.alt) { + curGroup.alt = token; + } else { + token.error = { + id: "extraelse" + }; + } + + token.related = [curGroup]; + token.type = "conditionalelse"; + token.clss = "special"; + } else if (curGroup && curGroup.type === "branchreset") { + // reset group + curGroup.curGroupNum = curGroup.inGroupNum; + } // range: + + + if (prv && prv.type === "range" && prv.l === 1) { + this.validateRange(str, token); + } // js warnings: + // TODO: this isn't ideal, but I'm hesitant to write a more robust solution for a couple of edge cases. + + + if (profile.id === "js") { + this.addJSWarnings(token); + } // general: + + + if (token.open && !token.clss) { + token.clss = token.open.clss; + } + + if (token.error) { + this.addError(token); + } + + i += token.l; + prev = token; + + if (!token.ignore) { + prv = token; + } + } // post processing: + + + while (groups.length) { + this.addError(groups.pop(), { + id: "groupopen" + }); + } + + this.matchRefs(refs, capgroups, namedgroups); + + if (charset) { + this.addError(charset, { + id: "setopen" + }); + } + + return this.token; + } + + addError(token, error = token.error) { + token.error = error; + this.errors.push(token); + } + + addJSWarnings(token) { + if (token.error) { + return; + } + + if (token.type === "neglookbehind" || token.type === "poslookbehind" || token.type === "sticky" || token.type === "unicode" || token.type == "dotall" || token.type === "unicodecat" || token.type === "unicodescript" || token.type === "namedgroup") { + token.error = { + id: "jsfuture", + warning: true + }; + } + } + + addCaptureGroup(token, groups) { + // it would be nice to make branch reset groups actually highlight all of the groups that share the same number + // that would require switching to arrays of groups for each group num - requires rearchitecture throughout the app. 
+ let capgroups = this.captureGroups, + brgroups = this.branchResetGroups, + namedgroups = this.namedGroups; + let curGroup = groups.length ? groups[groups.length - 1] : null; + + if (brgroups.length) { + let brgroup = brgroups[brgroups.length - 1]; + token.num = ++brgroup.curGroupNum; + } else { + token.num = capgroups.length + 1; + } + + if (!capgroups[token.num - 1]) { + capgroups.push(token); + } + + if (token.name && !token.error) { + if (/\d/.test(token.name[0])) { + token.error = { + id: "badname" + }; + } else if (namedgroups[token.name]) { + token.error = { + id: "dupname" + }; + token.related = [namedgroups[token.name]]; + } else { + namedgroups[token.name] = token; + } + } + } + + getRef(token, str) { + token.clss = "ref"; + token.group = true; + token.relIndex = this.captureGroups.length; + token.name = str; + } + + matchRefs(refs, indexes, names) { + while (refs.length) { + let token = refs.pop(), + name = token.name, + group = names[name]; + + if (!group && !isNaN(name)) { + let sign = name[0], + index = parseInt(name) + (sign === "+" || sign === "-" ? token.relIndex : 0); + + if (sign === "-") { + index++; + } + + group = indexes[index - 1]; + } + + if (group) { + token.group = group; + token.related = [group]; + token.dir = token.i < group.i ? 1 : !group.close || token.i < group.close.i ? 0 : -1; + } else { + delete token.group; + delete token.relIndex; + this.refToOctal(token); + + if (token.error) { + this.errors.push(token.error); + } + } + } + } + + refToOctal(token) { + // PCRE: \# unmatched, \0 \00 \## = octal + // JS: \# \0 \00 \## = octal + // PCRE matches \8 \9 to "8" "9" + // JS: without the u flag \8 \9 match "8" "9" in IE, FF & Chrome, and "\8" "\9" in Safari. We support the former. + // JS: with the u flag, Chrome & FF throw an esc error, Safari does not. + // TODO: handle \0 for PCRE? Would need more testing. + // TODO: this doesn't handle two digit refs with 8/9 in them. Ex. \18 - not even sure what this is interpreted as. 
+ let name = token.name, + profile = this._profile; + + if (token.type !== "numref") { + // not a simple \4 style reference, so can't decompose into an octal. + token.error = { + id: "unmatchedref" + }; + } else if (/^[0-7]{2}$/.test(name) || profile.config.reftooctalalways && /^[0-7]$/.test(name)) { + // octal + let next = token.next, + char = String.fromCharCode(next.code); + + if (next.type === "char" && char >= "0" && char <= "7" && parseInt(name + char, 8) <= 255) { + name += char; + this.mergeNext(token); + } + + token.code = parseInt(name, 8); + token.clss = "esc"; + token.type = "escoctal"; + delete token.name; + } else if (name === "8" || name === "9") { + this.parseEscChar(token, name); + delete token.name; + } else { + token.error = { + id: "unmatchedref" + }; + } + } + + mergeNext(token) { + let next = token.next; + token.next = next.next; + token.next.prev = token; + token.l++; + } + + parseFlag(str, token) { + // note that this doesn't deal with misformed patterns or incorrect flags. + let i = token.i, + c = str[i]; + + if (str[i] === "/") { + token.type = i === 0 ? "open" : "close"; + + if (i !== 0) { + token.related = [this.token]; + this.token.related = [token]; + } + } else { + token.type = this._profile.flags[c]; + } //token.clear = true; + + } + + parseChar(str, token, charset) { + let c = str[token.i]; + token.type = !charset && this._profile.charTypes[c] || "char"; + + if (!charset && c === "/") { + token.error = { + id: "fwdslash" + }; + } + + if (token.type === "char") { + token.code = c.charCodeAt(0); + } else if (ExpressionLexer.ANCHOR_TYPES[token.type]) { + token.clss = "anchor"; + } else if (token.type === "dot") { + token.clss = "charclass"; + } + + return token; + } + + parseSquareBracket(str, token, charset) { + let match; + + if (this._profile.tokens.posixcharclass && (match = str.substr(token.i).match(/^\[(:|\.)([^\]]*?)\1]/))) { + // posixcharclass: [:alpha:] + // posixcollseq: [.ch.] 
+ // currently neither flavor supports posixcollseq, but PCRE does flag as an error: + // TODO: the expression above currently does not catch [.\].] + token.l = match[0].length; + token.value = match[2]; + token.clss = "charclass"; + + if (match[1] === ":") { + token.type = "posixcharclass"; + + if (!this._profile.posixCharClasses[match[2]]) { + token.error = { + id: "posixcharclassbad" + }; + } else if (!charset) { + token.error = { + id: "posixcharclassnoset" + }; + } + } else { + token.type = "posixcollseq"; // TODO: can this be generalized? Right now, no, because we assign ids that aren't in the profile. + + token.error = { + id: "notsupported" + }; + } + } else if (!charset) { + // set [a-z] [aeiou] + // setnot [^a-z] + token.type = token.clss = "set"; + + if (str[token.i + 1] === "^") { + token.l++; + token.type += "not"; + } + + charset = token; + } else { + // [[] (square bracket inside a set) + this.parseChar(str, token, charset); + } + + return charset; + } + + parseParen(str, token) { + /* + core: + . group: + . lookahead: ?= ?! + . noncap: ?: + PCRE: + . lookbehind: ?<= ? ?'name' ? + . namedref: ?P=name Also: \g'name' \k'name' etc + . comment: ?# + . atomic: ?> + . recursion: ?0 ?R Also: \g<0> + . define: ?(DEFINE) + . subroutine: ?1 ?-1 ?&name ?P>name + conditionalgroup: ?(1)a|b ?(-1)a|b ?(name)a|b + conditional: ?(?=if)then|else + mode: ?c-i + branchreset: ?| + */ + token.clss = token.type = "group"; + + if (str[token.i + 1] !== "?") { + token.close = null; // indicates that it needs a close token. 
+ + token.capture = true; + return token; + } + + let sub = str.substr(token.i + 2), + match, + s = sub[0]; + + if (s === ":") { + // (?:foo) + token.type = "noncapgroup"; + token.close = null; + token.l = 3; + } else if (s === ">") { + // (?>foo) + token.type = "atomic"; + token.close = null; + token.l = 3; + } else if (s === "|") { + // (?|(a)|(b)) + token.type = "branchreset"; + token.close = null; + token.l = 3; + token.inGroupNum = token.curGroupNum = this.captureGroups.length; + this.branchResetGroups.push(token); + } else if (s === "#" && (match = sub.match(/[^)]*\)/))) { + // (?#foo) + token.clss = token.type = "comment"; + token.ignore = true; + token.l = 2 + match[0].length; + } else if (/^(R|0)\)/.test(sub)) { + // (?R) (?0) + token.clss = "ref"; + token.type = "recursion"; + token.l = 4; + } else if (match = sub.match(/^P=(\w+)\)/i)) { + // (?P=name) + token.type = "namedref"; + this.getRef(token, match[1]); + token.l = match[0].length + 2; + } else if (/^\(DEFINE\)/.test(sub)) { + // (?(DEFINE)foo) + token.type = "define"; + token.close = null; + token.l = 10; + } else if (match = sub.match(/^/)) || this._profile.config.namedgroupalt && ((match = sub.match(/^'(\w+)'/)) || (match = sub.match(/^P<(\w+)>/)))) { + // (?foo) (?'name'foo) (?Pfoo) + token.type = "namedgroup"; + token.close = null; + token.name = match[1]; + token.capture = true; + token.l = match[0].length + 2; + } else if ((match = sub.match(/^([-+]?\d\d?)\)/)) || (match = sub.match(/^(?:&|P>)(\w+)\)/))) { + // (?1) (?-1) (?&name) (?P>name) + token.type = (isNaN(match[1]) ? "named" : "num") + "subroutine"; + this.getRef(token, match[1]); + token.l = match[0].length + 2; + } else if ((match = sub.match(/^\(([-+]?\d\d?)\)/)) || (match = sub.match(/^\((\w+)\)/))) { + // (?(1)a|b) (?(-1)a|b) (?(name)a|b) + this.getRef(token, match[1]); + token.clss = "special"; + token.type = "conditionalgroup"; + token.close = null; + token.l = match[0].length + 2; + } else if (/^\(\?255). 
In theory it should allow 4? + + if (isNaN(val) || val > 255 || /[^\da-f]/i.test(match[1])) { + token.error = { + id: "esccharbad" + }; + } else { + token.code = val; + } + } else if (match = sub.match(/^x([\da-fA-F]{0,2})/)) { + // hex ascii: \xFF + token.type = "eschexadecimal"; + token.l += match[0].length; + token.code = parseInt(match[1] || 0, 16); + } else if (match = sub.match(/^c([a-zA-Z])?/)) { + // control char: \cA \cz + // also handles: \c + // not supported in JS strings + token.type = "esccontrolchar"; + + if (match[1]) { + token.code = match[1].toUpperCase().charCodeAt(0) - 64; // A=65 + + token.l += 2; + } else if (profile.config.ctrlcodeerr) { + token.l++; + token.error = { + id: "esccharbad" + }; + } else { + return this.parseChar(str, token, charset); // this builds the "/" token + } + } else if (match = sub.match(/^[0-7]{1,3}/)) { + // octal ascii: \011 + token.type = "escoctal"; + sub = match[0]; + + if (parseInt(sub, 8) > 255) { + sub = sub.substr(0, 2); + } + + token.l += sub.length; + token.code = parseInt(sub, 8); + } else if (profile.tokens.escoctalo && (match = sub.match(/^o\{(.*?)}/i))) { + // \o{377} + token.type = "escoctal"; + token.l += match[0].length; + val = parseInt(match[1], 8); + + if (isNaN(val) || val > 255 || /[^0-7]/.test(match[1])) { + token.error = { + id: "esccharbad" + }; + } else { + token.code = val; + } + } else { + // single char + if (token.type = profile.escCharTypes[c]) { + token.l++; + token.clss = ExpressionLexer.ANCHOR_TYPES[token.type] ? "anchor" : "charclass"; + return token; + } + + token.code = profile.escCharCodes[c]; + + if (token.code === undefined || token.code === false) { + // unrecognized. 
+ return this.parseEscChar(token, c); + } // update SubstLexer if this changes: + + + token.l++; + token.type = "esc_" + token.code; + } + + token.clss = "esc"; + return token; + } + + parseEscChar(token, c) { + // unrecognized escchar: \u \a \8, etc + // JS: allowed except if u flag set, Safari still allows \8 \9 + // PCRE: allows \8 \9 but not others // TODO: support? + let profile = this._profile; + token.l = 2; + + if (!profile.badEscChars[c] && profile.tokens.escchar && !this._modes.u || profile.escChars[c]) { + token.type = "escchar"; + token.code = c.charCodeAt(0); + token.clss = "esc"; + } else { + token.error = { + id: "esccharbad" + }; + } + } + + parseRef(token, sub) { + // namedref: \k \k'name' \k{name} \g{name} + // namedsubroutine: \g \g'name' + // numref: \g1 \g+2 \g{2} + // numsubroutine: \g<-1> \g'1' + // recursion: \g<0> \g'0' + let c = sub[0], + s = "", + match; + + if (match = sub.match(/^[gk](?:'\w*'|<\w*>|{\w*})/)) { + s = match[0].substr(2, match[0].length - 3); + + if (c === "k" && !isNaN(s)) { + s = ""; + } // TODO: specific error for numeric \k? + + } else if (match = sub.match(/^g(?:({[-+]?\d+}|<[-+]?\d+>|'[-+]?\d+')|([-+]?\d+))/)) { + s = match[2] !== undefined ? match[2] : match[1].substr(1, match[1].length - 2); + } + + let isRef = c === "k" || !(sub[1] === "'" || sub[1] === "<"); + + if (!isRef && s == 0) { + token.type = "recursion"; + token.clss = "ref"; + } else { + // namedref, extnumref, namedsubroutine, numsubroutine + token.type = (isNaN(s) ? "named" : (isRef ? "ext" : "") + "num") + (isRef ? "ref" : "subroutine"); + this.getRef(token, s); + } + + token.l += match ? 
match[0].length : 1; + } + + parseUnicode(token, sub) { + // unicodescript: \p{Cherokee} + // unicodecat: \p{Ll} \pL + // not: \P{Ll} \p{^Lu} + let match = sub.match(/p\{\^?([^}]*)}/i), + val = match && match[1], + not = sub[0] === "P"; + + if (!match && (match = sub.match(/[pP]([LMZSNPC])/))) { + val = match[1]; + } else { + not = not !== (sub[2] === "^"); + } + + token.l += match ? match[0].length : 1; + token.type = "unicodecat"; + + if (this._profile.unicodeScripts[val]) { + token.type = "unicodescript"; + } else if (!this._profile.unicodeCategories[val]) { + val = null; + } + + if (not) { + token.type = "not" + token.type; + } + + if (!this._profile.config.unicodenegated && sub[2] === "^" || !val) { + token.error = { + id: "unicodebad" + }; + } + + token.value = val; + token.clss = "charclass"; + return token; + } + + parseMode(token, sub) { + // (?i-x) + // supported modes in PCRE: i-caseinsens, x-freespacing, s-dotall, m-multiline, U-switchlazy, [J-samename] + let match = sub.match(/^[-a-z]+\)/i); + + if (!match) { + return; + } + + let supModes = this._profile.modes; + let modes = Object.assign({}, this._modes), + bad = false, + not = false, + s = match[0], + c; + token.on = token.off = ""; + + for (let i = 0, l = s.length - 1; i < l; i++) { + c = s[i]; + + if (c === "-") { + not = true; + continue; + } + + if (!supModes[c]) { + bad = true; + break; + } + + modes[c] = !not; + token.on = token.on.replace(c, ""); + + if (not) { + token.off = token.off.replace(c, ""); + token.off += c; + } else { + token.on += c; + } + } + + token.clss = "special"; + token.type = "mode"; + token.l = match[0].length + 2; + + if (bad) { + token.error = { + id: "modebad" + }; + token.errmode = c; + } else { + this._modes = modes; + } + + return token; + } + + parseQuant(str, token) { + // quantifier: {0,3} {3} {1,} + token.type = token.clss = "quant"; + let i = token.i; + let end = str.indexOf("}", i + 1); + token.l += end - i; + let arr = str.substring(i + 1, end).split(","); + 
token.min = parseInt(arr[0]); + token.max = arr[1] === undefined ? token.min : arr[1] === "" ? -1 : parseInt(arr[1]); + + if (token.max !== -1 && token.min > token.max) { + token.error = { + id: "quantrev" + }; + } + + return token; + } + + validateRange(str, end) { + // char range: [a-z] [\11-\n] + let next = end, + token = end.prv, + prv = token.prv; + + if (prv.code === undefined || next.code === undefined) { + // not a range, rewrite as a char: + this.parseChar(str, token); + } else { + token.clss = "set"; + + if (prv.code > next.code) { + // this gets added here because parse has already moved to the next token: + this.errors.push(token.error = { + id: "rangerev" + }); + } // preserve as separate tokens, but treat as one in the UI: + + + next.proxy = prv.proxy = token; + token.set = [prv, token, next]; + } + } + +} +ExpressionLexer.ANCHOR_TYPES = { + "bof": true, + "eof": true, + "bos": true, + "eos": true, + "abseos": true, + "wordboundary": true, + "notwordboundary": true, + "prevmatchend": true +}; + +module.exports = ExpressionLexer; +//# sourceMappingURL=lexer.js.map diff --git a/packages/next/compiled/regexr-lexer/profiles.js b/packages/next/compiled/regexr-lexer/profiles.js new file mode 100644 index 0000000000000..73f49837158bf --- /dev/null +++ b/packages/next/compiled/regexr-lexer/profiles.js @@ -0,0 +1,838 @@ +'use strict'; + +/* +RegExr: Learn, Build, & Test RegEx +Copyright (C) 2017 gskinner.com, inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program. If not, see . +*/ + +/* +The core profile essentially defines every feature we support, and is then pared down by other profiles. All values should be y (true). + +It also acts in part as pseudo documentation for all of the "type" values. + */ +let y = true, + n = false; +let core = { + id: "core", + flags: { + "g": "global", + // note that this is not a real flag in some flavors, but a different method call + "i": "caseinsensitive", + "m": "multiline", + "s": "dotall", + "u": "unicode", + "y": "sticky", + "x": "extended", + "U": "ungreedy" + }, + // reserved characters that need to be escaped: + escChars: "+*?^$\\.[]{}()|/".split("").reduce((o, c) => { + o[c] = y; + return o; + }, {}), + // escape chars that are specifically not supported by the flavor: + badEscChars: n, + escCharCodes: { + "0": 0, + // null + "a": 7, + // bell + "t": 9, + // tab + "n": 10, + // lf + "v": 11, + // vertical tab + "f": 12, + // form feed + "r": 13, + // cr + "e": 27 // escape + + }, + escCharTypes: { + "A": "bos", + "b": "wordboundary", + "B": "notwordboundary", + "d": "digit", + "D": "notdigit", + "G": "prevmatchend", + "h": "hwhitespace", + "H": "nothwhitespace", + "K": "keepout", + "N": "notlinebreak", + "R": "linebreak", + "s": "whitespace", + "S": "notwhitespace", + "v": "vwhitespace", + "V": "notvwhitespace", + "w": "word", + "W": "notword", + "X": "unicodegrapheme", + "Z": "eos", + "z": "abseos" + }, + charTypes: { + ".": "dot", + "|": "alt", + "$": "eof", + "^": "bof", + "?": "opt", + // also: "lazy" + "+": "plus", + // also: "possessive" + "*": "star" + }, + unquantifiable: { + // all group/set open tokens are unquantifiable by default (ie. 
tokens with a .close value) + "quant": y, + "plus": y, + "star": y, + "opt": y, + "lazy": y, + "possessive": y, + "eof": y, + "bof": y, + "eos": y, + "abseos": y, + "alt": y, + "open": y, + "mode": y, + "comment": y, + // TODO: this should actually be ignored by quantifiers. + "condition": y + }, + unicodeScripts: { + // from: http://www.pcre.org/original/doc/html/pcrepattern.html + "Arabic": y, + "Armenian": y, + "Avestan": y, + "Balinese": y, + "Bamum": y, + "Bassa_Vah": y, + "Batak": y, + "Bengali": y, + "Bopomofo": y, + "Brahmi": y, + "Braille": y, + "Buginese": y, + "Buhid": y, + "Canadian_Aboriginal": y, + "Carian": y, + "Caucasian_Albanian": y, + "Chakma": y, + "Cham": y, + "Cherokee": y, + "Common": y, + "Coptic": y, + "Cuneiform": y, + "Cypriot": y, + "Cyrillic": y, + "Deseret": y, + "Devanagari": y, + "Duployan": y, + "Egyptian_Hieroglyphs": y, + "Elbasan": y, + "Ethiopic": y, + "Georgian": y, + "Glagolitic": y, + "Gothic": y, + "Grantha": y, + "Greek": y, + "Gujarati": y, + "Gurmukhi": y, + "Han": y, + "Hangul": y, + "Hanunoo": y, + "Hebrew": y, + "Hiragana": y, + "Imperial_Aramaic": y, + "Inherited": y, + "Inscriptional_Pahlavi": y, + "Inscriptional_Parthian": y, + "Javanese": y, + "Kaithi": y, + "Kannada": y, + "Katakana": y, + "Kayah_Li": y, + "Kharoshthi": y, + "Khmer": y, + "Khojki": y, + "Khudawadi": y, + "Lao": y, + "Latin": y, + "Lepcha": y, + "Limbu": y, + "Linear_A": y, + "Linear_B": y, + "Lisu": y, + "Lycian": y, + "Lydian": y, + "Mahajani": y, + "Malayalam": y, + "Mandaic": y, + "Manichaean": y, + "Meetei_Mayek": y, + "Mende_Kikakui": y, + "Meroitic_Cursive": y, + "Meroitic_Hieroglyphs": y, + "Miao": y, + "Modi": y, + "Mongolian": y, + "Mro": y, + "Myanmar": y, + "Nabataean": y, + "New_Tai_Lue": y, + "Nko": y, + "Ogham": y, + "Ol_Chiki": y, + "Old_Italic": y, + "Old_North_Arabian": y, + "Old_Permic": y, + "Old_Persian": y, + "Old_South_Arabian": y, + "Old_Turkic": y, + "Oriya": y, + "Osmanya": y, + "Pahawh_Hmong": y, + "Palmyrene": y, + 
"Pau_Cin_Hau": y, + "Phags_Pa": y, + "Phoenician": y, + "Psalter_Pahlavi": y, + "Rejang": y, + "Runic": y, + "Samaritan": y, + "Saurashtra": y, + "Sharada": y, + "Shavian": y, + "Siddham": y, + "Sinhala": y, + "Sora_Sompeng": y, + "Sundanese": y, + "Syloti_Nagri": y, + "Syriac": y, + "Tagalog": y, + "Tagbanwa": y, + "Tai_Le": y, + "Tai_Tham": y, + "Tai_Viet": y, + "Takri": y, + "Tamil": y, + "Telugu": y, + "Thaana": y, + "Thai": y, + "Tibetan": y, + "Tifinagh": y, + "Tirhuta": y, + "Ugaritic": y, + "Vai": y, + "Warang_Citi": y, + "Yi": y + }, + unicodeCategories: { + // from: http://www.pcre.org/original/doc/html/pcrepattern.html + "C": y, + // Other + "Cc": y, + // Control + "Cf": y, + // Format + "Cn": y, + // Unassigned + "Co": y, + // Private use + "Cs": y, + // Surrogate + "L": y, + // Letter + "L&": y, + // Any letter + "Ll": y, + // Lower case letter + "Lm": y, + // Modifier letter + "Lo": y, + // Other letter + "Lt": y, + // Title case letter + "Lu": y, + // Upper case letter + "M": y, + // Mark + "Mc": y, + // Spacing mark + "Me": y, + // Enclosing mark + "Mn": y, + // Non-spacing mark + "N": y, + // Number + "Nd": y, + // Decimal number + "Nl": y, + // Letter number + "No": y, + // Other number + "P": y, + // Punctuation + "Pc": y, + // Connector punctuation + "Pd": y, + // Dash punctuation + "Pe": y, + // Close punctuation + "Pf": y, + // Final punctuation + "Pi": y, + // Initial punctuation + "Po": y, + // Other punctuation + "Ps": y, + // Open punctuation + "S": y, + // Symbol + "Sc": y, + // Currency symbol + "Sk": y, + // Modifier symbol + "Sm": y, + // Mathematical symbol + "So": y, + // Other symbol + "Z": y, + // Separator + "Zl": y, + // Line separator + "Zp": y, + // Paragraph separator + "Zs": y // Space separator + + }, + posixCharClasses: { + // from: http://www.pcre.org/original/doc/html/pcrepattern.html + "alnum": y, + // letters and digits + "alpha": y, + // letters + "ascii": y, + // character codes 0 - 127 + "blank": y, + // space or tab 
only + "cntrl": y, + // control characters + "digit": y, + // decimal digits (same as \d) + "graph": y, + // printing characters, excluding space + "lower": y, + // lower case letters + "print": y, + // printing characters, including space + "punct": y, + // printing characters, excluding letters and digits and space + "space": y, + // white space (the same as \s from PCRE 8.34) + "upper": y, + // upper case letters + "word": y, + // "word" characters (same as \w) + "xdigit": y // hexadecimal digits + + }, + modes: { + "i": "caseinsensitive", + "s": "dotall", + "m": "multiline", + "x": "freespacing", + "J": "samename", + "U": "switchlazy" + }, + tokens: { + // note that not all of these are actively used in the lexer, but are included for completeness. + "open": y, + // opening / + "close": y, + // closing / + "char": y, + // abc + // classes: + // also in escCharTypes and charTypes + "set": y, + // [a-z] + "setnot": y, + // [^a-z] + "setclose": y, + // ] + "range": y, + // [a-z] + "unicodecat": y, + // \p{Ll} \P{^Ll} \pL + "notunicodecat": y, + // \P{Ll} \p{^Ll} \PL + "unicodescript": y, + // \p{Cherokee} \P{^Cherokee} + "notunicodescript": y, + // \P{Cherokee} \p{^Cherokee} + "posixcharclass": y, + // [[:alpha:]] + // not in supported flavors: "posixcollseq": y, // [[.foo.]] // this is recognized by the lexer, currently returns "notsupported" error + // not in supported flavors: "unicodeblock": y, // \p{InThai} \p{IsThai} and NOT \P + // not in supported flavors: "subtract": y, // [base-[subtract]] + // not in supported flavors: "intersect": y, // [base&&[intersect]] + // esc: + // also in escCharCodes and escCharTypes + "escoctal": y, + // \11 + "escunicodeu": y, + // \uFFFF + "escunicodeub": y, + // \u{00A9} + "escunicodexb": y, + // \x{00A9} + "escsequence": y, + // \Q...\E + "eschexadecimal": y, + // \xFF + "esccontrolchar": y, + // \cA + "escoctalo": y, + // \o{377} // resolved to escoctal in lexer, no docs required + "escchar": y, + // \m (unrecognized 
escapes) // no reference documentation required + // group: + "group": y, + // (foo) + "groupclose": y, + // ) + "noncapgroup": y, + // (?:foo) + "namedgroup": y, + // (?Pfoo) (?foo) (?'name'foo) + "atomic": y, + // (?>foo|bar) + "define": y, + // (?(DEFINE)foo) + "branchreset": y, + // (?|(a)|(b)) + // lookaround: + "poslookbehind": y, + // (?<=foo) + "neglookbehind": y, + // (? \k'name' \k{name} (?P=name) \g{name} + "numref": y, + // \1 + "extnumref": y, + // \g{-1} \g{+1} \g{1} \g1 \g-1 + "recursion": y, + // (?R) (?0) \g<0> \g'0' + "numsubroutine": y, + // \g<1> \g'-1' (?1) (?-1) + "namedsubroutine": y, + // \g \g'name' (?&name) (?P>name) + // quantifiers: + // also in specialChars + "quant": y, + // {1,2} + "possessive": y, + // ++ + "lazy": y, + // ? + // special: + "conditional": y, + // (?(?=if)then|else) + "condition": y, + // (?=if) any lookaround + "conditionalelse": y, + // | + "conditionalgroup": y, + // (?(1)a|b) (?(-1)a|b) (?(name)a|b) + "mode": y, + // (?i-x) see modes above + "comment": y, + // (?#comment) + // meta: + "matchanyset": y // [\s\S] + + }, + substTokens: { + // named references aren't supported in JS or PCRE / PHP + "subst_$esc": y, + // $$ + "subst_$&match": y, + // $& + "subst_$before": y, + // $` + "subst_$after": y, + // $' + "subst_$group": y, + // $1 $99 // resolved to subst_group in lexer, no docs required + "subst_$bgroup": y, + // ${1} ${99} // resolved to subst_group in lexer, no docs required + "subst_bsgroup": y, + // \1 \99 // resolved to subst_group in lexer, no docs required + "subst_group": y, + // $1 \1 \{1} // combined in docs, not used by lexer + "subst_0match": y, + // $0 \0 \{0} + // this isn't a feature of the engine, but of RegExr: + "subst_esc": y // \n \r \u1234 + + }, + config: { + "forwardref": y, + // \1(a) + "nestedref": y, + // (\1a|b)+ + "ctrlcodeerr": y, + // does \c error? (vs decompose) + "reftooctalalways": y, + // does a single digit reference \1 become an octal? 
(vs remain an unmatched ref) + "substdecomposeref": y, + // will a subst reference decompose? (ex. \3 becomes "\" & "3" if < 3 groups) + "looseesc": y, + // should unrecognized escape sequences match the character (ex. \u could match "u") // disabled when `u` flag is set + "unicodenegated": y, + // \p{^etc}" + "namedgroupalt": y // if false, only support (?foo) + + }, + docs: {// for example: + //possessive: {desc: "+This will be appended to the existing entry." }, + //namedgroup: {tip: "This will overwrite the existing entry." } + } +}; + +/* +RegExr: Learn, Build, & Test RegEx +Copyright (C) 2017 gskinner.com, inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . +*/ + +/* +The PCRE profile is almost a straight copy of the core profile. +*/ +let y$1 = true, + n$1 = false; +let pcre = { + id: "pcre", + label: "PCRE", + browser: false, + flags: { + "u": n$1, + "y": n$1 + }, + badEscChars: "uUlLN".split("").reduce((o, c) => { + o[c] = y$1; + return o; + }, {}), + escCharCodes: { + "v": n$1 // vertical tab // PCRE support \v as vertical whitespace + + }, + tokens: { + "escunicodeu": n$1, + // \uFFFF + "escunicodeub": n$1 // \u{00A9} + // octalo PCRE 8.34+ + + }, + substTokens: { + "subst_$esc": n$1, + // $$ + "subst_$&match": n$1, + // $& + "subst_$before": n$1, + // $` + "subst_$after": n$1 // $' + + }, + config: { + "reftooctalalways": n$1, + // does a single digit reference \1 become an octal? 
(vs remain an unmatched ref) + "substdecomposeref": n$1, + // will a subst reference decompose? (ex. \3 becomes "\" & "3" if < 3 groups) + "looseesc": n$1 // should unrecognized escape sequences match the character (ex. \u could match "u") // disabled when `u` flag is set + + }, + docs: { + "escoctal": { + ext: "+

The syntax \\o{FFF} is also supported.

" + }, + "numref": { + ext: "

There are multiple syntaxes for this feature: \\1 \\g1 \\g{1}.

" + "

The latter syntaxes support relative values preceded by + or -. For example \\g-1 would match the group preceding the reference.

" + }, + "lazy": { + ext: "+

This behaviour is reversed by the ungreedy (U) flag/modifier.

" + } + } +}; + +/* +RegExr: Learn, Build, & Test RegEx +Copyright (C) 2017 gskinner.com, inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . +*/ + +/* +The javascript profile disables a large number of features. + +Note that JS warnings are currently added in addJSWarnings in the ExpresssionLexer. +*/ +let n$2 = false; + +function test(expr, flag) { + try { + return new RegExp(expr, flag) && undefined; + } catch (e) { + return n$2; + } +} + +function testFlag(flag) { + return test(".", flag); +} + +let unicodeFlag = testFlag("u"); +let stickyFlag = testFlag("y"); +let dotallFlag = testFlag("s"); +let lookbehind = test("(?<=A)"); +let namedgroup = test("(?B)"); +let unicodecat = test("\\p{Ll}", "u"); // disabled when `u` flag is not set + +let javascript = { + id: "js", + label: "JavaScript", + browser: true, + flags: { + "s": dotallFlag, + // warning + "x": n$2, + "u": unicodeFlag, + // warning + "y": stickyFlag, + // warning + "U": n$2 + }, + escCharCodes: { + "a": n$2, + // bell + "e": n$2 // escape + + }, + escCharTypes: { + "A": n$2, + // bos + "G": n$2, + // prevmatchend + "h": n$2, + // hwhitespace + "H": n$2, + // nothwhitespace + "K": n$2, + // keepout + "N": n$2, + // notlinebreak + "R": n$2, + // newline + "v": n$2, + // vwhitespace + "V": n$2, + // notvwhitespace + "X": n$2, + // unicodegrapheme + "Z": n$2, + // eos + "z": n$2 // abseos + + }, + unicodeScripts: unicodecat, + unicodeCategories: unicodecat, + 
posixCharClasses: n$2, + modes: n$2, + tokens: { + // classes: + // also in escCharSpecials and specialChars + "unicodecat": unicodecat, + // \p{Ll} \P{^Ll} \pL + "notunicodecat": unicodecat, + // \P{Ll} \p{^Ll} \PL + "unicodescript": unicodecat, + // \p{Cherokee} \P{^Cherokee} + "notunicodescript": unicodecat, + // \P{Cherokee} \p{^Cherokee} + "posixcharclass": n$2, + // [[:alpha:]] + // esc: + // also in escCharCodes and escCharSpecials + "escunicodeub": unicodeFlag, + // \u{00A9} + "escunicodexb": n$2, + // \x{00A9} + "escsequence": n$2, + // \Q...\E + "escoctalo": n$2, + // \o{377} + // group: + "namedgroup": namedgroup, + // (?Pfoo) (?foo) (?'name'foo) + "atomic": n$2, + // (?>foo|bar) + "define": n$2, + // (?(DEFINE)foo) + "branchreset": n$2, + // (?|(a)|(b)) + // lookaround: + "poslookbehind": lookbehind, + // (?<=foo) // warning + "neglookbehind": lookbehind, + // (? \k'name' \k{name} (?P=name) \g{name} + "extnumref": n$2, + // \g{-1} \g{+1} \g{1} \g1 \g-1 + "recursion": n$2, + // (?R) (?0) \g<0> \g'0' + "numsubroutine": n$2, + // \g<1> \g'-1' (?1) (?-1) + "namedsubroutine": n$2, + // \g \g'name' (?&name) (?P>name) + // quantifiers: + // also in specialChars + "possessive": n$2, + // special: + "conditional": n$2, + // (?(?=if)then|else) + "conditionalif": n$2, + // (?=if) any lookaround + "conditionalelse": n$2, + // | + "conditionalgroup": n$2, + // (?(1)a|b) (?(-1)a|b) (?(name)a|b) + "mode": n$2, + // (?i-x) see modes above + "comment": n$2 // (?#comment) + + }, + config: { + "forwardref": n$2, + // \1(a) + "nestedref": n$2, + // (\1a|b)+ + "ctrlcodeerr": n$2, + // does \c error, or decompose? + "unicodenegated": n$2, + // \p{^etc} + "namedgroupalt": n$2 // if false, only support (?foo) + + }, + substTokens: { + "subst_0match": n$2, + // $0 \0 \{0} + "subst_$bgroup": n$2, + // ${1} ${99} + "subst_bsgroup": n$2 // \1 \99 + + }, + docs: { + "subst_group": { + ext: "" + }, + // remove other syntaxes. 
+ "namedgroup": { + ext: "" + }, + // remove other syntaxes. + "unicodecat": { + ext: "

Requires the u flag.

" + "

For a list of values, see this MDN page.

" + } // notunicodecat, unicodescript, notunicodescript are copied from unicodecat below. + + } +}; +javascript.docs.notunicodecat = javascript.docs.unicodescript = javascript.docs.notunicodescript = javascript.docs.unicodecat; + +/* +RegExr: Learn, Build, & Test RegEx +Copyright (C) 2017 gskinner.com, inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . +*/ +let profiles = { + core +}; +profiles.pcre = merge(core, pcre); +profiles.js = merge(core, javascript); + +function merge(p1, p2) { + // merges p1 into p2, essentially just a simple deep copy without array support. 
+ for (let n in p1) { + if (p2[n] === false) { + continue; + } else if (typeof p1[n] === "object") { + p2[n] = merge(p1[n], p2[n] || {}); + } else if (p2[n] === undefined) { + p2[n] = p1[n]; + } + } + + return p2; +} + +module.exports = profiles; +//# sourceMappingURL=profiles.js.map diff --git a/packages/next/lib/load-custom-routes.ts b/packages/next/lib/load-custom-routes.ts index e66f6885be44a..19227cec0e66b 100644 --- a/packages/next/lib/load-custom-routes.ts +++ b/packages/next/lib/load-custom-routes.ts @@ -9,7 +9,9 @@ import { import { execOnce } from '../next-server/lib/utils' import * as Log from '../build/output/log' // @ts-ignore -import Lexer from './regexr/expression-lexer' +import Lexer from 'next/dist/compiled/regexr-lexer/lexer' +// @ts-ignore +import lexerProfiles from 'next/dist/compiled/regexr-lexer/profiles' export type RouteHas = | { @@ -338,6 +340,7 @@ function checkCustomRoutes( if (hasItem.value) { const matcher = new RegExp(`^${hasItem.value}$`) const lexer = new Lexer() + lexer.profile = lexerProfiles.js lexer.parse(`/${matcher.source}/`) Object.keys(lexer.namedGroups).forEach((groupKey) => { diff --git a/packages/next/package.json b/packages/next/package.json index d188702ea2be5..c8e96027df4bd 100644 --- a/packages/next/package.json +++ b/packages/next/package.json @@ -227,6 +227,7 @@ "postcss-preset-env": "6.7.0", "postcss-scss": "3.0.4", "recast": "0.18.5", + "regexr": "https://github.com/ijjk/regexr-lexer.git#3bcf3d1c4bc6dd9239c47acb1fb7b419823f8337", "resolve-url-loader": "3.1.2", "sass-loader": "10.0.5", "schema-utils": "2.7.1", diff --git a/packages/next/taskfile.js b/packages/next/taskfile.js index c88180356350d..bae2e36933fd8 100644 --- a/packages/next/taskfile.js +++ b/packages/next/taskfile.js @@ -1,6 +1,6 @@ // eslint-disable-next-line import/no-extraneous-dependencies const notifier = require('node-notifier') -const { relative, basename, resolve } = require('path') +const { relative, basename, resolve, join, dirname } = 
require('path') const { Module } = require('module') // Note: @@ -687,9 +687,20 @@ export async function path_to_regexp(task, opts) { .target('dist/compiled/path-to-regexp') } +export async function copy_regexr_lexer(task, opts) { + await task + .source( + join( + relative(__dirname, dirname(require.resolve('regexr/package.json'))), + 'lexer-dist/**/*' + ) + ) + .target('compiled/regexr-lexer') +} + export async function precompile(task, opts) { await task.parallel( - ['browser_polyfills', 'path_to_regexp', 'copy_ncced'], + ['browser_polyfills', 'path_to_regexp', 'copy_ncced', 'copy_regexr_lexer'], opts ) } diff --git a/yarn.lock b/yarn.lock index 9d044714858c4..9e045e0b14168 100644 --- a/yarn.lock +++ b/yarn.lock @@ -13725,6 +13725,10 @@ regexpu-core@^4.7.1: unicode-match-property-ecmascript "^1.0.4" unicode-match-property-value-ecmascript "^1.2.0" +"regexr@https://github.com/ijjk/regexr-lexer.git#3bcf3d1c4bc6dd9239c47acb1fb7b419823f8337": + version "3.8.0" + resolved "https://github.com/ijjk/regexr-lexer.git#3bcf3d1c4bc6dd9239c47acb1fb7b419823f8337" + registry-auth-token@3.3.2: version "3.3.2" resolved "https://registry.yarnpkg.com/registry-auth-token/-/registry-auth-token-3.3.2.tgz#851fd49038eecb586911115af845260eec983f20" From ea1af0448527fb9aac831d9c01a4efc99ae436a4 Mon Sep 17 00:00:00 2001 From: JJ Kasper Date: Mon, 12 Apr 2021 17:12:16 -0500 Subject: [PATCH 3/3] update output location --- packages/next/compiled/regexr-lexer/lexer.js | 935 ------------------ .../next/compiled/regexr-lexer/profiles.js | 838 ---------------- packages/next/taskfile.js | 2 +- 3 files changed, 1 insertion(+), 1774 deletions(-) delete mode 100644 packages/next/compiled/regexr-lexer/lexer.js delete mode 100644 packages/next/compiled/regexr-lexer/profiles.js diff --git a/packages/next/compiled/regexr-lexer/lexer.js b/packages/next/compiled/regexr-lexer/lexer.js deleted file mode 100644 index b94a783e13a57..0000000000000 --- a/packages/next/compiled/regexr-lexer/lexer.js +++ /dev/null 
@@ -1,935 +0,0 @@ -'use strict'; - -/* -RegExr: Learn, Build, & Test RegEx -Copyright (C) 2017 gskinner.com, inc. - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . -*/ -class ExpressionLexer { - constructor() { - this.profile = null; - } - - set profile(profile) { - this._profile = profile; - this.string = this.token = this.errors = this.captureGroups = this.namedGroups = null; - } - - parse(str) { - if (!this._profile) { - return null; - } - - if (str === this.string) { - return this.token; - } - - this.token = null; - this._modes = {}; - this.string = str; - this.errors = []; - let capgroups = this.captureGroups = []; - let namedgroups = this.namedGroups = {}; - let brgroups = this.branchResetGroups = []; - let groups = [], - refs = [], - i = 0, - l = str.length; - let o, - c, - token, - charset = null; // previous is the previous token, prv is the previous "active" token (!ignore) - - let prev = null, - prv = null; - let profile = this._profile, - unquantifiable = profile.unquantifiable; - let charTypes = profile.charTypes; - let closeIndex = str.lastIndexOf("/"); - - for (let i = closeIndex + 1; i < l; i++) { - this._modes[str[i]] = true; - } - - while (i < l) { - c = str[i]; - token = { - i: i, - l: 1, - prev: prev, - prv: prv, - modes: this._modes - }; - - if (prev) { - prev.next = token; - } else { - this.token = token; - } - - if (i === 0 || i >= closeIndex) { - this.parseFlag(str, token); - } else if (c === "(" && 
!charset) { - this.parseParen(str, token); - - if (token.close === null) { - token.depth = groups.length; - groups.push(token); - } - - if (token.capture) { - this.addCaptureGroup(token, groups); - } - } else if (c === ")" && !charset) { - token.type = "groupclose"; - - if (groups.length) { - o = token.open = groups.pop(); - o.close = token; - - if (o.type === "branchreset") { - brgroups.pop(); - } - } else { - token.error = { - id: "groupclose" - }; - } - } else if (c === "[") { - charset = this.parseSquareBracket(str, token, charset); - } else if (c === "]" && charset) { - token.type = "setclose"; - token.open = charset; - charset.close = token; - charset = null; - } else if (c === "+" && prv && prv.clss === "quant" && profile.tokens.possessive) { - token.type = "possessive"; - token.related = [prv]; - } else if ((c === "+" || c === "*") && !charset) { - token.type = charTypes[c]; - token.clss = "quant"; - token.min = c === "+" ? 1 : 0; - token.max = -1; - } else if (c === "{" && !charset && str.substr(i).search(/^{\d+,?\d*}/) !== -1) { - this.parseQuant(str, token); - } else if (c === "\\") { - this.parseBackSlash(str, token, charset, closeIndex); - } else if (c === "?" && !charset) { - if (!prv || prv.clss !== "quant") { - token.type = charTypes[c]; - token.clss = "quant"; - token.min = 0; - token.max = 1; - } else { - token.type = "lazy"; - token.related = [prv]; - } - } else if (c === "-" && charset && prv.code !== undefined && prv.prv && prv.prv.type !== "range") { - // this may be the start of a range, but we'll need to validate after the next token. 
- token.type = "range"; - } else { - this.parseChar(str, token, charset); - - if (!charset && this._modes.x && /\s/.test(c)) { - token.ignore = true; - token.type = "ignorews"; - } - } // post process token: - // quantifier: - - - if (token.clss === "quant") { - if (!prv || prv.close !== undefined || unquantifiable[prv.type] || prv.open && unquantifiable[prv.open.type]) { - token.error = { - id: "quanttarg" - }; - } else { - token.related = [prv.open || prv]; - } - } // reference: - - - if (token.group === true) { - refs.push(token); - } // conditional: - - - let curGroup = groups.length ? groups[groups.length - 1] : null; - - if (curGroup && (curGroup.type === "conditional" || curGroup.type === "conditionalgroup") && token.type === "alt") { - if (!curGroup.alt) { - curGroup.alt = token; - } else { - token.error = { - id: "extraelse" - }; - } - - token.related = [curGroup]; - token.type = "conditionalelse"; - token.clss = "special"; - } else if (curGroup && curGroup.type === "branchreset") { - // reset group - curGroup.curGroupNum = curGroup.inGroupNum; - } // range: - - - if (prv && prv.type === "range" && prv.l === 1) { - this.validateRange(str, token); - } // js warnings: - // TODO: this isn't ideal, but I'm hesitant to write a more robust solution for a couple of edge cases. 
- - - if (profile.id === "js") { - this.addJSWarnings(token); - } // general: - - - if (token.open && !token.clss) { - token.clss = token.open.clss; - } - - if (token.error) { - this.addError(token); - } - - i += token.l; - prev = token; - - if (!token.ignore) { - prv = token; - } - } // post processing: - - - while (groups.length) { - this.addError(groups.pop(), { - id: "groupopen" - }); - } - - this.matchRefs(refs, capgroups, namedgroups); - - if (charset) { - this.addError(charset, { - id: "setopen" - }); - } - - return this.token; - } - - addError(token, error = token.error) { - token.error = error; - this.errors.push(token); - } - - addJSWarnings(token) { - if (token.error) { - return; - } - - if (token.type === "neglookbehind" || token.type === "poslookbehind" || token.type === "sticky" || token.type === "unicode" || token.type == "dotall" || token.type === "unicodecat" || token.type === "unicodescript" || token.type === "namedgroup") { - token.error = { - id: "jsfuture", - warning: true - }; - } - } - - addCaptureGroup(token, groups) { - // it would be nice to make branch reset groups actually highlight all of the groups that share the same number - // that would require switching to arrays of groups for each group num - requires rearchitecture throughout the app. - let capgroups = this.captureGroups, - brgroups = this.branchResetGroups, - namedgroups = this.namedGroups; - let curGroup = groups.length ? 
groups[groups.length - 1] : null; - - if (brgroups.length) { - let brgroup = brgroups[brgroups.length - 1]; - token.num = ++brgroup.curGroupNum; - } else { - token.num = capgroups.length + 1; - } - - if (!capgroups[token.num - 1]) { - capgroups.push(token); - } - - if (token.name && !token.error) { - if (/\d/.test(token.name[0])) { - token.error = { - id: "badname" - }; - } else if (namedgroups[token.name]) { - token.error = { - id: "dupname" - }; - token.related = [namedgroups[token.name]]; - } else { - namedgroups[token.name] = token; - } - } - } - - getRef(token, str) { - token.clss = "ref"; - token.group = true; - token.relIndex = this.captureGroups.length; - token.name = str; - } - - matchRefs(refs, indexes, names) { - while (refs.length) { - let token = refs.pop(), - name = token.name, - group = names[name]; - - if (!group && !isNaN(name)) { - let sign = name[0], - index = parseInt(name) + (sign === "+" || sign === "-" ? token.relIndex : 0); - - if (sign === "-") { - index++; - } - - group = indexes[index - 1]; - } - - if (group) { - token.group = group; - token.related = [group]; - token.dir = token.i < group.i ? 1 : !group.close || token.i < group.close.i ? 0 : -1; - } else { - delete token.group; - delete token.relIndex; - this.refToOctal(token); - - if (token.error) { - this.errors.push(token.error); - } - } - } - } - - refToOctal(token) { - // PCRE: \# unmatched, \0 \00 \## = octal - // JS: \# \0 \00 \## = octal - // PCRE matches \8 \9 to "8" "9" - // JS: without the u flag \8 \9 match "8" "9" in IE, FF & Chrome, and "\8" "\9" in Safari. We support the former. - // JS: with the u flag, Chrome & FF throw an esc error, Safari does not. - // TODO: handle \0 for PCRE? Would need more testing. - // TODO: this doesn't handle two digit refs with 8/9 in them. Ex. \18 - not even sure what this is interpreted as. 
- let name = token.name, - profile = this._profile; - - if (token.type !== "numref") { - // not a simple \4 style reference, so can't decompose into an octal. - token.error = { - id: "unmatchedref" - }; - } else if (/^[0-7]{2}$/.test(name) || profile.config.reftooctalalways && /^[0-7]$/.test(name)) { - // octal - let next = token.next, - char = String.fromCharCode(next.code); - - if (next.type === "char" && char >= "0" && char <= "7" && parseInt(name + char, 8) <= 255) { - name += char; - this.mergeNext(token); - } - - token.code = parseInt(name, 8); - token.clss = "esc"; - token.type = "escoctal"; - delete token.name; - } else if (name === "8" || name === "9") { - this.parseEscChar(token, name); - delete token.name; - } else { - token.error = { - id: "unmatchedref" - }; - } - } - - mergeNext(token) { - let next = token.next; - token.next = next.next; - token.next.prev = token; - token.l++; - } - - parseFlag(str, token) { - // note that this doesn't deal with misformed patterns or incorrect flags. - let i = token.i, - c = str[i]; - - if (str[i] === "/") { - token.type = i === 0 ? "open" : "close"; - - if (i !== 0) { - token.related = [this.token]; - this.token.related = [token]; - } - } else { - token.type = this._profile.flags[c]; - } //token.clear = true; - - } - - parseChar(str, token, charset) { - let c = str[token.i]; - token.type = !charset && this._profile.charTypes[c] || "char"; - - if (!charset && c === "/") { - token.error = { - id: "fwdslash" - }; - } - - if (token.type === "char") { - token.code = c.charCodeAt(0); - } else if (ExpressionLexer.ANCHOR_TYPES[token.type]) { - token.clss = "anchor"; - } else if (token.type === "dot") { - token.clss = "charclass"; - } - - return token; - } - - parseSquareBracket(str, token, charset) { - let match; - - if (this._profile.tokens.posixcharclass && (match = str.substr(token.i).match(/^\[(:|\.)([^\]]*?)\1]/))) { - // posixcharclass: [:alpha:] - // posixcollseq: [.ch.] 
- // currently neither flavor supports posixcollseq, but PCRE does flag as an error: - // TODO: the expression above currently does not catch [.\].] - token.l = match[0].length; - token.value = match[2]; - token.clss = "charclass"; - - if (match[1] === ":") { - token.type = "posixcharclass"; - - if (!this._profile.posixCharClasses[match[2]]) { - token.error = { - id: "posixcharclassbad" - }; - } else if (!charset) { - token.error = { - id: "posixcharclassnoset" - }; - } - } else { - token.type = "posixcollseq"; // TODO: can this be generalized? Right now, no, because we assign ids that aren't in the profile. - - token.error = { - id: "notsupported" - }; - } - } else if (!charset) { - // set [a-z] [aeiou] - // setnot [^a-z] - token.type = token.clss = "set"; - - if (str[token.i + 1] === "^") { - token.l++; - token.type += "not"; - } - - charset = token; - } else { - // [[] (square bracket inside a set) - this.parseChar(str, token, charset); - } - - return charset; - } - - parseParen(str, token) { - /* - core: - . group: - . lookahead: ?= ?! - . noncap: ?: - PCRE: - . lookbehind: ?<= ? ?'name' ? - . namedref: ?P=name Also: \g'name' \k'name' etc - . comment: ?# - . atomic: ?> - . recursion: ?0 ?R Also: \g<0> - . define: ?(DEFINE) - . subroutine: ?1 ?-1 ?&name ?P>name - conditionalgroup: ?(1)a|b ?(-1)a|b ?(name)a|b - conditional: ?(?=if)then|else - mode: ?c-i - branchreset: ?| - */ - token.clss = token.type = "group"; - - if (str[token.i + 1] !== "?") { - token.close = null; // indicates that it needs a close token. 
- - token.capture = true; - return token; - } - - let sub = str.substr(token.i + 2), - match, - s = sub[0]; - - if (s === ":") { - // (?:foo) - token.type = "noncapgroup"; - token.close = null; - token.l = 3; - } else if (s === ">") { - // (?>foo) - token.type = "atomic"; - token.close = null; - token.l = 3; - } else if (s === "|") { - // (?|(a)|(b)) - token.type = "branchreset"; - token.close = null; - token.l = 3; - token.inGroupNum = token.curGroupNum = this.captureGroups.length; - this.branchResetGroups.push(token); - } else if (s === "#" && (match = sub.match(/[^)]*\)/))) { - // (?#foo) - token.clss = token.type = "comment"; - token.ignore = true; - token.l = 2 + match[0].length; - } else if (/^(R|0)\)/.test(sub)) { - // (?R) (?0) - token.clss = "ref"; - token.type = "recursion"; - token.l = 4; - } else if (match = sub.match(/^P=(\w+)\)/i)) { - // (?P=name) - token.type = "namedref"; - this.getRef(token, match[1]); - token.l = match[0].length + 2; - } else if (/^\(DEFINE\)/.test(sub)) { - // (?(DEFINE)foo) - token.type = "define"; - token.close = null; - token.l = 10; - } else if (match = sub.match(/^/)) || this._profile.config.namedgroupalt && ((match = sub.match(/^'(\w+)'/)) || (match = sub.match(/^P<(\w+)>/)))) { - // (?foo) (?'name'foo) (?Pfoo) - token.type = "namedgroup"; - token.close = null; - token.name = match[1]; - token.capture = true; - token.l = match[0].length + 2; - } else if ((match = sub.match(/^([-+]?\d\d?)\)/)) || (match = sub.match(/^(?:&|P>)(\w+)\)/))) { - // (?1) (?-1) (?&name) (?P>name) - token.type = (isNaN(match[1]) ? "named" : "num") + "subroutine"; - this.getRef(token, match[1]); - token.l = match[0].length + 2; - } else if ((match = sub.match(/^\(([-+]?\d\d?)\)/)) || (match = sub.match(/^\((\w+)\)/))) { - // (?(1)a|b) (?(-1)a|b) (?(name)a|b) - this.getRef(token, match[1]); - token.clss = "special"; - token.type = "conditionalgroup"; - token.close = null; - token.l = match[0].length + 2; - } else if (/^\(\?255). 
In theory it should allow 4? - - if (isNaN(val) || val > 255 || /[^\da-f]/i.test(match[1])) { - token.error = { - id: "esccharbad" - }; - } else { - token.code = val; - } - } else if (match = sub.match(/^x([\da-fA-F]{0,2})/)) { - // hex ascii: \xFF - token.type = "eschexadecimal"; - token.l += match[0].length; - token.code = parseInt(match[1] || 0, 16); - } else if (match = sub.match(/^c([a-zA-Z])?/)) { - // control char: \cA \cz - // also handles: \c - // not supported in JS strings - token.type = "esccontrolchar"; - - if (match[1]) { - token.code = match[1].toUpperCase().charCodeAt(0) - 64; // A=65 - - token.l += 2; - } else if (profile.config.ctrlcodeerr) { - token.l++; - token.error = { - id: "esccharbad" - }; - } else { - return this.parseChar(str, token, charset); // this builds the "/" token - } - } else if (match = sub.match(/^[0-7]{1,3}/)) { - // octal ascii: \011 - token.type = "escoctal"; - sub = match[0]; - - if (parseInt(sub, 8) > 255) { - sub = sub.substr(0, 2); - } - - token.l += sub.length; - token.code = parseInt(sub, 8); - } else if (profile.tokens.escoctalo && (match = sub.match(/^o\{(.*?)}/i))) { - // \o{377} - token.type = "escoctal"; - token.l += match[0].length; - val = parseInt(match[1], 8); - - if (isNaN(val) || val > 255 || /[^0-7]/.test(match[1])) { - token.error = { - id: "esccharbad" - }; - } else { - token.code = val; - } - } else { - // single char - if (token.type = profile.escCharTypes[c]) { - token.l++; - token.clss = ExpressionLexer.ANCHOR_TYPES[token.type] ? "anchor" : "charclass"; - return token; - } - - token.code = profile.escCharCodes[c]; - - if (token.code === undefined || token.code === false) { - // unrecognized. 
- return this.parseEscChar(token, c); - } // update SubstLexer if this changes: - - - token.l++; - token.type = "esc_" + token.code; - } - - token.clss = "esc"; - return token; - } - - parseEscChar(token, c) { - // unrecognized escchar: \u \a \8, etc - // JS: allowed except if u flag set, Safari still allows \8 \9 - // PCRE: allows \8 \9 but not others // TODO: support? - let profile = this._profile; - token.l = 2; - - if (!profile.badEscChars[c] && profile.tokens.escchar && !this._modes.u || profile.escChars[c]) { - token.type = "escchar"; - token.code = c.charCodeAt(0); - token.clss = "esc"; - } else { - token.error = { - id: "esccharbad" - }; - } - } - - parseRef(token, sub) { - // namedref: \k \k'name' \k{name} \g{name} - // namedsubroutine: \g \g'name' - // numref: \g1 \g+2 \g{2} - // numsubroutine: \g<-1> \g'1' - // recursion: \g<0> \g'0' - let c = sub[0], - s = "", - match; - - if (match = sub.match(/^[gk](?:'\w*'|<\w*>|{\w*})/)) { - s = match[0].substr(2, match[0].length - 3); - - if (c === "k" && !isNaN(s)) { - s = ""; - } // TODO: specific error for numeric \k? - - } else if (match = sub.match(/^g(?:({[-+]?\d+}|<[-+]?\d+>|'[-+]?\d+')|([-+]?\d+))/)) { - s = match[2] !== undefined ? match[2] : match[1].substr(1, match[1].length - 2); - } - - let isRef = c === "k" || !(sub[1] === "'" || sub[1] === "<"); - - if (!isRef && s == 0) { - token.type = "recursion"; - token.clss = "ref"; - } else { - // namedref, extnumref, namedsubroutine, numsubroutine - token.type = (isNaN(s) ? "named" : (isRef ? "ext" : "") + "num") + (isRef ? "ref" : "subroutine"); - this.getRef(token, s); - } - - token.l += match ? 
match[0].length : 1; - } - - parseUnicode(token, sub) { - // unicodescript: \p{Cherokee} - // unicodecat: \p{Ll} \pL - // not: \P{Ll} \p{^Lu} - let match = sub.match(/p\{\^?([^}]*)}/i), - val = match && match[1], - not = sub[0] === "P"; - - if (!match && (match = sub.match(/[pP]([LMZSNPC])/))) { - val = match[1]; - } else { - not = not !== (sub[2] === "^"); - } - - token.l += match ? match[0].length : 1; - token.type = "unicodecat"; - - if (this._profile.unicodeScripts[val]) { - token.type = "unicodescript"; - } else if (!this._profile.unicodeCategories[val]) { - val = null; - } - - if (not) { - token.type = "not" + token.type; - } - - if (!this._profile.config.unicodenegated && sub[2] === "^" || !val) { - token.error = { - id: "unicodebad" - }; - } - - token.value = val; - token.clss = "charclass"; - return token; - } - - parseMode(token, sub) { - // (?i-x) - // supported modes in PCRE: i-caseinsens, x-freespacing, s-dotall, m-multiline, U-switchlazy, [J-samename] - let match = sub.match(/^[-a-z]+\)/i); - - if (!match) { - return; - } - - let supModes = this._profile.modes; - let modes = Object.assign({}, this._modes), - bad = false, - not = false, - s = match[0], - c; - token.on = token.off = ""; - - for (let i = 0, l = s.length - 1; i < l; i++) { - c = s[i]; - - if (c === "-") { - not = true; - continue; - } - - if (!supModes[c]) { - bad = true; - break; - } - - modes[c] = !not; - token.on = token.on.replace(c, ""); - - if (not) { - token.off = token.off.replace(c, ""); - token.off += c; - } else { - token.on += c; - } - } - - token.clss = "special"; - token.type = "mode"; - token.l = match[0].length + 2; - - if (bad) { - token.error = { - id: "modebad" - }; - token.errmode = c; - } else { - this._modes = modes; - } - - return token; - } - - parseQuant(str, token) { - // quantifier: {0,3} {3} {1,} - token.type = token.clss = "quant"; - let i = token.i; - let end = str.indexOf("}", i + 1); - token.l += end - i; - let arr = str.substring(i + 1, end).split(","); - 
token.min = parseInt(arr[0]); - token.max = arr[1] === undefined ? token.min : arr[1] === "" ? -1 : parseInt(arr[1]); - - if (token.max !== -1 && token.min > token.max) { - token.error = { - id: "quantrev" - }; - } - - return token; - } - - validateRange(str, end) { - // char range: [a-z] [\11-\n] - let next = end, - token = end.prv, - prv = token.prv; - - if (prv.code === undefined || next.code === undefined) { - // not a range, rewrite as a char: - this.parseChar(str, token); - } else { - token.clss = "set"; - - if (prv.code > next.code) { - // this gets added here because parse has already moved to the next token: - this.errors.push(token.error = { - id: "rangerev" - }); - } // preserve as separate tokens, but treat as one in the UI: - - - next.proxy = prv.proxy = token; - token.set = [prv, token, next]; - } - } - -} -ExpressionLexer.ANCHOR_TYPES = { - "bof": true, - "eof": true, - "bos": true, - "eos": true, - "abseos": true, - "wordboundary": true, - "notwordboundary": true, - "prevmatchend": true -}; - -module.exports = ExpressionLexer; -//# sourceMappingURL=lexer.js.map diff --git a/packages/next/compiled/regexr-lexer/profiles.js b/packages/next/compiled/regexr-lexer/profiles.js deleted file mode 100644 index 73f49837158bf..0000000000000 --- a/packages/next/compiled/regexr-lexer/profiles.js +++ /dev/null @@ -1,838 +0,0 @@ -'use strict'; - -/* -RegExr: Learn, Build, & Test RegEx -Copyright (C) 2017 gskinner.com, inc. - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program. If not, see . -*/ - -/* -The core profile essentially defines every feature we support, and is then pared down by other profiles. All values should be y (true). - -It also acts in part as pseudo documentation for all of the "type" values. - */ -let y = true, - n = false; -let core = { - id: "core", - flags: { - "g": "global", - // note that this is not a real flag in some flavors, but a different method call - "i": "caseinsensitive", - "m": "multiline", - "s": "dotall", - "u": "unicode", - "y": "sticky", - "x": "extended", - "U": "ungreedy" - }, - // reserved characters that need to be escaped: - escChars: "+*?^$\\.[]{}()|/".split("").reduce((o, c) => { - o[c] = y; - return o; - }, {}), - // escape chars that are specifically not supported by the flavor: - badEscChars: n, - escCharCodes: { - "0": 0, - // null - "a": 7, - // bell - "t": 9, - // tab - "n": 10, - // lf - "v": 11, - // vertical tab - "f": 12, - // form feed - "r": 13, - // cr - "e": 27 // escape - - }, - escCharTypes: { - "A": "bos", - "b": "wordboundary", - "B": "notwordboundary", - "d": "digit", - "D": "notdigit", - "G": "prevmatchend", - "h": "hwhitespace", - "H": "nothwhitespace", - "K": "keepout", - "N": "notlinebreak", - "R": "linebreak", - "s": "whitespace", - "S": "notwhitespace", - "v": "vwhitespace", - "V": "notvwhitespace", - "w": "word", - "W": "notword", - "X": "unicodegrapheme", - "Z": "eos", - "z": "abseos" - }, - charTypes: { - ".": "dot", - "|": "alt", - "$": "eof", - "^": "bof", - "?": "opt", - // also: "lazy" - "+": "plus", - // also: "possessive" - "*": "star" - }, - unquantifiable: { - // all group/set open tokens are unquantifiable by default (ie. 
tokens with a .close value) - "quant": y, - "plus": y, - "star": y, - "opt": y, - "lazy": y, - "possessive": y, - "eof": y, - "bof": y, - "eos": y, - "abseos": y, - "alt": y, - "open": y, - "mode": y, - "comment": y, - // TODO: this should actually be ignored by quantifiers. - "condition": y - }, - unicodeScripts: { - // from: http://www.pcre.org/original/doc/html/pcrepattern.html - "Arabic": y, - "Armenian": y, - "Avestan": y, - "Balinese": y, - "Bamum": y, - "Bassa_Vah": y, - "Batak": y, - "Bengali": y, - "Bopomofo": y, - "Brahmi": y, - "Braille": y, - "Buginese": y, - "Buhid": y, - "Canadian_Aboriginal": y, - "Carian": y, - "Caucasian_Albanian": y, - "Chakma": y, - "Cham": y, - "Cherokee": y, - "Common": y, - "Coptic": y, - "Cuneiform": y, - "Cypriot": y, - "Cyrillic": y, - "Deseret": y, - "Devanagari": y, - "Duployan": y, - "Egyptian_Hieroglyphs": y, - "Elbasan": y, - "Ethiopic": y, - "Georgian": y, - "Glagolitic": y, - "Gothic": y, - "Grantha": y, - "Greek": y, - "Gujarati": y, - "Gurmukhi": y, - "Han": y, - "Hangul": y, - "Hanunoo": y, - "Hebrew": y, - "Hiragana": y, - "Imperial_Aramaic": y, - "Inherited": y, - "Inscriptional_Pahlavi": y, - "Inscriptional_Parthian": y, - "Javanese": y, - "Kaithi": y, - "Kannada": y, - "Katakana": y, - "Kayah_Li": y, - "Kharoshthi": y, - "Khmer": y, - "Khojki": y, - "Khudawadi": y, - "Lao": y, - "Latin": y, - "Lepcha": y, - "Limbu": y, - "Linear_A": y, - "Linear_B": y, - "Lisu": y, - "Lycian": y, - "Lydian": y, - "Mahajani": y, - "Malayalam": y, - "Mandaic": y, - "Manichaean": y, - "Meetei_Mayek": y, - "Mende_Kikakui": y, - "Meroitic_Cursive": y, - "Meroitic_Hieroglyphs": y, - "Miao": y, - "Modi": y, - "Mongolian": y, - "Mro": y, - "Myanmar": y, - "Nabataean": y, - "New_Tai_Lue": y, - "Nko": y, - "Ogham": y, - "Ol_Chiki": y, - "Old_Italic": y, - "Old_North_Arabian": y, - "Old_Permic": y, - "Old_Persian": y, - "Old_South_Arabian": y, - "Old_Turkic": y, - "Oriya": y, - "Osmanya": y, - "Pahawh_Hmong": y, - "Palmyrene": y, - 
"Pau_Cin_Hau": y, - "Phags_Pa": y, - "Phoenician": y, - "Psalter_Pahlavi": y, - "Rejang": y, - "Runic": y, - "Samaritan": y, - "Saurashtra": y, - "Sharada": y, - "Shavian": y, - "Siddham": y, - "Sinhala": y, - "Sora_Sompeng": y, - "Sundanese": y, - "Syloti_Nagri": y, - "Syriac": y, - "Tagalog": y, - "Tagbanwa": y, - "Tai_Le": y, - "Tai_Tham": y, - "Tai_Viet": y, - "Takri": y, - "Tamil": y, - "Telugu": y, - "Thaana": y, - "Thai": y, - "Tibetan": y, - "Tifinagh": y, - "Tirhuta": y, - "Ugaritic": y, - "Vai": y, - "Warang_Citi": y, - "Yi": y - }, - unicodeCategories: { - // from: http://www.pcre.org/original/doc/html/pcrepattern.html - "C": y, - // Other - "Cc": y, - // Control - "Cf": y, - // Format - "Cn": y, - // Unassigned - "Co": y, - // Private use - "Cs": y, - // Surrogate - "L": y, - // Letter - "L&": y, - // Any letter - "Ll": y, - // Lower case letter - "Lm": y, - // Modifier letter - "Lo": y, - // Other letter - "Lt": y, - // Title case letter - "Lu": y, - // Upper case letter - "M": y, - // Mark - "Mc": y, - // Spacing mark - "Me": y, - // Enclosing mark - "Mn": y, - // Non-spacing mark - "N": y, - // Number - "Nd": y, - // Decimal number - "Nl": y, - // Letter number - "No": y, - // Other number - "P": y, - // Punctuation - "Pc": y, - // Connector punctuation - "Pd": y, - // Dash punctuation - "Pe": y, - // Close punctuation - "Pf": y, - // Final punctuation - "Pi": y, - // Initial punctuation - "Po": y, - // Other punctuation - "Ps": y, - // Open punctuation - "S": y, - // Symbol - "Sc": y, - // Currency symbol - "Sk": y, - // Modifier symbol - "Sm": y, - // Mathematical symbol - "So": y, - // Other symbol - "Z": y, - // Separator - "Zl": y, - // Line separator - "Zp": y, - // Paragraph separator - "Zs": y // Space separator - - }, - posixCharClasses: { - // from: http://www.pcre.org/original/doc/html/pcrepattern.html - "alnum": y, - // letters and digits - "alpha": y, - // letters - "ascii": y, - // character codes 0 - 127 - "blank": y, - // space or tab 
only - "cntrl": y, - // control characters - "digit": y, - // decimal digits (same as \d) - "graph": y, - // printing characters, excluding space - "lower": y, - // lower case letters - "print": y, - // printing characters, including space - "punct": y, - // printing characters, excluding letters and digits and space - "space": y, - // white space (the same as \s from PCRE 8.34) - "upper": y, - // upper case letters - "word": y, - // "word" characters (same as \w) - "xdigit": y // hexadecimal digits - - }, - modes: { - "i": "caseinsensitive", - "s": "dotall", - "m": "multiline", - "x": "freespacing", - "J": "samename", - "U": "switchlazy" - }, - tokens: { - // note that not all of these are actively used in the lexer, but are included for completeness. - "open": y, - // opening / - "close": y, - // closing / - "char": y, - // abc - // classes: - // also in escCharTypes and charTypes - "set": y, - // [a-z] - "setnot": y, - // [^a-z] - "setclose": y, - // ] - "range": y, - // [a-z] - "unicodecat": y, - // \p{Ll} \P{^Ll} \pL - "notunicodecat": y, - // \P{Ll} \p{^Ll} \PL - "unicodescript": y, - // \p{Cherokee} \P{^Cherokee} - "notunicodescript": y, - // \P{Cherokee} \p{^Cherokee} - "posixcharclass": y, - // [[:alpha:]] - // not in supported flavors: "posixcollseq": y, // [[.foo.]] // this is recognized by the lexer, currently returns "notsupported" error - // not in supported flavors: "unicodeblock": y, // \p{InThai} \p{IsThai} and NOT \P - // not in supported flavors: "subtract": y, // [base-[subtract]] - // not in supported flavors: "intersect": y, // [base&&[intersect]] - // esc: - // also in escCharCodes and escCharTypes - "escoctal": y, - // \11 - "escunicodeu": y, - // \uFFFF - "escunicodeub": y, - // \u{00A9} - "escunicodexb": y, - // \x{00A9} - "escsequence": y, - // \Q...\E - "eschexadecimal": y, - // \xFF - "esccontrolchar": y, - // \cA - "escoctalo": y, - // \o{377} // resolved to escoctal in lexer, no docs required - "escchar": y, - // \m (unrecognized 
escapes) // no reference documentation required - // group: - "group": y, - // (foo) - "groupclose": y, - // ) - "noncapgroup": y, - // (?:foo) - "namedgroup": y, - // (?Pfoo) (?foo) (?'name'foo) - "atomic": y, - // (?>foo|bar) - "define": y, - // (?(DEFINE)foo) - "branchreset": y, - // (?|(a)|(b)) - // lookaround: - "poslookbehind": y, - // (?<=foo) - "neglookbehind": y, - // (? \k'name' \k{name} (?P=name) \g{name} - "numref": y, - // \1 - "extnumref": y, - // \g{-1} \g{+1} \g{1} \g1 \g-1 - "recursion": y, - // (?R) (?0) \g<0> \g'0' - "numsubroutine": y, - // \g<1> \g'-1' (?1) (?-1) - "namedsubroutine": y, - // \g \g'name' (?&name) (?P>name) - // quantifiers: - // also in specialChars - "quant": y, - // {1,2} - "possessive": y, - // ++ - "lazy": y, - // ? - // special: - "conditional": y, - // (?(?=if)then|else) - "condition": y, - // (?=if) any lookaround - "conditionalelse": y, - // | - "conditionalgroup": y, - // (?(1)a|b) (?(-1)a|b) (?(name)a|b) - "mode": y, - // (?i-x) see modes above - "comment": y, - // (?#comment) - // meta: - "matchanyset": y // [\s\S] - - }, - substTokens: { - // named references aren't supported in JS or PCRE / PHP - "subst_$esc": y, - // $$ - "subst_$&match": y, - // $& - "subst_$before": y, - // $` - "subst_$after": y, - // $' - "subst_$group": y, - // $1 $99 // resolved to subst_group in lexer, no docs required - "subst_$bgroup": y, - // ${1} ${99} // resolved to subst_group in lexer, no docs required - "subst_bsgroup": y, - // \1 \99 // resolved to subst_group in lexer, no docs required - "subst_group": y, - // $1 \1 \{1} // combined in docs, not used by lexer - "subst_0match": y, - // $0 \0 \{0} - // this isn't a feature of the engine, but of RegExr: - "subst_esc": y // \n \r \u1234 - - }, - config: { - "forwardref": y, - // \1(a) - "nestedref": y, - // (\1a|b)+ - "ctrlcodeerr": y, - // does \c error? (vs decompose) - "reftooctalalways": y, - // does a single digit reference \1 become an octal? 
(vs remain an unmatched ref) - "substdecomposeref": y, - // will a subst reference decompose? (ex. \3 becomes "\" & "3" if < 3 groups) - "looseesc": y, - // should unrecognized escape sequences match the character (ex. \u could match "u") // disabled when `u` flag is set - "unicodenegated": y, - // \p{^etc}" - "namedgroupalt": y // if false, only support (?foo) - - }, - docs: {// for example: - //possessive: {desc: "+This will be appended to the existing entry." }, - //namedgroup: {tip: "This will overwrite the existing entry." } - } -}; - -/* -RegExr: Learn, Build, & Test RegEx -Copyright (C) 2017 gskinner.com, inc. - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . -*/ - -/* -The PCRE profile is almost a straight copy of the core profile. -*/ -let y$1 = true, - n$1 = false; -let pcre = { - id: "pcre", - label: "PCRE", - browser: false, - flags: { - "u": n$1, - "y": n$1 - }, - badEscChars: "uUlLN".split("").reduce((o, c) => { - o[c] = y$1; - return o; - }, {}), - escCharCodes: { - "v": n$1 // vertical tab // PCRE support \v as vertical whitespace - - }, - tokens: { - "escunicodeu": n$1, - // \uFFFF - "escunicodeub": n$1 // \u{00A9} - // octalo PCRE 8.34+ - - }, - substTokens: { - "subst_$esc": n$1, - // $$ - "subst_$&match": n$1, - // $& - "subst_$before": n$1, - // $` - "subst_$after": n$1 // $' - - }, - config: { - "reftooctalalways": n$1, - // does a single digit reference \1 become an octal? 
(vs remain an unmatched ref) - "substdecomposeref": n$1, - // will a subst reference decompose? (ex. \3 becomes "\" & "3" if < 3 groups) - "looseesc": n$1 // should unrecognized escape sequences match the character (ex. \u could match "u") // disabled when `u` flag is set - - }, - docs: { - "escoctal": { - ext: "+

The syntax \\o{FFF} is also supported.

" - }, - "numref": { - ext: "

There are multiple syntaxes for this feature: \\1 \\g1 \\g{1}.

" + "

The latter syntaxes support relative values preceded by + or -. For example \\g-1 would match the group preceding the reference.

" - }, - "lazy": { - ext: "+

This behaviour is reversed by the ungreedy (U) flag/modifier.

" - } - } -}; - -/* -RegExr: Learn, Build, & Test RegEx -Copyright (C) 2017 gskinner.com, inc. - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . -*/ - -/* -The javascript profile disables a large number of features. - -Note that JS warnings are currently added in addJSWarnings in the ExpresssionLexer. -*/ -let n$2 = false; - -function test(expr, flag) { - try { - return new RegExp(expr, flag) && undefined; - } catch (e) { - return n$2; - } -} - -function testFlag(flag) { - return test(".", flag); -} - -let unicodeFlag = testFlag("u"); -let stickyFlag = testFlag("y"); -let dotallFlag = testFlag("s"); -let lookbehind = test("(?<=A)"); -let namedgroup = test("(?B)"); -let unicodecat = test("\\p{Ll}", "u"); // disabled when `u` flag is not set - -let javascript = { - id: "js", - label: "JavaScript", - browser: true, - flags: { - "s": dotallFlag, - // warning - "x": n$2, - "u": unicodeFlag, - // warning - "y": stickyFlag, - // warning - "U": n$2 - }, - escCharCodes: { - "a": n$2, - // bell - "e": n$2 // escape - - }, - escCharTypes: { - "A": n$2, - // bos - "G": n$2, - // prevmatchend - "h": n$2, - // hwhitespace - "H": n$2, - // nothwhitespace - "K": n$2, - // keepout - "N": n$2, - // notlinebreak - "R": n$2, - // newline - "v": n$2, - // vwhitespace - "V": n$2, - // notvwhitespace - "X": n$2, - // unicodegrapheme - "Z": n$2, - // eos - "z": n$2 // abseos - - }, - unicodeScripts: unicodecat, - unicodeCategories: unicodecat, - 
posixCharClasses: n$2, - modes: n$2, - tokens: { - // classes: - // also in escCharSpecials and specialChars - "unicodecat": unicodecat, - // \p{Ll} \P{^Ll} \pL - "notunicodecat": unicodecat, - // \P{Ll} \p{^Ll} \PL - "unicodescript": unicodecat, - // \p{Cherokee} \P{^Cherokee} - "notunicodescript": unicodecat, - // \P{Cherokee} \p{^Cherokee} - "posixcharclass": n$2, - // [[:alpha:]] - // esc: - // also in escCharCodes and escCharSpecials - "escunicodeub": unicodeFlag, - // \u{00A9} - "escunicodexb": n$2, - // \x{00A9} - "escsequence": n$2, - // \Q...\E - "escoctalo": n$2, - // \o{377} - // group: - "namedgroup": namedgroup, - // (?Pfoo) (?foo) (?'name'foo) - "atomic": n$2, - // (?>foo|bar) - "define": n$2, - // (?(DEFINE)foo) - "branchreset": n$2, - // (?|(a)|(b)) - // lookaround: - "poslookbehind": lookbehind, - // (?<=foo) // warning - "neglookbehind": lookbehind, - // (? \k'name' \k{name} (?P=name) \g{name} - "extnumref": n$2, - // \g{-1} \g{+1} \g{1} \g1 \g-1 - "recursion": n$2, - // (?R) (?0) \g<0> \g'0' - "numsubroutine": n$2, - // \g<1> \g'-1' (?1) (?-1) - "namedsubroutine": n$2, - // \g \g'name' (?&name) (?P>name) - // quantifiers: - // also in specialChars - "possessive": n$2, - // special: - "conditional": n$2, - // (?(?=if)then|else) - "conditionalif": n$2, - // (?=if) any lookaround - "conditionalelse": n$2, - // | - "conditionalgroup": n$2, - // (?(1)a|b) (?(-1)a|b) (?(name)a|b) - "mode": n$2, - // (?i-x) see modes above - "comment": n$2 // (?#comment) - - }, - config: { - "forwardref": n$2, - // \1(a) - "nestedref": n$2, - // (\1a|b)+ - "ctrlcodeerr": n$2, - // does \c error, or decompose? - "unicodenegated": n$2, - // \p{^etc} - "namedgroupalt": n$2 // if false, only support (?foo) - - }, - substTokens: { - "subst_0match": n$2, - // $0 \0 \{0} - "subst_$bgroup": n$2, - // ${1} ${99} - "subst_bsgroup": n$2 // \1 \99 - - }, - docs: { - "subst_group": { - ext: "" - }, - // remove other syntaxes. 
- "namedgroup": { - ext: "" - }, - // remove other syntaxes. - "unicodecat": { - ext: "

Requires the u flag.

" + "

For a list of values, see this MDN page.

" - } // notunicodecat, unicodescript, notunicodescript are copied from unicodecat below. - - } -}; -javascript.docs.notunicodecat = javascript.docs.unicodescript = javascript.docs.notunicodescript = javascript.docs.unicodecat; - -/* -RegExr: Learn, Build, & Test RegEx -Copyright (C) 2017 gskinner.com, inc. - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . -*/ -let profiles = { - core -}; -profiles.pcre = merge(core, pcre); -profiles.js = merge(core, javascript); - -function merge(p1, p2) { - // merges p1 into p2, essentially just a simple deep copy without array support. - for (let n in p1) { - if (p2[n] === false) { - continue; - } else if (typeof p1[n] === "object") { - p2[n] = merge(p1[n], p2[n] || {}); - } else if (p2[n] === undefined) { - p2[n] = p1[n]; - } - } - - return p2; -} - -module.exports = profiles; -//# sourceMappingURL=profiles.js.map diff --git a/packages/next/taskfile.js b/packages/next/taskfile.js index bae2e36933fd8..15b71f3fd3072 100644 --- a/packages/next/taskfile.js +++ b/packages/next/taskfile.js @@ -695,7 +695,7 @@ export async function copy_regexr_lexer(task, opts) { 'lexer-dist/**/*' ) ) - .target('compiled/regexr-lexer') + .target('dist/compiled/regexr-lexer') } export async function precompile(task, opts) {