Skip to content

Commit

Permalink
Add ability to parse removeparam= as queryprune=
Browse files Browse the repository at this point in the history
Related issue:
- uBlockOrigin/uBlock-issues#1356

Related commit:
- bde3164

It is not possible to achieve perfect compatibility at this
point, but reasonable compatibility should be achieved for
a majority of instances of `removeparam=`.

Notable differences:
--------------------

uBO always matches in a case-insensitive manner, so there is
no need to ask for case-insensitivity, and no need to use
uppercase characters in `queryprune=` values.

uBO does not escape special regex characters since the
`queryprune=` values are always assumed to be literal
regular expressions (leaving out the documented special
characters). This means `removeparam=` values containing
special regex characters won't be properly translated
and are unlikely to work properly in uBO.

For example, the `queryprune` value of a filter such as
`$removeparam=__xts__[0]` internally becomes the literal
regex `/__xts__[0]/`, and consequently would not match
a query parameter such as `...?__xts__[0]=...`.

Notes:
------

Additionally, for performance reasons, when uBO encounters
a pattern-less `queryprune=` (or `removeparam=`) filter,
it will try to extract a valid pattern from the
`queryprune=` value. For instance, the following filter:

    $queryprune=utm_campaign

Will be translated internally into:

    utm_campaign$queryprune=utm_campaign

The logger will reflect this internal translation.
  • Loading branch information
gorhill committed Nov 26, 2020
1 parent 80413df commit 6ac09a2
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 20 deletions.
2 changes: 2 additions & 0 deletions src/js/static-filtering-parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -2092,6 +2092,7 @@ const netOptionTokenDescriptors = new Map([
[ 'popunder', OPTTokenPopunder | OPTNonNetworkType | OPTNonCspableType | OPTNonRedirectableType ],
[ 'popup', OPTTokenPopup | OPTNonNetworkType | OPTCanNegate | OPTNonCspableType | OPTNonRedirectableType ],
[ 'queryprune', OPTTokenQueryprune | OPTMustAssign | OPTAllowMayAssign | OPTModifierType | OPTNonCspableType | OPTNonRedirectableType ],
[ 'removeparam', OPTTokenQueryprune | OPTMustAssign | OPTAllowMayAssign | OPTModifierType | OPTNonCspableType | OPTNonRedirectableType ],
[ 'redirect', OPTTokenRedirect | OPTMustAssign | OPTAllowMayAssign | OPTModifierType ],
[ 'redirect-rule', OPTTokenRedirectRule | OPTMustAssign | OPTAllowMayAssign | OPTModifierType | OPTNonCspableType ],
[ 'script', OPTTokenScript | OPTCanNegate | OPTNetworkType | OPTModifiableType | OPTRedirectableType | OPTNonCspableType ],
Expand Down Expand Up @@ -2147,6 +2148,7 @@ Parser.netOptionTokenIds = new Map([
[ 'popunder', OPTTokenPopunder ],
[ 'popup', OPTTokenPopup ],
[ 'queryprune', OPTTokenQueryprune ],
[ 'removeparam', OPTTokenQueryprune ],
[ 'redirect', OPTTokenRedirect ],
[ 'redirect-rule', OPTTokenRedirectRule ],
[ 'script', OPTTokenScript ],
Expand Down
92 changes: 72 additions & 20 deletions src/js/static-net-filtering.js
Original file line number Diff line number Diff line change
Expand Up @@ -2628,7 +2628,7 @@ const FilterParser = class {
this.noTokenHash = urlTokenizer.noTokenHash;
this.reIsolateHostname = /^(\*?\.)?([^\x00-\x24\x26-\x2C\x2F\x3A-\x5E\x60\x7B-\x7F]+)(.*)/;
this.reBadCSP = /(?:=|;)\s*report-(?:to|uri)\b/;
this.reRegexToken = /[%0-9A-Za-z]+/g;
this.reToken = /[%0-9A-Za-z]+/g;
this.reRegexTokenAbort = /[\(\)\[\]]/;
this.reRegexBadPrefix = /(^|[^\\]\.|\\[%SDWsdw]|[^\\][()*+?[\\\]{}])$/;
this.reRegexBadSuffix = /^([^\\]\.|\\[%SDWsdw]|[()*+?[\]{}]|$)/;
Expand Down Expand Up @@ -3110,34 +3110,48 @@ const FilterParser = class {
// i.e. very common with a high probability of ending up as a miss,
// are not good. Avoid if possible. This has a significant positive
// impact on performance.
//
// For pattern-less queryprune filters, try to derive a pattern from
// the queryprune value.

makeToken() {
if ( this.pattern === '*' ) { return; }
if ( this.pattern === '*' ) {
if (
this.modifyType !== this.parser.OPTTokenQueryprune ||
this.makePatternFromQuerypruneValue() === false
) {
return;
}
}
if ( this.isRegex ) {
return this.extractTokenFromRegex();
}
const match = this.extractTokenFromPattern();
if ( match === null ) { return; }
this.token = match.token;
this.tokenHash = urlTokenizer.tokenHashFromString(this.token);
this.tokenBeg = match.pos;
this.extractTokenFromPattern();
}

// Note: a one-char token is better than a documented bad token.
extractTokenFromPattern() {
this.reToken.lastIndex = 0;
const pattern = this.pattern;
let bestMatch = null;
let bestBadness = 0x7FFFFFFF;
for ( const match of this.parser.patternTokens() ) {
const badness = match.token.length > 1
? this.badTokens.get(match.token) || 0
for (;;) {
const match = this.reToken.exec(pattern);
if ( match === null ) { break; }
const badness = match[0].length > 1
? this.badTokens.get(match[0]) || 0
: 1;
if ( badness === 0 ) { return match; }
if ( badness < bestBadness ) {
bestMatch = match;
if ( badness === 0 ) { break; }
bestBadness = badness;
}
}
return bestMatch;
if ( bestMatch !== null ) {
this.token = bestMatch[0];
this.tokenHash = urlTokenizer.tokenHashFromString(this.token);
this.tokenBeg = bestMatch.index;
}
}

// https://github.com/gorhill/uBlock/issues/2781
Expand All @@ -3147,15 +3161,16 @@ const FilterParser = class {
// Mind `\b` directives: `/\bads\b/` should result in token being `ads`,
// not `bads`.
extractTokenFromRegex() {
this.reRegexToken.lastIndex = 0;
const s = this.pattern;
this.reToken.lastIndex = 0;
const pattern = this.pattern;
let bestToken;
let bestBadness = 0x7FFFFFFF;
for (;;) {
const matches = this.reRegexToken.exec(s);
const matches = this.reToken.exec(pattern);
if ( matches === null ) { break; }
let token = matches[0];
let prefix = s.slice(0, matches.index);
let suffix = s.slice(this.reRegexToken.lastIndex);
let prefix = pattern.slice(0, matches.index);
let suffix = pattern.slice(this.reToken.lastIndex);
if (
this.reRegexTokenAbort.test(prefix) &&
this.reRegexTokenAbort.test(suffix)
Expand All @@ -3181,13 +3196,47 @@ const FilterParser = class {
? this.badTokens.get(token) || 0
: 1;
if ( badness < bestBadness ) {
this.token = token.toLowerCase();
this.tokenHash = urlTokenizer.tokenHashFromString(this.token);
this.tokenBeg = matches.index;
bestToken = token;
if ( badness === 0 ) { break; }
bestBadness = badness;
}
}
if ( bestToken !== undefined ) {
this.token = bestToken.toLowerCase();
this.tokenHash = urlTokenizer.tokenHashFromString(this.token);
}
}

makePatternFromQuerypruneValue() {
let pattern = this.modifyValue;
if ( pattern === '*' || pattern.charCodeAt(0) === 0x21 /* '!' */ ) {
return false;
}
if ( /^\w+$/.test(pattern) ) {
this.pattern = `${pattern}=`;
return true;
}
const reRegex = /^\/(.+)\/i?$/;
if ( reRegex.test(pattern) ) {
pattern = reRegex.exec(pattern)[1];
} else {
let prefix = '', suffix = '';
if ( pattern.startsWith('|') ) {
pattern = pattern.slice(1);
prefix = '\\b';
}
if ( pattern.endsWith('|') ) {
pattern = pattern.slice(0, -1);
suffix = '\\b';
}
if ( pattern.indexOf('|') !== -1 ) {
pattern = `(?:${pattern})`;
}
pattern = prefix + pattern + suffix;
}
this.pattern = pattern;
this.isRegex = true;
return true;
}

hasNoOptionUnits() {
Expand Down Expand Up @@ -4288,6 +4337,7 @@ FilterContainer.prototype.filterQuery = function(fctxt) {

FilterContainer.prototype.parseFilterPruneValue = function(modifier) {
const cache = {};
const reRegex = /^\/(.+)\/i?$/;
let retext = modifier.value;
if ( retext === '*' ) {
cache.all = true;
Expand All @@ -4296,6 +4346,8 @@ FilterContainer.prototype.parseFilterPruneValue = function(modifier) {
if ( cache.not ) { retext = retext.slice(1); }
if ( /^\w+$/.test(retext) ) {
retext = `^${retext}=`;
} else if ( reRegex.test(retext) ) {
retext = reRegex.exec(retext)[1];
} else {
if ( retext.startsWith('|') ) { retext = `^${retext.slice(1)}`; }
if ( retext.endsWith('|') ) { retext = `${retext.slice(0,-1)}$`; }
Expand Down

0 comments on commit 6ac09a2

Please sign in to comment.