diff --git a/platform/webext/vapi-webrequest.js b/platform/webext/vapi-webrequest.js
index 2caeb33e7bb51..c2551ffbc366a 100644
--- a/platform/webext/vapi-webrequest.js
+++ b/platform/webext/vapi-webrequest.js
@@ -29,7 +29,11 @@ vAPI.net = {
onBeforeRequest: {},
onBeforeMaybeSpuriousCSPReport: {},
onHeadersReceived: {},
- nativeCSPReportFiltering: true
+ nativeCSPReportFiltering: true,
+ webRequest: browser.webRequest,
+ canFilterResponseBody:
+ typeof browser.webRequest === 'object' &&
+ typeof browser.webRequest.filterResponseData === 'function'
};
/******************************************************************************/
diff --git a/src/background.html b/src/background.html
index d3ccd04992c7f..168e9ae2044d4 100644
--- a/src/background.html
+++ b/src/background.html
@@ -22,7 +22,10 @@
+
+
+
diff --git a/src/js/assets.js b/src/js/assets.js
index 659f83b0968b4..845ef68b47416 100644
--- a/src/js/assets.js
+++ b/src/js/assets.js
@@ -53,11 +53,10 @@ api.removeObserver = function(observer) {
};
var fireNotification = function(topic, details) {
- var result;
+ var result, r;
for ( var i = 0; i < observers.length; i++ ) {
- if ( observers[i](topic, details) === false ) {
- result = false;
- }
+ r = observers[i](topic, details);
+ if ( r !== undefined ) { result = r; }
}
return result;
};
@@ -955,7 +954,7 @@ var updateNext = function() {
fireNotification(
'before-asset-updated',
{ assetKey: assetKey, type: assetEntry.content }
- ) !== false
+ ) === true
) {
return assetKey;
}
diff --git a/src/js/background.js b/src/js/background.js
index c1a85de06250c..3d9a33018c3d1 100644
--- a/src/js/background.js
+++ b/src/js/background.js
@@ -95,6 +95,7 @@ var µBlock = (function() { // jshint ignore:line
// Features detection.
privacySettingsSupported: vAPI.browserSettings instanceof Object,
cloudStorageSupported: vAPI.cloud instanceof Object,
+ canFilterResponseBody: vAPI.net.canFilterResponseBody === true,
// https://github.com/chrisaljoudi/uBlock/issues/180
// Whitelist directives need to be loaded once the PSL is available
@@ -120,8 +121,8 @@ var µBlock = (function() { // jshint ignore:line
// read-only
systemSettings: {
- compiledMagic: 'vrgorlgelgws',
- selfieMagic: 'pxpclstriajk'
+ compiledMagic: 'puuijtkfpspv',
+ selfieMagic: 'puuijtkfpspv'
},
restoreBackupSettings: {
diff --git a/src/js/contentscript.js b/src/js/contentscript.js
index a231524af8233..1ba69ad5c1494 100644
--- a/src/js/contentscript.js
+++ b/src/js/contentscript.js
@@ -1379,20 +1379,9 @@ vAPI.domSurveyor = (function() {
// Library of resources is located at:
// https://github.com/gorhill/uBlock/blob/master/assets/ublock/resources.txt
- if ( cfeDetails.scripts ) {
- // Have the injected script tag remove itself when execution completes:
- // to keep DOM as clean as possible.
- var text = cfeDetails.scripts +
- "\n" +
- "(function() {\n" +
- " var c = document.currentScript,\n" +
- " p = c && c.parentNode;\n" +
- " if ( p ) {\n" +
- " p.removeChild(c);\n" +
- " }\n" +
- "})();";
- vAPI.injectScriptlet(document, text);
- vAPI.injectedScripts = text;
+ if ( response.scriptlets ) {
+ vAPI.injectScriptlet(document, response.scriptlets);
+ vAPI.injectedScripts = response.scriptlets;
}
if ( vAPI.domSurveyor instanceof Object ) {
@@ -1414,13 +1403,11 @@ vAPI.domSurveyor = (function() {
};
// This starts bootstrap process.
- var url = window.location.href;
vAPI.messaging.send(
'contentscript',
{
what: 'retrieveContentScriptParameters',
- pageURL: url,
- locationURL: url,
+ url: window.location.href,
isRootFrame: window === window.top
},
bootstrapPhase1
diff --git a/src/js/cosmetic-filtering.js b/src/js/cosmetic-filtering.js
index 0e21e8c46270b..2561b8c37f50b 100644
--- a/src/js/cosmetic-filtering.js
+++ b/src/js/cosmetic-filtering.js
@@ -19,9 +19,6 @@
Home: https://github.com/gorhill/uBlock
*/
-/* jshint bitwise: false */
-/* global punycode */
-
'use strict';
/******************************************************************************/
@@ -31,56 +28,6 @@
/******************************************************************************/
var µb = µBlock;
-
-/******************************************************************************/
-
-var isValidCSSSelector = (function() {
- var div = document.createElement('div'),
- matchesFn;
- // Keep in mind:
- // https://github.com/gorhill/uBlock/issues/693
- // https://github.com/gorhill/uBlock/issues/1955
- if ( div.matches instanceof Function ) {
- matchesFn = div.matches.bind(div);
- } else if ( div.mozMatchesSelector instanceof Function ) {
- matchesFn = div.mozMatchesSelector.bind(div);
- } else if ( div.webkitMatchesSelector instanceof Function ) {
- matchesFn = div.webkitMatchesSelector.bind(div);
- } else if ( div.msMatchesSelector instanceof Function ) {
- matchesFn = div.msMatchesSelector.bind(div);
- } else {
- matchesFn = div.querySelector.bind(div);
- }
- // https://github.com/gorhill/uBlock/issues/3111
- // Workaround until https://bugzilla.mozilla.org/show_bug.cgi?id=1406817
- // is fixed.
- try {
- matchesFn(':scope');
- } catch (ex) {
- matchesFn = div.querySelector.bind(div);
- }
- return function(s) {
- try {
- matchesFn(s + ', ' + s + ':not(#foo)');
- } catch (ex) {
- return false;
- }
- return true;
- };
-})();
-
-var reIsRegexLiteral = /^\/.+\/$/;
-
-var isBadRegex = function(s) {
- try {
- void new RegExp(s);
- } catch (ex) {
- isBadRegex.message = ex.toString();
- return true;
- }
- return false;
-};
-
var cosmeticSurveyingMissCountMax = parseInt(vAPI.localStorage.getItem('cosmeticSurveyingMissCountMax'), 10) || 15;
/******************************************************************************/
@@ -222,174 +169,6 @@ registerFilterClass(FilterBucket);
/******************************************************************************/
/******************************************************************************/
-var FilterParser = function() {
- this.prefix = this.suffix = '';
- this.unhide = 0;
- this.hostnames = [];
- this.invalid = false;
- this.cosmetic = true;
- this.reNeedHostname = /^(?:script:contains|script:inject|.+?:-abp-contains|.+?:-abp-has|.+?:contains|.+?:has|.+?:has-text|.+?:if|.+?:if-not|.+?:matches-css(?:-before|-after)?|.*?:xpath)\(.+\)$/;
-};
-
-/******************************************************************************/
-
-FilterParser.prototype.reset = function() {
- this.raw = '';
- this.prefix = this.suffix = '';
- this.unhide = 0;
- this.hostnames.length = 0;
- this.invalid = false;
- this.cosmetic = true;
- return this;
-};
-
-/******************************************************************************/
-
-FilterParser.prototype.parse = function(raw) {
- // important!
- this.reset();
-
- this.raw = raw;
-
- // Find the bounds of the anchor.
- var lpos = raw.indexOf('#');
- if ( lpos === -1 ) {
- this.cosmetic = false;
- return this;
- }
- var rpos = raw.indexOf('#', lpos + 1);
- if ( rpos === -1 ) {
- this.cosmetic = false;
- return this;
- }
-
- // Coarse-check that the anchor is valid.
- // `##`: l = 1
- // `#@#`, `#$#`, `#%#`, `#?#`: l = 2
- // `#@$#`, `#@%#`, `#@?#`: l = 3
- if ( (rpos - lpos) > 3 ) {
- this.cosmetic = false;
- return this;
- }
-
- // Find out type of cosmetic filter.
- // Exception filter?
- if ( raw.charCodeAt(lpos + 1) === 0x40 /* '@' */ ) {
- this.unhide = 1;
- }
-
- // https://github.com/gorhill/uBlock/issues/952
- // Find out whether we are dealing with an Adguard-specific cosmetic
- // filter, and if so, translate it if supported, or discard it if not
- // supported.
- var cCode = raw.charCodeAt(rpos - 1);
- if ( cCode !== 0x23 /* '#' */ && cCode !== 0x40 /* '@' */ ) {
- // We have an Adguard/ABP cosmetic filter if and only if the character
- // is `$`, `%` or `?`, otherwise it's not a cosmetic filter.
- if (
- cCode !== 0x24 /* '$' */ &&
- cCode !== 0x25 /* '%' */ &&
- cCode !== 0x3F /* '?' */
- ) {
- this.cosmetic = false;
- return this;
- }
- // Adguard's scriptlet injection: not supported.
- if ( cCode === 0x25 /* '%' */ ) {
- this.invalid = true;
- return this;
- }
- // Adguard's style injection: supported, but translate to uBO's format.
- if ( cCode === 0x24 /* '$' */ ) {
- raw = this.translateAdguardCSSInjectionFilter(raw);
- if ( raw === '' ) {
- this.invalid = true;
- return this;
- }
- }
- rpos = raw.indexOf('#', lpos + 1);
- }
-
- // Extract the hostname(s).
- if ( lpos !== 0 ) {
- this.prefix = raw.slice(0, lpos);
- }
-
- // Extract the selector.
- this.suffix = raw.slice(rpos + 1).trim();
- if ( this.suffix.length === 0 ) {
- this.cosmetic = false;
- return this;
- }
-
- // 2014-05-23:
- // https://github.com/gorhill/httpswitchboard/issues/260
- // Any sequence of `#` longer than one means the line is not a valid
- // cosmetic filter.
- if ( this.suffix.indexOf('##') !== -1 ) {
- this.cosmetic = false;
- return this;
- }
-
- // Normalize high-medium selectors: `href` is assumed to imply `a` tag. We
- // need to do this here in order to correctly avoid duplicates. The test
- // is designed to minimize overhead -- this is a low occurrence filter.
- if ( this.suffix.startsWith('[href^="', 1) ) {
- this.suffix = this.suffix.slice(1);
- }
-
- if ( this.prefix !== '' ) {
- this.hostnames = this.prefix.split(/\s*,\s*/);
- }
-
- // For some selectors, it is mandatory to have a hostname or entity:
- // ##script:contains(...)
- // ##script:inject(...)
- // ##.foo:-abp-contains(...)
- // ##.foo:-abp-has(...)
- // ##.foo:contains(...)
- // ##.foo:has(...)
- // ##.foo:has-text(...)
- // ##.foo:if(...)
- // ##.foo:if-not(...)
- // ##.foo:matches-css(...)
- // ##.foo:matches-css-after(...)
- // ##.foo:matches-css-before(...)
- // ##:xpath(...)
- if (
- this.hostnames.length === 0 &&
- this.unhide === 0 &&
- this.reNeedHostname.test(this.suffix)
- ) {
- this.invalid = true;
- return this;
- }
-
- return this;
-};
-
-/******************************************************************************/
-
-// Reference: https://adguard.com/en/filterrules.html#cssInjection
-
-FilterParser.prototype.translateAdguardCSSInjectionFilter = function(raw) {
- var matches = /^([^#]*)#(@?)\$#([^{]+)\{([^}]+)\}$/.exec(raw);
- if ( matches === null ) {
- return '';
- }
- // For now we do not allow generic CSS injections (prolly never).
- if ( matches[1] === '' && matches[2] !== '@' ) {
- return '';
- }
- return matches[1] +
- '#' + matches[2] + '#' +
- matches[3].trim() +
- ':style(' + matches[4].trim() + ')';
-};
-
-/******************************************************************************/
-/******************************************************************************/
-
var SelectorCacheEntry = function() {
this.reset();
};
@@ -538,17 +317,11 @@ SelectorCacheEntry.prototype.retrieve = function(type, out) {
/******************************************************************************/
/******************************************************************************/
-// Two Unicode characters:
-// T0HHHHHHH HHHHHHHHH
-// | | |
-// | | |
-// | | |
-// | | +-- bit 8-0 of FNV
-// | |
-// | +-- bit 15-9 of FNV
-// |
-// +-- filter type (0=hide 1=unhide)
-//
+// 0000HHHHHHHHHHHH
+// |
+// |
+// |
+// +-- bit 11-0 of FNV
var makeHash = function(token) {
// Ref: Given a URL, returns a unique 4-character long hash string
@@ -607,7 +380,6 @@ var makeHash = function(token) {
var FilterContainer = function() {
this.noDomainHash = '-';
- this.parser = new FilterParser();
this.reHasUnicode = /[^\x00-\x7F]/;
this.rePlainSelector = /^[#.][\w\\-]+/;
this.rePlainSelectorEscaped = /^[#.](?:\\[0-9A-Fa-f]+ |\\.|\w|-)+/;
@@ -615,8 +387,25 @@ var FilterContainer = function() {
this.reEscapeSequence = /\\([0-9A-Fa-f]+ |.)/g;
this.reSimpleHighGeneric1 = /^[a-z]*\[[^[]+]$/;
this.reHighMedium = /^\[href\^="https?:\/\/([^"]{8})[^"]*"\]$/;
- this.reScriptSelector = /^script:(contains|inject)\((.+)\)$/;
- this.punycode = punycode;
+ this.reNeedHostname = new RegExp([
+ '^',
+ '(?:',
+ [
+ 'script:contains',
+ '.+?:has',
+ '.+?:has-text',
+ '.+?:if',
+ '.+?:if-not',
+ '.+?:matches-css(?:-before|-after)?',
+ '.*?:xpath',
+ '.+?:-abp-contains', // ABP-specific for `:has-text`
+ '.+?:-abp-has', // ABP-specific for `:if`
+ '.+?:contains' // Adguard-specific for `:has-text`
+ ].join('|'),
+ ')',
+ '\\(.+\\)',
+ '$'
+ ].join(''));
this.selectorCache = new Map();
this.selectorCachePruneDelay = 10 * 60 * 1000; // 10 minutes
@@ -630,6 +419,9 @@ var FilterContainer = function() {
// generic exception filters
this.genericDonthideSet = new Set();
+ // TODO: Think about reusing µb.staticExtFilteringEngine.HostnameBasedDB
+ // for both specific and procedural filters. This would require some
+ // refactoring.
// hostname, entity-based filters
this.specificFilters = new Map();
this.proceduralFilters = new Map();
@@ -664,9 +456,6 @@ var FilterContainer = function() {
mru: new µb.MRUCache(16)
};
- this.userScripts = new Map();
- this.userScriptCache = new µb.MRUCache(32);
-
// Short-lived: content is valid only during one function call. These
// is to prevent repeated allocation/deallocation overheads -- the
// constructors/destructors of javascript Set/Map is assumed to be costlier
@@ -684,7 +473,6 @@ var FilterContainer = function() {
// Reset all, thus reducing to a minimum memory footprint of the context.
FilterContainer.prototype.reset = function() {
- this.parser.reset();
this.µburi = µb.URI;
this.frozen = false;
this.acceptedCount = 0;
@@ -720,12 +508,6 @@ FilterContainer.prototype.reset = function() {
this.highlyGeneric.complex.dict.clear();
this.highlyGeneric.complex.str = '';
this.highlyGeneric.complex.mru.reset();
-
- this.scriptTagFilters = {};
- this.scriptTagFilterCount = 0;
-
- this.userScripts.clear();
- this.userScriptCache.reset();
};
/******************************************************************************/
@@ -759,345 +541,9 @@ FilterContainer.prototype.freeze = function() {
this.highlyGeneric.simple.str = µb.arrayFrom(this.highlyGeneric.simple.dict).join(',\n');
this.highlyGeneric.complex.str = µb.arrayFrom(this.highlyGeneric.complex.dict).join(',\n');
- this.parser.reset();
- this.compileSelector.reset();
- this.compileProceduralSelector.reset();
this.frozen = true;
};
-/******************************************************************************/
-
-// https://github.com/chrisaljoudi/uBlock/issues/1004
-// Detect and report invalid CSS selectors.
-
-// Discard new ABP's `-abp-properties` directive until it is
-// implemented (if ever). Unlikely, see:
-// https://github.com/gorhill/uBlock/issues/1752
-
-// https://github.com/gorhill/uBlock/issues/2624
-// Convert Adguard's `-ext-has='...'` into uBO's `:has(...)`.
-
-FilterContainer.prototype.compileSelector = (function() {
- var reAfterBeforeSelector = /^(.+?)(::?after|::?before)$/,
- reStyleSelector = /^(.+?):style\((.+?)\)$/,
- reStyleBad = /url\([^)]+\)/,
- reExtendedSyntax = /\[-(?:abp|ext)-[a-z-]+=(['"])(?:.+?)(?:\1)\]/,
- reExtendedSyntaxParser = /\[-(?:abp|ext)-([a-z-]+)=(['"])(.+?)\2\]/,
- div = document.createElement('div');
-
- var normalizedExtendedSyntaxOperators = new Map([
- [ 'contains', ':has-text' ],
- [ 'has', ':if' ],
- [ 'matches-css', ':matches-css' ],
- [ 'matches-css-after', ':matches-css-after' ],
- [ 'matches-css-before', ':matches-css-before' ],
- ]);
-
- var isValidStyleProperty = function(cssText) {
- if ( reStyleBad.test(cssText) ) { return false; }
- div.style.cssText = cssText;
- if ( div.style.cssText === '' ) { return false; }
- div.style.cssText = '';
- return true;
- };
-
- var entryPoint = function(raw) {
- var extendedSyntax = reExtendedSyntax.test(raw);
- if ( isValidCSSSelector(raw) && extendedSyntax === false ) {
- return raw;
- }
-
- // We rarely reach this point -- majority of selectors are plain
- // CSS selectors.
-
- var matches, operator;
-
- // Supported Adguard/ABP advanced selector syntax: will translate into
- // uBO's syntax before further processing.
- // Mind unsupported advanced selector syntax, such as ABP's
- // `-abp-properties`.
- // Note: extended selector syntax has been deprecated in ABP, in favor
- // of the procedural one (i.e. `:operator(...)`). See
- // https://issues.adblockplus.org/ticket/5287
- if ( extendedSyntax ) {
- while ( (matches = reExtendedSyntaxParser.exec(raw)) !== null ) {
- operator = normalizedExtendedSyntaxOperators.get(matches[1]);
- if ( operator === undefined ) { return; }
- raw = raw.slice(0, matches.index) +
- operator + '(' + matches[3] + ')' +
- raw.slice(matches.index + matches[0].length);
- }
- return this.compileSelector(raw);
- }
-
- var selector = raw,
- pseudoclass, style;
-
- // `:style` selector?
- if ( (matches = reStyleSelector.exec(selector)) !== null ) {
- selector = matches[1];
- style = matches[2];
- }
-
- // https://github.com/gorhill/uBlock/issues/2448
- // :after- or :before-based selector?
- if ( (matches = reAfterBeforeSelector.exec(selector)) ) {
- selector = matches[1];
- pseudoclass = matches[2];
- }
-
- if ( style !== undefined || pseudoclass !== undefined ) {
- if ( isValidCSSSelector(selector) === false ) {
- return;
- }
- if ( pseudoclass !== undefined ) {
- selector += pseudoclass;
- }
- if ( style !== undefined ) {
- if ( isValidStyleProperty(style) === false ) { return; }
- return JSON.stringify({
- raw: raw,
- style: [ selector, style ]
- });
- }
- return JSON.stringify({
- raw: raw,
- pseudoclass: true
- });
- }
-
- // `script:` filter?
- if ( (matches = this.reScriptSelector.exec(raw)) !== null ) {
- // :inject
- if ( matches[1] === 'inject' ) {
- return raw;
- }
- // :contains
- if (
- reIsRegexLiteral.test(matches[2]) === false ||
- isBadRegex(matches[2].slice(1, -1)) === false
- ) {
- return raw;
- }
- }
-
- // Procedural selector?
- var compiled;
- if ( (compiled = this.compileProceduralSelector(raw)) ) {
- return compiled;
- }
-
- µb.logger.writeOne('', 'error', 'Cosmetic filtering – invalid filter: ' + raw);
- };
-
- entryPoint.reset = function() {
- };
-
- return entryPoint;
-})();
-
-/******************************************************************************/
-
-FilterContainer.prototype.compileProceduralSelector = (function() {
- var reOperatorParser = /(:(?:-abp-contains|-abp-has|contains|has|has-text|if|if-not|matches-css|matches-css-after|matches-css-before|xpath))\(.+\)$/,
- reFirstParentheses = /^\(*/,
- reLastParentheses = /\)*$/,
- reEscapeRegex = /[.*+?^${}()|[\]\\]/g,
- reNeedScope = /^\s*[+>~]/;
-
- var lastProceduralSelector = '',
- lastProceduralSelectorCompiled,
- regexToRawValue = new Map();
-
- var compileCSSSelector = function(s) {
- // https://github.com/AdguardTeam/ExtendedCss/issues/31#issuecomment-302391277
- // Prepend `:scope ` if needed.
- if ( reNeedScope.test(s) ) {
- s = ':scope ' + s;
- }
- if ( isValidCSSSelector(s) ) {
- return s;
- }
- };
-
- var compileText = function(s) {
- var reText;
- if ( reIsRegexLiteral.test(s) ) {
- reText = s.slice(1, -1);
- if ( isBadRegex(reText) ) { return; }
- } else {
- reText = s.replace(reEscapeRegex, '\\$&');
- regexToRawValue.set(reText, s);
- }
- return reText;
- };
-
- var compileCSSDeclaration = function(s) {
- var name, value, reText,
- pos = s.indexOf(':');
- if ( pos === -1 ) { return; }
- name = s.slice(0, pos).trim();
- value = s.slice(pos + 1).trim();
- if ( reIsRegexLiteral.test(value) ) {
- reText = value.slice(1, -1);
- if ( isBadRegex(reText) ) { return; }
- } else {
- reText = '^' + value.replace(reEscapeRegex, '\\$&') + '$';
- regexToRawValue.set(reText, value);
- }
- return { name: name, value: reText };
- };
-
- var compileConditionalSelector = function(s) {
- // https://github.com/AdguardTeam/ExtendedCss/issues/31#issuecomment-302391277
- // Prepend `:scope ` if needed.
- if ( reNeedScope.test(s) ) {
- s = ':scope ' + s;
- }
- return compile(s);
- };
-
- var compileXpathExpression = function(s) {
- var dummy;
- try {
- dummy = document.createExpression(s, null) instanceof XPathExpression;
- } catch (e) {
- return;
- }
- return s;
- };
-
- // https://github.com/gorhill/uBlock/issues/2793
- var normalizedOperators = new Map([
- [ ':-abp-contains', ':has-text' ],
- [ ':-abp-has', ':if' ],
- [ ':contains', ':has-text' ]
- ]);
-
- var compileArgument = new Map([
- [ ':has', compileCSSSelector ],
- [ ':has-text', compileText ],
- [ ':if', compileConditionalSelector ],
- [ ':if-not', compileConditionalSelector ],
- [ ':matches-css', compileCSSDeclaration ],
- [ ':matches-css-after', compileCSSDeclaration ],
- [ ':matches-css-before', compileCSSDeclaration ],
- [ ':xpath', compileXpathExpression ]
- ]);
-
- // https://github.com/gorhill/uBlock/issues/2793#issuecomment-333269387
- // - Normalize (somewhat) the stringified version of procedural cosmetic
- // filters -- this increase the likelihood of detecting duplicates given
- // that uBO is able to understand syntax specific to other blockers.
- // The normalized string version is what is reported in the logger, by
- // design.
- var decompile = function(compiled) {
- var raw = [ compiled.selector ],
- tasks = compiled.tasks,
- value;
- if ( Array.isArray(tasks) ) {
- for ( var i = 0, n = tasks.length, task; i < n; i++ ) {
- task = tasks[i];
- switch ( task[0] ) {
- case ':has':
- case ':xpath':
- raw.push(task[0], '(', task[1], ')');
- break;
- case ':has-text':
- value = regexToRawValue.get(task[1]);
- if ( value === undefined ) {
- value = '/' + task[1] + '/';
- }
- raw.push(task[0], '(', value, ')');
- break;
- case ':matches-css':
- case ':matches-css-after':
- case ':matches-css-before':
- value = regexToRawValue.get(task[1].value);
- if ( value === undefined ) {
- value = '/' + task[1].value + '/';
- }
- raw.push(task[0], '(', task[1].name, ': ', value, ')');
- break;
- case ':if':
- case ':if-not':
- raw.push(task[0], '(', decompile(task[1]), ')');
- break;
- }
- }
- }
- return raw.join('');
- };
-
- var compile = function(raw) {
- var matches = reOperatorParser.exec(raw);
- if ( matches === null ) {
- if ( isValidCSSSelector(raw) ) { return { selector: raw }; }
- return;
- }
- var tasks = [],
- firstOperand = raw.slice(0, matches.index),
- currentOperator = matches[1],
- selector = raw.slice(matches.index + currentOperator.length),
- currentArgument = '', nextOperand, nextOperator,
- depth = 0, opening, closing;
- if ( firstOperand !== '' && isValidCSSSelector(firstOperand) === false ) { return; }
- for (;;) {
- matches = reOperatorParser.exec(selector);
- if ( matches !== null ) {
- nextOperand = selector.slice(0, matches.index);
- nextOperator = matches[1];
- } else {
- nextOperand = selector;
- nextOperator = '';
- }
- opening = reFirstParentheses.exec(nextOperand)[0].length;
- closing = reLastParentheses.exec(nextOperand)[0].length;
- if ( opening > closing ) {
- if ( depth === 0 ) { currentArgument = ''; }
- depth += 1;
- } else if ( closing > opening && depth > 0 ) {
- depth -= 1;
- if ( depth === 0 ) { nextOperand = currentArgument + nextOperand; }
- }
- if ( depth !== 0 ) {
- currentArgument += nextOperand + nextOperator;
- } else {
- currentOperator = normalizedOperators.get(currentOperator) || currentOperator;
- currentArgument = compileArgument.get(currentOperator)(nextOperand.slice(1, -1));
- if ( currentArgument === undefined ) { return; }
- tasks.push([ currentOperator, currentArgument ]);
- currentOperator = nextOperator;
- }
- if ( nextOperator === '' ) { break; }
- selector = selector.slice(matches.index + nextOperator.length);
- }
- if ( tasks.length === 0 || depth !== 0 ) { return; }
- return { selector: firstOperand, tasks: tasks };
- };
-
- var entryPoint = function(raw) {
- if ( raw === lastProceduralSelector ) {
- return lastProceduralSelectorCompiled;
- }
- lastProceduralSelector = raw;
- var compiled = compile(raw);
- if ( compiled !== undefined ) {
- compiled.raw = decompile(compiled);
- compiled = JSON.stringify(compiled);
- }
- lastProceduralSelectorCompiled = compiled;
- return compiled;
- };
-
- entryPoint.reset = function() {
- regexToRawValue = new Map();
- lastProceduralSelector = '';
- lastProceduralSelectorCompiled = undefined;
- };
-
- return entryPoint;
-})();
/******************************************************************************/
@@ -1135,17 +581,12 @@ FilterContainer.prototype.keyFromSelector = function(selector) {
/******************************************************************************/
-FilterContainer.prototype.compile = function(s, writer) {
- var parsed = this.parser.parse(s);
- if ( parsed.cosmetic === false ) {
- return false;
- }
- if ( parsed.invalid ) {
- return true;
- }
+FilterContainer.prototype.compile = function(parsed, writer) {
+ // 1000 = cosmetic filtering
+ writer.select(1000);
- var hostnames = parsed.hostnames;
- var i = hostnames.length;
+ var hostnames = parsed.hostnames,
+ i = hostnames.length;
if ( i === 0 ) {
this.compileGenericSelector(parsed, writer);
return true;
@@ -1155,9 +596,8 @@ FilterContainer.prototype.compile = function(s, writer) {
// Negated hostname means the filter applies to all non-negated hostnames
// of same filter OR globally if there is no non-negated hostnames.
var applyGlobally = true;
- var hostname;
while ( i-- ) {
- hostname = hostnames[i];
+ var hostname = hostnames[i];
if ( hostname.startsWith('~') === false ) {
applyGlobally = false;
}
@@ -1173,7 +613,7 @@ FilterContainer.prototype.compile = function(s, writer) {
/******************************************************************************/
FilterContainer.prototype.compileGenericSelector = function(parsed, writer) {
- if ( parsed.unhide === 0 ) {
+ if ( parsed.exception === false ) {
this.compileGenericHideSelector(parsed, writer);
} else {
this.compileGenericUnhideSelector(parsed, writer);
@@ -1183,6 +623,20 @@ FilterContainer.prototype.compileGenericSelector = function(parsed, writer) {
/******************************************************************************/
FilterContainer.prototype.compileGenericHideSelector = function(parsed, writer) {
var selector = parsed.suffix,
type = selector.charCodeAt(0),
key;
+ // For some selectors, it is mandatory to have a hostname or entity:
+ // ##.foo:-abp-contains(...)
+ // ##.foo:-abp-has(...)
+ // ##.foo:contains(...)
+ // ##.foo:has(...)
+ // ##.foo:has-text(...)
+ // ##.foo:if(...)
+ // ##.foo:if-not(...)
+ // ##.foo:matches-css(...)
+ // ##.foo:matches-css-after(...)
+ // ##.foo:matches-css-before(...)
+ // ##:xpath(...)
+ // Must run after `selector` is assigned above: a hoisted `var` is undefined.
+ if ( this.reNeedHostname.test(selector) ) { return; }
@@ -1198,7 +652,7 @@ FilterContainer.prototype.compileGenericHideSelector = function(parsed, writer)
return;
}
// Complex selector-based CSS rule.
- if ( this.compileSelector(selector) !== undefined ) {
+ if ( µb.staticExtFilteringEngine.compileSelector(selector) !== undefined ) {
writer.push([ 1 /* lg+ */, key.slice(1), selector ]);
}
return;
@@ -1215,13 +669,13 @@ FilterContainer.prototype.compileGenericHideSelector = function(parsed, writer)
return;
}
// Complex selector-based CSS rule.
- if ( this.compileSelector(selector) !== undefined ) {
+ if ( µb.staticExtFilteringEngine.compileSelector(selector) !== undefined ) {
writer.push([ 3 /* lg+ */, key.slice(1), selector ]);
}
return;
}
- var compiled = this.compileSelector(selector);
+ var compiled = µb.staticExtFilteringEngine.compileSelector(selector);
if ( compiled === undefined ) { return; }
// TODO: Detect and error on procedural cosmetic filters.
@@ -1259,18 +713,12 @@ FilterContainer.prototype.compileGenericHideSelector = function(parsed, writer)
/******************************************************************************/
-FilterContainer.prototype.compileGenericUnhideSelector = function(parsed, writer) {
- var selector = parsed.suffix;
-
- // script:contains(...)
- // script:inject(...)
- if ( this.reScriptSelector.test(selector) ) {
- writer.push([ 6 /* js */, '!', '', selector ]);
- return;
- }
-
+FilterContainer.prototype.compileGenericUnhideSelector = function(
+ parsed,
+ writer
+) {
// Procedural cosmetic filters are acceptable as generic exception filters.
- var compiled = this.compileSelector(selector);
+ var compiled = µb.staticExtFilteringEngine.compileSelector(parsed.suffix);
if ( compiled === undefined ) { return; }
// https://github.com/chrisaljoudi/uBlock/issues/497
@@ -1281,37 +729,24 @@ FilterContainer.prototype.compileGenericUnhideSelector = function(parsed, writer
/******************************************************************************/
-FilterContainer.prototype.compileHostnameSelector = function(hostname, parsed, writer) {
+FilterContainer.prototype.compileHostnameSelector = function(
+ hostname,
+ parsed,
+ writer
+) {
// https://github.com/chrisaljoudi/uBlock/issues/145
- var unhide = parsed.unhide;
+ var unhide = parsed.exception ? 1 : 0;
if ( hostname.startsWith('~') ) {
hostname = hostname.slice(1);
unhide ^= 1;
}
- // punycode if needed
- if ( this.reHasUnicode.test(hostname) ) {
- hostname = this.punycode.toASCII(hostname);
- }
+ var compiled = µb.staticExtFilteringEngine.compileSelector(parsed.suffix);
+ if ( compiled === undefined ) { return; }
- var selector = parsed.suffix,
- domain = this.µburi.domainFromHostname(hostname),
+ var domain = this.µburi.domainFromHostname(hostname),
hash;
- // script:contains(...)
- // script:inject(...)
- if ( this.reScriptSelector.test(selector) ) {
- hash = domain !== '' ? domain : this.noDomainHash;
- if ( unhide ) {
- hash = '!' + hash;
- }
- writer.push([ 6 /* js */, hash, hostname, selector ]);
- return;
- }
-
- var compiled = this.compileSelector(selector);
- if ( compiled === undefined ) { return; }
-
// https://github.com/chrisaljoudi/uBlock/issues/188
// If not a real domain as per PSL, assign a synthetic one
if ( hostname.endsWith('.*') === false ) {
@@ -1319,7 +754,7 @@ FilterContainer.prototype.compileHostnameSelector = function(hostname, parsed, w
} else {
hash = makeHash(hostname);
}
- if ( unhide ) {
+ if ( unhide === 1 ) {
hash = '!' + hash;
}
@@ -1336,23 +771,22 @@ FilterContainer.prototype.compileHostnameSelector = function(hostname, parsed, w
/******************************************************************************/
-FilterContainer.prototype.fromCompiledContent = function(
- reader,
- skipGenericCosmetic,
- skipCosmetic
-) {
- if ( skipCosmetic ) {
+FilterContainer.prototype.fromCompiledContent = function(reader, options) {
+ if ( options.skipCosmetic ) {
this.skipCompiledContent(reader);
return;
}
- if ( skipGenericCosmetic ) {
+ if ( options.skipGenericCosmetic ) {
this.skipGenericCompiledContent(reader);
return;
}
var fingerprint, args, db, filter, bucket;
- while ( reader.next() === true ) {
+ // 1000 = cosmetic filtering
+ reader.select(1000);
+
+ while ( reader.next() ) {
this.acceptedCount += 1;
fingerprint = reader.fingerprint();
if ( this.duplicateBuster.has(fingerprint) ) {
@@ -1410,12 +844,6 @@ FilterContainer.prototype.fromCompiledContent = function(
this.highlyGeneric.complex.dict.add(args[1]);
break;
- // js, hash, example.com, script:contains(...)
- // js, hash, example.com, script:inject(...)
- case 6:
- this.createScriptFilter(args);
- break;
-
// https://github.com/chrisaljoudi/uBlock/issues/497
// Generic exception filters: expected to be a rare occurrence.
// #@#.tweet
@@ -1451,7 +879,10 @@ FilterContainer.prototype.fromCompiledContent = function(
FilterContainer.prototype.skipGenericCompiledContent = function(reader) {
var fingerprint, args, db, filter, bucket;
- while ( reader.next() === true ) {
+ // 1000 = cosmetic filtering
+ reader.select(1000);
+
+ while ( reader.next() ) {
this.acceptedCount += 1;
fingerprint = reader.fingerprint();
if ( this.duplicateBuster.has(fingerprint) ) {
@@ -1463,13 +894,6 @@ FilterContainer.prototype.skipGenericCompiledContent = function(reader) {
switch ( args[0] ) {
- // js, hash, example.com, script:contains(...)
- // js, hash, example.com, script:inject(...)
- case 6:
- this.duplicateBuster.add(fingerprint);
- this.createScriptFilter(args);
- break;
-
// https://github.com/chrisaljoudi/uBlock/issues/497
// Generic exception filters: expected to be a rare occurrence.
case 7:
@@ -1504,268 +928,17 @@ FilterContainer.prototype.skipGenericCompiledContent = function(reader) {
/******************************************************************************/
FilterContainer.prototype.skipCompiledContent = function(reader) {
- var fingerprint, args;
+ // 1000 = cosmetic filtering
+ reader.select(1000);
- while ( reader.next() === true ) {
+ while ( reader.next() ) {
this.acceptedCount += 1;
-
- args = reader.args();
-
- // js, hash, example.com, script:contains(...)
- // js, hash, example.com, script:inject(...)
- if ( args[0] === 6 ) {
- fingerprint = reader.fingerprint();
- if ( this.duplicateBuster.has(fingerprint) === false ) {
- this.duplicateBuster.add(fingerprint);
- this.createScriptFilter(args);
- }
- continue;
- }
-
this.discardedCount += 1;
}
};
/******************************************************************************/
-FilterContainer.prototype.createScriptFilter = function(args) {
- if ( args[3].startsWith('script:inject') ) {
- return this.createUserScriptRule(args);
- }
- if ( args[3].startsWith('script:contains') ) {
- return this.createScriptTagFilter(args);
- }
-};
-
-/******************************************************************************/
-
-// 0123456789012345678901
-// script:contains(token)
-// ^ ^
-// 16 -1
-
-FilterContainer.prototype.createScriptTagFilter = function(args) {
- var hostname = args[2],
- token = args[3].slice(16, -1);
- token = token.startsWith('/') && token.endsWith('/')
- ? token.slice(1, -1)
- : token.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
-
- if ( this.scriptTagFilters.hasOwnProperty(hostname) ) {
- this.scriptTagFilters[hostname] += '|' + token;
- } else {
- this.scriptTagFilters[hostname] = token;
- }
-
- this.scriptTagFilterCount += 1;
-};
-
-/******************************************************************************/
-
-FilterContainer.prototype.retrieveScriptTagHostnames = function() {
- return Object.keys(this.scriptTagFilters);
-};
-
-/******************************************************************************/
-
-FilterContainer.prototype.retrieveScriptTagRegex = function(domain, hostname) {
- if ( this.scriptTagFilterCount === 0 ) {
- return;
- }
- var out = [], hn = hostname, pos;
-
- // Hostname-based
- for (;;) {
- if ( this.scriptTagFilters.hasOwnProperty(hn) ) {
- out.push(this.scriptTagFilters[hn]);
- }
- if ( hn === domain ) {
- break;
- }
- pos = hn.indexOf('.');
- if ( pos === -1 ) {
- break;
- }
- hn = hn.slice(pos + 1);
- }
-
- // Entity-based
- pos = domain.indexOf('.');
- if ( pos !== -1 ) {
- hn = domain.slice(0, pos) + '.*';
- if ( this.scriptTagFilters.hasOwnProperty(hn) ) {
- out.push(this.scriptTagFilters[hn]);
- }
- }
- if ( out.length !== 0 ) {
- return out.join('|');
- }
-};
-
-/******************************************************************************/
-
-// userScripts{hash} => FilterHostname | FilterBucket
-
-FilterContainer.prototype.createUserScriptRule = function(args) {
- var hash = args[1],
- filter = new FilterHostname(args[3].slice(14, -1), args[2]);
- var bucket = this.userScripts.get(hash);
- if ( bucket === undefined ) {
- this.userScripts.set(hash, filter);
- } else if ( bucket instanceof FilterBucket ) {
- bucket.add(filter);
- } else {
- this.userScripts.set(hash, new FilterBucket(bucket, filter));
- }
-};
-
-/******************************************************************************/
-
-// https://github.com/gorhill/uBlock/issues/1954
-
-// 01234567890123456789
-// script:inject(token[, arg[, ...]])
-// ^ ^
-// 14 -1
-
-FilterContainer.prototype.retrieveUserScripts = function(
- domain,
- hostname,
- details
-) {
- if ( this.userScripts.size === 0 ) { return; }
- if ( µb.hiddenSettings.ignoreScriptInjectFilters === true ) { return; }
-
- var reng = µb.redirectEngine;
- if ( !reng ) { return; }
-
- this.mapRegister0.clear();
-
- var toInject = this.mapRegister0,
- pos = domain.indexOf('.'),
- entity = pos !== -1 ? domain.slice(0, pos) + '.*' : '';
-
- // Implicit
- var hn = hostname;
- for (;;) {
- this._lookupUserScript(hn + '.js', reng, toInject);
- if ( hn === domain ) { break; }
- pos = hn.indexOf('.');
- if ( pos === -1 ) { break; }
- hn = hn.slice(pos + 1);
- }
- if ( entity !== '' ) {
- this._lookupUserScript(entity + '.js', reng, toInject);
- }
-
- // Explicit (hash is domain).
- var selectors = new Set(),
- bucket;
- if ( (bucket = this.userScripts.get(domain)) ) {
- bucket.retrieve(hostname, selectors);
- }
- if ( entity !== '' && (bucket = this.userScripts.get(entity)) ) {
- bucket.retrieve(entity, selectors);
- }
- for ( var selector of selectors ) {
- this._lookupUserScript(selector, reng, toInject);
- }
-
- if ( toInject.size === 0 ) { return; }
-
- // https://github.com/gorhill/uBlock/issues/2835
- // Do not inject scriptlets if the site is under an `allow` rule.
- if (
- µb.userSettings.advancedUserEnabled === true &&
- µb.sessionFirewall.evaluateCellZY(hostname, hostname, '*') === 2
- ) {
- return;
- }
-
- // Exceptions should be rare, so we check for exception only if there are
- // scriptlets returned.
- var exceptions = new Set();
- if ( (bucket = this.userScripts.get('!' + domain)) ) {
- bucket.retrieve(hostname, exceptions);
- }
- if ( entity !== '' && (bucket = this.userScripts.get('!' + entity)) ) {
- bucket.retrieve(hostname, exceptions);
- }
-
- // Return an array of scriptlets, and log results if needed.
- var out = [],
- logger = µb.logger.isEnabled() ? µb.logger : null,
- isException;
-
- for ( var entry of toInject ) {
- if ( (isException = exceptions.has(entry[0])) === false ) {
- out.push(entry[1]);
- }
- if ( logger === null ) { continue; }
- logger.writeOne(
- details.tabId,
- 'cosmetic',
- {
- source: 'cosmetic',
- raw: (isException ? '#@#' : '##') + 'script:inject(' + entry[0] + ')'
- },
- 'dom',
- details.locationURL,
- null,
- hostname
- );
- }
-
- return out.join('\n');
-};
-
-FilterContainer.prototype._lookupUserScript = function(raw, reng, toInject) {
- if ( toInject.has(raw) ) { return; }
- if ( this.userScriptCache.resetTime < reng.modifyTime ) {
- this.userScriptCache.reset();
- }
- var content = this.userScriptCache.lookup(raw);
- if ( content === undefined ) {
- var token, args,
- pos = raw.indexOf(',');
- if ( pos === -1 ) {
- token = raw;
- } else {
- token = raw.slice(0, pos).trim();
- args = raw.slice(pos + 1).trim();
- }
- content = reng.resourceContentFromName(token, 'application/javascript');
- if ( !content ) { return; }
- if ( args ) {
- content = this._fillupUserScript(content, args);
- if ( !content ) { return; }
- }
- this.userScriptCache.add(raw, content);
- }
- toInject.set(raw, content);
-};
-
-// Fill template placeholders. Return falsy if:
-// - At least one argument contains anything else than /\w/ and `.`
-
-FilterContainer.prototype._fillupUserScript = function(content, args) {
- var i = 1,
- pos, arg;
- while ( args !== '' ) {
- pos = args.indexOf(',');
- if ( pos === -1 ) { pos = args.length; }
- arg = args.slice(0, pos).trim().replace(this._reEscapeScriptArg, '\\$&');
- content = content.replace('{{' + i + '}}', arg);
- args = args.slice(pos + 1).trim();
- i++;
- }
- return content;
-};
-
-FilterContainer.prototype._reEscapeScriptArg = /[\\'"]/g;
-
-/******************************************************************************/
-
FilterContainer.prototype.toSelfie = function() {
var selfieFromMap = function(map) {
var selfie = [];
@@ -1788,10 +961,7 @@ FilterContainer.prototype.toSelfie = function() {
lowlyGenericCCL: µb.arrayFrom(this.lowlyGeneric.cl.complex),
highSimpleGenericHideArray: µb.arrayFrom(this.highlyGeneric.simple.dict),
highComplexGenericHideArray: µb.arrayFrom(this.highlyGeneric.complex.dict),
- genericDonthideArray: µb.arrayFrom(this.genericDonthideSet),
- scriptTagFilters: this.scriptTagFilters,
- scriptTagFilterCount: this.scriptTagFilterCount,
- userScripts: selfieFromMap(this.userScripts)
+ genericDonthideArray: µb.arrayFrom(this.genericDonthideSet)
};
};
@@ -1823,9 +993,6 @@ FilterContainer.prototype.fromSelfie = function(selfie) {
this.highlyGeneric.complex.dict = new Set(selfie.highComplexGenericHideArray);
this.highlyGeneric.complex.str = selfie.highComplexGenericHideArray.join(',\n');
this.genericDonthideSet = new Set(selfie.genericDonthideArray);
- this.scriptTagFilters = selfie.scriptTagFilters;
- this.scriptTagFilterCount = selfie.scriptTagFilterCount;
- this.userScripts = mapFromSelfie(selfie.userScripts);
this.frozen = true;
};
@@ -2055,14 +1222,10 @@ FilterContainer.prototype.retrieveDomainSelectors = function(
request,
options
) {
- if ( !request.locationURL ) { return; }
-
//console.time('cosmeticFilteringEngine.retrieveDomainSelectors');
- var hostname = this.µburi.hostnameFromURI(request.locationURL),
- domain = this.µburi.domainFromHostname(hostname) || hostname,
- pos = domain.indexOf('.'),
- entity = pos === -1 ? '' : domain.slice(0, pos - domain.length) + '.*',
+ var hostname = request.hostname,
+ entity = request.entity,
cacheEntry = this.selectorCache.get(hostname),
entry;
@@ -2076,8 +1239,7 @@ FilterContainer.prototype.retrieveDomainSelectors = function(
var out = {
ready: this.frozen,
hostname: hostname,
- domain: domain,
- entity: entity,
+ domain: request.domain,
declarativeFilters: [],
exceptionFilters: [],
hideNodeAttr: this.randomAlphaToken(),
@@ -2087,12 +1249,11 @@ FilterContainer.prototype.retrieveDomainSelectors = function(
injectedHideFilters: '',
networkFilters: '',
noDOMSurveying: this.hasGenericHide === false,
- proceduralFilters: [],
- scripts: undefined
+ proceduralFilters: []
};
if ( options.noCosmeticFiltering !== true ) {
- var domainHash = makeHash(domain),
+ var domainHash = makeHash(request.domain),
entityHash = entity !== '' ? makeHash(entity) : undefined,
exception, bucket;
@@ -2228,9 +1389,6 @@ FilterContainer.prototype.retrieveDomainSelectors = function(
this.setRegister2.clear();
}
- // Scriptlet injection.
- out.scripts = this.retrieveUserScripts(domain, hostname, request);
-
// CSS selectors for collapsible blocked elements
if ( cacheEntry ) {
var networkFilters = [];
diff --git a/src/js/html-filtering.js b/src/js/html-filtering.js
new file mode 100644
index 0000000000000..890c874d991f7
--- /dev/null
+++ b/src/js/html-filtering.js
@@ -0,0 +1,357 @@
+/*******************************************************************************
+
+ uBlock Origin - a browser extension to block requests.
+ Copyright (C) 2017 Raymond Hill
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see {http://www.gnu.org/licenses/}.
+
+ Home: https://github.com/gorhill/uBlock
+*/
+
+'use strict';
+
+/******************************************************************************/
+
+µBlock.htmlFilteringEngine = (function() {
+ var api = {};
+
+ var µb = µBlock,
+ filterDB = new µb.staticExtFilteringEngine.HostnameBasedDB(),
+ pselectors = new Map(),
+ duplicates = new Set(),
+ docRegister, loggerRegister;
+
+ var PSelectorHasTask = function(task) {
+ this.selector = task[1];
+ };
+ PSelectorHasTask.prototype.exec = function(input) {
+ var output = [];
+ for ( var node of input ) {
+ if ( node.querySelector(this.selector) !== null ) {
+ output.push(node);
+ }
+ }
+ return output;
+ };
+
+ var PSelectorHasTextTask = function(task) {
+ this.needle = new RegExp(task[1]);
+ };
+ PSelectorHasTextTask.prototype.exec = function(input) {
+ var output = [];
+ for ( var node of input ) {
+ if ( this.needle.test(node.textContent) ) {
+ output.push(node);
+ }
+ }
+ return output;
+ };
+
+ var PSelectorIfTask = function(task) {
+ this.pselector = new PSelector(task[1]);
+ };
+ PSelectorIfTask.prototype.target = true;
+    PSelectorIfTask.prototype.exec = function(input) {
+        var output = [], found; // PSelector has no test(): derive it from exec()
+        for ( var node of input ) {
+            found = this.pselector.exec(node);
+            found = found !== undefined && found.length !== 0;
+            if ( found === this.target ) { output.push(node); }
+        }
+        return output;
+    };
+
+ var PSelectorIfNotTask = function(task) {
+ PSelectorIfTask.call(this, task);
+ this.target = false;
+ };
+ PSelectorIfNotTask.prototype = Object.create(PSelectorIfTask.prototype);
+ PSelectorIfNotTask.prototype.constructor = PSelectorIfNotTask;
+
+ var PSelectorXpathTask = function(task) {
+ this.xpe = task[1];
+ };
+ PSelectorXpathTask.prototype.exec = function(input) {
+ var output = [],
+ xpe = docRegister.createExpression(this.xpe, null),
+ xpr = null;
+ for ( var node of input ) {
+ xpr = xpe.evaluate(
+ node,
+ XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE,
+ xpr
+ );
+ var j = xpr.snapshotLength;
+ while ( j-- ) {
+ node = xpr.snapshotItem(j);
+ if ( node.nodeType === 1 ) {
+ output.push(node);
+ }
+ }
+ }
+ return output;
+ };
+
+ var PSelector = function(o) {
+ if ( PSelector.prototype.operatorToTaskMap === undefined ) {
+ PSelector.prototype.operatorToTaskMap = new Map([
+ [ ':has', PSelectorHasTask ],
+ [ ':has-text', PSelectorHasTextTask ],
+ [ ':if', PSelectorIfTask ],
+ [ ':if-not', PSelectorIfNotTask ],
+ [ ':xpath', PSelectorXpathTask ]
+ ]);
+ }
+ this.invalid = false;
+ this.raw = o.raw;
+ this.selector = o.selector;
+ this.tasks = [];
+ var tasks = o.tasks;
+ if ( !tasks ) { return; }
+ for ( var task of tasks ) {
+ var ctor = this.operatorToTaskMap.get(task[0]);
+ if ( ctor === undefined ) {
+ this.invalid = true;
+ break;
+ }
+ this.tasks.push(new ctor(task));
+ }
+ };
+ PSelector.prototype.operatorToTaskMap = undefined;
+ PSelector.prototype.prime = function(input) {
+ var root = input || docRegister;
+ if ( this.selector !== '' ) {
+ return root.querySelectorAll(this.selector);
+ }
+ return [ root ];
+ };
+    PSelector.prototype.exec = function(input) {
+        if ( this.invalid ) { return []; } // callers read `.length` directly
+        var nodes = this.prime(input);
+        for ( var task of this.tasks ) {
+            if ( nodes.length === 0 ) { break; }
+            nodes = task.exec(nodes);
+        }
+        return nodes;
+    };
+
+ var logOne = function(details, selector) {
+ loggerRegister.writeOne(
+ details.tabId,
+ 'cosmetic',
+ { source: 'cosmetic', raw: '##^' + selector },
+ 'dom',
+ details.url,
+ null,
+ details.hostname
+ );
+ };
+
+ var applyProceduralSelector = function(details, selector) {
+ var pselector = pselectors.get(selector);
+ if ( pselector === undefined ) {
+ pselector = new PSelector(JSON.parse(selector));
+ pselectors.set(selector, pselector);
+ }
+ var nodes = pselector.exec(),
+ i = nodes.length,
+ modified = false;
+ while ( i-- ) {
+ var node = nodes[i];
+ if ( node.parentNode !== null ) {
+ node.parentNode.removeChild(node);
+ modified = true;
+ }
+ }
+ if ( modified && loggerRegister.isEnabled() ) {
+ logOne(details, pselector.raw);
+ }
+ return modified;
+ };
+
+ var applyCSSSelector = function(details, selector) {
+ var nodes = docRegister.querySelectorAll(selector),
+ i = nodes.length,
+ modified = false;
+ while ( i-- ) {
+ var node = nodes[i];
+ if ( node.parentNode !== null ) {
+ node.parentNode.removeChild(node);
+ modified = true;
+ }
+ }
+ if ( modified && loggerRegister.isEnabled() ) {
+ logOne(details, selector);
+ }
+ return modified;
+ };
+
+ api.reset = function() {
+ filterDB.clear();
+ pselectors.clear();
+ duplicates.clear();
+ };
+
+ api.freeze = function() {
+ duplicates.clear();
+ };
+
+ api.compile = function(parsed, writer) {
+ var selector = parsed.suffix.slice(1).trim(),
+ compiled = µb.staticExtFilteringEngine.compileSelector(selector);
+ if ( compiled === undefined ) { return; }
+
+ // 1002 = html filtering
+ writer.select(1002);
+
+ // TODO: Mind negated hostnames, they are currently discarded.
+
+ for ( var hostname of parsed.hostnames ) {
+ if ( hostname.charCodeAt(0) === 0x7E /* '~' */ ) { continue; }
+ var domain = µb.URI.domainFromHostname(hostname);
+ writer.push([
+ compiled.charCodeAt(0) !== 0x7B /* '{' */ ? 64 : 65,
+ parsed.exception ? '!' + domain : domain,
+ hostname,
+ compiled
+ ]);
+ }
+ };
+
+ api.fromCompiledContent = function(reader) {
+ // Don't bother loading filters if stream filtering is not supported.
+ //if ( µb.canFilterResponseBody === false ) { return; }
+
+ // 1002 = html filtering
+ reader.select(1002);
+
+ while ( reader.next() ) {
+ var fingerprint = reader.fingerprint();
+ if ( duplicates.has(fingerprint) ) { continue; }
+ duplicates.add(fingerprint);
+ var args = reader.args();
+ filterDB.add(args[1], {
+ type: args[0],
+ hostname: args[2],
+ selector: args[3]
+ });
+ }
+ };
+
+ api.retrieve = function(request) {
+ var hostname = request.hostname;
+
+ // https://github.com/gorhill/uBlock/issues/2835
+ // Do not filter if the site is under an `allow` rule.
+ if (
+ µb.userSettings.advancedUserEnabled &&
+ µb.sessionFirewall.evaluateCellZY(hostname, hostname, '*') === 2
+ ) {
+ return;
+ }
+
+ var out = [];
+ if ( request.domain !== '' ) {
+ filterDB.retrieve(request.domain, hostname, out);
+ filterDB.retrieve(request.entity, request.entity, out);
+ }
+ filterDB.retrieve('', hostname, out);
+
+ // TODO: handle exceptions.
+
+ if ( out.length !== 0 ) {
+ return out;
+ }
+ };
+
+ api.apply = function(doc, details) {
+ docRegister = doc;
+ loggerRegister = µb.logger;
+ var modified = false;
+ for ( var entry of details.selectors ) {
+ if ( entry.type === 64 ) {
+ if ( applyCSSSelector(details, entry.selector) ) {
+ modified = true;
+ }
+ } else {
+ if ( applyProceduralSelector(details, entry.selector) ) {
+ modified = true;
+ }
+ }
+ }
+
+ docRegister = loggerRegister = undefined;
+ return modified;
+ };
+
+ api.toSelfie = function() {
+ return filterDB.toSelfie();
+ };
+
+ api.fromSelfie = function(selfie) {
+ filterDB = new µb.staticExtFilteringEngine.HostnameBasedDB(selfie);
+ pselectors.clear();
+ };
+
+    // TODO: The methods below are useful only to legacy Firefox, and can be
+ // removed once support for legacy Firefox is dropped. The only care
+ // at this point is for the code to work, not to be efficient.
+ // Only `script:has-text` selectors are considered.
+
+ api.retrieveScriptTagHostnames = function() {
+ var out = new Set();
+ for ( var entry of filterDB ) {
+ if ( entry.type !== 65 ) { continue; }
+ var o = JSON.parse(entry.selector);
+ if (
+ o.tasks.length === 1 &&
+ o.tasks[0].length === 2 &&
+ o.tasks[0][0] === ':has-text'
+ ) {
+ out.add(entry.hostname);
+ }
+ }
+ if ( out.size !== 0 ) {
+ return Array.from(out);
+ }
+ };
+
+ api.retrieveScriptTagRegex = function(domain, hostname) {
+ var entries = api.retrieve({
+ hostname: hostname,
+ domain: domain,
+ entity: µb.URI.entityFromDomain(domain)
+ });
+ if ( entries === undefined ) { return; }
+ var out = new Set();
+ for ( var entry of entries ) {
+ if ( entry.type !== 65 ) { continue; }
+ var o = JSON.parse(entry.selector);
+ if (
+ o.tasks.length === 1 &&
+ o.tasks[0].length === 2 &&
+ o.tasks[0][0] === ':has-text'
+ ) {
+ out.add(o.tasks[0][1]);
+ }
+ }
+ if ( out.size !== 0 ) {
+ return Array.from(out).join('|');
+ }
+ };
+
+ return api;
+})();
+
+/******************************************************************************/
diff --git a/src/js/messaging.js b/src/js/messaging.js
index 40ccaee61f1d2..51c91606cccb5 100644
--- a/src/js/messaging.js
+++ b/src/js/messaging.js
@@ -102,7 +102,7 @@ var onMessage = function(request, sender, callback) {
break;
case 'compileCosmeticFilterSelector':
- response = µb.cosmeticFilteringEngine.compileSelector(request.selector);
+ response = µb.staticExtFilteringEngine.compileSelector(request.selector);
break;
case 'cosmeticFiltersInjected':
@@ -465,7 +465,7 @@ var onMessage = function(request, sender, callback) {
var µb = µBlock,
response,
tabId, frameId,
- pageStore;
+ pageStore = null;
if ( sender && sender.tab ) {
tabId = sender.tab.id;
@@ -491,21 +491,33 @@ var onMessage = function(request, sender, callback) {
break;
case 'retrieveContentScriptParameters':
- if ( pageStore && pageStore.getNetFilteringSwitch() ) {
- response = {
- collapseBlocked: µb.userSettings.collapseBlocked,
- noCosmeticFiltering: pageStore.noCosmeticFiltering === true,
- noGenericCosmeticFiltering:
- pageStore.noGenericCosmeticFiltering === true
- };
- request.tabId = tabId;
- request.frameId = frameId;
- response.specificCosmeticFilters =
- µb.cosmeticFilteringEngine
- .retrieveDomainSelectors(request, response);
- if ( request.isRootFrame && µb.logger.isEnabled() ) {
- µb.logCosmeticFilters(tabId);
- }
+ if (
+ pageStore === null ||
+ pageStore.getNetFilteringSwitch() === false ||
+ !request.url
+ ) {
+ break;
+ }
+ response = {
+ collapseBlocked: µb.userSettings.collapseBlocked,
+ noCosmeticFiltering: pageStore.noCosmeticFiltering === true,
+ noGenericCosmeticFiltering:
+ pageStore.noGenericCosmeticFiltering === true
+ };
+ request.tabId = tabId;
+ request.frameId = frameId;
+ request.hostname = µb.URI.hostnameFromURI(request.url);
+ request.domain = µb.URI.domainFromHostname(request.hostname);
+ request.entity = µb.URI.entityFromDomain(request.domain);
+ response.specificCosmeticFilters =
+ µb.cosmeticFilteringEngine.retrieveDomainSelectors(request, response);
+        // If response body filtering is supported, then the scriptlets have
+ // already been injected.
+ if ( µb.canFilterResponseBody === false ) {
+ response.scriptlets = µb.scriptletFilteringEngine.retrieve(request);
+ }
+ if ( request.isRootFrame && µb.logger.isEnabled() ) {
+ µb.logCosmeticFilters(tabId);
}
break;
diff --git a/src/js/reverselookup-worker.js b/src/js/reverselookup-worker.js
index 7b2662c8782e5..2073f9b8cebef 100644
--- a/src/js/reverselookup-worker.js
+++ b/src/js/reverselookup-worker.js
@@ -26,7 +26,26 @@
/******************************************************************************/
var listEntries = Object.create(null),
- filterClassSeparator = '\n/* end of network - start of cosmetic */\n';
+ reBlockStart = /^#block-start-(\d+)\n/gm;
+
+/******************************************************************************/
+
+var extractBlocks = function(content, begId, endId) {
+    reBlockStart.lastIndex = 0;
+    var out = [], match, beg, end, blockId;
+    while ( (match = reBlockStart.exec(content)) !== null ) {
+        beg = match.index + match[0].length;
+        blockId = parseInt(match[1], 10);
+        if ( blockId < begId || blockId >= endId ) { continue; }
+        end = content.indexOf('#block-end-' + match[1], beg);
+        // Unterminated block: take the rest of the content. Leaving `end` at
+        // -1 would reset `lastIndex` to 0 and rescan the same block forever.
+        if ( end === -1 ) { end = content.length; }
+        out.push(content.slice(beg, end));
+        reBlockStart.lastIndex = end;
+    }
+    return out.join('\n');
+};
/******************************************************************************/
@@ -34,13 +53,11 @@ var fromNetFilter = function(details) {
var lists = [],
compiledFilter = details.compiledFilter,
entry, content, pos, notFound;
+
for ( var assetKey in listEntries ) {
entry = listEntries[assetKey];
if ( entry === undefined ) { continue; }
- content = entry.content.slice(
- 0,
- entry.content.indexOf(filterClassSeparator)
- );
+ content = extractBlocks(entry.content, 0, 1000);
pos = 0;
for (;;) {
pos = content.indexOf(compiledFilter, pos);
@@ -96,7 +113,7 @@ var fromNetFilter = function(details) {
// the various compiled versions.
var fromCosmeticFilter = function(details) {
- var match = /^#@?#/.exec(details.rawFilter),
+ var match = /^#@?#\^?/.exec(details.rawFilter),
prefix = match[0],
selector = details.rawFilter.slice(prefix.length);
@@ -138,15 +155,14 @@ var fromCosmeticFilter = function(details) {
}
var response = Object.create(null),
- assetKey, entry, content, found, beg, end, fargs;
+ assetKey, entry, content,
+ found, beg, end,
+ fargs, isProcedural;
for ( assetKey in listEntries ) {
entry = listEntries[assetKey];
if ( entry === undefined ) { continue; }
- content = entry.content.slice(
- entry.content.indexOf(filterClassSeparator) +
- filterClassSeparator.length
- );
+ content = extractBlocks(entry.content, 1000, 2000);
found = undefined;
while ( (match = reNeedle.exec(content)) !== null ) {
beg = content.lastIndexOf('\n', match.index);
@@ -194,12 +210,15 @@ var fromCosmeticFilter = function(details) {
found = prefix + selector;
}
break;
- case 6:
case 8:
case 9:
+ case 32:
+ case 64:
+ case 65:
+ isProcedural = fargs[3].charCodeAt(0) === 0x7B;
if (
- fargs[0] !== 9 && fargs[3] !== selector ||
- fargs[0] === 9 && JSON.parse(fargs[3]).raw !== selector
+ isProcedural === false && fargs[3] !== selector ||
+ isProcedural && JSON.parse(fargs[3]).raw !== selector
) {
break;
}
diff --git a/src/js/rpcreceiver.js b/src/js/rpcreceiver.js
index 681d0f7d017ef..86adbcb5960cf 100644
--- a/src/js/rpcreceiver.js
+++ b/src/js/rpcreceiver.js
@@ -35,22 +35,21 @@ if ( typeof vAPI.rpcReceiver !== 'object' ) {
vAPI.rpcReceiver.getScriptTagHostnames = function() {
var µb = µBlock;
- var cfe = µb.cosmeticFilteringEngine;
- if ( !cfe ) { return; }
- return cfe.retrieveScriptTagHostnames();
+ if ( µb.htmlFilteringEngine ) {
+ return µb.htmlFilteringEngine.retrieveScriptTagHostnames();
+ }
};
/******************************************************************************/
vAPI.rpcReceiver.getScriptTagFilters = function(details) {
var µb = µBlock;
- var cfe = µb.cosmeticFilteringEngine;
- if ( !cfe ) { return; }
+ if ( !µb.htmlFilteringEngine ) { return; }
// Fetching the script tag filters first: assuming it is faster than
// checking whether the site is whitelisted.
var hostname = details.frameHostname;
- var r = cfe.retrieveScriptTagRegex(
- µb.URI.domainFromHostname(hostname) || hostname,
+ var r = µb.htmlFilteringEngine.retrieveScriptTagRegex(
+ µb.URI.domainFromHostname(hostname),
hostname
);
// https://github.com/gorhill/uBlock/issues/838
diff --git a/src/js/scriptlet-filtering.js b/src/js/scriptlet-filtering.js
new file mode 100644
index 0000000000000..b10066cf11bd8
--- /dev/null
+++ b/src/js/scriptlet-filtering.js
@@ -0,0 +1,270 @@
+/*******************************************************************************
+
+ uBlock Origin - a browser extension to block requests.
+ Copyright (C) 2017 Raymond Hill
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see {http://www.gnu.org/licenses/}.
+
+ Home: https://github.com/gorhill/uBlock
+*/
+
+'use strict';
+
+/******************************************************************************/
+
+µBlock.scriptletFilteringEngine = (function() {
+ var api = {};
+
+ var µb = µBlock,
+ scriptletDB = new µb.staticExtFilteringEngine.HostnameBasedDB(),
+ duplicates = new Set(),
+ scriptletCache = new µb.MRUCache(32),
+ exceptionsRegister = new Set(),
+ scriptletsRegister = new Map(),
+ reEscapeScriptArg = /[\\'"]/g;
+
+ var scriptletRemover = [
+ '(function() {',
+ ' var c = document.currentScript, p = c && c.parentNode;',
+ ' if ( p ) { p.removeChild(c); }',
+ '})();'
+ ].join('\n');
+
+
+ var lookupScriptlet = function(raw, reng, toInject) {
+ if ( toInject.has(raw) ) { return; }
+ if ( scriptletCache.resetTime < reng.modifyTime ) {
+ scriptletCache.reset();
+ }
+ var content = scriptletCache.lookup(raw);
+ if ( content === undefined ) {
+ var token, args,
+ pos = raw.indexOf(',');
+ if ( pos === -1 ) {
+ token = raw;
+ } else {
+ token = raw.slice(0, pos).trim();
+ args = raw.slice(pos + 1).trim();
+ }
+ content = reng.resourceContentFromName(token, 'application/javascript');
+ if ( !content ) { return; }
+ if ( args ) {
+ content = patchScriptlet(content, args);
+ if ( !content ) { return; }
+ }
+ scriptletCache.add(raw, content);
+ }
+ toInject.set(raw, content);
+ };
+
+    // Fill `{{1}}`, `{{2}}`, ... with the comma-separated args, quote-escaped.
+    // A function replacer is used so `$&`, `$1`, etc. in args stay literal.
+
+    var patchScriptlet = function(content, args) {
+        var i = 1, pos, arg;
+        var verbatim = function() { return arg; };
+        while ( args !== '' ) {
+            pos = args.indexOf(',');
+            if ( pos === -1 ) { pos = args.length; }
+            arg = args.slice(0, pos).trim().replace(reEscapeScriptArg, '\\$&');
+            content = content.replace('{{' + i + '}}', verbatim);
+            args = args.slice(pos + 1).trim();
+            i++;
+        }
+        return content;
+    };
+
+ var logOne = function(isException, token, details) {
+ µb.logger.writeOne(
+ details.tabId,
+ 'cosmetic',
+ {
+ source: 'cosmetic',
+ raw: (isException ? '#@#' : '##') + 'script:inject(' + token + ')'
+ },
+ 'dom',
+ details.url,
+ null,
+ details.hostname
+ );
+ };
+
+ api.reset = function() {
+ scriptletDB.clear();
+ duplicates.clear();
+ };
+
+ api.freeze = function() {
+ duplicates.clear();
+ };
+
+ api.compile = function(parsed, writer) {
+ // 1001 = scriptlet injection
+ writer.select(1001);
+
+ // Only exception filters are allowed to be global.
+
+ if ( parsed.hostnames.length === 0 ) {
+ if ( parsed.exception ) {
+ writer.push([ 32, '!', '', parsed.suffix ]);
+ }
+ return;
+ }
+
+ // https://github.com/gorhill/uBlock/issues/3375
+ // Ignore instances of exception filter with negated hostnames,
+ // because there is no way to create an exception to an exception.
+
+ var µburi = µb.URI;
+
+ for ( var hostname of parsed.hostnames ) {
+ var negated = hostname.charCodeAt(0) === 0x7E /* '~' */;
+ if ( negated ) {
+ hostname = hostname.slice(1);
+ }
+ var hash = µburi.domainFromHostname(hostname);
+ if ( parsed.exception ) {
+ if ( negated ) { continue; }
+ hash = '!' + hash;
+ } else if ( negated ) {
+ hash = '!' + hash;
+ }
+ writer.push([ 32, hash, hostname, parsed.suffix ]);
+ }
+ };
+
+ // 01234567890123456789
+ // script:inject(token[, arg[, ...]])
+ // ^ ^
+ // 14 -1
+
+ api.fromCompiledContent = function(reader) {
+ // 1001 = scriptlet injection
+ reader.select(1001);
+
+ while ( reader.next() ) {
+ var fingerprint = reader.fingerprint();
+ if ( duplicates.has(fingerprint) ) { continue; }
+ duplicates.add(fingerprint);
+ var args = reader.args();
+ if ( args.length < 4 ) { continue; }
+ scriptletDB.add(
+ args[1],
+ { hostname: args[2], token: args[3].slice(14, -1) }
+ );
+ }
+ };
+
+ api.retrieve = function(request) {
+ if ( scriptletDB.size === 0 ) { return; }
+ if ( µb.hiddenSettings.ignoreScriptInjectFilters ) { return; }
+
+ var reng = µb.redirectEngine;
+ if ( !reng ) { return; }
+
+ var hostname = request.hostname;
+
+ // https://github.com/gorhill/uBlock/issues/2835
+ // Do not inject scriptlets if the site is under an `allow` rule.
+ if (
+ µb.userSettings.advancedUserEnabled &&
+ µb.sessionFirewall.evaluateCellZY(hostname, hostname, '*') === 2
+ ) {
+ return;
+ }
+
+ var domain = request.domain,
+ entity = request.entity,
+ entries, entry;
+
+ // https://github.com/gorhill/uBlock/issues/1954
+ // Implicit
+ var hn = hostname;
+ for (;;) {
+ lookupScriptlet(hn + '.js', reng, scriptletsRegister);
+ if ( hn === domain ) { break; }
+ var pos = hn.indexOf('.');
+ if ( pos === -1 ) { break; }
+ hn = hn.slice(pos + 1);
+ }
+ if ( entity !== '' ) {
+ lookupScriptlet(entity + '.js', reng, scriptletsRegister);
+ }
+
+ // Explicit
+ entries = [];
+ if ( domain !== '' ) {
+ scriptletDB.retrieve(domain, hostname, entries);
+ scriptletDB.retrieve(entity, entity, entries);
+ }
+ scriptletDB.retrieve('', hostname, entries);
+ for ( entry of entries ) {
+ lookupScriptlet(entry.token, reng, scriptletsRegister);
+ }
+
+ if ( scriptletsRegister.size === 0 ) { return; }
+
+ // Collect exception filters.
+ entries = [];
+ if ( domain !== '' ) {
+ scriptletDB.retrieve('!' + domain, hostname, entries);
+ scriptletDB.retrieve('!' + entity, entity, entries);
+ }
+ scriptletDB.retrieve('!', hostname, entries);
+ for ( entry of entries ) {
+ exceptionsRegister.add(entry.token);
+ }
+
+ // Return an array of scriptlets, and log results if needed.
+ var out = [],
+ logger = µb.logger.isEnabled() ? µb.logger : null,
+ isException;
+ for ( entry of scriptletsRegister ) {
+ if ( (isException = exceptionsRegister.has(entry[0])) === false ) {
+ out.push(entry[1]);
+ }
+ if ( logger !== null ) {
+ logOne(isException, entry[0], request);
+ }
+ }
+
+ scriptletsRegister.clear();
+ exceptionsRegister.clear();
+
+ if ( out.length === 0 ) { return; }
+
+ out.push(scriptletRemover);
+
+ return out.join('\n');
+ };
+
+ api.apply = function(doc, details) {
+ var script = doc.createElement('script');
+ script.textContent = details.scriptlets;
+ doc.head.insertBefore(script, doc.head.firstChild);
+ return true;
+ };
+
+ api.toSelfie = function() {
+ return scriptletDB.toSelfie();
+ };
+
+ api.fromSelfie = function(selfie) {
+ scriptletDB = new µb.staticExtFilteringEngine.HostnameBasedDB(selfie);
+ };
+
+ return api;
+})();
+
+/******************************************************************************/
diff --git a/src/js/start.js b/src/js/start.js
index f28280cf735a0..57f340812c282 100644
--- a/src/js/start.js
+++ b/src/js/start.js
@@ -39,7 +39,7 @@ vAPI.app.onShutdown = function() {
µb.staticFilteringReverseLookup.shutdown();
µb.assets.updateStop();
µb.staticNetFilteringEngine.reset();
- µb.cosmeticFilteringEngine.reset();
+ µb.staticExtFilteringEngine.reset();
µb.sessionFirewall.reset();
µb.permanentFirewall.reset();
µb.permanentFirewall.reset();
@@ -139,7 +139,7 @@ var onSelfieReady = function(selfie) {
µb.availableFilterLists = selfie.availableFilterLists;
µb.staticNetFilteringEngine.fromSelfie(selfie.staticNetFilteringEngine);
µb.redirectEngine.fromSelfie(selfie.redirectEngine);
- µb.cosmeticFilteringEngine.fromSelfie(selfie.cosmeticFilteringEngine);
+ µb.staticExtFilteringEngine.fromSelfie(selfie.staticExtFilteringEngine);
return true;
};
diff --git a/src/js/static-ext-filtering.js b/src/js/static-ext-filtering.js
new file mode 100644
index 0000000000000..da5224c9ead2d
--- /dev/null
+++ b/src/js/static-ext-filtering.js
@@ -0,0 +1,680 @@
+/*******************************************************************************
+
+ uBlock Origin - a browser extension to block requests.
+ Copyright (C) 2017 Raymond Hill
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see {http://www.gnu.org/licenses/}.
+
+ Home: https://github.com/gorhill/uBlock
+*/
+
+/* global punycode */
+
+'use strict';
+
+/*******************************************************************************
+
+ All static extended filters are of the form:
+
+ field 1: one hostname, or a list of comma-separated hostnames
+ field 2: `##` or `#@#`
+ field 3: selector
+
+ The purpose of the static extended filtering engine is to coarse-parse and
+ dispatch to appropriate specialized filtering engines. There are currently
+ three specialized filtering engines:
+
+ - cosmetic filtering (aka "element hiding" in Adblock Plus)
+ - scriptlet injection: selector starts with `script:inject`
+ - html filtering: selector starts with `^`
+
+ Depending on the specialized filtering engine, field 1 may or may not be
+ optional.
+
+ The static extended filtering engine also offers parsing capabilities which
+ are available to all other specialized filtering engines. For example,
+ cosmetic and html filtering can ask the extended filtering engine to
+ compile/validate selectors.
+
+**/
+
+µBlock.staticExtFilteringEngine = (function() {
+ var µb = µBlock,
+ reHostnameSeparator = /\s*,\s*/,
+ reHasUnicode = /[^\x00-\x7F]/,
+ reIsRegexLiteral = /^\/.+\/$/,
+ emptyArray = [],
+ parsed = {
+ hostnames: [],
+ exception: false,
+ suffix: ''
+ };
+
+    // Build the CSS selector validator: the returned function tells whether
+    // the browser is able to parse the string it is given as a selector.
+    // Which DOM method is used for probing is decided once, right here.
+    var isValidCSSSelector = (function() {
+        var probe = document.createElement('div'),
+            candidates = [
+                'matches',
+                'mozMatchesSelector',
+                'webkitMatchesSelector',
+                'msMatchesSelector'
+            ],
+            tester;
+        var fallbackTester = function() {
+            return probe.querySelector.bind(probe);
+        };
+        // Keep in mind:
+        //   https://github.com/gorhill/uBlock/issues/693
+        //   https://github.com/gorhill/uBlock/issues/1955
+        for ( var i = 0; i < candidates.length; i++ ) {
+            if ( probe[candidates[i]] instanceof Function ) {
+                tester = probe[candidates[i]].bind(probe);
+                break;
+            }
+        }
+        if ( tester === undefined ) {
+            tester = fallbackTester();
+        }
+        // https://github.com/gorhill/uBlock/issues/3111
+        //   Workaround until https://bugzilla.mozilla.org/show_bug.cgi?id=1406817
+        //   is fixed.
+        try {
+            tester(':scope');
+        } catch (ex) {
+            tester = fallbackTester();
+        }
+        return function(s) {
+            var valid = true;
+            try {
+                tester(s + ', ' + s + ':not(#foo)');
+            } catch (ex) {
+                valid = false;
+            }
+            return valid;
+        };
+    })();
+
+
+    // Tell whether `s` fails to compile as a regular expression. On failure,
+    // the stringified error is left in `isBadRegex.message`.
+    var isBadRegex = function(s) {
+        var bad = false;
+        try {
+            void new RegExp(s);
+        } catch (ex) {
+            isBadRegex.message = ex.toString();
+            bad = true;
+        }
+        return bad;
+    };
+
+    // Convert an Adguard CSS injection suffix, `selector { declarations }`,
+    // into the `selector:style(declarations)` form. Returns the empty string
+    // when the suffix does not have the expected shape.
+    var translateAdguardCSSInjectionFilter = function(suffix) {
+        var parts = /^([^{]+)\{([^}]+)\}$/.exec(suffix);
+        if ( parts !== null ) {
+            var selector = parts[1].trim(),
+                declarations = parts[2].trim();
+            return selector + ':style(' + declarations + ')';
+        }
+        return '';
+    };
+
+    // Punycode a single hostname; a leading `~` is kept as is, in front of
+    // the converted hostname.
+    var toASCIIHostname = function(hostname) {
+        var negated = hostname.charCodeAt(0) === 0x7E /* '~' */,
+            bare = negated ? hostname.slice(1) : hostname,
+            ascii = punycode.toASCII(bare);
+        return negated ? '~' + ascii : ascii;
+    };
+
+ // Compiler for procedural selectors, i.e. selectors which make use of
+ // operators such as `:has-text(...)` or `:xpath(...)`. The function returned
+ // by this closure takes a raw selector and returns its compiled form as a
+ // JSON string, or undefined when the selector can't be compiled.
+ var compileProceduralSelector = (function() {
+ // Matches the leftmost supported operator which is followed by a
+ // parenthesized argument extending to the end of the string.
+ var reOperatorParser = new RegExp([
+ '(:(?:',
+ [
+ '-abp-contains',
+ '-abp-has',
+ 'contains',
+ 'has',
+ 'has-text',
+ 'if',
+ 'if-not',
+ 'matches-css',
+ 'matches-css-after',
+ 'matches-css-before',
+ 'xpath'
+ ].join('|'),
+ '))\\(.+\\)$'
+ ].join(''));
+
+ var reFirstParentheses = /^\(*/,
+ reLastParentheses = /\)*$/,
+ reEscapeRegex = /[.*+?^${}()|[\]\\]/g,
+ reNeedScope = /^\s*[+>~]/;
+
+ // Single-entry cache of the last compiled selector, plus a map from the
+ // regex text generated for a plain-text argument back to that raw text
+ // (used by decompile()).
+ var lastProceduralSelector = '',
+ lastProceduralSelectorCompiled,
+ regexToRawValue = new Map();
+
+ // Returns the (possibly `:scope`-prefixed) selector if valid, undefined
+ // otherwise.
+ var compileCSSSelector = function(s) {
+ // https://github.com/AdguardTeam/ExtendedCss/issues/31#issuecomment-302391277
+ // Prepend `:scope ` if needed.
+ if ( reNeedScope.test(s) ) {
+ s = ':scope ' + s;
+ }
+ if ( isValidCSSSelector(s) ) {
+ return s;
+ }
+ };
+
+ // Returns regex source text for `s`: the inner part of a `/.../` literal,
+ // or `s` with regex special characters escaped. Returns undefined for an
+ // invalid regex literal.
+ var compileText = function(s) {
+ var reText;
+ if ( reIsRegexLiteral.test(s) ) {
+ reText = s.slice(1, -1);
+ if ( isBadRegex(reText) ) { return; }
+ } else {
+ reText = s.replace(reEscapeRegex, '\\$&');
+ regexToRawValue.set(reText, s);
+ }
+ return reText;
+ };
+
+ // Parses `name: value` into `{ name, value }` where `value` is regex source
+ // text; a plain value is escaped and anchored with `^...$`. Returns
+ // undefined when there is no `:` or when the regex literal is invalid.
+ var compileCSSDeclaration = function(s) {
+ var name, value, reText,
+ pos = s.indexOf(':');
+ if ( pos === -1 ) { return; }
+ name = s.slice(0, pos).trim();
+ value = s.slice(pos + 1).trim();
+ if ( reIsRegexLiteral.test(value) ) {
+ reText = value.slice(1, -1);
+ if ( isBadRegex(reText) ) { return; }
+ } else {
+ reText = '^' + value.replace(reEscapeRegex, '\\$&') + '$';
+ regexToRawValue.set(reText, value);
+ }
+ return { name: name, value: reText };
+ };
+
+ // The argument of `:if`/`:if-not` is itself compiled as a (possibly
+ // procedural) selector, through a recursive call to compile().
+ var compileConditionalSelector = function(s) {
+ // https://github.com/AdguardTeam/ExtendedCss/issues/31#issuecomment-302391277
+ // Prepend `:scope ` if needed.
+ if ( reNeedScope.test(s) ) {
+ s = ':scope ' + s;
+ }
+ return compile(s);
+ };
+
+ // Returns `s` unchanged if the browser accepts it as an XPath expression,
+ // undefined otherwise.
+ var compileXpathExpression = function(s) {
+ try {
+ document.createExpression(s, null);
+ } catch (e) {
+ return;
+ }
+ return s;
+ };
+
+ // https://github.com/gorhill/uBlock/issues/2793
+ var normalizedOperators = new Map([
+ [ ':-abp-contains', ':has-text' ],
+ [ ':-abp-has', ':if' ],
+ [ ':contains', ':has-text' ]
+ ]);
+
+ // Normalized operator => function used to compile its argument.
+ var compileArgument = new Map([
+ [ ':has', compileCSSSelector ],
+ [ ':has-text', compileText ],
+ [ ':if', compileConditionalSelector ],
+ [ ':if-not', compileConditionalSelector ],
+ [ ':matches-css', compileCSSDeclaration ],
+ [ ':matches-css-after', compileCSSDeclaration ],
+ [ ':matches-css-before', compileCSSDeclaration ],
+ [ ':xpath', compileXpathExpression ]
+ ]);
+
+ // https://github.com/gorhill/uBlock/issues/2793#issuecomment-333269387
+ // Normalize (somewhat) the stringified version of procedural
+ // cosmetic filters -- this increases the likelihood of detecting
+ // duplicates given that uBO is able to understand syntax specific
+ // to other blockers.
+ // The normalized string version is what is reported in the logger,
+ // by design.
+ var decompile = function(compiled) {
+ var raw = [ compiled.selector ],
+ tasks = compiled.tasks,
+ value;
+ if ( Array.isArray(tasks) ) {
+ for ( var i = 0, n = tasks.length, task; i < n; i++ ) {
+ task = tasks[i];
+ switch ( task[0] ) {
+ case ':has':
+ case ':xpath':
+ raw.push(task[0], '(', task[1], ')');
+ break;
+ case ':has-text':
+ // No raw value on record: the argument was a regex literal.
+ value = regexToRawValue.get(task[1]);
+ if ( value === undefined ) {
+ value = '/' + task[1] + '/';
+ }
+ raw.push(task[0], '(', value, ')');
+ break;
+ case ':matches-css':
+ case ':matches-css-after':
+ case ':matches-css-before':
+ value = regexToRawValue.get(task[1].value);
+ if ( value === undefined ) {
+ value = '/' + task[1].value + '/';
+ }
+ raw.push(task[0], '(', task[1].name, ': ', value, ')');
+ break;
+ case ':if':
+ case ':if-not':
+ raw.push(task[0], '(', decompile(task[1]), ')');
+ break;
+ }
+ }
+ }
+ return raw.join('');
+ };
+
+ // Compile `raw` into `{ selector }` (plain CSS selector) or
+ // `{ selector, tasks }` where `tasks` is a list of
+ // `[ operator, compiled argument ]` pairs. Returns undefined on failure.
+ var compile = function(raw) {
+ var matches = reOperatorParser.exec(raw);
+ if ( matches === null ) {
+ if ( isValidCSSSelector(raw) ) { return { selector: raw }; }
+ return;
+ }
+ var tasks = [],
+ firstOperand = raw.slice(0, matches.index),
+ currentOperator = matches[1],
+ selector = raw.slice(matches.index + currentOperator.length),
+ currentArgument = '', nextOperand, nextOperator,
+ depth = 0, opening, closing;
+ if (
+ firstOperand !== '' &&
+ isValidCSSSelector(firstOperand) === false
+ ) {
+ return;
+ }
+ // Each iteration consumes the text up to the next operator (or up to
+ // the end of the string when there is no further operator).
+ for (;;) {
+ matches = reOperatorParser.exec(selector);
+ if ( matches !== null ) {
+ nextOperand = selector.slice(0, matches.index);
+ nextOperator = matches[1];
+ } else {
+ nextOperand = selector;
+ nextOperator = '';
+ }
+ // Compare the count of leading `(` against the count of trailing `)`
+ // to track whether we are inside the argument of the current operator.
+ opening = reFirstParentheses.exec(nextOperand)[0].length;
+ closing = reLastParentheses.exec(nextOperand)[0].length;
+ if ( opening > closing ) {
+ if ( depth === 0 ) { currentArgument = ''; }
+ depth += 1;
+ } else if ( closing > opening && depth > 0 ) {
+ depth -= 1;
+ if ( depth === 0 ) {
+ nextOperand = currentArgument + nextOperand;
+ }
+ }
+ if ( depth !== 0 ) {
+ // Still inside an argument: keep accumulating its text.
+ currentArgument += nextOperand + nextOperator;
+ } else {
+ currentOperator =
+ normalizedOperators.get(currentOperator) ||
+ currentOperator;
+ // slice(1, -1) drops the enclosing parentheses of the argument.
+ currentArgument =
+ compileArgument.get(currentOperator)(
+ nextOperand.slice(1, -1)
+ );
+ if ( currentArgument === undefined ) { return; }
+ tasks.push([ currentOperator, currentArgument ]);
+ currentOperator = nextOperator;
+ }
+ if ( nextOperator === '' ) { break; }
+ selector = selector.slice(matches.index + nextOperator.length);
+ }
+ if ( tasks.length === 0 || depth !== 0 ) { return; }
+ return { selector: firstOperand, tasks: tasks };
+ };
+
+ // Public entry point: returns the JSON-stringified compiled selector,
+ // with its normalized string form stored in the `raw` property, or
+ // undefined. The result for the last selector seen is cached.
+ var entryPoint = function(raw) {
+ if ( raw === lastProceduralSelector ) {
+ return lastProceduralSelectorCompiled;
+ }
+ lastProceduralSelector = raw;
+ var compiled = compile(raw);
+ if ( compiled !== undefined ) {
+ compiled.raw = decompile(compiled);
+ compiled = JSON.stringify(compiled);
+ }
+ lastProceduralSelectorCompiled = compiled;
+ return compiled;
+ };
+
+ // Discard the cached selector and the regex-to-raw-text map.
+ entryPoint.reset = function() {
+ regexToRawValue = new Map();
+ lastProceduralSelector = '';
+ lastProceduralSelectorCompiled = undefined;
+ };
+
+ return entryPoint;
+ })();
+
+ //--------------------------------------------------------------------------
+ // Public API
+ //--------------------------------------------------------------------------
+
+ var api = {};
+
+ //--------------------------------------------------------------------------
+ // Public classes
+ //--------------------------------------------------------------------------
+
+    // Container of entries keyed by hash. When `selfie` is provided (as
+    // produced by `toSelfie()`), the content is restored from it, otherwise
+    // the container starts empty.
+    api.HostnameBasedDB = function(selfie) {
+        var restoring = selfie !== undefined;
+        this.db = restoring ? new Map(selfie.map) : new Map();
+        this.size = restoring ? selfie.size : 0;
+    };
+
+    api.HostnameBasedDB.prototype = {
+        // Store `entry` under `hash`. A bucket holds the entry itself while
+        // there is only one, and becomes an array from the second one on.
+        add: function(hash, entry) {
+            var existing = this.db.get(hash);
+            if ( Array.isArray(existing) ) {
+                existing.push(entry);
+            } else {
+                this.db.set(
+                    hash,
+                    existing === undefined ? entry : [ existing, entry ]
+                );
+            }
+            this.size += 1;
+        },
+        // Remove everything.
+        clear: function() {
+            this.db.clear();
+            this.size = 0;
+        },
+        // Append to `out` all entries stored under `hash` for which
+        // `hostname` ends with the entry's own `hostname` property. Entries
+        // of a multi-entry bucket are visited from last to first.
+        retrieve: function(hash, hostname, out) {
+            var found = this.db.get(hash);
+            if ( found === undefined ) { return; }
+            var candidates = Array.isArray(found) ? found : [ found ];
+            for ( var i = candidates.length - 1; i >= 0; i-- ) {
+                var candidate = candidates[i];
+                if ( hostname.endsWith(candidate.hostname) ) {
+                    out.push(candidate);
+                }
+            }
+        },
+        // Snapshot which can be handed to the constructor.
+        toSelfie: function() {
+            var pairs = Array.from(this.db);
+            return { map: pairs, size: this.size };
+        }
+    };
+
+    // Iteration support: iterating a HostnameBasedDB yields every stored
+    // entry, with the entries of array buckets yielded one by one.
+    api.HostnameBasedDB.prototype[Symbol.iterator] = (function() {
+        var EntryIterator = function(db) {
+            this.mapIter = db.values();
+            this.arrayIter = undefined;
+        };
+        EntryIterator.prototype.next = function() {
+            var step;
+            // First drain the array bucket being visited, if any.
+            if ( this.arrayIter !== undefined ) {
+                step = this.arrayIter.next();
+                if ( step.done === false ) { return step; }
+                this.arrayIter = undefined;
+            }
+            step = this.mapIter.next();
+            if ( !step.done && Array.isArray(step.value) ) {
+                // array should never be empty
+                this.arrayIter = step.value[Symbol.iterator]();
+                return this.arrayIter.next();
+            }
+            return step;
+        };
+        return function() {
+            return new EntryIterator(this.db);
+        };
+    })();
+
+ //--------------------------------------------------------------------------
+ // Public methods
+ //--------------------------------------------------------------------------
+
+ // Reset the procedural selector compiler, then each of the three
+ // specialized filtering engines (cosmetic, scriptlet, html).
+ api.reset = function() {
+ compileProceduralSelector.reset();
+ µb.cosmeticFilteringEngine.reset();
+ µb.scriptletFilteringEngine.reset();
+ µb.htmlFilteringEngine.reset();
+ };
+
+ // Freeze each of the three specialized filtering engines. The procedural
+ // selector compiler is reset first, which drops its cached state.
+ api.freeze = function() {
+ compileProceduralSelector.reset();
+ µb.cosmeticFilteringEngine.freeze();
+ µb.scriptletFilteringEngine.freeze();
+ µb.htmlFilteringEngine.freeze();
+ };
+
+ // https://github.com/chrisaljoudi/uBlock/issues/1004
+ // Detect and report invalid CSS selectors.
+
+ // Discard new ABP's `-abp-properties` directive until it is
+ // implemented (if ever). Unlikely, see:
+ // https://github.com/gorhill/uBlock/issues/1752
+
+ // https://github.com/gorhill/uBlock/issues/2624
+ // Convert Adguard's `-ext-has='...'` into uBO's `:has(...)`.
+
+ // Validate/compile a selector on behalf of the specialized engines.
+ // Returns:
+ // - the selector itself when it is a plain valid CSS selector;
+ // - a JSON string for `:style(...)`, `:after`/`:before`-based, and
+ //   procedural selectors;
+ // - undefined when the selector is not supported or not valid.
+ api.compileSelector = (function() {
+ var reAfterBeforeSelector = /^(.+?)(::?after|::?before)$/,
+ reStyleSelector = /^(.+?):style\((.+?)\)$/,
+ reStyleBad = /url\([^)]+\)/,
+ reExtendedSyntax = /\[-(?:abp|ext)-[a-z-]+=(['"])(?:.+?)(?:\1)\]/,
+ reExtendedSyntaxParser = /\[-(?:abp|ext)-([a-z-]+)=(['"])(.+?)\2\]/,
+ div = document.createElement('div');
+
+ // `[-abp-xxx="..."]`/`[-ext-xxx="..."]` attribute name => equivalent
+ // procedural operator.
+ var normalizedExtendedSyntaxOperators = new Map([
+ [ 'contains', ':has-text' ],
+ [ 'has', ':if' ],
+ [ 'matches-css', ':matches-css' ],
+ [ 'matches-css-after', ':matches-css-after' ],
+ [ 'matches-css-before', ':matches-css-before' ],
+ ]);
+
+ // Declarations containing `url(...)` are rejected outright; the rest is
+ // deemed valid if the browser retains something of it once assigned to
+ // an element's inline style.
+ var isValidStyleProperty = function(cssText) {
+ if ( reStyleBad.test(cssText) ) { return false; }
+ div.style.cssText = cssText;
+ if ( div.style.cssText === '' ) { return false; }
+ div.style.cssText = '';
+ return true;
+ };
+
+ var entryPoint = function(raw) {
+ var extendedSyntax = reExtendedSyntax.test(raw);
+ if ( isValidCSSSelector(raw) && extendedSyntax === false ) {
+ return raw;
+ }
+
+ // We rarely reach this point -- majority of selectors are plain
+ // CSS selectors.
+
+ var matches, operator;
+
+ // Supported Adguard/ABP advanced selector syntax: will translate into
+ // uBO's syntax before further processing.
+ // Mind unsupported advanced selector syntax, such as ABP's
+ // `-abp-properties`.
+ // Note: extended selector syntax has been deprecated in ABP, in favor
+ // of the procedural one (i.e. `:operator(...)`). See
+ // https://issues.adblockplus.org/ticket/5287
+ if ( extendedSyntax ) {
+ while ( (matches = reExtendedSyntaxParser.exec(raw)) !== null ) {
+ operator = normalizedExtendedSyntaxOperators.get(matches[1]);
+ if ( operator === undefined ) { return; }
+ raw = raw.slice(0, matches.index) +
+ operator + '(' + matches[3] + ')' +
+ raw.slice(matches.index + matches[0].length);
+ }
+ // Process the translated selector from scratch.
+ return entryPoint(raw);
+ }
+
+ var selector = raw,
+ pseudoclass, style;
+
+ // `:style` selector?
+ if ( (matches = reStyleSelector.exec(selector)) !== null ) {
+ selector = matches[1];
+ style = matches[2];
+ }
+
+ // https://github.com/gorhill/uBlock/issues/2448
+ // :after- or :before-based selector?
+ if ( (matches = reAfterBeforeSelector.exec(selector)) ) {
+ selector = matches[1];
+ pseudoclass = matches[2];
+ }
+
+ if ( style !== undefined || pseudoclass !== undefined ) {
+ // Validation is performed on the selector stripped of its `:style(...)`
+ // and/or `:after`/`:before` part.
+ if ( isValidCSSSelector(selector) === false ) {
+ return;
+ }
+ if ( pseudoclass !== undefined ) {
+ selector += pseudoclass;
+ }
+ if ( style !== undefined ) {
+ if ( isValidStyleProperty(style) === false ) { return; }
+ return JSON.stringify({
+ raw: raw,
+ style: [ selector, style ]
+ });
+ }
+ return JSON.stringify({
+ raw: raw,
+ pseudoclass: true
+ });
+ }
+
+ // Procedural selector?
+ var compiled;
+ if ( (compiled = compileProceduralSelector(raw)) ) {
+ return compiled;
+ }
+
+ // Nothing worked: report the filter as invalid in the logger; the
+ // function then returns undefined.
+ µb.logger.writeOne(
+ '',
+ 'error',
+ 'Cosmetic filtering – invalid filter: ' + raw
+ );
+ };
+
+ return entryPoint;
+ })();
+
+    // Coarse-parse one raw filter line and, if it is a static extended
+    // filter, dispatch it to the proper specialized engine (scriptlet
+    // injection, HTML filtering or cosmetic filtering).
+    //
+    //   raw:    the filter, as found in a filter list
+    //   writer: where the specialized engines write their compiled output
+    //
+    // Returns true if the line was taken care of as an extended filter --
+    // this includes filters which are recognized but deliberately discarded
+    // -- and false if the line is not an extended filter, in which case the
+    // caller is expected to process it further.
+    //
+    // The details of the filter are handed to the specialized engines through
+    // the shared `parsed` object (hostnames, exception, suffix).
+    api.compile = function(raw, writer) {
+        var lpos = raw.indexOf('#');
+        if ( lpos === -1 ) { return false; }
+        var rpos = lpos + 1;
+        if ( raw.charCodeAt(rpos) !== 0x23 /* '#' */ ) {
+            rpos = raw.indexOf('#', rpos + 1);
+            if ( rpos === -1 ) { return false; }
+        }
+
+        // Coarse-check that the anchor is valid.
+        // `##`: l = 1
+        // `#@#`, `#$#`, `#%#`, `#?#`: l = 2
+        // `#@$#`, `#@%#`, `#@?#`: l = 3
+        if ( (rpos - lpos) > 3 ) { return false; }
+
+        // Extract the selector.
+        var suffix = parsed.suffix = raw.slice(rpos + 1).trim();
+        if ( suffix.length === 0 ) { return false; }
+
+        // https://github.com/gorhill/uBlock/issues/952
+        //   Find out whether we are dealing with an Adguard-specific cosmetic
+        //   filter, and if so, translate it if supported, or discard it if not
+        //   supported.
+        //   We have an Adguard/ABP cosmetic filter if and only if the
+        //   character is `$`, `%` or `?`, otherwise it's not a cosmetic
+        //   filter.
+        var cCode = raw.charCodeAt(rpos - 1);
+        if ( cCode !== 0x23 /* '#' */ && cCode !== 0x40 /* '@' */ ) {
+            // Adguard's scriptlet injection: not supported.
+            if ( cCode === 0x25 /* '%' */ ) { return true; }
+            // Not a known extended filter.
+            if ( cCode !== 0x24 /* '$' */ && cCode !== 0x3F /* '?' */ ) {
+                return false;
+            }
+            // Adguard's style injection: translate to uBO's format.
+            if ( cCode === 0x24 /* '$' */ ) {
+                suffix = translateAdguardCSSInjectionFilter(suffix);
+                if ( suffix === '' ) { return true; }
+                // Only `parsed` is handed to the specialized engines: the
+                // translated selector must be stored there, or else the
+                // untranslated `selector { ... }` text would be compiled.
+                parsed.suffix = suffix;
+            }
+        }
+
+        // Exception filter?
+        parsed.exception = raw.charCodeAt(lpos + 1) === 0x40 /* '@' */;
+
+        // Extract the hostname(s), punycode if required.
+        if ( lpos === 0 ) {
+            parsed.hostnames = emptyArray;
+        } else {
+            var prefix = raw.slice(0, lpos);
+            parsed.hostnames = prefix.split(reHostnameSeparator);
+            if ( reHasUnicode.test(prefix) ) {
+                // Convert each hostname in place: `parsed.hostnames` must
+                // remain an array of hostnames.
+                for ( var i = 0; i < parsed.hostnames.length; i++ ) {
+                    parsed.hostnames[i] = toASCIIHostname(parsed.hostnames[i]);
+                }
+            }
+        }
+
+        if ( suffix.startsWith('script:') ) {
+            // Scriptlet injection engine.
+            if ( suffix.startsWith('script:inject') ) {
+                µb.scriptletFilteringEngine.compile(parsed, writer);
+                return true;
+            }
+            // Script tag filtering: courtesy-conversion to HTML filtering.
+            if ( suffix.startsWith('script:contains') ) {
+                console.info(
+                    'uBO: ##script:contains(...) is deprecated, ' +
+                    'converting to ##^script:has-text(...)'
+                );
+                suffix = parsed.suffix = suffix.replace(
+                    /^script:contains/,
+                    '^script:has-text'
+                );
+            }
+        }
+
+        // HTML filtering engine.
+        // TODO: evaluate converting Adguard's `$$` syntax into uBO's HTML
+        //       filtering syntax.
+        if ( suffix.charCodeAt(0) === 0x5E /* '^' */ ) {
+            µb.htmlFilteringEngine.compile(parsed, writer);
+            return true;
+        }
+
+        // Cosmetic filtering engine.
+        µb.cosmeticFilteringEngine.compile(parsed, writer);
+        return true;
+    };
+
+ // Hand the same `reader` and `options` to each of the three specialized
+ // engines in turn, so that each loads its own compiled filters.
+ api.fromCompiledContent = function(reader, options) {
+ µb.cosmeticFilteringEngine.fromCompiledContent(reader, options);
+ µb.scriptletFilteringEngine.fromCompiledContent(reader, options);
+ µb.htmlFilteringEngine.fromCompiledContent(reader, options);
+ };
+
+ // Aggregate the selfies of the three specialized engines into one object;
+ // the property names used here are those read back by api.fromSelfie().
+ api.toSelfie = function() {
+ return {
+ cosmetic: µb.cosmeticFilteringEngine.toSelfie(),
+ scriptlets: µb.scriptletFilteringEngine.toSelfie(),
+ html: µb.htmlFilteringEngine.toSelfie()
+
+ };
+ };
+
+ // Restore each specialized engine from its part of `selfie`, an object
+ // shaped as the one returned by api.toSelfie().
+ api.fromSelfie = function(selfie) {
+ µb.cosmeticFilteringEngine.fromSelfie(selfie.cosmetic);
+ µb.scriptletFilteringEngine.fromSelfie(selfie.scriptlets);
+ µb.htmlFilteringEngine.fromSelfie(selfie.html);
+ };
+
+ return api;
+})();
+
+/******************************************************************************/
diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js
index c32281520299c..d228282eebffe 100644
--- a/src/js/static-net-filtering.js
+++ b/src/js/static-net-filtering.js
@@ -2116,6 +2116,9 @@ FilterContainer.prototype.compile = function(raw, writer) {
return false;
}
+ // 0 = network filters
+ writer.select(0);
+
// Pure hostnames, use more efficient dictionary lookup
// https://github.com/chrisaljoudi/uBlock/issues/665
// Create a dict keyed on request type etc.
@@ -2268,6 +2271,9 @@ FilterContainer.prototype.fromCompiledContent = function(reader) {
args, bits, bucket, entry,
tokenHash, fdata, fingerprint;
+ // 0 = network filters
+ reader.select(0);
+
while ( reader.next() === true ) {
args = reader.args();
bits = args[0];
diff --git a/src/js/storage.js b/src/js/storage.js
index 256e80d15298b..d8dfaf1d07bd9 100644
--- a/src/js/storage.js
+++ b/src/js/storage.js
@@ -347,7 +347,7 @@
vAPI.storage.set({ 'availableFilterLists': µb.availableFilterLists });
µb.staticNetFilteringEngine.freeze();
µb.redirectEngine.freeze();
- µb.cosmeticFilteringEngine.freeze();
+ µb.staticExtFilteringEngine.freeze();
µb.selfieManager.destroy();
};
@@ -543,7 +543,7 @@
var onDone = function() {
µb.staticNetFilteringEngine.freeze();
- µb.cosmeticFilteringEngine.freeze();
+ µb.staticExtFilteringEngine.freeze();
µb.redirectEngine.freeze();
vAPI.storage.set({ 'availableFilterLists': µb.availableFilterLists });
@@ -586,7 +586,7 @@
µb.availableFilterLists = lists;
µb.redirectEngine.reset();
- µb.cosmeticFilteringEngine.reset();
+ µb.staticExtFilteringEngine.reset();
µb.staticNetFilteringEngine.reset();
µb.selfieManager.destroy();
µb.staticFilteringReverseLookup.resetLists();
@@ -703,23 +703,22 @@
/******************************************************************************/
µBlock.compileFilters = function(rawText) {
- var networkFilters = new this.CompiledLineWriter(),
- cosmeticFilters = new this.CompiledLineWriter();
+ var writer = new this.CompiledLineWriter();
// Useful references:
// https://adblockplus.org/en/filter-cheatsheet
// https://adblockplus.org/en/filters
var staticNetFilteringEngine = this.staticNetFilteringEngine,
- cosmeticFilteringEngine = this.cosmeticFilteringEngine,
+ staticExtFilteringEngine = this.staticExtFilteringEngine,
reIsWhitespaceChar = /\s/,
reMaybeLocalIp = /^[\d:f]/,
- reIsLocalhostRedirect = /\s+(?:broadcasthost|local|localhost|localhost\.localdomain)(?=\s|$)/,
+ reIsLocalhostRedirect = /\s+(?:broadcasthost|local|localhost|localhost\.localdomain)\b/,
reLocalIp = /^(?:0\.0\.0\.0|127\.0\.0\.1|::1|fe80::1%lo0)/,
- line, lineRaw, c, pos,
+ line, c, pos,
lineIter = new this.LineIterator(rawText);
while ( lineIter.eot() === false ) {
- line = lineRaw = lineIter.next().trim();
+ line = lineIter.next().trim();
// rhill 2014-04-18: The trim is important here, as without it there
// could be a lingering `\r` which would cause problems in the
@@ -733,9 +732,7 @@
// Parse or skip cosmetic filters
// All cosmetic filters are caught here
- if ( cosmeticFilteringEngine.compile(line, cosmeticFilters) ) {
- continue;
- }
+ if ( staticExtFilteringEngine.compile(line, writer) ) { continue; }
// Whatever else is next can be assumed to not be a cosmetic filter
@@ -767,12 +764,10 @@
if ( line.length === 0 ) { continue; }
- staticNetFilteringEngine.compile(line, networkFilters);
+ staticNetFilteringEngine.compile(line, writer);
}
- return networkFilters.toString() +
- '\n/* end of network - start of cosmetic */\n' +
- cosmeticFilters.toString();
+ return writer.toString();
};
/******************************************************************************/
@@ -783,15 +778,12 @@
µBlock.applyCompiledFilters = function(rawText, firstparty) {
if ( rawText === '' ) { return; }
- var separator = '\n/* end of network - start of cosmetic */\n',
- pos = rawText.indexOf(separator),
- reader = new this.CompiledLineReader(rawText.slice(0, pos));
+ var reader = new this.CompiledLineReader(rawText);
this.staticNetFilteringEngine.fromCompiledContent(reader);
- this.cosmeticFilteringEngine.fromCompiledContent(
- reader.reset(rawText.slice(pos + separator.length)),
- this.userSettings.ignoreGenericCosmeticFilters,
- !firstparty && !this.userSettings.parseAllABPHideFilters
- );
+ this.staticExtFilteringEngine.fromCompiledContent(reader, {
+ skipGenericCosmetic: this.userSettings.ignoreGenericCosmeticFilters,
+ skipCosmetic: !firstparty && !this.userSettings.parseAllABPHideFilters
+ });
};
/******************************************************************************/
@@ -885,7 +877,7 @@
availableFilterLists: this.availableFilterLists,
staticNetFilteringEngine: this.staticNetFilteringEngine.toSelfie(),
redirectEngine: this.redirectEngine.toSelfie(),
- cosmeticFilteringEngine: this.cosmeticFilteringEngine.toSelfie()
+ staticExtFilteringEngine: this.staticExtFilteringEngine.toSelfie()
};
vAPI.cacheStorage.set({ selfie: selfie });
}.bind(µBlock);
@@ -1068,7 +1060,7 @@
this.availableFilterLists.hasOwnProperty(details.assetKey) === false ||
this.selectedFilterLists.indexOf(details.assetKey) === -1
) {
- return false;
+ return;
}
}
// https://github.com/gorhill/uBlock/issues/2594
@@ -1077,10 +1069,10 @@
this.hiddenSettings.ignoreRedirectFilters === true &&
this.hiddenSettings.ignoreScriptInjectFilters === true
) {
- return false;
+ return;
}
}
- return;
+ return true;
}
// Compile the list while we have the raw version in memory
diff --git a/src/js/traffic.js b/src/js/traffic.js
index eaea84158b4a6..1ff0c01d0913a 100644
--- a/src/js/traffic.js
+++ b/src/js/traffic.js
@@ -480,9 +480,10 @@ onBeforeMaybeSpuriousCSPReport.textDecoder = undefined;
/******************************************************************************/
// To handle:
-// - inline script tags
-// - websockets
-// - media elements larger than n kB
+// - Media elements larger than n kB
+// - Scriptlet injection (requires ability to modify response body)
+// - HTML filtering (requires ability to modify response body)
+// - CSP injection
var onHeadersReceived = function(details) {
// Do not interfere with behind-the-scene requests.
@@ -490,15 +491,17 @@ var onHeadersReceived = function(details) {
if ( vAPI.isBehindTheSceneTabId(tabId) ) { return; }
var µb = µBlock,
- requestType = details.type;
+ requestType = details.type,
+ isRootDoc = requestType === 'main_frame',
+ isDoc = isRootDoc || requestType === 'sub_frame';
- if ( requestType === 'main_frame' ) {
+ if ( isRootDoc ) {
µb.tabContextManager.push(tabId, details.url);
}
var pageStore = µb.pageStoreFromTabId(tabId);
if ( pageStore === null ) {
- if ( requestType !== 'main_frame' ) { return; }
+ if ( isRootDoc === false ) { return; }
pageStore = µb.bindTabToPageStats(tabId, 'beforeRequest');
}
if ( pageStore.getNetFilteringSwitch() === false ) { return; }
@@ -507,24 +510,283 @@ var onHeadersReceived = function(details) {
return foilLargeMediaElement(pageStore, details);
}
+ if ( isDoc && µb.canFilterResponseBody ) {
+ filterDocument(details);
+ }
+
// https://github.com/gorhill/uBlock/issues/2813
// Disable the blocking of large media elements if the document is itself
// a media element: the resource was not prevented from loading so no
// point to further block large media elements for the current document.
- if ( requestType === 'main_frame' ) {
+ if ( isRootDoc ) {
if ( reMediaContentTypes.test(headerValueFromName('content-type', details.responseHeaders)) ) {
pageStore.allowLargeMediaElementsUntil = Date.now() + 86400000;
}
return injectCSP(pageStore, details);
}
- if ( requestType === 'sub_frame' ) {
+ if ( isDoc ) {
return injectCSP(pageStore, details);
}
};
var reMediaContentTypes = /^(?:audio|image|video)\//;
+/*******************************************************************************
+
+ The response body filterer is responsible for:
+
+ - Scriptlet filtering
+ - HTML filtering
+
+ In the spirit of efficiency, the response body filterer works this way:
+
+ If:
+ - HTML filtering: no.
+ - Scriptlet filtering: no.
+ Then:
+ No response body filtering is initiated.
+
+ If:
+ - HTML filtering: no.
+ - Scriptlet filtering: yes.
+ Then:
+ Inject scriptlets before first chunk of response body data reported
+ then immediately disconnect response body data listener.
+
+ If:
+ - HTML filtering: yes.
+ - Scriptlet filtering: no/yes.
+ Then:
+ Assemble all response body data into a single buffer. Once all the
+ response data has been received, create a document from it. Then:
+ - Inject scriptlets in the resulting DOM.
+ - Remove all DOM elements matching HTML filters.
+ Then serialize the resulting modified document as the new response
+ body.
+
+ This way, the overhead is minimal for when only scriptlets need to be
+ injected.
+
+ If the platform does not support response body filtering, the scriptlets
+ will be injected the old way, through the content script.
+
+**/
+
+var filterDocument = (function() {
+    // State shared by all the helpers of the response body filterer:
+    // - filterers: stream filter => details of the request being filtered
+    // - reDoctype: matches a leading DOCTYPE declaration, case-insensitively
+    //   (scriptlets are injected right after it)
+    // - reJustASCII: tells whether a string is made only of ASCII characters
+    // - the parser/serializer/decoder/encoder instances are created lazily
+    //   and reused; `textDecoderCharset` is the charset `textDecoder` was
+    //   explicitly created for, if any.
+    var µb = µBlock,
+        filterers = new Map(),
+        reDoctype = /^\s*<!doctype[^>]+?>/i,
+        reJustASCII = /^[\x00-\x7E]*$/,
+        domParser, xmlSerializer,
+        textDecoderCharset, textDecoder, textEncoder;
+
+    // Fast path for when only scriptlets are to be injected, into a document
+    // for which no specific charset was set aside: the scriptlets are written
+    // to the stream right after the DOCTYPE found in the first chunk of data,
+    // then the stream filter is disconnected.
+    //
+    //   filterer:      details of the request being filtered
+    //   responseBytes: first chunk of response data (ArrayBuffer)
+    //
+    // Returns true if the job was completed here, false if the caller must
+    // fall back to buffering the whole response body.
+    var streamJobDone = function(filterer, responseBytes) {
+        if (
+            filterer.scriptlets === undefined ||
+            filterer.selectors !== undefined ||
+            filterer.charset !== undefined
+        ) {
+            return false;
+        }
+        // The decoder is shared with the full-document code path, which may
+        // have left behind a decoder created for another charset: here we
+        // need the default one.
+        if ( textDecoder === undefined || textDecoderCharset !== undefined ) {
+            textDecoder = new TextDecoder();
+            textDecoderCharset = undefined;
+        }
+        // We need to insert after DOCTYPE, or else the browser may fall into
+        // quirks mode.
+        var responseStr = textDecoder.decode(responseBytes);
+        var match = reDoctype.exec(responseStr);
+        if ( match === null ) { return false; }
+        filterers.delete(filterer.stream);
+        if ( textEncoder === undefined ) {
+            textEncoder = new TextEncoder();
+        }
+        // When the DOCTYPE is pure ASCII, character count equals byte count
+        // and the original bytes can be reused as is.
+        var beforeByteLength = match.index + match[0].length;
+        var beforeBytes = reJustASCII.test(match[0]) ?
+            new Uint8Array(responseBytes, 0, beforeByteLength) :
+            textEncoder.encode(responseStr.slice(0, beforeByteLength));
+        filterer.stream.write(beforeBytes);
+        // The scriptlets must be wrapped in a script tag in order to be
+        // executed by the page.
+        filterer.stream.write(
+            textEncoder.encode('<script>' + filterer.scriptlets + '</script>')
+        );
+        filterer.stream.write(
+            new Uint8Array(responseBytes, beforeBytes.byteLength)
+        );
+        filterer.stream.disconnect();
+        return true;
+    };
+
+    // Write the final response body then close the stream: `buffer` when
+    // provided, otherwise whatever was accumulated in `filterer.buffer`.
+    var streamClose = function(filterer, buffer) {
+        var payload = buffer !== undefined ? buffer : filterer.buffer;
+        if ( payload !== undefined ) {
+            filterer.stream.write(payload);
+        }
+        filterer.stream.close();
+    };
+
+ // `ondata` handler of a stream filter (`this` is the stream filter).
+ // Unless the fast path completes the job on the first chunk, the chunks
+ // are appended to `filterer.buffer` until the stream stops.
+ var onStreamData = function(ev) {
+ var filterer = filterers.get(this);
+ // Stream no longer tracked: pass the data through and detach.
+ if ( filterer === undefined ) {
+ this.write(ev.data);
+ this.disconnect();
+ return;
+ }
+ if (
+ this.status !== 'transferringdata' &&
+ this.status !== 'finishedtransferringdata'
+ ) {
+ filterers.delete(this);
+ this.disconnect();
+ return;
+ }
+ // TODO: possibly improve buffer growth, if benchmarking shows it's
+ // worth it.
+ // A null buffer means this is the first chunk: try the fast path first.
+ if ( filterer.buffer === null ) {
+ if ( streamJobDone(filterer, ev.data) ) { return; }
+ filterer.buffer = new Uint8Array(ev.data);
+ return;
+ }
+ // Grow the buffer: allocate a new one and copy both parts into it.
+ var buffer = new Uint8Array(
+ filterer.buffer.byteLength +
+ ev.data.byteLength
+ );
+ buffer.set(filterer.buffer);
+ buffer.set(new Uint8Array(ev.data), filterer.buffer.byteLength);
+ filterer.buffer = buffer;
+ };
+
+ // `onstop` handler of a stream filter (`this` is the stream filter).
+ // The buffered response body is parsed into a document, HTML filters and
+ // scriptlets are applied to it, and the result is written back to the
+ // stream, which is then closed.
+ var onStreamStop = function() {
+ var filterer = filterers.get(this);
+ filterers.delete(this);
+ // Nothing tracked or nothing buffered: just close the stream.
+ if ( filterer === undefined || filterer.buffer === null ) {
+ this.close();
+ return;
+ }
+ if ( this.status !== 'finishedtransferringdata' ) { return; }
+
+ if ( domParser === undefined ) {
+ domParser = new DOMParser();
+ xmlSerializer = new XMLSerializer();
+ }
+ if ( textEncoder === undefined ) {
+ textEncoder = new TextEncoder();
+ }
+
+ // In case of unknown charset, assume utf-8.
+ // The cached decoder is reused only if created for the same charset.
+ if ( filterer.charset !== textDecoderCharset ) {
+ textDecoder = undefined;
+ }
+ if ( textDecoder === undefined ) {
+ try {
+ textDecoder = new TextDecoder(filterer.charset);
+ textDecoderCharset = filterer.charset;
+ } catch(ex) {
+ textDecoder = new TextDecoder();
+ textDecoderCharset = undefined;
+ }
+ }
+
+ var doc = domParser.parseFromString(
+ textDecoder.decode(filterer.buffer),
+ 'text/html'
+ );
+
+ var modified = false;
+ if ( filterer.selectors !== undefined ) {
+ if ( µb.htmlFilteringEngine.apply(doc, filterer) ) {
+ modified = true;
+ }
+ }
+ if ( filterer.scriptlets !== undefined ) {
+ if ( µb.scriptletFilteringEngine.apply(doc, filterer) ) {
+ modified = true;
+ }
+ }
+
+ // Document left untouched: send the buffered bytes as they were received.
+ if ( modified === false ) {
+ streamClose(filterer);
+ return;
+ }
+
+ // If the charset of the document was not utf-8, we need to change it
+ // to utf-8.
+ if ( textDecoderCharset !== undefined ) {
+ var meta = doc.createElement('meta');
+ meta.setAttribute('charset', 'utf-8');
+ doc.head.insertBefore(meta, doc.head.firstChild);
+ }
+
+ // https://stackoverflow.com/questions/6088972/get-doctype-of-an-html-as-string-with-javascript/10162353#10162353
+ var doctypeStr = doc.doctype instanceof Object ?
+ xmlSerializer.serializeToString(doc.doctype) + '\n' :
+ '';
+
+ // Serialize the modified document as the new response body.
+ streamClose(
+ filterer,
+ textEncoder.encode(doctypeStr + doc.documentElement.outerHTML)
+ );
+ };
+
+ // `onerror` handler of a stream filter: stop tracking the stream.
+ var onStreamError = function() {
+ filterers.delete(this);
+ };
+
+ // Entry point: given the `details` of a document request for which
+ // response headers were received, set up a stream filter on the response
+ // body if there are HTML filters and/or scriptlets which apply to it.
+ return function(details) {
+ var hostname = µb.URI.hostnameFromURI(details.url);
+ if ( hostname === '' ) { return; }
+
+ var domain = µb.URI.domainFromHostname(hostname);
+
+ // This object serves both as the lookup context passed to the filtering
+ // engines and as the per-stream state kept in `filterers`.
+ var request = {
+ stream: undefined,
+ tabId: details.tabId,
+ url: details.url,
+ hostname: hostname,
+ domain: domain,
+ entity: µb.URI.entityFromDomain(domain),
+ selectors: undefined,
+ scriptlets: undefined,
+ buffer: null,
+ charset: undefined
+ };
+ request.selectors = µb.htmlFilteringEngine.retrieve(request);
+ request.scriptlets = µb.scriptletFilteringEngine.retrieve(request);
+
+ // Nothing to apply: leave the response body alone.
+ if (
+ request.selectors === undefined &&
+ request.scriptlets === undefined
+ ) {
+ return;
+ }
+
+ // When a content type is present, it must be that of a document. A
+ // charset other than utf-8 is remembered for decoding purpose.
+ var headers = details.responseHeaders,
+ contentType = headerValueFromName('content-type', headers);
+ if ( contentType !== '' ) {
+ if ( reContentTypeDocument.test(contentType) === false ) { return; }
+ var match = reContentTypeCharset.exec(contentType);
+ if ( match !== null ) {
+ var charset = match[1].toLowerCase();
+ if ( charset !== 'utf-8' ) {
+ request.charset = charset;
+ }
+ }
+ }
+ // https://bugzilla.mozilla.org/show_bug.cgi?id=1426789
+ if ( headerValueFromName('content-disposition', headers) ) { return; }
+
+ var stream = request.stream =
+ vAPI.net.webRequest.filterResponseData(details.requestId);
+ stream.ondata = onStreamData;
+ stream.onstop = onStreamStop;
+ stream.onerror = onStreamError;
+ filterers.set(stream, request);
+ };
+})();
+
+// Matches the `Content-Type` header value of HTML and XHTML documents. The
+// `+` of `xhtml+xml` must be escaped, or else it acts as a quantifier and
+// `application/xhtml+xml` is never matched.
+var reContentTypeDocument = /^(?:text\/html|application\/xhtml\+xml)/i;
+// Captures the charset, optionally quoted, from a `Content-Type` header value.
+var reContentTypeCharset = /charset=['"]?([^'" ]+)/i;
+
/******************************************************************************/
var injectCSP = function(pageStore, details) {
diff --git a/src/js/uritools.js b/src/js/uritools.js
index eaca7731d51d7..a988af4e0053d 100644
--- a/src/js/uritools.js
+++ b/src/js/uritools.js
@@ -1,7 +1,7 @@
/*******************************************************************************
uBlock Origin - a browser extension to block requests.
- Copyright (C) 2014-2016 Raymond Hill
+ Copyright (C) 2014-2017 Raymond Hill
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -309,6 +309,13 @@ var psl = publicSuffixList;
/******************************************************************************/
+URI.entityFromDomain = function(domain) {
+    // The entity is whatever precedes the first dot of `domain`, followed
+    // by `.*`. A domain without any dot has no entity: return ''.
+    var firstDot = domain.indexOf('.');
+    if ( firstDot === -1 ) { return ''; }
+    return domain.slice(0, firstDot) + '.*';
+};
+
+/******************************************************************************/
+
URI.pathFromURI = function(uri) {
var matches = rePathFromURI.exec(uri);
return matches !== null ? matches[1] : '';
diff --git a/src/js/utils.js b/src/js/utils.js
index 709937524fe6c..918a49b1b4f9e 100644
--- a/src/js/utils.js
+++ b/src/js/utils.js
@@ -225,7 +225,9 @@
/******************************************************************************/
µBlock.CompiledLineWriter = function() {
- this.output = [];
+ this.blockId = undefined; // id of the currently selected block; none until select() is called
+ this.block = undefined; // array of serialized lines of the selected block; push() appends to it
+ this.blocks = new Map(); // block id => array of serialized lines
this.stringifier = JSON.stringify;
};
@@ -235,46 +237,81 @@
µBlock.CompiledLineWriter.prototype = {
push: function(args) {
- this.output[this.output.length] = this.stringifier(args);
+ this.block[this.block.length] = this.stringifier(args); // append to the selected block; `this.block` is undefined until select() has been called
+ },
+ select: function(blockId) { // make `blockId` the target of subsequent push() calls
+ if ( blockId === this.blockId ) { return; } // already selected
+ this.blockId = blockId;
+ this.block = this.blocks.get(blockId);
+ if ( this.block === undefined ) { // first use of this id: create and register its block
+ this.blocks.set(blockId, (this.block = []));
+ }
},
toString: function() {
- return this.output.join('\n');
+ var result = []; // per block: start marker, newline-joined lines, end marker
+ for ( var entry of this.blocks ) { // entry is [blockId, lines]
+ if ( entry[1].length === 0 ) { continue; } // blocks without lines are not emitted
+ result.push(
+ '#block-start-' + entry[0],
+ entry[1].join('\n'),
+ '#block-end-' + entry[0]
+ );
+ }
+ return result.join('\n');
}
};
-µBlock.CompiledLineReader = function(raw) {
- this.reset(raw);
+/******************************************************************************/
+
+µBlock.CompiledLineReader = function(raw, blockId) {
+ // Reads text made of `#block-start-N` ... `#block-end-N` sections, each
+ // section holding newline-separated lines.
+ // raw: the serialized text.
+ // blockId: optional; when not undefined, that block is selected at once.
+ this.block = '';
+ this.len = 0;
+ this.offset = 0;
+ this.line = '';
this.parser = JSON.parse;
+ this.blocks = new Map();
+ var reBlockStart = /^#block-start-(\d+)\n/gm,
+ match = reBlockStart.exec(raw),
+ beg, end;
+ while ( match !== null ) {
+ beg = match.index + match[0].length;
+ end = raw.indexOf('#block-end-' + match[1], beg);
+ // A start marker without its end marker means the data is truncated or
+ // corrupted: ignore the unterminated block and stop scanning. Without
+ // this guard `lastIndex` would be set to -1, which restarts the search
+ // at the beginning of `raw` and matches the same block forever.
+ if ( end === -1 ) { break; }
+ this.blocks.set(parseInt(match[1], 10), raw.slice(beg, end));
+ reBlockStart.lastIndex = end;
+ match = reBlockStart.exec(raw);
+ }
+ if ( blockId !== undefined ) {
+ this.select(blockId);
+ }
};
µBlock.CompiledLineReader.prototype = {
- reset: function(raw) {
- this.input = raw;
- this.len = raw.length;
- this.offset = 0;
- this.s = '';
- return this;
- },
next: function() {
if ( this.offset === this.len ) {
- this.s = '';
+ this.line = ''; // end of the selected block reached: no current line
return false;
}
- var pos = this.input.indexOf('\n', this.offset);
+ var pos = this.block.indexOf('\n', this.offset); // end of the next line within the selected block
if ( pos !== -1 ) {
- this.s = this.input.slice(this.offset, pos);
+ this.line = this.block.slice(this.offset, pos); // line content, newline excluded
this.offset = pos + 1;
} else {
- this.s = this.input.slice(this.offset);
+ this.line = this.block.slice(this.offset); // last line, not newline-terminated
this.offset = this.len;
}
return true;
},
+ select: function(blockId) { // switch to block `blockId` and rewind to its start
+ this.block = this.blocks.get(blockId) || ''; // an unknown id selects an empty block
+ this.len = this.block.length;
+ this.offset = 0;
+ return this; // the reader itself is returned
+ },
fingerprint: function() {
- return this.s;
+ return this.line; // raw text of the current line
},
args: function() {
- return this.parser(this.s);
+ return this.parser(this.line); // current line run through `this.parser`
}
};