Skip to content

Commit

Permalink
Bug 1845095: Bloom filter for fast-rejecting :has(). r=emilio
Browse files Browse the repository at this point in the history
  • Loading branch information
dshin-moz committed Jul 27, 2023
1 parent 191027b commit 516b5ac
Show file tree
Hide file tree
Showing 12 changed files with 246 additions and 20 deletions.
5 changes: 4 additions & 1 deletion selectors/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use crate::bloom::BloomFilter;
use crate::nth_index_cache::{NthIndexCache, NthIndexCacheInner};
use crate::parser::{Selector, SelectorImpl};
use crate::relative_selector::cache::RelativeSelectorCache;
use crate::relative_selector::filter::RelativeSelectorFilterMap;
use crate::tree::{Element, OpaqueElement};

/// What kind of selector matching mode we should use.
Expand Down Expand Up @@ -142,13 +143,15 @@ impl RelativeSelectorMatchingState {
}
}

/// Set of caches that speed up expensive selector matches.
/// Set of caches (and cache-like structures, such as the relative selector
/// bloom filter map) that speed up expensive selector matches.
#[derive(Default)]
pub struct SelectorCaches {
/// A cache to speed up nth-index-like selectors.
pub nth_index: NthIndexCache,
/// A cache to speed up relative selector matches. See module documentation.
pub relative_selector: RelativeSelectorCache,
/// A map of bloom filters, keyed by element and traversal kind, used to
/// fast-reject relative selector matches before the full match is attempted.
pub relative_selector_filter_map: RelativeSelectorFilterMap,
}

/// Data associated with the matching process for a element. This context is
Expand Down
19 changes: 19 additions & 0 deletions selectors/matching.rs
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,25 @@ fn matches_relative_selectors<E: Element>(
// Did not match, continue on.
continue;
}
// See if we can fast-reject.
if context
.selector_caches
.relative_selector_filter_map
.fast_reject(
element,
relative_selector,
context.quirks_mode(),
)
{
// Alright, add as unmatched to cache.
context.selector_caches.relative_selector.add(
element.opaque(),
relative_selector,
RelativeSelectorCachedMatch::NotMatched,
);
// Then continue on.
continue;
}

let matched = matches_relative_selector(relative_selector, element, context, rightmost);
context.selector_caches.relative_selector.add(
Expand Down
49 changes: 35 additions & 14 deletions selectors/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,9 +203,9 @@ macro_rules! with_all_bounds {
pub trait SelectorImpl: Clone + Debug + Sized + 'static {
type ExtraMatchingData<'a>: Sized + Default;
type AttrValue: $($InSelector)*;
type Identifier: $($InSelector)*;
type LocalName: $($InSelector)* + Borrow<Self::BorrowedLocalName>;
type NamespaceUrl: $($CommonBounds)* + Default + Borrow<Self::BorrowedNamespaceUrl>;
type Identifier: $($InSelector)* + PrecomputedHash;
type LocalName: $($InSelector)* + Borrow<Self::BorrowedLocalName> + PrecomputedHash;
type NamespaceUrl: $($CommonBounds)* + Default + Borrow<Self::BorrowedNamespaceUrl> + PrecomputedHash;
type NamespacePrefix: $($InSelector)* + Default;
type BorrowedNamespaceUrl: ?Sized + Eq;
type BorrowedLocalName: ?Sized + Eq;
Expand Down Expand Up @@ -512,18 +512,17 @@ pub struct AncestorHashes {
pub packed_hashes: [u32; 3],
}

fn collect_ancestor_hashes<Impl: SelectorImpl>(
iter: SelectorIter<Impl>,
pub(crate) fn collect_selector_hashes<'a, Impl: SelectorImpl, Iter>(
iter: Iter,
quirks_mode: QuirksMode,
hashes: &mut [u32; 4],
len: &mut usize,
create_inner_iterator: fn(&'a Selector<Impl>) -> Iter,
) -> bool
where
Impl::Identifier: PrecomputedHash,
Impl::LocalName: PrecomputedHash,
Impl::NamespaceUrl: PrecomputedHash,
Iter: Iterator<Item = &'a Component<Impl>>,
{
for component in AncestorIter::new(iter) {
for component in iter {
let hash = match *component {
Component::LocalName(LocalName {
ref name,
Expand Down Expand Up @@ -579,7 +578,13 @@ where
// in the filter if there's more than one selector, as that'd
// exclude elements that may match one of the other selectors.
if list.len() == 1 &&
!collect_ancestor_hashes(list[0].iter(), quirks_mode, hashes, len)
!collect_selector_hashes(
create_inner_iterator(&list[0]),
quirks_mode,
hashes,
len,
create_inner_iterator,
)
{
return false;
}
Expand All @@ -597,12 +602,17 @@ where
true
}

/// Collects hashes for the ancestor bloom filter from `iter` into `hashes`,
/// updating `len` with the number of slots in use.
///
/// This is a thin wrapper over `collect_selector_hashes`: the outer selector
/// is walked through `AncestorIter::new`, and nested selectors are walked
/// through an `AncestorIter` wrapped directly around their own iterator.
/// The boolean returned by `collect_selector_hashes` is discarded.
fn collect_ancestor_hashes<Impl: SelectorImpl>(
    iter: SelectorIter<Impl>,
    quirks_mode: QuirksMode,
    hashes: &mut [u32; 4],
    len: &mut usize,
) {
    let ancestors = AncestorIter::new(iter);
    collect_selector_hashes(ancestors, quirks_mode, hashes, len, |inner| {
        AncestorIter(inner.iter())
    });
}

impl AncestorHashes {
pub fn new<Impl: SelectorImpl>(selector: &Selector<Impl>, quirks_mode: QuirksMode) -> Self
where
Impl::Identifier: PrecomputedHash,
Impl::LocalName: PrecomputedHash,
Impl::NamespaceUrl: PrecomputedHash,
{
// Compute ancestor hashes for the bloom filter.
let mut hashes = [0u32; 4];
Expand Down Expand Up @@ -830,6 +840,17 @@ impl<Impl: SelectorImpl> Selector<Impl> {
}
}

/// Returns an iterator over this selector in matching order (right-to-left)
/// that stops short of the leftmost `offset` components.
///
/// # Panics
///
/// Panics if `offset` is greater than `self.len()`, since the end of the
/// slice is computed as `self.len() - offset`.
#[inline]
pub fn iter_until(&self, offset: usize) -> SelectorIter<Impl> {
    // Dropping the tail of the slice drops the leftmost components.
    let end = self.len() - offset;
    SelectorIter {
        iter: self.0.slice()[..end].iter(),
        next_combinator: None,
    }
}

/// Returns the combinator at index `index` (zero-indexed from the right),
/// or panics if the component is not a combinator.
#[inline]
Expand Down
158 changes: 158 additions & 0 deletions selectors/relative_selector/filter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */

//! Bloom filter for relative selectors.
use fxhash::FxHashMap;

use crate::bloom::BloomFilter;
use crate::context::QuirksMode;
use crate::parser::{
collect_selector_hashes, RelativeSelector, RelativeSelectorMatchHint,
};
use crate::tree::{Element, OpaqueElement};
use crate::SelectorImpl;

/// State stored in `RelativeSelectorFilterMap` for one `Key`.
enum Entry {
/// Filter lookup happened once. Construction of the filter is expensive,
/// so the first lookup only records this marker; the filter itself is
/// built when the same key is looked up again.
Lookup,
/// Filter lookup happened more than once, and the filter for this element's
/// subtree traversal is constructed. Could use special handling for pseudo-classes
/// such as `:hover` and `:focus`, see Bug 1845503.
HasFilter(Box<BloomFilter>),
}

/// Which elements below an anchor contribute hashes to its bloom filter.
#[derive(Clone, Copy, Hash, Eq, PartialEq)]
enum TraversalKind {
/// Only the element children of the anchor are hashed.
Children,
/// Every element descendant of the anchor is hashed (children, recursively).
Descendants,
}

/// Inserts the hashes of `element`'s element children into `filter`, and of
/// all deeper descendants as well when `kind` is `TraversalKind::Descendants`.
///
/// `element` itself is not hashed. Returns `false` as soon as any visited
/// element's `add_element_unique_hashes` returns `false` (the filter may then
/// be partially populated), and `true` once every element has been visited.
fn add_to_filter<E: Element>(element: &E, filter: &mut BloomFilter, kind: TraversalKind) -> bool {
    let recurse = kind == TraversalKind::Descendants;
    let mut cursor = element.first_element_child();
    while let Some(current) = cursor {
        // Hash the element first, then (if requested) everything below it.
        let hashed = current.add_element_unique_hashes(filter) &&
            (!recurse || add_to_filter(&current, filter, kind));
        if !hashed {
            return false;
        }
        cursor = current.next_sibling_element();
    }
    true
}

/// Map key: the element whose children/descendants were hashed, together with
/// the traversal kind used to collect those hashes.
#[derive(Clone, Copy, Hash, Eq, PartialEq)]
struct Key(OpaqueElement, TraversalKind);

/// Map of bloom filters for fast-rejecting relative selectors.
///
/// Filters are created lazily: a key's first lookup only records a marker,
/// and the filter is built when that key is looked up again.
#[derive(Default)]
pub struct RelativeSelectorFilterMap {
/// Filter state for each (element, traversal kind) pair.
map: FxHashMap<Key, Entry>,
}

/// Returns `true` when `filter` shows that `selector` cannot match: at least
/// one hash collected from the selector is definitely absent from the filter.
/// Returns `false` when no rejection can be made, including when no hashes
/// were collected.
fn fast_reject<Impl: SelectorImpl>(
    selector: &RelativeSelector<Impl>,
    quirks_mode: QuirksMode,
    filter: &BloomFilter,
) -> bool {
    let mut hashes = [0u32; 4];
    let mut len = 0;
    // Inner selectors only contribute their single rightmost compound, because
    // inner selectors can cause breakouts: e.g. `.anchor:has(:is(.a .b) .c)`
    // can match when `.a` is an ancestor of `.anchor`. Hashing `.a` could then
    // fast-reject the subtree for not containing `.a`, even though the selector
    // would match.
    // Technically, when the traversal is a non-sibling subtree, inner selectors
    // could be walked up to the first descendant/child combinator (e.g. in
    // `.anchor:has(:is(.a ~ .b) .c)`, `.a` is guaranteed to be a descendant of
    // `.anchor`). That could be handled separately, but this is simpler.
    collect_selector_hashes(
        selector.selector.iter(),
        quirks_mode,
        &mut hashes,
        &mut len,
        |inner| inner.iter(),
    );
    // One hash the filter has never seen is enough for a definite rejection.
    (0..len).any(|i| !filter.might_contain_hash(hashes[i]))
}

impl RelativeSelectorFilterMap {
    /// Looks up the bloom filter covering `element`'s children or descendants,
    /// depending on `kind`.
    ///
    /// The first lookup for a given element/kind pair only records an
    /// `Entry::Lookup` marker and returns `None`. A later lookup that still
    /// finds the marker tries to build the filter with `add_to_filter`; if that
    /// fails, the marker is left in place and `None` is returned again.
    fn get_filter<E: Element>(&mut self, element: &E, kind: TraversalKind) -> Option<&BloomFilter> {
        let slot = self
            .map
            .entry(Key(element.opaque(), kind))
            .and_modify(|state| {
                // Seen before but no filter yet: build it now.
                if let Entry::Lookup = state {
                    let mut built = BloomFilter::new();
                    // Hash all children/descendants of this element.
                    if add_to_filter(element, &mut built, kind) {
                        *state = Entry::HasFilter(Box::new(built));
                    }
                }
            })
            .or_insert(Entry::Lookup);
        match *slot {
            Entry::HasFilter(ref filter) => Some(&**filter),
            Entry::Lookup => None,
        }
    }

    /// Potentially reject the given selector for this element; returns `true`
    /// only when the selector definitely cannot match.
    ///
    /// This may look redundant next to the relative selector cache, but that
    /// cache is keyed on the selector-element pair, whereas this filter is
    /// keyed on the element and the traversal kind. It therefore helps with
    /// multiple selectors that end up inspecting the same (or, for siblings,
    /// roughly the same) subtree.
    pub fn fast_reject<Impl: SelectorImpl, E: Element>(
        &mut self,
        element: &E,
        selector: &RelativeSelector<Impl>,
        quirks_mode: QuirksMode,
    ) -> bool {
        // Decide whose filter to consult (the parent's for sibling-style hints)
        // and how deep that filter has to reach.
        let (use_parent, kind) = match selector.match_hint {
            // Don't bother.
            RelativeSelectorMatchHint::InNextSibling => return false,
            RelativeSelectorMatchHint::InSibling => (true, TraversalKind::Children),
            RelativeSelectorMatchHint::InNextSiblingSubtree |
            RelativeSelectorMatchHint::InSiblingSubtree => (true, TraversalKind::Descendants),
            RelativeSelectorMatchHint::InSubtree => (false, TraversalKind::Descendants),
            _ => (false, TraversalKind::Children),
        };
        let filter = if use_parent {
            // Use one filter covering all of the parent's children/subtree.
            // This is less likely to reject, especially for sibling subtree
            // matches, but it costs less memory than one filter per sibling.
            match element.parent_element() {
                Some(parent) => self.get_filter(&parent, kind),
                None => return false,
            }
        } else {
            self.get_filter(element, kind)
        };
        filter.map_or(false, |filter| fast_reject(selector, quirks_mode, filter))
    }
}
1 change: 1 addition & 0 deletions selectors/relative_selector/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */

pub mod cache;
pub mod filter;
5 changes: 5 additions & 0 deletions selectors/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
//! between layout and style.
use crate::attr::{AttrSelectorOperation, CaseSensitivity, NamespaceConstraint};
use crate::bloom::BloomFilter;
use crate::matching::{ElementSelectorFlags, MatchingContext};
use crate::parser::SelectorImpl;
use std::fmt::Debug;
Expand Down Expand Up @@ -160,4 +161,8 @@ pub trait Element: Sized + Clone + Debug {
fn ignores_nth_child_selectors(&self) -> bool {
false
}

/// Add hashes unique to this element to the given filter, returning true
/// if any got added.
///
/// NOTE(review): `add_to_filter` in `relative_selector/filter.rs` stops and
/// discards the filter it is building as soon as this returns false, so a
/// false return disables fast-rejection for that whole subtree.
fn add_element_unique_hashes(&self, filter: &mut BloomFilter) -> bool;
}
3 changes: 2 additions & 1 deletion style/bloom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,8 @@ pub fn is_attr_name_excluded_from_filter(atom: &crate::Atom) -> bool {
*atom == atom!("class") || *atom == atom!("id") || *atom == atom!("style")
}

fn each_relevant_element_hash<E, F>(element: E, mut f: F)
/// Gathers all relevant hashes for fast-reject filters from an element.
pub fn each_relevant_element_hash<E, F>(element: E, mut f: F)
where
E: TElement,
F: FnMut(u32),
Expand Down
2 changes: 1 addition & 1 deletion style/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -651,7 +651,7 @@ pub struct ThreadLocalStyleContext<E: TElement> {
/// A checker used to ensure that parallel.rs does not recurse indefinitely
/// even on arbitrarily deep trees. See Gecko bug 1376883.
pub stack_limit_checker: StackLimitChecker,
/// Collection of caches for speeding up expensive selector matches.
/// Collection of caches (and cache-like structures) for speeding up expensive selector matches.
pub selector_caches: SelectorCaches,
}

Expand Down
8 changes: 6 additions & 2 deletions style/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -312,8 +312,12 @@ impl ElementData {
return InvalidationResult::empty();
}

let mut processor =
StateAndAttrInvalidationProcessor::new(shared_context, element, self, selector_caches);
let mut processor = StateAndAttrInvalidationProcessor::new(
shared_context,
element,
self,
selector_caches,
);

let invalidator = TreeStyleInvalidator::new(element, stack_limit_checker, &mut processor);

Expand Down
7 changes: 7 additions & 0 deletions style/gecko/wrapper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
//! the separation between the style system implementation and everything else.
use crate::applicable_declarations::ApplicableDeclarationBlock;
use crate::bloom::each_relevant_element_hash;
use crate::context::{PostAnimationTasks, QuirksMode, SharedStyleContext, UpdateAnimationsTasks};
use crate::data::ElementData;
use crate::dom::{LayoutIterator, NodeInfo, OpaqueNode, TDocument, TElement, TNode, TShadowRoot};
Expand Down Expand Up @@ -69,6 +70,7 @@ use dom::{DocumentState, ElementState};
use euclid::default::Size2D;
use fxhash::FxHashMap;
use selectors::attr::{AttrSelectorOperation, CaseSensitivity, NamespaceConstraint};
use selectors::bloom::{BloomFilter, BLOOM_HASH_MASK};
use selectors::matching::VisitedHandlingMode;
use selectors::matching::{ElementSelectorFlags, MatchingContext};
use selectors::sink::Push;
Expand Down Expand Up @@ -2090,4 +2092,9 @@ impl<'le> ::selectors::Element for GeckoElement<'le> {
// Reports whether this element is the root of a native anonymous subtree,
// by delegating to `is_root_of_native_anonymous_subtree`.
fn ignores_nth_child_selectors(&self) -> bool {
self.is_root_of_native_anonymous_subtree()
}

/// Inserts every hash reported by `each_relevant_element_hash` for this
/// element into `filter`, masked with `BLOOM_HASH_MASK`. Always returns
/// `true`.
fn add_element_unique_hashes(&self, filter: &mut BloomFilter) -> bool {
    let insert_masked = |hash: u32| filter.insert_hash(hash & BLOOM_HASH_MASK);
    each_relevant_element_hash(*self, insert_masked);
    true
}
}
Loading

0 comments on commit 516b5ac

Please sign in to comment.