Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Respect Expires field for custom adblock subscriptions #18836

Merged
merged 4 commits into from
Jun 15, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions browser/brave_shields/ad_block_service_browsertest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -792,6 +792,7 @@ IN_PROC_BROWSER_TEST_F(AdBlockServiceTest, SubscribeToCustomSubscription) {
ASSERT_EQ(subscriptions[0].last_successful_update_attempt, base::Time());
ASSERT_EQ(subscriptions[0].enabled, true);
ASSERT_EQ(subscriptions[0].homepage, absl::nullopt);
ASSERT_EQ(subscriptions[0].expires, 7 * 24);
ASSERT_EQ(subscriptions[0].title, absl::nullopt);
}

Expand Down Expand Up @@ -840,6 +841,7 @@ IN_PROC_BROWSER_TEST_F(AdBlockServiceTest, SubscribeToCustomSubscription) {
subscriptions[0].last_update_attempt);
ASSERT_EQ(subscriptions[0].enabled, false);
ASSERT_EQ(subscriptions[0].homepage, "https://example.com/list.txt");
ASSERT_EQ(subscriptions[0].expires, 3 * 24);
ASSERT_EQ(subscriptions[0].title, "Test list");
}

Expand Down
69 changes: 43 additions & 26 deletions components/adblock_rust_ffi/src/lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
*/
typedef struct C_Engine C_Engine;

typedef struct C_EngineDebugInfo C_EngineDebugInfo;

/**
* Includes information about any "special comments" as described by
* https://help.eyeo.com/adblockplus/how-to-write-filters#special-comments
Expand Down Expand Up @@ -56,15 +58,15 @@ struct C_Engine* engine_create_from_buffer_with_metadata(
struct C_FilterListMetadata** metadata);

/**
* Create a new `Engine`, interpreting `rules` as a null-terminated C string and
* then parsing as a filter list in ABP syntax.
* Create a new `Engine`, interpreting `rules` as a null-terminated C string
* and then parsing as a filter list in ABP syntax.
*/
struct C_Engine* engine_create(const char* rules);

/**
* Create a new `Engine`, interpreting `rules` as a null-terminated C string and
* then parsing as a filter list in ABP syntax. Also populates metadata from the
* filter list into `metadata`.
* Create a new `Engine`, interpreting `rules` as a null-terminated C string
* and then parsing as a filter list in ABP syntax. Also populates metadata
* from the filter list into `metadata`.
*/
struct C_Engine* engine_create_with_metadata(
const char* rules,
Expand Down Expand Up @@ -156,66 +158,76 @@ bool filter_list_metadata_homepage(const struct C_FilterListMetadata* metadata,
char** homepage);

/**
* Puts a pointer to the title of the `FilterListMetadata` into `title`. Returns
* `true` if a title was returned.
* Puts a pointer to the title of the `FilterListMetadata` into `title`.
* Returns `true` if a title was returned.
*/
bool filter_list_metadata_title(const struct C_FilterListMetadata* metadata,
char** title);

/**
* Destroy a `FilterListMetadata` once you are done with it.
* Returns the amount of time this filter list should be considered valid for,
* in hours. Defaults to 168 (i.e. 7 days) if unspecified by the
* `FilterListMetadata`.
*/
void filter_list_metadata_destroy(struct C_FilterListMetadata* metadata);
uint16_t filter_list_metadata_expires(
const struct C_FilterListMetadata* metadata);

/**
* Destroy a `*c_char` once you are done with it.
*/
void c_char_buffer_destroy(char* s);

/**
* A structure to hold debug information of engine. Matches to rust
* EngineDebugInfo.
* Destroy a `FilterListMetadata` once you are done with it.
*/
typedef struct C_Engine_Debug_Info C_Engine_Debug_Info;
void filter_list_metadata_destroy(struct C_FilterListMetadata* metadata);

/**
* Get EngineDebugInfo from the engine. Should be destoyed later by calling
* engine_debug_info_destroy(..).
*/
C_Engine_Debug_Info* get_engine_debug_info(struct C_Engine* engine);
struct C_EngineDebugInfo* get_engine_debug_info(struct C_Engine* engine);

// Returns the field of EngineDebugInfo structure.
void engine_debug_info_get_attr(struct C_Engine_Debug_Info* debug_info,
/**
* Returns the field of EngineDebugInfo structure.
*/
void engine_debug_info_get_attr(struct C_EngineDebugInfo* debug_info,
size_t* compiled_regex_count,
size_t* regex_data_size);

// Returns the fields of EngineDebugInfo->regex_data[index].
// |regex| stay untouched if it ==None in the original structure.
// |index| must be in range [0, regex_data.len() - 1].
void engine_debug_info_get_regex_entry(struct C_Engine_Debug_Info* debug_info,
/**
* Returns the fields of EngineDebugInfo->regex_data[index].
*
* |regex| stay untouched if it ==None in the original structure.
*
* |index| must be in range [0, regex_data.len() - 1].
*/
void engine_debug_info_get_regex_entry(struct C_EngineDebugInfo* debug_info,
size_t index,
uint64_t* id,
char** regex,
uint64_t* unused_sec,
size_t* usage_count);
uintptr_t* usage_count);

/**
* Destroy a `EngineDebugInfo` once you are done with it.
*/
void engine_debug_info_destroy(struct C_Engine_Debug_Info* debug_info);
void engine_debug_info_destroy(struct C_EngineDebugInfo* debug_info);

void discard_regex(struct C_Engine* engine, uint64_t regex_id);

/**
* Setup discard policy for adblock regexps.
*
* |cleanup_interval_sec| how ofter the engine should check the policy.
*
* |discard_unused_sec| time in sec after unused regex will be discarded. Zero
* means disable discarding completely.
*/
void setup_discard_policy(struct C_Engine* engine,
uint64_t cleanup_interval_sec,
uint64_t discard_unused_sec);

/**
* Destroy a `*c_char` once you are done with it.
*/
void c_char_buffer_destroy(char* s);

/**
* Returns a set of cosmetic filtering resources specific to the given url, in
* JSON format
Expand All @@ -237,6 +249,11 @@ char* engine_hidden_class_id_selectors(struct C_Engine* engine,
size_t exceptions_size);

#if BUILDFLAG(IS_IOS)
/**
* Converts a list in adblock syntax to its corresponding iOS content-blocking
* syntax. `truncated` will be set to indicate whether or not some rules had to
* be removed to avoid iOS's maximum rule count limit.
*/
char* convert_rules_to_content_blocking(const char* rules, bool* truncated);
#endif

Expand Down
31 changes: 31 additions & 0 deletions components/adblock_rust_ffi/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,22 @@ pub unsafe extern "C" fn filter_list_metadata_title(
}
}

/// Returns the amount of time this filter list should be considered valid for,
/// in hours. Defaults to 168 (i.e. 7 days) if unspecified by the
/// `FilterListMetadata`.
#[no_mangle]
pub unsafe extern "C" fn filter_list_metadata_expires(metadata: *const FilterListMetadata) -> u16 {
use adblock::lists::ExpiresInterval;

const DEFAULT_EXPIRES_HOURS: u16 = 7 * 24;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's the same as kSubscriptionDefaultExpiresHours.
Could we avoid duplication of this const somehow?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose this could be exported as part of the FFI, I'll give it a try

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

exporting worked without any issues; deduplicated everywhere


match (*metadata).expires.as_ref() {
Some(ExpiresInterval::Days(d)) => 24 * *d as u16,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder do we really need to have the dedicated enum values for days and hours.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That API is intended as an AST-like representation - i.e. "just" parsing without applying any application logic. Could be used to construct a list header as well.

Some(ExpiresInterval::Hours(h)) => *h,
None => DEFAULT_EXPIRES_HOURS,
}
}

/// Destroy a `FilterListMetadata` once you are done with it.
#[no_mangle]
pub unsafe extern "C" fn filter_list_metadata_destroy(metadata: *mut FilterListMetadata) {
Expand All @@ -351,13 +367,16 @@ pub unsafe extern "C" fn filter_list_metadata_destroy(metadata: *mut FilterListM
}
}

/// Get EngineDebugInfo from the engine. Should be destoyed later by calling
/// engine_debug_info_destroy(..).
#[no_mangle]
pub unsafe extern "C" fn get_engine_debug_info(engine: *mut Engine) -> *mut EngineDebugInfo {
assert!(!engine.is_null());
let engine = Box::leak(Box::from_raw(engine));
Box::into_raw(Box::new(engine.get_debug_info()))
}

/// Returns the field of EngineDebugInfo structure.
#[no_mangle]
pub unsafe extern "C" fn engine_debug_info_get_attr(
debug_info: *mut EngineDebugInfo,
Expand All @@ -371,6 +390,11 @@ pub unsafe extern "C" fn engine_debug_info_get_attr(
*regex_data_size = info.blocker_debug_info.regex_data.len();
}

/// Returns the fields of EngineDebugInfo->regex_data[index].
///
/// |regex| stay untouched if it ==None in the original structure.
///
/// |index| must be in range [0, regex_data.len() - 1].
#[no_mangle]
pub unsafe extern "C" fn engine_debug_info_get_regex_entry(
debug_info: *mut EngineDebugInfo,
Expand All @@ -394,6 +418,7 @@ pub unsafe extern "C" fn engine_debug_info_get_regex_entry(
*usage_count = entry.usage_count;
}

/// Destroy a `EngineDebugInfo` once you are done with it.
#[no_mangle]
pub unsafe extern "C" fn engine_debug_info_destroy(debug_info: *mut EngineDebugInfo) {
if !debug_info.is_null() {
Expand All @@ -408,6 +433,12 @@ pub unsafe extern "C" fn discard_regex(engine: *mut Engine, regex_id: u64) {
engine.discard_regex(regex_id);
}

/// Setup discard policy for adblock regexps.
///
/// |cleanup_interval_sec| how ofter the engine should check the policy.
///
/// |discard_unused_sec| time in sec after unused regex will be discarded. Zero
/// means disable discarding completely.
#[no_mangle]
pub unsafe extern "C" fn setup_discard_policy(
engine: *mut Engine,
Expand Down
2 changes: 2 additions & 0 deletions components/adblock_rust_ffi/src/wrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ FilterListMetadata::FilterListMetadata(C_FilterListMetadata* metadata) {
title = absl::make_optional(std::string(str_buffer));
c_char_buffer_destroy(str_buffer);
}

expires = filter_list_metadata_expires(metadata);
}

FilterListMetadata::FilterListMetadata(const std::string& list)
Expand Down
1 change: 1 addition & 0 deletions components/adblock_rust_ffi/src/wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ typedef ADBLOCK_EXPORT struct FilterListMetadata {

Copy link
Collaborator

@atuchin-m atuchin-m Jun 12, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FilterListMetadata has a default ctor.
What expires should be set in that case?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch - I added a default value in SubscriptionInfo but this should get one too.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added default value

absl::optional<std::string> homepage;
absl::optional<std::string> title;
uint16_t expires;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just wonder, why 2 bytes integer is used here? Why not something like int64 which size matches to a word?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, could we change name to expires_in_hours?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maximum valid value is 14 days i.e. 336 hours, so it will definitely always fit in a uint16_t.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The names here correspond directly to the in-list representation, but I could add a doc comment at least

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added doc comment

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maximum valid value is 14 days i.e. 336 hours, so it will definitely always fit in a uint16_t.
It's definitely fit, but all integers with size less than a machine word are slower that (u)int64.

So using uint16 instead uint64 is about saving a few bytes against perf loss.
Smaller integer make sense for large vectors, serialization, etc.


FilterListMetadata(FilterListMetadata&&);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,22 @@ bool ParseOptionalStringField(const base::Value* value,
}
}

const base::TimeDelta kListUpdateInterval = base::Days(7);
bool ParseExpiresWithFallback(const base::Value* value, uint16_t* field) {
if (value == nullptr) {
*field = kSubscriptionDefaultExpiresHours;
return true;
} else if (!value->is_int()) {
return false;
} else {
int64_t i = value->GetInt();
if (i < 0 || i > 14 * 24) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lets make another const or use 2 * kSubscriptionDefaultExpiresHours

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll make another const here; it's just a "maximum valid" value which doesn't necessarily have anything to do with the default

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added kSubscriptionMaxExpiresHours

return false;
}
*field = (uint16_t)i;
return true;
}
}

const base::TimeDelta kListRetryInterval = base::Hours(1);
const base::TimeDelta kListCheckInitialDelay = base::Minutes(1);

Expand Down Expand Up @@ -101,6 +116,8 @@ void SubscriptionInfo::RegisterJSONConverter(
"homepage", &SubscriptionInfo::homepage, &ParseOptionalStringField);
converter->RegisterCustomValueField<absl::optional<std::string>>(
"title", &SubscriptionInfo::title, &ParseOptionalStringField);
converter->RegisterCustomValueField<uint16_t>(
"expires", &SubscriptionInfo::expires, &ParseExpiresWithFallback);
}

AdBlockSubscriptionServiceManager::AdBlockSubscriptionServiceManager(
Expand Down Expand Up @@ -172,7 +189,8 @@ void AdBlockSubscriptionServiceManager::OnUpdateTimer(
info = BuildInfoFromDict(sub_url, *list_subscription_dict);

base::TimeDelta until_next_refresh =
kListUpdateInterval - (base::Time::Now() - info.last_update_attempt);
base::Hours(info.expires) -
(base::Time::Now() - info.last_update_attempt);

if (info.enabled &&
((info.last_update_attempt != info.last_successful_update_attempt) ||
Expand Down Expand Up @@ -357,6 +375,8 @@ void AdBlockSubscriptionServiceManager::OnListMetadata(
info->homepage = absl::nullopt;
}

info->expires = metadata.expires;

UpdateSubscriptionPrefs(sub_url, *info);

NotifyObserversOfServiceEvent();
Expand Down Expand Up @@ -447,6 +467,7 @@ void AdBlockSubscriptionServiceManager::UpdateSubscriptionPrefs(
if (info.title) {
subscription_dict.Set("title", *info.title);
}
subscription_dict.Set("expires", info.expires);
subscriptions.Set(sub_url.spec(), std::move(subscription_dict));

// TODO(bridiver) - change to pref registrar
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ class AdBlockServiceTest;

namespace brave_shields {

const uint16_t kSubscriptionDefaultExpiresHours = 7 * 24;

struct SubscriptionInfo {
SubscriptionInfo();
~SubscriptionInfo();
Expand All @@ -64,6 +66,7 @@ struct SubscriptionInfo {

absl::optional<std::string> homepage;
absl::optional<std::string> title;
uint16_t expires = kSubscriptionDefaultExpiresHours;

static void RegisterJSONConverter(
base::JSONValueConverter<SubscriptionInfo>*);
Expand Down
1 change: 1 addition & 0 deletions test/data/list.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
! Title: Test list
! Homepage: https://example.com/list.txt
! Expires: 3 days
||b.com^*logo.png^