Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Metrics-based bad token detector #3172

Merged
merged 39 commits into from
Dec 23, 2024
Merged
Show file tree
Hide file tree
Changes from 31 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
c745cb2
Reduce dependencies of trace call detector for use in driver
MartinquaXD Dec 5, 2024
4353d3a
Allow executing pre-interactions before hand
MartinquaXD Dec 5, 2024
d506d2e
TBC
MartinquaXD Dec 6, 2024
5e5241a
TBC
MartinquaXD Dec 10, 2024
d445982
wip
m-lord-renkse Dec 12, 2024
8ff6825
fix
m-lord-renkse Dec 12, 2024
1cca6b0
Merge remote-tracking branch 'origin/main' into kill-bad-tokens-1
MartinquaXD Dec 16, 2024
9dce0be
Refactor filtering logic to avoid allocations
MartinquaXD Dec 16, 2024
b1b174d
Remove filter helper function
MartinquaXD Dec 16, 2024
144521e
reference-count `Cache` internally for simpler API
MartinquaXD Dec 16, 2024
41c41ed
Make some functions private
MartinquaXD Dec 16, 2024
bda4d1c
Simplify config logic
MartinquaXD Dec 16, 2024
1600736
fixup comment
MartinquaXD Dec 16, 2024
90877b9
Merge remote-tracking branch 'origin/main' into kill-bad-tokens-1
MartinquaXD Dec 17, 2024
5c560dc
Rename and simplify
MartinquaXD Dec 17, 2024
7637891
Split logic into separate files
MartinquaXD Dec 17, 2024
434181e
some cleanup
MartinquaXD Dec 17, 2024
3fc408b
Add request sharing to bad token detection
MartinquaXD Dec 17, 2024
1e2ac01
fixup
MartinquaXD Dec 17, 2024
f3650a1
enable driver bad token detection in e2e tests
MartinquaXD Dec 17, 2024
a825099
Reduce diff
MartinquaXD Dec 17, 2024
b4d907b
fixup
MartinquaXD Dec 17, 2024
4cec1a2
fixup
MartinquaXD Dec 17, 2024
f74a8bb
Merge branch 'main' into kill-bad-tokens-1
MartinquaXD Dec 17, 2024
9787753
Merge branch 'main' into kill-bad-tokens-1
MartinquaXD Dec 17, 2024
f1e32d7
Fix cache eviction logic
MartinquaXD Dec 18, 2024
3f29b2e
Replace `.with_config()` with `new()`
MartinquaXD Dec 18, 2024
fb5794c
Implement metrics-based bad token detection strategy
squadgazzz Dec 18, 2024
e8f6962
Docs
squadgazzz Dec 18, 2024
cd7fa0f
More docs
squadgazzz Dec 18, 2024
838ae16
Stop incrementing counter once threshold is reached
squadgazzz Dec 18, 2024
8e6bc46
Reworked logic
squadgazzz Dec 19, 2024
95536d4
Remove hashset
squadgazzz Dec 19, 2024
ac3ad22
Typo
squadgazzz Dec 19, 2024
ec35222
Review comments
squadgazzz Dec 20, 2024
4b2de81
Shared detector
squadgazzz Dec 20, 2024
8134837
Naming
squadgazzz Dec 20, 2024
444e92e
Typo
squadgazzz Dec 20, 2024
d740867
Merge branch 'main' into bad-token/metrics
squadgazzz Dec 20, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ bigdecimal = "0.3"
cached = { version = "0.49.3", default-features = false }
chrono = { version = "0.4.38", default-features = false }
clap = { version = "4.5.6", features = ["derive", "env"] }
dashmap = "6.1.0"
derivative = "2.2.0"
derive_more = { version = "1.0.0", features = ["full"] }
ethcontract = { version = "0.25.7", default-features = false, features = ["aws-kms"] }
Expand Down
8 changes: 4 additions & 4 deletions crates/autopilot/src/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -274,16 +274,16 @@ pub async fn run(args: Arguments) {

let trace_call_detector = args.tracing_node_url.as_ref().map(|tracing_node_url| {
CachingDetector::new(
Box::new(TraceCallDetector {
web3: shared::ethrpc::web3(
Box::new(TraceCallDetector::new(
shared::ethrpc::web3(
&args.shared.ethrpc,
&http_factory,
tracing_node_url,
"trace",
),
eth.contracts().settlement().address(),
finder,
settlement_contract: eth.contracts().settlement().address(),
}),
)),
args.shared.token_quality_cache_expiry,
args.shared.token_quality_cache_prefetch_time,
)
Expand Down
1 change: 1 addition & 0 deletions crates/driver/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ axum = { workspace = true }
bigdecimal = { workspace = true }
chrono = { workspace = true, features = ["clock"], default-features = false }
cow-amm = { path = "../cow-amm" }
dashmap = { workspace = true }
derive_more = { workspace = true }
ethabi = "18.0"
ethereum-types = { workspace = true }
Expand Down
2 changes: 1 addition & 1 deletion crates/driver/src/domain/competition/auction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ pub struct Auction {
/// See the [`Self::id`] method.
id: Option<Id>,
/// See the [`Self::orders`] method.
orders: Vec<competition::Order>,
pub(crate) orders: Vec<competition::Order>,
/// The tokens that are used in the orders of this auction.
tokens: Tokens,
gas_price: eth::GasPrice,
Expand Down
87 changes: 87 additions & 0 deletions crates/driver/src/domain/competition/bad_tokens/cache.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
use {
crate::domain::{competition::bad_tokens::Quality, eth},
dashmap::{DashMap, Entry},
std::{
sync::Arc,
time::{Duration, Instant},
},
};

/// Cache keeping track of whether a token is considered supported.
/// Internally reference counted for cheap clones and easy sharing.
/// Stores a map instead of a set to not recompute the quality of good tokens
/// over and over.
/// Evicts cached values after a configurable period of time.
#[derive(Clone)]
pub struct Cache(Arc<Inner>);

struct Inner {
    /// most recent quality decision per token
    cache: DashMap<eth::TokenAddress, CacheEntry>,
    /// entries older than this get ignored and evicted
    max_age: Duration,
}

/// A single cached token quality decision.
struct CacheEntry {
    /// when the decision on the token quality was made
    timestamp: Instant,
    /// whether the token is supported or not
    quality: Quality,
}

impl Cache {
    /// Creates a new instance which evicts cached values after a period of
    /// time.
    pub fn new(max_age: Duration) -> Self {
        let inner = Inner {
            max_age,
            cache: DashMap::default(),
        };
        Self(Arc::new(inner))
    }

    /// Updates whether or not a token should be considered supported.
    pub fn update_quality(&self, token: eth::TokenAddress, quality: Quality, now: Instant) {
        match self.0.cache.entry(token) {
            Entry::Occupied(mut occupied) => {
                let cached = occupied.get_mut();
                let outdated = now.duration_since(cached.timestamp) > self.0.max_age;
                // Overwrite the cached quality only if it is outdated by now
                // or if the new value is "Unsupported". This means on
                // conflicting updates we err on the conservative side and
                // assume a token is unsupported.
                if outdated || quality == Quality::Unsupported {
                    cached.quality = quality;
                }
                cached.timestamp = now;
            }
            Entry::Vacant(vacant) => {
                vacant.insert(CacheEntry {
                    timestamp: now,
                    quality,
                });
            }
        }
    }

    /// Removes all entries that are older than the configured `max_age`.
    pub fn evict_outdated_entries(&self) {
        let now = Instant::now();
        let max_age = self.0.max_age;
        self.0
            .cache
            .retain(|_, cached| now.duration_since(cached.timestamp) < max_age);
    }

    /// Returns the quality of the token. If the cached value is older than the
    /// `max_age` it gets ignored and the token evicted.
    pub fn get_quality(&self, token: eth::TokenAddress, now: Instant) -> Option<Quality> {
        let Entry::Occupied(occupied) = self.0.cache.entry(token) else {
            return None;
        };

        let cached = occupied.get();
        if now.duration_since(cached.timestamp) > self.0.max_age {
            occupied.remove();
            return None;
        }

        Some(cached.quality)
    }
}
52 changes: 52 additions & 0 deletions crates/driver/src/domain/competition/bad_tokens/metrics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
use {
super::Quality,
crate::domain::eth,
dashmap::DashMap,
std::{collections::HashSet, sync::Arc},
};

/// Monitors tokens to determine whether they are considered "unsupported" based
/// on the number of consecutive participation in failing settlement encoding.
/// Tokens that consistently participate in failures beyond a predefined
/// threshold are marked as unsupported. Once token participates in a successful
/// settlement encoding, it is removed from the cache.
#[derive(Default)]
pub struct Detector(Arc<Inner>);

#[derive(Default)]
struct Inner {
squadgazzz marked this conversation as resolved.
Show resolved Hide resolved
counter: DashMap<eth::TokenAddress, u8>,
}

impl Detector {
/// Defines the threshold for the number of consecutive unsupported
/// quality detections before a token is considered unsupported.
const UNSUPPORTED_THRESHOLD: u8 = 5;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we need to be less blunt with this heuristic. Let's say there is only 1 market order USDC <> SHITCOIN then the solver will try to solve that over and over and this detector will end up marking USDC as unsupported too.
A better approach might be to count how often a token could be encoded vs. how often it couldn't. That way the heuristic shouldn't flag USDC as unsupported because it will be involved in a ton of trades with "normal" tokens which increase its "goodness" ratio.
I believe the simplest implementation of this strategy needs just 2 variables:

  • number of measurements you require at least per token
  • ratio of failing encodings a token must have

E.g. at least 20 measurements and a at least 90% of good measurements.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

True, thanks.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

at least 90% of good measurements

Or 90% of failures?


pub fn get_quality(&self, token: eth::TokenAddress) -> Option<Quality> {
squadgazzz marked this conversation as resolved.
Show resolved Hide resolved
self.0
.counter
.get(&token)
.filter(|counter| **counter >= Self::UNSUPPORTED_THRESHOLD)
.map(|_| Quality::Unsupported)
}

/// Increments the counter of failures for each token.
pub fn update_failing_tokens(&self, tokens: HashSet<eth::TokenAddress>) {
squadgazzz marked this conversation as resolved.
Show resolved Hide resolved
for token in tokens {
self.0
.counter
.entry(token)
.and_modify(|counter| *counter = Self::UNSUPPORTED_THRESHOLD.min(*counter + 1))
.or_insert(1);
}
}

/// Removes tokens from the cache since they all participated in a
/// successful settlement encoding.
pub fn update_successful_tokens(&self, tokens: HashSet<eth::TokenAddress>) {
for token in tokens {
self.0.counter.remove(&token);
}
}
}
141 changes: 141 additions & 0 deletions crates/driver/src/domain/competition/bad_tokens/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
use {
crate::domain::{competition::Auction, eth},
futures::StreamExt,
std::{
collections::{HashMap, HashSet},
fmt,
time::Instant,
},
};

pub mod cache;
pub mod metrics;
pub mod simulation;

/// Verdict on whether a solver can safely compute routes involving a token.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Quality {
    /// Solver is likely to produce working solutions when computing
    /// routes for this token.
    Supported,
    /// Solver will likely produce failing solutions when computing
    /// routes for this token. This can have many reasons:
    /// * fees on transfer
    /// * token enforces max transfer amount
    /// * trader is deny listed
    /// * bugs in the solidity compiler make it incompatible with the
    ///   settlement contract - see
    ///   <https://github.com/cowprotocol/services/pull/781>
    /// * probably tons of other reasons
    Unsupported,
}

/// Combines multiple detection strategies to decide whether a token is
/// supported. Lookup precedence: hardcoded list, then simulation based
/// detection, then metrics based detection.
#[derive(Default)]
pub struct Detector {
    /// manually configured list of supported and unsupported tokens. Only
    /// tokens that get detected incorrectly by the automatic detectors get
    /// listed here and therefore have a higher precedence.
    hardcoded: HashMap<eth::TokenAddress, Quality>,
    /// simulation based detection; see [`simulation::Detector`]
    simulation_detector: Option<simulation::Detector>,
    /// metrics based detection driven by settlement encoding outcomes; see
    /// [`metrics::Detector`]
    metrics: Option<metrics::Detector>,
}

impl Detector {
    /// Hardcodes tokens as (un)supported based on the provided config. This has
    /// the highest priority when looking up a token's quality.
    pub fn new(config: HashMap<eth::TokenAddress, Quality>) -> Self {
        Self {
            hardcoded: config,
            ..Default::default()
        }
    }

    /// Enables detection of unsupported tokens via simulation based detection
    /// methods.
    pub fn with_simulation_detector(&mut self, detector: simulation::Detector) -> &mut Self {
        self.simulation_detector = Some(detector);
        self
    }

    /// Enables detection of unsupported tokens based on heuristics.
    pub fn with_heuristic_detector(&mut self) -> &mut Self {
        self.metrics = Some(metrics::Detector::default());
        self
    }

    /// Removes all orders that involve at least one unsupported token from
    /// the auction.
    pub async fn filter_unsupported_orders_in_auction(&self, mut auction: Auction) -> Auction {
        let now = Instant::now();

        // Take the orders out instead of partially moving out of `auction` so
        // the struct stays fully initialized throughout.
        let orders = std::mem::take(&mut auction.orders);
        let filtered_orders = futures::stream::iter(orders)
            .filter_map(move |order| async move {
                let sell = self.get_token_quality(order.sell.token, now);
                // Bug fix: this previously looked up `order.sell.token` a
                // second time which made the buy token check a no-op.
                let buy = self.get_token_quality(order.buy.token, now);
                match (sell, buy) {
                    // both tokens supported => keep order
                    (Some(Quality::Supported), Some(Quality::Supported)) => Some(order),
                    // at least 1 token unsupported => drop order
                    (Some(Quality::Unsupported), _) | (_, Some(Quality::Unsupported)) => None,
                    // sell token quality is unknown => keep order if token is supported
                    (None, _) => {
                        let Some(detector) = &self.simulation_detector else {
                            // we can't determine quality => assume order is good
                            return Some(order);
                        };
                        let quality = detector.determine_sell_token_quality(&order, now).await;
                        (quality == Some(Quality::Supported)).then_some(order)
                    }
                    // buy token quality is unknown => keep order (because we can't
                    // determine quality and assume it's good)
                    (_, None) => Some(order),
                }
            })
            .collect::<Vec<_>>()
            .await;
        auction.orders = filtered_orders;

        if let Some(detector) = &self.simulation_detector {
            detector.evict_outdated_entries();
        }

        auction
    }

    /// Updates the token quality metrics after a successful settlement
    /// encoding.
    pub fn encoding_succeeded(&self, tokens: HashSet<eth::TokenAddress>) {
        if let Some(metrics) = &self.metrics {
            metrics.update_successful_tokens(tokens);
        }
    }

    /// Updates the token quality metrics after a failed settlement encoding.
    pub fn encoding_failed(&self, tokens: HashSet<eth::TokenAddress>) {
        if let Some(metrics) = &self.metrics {
            metrics.update_failing_tokens(tokens);
        }
    }

    /// Looks up a token's quality with the following precedence:
    /// hardcoded list > simulation detector > metrics based detector.
    /// Returns `None` if no detector has a verdict.
    fn get_token_quality(&self, token: eth::TokenAddress, now: Instant) -> Option<Quality> {
        if let Some(quality) = self.hardcoded.get(&token) {
            return Some(*quality);
        }

        if let Some(quality) = self
            .simulation_detector
            .as_ref()
            .and_then(|detector| detector.get_quality(token, now))
        {
            return Some(quality);
        }

        self.metrics
            .as_ref()
            .and_then(|metrics| metrics.get_quality(token))
    }
}

impl fmt::Debug for Detector {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Only the hardcoded list is printed; `finish_non_exhaustive()`
        // renders a trailing `..` to signal the intentionally omitted
        // detector fields instead of silently hiding them.
        f.debug_struct("Detector")
            .field("hardcoded", &self.hardcoded)
            .finish_non_exhaustive()
    }
}
Loading
Loading