Skip to content

Commit

Permalink
Metrics-based bad token detector (#3172)
Browse files Browse the repository at this point in the history
# Description
Follow-up to #3156. This PR introduces an in-memory, ratio-based bad
token detection strategy that complements the existing heuristics.
Instead of relying on consecutive failures, it keeps track of both
successful and failed settlement attempts for each token. The logic is
as follows:

1. When a settlement encoding fails, every token involved in that
attempt has its statistics updated: both total attempts and failed
attempts are incremented.
2. Otherwise, every token involved has its total attempts incremented
but not its failures.
3. Before marking a token as unsupported, the detector requires at least
20 recorded attempts. Once this threshold is met, if the token's failure
ratio (`fails / attempts`) is at least 90%, it is considered
unsupported.

This approach is more resilient than just counting consecutive failures.
A highly utilized and generally reliable token that occasionally appears
in failing trades with problematic tokens won't be prematurely flagged
as unsupported because its overall success ratio remains high.

Due to the nature of the implementation, all the statistics get
discarded on every restart. Implementing a persistence layer might make
sense in the future, but problems with bad tokens are usually temporary.

## How to test
A forked e2e test for the simulation-based detector is required first,
and it is expected to be implemented in a separate PR.

---------

Co-authored-by: MartinquaXD <[email protected]>
Co-authored-by: Mateo <[email protected]>
  • Loading branch information
3 people authored Dec 23, 2024
1 parent 1451574 commit 7e52015
Show file tree
Hide file tree
Showing 8 changed files with 115 additions and 12 deletions.
61 changes: 56 additions & 5 deletions crates/driver/src/domain/competition/bad_tokens/metrics.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,62 @@
use {super::Quality, crate::domain::eth};
use {super::Quality, crate::domain::eth, dashmap::DashMap, std::sync::Arc};

/// Monitors tokens to determine whether they are considered "unsupported" based
/// on the ratio of failing to total settlement encoding attempts. A token must
/// have participated in at least `REQUIRED_MEASUREMENTS` attempts to be
/// evaluated. If, at that point, the ratio of failures is greater than or equal
/// to `FAILURE_RATIO`, the token is considered unsupported.
// Wrapped in an `Arc` so that `Clone` is cheap and every clone shares the same
// statistics (one detector instance is created per driver and handed to each
// solver that opts in).
#[derive(Default, Clone)]
pub struct Detector(Arc<Inner>);

#[derive(Default)]
pub struct Detector;
struct Inner {
// Per-token settlement-encoding statistics; `DashMap` allows concurrent
// updates from multiple settlement attempts without an explicit lock.
counter: DashMap<eth::TokenAddress, TokenStatistics>,
}

#[derive(Default)]
struct TokenStatistics {
// Total number of settlement encoding attempts this token participated in.
attempts: u32,
// Number of those attempts that failed to encode.
fails: u32,
}

impl Detector {
pub fn get_quality(&self, _token: eth::TokenAddress) -> Option<Quality> {
// TODO implement a reasonable heuristic
None
/// The ratio of failures to attempts that qualifies a token as unsupported.
const FAILURE_RATIO: f64 = 0.9;
/// The minimum number of attempts required before evaluating a token’s
/// quality.
const REQUIRED_MEASUREMENTS: u32 = 20;

/// Returns `Some(Quality::Unsupported)` when the token has enough recorded
/// measurements and its failure ratio meets the threshold; `None` when the
/// token is unknown or there is not yet enough data to judge it.
pub fn get_quality(&self, token: &eth::TokenAddress) -> Option<Quality> {
    let stats = self.0.counter.get(token)?;
    // Not enough data yet to make a judgement call.
    if stats.attempts < Self::REQUIRED_MEASUREMENTS {
        return None;
    }
    let failure_ratio = f64::from(stats.fails) / f64::from(stats.attempts);
    (failure_ratio >= Self::FAILURE_RATIO).then_some(Quality::Unsupported)
}

/// Updates the tokens that participated in settlements by
/// incrementing their attempt count.
/// `failure` indicates whether the settlement was successful or not.
pub fn update_tokens(
&self,
token_pairs: &[(eth::TokenAddress, eth::TokenAddress)],
failure: bool,
) {
token_pairs
.iter()
.flat_map(|(token_a, token_b)| [token_a, token_b])
.for_each(|token| {
self.0
.counter
.entry(*token)
.and_modify(|counter| {
counter.attempts += 1;
counter.fails += u32::from(failure)
})
.or_insert_with(|| TokenStatistics {
attempts: 1,
fails: u32::from(failure),
});
});
}
}
20 changes: 17 additions & 3 deletions crates/driver/src/domain/competition/bad_tokens/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ impl Detector {
}

/// Enables detection of unsupported tokens based on heuristics.
pub fn with_heuristic_detector(&mut self) -> &mut Self {
self.metrics = Some(metrics::Detector);
/// Enables the metrics-based detection of unsupported tokens, installing the
/// given (shared) detector instance. Returns `self` for chaining.
pub fn with_metrics_detector(&mut self, detector: metrics::Detector) -> &mut Self {
    self.metrics.replace(detector);
    self
}

Expand Down Expand Up @@ -106,6 +106,20 @@ impl Detector {
auction
}

/// Records a successful settlement encoding for all the given token pairs.
/// Does nothing when the metrics-based detector is not enabled.
pub fn encoding_succeeded(&self, token_pairs: &[(eth::TokenAddress, eth::TokenAddress)]) {
    let Some(metrics) = self.metrics.as_ref() else {
        return;
    };
    metrics.update_tokens(token_pairs, false);
}

/// Records a failed settlement encoding for all the given token pairs.
/// Does nothing when the metrics-based detector is not enabled.
pub fn encoding_failed(&self, token_pairs: &[(eth::TokenAddress, eth::TokenAddress)]) {
    let Some(metrics) = self.metrics.as_ref() else {
        return;
    };
    metrics.update_tokens(token_pairs, true);
}

fn get_token_quality(&self, token: eth::TokenAddress, now: Instant) -> Option<Quality> {
if let Some(quality) = self.hardcoded.get(&token) {
return Some(*quality);
Expand All @@ -118,7 +132,7 @@ impl Detector {
}

if let Some(metrics) = &self.metrics {
return metrics.get_quality(token);
return metrics.get_quality(&token);
}

None
Expand Down
12 changes: 8 additions & 4 deletions crates/driver/src/domain/competition/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ impl Competition {
.into_iter()
.map(|solution| async move {
let id = solution.id().clone();
let token_pairs = solution.token_pairs();
observe::encoding(&id);
let settlement = solution
.encode(
Expand All @@ -131,16 +132,19 @@ impl Competition {
self.solver.solver_native_token(),
)
.await;
(id, settlement)
(id, token_pairs, settlement)
})
.collect::<FuturesUnordered<_>>()
.filter_map(|(id, result)| async move {
.filter_map(|(id, token_pairs, result)| async move {
match result {
Ok(solution) => Some(solution),
Ok(solution) => {
self.bad_tokens.encoding_succeeded(&token_pairs);
Some(solution)
}
// don't report on errors coming from solution merging
Err(_err) if id.solutions().len() > 1 => None,
Err(err) => {
// TODO update metrics of bad token detection
self.bad_tokens.encoding_failed(&token_pairs);
observe::encoding_failed(self.solver.name(), &id, &err);
notify::encoding_failed(&self.solver, auction.id(), &id, &err);
None
Expand Down
17 changes: 17 additions & 0 deletions crates/driver/src/domain/competition/solution/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,23 @@ impl Solution {
&self.trades
}

/// Returns the (sell, buy) token pair of every trade in the solution, in
/// trade order. Pairs are not deduplicated.
pub fn token_pairs(&self) -> Vec<(TokenAddress, TokenAddress)> {
    let mut pairs = Vec::with_capacity(self.trades.len());
    for trade in &self.trades {
        let pair = match trade {
            Trade::Fulfillment(fulfillment) => {
                let order = fulfillment.order();
                (order.sell.token, order.buy.token)
            }
            Trade::Jit(jit) => {
                let order = jit.order();
                (order.sell.token, order.buy.token)
            }
        };
        pairs.push(pair);
    }
    pairs
}

/// Interactions executed by this solution.
pub fn interactions(&self) -> &[Interaction] {
&self.interactions
Expand Down
9 changes: 9 additions & 0 deletions crates/driver/src/infra/api/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ impl Api {
app = routes::metrics(app);
app = routes::healthz(app);

let metrics_bad_token_detector = bad_tokens::metrics::Detector::default();

// Multiplex each solver as part of the API. Multiple solvers are multiplexed
// on the same driver so only one liquidity collector collects the liquidity
// for all of them. This is important because liquidity collection is
Expand All @@ -80,6 +82,13 @@ impl Api {
bad_tokens.with_simulation_detector(self.bad_token_detector.clone());
}

if solver
.bad_token_detection()
.enable_metrics_based_bad_token_detection
{
bad_tokens.with_metrics_detector(metrics_bad_token_detector.clone());
}

let router = router.with_state(State(Arc::new(Inner {
eth: self.eth.clone(),
solver: solver.clone(),
Expand Down
2 changes: 2 additions & 0 deletions crates/driver/src/infra/config/file/load.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ pub async fn load(chain: chain::Id, path: &Path) -> infra::Config {
.collect(),
enable_simulation_based_bad_token_detection: config
.enable_simulation_bad_token_detection,
enable_metrics_based_bad_token_detection: config
.enable_metrics_bad_token_detection,
},
}
}))
Expand Down
5 changes: 5 additions & 0 deletions crates/driver/src/infra/config/file/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,11 @@ struct SolverConfig {
/// tokens with `trace_callMany` based simulation.
#[serde(default)]
enable_simulation_bad_token_detection: bool,

/// Whether or not the solver opted into detecting unsupported
/// tokens with metrics-based detection.
#[serde(default)]
enable_metrics_bad_token_detection: bool,
}

#[derive(Clone, Copy, Debug, Default, Deserialize, PartialEq, Serialize)]
Expand Down
1 change: 1 addition & 0 deletions crates/driver/src/infra/solver/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -307,4 +307,5 @@ pub struct BadTokenDetection {
/// Tokens that are explicitly allow- or deny-listed.
pub tokens_supported: HashMap<eth::TokenAddress, bad_tokens::Quality>,
pub enable_simulation_based_bad_token_detection: bool,
pub enable_metrics_based_bad_token_detection: bool,
}

0 comments on commit 7e52015

Please sign in to comment.