Make metrics bad token detector configurable #3176

Merged
merged 42 commits into from
Dec 23, 2024
Changes from 41 commits
c745cb2
Reduce dependencies of trace call detector for use in driver
MartinquaXD Dec 5, 2024
4353d3a
Allow executing pre-interactions before hand
MartinquaXD Dec 5, 2024
d506d2e
TBC
MartinquaXD Dec 6, 2024
5e5241a
TBC
MartinquaXD Dec 10, 2024
d445982
wip
m-lord-renkse Dec 12, 2024
8ff6825
fix
m-lord-renkse Dec 12, 2024
1cca6b0
Merge remote-tracking branch 'origin/main' into kill-bad-tokens-1
MartinquaXD Dec 16, 2024
9dce0be
Refactor filtering logic to avoid allocations
MartinquaXD Dec 16, 2024
b1b174d
Remove filter helper function
MartinquaXD Dec 16, 2024
144521e
reference-count `Cache` internally for simpler API
MartinquaXD Dec 16, 2024
41c41ed
Make some functions private
MartinquaXD Dec 16, 2024
bda4d1c
Simplify config logic
MartinquaXD Dec 16, 2024
1600736
fixup comment
MartinquaXD Dec 16, 2024
90877b9
Merge remote-tracking branch 'origin/main' into kill-bad-tokens-1
MartinquaXD Dec 17, 2024
5c560dc
Rename and simplify
MartinquaXD Dec 17, 2024
7637891
Split logic into separate files
MartinquaXD Dec 17, 2024
434181e
some cleanup
MartinquaXD Dec 17, 2024
3fc408b
Add request sharing to bad token detection
MartinquaXD Dec 17, 2024
1e2ac01
fixup
MartinquaXD Dec 17, 2024
f3650a1
enable driver bad token detection in e2e tests
MartinquaXD Dec 17, 2024
a825099
Reduce diff
MartinquaXD Dec 17, 2024
b4d907b
fixup
MartinquaXD Dec 17, 2024
4cec1a2
fixup
MartinquaXD Dec 17, 2024
f74a8bb
Merge branch 'main' into kill-bad-tokens-1
MartinquaXD Dec 17, 2024
9787753
Merge branch 'main' into kill-bad-tokens-1
MartinquaXD Dec 17, 2024
f1e32d7
Fix cache eviction logic
MartinquaXD Dec 18, 2024
3f29b2e
Replace `.with_config()` with `new()`
MartinquaXD Dec 18, 2024
fb5794c
Implement metrics-based bad token detection strategy
squadgazzz Dec 18, 2024
e8f6962
Docs
squadgazzz Dec 18, 2024
cd7fa0f
More docs
squadgazzz Dec 18, 2024
838ae16
Stop incrementing counter once threshold is reached
squadgazzz Dec 18, 2024
8e6bc46
Reworked logic
squadgazzz Dec 19, 2024
95536d4
Remove hashset
squadgazzz Dec 19, 2024
ac3ad22
Typo
squadgazzz Dec 19, 2024
ec35222
Review comments
squadgazzz Dec 20, 2024
4b2de81
Shared detector
squadgazzz Dec 20, 2024
8134837
Naming
squadgazzz Dec 20, 2024
444e92e
Typo
squadgazzz Dec 20, 2024
d740867
Merge branch 'main' into bad-token/metrics
squadgazzz Dec 20, 2024
f031420
Make metrics bad token detector configurable
squadgazzz Dec 20, 2024
5cbccd4
Proper config structure
squadgazzz Dec 20, 2024
42b92ce
Merge branch 'main' into configurable-metrics-bad-token
squadgazzz Dec 23, 2024
66 changes: 61 additions & 5 deletions crates/driver/src/domain/competition/bad_tokens/metrics.rs
@@ -1,11 +1,67 @@
-use {super::Quality, crate::domain::eth};
+use {super::Quality, crate::domain::eth, dashmap::DashMap, std::sync::Arc};
 
-#[derive(Default)]
-pub struct Detector;
+struct TokenStatistics {
+    attempts: u32,
+    fails: u32,
+}
+
+#[derive(Default, Clone)]
+pub struct DetectorBuilder(Arc<DashMap<eth::TokenAddress, TokenStatistics>>);
+
+impl DetectorBuilder {
+    pub fn build(self, failure_ratio: f64, required_measurements: u32) -> Detector {
+        Detector {
+            failure_ratio,
+            required_measurements,
+            counter: self.0,
+        }
+    }
+}
+
+/// Monitors tokens to determine whether they are considered "unsupported" based
+/// on the ratio of failing to total settlement encoding attempts. A token must
+/// have participated in at least `REQUIRED_MEASUREMENTS` attempts to be
+/// evaluated. If, at that point, the ratio of failures is greater than or equal
+/// to `FAILURE_RATIO`, the token is considered unsupported.
+#[derive(Clone)]
+pub struct Detector {
+    failure_ratio: f64,
+    required_measurements: u32,
+    counter: Arc<DashMap<eth::TokenAddress, TokenStatistics>>,
+}
 
 impl Detector {
-    pub fn get_quality(&self, _token: eth::TokenAddress) -> Option<Quality> {
-        // TODO implement a reasonable heuristic
-        None
+    pub fn get_quality(&self, token: &eth::TokenAddress) -> Option<Quality> {
+        let measurements = self.counter.get(token)?;
+        let is_unsupported = measurements.attempts >= self.required_measurements
+            && (measurements.fails as f64 / measurements.attempts as f64) >= self.failure_ratio;
+
+        is_unsupported.then_some(Quality::Unsupported)
     }
+
+    /// Updates the tokens that participated in settlements by
+    /// incrementing their attempt count.
+    /// `failure` indicates whether the settlement encoding failed.
+    pub fn update_tokens(
+        &self,
+        token_pairs: &[(eth::TokenAddress, eth::TokenAddress)],
+        failure: bool,
+    ) {
+        token_pairs
+            .iter()
+            .flat_map(|(token_a, token_b)| [token_a, token_b])
+            .for_each(|token| {
+                self.counter
+                    .entry(*token)
+                    .and_modify(|counter| {
+                        counter.attempts += 1;
+                        counter.fails += u32::from(failure)
+                    })
+                    .or_insert_with(|| TokenStatistics {
+                        attempts: 1,
+                        fails: u32::from(failure),
+                    });
+            });
+    }
 }
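
The failure-ratio heuristic above is easiest to see in isolation. The sketch below reproduces the same counting and threshold logic with a plain `HashMap` and `&str` keys standing in for the driver's `Arc<DashMap>` and `eth::TokenAddress`, so it illustrates the behavior rather than the actual driver types:

```rust
use std::collections::HashMap;

// Simplified, std-only sketch of the metrics-based detector's heuristic.
#[derive(Default, Clone, Copy)]
struct TokenStatistics {
    attempts: u32,
    fails: u32,
}

struct Detector {
    failure_ratio: f64,
    required_measurements: u32,
    counter: HashMap<&'static str, TokenStatistics>,
}

impl Detector {
    // Every settlement encoding attempt bumps `attempts`; failed ones
    // also bump `fails`.
    fn update_tokens(&mut self, tokens: &[&'static str], failure: bool) {
        for &token in tokens {
            let stats = self.counter.entry(token).or_default();
            stats.attempts += 1;
            stats.fails += u32::from(failure);
        }
    }

    // A token counts as unsupported only after enough measurements AND
    // a failure ratio at or above the configured threshold.
    fn is_unsupported(&self, token: &str) -> bool {
        self.counter.get(token).is_some_and(|s| {
            s.attempts >= self.required_measurements
                && (s.fails as f64 / s.attempts as f64) >= self.failure_ratio
        })
    }
}

fn main() {
    // The PR's defaults: 0.9 failure ratio, 20 required measurements.
    let mut detector = Detector {
        failure_ratio: 0.9,
        required_measurements: 20,
        counter: HashMap::new(),
    };
    for _ in 0..20 {
        detector.update_tokens(&["BAD", "GOOD"], true);
        detector.update_tokens(&["GOOD"], false);
    }
    // BAD: 20 fails / 20 attempts -> flagged; GOOD: 20/40 = 0.5 -> fine.
    assert!(detector.is_unsupported("BAD"));
    assert!(!detector.is_unsupported("GOOD"));
}
```

Note that in the real code the counter lives behind `Arc<DashMap>` so one `DetectorBuilder` can hand every solver's `Detector` a view of the same shared statistics.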
20 changes: 17 additions & 3 deletions crates/driver/src/domain/competition/bad_tokens/mod.rs
@@ -53,8 +53,8 @@ impl Detector {
     }
 
     /// Enables detection of unsupported tokens based on heuristics.
-    pub fn with_heuristic_detector(&mut self) -> &mut Self {
-        self.metrics = Some(metrics::Detector);
+    pub fn with_metrics_detector(&mut self, detector: metrics::Detector) -> &mut Self {
+        self.metrics = Some(detector);
         self
     }
@@ -106,6 +106,20 @@ impl Detector {
         auction
     }
 
+    /// Updates the token quality metrics after a successful operation.
+    pub fn encoding_succeeded(&self, token_pairs: &[(eth::TokenAddress, eth::TokenAddress)]) {
+        if let Some(metrics) = &self.metrics {
+            metrics.update_tokens(token_pairs, false);
+        }
+    }
+
+    /// Updates the token quality metrics after a failure.
+    pub fn encoding_failed(&self, token_pairs: &[(eth::TokenAddress, eth::TokenAddress)]) {
+        if let Some(metrics) = &self.metrics {
+            metrics.update_tokens(token_pairs, true);
+        }
+    }
+
     fn get_token_quality(&self, token: eth::TokenAddress, now: Instant) -> Option<Quality> {
         if let Some(quality) = self.hardcoded.get(&token) {
             return Some(*quality);
@@ -118,7 +132,7 @@
         }
 
         if let Some(metrics) = &self.metrics {
-            return metrics.get_quality(token);
+            return metrics.get_quality(&token);
         }
 
         None
12 changes: 8 additions & 4 deletions crates/driver/src/domain/competition/mod.rs
@@ -122,6 +122,7 @@ impl Competition {
             .into_iter()
             .map(|solution| async move {
                 let id = solution.id().clone();
+                let token_pairs = solution.token_pairs();
                 observe::encoding(&id);
                 let settlement = solution
                     .encode(
@@ -131,16 +132,19 @@
                         self.solver.solver_native_token(),
                     )
                     .await;
-                (id, settlement)
+                (id, token_pairs, settlement)
             })
             .collect::<FuturesUnordered<_>>()
-            .filter_map(|(id, result)| async move {
+            .filter_map(|(id, token_pairs, result)| async move {
                 match result {
-                    Ok(solution) => Some(solution),
+                    Ok(solution) => {
+                        self.bad_tokens.encoding_succeeded(&token_pairs);
+                        Some(solution)
+                    }
                     // don't report on errors coming from solution merging
                     Err(_err) if id.solutions().len() > 1 => None,
                     Err(err) => {
-                        // TODO update metrics of bad token detection
+                        self.bad_tokens.encoding_failed(&token_pairs);
                         observe::encoding_failed(self.solver.name(), &id, &err);
                         notify::encoding_failed(&self.solver, auction.id(), &id, &err);
                         None
17 changes: 17 additions & 0 deletions crates/driver/src/domain/competition/solution/mod.rs
@@ -169,6 +169,23 @@ impl Solution {
         &self.trades
     }
 
+    /// Returns all the token pairs involved in the solution.
+    pub fn token_pairs(&self) -> Vec<(TokenAddress, TokenAddress)> {
+        self.trades
+            .iter()
+            .map(|trade| match trade {
+                Trade::Fulfillment(fulfillment) => {
+                    let order = fulfillment.order();
+                    (order.sell.token, order.buy.token)
+                }
+                Trade::Jit(jit) => {
+                    let order = jit.order();
+                    (order.sell.token, order.buy.token)
+                }
+            })
+            .collect()
+    }
+
     /// Interactions executed by this solution.
     pub fn interactions(&self) -> &[Interaction] {
         &self.interactions
18 changes: 14 additions & 4 deletions crates/driver/src/infra/api/mod.rs
@@ -58,6 +58,8 @@ impl Api {
         app = routes::metrics(app);
         app = routes::healthz(app);
 
+        let metrics_bad_token_detector_builder = bad_tokens::metrics::DetectorBuilder::default();
+
         // Multiplex each solver as part of the API. Multiple solvers are multiplexed
         // on the same driver so only one liquidity collector collects the liquidity
         // for all of them. This is important because liquidity collection is
@@ -73,13 +75,21 @@
 
             let mut bad_tokens =
                 bad_tokens::Detector::new(solver.bad_token_detection().tokens_supported.clone());
-            if solver
-                .bad_token_detection()
-                .enable_simulation_based_bad_token_detection
-            {
+            if solver.bad_token_detection().enable_simulation_strategy {
                 bad_tokens.with_simulation_detector(self.bad_token_detector.clone());
             }
 
+            if solver.bad_token_detection().enable_metrics_strategy {
+                bad_tokens.with_metrics_detector(
+                    metrics_bad_token_detector_builder.clone().build(
+                        solver.bad_token_detection().metrics_strategy_failure_ratio,
+                        solver
+                            .bad_token_detection()
+                            .metrics_strategy_required_measurements,
+                    ),
+                );
+            }
+
             let router = router.with_state(State(Arc::new(Inner {
                 eth: self.eth.clone(),
                 solver: solver.clone(),
13 changes: 11 additions & 2 deletions crates/driver/src/infra/config/file/load.rs
@@ -96,6 +96,7 @@ pub async fn load(chain: chain::Id, path: &Path) -> infra::Config {
                 response_size_limit_max_bytes: config.response_size_limit_max_bytes,
                 bad_token_detection: BadTokenDetection {
                     tokens_supported: config
+                        .bad_token_detection
                         .token_supported
                         .iter()
                         .map(|(token, supported)| {
@@ -108,8 +109,16 @@
                             )
                         })
                         .collect(),
-                    enable_simulation_based_bad_token_detection: config
-                        .enable_simulation_bad_token_detection,
+                    enable_simulation_strategy: config
+                        .bad_token_detection
+                        .enable_simulation_strategy,
+                    enable_metrics_strategy: config.bad_token_detection.enable_metrics_strategy,
+                    metrics_strategy_failure_ratio: config
+                        .bad_token_detection
+                        .metrics_strategy_failure_ratio,
+                    metrics_strategy_required_measurements: config
+                        .bad_token_detection
+                        .metrics_strategy_required_measurements,
                 },
             }
         }))
59 changes: 51 additions & 8 deletions crates/driver/src/infra/config/file/mod.rs
@@ -269,14 +269,9 @@ struct SolverConfig {
     #[serde(default = "default_response_size_limit_max_bytes")]
     response_size_limit_max_bytes: usize,
 
-    /// Which tokens are explicitly supported or unsupported by the solver.
-    #[serde(default)]
-    token_supported: HashMap<eth::H160, bool>,
-
-    /// Whether or not the solver opted into detecting unsupported
-    /// tokens with `trace_callMany` based simulation.
-    #[serde(default)]
-    enable_simulation_bad_token_detection: bool,
+    /// Configuration for bad token detection.
+    #[serde(default, flatten)]
+    bad_token_detection: BadTokenDetectionConfig,
 }
 
 #[derive(Clone, Copy, Debug, Default, Deserialize, PartialEq, Serialize)]
@@ -675,3 +670,51 @@ fn default_max_order_age() -> Option<Duration> {
 fn default_simulation_bad_token_max_age() -> Duration {
     Duration::from_secs(600)
 }
+
+#[serde_as]
+#[derive(Clone, Debug, Deserialize)]
+#[serde(rename_all = "kebab-case", deny_unknown_fields)]
+pub struct BadTokenDetectionConfig {
+    /// Which tokens are explicitly supported or unsupported by the solver.
+    #[serde(default)]
+    pub token_supported: HashMap<eth::H160, bool>,
+
+    /// Whether the solver opted into detecting unsupported
+    /// tokens with `trace_callMany` based simulation.
+    #[serde(default, rename = "enable-simulation-bad-token-detection")]
+    pub enable_simulation_strategy: bool,
+
+    /// Whether the solver opted into detecting unsupported
+    /// tokens with metrics-based detection.
+    #[serde(default, rename = "enable-metrics-bad-token-detection")]
+    pub enable_metrics_strategy: bool,
+
+    /// The ratio of failures to attempts that qualifies a token as unsupported.
+    #[serde(
+        default = "default_metrics_bad_token_detector_failure_ratio",
+        rename = "metrics-bad-token-detection-failure-ratio"
+    )]
+    pub metrics_strategy_failure_ratio: f64,
+
+    /// The minimum number of attempts required before evaluating a token's
+    /// quality.
+    #[serde(
+        default = "default_metrics_bad_token_detector_required_measurements",
+        rename = "metrics-bad-token-detection-required-measurements"
+    )]
+    pub metrics_strategy_required_measurements: u32,
+}
+
+impl Default for BadTokenDetectionConfig {
+    fn default() -> Self {
+        serde_json::from_str("{}").expect("BadTokenDetectionConfig uses default values")
+    }
+}
+
+fn default_metrics_bad_token_detector_failure_ratio() -> f64 {
+    0.9
+}
+
+fn default_metrics_bad_token_detector_required_measurements() -> u32 {
+    20
+}
5 changes: 4 additions & 1 deletion crates/driver/src/infra/solver/mod.rs
@@ -306,5 +306,8 @@ impl Error {
 pub struct BadTokenDetection {
     /// Tokens that are explicitly allow- or deny-listed.
     pub tokens_supported: HashMap<eth::TokenAddress, bad_tokens::Quality>,
-    pub enable_simulation_based_bad_token_detection: bool,
+    pub enable_simulation_strategy: bool,
+    pub enable_metrics_strategy: bool,
+    pub metrics_strategy_failure_ratio: f64,
+    pub metrics_strategy_required_measurements: u32,
 }
1 change: 1 addition & 0 deletions crates/e2e/src/setup/colocation.rs
@@ -125,6 +125,7 @@ account = "{account}"
 merge-solutions = {merge_solutions}
 quote-using-limit-orders = {quote_using_limit_orders}
 enable-simulation-bad-token-detection = true
+enable-metrics-bad-token-detection = true
 "#
             )
         },