diff --git a/crates/sui-bridge-cli/src/main.rs b/crates/sui-bridge-cli/src/main.rs index 0919dfe6838e8..0fa205dc26edf 100644 --- a/crates/sui-bridge-cli/src/main.rs +++ b/crates/sui-bridge-cli/src/main.rs @@ -7,6 +7,7 @@ use ethers::types::Address as EthAddress; use fastcrypto::encoding::{Encoding, Hex}; use shared_crypto::intent::Intent; use shared_crypto::intent::IntentMessage; +use std::collections::BTreeMap; use std::collections::HashMap; use std::str::from_utf8; use std::str::FromStr; @@ -83,7 +84,7 @@ async fn main() -> anyhow::Result<()> { let config = LoadedBridgeCliConfig::load(config).await?; let metrics = Arc::new(BridgeMetrics::new_for_testing()); let sui_bridge_client = - SuiClient::::new(&config.sui_rpc_url, metrics).await?; + SuiClient::::new(&config.sui_rpc_url, metrics.clone()).await?; let (sui_key, sui_address, gas_object_ref) = config .get_sui_account_info() @@ -99,7 +100,11 @@ async fn main() -> anyhow::Result<()> { .await .expect("Failed to get bridge committee"), ); - let agg = BridgeAuthorityAggregator::new(bridge_committee); + let agg = BridgeAuthorityAggregator::new( + bridge_committee, + metrics, + Arc::new(BTreeMap::new()), + ); // Handle Sui Side if chain_id.is_sui_chain() { diff --git a/crates/sui-bridge/src/action_executor.rs b/crates/sui-bridge/src/action_executor.rs index e8d54728e4011..8e0fabafb8191 100644 --- a/crates/sui-bridge/src/action_executor.rs +++ b/crates/sui-bridge/src/action_executor.rs @@ -1513,9 +1513,9 @@ mod tests { let committee = BridgeCommittee::new(authorities).unwrap(); - let agg = Arc::new(ArcSwap::new(Arc::new(BridgeAuthorityAggregator::new( - Arc::new(committee), - )))); + let agg = Arc::new(ArcSwap::new(Arc::new( + BridgeAuthorityAggregator::new_for_testing(Arc::new(committee)), + ))); let metrics = Arc::new(BridgeMetrics::new(®istry)); let sui_token_type_tags = sui_client.get_token_id_map().await.unwrap(); let sui_token_type_tags = Arc::new(ArcSwap::new(Arc::new(sui_token_type_tags))); diff --git a/crates/sui-bridge/src/client/bridge_authority_aggregator.rs b/crates/sui-bridge/src/client/bridge_authority_aggregator.rs index 1de1c41635a7e..5a6e2ab3750ce 100644 --- a/crates/sui-bridge/src/client/bridge_authority_aggregator.rs +++ b/crates/sui-bridge/src/client/bridge_authority_aggregator.rs @@ -7,6 +7,7 @@ use crate::client::bridge_client::BridgeClient; use crate::crypto::BridgeAuthorityPublicKeyBytes; use crate::crypto::BridgeAuthoritySignInfo; use crate::error::{BridgeError, BridgeResult}; +use crate::metrics::BridgeMetrics; use crate::types::BridgeCommitteeValiditySignInfo; use crate::types::{ BridgeAction, BridgeCommittee, CertifiedBridgeAction, VerifiedCertifiedBridgeAction, @@ -30,10 +31,16 @@ const PREFETCH_TIMEOUT_MS: u64 = 1500; pub struct BridgeAuthorityAggregator { pub committee: Arc, pub clients: Arc>>, + pub metrics: Arc, + pub committee_keys_to_names: Arc>, } impl BridgeAuthorityAggregator { - pub fn new(committee: Arc) -> Self { + pub fn new( + committee: Arc, + metrics: Arc, + committee_keys_to_names: Arc>, + ) -> Self { let clients: BTreeMap> = committee .members() .iter() @@ -62,14 +69,30 @@ impl BridgeAuthorityAggregator { Self { committee, clients: Arc::new(clients), + metrics, + committee_keys_to_names, } } + #[cfg(test)] + pub fn new_for_testing(committee: Arc) -> Self { + Self::new( + committee, + Arc::new(BridgeMetrics::new_for_testing()), + Arc::new(BTreeMap::new()), + ) + } + pub async fn request_committee_signatures( &self, action: BridgeAction, ) -> BridgeResult { - let state = GetSigsState::new(action.approval_threshold(), self.committee.clone()); + let state = GetSigsState::new( + action.approval_threshold(), + self.committee.clone(), + self.metrics.clone(), + self.committee_keys_to_names.clone(), + ); request_sign_bridge_action_into_certification( action, self.committee.clone(), @@ -88,16 +111,25 @@ struct GetSigsState { sigs: BTreeMap, validity_threshold: StakeUnit, committee: Arc, + metrics: Arc, + committee_keys_to_names: Arc>, } impl GetSigsState { - fn new(validity_threshold: StakeUnit, committee: Arc) -> Self { + fn new( + validity_threshold: StakeUnit, + committee: Arc, + metrics: Arc, + committee_keys_to_names: Arc>, + ) -> Self { Self { committee, total_bad_stake: 0, total_ok_stake: 0, sigs: BTreeMap::new(), validity_threshold, + metrics, + committee_keys_to_names, } } @@ -119,7 +151,7 @@ impl GetSigsState { match self.sigs.entry(name.clone()) { Entry::Vacant(e) => { e.insert(signed_action.auth_sig().clone()); - self.total_ok_stake += stake; + self.add_ok_stake(stake, &name); } Entry::Occupied(_e) => { return Err(BridgeError::AuthoritySignatureDuplication(format!( @@ -156,7 +188,23 @@ impl GetSigsState { } } - fn add_bad_stake(&mut self, bad_stake: StakeUnit) { + fn add_ok_stake(&mut self, ok_stake: StakeUnit, name: &BridgeAuthorityPublicKeyBytes) { + if let Some(host_name) = self.committee_keys_to_names.get(name) { + self.metrics + .auth_agg_ok_responses + .with_label_values(&[host_name]) + .inc(); + } + self.total_ok_stake += ok_stake; + } + + fn add_bad_stake(&mut self, bad_stake: StakeUnit, name: &BridgeAuthorityPublicKeyBytes) { + if let Some(host_name) = self.committee_keys_to_names.get(name) { + self.metrics + .auth_agg_bad_responses + .with_label_values(&[host_name]) + .inc(); + } self.total_bad_stake += bad_stake; } @@ -223,7 +271,7 @@ async fn request_sign_bridge_action_into_certification( name.concise(), e ); - state.add_bad_stake(stake); + state.add_bad_stake(stake, &name); } } } @@ -233,7 +281,7 @@ async fn request_sign_bridge_action_into_certification( name.concise(), e ); - state.add_bad_stake(stake); + state.add_bad_stake(stake, &name); } }; @@ -296,7 +344,7 @@ mod tests { } let committee = BridgeCommittee::new(authorities.clone()).unwrap(); - let agg = BridgeAuthorityAggregator::new(Arc::new(committee)); + let agg = BridgeAuthorityAggregator::new_for_testing(Arc::new(committee)); assert_eq!( agg.clients.keys().cloned().collect::>(), BTreeSet::from_iter(vec![ @@ -310,7 +358,7 @@ mod tests { // authority 2 is blocklisted authorities[2].is_blocklisted = true; let committee = BridgeCommittee::new(authorities.clone()).unwrap(); - let agg = BridgeAuthorityAggregator::new(Arc::new(committee)); + let agg = BridgeAuthorityAggregator::new_for_testing(Arc::new(committee)); assert_eq!( agg.clients.keys().cloned().collect::>(), BTreeSet::from_iter(vec![ @@ -323,7 +371,7 @@ mod tests { // authority 3 has bad url authorities[3].base_url = "".into(); let committee = BridgeCommittee::new(authorities.clone()).unwrap(); - let agg = BridgeAuthorityAggregator::new(Arc::new(committee)); + let agg = BridgeAuthorityAggregator::new_for_testing(Arc::new(committee)); assert_eq!( agg.clients.keys().cloned().collect::>(), BTreeSet::from_iter(vec![ @@ -351,7 +399,7 @@ mod tests { let committee = BridgeCommittee::new(authorities).unwrap(); - let agg = BridgeAuthorityAggregator::new(Arc::new(committee)); + let agg = BridgeAuthorityAggregator::new_for_testing(Arc::new(committee)); let sui_tx_digest = TransactionDigest::random(); let sui_tx_event_index = 0; @@ -468,7 +516,7 @@ mod tests { let authorities_clone = authorities.clone(); let committee = Arc::new(BridgeCommittee::new(authorities_clone).unwrap()); - let agg = BridgeAuthorityAggregator::new(committee.clone()); + let agg = BridgeAuthorityAggregator::new_for_testing(committee.clone()); let sui_tx_digest = TransactionDigest::random(); let sui_tx_event_index = 0; @@ -542,7 +590,13 @@ mod tests { // we should receive all signatures in time, but only aggregate 2 authorities // to achieve quorum - let state = GetSigsState::new(action.approval_threshold(), committee.clone()); + let metrics = Arc::new(BridgeMetrics::new_for_testing()); + let state = GetSigsState::new( + action.approval_threshold(), + committee.clone(), + metrics.clone(), + Arc::new(BTreeMap::new()), + ); let resp = request_sign_bridge_action_into_certification( action.clone(), agg.committee.clone(), @@ -559,7 +613,12 @@ mod tests { // we should receive all but the highest stake signatures in time, but still be able to // achieve quorum with 3 sigs - let state = GetSigsState::new(action.approval_threshold(), committee.clone()); + let state = GetSigsState::new( + action.approval_threshold(), + committee.clone(), + metrics.clone(), + Arc::new(BTreeMap::new()), + ); let resp = request_sign_bridge_action_into_certification( action.clone(), agg.committee.clone(), @@ -576,7 +635,12 @@ mod tests { assert!(!sig_keys.contains(&authorities[8].pubkey_bytes())); // we should have fallen back to arrival order given that we timeout before we reach quorum - let state = GetSigsState::new(action.approval_threshold(), committee.clone()); + let state = GetSigsState::new( + action.approval_threshold(), + committee.clone(), + metrics.clone(), + Arc::new(BTreeMap::new()), + ); let start = std::time::Instant::now(); let resp = request_sign_bridge_action_into_certification( action.clone(), @@ -625,7 +689,7 @@ mod tests { let committee = BridgeCommittee::new(authorities.clone()).unwrap(); - let agg = BridgeAuthorityAggregator::new(Arc::new(committee)); + let agg = BridgeAuthorityAggregator::new_for_testing(Arc::new(committee)); let sui_tx_digest = TransactionDigest::random(); let sui_tx_event_index = 0; @@ -721,40 +785,52 @@ mod tests { let committee = BridgeCommittee::new(authorities.clone()).unwrap(); let threshold = VALIDITY_THRESHOLD; - let mut state = GetSigsState::new(threshold, Arc::new(committee)); + let metrics = Arc::new(BridgeMetrics::new_for_testing()); + let mut state = GetSigsState::new( + threshold, + Arc::new(committee), + metrics.clone(), + Arc::new(BTreeMap::new()), + ); assert!(!state.is_too_many_error()); - + let dummy = authorities[0].pubkey_bytes(); // bad stake: 2500 - state.add_bad_stake(2500); + state.add_bad_stake(2500, &dummy); assert!(!state.is_too_many_error()); // bad stake ; 5000 - state.add_bad_stake(2500); + state.add_bad_stake(2500, &dummy); assert!(!state.is_too_many_error()); // bad stake : 6666 - state.add_bad_stake(1666); + state.add_bad_stake(1666, &dummy); assert!(!state.is_too_many_error()); // bad stake : 6667 - too many errors - state.add_bad_stake(1); + state.add_bad_stake(1, &dummy); assert!(state.is_too_many_error()); // Authority 0 is blocklisted, we lose 2500 stake authorities[0].is_blocklisted = true; let committee = BridgeCommittee::new(authorities.clone()).unwrap(); let threshold = VALIDITY_THRESHOLD; - let mut state = GetSigsState::new(threshold, Arc::new(committee)); + let metrics = Arc::new(BridgeMetrics::new_for_testing()); + let mut state = GetSigsState::new( + threshold, + Arc::new(committee), + metrics.clone(), + Arc::new(BTreeMap::new()), + ); assert!(!state.is_too_many_error()); // bad stake: 2500 + 2500 - state.add_bad_stake(2500); + state.add_bad_stake(2500, &dummy); assert!(!state.is_too_many_error()); // bad stake: 5000 + 2500 - too many errors - state.add_bad_stake(2500); + state.add_bad_stake(2500, &dummy); assert!(state.is_too_many_error()); // Below we test `handle_verified_signed_action` @@ -764,7 +840,12 @@ mod tests { authorities[3].is_blocklisted = true; // blocklist authority 3 let committee = BridgeCommittee::new(authorities.clone()).unwrap(); let threshold = VALIDITY_THRESHOLD; - let mut state = GetSigsState::new(threshold, Arc::new(committee.clone())); + let mut state = GetSigsState::new( + threshold, + Arc::new(committee.clone()), + metrics.clone(), + Arc::new(BTreeMap::new()), + ); let sui_tx_digest = TransactionDigest::random(); let sui_tx_event_index = 0; diff --git a/crates/sui-bridge/src/e2e_tests/basic.rs b/crates/sui-bridge/src/e2e_tests/basic.rs index abdde652e9c13..1b042abd11280 100644 --- a/crates/sui-bridge/src/e2e_tests/basic.rs +++ b/crates/sui-bridge/src/e2e_tests/basic.rs @@ -190,7 +190,6 @@ async fn test_add_new_coins_on_sui_and_eth() { .with_num_validators(3) .build() .await; - let bridge_arg = bridge_test_cluster.get_mut_bridge_arg().await.unwrap(); // Register tokens on Sui @@ -239,7 +238,7 @@ async fn test_add_new_coins_on_sui_and_eth() { .await .expect("Failed to get bridge committee"), ); - let agg = BridgeAuthorityAggregator::new(bridge_committee); + let agg = BridgeAuthorityAggregator::new_for_testing(bridge_committee); let certified_sui_action = agg .request_committee_signatures(sui_action) .await diff --git a/crates/sui-bridge/src/e2e_tests/complex.rs b/crates/sui-bridge/src/e2e_tests/complex.rs index d822074146ae6..cb93dc6f01588 100644 --- a/crates/sui-bridge/src/e2e_tests/complex.rs +++ b/crates/sui-bridge/src/e2e_tests/complex.rs @@ -70,7 +70,7 @@ async fn test_sui_bridge_paused() { // get pause bridge signatures from committee let bridge_committee = Arc::new(bridge_client.get_bridge_committee().await.unwrap()); - let agg = BridgeAuthorityAggregator::new(bridge_committee); + let agg = BridgeAuthorityAggregator::new_for_testing(bridge_committee); let certified_action = agg .request_committee_signatures(pause_action) .await diff --git a/crates/sui-bridge/src/metrics.rs b/crates/sui-bridge/src/metrics.rs index 6d1fdda6e2a7c..c147787dc11c6 100644 --- a/crates/sui-bridge/src/metrics.rs +++ b/crates/sui-bridge/src/metrics.rs @@ -114,6 +114,9 @@ pub struct BridgeMetrics { pub(crate) sui_rpc_errors: IntCounterVec, pub(crate) observed_governance_actions: IntCounterVec, pub(crate) current_bridge_voting_rights: IntGaugeVec, + + pub(crate) auth_agg_ok_responses: IntCounterVec, + pub(crate) auth_agg_bad_responses: IntCounterVec, } impl BridgeMetrics { @@ -325,6 +328,20 @@ impl BridgeMetrics { registry ) .unwrap(), + auth_agg_ok_responses: register_int_counter_vec_with_registry!( + "bridge_auth_agg_ok_responses", + "Total number of ok respones from auth agg", + &["authority"], + registry, + ) + .unwrap(), + auth_agg_bad_responses: register_int_counter_vec_with_registry!( + "bridge_auth_agg_bad_responses", + "Total number of bad respones from auth agg", + &["authority"], + registry, + ) + .unwrap(), } } diff --git a/crates/sui-bridge/src/monitor.rs b/crates/sui-bridge/src/monitor.rs index af169737f81d5..af837c394acff 100644 --- a/crates/sui-bridge/src/monitor.rs +++ b/crates/sui-bridge/src/monitor.rs @@ -162,9 +162,12 @@ where Duration::from_secs(10), ) .await; - bridge_auth_agg.store(Arc::new(BridgeAuthorityAggregator::new(Arc::new( - new_committee, - )))); + let committee_names = bridge_auth_agg.load().committee_keys_to_names.clone(); + bridge_auth_agg.store(Arc::new(BridgeAuthorityAggregator::new( + Arc::new(new_committee), + bridge_metrics.clone(), + committee_names, + ))); info!("Committee updated with CommitteeMemberUrlUpdateEvent"); } @@ -180,9 +183,12 @@ where Duration::from_secs(10), ) .await; - bridge_auth_agg.store(Arc::new(BridgeAuthorityAggregator::new(Arc::new( - new_committee, - )))); + let committee_names = bridge_auth_agg.load().committee_keys_to_names.clone(); + bridge_auth_agg.store(Arc::new(BridgeAuthorityAggregator::new( + Arc::new(new_committee), + bridge_metrics.clone(), + committee_names, + ))); info!("Committee updated with BlocklistValidatorEvent"); } @@ -926,9 +932,9 @@ mod tests { bridge_metrics, ) = setup(); let old_committee = BridgeCommittee::new(authorities.clone()).unwrap(); - let agg = Arc::new(ArcSwap::new(Arc::new(BridgeAuthorityAggregator::new( - Arc::new(old_committee), - )))); + let agg = Arc::new(ArcSwap::new(Arc::new( + BridgeAuthorityAggregator::new_for_testing(Arc::new(old_committee)), + ))); let sui_token_type_tags = Arc::new(ArcSwap::from(Arc::new(HashMap::new()))); let _handle = tokio::task::spawn( BridgeMonitor::new( @@ -985,9 +991,9 @@ mod tests { bridge_metrics, ) = setup(); let old_committee = BridgeCommittee::new(authorities.clone()).unwrap(); - let agg = Arc::new(ArcSwap::new(Arc::new(BridgeAuthorityAggregator::new( - Arc::new(old_committee), - )))); + let agg = Arc::new(ArcSwap::new(Arc::new( + BridgeAuthorityAggregator::new_for_testing(Arc::new(old_committee)), + ))); let sui_token_type_tags = Arc::new(ArcSwap::from(Arc::new(HashMap::new()))); let _handle = tokio::task::spawn( BridgeMonitor::new( @@ -1045,9 +1051,9 @@ mod tests { frozen: !*bridge_pause_tx.borrow(), // toggle the bridge pause status }; let committee = BridgeCommittee::new(authorities.clone()).unwrap(); - let agg = Arc::new(ArcSwap::new(Arc::new(BridgeAuthorityAggregator::new( - Arc::new(committee), - )))); + let agg = Arc::new(ArcSwap::new(Arc::new( + BridgeAuthorityAggregator::new_for_testing(Arc::new(committee)), + ))); let sui_token_type_tags = Arc::new(ArcSwap::from(Arc::new(HashMap::new()))); let _handle = tokio::task::spawn( BridgeMonitor::new( @@ -1095,9 +1101,9 @@ mod tests { notional_value: 100000000, }; let committee = BridgeCommittee::new(authorities.clone()).unwrap(); - let agg = Arc::new(ArcSwap::new(Arc::new(BridgeAuthorityAggregator::new( - Arc::new(committee), - )))); + let agg = Arc::new(ArcSwap::new(Arc::new( + BridgeAuthorityAggregator::new_for_testing(Arc::new(committee)), + ))); let sui_token_type_tags = Arc::new(ArcSwap::from(Arc::new(HashMap::new()))); let sui_token_type_tags_clone = sui_token_type_tags.clone(); let _handle = tokio::task::spawn( diff --git a/crates/sui-bridge/src/node.rs b/crates/sui-bridge/src/node.rs index 97b0b2caf22e9..4c22cdab2eb2a 100644 --- a/crates/sui-bridge/src/node.rs +++ b/crates/sui-bridge/src/node.rs @@ -1,8 +1,9 @@ // Copyright (c) Mysten Labs, Inc. // SPDX-License-Identifier: Apache-2.0 +use crate::crypto::BridgeAuthorityPublicKeyBytes; use crate::types::BridgeCommittee; -use crate::utils::get_committee_voting_power_by_name; +use crate::utils::{get_committee_voting_power_by_name, get_validator_names_by_pub_keys}; use crate::{ action_executor::BridgeActionExecutor, client::bridge_authority_aggregator::BridgeAuthorityAggregator, @@ -19,6 +20,7 @@ use crate::{ use arc_swap::ArcSwap; use ethers::types::Address as EthAddress; use mysten_metrics::spawn_logged_monitored_task; +use std::collections::BTreeMap; use std::{ collections::HashMap, net::{IpAddr, Ipv4Addr, SocketAddr}, @@ -71,12 +73,6 @@ pub async fn run_bridge_node( .await .expect("Failed to get committee"), ); - // Start Client - let _handles = if let Some(client_config) = client_config { - start_client_components(client_config, committee.clone(), metrics.clone()).await - } else { - Ok(vec![]) - }?; // Update voting right metrics // Before reconfiguration happens we only set it once when the node starts @@ -86,7 +82,23 @@ pub async fn run_bridge_node( .governance_api() .get_latest_sui_system_state() .await?; - let committee_name_mapping = get_committee_voting_power_by_name(&committee, sui_system).await; + + // Start Client + let _handles = if let Some(client_config) = client_config { + let committee_keys_to_names = + Arc::new(get_validator_names_by_pub_keys(&committee, &sui_system).await); + start_client_components( + client_config, + committee.clone(), + committee_keys_to_names, + metrics.clone(), + ) + .await + } else { + Ok(vec![]) + }?; + + let committee_name_mapping = get_committee_voting_power_by_name(&committee, &sui_system).await; for (name, voting_power) in committee_name_mapping.into_iter() { metrics .current_bridge_voting_rights @@ -117,6 +129,7 @@ pub async fn run_bridge_node( async fn start_client_components( client_config: BridgeClientConfig, committee: Arc, + committee_keys_to_names: Arc>, metrics: Arc, ) -> anyhow::Result>> { let store: std::sync::Arc = @@ -154,6 +167,8 @@ async fn start_client_components( let bridge_auth_agg = Arc::new(ArcSwap::from(Arc::new(BridgeAuthorityAggregator::new( committee, + metrics.clone(), + committee_keys_to_names, )))); // TODO: should we use one query instead of two? let sui_token_type_tags = sui_client.get_token_id_map().await.unwrap(); diff --git a/crates/sui-bridge/src/utils.rs b/crates/sui-bridge/src/utils.rs index 7990ec79e0cec..17a6b629764fc 100644 --- a/crates/sui-bridge/src/utils.rs +++ b/crates/sui-bridge/src/utils.rs @@ -389,7 +389,7 @@ pub async fn wait_for_server_to_be_up(server_url: String, timeout_sec: u64) -> a /// If a validator is not in the Sui committee, we will use its base URL as the name. pub async fn get_committee_voting_power_by_name( bridge_committee: &Arc, - system_state: SuiSystemStateSummary, + system_state: &SuiSystemStateSummary, ) -> BTreeMap { let mut sui_committee: BTreeMap<_, _> = system_state .active_validators @@ -409,3 +409,28 @@ pub async fn get_committee_voting_power_by_name( }) .collect() } + +/// Return a mappping from validator pub keys to their names. +/// If a validator is not in the Sui committee, we will use its base URL as the name. +pub async fn get_validator_names_by_pub_keys( + bridge_committee: &Arc, + system_state: &SuiSystemStateSummary, +) -> BTreeMap { + let mut sui_committee: BTreeMap<_, _> = system_state + .active_validators + .iter() + .map(|v| (v.sui_address, v.name.clone())) + .collect(); + bridge_committee + .members() + .iter() + .map(|(name, validator)| { + ( + name.clone(), + sui_committee + .remove(&validator.sui_address) + .unwrap_or(validator.base_url.clone()), + ) + }) + .collect() +}