Skip to content

Trust quorum: reconfiguration and commit behavior #8052

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
381 changes: 361 additions & 20 deletions trust-quorum/src/coordinator_state.rs

Large diffs are not rendered by default.

74 changes: 71 additions & 3 deletions trust-quorum/src/crypto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,13 @@

//! Various cryptographic constructs used by trust quroum.

use crate::{Epoch, Threshold};
use bootstore::trust_quorum::RackSecret as LrtqRackSecret;
use chacha20poly1305::{ChaCha20Poly1305, Key, KeyInit, aead, aead::Aead};
use derive_more::From;
use gfss::shamir::{self, CombineError, SecretShares, Share, SplitError};
use hkdf::Hkdf;
use omicron_uuid_kinds::{GenericUuid, RackUuid};
use rand::RngCore;
use rand::rngs::OsRng;
use secrecy::{DebugSecret, ExposeSecret, Secret};
Expand All @@ -15,9 +19,7 @@ use sha3::{Digest, Sha3_256};
use slog_error_chain::SlogInlineError;
use std::fmt::Debug;
use subtle::ConstantTimeEq;
use zeroize::{Zeroize, ZeroizeOnDrop};

use crate::Threshold;
use zeroize::{Zeroize, ZeroizeOnDrop, Zeroizing};

/// Each share contains a byte for the y-coordinate of 32 points on 32 different
/// polynomials over Ed25519. All points share an x-coordinate, which is the 0th
Expand Down Expand Up @@ -203,6 +205,15 @@ impl RackSecret {
let secret = shamir::compute_secret(shares)?.try_into()?;
Ok(secret)
}

pub fn reconstruct_from_iter<'a>(
shares: impl Iterator<Item = &'a Share>,
) -> Result<ReconstructedRackSecret, RackSecretReconstructError> {
let mut shares: Vec<Share> = shares.cloned().collect();
let res = RackSecret::reconstruct(&shares);
shares.zeroize();
res
}
}

impl DebugSecret for RackSecret {}
Expand Down Expand Up @@ -242,6 +253,63 @@ impl Default for Salt {
}
}

/// Encrypt the old rack secret with a key derived from the new rack secret.
///
/// A random salt is generated and returned along with the encrypted secret. Key
/// derivation context includes `rack_id`, `old_epoch`, and `new_epoch`.
pub fn encrypt_old_rack_secret(
old_rack_secret: ReconstructedRackSecret,
new_rack_secret: ReconstructedRackSecret,
rack_id: RackUuid,
old_epoch: Epoch,
new_epoch: Epoch,
) -> aead::Result<(EncryptedRackSecret, Salt)> {
let salt = Salt::new();
let cipher = derive_encryption_key_for_rack_secret(
new_rack_secret,
salt,
rack_id,
old_epoch,
new_epoch,
);

// This key is only used to encrypt one plaintext. A nonce of all zeroes is
// all that's required.
let nonce = [0u8; 12].into();
let encrypted_rack_secret = EncryptedRackSecret(
cipher.encrypt(&nonce, old_rack_secret.expose_secret().as_ref())?,
);

Ok((encrypted_rack_secret, salt))
}

fn derive_encryption_key_for_rack_secret(
new_rack_secret: ReconstructedRackSecret,
salt: Salt,
rack_id: RackUuid,
old_epoch: Epoch,
new_epoch: Epoch,
) -> ChaCha20Poly1305 {
let prk = Hkdf::<Sha3_256>::new(
Some(&salt.0[..]),
new_rack_secret.expose_secret(),
);

// The "info" string is context to bind the key to its purpose
let mut key = Zeroizing::new([0u8; 32]);
prk.expand_multi_info(
&[
b"trust-quorum-v1-rack-secret",
rack_id.as_untyped_uuid().as_ref(),
&new_epoch.0.to_be_bytes(),
&old_epoch.0.to_be_bytes(),
],
key.as_mut(),
)
.unwrap();
ChaCha20Poly1305::new(Key::from_slice(key.as_ref()))
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
103 changes: 103 additions & 0 deletions trust-quorum/src/errors.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

//! Various errors for the trust quorum APIs

use crate::configuration::ConfigurationError;
use crate::{Epoch, PlatformId, Threshold};
use omicron_uuid_kinds::RackUuid;

#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
pub enum CommitError {
#[error("invalid rack id")]
InvalidRackId(
#[from]
#[source]
MismatchedRackIdError,
),

#[error("missing prepare msg")]
MissingPrepare,

#[error("prepare for a later configuration exists")]
OutOfOrderCommit,
}

#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
#[error(
"sled was decommissioned on msg from {from:?} at epoch {epoch:?}: last prepared epoch = {last_prepared_epoch:?}"
)]
pub struct SledDecommissionedError {
pub from: PlatformId,
pub epoch: Epoch,
pub last_prepared_epoch: Option<Epoch>,
}

#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
#[error("mismatched rack id: expected {expected:?}, got {got:?}")]
pub struct MismatchedRackIdError {
pub expected: RackUuid,
pub got: RackUuid,
}

#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
pub enum ReconfigurationError {
#[error("reconfiguration coordinator must be a member of the new group")]
CoordinatorMustBeAMemberOfNewGroup,

#[error("upgrade from LRTQ required")]
UpgradeFromLrtqRequired,

#[error(
"number of members: {num_members:?} must be greater than threshold: {threshold:?}"
)]
ThresholdMismatch { num_members: usize, threshold: Threshold },

#[error(
"invalid membership size: {0:?}: must be between 3 and 32 inclusive"
)]
InvalidMembershipSize(usize),

#[error(
"invalid threshold: {0:?}: threshold must be between 2 and 31 inclusive"
)]
InvalidThreshold(Threshold),

#[error(
"Node has last committed epoch of {node_epoch:?}, message contains {msg_epoch:?}"
)]
LastCommittedEpochMismatch {
node_epoch: Option<Epoch>,
msg_epoch: Option<Epoch>,
},

#[error(
"sled has already prepared a request at epoch {existing:?}, and cannot prepare another at a smaller or equivalent epoch {new:?}"
)]
PreparedEpochMismatch { existing: Epoch, new: Epoch },

#[error("invalid rack id in reconfigure msg")]
InvalidRackId(
#[from]
#[source]
MismatchedRackIdError,
),

#[error("cannot reconfigure a decommissioned sled")]
DecommissionedSled(
#[from]
#[source]
SledDecommissionedError,
),
#[error(
"reconfiguration in progress at epoch {current_epoch:?}: cannot reconfigure for older epoch {msg_epoch:?}"
)]
ReconfigurationInProgress { current_epoch: Epoch, msg_epoch: Epoch },

#[error("mismatched reconfiguration requests for epoch {0:?}")]
MismatchedReconfigurationForSameEpoch(Epoch),

#[error(transparent)]
Configuration(#[from] ConfigurationError),
}
8 changes: 8 additions & 0 deletions trust-quorum/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use serde::{Deserialize, Serialize};
mod configuration;
mod coordinator_state;
pub(crate) mod crypto;
pub(crate) mod errors;
mod messages;
mod node;
mod persistent_state;
Expand All @@ -40,6 +41,13 @@ pub use persistent_state::{PersistentState, PersistentStateSummary};
)]
pub struct Epoch(pub u64);

impl Epoch {
// Increment the epoch and return the new value
pub fn inc(&self) -> Epoch {
Epoch(self.0 + 1)
}
}

/// The number of shares required to reconstruct the rack secret
///
/// Typically referred to as `k` in the docs
Expand Down
Loading
Loading