Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions trust-quorum/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ pub use configuration::Configuration;
pub use coordinator_state::{
CoordinatorOperation, CoordinatorState, CoordinatorStateDiff,
};
pub use rack_secret_loader::{LoadRackSecretError, RackSecretLoaderDiff};
pub use validators::ValidatedReconfigureMsgDiff;
mod alarm;

Expand Down
58 changes: 47 additions & 11 deletions trust-quorum/src/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@
//! Node, and so this should not be problematic.

use crate::compute_key_share::KeyShareComputer;
use crate::crypto::ReconstructedRackSecret;
use crate::rack_secret_loader::{
LoadRackSecretError, RackSecretLoader, RackSecretLoaderDiff,
};
use crate::validators::{
MismatchedRackIdError, ReconfigurationError, ValidatedReconfigureMsg,
};
Expand Down Expand Up @@ -44,18 +48,26 @@ pub struct Node {
/// In memory state for when this node is trying to compute its own key
/// share for a committed epoch.
key_share_computer: Option<KeyShareComputer>,

/// A mechanism for loading rack secrets by collecting key shares
/// for the latest committed epoch.
rack_secret_loader: RackSecretLoader,
}

// For diffs we want to allow access to all fields, but not make them public in
// the `Node` type itself.
impl NodeDiff<'_> {
impl<'a> NodeDiff<'a> {
/// Returns the diff leaf for the node's `coordinator_state` field.
pub fn coordinator_state(&self) -> Leaf<Option<&CoordinatorState>> {
self.coordinator_state
}

/// Returns the diff leaf for the node's `key_share_computer` field.
pub fn key_share_computer(&self) -> Leaf<Option<&KeyShareComputer>> {
self.key_share_computer
}

/// Returns a reference to the nested diff of the node's
/// `rack_secret_loader` field.
pub fn rack_secret_loader(&self) -> &RackSecretLoaderDiff<'a> {
&self.rack_secret_loader
}
}

#[cfg(feature = "danger_partial_eq_ct_wrapper")]
Expand All @@ -74,7 +86,32 @@ impl Node {
let id_str = format!("{:?}", ctx.platform_id());
let log =
log.new(o!("component" => "trust-quorum", "platform_id" => id_str));
Node { log, coordinator_state: None, key_share_computer: None }
let rack_secret_loader = RackSecretLoader::new(&log);
Node {
log,
coordinator_state: None,
key_share_computer: None,
rack_secret_loader,
}
}

/// Attempt to load the rack secret for the given `epoch`.
///
/// This delegates directly to the node's `RackSecretLoader`.
///
/// If no secrets are loaded the node will start collecting shares for the
/// latest committed epoch and return `Ok(None)`. `Ok(None)` will continue
/// to be returned while share collection is in progress. The secret will
/// be returned on the next call after it becomes available.
///
/// # Errors
///
/// Any `LoadRackSecretError` produced by the loader is returned unchanged.
pub fn load_rack_secret(
&mut self,
ctx: &mut impl NodeHandlerCtx,
epoch: Epoch,
) -> Result<Option<ReconstructedRackSecret>, LoadRackSecretError> {
self.rack_secret_loader.load(ctx, epoch)
}

/// Clear all loaded rack secrets cached in memory.
///
/// This delegates directly to the node's `RackSecretLoader`.
pub fn clear_secrets(&mut self) {
self.rack_secret_loader.clear_secrets();
}

/// Start coordinating a reconfiguration
Expand Down Expand Up @@ -126,6 +163,10 @@ impl Node {
self.key_share_computer.is_some()
}

/// Returns whether the node's `RackSecretLoader` reports that it is
/// currently collecting shares that cover the rack secret for `epoch`.
///
/// This delegates directly to the loader's method of the same name.
pub fn is_collecting_shares_for_rack_secret(&self, epoch: Epoch) -> bool {
self.rack_secret_loader.is_collecting_shares_for_rack_secret(epoch)
}

/// Commit a configuration
///
/// This is triggered by a message from Nexus for each node in the
Expand Down Expand Up @@ -659,22 +700,17 @@ impl Node {
share: Share,
) {
if let Some(cs) = &mut self.coordinator_state {
cs.handle_share(ctx, from, epoch, share);
cs.handle_share(ctx, from.clone(), epoch, share.clone());
} else if let Some(ksc) = &mut self.key_share_computer {
if ksc.handle_share(ctx, from, epoch, share) {
if ksc.handle_share(ctx, from.clone(), epoch, share.clone()) {
// We have completed computing our share and saved it to
// our persistent state. We have also marked the configuration
// committed.
self.key_share_computer = None;
}
} else {
warn!(
self.log,
"Received share when not coordinating or computing share";
"from" => %from,
"epoch" => %epoch
);
}

self.rack_secret_loader.handle_share(ctx, from, epoch, share);
}

fn handle_prepare(
Expand Down
64 changes: 56 additions & 8 deletions trust-quorum/src/rack_secret_loader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use crate::{
Alarm, Configuration, Epoch, NodeHandlerCtx, PeerMsgKind, PlatformId,
RackSecret, Share,
};
use daft::{BTreeMapDiff, Diffable, Leaf};
use slog::{Logger, error, info, o};

#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
Expand All @@ -27,22 +28,45 @@ pub enum LoadRackSecretError {
}

/// Manage retrieval of key shares to load various rack secrets
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Diffable)]
pub struct RackSecretLoader {
#[daft(ignore)]
log: Logger,
loaded: BTreeMap<Epoch, ReconstructedRackSecret>,
// We can only collect shares for the latest committed epoch. We then derive
// a key from the computed rack secret to decrypt rack secrets for prior
// configurations.
#[daft(leaf)]
collector: Option<ShareCollector>,
}

// Read-only accessors for the fields of the derived diff type.
impl<'daft> RackSecretLoaderDiff<'daft> {
/// Returns the map diff of the `loaded` rack secrets, keyed by epoch.
pub fn loaded(
&self,
) -> &BTreeMapDiff<'daft, Epoch, ReconstructedRackSecret> {
&self.loaded
}

/// Returns the diff leaf for the optional `collector` field.
pub fn collector(&self) -> Leaf<&'daft Option<ShareCollector>> {
self.collector
}
}

impl RackSecretLoader {
/// Create a loader with no loaded secrets and no share collection in
/// progress.
///
/// The loader logs through a child of `log` tagged with its own
/// `component` key.
pub fn new(log: &Logger) -> RackSecretLoader {
    RackSecretLoader {
        log: log.new(o!("component" => "tq-rack-secret-loader")),
        loaded: BTreeMap::new(),
        collector: None,
    }
}

/// Returns `true` when a share collector exists and the epoch of its
/// configuration is greater than or equal to `epoch`.
///
/// We collect for the latest committed epoch which must be greater than
/// or equal to the epoch for the rack secret we are interested in.
pub fn is_collecting_shares_for_rack_secret(&self, epoch: Epoch) -> bool {
    matches!(
        &self.collector,
        Some(collector) if collector.config.epoch >= epoch
    )
}

pub fn load(
&mut self,
ctx: &mut impl NodeHandlerCtx,
Expand All @@ -60,17 +84,20 @@ impl RackSecretLoader {
return Err(LoadRackSecretError::NoCommittedConfigurations);
};

if epoch > latest_committed_epoch {
return Err(LoadRackSecretError::NotCommitted(epoch));
}

// If we have loaded the latest committed epoch, then we have loaded all
// possible rack secrets. Secrets for prior epochs are unavailable.
if self.loaded.contains_key(&latest_committed_epoch) {
if epoch < latest_committed_epoch {
return Err(LoadRackSecretError::NotAvailable(epoch));
} else if epoch > latest_committed_epoch {
return Err(LoadRackSecretError::NotCommitted(epoch));
} else {
unreachable!(
"already would have returned rack secret for latest \
committed epoch ({epoch}) if requested"
"epoch comparisons for epoch({epoch}) \
<= latest_committed_epoch({latest_committed_epoch}) \
already handled"
);
}
}
Expand Down Expand Up @@ -107,7 +134,7 @@ impl RackSecretLoader {
latest_committed_epoch,
collecting_epoch: collector.config.epoch,
});
return Err(LoadRackSecretError::Alarm);
Err(LoadRackSecretError::Alarm)
}
}
}
Expand Down Expand Up @@ -154,14 +181,35 @@ impl RackSecretLoader {
}
}

#[derive(Debug, Clone)]
struct ShareCollector {
// Pub only for use in daft
#[derive(Debug, Clone, Diffable)]
pub struct ShareCollector {
#[daft(ignore)]
log: Logger,
// A copy of the configuration stored in persistent state
#[daft(leaf)]
config: Configuration,
shares: BTreeMap<PlatformId, Share>,
}

// Equality considers only the configuration and the collected shares; the
// `log` field does not take part in the comparison.
impl PartialEq for ShareCollector {
    fn eq(&self, other: &Self) -> bool {
        (&self.config, &self.shares) == (&other.config, &other.shares)
    }
}

// Marker impl: `ShareCollector` equality is declared to be a full
// equivalence relation.
impl Eq for ShareCollector {}

// Read-only accessors for the fields of the derived diff type.
impl<'daft> ShareCollectorDiff<'daft> {
/// Returns the diff leaf for the collector's `config` field.
pub fn config(&self) -> Leaf<&'daft Configuration> {
self.config
}

/// Returns the map diff of the collected `shares`, keyed by platform id.
pub fn shares(&self) -> &BTreeMapDiff<'daft, PlatformId, Share> {
&self.shares
}
}

impl ShareCollector {
pub fn new(
log: &Logger,
Expand Down
4 changes: 4 additions & 0 deletions trust-quorum/test-utils/src/event.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ pub enum Event {
/// Since replay is deterministic, we actually know what this value is,
/// even though a prior event may not have yet sent the message.
DeliverEnvelope(Envelope),
LoadRackSecret(PlatformId, Epoch),
ClearSecrets(PlatformId),
/// Pull a `NexusReply` off the underlay network and update the `NexusState`
DeliverNexusReply(NexusReply),
CommitConfiguration(PlatformId),
Expand All @@ -44,6 +46,8 @@ impl Event {
Self::SendNexusReplyOnUnderlay(_) => vec![],
Self::DeliverEnvelope(envelope) => vec![envelope.to.clone()],
Self::DeliverNexusReply(_) => vec![],
Self::LoadRackSecret(id, _) => vec![id.clone()],
Self::ClearSecrets(id) => vec![id.clone()],
Self::CommitConfiguration(id) => vec![id.clone()],
Self::Reconfigure(_) => vec![],
}
Expand Down
75 changes: 71 additions & 4 deletions trust-quorum/test-utils/src/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@ use std::collections::{BTreeMap, BTreeSet};
use std::fmt::Display;
use trust_quorum::{
Configuration, CoordinatorOperation, CoordinatorStateDiff, Envelope, Epoch,
Node, NodeCallerCtx, NodeCommonCtx, NodeCtx, NodeCtxDiff, NodeDiff,
PeerMsgKind, PlatformId, ValidatedReconfigureMsgDiff,
LoadRackSecretError, Node, NodeCallerCtx, NodeCommonCtx, NodeCtx,
NodeCtxDiff, NodeDiff, PeerMsgKind, PlatformId,
ValidatedReconfigureMsgDiff,
};

// The state of our entire system including the system under test and
Expand Down Expand Up @@ -204,6 +205,12 @@ impl TqState {
Event::DeliverEnvelope(envelope) => {
self.apply_event_deliver_envelope(envelope);
}
Event::LoadRackSecret(id, epoch) => {
self.apply_event_load_rack_secret(id, epoch);
}
Event::ClearSecrets(id) => {
self.apply_event_clear_secrets(id);
}
Event::DeliverNexusReply(reply) => {
self.apply_event_deliver_nexus_reply(reply);
}
Expand Down Expand Up @@ -258,8 +265,7 @@ impl TqState {
fn apply_event_commit(&mut self, id: PlatformId) {
let rack_id = self.nexus.rack_id;
let latest_config = self.nexus.latest_config();
let (node, ctx) =
self.sut.nodes.get_mut(&id).expect("destination exists");
let (node, ctx) = self.sut.nodes.get_mut(&id).expect("node exists");
node.commit_configuration(ctx, rack_id, latest_config.epoch)
.expect("commit succeeded");

Expand All @@ -269,6 +275,54 @@ impl TqState {
});
}

/// Apply a `LoadRackSecret` event: ask node `id` to load the rack secret for
/// `epoch`, then assert a postcondition matching whichever result came back.
///
/// Panics if the node does not exist, if any postcondition fails, or if the
/// load reports an alarm.
fn apply_event_load_rack_secret(&mut self, id: PlatformId, epoch: Epoch) {
let (node, ctx) = self.sut.nodes.get_mut(&id).expect("node exists");

// Postcondition checks
match node.load_rack_secret(ctx, epoch) {
Ok(None) => {
// No secret yet: the node must be collecting shares that cover
// the requested epoch.
assert!(node.is_collecting_shares_for_rack_secret(epoch));
}
Ok(Some(_)) => {
// We may be collecting for a later epoch, but haven't thrown
// out the old secret, so we don't check if we are collecting as
// in the `Ok(None)` clause above.

// If we can load a rack secret then we have either committed
// for this epoch or a later epoch.
assert!(
ctx.persistent_state()
.latest_committed_epoch()
.expect("at least one committed epoch")
>= epoch
);
}
Err(LoadRackSecretError::NoCommittedConfigurations) => {
// This error is only expected while the persistent state is
// still uninitialized.
assert!(ctx.persistent_state().is_uninitialized());
}
// Note: the `epoch` bound here is the one carried by the error and
// shadows the function parameter inside this arm.
Err(LoadRackSecretError::NotCommitted(epoch)) => {
assert!(!ctx.persistent_state().commits.contains(&epoch));
}
Err(LoadRackSecretError::Alarm) => {
// We should not see any alarms in this test
panic!("alarm seen");
}
Err(LoadRackSecretError::NotAvailable(_)) => {
// A secret is only reported unavailable for an epoch strictly
// older than the latest committed one.
assert!(
ctx.persistent_state()
.latest_committed_epoch()
.expect("latest committed epoch exists")
> epoch
);
}
}
}

/// Apply a `ClearSecrets` event: drop the rack secrets cached by node `id`.
///
/// Panics if the node does not exist.
fn apply_event_clear_secrets(&mut self, id: PlatformId) {
    let entry = self.sut.nodes.get_mut(&id).expect("node exists");
    // Only the node half of the entry is needed; its context is untouched.
    entry.0.clear_secrets();
}

/// Apply a `SendNexusReplyOnUnderlay` event by pushing `reply` onto the
/// simulated underlay network.
fn apply_event_send_nexus_reply_on_underlay(&mut self, reply: NexusReply) {
self.underlay_network.push(reply);
}
Expand Down Expand Up @@ -802,6 +856,19 @@ fn display_node_diff(
}
}

if node_diff.rack_secret_loader().collector().is_modified() {
// It's too tedious to do the diff work here right now.
writeln!(f, " Rack secret collector changed")?;
}
if !node_diff.rack_secret_loader().loaded().added.is_empty() {
// It's too tedious to do the diff work here right now.
writeln!(f, " Rack secrets loaded")?;
}
if !node_diff.rack_secret_loader().loaded().removed.is_empty() {
// It's too tedious to do the diff work here right now.
writeln!(f, " Rack secrets cleared")?;
}

Ok(())
}

Expand Down
1 change: 1 addition & 0 deletions trust-quorum/tests/cluster.proptest-regressions

Large diffs are not rendered by default.

Loading
Loading