Skip to content

Commit 565e59d

Browse files
authored
TQ: Add NodeCtx for use in Node API (#8629)
Rather than passing in separate parameters to `Node` methods, we pass in a context shared with the caller. Importantly, due to the use of wrapper traits, the caller and methods are only able to mutate things that they each should be able to mutate. This makes the function signatures shorter and also eliminates the case where we end up returning an `Option<PersistentState>`. This was getting tedious, not just here, but in other code not yet pushed. Since the caller already has to check if there are any outgoing messages to send after each api call, they can also check to see if the persistent state has changed. This isn't much of a burden and simplifies the `Node` internals somewhat. It also eliminates a clone. We can add other state to the context as needed over time.
1 parent 0bef6e9 commit 565e59d

File tree

6 files changed

+288
-172
lines changed

6 files changed

+288
-172
lines changed

trust-quorum/src/coordinator_state.rs

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,11 @@
44

55
//! State of a reconfiguration coordinator inside a [`crate::Node`]
66
7+
use crate::NodeHandlerCtx;
78
use crate::crypto::{LrtqShare, Sha3_256Digest, ShareDigestLrtq};
89
use crate::messages::PeerMsg;
910
use crate::validators::{ReconfigurationError, ValidatedReconfigureMsg};
10-
use crate::{Configuration, Envelope, Epoch, PeerMsgKind, PlatformId};
11+
use crate::{Configuration, Epoch, PeerMsgKind, PlatformId};
1112
use gfss::shamir::Share;
1213
use slog::{Logger, o, warn};
1314
use std::collections::{BTreeMap, BTreeSet};
@@ -147,7 +148,8 @@ impl CoordinatorState {
147148
//
148149
// This method is "in progress" - allow unused parameters for now
149150
#[expect(unused)]
150-
pub fn send_msgs(&mut self, now: Instant, outbox: &mut Vec<Envelope>) {
151+
pub fn send_msgs(&mut self, ctx: &mut impl NodeHandlerCtx) {
152+
let now = ctx.now();
151153
if now < self.retry_deadline {
152154
return;
153155
}
@@ -165,14 +167,13 @@ impl CoordinatorState {
165167
for (platform_id, (config, share)) in
166168
prepares.clone().into_iter()
167169
{
168-
outbox.push(Envelope {
169-
to: platform_id,
170-
from: self.reconfigure_msg.coordinator_id().clone(),
171-
msg: PeerMsg {
170+
ctx.send(
171+
platform_id,
172+
PeerMsg {
172173
rack_id,
173174
kind: PeerMsgKind::Prepare { config, share },
174175
},
175-
});
176+
);
176177
}
177178
}
178179
}

trust-quorum/src/crypto.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@ use crate::{Epoch, Threshold};
3232
const LRTQ_SHARE_SIZE: usize = 33;
3333

3434
// The size in bytes of a single rack secret
35-
const SECRET_LEN: usize = 32;
35+
//
36+
// Public only for docs
37+
pub const SECRET_LEN: usize = 32;
3638

3739
// The size in bytes of an `Epoch`
3840
const EPOCH_LEN: usize = size_of::<Epoch>();

trust-quorum/src/lib.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,17 @@ mod coordinator_state;
1717
pub(crate) mod crypto;
1818
mod messages;
1919
mod node;
20+
mod node_ctx;
2021
mod persistent_state;
2122
mod validators;
2223
pub use configuration::Configuration;
2324
pub(crate) use coordinator_state::CoordinatorState;
2425
pub use crypto::RackSecret;
2526
pub use messages::*;
2627
pub use node::Node;
28+
// public only for docs.
29+
pub use node_ctx::NodeHandlerCtx;
30+
pub use node_ctx::{NodeCallerCtx, NodeCommonCtx, NodeCtx};
2731
pub use persistent_state::{PersistentState, PersistentStateSummary};
2832

2933
#[derive(

trust-quorum/src/node.rs

Lines changed: 52 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,8 @@
1616
//! Node, and so this should not be problematic.
1717
1818
use crate::validators::{ReconfigurationError, ValidatedReconfigureMsg};
19-
use crate::{
20-
CoordinatorState, Envelope, Epoch, PersistentState, PlatformId, messages::*,
21-
};
19+
use crate::{CoordinatorState, Epoch, NodeHandlerCtx, PlatformId, messages::*};
2220
use slog::{Logger, error, o, warn};
23-
use std::time::Instant;
2421

2522
/// An entity capable of participating in trust quorum
2623
///
@@ -30,30 +27,16 @@ use std::time::Instant;
3027
pub struct Node {
3128
log: Logger,
3229

33-
/// The unique hardware ID of a sled
34-
platform_id: PlatformId,
35-
36-
/// State that gets persistenly stored in ledgers
37-
persistent_state: PersistentState,
38-
3930
/// In memory state for when this node is coordinating a reconfiguration
4031
coordinator_state: Option<CoordinatorState>,
4132
}
4233

4334
impl Node {
44-
pub fn new(
45-
log: Logger,
46-
platform_id: PlatformId,
47-
persistent_state: PersistentState,
48-
) -> Node {
49-
let id_str = format!("{platform_id:?}");
35+
pub fn new(log: Logger, ctx: &mut impl NodeHandlerCtx) -> Node {
36+
let id_str = format!("{:?}", ctx.platform_id());
5037
let log =
5138
log.new(o!("component" => "trust-quorum", "platform_id" => id_str));
52-
Node { log, platform_id, persistent_state, coordinator_state: None }
53-
}
54-
55-
pub fn platform_id(&self) -> &PlatformId {
56-
&self.platform_id
39+
Node { log, coordinator_state: None }
5740
}
5841

5942
/// Start coordinating a reconfiguration
@@ -64,58 +47,54 @@ impl Node {
6447
/// For upgrading from LRTQ, use `coordinate_upgrade_from_lrtq`
6548
pub fn coordinate_reconfiguration(
6649
&mut self,
67-
now: Instant,
68-
outbox: &mut Vec<Envelope>,
50+
ctx: &mut impl NodeHandlerCtx,
6951
msg: ReconfigureMsg,
70-
) -> Result<Option<PersistentState>, ReconfigurationError> {
52+
) -> Result<(), ReconfigurationError> {
7153
let Some(validated_msg) = ValidatedReconfigureMsg::new(
7254
&self.log,
73-
&self.platform_id,
55+
ctx.platform_id(),
7456
msg,
75-
(&self.persistent_state).into(),
57+
ctx.persistent_state().into(),
7658
self.coordinator_state.as_ref().map(|cs| cs.reconfigure_msg()),
7759
)?
7860
else {
7961
// This was an idempotent (duplicate) request.
80-
return Ok(None);
62+
return Ok(());
8163
};
8264

83-
let persistent_state =
84-
self.set_coordinator_state(now, validated_msg)?;
85-
self.send_coordinator_msgs(now, outbox);
86-
Ok(persistent_state)
65+
self.set_coordinator_state(ctx, validated_msg)?;
66+
self.send_coordinator_msgs(ctx);
67+
Ok(())
8768
}
8869

8970
/// Process a timer tick
9071
///
9172
/// Ticks are issued by the caller in order to move the protocol forward.
9273
/// The current time is passed in to make the calls deterministic.
93-
pub fn tick(&mut self, now: Instant, outbox: &mut Vec<Envelope>) {
94-
self.send_coordinator_msgs(now, outbox);
74+
pub fn tick(&mut self, ctx: &mut impl NodeHandlerCtx) {
75+
self.send_coordinator_msgs(ctx);
9576
}
9677

9778
/// Handle a message from another node
9879
pub fn handle(
9980
&mut self,
100-
_now: Instant,
101-
_outbox: &mut Vec<Envelope>,
81+
ctx: &mut impl NodeHandlerCtx,
10282
from: PlatformId,
10383
msg: PeerMsg,
104-
) -> Option<PersistentState> {
105-
if let Some(rack_id) = self.persistent_state.rack_id() {
84+
) {
85+
if let Some(rack_id) = ctx.persistent_state().rack_id() {
10686
if rack_id != msg.rack_id {
10787
error!(self.log, "Mismatched rack id";
10888
"from" => %from,
10989
"msg" => msg.kind.name(),
11090
"expected" => %rack_id,
11191
"got" => %msg.rack_id);
112-
return None;
92+
return;
11393
}
11494
}
11595
match msg.kind {
11696
PeerMsgKind::PrepareAck(epoch) => {
11797
self.handle_prepare_ack(from, epoch);
118-
None
11998
}
12099
_ => todo!(
121100
"cannot handle message variant yet - not implemented: {msg:?}"
@@ -154,16 +133,12 @@ impl Node {
154133
}
155134

156135
// Send any required messages as a reconfiguration coordinator
157-
fn send_coordinator_msgs(
158-
&mut self,
159-
now: Instant,
160-
outbox: &mut Vec<Envelope>,
161-
) {
136+
fn send_coordinator_msgs(&mut self, ctx: &mut impl NodeHandlerCtx) {
162137
// This function is called unconditionally in `tick` callbacks. In this
163138
// case we may not actually be a coordinator. We ignore the call in
164139
// that case.
165140
if let Some(c) = self.coordinator_state.as_mut() {
166-
c.send_msgs(now, outbox);
141+
c.send_msgs(ctx);
167142
}
168143
}
169144

@@ -175,53 +150,51 @@ impl Node {
175150
/// we have a `ValidatedReconfigureMsg`.
176151
fn set_coordinator_state(
177152
&mut self,
178-
now: Instant,
153+
ctx: &mut impl NodeHandlerCtx,
179154
msg: ValidatedReconfigureMsg,
180-
) -> Result<Option<PersistentState>, ReconfigurationError> {
155+
) -> Result<(), ReconfigurationError> {
181156
// We have no committed configuration or lrtq ledger
182-
if self.persistent_state.is_uninitialized() {
157+
if ctx.persistent_state().is_uninitialized() {
183158
let (coordinator_state, my_config, my_share) =
184159
CoordinatorState::new_uninitialized(
185160
self.log.clone(),
186-
now,
161+
ctx.now(),
187162
msg,
188163
)?;
189164
self.coordinator_state = Some(coordinator_state);
190-
self.persistent_state.shares.insert(my_config.epoch, my_share);
191-
self.persistent_state
192-
.configs
193-
.insert_unique(my_config)
194-
.expect("empty state");
165+
ctx.update_persistent_state(move |ps| {
166+
ps.shares.insert(my_config.epoch, my_share);
167+
ps.configs.insert_unique(my_config).expect("empty state");
168+
true
169+
});
195170

196-
return Ok(Some(self.persistent_state.clone()));
171+
return Ok(());
197172
}
198173

199174
// We have a committed configuration that is not LRTQ
200175
let config =
201-
self.persistent_state.latest_committed_configuration().unwrap();
176+
ctx.persistent_state().latest_committed_configuration().unwrap();
202177

203178
self.coordinator_state = Some(CoordinatorState::new_reconfiguration(
204179
self.log.clone(),
205-
now,
180+
ctx.now(),
206181
msg,
207182
&config,
208183
)?);
209184

210-
Ok(None)
185+
Ok(())
211186
}
212187
}
213188

214189
#[cfg(test)]
215190
mod tests {
216-
use std::time::Duration;
217-
218-
use crate::{Epoch, Threshold};
219-
220191
use super::*;
192+
use crate::{Epoch, NodeCallerCtx, NodeCommonCtx, NodeCtx, Threshold};
221193
use assert_matches::assert_matches;
222194
use omicron_test_utils::dev::test_setup_log;
223195
use omicron_uuid_kinds::RackUuid;
224196
use proptest::prelude::*;
197+
use std::time::Duration;
225198
use test_strategy::{Arbitrary, proptest};
226199

227200
fn arb_member() -> impl Strategy<Value = PlatformId> {
@@ -259,21 +232,21 @@ mod tests {
259232
let logctx = test_setup_log("initial_configuration");
260233
let my_platform_id =
261234
input.reconfigure_msg.members.first().unwrap().clone();
262-
let mut node = Node::new(
263-
logctx.log.clone(),
264-
my_platform_id.clone(),
265-
PersistentState::empty(),
266-
);
235+
let mut ctx = NodeCtx::new(my_platform_id.clone());
236+
let mut node = Node::new(logctx.log.clone(), &mut ctx);
237+
238+
node.coordinate_reconfiguration(
239+
&mut ctx,
240+
input.reconfigure_msg.clone(),
241+
)
242+
.expect("success");
243+
244+
// An initial configuraration always causes a change to persistent state
245+
assert!(ctx.persistent_state_change_check_and_reset());
246+
// Checking if the persistent state has changed above cleared the bit
247+
assert!(!ctx.persistent_state_change_check_and_reset());
267248

268-
let mut outbox = Vec::new();
269-
let persistent_state = node
270-
.coordinate_reconfiguration(
271-
Instant::now(),
272-
&mut outbox,
273-
input.reconfigure_msg.clone(),
274-
)
275-
.expect("success")
276-
.expect("persistent state");
249+
let persistent_state = ctx.persistent_state().clone();
277250

278251
// A PersistentState should always be returned
279252
// It should include the `PrepareMsg` for this node.
@@ -288,15 +261,15 @@ mod tests {
288261
persistent_state.configs.get(&input.reconfigure_msg.epoch).unwrap();
289262

290263
assert_eq!(config.epoch, input.reconfigure_msg.epoch);
291-
assert_eq!(config.coordinator, *node.platform_id());
264+
assert_eq!(config.coordinator, *ctx.platform_id());
292265
assert_eq!(config.members.len(), input.reconfigure_msg.members.len());
293266
assert_eq!(config.threshold, input.reconfigure_msg.threshold);
294267
assert!(config.encrypted_rack_secrets.is_none());
295268

296269
// Ensure that prepare messages are properly put in the outbox to be
297270
// sent by the I/O parts of the codebase
298-
assert_eq!(outbox.len(), config.members.len() - 1);
299-
for envelope in outbox {
271+
assert_eq!(ctx.num_envelopes(), config.members.len() - 1);
272+
for envelope in ctx.drain_envelopes() {
300273
assert_matches!(
301274
envelope.msg.kind,
302275
PeerMsgKind::Prepare{ config: prepare_config, .. } => {

0 commit comments

Comments
 (0)