Skip to content

Commit ff3c4a9

Browse files
committed
refactor: align operator doppelgänger grace period with message TTL window
Synchronizes the operator doppelgänger grace period with the complete message validation TTL formula to prevent false positives from late-arriving messages after restart. Changes: - Export LATE_MESSAGE_MARGIN from message_validator for reuse - Calculate grace period using full TTL: (slots_per_epoch + LATE_SLOT_ALLOWANCE) × slot_duration + LATE_MESSAGE_MARGIN - Remove obsolete OPERATOR_DOPPELGANGER_GRACE_PERIOD_SECS from network - Simplify client startup by moving grace period calculation into service - Update tests to dynamically compute durations from service config Grace period on mainnet: (32 + 2) × 12s + 3s = 411 seconds This matches the complete message acceptance window for Committee and Aggregator roles, eliminating the theoretical 3-second vulnerability.
1 parent 550aea5 commit ff3c4a9

File tree

7 files changed

+62
-72
lines changed

7 files changed

+62
-72
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

anchor/client/src/lib.rs

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -92,20 +92,7 @@ fn start_operator_doppelganger(
9292
wait_epochs: u64,
9393
executor: &TaskExecutor,
9494
) {
95-
info!(
96-
wait_epochs = wait_epochs,
97-
grace_period_secs = network::OPERATOR_DOPPELGANGER_GRACE_PERIOD_SECS,
98-
"Operator doppelgänger: starting monitoring period"
99-
);
100-
101-
// Spawn background task to end monitoring after grace period + wait epochs
102-
// Grace period prevents false positives from receiving our own old messages after
103-
// restart (they remain in gossip cache for ~4.2s)
104-
service.spawn_monitor_task(
105-
Duration::from_secs(network::OPERATOR_DOPPELGANGER_GRACE_PERIOD_SECS),
106-
wait_epochs,
107-
executor,
108-
);
95+
service.clone().spawn_monitor_task(wait_epochs, executor);
10996
}
11097

11198
impl Client {

anchor/message_validator/src/lib.rs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -582,8 +582,15 @@ pub(crate) fn validate_beacon_duty(
582582
/// clockErrorTolerance is the maximum amount of clock error we expect to see between nodes.
583583
const CLOCK_ERROR_TOLERANCE: Duration = Duration::from_millis(50);
584584
/// lateMessageMargin is the duration past a message's TTL in which it is still considered valid.
585-
const LATE_MESSAGE_MARGIN: Duration = Duration::from_secs(3);
586-
const LATE_SLOT_ALLOWANCE: u64 = 2;
585+
///
586+
/// This margin is added to the deadline calculation after converting slot-based TTL to time.
587+
/// The full message acceptance window is: (ttl_slots × slot_duration) + LATE_MESSAGE_MARGIN
588+
pub const LATE_MESSAGE_MARGIN: Duration = Duration::from_secs(3);
589+
/// Number of slots added to TTL windows for late message acceptance
590+
///
591+
/// Used in calculating message acceptance deadlines for Committee and Aggregator roles.
592+
/// The actual TTL is: slots_per_epoch + LATE_SLOT_ALLOWANCE
593+
pub const LATE_SLOT_ALLOWANCE: u64 = 2;
587594

588595
/// Validates that the message's slot timing is correct
589596
pub(crate) fn validate_slot_time(

anchor/network/src/behaviour.rs

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,16 +26,6 @@ pub const GOSSIPSUB_HEARTBEAT_INTERVAL_MILLIS: u64 = 700;
2626
/// Messages remain in mcache for: history_length × heartbeat_interval (6 × 700ms = 4.2s)
2727
pub const GOSSIPSUB_HISTORY_LENGTH: usize = 6;
2828

29-
/// Operator doppelgänger grace period in seconds
30-
///
31-
/// Wait after startup before detecting twins to prevent false positives from own old messages.
32-
/// Automatically derived as: (gossip_cache_window + 1 second buffer)
33-
/// Default: 5s (cache window is 4.2s)
34-
pub const OPERATOR_DOPPELGANGER_GRACE_PERIOD_SECS: u64 = {
35-
let cache_window_millis = GOSSIPSUB_HISTORY_LENGTH as u64 * GOSSIPSUB_HEARTBEAT_INTERVAL_MILLIS;
36-
cache_window_millis / 1000 + 1
37-
};
38-
3929
/// Custom message ID function matching Go-SSV implementation.
4030
/// Uses xxhash64 of the full message to ensure uniqueness across operators.
4131
///

anchor/network/src/lib.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ mod peer_manager;
1111
mod scoring;
1212
mod transport;
1313

14-
pub use behaviour::OPERATOR_DOPPELGANGER_GRACE_PERIOD_SECS;
1514
pub use config::{Config, DEFAULT_DISC_PORT, DEFAULT_QUIC_PORT, DEFAULT_TCP_PORT};
1615
pub use network::Network;
1716
pub use network_utils::listen_addr::{ListenAddr, ListenAddress};

anchor/operator_doppelganger/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ edition = { workspace = true }
66
[dependencies]
77
database = { workspace = true }
88
futures = { workspace = true }
9+
message_validator = { workspace = true }
910
parking_lot = { workspace = true }
1011
ssv_types = { workspace = true }
1112
task_executor = { workspace = true }

anchor/operator_doppelganger/src/service.rs

Lines changed: 50 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -115,39 +115,40 @@ impl OperatorDoppelgangerService {
115115
}
116116

117117
/// Spawn a background task to end monitoring after the configured wait period
118-
///
119-
/// The task sleeps for the grace period plus the monitoring duration, then transitions
120-
/// to active mode.
121-
///
122-
/// # Arguments
123-
///
124-
/// * `grace_period` - Duration to wait before starting twin detection. Should be slightly
125-
/// longer than the gossip message cache window (history_length × heartbeat_interval ≈ 4.2s)
126-
/// to ensure our own old messages have expired from the network. See `DoppelgangerState`
127-
/// documentation for details on why this prevents false positives.
128-
/// * `wait_epochs` - Number of epochs to monitor for doppelgängers after grace period ends
129-
pub fn spawn_monitor_task(
130-
self: Arc<Self>,
131-
grace_period: Duration,
132-
wait_epochs: u64,
133-
executor: &TaskExecutor,
134-
) {
135-
// Calculate monitoring duration (after grace period)
136-
let monitoring_duration =
137-
Duration::from_secs(wait_epochs * self.slots_per_epoch * self.slot_duration.as_secs());
118+
pub fn spawn_monitor_task(self: Arc<Self>, wait_epochs: u64, executor: &TaskExecutor) {
119+
// Grace period must match the full message TTL window to prevent false positives
120+
// from late-arriving messages after restart.
121+
//
122+
// Full TTL = (slots_per_epoch + LATE_SLOT_ALLOWANCE) × slot_duration + LATE_MESSAGE_MARGIN
123+
// This matches the complete deadline calculation in message validation.
124+
let grace_period_slots = self.slots_per_epoch + message_validator::LATE_SLOT_ALLOWANCE;
125+
let grace_period_secs = grace_period_slots * self.slot_duration.as_secs();
126+
let grace_period =
127+
Duration::from_secs(grace_period_secs) + message_validator::LATE_MESSAGE_MARGIN;
128+
129+
let monitoring_slots = wait_epochs * self.slots_per_epoch;
130+
let monitoring_secs = monitoring_slots * self.slot_duration.as_secs();
131+
let monitoring_duration = Duration::from_secs(monitoring_secs);
138132

139133
executor.spawn_without_exit(
140134
async move {
141-
// Wait for grace period - prevents false positives from own old messages
135+
info!(
136+
grace_period_slots = grace_period_slots,
137+
grace_period_secs = grace_period.as_secs(),
138+
"Operator doppelgänger: entering grace period"
139+
);
142140
tokio::time::sleep(grace_period).await;
143141

144-
// Grace period complete - start detecting twins
142+
info!(
143+
monitoring_epochs = wait_epochs,
144+
monitoring_secs = monitoring_duration.as_secs(),
145+
"Operator doppelgänger: grace period complete, starting monitoring"
146+
);
145147
self.state.write().end_grace_period();
146148

147-
// Wait for monitoring period to complete
148149
tokio::time::sleep(monitoring_duration).await;
149150

150-
// Monitoring complete - stop checking for doppelgängers
151+
info!("Operator doppelgänger: monitoring period complete");
151152
self.end_monitoring_period();
152153
},
153154
"doppelganger-monitor",
@@ -273,25 +274,26 @@ mod tests {
273274
TaskExecutor::new(handle, exit, shutdown_tx, "doppelganger_test".into())
274275
}
275276

276-
/// Helper to spawn monitor task and advance time past grace period
277-
///
278-
/// Returns the service in monitoring mode with grace period complete,
279-
/// ready for twin detection tests.
280277
async fn spawn_and_advance_past_grace_period(
281278
service: Arc<OperatorDoppelgangerService>,
282279
executor: &TaskExecutor,
283280
) {
284-
let grace_period = Duration::from_secs(5);
285281
let wait_epochs = 2;
286282

287283
// Spawn monitor task
288-
service
289-
.clone()
290-
.spawn_monitor_task(grace_period, wait_epochs, executor);
284+
service.clone().spawn_monitor_task(wait_epochs, executor);
291285

292286
// Give the spawned task a chance to start
293287
tokio::task::yield_now().await;
294288

289+
// Calculate grace period from service configuration
290+
// Grace period = (slots_per_epoch + LATE_SLOT_ALLOWANCE) × slot_duration +
291+
// LATE_MESSAGE_MARGIN
292+
let grace_period_slots = service.slots_per_epoch + message_validator::LATE_SLOT_ALLOWANCE;
293+
let grace_period =
294+
Duration::from_secs(grace_period_slots * service.slot_duration.as_secs())
295+
+ message_validator::LATE_MESSAGE_MARGIN;
296+
295297
// Advance time past grace period
296298
tokio::time::advance(grace_period).await;
297299

@@ -430,12 +432,9 @@ mod tests {
430432
let committee_id = CommitteeId([1u8; 32]);
431433
let executor = create_test_executor();
432434

433-
// Spawn the monitor task with grace period
434-
let grace_period = Duration::from_secs(5);
435+
// Spawn the monitor task
435436
let wait_epochs = 2;
436-
service
437-
.clone()
438-
.spawn_monitor_task(grace_period, wait_epochs, &executor);
437+
service.clone().spawn_monitor_task(wait_epochs, &executor);
439438

440439
// Still in grace period (don't advance time)
441440
assert!(!service.is_monitoring());
@@ -505,17 +504,23 @@ mod tests {
505504
let executor = create_test_executor();
506505

507506
// Spawn the monitor task
508-
let grace_period = Duration::from_secs(5);
509507
let wait_epochs = 2;
510-
service
511-
.clone()
512-
.spawn_monitor_task(grace_period, wait_epochs, &executor);
508+
service.clone().spawn_monitor_task(wait_epochs, &executor);
513509

514510
// Give the spawned task a chance to start
515511
tokio::task::yield_now().await;
516512

517-
// Calculate monitoring duration
518-
let monitoring_duration = Duration::from_secs(wait_epochs * 12); // epochs * (slots_per_epoch=1) * (slot_duration=12s)
513+
// Calculate durations from service configuration
514+
// Grace period = (slots_per_epoch + LATE_SLOT_ALLOWANCE) × slot_duration +
515+
// LATE_MESSAGE_MARGIN
516+
let grace_period_slots = service.slots_per_epoch + message_validator::LATE_SLOT_ALLOWANCE;
517+
let grace_period =
518+
Duration::from_secs(grace_period_slots * service.slot_duration.as_secs())
519+
+ message_validator::LATE_MESSAGE_MARGIN;
520+
521+
let monitoring_slots = wait_epochs * service.slots_per_epoch;
522+
let monitoring_duration =
523+
Duration::from_secs(monitoring_slots * service.slot_duration.as_secs());
519524

520525
// Advance time past grace period first
521526
tokio::time::advance(grace_period).await;
@@ -532,11 +537,11 @@ mod tests {
532537
let (signed_message, qbft_message) =
533538
create_test_message(committee_id, vec![OperatorId(1)], 10, 0);
534539

535-
// This should NOT detect a twin (monitoring period ended via timer)
540+
// This should NOT detect a twin (monitoring period completed via timer)
536541
let result = service.is_doppelganger(&signed_message, Some(&qbft_message));
537542
assert!(
538543
!result,
539-
"Message after monitoring period should NOT detect twin"
544+
"Message after monitoring period should NOT detect twin (monitoring complete)"
540545
);
541546
}
542547
}

0 commit comments

Comments
 (0)