From d275f0f062f7219a839e99280941cd8a2c2be62c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lancelot=20de=20Ferri=C3=A8re?= Date: Fri, 13 Mar 2026 11:52:08 +0100 Subject: [PATCH 01/22] Refactor mempool storage - split in 2 + explicit backend. PR-Branch: mempool-split --- src/mempool.rs | 14 +- src/mempool/block_construction.rs | 2 +- src/mempool/dissemination.rs | 4 +- src/mempool/module.rs | 2 +- src/mempool/own_lane.rs | 2 +- src/mempool/proposal_storage.rs | 281 +++++++++++++++ src/mempool/storage.rs | 457 +++++++++++++++++++----- src/mempool/storage_fjall.rs | 570 ------------------------------ src/mempool/storage_memory.rs | 375 -------------------- src/mempool/verify_tx.rs | 2 +- 10 files changed, 657 insertions(+), 1052 deletions(-) create mode 100644 src/mempool/proposal_storage.rs delete mode 100644 src/mempool/storage_fjall.rs delete mode 100644 src/mempool/storage_memory.rs diff --git a/src/mempool.rs b/src/mempool.rs index 60bc57772..1caebc75c 100644 --- a/src/mempool.rs +++ b/src/mempool.rs @@ -15,6 +15,7 @@ use api::RestApiMessage; use block_construction::BlockUnderConstruction; use borsh::{BorshDeserialize, BorshSerialize}; use client_sdk::tcp_client::TcpServerMessage; +use hyli_crypto::BlstCrypto; use hyli_crypto::SharedBlstCrypto; use hyli_modules::{bus::BusMessage, module_bus_client}; use hyli_net::ordered_join_set::OrderedJoinSet; @@ -31,16 +32,10 @@ use std::{ path::PathBuf, sync::Arc, }; -use storage::Storage; -use verify_tx::DataProposalVerdict; -// Pick one of the two implementations -// use storage_memory::LanesStorage; -// Pick one of the two implementations by changing the re-export below. 
-// pub use storage_memory::{shared_lanes_storage, LanesStorage}; -use hyli_crypto::BlstCrypto; -pub use storage_fjall::{shared_lanes_storage, LanesStorage}; +pub use storage::{shared_lanes_storage, LanesStorage}; use strum_macros::IntoStaticStr; use tracing::{debug, info}; +use verify_tx::DataProposalVerdict; pub mod api; pub mod block_construction; @@ -48,9 +43,8 @@ pub mod dissemination; pub mod metrics; pub mod module; pub mod own_lane; +pub mod proposal_storage; pub mod storage; -pub mod storage_fjall; -pub mod storage_memory; pub mod verifiers; pub mod verify_tx; diff --git a/src/mempool/block_construction.rs b/src/mempool/block_construction.rs index 01389c7ee..a5e655a54 100644 --- a/src/mempool/block_construction.rs +++ b/src/mempool/block_construction.rs @@ -10,7 +10,7 @@ use futures::StreamExt; use hyli_modules::{log_error, log_warn}; use super::{ - storage::{LaneEntryMetadata, MetadataOrMissingHash, Storage}, + storage::{LaneEntryMetadata, MetadataOrMissingHash}, DisseminationEvent, ValidatorDAG, }; use anyhow::{bail, Context, Result}; diff --git a/src/mempool/dissemination.rs b/src/mempool/dissemination.rs index ede12f3d4..48ccfedd4 100644 --- a/src/mempool/dissemination.rs +++ b/src/mempool/dissemination.rs @@ -29,7 +29,7 @@ use hyli_turmoil_shims::collections::HashMap; use super::{ metrics::MempoolMetrics, shared_lanes_storage, - storage::{LaneEntryMetadata, MetadataOrMissingHash, Storage}, + storage::{LaneEntryMetadata, MetadataOrMissingHash}, LanesStorage, MempoolNetMessage, }; @@ -475,7 +475,7 @@ impl DisseminationManager { Some(dp_hash) => { debug!("Updating SyncRequest to {:?}-{} as {} is present", request.from, dp_hash, request.to); request.to = dp_hash.clone(); - }, + } None => warn!("Malformed syncRequest - 'to' {} has no parent but 'from' {:?} is a DP", request.to, request.from), } } diff --git a/src/mempool/module.rs b/src/mempool/module.rs index 2718b5189..07b42ebe3 100644 --- a/src/mempool/module.rs +++ b/src/mempool/module.rs @@ -16,7 
+16,7 @@ use crate::model::SharedRunContext; use super::{ api, mempool_bus_client::MempoolBusClient, metrics::MempoolMetrics, shared_lanes_storage, - storage::Storage, Mempool, MempoolStore, + Mempool, MempoolStore, }; use anyhow::Result; diff --git a/src/mempool/own_lane.rs b/src/mempool/own_lane.rs index 6afa91db7..7e795ab81 100644 --- a/src/mempool/own_lane.rs +++ b/src/mempool/own_lane.rs @@ -8,11 +8,11 @@ use client_sdk::tcp_client::TcpServerMessage; use hyli_turmoil_shims::collections::HashMap; use tracing::{debug, info, trace}; +use super::api::RestApiMessage; use super::verifiers::{verify_proof, verify_recursive_proof}; use super::DisseminationEvent; #[cfg(test)] use super::MempoolNetMessage; -use super::{api::RestApiMessage, storage::Storage}; use indexmap::IndexMap; use std::{collections::HashSet, sync::Arc}; use tokio::task::Id as TaskId; diff --git a/src/mempool/proposal_storage.rs b/src/mempool/proposal_storage.rs new file mode 100644 index 000000000..7bb5dc426 --- /dev/null +++ b/src/mempool/proposal_storage.rs @@ -0,0 +1,281 @@ +#![expect(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] + +use std::{ + collections::{BTreeMap, HashMap}, + path::Path, + sync::{Arc, RwLock}, +}; + +use anyhow::Result; +use borsh::BorshSerialize; +use fjall::{Database, Keyspace, KeyspaceCreateOptions, KvSeparationOptions}; +use hyli_model::{LaneId, ProofData}; + +use crate::model::{DataProposal, DataProposalHash, Hashed}; + +type ProposalKey = (LaneId, DataProposalHash); +type Proofs = Vec<(u64, ProofData)>; +type SharedProofs = Arc; + +pub trait KvEncode: Send + Sync + 'static { + fn encode(&self) -> Result>; +} + +impl KvEncode for T +where + T: BorshSerialize + Send + Sync + 'static, +{ + fn encode(&self) -> Result> { + borsh::to_vec(self).map_err(Into::into) + } +} + +pub trait KvBackend: Send + Sync { + fn get(&self, key: &[u8]) -> Result>>; + fn put(&self, key: &[u8], value: Arc) -> Result<()>; + fn delete(&self, key: &[u8]) -> Result<()>; + 
fn persist(&self) -> Result<()>; +} + +#[derive(Clone, Default)] +pub struct InMemoryKvBackend { + data: Arc, Vec>>>, +} + +impl InMemoryKvBackend { + pub fn new() -> Self { + Self::default() + } +} + +impl KvBackend for InMemoryKvBackend { + fn get(&self, key: &[u8]) -> Result>> { + Ok(self.data.read().unwrap().get(key).cloned()) + } + + fn put(&self, key: &[u8], value: Arc) -> Result<()> { + self.data + .write() + .unwrap() + .insert(key.to_vec(), value.encode()?); + Ok(()) + } + + fn delete(&self, key: &[u8]) -> Result<()> { + self.data.write().unwrap().remove(key); + Ok(()) + } + + fn persist(&self) -> Result<()> { + Ok(()) + } +} + +pub struct FjallKvBackend { + db: Database, + dp_data: Keyspace, + dp_proofs: Keyspace, +} + +impl FjallKvBackend { + pub fn new(path: &Path) -> Result { + let db = Database::builder(path) + .cache_size(256 * 1024 * 1024) + .max_journaling_size(512 * 1024 * 1024) + .open()?; + + let dp_data = db.keyspace("dp_data", || { + KeyspaceCreateOptions::default() + .with_kv_separation(Some( + KvSeparationOptions::default().file_target_size(256 * 1024 * 1024), + )) + .manual_journal_persist(true) + .max_memtable_size(128 * 1024 * 1024) + })?; + + let dp_proofs = db.keyspace("dp_proofs", || { + KeyspaceCreateOptions::default() + .with_kv_separation(Some( + KvSeparationOptions::default().file_target_size(256 * 1024 * 1024), + )) + .manual_journal_persist(true) + .max_memtable_size(64 * 1024 * 1024) + })?; + + Ok(Self { + db, + dp_data, + dp_proofs, + }) + } + + fn keyspace_for_key(&self, key: &[u8]) -> &Keyspace { + if key.starts_with(b"proofs:") { + &self.dp_proofs + } else { + &self.dp_data + } + } +} + +impl KvBackend for FjallKvBackend { + fn get(&self, key: &[u8]) -> Result>> { + Ok(self + .keyspace_for_key(key) + .get(key)? 
+ .map(|slice| slice.to_vec())) + } + + fn put(&self, key: &[u8], value: Arc) -> Result<()> { + self.keyspace_for_key(key).insert(key, value.encode()?)?; + Ok(()) + } + + fn delete(&self, key: &[u8]) -> Result<()> { + self.keyspace_for_key(key).remove(key)?; + Ok(()) + } + + fn persist(&self) -> Result<()> { + self.db.persist(fjall::PersistMode::Buffer)?; + Ok(()) + } +} + +pub struct ProposalStorage { + backend: Box, + data_cache: RwLock>>, + proofs_cache: RwLock>, +} + +impl ProposalStorage { + pub fn new(path: &Path) -> Result { + Ok(Self { + backend: Box::new(FjallKvBackend::new(path)?), + data_cache: RwLock::new(HashMap::new()), + proofs_cache: RwLock::new(HashMap::new()), + }) + } + + pub fn persist(&self) -> Result<()> { + self.backend.persist() + } + + pub fn get_dp_by_hash( + &self, + lane_id: &LaneId, + dp_hash: &DataProposalHash, + ) -> Result> { + let key = proposal_key(lane_id, dp_hash); + let data_proposal = + if let Some(data_proposal) = self.data_cache.read().unwrap().get(&key).cloned() { + data_proposal + } else { + let Some(bytes) = self.backend.get(&data_key(lane_id, dp_hash)?)? else { + return Ok(None); + }; + let data_proposal = Arc::new(borsh::from_slice::(&bytes)?); + self.data_cache + .write() + .unwrap() + .insert(key, Arc::clone(&data_proposal)); + data_proposal + }; + + let mut data_proposal = data_proposal.as_ref().clone(); + // SAFETY: this hash came from the storage key for this value. + unsafe { + data_proposal.unsafe_set_hash(dp_hash); + } + Ok(Some(data_proposal)) + } + + pub fn get_proofs_by_hash( + &self, + lane_id: &LaneId, + dp_hash: &DataProposalHash, + ) -> Result> { + let key = proposal_key(lane_id, dp_hash); + if let Some(proofs) = self.proofs_cache.read().unwrap().get(&key).cloned() { + return Ok(Some(proofs.as_ref().clone())); + } + + let Some(bytes) = self.backend.get(&proofs_key(lane_id, dp_hash)?)? 
else { + return Ok(None); + }; + let proofs = Arc::new(borsh::from_slice::(&bytes)?); + self.proofs_cache + .write() + .unwrap() + .insert(key, Arc::clone(&proofs)); + Ok(Some(proofs.as_ref().clone())) + } + + pub fn delete_proofs(&self, lane_id: &LaneId, dp_hash: &DataProposalHash) -> Result<()> { + let key = proposal_key(lane_id, dp_hash); + self.backend.delete(&proofs_key(lane_id, dp_hash)?)?; + self.proofs_cache.write().unwrap().remove(&key); + Ok(()) + } + + pub fn remove_by_hash( + &self, + lane_id: &LaneId, + dp_hash: &DataProposalHash, + ) -> Result> { + let Some(data_proposal) = self.get_dp_by_hash(lane_id, dp_hash)? else { + return Ok(None); + }; + + let key = proposal_key(lane_id, dp_hash); + self.backend.delete(&data_key(lane_id, dp_hash)?)?; + self.backend.delete(&proofs_key(lane_id, dp_hash)?)?; + self.data_cache.write().unwrap().remove(&key); + self.proofs_cache.write().unwrap().remove(&key); + + Ok(Some((dp_hash.clone(), data_proposal))) + } + + pub fn put_no_verification(&self, lane_id: LaneId, data_proposal: DataProposal) -> Result<()> { + let dp_hash = data_proposal.hashed(); + let key = (lane_id, dp_hash); + let mut data_to_store = data_proposal; + let proofs = data_to_store.take_proofs(); + let data = Arc::new(data_to_store); + let proofs = Arc::new(proofs); + + self.backend.put( + &data_key(&key.0, &key.1)?, + Arc::clone(&data) as Arc, + )?; + self.backend.put( + &proofs_key(&key.0, &key.1)?, + Arc::clone(&proofs) as Arc, + )?; + self.data_cache.write().unwrap().insert(key.clone(), data); + self.proofs_cache.write().unwrap().insert(key, proofs); + Ok(()) + } + #[cfg(test)] + pub fn remove_lane_entry(&self, lane_id: &LaneId, dp_hash: &DataProposalHash) { + let _ = self.remove_by_hash(lane_id, dp_hash); + } +} + +fn proposal_key(lane_id: &LaneId, dp_hash: &DataProposalHash) -> ProposalKey { + (lane_id.clone(), dp_hash.clone()) +} + +fn data_key(lane_id: &LaneId, dp_hash: &DataProposalHash) -> Result> { + namespaced_key(b"dp:", &(lane_id, 
dp_hash)) +} + +fn proofs_key(lane_id: &LaneId, dp_hash: &DataProposalHash) -> Result> { + namespaced_key(b"proofs:", &(lane_id, dp_hash)) +} + +fn namespaced_key(prefix: &[u8], key: &K) -> Result> { + let mut encoded = prefix.to_vec(); + encoded.extend(borsh::to_vec(key)?); + Ok(encoded) +} diff --git a/src/mempool/storage.rs b/src/mempool/storage.rs index 7b6211f1e..0ac40de5e 100644 --- a/src/mempool/storage.rs +++ b/src/mempool/storage.rs @@ -3,21 +3,36 @@ use async_stream::try_stream; use borsh::{BorshDeserialize, BorshSerialize}; use futures::{Stream, StreamExt}; use hyli_crypto::BlstCrypto; -use hyli_model::{DataSized, LaneId, ProofData}; +use hyli_model::{DataSized, LaneId}; use serde::{Deserialize, Serialize}; use staking::state::Staking; -use std::{future::Future, vec}; -use tracing::{error, trace}; - -use crate::model::{ - Cut, DataProposal, DataProposalHash, DataProposalParent, Hashed, PoDA, SignedByValidator, - ValidatorPublicKey, +use std::{ + collections::{BTreeMap, HashMap}, + ops::Deref, + path::{Path, PathBuf}, + sync::{Arc, Mutex, OnceLock, RwLock, RwLockReadGuard}, + vec, +}; +use tracing::{error, info, trace, warn}; + +use crate::{ + mempool::proposal_storage::ProposalStorage, + model::{ + Cut, DataProposal, DataProposalHash, DataProposalParent, Hashed, PoDA, SignedByValidator, + ValidatorPublicKey, + }, }; use super::ValidatorDAG; pub use hyli_model::LaneBytesSize; +static SHARED_LANES: OnceLock>> = OnceLock::new(); + +pub fn shared_lanes_storage(path: &Path) -> Result { + LanesStorage::shared(path) +} + pub enum CanBePutOnTop { Yes, No, @@ -46,64 +61,234 @@ pub struct LaneEntryMetadata { pub cached_poda: Option, } -pub trait Storage { - fn persist(&self) -> Result<()>; +#[derive(Clone)] +pub struct LanesStorage { + pub lanes_tip: Arc>>, + pub(crate) lane_entries: Arc>>, + pub proposals: Arc, + // Used by the shared storage registry to know when it can drop this handle. 
+ ref_token: Arc<()>, +} + +impl LanesStorage { + /// Shared handle between mempool (read/write) and dissemination (read-only), + /// keyed by `config.data_dir` to allow multiple nodes in one process. + pub fn shared(path: &Path) -> Result { + let registry = SHARED_LANES.get_or_init(|| Mutex::new(HashMap::new())); + let mut guard = registry + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()); + + if let Some(existing) = guard.get(path) { + tracing::debug!( + "Reusing existing shared lanes storage at {}", + path.to_string_lossy() + ); + return Ok(existing.clone()); + } + + guard.retain(|_, storage| storage.ref_count() > 1); - fn contains(&self, lane_id: &LaneId, dp_hash: &DataProposalHash) -> bool; - fn get_metadata_by_hash( + tracing::debug!( + "Creating new shared lanes storage at {}", + path.to_string_lossy() + ); + + let lanes_tip = load_lanes_tip(path); + let storage = LanesStorage::new(path, lanes_tip)?; + guard.insert(path.to_path_buf(), storage.clone()); + Ok(storage) + } + + pub fn new( + _path: &Path, + lanes_tip: BTreeMap, + ) -> Result { + Ok(Self { + lanes_tip: Arc::new(RwLock::new(lanes_tip)), + lane_entries: Arc::new(RwLock::new(HashMap::new())), + proposals: Arc::new(ProposalStorage::new(_path)?), + ref_token: Arc::new(()), + }) + } + + pub fn new_handle(&self) -> Self { + self.clone() + } + + pub(crate) fn ref_count(&self) -> usize { + Arc::strong_count(&self.ref_token) + } + + pub fn record_metrics(&self) {} + + pub fn set_metrics_context(&mut self, _node_id: impl Into) {} + + pub fn get_lane_ids(&self) -> Vec { + #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] + let guard = self.lanes_tip.read().unwrap(); + guard.keys().cloned().collect() + } + + pub fn get_lane_hash_tip(&self, lane_id: &LaneId) -> Option { + #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] + let guard = self.lanes_tip.read().unwrap(); + guard.get(lane_id).map(|(hash, _)| hash.clone()) + } + + pub fn 
get_lane_size_tip(&self, lane_id: &LaneId) -> Option { + #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] + let guard = self.lanes_tip.read().unwrap(); + guard.get(lane_id).map(|(_, size)| *size) + } + + pub fn update_lane_tip( + &mut self, + lane_id: LaneId, + dp_hash: DataProposalHash, + size: LaneBytesSize, + ) -> Option<(DataProposalHash, LaneBytesSize)> { + tracing::trace!("Updating lane tip for lane {} to {:?}", lane_id, dp_hash); + #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] + let mut guard = self.lanes_tip.write().unwrap(); + guard.insert(lane_id, (dp_hash, size)) + } + + pub fn lane_tips_snapshot(&self) -> BTreeMap { + #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] + let guard = self.lanes_tip.read().unwrap(); + guard.clone() + } + + pub fn lane_tips_read( &self, - lane_id: &LaneId, - dp_hash: &DataProposalHash, - ) -> Result>; - fn get_dp_by_hash( + ) -> RwLockReadGuard<'_, BTreeMap> { + #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] + self.lanes_tip.read().unwrap() + } + + pub fn contains(&self, lane_id: &LaneId, dp_hash: &DataProposalHash) -> bool { + #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] + self.lane_entries + .read() + .unwrap() + .contains_key(&(lane_id.clone(), dp_hash.clone())) + } + + pub fn get_metadata_by_hash( &self, lane_id: &LaneId, dp_hash: &DataProposalHash, - ) -> Result>; - fn get_proofs_by_hash( + ) -> Result> { + #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] + Ok(self + .lane_entries + .read() + .unwrap() + .get(&(lane_id.clone(), dp_hash.clone())) + .cloned()) + } + + pub fn put_no_verification( &self, - lane_id: &LaneId, - dp_hash: &DataProposalHash, - ) -> Result>>; - fn delete_proofs(&mut self, lane_id: &LaneId, dp_hash: &DataProposalHash) -> Result<()>; - fn remove_by_hash( - &mut self, - lane_id: &LaneId, - dp_hash: 
&DataProposalHash, - ) -> Result>; - fn put_no_verification( - &mut self, lane_id: LaneId, - entry: (LaneEntryMetadata, DataProposal), - ) -> Result<()>; + (lane_entry, data_proposal): (LaneEntryMetadata, DataProposal), + ) -> Result<()> { + let dp_hash = data_proposal.hashed(); + self.proposals + .put_no_verification(lane_id.clone(), data_proposal)?; + #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] + self.lane_entries + .write() + .unwrap() + .insert((lane_id, dp_hash), lane_entry); + Ok(()) + } - fn add_signatures>( - &mut self, + pub fn add_signatures>( + &self, lane_id: &LaneId, dp_hash: &DataProposalHash, vote_msgs: T, - ) -> Result>; - fn set_cached_poda( - &mut self, + ) -> Result> { + #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] + let mut entries = self.lane_entries.write().unwrap(); + let Some(metadata) = entries.get_mut(&(lane_id.clone(), dp_hash.clone())) else { + anyhow::bail!( + "Can't find lane entry metadata {} for lane {}", + dp_hash, + lane_id + ); + }; + + for msg in vote_msgs { + let (dph, cumul_size) = &msg.msg; + if &metadata.cumul_size != cumul_size || dp_hash != dph { + tracing::warn!( + "Received a DataVote message with wrong hash or size: {:?}", + msg.msg + ); + continue; + } + match metadata + .signatures + .binary_search_by(|probe| probe.signature.cmp(&msg.signature)) + { + Ok(_) => {} + Err(pos) => metadata.signatures.insert(pos, msg), + } + } + + Ok(metadata.signatures.clone()) + } + + pub fn set_cached_poda( + &self, lane_id: &LaneId, dp_hash: &DataProposalHash, poda: PoDA, - ) -> Result<()>; + ) -> Result<()> { + #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] + let mut entries = self.lane_entries.write().unwrap(); + let Some(metadata) = entries.get_mut(&(lane_id.clone(), dp_hash.clone())) else { + anyhow::bail!( + "Can't find lane entry metadata {} for lane {}", + dp_hash, + lane_id + ); + }; - fn get_lane_ids(&self) -> Vec; - fn 
get_lane_hash_tip(&self, lane_id: &LaneId) -> Option; - fn get_lane_size_tip(&self, lane_id: &LaneId) -> Option; + metadata.cached_poda = Some(poda); + Ok(()) + } - fn update_lane_tip( - &mut self, - lane_id: LaneId, - dp_hash: DataProposalHash, - size: LaneBytesSize, - ) -> Option<(DataProposalHash, LaneBytesSize)>; - #[cfg(test)] - fn remove_lane_entry(&mut self, lane_id: &LaneId, dp_hash: &DataProposalHash); + pub fn remove_by_hash( + &self, + lane_id: &LaneId, + dp_hash: &DataProposalHash, + ) -> Result> { + let Some(data_proposal) = self.proposals.remove_by_hash(lane_id, dp_hash)? else { + return Ok(None); + }; + #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] + let metadata = self + .lane_entries + .write() + .unwrap() + .remove(&(lane_id.clone(), dp_hash.clone())); - fn get_latest_car( + let Some(metadata) = metadata else { + anyhow::bail!( + "Can't find lane entry metadata {} for lane {} where data could be found", + dp_hash, + lane_id + ); + }; + + Ok(Some((data_proposal.0, (metadata, data_proposal.1)))) + } + + pub fn get_latest_car( &self, lane_id: &LaneId, staking: &Staking, @@ -188,7 +373,7 @@ pub trait Storage { } /// Signs the data proposal before creating a new LaneEntry and puting it in the lane - fn store_data_proposal( + pub fn store_data_proposal( &mut self, crypto: &BlstCrypto, lane_id: &LaneId, @@ -276,21 +461,48 @@ pub trait Storage { Ok((data_proposal_hash, cumul_size)) } - // Implemented in the actual modules to potentially benefit from optimizations - // in the underlying storage - fn get_entries_between_hashes( - &self, - lane_id: &LaneId, + pub fn get_entries_between_hashes<'a>( + &'a self, + lane_id: &'a LaneId, from_data_proposal_hash: Option, to_data_proposal_hash: Option, - ) -> impl Stream>; + ) -> impl Stream> + 'a { + try_stream! 
{ + let metadata_stream = self.get_entries_metadata_between_hashes( + lane_id, + from_data_proposal_hash, + to_data_proposal_hash, + ); + futures::pin_mut!(metadata_stream); - fn get_entries_metadata_between_hashes( - &self, - lane_id: &LaneId, + while let Some(md) = metadata_stream.next().await { + match md? { + MetadataOrMissingHash::Metadata(metadata, dp_hash) => { + match self.get_dp_by_hash(lane_id, &dp_hash)? { + Some(data_proposal) => { + yield EntryOrMissingHash::Entry(metadata, data_proposal); + } + None => { + yield EntryOrMissingHash::MissingHash(dp_hash); + break; + } + } + } + MetadataOrMissingHash::MissingHash(hash) => { + yield EntryOrMissingHash::MissingHash(hash); + break; + } + } + } + } + } + + pub fn get_entries_metadata_between_hashes<'a>( + &'a self, + lane_id: &'a LaneId, from_data_proposal_hash: Option, to_data_proposal_hash: Option, - ) -> impl Stream> { + ) -> impl Stream> + 'a { // If no dp hash is provided, we use the tip of the lane let initial_dp_hash: Option = to_data_proposal_hash.or(self.get_lane_hash_tip(lane_id)); @@ -320,7 +532,7 @@ pub trait Storage { } } - fn get_lane_size_at( + pub fn get_lane_size_at( &self, lane_id: &LaneId, dp_hash: &DataProposalHash, @@ -336,11 +548,11 @@ pub trait Storage { }) } - fn get_pending_entries_in_lane( - &self, - lane_id: &LaneId, + pub fn get_pending_entries_in_lane<'a>( + &'a self, + lane_id: &'a LaneId, last_cut: Option, - ) -> impl Stream> { + ) -> impl Stream> + 'a { let lane_tip = self.get_lane_hash_tip(lane_id); let last_committed_dp_hash = match last_cut { @@ -354,40 +566,38 @@ pub trait Storage { } /// Get oldest entry in the lane wrt the last committed cut. - fn get_oldest_pending_entry( + pub async fn get_oldest_pending_entry( &self, lane_id: &LaneId, last_cut: Option, - ) -> impl Future>> { - async move { - let mut stream = Box::pin(self.get_pending_entries_in_lane(lane_id, last_cut)); - let mut last_entry = None; - - while let Some(entry) = stream.next().await { - match entry? 
{ - MetadataOrMissingHash::Metadata(metadata, dp_hash) => { - last_entry = Some((metadata, dp_hash)); - } - MetadataOrMissingHash::MissingHash(_) => { - // Missing entry, should not happen - tracing::warn!( - "Missing entry in lane {} while trying to get oldest entry", - lane_id - ); - return Ok(None); - } + ) -> Result> { + let mut stream = Box::pin(self.get_pending_entries_in_lane(lane_id, last_cut)); + let mut last_entry = None; + + while let Some(entry) = stream.next().await { + match entry? { + MetadataOrMissingHash::Metadata(metadata, dp_hash) => { + last_entry = Some((metadata, dp_hash)); + } + MetadataOrMissingHash::MissingHash(_) => { + // Missing entry, should not happen + tracing::warn!( + "Missing entry in lane {} while trying to get oldest entry", + lane_id + ); + return Ok(None); } } - - Ok(last_entry) } + + Ok(last_entry) } /// Returns CanBePutOnTop::Yes if the DataProposal can be put in the lane /// Returns CanBePutOnTop::No if the DataProposal can't be put in the lane because the parent is unknown /// Returns CanBePutOnTop::Fork if the DataProposal creates an identified fork /// Returns CanBePutOnTop::AlreadyOnTop if the DataProposal is already on top of the lane - fn can_be_put_on_top( + pub fn can_be_put_on_top( &mut self, lane_id: &LaneId, data_proposal_hash: &DataProposalHash, @@ -418,6 +628,66 @@ pub trait Storage { } } } + + #[cfg(test)] + pub fn remove_lane_entry(&mut self, lane_id: &LaneId, dp_hash: &DataProposalHash) { + #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] + self.lane_entries + .write() + .unwrap() + .remove(&(lane_id.clone(), dp_hash.clone())); + self.proposals.remove_lane_entry(lane_id, dp_hash); + } + + #[cfg(test)] + pub fn put_metadata_only( + &mut self, + lane_id: &LaneId, + dp_hash: &DataProposalHash, + metadata: LaneEntryMetadata, + ) -> Result<()> { + #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] + self.lane_entries + .write() + .unwrap() + 
.insert((lane_id.clone(), dp_hash.clone()), metadata); + Ok(()) + } +} + +impl Deref for LanesStorage { + type Target = ProposalStorage; + + fn deref(&self) -> &Self::Target { + &self.proposals + } +} + +fn load_lanes_tip(path: &Path) -> BTreeMap { + let file = path.join("mempool_lanes_tip.bin"); + match std::fs::File::open(&file) { + Ok(mut reader) => match borsh::from_reader(&mut reader) { + Ok(data) => { + info!("Loaded data from disk {}", file.to_string_lossy()); + data + } + Err(err) => { + warn!( + "Failed to load lanes tip from {}: {}", + file.to_string_lossy(), + err + ); + BTreeMap::default() + } + }, + Err(_) => { + info!( + "File {} not found for lanes tip (using default)", + file.to_string_lossy() + ); + BTreeMap::default() + } + } } #[cfg(test)] @@ -425,7 +695,6 @@ mod tests { use std::collections::BTreeMap; use super::*; - use crate::mempool::storage_memory::LanesStorage; use crate::model::*; use assertables::assert_none; use futures::StreamExt; @@ -441,7 +710,7 @@ mod tests { async fn test_put_contains_get() { let crypto = BlstCrypto::new("1").unwrap(); let lane_id = &LaneId::new(crypto.validator_pubkey().clone()); - let mut storage = setup_storage(); + let storage = setup_storage(); let data_proposal = DataProposal::new_root(lane_id.clone(), vec![]); let cumul_size: LaneBytesSize = LaneBytesSize(data_proposal.estimate_size() as u64); @@ -465,7 +734,11 @@ mod tests { entry ); assert_eq!( - storage.get_dp_by_hash(lane_id, &dp_hash).unwrap().unwrap(), + storage + .proposals + .get_dp_by_hash(lane_id, &dp_hash) + .unwrap() + .unwrap(), data_proposal ); } @@ -474,7 +747,7 @@ mod tests { async fn test_store_proofs_separately_and_hydrate() { let crypto = BlstCrypto::new("proofs").unwrap(); let lane_id = &LaneId::new(crypto.validator_pubkey().clone()); - let mut storage = setup_storage(); + let storage = setup_storage(); // Build a DataProposal with a VerifiedProof tx that includes an inlined proof let proof = ProofData(vec![1, 2, 3, 4]); @@ -514,6 +787,7 
@@ mod tests { // Stored DP must have proofs removed let stored_dp = storage + .proposals .get_dp_by_hash(lane_id, &dp_hash) .unwrap() .expect("stored dp"); @@ -527,6 +801,7 @@ mod tests { // Proofs must be available in the side-store let proofs = storage + .proposals .get_proofs_by_hash(lane_id, &dp_hash) .unwrap() .expect("proofs stored"); @@ -547,7 +822,7 @@ mod tests { async fn test_update() { let crypto: BlstCrypto = BlstCrypto::new("1").unwrap(); let lane_id = &LaneId::new(crypto.validator_pubkey().clone()); - let mut storage = setup_storage(); + let storage = setup_storage(); let data_proposal = DataProposal::new_root(lane_id.clone(), vec![]); let cumul_size: LaneBytesSize = LaneBytesSize(data_proposal.estimate_size() as u64); let mut entry = LaneEntryMetadata { diff --git a/src/mempool/storage_fjall.rs b/src/mempool/storage_fjall.rs deleted file mode 100644 index 660c8c2e5..000000000 --- a/src/mempool/storage_fjall.rs +++ /dev/null @@ -1,570 +0,0 @@ -use std::{ - collections::{BTreeMap, HashMap}, - path::{Path, PathBuf}, - sync::{Arc, Mutex, OnceLock, RwLock}, -}; - -use anyhow::{bail, Result}; -use async_stream::try_stream; -use fjall::{Database, Keyspace, KeyspaceCreateOptions, KvSeparationOptions, Slice}; -use futures::Stream; -use hyli_model::{LaneId, ProofData}; -use hyli_modules::utils::fjall_metrics::FjallMetrics; -use std::time::Instant; -use tracing::{info, warn}; - -use crate::{ - mempool::storage::MetadataOrMissingHash, - model::{DataProposal, DataProposalHash, Hashed, PoDA}, -}; -use hyli_modules::log_warn; - -use super::{ - storage::{EntryOrMissingHash, LaneEntryMetadata, Storage}, - ValidatorDAG, -}; - -pub use hyli_model::LaneBytesSize; - -#[derive(Clone)] -pub struct LanesStorage { - lanes_tip: Arc>>, - db: Database, - by_hash_metadata: Keyspace, - by_hash_data: Keyspace, - dp_proofs: Keyspace, - metrics: FjallMetrics, - // Used by the shared storage to know when it can drop this handle. 
- ref_token: Arc<()>, -} - -static SHARED_LANES: OnceLock>> = OnceLock::new(); - -/// Shared handle between mempool (read/write) and dissemination (read-only), -/// keyed by `config.data_dir` to allow multiple nodes in one process. -pub fn shared_lanes_storage(path: &Path) -> Result { - let registry = SHARED_LANES.get_or_init(|| Mutex::new(HashMap::new())); - let mut guard = registry - .lock() - .unwrap_or_else(|poisoned| poisoned.into_inner()); - - if let Some(existing) = guard.get(path) { - tracing::debug!( - "Reusing existing shared lanes storage at {}", - path.to_string_lossy() - ); - return Ok(existing.clone()); - } - - // Drop cached storages that are only held by this map (ref_count == 1). - // This allowes closing opened files and avoid breaking OS limits during tests. - guard.retain(|_, storage| storage.ref_count() > 1); - - tracing::debug!( - "Creating new shared lanes storage at {}", - path.to_string_lossy() - ); - - let lanes_tip = load_lanes_tip(path); - let storage = LanesStorage::new(path, lanes_tip)?; - guard.insert(path.to_path_buf(), storage.clone()); - Ok(storage) -} - -fn load_lanes_tip(path: &Path) -> BTreeMap { - let file = path.join("mempool_lanes_tip.bin"); - match std::fs::File::open(&file) { - Ok(mut reader) => match borsh::from_reader(&mut reader) { - Ok(data) => { - info!("Loaded data from disk {}", file.to_string_lossy()); - data - } - Err(err) => { - warn!( - "Failed to load lanes tip from {}: {}", - file.to_string_lossy(), - err - ); - BTreeMap::default() - } - }, - Err(_) => { - info!( - "File {} not found for lanes tip (using default)", - file.to_string_lossy() - ); - BTreeMap::default() - } - } -} - -impl LanesStorage { - fn ref_count(&self) -> usize { - Arc::strong_count(&self.ref_token) - } - - /// Create another set of handles to share the same storage and lane tip view. 
- pub fn new_handle(&self) -> LanesStorage { - self.clone() - } - - pub fn lane_tips_snapshot(&self) -> BTreeMap { - #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] - let guard = self.lanes_tip.read().unwrap(); - guard.clone() - } - - pub fn lane_tips_read( - &self, - ) -> std::sync::RwLockReadGuard<'_, BTreeMap> { - #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] - self.lanes_tip.read().unwrap() - } - - pub fn record_metrics(&self) { - self.metrics.record_db(&self.db); - self.metrics - .record_keyspace("dp_metadata", &self.by_hash_metadata); - self.metrics.record_keyspace("dp_data", &self.by_hash_data); - self.metrics.record_keyspace("dp_proofs", &self.dp_proofs); - } - - pub fn new( - path: &Path, - lanes_tip: BTreeMap, - ) -> Result { - let db = Database::builder(path) - .cache_size(256 * 1024 * 1024) - .max_journaling_size(512 * 1024 * 1024) - .open()?; - - let by_hash_metadata = db.keyspace("dp_metadata", || { - KeyspaceCreateOptions::default() - .with_kv_separation(Some( - KvSeparationOptions::default().file_target_size(256 * 1024 * 1024), - )) - .manual_journal_persist(true) - .max_memtable_size(128 * 1024 * 1024) - })?; - - let by_hash_data = db.keyspace("dp_data", || { - KeyspaceCreateOptions::default() - .with_kv_separation(Some( - KvSeparationOptions::default().file_target_size(256 * 1024 * 1024), - )) - .manual_journal_persist(true) - .max_memtable_size(128 * 1024 * 1024) - })?; - - let dp_proofs = db.keyspace("dp_proofs", || { - KeyspaceCreateOptions::default() - .with_kv_separation(Some( - KvSeparationOptions::default().file_target_size(256 * 1024 * 1024), - )) - .manual_journal_persist(true) - .max_memtable_size(64 * 1024 * 1024) - })?; - - info!("{} DP(s) available", by_hash_metadata.len()?); - - Ok(LanesStorage { - lanes_tip: Arc::new(RwLock::new(lanes_tip)), - db, - by_hash_metadata, - by_hash_data, - dp_proofs, - metrics: FjallMetrics::global("mempool", "unknown", "mempool"), - 
ref_token: Arc::new(()), - }) - } - - pub fn set_metrics_context(&mut self, node_id: impl Into) { - self.metrics = FjallMetrics::global("mempool", node_id, "mempool"); - } - - #[cfg(test)] - pub fn put_metadata_only( - &mut self, - lane_id: &LaneId, - dp_hash: &DataProposalHash, - metadata: LaneEntryMetadata, - ) -> Result<()> { - let start = Instant::now(); - let res = self - .by_hash_metadata - .insert(format!("{lane_id}:{dp_hash}"), borsh::to_vec(&metadata)?) - .map_err(Into::into); - self.metrics.record_op( - "put_metadata_only", - "dp_metadata", - start.elapsed().as_micros() as u64, - ); - res - } -} - -impl Storage for LanesStorage { - fn persist(&self) -> Result<()> { - let start = Instant::now(); - let res = self - .db - .persist(fjall::PersistMode::Buffer) - .map_err(Into::into); - self.metrics - .record_op("persist", "db", start.elapsed().as_micros() as u64); - res - } - - fn contains(&self, lane_id: &LaneId, dp_hash: &DataProposalHash) -> bool { - let start = Instant::now(); - let res = self - .by_hash_metadata - .contains_key(format!("{lane_id}:{dp_hash}")) - .unwrap_or(false); - self.metrics.record_op( - "contains", - "dp_metadata", - start.elapsed().as_micros() as u64, - ); - res - } - - fn get_metadata_by_hash( - &self, - lane_id: &LaneId, - dp_hash: &DataProposalHash, - ) -> Result> { - let start = Instant::now(); - let item = log_warn!( - self.by_hash_metadata.get(format!("{lane_id}:{dp_hash}")), - "Can't find DP metadata {} for validator {}", - dp_hash, - lane_id - )?; - let res = item.map(decode_metadata_from_item).transpose(); - self.metrics.record_op( - "get_metadata_by_hash", - "dp_metadata", - start.elapsed().as_micros() as u64, - ); - res - } - - fn get_dp_by_hash( - &self, - lane_id: &LaneId, - dp_hash: &DataProposalHash, - ) -> Result> { - let start = Instant::now(); - let item = log_warn!( - self.by_hash_data.get(format!("{lane_id}:{dp_hash}")), - "Can't find DP data {} for validator {}", - dp_hash, - lane_id - )?; - let res = item - 
.map(|s| { - decode_data_proposal_from_item(s).map(|mut dp| { - // SAFETY: we trust our own fjall storage - unsafe { - dp.unsafe_set_hash(dp_hash); - } - dp - }) - }) - .transpose(); - self.metrics.record_op( - "get_dp_by_hash", - "dp_data", - start.elapsed().as_micros() as u64, - ); - res - } - - fn get_proofs_by_hash( - &self, - lane_id: &LaneId, - dp_hash: &DataProposalHash, - ) -> Result>> { - let start = Instant::now(); - let item = log_warn!( - self.dp_proofs.get(format!("{lane_id}:{dp_hash}")), - "Can't find DP proofs {} for validator {}", - dp_hash, - lane_id - )?; - let res = item - .map(|s| borsh::from_slice(&s).map_err(Into::into)) - .transpose(); - self.metrics.record_op( - "get_proofs_by_hash", - "dp_proofs", - start.elapsed().as_micros() as u64, - ); - res - } - - fn delete_proofs(&mut self, lane_id: &LaneId, dp_hash: &DataProposalHash) -> Result<()> { - let start = Instant::now(); - self.dp_proofs.remove(format!("{lane_id}:{dp_hash}"))?; - // NOTE: Garbage collection is now automatic in fjall 3.0 - self.metrics.record_op( - "delete_proofs", - "dp_proofs", - start.elapsed().as_micros() as u64, - ); - Ok(()) - } - - fn remove_by_hash( - &mut self, - lane_id: &LaneId, - dp_hash: &DataProposalHash, - ) -> Result> { - let start = Instant::now(); - if let Some(lane_entry) = self.get_metadata_by_hash(lane_id, dp_hash)? { - self.by_hash_metadata - .remove(format!("{lane_id}:{dp_hash}"))?; - // Check if have the data locally after regardless - if we don't, print an error but delete metadata anyways for consistency. - let Some(dp) = self.get_dp_by_hash(lane_id, dp_hash)? 
else { - bail!( - "Can't find DP data {} for lane {} where metadata could be found", - dp_hash, - lane_id - ); - }; - self.by_hash_data.remove(format!("{lane_id}:{dp_hash}"))?; - self.dp_proofs.remove(format!("{lane_id}:{dp_hash}"))?; - self.metrics.record_op( - "remove_by_hash", - "dp_metadata", - start.elapsed().as_micros() as u64, - ); - return Ok(Some((dp_hash.clone(), (lane_entry, dp)))); - } - self.metrics.record_op( - "remove_by_hash", - "dp_metadata", - start.elapsed().as_micros() as u64, - ); - Ok(None) - } - - fn put_no_verification( - &mut self, - lane_id: LaneId, - (lane_entry, data_proposal): (LaneEntryMetadata, DataProposal), - ) -> Result<()> { - let start = Instant::now(); - let dp_hash = data_proposal.hashed(); - let mut dp_to_store = data_proposal; - // Save full proofs separately and strip them from the stored DataProposal - let proofs = dp_to_store.take_proofs(); - let key = format!("{lane_id}:{dp_hash}"); - let metadata = encode_metadata_to_item(lane_entry)?; - let data = encode_data_proposal_to_item(dp_to_store)?; - let proofs = Slice::from(borsh::to_vec(&proofs)?); - - let mut batch = self.db.batch(); - batch.insert(&self.by_hash_metadata, key.clone(), metadata); - batch.insert(&self.by_hash_data, key.clone(), data); - batch.insert(&self.dp_proofs, key, proofs); - let res = batch.commit().map_err(Into::into); - self.metrics.record_op( - "put_no_verification", - "batch", - start.elapsed().as_micros() as u64, - ); - res - } - - fn add_signatures>( - &mut self, - lane_id: &LaneId, - dp_hash: &DataProposalHash, - vote_msgs: T, - ) -> Result> { - let start = Instant::now(); - let key = format!("{lane_id}:{dp_hash}"); - let Some(mut lem) = log_warn!( - self.by_hash_metadata.get(key.clone()), - "Can't find lane entry metadata {} for lane {}", - dp_hash, - lane_id - )? - .map(decode_metadata_from_item) - .transpose()? 
- else { - bail!( - "Can't find lane entry metadata {} for lane {}", - dp_hash, - lane_id - ); - }; - - for msg in vote_msgs { - let (dph, cumul_size) = &msg.msg; - if &lem.cumul_size != cumul_size || dp_hash != dph { - tracing::warn!( - "Received a DataVote message with wrong hash or size: {:?}", - msg.msg - ); - continue; - } - // Insert the new messages if they're not already in - match lem - .signatures - .binary_search_by(|probe| probe.signature.cmp(&msg.signature)) - { - Ok(_) => {} - Err(pos) => lem.signatures.insert(pos, msg), - } - } - let signatures = lem.signatures.clone(); - self.by_hash_metadata - .insert(key, encode_metadata_to_item(lem)?)?; - self.metrics.record_op( - "add_signatures", - "dp_metadata", - start.elapsed().as_micros() as u64, - ); - Ok(signatures) - } - - fn set_cached_poda( - &mut self, - lane_id: &LaneId, - dp_hash: &DataProposalHash, - poda: PoDA, - ) -> Result<()> { - let start = Instant::now(); - let key = format!("{lane_id}:{dp_hash}"); - let Some(mut lem) = log_warn!( - self.by_hash_metadata.get(key.clone()), - "Can't find lane entry metadata {} for lane {}", - dp_hash, - lane_id - )? - .map(decode_metadata_from_item) - .transpose()? 
- else { - bail!( - "Can't find lane entry metadata {} for lane {}", - dp_hash, - lane_id - ); - }; - - lem.cached_poda = Some(poda); - self.by_hash_metadata - .insert(key, encode_metadata_to_item(lem)?)?; - self.metrics.record_op( - "set_cached_poda", - "dp_metadata", - start.elapsed().as_micros() as u64, - ); - Ok(()) - } - - fn get_lane_ids(&self) -> Vec { - #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] - let guard = self.lanes_tip.read().unwrap(); - guard.keys().cloned().collect() - } - - fn get_lane_hash_tip(&self, lane_id: &LaneId) -> Option { - #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] - let guard = self.lanes_tip.read().unwrap(); - guard.get(lane_id).map(|(hash, _)| hash.clone()) - } - - fn get_lane_size_tip(&self, lane_id: &LaneId) -> Option { - #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] - let guard = self.lanes_tip.read().unwrap(); - guard.get(lane_id).map(|(_, size)| *size) - } - - fn update_lane_tip( - &mut self, - lane_id: LaneId, - dp_hash: DataProposalHash, - size: LaneBytesSize, - ) -> Option<(DataProposalHash, LaneBytesSize)> { - tracing::trace!("Updating lane tip for lane {} to {:?}", lane_id, dp_hash); - #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] - let mut guard = self.lanes_tip.write().unwrap(); - guard.insert(lane_id, (dp_hash, size)) - } - - fn get_entries_between_hashes( - &self, - lane_id: &LaneId, - from_data_proposal_hash: Option, - to_data_proposal_hash: Option, - ) -> impl Stream> { - tracing::trace!( - "Getting entries between hashes for lane {}: from {:?} to {:?}", - lane_id, - from_data_proposal_hash, - to_data_proposal_hash - ); - let metadata_stream = self.get_entries_metadata_between_hashes( - lane_id, - from_data_proposal_hash, - to_data_proposal_hash, - ); - - try_stream! { - for await md in metadata_stream { - match md? 
{ - MetadataOrMissingHash::Metadata(metadata, dp_hash) => { - match self.get_dp_by_hash(lane_id, &dp_hash)? { - Some(data_proposal) => { - yield EntryOrMissingHash::Entry(metadata, data_proposal); - } - None => { - yield EntryOrMissingHash::MissingHash(dp_hash); - break; - } - } - } - - MetadataOrMissingHash::MissingHash(hash) => { - yield EntryOrMissingHash::MissingHash(hash); - break; - } - } - } - } - } - - #[cfg(test)] - fn remove_lane_entry(&mut self, lane_id: &LaneId, dp_hash: &DataProposalHash) { - self.by_hash_metadata - .remove(format!("{lane_id}:{dp_hash}")) - .unwrap(); - self.by_hash_data - .remove(format!("{lane_id}:{dp_hash}")) - .unwrap(); - } -} - -fn decode_metadata_from_item(item: Slice) -> Result { - borsh::from_slice(&item).map_err(Into::into) -} - -fn encode_metadata_to_item(metadata: LaneEntryMetadata) -> Result { - borsh::to_vec(&metadata) - .map(Slice::from) - .map_err(Into::into) -} - -fn decode_data_proposal_from_item(item: Slice) -> Result { - borsh::from_slice(&item).map_err(Into::into) -} - -fn encode_data_proposal_to_item(data_proposal: DataProposal) -> Result { - borsh::to_vec(&data_proposal) - .map(Slice::from) - .map_err(Into::into) -} diff --git a/src/mempool/storage_memory.rs b/src/mempool/storage_memory.rs deleted file mode 100644 index fbf7d61e4..000000000 --- a/src/mempool/storage_memory.rs +++ /dev/null @@ -1,375 +0,0 @@ -use std::{ - collections::{BTreeMap, HashMap}, - path::{Path, PathBuf}, - sync::{Arc, Mutex, OnceLock, RwLock}, -}; - -use anyhow::{bail, Result}; -use async_stream::try_stream; -use futures::Stream; -use hyli_model::{LaneBytesSize, LaneId, ProofData}; -use tracing::{info, warn}; - -use super::{ - storage::{EntryOrMissingHash, LaneEntryMetadata, Storage}, - ValidatorDAG, -}; -use crate::{ - mempool::storage::MetadataOrMissingHash, - model::{DataProposal, DataProposalHash, Hashed, PoDA}, -}; - -#[derive(Clone)] -#[allow(clippy::type_complexity)] -pub struct LanesStorage { - pub lanes_tip: Arc>>, - // NB: do 
not iterate on these as they're unordered - pub by_hash: - Arc>>>, - // Full proofs store: key = (dp_hash, ordered tx_index/proof pairs) - pub proofs: Arc>>>>, -} - -impl Default for LanesStorage { - fn default() -> Self { - LanesStorage { - lanes_tip: Arc::new(RwLock::new(BTreeMap::default())), - by_hash: Arc::new(RwLock::new(HashMap::default())), - proofs: Arc::new(RwLock::new(HashMap::default())), - } - } -} - -static SHARED_LANES: OnceLock>> = OnceLock::new(); - -/// Shared handle between mempool (read/write) and dissemination (read-only), -/// keyed by `config.data_dir` to allow multiple nodes in one process. -pub async fn shared_lanes_storage(path: &Path) -> Result { - let registry = SHARED_LANES.get_or_init(|| Mutex::new(HashMap::new())); - let mut guard = registry - .lock() - .unwrap_or_else(|poisoned| poisoned.into_inner()); - - if let Some(existing) = guard.get(path) { - tracing::debug!( - "Reusing existing shared lanes storage at {}", - path.to_string_lossy() - ); - return Ok(existing.clone()); - } - - tracing::debug!( - "Creating new shared lanes storage at {}", - path.to_string_lossy() - ); - - let lanes_tip = load_lanes_tip(path); - let storage = LanesStorage::new(path, lanes_tip)?; - guard.insert(path.to_path_buf(), storage.clone()); - Ok(storage) -} - -fn load_lanes_tip(path: &Path) -> BTreeMap { - let file = path.join("mempool_lanes_tip.bin"); - match std::fs::File::open(&file) { - Ok(mut reader) => match borsh::from_reader(&mut reader) { - Ok(data) => { - info!("Loaded data from disk {}", file.to_string_lossy()); - data - } - Err(err) => { - warn!( - "Failed to load lanes tip from {}: {}", - file.to_string_lossy(), - err - ); - BTreeMap::default() - } - }, - Err(_) => { - info!( - "File {} not found for lanes tip (using default)", - file.to_string_lossy() - ); - BTreeMap::default() - } - } -} - -impl LanesStorage { - pub fn new_handle(&self) -> LanesStorage { - LanesStorage { - lanes_tip: Arc::clone(&self.lanes_tip), - by_hash: 
Arc::clone(&self.by_hash), - proofs: Arc::clone(&self.proofs), - } - } - - pub fn lane_tips_snapshot(&self) -> BTreeMap { - #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] - let guard = self.lanes_tip.read().unwrap(); - guard.clone() - } - - pub fn lane_tips_read( - &self, - ) -> std::sync::RwLockReadGuard<'_, BTreeMap> { - #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] - self.lanes_tip.read().unwrap() - } - pub fn new( - _path: &Path, - lanes_tip: BTreeMap, - ) -> Result { - // FIXME: load from disk - let by_hash = HashMap::default(); - - info!("{} DP(s) available", by_hash.len()); - - Ok(LanesStorage { - lanes_tip: Arc::new(RwLock::new(lanes_tip)), - by_hash: Arc::new(RwLock::new(by_hash)), - proofs: Arc::new(RwLock::new(HashMap::default())), - }) - } -} - -impl Storage for LanesStorage { - fn persist(&self) -> Result<()> { - Ok(()) - } - - fn contains(&self, lane_id: &LaneId, dp_hash: &DataProposalHash) -> bool { - #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] - let guard = self.by_hash.read().unwrap(); - guard - .get(lane_id) - .map(|lane| lane.contains_key(dp_hash)) - .unwrap_or(false) - } - - fn get_metadata_by_hash( - &self, - lane_id: &LaneId, - dp_hash: &DataProposalHash, - ) -> Result> { - #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] - let guard = self.by_hash.read().unwrap(); - Ok(guard - .get(lane_id) - .and_then(|lane| lane.get(dp_hash).map(|(metadata, _)| metadata.clone()))) - } - fn get_dp_by_hash( - &self, - lane_id: &LaneId, - dp_hash: &DataProposalHash, - ) -> Result> { - #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] - let guard = self.by_hash.read().unwrap(); - Ok(guard - .get(lane_id) - .and_then(|lane| lane.get(dp_hash).map(|(_, data)| data.clone()))) - } - - fn get_proofs_by_hash( - &self, - lane_id: &LaneId, - dp_hash: &DataProposalHash, - ) -> Result>> { - 
#[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] - let guard = self.proofs.read().unwrap(); - Ok(guard - .get(lane_id) - .and_then(|dp_map| dp_map.get(dp_hash)) - .cloned()) - } - - fn delete_proofs(&mut self, lane_id: &LaneId, dp_hash: &DataProposalHash) -> Result<()> { - #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] - let mut guard = self.proofs.write().unwrap(); - if let Some(dp_map) = guard.get_mut(lane_id) { - dp_map.remove(dp_hash); - } - Ok(()) - } - - fn remove_by_hash( - &mut self, - lane_id: &LaneId, - dp_hash: &DataProposalHash, - ) -> Result> { - let entry = { - #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] - let mut guard = self.by_hash.write().unwrap(); - guard.get_mut(lane_id).and_then(|lane| lane.remove(dp_hash)) - }; - if let Some(entry) = entry { - #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] - let mut proofs_guard = self.proofs.write().unwrap(); - if let Some(dp_map) = proofs_guard.get_mut(lane_id) { - dp_map.remove(dp_hash); - } - return Ok(Some((dp_hash.clone(), entry))); - } - Ok(None) - } - - fn put_no_verification( - &mut self, - lane_id: LaneId, - mut entry: (LaneEntryMetadata, DataProposal), - ) -> Result<()> { - let dp_hash = entry.1.hashed(); - // Save full proofs separately and strip them from the stored DataProposal - let proofs = entry.1.take_proofs(); - #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] - let mut guard = self.by_hash.write().unwrap(); - guard - .entry(lane_id.clone()) - .or_default() - .insert(dp_hash.clone(), entry); - #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] - let mut proofs_guard = self.proofs.write().unwrap(); - proofs_guard - .entry(lane_id) - .or_default() - .insert(dp_hash, proofs); - Ok(()) - } - - fn add_signatures>( - &mut self, - lane_id: &LaneId, - dp_hash: &DataProposalHash, - vote_msgs: T, - ) -> 
Result> { - #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] - let mut guard = self.by_hash.write().unwrap(); - let Some(lane) = guard.get_mut(lane_id) else { - bail!("Can't find validator {}", lane_id); - }; - - let Some((metadata, _data_proposal)) = lane.get_mut(dp_hash) else { - bail!("Can't find DP {} for validator {}", dp_hash, lane_id); - }; - - for msg in vote_msgs { - let (dph, cumul_size) = &msg.msg; - if &metadata.cumul_size != cumul_size || dp_hash != dph { - tracing::warn!( - "Received a DataVote message with wrong hash or size: {:?}", - msg.msg - ); - continue; - } - // Insert the new messages if they're not already in - match metadata - .signatures - .binary_search_by(|probe| probe.signature.cmp(&msg.signature)) - { - Ok(_) => { - tracing::trace!( - "Received duplicate DataVote message for {dph} from {}", - msg.signature.validator - ); - } - Err(pos) => metadata.signatures.insert(pos, msg), - } - } - Ok(metadata.signatures.clone()) - } - - fn set_cached_poda( - &mut self, - lane_id: &LaneId, - dp_hash: &DataProposalHash, - poda: PoDA, - ) -> Result<()> { - #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] - let mut guard = self.by_hash.write().unwrap(); - let Some(lane) = guard.get_mut(lane_id) else { - bail!("Can't find validator {}", lane_id); - }; - - let Some((metadata, _data_proposal)) = lane.get_mut(dp_hash) else { - bail!("Can't find DP {} for validator {}", dp_hash, lane_id); - }; - - metadata.cached_poda = Some(poda); - Ok(()) - } - - fn get_lane_ids(&self) -> Vec { - #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] - let guard = self.lanes_tip.read().unwrap(); - guard.keys().cloned().collect() - } - - fn get_lane_hash_tip(&self, lane_id: &LaneId) -> Option { - #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] - let guard = self.lanes_tip.read().unwrap(); - guard.get(lane_id).map(|(hash, _)| hash.clone()) - } - - fn 
get_lane_size_tip(&self, lane_id: &LaneId) -> Option { - #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] - let guard = self.lanes_tip.read().unwrap(); - guard.get(lane_id).map(|(_, size)| *size) - } - - fn update_lane_tip( - &mut self, - lane_id: LaneId, - dp_hash: DataProposalHash, - size: LaneBytesSize, - ) -> Option<(DataProposalHash, LaneBytesSize)> { - tracing::trace!("Updating lane tip for lane {} to {:?}", lane_id, dp_hash); - #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] - let mut guard = self.lanes_tip.write().unwrap(); - guard.insert(lane_id, (dp_hash, size)) - } - - fn get_entries_between_hashes( - &self, - lane_id: &LaneId, - from_data_proposal_hash: Option, - to_data_proposal_hash: Option, - ) -> impl Stream> { - let metadata_stream = self.get_entries_metadata_between_hashes( - lane_id, - from_data_proposal_hash, - to_data_proposal_hash, - ); - - try_stream! { - for await md in metadata_stream { - match md? { - MetadataOrMissingHash::MissingHash(dp_hash) => { - yield EntryOrMissingHash::MissingHash(dp_hash); - break; - } - MetadataOrMissingHash::Metadata(metadata, dp_hash) => { - match self.get_dp_by_hash(lane_id, &dp_hash)? 
{ - Some(data_proposal) => { - yield EntryOrMissingHash::Entry(metadata, data_proposal); - } - None => { - yield EntryOrMissingHash::MissingHash(dp_hash); - break; - } - } - } - } - } - } - } - - #[cfg(test)] - fn remove_lane_entry(&mut self, lane_id: &LaneId, dp_hash: &DataProposalHash) { - #[allow(clippy::unwrap_used, reason = "RwLock cannot be poisoned in our usage")] - let mut guard = self.by_hash.write().unwrap(); - if let Some(lane) = guard.get_mut(lane_id) { - lane.remove(dp_hash); - } - } -} diff --git a/src/mempool/verify_tx.rs b/src/mempool/verify_tx.rs index fe3a0d4f3..072cda414 100644 --- a/src/mempool/verify_tx.rs +++ b/src/mempool/verify_tx.rs @@ -9,7 +9,7 @@ use crate::{ }; use super::{ - storage::{CanBePutOnTop, Storage}, + storage::CanBePutOnTop, verifiers::{verify_proof, verify_recursive_proof}, }; From e113e4a6b1b20173a7eaf3fdab01445d45018871 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lancelot=20de=20Ferri=C3=A8re?= Date: Tue, 17 Mar 2026 13:43:49 +0100 Subject: [PATCH 02/22] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Reuse=20shared=20DP?= =?UTF-8?q?=20storage=20for=20signed=20blocks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR-Branch: reuse-dp-storage --- .../data_availability/blocks_memory.rs | 134 ----------------- .../src/modules/data_availability/mod.rs | 8 -- crates/hyli-modules/src/modules/mod.rs | 1 - src/data_availability.rs | 9 +- .../data_availability/block_storage.rs | 136 +++++++++++++----- src/data_availability/local_da_replayer.rs | 3 +- src/mempool/proposal_storage.rs | 30 +++- src/mempool/storage.rs | 2 +- 8 files changed, 138 insertions(+), 185 deletions(-) delete mode 100644 crates/hyli-modules/src/modules/data_availability/blocks_memory.rs delete mode 100644 crates/hyli-modules/src/modules/data_availability/mod.rs rename crates/hyli-modules/src/modules/data_availability/blocks_fjall.rs => src/data_availability/block_storage.rs (64%) diff --git 
a/crates/hyli-modules/src/modules/data_availability/blocks_memory.rs b/crates/hyli-modules/src/modules/data_availability/blocks_memory.rs deleted file mode 100644 index cd5048eef..000000000 --- a/crates/hyli-modules/src/modules/data_availability/blocks_memory.rs +++ /dev/null @@ -1,134 +0,0 @@ -#![allow(unused)] -use anyhow::Result; -use indexmap::IndexMap; -use sdk::{BlockHeight, ConsensusProposalHash, Hashed, SignedBlock}; -use std::path::Path; -use tracing::{debug, info, trace}; - -#[derive(Debug)] -pub struct Blocks { - data: IndexMap, -} - -impl Blocks { - pub fn new(_: &Path) -> Result { - Ok(Self { - data: IndexMap::new(), - }) - } - - pub fn is_empty(&self) -> bool { - self.data.is_empty() - } - - pub fn persist(&self) -> Result<()> { - Ok(()) - } - - pub fn record_metrics(&self) {} - - pub fn record_op( - &self, - _op: &'static str, - _keyspace: &'static str, - _elapsed: std::time::Duration, - ) { - } - - pub fn put(&mut self, data: SignedBlock) -> Result<()> { - let block_hash = data.hashed(); - if self.contains(&block_hash) { - return Ok(()); - } - trace!("📦 storing block {}", data.height()); - self.data.insert(block_hash, data); - Ok(()) - } - - pub fn get(&self, block_hash: &ConsensusProposalHash) -> Result> { - Ok(self.data.get(block_hash).cloned()) - } - - pub fn get_by_height(&self, height: BlockHeight) -> Result> { - let Ok(index) = self - .data - .binary_search_by(|_, block| block.height().0.cmp(&height.0)) - else { - return Ok(None); - }; - Ok(self.data.get_index(index).map(|(_, block)| block.clone())) - } - - pub fn has_by_height(&self, height: BlockHeight) -> Result { - Ok(self - .data - .binary_search_by(|_, block| block.height().0.cmp(&height.0)) - .is_ok()) - } - - pub fn contains(&mut self, block_hash: &ConsensusProposalHash) -> bool { - self.data.contains_key(block_hash) - } - - pub fn last(&self) -> Option { - self.data.last().map(|(_, block)| block.clone()) - } - - pub fn last_block_hash(&self) -> Option { - self.last().map(|b| 
b.hashed()) - } - - pub fn range( - &self, - min: BlockHeight, - max: BlockHeight, - ) -> Box> + '_> { - // Items are in order but we don't know where they are. Binary search. - let Ok(min) = self - .data - .binary_search_by(|_, block| block.height().0.cmp(&min.0)) - else { - return Box::new(::std::iter::empty()); - }; - let Ok(max) = self - .data - .binary_search_by(|_, block| block.height().0.cmp(&(max.0 - 1))) - else { - return Box::new(::std::iter::empty()); - }; - let Some(iter) = self.data.get_range(min..max + 1) else { - return Box::new(::std::iter::empty()); - }; - Box::new(iter.values().map(|block| Ok(block.hashed().clone()))) - } - - /// Scan the whole by_height table and returns the first missing height - pub fn first_hole_by_height(&self) -> Result> { - let Some(upper_bound) = self.last().map(|block| block.height()) else { - anyhow::bail!("Empty InMemory storage can't have holes"); - }; - - debug!( - "Start scanning blocks in memory to find first missing block up to {:?}", - upper_bound - ); - - for i in 0..upper_bound.0 { - if i % 1000 == 0 { - trace!("Checking block #{} is present or not", i); - } - if self - .data - .binary_search_by(|_, block| block.height().0.cmp(&i)) - .is_err() - { - info!("Found hole at height {}", i); - return Ok(Some(BlockHeight(i))); - } - } - - debug!("No holes found in InMemory storage up to {:?}", upper_bound); - - Ok(None) - } -} diff --git a/crates/hyli-modules/src/modules/data_availability/mod.rs b/crates/hyli-modules/src/modules/data_availability/mod.rs deleted file mode 100644 index c8ee3eba4..000000000 --- a/crates/hyli-modules/src/modules/data_availability/mod.rs +++ /dev/null @@ -1,8 +0,0 @@ -pub mod blocks_memory; - -#[cfg(feature = "fjall")] -pub mod blocks_fjall; -#[cfg(not(feature = "fjall"))] -pub use blocks_memory as blocks_fjall; - -pub use blocks_fjall::Blocks; diff --git a/crates/hyli-modules/src/modules/mod.rs b/crates/hyli-modules/src/modules/mod.rs index 25d2f2245..adf984433 100644 --- 
a/crates/hyli-modules/src/modules/mod.rs +++ b/crates/hyli-modules/src/modules/mod.rs @@ -18,7 +18,6 @@ pub mod contract_listener; pub mod contract_state_indexer; pub mod da_listener; pub mod da_listener_metrics; -pub mod data_availability; #[cfg(feature = "gcs")] pub mod gcs_uploader; #[cfg(feature = "gcs")] diff --git a/src/data_availability.rs b/src/data_availability.rs index 02e6c9e1a..6af467785 100644 --- a/src/data_availability.rs +++ b/src/data_availability.rs @@ -1,12 +1,10 @@ //! Minimal block storage layer for data availability. +pub mod block_storage; pub mod local_da_replayer; -// Pick one of the two implementations -use hyli_modules::modules::data_availability::blocks_fjall::Blocks; -use hyli_modules::utils::da_codec::DataAvailabilityServer; -//use hyli_modules::modules::data_availability::blocks_memory::Blocks; use hyli_modules::modules::da_listener::{DaStreamPoll, SignedDaStream}; +use hyli_modules::utils::da_codec::DataAvailabilityServer; use hyli_modules::{bus::SharedMessageBus, modules::Module}; use hyli_modules::{log_error, module_bus_client, module_handle_messages}; use hyli_net::tcp::TcpEvent; @@ -30,6 +28,7 @@ use strum_macros::AsRefStr; use tokio::task::JoinSet; use tracing::{debug, error, info, trace, warn}; +use self::block_storage::Blocks; use crate::model::SharedRunContext; impl Module for DataAvailability { @@ -38,7 +37,7 @@ impl Module for DataAvailability { async fn build(bus: SharedMessageBus, ctx: Self::Context) -> anyhow::Result { let bus = DABusClient::new_from_bus(bus.new_handle()).await; - let mut blocks = Blocks::new(&ctx.config.data_directory.join("data_availability.db"))?; + let mut blocks = Blocks::new(&ctx.config.data_directory)?; blocks.set_metrics_context(ctx.config.id.clone()); let highest_block = blocks.highest(); diff --git a/crates/hyli-modules/src/modules/data_availability/blocks_fjall.rs b/src/data_availability/block_storage.rs similarity index 64% rename from 
crates/hyli-modules/src/modules/data_availability/blocks_fjall.rs rename to src/data_availability/block_storage.rs index 7c3e0170a..fa5366b10 100644 --- a/crates/hyli-modules/src/modules/data_availability/blocks_fjall.rs +++ b/src/data_availability/block_storage.rs @@ -1,10 +1,23 @@ -use crate::utils::fjall_metrics::FjallMetrics; use anyhow::{Context, Result}; +use borsh::{BorshDeserialize, BorshSerialize}; use fjall::{Database, Keyspace, KeyspaceCreateOptions, KvSeparationOptions, Slice}; -use sdk::{BlockHeight, ConsensusProposalHash, Hashed, SignedBlock}; -use std::{fmt::Debug, path::Path, time::Instant}; +use hyli_model::{ + AggregateSignature, BlockHeight, ConsensusProposal, ConsensusProposalHash, DataProposalHash, + Hashed, LaneId, SignedBlock, +}; +use hyli_modules::utils::fjall_metrics::FjallMetrics; +use std::{fmt::Debug, path::Path, sync::Arc, time::Instant}; use tracing::{debug, info, trace}; +use crate::mempool::proposal_storage::ProposalStorage; + +#[derive(Clone, BorshSerialize, BorshDeserialize)] +struct StoredSignedBlock { + data_proposals: Vec<(LaneId, Vec)>, + consensus_proposal: ConsensusProposal, + certificate: AggregateSignature, +} + struct FjallHashKey(ConsensusProposalHash); struct FjallHeightKey([u8; 8]); struct FjallValue(Vec); @@ -35,8 +48,9 @@ impl AsRef<[u8]> for FjallHeightKey { impl FjallValue { fn new_with_block(block: &SignedBlock) -> Result { - Ok(Self(borsh::to_vec(block)?)) + Ok(Self(borsh::to_vec(&StoredSignedBlock::from_signed_block(block))?)) } + fn new_with_block_hash(block_hash: &ConsensusProposalHash) -> Result { Ok(Self(borsh::to_vec(block_hash)?)) } @@ -53,6 +67,56 @@ pub struct Blocks { by_hash: Keyspace, by_height: Keyspace, metrics: FjallMetrics, + proposals: Arc, +} + +impl StoredSignedBlock { + fn from_signed_block(block: &SignedBlock) -> Self { + Self { + data_proposals: block + .data_proposals + .iter() + .map(|(lane_id, data_proposals)| { + ( + lane_id.clone(), + data_proposals.iter().map(|dp| 
dp.hashed()).collect(), + ) + }) + .collect(), + consensus_proposal: block.consensus_proposal.clone(), + certificate: block.certificate.clone(), + } + } + + fn hydrate(self, proposals: &ProposalStorage) -> Result { + let block_hash = self.consensus_proposal.hashed(); + let data_proposals = self + .data_proposals + .into_iter() + .map(|(lane_id, hashes)| { + let data_proposals = hashes + .into_iter() + .map(|dp_hash| { + proposals + .get_dp_by_hash(&lane_id, &dp_hash)? + .with_context(|| { + format!( + "missing proposal {dp_hash} for lane {} while hydrating block {}", + lane_id, block_hash + ) + }) + }) + .collect::>>()?; + Ok((lane_id, data_proposals)) + }) + .collect::>>()?; + + Ok(SignedBlock { + data_proposals, + consensus_proposal: self.consensus_proposal, + certificate: self.certificate, + }) + } } impl Blocks { @@ -62,28 +126,31 @@ impl Blocks { by_hash: self.by_hash.clone(), by_height: self.by_height.clone(), metrics: self.metrics.clone(), + proposals: Arc::clone(&self.proposals), } } - fn decode_block(item: Slice) -> Result { + fn decode_block(&self, item: Slice) -> Result { + let stored = borsh::from_slice::(&item)?; + stored.hydrate(&self.proposals) + } + + fn decode_block_hash(item: Slice) -> Result { borsh::from_slice(&item).map_err(Into::into) } + fn decode_height(item: Slice) -> Result { let key = item.first_chunk::<8>().context("Malformed key")?; Ok(BlockHeight::from(FjallHeightKey(*key))) } - fn decode_block_hash(item: Slice) -> Result { - borsh::from_slice(&item).map_err(Into::into) - } pub fn new(path: &Path) -> Result { - let db = Database::builder(path) + let db = Database::builder(&path.join("data_availability.db")) .cache_size(256 * 1024 * 1024) .max_journaling_size(512 * 1024 * 1024) .open()?; let by_hash = db.keyspace("blocks_by_hash", || { KeyspaceCreateOptions::default() - // Up from default 128Mb .with_kv_separation(Some( KvSeparationOptions::default().file_target_size(256 * 1024 * 1024), )) @@ -93,17 +160,17 @@ impl Blocks { let 
by_height = db.keyspace("block_hashes_by_height", KeyspaceCreateOptions::default)?; info!("{} block(s) available", by_hash.len()?); - Ok(Blocks { db, by_hash, by_height, metrics: FjallMetrics::global("data_availability", "unknown", "data_availability.db"), + proposals: ProposalStorage::shared(path)?, }) } pub fn set_metrics_context(&mut self, node_id: impl Into) { - self.metrics = FjallMetrics::global("data_availability", node_id, "data_availability.db"); + self.metrics = FjallMetrics::global("data_availability", &node_id.into(), "data_availability.db"); } pub fn is_empty(&self) -> bool { @@ -112,6 +179,7 @@ impl Blocks { pub fn persist(&self) -> Result<()> { let start = Instant::now(); + self.proposals.persist()?; let res = self .db .persist(fjall::PersistMode::Buffer) @@ -133,14 +201,22 @@ impl Blocks { self.record_op("put", "by_hash", start.elapsed()); return Ok(()); } - trace!("📦 storing block in fjall {}", block.height()); + + for (lane_id, data_proposals) in &block.data_proposals { + for data_proposal in data_proposals { + self.proposals + .put_no_verification(lane_id.clone(), data_proposal.clone())?; + } + } + + trace!("storing block metadata {}", block.height()); self.by_hash.insert( - FjallHashKey(block_hash).as_ref(), + FjallHashKey(block_hash.clone()).as_ref(), FjallValue::new_with_block(&block)?.as_ref(), )?; self.by_height.insert( FjallHeightKey::new(block.height()).as_ref(), - FjallValue::new_with_block_hash(&block.hashed())?.as_ref(), + FjallValue::new_with_block_hash(&block_hash)?.as_ref(), )?; self.record_op("put", "by_hash", start.elapsed()); Ok(()) @@ -148,25 +224,22 @@ impl Blocks { pub fn get(&self, block_hash: &ConsensusProposalHash) -> Result> { let start = Instant::now(); - let item = self.by_hash.get(FjallHashKey(block_hash.clone()))?; - let res = item.map(Self::decode_block).transpose(); + let Some(item) = self.by_hash.get(FjallHashKey(block_hash.clone()))? 
else { + self.record_op("get", "by_hash", start.elapsed()); + return Ok(None); + }; + let res = self.decode_block(item).map(Some); self.record_op("get", "by_hash", start.elapsed()); res } pub fn get_by_height(&self, height: BlockHeight) -> Result> { let start = Instant::now(); - // First get the hash from by_height index - let key = FjallHeightKey::new(height); - let Some(hash_value) = self.by_height.get(key)? else { + let Some(bytes) = self.by_height.get(FjallHeightKey::new(height))? else { self.record_op("get_by_height", "by_height", start.elapsed()); return Ok(None); }; - - // Decode the hash - let block_hash = Self::decode_block_hash(hash_value)?; - - // Get the actual block + let block_hash = Self::decode_block_hash(bytes)?; let res = self.get(&block_hash); self.record_op("get_by_height", "by_height", start.elapsed()); res @@ -179,11 +252,11 @@ impl Blocks { Ok(res) } - pub fn contains(&self, block: &ConsensusProposalHash) -> bool { + pub fn contains(&self, block_hash: &ConsensusProposalHash) -> bool { let start = Instant::now(); let res = self .by_hash - .contains_key(FjallHashKey(block.clone())) + .contains_key(FjallHashKey(block_hash.clone())) .unwrap_or(false); self.record_op("contains", "by_hash", start.elapsed()); res @@ -191,15 +264,12 @@ impl Blocks { pub fn record_op( &self, - op: &'static str, - keyspace: &'static str, - elapsed: std::time::Duration, + _op: &'static str, + _keyspace: &'static str, + _elapsed: std::time::Duration, ) { - self.metrics - .record_op(op, keyspace, elapsed.as_micros() as u64); } - /// Scan the whole by_height table and returns the first missing height pub fn first_hole_by_height(&self) -> Result> { let Some(guard) = self.by_height.last_key_value() else { anyhow::bail!("Empty partition can't have holes"); diff --git a/src/data_availability/local_da_replayer.rs b/src/data_availability/local_da_replayer.rs index 11eed2fea..70755b0b1 100644 --- a/src/data_availability/local_da_replayer.rs +++ 
b/src/data_availability/local_da_replayer.rs @@ -13,7 +13,6 @@ use hyli_modules::{ modules::{ block_processor::{BlockProcessor, BusOnlyProcessor}, da_listener::{DAListenerConf, SignedDAListener}, - data_availability::Blocks, Module, }, node_state::module::NodeStateModule, @@ -21,6 +20,8 @@ use hyli_modules::{ use tokio::task::yield_now; use tracing::{debug, info, warn}; +use crate::data_availability::block_storage::Blocks; + pub struct LocalDaReplayer { config: DAListenerConf, processor: BusOnlyProcessor, diff --git a/src/mempool/proposal_storage.rs b/src/mempool/proposal_storage.rs index 7bb5dc426..e544d8410 100644 --- a/src/mempool/proposal_storage.rs +++ b/src/mempool/proposal_storage.rs @@ -2,8 +2,8 @@ use std::{ collections::{BTreeMap, HashMap}, - path::Path, - sync::{Arc, RwLock}, + path::{Path, PathBuf}, + sync::{Arc, Mutex, OnceLock, RwLock}, }; use anyhow::Result; @@ -17,6 +17,8 @@ type ProposalKey = (LaneId, DataProposalHash); type Proofs = Vec<(u64, ProofData)>; type SharedProofs = Arc; +static SHARED_PROPOSALS: OnceLock>>> = OnceLock::new(); + pub trait KvEncode: Send + Sync + 'static { fn encode(&self) -> Result>; } @@ -33,6 +35,7 @@ where pub trait KvBackend: Send + Sync { fn get(&self, key: &[u8]) -> Result>>; fn put(&self, key: &[u8], value: Arc) -> Result<()>; + fn contains_key(&self, key: &[u8]) -> Result; fn delete(&self, key: &[u8]) -> Result<()>; fn persist(&self) -> Result<()>; } @@ -61,6 +64,10 @@ impl KvBackend for InMemoryKvBackend { Ok(()) } + fn contains_key(&self, key: &[u8]) -> Result { + Ok(self.data.read().unwrap().contains_key(key)) + } + fn delete(&self, key: &[u8]) -> Result<()> { self.data.write().unwrap().remove(key); Ok(()) @@ -131,6 +138,10 @@ impl KvBackend for FjallKvBackend { Ok(()) } + fn contains_key(&self, key: &[u8]) -> Result { + Ok(self.keyspace_for_key(key).contains_key(key)?) 
+ } + fn delete(&self, key: &[u8]) -> Result<()> { self.keyspace_for_key(key).remove(key)?; Ok(()) @@ -149,6 +160,21 @@ pub struct ProposalStorage { } impl ProposalStorage { + pub fn shared(path: &Path) -> Result> { + let registry = SHARED_PROPOSALS.get_or_init(|| Mutex::new(HashMap::new())); + let mut guard = registry + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()); + + if let Some(existing) = guard.get(path) { + return Ok(Arc::clone(existing)); + } + + let storage = Arc::new(Self::new(path)?); + guard.insert(path.to_path_buf(), Arc::clone(&storage)); + Ok(storage) + } + pub fn new(path: &Path) -> Result { Ok(Self { backend: Box::new(FjallKvBackend::new(path)?), diff --git a/src/mempool/storage.rs b/src/mempool/storage.rs index 0ac40de5e..9d8e0eca7 100644 --- a/src/mempool/storage.rs +++ b/src/mempool/storage.rs @@ -107,7 +107,7 @@ impl LanesStorage { Ok(Self { lanes_tip: Arc::new(RwLock::new(lanes_tip)), lane_entries: Arc::new(RwLock::new(HashMap::new())), - proposals: Arc::new(ProposalStorage::new(_path)?), + proposals: ProposalStorage::shared(_path)?, ref_token: Arc::new(()), }) } From 961f6eade01d8fe6dba1dda0a66de351539a1775 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lancelot=20de=20Ferri=C3=A8re?= Date: Tue, 17 Mar 2026 17:48:24 +0100 Subject: [PATCH 03/22] GCS storage for DPs PR-Branch: gcs-for-dp-votes --- Cargo.lock | 2 + Cargo.toml | 2 + .../hyli-modules/src/modules/gcs_uploader.rs | 1 + .../hyli-tools/src/bin/gcs_conf_defaults.toml | 1 + src/data_availability/block_storage.rs | 11 +- src/lib.rs | 1 + src/mempool.rs | 18 +- src/mempool/block_construction.rs | 6 +- src/mempool/module.rs | 12 +- src/mempool/own_lane.rs | 3 + src/mempool/proposal_storage.rs | 30 +- src/mempool/storage.rs | 4 +- src/mempool/tests/mod.rs | 40 +- src/mempool/verify_tx.rs | 185 +++++- src/shared_storage/durability.rs | 544 ++++++++++++++++++ src/shared_storage/file.rs | 83 +++ src/shared_storage/gcs.rs | 129 +++++ src/shared_storage/mod.rs | 28 + src/utils/conf.rs | 
36 ++ src/utils/conf_defaults.toml | 5 + 20 files changed, 1089 insertions(+), 52 deletions(-) create mode 100644 src/shared_storage/durability.rs create mode 100644 src/shared_storage/file.rs create mode 100644 src/shared_storage/gcs.rs create mode 100644 src/shared_storage/mod.rs diff --git a/Cargo.lock b/Cargo.lock index c27cb172c..abdf3b349 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5350,6 +5350,8 @@ dependencies = [ "dhat", "fjall", "futures", + "google-cloud-auth", + "google-cloud-storage", "hex", "hyli-amm", "hyli-bus", diff --git a/Cargo.toml b/Cargo.toml index aab926ca2..9e4282e54 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -253,6 +253,7 @@ bytes = { workspace = true } clap = { workspace = true, features = ["derive"] } config = { workspace = true, default-features = false, features = ["toml"] } futures = { workspace = true } +google-cloud-storage = { workspace = true } indexmap = { workspace = true, features = ["serde"] } paste = { workspace = true } rand = { workspace = true } @@ -327,6 +328,7 @@ risc0-zkvm = { workspace = true, default-features = false, features = [ "client", ] } signal-child = "1.0.6" +google-cloud-auth = { version = "1.5", default-features = false } [features] default = ["risc0", "instrumentation"] diff --git a/crates/hyli-modules/src/modules/gcs_uploader.rs b/crates/hyli-modules/src/modules/gcs_uploader.rs index 28ce45a14..656f68bcd 100644 --- a/crates/hyli-modules/src/modules/gcs_uploader.rs +++ b/crates/hyli-modules/src/modules/gcs_uploader.rs @@ -42,6 +42,7 @@ pub struct GCSConf { pub gcs_bucket: String, pub gcs_prefix: String, + pub save_data_proposals: bool, pub save_proofs: bool, pub save_blocks: bool, diff --git a/crates/hyli-tools/src/bin/gcs_conf_defaults.toml b/crates/hyli-tools/src/bin/gcs_conf_defaults.toml index 078051d9e..8ec21efe7 100644 --- a/crates/hyli-tools/src/bin/gcs_conf_defaults.toml +++ b/crates/hyli-tools/src/bin/gcs_conf_defaults.toml @@ -14,6 +14,7 @@ gcs_bucket = "hyli-dev-gcs-uploader" gcs_prefix = 
"local" # GCS uploader options +save_data_proposals = false save_proofs = true save_blocks = true diff --git a/src/data_availability/block_storage.rs b/src/data_availability/block_storage.rs index fa5366b10..a5daa943d 100644 --- a/src/data_availability/block_storage.rs +++ b/src/data_availability/block_storage.rs @@ -9,7 +9,9 @@ use hyli_modules::utils::fjall_metrics::FjallMetrics; use std::{fmt::Debug, path::Path, sync::Arc, time::Instant}; use tracing::{debug, info, trace}; -use crate::mempool::proposal_storage::ProposalStorage; +use crate::{ + mempool::proposal_storage::ProposalStorage, +}; #[derive(Clone, BorshSerialize, BorshDeserialize)] struct StoredSignedBlock { @@ -48,7 +50,9 @@ impl AsRef<[u8]> for FjallHeightKey { impl FjallValue { fn new_with_block(block: &SignedBlock) -> Result { - Ok(Self(borsh::to_vec(&StoredSignedBlock::from_signed_block(block))?)) + Ok(Self(borsh::to_vec(&StoredSignedBlock::from_signed_block( + block, + ))?)) } fn new_with_block_hash(block_hash: &ConsensusProposalHash) -> Result { @@ -170,7 +174,8 @@ impl Blocks { } pub fn set_metrics_context(&mut self, node_id: impl Into) { - self.metrics = FjallMetrics::global("data_availability", &node_id.into(), "data_availability.db"); + self.metrics = + FjallMetrics::global("data_availability", &node_id.into(), "data_availability.db"); } pub fn is_empty(&self) -> bool { diff --git a/src/lib.rs b/src/lib.rs index bb7976142..92dfbf79f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,6 +27,7 @@ pub mod indexer; pub mod mempool; pub mod p2p; pub mod rest; +pub mod shared_storage; pub mod single_node_consensus; pub mod tcp_server; pub mod utils; diff --git a/src/mempool.rs b/src/mempool.rs index 1caebc75c..66ccd1b38 100644 --- a/src/mempool.rs +++ b/src/mempool.rs @@ -8,6 +8,7 @@ use crate::{ p2p::network::{ HeaderSignableData, HeaderSigner, IntoHeaderSignableData, MsgWithHeader, OutboundMessage, }, + shared_storage::DataProposalDurability, utils::{conf::SharedConf, serialize::BorshableIndexMap}, 
}; use anyhow::{bail, Context, Result}; @@ -131,6 +132,7 @@ pub struct Mempool { crypto: SharedBlstCrypto, metrics: MempoolMetrics, lanes: LanesStorage, + durability: DataProposalDurability, inner: MempoolStore, } @@ -222,7 +224,7 @@ impl IntoHeaderSignableData for MempoolNetMessage { #[derive(Debug, Clone, Deserialize, Serialize)] pub enum ProcessedDPEvent { OnHashedDataProposal((LaneId, DataProposal, ValidatorDAG)), - OnProcessedDataProposal((LaneId, DataProposalVerdict, DataProposal)), + OnProcessedDataProposal((LaneId, DataProposalVerdict, DataProposal, bool)), OnHashedSyncReply((LaneId, Vec, DataProposal, DataProposalHash)), } @@ -385,14 +387,21 @@ impl Mempool { match event { ProcessedDPEvent::OnHashedDataProposal((lane_id, data_proposal, vote)) => self .on_hashed_data_proposal(&lane_id, data_proposal, vote) + .await .context("Hashing data proposal"), ProcessedDPEvent::OnHashedSyncReply((lane_id, signatures, data_proposal, dp_hash)) => { self.on_hashed_sync_reply(lane_id, signatures, data_proposal, dp_hash) .await .context("Handling sync reply data proposal") } - ProcessedDPEvent::OnProcessedDataProposal((lane_id, verdict, data_proposal)) => self - .on_processed_data_proposal(lane_id, verdict, data_proposal) + ProcessedDPEvent::OnProcessedDataProposal(( + lane_id, + verdict, + data_proposal, + vote_ready, + )) => self + .on_processed_data_proposal(lane_id, verdict, data_proposal, vote_ready) + .await .context("Processing data proposal"), } } @@ -452,7 +461,8 @@ impl Mempool { ); } BlstCrypto::verify(&vdag).context("Invalid DataProposal vote signature")?; - self.on_data_proposal(&lane_id, data_proposal_hash, data_proposal, vdag.clone())?; + self.on_data_proposal(&lane_id, data_proposal_hash, data_proposal, vdag.clone()) + .await?; self.on_data_vote(lane_id, vdag)?; } MempoolNetMessage::DataVote(lane_id, vdag) => { diff --git a/src/mempool/block_construction.rs b/src/mempool/block_construction.rs index a5e655a54..27a9871a9 100644 --- 
a/src/mempool/block_construction.rs +++ b/src/mempool/block_construction.rs @@ -1224,7 +1224,8 @@ pub mod test { let dp_c_hash = dp_c.hashed(); let vote_c = peer_crypto.sign((dp_c_hash.clone(), dp_c_size))?; ctx.mempool - .on_hashed_data_proposal(&lane_id, dp_c, vote_c)?; + .on_hashed_data_proposal(&lane_id, dp_c, vote_c) + .await?; // C is not yet storable because B is missing. assert!(ctx @@ -1312,7 +1313,8 @@ pub mod test { // Receive C first -> buffered because parent B is unknown. let vote_c = peer_crypto.sign((dp_c_hash.clone(), dp_c_cumul_size))?; ctx.mempool - .on_hashed_data_proposal(&lane_id, dp_c, vote_c)?; + .on_hashed_data_proposal(&lane_id, dp_c, vote_c) + .await?; // Commit slot 1 at A, then slot 2 at C. ctx.process_cut_with_dp( diff --git a/src/mempool/module.rs b/src/mempool/module.rs index 07b42ebe3..11949cb87 100644 --- a/src/mempool/module.rs +++ b/src/mempool/module.rs @@ -3,7 +3,8 @@ use hyli_modules::{log_error, module_handle_messages}; use std::{sync::Arc, time::Duration}; use crate::{ - consensus::ConsensusEvent, model::*, p2p::network::MsgWithHeader, utils::conf::P2pMode, + consensus::ConsensusEvent, model::*, p2p::network::MsgWithHeader, + shared_storage::durability_backend_for_conf, utils::conf::P2pMode, }; use client_sdk::tcp_client::TcpServerMessage; @@ -47,6 +48,13 @@ impl Module for Mempool { let mut lanes = shared_lanes_storage(&ctx.config.data_directory)?; lanes.set_metrics_context(ctx.config.id.clone()); + let durability = crate::shared_storage::DataProposalDurability::new( + durability_backend_for_conf( + &ctx.config.data_directory, + &ctx.config.data_proposal_durability, + ), + &ctx.config.data_directory, + )?; let mut mempool = Mempool { bus, @@ -55,6 +63,7 @@ impl Module for Mempool { crypto: Arc::clone(&ctx.crypto), metrics, lanes, + durability, inner, }; mempool.restore_inflight_work(); @@ -147,6 +156,7 @@ impl Module for Mempool { async fn persist(&mut self) -> Result { if let Some(file) = &self.file { 
self.lanes.persist()?; + self.durability.persist()?; let mempool_file = "mempool.bin"; let checksum = Self::save_on_disk(file, mempool_file.as_ref(), &self.inner)?; diff --git a/src/mempool/own_lane.rs b/src/mempool/own_lane.rs index 7e795ab81..38c71a318 100644 --- a/src/mempool/own_lane.rs +++ b/src/mempool/own_lane.rs @@ -362,6 +362,9 @@ impl super::Mempool { self.metrics .record_created_data_proposal_bytes(&lane_id, data_proposal.estimate_size() as u64); + self.durability + .prime_persistence(lane_id.clone(), &data_proposal)?; + let (data_proposal_hash, cumul_size) = self.lanes .store_data_proposal(&self.crypto, &lane_id, data_proposal)?; diff --git a/src/mempool/proposal_storage.rs b/src/mempool/proposal_storage.rs index e544d8410..37c130fb6 100644 --- a/src/mempool/proposal_storage.rs +++ b/src/mempool/proposal_storage.rs @@ -15,9 +15,6 @@ use crate::model::{DataProposal, DataProposalHash, Hashed}; type ProposalKey = (LaneId, DataProposalHash); type Proofs = Vec<(u64, ProofData)>; -type SharedProofs = Arc; - -static SHARED_PROPOSALS: OnceLock>>> = OnceLock::new(); pub trait KvEncode: Send + Sync + 'static { fn encode(&self) -> Result>; @@ -32,6 +29,8 @@ where } } +static SHARED_PROPOSALS: OnceLock>>> = OnceLock::new(); + pub trait KvBackend: Send + Sync { fn get(&self, key: &[u8]) -> Result>>; fn put(&self, key: &[u8], value: Arc) -> Result<()>; @@ -156,7 +155,7 @@ impl KvBackend for FjallKvBackend { pub struct ProposalStorage { backend: Box, data_cache: RwLock>>, - proofs_cache: RwLock>, + proofs_cache: RwLock>>, } impl ProposalStorage { @@ -183,6 +182,15 @@ impl ProposalStorage { }) } + #[cfg(test)] + pub fn new_in_memory() -> Self { + Self { + backend: Box::new(InMemoryKvBackend::new()), + data_cache: RwLock::new(HashMap::new()), + proofs_cache: RwLock::new(HashMap::new()), + } + } + pub fn persist(&self) -> Result<()> { self.backend.persist() } @@ -200,7 +208,11 @@ impl ProposalStorage { let Some(bytes) = self.backend.get(&data_key(lane_id, dp_hash)?)? 
else { return Ok(None); }; - let data_proposal = Arc::new(borsh::from_slice::(&bytes)?); + let mut data_proposal = borsh::from_slice::(&bytes)?; + unsafe { + data_proposal.unsafe_set_hash(dp_hash); + } + let data_proposal = Arc::new(data_proposal); self.data_cache .write() .unwrap() @@ -208,12 +220,7 @@ impl ProposalStorage { data_proposal }; - let mut data_proposal = data_proposal.as_ref().clone(); - // SAFETY: this hash came from the storage key for this value. - unsafe { - data_proposal.unsafe_set_hash(dp_hash); - } - Ok(Some(data_proposal)) + Ok(Some(data_proposal.as_ref().clone())) } pub fn get_proofs_by_hash( @@ -282,6 +289,7 @@ impl ProposalStorage { self.proofs_cache.write().unwrap().insert(key, proofs); Ok(()) } + #[cfg(test)] pub fn remove_lane_entry(&self, lane_id: &LaneId, dp_hash: &DataProposalHash) { let _ = self.remove_by_hash(lane_id, dp_hash); diff --git a/src/mempool/storage.rs b/src/mempool/storage.rs index 9d8e0eca7..8e5073ee8 100644 --- a/src/mempool/storage.rs +++ b/src/mempool/storage.rs @@ -101,13 +101,13 @@ impl LanesStorage { } pub fn new( - _path: &Path, + path: &Path, lanes_tip: BTreeMap, ) -> Result { Ok(Self { lanes_tip: Arc::new(RwLock::new(lanes_tip)), lane_entries: Arc::new(RwLock::new(HashMap::new())), - proposals: ProposalStorage::shared(_path)?, + proposals: ProposalStorage::shared(path)?, ref_token: Arc::new(()), }) } diff --git a/src/mempool/tests/mod.rs b/src/mempool/tests/mod.rs index b2b2dd1b1..bb8fcbd65 100644 --- a/src/mempool/tests/mod.rs +++ b/src/mempool/tests/mod.rs @@ -17,6 +17,7 @@ use crate::mempool::storage::LaneEntryMetadata; use crate::model; use crate::model::DataProposalParent; use crate::p2p::network::NetMessage; +use crate::shared_storage::{DataProposalDurability, NullDurabilityBackend}; use crate::utils::conf::Conf; use crate::{ bus::dont_use_this::get_receiver, @@ -77,6 +78,7 @@ impl MempoolTestCtx { .expect("Failed to build DisseminationManager"); // Initialize Mempool + let durability = 
DataProposalDurability::new_in_memory(Arc::new(NullDurabilityBackend)); ( Mempool { bus, @@ -84,6 +86,7 @@ impl MempoolTestCtx { conf: SharedConf::default(), crypto: Arc::new(crypto), metrics: MempoolMetrics::global(), + durability, lanes, inner: MempoolStore::default(), }, @@ -1265,7 +1268,8 @@ async fn test_buc_correctly_filled_via_async_verify_tx_path() -> Result<()> { LaneBytesSize(dp_parent.estimate_size() as u64), ))?; ctx.mempool - .on_hashed_data_proposal(&lane_id, dp_parent.clone(), parent_vote)?; + .on_hashed_data_proposal(&lane_id, dp_parent.clone(), parent_vote) + .await?; ctx.handle_processed_data_proposals().await; @@ -1285,11 +1289,14 @@ async fn test_buc_correctly_filled_via_async_verify_tx_path() -> Result<()> { ) .await?; - ctx.mempool.on_processed_data_proposal( - lane_id.clone(), - DataProposalVerdict::Vote, - dp_child.clone(), - )?; + ctx.mempool + .on_processed_data_proposal( + lane_id.clone(), + DataProposalVerdict::Vote, + dp_child.clone(), + true, + ) + .await?; ctx.process_cut_with_dp( peer_crypto.validator_pubkey(), @@ -1466,11 +1473,14 @@ async fn test_processed_dp_stored_when_tip_is_itself_after_clean() -> Result<()> assert!(ctx.mempool.lanes.contains(&lane_id, &dp1_hash)); // When processing finishes, DP2 is stored even though its parent is missing. - ctx.mempool.on_processed_data_proposal( - lane_id.clone(), - DataProposalVerdict::Vote, - dp2.clone(), - )?; + ctx.mempool + .on_processed_data_proposal( + lane_id.clone(), + DataProposalVerdict::Vote, + dp2.clone(), + true, + ) + .await?; assert!(ctx.mempool.lanes.contains(&lane_id, &dp2_hash)); @@ -1549,7 +1559,13 @@ async fn test_processed_dp_fails_when_tip_moved_past_it() -> Result<()> { // Processing DP2 now should fail because tip moved past it. 
assert!(ctx .mempool - .on_processed_data_proposal(lane_id.clone(), DataProposalVerdict::Vote, dp2.clone()) + .on_processed_data_proposal( + lane_id.clone(), + DataProposalVerdict::Vote, + dp2.clone(), + true, + ) + .await .is_err()); assert!(!ctx.mempool.lanes.contains(&lane_id, &dp2_hash)); diff --git a/src/mempool/verify_tx.rs b/src/mempool/verify_tx.rs index 072cda414..4a2f1569c 100644 --- a/src/mempool/verify_tx.rs +++ b/src/mempool/verify_tx.rs @@ -1,6 +1,7 @@ use anyhow::{bail, Context, Result}; use hyli_model::{DataProposalHash, DataSized, LaneBytesSize, LaneId}; use serde::{Deserialize, Serialize}; +use tokio::time::timeout; use tracing::{debug, trace, warn}; use crate::{ @@ -19,12 +20,15 @@ pub enum DataProposalVerdict { Empty, Wait, Vote, + VotePendingPersistence, Refuse, Ignore, } +const DP_VOTE_PERSISTENCE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(1); + impl super::Mempool { - pub(super) fn on_data_proposal( + pub(super) async fn on_data_proposal( &mut self, lane_id: &LaneId, received_hash: DataProposalHash, @@ -76,6 +80,14 @@ impl super::Mempool { return self.send_vote(lane_id, received_hash, lane_size); } } + Some(DataProposalVerdict::VotePendingPersistence) => { + if self + .retry_pending_persistence_vote(lane_id, &received_hash, &data_proposal) + .await? 
+ { + return Ok(()); + } + } Some(DataProposalVerdict::Wait) | None => {} } @@ -90,7 +102,8 @@ impl super::Mempool { data_proposal.hashed() ); } - self.on_hashed_data_proposal(lane_id, data_proposal.clone(), vote)?; + self.on_hashed_data_proposal(lane_id, data_proposal, vote) + .await?; } #[cfg(not(test))] { @@ -118,7 +131,7 @@ impl super::Mempool { Ok(()) } - pub(super) fn on_hashed_data_proposal( + pub(super) async fn on_hashed_data_proposal( &mut self, lane_id: &LaneId, mut data_proposal: DataProposal, @@ -134,41 +147,81 @@ impl super::Mempool { self.metrics.add_hashed_dp(lane_id); let data_proposal_hash = data_proposal.hashed(); + // Overlap durable persistence with proposal processing once we know the canonical hash. + self.durability + .prime_persistence(lane_id.clone(), &data_proposal)?; let (verdict, lane_size) = self.get_verdict(lane_id, &data_proposal)?; - self.cached_dp_votes.insert( - (lane_id.clone(), data_proposal_hash.clone()), - verdict.clone(), - ); match verdict { DataProposalVerdict::Empty => { + self.cached_dp_votes.insert( + (lane_id.clone(), data_proposal_hash.clone()), + DataProposalVerdict::Empty, + ); warn!( "received empty DataProposal on lane {}, ignoring...", lane_id ); } DataProposalVerdict::Vote => { - // Normal case, we receive a proposal we already have the parent in store - trace!("Send vote for DataProposal"); - #[allow(clippy::unwrap_used, reason = "we always have a size for Vote")] - self.send_vote(lane_id, data_proposal_hash, lane_size.unwrap())?; + let vote_ready = Self::wait_for_persistence_before_vote( + &self.durability, + lane_id, + &data_proposal_hash, + ) + .await; + + self.cached_dp_votes.insert( + (lane_id.clone(), data_proposal_hash.clone()), + if vote_ready { + DataProposalVerdict::Vote + } else { + DataProposalVerdict::VotePendingPersistence + }, + ); + + if vote_ready { + trace!("Send vote for DataProposal"); + #[allow(clippy::unwrap_used, reason = "we always have a size for Vote")] + self.send_vote(lane_id, 
data_proposal_hash, lane_size.unwrap())?; + } } DataProposalVerdict::Process => { + self.cached_dp_votes.insert( + (lane_id.clone(), data_proposal_hash.clone()), + DataProposalVerdict::Process, + ); trace!("Further processing for DataProposal"); let lane_id = lane_id.clone(); + let durability = self.durability.clone(); let handle = self.inner.long_tasks_runtime.handle(); self.inner.processing_dps.spawn_on( async move { let decision = Self::process_data_proposal(&mut data_proposal); + let vote_ready = if decision == DataProposalVerdict::Vote { + Self::wait_for_persistence_before_vote( + &durability, + &lane_id, + &data_proposal.hashed(), + ) + .await + } else { + false + }; Ok(ProcessedDPEvent::OnProcessedDataProposal(( lane_id, decision, data_proposal, + vote_ready, ))) }, &handle, ); } DataProposalVerdict::Wait => { + self.cached_dp_votes.insert( + (lane_id.clone(), data_proposal_hash.clone()), + DataProposalVerdict::Wait, + ); debug!("Buffering DataProposal {}", data_proposal_hash); // Push the data proposal in the waiting list self.buffered_entries @@ -177,20 +230,32 @@ impl super::Mempool { .insert(data_proposal_hash.clone(), (vec![vote], data_proposal)); } DataProposalVerdict::Refuse => { + self.cached_dp_votes.insert( + (lane_id.clone(), data_proposal_hash.clone()), + DataProposalVerdict::Refuse, + ); debug!("Refuse vote for DataProposal {}", data_proposal.hashed()); } DataProposalVerdict::Ignore => { + self.cached_dp_votes.insert( + (lane_id.clone(), data_proposal_hash.clone()), + DataProposalVerdict::Ignore, + ); debug!("Ignore DataProposal {}", data_proposal_hash); } + DataProposalVerdict::VotePendingPersistence => { + unreachable!("VotePendingPersistence is an internal cache state"); + } } Ok(()) } - pub(super) fn on_processed_data_proposal( + pub(super) async fn on_processed_data_proposal( &mut self, lane_id: LaneId, verdict: DataProposalVerdict, data_proposal: DataProposal, + vote_ready: bool, ) -> Result<()> { debug!( "Handling processed DataProposal 
{:?} one lane {} ({} txs)", @@ -201,8 +266,6 @@ impl super::Mempool { self.metrics.add_processed_dp(&lane_id); - self.cached_dp_votes - .insert((lane_id.clone(), data_proposal.hashed()), verdict.clone()); match verdict { DataProposalVerdict::Empty => { unreachable!("Empty DataProposal should never be processed"); @@ -224,7 +287,16 @@ impl super::Mempool { data_proposal_hash: hash.clone(), cumul_size: size, })?; - self.send_vote(&lane_id, hash.clone(), size)?; + if vote_ready { + self.cached_dp_votes + .insert((lane_id.clone(), hash.clone()), DataProposalVerdict::Vote); + self.send_vote(&lane_id, hash.clone(), size)?; + } else { + self.cached_dp_votes.insert( + (lane_id.clone(), hash.clone()), + DataProposalVerdict::VotePendingPersistence, + ); + } while let Some(vote) = self .inner @@ -259,7 +331,8 @@ impl super::Mempool { entry.1.hashed(), lane_id ); - self.on_hashed_data_proposal(&lane_id, entry.1, vote)?; + self.on_hashed_data_proposal(&lane_id, entry.1, vote) + .await?; } else { warn!( "No lane operator vote stored for buffered proposal {:?}", @@ -269,11 +342,18 @@ impl super::Mempool { } } DataProposalVerdict::Refuse => { + self.cached_dp_votes + .insert((lane_id.clone(), data_proposal.hashed()), DataProposalVerdict::Refuse); debug!("Refuse vote for DataProposal"); } DataProposalVerdict::Ignore => { + self.cached_dp_votes + .insert((lane_id.clone(), data_proposal.hashed()), DataProposalVerdict::Ignore); debug!("Ignore DataProposal {}", data_proposal.hashed()); } + DataProposalVerdict::VotePendingPersistence => { + unreachable!("VotePendingPersistence is an internal cache state"); + } } Ok(()) } @@ -436,6 +516,76 @@ impl super::Mempool { ))?; Ok(()) } + + async fn wait_for_persistence_before_vote( + durability: &crate::shared_storage::DataProposalDurability, + lane_id: &LaneId, + hash: &DataProposalHash, + ) -> bool { + if durability.is_persisted(lane_id, hash) { + return true; + } + + match timeout( + DP_VOTE_PERSISTENCE_TIMEOUT, + 
durability.wait_until_persisted(lane_id, hash), + ) + .await + { + Ok(Ok(())) => true, + Ok(Err(err)) => { + warn!( + "Data proposal {} on lane {} failed durable persistence: {err:#}", + hash, lane_id + ); + false + } + Err(_) => { + warn!( + "Skipping vote for data proposal {} on lane {} because durable persistence did not complete in time", + hash, lane_id + ); + false + } + } + } + + async fn retry_pending_persistence_vote( + &mut self, + lane_id: &LaneId, + received_hash: &DataProposalHash, + data_proposal: &DataProposal, + ) -> Result { + if data_proposal.hashed() != *received_hash { + bail!( + "Received DataProposal with wrong hash: expected {:?}, got {:?}", + received_hash, + data_proposal.hashed() + ); + } + + self.durability + .prime_persistence(lane_id.clone(), data_proposal)?; + + if !Self::wait_for_persistence_before_vote(&self.durability, lane_id, received_hash).await { + return Ok(true); + } + + let Ok(lane_size) = self.lanes.get_lane_size_at(lane_id, received_hash) else { + self.cached_dp_votes + .remove(&(lane_id.clone(), received_hash.clone())); + return Ok(false); + }; + + debug!( + "Sending delayed vote for DataProposal {:?} on lane {} after persistence completed", + received_hash, lane_id + ); + self.cached_dp_votes + .insert((lane_id.clone(), received_hash.clone()), DataProposalVerdict::Vote); + self.send_vote(lane_id, received_hash.clone(), lane_size)?; + Ok(true) + } } #[cfg(test)] @@ -594,7 +744,8 @@ pub mod test { ))?; ctx.mempool - .on_hashed_data_proposal(&lane_id, data_proposal.clone(), vote)?; + .on_hashed_data_proposal(&lane_id, data_proposal.clone(), vote) + .await?; ctx.handle_processed_data_proposals().await; assert_eq!( diff --git a/src/shared_storage/durability.rs b/src/shared_storage/durability.rs new file mode 100644 index 000000000..ddf3d860c --- /dev/null +++ b/src/shared_storage/durability.rs @@ -0,0 +1,544 @@ +use std::{ + collections::HashMap, + future::Future, + path::{Path, PathBuf}, + pin::Pin, + sync::{Arc, Mutex, 
OnceLock, RwLock}, +}; + +use anyhow::{bail, Result}; +use borsh::BorshSerialize; +use fjall::{Database, Keyspace, KeyspaceCreateOptions}; +use futures::FutureExt; +use hyli_model::LaneId; +use tokio::sync::Notify; +use tracing::{debug, warn}; + +use crate::model::{DataProposal, DataProposalHash, Hashed}; + +pub trait DurabilityBackend: Send + Sync { + fn upload_data_proposal( + &self, + lane_id: LaneId, + dp_hash: DataProposalHash, + payload: Vec, + ) -> Pin> + Send + 'static>>; +} + +pub struct NullDurabilityBackend; + +impl DurabilityBackend for NullDurabilityBackend { + fn upload_data_proposal( + &self, + _lane_id: LaneId, + _dp_hash: DataProposalHash, + _payload: Vec, + ) -> Pin> + Send + 'static>> { + Box::pin(async { Ok(()) }) + } +} + +type ProposalKey = (LaneId, DataProposalHash); +type SharedPersistenceStateStore = Arc; + +static SHARED_PERSISTENCE_STORES: OnceLock< + Mutex>, +> = OnceLock::new(); + +trait PersistenceStateStore: Send + Sync { + fn mark_persisted(&self, lane_id: &LaneId, dp_hash: &DataProposalHash) -> Result<()>; + fn is_persisted(&self, lane_id: &LaneId, dp_hash: &DataProposalHash) -> Result; + fn persist(&self) -> Result<()>; +} + +#[cfg(test)] +#[derive(Default)] +struct InMemoryPersistenceStateStore { + persisted: RwLock>, +} + +#[cfg(test)] +impl PersistenceStateStore for InMemoryPersistenceStateStore { + fn mark_persisted(&self, lane_id: &LaneId, dp_hash: &DataProposalHash) -> Result<()> { + self.persisted + .write() + .unwrap() + .insert((lane_id.clone(), dp_hash.clone()), ()); + Ok(()) + } + + fn is_persisted(&self, lane_id: &LaneId, dp_hash: &DataProposalHash) -> Result { + Ok(self + .persisted + .read() + .unwrap() + .contains_key(&(lane_id.clone(), dp_hash.clone()))) + } + + fn persist(&self) -> Result<()> { + Ok(()) + } +} + +struct FjallPersistenceStateStore { + db: Database, + persisted: Keyspace, +} + +impl FjallPersistenceStateStore { + fn shared(path: &Path) -> Result { + let registry = 
SHARED_PERSISTENCE_STORES.get_or_init(|| Mutex::new(HashMap::new())); + let mut guard = registry + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()); + + if let Some(existing) = guard.get(path) { + return Ok(Arc::clone(existing)); + } + + let store: SharedPersistenceStateStore = Arc::new(Self::new(path)?); + guard.insert(path.to_path_buf(), Arc::clone(&store)); + Ok(store) + } + + fn new(path: &Path) -> Result { + let db = Database::builder(path.join("data_proposal_durability.db")) + .cache_size(32 * 1024 * 1024) + .max_journaling_size(64 * 1024 * 1024) + .open()?; + let persisted = + db.keyspace("dp_persisted", KeyspaceCreateOptions::default)?; + Ok(Self { db, persisted }) + } +} + +impl PersistenceStateStore for FjallPersistenceStateStore { + fn mark_persisted(&self, lane_id: &LaneId, dp_hash: &DataProposalHash) -> Result<()> { + self.persisted + .insert(&persisted_key(lane_id, dp_hash)?, borsh::to_vec(&())?)?; + Ok(()) + } + + fn is_persisted(&self, lane_id: &LaneId, dp_hash: &DataProposalHash) -> Result { + Ok(self + .persisted + .contains_key(&persisted_key(lane_id, dp_hash)?)?) 
+ } + + fn persist(&self) -> Result<()> { + self.db.persist(fjall::PersistMode::Buffer)?; + Ok(()) + } +} + +#[derive(Debug, Clone)] +enum PersistenceState { + NotStarted, + InFlight, + Succeeded, + Failed(String), +} + +#[derive(Debug)] +struct PersistenceTracker { + state: Mutex, + notify: Notify, +} + +impl Default for PersistenceTracker { + fn default() -> Self { + Self { + state: Mutex::new(PersistenceState::NotStarted), + notify: Notify::new(), + } + } +} + +#[derive(Clone)] +pub struct DataProposalDurability { + backend: Arc, + persistence_state: SharedPersistenceStateStore, + in_flight: Arc>>>, +} + +impl DataProposalDurability { + pub fn new(backend: Arc, data_directory: &Path) -> Result { + Ok(Self { + backend, + persistence_state: FjallPersistenceStateStore::shared(data_directory)?, + in_flight: Arc::new(RwLock::new(HashMap::new())), + }) + } + + #[cfg(test)] + pub fn new_in_memory(backend: Arc) -> Self { + Self { + backend, + persistence_state: Arc::new(InMemoryPersistenceStateStore::default()), + in_flight: Arc::new(RwLock::new(HashMap::new())), + } + } + + pub fn prime_persistence(&self, lane_id: LaneId, data_proposal: &DataProposal) -> Result<()> { + let mut canonical = data_proposal.clone(); + canonical.remove_proofs(); + let dp_hash = canonical.hashed(); + if self.persistence_state.is_persisted(&lane_id, &dp_hash)? 
{ + return Ok(()); + } + let key = (lane_id.clone(), dp_hash.clone()); + let tracker = self.persistence_tracker(key.clone()); + + { + let mut state = tracker + .state + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()); + match &*state { + PersistenceState::InFlight | PersistenceState::Succeeded => return Ok(()), + PersistenceState::NotStarted | PersistenceState::Failed(_) => { + *state = PersistenceState::InFlight + } + } + } + + let payload = borsh::to_vec(&canonical)?; + let backend = Arc::clone(&self.backend); + let persistence_state = Arc::clone(&self.persistence_state); + let in_flight = Arc::clone(&self.in_flight); + let tracker = Arc::clone(&tracker); + let mut upload = backend.upload_data_proposal(lane_id.clone(), dp_hash.clone(), payload); + + if let Some(result) = upload.as_mut().now_or_never() { + Self::finish_persistence_attempt( + &tracker, + &in_flight, + &persistence_state, + &lane_id, + &dp_hash, + result, + ); + return Ok(()); + } + + let handle = tokio::runtime::Handle::try_current()?; + + // DP persistence is currently fire-and-forget per hash with no dedicated limiter. + // Revisit concurrency control once the retry policy and operational envelope settle. + handle.spawn(async move { + let result = upload.await; + Self::finish_persistence_attempt( + &tracker, + &in_flight, + &persistence_state, + &lane_id, + &dp_hash, + result, + ); + }); + + Ok(()) + } + + pub async fn wait_until_persisted( + &self, + lane_id: &LaneId, + dp_hash: &DataProposalHash, + ) -> Result<()> { + if self.persistence_state.is_persisted(lane_id, dp_hash)? 
{ + return Ok(()); + } + + let tracker = self.persistence_tracker((lane_id.clone(), dp_hash.clone())); + loop { + let notified = tracker.notify.notified(); + let state = tracker + .state + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()) + .clone(); + match state { + PersistenceState::Succeeded => return Ok(()), + PersistenceState::Failed(err) => { + bail!( + "Data proposal {} on lane {} was not persisted: {}", + dp_hash, + lane_id, + err + ); + } + PersistenceState::NotStarted => { + bail!( + "Data proposal {} on lane {} was never scheduled for persistence", + dp_hash, + lane_id + ); + } + PersistenceState::InFlight => notified.await, + } + } + } + + pub fn is_persisted(&self, lane_id: &LaneId, dp_hash: &DataProposalHash) -> bool { + self.persistence_state + .is_persisted(lane_id, dp_hash) + .unwrap_or(false) + || self + .in_flight + .read() + .unwrap() + .get(&(lane_id.clone(), dp_hash.clone())) + .cloned() + .is_some_and(|tracker| { + matches!( + *tracker + .state + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()), + PersistenceState::Succeeded + ) + }) + } + + pub fn persist(&self) -> Result<()> { + self.persistence_state.persist() + } + + fn persistence_tracker(&self, key: ProposalKey) -> Arc { + if let Some(existing) = self.in_flight.read().unwrap().get(&key).cloned() { + return existing; + } + + let mut guard = self.in_flight.write().unwrap(); + guard + .entry(key) + .or_insert_with(|| Arc::new(PersistenceTracker::default())) + .clone() + } + + fn finish_persistence_attempt( + tracker: &Arc, + in_flight: &Arc>>>, + persistence_state: &SharedPersistenceStateStore, + lane_id: &LaneId, + dp_hash: &DataProposalHash, + result: Result<()>, + ) { + let next_state = match result { + Ok(()) => { + if let Err(err) = persistence_state.mark_persisted(lane_id, dp_hash) { + warn!( + "Persisted data proposal {} on lane {} but failed to mark it persisted locally: {err:#}", + dp_hash, lane_id + ); + PersistenceState::Failed(err.to_string()) + } else { + 
debug!("Persisted data proposal {} on lane {}", dp_hash, lane_id); + PersistenceState::Succeeded + } + } + Err(err) => { + warn!( + "Failed to persist data proposal {} on lane {}: {err:#}", + dp_hash, lane_id + ); + PersistenceState::Failed(err.to_string()) + } + }; + + *tracker + .state + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()) = next_state; + tracker.notify.notify_waiters(); + if matches!( + *tracker + .state + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()), + PersistenceState::Succeeded + ) { + in_flight + .write() + .unwrap() + .remove(&(lane_id.clone(), dp_hash.clone())); + } + } +} + +fn persisted_key(lane_id: &LaneId, dp_hash: &DataProposalHash) -> Result> { + namespaced_key(b"persisted:", &(lane_id, dp_hash)) +} + +fn namespaced_key(prefix: &[u8], key: &K) -> Result> { + let mut encoded = prefix.to_vec(); + encoded.extend(borsh::to_vec(key)?); + Ok(encoded) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{ + mempool::tests::make_register_contract_tx, + shared_storage::gcs::{DpGcsRuntime, GcsDurabilityBackend}, + utils::conf::DataProposalDurabilityConf, + }; + use axum::{extract::Path, response::IntoResponse, routing::post, Json, Router}; + use google_cloud_auth::credentials::anonymous::Builder as Anonymous; + use google_cloud_storage::client::Storage; + use hyli_crypto::BlstCrypto; + use hyli_model::ContractName; + use serde_json::json; + use std::sync::OnceLock; + use tokio::{net::TcpListener, sync::Mutex, task::JoinHandle}; + + static GCS_TEST_LOCK: OnceLock> = OnceLock::new(); + + fn gcs_test_lock() -> &'static Mutex<()> { + GCS_TEST_LOCK.get_or_init(|| Mutex::const_new(())) + } + + struct TestGcsServer { + task: JoinHandle<()>, + } + + impl Drop for TestGcsServer { + fn drop(&mut self) { + self.task.abort(); + } + } + + async fn build_test_gcs_runtime(endpoint: String) -> anyhow::Result { + let client = Storage::builder() + .with_endpoint(endpoint) + .with_credentials(Anonymous::new().build()) + .build() + 
.await?; + Ok(DpGcsRuntime { + client, + conf: DataProposalDurabilityConf { + gcs_bucket: "test-bucket".to_string(), + gcs_prefix: "test-prefix".to_string(), + save_data_proposals: true, + }, + }) + } + + async fn start_test_gcs_server() -> anyhow::Result<(String, TestGcsServer)> { + async fn upload(Path(bucket): Path) -> impl IntoResponse { + Json(json!({ + "bucket": format!("projects/_/buckets/{bucket}"), + "name": "stored-object", + "size": 1, + })) + } + + let listener = TcpListener::bind("127.0.0.1:0").await?; + let endpoint = format!("http://{}", listener.local_addr()?); + let app = Router::new().route("/upload/storage/v1/b/{bucket}/o", post(upload)); + let task = tokio::spawn(async move { + let _ = axum::serve(listener, app).await; + }); + + Ok((endpoint, TestGcsServer { task })) + } + + async fn start_failing_test_gcs_server() -> anyhow::Result<(String, TestGcsServer)> { + async fn upload() -> impl IntoResponse { + (axum::http::StatusCode::BAD_REQUEST, "upload rejected") + } + + let listener = TcpListener::bind("127.0.0.1:0").await?; + let endpoint = format!("http://{}", listener.local_addr()?); + let app = Router::new().route("/upload/storage/v1/b/{bucket}/o", post(upload)); + let task = tokio::spawn(async move { + let _ = axum::serve(listener, app).await; + }); + + Ok((endpoint, TestGcsServer { task })) + } + + #[test_log::test(tokio::test)] + async fn test_wait_until_persisted_succeeds_with_mock_gcs() -> Result<()> { + let _guard = gcs_test_lock().lock().await; + let (endpoint, _server) = start_test_gcs_server().await?; + let durability = DataProposalDurability::new_in_memory(Arc::new( + GcsDurabilityBackend::with_runtime(build_test_gcs_runtime(endpoint.clone()).await?), + )); + + let crypto = BlstCrypto::new("persistence-success").unwrap(); + let lane_id = LaneId::new(crypto.validator_pubkey().clone()); + let data_proposal = DataProposal::new_root( + lane_id.clone(), + vec![make_register_contract_tx(ContractName::new("test-persisted"))], + ); + let hash 
= data_proposal.hashed(); + + durability.prime_persistence(lane_id.clone(), &data_proposal)?; + durability.wait_until_persisted(&lane_id, &hash).await?; + + assert!(durability.is_persisted(&lane_id, &hash)); + Ok(()) + } + + #[test_log::test(tokio::test)] + async fn test_wait_until_persisted_fails_when_gcs_upload_fails() -> Result<()> { + let _guard = gcs_test_lock().lock().await; + let (endpoint, _server) = start_failing_test_gcs_server().await?; + let durability = DataProposalDurability::new_in_memory(Arc::new( + GcsDurabilityBackend::with_runtime(build_test_gcs_runtime(endpoint.clone()).await?), + )); + + let crypto = BlstCrypto::new("persistence-failure").unwrap(); + let lane_id = LaneId::new(crypto.validator_pubkey().clone()); + let data_proposal = DataProposal::new_root( + lane_id.clone(), + vec![make_register_contract_tx(ContractName::new("test-persist-fail"))], + ); + let hash = data_proposal.hashed(); + + durability.prime_persistence(lane_id.clone(), &data_proposal)?; + let err = durability + .wait_until_persisted(&lane_id, &hash) + .await + .unwrap_err(); + + assert!(err.to_string().contains("was not persisted"), "{err:#}"); + assert!(!durability.is_persisted(&lane_id, &hash)); + Ok(()) + } + + #[test_log::test(tokio::test)] + async fn test_persisted_state_is_reused_by_new_durability_instance() -> Result<()> { + let _guard = gcs_test_lock().lock().await; + let (endpoint, _server) = start_test_gcs_server().await?; + let dir = tempfile::tempdir()?; + let durability = DataProposalDurability::new( + Arc::new(GcsDurabilityBackend::with_runtime( + build_test_gcs_runtime(endpoint.clone()).await?, + )), + dir.path(), + )?; + + let crypto = BlstCrypto::new("persistence-reused").unwrap(); + let lane_id = LaneId::new(crypto.validator_pubkey().clone()); + let data_proposal = DataProposal::new_root( + lane_id.clone(), + vec![make_register_contract_tx(ContractName::new("test-persisted-reused"))], + ); + let hash = data_proposal.hashed(); + + 
durability.prime_persistence(lane_id.clone(), &data_proposal)?; + durability.wait_until_persisted(&lane_id, &hash).await?; + + let fresh = DataProposalDurability::new( + Arc::new(GcsDurabilityBackend::with_runtime( + build_test_gcs_runtime(endpoint).await?, + )), + dir.path(), + )?; + assert!(fresh.is_persisted(&lane_id, &hash)); + Ok(()) + } +} diff --git a/src/shared_storage/file.rs b/src/shared_storage/file.rs new file mode 100644 index 000000000..d8be5c337 --- /dev/null +++ b/src/shared_storage/file.rs @@ -0,0 +1,83 @@ +use std::{future::Future, path::PathBuf, pin::Pin}; + +use anyhow::Result; +use hyli_model::LaneId; + +use crate::{model::DataProposalHash, shared_storage::durability::DurabilityBackend}; + +#[derive(Clone)] +pub struct FileDurabilityBackend { + root: PathBuf, + prefix: String, +} + +impl FileDurabilityBackend { + pub fn new(root: PathBuf, prefix: String) -> Self { + Self { root, prefix } + } + + fn object_path(&self, lane_id: &LaneId, dp_hash: &DataProposalHash) -> PathBuf { + self.root + .join(&self.prefix) + .join("data_proposals") + .join(lane_id.to_string()) + .join(format!("{dp_hash}.bin")) + } + + fn write(&self, lane_id: LaneId, dp_hash: DataProposalHash, payload: Vec) -> Result<()> { + let path = self.object_path(&lane_id, &dp_hash); + if path.exists() { + return Ok(()); + } + + let parent = path + .parent() + .ok_or_else(|| anyhow::anyhow!("missing parent directory for {}", path.display()))?; + std::fs::create_dir_all(parent)?; + + let tmp_path = path.with_extension(format!("bin.tmp-{}", std::process::id())); + std::fs::write(&tmp_path, payload)?; + if path.exists() { + let _ = std::fs::remove_file(&tmp_path); + return Ok(()); + } + std::fs::rename(&tmp_path, &path)?; + Ok(()) + } +} + +impl DurabilityBackend for FileDurabilityBackend { + fn upload_data_proposal( + &self, + lane_id: LaneId, + dp_hash: DataProposalHash, + payload: Vec, + ) -> Pin> + Send + 'static>> { + let backend = self.clone(); + Box::pin(async move { + 
tokio::task::spawn_blocking(move || backend.write(lane_id, dp_hash, payload)).await? + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::model::{DataProposal, Hashed}; + + #[test_log::test(tokio::test)] + async fn test_file_durability_backend_writes_payload() -> Result<()> { + let dir = tempfile::tempdir()?; + let backend = FileDurabilityBackend::new(dir.path().to_path_buf(), "test-prefix".into()); + let lane_id = LaneId::default(); + let dp_hash = DataProposal::new_root(lane_id.clone(), vec![]).hashed(); + + backend + .upload_data_proposal(lane_id.clone(), dp_hash.clone(), b"payload".to_vec()) + .await?; + + let path = backend.object_path(&lane_id, &dp_hash); + assert_eq!(std::fs::read(path)?, b"payload"); + Ok(()) + } +} diff --git a/src/shared_storage/gcs.rs b/src/shared_storage/gcs.rs new file mode 100644 index 000000000..074290795 --- /dev/null +++ b/src/shared_storage/gcs.rs @@ -0,0 +1,129 @@ +use std::{future::Future, pin::Pin, sync::Arc}; + +use anyhow::{anyhow, Result}; +use bytes::Bytes; +use google_cloud_storage::client::Storage as GcsStorageClient; +use hyli_model::LaneId; +use tokio::sync::OnceCell; + +use crate::{ + model::DataProposalHash, shared_storage::durability::DurabilityBackend, + utils::conf::DataProposalDurabilityConf, +}; + +#[derive(Clone)] +pub struct DpGcsRuntime { + pub client: GcsStorageClient, + pub conf: DataProposalDurabilityConf, +} + +#[derive(Clone)] +pub struct GcsDurabilityBackend { + source: Arc, +} + +enum GcsRuntimeSource { + Lazy { + conf: DataProposalDurabilityConf, + runtime: OnceCell, + }, + #[cfg(test)] + Fixed(DpGcsRuntime), +} + +impl GcsDurabilityBackend { + pub fn new(conf: DataProposalDurabilityConf) -> Self { + Self { + source: Arc::new(GcsRuntimeSource::Lazy { + conf, + runtime: OnceCell::new(), + }), + } + } + + #[cfg(test)] + pub fn with_runtime(runtime: DpGcsRuntime) -> Self { + Self { + source: Arc::new(GcsRuntimeSource::Fixed(runtime)), + } + } + + async fn runtime(&self) -> Result { + 
match self.source.as_ref() { + GcsRuntimeSource::Lazy { conf, runtime } => Ok(runtime + .get_or_try_init(|| async { + Ok::(DpGcsRuntime { + client: GcsStorageClient::builder().build().await?, + conf: conf.clone(), + }) + }) + .await? + .clone()), + #[cfg(test)] + GcsRuntimeSource::Fixed(runtime) => Ok(runtime.clone()), + } + } + + async fn upload( + runtime: DpGcsRuntime, + lane_id: LaneId, + dp_hash: DataProposalHash, + payload: Vec, + ) -> Result<()> { + match runtime + .client + .write_object( + bucket_path(&runtime.conf.gcs_bucket), + object_name(&runtime.conf, &lane_id, &dp_hash), + Bytes::from(payload), + ) + .set_if_generation_match(0_i64) + .send_buffered() + .await + { + Ok(_) => Ok(()), + Err(err) + if err + .status() + .is_some_and(|status| matches!(status.code as i32, 6 | 9)) => + { + Ok(()) + } + Err(err) => Err(anyhow!(err)), + } + } +} + +impl DurabilityBackend for GcsDurabilityBackend { + fn upload_data_proposal( + &self, + lane_id: LaneId, + dp_hash: DataProposalHash, + payload: Vec, + ) -> Pin> + Send + 'static>> { + let backend = self.clone(); + Box::pin(async move { + let runtime = backend.runtime().await?; + Self::upload(runtime, lane_id, dp_hash, payload).await + }) + } +} + +fn bucket_path(bucket: &str) -> String { + if bucket.starts_with("projects/") { + bucket.to_string() + } else { + format!("projects/_/buckets/{bucket}") + } +} + +fn object_name( + conf: &DataProposalDurabilityConf, + lane_id: &LaneId, + dp_hash: &DataProposalHash, +) -> String { + format!( + "{}/data_proposals/{}/{}.bin", + conf.gcs_prefix, lane_id, dp_hash + ) +} diff --git a/src/shared_storage/mod.rs b/src/shared_storage/mod.rs new file mode 100644 index 000000000..b453f5a44 --- /dev/null +++ b/src/shared_storage/mod.rs @@ -0,0 +1,28 @@ +pub mod durability; +pub mod file; +pub mod gcs; + +use std::path::Path; +use std::sync::Arc; + +use crate::utils::conf::DataProposalDurabilityConf; + +pub use durability::{DataProposalDurability, DurabilityBackend, 
NullDurabilityBackend}; +pub use file::FileDurabilityBackend; +pub use gcs::{DpGcsRuntime, GcsDurabilityBackend}; + +pub fn durability_backend_for_conf( + data_directory: &Path, + conf: &DataProposalDurabilityConf, +) -> Arc { + if conf.gcs_enabled() { + Arc::new(GcsDurabilityBackend::new(conf.clone())) + } else if conf.file_enabled() { + Arc::new(FileDurabilityBackend::new( + data_directory.join("durable_data_proposals"), + conf.gcs_prefix.clone(), + )) + } else { + Arc::new(NullDurabilityBackend) + } +} diff --git a/src/utils/conf.rs b/src/utils/conf.rs index faa3ef86c..512c6e852 100644 --- a/src/utils/conf.rs +++ b/src/utils/conf.rs @@ -1,5 +1,6 @@ use anyhow::{Context, Result}; use config::{Config, Environment, File}; +use hyli_modules::modules::gcs_uploader::GCSConf; use hyli_modules::modules::websocket::WebSocketConfig; use serde::{Deserialize, Serialize}; use serde_with::serde_as; @@ -142,6 +143,37 @@ impl Default for OwnLaneConf { } } +#[derive(Serialize, Deserialize, Clone, Debug, Default)] +pub struct DataProposalDurabilityConf { + pub gcs_bucket: String, + pub gcs_prefix: String, + pub save_data_proposals: bool, +} + +impl DataProposalDurabilityConf { + pub fn gcs_enabled(&self) -> bool { + self.save_data_proposals && !self.gcs_bucket.trim().is_empty() + } + + pub fn file_enabled(&self) -> bool { + self.save_data_proposals && self.gcs_bucket.trim().is_empty() + } + + pub fn enabled(&self) -> bool { + self.gcs_enabled() || self.file_enabled() + } +} + +impl From for DataProposalDurabilityConf { + fn from(value: GCSConf) -> Self { + Self { + gcs_bucket: value.gcs_bucket, + gcs_prefix: value.gcs_prefix, + save_data_proposals: value.save_data_proposals, + } + } +} + impl From for WebSocketConfig { fn from(config: NodeWebSocketConfig) -> Self { Self { @@ -232,6 +264,10 @@ pub struct Conf { /// Own-lane configuration pub own_lanes: OwnLaneConf, + + /// Durable canonical data proposal storage configuration. 
+ #[serde(alias = "gcs")] + pub data_proposal_durability: DataProposalDurabilityConf, } impl Conf { diff --git a/src/utils/conf_defaults.toml b/src/utils/conf_defaults.toml index bcc2040da..91749d1f8 100644 --- a/src/utils/conf_defaults.toml +++ b/src/utils/conf_defaults.toml @@ -115,3 +115,8 @@ index_tx_events = true suffixes = ["default"] default_blob_suffix = "default" default_proof_suffix = "default" + +[data_proposal_durability] +gcs_bucket = "" +gcs_prefix = "local" +save_data_proposals = false From e34d0b82f2783ae1676bb881aa89e2afff9b34ee Mon Sep 17 00:00:00 2001 From: Alexandre Careil Date: Mon, 30 Mar 2026 13:07:14 +0200 Subject: [PATCH 04/22] fix clippy fmt --- src/data_availability/block_storage.rs | 4 +-- src/mempool/verify_tx.rs | 18 ++++++++---- src/shared_storage/durability.rs | 38 ++++++++++++++------------ 3 files changed, 34 insertions(+), 26 deletions(-) diff --git a/src/data_availability/block_storage.rs b/src/data_availability/block_storage.rs index a5daa943d..7ab87b0e5 100644 --- a/src/data_availability/block_storage.rs +++ b/src/data_availability/block_storage.rs @@ -9,9 +9,7 @@ use hyli_modules::utils::fjall_metrics::FjallMetrics; use std::{fmt::Debug, path::Path, sync::Arc, time::Instant}; use tracing::{debug, info, trace}; -use crate::{ - mempool::proposal_storage::ProposalStorage, -}; +use crate::mempool::proposal_storage::ProposalStorage; #[derive(Clone, BorshSerialize, BorshDeserialize)] struct StoredSignedBlock { diff --git a/src/mempool/verify_tx.rs b/src/mempool/verify_tx.rs index 4a2f1569c..8f2aaa735 100644 --- a/src/mempool/verify_tx.rs +++ b/src/mempool/verify_tx.rs @@ -342,13 +342,17 @@ impl super::Mempool { } } DataProposalVerdict::Refuse => { - self.cached_dp_votes - .insert((lane_id.clone(), data_proposal.hashed()), DataProposalVerdict::Refuse); + self.cached_dp_votes.insert( + (lane_id.clone(), data_proposal.hashed()), + DataProposalVerdict::Refuse, + ); debug!("Refuse vote for DataProposal"); } DataProposalVerdict::Ignore => 
{ - self.cached_dp_votes - .insert((lane_id.clone(), data_proposal.hashed()), DataProposalVerdict::Ignore); + self.cached_dp_votes.insert( + (lane_id.clone(), data_proposal.hashed()), + DataProposalVerdict::Ignore, + ); debug!("Ignore DataProposal {}", data_proposal.hashed()); } DataProposalVerdict::VotePendingPersistence => { @@ -581,8 +585,10 @@ impl super::Mempool { "Sending delayed vote for DataProposal {:?} on lane {} after persistence completed", received_hash, lane_id ); - self.cached_dp_votes - .insert((lane_id.clone(), received_hash.clone()), DataProposalVerdict::Vote); + self.cached_dp_votes.insert( + (lane_id.clone(), received_hash.clone()), + DataProposalVerdict::Vote, + ); self.send_vote(lane_id, received_hash.clone(), lane_size)?; Ok(true) } diff --git a/src/shared_storage/durability.rs b/src/shared_storage/durability.rs index ddf3d860c..c30667f02 100644 --- a/src/shared_storage/durability.rs +++ b/src/shared_storage/durability.rs @@ -41,9 +41,8 @@ impl DurabilityBackend for NullDurabilityBackend { type ProposalKey = (LaneId, DataProposalHash); type SharedPersistenceStateStore = Arc; -static SHARED_PERSISTENCE_STORES: OnceLock< - Mutex>, -> = OnceLock::new(); +static SHARED_PERSISTENCE_STORES: OnceLock>> = + OnceLock::new(); trait PersistenceStateStore: Send + Sync { fn mark_persisted(&self, lane_id: &LaneId, dp_hash: &DataProposalHash) -> Result<()>; @@ -106,8 +105,7 @@ impl FjallPersistenceStateStore { .cache_size(32 * 1024 * 1024) .max_journaling_size(64 * 1024 * 1024) .open()?; - let persisted = - db.keyspace("dp_persisted", KeyspaceCreateOptions::default)?; + let persisted = db.keyspace("dp_persisted", KeyspaceCreateOptions::default)?; Ok(Self { db, persisted }) } } @@ -289,15 +287,15 @@ impl DataProposalDurability { .unwrap() .get(&(lane_id.clone(), dp_hash.clone())) .cloned() - .is_some_and(|tracker| { - matches!( - *tracker - .state - .lock() - .unwrap_or_else(|poisoned| poisoned.into_inner()), - PersistenceState::Succeeded - ) - }) + 
.is_some_and(|tracker| { + matches!( + *tracker + .state + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()), + PersistenceState::Succeeded + ) + }) } pub fn persist(&self) -> Result<()> { @@ -471,7 +469,9 @@ mod tests { let lane_id = LaneId::new(crypto.validator_pubkey().clone()); let data_proposal = DataProposal::new_root( lane_id.clone(), - vec![make_register_contract_tx(ContractName::new("test-persisted"))], + vec![make_register_contract_tx(ContractName::new( + "test-persisted", + ))], ); let hash = data_proposal.hashed(); @@ -494,7 +494,9 @@ mod tests { let lane_id = LaneId::new(crypto.validator_pubkey().clone()); let data_proposal = DataProposal::new_root( lane_id.clone(), - vec![make_register_contract_tx(ContractName::new("test-persist-fail"))], + vec![make_register_contract_tx(ContractName::new( + "test-persist-fail", + ))], ); let hash = data_proposal.hashed(); @@ -525,7 +527,9 @@ mod tests { let lane_id = LaneId::new(crypto.validator_pubkey().clone()); let data_proposal = DataProposal::new_root( lane_id.clone(), - vec![make_register_contract_tx(ContractName::new("test-persisted-reused"))], + vec![make_register_contract_tx(ContractName::new( + "test-persisted-reused", + ))], ); let hash = data_proposal.hashed(); From 64321376295e039e868169b6ac46fe2f137aa714 Mon Sep 17 00:00:00 2001 From: Alexandre Careil Date: Mon, 30 Mar 2026 13:10:24 +0200 Subject: [PATCH 05/22] fix clippy 2 --- src/data_availability/block_storage.rs | 4 ++-- src/mempool/proposal_storage.rs | 1 + src/shared_storage/durability.rs | 17 +++++++++++++---- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/data_availability/block_storage.rs b/src/data_availability/block_storage.rs index 7ab87b0e5..7928598e9 100644 --- a/src/data_availability/block_storage.rs +++ b/src/data_availability/block_storage.rs @@ -147,7 +147,7 @@ impl Blocks { } pub fn new(path: &Path) -> Result { - let db = Database::builder(&path.join("data_availability.db")) + let db = 
Database::builder(path.join("data_availability.db")) .cache_size(256 * 1024 * 1024) .max_journaling_size(512 * 1024 * 1024) .open()?; @@ -173,7 +173,7 @@ impl Blocks { pub fn set_metrics_context(&mut self, node_id: impl Into) { self.metrics = - FjallMetrics::global("data_availability", &node_id.into(), "data_availability.db"); + FjallMetrics::global("data_availability", node_id.into(), "data_availability.db"); } pub fn is_empty(&self) -> bool { diff --git a/src/mempool/proposal_storage.rs b/src/mempool/proposal_storage.rs index 37c130fb6..30ab671cd 100644 --- a/src/mempool/proposal_storage.rs +++ b/src/mempool/proposal_storage.rs @@ -209,6 +209,7 @@ impl ProposalStorage { return Ok(None); }; let mut data_proposal = borsh::from_slice::(&bytes)?; + // Safety: the proposal bytes were loaded by the same hash key we are restoring. unsafe { data_proposal.unsafe_set_hash(dp_hash); } diff --git a/src/shared_storage/durability.rs b/src/shared_storage/durability.rs index c30667f02..ad1e6e0e4 100644 --- a/src/shared_storage/durability.rs +++ b/src/shared_storage/durability.rs @@ -284,7 +284,7 @@ impl DataProposalDurability { || self .in_flight .read() - .unwrap() + .unwrap_or_else(|poisoned| poisoned.into_inner()) .get(&(lane_id.clone(), dp_hash.clone())) .cloned() .is_some_and(|tracker| { @@ -303,11 +303,20 @@ impl DataProposalDurability { } fn persistence_tracker(&self, key: ProposalKey) -> Arc { - if let Some(existing) = self.in_flight.read().unwrap().get(&key).cloned() { + if let Some(existing) = self + .in_flight + .read() + .unwrap_or_else(|poisoned| poisoned.into_inner()) + .get(&key) + .cloned() + { return existing; } - let mut guard = self.in_flight.write().unwrap(); + let mut guard = self + .in_flight + .write() + .unwrap_or_else(|poisoned| poisoned.into_inner()); guard .entry(key) .or_insert_with(|| Arc::new(PersistenceTracker::default())) @@ -358,7 +367,7 @@ impl DataProposalDurability { ) { in_flight .write() - .unwrap() + .unwrap_or_else(|poisoned| 
poisoned.into_inner()) .remove(&(lane_id.clone(), dp_hash.clone())); } } From b7c2981214ff71c54982017a5d29f288c7354b06 Mon Sep 17 00:00:00 2001 From: Alexandre Careil Date: Mon, 30 Mar 2026 15:05:44 +0200 Subject: [PATCH 06/22] add missing rusttls crypto provider --- Cargo.lock | 1 + Cargo.toml | 1 + src/bin/hyli.rs | 5 +++++ src/bin/indexer.rs | 5 +++++ 4 files changed, 12 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 2801d2a86..02de19ddf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5376,6 +5376,7 @@ dependencies = [ "paste", "rand 0.9.2", "risc0-zkvm", + "rustls 0.23.36", "seq-macro", "serde", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index 6c0aba1e1..ae6728736 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -250,6 +250,7 @@ tracing = { workspace = true } assertables = { workspace = true } axum = { workspace = true, features = ["macros", "multipart"] } bytes = { workspace = true } +rustls = { workspace = true, features = ["aws-lc-rs"] } clap = { workspace = true, features = ["derive"] } config = { workspace = true, default-features = false, features = ["toml"] } futures = { workspace = true } diff --git a/src/bin/hyli.rs b/src/bin/hyli.rs index 9fa2b9e67..99dc86745 100644 --- a/src/bin/hyli.rs +++ b/src/bin/hyli.rs @@ -75,6 +75,11 @@ fn main() { } } async fn inner_main() -> Result<()> { + // Required for rustls 0.23.x consumers on this binary path, including GCS auth. + rustls::crypto::aws_lc_rs::default_provider() + .install_default() + .map_err(|_| anyhow::anyhow!("Failed to install default crypto provider"))?; + #[cfg(feature = "dhat")] let _profiler = { info!("Running with dhat memory profiler"); diff --git a/src/bin/indexer.rs b/src/bin/indexer.rs index 1019154a8..24344ec57 100644 --- a/src/bin/indexer.rs +++ b/src/bin/indexer.rs @@ -45,6 +45,11 @@ fn main() { } async fn inner_main() -> Result<()> { + // Required for rustls 0.23.x consumers on this binary path, including GCS auth. 
+ rustls::crypto::aws_lc_rs::default_provider() + .install_default() + .map_err(|_| anyhow::anyhow!("Failed to install default crypto provider"))?; + #[cfg(feature = "dhat")] let _profiler = { tracing::info!("Running with dhat memory profiler"); From 78e9c0998a52407c5db6670f805b1f34d8df7ec0 Mon Sep 17 00:00:00 2001 From: Alexandre Careil Date: Mon, 30 Mar 2026 16:25:38 +0200 Subject: [PATCH 07/22] Enable timers in long task runtime --- crates/hyli-turmoil-shims/src/runtime.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/hyli-turmoil-shims/src/runtime.rs b/crates/hyli-turmoil-shims/src/runtime.rs index 4d2158a50..d65837a21 100644 --- a/crates/hyli-turmoil-shims/src/runtime.rs +++ b/crates/hyli-turmoil-shims/src/runtime.rs @@ -83,6 +83,7 @@ impl LongTasksRuntime { // Limit the number of threads arbitrarily to lower the maximal impact on the whole node .worker_threads(threads) .thread_name(thread_name) + .enable_time() .build() .expect("Failed to create hashing runtime"), ))) From 1921cd40aa207585945ecae77143d179d3ded28a Mon Sep 17 00:00:00 2001 From: Alexandre Careil Date: Mon, 30 Mar 2026 16:36:40 +0200 Subject: [PATCH 08/22] add timestamp prefix to store data proposals --- src/shared_storage/durability.rs | 87 +++++++++++++++++++++++++++++++- src/shared_storage/gcs.rs | 67 +++++++++++++++++++----- src/shared_storage/mod.rs | 2 +- 3 files changed, 139 insertions(+), 17 deletions(-) diff --git a/src/shared_storage/durability.rs b/src/shared_storage/durability.rs index ad1e6e0e4..ab590d36e 100644 --- a/src/shared_storage/durability.rs +++ b/src/shared_storage/durability.rs @@ -391,13 +391,18 @@ mod tests { shared_storage::gcs::{DpGcsRuntime, GcsDurabilityBackend}, utils::conf::DataProposalDurabilityConf, }; - use axum::{extract::Path, response::IntoResponse, routing::post, Json, Router}; + use axum::{ + extract::{Path, Request}, + response::IntoResponse, + routing::post, + Json, Router, + }; use google_cloud_auth::credentials::anonymous::Builder as 
Anonymous; use google_cloud_storage::client::Storage; use hyli_crypto::BlstCrypto; use hyli_model::ContractName; use serde_json::json; - use std::sync::OnceLock; + use std::sync::{Arc, OnceLock}; use tokio::{net::TcpListener, sync::Mutex, task::JoinHandle}; static GCS_TEST_LOCK: OnceLock> = OnceLock::new(); @@ -429,6 +434,7 @@ mod tests { gcs_prefix: "test-prefix".to_string(), save_data_proposals: true, }, + genesis_timestamp_folder: None, }) } @@ -466,6 +472,41 @@ mod tests { Ok((endpoint, TestGcsServer { task })) } + async fn start_recording_test_gcs_server( + recorded_queries: Arc>>, + ) -> anyhow::Result<(String, TestGcsServer)> { + async fn upload( + Path(bucket): Path, + request: Request, + recorded_queries: Arc>>, + ) -> impl IntoResponse { + recorded_queries + .lock() + .await + .push(request.uri().query().unwrap_or_default().to_string()); + Json(json!({ + "bucket": format!("projects/_/buckets/{bucket}"), + "name": "stored-object", + "size": 1, + })) + } + + let listener = TcpListener::bind("127.0.0.1:0").await?; + let endpoint = format!("http://{}", listener.local_addr()?); + let app = Router::new().route( + "/upload/storage/v1/b/{bucket}/o", + post({ + let recorded_queries = Arc::clone(&recorded_queries); + move |path, request| upload(path, request, Arc::clone(&recorded_queries)) + }), + ); + let task = tokio::spawn(async move { + let _ = axum::serve(listener, app).await; + }); + + Ok((endpoint, TestGcsServer { task })) + } + #[test_log::test(tokio::test)] async fn test_wait_until_persisted_succeeds_with_mock_gcs() -> Result<()> { let _guard = gcs_test_lock().lock().await; @@ -554,4 +595,46 @@ mod tests { assert!(fresh.is_persisted(&lane_id, &hash)); Ok(()) } + + #[test_log::test(tokio::test)] + async fn test_gcs_upload_prefix_includes_genesis_timestamp_folder() -> Result<()> { + let _guard = gcs_test_lock().lock().await; + let recorded_queries = Arc::new(Mutex::new(Vec::new())); + let (endpoint, _server) = + 
start_recording_test_gcs_server(Arc::clone(&recorded_queries)).await?; + let durability = DataProposalDurability::new_in_memory(Arc::new( + GcsDurabilityBackend::with_runtime(DpGcsRuntime { + client: Storage::builder() + .with_endpoint(endpoint) + .with_credentials(Anonymous::new().build()) + .build() + .await?, + conf: DataProposalDurabilityConf { + gcs_bucket: "test-bucket".to_string(), + gcs_prefix: "test-prefix".to_string(), + save_data_proposals: true, + }, + genesis_timestamp_folder: Some("2026-03-30T14-00-00Z".to_string()), + }), + )); + + let crypto = BlstCrypto::new("persistence-prefix").unwrap(); + let lane_id = LaneId::new(crypto.validator_pubkey().clone()); + let data_proposal = DataProposal::new_root( + lane_id.clone(), + vec![make_register_contract_tx(ContractName::new( + "test-prefix-dp", + ))], + ); + let hash = data_proposal.hashed(); + + durability.prime_persistence(lane_id.clone(), &data_proposal)?; + durability.wait_until_persisted(&lane_id, &hash).await?; + + let queries = recorded_queries.lock().await; + assert!(queries.iter().any(|query| { + query.contains("name=test-prefix%2F2026-03-30T14-00-00Z%2Fdata_proposals%2F") + })); + Ok(()) + } } diff --git a/src/shared_storage/gcs.rs b/src/shared_storage/gcs.rs index 074290795..8caec2bbf 100644 --- a/src/shared_storage/gcs.rs +++ b/src/shared_storage/gcs.rs @@ -1,6 +1,13 @@ -use std::{future::Future, pin::Pin, sync::Arc}; +use std::{ + fs::File, + future::Future, + path::{Path, PathBuf}, + pin::Pin, + sync::Arc, +}; -use anyhow::{anyhow, Result}; +use anyhow::{anyhow, Context, Result}; +use borsh::{BorshDeserialize, BorshSerialize}; use bytes::Bytes; use google_cloud_storage::client::Storage as GcsStorageClient; use hyli_model::LaneId; @@ -15,6 +22,7 @@ use crate::{ pub struct DpGcsRuntime { pub client: GcsStorageClient, pub conf: DataProposalDurabilityConf, + pub genesis_timestamp_folder: Option, } #[derive(Clone)] @@ -25,6 +33,7 @@ pub struct GcsDurabilityBackend { enum GcsRuntimeSource { Lazy { 
conf: DataProposalDurabilityConf, + data_directory: PathBuf, runtime: OnceCell, }, #[cfg(test)] @@ -32,10 +41,11 @@ enum GcsRuntimeSource { } impl GcsDurabilityBackend { - pub fn new(conf: DataProposalDurabilityConf) -> Self { + pub fn new(data_directory: &Path, conf: DataProposalDurabilityConf) -> Self { Self { source: Arc::new(GcsRuntimeSource::Lazy { conf, + data_directory: data_directory.to_path_buf(), runtime: OnceCell::new(), }), } @@ -50,11 +60,16 @@ impl GcsDurabilityBackend { async fn runtime(&self) -> Result { match self.source.as_ref() { - GcsRuntimeSource::Lazy { conf, runtime } => Ok(runtime + GcsRuntimeSource::Lazy { + conf, + data_directory, + runtime, + } => Ok(runtime .get_or_try_init(|| async { Ok::(DpGcsRuntime { client: GcsStorageClient::builder().build().await?, conf: conf.clone(), + genesis_timestamp_folder: load_genesis_timestamp_folder(data_directory)?, }) }) .await? @@ -74,7 +89,7 @@ impl GcsDurabilityBackend { .client .write_object( bucket_path(&runtime.conf.gcs_bucket), - object_name(&runtime.conf, &lane_id, &dp_hash), + object_name(&runtime, &lane_id, &dp_hash), Bytes::from(payload), ) .set_if_generation_match(0_i64) @@ -117,13 +132,37 @@ fn bucket_path(bucket: &str) -> String { } } -fn object_name( - conf: &DataProposalDurabilityConf, - lane_id: &LaneId, - dp_hash: &DataProposalHash, -) -> String { - format!( - "{}/data_proposals/{}/{}.bin", - conf.gcs_prefix, lane_id, dp_hash - ) +fn object_name(runtime: &DpGcsRuntime, lane_id: &LaneId, dp_hash: &DataProposalHash) -> String { + let prefix = match runtime.genesis_timestamp_folder.as_deref() { + Some(genesis_timestamp_folder) => { + format!("{}/{genesis_timestamp_folder}", runtime.conf.gcs_prefix) + } + None => runtime.conf.gcs_prefix.clone(), + }; + + format!("{}/data_proposals/{}/{}.bin", prefix, lane_id, dp_hash) +} + +#[derive(Debug, Clone, BorshSerialize, BorshDeserialize)] +struct GenesisTimestampStore { + timestamp_folder: String, +} + +const GENESIS_TIMESTAMP_FILE: &str = 
"gcs_genesis_timestamp.bin"; + +fn load_genesis_timestamp_folder(data_directory: &Path) -> Result> { + let full_path = data_directory.join(PathBuf::from(GENESIS_TIMESTAMP_FILE)); + let mut handle = match File::open(&full_path) { + Ok(handle) => handle, + Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None), + Err(err) => return Err(err).context("Opening genesis timestamp file"), + }; + + let store: GenesisTimestampStore = + borsh::from_reader(&mut handle).context("Deserializing genesis timestamp file")?; + + chrono::NaiveDateTime::parse_from_str(&store.timestamp_folder, "%Y-%m-%dT%H-%M-%SZ") + .context("Parsing genesis timestamp")?; + + Ok(Some(store.timestamp_folder)) } diff --git a/src/shared_storage/mod.rs b/src/shared_storage/mod.rs index b453f5a44..a5b34d10d 100644 --- a/src/shared_storage/mod.rs +++ b/src/shared_storage/mod.rs @@ -16,7 +16,7 @@ pub fn durability_backend_for_conf( conf: &DataProposalDurabilityConf, ) -> Arc { if conf.gcs_enabled() { - Arc::new(GcsDurabilityBackend::new(conf.clone())) + Arc::new(GcsDurabilityBackend::new(data_directory, conf.clone())) } else if conf.file_enabled() { Arc::new(FileDurabilityBackend::new( data_directory.join("durable_data_proposals"), From 776b4d47e51605441023d4d7eebe2da85f0286c4 Mon Sep 17 00:00:00 2001 From: Alexandre Careil Date: Mon, 30 Mar 2026 17:08:02 +0200 Subject: [PATCH 09/22] Scope DP GCS uploads by genesis timestamp --- src/shared_storage/gcs.rs | 82 ++++++++++++++++++++++++++++++++++----- 1 file changed, 72 insertions(+), 10 deletions(-) diff --git a/src/shared_storage/gcs.rs b/src/shared_storage/gcs.rs index 8caec2bbf..6101fbde8 100644 --- a/src/shared_storage/gcs.rs +++ b/src/shared_storage/gcs.rs @@ -10,12 +10,12 @@ use anyhow::{anyhow, Context, Result}; use borsh::{BorshDeserialize, BorshSerialize}; use bytes::Bytes; use google_cloud_storage::client::Storage as GcsStorageClient; -use hyli_model::LaneId; +use hyli_model::{BlockHeight, LaneId}; use tokio::sync::OnceCell; use 
crate::{ - model::DataProposalHash, shared_storage::durability::DurabilityBackend, - utils::conf::DataProposalDurabilityConf, + data_availability::block_storage::Blocks, model::DataProposalHash, + shared_storage::durability::DurabilityBackend, utils::conf::DataProposalDurabilityConf, }; #[derive(Clone)] @@ -133,16 +133,23 @@ fn bucket_path(bucket: &str) -> String { } fn object_name(runtime: &DpGcsRuntime, lane_id: &LaneId, dp_hash: &DataProposalHash) -> String { - let prefix = match runtime.genesis_timestamp_folder.as_deref() { - Some(genesis_timestamp_folder) => { - format!("{}/{genesis_timestamp_folder}", runtime.conf.gcs_prefix) - } - None => runtime.conf.gcs_prefix.clone(), - }; + let prefix = effective_prefix( + &runtime.conf.gcs_prefix, + runtime.genesis_timestamp_folder.as_deref(), + ); format!("{}/data_proposals/{}/{}.bin", prefix, lane_id, dp_hash) } +fn effective_prefix(gcs_prefix: &str, genesis_timestamp_folder: Option<&str>) -> String { + match genesis_timestamp_folder { + Some(genesis_timestamp_folder) => { + format!("{gcs_prefix}/{genesis_timestamp_folder}") + } + None => gcs_prefix.to_string(), + } +} + #[derive(Debug, Clone, BorshSerialize, BorshDeserialize)] struct GenesisTimestampStore { timestamp_folder: String, @@ -154,7 +161,9 @@ fn load_genesis_timestamp_folder(data_directory: &Path) -> Result let full_path = data_directory.join(PathBuf::from(GENESIS_TIMESTAMP_FILE)); let mut handle = match File::open(&full_path) { Ok(handle) => handle, - Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None), + Err(err) if err.kind() == std::io::ErrorKind::NotFound => { + return load_genesis_timestamp_folder_from_blocks(data_directory) + } Err(err) => return Err(err).context("Opening genesis timestamp file"), }; @@ -166,3 +175,56 @@ fn load_genesis_timestamp_folder(data_directory: &Path) -> Result Ok(Some(store.timestamp_folder)) } + +fn load_genesis_timestamp_folder_from_blocks(data_directory: &Path) -> Result> { + let blocks = 
Blocks::new(data_directory)?; + let Some(genesis_block) = blocks.get_by_height(BlockHeight(0))? else { + return Ok(None); + }; + + Ok(Some(timestamp_to_folder_name( + genesis_block.consensus_proposal.timestamp.0, + ))) +} + +fn timestamp_to_folder_name(timestamp_ms: u128) -> String { + let secs = (timestamp_ms / 1000) as i64; + let datetime = + chrono::DateTime::::from_timestamp(secs, 0).expect("Invalid timestamp"); + datetime.format("%Y-%m-%dT%H-%M-%SZ").to_string() +} + +#[cfg(test)] +mod tests { + use super::*; + use hyli_model::{utils::TimestampMs, SignedBlock}; + + #[test] + fn load_genesis_timestamp_folder_falls_back_to_local_blocks() -> Result<()> { + let tmpdir = tempfile::tempdir()?; + { + let mut blocks = Blocks::new(tmpdir.path())?; + let mut genesis = SignedBlock::default(); + genesis.consensus_proposal.timestamp = TimestampMs(1_743_336_506_000); + blocks.put(genesis)?; + } + + let timestamp_folder = load_genesis_timestamp_folder(tmpdir.path())?; + + assert_eq!(timestamp_folder.as_deref(), Some("2025-03-30T12-08-26Z")); + Ok(()) + } + + #[test] + fn effective_prefix_uses_genesis_timestamp_folder_when_available() { + let lane_id = LaneId::default(); + let dp_hash = DataProposalHash::from_hex("deadbeef").unwrap(); + let prefix = effective_prefix("camelot", Some("2026-03-30T12-08-26Z")); + let object_name = format!("{}/data_proposals/{}/{}.bin", prefix, lane_id, dp_hash); + + assert_eq!( + object_name, + format!("camelot/2026-03-30T12-08-26Z/data_proposals/{lane_id}/{dp_hash}.bin") + ); + } +} From e90c2374dced58a1161740f4ff9c2852b2c04c04 Mon Sep 17 00:00:00 2001 From: Alexandre Careil Date: Mon, 30 Mar 2026 17:30:40 +0200 Subject: [PATCH 10/22] fix gcs timestamp --- src/data_availability.rs | 9 +++++++ src/shared_storage/gcs.rs | 51 ++++++++++++++++++++------------------- 2 files changed, 35 insertions(+), 25 deletions(-) diff --git a/src/data_availability.rs b/src/data_availability.rs index 6af467785..12e21c74b 100644 --- a/src/data_availability.rs +++ 
b/src/data_availability.rs @@ -16,6 +16,7 @@ use crate::{ genesis::GenesisEvent, model::*, p2p::network::{OutboundMessage, PeerEvent}, + shared_storage::gcs::persist_genesis_timestamp_for_gcs, utils::conf::SharedConf, }; use anyhow::{Context, Result}; @@ -1057,6 +1058,14 @@ impl DataAvailability { } fn store_block(&mut self, block: &SignedBlock) -> Result<()> { + if block.height() == BlockHeight(0) { + persist_genesis_timestamp_for_gcs( + &self.config.data_directory, + block.consensus_proposal.timestamp.0, + ) + .context("Persisting genesis timestamp for GCS")?; + } + self.blocks .put(block.clone()) .context(format!("Storing block {}", block.height()))?; diff --git a/src/shared_storage/gcs.rs b/src/shared_storage/gcs.rs index 6101fbde8..b834ee48f 100644 --- a/src/shared_storage/gcs.rs +++ b/src/shared_storage/gcs.rs @@ -10,12 +10,12 @@ use anyhow::{anyhow, Context, Result}; use borsh::{BorshDeserialize, BorshSerialize}; use bytes::Bytes; use google_cloud_storage::client::Storage as GcsStorageClient; -use hyli_model::{BlockHeight, LaneId}; +use hyli_model::LaneId; use tokio::sync::OnceCell; use crate::{ - data_availability::block_storage::Blocks, model::DataProposalHash, - shared_storage::durability::DurabilityBackend, utils::conf::DataProposalDurabilityConf, + model::DataProposalHash, shared_storage::durability::DurabilityBackend, + utils::conf::DataProposalDurabilityConf, }; #[derive(Clone)] @@ -124,6 +124,16 @@ impl DurabilityBackend for GcsDurabilityBackend { } } +pub fn persist_genesis_timestamp_for_gcs(data_directory: &Path, timestamp_ms: u128) -> Result<()> { + let store = GenesisTimestampStore { + timestamp_folder: timestamp_to_folder_name(timestamp_ms), + }; + let path = data_directory.join(GENESIS_TIMESTAMP_FILE); + let mut file = File::create(&path).context("Creating genesis timestamp file")?; + borsh::to_writer(&mut file, &store).context("Serializing genesis timestamp file")?; + Ok(()) +} + fn bucket_path(bucket: &str) -> String { if 
bucket.starts_with("projects/") { bucket.to_string() @@ -161,9 +171,7 @@ fn load_genesis_timestamp_folder(data_directory: &Path) -> Result let full_path = data_directory.join(PathBuf::from(GENESIS_TIMESTAMP_FILE)); let mut handle = match File::open(&full_path) { Ok(handle) => handle, - Err(err) if err.kind() == std::io::ErrorKind::NotFound => { - return load_genesis_timestamp_folder_from_blocks(data_directory) - } + Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None), Err(err) => return Err(err).context("Opening genesis timestamp file"), }; @@ -176,17 +184,6 @@ fn load_genesis_timestamp_folder(data_directory: &Path) -> Result Ok(Some(store.timestamp_folder)) } -fn load_genesis_timestamp_folder_from_blocks(data_directory: &Path) -> Result> { - let blocks = Blocks::new(data_directory)?; - let Some(genesis_block) = blocks.get_by_height(BlockHeight(0))? else { - return Ok(None); - }; - - Ok(Some(timestamp_to_folder_name( - genesis_block.consensus_proposal.timestamp.0, - ))) -} - fn timestamp_to_folder_name(timestamp_ms: u128) -> String { let secs = (timestamp_ms / 1000) as i64; let datetime = @@ -197,17 +194,12 @@ fn timestamp_to_folder_name(timestamp_ms: u128) -> String { #[cfg(test)] mod tests { use super::*; - use hyli_model::{utils::TimestampMs, SignedBlock}; + use std::fs; #[test] - fn load_genesis_timestamp_folder_falls_back_to_local_blocks() -> Result<()> { + fn persist_and_load_genesis_timestamp_folder_round_trip() -> Result<()> { let tmpdir = tempfile::tempdir()?; - { - let mut blocks = Blocks::new(tmpdir.path())?; - let mut genesis = SignedBlock::default(); - genesis.consensus_proposal.timestamp = TimestampMs(1_743_336_506_000); - blocks.put(genesis)?; - } + persist_genesis_timestamp_for_gcs(tmpdir.path(), 1_743_336_506_000)?; let timestamp_folder = load_genesis_timestamp_folder(tmpdir.path())?; @@ -215,6 +207,15 @@ mod tests { Ok(()) } + #[test] + fn load_genesis_timestamp_folder_returns_none_without_file() -> Result<()> { + let tmpdir 
= tempfile::tempdir()?; + fs::create_dir_all(tmpdir.path())?; + + assert_eq!(load_genesis_timestamp_folder(tmpdir.path())?, None); + Ok(()) + } + #[test] fn effective_prefix_uses_genesis_timestamp_folder_when_available() { let lane_id = LaneId::default(); From 730f98cf61c1d33e437df2c8030f5111469076aa Mon Sep 17 00:00:00 2001 From: Alexandre Careil Date: Tue, 31 Mar 2026 13:45:57 +0200 Subject: [PATCH 11/22] Use chain timestamp metadata for DP GCS paths --- crates/hyli-modules/Cargo.toml | 4 +- crates/hyli-modules/src/node_state.rs | 43 ++++++ src/data_availability.rs | 18 +-- src/entrypoint.rs | 6 + src/mempool/module.rs | 11 +- src/shared_storage/durability.rs | 4 +- src/shared_storage/gcs.rs | 195 +++++++++++++++++++------- src/shared_storage/mod.rs | 19 ++- 8 files changed, 228 insertions(+), 72 deletions(-) diff --git a/crates/hyli-modules/Cargo.toml b/crates/hyli-modules/Cargo.toml index e2a008aa9..f19f47ec1 100644 --- a/crates/hyli-modules/Cargo.toml +++ b/crates/hyli-modules/Cargo.toml @@ -34,7 +34,7 @@ sqlx = { workspace = true, optional = true, features = ["chrono"] } # GCS google-cloud-storage = { workspace = true, optional = true } -chrono = { workspace = true, optional = true } +chrono = { workspace = true } tokio = { workspace = true, features = ["full", "tracing"] } tokio-util = { workspace = true } @@ -83,4 +83,4 @@ instrumentation = [ ] indexer = ["client-sdk/csi", "db"] db = ["dep:sqlx", "hyli-model/sqlx", "sqlx/runtime-tokio", "sqlx/postgres"] -gcs = ["dep:google-cloud-storage", "dep:chrono"] +gcs = ["dep:google-cloud-storage"] diff --git a/crates/hyli-modules/src/node_state.rs b/crates/hyli-modules/src/node_state.rs index ca8d1742e..88edada93 100644 --- a/crates/hyli-modules/src/node_state.rs +++ b/crates/hyli-modules/src/node_state.rs @@ -370,6 +370,7 @@ impl<'a> std::ops::DerefMut for NodeStateProcessing<'a> { pub struct NodeStateStore { timeouts: Timeouts, pub current_height: BlockHeight, + pub current_chain_timestamp: Option, // This field is 
public for testing purposes pub contracts: HashMap, unsettled_transactions: OrderedTxMap, @@ -392,6 +393,7 @@ impl Default for NodeStateStore { let mut ret = Self { timeouts: Timeouts::default(), current_height: BlockHeight(0), + current_chain_timestamp: None, contracts: HashMap::new(), unsettled_transactions: OrderedTxMap::default(), }; @@ -444,6 +446,22 @@ impl<'any> NodeStateProcessing<'any> { debug!("Handling signed block: {:?}", signed_block.height()); self.current_height = signed_block.height(); + if initial_block { + self.current_chain_timestamp = Some( + chrono::DateTime::::from_timestamp( + (signed_block.consensus_proposal.timestamp.0 / 1000) as i64, + 0, + ) + .ok_or_else(|| { + anyhow::anyhow!( + "Invalid genesis timestamp: {}", + signed_block.consensus_proposal.timestamp.0 + ) + })? + .format("%Y-%m-%dT%H-%M-%SZ") + .to_string(), + ); + } self.clear_timeouts(); @@ -1985,6 +2003,7 @@ pub mod test { pub use helpers::*; use super::*; + use hyli_model::utils::TimestampMs; use hyli_net::clock::TimestampMsClock; use sdk::verifiers::ShaBlob; use sha3::Digest; @@ -1996,6 +2015,30 @@ pub mod test { } } + #[test] + fn genesis_block_sets_current_chain_timestamp() { + let mut node_state = NodeState { + metrics: NodeStateMetrics::global("test"), + store: NodeStateStore::default(), + }; + let genesis_block = SignedBlock { + certificate: AggregateSignature::default(), + consensus_proposal: ConsensusProposal { + slot: 0, + timestamp: TimestampMs(1_743_336_506_000), + ..ConsensusProposal::default() + }, + data_proposals: Vec::new(), + }; + + node_state.handle_signed_block(genesis_block).unwrap(); + + assert_eq!( + node_state.store.current_chain_timestamp.as_deref(), + Some("2025-03-30T12-08-26Z") + ); + } + fn new_blob(contract: &str) -> Blob { Blob { contract_name: ContractName::new(contract), diff --git a/src/data_availability.rs b/src/data_availability.rs index 12e21c74b..8bd31e201 100644 --- a/src/data_availability.rs +++ b/src/data_availability.rs @@ -16,7 +16,7 @@ 
use crate::{ genesis::GenesisEvent, model::*, p2p::network::{OutboundMessage, PeerEvent}, - shared_storage::gcs::persist_genesis_timestamp_for_gcs, + shared_storage::gcs::persist_current_chain_timestamp_for_block, utils::conf::SharedConf, }; use anyhow::{Context, Result}; @@ -1058,14 +1058,6 @@ impl DataAvailability { } fn store_block(&mut self, block: &SignedBlock) -> Result<()> { - if block.height() == BlockHeight(0) { - persist_genesis_timestamp_for_gcs( - &self.config.data_directory, - block.consensus_proposal.timestamp.0, - ) - .context("Persisting genesis timestamp for GCS")?; - } - self.blocks .put(block.clone()) .context(format!("Storing block {}", block.height()))?; @@ -1108,6 +1100,14 @@ impl DataAvailability { ) -> anyhow::Result<()> { self.store_block(&block)?; + if block.height() == BlockHeight(0) && self.config.data_proposal_durability.enabled() { + persist_current_chain_timestamp_for_block( + &self.config.data_directory, + block.consensus_proposal.timestamp.0, + ) + .context("Persisting current chain timestamp")?; + } + let block_hash = block.hashed(); // Add new block to all streaming peer queues to ensure ordering diff --git a/src/entrypoint.rs b/src/entrypoint.rs index 20b58df61..c59450777 100644 --- a/src/entrypoint.rs +++ b/src/entrypoint.rs @@ -11,6 +11,7 @@ use crate::{ model::{api::NodeInfo, ContractName, SharedRunContext}, p2p::P2P, rest::{ApiDoc, RestApi, RestApiRunContext}, + shared_storage::gcs::persist_current_chain_timestamp_from_node_state, single_node_consensus::SingleNodeConsensus, tcp_server::TcpServer, utils::{ @@ -464,6 +465,9 @@ pub async fn common_main( ), "Writing checksum manifest for fast catchup stores" )?; + + persist_current_chain_timestamp_from_node_state(&config.data_directory) + .context("Persisting current chain timestamp from fast catchup node state")?; } else { let reason = bootstrap_failure_reason .unwrap_or_else(|| "no peer responded successfully".to_string()); @@ -481,6 +485,8 @@ pub async fn common_main( 
NODE_STATE_BIN, config.data_directory.display() ); + persist_current_chain_timestamp_from_node_state(&config.data_directory) + .context("Persisting current chain timestamp from existing node state")?; } } diff --git a/src/mempool/module.rs b/src/mempool/module.rs index 11949cb87..ba14f7aad 100644 --- a/src/mempool/module.rs +++ b/src/mempool/module.rs @@ -48,11 +48,14 @@ impl Module for Mempool { let mut lanes = shared_lanes_storage(&ctx.config.data_directory)?; lanes.set_metrics_context(ctx.config.id.clone()); + let durability_backend = durability_backend_for_conf( + &ctx.config.data_directory, + &ctx.config.data_proposal_durability, + ctx.config.run_fast_catchup, + ) + .await?; let durability = crate::shared_storage::DataProposalDurability::new( - durability_backend_for_conf( - &ctx.config.data_directory, - &ctx.config.data_proposal_durability, - ), + durability_backend, &ctx.config.data_directory, )?; diff --git a/src/shared_storage/durability.rs b/src/shared_storage/durability.rs index ab590d36e..167b02920 100644 --- a/src/shared_storage/durability.rs +++ b/src/shared_storage/durability.rs @@ -434,7 +434,7 @@ mod tests { gcs_prefix: "test-prefix".to_string(), save_data_proposals: true, }, - genesis_timestamp_folder: None, + current_chain_timestamp: "2026-03-30T14-00-00Z".to_string(), }) } @@ -614,7 +614,7 @@ mod tests { gcs_prefix: "test-prefix".to_string(), save_data_proposals: true, }, - genesis_timestamp_folder: Some("2026-03-30T14-00-00Z".to_string()), + current_chain_timestamp: "2026-03-30T14-00-00Z".to_string(), }), )); diff --git a/src/shared_storage/gcs.rs b/src/shared_storage/gcs.rs index b834ee48f..d928f9ea1 100644 --- a/src/shared_storage/gcs.rs +++ b/src/shared_storage/gcs.rs @@ -10,7 +10,10 @@ use anyhow::{anyhow, Context, Result}; use borsh::{BorshDeserialize, BorshSerialize}; use bytes::Bytes; use google_cloud_storage::client::Storage as GcsStorageClient; +use hyli_bus::modules::files::NODE_STATE_BIN; use hyli_model::LaneId; +use 
hyli_modules::modules::Module; +use hyli_modules::node_state::{module::NodeStateModule, NodeStateStore}; use tokio::sync::OnceCell; use crate::{ @@ -22,7 +25,7 @@ use crate::{ pub struct DpGcsRuntime { pub client: GcsStorageClient, pub conf: DataProposalDurabilityConf, - pub genesis_timestamp_folder: Option, + pub current_chain_timestamp: String, } #[derive(Clone)] @@ -58,6 +61,11 @@ impl GcsDurabilityBackend { } } + pub async fn initialize(&self) -> Result<()> { + let _ = self.runtime().await?; + Ok(()) + } + async fn runtime(&self) -> Result { match self.source.as_ref() { GcsRuntimeSource::Lazy { @@ -66,10 +74,18 @@ impl GcsDurabilityBackend { runtime, } => Ok(runtime .get_or_try_init(|| async { + let current_chain_timestamp = load_current_chain_timestamp(data_directory) + .with_context(|| { + format!( + "Loading current chain timestamp from {}", + data_directory.display() + ) + })?; + Ok::(DpGcsRuntime { client: GcsStorageClient::builder().build().await?, conf: conf.clone(), - genesis_timestamp_folder: load_genesis_timestamp_folder(data_directory)?, + current_chain_timestamp, }) }) .await? 
@@ -124,16 +140,29 @@ impl DurabilityBackend for GcsDurabilityBackend { } } -pub fn persist_genesis_timestamp_for_gcs(data_directory: &Path, timestamp_ms: u128) -> Result<()> { - let store = GenesisTimestampStore { - timestamp_folder: timestamp_to_folder_name(timestamp_ms), +pub fn persist_current_chain_timestamp( + data_directory: &Path, + current_chain_timestamp: &str, +) -> Result<()> { + let store = CurrentChainTimestampStore { + timestamp_folder: current_chain_timestamp.to_string(), }; - let path = data_directory.join(GENESIS_TIMESTAMP_FILE); - let mut file = File::create(&path).context("Creating genesis timestamp file")?; - borsh::to_writer(&mut file, &store).context("Serializing genesis timestamp file")?; + let path = data_directory.join(CURRENT_CHAIN_TIMESTAMP_FILE); + let mut file = File::create(&path).context("Creating current chain timestamp file")?; + borsh::to_writer(&mut file, &store).context("Serializing current chain timestamp file")?; Ok(()) } +pub fn persist_current_chain_timestamp_for_block( + data_directory: &Path, + timestamp_ms: u128, +) -> Result<()> { + let store = CurrentChainTimestampStore { + timestamp_folder: timestamp_to_folder_name(timestamp_ms)?, + }; + persist_current_chain_timestamp(data_directory, &store.timestamp_folder) +} + fn bucket_path(bucket: &str) -> String { if bucket.starts_with("projects/") { bucket.to_string() @@ -143,84 +172,89 @@ fn bucket_path(bucket: &str) -> String { } fn object_name(runtime: &DpGcsRuntime, lane_id: &LaneId, dp_hash: &DataProposalHash) -> String { - let prefix = effective_prefix( - &runtime.conf.gcs_prefix, - runtime.genesis_timestamp_folder.as_deref(), - ); + let prefix = effective_prefix(&runtime.conf.gcs_prefix, &runtime.current_chain_timestamp); format!("{}/data_proposals/{}/{}.bin", prefix, lane_id, dp_hash) } -fn effective_prefix(gcs_prefix: &str, genesis_timestamp_folder: Option<&str>) -> String { - match genesis_timestamp_folder { - Some(genesis_timestamp_folder) => { - 
format!("{gcs_prefix}/{genesis_timestamp_folder}") - } - None => gcs_prefix.to_string(), - } +fn effective_prefix(gcs_prefix: &str, current_chain_timestamp: &str) -> String { + format!("{gcs_prefix}/{current_chain_timestamp}") } #[derive(Debug, Clone, BorshSerialize, BorshDeserialize)] -struct GenesisTimestampStore { +struct CurrentChainTimestampStore { timestamp_folder: String, } -const GENESIS_TIMESTAMP_FILE: &str = "gcs_genesis_timestamp.bin"; +const CURRENT_CHAIN_TIMESTAMP_FILE: &str = "current_chain_timestamp.bin"; -fn load_genesis_timestamp_folder(data_directory: &Path) -> Result> { - let full_path = data_directory.join(PathBuf::from(GENESIS_TIMESTAMP_FILE)); - let mut handle = match File::open(&full_path) { - Ok(handle) => handle, - Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None), - Err(err) => return Err(err).context("Opening genesis timestamp file"), - }; +fn load_current_chain_timestamp(data_directory: &Path) -> Result { + let full_path = data_directory.join(CURRENT_CHAIN_TIMESTAMP_FILE); + let mut handle = File::open(&full_path).with_context(|| { + format!( + "Opening required current chain timestamp file {}", + full_path.display() + ) + })?; - let store: GenesisTimestampStore = - borsh::from_reader(&mut handle).context("Deserializing genesis timestamp file")?; + let store: CurrentChainTimestampStore = + borsh::from_reader(&mut handle).context("Deserializing current chain timestamp file")?; chrono::NaiveDateTime::parse_from_str(&store.timestamp_folder, "%Y-%m-%dT%H-%M-%SZ") - .context("Parsing genesis timestamp")?; + .context("Parsing current chain timestamp")?; - Ok(Some(store.timestamp_folder)) + Ok(store.timestamp_folder) } -fn timestamp_to_folder_name(timestamp_ms: u128) -> String { +pub fn persist_current_chain_timestamp_from_node_state(data_directory: &Path) -> Result<()> { + let store = + NodeStateModule::load_from_disk::(data_directory, NODE_STATE_BIN.as_ref())? 
+ .context("Missing node_state.bin while loading current chain timestamp")?; + let current_chain_timestamp = store.current_chain_timestamp.context( + "Missing current_chain_timestamp in node_state.bin while loading chain metadata", + )?; + persist_current_chain_timestamp(data_directory, ¤t_chain_timestamp) +} + +fn timestamp_to_folder_name(timestamp_ms: u128) -> Result { let secs = (timestamp_ms / 1000) as i64; - let datetime = - chrono::DateTime::::from_timestamp(secs, 0).expect("Invalid timestamp"); - datetime.format("%Y-%m-%dT%H-%M-%SZ").to_string() + let datetime = chrono::DateTime::::from_timestamp(secs, 0) + .ok_or_else(|| anyhow!("Invalid timestamp: {timestamp_ms}"))?; + Ok(datetime.format("%Y-%m-%dT%H-%M-%SZ").to_string()) } #[cfg(test)] mod tests { use super::*; - use std::fs; + use hyli_bus::modules::write_manifest; + use hyli_modules::node_state::module::NodeStateModule; + + fn install_rustls_provider_for_tests() { + let _ = rustls::crypto::aws_lc_rs::default_provider().install_default(); + } #[test] - fn persist_and_load_genesis_timestamp_folder_round_trip() -> Result<()> { + fn current_chain_timestamp_round_trip() -> Result<()> { let tmpdir = tempfile::tempdir()?; - persist_genesis_timestamp_for_gcs(tmpdir.path(), 1_743_336_506_000)?; - - let timestamp_folder = load_genesis_timestamp_folder(tmpdir.path())?; + persist_current_chain_timestamp_for_block(tmpdir.path(), 1_743_336_506_000)?; + let decoded = load_current_chain_timestamp(tmpdir.path())?; - assert_eq!(timestamp_folder.as_deref(), Some("2025-03-30T12-08-26Z")); + assert_eq!(decoded, "2025-03-30T12-08-26Z"); Ok(()) } #[test] - fn load_genesis_timestamp_folder_returns_none_without_file() -> Result<()> { - let tmpdir = tempfile::tempdir()?; - fs::create_dir_all(tmpdir.path())?; + fn load_current_chain_timestamp_requires_file() { + let tmpdir = tempfile::tempdir().unwrap(); - assert_eq!(load_genesis_timestamp_folder(tmpdir.path())?, None); - Ok(()) + 
assert!(load_current_chain_timestamp(tmpdir.path()).is_err()); } #[test] - fn effective_prefix_uses_genesis_timestamp_folder_when_available() { + fn effective_prefix_uses_current_chain_timestamp() { let lane_id = LaneId::default(); let dp_hash = DataProposalHash::from_hex("deadbeef").unwrap(); - let prefix = effective_prefix("camelot", Some("2026-03-30T12-08-26Z")); + let prefix = effective_prefix("camelot", "2026-03-30T12-08-26Z"); let object_name = format!("{}/data_proposals/{}/{}.bin", prefix, lane_id, dp_hash); assert_eq!( @@ -228,4 +262,67 @@ mod tests { format!("camelot/2026-03-30T12-08-26Z/data_proposals/{lane_id}/{dp_hash}.bin") ); } + + #[test] + fn persist_current_chain_timestamp_from_node_state_writes_local_file() -> Result<()> { + let tmpdir = tempfile::tempdir()?; + let mut node_state = NodeStateStore::default(); + node_state.current_chain_timestamp = Some("2026-03-30T15-47-06Z".to_string()); + + let checksum = + NodeStateModule::save_on_disk(tmpdir.path(), NODE_STATE_BIN.as_ref(), &node_state)?; + write_manifest( + tmpdir.path(), + &[(tmpdir.path().join(NODE_STATE_BIN), checksum)], + )?; + persist_current_chain_timestamp_from_node_state(tmpdir.path())?; + + assert_eq!( + load_current_chain_timestamp(tmpdir.path())?, + "2026-03-30T15-47-06Z" + ); + Ok(()) + } + + #[tokio::test] + async fn gcs_runtime_loads_timestamp_written_after_backend_construction() -> Result<()> { + install_rustls_provider_for_tests(); + let tmpdir = tempfile::tempdir()?; + let backend = GcsDurabilityBackend::new( + tmpdir.path(), + DataProposalDurabilityConf { + gcs_bucket: "test-bucket".to_string(), + gcs_prefix: "camelot".to_string(), + save_data_proposals: true, + }, + ); + + persist_current_chain_timestamp(tmpdir.path(), "2026-03-30T15-47-06Z")?; + + let runtime = backend.runtime().await?; + + assert_eq!(runtime.current_chain_timestamp, "2026-03-30T15-47-06Z"); + Ok(()) + } + + #[tokio::test] + async fn gcs_runtime_initialize_requires_existing_timestamp_file() { + 
install_rustls_provider_for_tests(); + let tmpdir = tempfile::tempdir().unwrap(); + let backend = GcsDurabilityBackend::new( + tmpdir.path(), + DataProposalDurabilityConf { + gcs_bucket: "test-bucket".to_string(), + gcs_prefix: "camelot".to_string(), + save_data_proposals: true, + }, + ); + + let err = backend.initialize().await.unwrap_err(); + + assert!( + err.to_string().contains("current chain timestamp"), + "unexpected error: {err:#}" + ); + } } diff --git a/src/shared_storage/mod.rs b/src/shared_storage/mod.rs index a5b34d10d..20698fc05 100644 --- a/src/shared_storage/mod.rs +++ b/src/shared_storage/mod.rs @@ -5,24 +5,31 @@ pub mod gcs; use std::path::Path; use std::sync::Arc; +use anyhow::Result; + use crate::utils::conf::DataProposalDurabilityConf; pub use durability::{DataProposalDurability, DurabilityBackend, NullDurabilityBackend}; pub use file::FileDurabilityBackend; pub use gcs::{DpGcsRuntime, GcsDurabilityBackend}; -pub fn durability_backend_for_conf( +pub async fn durability_backend_for_conf( data_directory: &Path, conf: &DataProposalDurabilityConf, -) -> Arc { + run_fast_catchup: bool, +) -> Result> { if conf.gcs_enabled() { - Arc::new(GcsDurabilityBackend::new(data_directory, conf.clone())) + let backend = GcsDurabilityBackend::new(data_directory, conf.clone()); + if run_fast_catchup { + backend.initialize().await?; + } + Ok(Arc::new(backend)) } else if conf.file_enabled() { - Arc::new(FileDurabilityBackend::new( + Ok(Arc::new(FileDurabilityBackend::new( data_directory.join("durable_data_proposals"), conf.gcs_prefix.clone(), - )) + ))) } else { - Arc::new(NullDurabilityBackend) + Ok(Arc::new(NullDurabilityBackend)) } } From c4ce042422cd4e3c620a707fce54408684c1c025 Mon Sep 17 00:00:00 2001 From: Alexandre Careil Date: Tue, 31 Mar 2026 14:49:04 +0200 Subject: [PATCH 12/22] Simplify DP chain timestamp persistence --- crates/hyli-modules/src/node_state/module.rs | 53 +++++- src/data_availability.rs | 9 - src/entrypoint.rs | 29 +-- src/mempool/module.rs 
| 13 +- src/shared_storage/durability.rs | 34 +++- src/shared_storage/file.rs | 3 +- src/shared_storage/gcs.rs | 189 +++++-------------- src/shared_storage/mod.rs | 5 +- 8 files changed, 156 insertions(+), 179 deletions(-) diff --git a/crates/hyli-modules/src/node_state/module.rs b/crates/hyli-modules/src/node_state/module.rs index 9387fbd6f..5a8bf173c 100644 --- a/crates/hyli-modules/src/node_state/module.rs +++ b/crates/hyli-modules/src/node_state/module.rs @@ -9,10 +9,11 @@ use crate::modules::admin::{QueryNodeStateStore, QueryNodeStateStoreResponse}; use crate::modules::files::NODE_STATE_BIN; use crate::modules::{module_bus_client, Module, SharedBuildApiCtx}; use crate::{log_error, log_warn}; -use anyhow::Result; +use anyhow::{Context, Result}; +use borsh::{BorshDeserialize, BorshSerialize}; use hyli_bus::modules::ModulePersistOutput; use sdk::*; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use tracing::{info, warn}; /// NodeStateModule maintains a NodeState, @@ -59,6 +60,44 @@ pub struct NodeStateCtx { pub api: SharedBuildApiCtx, } +pub const CURRENT_CHAIN_TIMESTAMP_BIN: &str = "current_chain_timestamp.bin"; + +#[derive(Debug, Clone, BorshSerialize, BorshDeserialize)] +pub struct CurrentChainTimestampStore { + pub timestamp_folder: String, +} + +pub fn persist_current_chain_timestamp( + data_directory: &Path, + store: &NodeStateStore, +) -> Result> { + let Some(timestamp_folder) = store.current_chain_timestamp.clone() else { + return Ok(None); + }; + + let file = PathBuf::from(CURRENT_CHAIN_TIMESTAMP_BIN); + let checksum = NodeStateModule::save_on_disk::( + data_directory, + &file, + &CurrentChainTimestampStore { timestamp_folder }, + )?; + + Ok(Some((data_directory.join(&file), checksum))) +} + +pub fn load_current_chain_timestamp(data_directory: &Path) -> Result { + let store = NodeStateModule::load_from_disk::( + data_directory, + CURRENT_CHAIN_TIMESTAMP_BIN.as_ref(), + )? 
+ .context("Missing current_chain_timestamp.bin while loading chain metadata")?; + + chrono::NaiveDateTime::parse_from_str(&store.timestamp_folder, "%Y-%m-%dT%H-%M-%SZ") + .context("Parsing current chain timestamp")?; + + Ok(store.timestamp_folder) +} + impl Module for NodeStateModule { type Context = NodeStateCtx; @@ -155,7 +194,15 @@ impl Module for NodeStateModule { let file = PathBuf::from(NODE_STATE_BIN); let checksum = Self::save_on_disk::(&self.data_directory, &file, &self.inner)?; - Ok(vec![(self.data_directory.join(file), checksum)]) + let mut persisted = vec![(self.data_directory.join(file), checksum)]; + + if let Some(timestamp_file) = + persist_current_chain_timestamp(&self.data_directory, &self.inner.store)? + { + persisted.push(timestamp_file); + } + + Ok(persisted) } } diff --git a/src/data_availability.rs b/src/data_availability.rs index 8bd31e201..6af467785 100644 --- a/src/data_availability.rs +++ b/src/data_availability.rs @@ -16,7 +16,6 @@ use crate::{ genesis::GenesisEvent, model::*, p2p::network::{OutboundMessage, PeerEvent}, - shared_storage::gcs::persist_current_chain_timestamp_for_block, utils::conf::SharedConf, }; use anyhow::{Context, Result}; @@ -1100,14 +1099,6 @@ impl DataAvailability { ) -> anyhow::Result<()> { self.store_block(&block)?; - if block.height() == BlockHeight(0) && self.config.data_proposal_durability.enabled() { - persist_current_chain_timestamp_for_block( - &self.config.data_directory, - block.consensus_proposal.timestamp.0, - ) - .context("Persisting current chain timestamp")?; - } - let block_hash = block.hashed(); // Add new block to all streaming peer queues to ensure ordering diff --git a/src/entrypoint.rs b/src/entrypoint.rs index c59450777..80e432cca 100644 --- a/src/entrypoint.rs +++ b/src/entrypoint.rs @@ -11,7 +11,6 @@ use crate::{ model::{api::NodeInfo, ContractName, SharedRunContext}, p2p::P2P, rest::{ApiDoc, RestApi, RestApiRunContext}, - shared_storage::gcs::persist_current_chain_timestamp_from_node_state, 
single_node_consensus::SingleNodeConsensus, tcp_server::TcpServer, utils::{ @@ -40,7 +39,7 @@ use hyli_modules::{ BuildApiContextInner, Module, }, node_state::{ - module::{NodeStateCtx, NodeStateModule}, + module::{persist_current_chain_timestamp, NodeStateCtx, NodeStateModule}, NodeStateStore, }, utils::db::use_fresh_db, @@ -455,19 +454,23 @@ pub async fn common_main( "Saving node state store" )?; + let mut persisted = vec![ + (consensus_path, consensus_checksum), + (node_state_path, node_state_checksum), + ]; + if let Some(timestamp_file) = + persist_current_chain_timestamp(&config.data_directory, &node_state_store) + .context( + "Persisting current chain timestamp from fast catchup node state", + )? + { + persisted.push(timestamp_file); + } + log_error!( - write_manifest( - &config.data_directory, - &[ - (consensus_path, consensus_checksum), - (node_state_path, node_state_checksum), - ], - ), + write_manifest(&config.data_directory, &persisted), "Writing checksum manifest for fast catchup stores" )?; - - persist_current_chain_timestamp_from_node_state(&config.data_directory) - .context("Persisting current chain timestamp from fast catchup node state")?; } else { let reason = bootstrap_failure_reason .unwrap_or_else(|| "no peer responded successfully".to_string()); @@ -485,8 +488,6 @@ pub async fn common_main( NODE_STATE_BIN, config.data_directory.display() ); - persist_current_chain_timestamp_from_node_state(&config.data_directory) - .context("Persisting current chain timestamp from existing node state")?; } } diff --git a/src/mempool/module.rs b/src/mempool/module.rs index ba14f7aad..ade4dd064 100644 --- a/src/mempool/module.rs +++ b/src/mempool/module.rs @@ -3,8 +3,11 @@ use hyli_modules::{log_error, module_handle_messages}; use std::{sync::Arc, time::Duration}; use crate::{ - consensus::ConsensusEvent, model::*, p2p::network::MsgWithHeader, - shared_storage::durability_backend_for_conf, utils::conf::P2pMode, + consensus::ConsensusEvent, + model::*, + 
p2p::network::MsgWithHeader, + shared_storage::{durability_backend_for_conf, gcs::timestamp_to_folder_name}, + utils::conf::P2pMode, }; use client_sdk::tcp_client::TcpServerMessage; @@ -99,6 +102,12 @@ impl Module for Mempool { } listen cmd => { let NodeStateEvent::NewBlock(block) = cmd; + if block.signed_block.height() == BlockHeight(0) { + let current_chain_timestamp = + timestamp_to_folder_name(block.signed_block.consensus_proposal.timestamp.0)?; + self.durability + .set_current_chain_timestamp(current_chain_timestamp); + } // In this p2p mode we don't receive consensus events so we must update manually. if self.conf.p2p.mode == P2pMode::LaneManager { if let Err(e) = self.staking.process_block(&block.staking_data) { diff --git a/src/shared_storage/durability.rs b/src/shared_storage/durability.rs index 167b02920..ee4d90b43 100644 --- a/src/shared_storage/durability.rs +++ b/src/shared_storage/durability.rs @@ -11,6 +11,7 @@ use borsh::BorshSerialize; use fjall::{Database, Keyspace, KeyspaceCreateOptions}; use futures::FutureExt; use hyli_model::LaneId; +use hyli_modules::node_state::module::load_current_chain_timestamp; use tokio::sync::Notify; use tracing::{debug, warn}; @@ -22,6 +23,7 @@ pub trait DurabilityBackend: Send + Sync { lane_id: LaneId, dp_hash: DataProposalHash, payload: Vec, + current_chain_timestamp: Option, ) -> Pin> + Send + 'static>>; } @@ -33,6 +35,7 @@ impl DurabilityBackend for NullDurabilityBackend { _lane_id: LaneId, _dp_hash: DataProposalHash, _payload: Vec, + _current_chain_timestamp: Option, ) -> Pin> + Send + 'static>> { Box::pin(async { Ok(()) }) } @@ -155,6 +158,7 @@ impl Default for PersistenceTracker { #[derive(Clone)] pub struct DataProposalDurability { backend: Arc, + current_chain_timestamp: Arc>>, persistence_state: SharedPersistenceStateStore, in_flight: Arc>>>, } @@ -163,6 +167,9 @@ impl DataProposalDurability { pub fn new(backend: Arc, data_directory: &Path) -> Result { Ok(Self { backend, + current_chain_timestamp: 
Arc::new(RwLock::new( + load_current_chain_timestamp(data_directory).ok(), + )), persistence_state: FjallPersistenceStateStore::shared(data_directory)?, in_flight: Arc::new(RwLock::new(HashMap::new())), }) @@ -172,11 +179,20 @@ impl DataProposalDurability { pub fn new_in_memory(backend: Arc) -> Self { Self { backend, + current_chain_timestamp: Arc::new(RwLock::new(None)), persistence_state: Arc::new(InMemoryPersistenceStateStore::default()), in_flight: Arc::new(RwLock::new(HashMap::new())), } } + pub fn set_current_chain_timestamp(&self, current_chain_timestamp: String) { + let mut guard = self + .current_chain_timestamp + .write() + .unwrap_or_else(|poisoned| poisoned.into_inner()); + *guard = Some(current_chain_timestamp); + } + pub fn prime_persistence(&self, lane_id: LaneId, data_proposal: &DataProposal) -> Result<()> { let mut canonical = data_proposal.clone(); canonical.remove_proofs(); @@ -202,10 +218,20 @@ impl DataProposalDurability { let payload = borsh::to_vec(&canonical)?; let backend = Arc::clone(&self.backend); + let current_chain_timestamp = self + .current_chain_timestamp + .read() + .unwrap_or_else(|poisoned| poisoned.into_inner()) + .clone(); let persistence_state = Arc::clone(&self.persistence_state); let in_flight = Arc::clone(&self.in_flight); let tracker = Arc::clone(&tracker); - let mut upload = backend.upload_data_proposal(lane_id.clone(), dp_hash.clone(), payload); + let mut upload = backend.upload_data_proposal( + lane_id.clone(), + dp_hash.clone(), + payload, + current_chain_timestamp, + ); if let Some(result) = upload.as_mut().now_or_never() { Self::finish_persistence_attempt( @@ -434,7 +460,6 @@ mod tests { gcs_prefix: "test-prefix".to_string(), save_data_proposals: true, }, - current_chain_timestamp: "2026-03-30T14-00-00Z".to_string(), }) } @@ -514,6 +539,7 @@ mod tests { let durability = DataProposalDurability::new_in_memory(Arc::new( GcsDurabilityBackend::with_runtime(build_test_gcs_runtime(endpoint.clone()).await?), )); + 
durability.set_current_chain_timestamp("2026-03-30T14-00-00Z".to_string()); let crypto = BlstCrypto::new("persistence-success").unwrap(); let lane_id = LaneId::new(crypto.validator_pubkey().clone()); @@ -539,6 +565,7 @@ mod tests { let durability = DataProposalDurability::new_in_memory(Arc::new( GcsDurabilityBackend::with_runtime(build_test_gcs_runtime(endpoint.clone()).await?), )); + durability.set_current_chain_timestamp("2026-03-30T14-00-00Z".to_string()); let crypto = BlstCrypto::new("persistence-failure").unwrap(); let lane_id = LaneId::new(crypto.validator_pubkey().clone()); @@ -572,6 +599,7 @@ mod tests { )), dir.path(), )?; + durability.set_current_chain_timestamp("2026-03-30T14-00-00Z".to_string()); let crypto = BlstCrypto::new("persistence-reused").unwrap(); let lane_id = LaneId::new(crypto.validator_pubkey().clone()); @@ -614,9 +642,9 @@ mod tests { gcs_prefix: "test-prefix".to_string(), save_data_proposals: true, }, - current_chain_timestamp: "2026-03-30T14-00-00Z".to_string(), }), )); + durability.set_current_chain_timestamp("2026-03-30T14-00-00Z".to_string()); let crypto = BlstCrypto::new("persistence-prefix").unwrap(); let lane_id = LaneId::new(crypto.validator_pubkey().clone()); diff --git a/src/shared_storage/file.rs b/src/shared_storage/file.rs index d8be5c337..2d0ae41bc 100644 --- a/src/shared_storage/file.rs +++ b/src/shared_storage/file.rs @@ -52,6 +52,7 @@ impl DurabilityBackend for FileDurabilityBackend { lane_id: LaneId, dp_hash: DataProposalHash, payload: Vec, + _current_chain_timestamp: Option, ) -> Pin> + Send + 'static>> { let backend = self.clone(); Box::pin(async move { @@ -73,7 +74,7 @@ mod tests { let dp_hash = DataProposal::new_root(lane_id.clone(), vec![]).hashed(); backend - .upload_data_proposal(lane_id.clone(), dp_hash.clone(), b"payload".to_vec()) + .upload_data_proposal(lane_id.clone(), dp_hash.clone(), b"payload".to_vec(), None) .await?; let path = backend.object_path(&lane_id, &dp_hash); diff --git 
a/src/shared_storage/gcs.rs b/src/shared_storage/gcs.rs index d928f9ea1..3ed71220a 100644 --- a/src/shared_storage/gcs.rs +++ b/src/shared_storage/gcs.rs @@ -1,19 +1,9 @@ -use std::{ - fs::File, - future::Future, - path::{Path, PathBuf}, - pin::Pin, - sync::Arc, -}; +use std::{future::Future, pin::Pin, sync::Arc}; -use anyhow::{anyhow, Context, Result}; -use borsh::{BorshDeserialize, BorshSerialize}; +use anyhow::{anyhow, Result}; use bytes::Bytes; use google_cloud_storage::client::Storage as GcsStorageClient; -use hyli_bus::modules::files::NODE_STATE_BIN; use hyli_model::LaneId; -use hyli_modules::modules::Module; -use hyli_modules::node_state::{module::NodeStateModule, NodeStateStore}; use tokio::sync::OnceCell; use crate::{ @@ -25,7 +15,6 @@ use crate::{ pub struct DpGcsRuntime { pub client: GcsStorageClient, pub conf: DataProposalDurabilityConf, - pub current_chain_timestamp: String, } #[derive(Clone)] @@ -36,7 +25,6 @@ pub struct GcsDurabilityBackend { enum GcsRuntimeSource { Lazy { conf: DataProposalDurabilityConf, - data_directory: PathBuf, runtime: OnceCell, }, #[cfg(test)] @@ -44,11 +32,10 @@ enum GcsRuntimeSource { } impl GcsDurabilityBackend { - pub fn new(data_directory: &Path, conf: DataProposalDurabilityConf) -> Self { + pub fn new(conf: DataProposalDurabilityConf) -> Self { Self { source: Arc::new(GcsRuntimeSource::Lazy { conf, - data_directory: data_directory.to_path_buf(), runtime: OnceCell::new(), }), } @@ -60,32 +47,13 @@ impl GcsDurabilityBackend { source: Arc::new(GcsRuntimeSource::Fixed(runtime)), } } - - pub async fn initialize(&self) -> Result<()> { - let _ = self.runtime().await?; - Ok(()) - } - async fn runtime(&self) -> Result { match self.source.as_ref() { - GcsRuntimeSource::Lazy { - conf, - data_directory, - runtime, - } => Ok(runtime + GcsRuntimeSource::Lazy { conf, runtime } => Ok(runtime .get_or_try_init(|| async { - let current_chain_timestamp = load_current_chain_timestamp(data_directory) - .with_context(|| { - format!( - "Loading 
current chain timestamp from {}", - data_directory.display() - ) - })?; - Ok::(DpGcsRuntime { client: GcsStorageClient::builder().build().await?, conf: conf.clone(), - current_chain_timestamp, }) }) .await? @@ -97,15 +65,24 @@ impl GcsDurabilityBackend { async fn upload( runtime: DpGcsRuntime, + current_chain_timestamp: Option, lane_id: LaneId, dp_hash: DataProposalHash, payload: Vec, ) -> Result<()> { + let current_chain_timestamp = current_chain_timestamp.ok_or_else(|| { + anyhow!("Current chain timestamp is not available for GCS durability") + })?; match runtime .client .write_object( bucket_path(&runtime.conf.gcs_bucket), - object_name(&runtime, &lane_id, &dp_hash), + object_name( + &runtime.conf.gcs_prefix, + ¤t_chain_timestamp, + &lane_id, + &dp_hash, + ), Bytes::from(payload), ) .set_if_generation_match(0_i64) @@ -131,38 +108,16 @@ impl DurabilityBackend for GcsDurabilityBackend { lane_id: LaneId, dp_hash: DataProposalHash, payload: Vec, + current_chain_timestamp: Option, ) -> Pin> + Send + 'static>> { let backend = self.clone(); Box::pin(async move { let runtime = backend.runtime().await?; - Self::upload(runtime, lane_id, dp_hash, payload).await + Self::upload(runtime, current_chain_timestamp, lane_id, dp_hash, payload).await }) } } -pub fn persist_current_chain_timestamp( - data_directory: &Path, - current_chain_timestamp: &str, -) -> Result<()> { - let store = CurrentChainTimestampStore { - timestamp_folder: current_chain_timestamp.to_string(), - }; - let path = data_directory.join(CURRENT_CHAIN_TIMESTAMP_FILE); - let mut file = File::create(&path).context("Creating current chain timestamp file")?; - borsh::to_writer(&mut file, &store).context("Serializing current chain timestamp file")?; - Ok(()) -} - -pub fn persist_current_chain_timestamp_for_block( - data_directory: &Path, - timestamp_ms: u128, -) -> Result<()> { - let store = CurrentChainTimestampStore { - timestamp_folder: timestamp_to_folder_name(timestamp_ms)?, - }; - 
persist_current_chain_timestamp(data_directory, &store.timestamp_folder) -} - fn bucket_path(bucket: &str) -> String { if bucket.starts_with("projects/") { bucket.to_string() @@ -171,9 +126,13 @@ fn bucket_path(bucket: &str) -> String { } } -fn object_name(runtime: &DpGcsRuntime, lane_id: &LaneId, dp_hash: &DataProposalHash) -> String { - let prefix = effective_prefix(&runtime.conf.gcs_prefix, &runtime.current_chain_timestamp); - +fn object_name( + gcs_prefix: &str, + current_chain_timestamp: &str, + lane_id: &LaneId, + dp_hash: &DataProposalHash, +) -> String { + let prefix = effective_prefix(gcs_prefix, current_chain_timestamp); format!("{}/data_proposals/{}/{}.bin", prefix, lane_id, dp_hash) } @@ -181,42 +140,7 @@ fn effective_prefix(gcs_prefix: &str, current_chain_timestamp: &str) -> String { format!("{gcs_prefix}/{current_chain_timestamp}") } -#[derive(Debug, Clone, BorshSerialize, BorshDeserialize)] -struct CurrentChainTimestampStore { - timestamp_folder: String, -} - -const CURRENT_CHAIN_TIMESTAMP_FILE: &str = "current_chain_timestamp.bin"; - -fn load_current_chain_timestamp(data_directory: &Path) -> Result { - let full_path = data_directory.join(CURRENT_CHAIN_TIMESTAMP_FILE); - let mut handle = File::open(&full_path).with_context(|| { - format!( - "Opening required current chain timestamp file {}", - full_path.display() - ) - })?; - - let store: CurrentChainTimestampStore = - borsh::from_reader(&mut handle).context("Deserializing current chain timestamp file")?; - - chrono::NaiveDateTime::parse_from_str(&store.timestamp_folder, "%Y-%m-%dT%H-%M-%SZ") - .context("Parsing current chain timestamp")?; - - Ok(store.timestamp_folder) -} - -pub fn persist_current_chain_timestamp_from_node_state(data_directory: &Path) -> Result<()> { - let store = - NodeStateModule::load_from_disk::(data_directory, NODE_STATE_BIN.as_ref())? 
- .context("Missing node_state.bin while loading current chain timestamp")?; - let current_chain_timestamp = store.current_chain_timestamp.context( - "Missing current_chain_timestamp in node_state.bin while loading chain metadata", - )?; - persist_current_chain_timestamp(data_directory, ¤t_chain_timestamp) -} - -fn timestamp_to_folder_name(timestamp_ms: u128) -> Result { +pub fn timestamp_to_folder_name(timestamp_ms: u128) -> Result { let secs = (timestamp_ms / 1000) as i64; let datetime = chrono::DateTime::::from_timestamp(secs, 0) .ok_or_else(|| anyhow!("Invalid timestamp: {timestamp_ms}"))?; @@ -227,7 +151,10 @@ fn timestamp_to_folder_name(timestamp_ms: u128) -> Result { mod tests { use super::*; use hyli_bus::modules::write_manifest; - use hyli_modules::node_state::module::NodeStateModule; + use hyli_modules::node_state::module::{ + load_current_chain_timestamp, persist_current_chain_timestamp, + }; + use hyli_modules::node_state::NodeStateStore; fn install_rustls_provider_for_tests() { let _ = rustls::crypto::aws_lc_rs::default_provider().install_default(); @@ -236,7 +163,11 @@ mod tests { #[test] fn current_chain_timestamp_round_trip() -> Result<()> { let tmpdir = tempfile::tempdir()?; - persist_current_chain_timestamp_for_block(tmpdir.path(), 1_743_336_506_000)?; + let mut store = NodeStateStore::default(); + store.current_chain_timestamp = Some("2025-03-30T12-08-26Z".to_string()); + let persisted = persist_current_chain_timestamp(tmpdir.path(), &store)? 
+ .expect("timestamp file should be written"); + write_manifest(tmpdir.path(), &[persisted])?; let decoded = load_current_chain_timestamp(tmpdir.path())?; assert_eq!(decoded, "2025-03-30T12-08-26Z"); @@ -264,18 +195,13 @@ mod tests { } #[test] - fn persist_current_chain_timestamp_from_node_state_writes_local_file() -> Result<()> { + fn persist_current_chain_timestamp_writes_local_file() -> Result<()> { let tmpdir = tempfile::tempdir()?; let mut node_state = NodeStateStore::default(); node_state.current_chain_timestamp = Some("2026-03-30T15-47-06Z".to_string()); - - let checksum = - NodeStateModule::save_on_disk(tmpdir.path(), NODE_STATE_BIN.as_ref(), &node_state)?; - write_manifest( - tmpdir.path(), - &[(tmpdir.path().join(NODE_STATE_BIN), checksum)], - )?; - persist_current_chain_timestamp_from_node_state(tmpdir.path())?; + let persisted = persist_current_chain_timestamp(tmpdir.path(), &node_state)? + .expect("timestamp file should be written"); + write_manifest(tmpdir.path(), &[persisted])?; assert_eq!( load_current_chain_timestamp(tmpdir.path())?, @@ -285,44 +211,17 @@ mod tests { } #[tokio::test] - async fn gcs_runtime_loads_timestamp_written_after_backend_construction() -> Result<()> { + async fn gcs_runtime_builds_after_backend_construction() -> Result<()> { install_rustls_provider_for_tests(); - let tmpdir = tempfile::tempdir()?; - let backend = GcsDurabilityBackend::new( - tmpdir.path(), - DataProposalDurabilityConf { - gcs_bucket: "test-bucket".to_string(), - gcs_prefix: "camelot".to_string(), - save_data_proposals: true, - }, - ); - - persist_current_chain_timestamp(tmpdir.path(), "2026-03-30T15-47-06Z")?; + let backend = GcsDurabilityBackend::new(DataProposalDurabilityConf { + gcs_bucket: "test-bucket".to_string(), + gcs_prefix: "camelot".to_string(), + save_data_proposals: true, + }); let runtime = backend.runtime().await?; - assert_eq!(runtime.current_chain_timestamp, "2026-03-30T15-47-06Z"); + assert_eq!(runtime.conf.gcs_prefix, "camelot"); Ok(()) } - 
- #[tokio::test] - async fn gcs_runtime_initialize_requires_existing_timestamp_file() { - install_rustls_provider_for_tests(); - let tmpdir = tempfile::tempdir().unwrap(); - let backend = GcsDurabilityBackend::new( - tmpdir.path(), - DataProposalDurabilityConf { - gcs_bucket: "test-bucket".to_string(), - gcs_prefix: "camelot".to_string(), - save_data_proposals: true, - }, - ); - - let err = backend.initialize().await.unwrap_err(); - - assert!( - err.to_string().contains("current chain timestamp"), - "unexpected error: {err:#}" - ); - } } diff --git a/src/shared_storage/mod.rs b/src/shared_storage/mod.rs index 20698fc05..778c8120f 100644 --- a/src/shared_storage/mod.rs +++ b/src/shared_storage/mod.rs @@ -6,6 +6,7 @@ use std::path::Path; use std::sync::Arc; use anyhow::Result; +use hyli_modules::node_state::module::load_current_chain_timestamp; use crate::utils::conf::DataProposalDurabilityConf; @@ -19,10 +20,10 @@ pub async fn durability_backend_for_conf( run_fast_catchup: bool, ) -> Result> { if conf.gcs_enabled() { - let backend = GcsDurabilityBackend::new(data_directory, conf.clone()); if run_fast_catchup { - backend.initialize().await?; + load_current_chain_timestamp(data_directory)?; } + let backend = GcsDurabilityBackend::new(conf.clone()); Ok(Arc::new(backend)) } else if conf.file_enabled() { Ok(Arc::new(FileDurabilityBackend::new( From 00e8d0886373b697191622765b24678d158ebef5 Mon Sep 17 00:00:00 2001 From: Alexandre Careil Date: Tue, 31 Mar 2026 17:35:51 +0200 Subject: [PATCH 13/22] Store StoredSignedBlocks on GCS --- src/data_availability.rs | 5 +- src/data_availability/block_storage.rs | 740 +++++++++++++++++++++++-- 2 files changed, 684 insertions(+), 61 deletions(-) diff --git a/src/data_availability.rs b/src/data_availability.rs index 6af467785..6c7adfa5c 100644 --- a/src/data_availability.rs +++ b/src/data_availability.rs @@ -37,7 +37,10 @@ impl Module for DataAvailability { async fn build(bus: SharedMessageBus, ctx: Self::Context) -> anyhow::Result { 
let bus = DABusClient::new_from_bus(bus.new_handle()).await; - let mut blocks = Blocks::new(&ctx.config.data_directory)?; + let mut blocks = Blocks::new_with_durability( + &ctx.config.data_directory, + &ctx.config.data_proposal_durability, + )?; blocks.set_metrics_context(ctx.config.id.clone()); let highest_block = blocks.highest(); diff --git a/src/data_availability/block_storage.rs b/src/data_availability/block_storage.rs index 7928598e9..d9a6f493d 100644 --- a/src/data_availability/block_storage.rs +++ b/src/data_availability/block_storage.rs @@ -1,15 +1,31 @@ -use anyhow::{Context, Result}; +use anyhow::{anyhow, bail, Context, Result}; use borsh::{BorshDeserialize, BorshSerialize}; use fjall::{Database, Keyspace, KeyspaceCreateOptions, KvSeparationOptions, Slice}; +use google_cloud_storage::{ + client::{Storage as GcsStorageClient, StorageControl}, + model::ListObjectsRequest, +}; use hyli_model::{ AggregateSignature, BlockHeight, ConsensusProposal, ConsensusProposalHash, DataProposalHash, Hashed, LaneId, SignedBlock, }; -use hyli_modules::utils::fjall_metrics::FjallMetrics; -use std::{fmt::Debug, path::Path, sync::Arc, time::Instant}; +use hyli_modules::{ + node_state::module::load_current_chain_timestamp, utils::fjall_metrics::FjallMetrics, +}; +use std::{ + collections::{BTreeMap, HashMap}, + fmt::Debug, + future::Future, + path::{Path, PathBuf}, + sync::{Arc, RwLock}, + time::Instant, +}; use tracing::{debug, info, trace}; -use crate::mempool::proposal_storage::ProposalStorage; +use crate::{ + mempool::proposal_storage::ProposalStorage, shared_storage::gcs::timestamp_to_folder_name, + utils::conf::DataProposalDurabilityConf, +}; #[derive(Clone, BorshSerialize, BorshDeserialize)] struct StoredSignedBlock { @@ -22,6 +38,42 @@ struct FjallHashKey(ConsensusProposalHash); struct FjallHeightKey([u8; 8]); struct FjallValue(Vec); +struct FjallBlocks { + db: Database, + by_hash: Keyspace, + by_height: Keyspace, + metrics: FjallMetrics, + proposals: Arc, +} + +struct 
GcsBlocks { + runtime: Arc, + client: GcsStorageClient, + control: StorageControl, + bucket_path: String, + gcs_prefix: String, + data_directory: PathBuf, + proposals: Arc, + state: RwLock, +} + +#[derive(Default)] +struct GcsBlockState { + current_chain_timestamp: Option, + loaded_timestamp: Option, + by_height: BTreeMap, + by_hash: HashMap, +} + +pub struct Blocks { + backend: BlocksBackend, +} + +enum BlocksBackend { + Fjall(Box), + Gcs(Arc), +} + impl AsRef<[u8]> for FjallHashKey { fn as_ref(&self) -> &[u8] { self.0 .0.as_slice() @@ -64,14 +116,6 @@ impl AsRef<[u8]> for FjallValue { } } -pub struct Blocks { - db: Database, - by_hash: Keyspace, - by_height: Keyspace, - metrics: FjallMetrics, - proposals: Arc, -} - impl StoredSignedBlock { fn from_signed_block(block: &SignedBlock) -> Self { Self { @@ -121,32 +165,8 @@ impl StoredSignedBlock { } } -impl Blocks { - pub fn new_handle(&self) -> Blocks { - Blocks { - db: self.db.clone(), - by_hash: self.by_hash.clone(), - by_height: self.by_height.clone(), - metrics: self.metrics.clone(), - proposals: Arc::clone(&self.proposals), - } - } - - fn decode_block(&self, item: Slice) -> Result { - let stored = borsh::from_slice::(&item)?; - stored.hydrate(&self.proposals) - } - - fn decode_block_hash(item: Slice) -> Result { - borsh::from_slice(&item).map_err(Into::into) - } - - fn decode_height(item: Slice) -> Result { - let key = item.first_chunk::<8>().context("Malformed key")?; - Ok(BlockHeight::from(FjallHeightKey(*key))) - } - - pub fn new(path: &Path) -> Result { +impl FjallBlocks { + fn new(path: &Path) -> Result { let db = Database::builder(path.join("data_availability.db")) .cache_size(256 * 1024 * 1024) .max_journaling_size(512 * 1024 * 1024) @@ -162,7 +182,7 @@ impl Blocks { let by_height = db.keyspace("block_hashes_by_height", KeyspaceCreateOptions::default)?; info!("{} block(s) available", by_hash.len()?); - Ok(Blocks { + Ok(Self { db, by_hash, by_height, @@ -171,16 +191,40 @@ impl Blocks { }) } - pub fn 
set_metrics_context(&mut self, node_id: impl Into) { + fn new_handle(&self) -> Self { + Self { + db: self.db.clone(), + by_hash: self.by_hash.clone(), + by_height: self.by_height.clone(), + metrics: self.metrics.clone(), + proposals: Arc::clone(&self.proposals), + } + } + + fn decode_block(&self, item: Slice) -> Result { + let stored = borsh::from_slice::(&item)?; + stored.hydrate(&self.proposals) + } + + fn decode_block_hash(item: Slice) -> Result { + borsh::from_slice(&item).map_err(Into::into) + } + + fn decode_height(item: Slice) -> Result { + let key = item.first_chunk::<8>().context("Malformed key")?; + Ok(BlockHeight::from(FjallHeightKey(*key))) + } + + fn set_metrics_context(&mut self, node_id: impl Into) { self.metrics = FjallMetrics::global("data_availability", node_id.into(), "data_availability.db"); } - pub fn is_empty(&self) -> bool { + fn is_empty(&self) -> bool { self.by_hash.is_empty().unwrap_or(true) } - pub fn persist(&self) -> Result<()> { + fn persist(&self) -> Result<()> { let start = Instant::now(); self.proposals.persist()?; let res = self @@ -191,13 +235,13 @@ impl Blocks { res } - pub fn record_metrics(&self) { + fn record_metrics(&self) { self.metrics.record_db(&self.db); self.metrics.record_keyspace("by_hash", &self.by_hash); self.metrics.record_keyspace("by_height", &self.by_height); } - pub fn put(&mut self, block: SignedBlock) -> Result<()> { + fn put(&mut self, block: SignedBlock) -> Result<()> { let start = Instant::now(); let block_hash = block.hashed(); if self.contains(&block_hash) { @@ -225,7 +269,7 @@ impl Blocks { Ok(()) } - pub fn get(&self, block_hash: &ConsensusProposalHash) -> Result> { + fn get(&self, block_hash: &ConsensusProposalHash) -> Result> { let start = Instant::now(); let Some(item) = self.by_hash.get(FjallHashKey(block_hash.clone()))? 
else { self.record_op("get", "by_hash", start.elapsed()); @@ -236,7 +280,7 @@ impl Blocks { res } - pub fn get_by_height(&self, height: BlockHeight) -> Result> { + fn get_by_height(&self, height: BlockHeight) -> Result> { let start = Instant::now(); let Some(bytes) = self.by_height.get(FjallHeightKey::new(height))? else { self.record_op("get_by_height", "by_height", start.elapsed()); @@ -248,14 +292,14 @@ impl Blocks { res } - pub fn has_by_height(&self, height: BlockHeight) -> Result { + fn has_by_height(&self, height: BlockHeight) -> Result { let start = Instant::now(); let res = self.by_height.contains_key(FjallHeightKey::new(height))?; self.record_op("has_by_height", "by_height", start.elapsed()); Ok(res) } - pub fn contains(&self, block_hash: &ConsensusProposalHash) -> bool { + fn contains(&self, block_hash: &ConsensusProposalHash) -> bool { let start = Instant::now(); let res = self .by_hash @@ -265,15 +309,10 @@ impl Blocks { res } - pub fn record_op( - &self, - _op: &'static str, - _keyspace: &'static str, - _elapsed: std::time::Duration, - ) { + fn record_op(&self, _op: &'static str, _keyspace: &'static str, _elapsed: std::time::Duration) { } - pub fn first_hole_by_height(&self) -> Result> { + fn first_hole_by_height(&self) -> Result> { let Some(guard) = self.by_height.last_key_value() else { anyhow::bail!("Empty partition can't have holes"); }; @@ -308,22 +347,22 @@ impl Blocks { Ok(None) } - pub fn last(&self) -> Option { + fn last(&self) -> Option { let guard = self.by_height.last_key_value()?; let (_k, v) = guard.into_inner().ok()?; let hash = Self::decode_block_hash(v).ok()?; self.get(&hash).ok().flatten() } - pub fn highest(&self) -> BlockHeight { + fn highest(&self) -> BlockHeight { self.last().map_or(BlockHeight(0), |b| b.height()) } - pub fn last_block_hash(&self) -> Option { + fn last_block_hash(&self) -> Option { self.last().map(|b| b.hashed()) } - pub fn range( + fn range( &mut self, min: BlockHeight, max: BlockHeight, @@ -337,10 +376,591 @@ 
impl Blocks { } } +impl GcsBlockState { + fn reset_for(&mut self, timestamp: String) { + self.loaded_timestamp = Some(timestamp.clone()); + self.current_chain_timestamp = Some(timestamp); + self.by_height.clear(); + self.by_hash.clear(); + } +} + +impl GcsBlocks { + fn new(path: &Path, conf: &DataProposalDurabilityConf) -> Result { + let runtime = Arc::new( + tokio::runtime::Builder::new_current_thread() + .enable_all() + .build()?, + ); + + let (client, control) = runtime.block_on(async { + Ok::<_, anyhow::Error>(( + GcsStorageClient::builder().build().await?, + StorageControl::builder().build().await?, + )) + })?; + + Ok(Self { + runtime, + client, + control, + bucket_path: bucket_path(&conf.gcs_bucket), + gcs_prefix: conf.gcs_prefix.clone(), + data_directory: path.to_path_buf(), + proposals: ProposalStorage::shared(path)?, + state: RwLock::new(GcsBlockState { + current_chain_timestamp: load_current_chain_timestamp(path).ok(), + ..Default::default() + }), + }) + } + + fn block_on(&self, future: F) -> T + where + F: Future, + { + if tokio::runtime::Handle::try_current().is_ok() { + tokio::task::block_in_place(|| self.runtime.block_on(future)) + } else { + self.runtime.block_on(future) + } + } + + fn set_metrics_context(&self, _node_id: impl Into) {} + + fn record_metrics(&self) {} + + fn is_empty(&self) -> bool { + self.ensure_index_loaded().is_ok_and(|_| { + self.state + .read() + .unwrap_or_else(|poisoned| poisoned.into_inner()) + .by_height + .is_empty() + }) + } + + fn persist(&self) -> Result<()> { + self.proposals.persist() + } + + fn resolve_current_chain_timestamp( + &self, + block: Option<&SignedBlock>, + ) -> Result> { + { + let guard = self + .state + .read() + .unwrap_or_else(|poisoned| poisoned.into_inner()); + if let Some(timestamp) = &guard.current_chain_timestamp { + return Ok(Some(timestamp.clone())); + } + } + + if let Ok(timestamp) = load_current_chain_timestamp(&self.data_directory) { + let mut guard = self + .state + .write() + 
.unwrap_or_else(|poisoned| poisoned.into_inner()); + guard.current_chain_timestamp = Some(timestamp.clone()); + return Ok(Some(timestamp)); + } + + let Some(block) = block else { + return Ok(None); + }; + if block.height() != BlockHeight(0) { + return Ok(None); + } + + let timestamp = timestamp_to_folder_name(block.consensus_proposal.timestamp.0)?; + let mut guard = self + .state + .write() + .unwrap_or_else(|poisoned| poisoned.into_inner()); + guard.current_chain_timestamp = Some(timestamp.clone()); + Ok(Some(timestamp)) + } + + fn ensure_index_loaded(&self) -> Result<()> { + let Some(current_chain_timestamp) = self.resolve_current_chain_timestamp(None)? else { + return Ok(()); + }; + + { + let guard = self + .state + .read() + .unwrap_or_else(|poisoned| poisoned.into_inner()); + if guard.loaded_timestamp.as_deref() == Some(current_chain_timestamp.as_str()) { + return Ok(()); + } + } + + let prefix = block_object_prefix(&self.gcs_prefix, ¤t_chain_timestamp); + let objects = self.block_on(self.list_objects(&prefix))?; + + let mut by_height = BTreeMap::new(); + let mut by_hash = HashMap::new(); + for object_name in objects { + let Some(height) = parse_height_from_object_name(&prefix, &object_name) else { + continue; + }; + let stored = self.block_on(self.read_stored_signed_block(&object_name))?; + let block_hash = stored.consensus_proposal.hashed(); + by_height.insert(height, block_hash.clone()); + by_hash.insert(block_hash, stored); + } + + let mut guard = self + .state + .write() + .unwrap_or_else(|poisoned| poisoned.into_inner()); + guard.current_chain_timestamp = Some(current_chain_timestamp.clone()); + guard.loaded_timestamp = Some(current_chain_timestamp); + guard.by_height = by_height; + guard.by_hash = by_hash; + Ok(()) + } + + async fn list_objects(&self, prefix: &str) -> Result> { + let mut objects = Vec::new(); + let mut page_token = None; + + loop { + let mut request = ListObjectsRequest::new() + .set_parent(&self.bucket_path) + .set_prefix(prefix); + if 
let Some(token) = &page_token { + request = request.set_page_token(token); + } + + let response = self + .control + .list_objects() + .with_request(request) + .send() + .await?; + + objects.extend(response.objects.into_iter().map(|object| object.name)); + + if response.next_page_token.is_empty() { + break; + } + page_token = Some(response.next_page_token); + } + + Ok(objects) + } + + async fn read_stored_signed_block(&self, object_name: &str) -> Result { + let mut reader = self + .client + .read_object(&self.bucket_path, object_name) + .send() + .await?; + let mut bytes = Vec::new(); + while let Some(chunk) = reader.next().await.transpose()? { + bytes.extend_from_slice(&chunk); + } + borsh::from_slice(&bytes).map_err(Into::into) + } + + fn put(&self, block: SignedBlock) -> Result<()> { + let block_hash = block.hashed(); + if self.contains(&block_hash) { + return Ok(()); + } + + let current_chain_timestamp = self + .resolve_current_chain_timestamp(Some(&block))? + .context("Current chain timestamp is required for GCS block storage")?; + + for (lane_id, data_proposals) in &block.data_proposals { + for data_proposal in data_proposals { + self.proposals + .put_no_verification(lane_id.clone(), data_proposal.clone())?; + } + } + + let stored = StoredSignedBlock::from_signed_block(&block); + let object_name = + block_object_name(&self.gcs_prefix, ¤t_chain_timestamp, block.height()); + let payload = borsh::to_vec(&stored)?; + + match self.block_on(async { + self.client + .write_object( + self.bucket_path.clone(), + object_name, + bytes::Bytes::from(payload), + ) + .set_if_generation_match(0_i64) + .send_buffered() + .await + }) { + Ok(_) => {} + Err(err) + if err + .status() + .is_some_and(|status| matches!(status.code as i32, 6 | 9)) => {} + Err(err) => return Err(anyhow!(err)), + } + + let mut guard = self + .state + .write() + .unwrap_or_else(|poisoned| poisoned.into_inner()); + if guard.loaded_timestamp.as_deref() != Some(current_chain_timestamp.as_str()) { + 
guard.reset_for(current_chain_timestamp); + } + guard.by_height.insert(block.height(), block_hash.clone()); + guard.by_hash.insert(block_hash, stored); + Ok(()) + } + + fn get(&self, block_hash: &ConsensusProposalHash) -> Result> { + self.ensure_index_loaded()?; + let guard = self + .state + .read() + .unwrap_or_else(|poisoned| poisoned.into_inner()); + guard + .by_hash + .get(block_hash) + .cloned() + .map(|stored| stored.hydrate(&self.proposals)) + .transpose() + } + + fn get_by_height(&self, height: BlockHeight) -> Result> { + self.ensure_index_loaded()?; + let block_hash = { + let guard = self + .state + .read() + .unwrap_or_else(|poisoned| poisoned.into_inner()); + guard.by_height.get(&height).cloned() + }; + block_hash + .as_ref() + .map(|hash| self.get(hash)) + .transpose() + .map(|opt| opt.flatten()) + } + + fn has_by_height(&self, height: BlockHeight) -> Result { + self.ensure_index_loaded()?; + Ok(self + .state + .read() + .unwrap_or_else(|poisoned| poisoned.into_inner()) + .by_height + .contains_key(&height)) + } + + fn contains(&self, block_hash: &ConsensusProposalHash) -> bool { + self.ensure_index_loaded().is_ok_and(|_| { + self.state + .read() + .unwrap_or_else(|poisoned| poisoned.into_inner()) + .by_hash + .contains_key(block_hash) + }) + } + + fn first_hole_by_height(&self) -> Result> { + self.ensure_index_loaded()?; + let guard = self + .state + .read() + .unwrap_or_else(|poisoned| poisoned.into_inner()); + let Some((&upper_bound, _)) = guard.by_height.last_key_value() else { + bail!("Empty partition can't have holes"); + }; + + for height in 0..upper_bound.0 { + if !guard.by_height.contains_key(&BlockHeight(height)) { + return Ok(Some(BlockHeight(height))); + } + } + + Ok(None) + } + + fn last(&self) -> Option { + self.ensure_index_loaded().ok()?; + let stored = { + let guard = self + .state + .read() + .unwrap_or_else(|poisoned| poisoned.into_inner()); + let hash = guard.by_height.last_key_value()?.1.clone(); + guard.by_hash.get(&hash).cloned()? 
+ }; + stored.hydrate(&self.proposals).ok() + } + + fn highest(&self) -> BlockHeight { + self.last().map_or(BlockHeight(0), |b| b.height()) + } + + fn last_block_hash(&self) -> Option { + self.ensure_index_loaded().ok()?; + self.state + .read() + .unwrap_or_else(|poisoned| poisoned.into_inner()) + .by_height + .last_key_value() + .map(|(_height, hash)| hash.clone()) + } + + fn range( + &self, + min: BlockHeight, + max: BlockHeight, + ) -> impl Iterator> { + let values = match self.ensure_index_loaded() { + Ok(()) => self + .state + .read() + .unwrap_or_else(|poisoned| poisoned.into_inner()) + .by_height + .range(min..max) + .map(|(_height, hash)| Ok(hash.clone())) + .collect(), + Err(err) => vec![Err(err)], + }; + values.into_iter() + } +} + +impl Blocks { + pub fn new(path: &Path) -> Result { + Ok(Self { + backend: BlocksBackend::Fjall(Box::new(FjallBlocks::new(path)?)), + }) + } + + pub fn new_with_durability(path: &Path, conf: &DataProposalDurabilityConf) -> Result { + let backend = if should_store_blocks_in_gcs(conf) { + BlocksBackend::Gcs(Arc::new(GcsBlocks::new(path, conf)?)) + } else { + BlocksBackend::Fjall(Box::new(FjallBlocks::new(path)?)) + }; + Ok(Self { backend }) + } + + pub fn new_handle(&self) -> Blocks { + let backend = match &self.backend { + BlocksBackend::Fjall(inner) => BlocksBackend::Fjall(Box::new(inner.new_handle())), + BlocksBackend::Gcs(inner) => BlocksBackend::Gcs(Arc::clone(inner)), + }; + Blocks { backend } + } + + pub fn set_metrics_context(&mut self, node_id: impl Into) { + match &mut self.backend { + BlocksBackend::Fjall(inner) => inner.set_metrics_context(node_id), + BlocksBackend::Gcs(inner) => inner.set_metrics_context(node_id), + } + } + + pub fn is_empty(&self) -> bool { + match &self.backend { + BlocksBackend::Fjall(inner) => inner.is_empty(), + BlocksBackend::Gcs(inner) => inner.is_empty(), + } + } + + pub fn persist(&self) -> Result<()> { + match &self.backend { + BlocksBackend::Fjall(inner) => inner.persist(), + 
BlocksBackend::Gcs(inner) => inner.persist(), + } + } + + pub fn record_metrics(&self) { + match &self.backend { + BlocksBackend::Fjall(inner) => inner.record_metrics(), + BlocksBackend::Gcs(inner) => inner.record_metrics(), + } + } + + pub fn put(&mut self, block: SignedBlock) -> Result<()> { + match &mut self.backend { + BlocksBackend::Fjall(inner) => inner.put(block), + BlocksBackend::Gcs(inner) => inner.put(block), + } + } + + pub fn get(&self, block_hash: &ConsensusProposalHash) -> Result> { + match &self.backend { + BlocksBackend::Fjall(inner) => inner.get(block_hash), + BlocksBackend::Gcs(inner) => inner.get(block_hash), + } + } + + pub fn get_by_height(&self, height: BlockHeight) -> Result> { + match &self.backend { + BlocksBackend::Fjall(inner) => inner.get_by_height(height), + BlocksBackend::Gcs(inner) => inner.get_by_height(height), + } + } + + pub fn has_by_height(&self, height: BlockHeight) -> Result { + match &self.backend { + BlocksBackend::Fjall(inner) => inner.has_by_height(height), + BlocksBackend::Gcs(inner) => inner.has_by_height(height), + } + } + + pub fn contains(&self, block_hash: &ConsensusProposalHash) -> bool { + match &self.backend { + BlocksBackend::Fjall(inner) => inner.contains(block_hash), + BlocksBackend::Gcs(inner) => inner.contains(block_hash), + } + } + + pub fn record_op( + &self, + _op: &'static str, + _keyspace: &'static str, + _elapsed: std::time::Duration, + ) { + } + + pub fn first_hole_by_height(&self) -> Result> { + match &self.backend { + BlocksBackend::Fjall(inner) => inner.first_hole_by_height(), + BlocksBackend::Gcs(inner) => inner.first_hole_by_height(), + } + } + + pub fn last(&self) -> Option { + match &self.backend { + BlocksBackend::Fjall(inner) => inner.last(), + BlocksBackend::Gcs(inner) => inner.last(), + } + } + + pub fn highest(&self) -> BlockHeight { + match &self.backend { + BlocksBackend::Fjall(inner) => inner.highest(), + BlocksBackend::Gcs(inner) => inner.highest(), + } + } + + pub fn 
last_block_hash(&self) -> Option { + match &self.backend { + BlocksBackend::Fjall(inner) => inner.last_block_hash(), + BlocksBackend::Gcs(inner) => inner.last_block_hash(), + } + } + + pub fn range( + &mut self, + min: BlockHeight, + max: BlockHeight, + ) -> impl Iterator> { + match &mut self.backend { + BlocksBackend::Fjall(inner) => inner.range(min, max).collect::>().into_iter(), + BlocksBackend::Gcs(inner) => inner.range(min, max).collect::>().into_iter(), + } + } +} + impl Debug for Blocks { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("Blocks") - .field("len", &self.by_height.len()) + .field("highest", &self.highest()) .finish() } } + +fn should_store_blocks_in_gcs(conf: &DataProposalDurabilityConf) -> bool { + !conf.gcs_bucket.trim().is_empty() +} + +fn bucket_path(bucket: &str) -> String { + if bucket.starts_with("projects/") { + bucket.to_string() + } else { + format!("projects/_/buckets/{bucket}") + } +} + +fn block_object_prefix(gcs_prefix: &str, current_chain_timestamp: &str) -> String { + format!("{gcs_prefix}/{current_chain_timestamp}/stored_signed_blocks/block_") +} + +fn block_object_name( + gcs_prefix: &str, + current_chain_timestamp: &str, + height: BlockHeight, +) -> String { + format!( + "{}{}.bin", + block_object_prefix(gcs_prefix, current_chain_timestamp), + height.0 + ) +} + +fn parse_height_from_object_name(prefix: &str, object_name: &str) -> Option { + let height = object_name + .strip_prefix(prefix)? + .strip_suffix(".bin")? 
+ .parse() + .ok()?; + Some(BlockHeight(height)) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::utils::conf::DataProposalDurabilityConf; + + #[test] + fn block_object_name_is_namespaced_under_stored_signed_blocks() { + assert_eq!( + block_object_name("camelot", "2026-03-31T10-00-00Z", BlockHeight(12)), + "camelot/2026-03-31T10-00-00Z/stored_signed_blocks/block_12.bin" + ); + } + + #[test] + fn parse_height_from_object_name_matches_expected_format() { + let prefix = block_object_prefix("camelot", "2026-03-31T10-00-00Z"); + assert_eq!( + parse_height_from_object_name( + &prefix, + "camelot/2026-03-31T10-00-00Z/stored_signed_blocks/block_42.bin" + ), + Some(BlockHeight(42)) + ); + assert_eq!( + parse_height_from_object_name( + &prefix, + "camelot/other/stored_signed_blocks/block_42.bin" + ), + None + ); + } + + #[test] + fn blocks_default_to_fjall_without_bucket() -> Result<()> { + let tmpdir = tempfile::tempdir()?; + let blocks = Blocks::new_with_durability( + tmpdir.path(), + &DataProposalDurabilityConf { + gcs_bucket: String::new(), + gcs_prefix: "camelot".to_string(), + save_data_proposals: true, + }, + )?; + + assert!(matches!(blocks.backend, BlocksBackend::Fjall(_))); + Ok(()) + } +} From 3512302727806d98ab721e48a2a36b87570d66da Mon Sep 17 00:00:00 2001 From: Alexandre Careil Date: Tue, 31 Mar 2026 17:47:27 +0200 Subject: [PATCH 14/22] Stream stored signed blocks over DA --- crates/hyli-model/src/block.rs | 41 +++++ .../hyli-model/src/node/data_availability.rs | 5 + .../hyli-modules/src/modules/da_listener.rs | 1 + src/data_availability.rs | 146 ++++++++++------ src/data_availability/block_storage.rs | 164 +++++++++++------- 5 files changed, 242 insertions(+), 115 deletions(-) diff --git a/crates/hyli-model/src/block.rs b/crates/hyli-model/src/block.rs index ef4c081df..9d169488b 100644 --- a/crates/hyli-model/src/block.rs +++ b/crates/hyli-model/src/block.rs @@ -13,6 +13,16 @@ pub struct SignedBlock { pub certificate: AggregateSignature, } 
+#[derive( + Debug, Serialize, Deserialize, Clone, PartialEq, Eq, BorshSerialize, BorshDeserialize, Display, +)] +#[display("")] +pub struct StoredSignedBlock { + pub data_proposals: Vec<(LaneId, Vec)>, + pub consensus_proposal: ConsensusProposal, + pub certificate: AggregateSignature, +} + impl SignedBlock { pub fn parent_hash(&self) -> &ConsensusProposalHash { &self.consensus_proposal.parent_hash @@ -63,6 +73,37 @@ impl Hashed for SignedBlock { } } +impl Hashed for StoredSignedBlock { + fn hashed(&self) -> ConsensusProposalHash { + self.consensus_proposal.hashed() + } +} + +impl StoredSignedBlock { + pub fn height(&self) -> BlockHeight { + BlockHeight(self.consensus_proposal.slot) + } +} + +impl From<&SignedBlock> for StoredSignedBlock { + fn from(block: &SignedBlock) -> Self { + Self { + data_proposals: block + .data_proposals + .iter() + .map(|(lane_id, data_proposals)| { + ( + lane_id.clone(), + data_proposals.iter().map(|dp| dp.hashed()).collect(), + ) + }) + .collect(), + consensus_proposal: block.consensus_proposal.clone(), + certificate: block.certificate.clone(), + } + } +} + impl Ord for SignedBlock { fn cmp(&self, other: &Self) -> Ordering { self.height().0.cmp(&other.height().0) diff --git a/crates/hyli-model/src/node/data_availability.rs b/crates/hyli-model/src/node/data_availability.rs index 397abe705..10108d3b4 100644 --- a/crates/hyli-model/src/node/data_availability.rs +++ b/crates/hyli-model/src/node/data_availability.rs @@ -201,13 +201,18 @@ pub enum NodeStateEvent { pub enum DataAvailabilityRequest { /// Start streaming blocks from a given height StreamFromHeight(BlockHeight), + /// Start streaming stored signed blocks from a given height + StreamStoredFromHeight(BlockHeight), /// Request a specific block by height (prioritized) BlockRequest(BlockHeight), + /// Request a specific stored signed block by height (prioritized) + StoredBlockRequest(BlockHeight), } #[derive(Clone, Debug, PartialEq, Eq, BorshSerialize, BorshDeserialize, 
TcpMessageLabel)] pub enum DataAvailabilityEvent { SignedBlock(SignedBlock), + StoredSignedBlock(StoredSignedBlock), MempoolStatusEvent(MempoolStatusEvent), /// Block not found at the requested height BlockNotFound(BlockHeight), diff --git a/crates/hyli-modules/src/modules/da_listener.rs b/crates/hyli-modules/src/modules/da_listener.rs index 09295c3ca..6e92391dc 100644 --- a/crates/hyli-modules/src/modules/da_listener.rs +++ b/crates/hyli-modules/src/modules/da_listener.rs @@ -677,6 +677,7 @@ impl SignedDAListener

{ DataAvailabilityEvent::SignedBlock(block) => { self.handle_signed_block(block).await?; } + DataAvailabilityEvent::StoredSignedBlock(_) => {} DataAvailabilityEvent::MempoolStatusEvent(status) => { self.processor.process_mempool_status(status).await?; } diff --git a/src/data_availability.rs b/src/data_availability.rs index 6c7adfa5c..a9fc9a4eb 100644 --- a/src/data_availability.rs +++ b/src/data_availability.rs @@ -117,7 +117,28 @@ pub struct DataAvailability { allow_peer_catchup: bool, // Track blocks to send to each streaming peer (ensures ordering) - peer_send_queues: HashMap>, + peer_send_queues: HashMap, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum PeerStreamMode { + Signed, + Stored, +} + +#[derive(Debug)] +struct PeerSendQueue { + mode: PeerStreamMode, + hashes: VecDeque, +} + +impl Default for PeerSendQueue { + fn default() -> Self { + Self { + mode: PeerStreamMode::Signed, + hashes: VecDeque::new(), + } + } } #[derive(Debug, Clone, AsRefStr)] @@ -707,6 +728,7 @@ impl DataAvailability { _ = log_error!( self.start_streaming_to_peer( start_height, + PeerStreamMode::Signed, &mut catchup_joinset, &socket_addr, &mut server, @@ -714,12 +736,40 @@ impl DataAvailability { "Starting streaming to peer" ); } + DataAvailabilityRequest::StreamStoredFromHeight(start_height) => { + _ = log_error!( + self.start_streaming_to_peer( + start_height, + PeerStreamMode::Stored, + &mut catchup_joinset, + &socket_addr, + &mut server, + ).await, + "Starting stored block streaming to peer" + ); + } DataAvailabilityRequest::BlockRequest(block_height) => { _ = log_error!( - self.handle_block_request(block_height, &socket_addr, &mut server).await, + self.handle_block_request( + block_height, + PeerStreamMode::Signed, + &socket_addr, + &mut server + ).await, "Handling block request" ); } + DataAvailabilityRequest::StoredBlockRequest(block_height) => { + _ = log_error!( + self.handle_block_request( + block_height, + PeerStreamMode::Stored, + &socket_addr, + &mut server + 
).await, + "Handling stored block request" + ); + } } } TcpEvent::Closed { socket_addr } => { @@ -790,9 +840,9 @@ impl DataAvailability { } // Get next block from this peer's queue - let hash = match self.peer_send_queues.get_mut(&peer_ip) { - Some(queue) => match queue.pop_front() { - Some(h) => h, + let (mode, hash) = match self.peer_send_queues.get_mut(&peer_ip) { + Some(queue) => match queue.hashes.pop_front() { + Some(h) => (queue.mode, h), None => { // Queue is empty - peer is caught up and waiting for new blocks // Keep them in the map but don't spawn a new task yet @@ -805,23 +855,25 @@ impl DataAvailability { return Ok(()); } }; - debug!("📡 Sending block {} to peer {}", &hash, &peer_ip); - if let Ok(Some(signed_block)) = self.blocks.get(&hash) { - // Errors will be handled when sending new blocks, ignore here. - match server.send( - peer_ip.clone(), - DataAvailabilityEvent::SignedBlock(signed_block), - vec![], - ) { + let event = match mode { + PeerStreamMode::Signed => self + .blocks + .get(&hash)? + .map(DataAvailabilityEvent::SignedBlock), + PeerStreamMode::Stored => self + .blocks + .get_stored(&hash)? 
+ .map(DataAvailabilityEvent::StoredSignedBlock), + }; + if let Some(event) = event { + match server.send(peer_ip.clone(), event, vec![]) { Ok(()) => { - // Successfully sent, continue with next block catchup_joinset.spawn(async move { (peer_ip, 0) }); } Err(_) => { - // Retry sending the same block (put it back at front of queue) if let Some(queue) = self.peer_send_queues.get_mut(&peer_ip) { - queue.push_front(hash); + queue.hashes.push_front(hash); } catchup_joinset.spawn(async move { tokio::time::sleep(Duration::from_millis(100 * (retries as u64))).await; @@ -843,6 +895,7 @@ impl DataAvailability { async fn handle_block_request( &mut self, block_height: BlockHeight, + mode: PeerStreamMode, socket_addr: &str, server: &mut DataAvailabilityServer, ) -> Result<()> { @@ -852,28 +905,32 @@ impl DataAvailability { ); // Check if block exists in storage - match self.blocks.get_by_height(block_height) { - Ok(Some(block)) => { + let response = match mode { + PeerStreamMode::Signed => self + .blocks + .get_by_height(block_height)? + .map(DataAvailabilityEvent::SignedBlock), + PeerStreamMode::Stored => self + .blocks + .get_stored_by_height(block_height)? + .map(DataAvailabilityEvent::StoredSignedBlock), + }; + + match response { + Some(event) => { debug!( "📦 Found block at height {}, sending to {}", block_height, socket_addr ); - // Send immediately - this is inserted next in the send queue - if let Err(e) = server.send( - socket_addr.to_string(), - DataAvailabilityEvent::SignedBlock(block), - vec![], - ) { + if let Err(e) = server.send(socket_addr.to_string(), event, vec![]) { warn!( "📦 Error while responding to block request at height {} for {}: {:#}. 
Dropping socket.", block_height, socket_addr, e ); server.drop_peer_stream(socket_addr.to_string()); - return Ok(()); } } - Ok(None) => { - // Block not in storage - this is a gap + None => { error!( "📦 Block at height {} not found in storage, sending BlockNotFound to {}", block_height, socket_addr @@ -888,25 +945,6 @@ impl DataAvailability { block_height, socket_addr, e ); server.drop_peer_stream(socket_addr.to_string()); - return Ok(()); - } - } - Err(e) => { - error!( - "📦 Error retrieving block at height {}: {:#}", - block_height, e - ); - if let Err(e) = server.send( - socket_addr.to_string(), - DataAvailabilityEvent::BlockNotFound(block_height), - vec![], - ) { - warn!( - "📦 Error while responding BlockNotFound at height {} for {}: {:#}. Dropping socket.", - block_height, socket_addr, e - ); - server.drop_peer_stream(socket_addr.to_string()); - return Ok(()); } } } @@ -1107,8 +1145,8 @@ impl DataAvailability { // Add new block to all streaming peer queues to ensure ordering // (instead of broadcasting which can cause out-of-order delivery) for (peer, queue) in self.peer_send_queues.iter_mut() { - let was_empty = queue.is_empty(); - queue.push_back(block_hash.clone()); + let was_empty = queue.hashes.is_empty(); + queue.hashes.push_back(block_hash.clone()); // If queue was empty (peer was caught up), restart their send task if was_empty { @@ -1123,7 +1161,7 @@ impl DataAvailability { "Appending block {} to queue for peer {} (queue size: {})", block_hash, peer, - queue.len() + queue.hashes.len() ); } } @@ -1142,6 +1180,7 @@ impl DataAvailability { async fn start_streaming_to_peer( &mut self, start_height: BlockHeight, + mode: PeerStreamMode, catchup_joinset: &mut JoinSet<(String, usize)>, peer_ip: &str, server: &mut DataAvailabilityServer, @@ -1206,8 +1245,13 @@ impl DataAvailability { // Store queue for this peer - new blocks will be appended here let peer_ip_string = peer_ip.to_string(); - self.peer_send_queues - .insert(peer_ip_string.clone(), 
processed_block_hashes); + self.peer_send_queues.insert( + peer_ip_string.clone(), + PeerSendQueue { + mode, + hashes: processed_block_hashes, + }, + ); // Start the send task for this peer catchup_joinset.spawn(async move { (peer_ip_string, 0) }); diff --git a/src/data_availability/block_storage.rs b/src/data_availability/block_storage.rs index d9a6f493d..0926c663a 100644 --- a/src/data_availability/block_storage.rs +++ b/src/data_availability/block_storage.rs @@ -1,14 +1,10 @@ use anyhow::{anyhow, bail, Context, Result}; -use borsh::{BorshDeserialize, BorshSerialize}; use fjall::{Database, Keyspace, KeyspaceCreateOptions, KvSeparationOptions, Slice}; use google_cloud_storage::{ client::{Storage as GcsStorageClient, StorageControl}, model::ListObjectsRequest, }; -use hyli_model::{ - AggregateSignature, BlockHeight, ConsensusProposal, ConsensusProposalHash, DataProposalHash, - Hashed, LaneId, SignedBlock, -}; +use hyli_model::{BlockHeight, ConsensusProposalHash, Hashed, SignedBlock, StoredSignedBlock}; use hyli_modules::{ node_state::module::load_current_chain_timestamp, utils::fjall_metrics::FjallMetrics, }; @@ -27,13 +23,6 @@ use crate::{ utils::conf::DataProposalDurabilityConf, }; -#[derive(Clone, BorshSerialize, BorshDeserialize)] -struct StoredSignedBlock { - data_proposals: Vec<(LaneId, Vec)>, - consensus_proposal: ConsensusProposal, - certificate: AggregateSignature, -} - struct FjallHashKey(ConsensusProposalHash); struct FjallHeightKey([u8; 8]); struct FjallValue(Vec); @@ -100,9 +89,7 @@ impl AsRef<[u8]> for FjallHeightKey { impl FjallValue { fn new_with_block(block: &SignedBlock) -> Result { - Ok(Self(borsh::to_vec(&StoredSignedBlock::from_signed_block( - block, - ))?)) + Ok(Self(borsh::to_vec(&StoredSignedBlock::from(block))?)) } fn new_with_block_hash(block_hash: &ConsensusProposalHash) -> Result { @@ -116,53 +103,37 @@ impl AsRef<[u8]> for FjallValue { } } -impl StoredSignedBlock { - fn from_signed_block(block: &SignedBlock) -> Self { - Self { - 
data_proposals: block - .data_proposals - .iter() - .map(|(lane_id, data_proposals)| { - ( - lane_id.clone(), - data_proposals.iter().map(|dp| dp.hashed()).collect(), - ) +fn hydrate_stored_signed_block( + stored: StoredSignedBlock, + proposals: &ProposalStorage, +) -> Result { + let block_hash = stored.consensus_proposal.hashed(); + let data_proposals = stored + .data_proposals + .into_iter() + .map(|(lane_id, hashes)| { + let data_proposals = hashes + .into_iter() + .map(|dp_hash| { + proposals + .get_dp_by_hash(&lane_id, &dp_hash)? + .with_context(|| { + format!( + "missing proposal {dp_hash} for lane {} while hydrating block {}", + lane_id, block_hash + ) + }) }) - .collect(), - consensus_proposal: block.consensus_proposal.clone(), - certificate: block.certificate.clone(), - } - } - - fn hydrate(self, proposals: &ProposalStorage) -> Result { - let block_hash = self.consensus_proposal.hashed(); - let data_proposals = self - .data_proposals - .into_iter() - .map(|(lane_id, hashes)| { - let data_proposals = hashes - .into_iter() - .map(|dp_hash| { - proposals - .get_dp_by_hash(&lane_id, &dp_hash)? 
- .with_context(|| { - format!( - "missing proposal {dp_hash} for lane {} while hydrating block {}", - lane_id, block_hash - ) - }) - }) - .collect::>>()?; - Ok((lane_id, data_proposals)) - }) - .collect::>>()?; - - Ok(SignedBlock { - data_proposals, - consensus_proposal: self.consensus_proposal, - certificate: self.certificate, + .collect::>>()?; + Ok((lane_id, data_proposals)) }) - } + .collect::>>()?; + + Ok(SignedBlock { + data_proposals, + consensus_proposal: stored.consensus_proposal, + certificate: stored.certificate, + }) } impl FjallBlocks { @@ -203,7 +174,11 @@ impl FjallBlocks { fn decode_block(&self, item: Slice) -> Result { let stored = borsh::from_slice::(&item)?; - stored.hydrate(&self.proposals) + hydrate_stored_signed_block(stored, &self.proposals) + } + + fn decode_stored_block(item: Slice) -> Result { + borsh::from_slice::(&item).map_err(Into::into) } fn decode_block_hash(item: Slice) -> Result { @@ -280,6 +255,13 @@ impl FjallBlocks { res } + fn get_stored(&self, block_hash: &ConsensusProposalHash) -> Result> { + let Some(item) = self.by_hash.get(FjallHashKey(block_hash.clone()))? else { + return Ok(None); + }; + Self::decode_stored_block(item).map(Some) + } + fn get_by_height(&self, height: BlockHeight) -> Result> { let start = Instant::now(); let Some(bytes) = self.by_height.get(FjallHeightKey::new(height))? else { @@ -292,6 +274,14 @@ impl FjallBlocks { res } + fn get_stored_by_height(&self, height: BlockHeight) -> Result> { + let Some(bytes) = self.by_height.get(FjallHeightKey::new(height))? 
else { + return Ok(None); + }; + let block_hash = Self::decode_block_hash(bytes)?; + self.get_stored(&block_hash) + } + fn has_by_height(&self, height: BlockHeight) -> Result { let start = Instant::now(); let res = self.by_height.contains_key(FjallHeightKey::new(height))?; @@ -584,7 +574,7 @@ impl GcsBlocks { } } - let stored = StoredSignedBlock::from_signed_block(&block); + let stored = StoredSignedBlock::from(&block); let object_name = block_object_name(&self.gcs_prefix, ¤t_chain_timestamp, block.height()); let payload = borsh::to_vec(&stored)?; @@ -630,10 +620,21 @@ impl GcsBlocks { .by_hash .get(block_hash) .cloned() - .map(|stored| stored.hydrate(&self.proposals)) + .map(|stored| hydrate_stored_signed_block(stored, &self.proposals)) .transpose() } + fn get_stored(&self, block_hash: &ConsensusProposalHash) -> Result> { + self.ensure_index_loaded()?; + Ok(self + .state + .read() + .unwrap_or_else(|poisoned| poisoned.into_inner()) + .by_hash + .get(block_hash) + .cloned()) + } + fn get_by_height(&self, height: BlockHeight) -> Result> { self.ensure_index_loaded()?; let block_hash = { @@ -650,6 +651,24 @@ impl GcsBlocks { .map(|opt| opt.flatten()) } + fn get_stored_by_height(&self, height: BlockHeight) -> Result> { + self.ensure_index_loaded()?; + Ok(self + .state + .read() + .unwrap_or_else(|poisoned| poisoned.into_inner()) + .by_height + .get(&height) + .and_then(|hash| { + self.state + .read() + .unwrap_or_else(|poisoned| poisoned.into_inner()) + .by_hash + .get(hash) + .cloned() + })) + } + fn has_by_height(&self, height: BlockHeight) -> Result { self.ensure_index_loaded()?; Ok(self @@ -699,7 +718,7 @@ impl GcsBlocks { let hash = guard.by_height.last_key_value()?.1.clone(); guard.by_hash.get(&hash).cloned()? 
}; - stored.hydrate(&self.proposals).ok() + hydrate_stored_signed_block(stored, &self.proposals).ok() } fn highest(&self) -> BlockHeight { @@ -802,6 +821,16 @@ impl Blocks { } } + pub fn get_stored( + &self, + block_hash: &ConsensusProposalHash, + ) -> Result> { + match &self.backend { + BlocksBackend::Fjall(inner) => inner.get_stored(block_hash), + BlocksBackend::Gcs(inner) => inner.get_stored(block_hash), + } + } + pub fn get_by_height(&self, height: BlockHeight) -> Result> { match &self.backend { BlocksBackend::Fjall(inner) => inner.get_by_height(height), @@ -809,6 +838,13 @@ impl Blocks { } } + pub fn get_stored_by_height(&self, height: BlockHeight) -> Result> { + match &self.backend { + BlocksBackend::Fjall(inner) => inner.get_stored_by_height(height), + BlocksBackend::Gcs(inner) => inner.get_stored_by_height(height), + } + } + pub fn has_by_height(&self, height: BlockHeight) -> Result { match &self.backend { BlocksBackend::Fjall(inner) => inner.has_by_height(height), From c7b5b5587db23dbca283cdbed9be30d5da26dc47 Mon Sep 17 00:00:00 2001 From: Alexandre Careil Date: Wed, 1 Apr 2026 11:35:27 +0200 Subject: [PATCH 15/22] Make GCS block storage initialization async --- src/data_availability.rs | 23 +++++++++++----------- src/data_availability/block_storage.rs | 19 +++++++++--------- src/data_availability/local_da_replayer.rs | 2 +- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/data_availability.rs b/src/data_availability.rs index a9fc9a4eb..00a0044b9 100644 --- a/src/data_availability.rs +++ b/src/data_availability.rs @@ -40,7 +40,8 @@ impl Module for DataAvailability { let mut blocks = Blocks::new_with_durability( &ctx.config.data_directory, &ctx.config.data_proposal_durability, - )?; + ) + .await?; blocks.set_metrics_context(ctx.config.id.clone()); let highest_block = blocks.highest(); @@ -1294,7 +1295,7 @@ pub mod tests { pub async fn new(shared_bus: crate::bus::SharedMessageBus) -> Self { let path = tempfile::tempdir().unwrap().keep(); 
let tmpdir = path; - let blocks = Blocks::new(&tmpdir).unwrap(); + let blocks = Blocks::new(&tmpdir).await.unwrap(); let bus = super::DABusClient::new_from_bus(shared_bus.new_handle()).await; let node_state_bus = NodeStateBusClient::new_from_bus(shared_bus).await; @@ -1346,10 +1347,10 @@ pub mod tests { } } - #[test_log::test] - fn test_blocks() -> Result<()> { + #[test_log::test(tokio::test)] + async fn test_blocks() -> Result<()> { let tmpdir = tempfile::tempdir().unwrap().keep(); - let mut blocks = Blocks::new(&tmpdir).unwrap(); + let mut blocks = Blocks::new(&tmpdir).await.unwrap(); let block = SignedBlock::default(); blocks.put(block.clone())?; assert!(blocks.last().unwrap().height() == block.height()); @@ -1362,7 +1363,7 @@ pub mod tests { #[test_log::test(tokio::test)] async fn test_pop_buffer_large() { let tmpdir = tempfile::tempdir().unwrap().keep(); - let blocks = Blocks::new(&tmpdir).unwrap(); + let blocks = Blocks::new(&tmpdir).await.unwrap(); let port = find_available_port().await; let mut server = DataAvailabilityServer::start(port, "DaServer") @@ -1415,7 +1416,7 @@ pub mod tests { #[test_log::test(tokio::test)] async fn test_da_streaming() { let tmpdir = tempfile::tempdir().unwrap().keep(); - let blocks = Blocks::new(&tmpdir).unwrap(); + let blocks = Blocks::new(&tmpdir).await.unwrap(); let global_bus = crate::bus::SharedMessageBus::new(); let bus = super::DABusClient::new_from_bus(global_bus.new_handle()).await; @@ -1907,7 +1908,7 @@ pub mod tests { async fn test_block_request_while_streaming() { // Create DA server with blocks 0-9 already stored let tmpdir = tempfile::tempdir().unwrap().keep(); - let mut blocks_storage = Blocks::new(&tmpdir).unwrap(); + let mut blocks_storage = Blocks::new(&tmpdir).await.unwrap(); let global_bus = crate::bus::SharedMessageBus::new(); let bus = super::DABusClient::new_from_bus(global_bus.new_handle()).await; @@ -2054,7 +2055,7 @@ pub mod tests { #[test_log::test(tokio::test)] async fn 
test_stream_rejected_when_start_height_missing() { let tmpdir = tempfile::tempdir().unwrap().keep(); - let mut blocks_storage = Blocks::new(&tmpdir).unwrap(); + let mut blocks_storage = Blocks::new(&tmpdir).await.unwrap(); let global_bus = crate::bus::SharedMessageBus::new(); let bus = super::DABusClient::new_from_bus(global_bus.new_handle()).await; @@ -2121,7 +2122,7 @@ pub mod tests { #[test_log::test(tokio::test)] async fn test_stream_rejected_when_requested_range_has_gap() { let tmpdir = tempfile::tempdir().unwrap().keep(); - let mut blocks_storage = Blocks::new(&tmpdir).unwrap(); + let mut blocks_storage = Blocks::new(&tmpdir).await.unwrap(); let global_bus = crate::bus::SharedMessageBus::new(); let bus = super::DABusClient::new_from_bus(global_bus.new_handle()).await; @@ -2465,7 +2466,7 @@ pub mod tests { #[test_log::test(tokio::test)] async fn test_stream_accepts_when_peer_is_already_up_to_date() { let tmpdir = tempfile::tempdir().unwrap().keep(); - let mut blocks_storage = Blocks::new(&tmpdir).unwrap(); + let mut blocks_storage = Blocks::new(&tmpdir).await.unwrap(); let global_bus = crate::bus::SharedMessageBus::new(); let bus = super::DABusClient::new_from_bus(global_bus.new_handle()).await; diff --git a/src/data_availability/block_storage.rs b/src/data_availability/block_storage.rs index 0926c663a..85981c152 100644 --- a/src/data_availability/block_storage.rs +++ b/src/data_availability/block_storage.rs @@ -376,19 +376,15 @@ impl GcsBlockState { } impl GcsBlocks { - fn new(path: &Path, conf: &DataProposalDurabilityConf) -> Result { + async fn new(path: &Path, conf: &DataProposalDurabilityConf) -> Result { let runtime = Arc::new( tokio::runtime::Builder::new_current_thread() .enable_all() .build()?, ); - let (client, control) = runtime.block_on(async { - Ok::<_, anyhow::Error>(( - GcsStorageClient::builder().build().await?, - StorageControl::builder().build().await?, - )) - })?; + let client = GcsStorageClient::builder().build().await?; + let control = 
StorageControl::builder().build().await?; Ok(Self { runtime, @@ -756,15 +752,18 @@ impl GcsBlocks { } impl Blocks { - pub fn new(path: &Path) -> Result { + pub async fn new(path: &Path) -> Result { Ok(Self { backend: BlocksBackend::Fjall(Box::new(FjallBlocks::new(path)?)), }) } - pub fn new_with_durability(path: &Path, conf: &DataProposalDurabilityConf) -> Result { + pub async fn new_with_durability( + path: &Path, + conf: &DataProposalDurabilityConf, + ) -> Result { let backend = if should_store_blocks_in_gcs(conf) { - BlocksBackend::Gcs(Arc::new(GcsBlocks::new(path, conf)?)) + BlocksBackend::Gcs(Arc::new(GcsBlocks::new(path, conf).await?)) } else { BlocksBackend::Fjall(Box::new(FjallBlocks::new(path)?)) }; diff --git a/src/data_availability/local_da_replayer.rs b/src/data_availability/local_da_replayer.rs index 70755b0b1..7bd6026c8 100644 --- a/src/data_availability/local_da_replayer.rs +++ b/src/data_availability/local_da_replayer.rs @@ -95,7 +95,7 @@ impl LocalDaReplayer { async fn replay_da_storage(&mut self, folder: &str) -> Result<()> { info!("Reading blocks from DA {folder}"); - let mut blocks = Blocks::new(&PathBuf::from(folder))?; + let mut blocks = Blocks::new(&PathBuf::from(folder)).await?; let block_hashes = blocks .range(BlockHeight(0), BlockHeight(u64::MAX)) .collect::>>()?; From db57f137b2f13a5168abe9e93910ca4de296a534 Mon Sep 17 00:00:00 2001 From: Alexandre Careil Date: Wed, 1 Apr 2026 13:11:31 +0200 Subject: [PATCH 16/22] Use stored DA blocks in standalone indexer --- src/entrypoint.rs | 40 +++-- src/indexer_da_client.rs | 356 +++++++++++++++++++++++++++++++++++++++ src/lib.rs | 1 + 3 files changed, 385 insertions(+), 12 deletions(-) create mode 100644 src/indexer_da_client.rs diff --git a/src/entrypoint.rs b/src/entrypoint.rs index 80e432cca..9d8017ec0 100644 --- a/src/entrypoint.rs +++ b/src/entrypoint.rs @@ -7,6 +7,7 @@ use crate::{ explorer::Explorer, genesis::Genesis, indexer::Indexer, + indexer_da_client::{StoredDaIndexerClient, 
StoredDaIndexerClientCtx}, mempool::{dissemination::DisseminationManager, Mempool}, model::{api::NodeInfo, ContractName, SharedRunContext}, p2p::P2P, @@ -617,18 +618,33 @@ pub async fn common_main( handler.build_module::(ctx.clone()).await?; } else if config.run_indexer { - LocalDaReplayer::build_bus_only_source( - &mut handler, - DAListenerConf { - data_directory: config.data_directory.clone(), - da_read_from: config.da_read_from.clone(), - start_block: None, - timeout_client_secs: config.da_timeout_client_secs, - da_fallback_addresses: config.da_fallback_addresses.clone(), - processor_config: (), - }, - ) - .await?; + if config.data_proposal_durability.gcs_enabled() + && !config.da_read_from.starts_with("folder:") + && !config.da_read_from.starts_with("blob:") + { + handler + .build_module::(StoredDaIndexerClientCtx { + data_directory: config.data_directory.clone(), + da_read_from: config.da_read_from.clone(), + da_fallback_addresses: config.da_fallback_addresses.clone(), + timeout_client_secs: config.da_timeout_client_secs, + gcs_conf: config.data_proposal_durability.clone(), + }) + .await?; + } else { + LocalDaReplayer::build_bus_only_source( + &mut handler, + DAListenerConf { + data_directory: config.data_directory.clone(), + da_read_from: config.da_read_from.clone(), + start_block: None, + timeout_client_secs: config.da_timeout_client_secs, + da_fallback_addresses: config.da_fallback_addresses.clone(), + processor_config: (), + }, + ) + .await?; + } } if config.websocket.enabled { diff --git a/src/indexer_da_client.rs b/src/indexer_da_client.rs new file mode 100644 index 000000000..95b2ab0dc --- /dev/null +++ b/src/indexer_da_client.rs @@ -0,0 +1,356 @@ +use std::{ + collections::BTreeMap, + path::PathBuf, + time::{Duration, Instant}, +}; + +use anyhow::{anyhow, Context, Result}; +use google_cloud_storage::client::Storage as GcsStorageClient; +use hyli_bus::modules::ModulePersistOutput; +use hyli_model::{ + BlockHeight, DataAvailabilityEvent, 
DataAvailabilityRequest, DataProposal, Hashed, LaneId, + SignedBlock, StoredSignedBlock, +}; +use hyli_modules::{ + bus::{BusClientSender, SharedMessageBus}, + module_bus_client, module_handle_messages, + modules::Module, + node_state::{ + module::{load_current_chain_timestamp, persist_current_chain_timestamp, NodeStateModule}, + NodeStateStore, + }, + utils::da_codec::DataAvailabilityClient, +}; +use tracing::{debug, info, warn}; + +use crate::{ + model::{DataEvent, MempoolStatusEvent}, + shared_storage::gcs::timestamp_to_folder_name, + utils::conf::DataProposalDurabilityConf, +}; + +pub struct StoredDaIndexerClientCtx { + pub data_directory: PathBuf, + pub da_read_from: String, + pub da_fallback_addresses: Vec, + pub timeout_client_secs: u64, + pub gcs_conf: DataProposalDurabilityConf, +} + +module_bus_client! { +#[derive(Debug)] +struct StoredDaIndexerClientBus { + sender(DataEvent), + sender(MempoolStatusEvent), +} +} + +pub struct StoredDaIndexerClient { + bus: StoredDaIndexerClientBus, + config: StoredDaIndexerClientCtx, + da_client: Option, + gcs_client: GcsStorageClient, + current_block: BlockHeight, + current_chain_timestamp: Option, + block_buffer: BTreeMap, + deadline: Instant, +} + +impl Module for StoredDaIndexerClient { + type Context = StoredDaIndexerClientCtx; + + async fn build(bus: SharedMessageBus, ctx: Self::Context) -> Result { + let start_block_in_file = match NodeStateModule::load_from_disk::( + &ctx.data_directory, + "da_start_height.bin".as_ref(), + )? 
{ + Some(height) => height, + None => { + warn!("Starting StoredDaIndexerClient from default block height."); + BlockHeight(0) + } + }; + + let bus = StoredDaIndexerClientBus::new_from_bus(bus.new_handle()).await; + let gcs_client = GcsStorageClient::builder().build().await?; + let current_chain_timestamp = load_current_chain_timestamp(&ctx.data_directory).ok(); + + Ok(Self { + bus, + config: ctx, + da_client: None, + gcs_client, + current_block: start_block_in_file, + current_chain_timestamp, + block_buffer: BTreeMap::new(), + deadline: Instant::now(), + }) + } + + async fn run(&mut self) -> Result<()> { + self.start().await + } + + async fn persist(&mut self) -> Result { + let file = PathBuf::from("da_start_height.bin"); + let checksum = + NodeStateModule::save_on_disk(&self.config.data_directory, &file, &self.current_block)?; + + let mut persisted = vec![(self.config.data_directory.join(file), checksum)]; + if let Some(timestamp_folder) = self.current_chain_timestamp.clone() { + let mut store = NodeStateStore::default(); + store.current_chain_timestamp = Some(timestamp_folder); + if let Some(timestamp_file) = + persist_current_chain_timestamp(&self.config.data_directory, &store)? + { + persisted.push(timestamp_file); + } + } + Ok(persisted) + } +} + +impl StoredDaIndexerClient { + async fn start(&mut self) -> Result<()> { + self.connect().await?; + + info!( + "Starting DA client for stored signed blocks at block {}", + self.current_block + ); + + module_handle_messages! { + on_self self, + recv = self.recv_next() => { + match recv? 
{ + Some(DataAvailabilityEvent::StoredSignedBlock(block)) => { + self.handle_stored_signed_block(block).await?; + } + Some(DataAvailabilityEvent::MempoolStatusEvent(status)) => { + self.bus.send_waiting_if_full(status).await?; + } + Some(DataAvailabilityEvent::BlockNotFound(height)) => { + return Err(anyhow!("Stored block {height} was not found by DA server")); + } + Some(DataAvailabilityEvent::SignedBlock(_)) => { + debug!("Ignoring signed block on stored block client"); + } + None => { + warn!("Stored DA stream closed. Reconnecting after sleeping 1s..."); + tokio::time::sleep(Duration::from_secs(1)).await; + self.connect().await?; + } + } + } + }; + Ok(()) + } + + async fn connect(&mut self) -> Result<()> { + let mut addresses = vec![self.config.da_read_from.clone()]; + addresses.extend(self.config.da_fallback_addresses.clone()); + + for address in addresses { + match DataAvailabilityClient::connect_with_opts( + "stored_da_indexer", + Some(1024 * 1024 * 1024), + address.clone(), + ) + .await + { + Ok(mut client) => { + client + .send(DataAvailabilityRequest::StreamStoredFromHeight( + self.current_block, + )) + .await?; + self.deadline = + Instant::now() + Duration::from_secs(self.config.timeout_client_secs); + self.da_client = Some(client); + return Ok(()); + } + Err(err) => { + warn!("Failed to connect to DA server {}: {}", address, err); + } + } + } + + Err(anyhow!("Failed to connect to any DA server")) + } + + async fn recv_next(&mut self) -> Result> { + let Some(client) = self.da_client.as_mut() else { + return Ok(None); + }; + + match tokio::time::timeout( + self.deadline.saturating_duration_since(Instant::now()), + client.recv(), + ) + .await + { + Ok(event) => { + self.deadline = + Instant::now() + Duration::from_secs(self.config.timeout_client_secs); + Ok(event) + } + Err(_) => { + warn!("Stored DA client timed out. 
Reconnecting."); + self.connect().await?; + Ok(None) + } + } + } + + async fn handle_stored_signed_block(&mut self, block: StoredSignedBlock) -> Result<()> { + let block_height = block.height(); + + match block_height.cmp(&self.current_block) { + std::cmp::Ordering::Less => { + warn!("Ignoring past stored block {}", block_height); + } + std::cmp::Ordering::Equal => { + self.emit_reconstructed_block(block).await?; + self.current_block = block_height + 1; + self.process_buffered_blocks().await?; + } + std::cmp::Ordering::Greater => { + for missing_height in self.current_block.0..block_height.0 { + self.request_specific_block(BlockHeight(missing_height)) + .await?; + } + self.block_buffer.insert(block_height, block); + } + } + + Ok(()) + } + + async fn process_buffered_blocks(&mut self) -> Result<()> { + while let Some(block) = self.block_buffer.remove(&self.current_block) { + let height = block.height(); + self.emit_reconstructed_block(block).await?; + self.current_block = height + 1; + } + Ok(()) + } + + async fn request_specific_block(&mut self, height: BlockHeight) -> Result<()> { + let Some(client) = self.da_client.as_mut() else { + return Err(anyhow!("DA client not initialized")); + }; + client + .send(DataAvailabilityRequest::StoredBlockRequest(height)) + .await?; + Ok(()) + } + + async fn emit_reconstructed_block(&mut self, block: StoredSignedBlock) -> Result<()> { + let signed_block = self.reconstruct_signed_block(block).await?; + self.bus + .send_waiting_if_full(DataEvent::OrderedSignedBlock(signed_block)) + .await + } + + async fn reconstruct_signed_block(&mut self, block: StoredSignedBlock) -> Result { + let current_chain_timestamp = self + .resolve_current_chain_timestamp(&block) + .await? 
+ .context("Current chain timestamp is required to fetch data proposals from GCS")?; + + let mut data_proposals = Vec::with_capacity(block.data_proposals.len()); + for (lane_id, hashes) in &block.data_proposals { + let mut proposals = Vec::with_capacity(hashes.len()); + for dp_hash in hashes { + proposals.push( + self.fetch_data_proposal(¤t_chain_timestamp, lane_id, dp_hash) + .await?, + ); + } + data_proposals.push((lane_id.clone(), proposals)); + } + + Ok(SignedBlock { + data_proposals, + consensus_proposal: block.consensus_proposal, + certificate: block.certificate, + }) + } + + async fn resolve_current_chain_timestamp( + &mut self, + block: &StoredSignedBlock, + ) -> Result> { + if let Some(current_chain_timestamp) = &self.current_chain_timestamp { + return Ok(Some(current_chain_timestamp.clone())); + } + + if block.height() != BlockHeight(0) { + return Ok(None); + } + + let current_chain_timestamp = + timestamp_to_folder_name(block.consensus_proposal.timestamp.0)?; + self.current_chain_timestamp = Some(current_chain_timestamp.clone()); + + let mut store = NodeStateStore::default(); + store.current_chain_timestamp = Some(current_chain_timestamp.clone()); + let _ = persist_current_chain_timestamp(&self.config.data_directory, &store)?; + + Ok(Some(current_chain_timestamp)) + } + + async fn fetch_data_proposal( + &self, + current_chain_timestamp: &str, + lane_id: &LaneId, + dp_hash: &hyli_model::DataProposalHash, + ) -> Result { + let mut reader = self + .gcs_client + .read_object( + bucket_path(&self.config.gcs_conf.gcs_bucket), + data_proposal_object_name( + &self.config.gcs_conf.gcs_prefix, + current_chain_timestamp, + lane_id, + dp_hash, + ), + ) + .send() + .await?; + let mut bytes = Vec::new(); + while let Some(chunk) = reader.next().await.transpose()? 
{ + bytes.extend_from_slice(&chunk); + } + let proposal = borsh::from_slice::(&bytes) + .context("Deserializing data proposal from GCS")?; + if proposal.hashed() != *dp_hash { + return Err(anyhow!( + "Fetched data proposal hash mismatch for lane {}", + lane_id + )); + } + Ok(proposal) + } +} + +fn bucket_path(bucket: &str) -> String { + if bucket.starts_with("projects/") { + bucket.to_string() + } else { + format!("projects/_/buckets/{bucket}") + } +} + +fn data_proposal_object_name( + gcs_prefix: &str, + current_chain_timestamp: &str, + lane_id: &LaneId, + dp_hash: &hyli_model::DataProposalHash, +) -> String { + format!( + "{}/{}/data_proposals/{}/{}.bin", + gcs_prefix, current_chain_timestamp, lane_id, dp_hash + ) +} diff --git a/src/lib.rs b/src/lib.rs index 92dfbf79f..ecfcfbcbf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,6 +24,7 @@ pub mod entrypoint; pub mod explorer; pub mod genesis; pub mod indexer; +pub mod indexer_da_client; pub mod mempool; pub mod p2p; pub mod rest; From 8cded793bfb8fdc75c5f41344a150ee1919337ab Mon Sep 17 00:00:00 2001 From: Alexandre Careil Date: Wed, 1 Apr 2026 15:57:16 +0200 Subject: [PATCH 17/22] Persist genesis data proposals before DA streaming --- src/data_availability.rs | 1 + src/data_availability/block_storage.rs | 7 +-- src/genesis.rs | 65 ++++++++++++++++++++++++++ src/indexer_da_client.rs | 52 +++++++++++++++------ 4 files changed, 108 insertions(+), 17 deletions(-) diff --git a/src/data_availability.rs b/src/data_availability.rs index 00a0044b9..c7f3e97be 100644 --- a/src/data_availability.rs +++ b/src/data_availability.rs @@ -1996,6 +1996,7 @@ pub mod tests { assert_eq!(height.0, 100, "Should be BlockNotFound for block 100"); received_block_not_found = true; } + DataAvailabilityEvent::StoredSignedBlock(_) => {} DataAvailabilityEvent::MempoolStatusEvent(_) => {} } diff --git a/src/data_availability/block_storage.rs b/src/data_availability/block_storage.rs index 85981c152..93bf1d15e 100644 --- 
a/src/data_availability/block_storage.rs +++ b/src/data_availability/block_storage.rs @@ -983,8 +983,8 @@ mod tests { ); } - #[test] - fn blocks_default_to_fjall_without_bucket() -> Result<()> { + #[test_log::test(tokio::test)] + async fn blocks_default_to_fjall_without_bucket() -> Result<()> { let tmpdir = tempfile::tempdir()?; let blocks = Blocks::new_with_durability( tmpdir.path(), @@ -993,7 +993,8 @@ mod tests { gcs_prefix: "camelot".to_string(), save_data_proposals: true, }, - )?; + ) + .await?; assert!(matches!(blocks.backend, BlocksBackend::Fjall(_))); Ok(()) diff --git a/src/genesis.rs b/src/genesis.rs index 9992209b3..aefcaed5f 100644 --- a/src/genesis.rs +++ b/src/genesis.rs @@ -1,8 +1,12 @@ use std::{ collections::{BTreeMap, HashMap}, path::PathBuf, + sync::Arc, }; +use crate::shared_storage::{ + durability_backend_for_conf, gcs::timestamp_to_folder_name, DataProposalDurability, +}; use crate::{model::*, p2p::network::PeerEvent, utils::conf::SharedConf}; use anyhow::{Error, Result}; use client_sdk::{ @@ -57,6 +61,7 @@ pub struct Genesis { bus: GenesisBusClient, peer_pubkey: PeerPublicKeyMap, crypto: SharedBlstCrypto, + durability: Arc, already_handled_genesis: bool, peer_timestamps: Vec, start_timestamp: TimestampMs, @@ -75,11 +80,21 @@ impl Module for Genesis { } }; let bus = GenesisBusClient::new_from_bus(bus.new_handle()).await; + let durability_backend = durability_backend_for_conf( + &ctx.config.data_directory, + &ctx.config.data_proposal_durability, + false, + ) + .await?; Ok(Genesis { config: ctx.config.clone(), bus, peer_pubkey: BTreeMap::new(), crypto: ctx.crypto.clone(), + durability: Arc::new(DataProposalDurability::new( + durability_backend, + &ctx.config.data_directory, + )?), already_handled_genesis, peer_timestamps: Vec::new(), start_timestamp: ctx.start_timestamp, @@ -214,12 +229,58 @@ impl Genesis { let signed_block = self.make_genesis_block(genesis_txs, initial_validators, genesis_timestamp); + 
self.persist_genesis_data_proposals(&signed_block).await?; + // At this point, we can setup the genesis block. _ = self.bus.send(GenesisEvent::GenesisBlock(signed_block)); Ok(()) } + async fn persist_genesis_data_proposals(&self, signed_block: &SignedBlock) -> Result<()> { + if !self.config.data_proposal_durability.enabled() { + return Ok(()); + } + + let current_chain_timestamp = + timestamp_to_folder_name(signed_block.consensus_proposal.timestamp.0)?; + self.durability + .set_current_chain_timestamp(current_chain_timestamp.clone()); + + let total_dps: usize = signed_block + .data_proposals + .iter() + .map(|(_, data_proposals)| data_proposals.len()) + .sum(); + info!( + "🌱 Persisting {total_dps} genesis data proposals for chain timestamp {current_chain_timestamp}" + ); + + for (lane_id, data_proposals) in &signed_block.data_proposals { + info!( + "🌱 Persisting {} genesis data proposals on lane {}", + data_proposals.len(), + lane_id + ); + for data_proposal in data_proposals { + let mut canonical = data_proposal.clone(); + canonical.remove_proofs(); + let dp_hash = canonical.hashed(); + self.durability + .prime_persistence(lane_id.clone(), data_proposal)?; + self.durability + .wait_until_persisted(lane_id, &dp_hash) + .await?; + debug!( + "🌱 Persisted genesis data proposal {} on lane {}", + dp_hash, lane_id + ); + } + } + + Ok(()) + } + pub async fn generate_genesis_txs( &self, peer_pubkey: &PeerPublicKeyMap, @@ -745,6 +806,7 @@ mod tests { use super::*; use crate::bus::{BusClientReceiver, SharedMessageBus}; + use crate::shared_storage::{DataProposalDurability, NullDurabilityBackend}; use crate::utils::conf::Conf; use hyli_crypto::BlstCrypto; use std::sync::Arc; @@ -767,6 +829,9 @@ mod tests { bus, peer_pubkey: BTreeMap::new(), crypto, + durability: Arc::new(DataProposalDurability::new_in_memory(Arc::new( + NullDurabilityBackend, + ))), already_handled_genesis: false, peer_timestamps: Vec::new(), start_timestamp: TimestampMs(1000000), diff --git 
a/src/indexer_da_client.rs b/src/indexer_da_client.rs index 95b2ab0dc..e947f6c9e 100644 --- a/src/indexer_da_client.rs +++ b/src/indexer_da_client.rs @@ -246,7 +246,11 @@ impl StoredDaIndexerClient { } async fn emit_reconstructed_block(&mut self, block: StoredSignedBlock) -> Result<()> { - let signed_block = self.reconstruct_signed_block(block).await?; + let block_height = block.height(); + let signed_block = self + .reconstruct_signed_block(block) + .await + .with_context(|| format!("Reconstructing stored signed block {block_height}"))?; self.bus .send_waiting_if_full(DataEvent::OrderedSignedBlock(signed_block)) .await @@ -263,8 +267,13 @@ impl StoredDaIndexerClient { let mut proposals = Vec::with_capacity(hashes.len()); for dp_hash in hashes { proposals.push( - self.fetch_data_proposal(¤t_chain_timestamp, lane_id, dp_hash) - .await?, + self.fetch_data_proposal( + block.height(), + ¤t_chain_timestamp, + lane_id, + dp_hash, + ) + .await?, ); } data_proposals.push((lane_id.clone(), proposals)); @@ -302,33 +311,48 @@ impl StoredDaIndexerClient { async fn fetch_data_proposal( &self, + block_height: BlockHeight, current_chain_timestamp: &str, lane_id: &LaneId, dp_hash: &hyli_model::DataProposalHash, ) -> Result { + let object_name = data_proposal_object_name( + &self.config.gcs_conf.gcs_prefix, + current_chain_timestamp, + lane_id, + dp_hash, + ); let mut reader = self .gcs_client .read_object( bucket_path(&self.config.gcs_conf.gcs_bucket), - data_proposal_object_name( - &self.config.gcs_conf.gcs_prefix, - current_chain_timestamp, - lane_id, - dp_hash, - ), + object_name.clone(), ) .send() - .await?; + .await + .with_context(|| { + format!( + "Fetching data proposal for block {block_height} from GCS object {}/{} (lane {}, hash {})", + self.config.gcs_conf.gcs_bucket, object_name, lane_id, dp_hash + ) + })?; let mut bytes = Vec::new(); while let Some(chunk) = reader.next().await.transpose()? 
{ bytes.extend_from_slice(&chunk); } - let proposal = borsh::from_slice::(&bytes) - .context("Deserializing data proposal from GCS")?; + let proposal = borsh::from_slice::(&bytes).with_context(|| { + format!( + "Deserializing data proposal for block {block_height} from GCS object {}/{}", + self.config.gcs_conf.gcs_bucket, object_name + ) + })?; if proposal.hashed() != *dp_hash { return Err(anyhow!( - "Fetched data proposal hash mismatch for lane {}", - lane_id + "Fetched data proposal hash mismatch for block {} on lane {} from {}/{}", + block_height, + lane_id, + self.config.gcs_conf.gcs_bucket, + object_name )); } Ok(proposal) From a2ed59c5b6ff9939836a50088570fb1943907293 Mon Sep 17 00:00:00 2001 From: Alexandre Careil Date: Wed, 1 Apr 2026 16:26:48 +0200 Subject: [PATCH 18/22] Buffer DPs until chain timestamp is known --- src/mempool.rs | 6 ++ src/mempool/module.rs | 1 + src/mempool/own_lane.rs | 6 ++ src/mempool/verify_tx.rs | 120 ++++++++++++++++++++++++++++++- src/shared_storage/durability.rs | 7 ++ 5 files changed, 137 insertions(+), 3 deletions(-) diff --git a/src/mempool.rs b/src/mempool.rs index 66ccd1b38..937a18949 100644 --- a/src/mempool.rs +++ b/src/mempool.rs @@ -102,6 +102,8 @@ pub struct MempoolStore { // Skipped to clear on reset #[borsh(skip)] buffered_entries: BTreeMap>, + #[borsh(skip)] + pending_chain_timestamp_entries: BTreeMap>, // Skipped to clear on reset #[borsh(skip)] buffered_votes: BTreeMap>>, @@ -229,6 +231,10 @@ pub enum ProcessedDPEvent { } impl Mempool { + pub(super) fn requires_current_chain_timestamp_for_dp_durability(&self) -> bool { + self.conf.data_proposal_durability.gcs_enabled() + } + pub(super) fn restore_inflight_work(&mut self) { let txs_to_restore = self .inner diff --git a/src/mempool/module.rs b/src/mempool/module.rs index ade4dd064..10e2d93dd 100644 --- a/src/mempool/module.rs +++ b/src/mempool/module.rs @@ -107,6 +107,7 @@ impl Module for Mempool { 
timestamp_to_folder_name(block.signed_block.consensus_proposal.timestamp.0)?; self.durability .set_current_chain_timestamp(current_chain_timestamp); + self.flush_pending_chain_timestamp_entries().await?; } // In this p2p mode we don't receive consensus events so we must update manually. if self.conf.p2p.mode == P2pMode::LaneManager { diff --git a/src/mempool/own_lane.rs b/src/mempool/own_lane.rs index 38c71a318..c826daea2 100644 --- a/src/mempool/own_lane.rs +++ b/src/mempool/own_lane.rs @@ -176,6 +176,12 @@ impl super::Mempool { trace!("Skipping own-lane DP creation until first CCP is received"); return Ok(false); } + if self.requires_current_chain_timestamp_for_dp_durability() + && !self.durability.has_current_chain_timestamp() + { + trace!("Skipping own-lane DP creation until current_chain_timestamp is available"); + return Ok(false); + } trace!("🐣 Prepare new owned data proposal"); let mut started = false; let lane_ids: Vec = self.waiting_dissemination_txs.keys().cloned().collect(); diff --git a/src/mempool/verify_tx.rs b/src/mempool/verify_tx.rs index 8f2aaa735..452f6f0f5 100644 --- a/src/mempool/verify_tx.rs +++ b/src/mempool/verify_tx.rs @@ -19,6 +19,7 @@ pub enum DataProposalVerdict { Process, Empty, Wait, + WaitForChainTimestamp, Vote, VotePendingPersistence, Refuse, @@ -69,6 +70,13 @@ impl super::Mempool { ); return Ok(()); } + Some(DataProposalVerdict::WaitForChainTimestamp) => { + debug!( + "Still waiting on current_chain_timestamp for DataProposal {:?} on lane {}", + received_hash, lane_id + ); + return Ok(()); + } Some(DataProposalVerdict::Vote) => { // Resend our vote // First fetch the lane size, if we somehow don't have it ignore. 
@@ -147,6 +155,26 @@ impl super::Mempool { self.metrics.add_hashed_dp(lane_id); let data_proposal_hash = data_proposal.hashed(); + if self.requires_current_chain_timestamp_for_dp_durability() + && !self.durability.has_current_chain_timestamp() + { + self.cached_dp_votes.insert( + (lane_id.clone(), data_proposal_hash.clone()), + DataProposalVerdict::WaitForChainTimestamp, + ); + debug!( + "Buffering DataProposal {} on lane {} until current_chain_timestamp is available", + data_proposal_hash, lane_id + ); + self.pending_chain_timestamp_entries + .entry(lane_id.clone()) + .or_default() + .entry(data_proposal_hash) + .and_modify(|(votes, _)| votes.push(vote.clone())) + .or_insert((vec![vote], data_proposal)); + return Ok(()); + } + // Overlap durable persistence with proposal processing once we know the canonical hash. self.durability .prime_persistence(lane_id.clone(), &data_proposal)?; @@ -162,6 +190,11 @@ impl super::Mempool { lane_id ); } + DataProposalVerdict::WaitForChainTimestamp => { + unreachable!( + "DataProposal should already have been buffered until current_chain_timestamp exists" + ); + } DataProposalVerdict::Vote => { let vote_ready = Self::wait_for_persistence_before_vote( &self.durability, @@ -276,6 +309,9 @@ impl super::Mempool { DataProposalVerdict::Wait => { unreachable!("DataProposal has already been processed"); } + DataProposalVerdict::WaitForChainTimestamp => { + unreachable!("DataProposal should be flushed once current_chain_timestamp exists"); + } DataProposalVerdict::Vote => { trace!("Send vote for DataProposal"); let crypto = self.crypto.clone(); @@ -592,6 +628,32 @@ impl super::Mempool { self.send_vote(lane_id, received_hash.clone(), lane_size)?; Ok(true) } + + pub(super) async fn flush_pending_chain_timestamp_entries(&mut self) -> Result<()> { + if self.pending_chain_timestamp_entries.is_empty() { + return Ok(()); + } + + let pending = std::mem::take(&mut self.pending_chain_timestamp_entries); + for (lane_id, entries) in pending { + for 
(data_proposal_hash, (mut votes, data_proposal)) in entries { + let Some(vote) = votes.pop() else { + warn!( + "No lane operator vote stored for current_chain_timestamp-buffered proposal {:?}", + data_proposal_hash + ); + continue; + }; + debug!( + "Flushing DataProposal {} on lane {} after current_chain_timestamp became available", + data_proposal_hash, lane_id + ); + self.on_hashed_data_proposal(&lane_id, data_proposal, vote) + .await?; + } + } + Ok(()) + } } #[cfg(test)] @@ -606,9 +668,9 @@ pub mod test { }; use hyli_crypto::BlstCrypto; use hyli_model::{ - BlobIndex, BlobProofOutput, ContractName, DataProposalHash, HyliOutput, ProgramId, - ProofData, SignedByValidator, Transaction, TransactionData, VerifiedProofTransaction, - Verifier, + BlobIndex, BlobProofOutput, ContractName, DataProposalHash, HyliOutput, LaneBytesSize, + ProgramId, ProofData, SignedByValidator, Transaction, TransactionData, + VerifiedProofTransaction, Verifier, }; #[test_log::test(tokio::test)] @@ -763,4 +825,56 @@ pub mod test { Ok(()) } + + #[test_log::test(tokio::test)] + async fn test_buffers_data_proposal_until_current_chain_timestamp_is_available() -> Result<()> { + let mut ctx = MempoolTestCtx::new("mempool").await; + let mut conf = crate::utils::conf::Conf::default(); + conf.data_proposal_durability.gcs_bucket = "test-bucket".into(); + conf.data_proposal_durability.save_data_proposals = true; + ctx.mempool.conf = std::sync::Arc::new(conf); + + let lane_id = LaneId::new(ctx.mempool.crypto.validator_pubkey().clone()); + let data_proposal = DataProposal::new_root(lane_id.clone(), vec![Transaction::default()]); + let data_proposal_hash = data_proposal.hashed(); + let vote = ctx + .mempool + .crypto + .sign((data_proposal_hash.clone(), LaneBytesSize(0)))?; + + ctx.mempool + .on_hashed_data_proposal(&lane_id, data_proposal.clone(), vote) + .await?; + + assert_eq!( + ctx.mempool + .cached_dp_votes + .get(&(lane_id.clone(), data_proposal_hash.clone())), + 
Some(&DataProposalVerdict::WaitForChainTimestamp) + ); + assert!(ctx + .mempool + .pending_chain_timestamp_entries + .get(&lane_id) + .is_some_and(|entries| entries.contains_key(&data_proposal_hash))); + + ctx.mempool + .durability + .set_current_chain_timestamp("2026-04-01T14-13-55Z".into()); + ctx.mempool.flush_pending_chain_timestamp_entries().await?; + + assert_eq!( + ctx.mempool + .cached_dp_votes + .get(&(lane_id.clone(), data_proposal_hash.clone())), + Some(&DataProposalVerdict::Process) + ); + assert!(ctx + .mempool + .pending_chain_timestamp_entries + .get(&lane_id) + .is_none_or(|entries| !entries.contains_key(&data_proposal_hash))); + + Ok(()) + } } diff --git a/src/shared_storage/durability.rs b/src/shared_storage/durability.rs index ee4d90b43..26b195c4b 100644 --- a/src/shared_storage/durability.rs +++ b/src/shared_storage/durability.rs @@ -193,6 +193,13 @@ impl DataProposalDurability { *guard = Some(current_chain_timestamp); } + pub fn has_current_chain_timestamp(&self) -> bool { + self.current_chain_timestamp + .read() + .unwrap_or_else(|poisoned| poisoned.into_inner()) + .is_some() + } + pub fn prime_persistence(&self, lane_id: LaneId, data_proposal: &DataProposal) -> Result<()> { let mut canonical = data_proposal.clone(); canonical.remove_proofs(); From 2707fb2dbda953ed6161579ecb42e6cd03270529 Mon Sep 17 00:00:00 2001 From: Alexandre Careil Date: Thu, 2 Apr 2026 10:54:24 +0200 Subject: [PATCH 19/22] Persist synced DPs before hole fill --- src/mempool/block_construction.rs | 84 +++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/src/mempool/block_construction.rs b/src/mempool/block_construction.rs index 27a9871a9..97f88f22b 100644 --- a/src/mempool/block_construction.rs +++ b/src/mempool/block_construction.rs @@ -109,6 +109,7 @@ impl super::Mempool { .clone(); let next_hole = Self::fill_hole_from_entry( &mut self.lanes, + &self.durability, &mut self.bus, buc, &lane_id, @@ -228,6 +229,7 @@ impl super::Mempool { }; match 
Self::fill_hole_from_entry( &mut self.lanes, + &self.durability, &mut self.bus, buc, &lane_id, @@ -263,6 +265,7 @@ impl super::Mempool { #[expect(clippy::too_many_arguments, reason = "Split to avoid double borrowing")] fn fill_hole_from_entry( lanes: &mut super::LanesStorage, + durability: &crate::shared_storage::DataProposalDurability, bus: &mut MempoolBusClient, buc: &mut BlockUnderConstruction, lane_id: &LaneId, @@ -300,6 +303,7 @@ impl super::Mempool { signatures, cached_poda: None, }; + durability.prime_persistence(lane_id.clone(), &data_proposal)?; lanes.put_no_verification(lane_id.clone(), (metadata, data_proposal))?; debug!( "Filled hole {} for lane {} in BUC (slot: {})", @@ -758,16 +762,39 @@ impl super::Mempool { #[cfg(test)] pub mod test { + use std::sync::{Arc, Mutex}; + + use anyhow::Result; use hyli_crypto::BlstCrypto; use staking::state::Staking; use utils::TimestampMs; use crate::mempool::MempoolNetMessage; + use crate::shared_storage::{DataProposalDurability, DurabilityBackend}; use crate::tests::autobahn_testing_macros::assert_chanmsg_matches; use super::super::tests::*; use super::*; + #[derive(Default)] + struct RecordingDurabilityBackend { + uploads: Mutex>, + } + + impl DurabilityBackend for RecordingDurabilityBackend { + fn upload_data_proposal( + &self, + lane_id: LaneId, + dp_hash: DataProposalHash, + _payload: Vec, + _current_chain_timestamp: Option, + ) -> std::pin::Pin> + Send + 'static>> + { + self.uploads.lock().unwrap().push((lane_id, dp_hash)); + Box::pin(async { Ok(()) }) + } + } + #[test_log::test(tokio::test)] async fn signed_block_basic() -> Result<()> { let mut ctx = MempoolTestCtx::new("mempool").await; @@ -807,6 +834,63 @@ pub mod test { Ok(()) } + #[test_log::test(tokio::test)] + async fn sync_reply_hole_fill_primes_durability() -> Result<()> { + let mut ctx = MempoolTestCtx::new("mempool").await; + let backend = Arc::new(RecordingDurabilityBackend::default()); + ctx.mempool.durability = + 
DataProposalDurability::new_in_memory(backend.clone() as Arc); + + let peer_crypto = BlstCrypto::new("peer-sync-durability").unwrap(); + let lane_id = LaneId::new(peer_crypto.validator_pubkey().clone()); + let data_proposal = DataProposal::new_root(lane_id.clone(), vec![Transaction::default()]); + let dp_hash = data_proposal.hashed(); + let lane_size = LaneBytesSize(data_proposal.estimate_size() as u64); + let signatures = vec![peer_crypto.sign((dp_hash.clone(), lane_size))?]; + + ctx.mempool + .inner + .blocks_under_contruction + .push_back(BlockUnderConstruction { + from: None, + ccp: CommittedConsensusProposal { + consensus_proposal: ConsensusProposal { + slot: 1, + cut: vec![( + lane_id.clone(), + dp_hash.clone(), + lane_size, + AggregateSignature::default(), + )], + staking_actions: vec![], + timestamp: TimestampMs(0), + parent_hash: b"test".into(), + }, + staking: Staking::default(), + certificate: AggregateSignature::default(), + }, + holes_tops: HashMap::from([(lane_id.clone(), (dp_hash.clone(), lane_size))]), + holes_materialized: false, + }); + + ctx.mempool + .on_hashed_sync_reply( + lane_id.clone(), + signatures, + data_proposal.clone(), + dp_hash.clone(), + ) + .await?; + + assert!(ctx.mempool.lanes.contains(&lane_id, &dp_hash)); + assert_eq!( + backend.uploads.lock().unwrap().as_slice(), + &[(lane_id, dp_hash)] + ); + + Ok(()) + } + #[test_log::test(tokio::test)] async fn proofs_deleted_after_commit() -> Result<()> { use crate::model::{ From 94ddfb085eb951b1347a69902a80332b52914cbd Mon Sep 17 00:00:00 2001 From: Alexandre Careil Date: Thu, 2 Apr 2026 14:54:11 +0200 Subject: [PATCH 20/22] Gate mempool storage on DP durability --- src/mempool/verify_tx.rs | 91 +++++++++++++++++++++++++++++++++------- 1 file changed, 77 insertions(+), 14 deletions(-) diff --git a/src/mempool/verify_tx.rs b/src/mempool/verify_tx.rs index 452f6f0f5..13d9948e4 100644 --- a/src/mempool/verify_tx.rs +++ b/src/mempool/verify_tx.rs @@ -313,6 +313,19 @@ impl super::Mempool { 
unreachable!("DataProposal should be flushed once current_chain_timestamp exists"); } DataProposalVerdict::Vote => { + if !vote_ready { + debug!( + "Delaying storage of DataProposal {:?} on lane {} until durable persistence completes", + data_proposal.hashed(), + lane_id + ); + self.cached_dp_votes.insert( + (lane_id.clone(), data_proposal.hashed()), + DataProposalVerdict::VotePendingPersistence, + ); + return Ok(()); + } + trace!("Send vote for DataProposal"); let crypto = self.crypto.clone(); let (hash, size) = @@ -323,16 +336,9 @@ impl super::Mempool { data_proposal_hash: hash.clone(), cumul_size: size, })?; - if vote_ready { - self.cached_dp_votes - .insert((lane_id.clone(), hash.clone()), DataProposalVerdict::Vote); - self.send_vote(&lane_id, hash.clone(), size)?; - } else { - self.cached_dp_votes.insert( - (lane_id.clone(), hash.clone()), - DataProposalVerdict::VotePendingPersistence, - ); - } + self.cached_dp_votes + .insert((lane_id.clone(), hash.clone()), DataProposalVerdict::Vote); + self.send_vote(&lane_id, hash.clone(), size)?; while let Some(vote) = self .inner @@ -611,10 +617,20 @@ impl super::Mempool { return Ok(true); } - let Ok(lane_size) = self.lanes.get_lane_size_at(lane_id, received_hash) else { - self.cached_dp_votes - .remove(&(lane_id.clone(), received_hash.clone())); - return Ok(false); + let lane_size = match self.lanes.get_lane_size_at(lane_id, received_hash) { + Ok(size) => size, + Err(_) => { + let crypto = self.crypto.clone(); + let (hash, size) = + self.lanes + .store_data_proposal(&crypto, lane_id, data_proposal.clone())?; + self.send_dissemination_event(DisseminationEvent::DpStored { + lane_id: lane_id.clone(), + data_proposal_hash: hash.clone(), + cumul_size: size, + })?; + size + } }; debug!( @@ -658,6 +674,8 @@ impl super::Mempool { #[cfg(test)] pub mod test { + use std::{future::Future, pin::Pin, sync::Arc}; + use super::*; use crate::{ mempool::{ @@ -665,6 +683,7 @@ pub mod test { MempoolNetMessage, }, 
p2p::network::HeaderSigner, + shared_storage::{DataProposalDurability, DurabilityBackend}, }; use hyli_crypto::BlstCrypto; use hyli_model::{ @@ -673,6 +692,20 @@ pub mod test { VerifiedProofTransaction, Verifier, }; + struct FailingDurabilityBackend; + + impl DurabilityBackend for FailingDurabilityBackend { + fn upload_data_proposal( + &self, + _lane_id: LaneId, + _dp_hash: DataProposalHash, + _payload: Vec, + _current_chain_timestamp: Option, + ) -> Pin> + Send + 'static>> { + Box::pin(async { anyhow::bail!("durability failure") }) + } + } + #[test_log::test(tokio::test)] async fn test_get_verdict() { let mut ctx = MempoolTestCtx::new("mempool").await; @@ -877,4 +910,34 @@ pub mod test { Ok(()) } + + #[test_log::test(tokio::test)] + async fn test_vote_dp_is_not_stored_when_durability_fails() -> Result<()> { + let mut ctx = MempoolTestCtx::new("mempool").await; + ctx.mempool.durability = + DataProposalDurability::new_in_memory(Arc::new(FailingDurabilityBackend)); + + let lane_id = LaneId::new(ctx.mempool.crypto.validator_pubkey().clone()); + let data_proposal = DataProposal::new_root(lane_id.clone(), vec![Transaction::default()]); + let data_proposal_hash = data_proposal.hashed(); + let vote = ctx + .mempool + .crypto + .sign((data_proposal_hash.clone(), LaneBytesSize(0)))?; + + ctx.mempool + .on_hashed_data_proposal(&lane_id, data_proposal.clone(), vote) + .await?; + ctx.handle_processed_data_proposals().await; + + assert_eq!( + ctx.mempool + .cached_dp_votes + .get(&(lane_id.clone(), data_proposal_hash.clone())), + Some(&DataProposalVerdict::VotePendingPersistence) + ); + assert!(!ctx.mempool.lanes.contains(&lane_id, &data_proposal_hash)); + + Ok(()) + } } From fa993c54c516e48bb0f2712a98bb69c8f83c3213 Mon Sep 17 00:00:00 2001 From: Alexandre Careil Date: Thu, 2 Apr 2026 17:09:08 +0200 Subject: [PATCH 21/22] Add GCS event indexer client --- Cargo.lock | 2 + Cargo.toml | 3 + src/entrypoint.rs | 18 +- src/indexer_da_client.rs | 485 
++++++++++++++++++++++++++++++++++- src/utils/conf.rs | 2 + src/utils/conf_defaults.toml | 1 + 6 files changed, 495 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 143a4bc36..1bbb5aafe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5398,6 +5398,7 @@ dependencies = [ "async-stream", "axum 0.8.8", "axum-test", + "base64 0.22.1", "borsh", "bytes", "chrono", @@ -5432,6 +5433,7 @@ dependencies = [ "native-tls", "paste", "rand 0.9.2", + "reqwest 0.13.2", "risc0-zkvm", "rustls 0.23.36", "seq-macro", diff --git a/Cargo.toml b/Cargo.toml index c698879b0..085c352ac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -249,6 +249,7 @@ hyli-bus = { workspace = true } anyhow = { workspace = true } +base64 = { workspace = true } borsh = { workspace = true, features = ["rc"] } chrono = { workspace = true, features = ["std", "serde"] } hex = { workspace = true, features = ["std"] } @@ -267,9 +268,11 @@ clap = { workspace = true, features = ["derive"] } config = { workspace = true, default-features = false, features = ["toml"] } futures = { workspace = true } google-cloud-storage = { workspace = true } +google-cloud-auth = { version = "1.5", default-features = false } indexmap = { workspace = true, features = ["serde"] } paste = { workspace = true } rand = { workspace = true } +reqwest = { version = "0.13.2", default-features = false, features = ["json", "rustls"] } sqlx = { workspace = true, features = [ "runtime-tokio", "postgres", diff --git a/src/entrypoint.rs b/src/entrypoint.rs index 19d7a8fd5..bd0d2b094 100644 --- a/src/entrypoint.rs +++ b/src/entrypoint.rs @@ -7,7 +7,10 @@ use crate::{ explorer::Explorer, genesis::Genesis, indexer::Indexer, - indexer_da_client::{StoredDaIndexerClient, StoredDaIndexerClientCtx}, + indexer_da_client::{ + GcsEventIndexerClient, GcsEventIndexerClientCtx, StoredDaIndexerClient, + StoredDaIndexerClientCtx, + }, mempool::{dissemination::DisseminationManager, Mempool}, model::{api::NodeInfo, ContractName, SharedRunContext}, p2p::P2P, 
@@ -622,7 +625,18 @@ pub async fn common_main( handler.build_module::(ctx.clone()).await?; } else if config.run_indexer { - if config.data_proposal_durability.gcs_enabled() + if !config.gcs_stored_block_subscription.is_empty() + && config.data_proposal_durability.gcs_enabled() + { + handler + .build_module::(GcsEventIndexerClientCtx { + data_directory: config.data_directory.clone(), + timeout_client_secs: config.da_timeout_client_secs, + gcs_conf: config.data_proposal_durability.clone(), + subscription: config.gcs_stored_block_subscription.clone(), + }) + .await?; + } else if config.data_proposal_durability.gcs_enabled() && !config.da_read_from.starts_with("folder:") && !config.da_read_from.starts_with("blob:") { diff --git a/src/indexer_da_client.rs b/src/indexer_da_client.rs index e947f6c9e..3158a453b 100644 --- a/src/indexer_da_client.rs +++ b/src/indexer_da_client.rs @@ -5,6 +5,8 @@ use std::{ }; use anyhow::{anyhow, Context, Result}; +use base64::{engine::general_purpose::STANDARD as BASE64, Engine as _}; +use google_cloud_auth::credentials::{AccessTokenCredentials, Builder}; use google_cloud_storage::client::Storage as GcsStorageClient; use hyli_bus::modules::ModulePersistOutput; use hyli_model::{ @@ -21,6 +23,8 @@ use hyli_modules::{ }, utils::da_codec::DataAvailabilityClient, }; +use reqwest::Client as HttpClient; +use serde::{Deserialize, Serialize}; use tracing::{debug, info, warn}; use crate::{ @@ -37,6 +41,13 @@ pub struct StoredDaIndexerClientCtx { pub gcs_conf: DataProposalDurabilityConf, } +pub struct GcsEventIndexerClientCtx { + pub data_directory: PathBuf, + pub timeout_client_secs: u64, + pub gcs_conf: DataProposalDurabilityConf, + pub subscription: String, +} + module_bus_client! 
{ #[derive(Debug)] struct StoredDaIndexerClientBus { @@ -56,6 +67,58 @@ pub struct StoredDaIndexerClient { deadline: Instant, } +pub struct GcsEventIndexerClient { + bus: StoredDaIndexerClientBus, + config: GcsEventIndexerClientCtx, + gcs_client: GcsStorageClient, + http_client: HttpClient, + access_token_credentials: AccessTokenCredentials, + current_block: BlockHeight, + current_chain_timestamp: Option, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct PubsubPullResponse { + #[serde(default)] + received_messages: Vec, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct PubsubReceivedMessage { + ack_id: String, + message: PubsubMessage, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct PubsubMessage { + #[serde(default)] + data: String, +} + +#[derive(Debug, Serialize)] +#[serde(rename_all = "camelCase")] +struct PubsubPullRequest { + max_messages: u32, +} + +#[derive(Debug, Serialize)] +#[serde(rename_all = "camelCase")] +struct PubsubAcknowledgeRequest { + ack_ids: Vec, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct GcsObjectNotification { + #[serde(default)] + event_type: String, + #[serde(default, alias = "name")] + object_id: String, +} + impl Module for StoredDaIndexerClient { type Context = StoredDaIndexerClientCtx; @@ -92,21 +155,56 @@ impl Module for StoredDaIndexerClient { } async fn persist(&mut self) -> Result { - let file = PathBuf::from("da_start_height.bin"); - let checksum = - NodeStateModule::save_on_disk(&self.config.data_directory, &file, &self.current_block)?; - - let mut persisted = vec![(self.config.data_directory.join(file), checksum)]; - if let Some(timestamp_folder) = self.current_chain_timestamp.clone() { - let mut store = NodeStateStore::default(); - store.current_chain_timestamp = Some(timestamp_folder); - if let Some(timestamp_file) = - persist_current_chain_timestamp(&self.config.data_directory, &store)? 
- { - persisted.push(timestamp_file); + persist_indexer_progress( + &self.config.data_directory, + self.current_block, + self.current_chain_timestamp.clone(), + ) + } +} + +impl Module for GcsEventIndexerClient { + type Context = GcsEventIndexerClientCtx; + + async fn build(bus: SharedMessageBus, ctx: Self::Context) -> Result { + let start_block_in_file = match NodeStateModule::load_from_disk::( + &ctx.data_directory, + "da_start_height.bin".as_ref(), + )? { + Some(height) => height, + None => { + warn!("Starting GcsEventIndexerClient from default block height."); + BlockHeight(0) } - } - Ok(persisted) + }; + + let bus = StoredDaIndexerClientBus::new_from_bus(bus.new_handle()).await; + let gcs_client = GcsStorageClient::builder().build().await?; + let access_token_credentials = Builder::default() + .with_scopes(["https://www.googleapis.com/auth/cloud-platform"]) + .build_access_token_credentials()?; + + Ok(Self { + bus, + current_block: start_block_in_file, + current_chain_timestamp: load_current_chain_timestamp(&ctx.data_directory).ok(), + config: ctx, + gcs_client, + http_client: HttpClient::builder().build()?, + access_token_credentials, + }) + } + + async fn run(&mut self) -> Result<()> { + self.start().await + } + + async fn persist(&mut self) -> Result { + persist_indexer_progress( + &self.config.data_directory, + self.current_block, + self.current_chain_timestamp.clone(), + ) } } @@ -359,6 +457,152 @@ impl StoredDaIndexerClient { } } +impl GcsEventIndexerClient { + async fn start(&mut self) -> Result<()> { + self.process_ready_blocks().await?; + + loop { + let ack_ids = self.pull_notification_batch().await?; + if !ack_ids.is_empty() { + self.process_ready_blocks().await?; + self.acknowledge(ack_ids).await?; + } else { + tokio::time::sleep(Duration::from_secs(1)).await; + } + } + } + + async fn pull_notification_batch(&mut self) -> Result> { + let access_token = self + .access_token_credentials + .access_token() + .await + .context("Fetching Pub/Sub access 
token")?; + let response = self + .http_client + .post(format!( + "https://pubsub.googleapis.com/v1/{}:pull", + self.config.subscription + )) + .bearer_auth(access_token.token) + .json(&PubsubPullRequest { max_messages: 32 }) + .send() + .await? + .error_for_status()? + .json::() + .await?; + + let mut ack_ids = Vec::with_capacity(response.received_messages.len()); + for received in response.received_messages { + let Some((timestamp, _height)) = parse_notification_object_name( + &self.config.gcs_conf.gcs_prefix, + &received.message.data, + )? + else { + ack_ids.push(received.ack_id); + continue; + }; + + if self.current_chain_timestamp.is_none() { + self.current_chain_timestamp = Some(timestamp.clone()); + persist_current_chain_timestamp_value(&self.config.data_directory, timestamp)?; + } + + ack_ids.push(received.ack_id); + } + + Ok(ack_ids) + } + + async fn acknowledge(&self, ack_ids: Vec) -> Result<()> { + if ack_ids.is_empty() { + return Ok(()); + } + + let access_token = self + .access_token_credentials + .access_token() + .await + .context("Fetching Pub/Sub access token for acknowledge")?; + self.http_client + .post(format!( + "https://pubsub.googleapis.com/v1/{}:acknowledge", + self.config.subscription + )) + .bearer_auth(access_token.token) + .json(&PubsubAcknowledgeRequest { ack_ids }) + .send() + .await? 
+ .error_for_status()?; + Ok(()) + } + + async fn process_ready_blocks(&mut self) -> Result<()> { + loop { + let Some(current_chain_timestamp) = self.current_chain_timestamp.clone() else { + return Ok(()); + }; + + let maybe_block = fetch_stored_signed_block_from_gcs( + &self.gcs_client, + &self.config.gcs_conf, + &current_chain_timestamp, + self.current_block, + ) + .await?; + + let Some(block) = maybe_block else { + return Ok(()); + }; + + let signed_block = reconstruct_signed_block_from_gcs( + &self.gcs_client, + &self.config.gcs_conf, + &self.config.data_directory, + &mut self.current_chain_timestamp, + block, + ) + .await + .with_context(|| { + format!("Reconstructing stored signed block {}", self.current_block) + })?; + self.bus + .send_waiting_if_full(DataEvent::OrderedSignedBlock(signed_block)) + .await?; + self.current_block = self.current_block + 1; + } + } +} + +fn persist_indexer_progress( + data_directory: &std::path::Path, + current_block: BlockHeight, + current_chain_timestamp: Option, +) -> Result { + let file = PathBuf::from("da_start_height.bin"); + let checksum = NodeStateModule::save_on_disk(data_directory, &file, &current_block)?; + + let mut persisted = vec![(data_directory.join(file), checksum)]; + if let Some(timestamp_folder) = current_chain_timestamp { + let mut store = NodeStateStore::default(); + store.current_chain_timestamp = Some(timestamp_folder); + if let Some(timestamp_file) = persist_current_chain_timestamp(data_directory, &store)?
{ + persisted.push(timestamp_file); + } + } + Ok(persisted) +} + +fn persist_current_chain_timestamp_value( + data_directory: &std::path::Path, + current_chain_timestamp: String, +) -> Result<()> { + let mut store = NodeStateStore::default(); + store.current_chain_timestamp = Some(current_chain_timestamp); + let _ = persist_current_chain_timestamp(data_directory, &store)?; + Ok(()) +} + fn bucket_path(bucket: &str) -> String { if bucket.starts_with("projects/") { bucket.to_string() @@ -378,3 +622,216 @@ fn data_proposal_object_name( gcs_prefix, current_chain_timestamp, lane_id, dp_hash ) } + +fn stored_signed_block_object_name( + gcs_prefix: &str, + current_chain_timestamp: &str, + height: BlockHeight, +) -> String { + format!( + "{}/{}/stored_signed_blocks/block_{}.bin", + gcs_prefix, current_chain_timestamp, height.0 + ) +} + +async fn fetch_stored_signed_block_from_gcs( + gcs_client: &GcsStorageClient, + gcs_conf: &DataProposalDurabilityConf, + current_chain_timestamp: &str, + height: BlockHeight, +) -> Result> { + let object_name = + stored_signed_block_object_name(&gcs_conf.gcs_prefix, current_chain_timestamp, height); + let mut reader = match gcs_client + .read_object(bucket_path(&gcs_conf.gcs_bucket), object_name.clone()) + .send() + .await + { + Ok(reader) => reader, + Err(err) + if err.to_string().contains("404") || err.to_string().contains("No such object") => + { + return Ok(None); + } + Err(err) => { + return Err(err).with_context(|| { + format!( + "Fetching stored signed block {height} from GCS object {}/{}", + gcs_conf.gcs_bucket, object_name + ) + }); + } + }; + + let mut bytes = Vec::new(); + while let Some(chunk) = reader.next().await.transpose()? 
{ + bytes.extend_from_slice(&chunk); + } + let block = borsh::from_slice::(&bytes).with_context(|| { + format!( + "Deserializing stored signed block {height} from GCS object {}/{}", + gcs_conf.gcs_bucket, object_name + ) + })?; + Ok(Some(block)) +} + +async fn reconstruct_signed_block_from_gcs( + gcs_client: &GcsStorageClient, + gcs_conf: &DataProposalDurabilityConf, + data_directory: &std::path::Path, + current_chain_timestamp: &mut Option, + block: StoredSignedBlock, +) -> Result { + let current_chain_timestamp = + resolve_current_chain_timestamp(data_directory, current_chain_timestamp, &block)? + .context("Current chain timestamp is required to fetch data proposals from GCS")?; + + let mut data_proposals = Vec::with_capacity(block.data_proposals.len()); + for (lane_id, hashes) in &block.data_proposals { + let mut proposals = Vec::with_capacity(hashes.len()); + for dp_hash in hashes { + proposals.push( + fetch_data_proposal_from_gcs( + gcs_client, + gcs_conf, + block.height(), + &current_chain_timestamp, + lane_id, + dp_hash, + ) + .await?, + ); + } + data_proposals.push((lane_id.clone(), proposals)); + } + + Ok(SignedBlock { + data_proposals, + consensus_proposal: block.consensus_proposal, + certificate: block.certificate, + }) +} + +fn resolve_current_chain_timestamp( + data_directory: &std::path::Path, + current_chain_timestamp: &mut Option, + block: &StoredSignedBlock, +) -> Result> { + if let Some(current_chain_timestamp) = current_chain_timestamp { + return Ok(Some(current_chain_timestamp.clone())); + } + + if block.height() != BlockHeight(0) { + return Ok(None); + } + + let resolved = timestamp_to_folder_name(block.consensus_proposal.timestamp.0)?; + *current_chain_timestamp = Some(resolved.clone()); + persist_current_chain_timestamp_value(data_directory, resolved.clone())?; + Ok(Some(resolved)) +} + +async fn fetch_data_proposal_from_gcs( + gcs_client: &GcsStorageClient, + gcs_conf: &DataProposalDurabilityConf, + block_height: BlockHeight, +
current_chain_timestamp: &str, + lane_id: &LaneId, + dp_hash: &hyli_model::DataProposalHash, +) -> Result { + let object_name = data_proposal_object_name( + &gcs_conf.gcs_prefix, + current_chain_timestamp, + lane_id, + dp_hash, + ); + let mut reader = gcs_client + .read_object(bucket_path(&gcs_conf.gcs_bucket), object_name.clone()) + .send() + .await + .with_context(|| { + format!( + "Fetching data proposal for block {block_height} from GCS object {}/{} (lane {}, hash {})", + gcs_conf.gcs_bucket, object_name, lane_id, dp_hash + ) + })?; + let mut bytes = Vec::new(); + while let Some(chunk) = reader.next().await.transpose()? { + bytes.extend_from_slice(&chunk); + } + let proposal = borsh::from_slice::(&bytes).with_context(|| { + format!( + "Deserializing data proposal for block {block_height} from GCS object {}/{}", + gcs_conf.gcs_bucket, object_name + ) + })?; + if proposal.hashed() != *dp_hash { + return Err(anyhow!( + "Fetched data proposal hash mismatch for block {} on lane {} from {}/{}", + block_height, + lane_id, + gcs_conf.gcs_bucket, + object_name + )); + } + Ok(proposal) +} + +fn parse_notification_object_name( + gcs_prefix: &str, + encoded_message: &str, +) -> Result> { + let message = BASE64.decode(encoded_message)?; + let notification = serde_json::from_slice::(&message)?; + if !notification.event_type.is_empty() && notification.event_type != "OBJECT_FINALIZE" { + return Ok(None); + } + + let prefix = format!("{gcs_prefix}/"); + let suffix = "/stored_signed_blocks/block_"; + let object_id = notification.object_id; + let Some(rest) = object_id.strip_prefix(&prefix) else { + return Ok(None); + }; + let Some((timestamp, block_suffix)) = rest.split_once(suffix) else { + return Ok(None); + }; + let Some(height) = block_suffix.strip_suffix(".bin") else { + return Ok(None); + }; + let Ok(height) = height.parse::() else { + return Ok(None); + }; + Ok(Some((timestamp.to_string(), BlockHeight(height)))) +} + +#[cfg(test)] +mod gcs_event_tests { + use super::*; + 
+ #[test] + fn parse_notification_object_name_extracts_timestamp_and_height() -> Result<()> { + let payload = serde_json::json!({ + "eventType": "OBJECT_FINALIZE", + "objectId": "camelot/2026-04-02T13-11-49Z/stored_signed_blocks/block_42.bin" + }); + let encoded = BASE64.encode(serde_json::to_vec(&payload)?); + assert_eq!( + parse_notification_object_name("camelot", &encoded)?, + Some(("2026-04-02T13-11-49Z".to_string(), BlockHeight(42))) + ); + Ok(()) + } + + #[test] + fn parse_notification_object_name_ignores_non_finalize_events() -> Result<()> { + let payload = serde_json::json!({ + "eventType": "OBJECT_DELETE", + "objectId": "camelot/2026-04-02T13-11-49Z/stored_signed_blocks/block_42.bin" + }); + let encoded = BASE64.encode(serde_json::to_vec(&payload)?); + assert_eq!(parse_notification_object_name("camelot", &encoded)?, None); + Ok(()) + } +} diff --git a/src/utils/conf.rs b/src/utils/conf.rs index 9d9ee951e..64dc1e0c6 100644 --- a/src/utils/conf.rs +++ b/src/utils/conf.rs @@ -311,6 +311,8 @@ pub struct Conf { pub da_timeout_client_secs: u64, /// Fallback DA server addresses for block requests when blocks are missing pub da_fallback_addresses: Vec, + /// Pub/Sub subscription used by standalone indexers to receive GCS stored-block notifications. + pub gcs_stored_block_subscription: String, /// Websocket configuration pub websocket: NodeWebSocketConfig, diff --git a/src/utils/conf_defaults.toml b/src/utils/conf_defaults.toml index 4ccbff332..b943b634e 100644 --- a/src/utils/conf_defaults.toml +++ b/src/utils/conf_defaults.toml @@ -44,6 +44,7 @@ da_read_from = "127.0.0.1:4141" da_timeout_client_secs = 10 # Fallback DA server addresses for block requests (comma-separated) da_fallback_addresses = [] +gcs_stored_block_subscription = "" [p2p] # "FullValidator" runs a full node, "LaneManager" skips consensus, or "None" to disable most modules. 
From 05c9dddbfc49c3fba5aeaea365717973d62b584d Mon Sep 17 00:00:00 2001 From: Alexandre Careil Date: Fri, 3 Apr 2026 14:26:29 +0200 Subject: [PATCH 22/22] Avoid DA empty-range panic --- src/data_availability.rs | 71 +++++++++++++++++++++++--- src/data_availability/block_storage.rs | 50 ++++++++++++++---- 2 files changed, 103 insertions(+), 18 deletions(-) diff --git a/src/data_availability.rs b/src/data_availability.rs index c7f3e97be..2f8022869 100644 --- a/src/data_availability.rs +++ b/src/data_availability.rs @@ -1192,18 +1192,21 @@ impl DataAvailability { .last() .map_or(start_height, |block| block.height()); - // Collect all blocks from start_height to current highest - let processed_block_hashes: VecDeque<_> = self - .blocks - .range(start_height, highest + 1) - .filter_map(|item| item.ok()) - .collect(); + // If requester starts beyond our current tip, they are already caught up: + // the valid stream response is an empty queue (wait for future blocks), not BlockNotFound. + let processed_block_hashes: VecDeque<_> = if start_height > highest { + VecDeque::new() + } else { + // Collect all blocks from start_height to current highest. + self.blocks + .range(start_height, highest + 1) + .filter_map(|item| item.ok()) + .collect() + }; self.blocks .record_op("range_collect", "by_height", range_start.elapsed()); let expected = highest.0.saturating_sub(start_height.0).saturating_add(1); - // If requester starts beyond our current tip, they are already caught up: - // the valid stream response is an empty queue (wait for future blocks), not BlockNotFound. 
let expected = if start_height > highest { 0 } else { expected }; if processed_block_hashes.len() as u64 != expected { let first_missing = (start_height.0..=highest.0) @@ -2191,6 +2194,58 @@ pub mod tests { } } + #[test_log::test(tokio::test)] + async fn test_stream_from_height_above_tip_returns_empty_stream_without_rejection() { + let tmpdir = tempfile::tempdir().unwrap().keep(); + let mut blocks_storage = Blocks::new(&tmpdir).await.unwrap(); + + let global_bus = crate::bus::SharedMessageBus::new(); + let bus = super::DABusClient::new_from_bus(global_bus.new_handle()).await; + + let mut config: Conf = Conf::new(vec![], None, None).unwrap(); + config.da_server_port = find_available_port().await; + config.da_public_address = format!("127.0.0.1:{}", config.da_server_port); + + let mut block = SignedBlock::default(); + block.consensus_proposal.slot = 0; + blocks_storage.put(block.clone()).unwrap(); + block.consensus_proposal.parent_hash = block.hashed(); + block.consensus_proposal.slot = 1; + blocks_storage.put(block).unwrap(); + + let mut da = super::DataAvailability { + config: config.clone().into(), + bus, + blocks: blocks_storage, + buffered_signed_blocks: Default::default(), + catchupper: Default::default(), + allow_peer_catchup: false, + peer_send_queues: HashMap::new(), + }; + + tokio::spawn(async move { + da.start().await.unwrap(); + }); + + tokio::time::sleep(std::time::Duration::from_millis(100)).await; + + let mut client = + DataAvailabilityClient::connect("client_id", config.da_public_address.clone()) + .await + .unwrap(); + + client + .send(DataAvailabilityRequest::StreamFromHeight(BlockHeight(10))) + .await + .unwrap(); + + let first_event = tokio::time::timeout(Duration::from_millis(500), client.recv()).await; + assert!( + first_event.is_err(), + "No event should be emitted when requester is already beyond tip" + ); + } + #[test_log::test(tokio::test)] async fn test_regular_mode_stops_only_when_reaching_ceiling() { let (tx, _rx) = 
tokio::sync::mpsc::channel::(1); diff --git a/src/data_availability/block_storage.rs b/src/data_availability/block_storage.rs index 93bf1d15e..ed4b17b93 100644 --- a/src/data_availability/block_storage.rs +++ b/src/data_availability/block_storage.rs @@ -736,16 +736,20 @@ impl GcsBlocks { min: BlockHeight, max: BlockHeight, ) -> impl Iterator> { - let values = match self.ensure_index_loaded() { - Ok(()) => self - .state - .read() - .unwrap_or_else(|poisoned| poisoned.into_inner()) - .by_height - .range(min..max) - .map(|(_height, hash)| Ok(hash.clone())) - .collect(), - Err(err) => vec![Err(err)], + let values = if min >= max { + Vec::new() + } else { + match self.ensure_index_loaded() { + Ok(()) => self + .state + .read() + .unwrap_or_else(|poisoned| poisoned.into_inner()) + .by_height + .range(min..max) + .map(|(_height, hash)| Ok(hash.clone())) + .collect(), + Err(err) => vec![Err(err)], + } }; values.into_iter() } @@ -999,4 +1003,30 @@ mod tests { assert!(matches!(blocks.backend, BlocksBackend::Fjall(_))); Ok(()) } + + #[test] + fn gcs_range_returns_empty_when_min_is_not_less_than_max() { + let mut by_height = BTreeMap::new(); + by_height.insert(BlockHeight(0), ConsensusProposalHash::default()); + + let equal: Vec<_> = by_height + .range(BlockHeight(1)..BlockHeight(1)) + .map(|(_, hash)| hash.clone()) + .collect(); + assert!(equal.is_empty()); + + let values: Vec> = if BlockHeight(2) >= BlockHeight(1) + { + Vec::new() + } else { + by_height + .range(BlockHeight(2)..BlockHeight(1)) + .map(|(_, hash)| Ok(hash.clone())) + .collect() + }; + assert!( + values.is_empty(), + "Invalid GCS-backed ranges should return an empty iterator instead of panicking" + ); + } }