Skip to content

Commit dd94651

Browse files
jkrvivian and muXxer committed
feat: disable write stall on fullnodes perpetual db (#6817)
# Description of change Port MystenLabs/sui@42dbb62 From Sui's commit description: Fullnodes that do not prune the `perpetual` DB, especially the `transactions` and `effects` column families, can occasionally run into write stalls that make the fullnode non-operational. Since fullnodes have to accept all writes from checkpoints, throttling writes does not seem to make much sense. Write stalling on validators is left enabled. ## Links to any relevant issues Part of #3990 ## Type of change - Enhancement --------- Co-authored-by: muXxer <[email protected]>
1 parent 132aff6 commit dd94651

File tree

5 files changed

+91
-31
lines changed

5 files changed

+91
-31
lines changed

crates/iota-config/src/node.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,12 @@ pub struct NodeConfig {
250250
#[serde(default)]
251251
pub verifier_signing_config: VerifierSigningConfig,
252252

253+
/// If a value is set, it determines if writes to DB can stall, which can
254+
/// halt the whole process. By default, write stall is enabled on
255+
/// validators but not on fullnodes.
256+
#[serde(skip_serializing_if = "Option::is_none")]
257+
pub enable_db_write_stall: Option<bool>,
258+
253259
#[serde(default, skip_serializing_if = "Option::is_none")]
254260
pub iota_names_config: Option<IotaNamesConfig>,
255261
}

crates/iota-core/src/authority/authority_store_tables.rs

Lines changed: 69 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,10 @@ use typed_store::{
1313
DBMapUtils,
1414
metrics::SamplingInterval,
1515
rocks::{
16-
DBBatch, DBMap, DBOptions, MetricConf, ReadWriteOptions, default_db_options,
16+
DBBatch, DBMap, DBMapTableConfigMap, DBOptions, MetricConf, default_db_options,
1717
read_size_from_env,
1818
util::{empty_compaction_filter, reference_count_merge_operator},
1919
},
20-
rocksdb::Options,
2120
traits::{Map, TableSummary, TypedStoreDebug},
2221
};
2322

@@ -37,6 +36,22 @@ const ENV_VAR_EFFECTS_BLOCK_CACHE_SIZE: &str = "EFFECTS_BLOCK_CACHE_MB";
3736
const ENV_VAR_EVENTS_BLOCK_CACHE_SIZE: &str = "EVENTS_BLOCK_CACHE_MB";
3837
const ENV_VAR_INDIRECT_OBJECTS_BLOCK_CACHE_SIZE: &str = "INDIRECT_OBJECTS_BLOCK_CACHE_MB";
3938

39+
/// Options to apply to every column family of the `perpetual` DB.
40+
#[derive(Default)]
41+
pub struct AuthorityPerpetualTablesOptions {
42+
/// Whether to enable write stalling on all column families.
43+
pub enable_write_stall: bool,
44+
}
45+
46+
impl AuthorityPerpetualTablesOptions {
47+
fn apply_to(&self, mut db_options: DBOptions) -> DBOptions {
48+
if !self.enable_write_stall {
49+
db_options = db_options.disable_write_throttling();
50+
}
51+
db_options
52+
}
53+
}
54+
4055
/// AuthorityPerpetualTables contains data that must be preserved from one epoch
4156
/// to the next.
4257
#[derive(DBMapUtils)]
@@ -56,21 +71,17 @@ pub struct AuthorityPerpetualTables {
5671
/// executed transactions whose effects have not yet been written out,
5772
/// and which must be retried. But, they cannot be retried unless their
5873
/// input objects are still accessible!
59-
#[default_options_override_fn = "objects_table_default_config"]
6074
pub(crate) objects: DBMap<ObjectKey, StoreObjectWrapper>,
6175

62-
#[default_options_override_fn = "indirect_move_objects_table_default_config"]
6376
pub(crate) indirect_move_objects: DBMap<ObjectContentDigest, StoreMoveObjectWrapper>,
6477

6578
/// Object references of currently active objects that can be mutated.
66-
#[default_options_override_fn = "live_owned_object_markers_table_default_config"]
6779
pub(crate) live_owned_object_markers: DBMap<ObjectRef, ()>,
6880

6981
/// This is a map between the transaction digest and the corresponding
7082
/// transaction that's known to be executable. This means that it may
7183
/// have been executed locally, or it may have been synced through
7284
/// state-sync but hasn't been executed yet.
73-
#[default_options_override_fn = "transactions_table_default_config"]
7485
pub(crate) transactions: DBMap<TransactionDigest, TrustedTransaction>,
7586

7687
/// A map between the transaction digest of a certificate to the effects of
@@ -85,7 +96,6 @@ pub struct AuthorityPerpetualTables {
8596
///
8697
/// It's also possible for the effects to be reverted if the transaction
8798
/// didn't make it into the epoch.
88-
#[default_options_override_fn = "effects_table_default_config"]
8999
pub(crate) effects: DBMap<TransactionEffectsDigest, TransactionEffects>,
90100

91101
/// Transactions that have been executed locally on this node. We need this
@@ -99,7 +109,6 @@ pub struct AuthorityPerpetualTables {
99109
// We could potentially remove this if we decided not to provide events in the execution path.
100110
// TODO: Figure out what to do with this table in the long run.
101111
// Also we need a pruning policy for this table. We can prune this table along with tx/effects.
102-
#[default_options_override_fn = "events_table_default_config"]
103112
pub(crate) events: DBMap<(TransactionEventsDigest, usize), Event>,
104113

105114
/// Epoch and checkpoint of transactions finalized by checkpoint
@@ -156,13 +165,45 @@ impl AuthorityPerpetualTables {
156165
parent_path.join("perpetual")
157166
}
158167

159-
pub fn open(parent_path: &Path, db_options: Option<Options>) -> Self {
168+
pub fn open(
169+
parent_path: &Path,
170+
db_options_override: Option<AuthorityPerpetualTablesOptions>,
171+
) -> Self {
172+
let db_options_override = db_options_override.unwrap_or_default();
173+
let db_options =
174+
db_options_override.apply_to(default_db_options().optimize_db_for_write_throughput(4));
175+
let table_options = DBMapTableConfigMap::new(BTreeMap::from([
176+
(
177+
"objects".to_string(),
178+
objects_table_config(db_options.clone()),
179+
),
180+
(
181+
"indirect_move_objects".to_string(),
182+
indirect_move_objects_table_config(db_options.clone()),
183+
),
184+
(
185+
"live_owned_object_markers".to_string(),
186+
live_owned_object_markers_table_config(db_options.clone()),
187+
),
188+
(
189+
"transactions".to_string(),
190+
transactions_table_config(db_options.clone()),
191+
),
192+
(
193+
"effects".to_string(),
194+
effects_table_config(db_options.clone()),
195+
),
196+
(
197+
"events".to_string(),
198+
events_table_config(db_options.clone()),
199+
),
200+
]));
160201
Self::open_tables_read_write(
161202
Self::path(parent_path),
162203
MetricConf::new("perpetual")
163204
.with_sampling(SamplingInterval::new(Duration::from_secs(60), 0)),
164-
db_options,
165-
None,
205+
Some(db_options.options),
206+
Some(table_options),
166207
)
167208
}
168209

@@ -605,57 +646,58 @@ impl Iterator for LiveSetIter<'_> {
605646
}
606647

607648
// These functions are used to initialize the DB tables
608-
fn live_owned_object_markers_table_default_config() -> DBOptions {
649+
fn live_owned_object_markers_table_config(db_options: DBOptions) -> DBOptions {
609650
DBOptions {
610-
options: default_db_options()
651+
options: db_options
652+
.clone()
611653
.optimize_for_write_throughput()
612654
.optimize_for_read(read_size_from_env(ENV_VAR_LOCKS_BLOCK_CACHE_SIZE).unwrap_or(1024))
613655
.options,
614-
rw_options: ReadWriteOptions::default().set_ignore_range_deletions(false),
656+
rw_options: db_options.rw_options.set_ignore_range_deletions(false),
615657
}
616658
}
617659

618-
fn objects_table_default_config() -> DBOptions {
619-
default_db_options()
660+
fn objects_table_config(db_options: DBOptions) -> DBOptions {
661+
db_options
620662
.optimize_for_write_throughput()
621663
.optimize_for_read(read_size_from_env(ENV_VAR_OBJECTS_BLOCK_CACHE_SIZE).unwrap_or(5 * 1024))
622664
}
623665

624-
fn transactions_table_default_config() -> DBOptions {
625-
default_db_options()
666+
fn transactions_table_config(db_options: DBOptions) -> DBOptions {
667+
db_options
626668
.optimize_for_write_throughput()
627669
.optimize_for_point_lookup(
628670
read_size_from_env(ENV_VAR_TRANSACTIONS_BLOCK_CACHE_SIZE).unwrap_or(512),
629671
)
630672
}
631673

632-
fn effects_table_default_config() -> DBOptions {
633-
default_db_options()
674+
fn effects_table_config(db_options: DBOptions) -> DBOptions {
675+
db_options
634676
.optimize_for_write_throughput()
635677
.optimize_for_point_lookup(
636678
read_size_from_env(ENV_VAR_EFFECTS_BLOCK_CACHE_SIZE).unwrap_or(1024),
637679
)
638680
}
639681

640-
fn events_table_default_config() -> DBOptions {
641-
default_db_options()
682+
fn events_table_config(db_options: DBOptions) -> DBOptions {
683+
db_options
642684
.optimize_for_write_throughput()
643685
.optimize_for_read(read_size_from_env(ENV_VAR_EVENTS_BLOCK_CACHE_SIZE).unwrap_or(1024))
644686
}
645687

646-
fn indirect_move_objects_table_default_config() -> DBOptions {
647-
let mut options = default_db_options()
688+
fn indirect_move_objects_table_config(mut db_options: DBOptions) -> DBOptions {
689+
db_options = db_options
648690
.optimize_for_write_throughput()
649691
.optimize_for_point_lookup(
650692
read_size_from_env(ENV_VAR_INDIRECT_OBJECTS_BLOCK_CACHE_SIZE).unwrap_or(512),
651693
);
652-
options.options.set_merge_operator(
694+
db_options.options.set_merge_operator(
653695
"refcount operator",
654696
reference_count_merge_operator,
655697
reference_count_merge_operator,
656698
);
657-
options
699+
db_options
658700
.options
659701
.set_compaction_filter("empty filter", empty_compaction_filter);
660-
options
702+
db_options
661703
}

crates/iota-node/src/lib.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ use iota_core::{
3535
authority::{
3636
AuthorityState, AuthorityStore, CHAIN_IDENTIFIER, RandomnessRoundReceiver,
3737
authority_per_epoch_store::AuthorityPerEpochStore,
38-
authority_store_tables::AuthorityPerpetualTables,
38+
authority_store_tables::{AuthorityPerpetualTables, AuthorityPerpetualTablesOptions},
3939
epoch_start_configuration::{EpochFlag, EpochStartConfigTrait, EpochStartConfiguration},
4040
},
4141
authority_aggregator::{AuthAggMetrics, AuthorityAggregator},
@@ -479,10 +479,12 @@ impl IotaNode {
479479
None,
480480
));
481481

482-
let perpetual_options = default_db_options().optimize_db_for_write_throughput(4);
482+
// By default, only enable write stall on validators for perpetual db.
483+
let enable_write_stall = config.enable_db_write_stall.unwrap_or(is_validator);
484+
let perpetual_tables_options = AuthorityPerpetualTablesOptions { enable_write_stall };
483485
let perpetual_tables = Arc::new(AuthorityPerpetualTables::open(
484486
&config.db_path().join("store"),
485-
Some(perpetual_options.options),
487+
Some(perpetual_tables_options),
486488
));
487489
let is_genesis = perpetual_tables
488490
.database_is_empty()

crates/iota-swarm-config/src/node_config_builder.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,7 @@ impl ValidatorConfigBuilder {
225225
execution_cache: ExecutionCacheConfig::default(),
226226
enable_validator_tx_finalizer: true,
227227
verifier_signing_config: VerifierSigningConfig::default(),
228+
enable_db_write_stall: None,
228229
iota_names_config: None,
229230
}
230231
}
@@ -519,6 +520,7 @@ impl FullnodeConfigBuilder {
519520
// This is a validator specific feature.
520521
enable_validator_tx_finalizer: false,
521522
verifier_signing_config: VerifierSigningConfig::default(),
523+
enable_db_write_stall: None,
522524
iota_names_config: None,
523525
}
524526
}

crates/typed-store-derive/src/lib.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,14 @@ pub fn derive_dbmap_utils_general(input: TokenStream) -> TokenStream {
323323
remove_deprecated_tables: bool,
324324
) -> Self {
325325
let path = &path;
326+
let default_cf_opt = if let Some(opt) = global_db_options_override.as_ref() {
327+
typed_store::rocks::DBOptions {
328+
options: opt.clone(),
329+
rw_options: typed_store::rocks::default_db_options().rw_options,
330+
}
331+
} else {
332+
typed_store::rocks::default_db_options()
333+
};
326334
let (db, rwopt_cfs) = {
327335
let opt_cfs = match tables_db_options_override {
328336
None => [
@@ -332,7 +340,7 @@ pub fn derive_dbmap_utils_general(input: TokenStream) -> TokenStream {
332340
],
333341
Some(o) => [
334342
#(
335-
(stringify!(#cf_names).to_owned(), o.to_map().get(stringify!(#cf_names)).unwrap().clone()),
343+
(stringify!(#cf_names).to_owned(), o.to_map().get(stringify!(#cf_names)).unwrap_or(&default_cf_opt).clone()),
336344
)*
337345
]
338346
};

0 commit comments

Comments
 (0)