Skip to content

Commit 3fc0ed5

Browse files
committed
catalog: Hook up expression cache into startup
This commit hooks up the expression cache into the startup process. During startup if an expression is found in the cache, then we use that expression instead of re-optimizing an item. If an expression is not found in the cache, then we optimize the item and insert the optimized expression into the cache. The insertion into the cache happens in bulk as a single operation at the end of startup instead of once for each object. The reason is that inserting new expressions may need to invalidate old expressions. Inserting all expressions in bulk allows us to easily figure out what should be invalidated vs what shouldn't be invalidated. If we update the expressions one at a time, then we may accidentally invalidate new expressions. Much of the code in this commit involves plumbing things around and keeping track of what is and isn't cached. After startup we do not cache newly created items, this is left for future work. As a result, if environmentd crashes in-between releases then recovery may be slow if there are many new objects. However, read-only instances should fully populate the cache during a release. Works towards resolving #MaterializeInc/database-issues/8384
1 parent 1c24796 commit 3fc0ed5

File tree

13 files changed

+871
-332
lines changed

13 files changed

+871
-332
lines changed

src/adapter-types/src/dyncfgs.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,13 @@ pub const ENABLE_CONTINUAL_TASK_BUILTINS: Config<bool> = Config::new(
104104
"Create system builtin continual tasks on boot.",
105105
);
106106

107+
/// Whether to use an expression cache on boot.
108+
pub const ENABLE_EXPRESSION_CACHE: Config<bool> = Config::new(
109+
"enable_expression_cache",
110+
true,
111+
"Use a cache to store optimized expressions to help speed up start times.",
112+
);
113+
107114
/// Adds the full set of all compute `Config`s.
108115
pub fn all_dyncfgs(configs: ConfigSet) -> ConfigSet {
109116
configs
@@ -120,4 +127,5 @@ pub fn all_dyncfgs(configs: ConfigSet) -> ConfigSet {
120127
.add(&PLAN_INSIGHTS_NOTICE_FAST_PATH_CLUSTERS_OPTIMIZE_DURATION)
121128
.add(&DEFAULT_SINK_PARTITION_STRATEGY)
122129
.add(&ENABLE_CONTINUAL_TASK_BUILTINS)
130+
.add(&ENABLE_EXPRESSION_CACHE)
123131
}

src/adapter/src/catalog.rs

Lines changed: 107 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,12 @@
1212
//! Persistent metadata storage for the coordinator.
1313
1414
use std::borrow::Cow;
15-
use std::collections::{BTreeMap, BTreeSet};
15+
use std::collections::{BTreeMap, BTreeSet, VecDeque};
1616
use std::convert;
1717
use std::sync::Arc;
1818

19-
use futures::Future;
19+
use futures::future::BoxFuture;
20+
use futures::{Future, FutureExt};
2021
use itertools::Itertools;
2122
use mz_adapter_types::connection::ConnectionId;
2223
use mz_audit_log::{EventType, FullNameV1, ObjectType, VersionedStorageUsage};
@@ -29,6 +30,7 @@ use mz_catalog::config::{BuiltinItemMigrationConfig, ClusterReplicaSizeMap, Conf
2930
#[cfg(test)]
3031
use mz_catalog::durable::CatalogError;
3132
use mz_catalog::durable::{test_bootstrap_args, DurableCatalogState, TestCatalogStateBuilder};
33+
use mz_catalog::expr_cache::{ExpressionCacheHandle, GlobalExpressions, LocalExpressions};
3234
use mz_catalog::memory::error::{Error, ErrorKind};
3335
use mz_catalog::memory::objects::{
3436
CatalogEntry, Cluster, ClusterReplica, Database, NetworkPolicy, Role, Schema,
@@ -37,6 +39,7 @@ use mz_compute_types::dataflows::DataflowDescription;
3739
use mz_controller::clusters::ReplicaLocation;
3840
use mz_controller_types::{ClusterId, ReplicaId};
3941
use mz_expr::OptimizedMirRelationExpr;
42+
use mz_ore::collections::HashSet;
4043
use mz_ore::metrics::MetricsRegistry;
4144
use mz_ore::now::{EpochMillis, NowFn, SYSTEM_TIME};
4245
use mz_ore::option::FallibleMapExt;
@@ -52,9 +55,9 @@ use mz_repr::{Diff, GlobalId, ScalarType};
5255
use mz_secrets::InMemorySecretsController;
5356
use mz_sql::catalog::{
5457
CatalogCluster, CatalogClusterReplica, CatalogDatabase, CatalogError as SqlCatalogError,
55-
CatalogItem as SqlCatalogItem, CatalogItemType as SqlCatalogItemType, CatalogNetworkPolicy,
56-
CatalogRole, CatalogSchema, DefaultPrivilegeAclItem, DefaultPrivilegeObject, EnvironmentId,
57-
SessionCatalog, SystemObjectType,
58+
CatalogItem as SqlCatalogItem, CatalogItemType as SqlCatalogItemType, CatalogItemType,
59+
CatalogNetworkPolicy, CatalogRole, CatalogSchema, DefaultPrivilegeAclItem,
60+
DefaultPrivilegeObject, EnvironmentId, SessionCatalog, SystemObjectType,
5861
};
5962
use mz_sql::names::{
6063
CommentObjectId, DatabaseId, FullItemName, FullSchemaName, ItemQualifiers, ObjectId,
@@ -128,6 +131,7 @@ mod transact;
128131
pub struct Catalog {
129132
state: CatalogState,
130133
plans: CatalogPlans,
134+
expr_cache_handle: Option<ExpressionCacheHandle>,
131135
storage: Arc<tokio::sync::Mutex<Box<dyn mz_catalog::durable::DurableCatalogState>>>,
132136
transient_revision: u64,
133137
}
@@ -139,6 +143,7 @@ impl Clone for Catalog {
139143
Self {
140144
state: self.state.clone(),
141145
plans: self.plans.clone(),
146+
expr_cache_handle: self.expr_cache_handle.clone(),
142147
storage: Arc::clone(&self.storage),
143148
transient_revision: self.transient_revision,
144149
}
@@ -363,6 +368,49 @@ impl Catalog {
363368
}
364369
policies
365370
}
371+
372+
/// Return a set of [`GlobalId`]s for items that need to have their cache entries invalidated
373+
/// as a result of creating new indexes on the items in `ons`.
374+
pub(crate) fn invalidate_for_index(
375+
&self,
376+
ons: impl Iterator<Item = GlobalId>,
377+
) -> BTreeSet<GlobalId> {
378+
let mut dependencies = BTreeSet::new();
379+
let mut queue = VecDeque::new();
380+
let mut seen = HashSet::new();
381+
for on in ons {
382+
dependencies.insert(on);
383+
seen.insert(on);
384+
let entry = self.get_entry(&on);
385+
let uses = entry.uses();
386+
queue.extend(uses.clone());
387+
seen.extend(uses);
388+
}
389+
390+
while let Some(cur) = queue.pop_front() {
391+
if seen.insert(cur) {
392+
let entry = self.get_entry(&cur);
393+
match entry.item_type() {
394+
CatalogItemType::Table
395+
| CatalogItemType::Source
396+
| CatalogItemType::MaterializedView
397+
| CatalogItemType::Sink
398+
| CatalogItemType::Index
399+
| CatalogItemType::Type
400+
| CatalogItemType::Func
401+
| CatalogItemType::Secret
402+
| CatalogItemType::Connection
403+
| CatalogItemType::ContinualTask => {
404+
dependencies.insert(cur);
405+
}
406+
CatalogItemType::View => {
407+
queue.extend(entry.uses());
408+
}
409+
}
410+
}
411+
}
412+
dependencies
413+
}
366414
}
367415

368416
#[derive(Debug)]
@@ -480,15 +528,21 @@ impl Catalog {
480528
) -> Result<Catalog, anyhow::Error> {
481529
let now = SYSTEM_TIME.clone();
482530
let environment_id = None;
483-
let openable_storage = TestCatalogStateBuilder::new(persist_client)
531+
let openable_storage = TestCatalogStateBuilder::new(persist_client.clone())
484532
.with_organization_id(organization_id)
485533
.with_default_deploy_generation()
486534
.build()
487535
.await?;
488536
let storage = openable_storage.open(now(), &test_bootstrap_args()).await?;
489537
let system_parameter_defaults = BTreeMap::default();
490-
Self::open_debug_catalog_inner(storage, now, environment_id, system_parameter_defaults)
491-
.await
538+
Self::open_debug_catalog_inner(
539+
persist_client,
540+
storage,
541+
now,
542+
environment_id,
543+
system_parameter_defaults,
544+
)
545+
.await
492546
}
493547

494548
/// Opens a read only debug persist backed catalog defined by `persist_client` and
@@ -501,16 +555,22 @@ impl Catalog {
501555
) -> Result<Catalog, anyhow::Error> {
502556
let now = SYSTEM_TIME.clone();
503557
let environment_id = None;
504-
let openable_storage = TestCatalogStateBuilder::new(persist_client)
558+
let openable_storage = TestCatalogStateBuilder::new(persist_client.clone())
505559
.with_organization_id(organization_id)
506560
.build()
507561
.await?;
508562
let storage = openable_storage
509563
.open_read_only(&test_bootstrap_args())
510564
.await?;
511565
let system_parameter_defaults = BTreeMap::default();
512-
Self::open_debug_catalog_inner(storage, now, environment_id, system_parameter_defaults)
513-
.await
566+
Self::open_debug_catalog_inner(
567+
persist_client,
568+
storage,
569+
now,
570+
environment_id,
571+
system_parameter_defaults,
572+
)
573+
.await
514574
}
515575

516576
/// Opens a read only debug persist backed catalog defined by `persist_client` and
@@ -524,7 +584,7 @@ impl Catalog {
524584
system_parameter_defaults: BTreeMap<String, String>,
525585
version: semver::Version,
526586
) -> Result<Catalog, anyhow::Error> {
527-
let openable_storage = TestCatalogStateBuilder::new(persist_client)
587+
let openable_storage = TestCatalogStateBuilder::new(persist_client.clone())
528588
.with_organization_id(environment_id.organization_id())
529589
.with_version(version)
530590
.build()
@@ -533,6 +593,7 @@ impl Catalog {
533593
.open_read_only(&test_bootstrap_args())
534594
.await?;
535595
Self::open_debug_catalog_inner(
596+
persist_client,
536597
storage,
537598
now,
538599
Some(environment_id),
@@ -542,6 +603,7 @@ impl Catalog {
542603
}
543604

544605
async fn open_debug_catalog_inner(
606+
persist_client: PersistClient,
545607
storage: Box<dyn DurableCatalogState>,
546608
now: NowFn,
547609
environment_id: Option<EnvironmentId>,
@@ -559,6 +621,8 @@ impl Catalog {
559621
migrated_storage_collections_0dt: _,
560622
new_builtins: _,
561623
builtin_table_updates: _,
624+
cached_global_exprs: _,
625+
uncached_local_exprs: _,
562626
} = Catalog::open(Config {
563627
storage,
564628
metrics_registry,
@@ -567,6 +631,8 @@ impl Catalog {
567631
all_features: false,
568632
build_info: &DUMMY_BUILD_INFO,
569633
environment_id: environment_id.unwrap_or(EnvironmentId::for_tests()),
634+
deploy_generation: 0,
635+
read_only: false,
570636
now,
571637
boot_ts: previous_ts,
572638
skip_migrations: true,
@@ -586,6 +652,7 @@ impl Catalog {
586652
connection_context: ConnectionContext::for_tests(secrets_reader),
587653
active_connection_count,
588654
builtin_item_migration_config: BuiltinItemMigrationConfig::Legacy,
655+
persist_client,
589656
helm_chart_version: None,
590657
},
591658
})
@@ -1260,6 +1327,31 @@ impl Catalog {
12601327
.deserialize_plan_with_enable_for_item_parsing(create_sql, force_if_exists_skip)
12611328
}
12621329

1330+
pub(crate) fn update_expression_cache<'a, 'b>(
1331+
&'a self,
1332+
new_local_expressions: Vec<(GlobalId, LocalExpressions)>,
1333+
new_global_expressions: Vec<(GlobalId, GlobalExpressions)>,
1334+
) -> BoxFuture<'b, ()> {
1335+
if let Some(expr_cache) = &self.expr_cache_handle {
1336+
let ons = new_local_expressions
1337+
.iter()
1338+
.map(|(id, _)| id)
1339+
.chain(new_global_expressions.iter().map(|(id, _)| id))
1340+
.filter_map(|id| self.get_entry(id).index())
1341+
.map(|index| index.on);
1342+
let invalidate_ids = self.invalidate_for_index(ons);
1343+
expr_cache
1344+
.update(
1345+
new_local_expressions,
1346+
new_global_expressions,
1347+
invalidate_ids,
1348+
)
1349+
.boxed()
1350+
} else {
1351+
async {}.boxed()
1352+
}
1353+
}
1354+
12631355
/// Listen for and apply all unconsumed updates to the durable catalog state.
12641356
// TODO(jkosh44) When this method is actually used outside of a test we can remove the
12651357
// `#[cfg(test)]` annotation.
@@ -2060,6 +2152,7 @@ mod tests {
20602152
use mz_sql::session::user::MZ_SYSTEM_ROLE_ID;
20612153
use mz_sql::session::vars::VarInput;
20622154

2155+
use crate::catalog::state::LocalExpressionCache;
20632156
use crate::catalog::{Catalog, Op};
20642157
use crate::optimize::dataflows::{prep_scalar_expr, EvalTime, ExprPrepStyle};
20652158
use crate::session::Session;
@@ -2378,7 +2471,7 @@ mod tests {
23782471
.expect("unable to open debug catalog");
23792472
let item = catalog
23802473
.state()
2381-
.deserialize_item(id, &create_sql)
2474+
.deserialize_item(id, &create_sql, &mut LocalExpressionCache::Closed)
23822475
.expect("unable to parse view");
23832476
catalog
23842477
.transact(
@@ -3274,7 +3367,7 @@ mod tests {
32743367
.state()
32753368
.deserialize_item(mv_id, &format!(
32763369
"CREATE MATERIALIZED VIEW {database_name}.{schema_name}.{mv_name} AS SELECT name FROM mz_tables"
3277-
))
3370+
), &mut LocalExpressionCache::Closed)
32783371
.expect("unable to deserialize item");
32793372
catalog
32803373
.transact(

0 commit comments

Comments
 (0)