From b6eda03c1dfe73eb4cdbf3db798660dd50fb0ccb Mon Sep 17 00:00:00 2001 From: Roshan Jobanputra Date: Mon, 16 Sep 2024 16:43:45 -0400 Subject: [PATCH 01/52] Opt-in catalog migration for converting subsources to source tables --- src/adapter-types/src/dyncfgs.rs | 8 ++ src/adapter/src/catalog/migrate.rs | 166 ++++++++++++++++++++++++++++- 2 files changed, 169 insertions(+), 5 deletions(-) diff --git a/src/adapter-types/src/dyncfgs.rs b/src/adapter-types/src/dyncfgs.rs index 795a342f284a4..1ddd317c835fb 100644 --- a/src/adapter-types/src/dyncfgs.rs +++ b/src/adapter-types/src/dyncfgs.rs @@ -111,6 +111,13 @@ pub const ENABLE_EXPRESSION_CACHE: Config = Config::new( "Use a cache to store optimized expressions to help speed up start times.", ); +/// Whether to enable the migration to convert all sources to use tables. +pub const ENABLE_SOURCE_TABLE_MIGRATION: Config = Config::new( + "enable_source_table_migration", + false, + "Whether to enable the migration to convert all sources to use tables.", +); + /// Adds the full set of all compute `Config`s. pub fn all_dyncfgs(configs: ConfigSet) -> ConfigSet { configs @@ -128,4 +135,5 @@ pub fn all_dyncfgs(configs: ConfigSet) -> ConfigSet { .add(&DEFAULT_SINK_PARTITION_STRATEGY) .add(&ENABLE_CONTINUAL_TASK_BUILTINS) .add(&ENABLE_EXPRESSION_CACHE) + .add(&ENABLE_SOURCE_TABLE_MIGRATION) } diff --git a/src/adapter/src/catalog/migrate.rs b/src/adapter/src/catalog/migrate.rs index c62d768a838e6..888824f0d389a 100644 --- a/src/adapter/src/catalog/migrate.rs +++ b/src/adapter/src/catalog/migrate.rs @@ -10,7 +10,7 @@ use std::collections::BTreeMap; use mz_catalog::builtin::BuiltinTable; -use mz_catalog::durable::Transaction; +use mz_catalog::durable::{Transaction, Item}; use mz_catalog::memory::objects::StateUpdate; use mz_ore::collections::CollectionExt; use mz_ore::now::NowFn; @@ -31,13 +31,23 @@ where &'a mut Transaction<'_>, CatalogItemId, &'a mut Statement, + &'a Vec<(Item, Statement)>, ) -> Result<(), anyhow::Error>, { let mut updated_items = BTreeMap::new(); + let items_with_statements = tx + .get_items() + .map(|item| { + let stmt = mz_sql::parse::parse(&item.create_sql)?.into_element().ast; + Ok((item, stmt)) + }) + .collect::, anyhow::Error>>()?; - for mut item in tx.get_items() { + // Clone this vec to be referenced within the closure if needed + let items_with_statements_ref = items_with_statements.clone(); + for (mut item, mut stmt) in items_with_statements { let mut stmt = mz_sql::parse::parse(&item.create_sql)?.into_element().ast; - f(tx, item.id, &mut stmt)?; + f(tx, item.id, &mut stmt, &items_with_statements_ref)?; item.create_sql = stmt.to_ast_string_stable(); @@ -97,7 +107,7 @@ pub(crate) async fn migrate( catalog_version ); - rewrite_ast_items(tx, |_tx, _id, _stmt| { + rewrite_ast_items(tx, |tx, _id, stmt, all_items_and_statements| { // Add per-item AST migrations below. // // Each migration should be a function that takes `stmt` (the AST @@ -107,7 +117,21 @@ pub(crate) async fn migrate( // Migration functions may also take `tx` as input to stage // arbitrary changes to the catalog. - Ok(()) + // Special block for `ast_rewrite_sources_to_tables` migration + // since it requires a feature flag. 
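+ // (The `enable_source_table_migration` dyncfg added in this patch defaults to
+ // `false`, so this rewrite only runs when the flag is explicitly enabled.)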
+ if let Some(config_val) = tx.get_system_config("enable_source_table_migration") { + let enable_migration = config_val.parse::().map_err(|e| { + anyhow::anyhow!( + "could not parse enable_source_table_migration config value: {}", + e + ) + })?; + if enable_migration { + info!("migrate: enable_source_table_migration"); + ast_rewrite_sources_to_tables(stmt, all_items_and_statements)?; + } + } + Ok(()) })?; // Load items into catalog. We make sure to consolidate the old updates with the new updates to @@ -182,6 +206,138 @@ pub(crate) async fn migrate( // Please include the adapter team on any code reviews that add or edit // migrations. +/// Migrates all sources to use the new sources as tables model +/// +/// First we migrate existing `CREATE SUBSOURCE` statements, turning them into +/// `CREATE TABLE .. FROM SOURCE` statements. This covers existing Postgres, +/// MySQL, and multi-output (tpch, auction, marketing) load-generator subsources. +/// +/// Second we migrate existing `CREATE SOURCE` statements for these multi-output +/// sources to remove their subsource qualification option (e.g. FOR ALL TABLES..) +/// and any subsource-specific options (e.g. TEXT COLUMNS). +/// +/// Third we migrate existing `CREATE SOURCE` statements that refer to a single +/// output collection. This includes existing Kafka and single-output load-generator +/// subsources. These statements will generate an additional `CREATE TABLE .. FROM SOURCE` +/// statement that copies over all the export-specific options. This statement will be tied +/// to the existing statement's persist collection, but using a new GlobalID. The original +/// source statement will be updated to remove the export-specific options and will be +/// renamed to `_source` while keeping its same GlobalId. +/// +fn ast_rewrite_sources_to_tables( + stmt: &mut Statement, + all_items_and_statements: &Vec<(Item, Statement)>, +) -> Result<(), anyhow::Error> { + use mz_sql::ast::{ + CreateSubsourceOptionName, CreateTableFromSourceStatement, RawItemName, + TableFromSourceOption, WithOptionValue, + }; + + // Since some subsources have named-only references to their `of_source` and some have the + // global_id of their source, we first generate a reference mapping from all source names to + // items so we can correct the references in the `CREATE TABLE .. FROM SOURCE` + // statements to always use the ID of the source. + let source_name_to_item: BTreeMap<_, _> = all_items_and_statements + .iter() + .filter_map(|(item, statement)| match statement { + Statement::CreateSource(stmt) => Some((stmt.name.clone(), item)), + _ => None, + }) + .collect(); + + match stmt { + // Migrate each 'source export' `CREATE SUBSOURCE` statement to an equivalent + // `CREATE TABLE .. FROM SOURCE` statement. Note that we will not be migrating + // 'progress' `CREATE SUBSOURCE` statements as they are not currently relevant + // to the new table model. + Statement::CreateSubsource(subsource) if subsource.of_source.is_some() => { + let raw_source_name = subsource.of_source.as_ref().unwrap(); + let source = match raw_source_name { + RawItemName::Name(name) => { + // Convert the name reference to an ID reference. + let source_item = source_name_to_item.get(name).expect("source must exist"); + RawItemName::Id(source_item.id.to_string(), name.clone(), None) + } + RawItemName::Id(..) => raw_source_name.clone(), + }; + + // The external reference is a `with_option` on subsource statements but is a + // separate field on table statements. 
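+ // For example, a subsource created WITH (EXTERNAL REFERENCE = postgres.public.t1)
+ // carries that reference over to the new table's `external_reference` field.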
+ let external_reference_option = subsource + .with_options + .iter() + .find(|o| o.name == CreateSubsourceOptionName::ExternalReference) + .expect("subsources must have external reference"); + let external_reference = match &external_reference_option.value { + Some(WithOptionValue::UnresolvedItemName(name)) => name, + _ => unreachable!("external reference must be an unresolved item name"), + }; + + // The `details` option on both subsources and tables is identical, using the same + // ProtoSourceExportStatementDetails serialized value. + let existing_details = subsource + .with_options + .iter() + .find(|o| o.name == CreateSubsourceOptionName::Details) + .expect("subsources must have details"); + + let mut with_options = vec![TableFromSourceOption { + name: mz_sql::ast::TableFromSourceOptionName::Details, + value: existing_details.value.clone(), + }]; + + // TEXT COLUMNS and EXCLUDE COLUMNS options are stored on each subsource and can be copied + // directly to the table statement. Note that we also store a 'normalized' version of the + // text columns and exclude columns in the `with_options` field of the primary source statement + // for legacy reasons, but that is redundant info and will be removed below when we migrate + // the source statements. + if let Some(text_cols_option) = subsource + .with_options + .iter() + .find(|option| option.name == CreateSubsourceOptionName::TextColumns) + { + with_options.push(TableFromSourceOption { + name: mz_sql::ast::TableFromSourceOptionName::TextColumns, + value: text_cols_option.value.clone(), + }); + } + if let Some(exclude_cols_option) = subsource + .with_options + .iter() + .find(|option| option.name == CreateSubsourceOptionName::ExcludeColumns) + { + with_options.push(TableFromSourceOption { + name: mz_sql::ast::TableFromSourceOptionName::ExcludeColumns, + value: exclude_cols_option.value.clone(), + }); + } + + let table = CreateTableFromSourceStatement { + name: subsource.name.clone(), + constraints: subsource.constraints.clone(), + columns: mz_sql::ast::TableFromSourceColumns::Defined(subsource.columns.clone()), + if_not_exists: subsource.if_not_exists, + source, + external_reference: Some(external_reference.clone()), + with_options, + // Subsources don't have `envelope`, `include_metadata`, or `format` options. + envelope: None, + include_metadata: vec![], + format: None, + }; + + info!( + "migrate: converted subsource {:?} to table {:?}", + subsource, table + ); + *stmt = Statement::CreateTableFromSource(table); + } + _ => (), + } + + Ok(()) +} + // Durable migrations /// Migrations that run only on the durable catalog before any data is loaded into memory. 
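For intuition, the rewrite added in this patch converts a catalog entry pair roughly like the one below, so that the subsource becomes a table reading from the existing source (identifiers, column lists, and option rendering are illustrative rather than exact catalog SQL):

    CREATE SOURCE pg_src FROM POSTGRES CONNECTION pg_conn (PUBLICATION 'mz_source');
    CREATE SUBSOURCE t1 (a int4, b text) OF SOURCE pg_src
        WITH (EXTERNAL REFERENCE = postgres.public.t1, DETAILS = '...');

    -- the subsource is rewritten to --

    CREATE TABLE t1 (a int4, b text) FROM SOURCE pg_src (REFERENCE postgres.public.t1)
        WITH (DETAILS = '...');

The `CREATE SOURCE` statement itself is not touched by this commit; the follow-up commits below strip its now-redundant subsource options and handle single-output sources.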
From 7d9bc2601b15acacd3254324f6e83abf0ca4596d Mon Sep 17 00:00:00 2001 From: Roshan Jobanputra Date: Wed, 23 Oct 2024 12:57:26 -0400 Subject: [PATCH 02/52] Add migration logic for source statements --- src/adapter/src/catalog/migrate.rs | 295 ++++++++++++++++++++++++++--- 1 file changed, 273 insertions(+), 22 deletions(-) diff --git a/src/adapter/src/catalog/migrate.rs b/src/adapter/src/catalog/migrate.rs index 888824f0d389a..acf7fa6b5d47d 100644 --- a/src/adapter/src/catalog/migrate.rs +++ b/src/adapter/src/catalog/migrate.rs @@ -29,7 +29,7 @@ fn rewrite_ast_items(tx: &mut Transaction<'_>, mut f: F) -> Result<(), anyhow where F: for<'a> FnMut( &'a mut Transaction<'_>, - CatalogItemId, + &'a mut Item, &'a mut Statement, &'a Vec<(Item, Statement)>, ) -> Result<(), anyhow::Error>, @@ -47,7 +47,7 @@ where let items_with_statements_ref = items_with_statements.clone(); for (mut item, mut stmt) in items_with_statements { let mut stmt = mz_sql::parse::parse(&item.create_sql)?.into_element().ast; - f(tx, item.id, &mut stmt, &items_with_statements_ref)?; + f(tx, &mut item, &mut stmt, &items_with_statements_ref)?; item.create_sql = stmt.to_ast_string_stable(); @@ -107,7 +107,7 @@ pub(crate) async fn migrate( catalog_version ); - rewrite_ast_items(tx, |tx, _id, stmt, all_items_and_statements| { + rewrite_ast_items(tx, |tx, item, stmt, all_items_and_statements| { // Add per-item AST migrations below. // // Each migration should be a function that takes `stmt` (the AST @@ -128,7 +128,7 @@ pub(crate) async fn migrate( })?; if enable_migration { info!("migrate: enable_source_table_migration"); - ast_rewrite_sources_to_tables(stmt, all_items_and_statements)?; + ast_rewrite_sources_to_tables(tx, item, stmt, all_items_and_statements)?; } } Ok(()) @@ -225,18 +225,30 @@ pub(crate) async fn migrate( /// renamed to `_source` while keeping its same GlobalId. /// fn ast_rewrite_sources_to_tables( + tx: &mut Transaction<'_>, + item: &mut Item, stmt: &mut Statement, all_items_and_statements: &Vec<(Item, Statement)>, ) -> Result<(), anyhow::Error> { + use maplit::btreemap; + use maplit::btreeset; + use mz_persist_types::ShardId; + use mz_proto::RustType; use mz_sql::ast::{ - CreateSubsourceOptionName, CreateTableFromSourceStatement, RawItemName, - TableFromSourceOption, WithOptionValue, + CreateSourceConnection, CreateSourceOptionName, CreateSourceStatement, + CreateSubsourceOptionName, CreateTableFromSourceStatement, Ident, LoadGenerator, + MySqlConfigOptionName, PgConfigOptionName, RawItemName, TableFromSourceColumns, + TableFromSourceOption, TableFromSourceOptionName, Value, WithOptionValue, }; + use mz_storage_client::controller::StorageTxn; + use mz_storage_types::sources::load_generator::LoadGeneratorOutput; + use mz_storage_types::sources::SourceExportStatementDetails; + use prost::Message; // Since some subsources have named-only references to their `of_source` and some have the // global_id of their source, we first generate a reference mapping from all source names to - // items so we can correct the references in the `CREATE TABLE .. FROM SOURCE` - // statements to always use the ID of the source. + // items so we can ensure we always use an ID-based reference in the stored + // `CREATE TABLE .. FROM SOURCE` statements. let source_name_to_item: BTreeMap<_, _> = all_items_and_statements .iter() .filter_map(|(item, statement)| match statement { @@ -245,13 +257,34 @@ fn ast_rewrite_sources_to_tables( }) .collect(); + // Collect any existing `CREATE TABLE .. 
FROM SOURCE` statements by the source they reference. + let tables_by_source: BTreeMap<_, Vec<_>> = all_items_and_statements + .iter() + .filter_map(|(_, statement)| match statement { + Statement::CreateTableFromSource(stmt) => { + let source: GlobalId = match &stmt.source { + RawItemName::Name(_) => unreachable!("tables store source as ID"), + RawItemName::Id(id, _, _) => id.parse().expect("valid id"), + }; + Some((source, stmt)) + } + _ => None, + }) + .fold(BTreeMap::new(), |mut acc, (source, stmt)| { + acc.entry(source).or_insert_with(Vec::new).push(stmt); + acc + }); + match stmt { - // Migrate each 'source export' `CREATE SUBSOURCE` statement to an equivalent - // `CREATE TABLE .. FROM SOURCE` statement. Note that we will not be migrating - // 'progress' `CREATE SUBSOURCE` statements as they are not currently relevant - // to the new table model. - Statement::CreateSubsource(subsource) if subsource.of_source.is_some() => { - let raw_source_name = subsource.of_source.as_ref().unwrap(); + // Migrate each `CREATE SUBSOURCE` statement to an equivalent + // `CREATE TABLE .. FROM SOURCE` statement. + Statement::CreateSubsource(subsource) => { + let raw_source_name = match &subsource.of_source { + // We will not be migrating 'progress' `CREATE SUBSOURCE` statements as they + // are not currently relevant to the new table model. + None => return Ok(()), + Some(name) => name, + }; let source = match raw_source_name { RawItemName::Name(name) => { // Convert the name reference to an ID reference. @@ -282,22 +315,19 @@ fn ast_rewrite_sources_to_tables( .expect("subsources must have details"); let mut with_options = vec![TableFromSourceOption { - name: mz_sql::ast::TableFromSourceOptionName::Details, + name: TableFromSourceOptionName::Details, value: existing_details.value.clone(), }]; // TEXT COLUMNS and EXCLUDE COLUMNS options are stored on each subsource and can be copied - // directly to the table statement. Note that we also store a 'normalized' version of the - // text columns and exclude columns in the `with_options` field of the primary source statement - // for legacy reasons, but that is redundant info and will be removed below when we migrate - // the source statements. + // directly to the table statement. if let Some(text_cols_option) = subsource .with_options .iter() .find(|option| option.name == CreateSubsourceOptionName::TextColumns) { with_options.push(TableFromSourceOption { - name: mz_sql::ast::TableFromSourceOptionName::TextColumns, + name: TableFromSourceOptionName::TextColumns, value: text_cols_option.value.clone(), }); } @@ -307,7 +337,7 @@ fn ast_rewrite_sources_to_tables( .find(|option| option.name == CreateSubsourceOptionName::ExcludeColumns) { with_options.push(TableFromSourceOption { - name: mz_sql::ast::TableFromSourceOptionName::ExcludeColumns, + name: TableFromSourceOptionName::ExcludeColumns, value: exclude_cols_option.value.clone(), }); } @@ -327,11 +357,232 @@ fn ast_rewrite_sources_to_tables( }; info!( - "migrate: converted subsource {:?} to table {:?}", + "migrate: converted subsource {} to table {}", subsource, table ); *stmt = Statement::CreateTableFromSource(table); } + + // Postgres sources are multi-output sources whose subsources are + // migrated above. All we need to do is remove the subsource-related + // options from this statement since they are no longer relevant. + Statement::CreateSource(CreateSourceStatement { + connection: + CreateSourceConnection::Postgres { options, .. } + | CreateSourceConnection::Yugabyte { options, .. }, + .. 
+ }) => { + // This option storing text columns on the primary source statement is redundant + // with the option on subsource statements so can just be removed. + // This was kept for round-tripping of `CREATE SOURCE` statements that automatically + // generated subsources, which is no longer necessary. + if options + .iter() + .any(|o| matches!(o.name, PgConfigOptionName::TextColumns)) + { + options.retain(|o| !matches!(o.name, PgConfigOptionName::TextColumns)); + info!("migrate: converted postgres source {stmt} to remove subsource options"); + } + } + // MySQL sources are multi-output sources whose subsources are + // migrated above. All we need to do is remove the subsource-related + // options from this statement since they are no longer relevant. + Statement::CreateSource(CreateSourceStatement { + connection: CreateSourceConnection::MySql { options, .. }, + .. + }) => { + // These options storing text and exclude columns on the primary source statement + // are redundant with the options on subsource statements so can just be removed. + // They was kept for round-tripping of `CREATE SOURCE` statements that automatically + // generated subsources, which is no longer necessary. + if options.iter().any(|o| { + matches!( + o.name, + MySqlConfigOptionName::TextColumns | MySqlConfigOptionName::ExcludeColumns + ) + }) { + options.retain(|o| { + !matches!( + o.name, + MySqlConfigOptionName::TextColumns | MySqlConfigOptionName::ExcludeColumns + ) + }); + info!("migrate: converted mysql source {stmt} to remove subsource options"); + } + } + // Multi-output load generator sources whose subsources are already + // migrated above. There is no need to remove any options from this + // statement since they are not export-specific. + Statement::CreateSource(CreateSourceStatement { + connection: + CreateSourceConnection::LoadGenerator { + generator: + LoadGenerator::Auction | LoadGenerator::Marketing | LoadGenerator::Tpch, + .. + }, + .. + }) => {} + // Single-output sources that need to be migrated to tables. These sources currently output + // data to the primary collection of the source statement. We will create a new table + // statement for them and move all export-specific options over from the source statement, + // while moving the `CREATE SOURCE` statement to a new name and moving its shard to the + // new table statement. + Statement::CreateSource(CreateSourceStatement { + connection: + conn @ (CreateSourceConnection::Kafka { .. } + | CreateSourceConnection::LoadGenerator { + generator: + LoadGenerator::Clock + | LoadGenerator::Datums + | LoadGenerator::Counter + | LoadGenerator::KeyValue, + .. + }), + name, + col_names, + include_metadata, + format, + envelope, + with_options, + .. + }) => { + // To check if this is a source that has already been migrated we use a basic + // heuristic: if there is at least one existing table for the source, and if + // the envelope/format/include_metadata options are empty, we assume it's + // already been migrated. + if let Some(existing_tables) = tables_by_source.get(&item.id) { + if !existing_tables.is_empty() + && envelope.is_none() + && format.is_none() + && include_metadata.is_empty() + { + return Ok(()); + } + } + + // Use the current source name as the new table name, and rename the source to + // `_source`. This is intended to allow users to continue using + // queries that reference the source name, since they will now need to query the + // table instead. 
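+ // e.g. a Kafka source named `my_src` (hypothetical) keeps its GlobalId but is
+ // renamed to `my_src_source`, while the new table created below takes over both
+ // the original name `my_src` and the source's existing persist shard.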
+ let new_source_name_ident = Ident::new_unchecked( + name.0.last().expect("at least one ident").to_string() + "_source", + ); + let mut new_source_name = name.clone(); + *new_source_name.0.last_mut().expect("at least one ident") = new_source_name_ident; + let table_name = std::mem::replace(name, new_source_name.clone()); + + // Also update the name of the source 'item' + let mut table_item_name = item.name.clone() + "_source"; + std::mem::swap(&mut item.name, &mut table_item_name); + + // A reference to the source that will be included in the table statement + let source_ref = RawItemName::Id(item.id.to_string(), new_source_name, None); + + let columns = if col_names.is_empty() { + TableFromSourceColumns::NotSpecified + } else { + TableFromSourceColumns::Named(col_names.drain(..).collect()) + }; + + // All source tables must have a `details` option, which is a serialized proto + // describing any source-specific details for this table statement. + let details = match conn { + // For kafka sources this proto is currently empty. + CreateSourceConnection::Kafka { .. } => SourceExportStatementDetails::Kafka {}, + CreateSourceConnection::LoadGenerator { .. } => { + // Since these load generators are single-output we use the default output. + SourceExportStatementDetails::LoadGenerator { + output: LoadGeneratorOutput::Default, + } + } + _ => unreachable!("match determined above"), + }; + let mut table_with_options = vec![TableFromSourceOption { + name: TableFromSourceOptionName::Details, + value: Some(WithOptionValue::Value(Value::String(hex::encode( + details.into_proto().encode_to_vec(), + )))), + }]; + + // Move over the IgnoreKeys option if it exists. + let mut i = 0; + while i < with_options.len() { + if with_options[i].name == CreateSourceOptionName::IgnoreKeys { + let option = with_options.remove(i); + table_with_options.push(TableFromSourceOption { + name: TableFromSourceOptionName::IgnoreKeys, + value: option.value, + }); + } else { + i += 1; + } + } + + // Move over the Timeline option if it exists. + i = 0; + while i < with_options.len() { + if with_options[i].name == CreateSourceOptionName::Timeline { + let option = with_options.remove(i); + table_with_options.push(TableFromSourceOption { + name: TableFromSourceOptionName::Timeline, + value: option.value, + }); + } else { + i += 1; + } + } + + // The new table statement, stealing the export-specific options from the + // create source statement. + let table = CreateTableFromSourceStatement { + name: table_name, + constraints: vec![], + columns, + if_not_exists: false, + source: source_ref, + // external_reference is not required for single-output sources + external_reference: None, + with_options: table_with_options, + envelope: envelope.take(), + include_metadata: include_metadata.drain(..).collect(), + format: format.take(), + }; + + // Insert the new table statement into the catalog with a new id. + let ids = tx.allocate_user_item_ids(1)?; + let new_table_id = ids[0]; + tx.insert_user_item( + new_table_id, + item.schema_id.clone(), + &table_item_name, + table.to_ast_string_stable(), + item.owner_id.clone(), + item.privileges.clone(), + &Default::default(), + )?; + // We need to move the shard currently attached to the source statement to the + // table statement such that the existing data in the shard is preserved and can + // be queried on the new table statement. However, we need to keep the GlobalId of + // the source the same, to preserve existing references to that statement in + // external tools such as DBT and Terraform. 
We will insert a new shard for the source + // statement which will be automatically created after the migration is complete. + let new_source_shard = ShardId::new(); + let (source_id, existing_source_shard) = tx + .delete_collection_metadata(btreeset! {item.id}) + .pop() + .expect("shard should exist"); + tx.insert_collection_metadata(btreemap! { + new_table_id => existing_source_shard, + source_id => new_source_shard + })?; + + info!("migrate: updated source {stmt} and added table {table}"); + } + + // When we upgrade to > rust 1.81 we should use #[expect(unreachable_patterns)] + // to enforce that we have covered all CreateSourceStatement variants. + #[allow(unreachable_patterns)] + Statement::CreateSource(_) => {} _ => (), } From b4f9f8f0037ccb79db41937d475a5b8687b2ac61 Mon Sep 17 00:00:00 2001 From: Roshan Jobanputra Date: Wed, 23 Oct 2024 14:08:31 -0400 Subject: [PATCH 03/52] Rename the feature flag --- src/adapter-types/src/dyncfgs.rs | 6 ++++-- src/adapter/src/catalog/migrate.rs | 6 +++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/adapter-types/src/dyncfgs.rs b/src/adapter-types/src/dyncfgs.rs index 1ddd317c835fb..14ca83651565f 100644 --- a/src/adapter-types/src/dyncfgs.rs +++ b/src/adapter-types/src/dyncfgs.rs @@ -112,8 +112,9 @@ pub const ENABLE_EXPRESSION_CACHE: Config = Config::new( ); /// Whether to enable the migration to convert all sources to use tables. -pub const ENABLE_SOURCE_TABLE_MIGRATION: Config = Config::new( - "enable_source_table_migration", +/// TODO(#8678): This should also disable usage of the old source syntax. +pub const FORCE_SOURCE_TABLE_SYNTAX: Config = Config::new( + "force_source_table_syntax", false, "Whether to enable the migration to convert all sources to use tables.", ); @@ -136,4 +137,5 @@ pub fn all_dyncfgs(configs: ConfigSet) -> ConfigSet { .add(&ENABLE_CONTINUAL_TASK_BUILTINS) .add(&ENABLE_EXPRESSION_CACHE) .add(&ENABLE_SOURCE_TABLE_MIGRATION) + .add(&FORCE_SOURCE_TABLE_SYNTAX) } diff --git a/src/adapter/src/catalog/migrate.rs b/src/adapter/src/catalog/migrate.rs index acf7fa6b5d47d..abbf5653a42a5 100644 --- a/src/adapter/src/catalog/migrate.rs +++ b/src/adapter/src/catalog/migrate.rs @@ -119,15 +119,15 @@ pub(crate) async fn migrate( // Special block for `ast_rewrite_sources_to_tables` migration // since it requires a feature flag. 
- if let Some(config_val) = tx.get_system_config("enable_source_table_migration") { + if let Some(config_val) = tx.get_system_config("force_source_table_syntax") { let enable_migration = config_val.parse::().map_err(|e| { anyhow::anyhow!( - "could not parse enable_source_table_migration config value: {}", + "could not parse force_source_table_syntax config value: {}", e ) })?; if enable_migration { - info!("migrate: enable_source_table_migration"); + info!("migrate: force_source_table_syntax"); ast_rewrite_sources_to_tables(tx, item, stmt, all_items_and_statements)?; } } From ecb16af96039c571beb609ebd6397cc080964099 Mon Sep 17 00:00:00 2001 From: Roshan Jobanputra Date: Wed, 23 Oct 2024 16:24:18 -0400 Subject: [PATCH 04/52] Add new table to audit log --- src/adapter/src/catalog/migrate.rs | 40 ++++++++++++++++++++++++-- src/catalog/src/durable/transaction.rs | 7 +++++ 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/src/adapter/src/catalog/migrate.rs b/src/adapter/src/catalog/migrate.rs index abbf5653a42a5..155c42d8b478f 100644 --- a/src/adapter/src/catalog/migrate.rs +++ b/src/adapter/src/catalog/migrate.rs @@ -93,7 +93,7 @@ pub(crate) async fn migrate( tx: &mut Transaction<'_>, local_expr_cache: &mut LocalExpressionCache, item_updates: Vec, - _now: NowFn, + now: NowFn, _boot_ts: Timestamp, ) -> Result>, anyhow::Error> { let catalog_version = tx.get_catalog_content_version(); @@ -108,6 +108,7 @@ pub(crate) async fn migrate( ); rewrite_ast_items(tx, |tx, item, stmt, all_items_and_statements| { + let now = now.clone(); // Add per-item AST migrations below. // // Each migration should be a function that takes `stmt` (the AST @@ -128,7 +129,7 @@ pub(crate) async fn migrate( })?; if enable_migration { info!("migrate: force_source_table_syntax"); - ast_rewrite_sources_to_tables(tx, item, stmt, all_items_and_statements)?; + ast_rewrite_sources_to_tables(tx, now, item, stmt, all_items_and_statements)?; } } Ok(()) @@ -226,6 +227,7 @@ pub(crate) async fn migrate( /// fn ast_rewrite_sources_to_tables( tx: &mut Transaction<'_>, + now: NowFn, item: &mut Item, stmt: &mut Statement, all_items_and_statements: &Vec<(Item, Statement)>, @@ -576,6 +578,26 @@ fn ast_rewrite_sources_to_tables( source_id => new_source_shard })?; + let schema = tx.get_schema(&item.schema_id).expect("schema must exist"); + + add_to_audit_log( + tx, + mz_audit_log::EventType::Create, + mz_audit_log::ObjectType::Table, + mz_audit_log::EventDetails::IdFullNameV1(mz_audit_log::IdFullNameV1 { + id: new_table_id.to_string(), + name: mz_audit_log::FullNameV1 { + database: schema + .database_id + .map(|d| d.to_string()) + .unwrap_or_default(), + schema: schema.name, + item: table_item_name, + }, + }), + now(), + )?; + info!("migrate: updated source {stmt} and added table {table}"); } @@ -609,3 +631,17 @@ pub(crate) fn durable_migrate( // // Please include the adapter team on any code reviews that add or edit // migrations. 
+ +fn add_to_audit_log( + tx: &mut Transaction, + event_type: mz_audit_log::EventType, + object_type: mz_audit_log::ObjectType, + details: mz_audit_log::EventDetails, + occurred_at: mz_ore::now::EpochMillis, +) -> Result<(), anyhow::Error> { + let id = tx.get_and_increment_id(mz_catalog::durable::AUDIT_LOG_ID_ALLOC_KEY.to_string())?; + let event = + mz_audit_log::VersionedEvent::new(id, event_type, object_type, details, None, occurred_at); + tx.insert_audit_log_event(event); + Ok(()) +} diff --git a/src/catalog/src/durable/transaction.rs b/src/catalog/src/durable/transaction.rs index 544b6702f5721..1255333895ed1 100644 --- a/src/catalog/src/durable/transaction.rs +++ b/src/catalog/src/durable/transaction.rs @@ -2047,6 +2047,13 @@ impl<'a> Transaction<'a> { .map(|(k, v)| DurableType::from_key_value(k, v)) } + pub fn get_schema(&self, id: &SchemaId) -> Option { + let key = SchemaKey { id: *id }; + self.schemas + .get(&key) + .map(|v| DurableType::from_key_value(key, v.clone())) + } + pub fn get_introspection_source_indexes( &self, cluster_id: ClusterId, From d70ab1521d92049a1e0dfdf222c8cb3428fd3edb Mon Sep 17 00:00:00 2001 From: Roshan Jobanputra Date: Wed, 23 Oct 2024 16:37:47 -0400 Subject: [PATCH 05/52] Add platform check scenario to test migration --- .../checks/all_checks/source_tables.py | 1 - .../materialize/checks/all_checks/upsert.py | 56 +++++++++++++++++++ .../materialize/checks/scenarios_upgrade.py | 46 +++++++++++++++ 3 files changed, 102 insertions(+), 1 deletion(-) diff --git a/misc/python/materialize/checks/all_checks/source_tables.py b/misc/python/materialize/checks/all_checks/source_tables.py index 279ede6a4022e..c36bb1eb8af12 100644 --- a/misc/python/materialize/checks/all_checks/source_tables.py +++ b/misc/python/materialize/checks/all_checks/source_tables.py @@ -28,7 +28,6 @@ class TableFromPgSource(TableFromSourceBase): suffix = "tbl_from_pg_source" def initialize(self) -> Testdrive: - return Testdrive( self.generic_setup() + dedent( diff --git a/misc/python/materialize/checks/all_checks/upsert.py b/misc/python/materialize/checks/all_checks/upsert.py index dd7f92caf831e..739bc9d5effa9 100644 --- a/misc/python/materialize/checks/all_checks/upsert.py +++ b/misc/python/materialize/checks/all_checks/upsert.py @@ -164,3 +164,59 @@ def validate(self) -> Testdrive: """ ) ) + + +class UpsertLegacy(Check): + """ + An upsert source test that uses the legacy syntax to create the source + on all versions to ensure the source is properly migrated with the + ActivateSourceVersioningMigration scenario + """ + + def initialize(self) -> Testdrive: + return Testdrive( + schemas() + + dedent( + """ + $ kafka-create-topic topic=upsert-legacy-syntax + + $ kafka-ingest format=avro key-format=avro topic=upsert-legacy-syntax key-schema=${keyschema} schema=${schema} repeat=10000 + {"key1": "A${kafka-ingest.iteration}"} {"f1": "A${kafka-ingest.iteration}"} + + > CREATE SOURCE upsert_insert + FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-upsert-legacy-syntax-${testdrive.seed}') + FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn + ENVELOPE UPSERT + + > CREATE MATERIALIZED VIEW upsert_insert_view AS SELECT COUNT(DISTINCT key1 || ' ' || f1) FROM upsert_insert; + """ + ) + ) + + def manipulate(self) -> list[Testdrive]: + return [ + Testdrive(schemas() + dedent(s)) + for s in [ + """ + $ kafka-ingest format=avro key-format=avro topic=upsert-legacy-syntax key-schema=${keyschema} schema=${schema} repeat=10000 + {"key1": "A${kafka-ingest.iteration}"} {"f1": 
"A${kafka-ingest.iteration}"} + """, + """ + $ kafka-ingest format=avro key-format=avro topic=upsert-legacy-syntax key-schema=${keyschema} schema=${schema} repeat=10000 + {"key1": "A${kafka-ingest.iteration}"} {"f1": "A${kafka-ingest.iteration}"} + """, + ] + ] + + def validate(self) -> Testdrive: + return Testdrive( + dedent( + """ + > SELECT COUNT(*), COUNT(DISTINCT key1), COUNT(DISTINCT f1) FROM upsert_insert + 10000 10000 10000 + + > SELECT * FROM upsert_insert_view; + 10000 + """ + ) + ) diff --git a/misc/python/materialize/checks/scenarios_upgrade.py b/misc/python/materialize/checks/scenarios_upgrade.py index fe9a9bd26c470..5c404c4f0b9be 100644 --- a/misc/python/materialize/checks/scenarios_upgrade.py +++ b/misc/python/materialize/checks/scenarios_upgrade.py @@ -387,3 +387,49 @@ def actions(self) -> list[Action]: ), Validate(self), ] + + +class ActivateSourceVersioningMigration(Scenario): + """ + Starts MZ, initializes and manipulates, then forces the migration + of sources to the new table model (introducing Source Versioning). + """ + + def base_version(self) -> MzVersion: + return get_last_version() + + def actions(self) -> list[Action]: + print(f"Upgrading from tag {self.base_version()}") + return [ + StartMz( + self, + tag=self.base_version(), + ), + Initialize(self), + Manipulate(self, phase=1), + KillMz( + capture_logs=True + ), # We always use True here otherwise docker-compose will lose the pre-upgrade logs + StartMz( + self, + tag=None, + # Activate the `force_source_table_syntax` flag + # which should trigger the migration of sources + # using the old syntax to the new table model. + additional_system_parameter_defaults={ + "force_source_table_syntax": "true", + }, + ), + Manipulate(self, phase=2), + Validate(self), + # A second restart while already on the new version + KillMz(capture_logs=True), + StartMz( + self, + tag=None, + additional_system_parameter_defaults={ + "force_source_table_syntax": "true", + }, + ), + Validate(self), + ] From 35236b0cb8a8ef1bd41c99413e5053f2b622cec0 Mon Sep 17 00:00:00 2001 From: Roshan Jobanputra Date: Thu, 24 Oct 2024 10:00:47 -0400 Subject: [PATCH 06/52] Switch to using system vars instead of flags to allow console access to values m --- src/adapter-types/src/dyncfgs.rs | 9 --------- src/adapter/src/catalog/migrate.rs | 16 +++++----------- 2 files changed, 5 insertions(+), 20 deletions(-) diff --git a/src/adapter-types/src/dyncfgs.rs b/src/adapter-types/src/dyncfgs.rs index 14ca83651565f..0ab56c9b41144 100644 --- a/src/adapter-types/src/dyncfgs.rs +++ b/src/adapter-types/src/dyncfgs.rs @@ -111,14 +111,6 @@ pub const ENABLE_EXPRESSION_CACHE: Config = Config::new( "Use a cache to store optimized expressions to help speed up start times.", ); -/// Whether to enable the migration to convert all sources to use tables. -/// TODO(#8678): This should also disable usage of the old source syntax. -pub const FORCE_SOURCE_TABLE_SYNTAX: Config = Config::new( - "force_source_table_syntax", - false, - "Whether to enable the migration to convert all sources to use tables.", -); - /// Adds the full set of all compute `Config`s. 
pub fn all_dyncfgs(configs: ConfigSet) -> ConfigSet { configs @@ -137,5 +129,4 @@ pub fn all_dyncfgs(configs: ConfigSet) -> ConfigSet { .add(&ENABLE_CONTINUAL_TASK_BUILTINS) .add(&ENABLE_EXPRESSION_CACHE) .add(&ENABLE_SOURCE_TABLE_MIGRATION) - .add(&FORCE_SOURCE_TABLE_SYNTAX) } diff --git a/src/adapter/src/catalog/migrate.rs b/src/adapter/src/catalog/migrate.rs index 155c42d8b478f..923c733b9c395 100644 --- a/src/adapter/src/catalog/migrate.rs +++ b/src/adapter/src/catalog/migrate.rs @@ -107,6 +107,8 @@ pub(crate) async fn migrate( catalog_version ); + let enable_source_table_migration = state.system_config().force_source_table_syntax(); + rewrite_ast_items(tx, |tx, item, stmt, all_items_and_statements| { let now = now.clone(); // Add per-item AST migrations below. @@ -118,20 +120,12 @@ pub(crate) async fn migrate( // Migration functions may also take `tx` as input to stage // arbitrary changes to the catalog. - // Special block for `ast_rewrite_sources_to_tables` migration - // since it requires a feature flag. - if let Some(config_val) = tx.get_system_config("force_source_table_syntax") { - let enable_migration = config_val.parse::().map_err(|e| { - anyhow::anyhow!( - "could not parse force_source_table_syntax config value: {}", - e - ) - })?; - if enable_migration { + // Special block for `ast_rewrite_sources_to_tables` migration + // since it requires a feature flag. + if enable_source_table_migration { info!("migrate: force_source_table_syntax"); ast_rewrite_sources_to_tables(tx, now, item, stmt, all_items_and_statements)?; } - } Ok(()) })?; From 969e722ca2565718a76bc8abbde5fd5d54f2e2f8 Mon Sep 17 00:00:00 2001 From: Roshan Jobanputra Date: Thu, 24 Oct 2024 14:37:21 -0400 Subject: [PATCH 07/52] Fixes to migration based on testing --- src/adapter/src/catalog/migrate.rs | 916 +++++++++++------- src/adapter/src/catalog/open.rs | 30 +- src/sql-parser/src/ast/defs/ddl.rs | 18 + src/sql/src/names.rs | 39 + .../src/sources/load_generator.rs | 2 + 5 files changed, 624 insertions(+), 381 deletions(-) diff --git a/src/adapter/src/catalog/migrate.rs b/src/adapter/src/catalog/migrate.rs index 923c733b9c395..ff21c13ce89fb 100644 --- a/src/adapter/src/catalog/migrate.rs +++ b/src/adapter/src/catalog/migrate.rs @@ -11,11 +11,12 @@ use std::collections::BTreeMap; use mz_catalog::builtin::BuiltinTable; use mz_catalog::durable::{Transaction, Item}; -use mz_catalog::memory::objects::StateUpdate; +use mz_catalog::memory::objects::{StateUpdate,BootstrapStateUpdateKind}; use mz_ore::collections::CollectionExt; use mz_ore::now::NowFn; use mz_repr::{CatalogItemId, Timestamp}; use mz_sql::ast::display::AstDisplay; +use mz_sql::names::FullItemName; use mz_sql_parser::ast::{Raw, Statement}; use semver::Version; use tracing::info; @@ -29,26 +30,15 @@ fn rewrite_ast_items(tx: &mut Transaction<'_>, mut f: F) -> Result<(), anyhow where F: for<'a> FnMut( &'a mut Transaction<'_>, - &'a mut Item, + GlobalId, &'a mut Statement, - &'a Vec<(Item, Statement)>, ) -> Result<(), anyhow::Error>, { let mut updated_items = BTreeMap::new(); - let items_with_statements = tx - .get_items() - .map(|item| { - let stmt = mz_sql::parse::parse(&item.create_sql)?.into_element().ast; - Ok((item, stmt)) - }) - .collect::, anyhow::Error>>()?; - // Clone this vec to be referenced within the closure if needed - let items_with_statements_ref = items_with_statements.clone(); - for (mut item, mut stmt) in items_with_statements { + for mut item in tx.get_items() { let mut stmt = mz_sql::parse::parse(&item.create_sql)?.into_element().ast; - f(tx, 
&mut item, &mut stmt, &items_with_statements_ref)?; - + f(tx, item.id, &mut stmt).await?; item.create_sql = stmt.to_ast_string_stable(); updated_items.insert(item.id, item); @@ -85,6 +75,11 @@ where Ok(()) } +pub(crate) struct MigrateResult { + pub(crate) builtin_table_updates: Vec>, + pub(crate) post_item_updates: Vec<(BootstrapStateUpdateKind, Timestamp, i64)>, +} + /// Migrates all user items and loads them into `state`. /// /// Returns the builtin updates corresponding to all user items. @@ -95,7 +90,7 @@ pub(crate) async fn migrate( item_updates: Vec, now: NowFn, _boot_ts: Timestamp, -) -> Result>, anyhow::Error> { +) -> Result { let catalog_version = tx.get_catalog_content_version(); let catalog_version = match catalog_version { Some(v) => Version::parse(&v)?, @@ -107,7 +102,11 @@ pub(crate) async fn migrate( catalog_version ); - let enable_source_table_migration = state.system_config().force_source_table_syntax(); + // Special block for `ast_rewrite_sources_to_tables` migration + // since it requires a feature flag needs to update multiple AST items at once. + if state.system_config().force_source_table_syntax() { + ast_rewrite_sources_to_tables(tx, now)?; + } rewrite_ast_items(tx, |tx, item, stmt, all_items_and_statements| { let now = now.clone(); @@ -120,12 +119,6 @@ pub(crate) async fn migrate( // Migration functions may also take `tx` as input to stage // arbitrary changes to the catalog. - // Special block for `ast_rewrite_sources_to_tables` migration - // since it requires a feature flag. - if enable_source_table_migration { - info!("migrate: force_source_table_syntax"); - ast_rewrite_sources_to_tables(tx, now, item, stmt, all_items_and_statements)?; - } Ok(()) })?; @@ -137,6 +130,18 @@ pub(crate) async fn migrate( let op_item_updates = into_consolidatable_updates_startup(op_item_updates, commit_ts); item_updates.extend(op_item_updates); differential_dataflow::consolidation::consolidate_updates(&mut item_updates); + + // Since some migrations might introduce non-item 'post-item' updates, we sequester those + // so they can be applied with other post-item updates after migrations to avoid + // accumulating negative diffs. + let (post_item_updates, item_updates): (Vec<_>, Vec<_>) = item_updates + .into_iter() + // The only post-item update kind we currently generate is to + // update storage collection metadata. + .partition(|(kind, _, _)| { + matches!(kind, BootstrapStateUpdateKind::StorageCollectionMetadata(_)) + }); + let item_updates = item_updates .into_iter() .map(|(kind, ts, diff)| StateUpdate { @@ -188,7 +193,10 @@ pub(crate) async fn migrate( "migration from catalog version {:?} complete", catalog_version ); - Ok(ast_builtin_table_updates) + Ok(MigrateResult { + builtin_table_updates: ast_builtin_table_updates, + post_item_updates, + }) } // Add new migrations below their appropriate heading, and precede them with a @@ -208,23 +216,20 @@ pub(crate) async fn migrate( /// MySQL, and multi-output (tpch, auction, marketing) load-generator subsources. /// /// Second we migrate existing `CREATE SOURCE` statements for these multi-output -/// sources to remove their subsource qualification option (e.g. FOR ALL TABLES..) -/// and any subsource-specific options (e.g. TEXT COLUMNS). +/// sources to remove any subsource-specific options (e.g. TEXT COLUMNS). /// -/// Third we migrate existing `CREATE SOURCE` statements that refer to a single -/// output collection. This includes existing Kafka and single-output load-generator -/// subsources. 
These statements will generate an additional `CREATE TABLE .. FROM SOURCE` -/// statement that copies over all the export-specific options. This statement will be tied -/// to the existing statement's persist collection, but using a new GlobalID. The original -/// source statement will be updated to remove the export-specific options and will be -/// renamed to `_source` while keeping its same GlobalId. +/// Third we migrate existing single-output `CREATE SOURCE` statements. +/// This includes existing Kafka and single-output load-generator +/// subsources. This will generate an additional `CREATE TABLE .. FROM SOURCE` +/// statement that copies over all the export-specific options. This table will use +/// to the existing source statement's persist shard but use a new GlobalID. +/// The original source statement will be updated to remove the export-specific options, +/// renamed to `_source`, and use a new empty shard while keeping its +/// same GlobalId. /// fn ast_rewrite_sources_to_tables( tx: &mut Transaction<'_>, now: NowFn, - item: &mut Item, - stmt: &mut Statement, - all_items_and_statements: &Vec<(Item, Statement)>, ) -> Result<(), anyhow::Error> { use maplit::btreemap; use maplit::btreeset; @@ -232,374 +237,543 @@ fn ast_rewrite_sources_to_tables( use mz_proto::RustType; use mz_sql::ast::{ CreateSourceConnection, CreateSourceOptionName, CreateSourceStatement, - CreateSubsourceOptionName, CreateTableFromSourceStatement, Ident, LoadGenerator, - MySqlConfigOptionName, PgConfigOptionName, RawItemName, TableFromSourceColumns, - TableFromSourceOption, TableFromSourceOptionName, Value, WithOptionValue, + CreateSubsourceOptionName, CreateSubsourceStatement, CreateTableFromSourceStatement, Ident, + KafkaSourceConfigOptionName, LoadGenerator, MySqlConfigOptionName, PgConfigOptionName, + RawItemName, TableFromSourceColumns, TableFromSourceOption, TableFromSourceOptionName, + UnresolvedItemName, Value, WithOptionValue, }; use mz_storage_client::controller::StorageTxn; use mz_storage_types::sources::load_generator::LoadGeneratorOutput; use mz_storage_types::sources::SourceExportStatementDetails; use prost::Message; - // Since some subsources have named-only references to their `of_source` and some have the - // global_id of their source, we first generate a reference mapping from all source names to - // items so we can ensure we always use an ID-based reference in the stored - // `CREATE TABLE .. FROM SOURCE` statements. - let source_name_to_item: BTreeMap<_, _> = all_items_and_statements - .iter() - .filter_map(|(item, statement)| match statement { - Statement::CreateSource(stmt) => Some((stmt.name.clone(), item)), - _ => None, + let items_with_statements = tx + .get_items() + .map(|item| { + let stmt = mz_sql::parse::parse(&item.create_sql)?.into_element().ast; + Ok((item, stmt)) }) - .collect(); + .collect::, anyhow::Error>>()?; + let items_with_statements_copied = items_with_statements.clone(); + + // Any GlobalId that should be changed to a new GlobalId in any statements that + // reference it. This is necessary for ensuring downstream statements (e.g. + // mat views, indexes) that reference a single-output source (e.g. kafka) + // will now reference the corresponding new table, with the same data, instead. + let mut changed_ids = BTreeMap::new(); + + for (mut item, stmt) in items_with_statements { + match stmt { + // Migrate each `CREATE SUBSOURCE` statement to an equivalent + // `CREATE TABLE .. FROM SOURCE` statement. 
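+ // The rewritten statement always references the source by ID (rendered roughly
+ // as `FROM SOURCE [<source-id> AS pg_src]`), even when the subsource only stored
+ // a name-based `OF SOURCE` reference.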
+ Statement::CreateSubsource(CreateSubsourceStatement { + name, + columns, + constraints, + of_source, + if_not_exists, + mut with_options, + }) => { + let raw_source_name = match of_source { + // If `of_source` is None then this is a `progress` subsource which we + // are not migrating as they are not currently relevant to the new table model. + None => return Ok(()), + Some(name) => name, + }; + let source = match raw_source_name { + // Some legacy subsources have named-only references to their `of_source` + // so we ensure we always use an ID-based reference in the stored + // `CREATE TABLE .. FROM SOURCE` statements. + RawItemName::Name(name) => { + // Convert the name reference to an ID reference. + let (source_item, _) = items_with_statements_copied + .iter() + .find(|(_, statement)| match statement { + Statement::CreateSource(stmt) => stmt.name == name, + _ => false, + }) + .expect("source must exist"); + RawItemName::Id(source_item.id.to_string(), name, None) + } + RawItemName::Id(..) => raw_source_name, + }; - // Collect any existing `CREATE TABLE .. FROM SOURCE` statements by the source they reference. - let tables_by_source: BTreeMap<_, Vec<_>> = all_items_and_statements - .iter() - .filter_map(|(_, statement)| match statement { - Statement::CreateTableFromSource(stmt) => { - let source: GlobalId = match &stmt.source { - RawItemName::Name(_) => unreachable!("tables store source as ID"), - RawItemName::Id(id, _, _) => id.parse().expect("valid id"), + // The external reference is a `with_option` on subsource statements but is a + // separate field on table statements. + let mut i = 0; + let external_reference = loop { + if i >= with_options.len() { + panic!("subsource must have an external reference"); + } + if with_options[i].name == CreateSubsourceOptionName::ExternalReference { + let option = with_options.remove(i); + match option.value { + Some(WithOptionValue::UnresolvedItemName(name)) => break name, + _ => unreachable!("external reference must be an unresolved item name"), + }; + } else { + i += 1; + } + }; + let with_options = with_options + .into_iter() + .map(|option| { + match option.name { + CreateSubsourceOptionName::Details => TableFromSourceOption { + name: TableFromSourceOptionName::Details, + // The `details` option on both subsources and tables is identical, using the same + // ProtoSourceExportStatementDetails serialized value. + value: option.value, + }, + CreateSubsourceOptionName::TextColumns => TableFromSourceOption { + name: TableFromSourceOptionName::TextColumns, + value: option.value, + }, + CreateSubsourceOptionName::ExcludeColumns => TableFromSourceOption { + name: TableFromSourceOptionName::ExcludeColumns, + value: option.value, + }, + CreateSubsourceOptionName::Progress => { + panic!("progress option should not exist on this subsource") + } + CreateSubsourceOptionName::ExternalReference => { + // This option is handled separately above. + unreachable!() + } + } + }) + .collect::>(); + + let table = CreateTableFromSourceStatement { + name, + constraints, + columns: mz_sql::ast::TableFromSourceColumns::Defined(columns), + if_not_exists, + source, + external_reference: Some(external_reference.clone()), + with_options, + // Subsources don't have `envelope`, `include_metadata`, or `format` options. 
+ envelope: None, + include_metadata: vec![], + format: None, }; - Some((source, stmt)) - } - _ => None, - }) - .fold(BTreeMap::new(), |mut acc, (source, stmt)| { - acc.entry(source).or_insert_with(Vec::new).push(stmt); - acc - }); - match stmt { - // Migrate each `CREATE SUBSOURCE` statement to an equivalent - // `CREATE TABLE .. FROM SOURCE` statement. - Statement::CreateSubsource(subsource) => { - let raw_source_name = match &subsource.of_source { - // We will not be migrating 'progress' `CREATE SUBSOURCE` statements as they - // are not currently relevant to the new table model. - None => return Ok(()), - Some(name) => name, - }; - let source = match raw_source_name { - RawItemName::Name(name) => { - // Convert the name reference to an ID reference. - let source_item = source_name_to_item.get(name).expect("source must exist"); - RawItemName::Id(source_item.id.to_string(), name.clone(), None) - } - RawItemName::Id(..) => raw_source_name.clone(), - }; - - // The external reference is a `with_option` on subsource statements but is a - // separate field on table statements. - let external_reference_option = subsource - .with_options - .iter() - .find(|o| o.name == CreateSubsourceOptionName::ExternalReference) - .expect("subsources must have external reference"); - let external_reference = match &external_reference_option.value { - Some(WithOptionValue::UnresolvedItemName(name)) => name, - _ => unreachable!("external reference must be an unresolved item name"), - }; - - // The `details` option on both subsources and tables is identical, using the same - // ProtoSourceExportStatementDetails serialized value. - let existing_details = subsource - .with_options - .iter() - .find(|o| o.name == CreateSubsourceOptionName::Details) - .expect("subsources must have details"); - - let mut with_options = vec![TableFromSourceOption { - name: TableFromSourceOptionName::Details, - value: existing_details.value.clone(), - }]; - - // TEXT COLUMNS and EXCLUDE COLUMNS options are stored on each subsource and can be copied - // directly to the table statement. - if let Some(text_cols_option) = subsource - .with_options - .iter() - .find(|option| option.name == CreateSubsourceOptionName::TextColumns) - { - with_options.push(TableFromSourceOption { - name: TableFromSourceOptionName::TextColumns, - value: text_cols_option.value.clone(), - }); - } - if let Some(exclude_cols_option) = subsource - .with_options - .iter() - .find(|option| option.name == CreateSubsourceOptionName::ExcludeColumns) - { - with_options.push(TableFromSourceOption { - name: TableFromSourceOptionName::ExcludeColumns, - value: exclude_cols_option.value.clone(), - }); + info!( + "migrate: converted subsource {} to table {}", + item.create_sql, table + ); + item.create_sql = Statement::CreateTableFromSource(table).to_ast_string_stable(); + tx.update_item(item.id, item)?; } - let table = CreateTableFromSourceStatement { - name: subsource.name.clone(), - constraints: subsource.constraints.clone(), - columns: mz_sql::ast::TableFromSourceColumns::Defined(subsource.columns.clone()), - if_not_exists: subsource.if_not_exists, - source, - external_reference: Some(external_reference.clone()), + // Postgres sources are multi-output sources whose subsources are + // migrated above. All we need to do is remove the subsource-related + // options from this statement since they are no longer relevant. + Statement::CreateSource(CreateSourceStatement { + connection: + mut conn @ (CreateSourceConnection::Postgres { .. } + | CreateSourceConnection::Yugabyte { .. 
}), + name, + if_not_exists, + in_cluster, + include_metadata, + format, + envelope, + col_names, with_options, - // Subsources don't have `envelope`, `include_metadata`, or `format` options. - envelope: None, - include_metadata: vec![], - format: None, - }; - - info!( - "migrate: converted subsource {} to table {}", - subsource, table - ); - *stmt = Statement::CreateTableFromSource(table); - } - - // Postgres sources are multi-output sources whose subsources are - // migrated above. All we need to do is remove the subsource-related - // options from this statement since they are no longer relevant. - Statement::CreateSource(CreateSourceStatement { - connection: - CreateSourceConnection::Postgres { options, .. } - | CreateSourceConnection::Yugabyte { options, .. }, - .. - }) => { - // This option storing text columns on the primary source statement is redundant - // with the option on subsource statements so can just be removed. - // This was kept for round-tripping of `CREATE SOURCE` statements that automatically - // generated subsources, which is no longer necessary. - if options - .iter() - .any(|o| matches!(o.name, PgConfigOptionName::TextColumns)) - { - options.retain(|o| !matches!(o.name, PgConfigOptionName::TextColumns)); - info!("migrate: converted postgres source {stmt} to remove subsource options"); + key_constraint, + external_references, + progress_subsource, + }) => { + let options = match &mut conn { + CreateSourceConnection::Postgres { options, .. } => options, + CreateSourceConnection::Yugabyte { options, .. } => options, + _ => unreachable!("match determined above"), + }; + // This option storing text columns on the primary source statement is redundant + // with the option on subsource statements so can just be removed. + // This was kept for round-tripping of `CREATE SOURCE` statements that automatically + // generated subsources, which is no longer necessary. + if options + .iter() + .any(|o| matches!(o.name, PgConfigOptionName::TextColumns)) + { + options.retain(|o| !matches!(o.name, PgConfigOptionName::TextColumns)); + let stmt = Statement::CreateSource(CreateSourceStatement { + connection: conn, + name, + if_not_exists, + in_cluster, + include_metadata, + format, + envelope, + col_names, + with_options, + key_constraint, + external_references, + progress_subsource, + }); + item.create_sql = stmt.to_ast_string_stable(); + tx.update_item(item.id, item)?; + info!("migrate: converted postgres source {stmt} to remove subsource options"); + } } - } - // MySQL sources are multi-output sources whose subsources are - // migrated above. All we need to do is remove the subsource-related - // options from this statement since they are no longer relevant. - Statement::CreateSource(CreateSourceStatement { - connection: CreateSourceConnection::MySql { options, .. }, - .. - }) => { - // These options storing text and exclude columns on the primary source statement - // are redundant with the options on subsource statements so can just be removed. - // They was kept for round-tripping of `CREATE SOURCE` statements that automatically - // generated subsources, which is no longer necessary. - if options.iter().any(|o| { - matches!( - o.name, - MySqlConfigOptionName::TextColumns | MySqlConfigOptionName::ExcludeColumns - ) - }) { - options.retain(|o| { - !matches!( + // MySQL sources are multi-output sources whose subsources are + // migrated above. All we need to do is remove the subsource-related + // options from this statement since they are no longer relevant. 
+ Statement::CreateSource(CreateSourceStatement { + connection: mut conn @ CreateSourceConnection::MySql { .. }, + name, + if_not_exists, + in_cluster, + include_metadata, + format, + envelope, + col_names, + with_options, + key_constraint, + external_references, + progress_subsource, + .. + }) => { + let options = match &mut conn { + CreateSourceConnection::MySql { options, .. } => options, + _ => unreachable!("match determined above"), + }; + // These options storing text and exclude columns on the primary source statement + // are redundant with the options on subsource statements so can just be removed. + // They was kept for round-tripping of `CREATE SOURCE` statements that automatically + // generated subsources, which is no longer necessary. + if options.iter().any(|o| { + matches!( o.name, MySqlConfigOptionName::TextColumns | MySqlConfigOptionName::ExcludeColumns ) - }); - info!("migrate: converted mysql source {stmt} to remove subsource options"); + }) { + options.retain(|o| { + !matches!( + o.name, + MySqlConfigOptionName::TextColumns + | MySqlConfigOptionName::ExcludeColumns + ) + }); + let stmt = Statement::CreateSource(CreateSourceStatement { + connection: conn, + name, + if_not_exists, + in_cluster, + include_metadata, + format, + envelope, + col_names, + with_options, + key_constraint, + external_references, + progress_subsource, + }); + item.create_sql = stmt.to_ast_string_stable(); + tx.update_item(item.id, item)?; + info!("migrate: converted mysql source {stmt} to remove subsource options"); + } } - } - // Multi-output load generator sources whose subsources are already - // migrated above. There is no need to remove any options from this - // statement since they are not export-specific. - Statement::CreateSource(CreateSourceStatement { - connection: - CreateSourceConnection::LoadGenerator { - generator: - LoadGenerator::Auction | LoadGenerator::Marketing | LoadGenerator::Tpch, - .. - }, - .. - }) => {} - // Single-output sources that need to be migrated to tables. These sources currently output - // data to the primary collection of the source statement. We will create a new table - // statement for them and move all export-specific options over from the source statement, - // while moving the `CREATE SOURCE` statement to a new name and moving its shard to the - // new table statement. - Statement::CreateSource(CreateSourceStatement { - connection: - conn @ (CreateSourceConnection::Kafka { .. } - | CreateSourceConnection::LoadGenerator { - generator: - LoadGenerator::Clock - | LoadGenerator::Datums - | LoadGenerator::Counter - | LoadGenerator::KeyValue, - .. - }), - name, - col_names, - include_metadata, - format, - envelope, - with_options, - .. - }) => { - // To check if this is a source that has already been migrated we use a basic - // heuristic: if there is at least one existing table for the source, and if - // the envelope/format/include_metadata options are empty, we assume it's - // already been migrated. - if let Some(existing_tables) = tables_by_source.get(&item.id) { - if !existing_tables.is_empty() + // Multi-output load generator sources whose subsources are already + // migrated above. There is no need to remove any options from this + // statement since they are not export-specific. + Statement::CreateSource(CreateSourceStatement { + connection: + CreateSourceConnection::LoadGenerator { + generator: + LoadGenerator::Auction | LoadGenerator::Marketing | LoadGenerator::Tpch, + .. + }, + .. 
+ }) => {} + // Single-output sources that need to be migrated to tables. These sources currently output + // data to the primary collection of the source statement. We will create a new table + // statement for them and move all export-specific options over from the source statement, + // while moving the `CREATE SOURCE` statement to a new name and moving its shard to the + // new table statement. + Statement::CreateSource(CreateSourceStatement { + connection: + conn @ (CreateSourceConnection::Kafka { .. } + | CreateSourceConnection::LoadGenerator { + generator: + LoadGenerator::Clock + | LoadGenerator::Datums + | LoadGenerator::Counter + | LoadGenerator::KeyValue, + .. + }), + name, + col_names, + include_metadata, + format, + envelope, + mut with_options, + if_not_exists, + in_cluster, + progress_subsource, + external_references, + key_constraint, + }) => { + // To check if this is a source that has already been migrated we use a basic + // heuristic: if there is at least one existing table for the source, and if + // the envelope/format/include_metadata options are empty, we assume it's + // already been migrated. + let tables_for_source = + items_with_statements_copied + .iter() + .any(|(item, statement)| match statement { + Statement::CreateTableFromSource(stmt) => { + let source: GlobalId = match &stmt.source { + RawItemName::Name(_) => { + unreachable!("tables store source as ID") + } + RawItemName::Id(source_id, _, _) => { + source_id.parse().expect("valid id") + } + }; + source == item.id + } + _ => false, + }); + if tables_for_source && envelope.is_none() && format.is_none() && include_metadata.is_empty() { + info!("migrate: skipping already migrated source: {}", name); return Ok(()); } - } - // Use the current source name as the new table name, and rename the source to - // `_source`. This is intended to allow users to continue using - // queries that reference the source name, since they will now need to query the - // table instead. - let new_source_name_ident = Ident::new_unchecked( - name.0.last().expect("at least one ident").to_string() + "_source", - ); - let mut new_source_name = name.clone(); - *new_source_name.0.last_mut().expect("at least one ident") = new_source_name_ident; - let table_name = std::mem::replace(name, new_source_name.clone()); - - // Also update the name of the source 'item' - let mut table_item_name = item.name.clone() + "_source"; - std::mem::swap(&mut item.name, &mut table_item_name); - - // A reference to the source that will be included in the table statement - let source_ref = RawItemName::Id(item.id.to_string(), new_source_name, None); - - let columns = if col_names.is_empty() { - TableFromSourceColumns::NotSpecified - } else { - TableFromSourceColumns::Named(col_names.drain(..).collect()) - }; - - // All source tables must have a `details` option, which is a serialized proto - // describing any source-specific details for this table statement. - let details = match conn { - // For kafka sources this proto is currently empty. - CreateSourceConnection::Kafka { .. } => SourceExportStatementDetails::Kafka {}, - CreateSourceConnection::LoadGenerator { .. } => { - // Since these load generators are single-output we use the default output. - SourceExportStatementDetails::LoadGenerator { - output: LoadGeneratorOutput::Default, + // Use the current source name as the new table name, and rename the source to + // `_source`. 
This is intended to allow users to continue using + // queries that reference the source name, since they will now need to query the + // table instead. + let new_source_name_ident = Ident::new_unchecked( + name.0.last().expect("at least one ident").to_string() + "_source", + ); + let mut new_source_name = name.clone(); + *new_source_name.0.last_mut().expect("at least one ident") = new_source_name_ident; + + // Also update the name of the source 'item' + let mut table_item_name = item.name.clone() + "_source"; + std::mem::swap(&mut item.name, &mut table_item_name); + + // A reference to the source that will be included in the table statement + let source_ref = + RawItemName::Id(item.id.to_string(), new_source_name.clone(), None); + + let columns = if col_names.is_empty() { + TableFromSourceColumns::NotSpecified + } else { + TableFromSourceColumns::Named(col_names) + }; + + // All source tables must have a `details` option, which is a serialized proto + // describing any source-specific details for this table statement. + let details = match &conn { + // For kafka sources this proto is currently empty. + CreateSourceConnection::Kafka { .. } => SourceExportStatementDetails::Kafka {}, + CreateSourceConnection::LoadGenerator { .. } => { + // Since these load generators are single-output we use the default output. + SourceExportStatementDetails::LoadGenerator { + output: LoadGeneratorOutput::Default, + } + } + _ => unreachable!("match determined above"), + }; + let mut table_with_options = vec![TableFromSourceOption { + name: TableFromSourceOptionName::Details, + value: Some(WithOptionValue::Value(Value::String(hex::encode( + details.into_proto().encode_to_vec(), + )))), + }]; + + // Move over the IgnoreKeys option if it exists. + let mut i = 0; + while i < with_options.len() { + if with_options[i].name == CreateSourceOptionName::IgnoreKeys { + let option = with_options.remove(i); + table_with_options.push(TableFromSourceOption { + name: TableFromSourceOptionName::IgnoreKeys, + value: option.value, + }); + } else { + i += 1; } } - _ => unreachable!("match determined above"), - }; - let mut table_with_options = vec![TableFromSourceOption { - name: TableFromSourceOptionName::Details, - value: Some(WithOptionValue::Value(Value::String(hex::encode( - details.into_proto().encode_to_vec(), - )))), - }]; - - // Move over the IgnoreKeys option if it exists. - let mut i = 0; - while i < with_options.len() { - if with_options[i].name == CreateSourceOptionName::IgnoreKeys { - let option = with_options.remove(i); - table_with_options.push(TableFromSourceOption { - name: TableFromSourceOptionName::IgnoreKeys, - value: option.value, - }); - } else { - i += 1; + // Move over the Timeline option if it exists. + i = 0; + while i < with_options.len() { + if with_options[i].name == CreateSourceOptionName::Timeline { + let option = with_options.remove(i); + table_with_options.push(TableFromSourceOption { + name: TableFromSourceOptionName::Timeline, + value: option.value, + }); + } else { + i += 1; + } } - } - // Move over the Timeline option if it exists. - i = 0; - while i < with_options.len() { - if with_options[i].name == CreateSourceOptionName::Timeline { - let option = with_options.remove(i); - table_with_options.push(TableFromSourceOption { - name: TableFromSourceOptionName::Timeline, - value: option.value, - }); - } else { - i += 1; - } - } + // Generate the same external-reference that would have been generated + // during purification for single-output sources. 
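// A brief, hedged recap of the option handling above: the `Details` table
// option is the protobuf-serialized `SourceExportStatementDetails` for this
// export (empty for Kafka, the default output for these load generators),
// hex-encoded into a plain string value, presumably so that it round-trips
// cleanly through the stable AST string. `IgnoreKeys` and `Timeline` are the
// only source-level `WITH` options relocated onto the new table; whatever else
// remains in `with_options` stays behind on the renamed source statement.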
+ let external_reference = match &conn { + // For kafka sources this proto is currently empty. + CreateSourceConnection::Kafka { options, .. } => { + let topic_option = options + .iter() + .find(|o| matches!(o.name, KafkaSourceConfigOptionName::Topic)) + .expect("kafka sources must have a topic"); + let topic = match &topic_option.value { + Some(WithOptionValue::Value(Value::String(topic))) => topic, + _ => unreachable!("topic must be a string"), + }; + + Some(UnresolvedItemName::qualified(&[Ident::new(topic)?])) + } + CreateSourceConnection::LoadGenerator { generator, .. } => { + // Since these load generators are single-output the external reference + // uses the schema-name for both namespace and name. + let name = FullItemName { + database: mz_sql::names::RawDatabaseSpecifier::Name( + mz_storage_types::sources::load_generator::LOAD_GENERATOR_DATABASE_NAME + .to_owned(), + ), + schema: generator.schema_name().to_string(), + item: generator.schema_name().to_string(), + }; + Some(UnresolvedItemName::from(name)) + } + _ => unreachable!("match determined above"), + }; - // The new table statement, stealing the export-specific options from the - // create source statement. - let table = CreateTableFromSourceStatement { - name: table_name, - constraints: vec![], - columns, - if_not_exists: false, - source: source_ref, - // external_reference is not required for single-output sources - external_reference: None, - with_options: table_with_options, - envelope: envelope.take(), - include_metadata: include_metadata.drain(..).collect(), - format: format.take(), - }; - - // Insert the new table statement into the catalog with a new id. - let ids = tx.allocate_user_item_ids(1)?; - let new_table_id = ids[0]; - tx.insert_user_item( - new_table_id, - item.schema_id.clone(), - &table_item_name, - table.to_ast_string_stable(), - item.owner_id.clone(), - item.privileges.clone(), - &Default::default(), - )?; - // We need to move the shard currently attached to the source statement to the - // table statement such that the existing data in the shard is preserved and can - // be queried on the new table statement. However, we need to keep the GlobalId of - // the source the same, to preserve existing references to that statement in - // external tools such as DBT and Terraform. We will insert a new shard for the source - // statement which will be automatically created after the migration is complete. - let new_source_shard = ShardId::new(); - let (source_id, existing_source_shard) = tx - .delete_collection_metadata(btreeset! {item.id}) - .pop() - .expect("shard should exist"); - tx.insert_collection_metadata(btreemap! { - new_table_id => existing_source_shard, - source_id => new_source_shard - })?; - - let schema = tx.get_schema(&item.schema_id).expect("schema must exist"); - - add_to_audit_log( - tx, - mz_audit_log::EventType::Create, - mz_audit_log::ObjectType::Table, - mz_audit_log::EventDetails::IdFullNameV1(mz_audit_log::IdFullNameV1 { - id: new_table_id.to_string(), - name: mz_audit_log::FullNameV1 { - database: schema - .database_id - .map(|d| d.to_string()) - .unwrap_or_default(), - schema: schema.name, - item: table_item_name, - }, - }), - now(), - )?; + // The new table statement, stealing the name and the export-specific fields from + // the create source statement. 
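// A hedged, end-to-end illustration of what this arm produces for a Kafka
// source (names, the topic, and the exact rendering of clauses are assumptions
// for illustration only): a pre-migration
//
//   CREATE SOURCE kafka_src FROM KAFKA CONNECTION kafka_conn (TOPIC 'events')
//       FORMAT JSON ENVELOPE NONE;
//
// roughly becomes the pair
//
//   CREATE SOURCE kafka_src_source FROM KAFKA CONNECTION kafka_conn (TOPIC 'events');
//   CREATE TABLE kafka_src FROM SOURCE kafka_src_source (REFERENCE "events")
//       FORMAT JSON ENVELOPE NONE;
//
// where the table keeps the original name and the data-bearing persist shard,
// and the renamed source keeps its original GlobalId. (The real table
// statement references the source by catalog ID rather than by name, via the
// `source_ref` built above.)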
+ let table = CreateTableFromSourceStatement { + name, + constraints: vec![], + columns, + if_not_exists: false, + source: source_ref, + external_reference, + with_options: table_with_options, + envelope, + include_metadata, + format, + }; - info!("migrate: updated source {stmt} and added table {table}"); + // The source statement with a new name and many of its fields emptied + let source = CreateSourceStatement { + connection: conn, + name: new_source_name, + if_not_exists, + in_cluster, + include_metadata: vec![], + format: None, + envelope: None, + col_names: vec![], + with_options, + key_constraint, + external_references, + progress_subsource, + }; + + let source_id = item.id; + let schema_id = item.schema_id.clone(); + let schema = tx.get_schema(&item.schema_id).expect("schema must exist"); + + let owner_id = item.owner_id.clone(); + let privileges = item.privileges.clone(); + + // Update the source statement in the catalog first, since the name will + // otherwise conflict with the new table statement. + info!("migrate: updated source {} to {source}", item.create_sql); + item.create_sql = Statement::CreateSource(source).to_ast_string_stable(); + tx.update_item(item.id, item)?; + + // Insert the new table statement into the catalog with a new id. + let ids = tx.allocate_user_item_ids(1)?; + let new_table_id = ids[0]; + info!("migrate: added table {new_table_id}: {table}"); + tx.insert_user_item( + new_table_id, + schema_id, + &table_item_name, + table.to_ast_string_stable(), + owner_id, + privileges, + &Default::default(), + )?; + // We need to move the shard currently attached to the source statement to the + // table statement such that the existing data in the shard is preserved and can + // be queried on the new table statement. However, we need to keep the GlobalId of + // the source the same, to preserve existing references to that statement in + // external tools such as DBT and Terraform. We will insert a new shard for the source + // statement which will be automatically created after the migration is complete. + let new_source_shard = ShardId::new(); + let (source_id, existing_source_shard) = tx + .delete_collection_metadata(btreeset! {source_id}) + .pop() + .expect("shard should exist"); + tx.insert_collection_metadata(btreemap! { + new_table_id => existing_source_shard, + source_id => new_source_shard + })?; + + add_to_audit_log( + tx, + mz_audit_log::EventType::Create, + mz_audit_log::ObjectType::Table, + mz_audit_log::EventDetails::IdFullNameV1(mz_audit_log::IdFullNameV1 { + id: new_table_id.to_string(), + name: mz_audit_log::FullNameV1 { + database: schema + .database_id + .map(|d| d.to_string()) + .unwrap_or_default(), + schema: schema.name, + item: table_item_name, + }, + }), + now(), + )?; + + // We also need to update any other statements that reference the source to use the new + // table id/name instead. + changed_ids.insert(source_id, new_table_id); + } + + // When we upgrade to > rust 1.81 we should use #[expect(unreachable_patterns)] + // to enforce that we have covered all CreateSourceStatement variants. + #[allow(unreachable_patterns)] + Statement::CreateSource(_) => {} + _ => (), } + } - // When we upgrade to > rust 1.81 we should use #[expect(unreachable_patterns)] - // to enforce that we have covered all CreateSourceStatement variants. 
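// The collection-metadata swap above is what lets the new table keep serving
// the data already written by the source. A self-contained sketch of that
// bookkeeping, using a plain map and string IDs as stand-ins for the catalog
// transaction and real shard IDs (assumptions for illustration only):

fn hand_off_shard(
    collection_metadata: &mut std::collections::BTreeMap<String, String>,
    source_id: &str,
    new_table_id: &str,
    fresh_shard: String,
) {
    // The table inherits the shard that already holds the source's data...
    let existing_shard = collection_metadata
        .remove(source_id)
        .expect("source shard exists");
    collection_metadata.insert(new_table_id.to_string(), existing_shard);
    // ...while the source is pointed at a brand-new, empty shard.
    collection_metadata.insert(source_id.to_string(), fresh_shard);
}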
- #[allow(unreachable_patterns)] - Statement::CreateSource(_) => {} - _ => (), + let mut updated_items = BTreeMap::new(); + for (mut item, mut statement) in items_with_statements_copied { + match &statement { + // Don’t rewrite any of the statements we just migrated. + Statement::CreateSource(_) => {} + Statement::CreateSubsource(_) => {} + Statement::CreateTableFromSource(_) => {} + // We need to rewrite any statements that reference a source id to use the new + // table id instead, since any contained data in the source will now be in the table. + // This assumes the table has stolen the source's name, which is the case + // for all sources that were migrated. + _ => { + if mz_sql::names::modify_dependency_item_ids(&mut statement, &changed_ids) { + item.create_sql = statement.to_ast_string_stable(); + updated_items.insert(item.id, item); + } + } + } + } + if !updated_items.is_empty() { + tx.update_items(updated_items)?; } Ok(()) diff --git a/src/adapter/src/catalog/open.rs b/src/adapter/src/catalog/open.rs index eab051fd2057b..9f563aa3189ae 100644 --- a/src/adapter/src/catalog/open.rs +++ b/src/adapter/src/catalog/open.rs @@ -413,11 +413,7 @@ impl Catalog { | BootstrapStateUpdateKind::StorageCollectionMetadata(_) | BootstrapStateUpdateKind::SourceReferences(_) | BootstrapStateUpdateKind::UnfinalizedShard(_) => { - post_item_updates.push(StateUpdate { - kind: kind.into(), - ts, - diff, - }) + post_item_updates.push((kind, ts, diff)); } BootstrapStateUpdateKind::AuditLog(_) => { audit_log_updates.push(StateUpdate { @@ -501,7 +497,7 @@ impl Catalog { // Migrate item ASTs. let builtin_table_update = if !config.skip_migrations { - migrate::migrate( + let migrate_result = migrate::migrate( &mut state, &mut txn, &mut local_expr_cache, @@ -516,7 +512,15 @@ impl Catalog { this_version: config.build_info.version, cause: e.to_string(), }) - })? + })?; + if !migrate_result.post_item_updates.is_empty() { + // Include any post-item-updates generated by migrations, and then consolidate + // them to ensure diffs are all positive. + post_item_updates.extend(migrate_result.post_item_updates); + differential_dataflow::consolidation::consolidate_updates(&mut post_item_updates); + } + + migrate_result.builtin_table_updates } else { state .apply_updates_for_bootstrap(item_updates, &mut local_expr_cache) @@ -524,9 +528,15 @@ impl Catalog { }; builtin_table_updates.extend(builtin_table_update); - let builtin_table_update = state - .apply_updates_for_bootstrap(post_item_updates, &mut local_expr_cache) - .await; + let post_item_updates = post_item_updates + .into_iter() + .map(|(kind, ts, diff)| StateUpdate { + kind: kind.into(), + ts, + diff: diff.try_into().expect("valid diff"), + }) + .collect(); + let builtin_table_update = state.apply_updates_for_bootstrap(post_item_updates, &mut local_expr_cache).await; builtin_table_updates.extend(builtin_table_update); // We don't need to apply the audit logs in memory, yet apply can be expensive when the diff --git a/src/sql-parser/src/ast/defs/ddl.rs b/src/sql-parser/src/ast/defs/ddl.rs index 7bffdcc990681..573aa56f4dfce 100644 --- a/src/sql-parser/src/ast/defs/ddl.rs +++ b/src/sql-parser/src/ast/defs/ddl.rs @@ -1294,6 +1294,24 @@ impl AstDisplay for LoadGenerator { } impl_display!(LoadGenerator); +impl LoadGenerator { + /// Corresponds with the same mapping on the `LoadGenerator` enum defined in + /// src/storage-types/src/sources/load_generator.rs, but re-defined here for + /// cases where we only have the AST representation. 
This can be removed once + /// the `ast_rewrite_sources_to_tables` migration is removed. + pub fn schema_name(&self) -> &'static str { + match self { + LoadGenerator::Counter => "counter", + LoadGenerator::Clock => "clock", + LoadGenerator::Marketing => "marketing", + LoadGenerator::Auction => "auction", + LoadGenerator::Datums => "datums", + LoadGenerator::Tpch => "tpch", + LoadGenerator::KeyValue => "key_value", + } + } +} + #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum LoadGeneratorOptionName { ScaleFactor, diff --git a/src/sql/src/names.rs b/src/sql/src/names.rs index 7af22528fce85..2eb24322e83b1 100644 --- a/src/sql/src/names.rs +++ b/src/sql/src/names.rs @@ -24,6 +24,7 @@ use mz_repr::network_policy_id::NetworkPolicyId; use mz_repr::role_id::RoleId; use mz_repr::{CatalogItemId, GlobalId, RelationVersion}; use mz_repr::{ColumnName, RelationVersionSelector}; +use mz_sql_parser::ast::visit_mut::VisitMutNode; use mz_sql_parser::ast::{CreateContinualTaskStatement, Expr, RawNetworkPolicyName, Version}; use mz_sql_parser::ident; use proptest_derive::Arbitrary; @@ -2386,6 +2387,44 @@ where ResolvedIds::new(visitor.ids) } +#[derive(Debug)] +pub struct ItemDependencyModifier<'a> { + pub modified: bool, + pub id_map: &'a BTreeMap, +} + +impl<'ast, 'a> VisitMut<'ast, Raw> for ItemDependencyModifier<'a> { + fn visit_item_name_mut(&mut self, item_name: &mut RawItemName) { + if let RawItemName::Id(id, _, _) = item_name { + let parsed_id = id.parse::().unwrap(); + if let Some(new_id) = self.id_map.get(&parsed_id) { + *id = new_id.to_string(); + self.modified = true; + } + } + } +} + +/// Updates any references in the provided AST node that are keys in `id_map`. +/// If an id is found it will be updated to the value of the key in `id_map`. +/// This assumes the names of the reference(s) are unmodified (e.g. each pair of +/// ids refer to an item of the same name, whose id has changed). +pub fn modify_dependency_item_ids<'ast, N>( + node: &'ast mut N, + id_map: &BTreeMap, +) -> bool +where + N: VisitMutNode<'ast, Raw>, +{ + let mut modifier = ItemDependencyModifier { + id_map, + modified: false, + }; + node.visit_mut(&mut modifier); + + modifier.modified +} + // Used when displaying a view's source for human creation. If the name // specified is the same as the name in the catalog, we don't use the ID format. #[derive(Debug)] diff --git a/src/storage-types/src/sources/load_generator.rs b/src/storage-types/src/sources/load_generator.rs index 3d9dd471cd71a..0d6ea646992f1 100644 --- a/src/storage-types/src/sources/load_generator.rs +++ b/src/storage-types/src/sources/load_generator.rs @@ -196,6 +196,8 @@ pub enum LoadGenerator { pub const LOAD_GENERATOR_DATABASE_NAME: &str = "mz_load_generators"; impl LoadGenerator { + /// Must be kept in-sync with the same mapping on the `LoadGenerator` enum defined in + /// src/sql-parser/src/ast/defs/ddl.rs. pub fn schema_name(&self) -> &'static str { match self { LoadGenerator::Counter { .. 
} => "counter", From 0d0bf92d73d5a25233c7bc0304357a837d36d141 Mon Sep 17 00:00:00 2001 From: Roshan Jobanputra Date: Thu, 24 Oct 2024 14:37:35 -0400 Subject: [PATCH 08/52] Also test the migration in the legacy upgrade tests --- misc/python/materialize/mzcompose/__init__.py | 5 +- test/legacy-upgrade/mzcompose.py | 54 +++++++++++++++++-- 2 files changed, 53 insertions(+), 6 deletions(-) diff --git a/misc/python/materialize/mzcompose/__init__.py b/misc/python/materialize/mzcompose/__init__.py index 561e68719a92e..c54d577571f08 100644 --- a/misc/python/materialize/mzcompose/__init__.py +++ b/misc/python/materialize/mzcompose/__init__.py @@ -53,7 +53,9 @@ def get_default_system_parameters( - version: MzVersion | None = None, zero_downtime: bool = False + version: MzVersion | None = None, + zero_downtime: bool = False, + force_source_table_syntax: bool = False, ) -> dict[str, str]: """For upgrade tests we only want parameters set when all environmentd / clusterd processes have reached a specific version (or higher) @@ -125,6 +127,7 @@ def get_default_system_parameters( "persist_record_schema_id": ( "true" if version > MzVersion.parse_mz("v0.127.0-dev") else "false" ), + "force_source_table_syntax": "true" if force_source_table_syntax else "false", "persist_batch_columnar_format": "both_v2", "persist_batch_delete_enabled": "true", "persist_batch_structured_order": "true", diff --git a/test/legacy-upgrade/mzcompose.py b/test/legacy-upgrade/mzcompose.py index a839aabe7e23b..6f88e272d3011 100644 --- a/test/legacy-upgrade/mzcompose.py +++ b/test/legacy-upgrade/mzcompose.py @@ -116,20 +116,56 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: else: if parallelism_count == 1 or parallelism_index == 0: test_upgrade_from_version( - c, f"{version}", priors, filter=args.filter, zero_downtime=True + c, + f"{version}", + priors, + filter=args.filter, + zero_downtime=True, + force_source_table_syntax=False, ) if parallelism_count == 1 or parallelism_index == 1: test_upgrade_from_version( - c, f"{version}", priors, filter=args.filter, zero_downtime=False + c, + f"{version}", + priors, + filter=args.filter, + zero_downtime=False, + force_source_table_syntax=False, + ) + test_upgrade_from_version( + c, + f"{version}", + priors, + filter=args.filter, + zero_downtime=False, + force_source_table_syntax=True, ) if parallelism_count == 1 or parallelism_index == 0: test_upgrade_from_version( - c, "current_source", priors=[], filter=args.filter, zero_downtime=True + c, + "current_source", + priors=[], + filter=args.filter, + zero_downtime=True, + force_source_table_syntax=False, ) if parallelism_count == 1 or parallelism_index == 1: test_upgrade_from_version( - c, "current_source", priors=[], filter=args.filter, zero_downtime=False + c, + "current_source", + priors=[], + filter=args.filter, + zero_downtime=False, + force_source_table_syntax=False, + ) + test_upgrade_from_version( + c, + "current_source", + priors=[], + filter=args.filter, + zero_downtime=False, + force_source_table_syntax=True, ) @@ -152,13 +188,14 @@ def test_upgrade_from_version( priors: list[MzVersion], filter: str, zero_downtime: bool, + force_source_table_syntax: bool, ) -> None: print( f"+++ Testing {'0dt upgrade' if zero_downtime else 'regular upgrade'} from Materialize {from_version} to current_source." 
) system_parameter_defaults = get_default_system_parameters( - zero_downtime=zero_downtime + zero_downtime=zero_downtime, ) deploy_generation = 0 @@ -288,6 +325,13 @@ def test_upgrade_from_version( c.rm(mz_service) print(f"{'0dt-' if zero_downtime else ''}Upgrading to final version") + system_parameter_defaults = get_default_system_parameters( + zero_downtime=zero_downtime, + # We can only force the syntax on the final version so that the migration to convert + # sources to the new model can be applied without preventing sources from being + # created in the old syntax on the older version. + force_source_table_syntax=force_source_table_syntax, + ) mz_to = Materialized( name=mz_service, options=list(mz_options.values()), From 947d5281cf8c749dc54e2ff5f83781f25a0c3c61 Mon Sep 17 00:00:00 2001 From: Rainer Niedermayr Date: Fri, 25 Oct 2024 11:28:38 +0200 Subject: [PATCH 09/52] platform checks: unique source name --- misc/python/materialize/checks/all_checks/upsert.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/misc/python/materialize/checks/all_checks/upsert.py b/misc/python/materialize/checks/all_checks/upsert.py index 739bc9d5effa9..543478f0a0fe8 100644 --- a/misc/python/materialize/checks/all_checks/upsert.py +++ b/misc/python/materialize/checks/all_checks/upsert.py @@ -183,12 +183,12 @@ def initialize(self) -> Testdrive: $ kafka-ingest format=avro key-format=avro topic=upsert-legacy-syntax key-schema=${keyschema} schema=${schema} repeat=10000 {"key1": "A${kafka-ingest.iteration}"} {"f1": "A${kafka-ingest.iteration}"} - > CREATE SOURCE upsert_insert + > CREATE SOURCE upsert_insert_legacy FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-upsert-legacy-syntax-${testdrive.seed}') FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn ENVELOPE UPSERT - > CREATE MATERIALIZED VIEW upsert_insert_view AS SELECT COUNT(DISTINCT key1 || ' ' || f1) FROM upsert_insert; + > CREATE MATERIALIZED VIEW upsert_insert_legacy_view AS SELECT COUNT(DISTINCT key1 || ' ' || f1) FROM upsert_insert_legacy; """ ) ) @@ -212,10 +212,10 @@ def validate(self) -> Testdrive: return Testdrive( dedent( """ - > SELECT COUNT(*), COUNT(DISTINCT key1), COUNT(DISTINCT f1) FROM upsert_insert + > SELECT COUNT(*), COUNT(DISTINCT key1), COUNT(DISTINCT f1) FROM upsert_insert_legacy 10000 10000 10000 - > SELECT * FROM upsert_insert_view; + > SELECT * FROM upsert_insert_legacy_view; 10000 """ ) From 06765d2f6f1086ad5cb2ed723bdc128938b44836 Mon Sep 17 00:00:00 2001 From: Roshan Jobanputra Date: Fri, 25 Oct 2024 10:23:56 -0400 Subject: [PATCH 10/52] Migration structure cleanup from feedback --- src/adapter/src/catalog/migrate.rs | 71 +++++++++++++----------------- 1 file changed, 31 insertions(+), 40 deletions(-) diff --git a/src/adapter/src/catalog/migrate.rs b/src/adapter/src/catalog/migrate.rs index ff21c13ce89fb..da264a06fa1c6 100644 --- a/src/adapter/src/catalog/migrate.rs +++ b/src/adapter/src/catalog/migrate.rs @@ -277,7 +277,7 @@ fn ast_rewrite_sources_to_tables( let raw_source_name = match of_source { // If `of_source` is None then this is a `progress` subsource which we // are not migrating as they are not currently relevant to the new table model. - None => return Ok(()), + None => continue, Some(name) => name, }; let source = match raw_source_name { @@ -300,21 +300,17 @@ fn ast_rewrite_sources_to_tables( // The external reference is a `with_option` on subsource statements but is a // separate field on table statements. 
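// The cleanup below swaps the hand-rolled index loop for the
// `Iterator::position` + `Vec::remove` idiom. A generic, self-contained sketch
// of that idiom, with plain string tuples standing in for the real option
// types (an assumed simplification):

fn take_option_by_name(
    options: &mut Vec<(String, String)>,
    name: &str,
) -> Option<(String, String)> {
    // Find the first option with a matching name and pull it out of the list.
    let index = options
        .iter()
        .position(|(option_name, _)| option_name == name)?;
    Some(options.remove(index))
}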
- let mut i = 0; - let external_reference = loop { - if i >= with_options.len() { - panic!("subsource must have an external reference"); - } - if with_options[i].name == CreateSubsourceOptionName::ExternalReference { - let option = with_options.remove(i); - match option.value { - Some(WithOptionValue::UnresolvedItemName(name)) => break name, - _ => unreachable!("external reference must be an unresolved item name"), - }; - } else { - i += 1; - } + let external_reference = match with_options + .iter() + .position(|opt| opt.name == CreateSubsourceOptionName::ExternalReference) + { + Some(i) => match with_options.remove(i).value { + Some(WithOptionValue::UnresolvedItemName(name)) => name, + _ => unreachable!("external reference must be an unresolved item name"), + }, + None => panic!("subsource must have an external reference"), }; + let with_options = with_options .into_iter() .map(|option| { @@ -337,8 +333,7 @@ fn ast_rewrite_sources_to_tables( panic!("progress option should not exist on this subsource") } CreateSubsourceOptionName::ExternalReference => { - // This option is handled separately above. - unreachable!() + unreachable!("This option is handled separately above.") } } }) @@ -591,30 +586,26 @@ fn ast_rewrite_sources_to_tables( }]; // Move over the IgnoreKeys option if it exists. - let mut i = 0; - while i < with_options.len() { - if with_options[i].name == CreateSourceOptionName::IgnoreKeys { - let option = with_options.remove(i); - table_with_options.push(TableFromSourceOption { - name: TableFromSourceOptionName::IgnoreKeys, - value: option.value, - }); - } else { - i += 1; - } - } + if let Some(i) = with_options + .iter() + .position(|opt| opt.name == CreateSourceOptionName::IgnoreKeys) + { + let option = with_options.remove(i); + table_with_options.push(TableFromSourceOption { + name: TableFromSourceOptionName::IgnoreKeys, + value: option.value, + }); + }; // Move over the Timeline option if it exists. - i = 0; - while i < with_options.len() { - if with_options[i].name == CreateSourceOptionName::Timeline { - let option = with_options.remove(i); - table_with_options.push(TableFromSourceOption { - name: TableFromSourceOptionName::Timeline, - value: option.value, - }); - } else { - i += 1; - } + if let Some(i) = with_options + .iter() + .position(|opt| opt.name == CreateSourceOptionName::Timeline) + { + let option = with_options.remove(i); + table_with_options.push(TableFromSourceOption { + name: TableFromSourceOptionName::Timeline, + value: option.value, + }); } // Generate the same external-reference that would have been generated From b652d13e11b5247adf41e33137fdf2419e830ce5 Mon Sep 17 00:00:00 2001 From: Roshan Jobanputra Date: Fri, 25 Oct 2024 12:13:42 -0400 Subject: [PATCH 11/52] Address more feedback; ensure new source name is unique --- src/adapter/src/catalog/migrate.rs | 47 +++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/src/adapter/src/catalog/migrate.rs b/src/adapter/src/catalog/migrate.rs index da264a06fa1c6..b23eb29500d76 100644 --- a/src/adapter/src/catalog/migrate.rs +++ b/src/adapter/src/catalog/migrate.rs @@ -256,6 +256,16 @@ fn ast_rewrite_sources_to_tables( .collect::, anyhow::Error>>()?; let items_with_statements_copied = items_with_statements.clone(); + let item_names_per_schema = items_with_statements_copied + .iter() + .map(|(item, _)| (item.schema_id.clone(), &item.name)) + .fold(BTreeMap::new(), |mut acc, (schema_id, name)| { + acc.entry(schema_id) + .or_insert_with(|| btreeset! 
{}) + .insert(name); + acc + }); + // Any GlobalId that should be changed to a new GlobalId in any statements that // reference it. This is necessary for ensuring downstream statements (e.g. // mat views, indexes) that reference a single-output source (e.g. kafka) @@ -538,22 +548,40 @@ fn ast_rewrite_sources_to_tables( && include_metadata.is_empty() { info!("migrate: skipping already migrated source: {}", name); - return Ok(()); + continue; } // Use the current source name as the new table name, and rename the source to // `_source`. This is intended to allow users to continue using // queries that reference the source name, since they will now need to query the // table instead. - let new_source_name_ident = Ident::new_unchecked( - name.0.last().expect("at least one ident").to_string() + "_source", - ); - let mut new_source_name = name.clone(); - *new_source_name.0.last_mut().expect("at least one ident") = new_source_name_ident; - // Also update the name of the source 'item' - let mut table_item_name = item.name.clone() + "_source"; - std::mem::swap(&mut item.name, &mut table_item_name); + // First find an unused name within the same schema to avoid conflicts. + let mut new_source_item_name = format!("{}_source", item.name); + let mut new_source_name_inner = + format!("{}_source", name.0.last().expect("at least one ident")); + let mut i = 0; + while item_names_per_schema + .get(&item.schema_id) + .expect("schema must exist") + .contains(&new_source_item_name) + { + new_source_item_name = format!("{}_source_{}", item.name, i); + new_source_name_inner = format!( + "{}_source_{}", + name.0.last().expect("at least one ident"), + i + ); + i += 1; + } + // We will use the original item name for the new table item. + let table_item_name = item.name.clone(); + + // Update the source item/statement to use the new name. + let mut new_source_name = name.clone(); + *new_source_name.0.last_mut().expect("at least one ident") = + Ident::new_unchecked(new_source_name_inner); + item.name = new_source_item_name; // A reference to the source that will be included in the table statement let source_ref = @@ -611,7 +639,6 @@ fn ast_rewrite_sources_to_tables( // Generate the same external-reference that would have been generated // during purification for single-output sources. let external_reference = match &conn { - // For kafka sources this proto is currently empty. CreateSourceConnection::Kafka { options, .. 
} => { let topic_option = options .iter() From c0898497ffef5c7ccf9bde0888140606c2dfe6fd Mon Sep 17 00:00:00 2001 From: Roshan Jobanputra Date: Fri, 25 Oct 2024 14:32:08 -0400 Subject: [PATCH 12/52] Fix legacy-upgrade checks --- test/legacy-upgrade/check-from-v0.27.0-compile-proto-sources.td | 2 -- test/legacy-upgrade/check-from-v0.45.0-kafka-progress.td | 2 -- 2 files changed, 4 deletions(-) diff --git a/test/legacy-upgrade/check-from-v0.27.0-compile-proto-sources.td b/test/legacy-upgrade/check-from-v0.27.0-compile-proto-sources.td index 4240d3a2229de..fcc08c2529d2d 100644 --- a/test/legacy-upgrade/check-from-v0.27.0-compile-proto-sources.td +++ b/test/legacy-upgrade/check-from-v0.27.0-compile-proto-sources.td @@ -24,5 +24,3 @@ c h a e - -> DROP SOURCE kafka_proto_source CASCADE; diff --git a/test/legacy-upgrade/check-from-v0.45.0-kafka-progress.td b/test/legacy-upgrade/check-from-v0.45.0-kafka-progress.td index 69af351dbbe33..9592f9055b7d5 100644 --- a/test/legacy-upgrade/check-from-v0.45.0-kafka-progress.td +++ b/test/legacy-upgrade/check-from-v0.45.0-kafka-progress.td @@ -18,5 +18,3 @@ $ set-regex match=\d+ replacement= $ set-regex match=testdrive-upgrade-kafka-source-.*?' replacement=' >[version<8100] SHOW CREATE SOURCE kafka_source materialize.public.kafka_source "CREATE SOURCE \"materialize\".\"public\".\"kafka_source\" FROM KAFKA CONNECTION \"materialize\".\"public\".\"kafka_conn\" (TOPIC = '') FORMAT AVRO USING SCHEMA '{ \"type\": \"record\", \"name\": \"cpx\", \"fields\": [ {\"name\": \"a\", \"type\": \"long\"}, {\"name\": \"b\", \"type\": \"long\"} ] }' ENVELOPE NONE EXPOSE PROGRESS AS \"materialize\".\"public\".\"kafka_source_progress\"" - -> DROP SOURCE kafka_source; From 491ac189d44ce67d35de2f3c8d3ac12f015cd19f Mon Sep 17 00:00:00 2001 From: Roshan Jobanputra Date: Fri, 25 Oct 2024 14:49:52 -0400 Subject: [PATCH 13/52] Fixes caused by rebase on main --- src/adapter/src/catalog/migrate.rs | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/adapter/src/catalog/migrate.rs b/src/adapter/src/catalog/migrate.rs index b23eb29500d76..2c645ec9e56d3 100644 --- a/src/adapter/src/catalog/migrate.rs +++ b/src/adapter/src/catalog/migrate.rs @@ -38,7 +38,8 @@ where for mut item in tx.get_items() { let mut stmt = mz_sql::parse::parse(&item.create_sql)?.into_element().ast; - f(tx, item.id, &mut stmt).await?; + // TODO(alter_table): Switch this to CatalogItemId. + f(tx, item.global_id, &mut stmt).await?; item.create_sql = stmt.to_ast_string_stable(); updated_items.insert(item.id, item); @@ -303,7 +304,7 @@ fn ast_rewrite_sources_to_tables( _ => false, }) .expect("source must exist"); - RawItemName::Id(source_item.id.to_string(), name, None) + RawItemName::Id(source_item.global_id.to_string(), name, None) } RawItemName::Id(..) 
=> raw_source_name, }; @@ -368,7 +369,7 @@ fn ast_rewrite_sources_to_tables( item.create_sql, table ); item.create_sql = Statement::CreateTableFromSource(table).to_ast_string_stable(); - tx.update_item(item.id, item)?; + tx.update_item(item.global_id, item)?; } // Postgres sources are multi-output sources whose subsources are @@ -419,7 +420,7 @@ fn ast_rewrite_sources_to_tables( progress_subsource, }); item.create_sql = stmt.to_ast_string_stable(); - tx.update_item(item.id, item)?; + tx.update_item(item.global_id, item)?; info!("migrate: converted postgres source {stmt} to remove subsource options"); } } @@ -477,7 +478,7 @@ fn ast_rewrite_sources_to_tables( progress_subsource, }); item.create_sql = stmt.to_ast_string_stable(); - tx.update_item(item.id, item)?; + tx.update_item(item.global_id, item)?; info!("migrate: converted mysql source {stmt} to remove subsource options"); } } @@ -538,7 +539,7 @@ fn ast_rewrite_sources_to_tables( source_id.parse().expect("valid id") } }; - source == item.id + source == item.global_id } _ => false, }); @@ -585,7 +586,7 @@ fn ast_rewrite_sources_to_tables( // A reference to the source that will be included in the table statement let source_ref = - RawItemName::Id(item.id.to_string(), new_source_name.clone(), None); + RawItemName::Id(item.global_id.to_string(), new_source_name.clone(), None); let columns = if col_names.is_empty() { TableFromSourceColumns::NotSpecified @@ -698,7 +699,7 @@ fn ast_rewrite_sources_to_tables( progress_subsource, }; - let source_id = item.id; + let source_id = item.global_id; let schema_id = item.schema_id.clone(); let schema = tx.get_schema(&item.schema_id).expect("schema must exist"); @@ -709,7 +710,7 @@ fn ast_rewrite_sources_to_tables( // otherwise conflict with the new table statement. info!("migrate: updated source {} to {source}", item.create_sql); item.create_sql = Statement::CreateSource(source).to_ast_string_stable(); - tx.update_item(item.id, item)?; + tx.update_item(item.global_id, item)?; // Insert the new table statement into the catalog with a new id. 
let ids = tx.allocate_user_item_ids(1)?; @@ -785,7 +786,7 @@ fn ast_rewrite_sources_to_tables( _ => { if mz_sql::names::modify_dependency_item_ids(&mut statement, &changed_ids) { item.create_sql = statement.to_ast_string_stable(); - updated_items.insert(item.id, item); + updated_items.insert(item.global_id, item); } } } From 8c983504cf381d7d54b806c67695c3aec147741e Mon Sep 17 00:00:00 2001 From: Rainer Niedermayr Date: Mon, 28 Oct 2024 11:24:18 +0100 Subject: [PATCH 14/52] ci: print source table migration issues --- misc/python/materialize/cli/ci_annotate_errors.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/misc/python/materialize/cli/ci_annotate_errors.py b/misc/python/materialize/cli/ci_annotate_errors.py index 53b7fc82d272b..84865cbbcaff4 100644 --- a/misc/python/materialize/cli/ci_annotate_errors.py +++ b/misc/python/materialize/cli/ci_annotate_errors.py @@ -101,6 +101,8 @@ | (FAIL|TIMEOUT)\s+\[\s*\d+\.\d+s\] # parallel-workload | worker_.*\ still\ running: [\s\S]* Threads\ have\ not\ stopped\ within\ 5\ minutes,\ exiting\ hard + # source-table migration + | source-table-migration\ issue ) .* $ """, From da5d73332286b45680eb23efd0c245fb4ad702d4 Mon Sep 17 00:00:00 2001 From: Rainer Niedermayr Date: Fri, 25 Oct 2024 14:10:37 +0200 Subject: [PATCH 15/52] migration tests: pg-cdc-old-syntax --- ci/nightly/pipeline.template.yml | 11 ++ test/pg-cdc-old-syntax/mzcompose.py | 132 +++++++++++++++++- .../subsource-resolution-duplicates.td | 3 - 3 files changed, 141 insertions(+), 5 deletions(-) diff --git a/ci/nightly/pipeline.template.yml b/ci/nightly/pipeline.template.yml index 01d43fc235446..0c71a2de11d54 100644 --- a/ci/nightly/pipeline.template.yml +++ b/ci/nightly/pipeline.template.yml @@ -646,6 +646,17 @@ steps: queue: hetzner-aarch64-4cpu-8gb # the mzbuild postgres version will be used, which depends on the Dockerfile specification + - id: pg-cdc-migration + label: Postgres CDC source-versioning migration tests + depends_on: build-aarch64 + timeout_in_minutes: 360 + plugins: + - ./ci/plugins/mzcompose: + composition: pg-cdc-old-syntax + run: migration + agents: + queue: hetzner-aarch64-4cpu-8gb + - id: pg-cdc-resumption-old-syntax label: Postgres CDC resumption tests (before source versioning) depends_on: build-aarch64 diff --git a/test/pg-cdc-old-syntax/mzcompose.py b/test/pg-cdc-old-syntax/mzcompose.py index f44e64c31615b..5560e259197ad 100644 --- a/test/pg-cdc-old-syntax/mzcompose.py +++ b/test/pg-cdc-old-syntax/mzcompose.py @@ -18,10 +18,16 @@ from pg8000 import Connection from materialize import buildkite +from materialize.mz_version import MzVersion from materialize.mzcompose.composition import Composition, WorkflowArgumentParser from materialize.mzcompose.service import Service, ServiceConfig from materialize.mzcompose.services.materialized import Materialized -from materialize.mzcompose.services.postgres import Postgres +from materialize.mzcompose.services.minio import Minio +from materialize.mzcompose.services.postgres import ( + METADATA_STORE, + CockroachOrPostgresMetadata, + Postgres, +) from materialize.mzcompose.services.test_certs import TestCerts from materialize.mzcompose.services.testdrive import Testdrive from materialize.mzcompose.services.toxiproxy import Toxiproxy @@ -88,8 +94,11 @@ def create_postgres( additional_system_parameter_defaults={ "log_filter": "mz_storage::source::postgres=trace,debug,info,warn,error" }, + external_minio=True, ), Testdrive(), + CockroachOrPostgresMetadata(), + Minio(setup_materialize=True), TestCerts(), Toxiproxy(), 
create_postgres(pg_version=None), @@ -323,7 +332,11 @@ def workflow_cdc(c: Composition, parser: WorkflowArgumentParser) -> None: def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: workflows_with_internal_sharding = ["cdc"] sharded_workflows = workflows_with_internal_sharding + buildkite.shard_list( - [w for w in c.workflows if w not in workflows_with_internal_sharding], + [ + w + for w in c.workflows + if w not in workflows_with_internal_sharding and w != "migration" + ], lambda w: w, ) print( @@ -348,3 +361,118 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: with c.test_case(name): c.workflow(name, *parser.args) + + +def workflow_migration(c: Composition, parser: WorkflowArgumentParser) -> None: + parser.add_argument( + "filter", + nargs="*", + default=["*.td"], + help="limit to only the files matching filter", + ) + args = parser.parse_args() + + matching_files = [] + for filter in args.filter: + matching_files.extend(glob.glob(filter, root_dir="test/pg-cdc-old-syntax")) + sharded_files: list[str] = sorted( + buildkite.shard_list(matching_files, lambda file: file) + ) + print(f"Files: {sharded_files}") + + ssl_ca = c.run("test-certs", "cat", "/secrets/ca.crt", capture=True).stdout + ssl_cert = c.run("test-certs", "cat", "/secrets/certuser.crt", capture=True).stdout + ssl_key = c.run("test-certs", "cat", "/secrets/certuser.key", capture=True).stdout + ssl_wrong_cert = c.run( + "test-certs", "cat", "/secrets/postgres.crt", capture=True + ).stdout + ssl_wrong_key = c.run( + "test-certs", "cat", "/secrets/postgres.key", capture=True + ).stdout + + pg_version = get_targeted_pg_version(parser) + + mz_old_image = "materialize/materialized:v0.122.0" + mz_new_image = None + + assert MzVersion.parse_cargo() < MzVersion.parse_mz( + "v0.130.0" + ), "migration test probably no longer needed" + + for file in sharded_files: + mz_old = Materialized( + name="materialized", + image=mz_old_image, + volumes_extra=["secrets:/share/secrets"], + external_metadata_store=True, + external_minio=True, + additional_system_parameter_defaults={ + "log_filter": "mz_storage::source::postgres=trace,debug,info,warn,error" + }, + ) + + mz_new = Materialized( + name="materialized", + image=mz_new_image, + volumes_extra=["secrets:/share/secrets"], + external_metadata_store=True, + external_minio=True, + additional_system_parameter_defaults={ + "log_filter": "mz_storage::source::postgres=trace,debug,info,warn,error", + "force_source_table_syntax": "true", + }, + ) + with c.override(mz_old, create_postgres(pg_version=pg_version)): + c.up("materialized", "test-certs", "postgres") + + print(f"Running {file} with mz_old") + + c.run_testdrive_files( + f"--var=ssl-ca={ssl_ca}", + f"--var=ssl-cert={ssl_cert}", + f"--var=ssl-key={ssl_key}", + f"--var=ssl-wrong-cert={ssl_wrong_cert}", + f"--var=ssl-wrong-key={ssl_wrong_key}", + f"--var=default-replica-size={Materialized.Size.DEFAULT_SIZE}-{Materialized.Size.DEFAULT_SIZE}", + f"--var=default-storage-size={Materialized.Size.DEFAULT_SIZE}-1", + "--no-reset", + file, + ) + c.kill("materialized", wait=True) + + with c.override(mz_new): + c.up("materialized") + + print("Running mz_new") + verify_sources(c, file) + + c.kill("materialized", wait=True) + c.kill("postgres", wait=True) + c.kill(METADATA_STORE, wait=True) + c.rm("materialized") + c.rm(METADATA_STORE) + c.rm("postgres") + c.rm_volumes("mzdata") + + +def verify_sources(c: Composition, file: str) -> None: + source_names = c.sql_query("SELECT name FROM mz_sources WHERE id 
LIKE 'u%';") + + print(f"Sources created in {file} are: {source_names}") + + for row in source_names: + verify_source(c, file, row[0]) + + +def verify_source(c: Composition, file: str, source_name: str) -> None: + try: + print(f"Checking source: {source_name}") + # must not crash + c.sql_query(f"SELECT count(*) FROM {source_name};") + + result = c.sql_query(f"SHOW CREATE SOURCE {source_name};") + sql = result[0][1] + assert "FOR TABLE" not in sql, f"FOR TABLE found in: {sql}" + assert "FOR ALL TABLES" not in sql, f"FOR ALL TABLES found in: {sql}" + except Exception as e: + print(f"source-table-migration issue in {file}: {str(e)}") diff --git a/test/pg-cdc-old-syntax/subsource-resolution-duplicates.td b/test/pg-cdc-old-syntax/subsource-resolution-duplicates.td index 3f510be4088b8..8f053fd552679 100644 --- a/test/pg-cdc-old-syntax/subsource-resolution-duplicates.td +++ b/test/pg-cdc-old-syntax/subsource-resolution-duplicates.td @@ -57,6 +57,3 @@ detail: subsources referencing table: x, y mz_source postgres quickstart "" mz_source_progress progress "" t subsource quickstart "" - -$ postgres-execute connection=postgres://postgres:postgres@postgres -DROP SCHEMA other CASCADE; From 9451a6b161c044b0f9be36ca0a807f94bf553c4b Mon Sep 17 00:00:00 2001 From: Rainer Niedermayr Date: Mon, 28 Oct 2024 11:35:03 +0100 Subject: [PATCH 16/52] migration tests: extract logic --- .../materialize/source_table_migration.py | 50 +++++++++++++++++++ test/pg-cdc-old-syntax/mzcompose.py | 42 +++------------- 2 files changed, 58 insertions(+), 34 deletions(-) create mode 100644 misc/python/materialize/source_table_migration.py diff --git a/misc/python/materialize/source_table_migration.py b/misc/python/materialize/source_table_migration.py new file mode 100644 index 0000000000000..ed420cbf172f8 --- /dev/null +++ b/misc/python/materialize/source_table_migration.py @@ -0,0 +1,50 @@ +# Copyright Materialize, Inc. and contributors. All rights reserved. +# +# Use of this software is governed by the Business Source License +# included in the LICENSE file at the root of this repository. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0. 
+ +"""Utilities for testing the source table migration""" +from materialize.mz_version import MzVersion +from materialize.mzcompose.composition import Composition + + +def verify_sources_after_source_table_migration(c: Composition, file: str) -> None: + source_names = c.sql_query("SELECT name FROM mz_sources WHERE id LIKE 'u%';") + + print(f"Sources created in {file} are: {source_names}") + + for row in source_names: + _verify_source(c, file, row[0]) + + +def _verify_source(c: Composition, file: str, source_name: str) -> None: + try: + print(f"Checking source: {source_name}") + # must not crash + c.sql("SET statement_timeout = '20s'") + c.sql_query(f"SELECT count(*) FROM {source_name};") + + result = c.sql_query(f"SHOW CREATE SOURCE {source_name};") + sql = result[0][1] + assert "FOR TABLE" not in sql, f"FOR TABLE found in: {sql}" + assert "FOR ALL TABLES" not in sql, f"FOR ALL TABLES found in: {sql}" + except Exception as e: + print(f"source-table-migration issue in {file}: {str(e)}") + + +def check_source_table_migration_test_sensible() -> None: + assert MzVersion.parse_cargo() < MzVersion.parse_mz( + "v0.130.0" + ), "migration test probably no longer needed" + + +def get_old_image_for_source_table_migration_test() -> str: + return "materialize/materialized:v0.122.0" + + +def get_new_image_for_source_table_migration_test() -> str | None: + return None diff --git a/test/pg-cdc-old-syntax/mzcompose.py b/test/pg-cdc-old-syntax/mzcompose.py index 5560e259197ad..51ab5ed98d0e9 100644 --- a/test/pg-cdc-old-syntax/mzcompose.py +++ b/test/pg-cdc-old-syntax/mzcompose.py @@ -18,7 +18,6 @@ from pg8000 import Connection from materialize import buildkite -from materialize.mz_version import MzVersion from materialize.mzcompose.composition import Composition, WorkflowArgumentParser from materialize.mzcompose.service import Service, ServiceConfig from materialize.mzcompose.services.materialized import Materialized @@ -31,6 +30,11 @@ from materialize.mzcompose.services.test_certs import TestCerts from materialize.mzcompose.services.testdrive import Testdrive from materialize.mzcompose.services.toxiproxy import Toxiproxy +from materialize.source_table_migration import ( + get_new_image_for_source_table_migration_test, + get_old_image_for_source_table_migration_test, + verify_sources_after_source_table_migration, +) # Set the max slot WAL keep size to 10MB DEFAULT_PG_EXTRA_COMMAND = ["-c", "max_slot_wal_keep_size=10"] @@ -392,17 +396,10 @@ def workflow_migration(c: Composition, parser: WorkflowArgumentParser) -> None: pg_version = get_targeted_pg_version(parser) - mz_old_image = "materialize/materialized:v0.122.0" - mz_new_image = None - - assert MzVersion.parse_cargo() < MzVersion.parse_mz( - "v0.130.0" - ), "migration test probably no longer needed" - for file in sharded_files: mz_old = Materialized( name="materialized", - image=mz_old_image, + image=get_old_image_for_source_table_migration_test(), volumes_extra=["secrets:/share/secrets"], external_metadata_store=True, external_minio=True, @@ -413,7 +410,7 @@ def workflow_migration(c: Composition, parser: WorkflowArgumentParser) -> None: mz_new = Materialized( name="materialized", - image=mz_new_image, + image=get_new_image_for_source_table_migration_test(), volumes_extra=["secrets:/share/secrets"], external_metadata_store=True, external_minio=True, @@ -444,7 +441,7 @@ def workflow_migration(c: Composition, parser: WorkflowArgumentParser) -> None: c.up("materialized") print("Running mz_new") - verify_sources(c, file) + 
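# At this point the catalog written by the old image is restarted under the new
# image, and the shared helper below checks every user source: it must still
# answer a simple count query, and its SHOW CREATE SOURCE output must no longer
# contain subsource-style syntax. To exercise this flow locally, an invocation
# along these lines is assumed to work from the repository root:
#
#   bin/mzcompose --find pg-cdc-old-syntax run migration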
verify_sources_after_source_table_migration(c, file) c.kill("materialized", wait=True) c.kill("postgres", wait=True) @@ -453,26 +450,3 @@ def workflow_migration(c: Composition, parser: WorkflowArgumentParser) -> None: c.rm(METADATA_STORE) c.rm("postgres") c.rm_volumes("mzdata") - - -def verify_sources(c: Composition, file: str) -> None: - source_names = c.sql_query("SELECT name FROM mz_sources WHERE id LIKE 'u%';") - - print(f"Sources created in {file} are: {source_names}") - - for row in source_names: - verify_source(c, file, row[0]) - - -def verify_source(c: Composition, file: str, source_name: str) -> None: - try: - print(f"Checking source: {source_name}") - # must not crash - c.sql_query(f"SELECT count(*) FROM {source_name};") - - result = c.sql_query(f"SHOW CREATE SOURCE {source_name};") - sql = result[0][1] - assert "FOR TABLE" not in sql, f"FOR TABLE found in: {sql}" - assert "FOR ALL TABLES" not in sql, f"FOR ALL TABLES found in: {sql}" - except Exception as e: - print(f"source-table-migration issue in {file}: {str(e)}") From cfd9da3404058cce4293469c06da7abab02ba007 Mon Sep 17 00:00:00 2001 From: Rainer Niedermayr Date: Mon, 28 Oct 2024 16:29:49 +0100 Subject: [PATCH 17/52] migration tests: improve verification --- .../materialize/source_table_migration.py | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/misc/python/materialize/source_table_migration.py b/misc/python/materialize/source_table_migration.py index ed420cbf172f8..21ecc1ee4d7c1 100644 --- a/misc/python/materialize/source_table_migration.py +++ b/misc/python/materialize/source_table_migration.py @@ -12,16 +12,23 @@ from materialize.mzcompose.composition import Composition -def verify_sources_after_source_table_migration(c: Composition, file: str) -> None: - source_names = c.sql_query("SELECT name FROM mz_sources WHERE id LIKE 'u%';") +def verify_sources_after_source_table_migration( + c: Composition, file: str, fail: bool = False +) -> None: + source_names_rows = c.sql_query( + "SELECT sm.name || '.' 
|| src.name FROM mz_sources src INNER JOIN mz_schemas sm ON src.schema_id = sm.id WHERE src.id LIKE 'u%';" + ) + source_names = [row[0] for row in source_names_rows] print(f"Sources created in {file} are: {source_names}") - for row in source_names: - _verify_source(c, file, row[0]) + for source_name in source_names: + _verify_source(c, file, source_name, fail=fail) -def _verify_source(c: Composition, file: str, source_name: str) -> None: +def _verify_source( + c: Composition, file: str, source_name: str, fail: bool = False +) -> None: try: print(f"Checking source: {source_name}") # must not crash @@ -32,9 +39,14 @@ def _verify_source(c: Composition, file: str, source_name: str) -> None: sql = result[0][1] assert "FOR TABLE" not in sql, f"FOR TABLE found in: {sql}" assert "FOR ALL TABLES" not in sql, f"FOR ALL TABLES found in: {sql}" + assert "CREATE SUBSOURCE" not in sql, f"FOR ALL TABLES found in: {sql}" + print("OK.") except Exception as e: print(f"source-table-migration issue in {file}: {str(e)}") + if fail: + raise e + def check_source_table_migration_test_sensible() -> None: assert MzVersion.parse_cargo() < MzVersion.parse_mz( From b737c30f26e188ec55ae70add164b42fa4982174 Mon Sep 17 00:00:00 2001 From: Rainer Niedermayr Date: Mon, 28 Oct 2024 13:47:06 +0100 Subject: [PATCH 18/52] migration tests: mysql-cdc-old-syntax --- ci/nightly/pipeline.template.yml | 11 ++ test/mysql-cdc-old-syntax/mzcompose.py | 106 +++++++++++++++++- .../subsource-resolution-duplicates.td | 3 - 3 files changed, 116 insertions(+), 4 deletions(-) diff --git a/ci/nightly/pipeline.template.yml b/ci/nightly/pipeline.template.yml index 0c71a2de11d54..d55b219a764f8 100644 --- a/ci/nightly/pipeline.template.yml +++ b/ci/nightly/pipeline.template.yml @@ -615,6 +615,17 @@ steps: agents: queue: hetzner-aarch64-4cpu-8gb + - id: mysql-cdc-migration + label: MySQL CDC source-versioning migration tests + depends_on: build-aarch64 + timeout_in_minutes: 360 + plugins: + - ./ci/plugins/mzcompose: + composition: mysql-cdc-old-syntax + run: migration + agents: + queue: hetzner-aarch64-4cpu-8gb + - id: mysql-cdc-resumption-old-syntax label: MySQL CDC resumption tests (before source versioning) depends_on: build-aarch64 diff --git a/test/mysql-cdc-old-syntax/mzcompose.py b/test/mysql-cdc-old-syntax/mzcompose.py index 32d78456c45c2..46986c98c3190 100644 --- a/test/mysql-cdc-old-syntax/mzcompose.py +++ b/test/mysql-cdc-old-syntax/mzcompose.py @@ -22,9 +22,19 @@ ) from materialize.mzcompose.composition import Composition, WorkflowArgumentParser from materialize.mzcompose.services.materialized import Materialized +from materialize.mzcompose.services.minio import Minio from materialize.mzcompose.services.mysql import MySql +from materialize.mzcompose.services.postgres import ( + METADATA_STORE, + CockroachOrPostgresMetadata, +) from materialize.mzcompose.services.test_certs import TestCerts from materialize.mzcompose.services.testdrive import Testdrive +from materialize.source_table_migration import ( + get_new_image_for_source_table_migration_test, + get_old_image_for_source_table_migration_test, + verify_sources_after_source_table_migration, +) def create_mysql(mysql_version: str) -> MySql: @@ -46,6 +56,7 @@ def create_mysql_replica(mysql_version: str) -> MySql: SERVICES = [ Materialized( + external_minio=True, additional_system_parameter_defaults={ "log_filter": "mz_storage::source::mysql=trace,info" }, @@ -53,6 +64,8 @@ def create_mysql_replica(mysql_version: str) -> MySql: create_mysql(MySql.DEFAULT_VERSION), 
create_mysql_replica(MySql.DEFAULT_VERSION), TestCerts(), + CockroachOrPostgresMetadata(), + Minio(setup_materialize=True), Testdrive(default_timeout="60s"), ] @@ -72,7 +85,11 @@ def get_targeted_mysql_version(parser: WorkflowArgumentParser) -> str: def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: workflows_with_internal_sharding = ["cdc"] sharded_workflows = workflows_with_internal_sharding + buildkite.shard_list( - [w for w in c.workflows if w not in workflows_with_internal_sharding], + [ + w + for w in c.workflows + if w not in workflows_with_internal_sharding and w != "migration" + ], lambda w: w, ) print( @@ -273,3 +290,90 @@ def do_inserts(c: Composition): """ ), ) + + +def workflow_migration(c: Composition, parser: WorkflowArgumentParser) -> None: + parser.add_argument( + "filter", + nargs="*", + default=["*.td"], + help="limit to only the files matching filter", + ) + args = parser.parse_args() + + matching_files = [] + for filter in args.filter: + matching_files.extend(glob.glob(filter, root_dir="test/mysql-cdc-old-syntax")) + + # TODO: database-issues#8706 + matching_files = [file for file in matching_files if file != "alter-source.td"] + + sharded_files: list[str] = sorted( + buildkite.shard_list(matching_files, lambda file: file) + ) + print(f"Files: {sharded_files}") + + mysql_version = get_targeted_mysql_version(parser) + + for file in sharded_files: + + mz_old = Materialized( + name="materialized", + image=get_old_image_for_source_table_migration_test(), + external_metadata_store=True, + external_minio=True, + additional_system_parameter_defaults={ + "log_filter": "mz_storage::source::mysql=trace,info" + }, + ) + + mz_new = Materialized( + name="materialized", + image=get_new_image_for_source_table_migration_test(), + external_metadata_store=True, + external_minio=True, + additional_system_parameter_defaults={ + "log_filter": "mz_storage::source::mysql=trace,info" + }, + ) + + with c.override(mz_old, create_mysql(mysql_version)): + c.up("materialized", "mysql") + + print(f"Running {file} with mz_old") + + valid_ssl_context = retrieve_ssl_context_for_mysql(c) + wrong_ssl_context = retrieve_invalid_ssl_context_for_mysql(c) + + c.sources_and_sinks_ignored_from_validation.add("drop_table") + + c.run_testdrive_files( + f"--var=ssl-ca={valid_ssl_context.ca}", + f"--var=ssl-client-cert={valid_ssl_context.client_cert}", + f"--var=ssl-client-key={valid_ssl_context.client_key}", + f"--var=ssl-wrong-ca={wrong_ssl_context.ca}", + f"--var=ssl-wrong-client-cert={wrong_ssl_context.client_cert}", + f"--var=ssl-wrong-client-key={wrong_ssl_context.client_key}", + f"--var=mysql-root-password={MySql.DEFAULT_ROOT_PASSWORD}", + "--var=mysql-user-password=us3rp4ssw0rd", + f"--var=default-replica-size={Materialized.Size.DEFAULT_SIZE}-{Materialized.Size.DEFAULT_SIZE}", + f"--var=default-storage-size={Materialized.Size.DEFAULT_SIZE}-1", + "--no-reset", + file, + ) + + c.kill("materialized", wait=True) + + with c.override(mz_new): + c.up("materialized") + + print("Running mz_new") + verify_sources_after_source_table_migration(c, file) + + c.kill("materialized", wait=True) + c.kill("mysql", wait=True) + c.kill(METADATA_STORE, wait=True) + c.rm("materialized") + c.rm(METADATA_STORE) + c.rm("mysql") + c.rm_volumes("mzdata") diff --git a/test/mysql-cdc-old-syntax/subsource-resolution-duplicates.td b/test/mysql-cdc-old-syntax/subsource-resolution-duplicates.td index 1b5662cc78956..c762d583849e5 100644 --- a/test/mysql-cdc-old-syntax/subsource-resolution-duplicates.td +++ 
b/test/mysql-cdc-old-syntax/subsource-resolution-duplicates.td @@ -53,6 +53,3 @@ detail: subsources referencing table: x, y mz_source mysql quickstart "" mz_source_progress progress "" t subsource quickstart "" - -$ mysql-execute name=mysql -DROP DATABASE other; From e8fd48be0bc56a8501904d57f445c6716f4dd7ff Mon Sep 17 00:00:00 2001 From: Rainer Niedermayr Date: Mon, 28 Oct 2024 14:10:55 +0100 Subject: [PATCH 19/52] migration tests: testdrive-old-kafka-syntax --- ci/nightly/pipeline.template.yml | 11 + .../mzcompose.py | 227 +++++++++++++++++- 2 files changed, 235 insertions(+), 3 deletions(-) diff --git a/ci/nightly/pipeline.template.yml b/ci/nightly/pipeline.template.yml index d55b219a764f8..d8fff84b85c28 100644 --- a/ci/nightly/pipeline.template.yml +++ b/ci/nightly/pipeline.template.yml @@ -699,6 +699,17 @@ steps: agents: queue: hetzner-aarch64-8cpu-16gb + - id: testdrive-kafka-migration + label: "Testdrive %N migration tests" + depends_on: build-aarch64 + timeout_in_minutes: 180 + parallelism: 8 + plugins: + - ./ci/plugins/mzcompose: + composition: testdrive-old-kafka-src-syntax + run: migration + agents: + queue: hetzner-aarch64-8cpu-16gb - group: AWS key: aws diff --git a/test/testdrive-old-kafka-src-syntax/mzcompose.py b/test/testdrive-old-kafka-src-syntax/mzcompose.py index 77fd29709bf9d..517e71d961e69 100644 --- a/test/testdrive-old-kafka-src-syntax/mzcompose.py +++ b/test/testdrive-old-kafka-src-syntax/mzcompose.py @@ -12,10 +12,10 @@ the expected-result/actual-result (aka golden testing) paradigm. A query is retried until it produces the desired result. """ - +import glob from pathlib import Path -from materialize import ci_util +from materialize import ci_util, spawn from materialize.mzcompose import get_default_system_parameters from materialize.mzcompose.composition import Composition, WorkflowArgumentParser from materialize.mzcompose.services.azure import Azurite @@ -24,11 +24,20 @@ from materialize.mzcompose.services.materialized import Materialized from materialize.mzcompose.services.minio import Minio from materialize.mzcompose.services.mysql import MySql -from materialize.mzcompose.services.postgres import Postgres +from materialize.mzcompose.services.postgres import ( + METADATA_STORE, + CockroachOrPostgresMetadata, + Postgres, +) from materialize.mzcompose.services.redpanda import Redpanda from materialize.mzcompose.services.schema_registry import SchemaRegistry from materialize.mzcompose.services.testdrive import Testdrive from materialize.mzcompose.services.zookeeper import Zookeeper +from materialize.source_table_migration import ( + get_new_image_for_source_table_migration_test, + get_old_image_for_source_table_migration_test, + verify_sources_after_source_table_migration, +) SERVICES = [ Zookeeper(), @@ -40,12 +49,25 @@ Minio(setup_materialize=True, additional_directories=["copytos3"]), Azurite(), Materialized(external_blob_store=True), + CockroachOrPostgresMetadata(), FivetranDestination(volumes_extra=["tmp:/share/tmp"]), Testdrive(external_blob_store=True), ] def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: + for name in c.workflows: + if name == "default": + continue + + if name == "migration": + continue + + with c.test_case(name): + c.workflow(name) + + +def workflow_kafka(c: Composition, parser: WorkflowArgumentParser) -> None: """Run testdrive.""" parser.add_argument( "--redpanda", @@ -237,3 +259,202 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: ci_util.upload_junit_report( "testdrive", 
Path(__file__).parent / junit_report ) + + +def workflow_migration(c: Composition, parser: WorkflowArgumentParser) -> None: + """Run testdrive.""" + parser.add_argument( + "--redpanda", + action="store_true", + help="run against Redpanda instead of the Confluent Platform", + ) + parser.add_argument( + "--aws-region", + help="run against the specified AWS region instead of localstack", + ) + parser.add_argument( + "--kafka-default-partitions", + type=int, + metavar="N", + help="set the default number of kafka partitions per topic", + ) + parser.add_argument( + "--system-param", + type=str, + action="append", + nargs="*", + help="System parameters to set in Materialize, i.e. what you would set with `ALTER SYSTEM SET`", + ) + + parser.add_argument("--replicas", type=int, default=1, help="use multiple replicas") + + parser.add_argument( + "--default-timeout", + type=str, + help="set the default timeout for Testdrive", + ) + parser.add_argument( + "files", + nargs="*", + default=["*.td"], + help="run against the specified files", + ) + + (args, _) = parser.parse_known_args() + + matching_files = [] + for filter in args.files: + matching_files.extend( + glob.glob(filter, root_dir="test/testdrive-old-kafka-src-syntax") + ) + matching_files = [file for file in matching_files if file != "session.td"] + + dependencies = [ + "fivetran-destination", + "minio", + "materialized", + "postgres", + "mysql", + ] + + if args.redpanda: + kafka_deps = ["redpanda"] + else: + kafka_deps = ["zookeeper", "kafka", "schema-registry"] + + dependencies += kafka_deps + + testdrive = Testdrive( + forward_buildkite_shard=True, + kafka_default_partitions=args.kafka_default_partitions, + aws_region=args.aws_region, + # validate_catalog_store=True, + default_timeout=args.default_timeout, + volumes_extra=["mzdata:/mzdata"], + external_minio=True, + fivetran_destination=True, + fivetran_destination_files_path="/share/tmp", + entrypoint_extra=[f"--var=uses-redpanda={args.redpanda}"], + ) + + sysparams = args.system_param + if not args.system_param: + sysparams = [] + + additional_system_parameter_defaults = {} + for val in sysparams: + x = val[0].split("=", maxsplit=1) + assert len(x) == 2, f"--system-param '{val}' should be the format =" + key = x[0] + val = x[1] + + additional_system_parameter_defaults[key] = val + + mz_old = Materialized( + default_size=Materialized.Size.DEFAULT_SIZE, + image=get_old_image_for_source_table_migration_test(), + external_metadata_store=True, + external_minio=True, + additional_system_parameter_defaults=additional_system_parameter_defaults, + ) + mz_new = Materialized( + default_size=Materialized.Size.DEFAULT_SIZE, + image=get_new_image_for_source_table_migration_test(), + external_metadata_store=True, + external_minio=True, + additional_system_parameter_defaults=additional_system_parameter_defaults, + ) + + for file in matching_files: + with c.override(testdrive, mz_old): + c.up(*dependencies) + + c.sql( + "ALTER SYSTEM SET max_clusters = 50;", + port=6877, + user="mz_system", + ) + + non_default_testdrive_vars = [] + + if args.replicas > 1: + c.sql("DROP CLUSTER quickstart CASCADE", user="mz_system", port=6877) + # Make sure a replica named 'r1' always exists + replica_names = [ + "r1" if replica_id == 0 else f"replica{replica_id}" + for replica_id in range(0, args.replicas) + ] + replica_string = ",".join( + f"{replica_name} (SIZE '{mz_old.default_replica_size}')" + for replica_name in replica_names + ) + c.sql( + f"CREATE CLUSTER quickstart REPLICAS ({replica_string})", + user="mz_system", 
+ port=6877, + ) + + # Note that any command that outputs SHOW CLUSTERS will have output + # that depends on the number of replicas testdrive has. This means + # it might be easier to skip certain tests if the number of replicas + # is > 1. + c.sql( + f""" + CREATE CLUSTER testdrive_single_replica_cluster SIZE = '{mz_old.default_replica_size}'; + GRANT ALL PRIVILEGES ON CLUSTER testdrive_single_replica_cluster TO materialize; + """, + user="mz_system", + port=6877, + ) + + non_default_testdrive_vars.append(f"--var=replicas={args.replicas}") + non_default_testdrive_vars.append( + "--var=single-replica-cluster=testdrive_single_replica_cluster" + ) + + non_default_testdrive_vars.append( + f"--var=default-replica-size={mz_old.default_replica_size}" + ) + non_default_testdrive_vars.append( + f"--var=default-storage-size={mz_old.default_storage_size}" + ) + + print(f"Running {file} with mz_old") + + c.run_testdrive_files( + *non_default_testdrive_vars, + "--no-reset", + file, + ) + + c.kill("materialized", wait=True) + + with c.override(mz_new): + c.up("materialized") + + print("Running mz_new") + verify_sources_after_source_table_migration(c, file) + + c.kill("materialized", wait=True) + c.kill("postgres", wait=True) + c.kill("mysql", wait=True) + c.kill(METADATA_STORE, wait=True) + + for dep in kafka_deps: + c.kill(dep, wait=True) + + for dep in kafka_deps: + c.rm(dep) + + c.rm("materialized") + c.rm(METADATA_STORE) + c.rm("postgres") + c.rm("mysql") + + # remove the testdrive container which uses the mzdata volume + testdrive_container_id = spawn.capture( + ["docker", "ps", "-a", "--filter", f"volume={c.name}_mzdata", "-q"] + ).strip() + spawn.runv(["docker", "rm", testdrive_container_id]) + + c.rm_volumes("mzdata", force=True) From 2bba8bda74f2d9d3e20e75b2a2b5ca0e6c70fe2b Mon Sep 17 00:00:00 2001 From: Rainer Niedermayr Date: Tue, 29 Oct 2024 09:32:42 +0100 Subject: [PATCH 20/52] migration tests: improve output --- misc/python/materialize/source_table_migration.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/misc/python/materialize/source_table_migration.py b/misc/python/materialize/source_table_migration.py index 21ecc1ee4d7c1..ffafc1d7c3dc4 100644 --- a/misc/python/materialize/source_table_migration.py +++ b/misc/python/materialize/source_table_migration.py @@ -22,6 +22,8 @@ def verify_sources_after_source_table_migration( print(f"Sources created in {file} are: {source_names}") + c.sql("SET statement_timeout = '20s'") + for source_name in source_names: _verify_source(c, file, source_name, fail=fail) @@ -31,11 +33,15 @@ def _verify_source( ) -> None: try: print(f"Checking source: {source_name}") + # must not crash - c.sql("SET statement_timeout = '20s'") - c.sql_query(f"SELECT count(*) FROM {source_name};") + statement = f"SELECT count(*) FROM {source_name};" + print(statement) + c.sql_query(statement) - result = c.sql_query(f"SHOW CREATE SOURCE {source_name};") + statement = f"SHOW CREATE SOURCE {source_name};" + print(statement) + result = c.sql_query(statement) sql = result[0][1] assert "FOR TABLE" not in sql, f"FOR TABLE found in: {sql}" assert "FOR ALL TABLES" not in sql, f"FOR ALL TABLES found in: {sql}" From 27652cd59f01e7b0a1c82671fcad43d5a11d20f1 Mon Sep 17 00:00:00 2001 From: Rainer Niedermayr Date: Tue, 29 Oct 2024 14:15:53 +0100 Subject: [PATCH 21/52] migration tests: fixes --- test/mysql-cdc-old-syntax/mzcompose.py | 6 ++---- test/testdrive-old-kafka-src-syntax/mzcompose.py | 2 ++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git 
a/test/mysql-cdc-old-syntax/mzcompose.py b/test/mysql-cdc-old-syntax/mzcompose.py index 46986c98c3190..308cd4a9c2856 100644 --- a/test/mysql-cdc-old-syntax/mzcompose.py +++ b/test/mysql-cdc-old-syntax/mzcompose.py @@ -305,9 +305,6 @@ def workflow_migration(c: Composition, parser: WorkflowArgumentParser) -> None: for filter in args.filter: matching_files.extend(glob.glob(filter, root_dir="test/mysql-cdc-old-syntax")) - # TODO: database-issues#8706 - matching_files = [file for file in matching_files if file != "alter-source.td"] - sharded_files: list[str] = sorted( buildkite.shard_list(matching_files, lambda file: file) ) @@ -333,7 +330,8 @@ def workflow_migration(c: Composition, parser: WorkflowArgumentParser) -> None: external_metadata_store=True, external_minio=True, additional_system_parameter_defaults={ - "log_filter": "mz_storage::source::mysql=trace,info" + "log_filter": "mz_storage::source::mysql=trace,info", + "force_source_table_syntax": "true", }, ) diff --git a/test/testdrive-old-kafka-src-syntax/mzcompose.py b/test/testdrive-old-kafka-src-syntax/mzcompose.py index 517e71d961e69..075ddd07ea6e5 100644 --- a/test/testdrive-old-kafka-src-syntax/mzcompose.py +++ b/test/testdrive-old-kafka-src-syntax/mzcompose.py @@ -350,6 +350,8 @@ def workflow_migration(c: Composition, parser: WorkflowArgumentParser) -> None: additional_system_parameter_defaults[key] = val + additional_system_parameter_defaults["force_source_table_syntax"] = "true" + mz_old = Materialized( default_size=Materialized.Size.DEFAULT_SIZE, image=get_old_image_for_source_table_migration_test(), From bb49cf5e11caed3e7f1c5715d0a5ec8b685f7850 Mon Sep 17 00:00:00 2001 From: Rainer Niedermayr Date: Tue, 29 Oct 2024 16:12:33 +0100 Subject: [PATCH 22/52] migration tests: fixes --- misc/python/materialize/source_table_migration.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/misc/python/materialize/source_table_migration.py b/misc/python/materialize/source_table_migration.py index ffafc1d7c3dc4..423443da73fb2 100644 --- a/misc/python/materialize/source_table_migration.py +++ b/misc/python/materialize/source_table_migration.py @@ -45,7 +45,10 @@ def _verify_source( sql = result[0][1] assert "FOR TABLE" not in sql, f"FOR TABLE found in: {sql}" assert "FOR ALL TABLES" not in sql, f"FOR ALL TABLES found in: {sql}" - assert "CREATE SUBSOURCE" not in sql, f"FOR ALL TABLES found in: {sql}" + + if not source_name.endswith("_progress"): + assert "CREATE SUBSOURCE" not in sql, f"CREATE SUBSOURCE found in: {sql}" + print("OK.") except Exception as e: print(f"source-table-migration issue in {file}: {str(e)}") From acdb421ddcba760dfc23023b3e414bbecaeaa049 Mon Sep 17 00:00:00 2001 From: Roshan Jobanputra Date: Tue, 29 Oct 2024 15:17:24 -0400 Subject: [PATCH 23/52] Fix for mysql source being restarted after new table added --- src/storage/src/source/mysql/replication/events.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/storage/src/source/mysql/replication/events.rs b/src/storage/src/source/mysql/replication/events.rs index 14a628e8db25a..56f0016a3faa4 100644 --- a/src/storage/src/source/mysql/replication/events.rs +++ b/src/storage/src/source/mysql/replication/events.rs @@ -197,6 +197,11 @@ pub(super) async fn handle_query_event( (Some("commit"), None) => { is_complete_event = true; } + // Detect `CREATE TABLE ` statements which don't affect existing tables but do + // signify a complete event (e.g. 
for the purposes of advancing the GTID) + (Some("create"), Some("table")) => { + is_complete_event = true; + } _ => {} } From 62d7ef70470cbdc09af1a83b169aeb882f29ac02 Mon Sep 17 00:00:00 2001 From: Roshan Jobanputra Date: Tue, 29 Oct 2024 17:54:37 -0400 Subject: [PATCH 24/52] Avoid needing to rewrite ids of dependent statements by changing the sorting of item updates --- src/adapter/src/catalog/apply.rs | 79 ++++++++++++++++++++---------- src/adapter/src/catalog/migrate.rs | 1 + src/adapter/src/coord.rs | 23 --------- src/sql/src/names.rs | 26 ++++++++++ 4 files changed, 81 insertions(+), 48 deletions(-) diff --git a/src/adapter/src/catalog/apply.rs b/src/adapter/src/catalog/apply.rs index 9db55bc11d605..8157d7098c16a 100644 --- a/src/adapter/src/catalog/apply.rs +++ b/src/adapter/src/catalog/apply.rs @@ -25,7 +25,7 @@ use mz_catalog::builtin::{ use mz_catalog::durable::objects::{ ClusterKey, DatabaseKey, DurableType, ItemKey, NetworkPolicyKey, RoleKey, SchemaKey, }; -use mz_catalog::durable::{CatalogError, DurableCatalogError, SystemObjectMapping}; +use mz_catalog::durable::{CatalogError, SystemObjectMapping}; use mz_catalog::memory::error::{Error, ErrorKind}; use mz_catalog::memory::objects::{ CatalogEntry, CatalogItem, Cluster, ClusterReplica, DataSourceDesc, Database, Func, Index, Log, @@ -37,6 +37,7 @@ use mz_compute_types::config::ComputeReplicaConfig; use mz_controller::clusters::{ReplicaConfig, ReplicaLogging}; use mz_controller_types::ClusterId; use mz_expr::MirScalarExpr; +use mz_ore::collections::CollectionExt; use mz_ore::tracing::OpenTelemetryContext; use mz_ore::{instrument, soft_assert_no_log}; use mz_pgrepr::oid::INVALID_OID; @@ -54,7 +55,7 @@ use mz_sql::session::vars::{VarError, VarInput}; use mz_sql::{plan, rbac}; use mz_sql_parser::ast::Expr; use mz_storage_types::sources::Timeline; -use tracing::{error, info_span, warn, Instrument}; +use tracing::{info_span, warn, Instrument}; use crate::catalog::state::LocalExpressionCache; use crate::catalog::{BuiltinTableUpdate, CatalogState}; @@ -1029,17 +1030,7 @@ impl CatalogState { } } }; - // We allow sinks to break this invariant due to a know issue with `ALTER SINK`. - // https://github.com/MaterializeInc/materialize/pull/28708. - if !entry.is_sink() && entry.uses().iter().any(|id| *id > entry.id) { - let msg = format!( - "item cannot depend on items with larger GlobalIds, item: {:?}, dependencies: {:?}", - entry, - entry.uses() - ); - error!("internal catalog errr: {msg}"); - return Err(CatalogError::Durable(DurableCatalogError::Internal(msg))); - } + self.insert_entry(entry); } StateDiff::Retraction => { @@ -1894,25 +1885,63 @@ fn sort_updates_inner(updates: Vec) -> Vec { } } - /// Sort item updates by [`CatalogItemId`]. + /// Sort item updates by parsing statements to identify any id-based dependencies within + /// this set of updates and then performing a topological sort. fn sort_item_updates( item_updates: Vec<(mz_catalog::durable::Item, Timestamp, StateDiff)>, ) -> VecDeque<(mz_catalog::durable::Item, Timestamp, StateDiff)> { - item_updates + let mut items_with_dependencies = item_updates .into_iter() - // HACK: due to `ALTER SINK`, sinks can appear before the objects they - // depend upon. Fortunately, because sinks can never have dependencies - // and can never depend upon one another, to fix the topological sort, - // we can just always move sinks to the end. 
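// A minimal sketch (hypothetical user IDs, not taken from any real catalog) of
// why the plain ID-based ordering removed here no longer suffices: after the
// source-to-table migration, a pre-existing view's reference can be rewritten
// to point at a freshly allocated table whose GlobalId is larger than the
// view's own.
//
//     u1: CREATE SOURCE kafka_src ...                       (kept)
//     u2: CREATE VIEW v AS SELECT * FROM [u1 ...]           (rewritten to [u9 ...])
//     u9: CREATE TABLE kafka_src (...) FROM SOURCE [u1 ...] (newly allocated)
//
// Loading in ID order would apply u2 before its dependency u9 exists, so the
// replacement below orders item updates by their parsed dependencies instead.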
- .sorted_by_key(|(item, _ts, _diff)| { - if item.create_sql.starts_with("CREATE SINK") { - CatalogItemId::User(u64::MAX) - } else { - item.id - } + .map(|(item, ts, diff)| { + let parsed = mz_sql::parse::parse(&item.create_sql) + .expect("failed to parse persisted SQL") + .into_element() + .ast; + let deps = mz_sql::names::raw_item_dependency_ids(&parsed); + + (item.global_id, (deps, (item, ts, diff))) }) + .collect::>(); + let mut visited = BTreeSet::new(); + let mut sorted = Vec::new(); + fn dfs( + id: GlobalId, + visited: &mut BTreeSet, + sorted: &mut Vec, + items_with_dependencies: &BTreeMap< + GlobalId, + ( + BTreeSet, + (mz_catalog::durable::Item, Timestamp, StateDiff), + ), + >, + ) { + visited.insert(id); + let deps = items_with_dependencies + .get(&id) + .map(|(deps, _)| deps) + .expect("item should be in the map"); + for dep in deps { + // We only want to visit dependencies that are in the current set of updates. + if !visited.contains(dep) && items_with_dependencies.contains_key(dep) { + dfs(*dep, visited, sorted, items_with_dependencies); + } + } + sorted.push(id); + } + for id in items_with_dependencies.keys() { + if !visited.contains(id) { + dfs(*id, &mut visited, &mut sorted, &items_with_dependencies); + } + } + // return the values from items_with_dependencies in the order of sorted + sorted + .into_iter() + .filter_map(|id| items_with_dependencies.remove(&id)) + .map(|item| item.1) .collect() } + let item_retractions = sort_item_updates(item_retractions); let item_additions = sort_item_updates(item_additions); diff --git a/src/adapter/src/catalog/migrate.rs b/src/adapter/src/catalog/migrate.rs index 2c645ec9e56d3..196e18250a0f5 100644 --- a/src/adapter/src/catalog/migrate.rs +++ b/src/adapter/src/catalog/migrate.rs @@ -785,6 +785,7 @@ fn ast_rewrite_sources_to_tables( // for all sources that were migrated. _ => { if mz_sql::names::modify_dependency_item_ids(&mut statement, &changed_ids) { + info!("migrate: updated dependency reference in statement {statement}"); item.create_sql = statement.to_ast_string_stable(); updated_items.insert(item.global_id, item); } diff --git a/src/adapter/src/coord.rs b/src/adapter/src/coord.rs index 2f26cff656e02..039365243c075 100644 --- a/src/adapter/src/coord.rs +++ b/src/adapter/src/coord.rs @@ -1943,29 +1943,6 @@ impl Coordinator { let mut privatelink_connections = BTreeMap::new(); for entry in &entries { - // TODO(database-issues#7922): we should move this invariant into `CatalogEntry`. - mz_ore::soft_assert_or_log!( - // We only expect user objects to objects obey this invariant. - // System objects, for instance, can depend on other system - // objects that belong to a schema that is simply loaded first. - // To meaningfully resolve this, we could need more careful - // loading order or more complex IDs, neither of which seem very - // beneficial. - // - // HACK: sinks are permitted to depend on items with larger IDs, - // due to `ALTER SINK`. 
- !entry.id().is_user() - || entry.is_sink() - || entry - .uses() - .iter() - .all(|dependency_id| *dependency_id <= entry.id), - "entries should only use to items with lesser `GlobalId`s, but \ - {:?} uses {:?}", - entry.id, - entry.uses() - ); - debug!( "coordinator init: installing {} {}", entry.item().typ(), diff --git a/src/sql/src/names.rs b/src/sql/src/names.rs index 2eb24322e83b1..658cc93bba9e4 100644 --- a/src/sql/src/names.rs +++ b/src/sql/src/names.rs @@ -2387,6 +2387,32 @@ where ResolvedIds::new(visitor.ids) } +#[derive(Debug)] +pub struct RawItemDependencyIds { + pub ids: BTreeSet, +} + +impl<'ast> Visit<'ast, Raw> for RawItemDependencyIds { + fn visit_item_name(&mut self, item_name: &RawItemName) { + if let RawItemName::Id(id, _, _) = item_name { + let parsed_id = id.parse::().unwrap(); + self.ids.insert(parsed_id); + } + } +} + +/// Collect any ID-based dependencies of the provided raw AST node. +pub fn raw_item_dependency_ids<'ast, N>(node: &'ast N) -> BTreeSet +where + N: VisitNode<'ast, Raw>, +{ + let mut deps = RawItemDependencyIds { + ids: BTreeSet::new(), + }; + node.visit(&mut deps); + deps.ids +} + #[derive(Debug)] pub struct ItemDependencyModifier<'a> { pub modified: bool, From 1bcc902daaa891fac4e7c114f6f3ffe3b24e186c Mon Sep 17 00:00:00 2001 From: Joseph Koshakow Date: Thu, 14 Nov 2024 11:35:44 -0500 Subject: [PATCH 25/52] Fix merge skew --- src/adapter/src/catalog/migrate.rs | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/adapter/src/catalog/migrate.rs b/src/adapter/src/catalog/migrate.rs index 196e18250a0f5..85b25f474f01c 100644 --- a/src/adapter/src/catalog/migrate.rs +++ b/src/adapter/src/catalog/migrate.rs @@ -14,13 +14,14 @@ use mz_catalog::durable::{Transaction, Item}; use mz_catalog::memory::objects::{StateUpdate,BootstrapStateUpdateKind}; use mz_ore::collections::CollectionExt; use mz_ore::now::NowFn; -use mz_repr::{CatalogItemId, Timestamp}; +use mz_repr::{CatalogItemId, GlobalId, Timestamp}; use mz_sql::ast::display::AstDisplay; use mz_sql::names::FullItemName; use mz_sql_parser::ast::{Raw, Statement}; use semver::Version; use tracing::info; use uuid::Uuid; + // DO NOT add any more imports from `crate` outside of `crate::catalog`. use crate::catalog::open::into_consolidatable_updates_startup; use crate::catalog::state::LocalExpressionCache; @@ -369,7 +370,7 @@ fn ast_rewrite_sources_to_tables( item.create_sql, table ); item.create_sql = Statement::CreateTableFromSource(table).to_ast_string_stable(); - tx.update_item(item.global_id, item)?; + tx.update_item(item.id, item)?; } // Postgres sources are multi-output sources whose subsources are @@ -420,7 +421,7 @@ fn ast_rewrite_sources_to_tables( progress_subsource, }); item.create_sql = stmt.to_ast_string_stable(); - tx.update_item(item.global_id, item)?; + tx.update_item(item.id, item)?; info!("migrate: converted postgres source {stmt} to remove subsource options"); } } @@ -478,7 +479,7 @@ fn ast_rewrite_sources_to_tables( progress_subsource, }); item.create_sql = stmt.to_ast_string_stable(); - tx.update_item(item.global_id, item)?; + tx.update_item(item.id, item)?; info!("migrate: converted mysql source {stmt} to remove subsource options"); } } @@ -705,25 +706,28 @@ fn ast_rewrite_sources_to_tables( let owner_id = item.owner_id.clone(); let privileges = item.privileges.clone(); + let extra_versions = item.extra_versions.clone(); // Update the source statement in the catalog first, since the name will // otherwise conflict with the new table statement. 
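// Roughly, the steps below perform the following shuffle (names, IDs, and shard
// labels are hypothetical; the real values come from the catalog being migrated):
//
//     before:  u1 "kafka_src"         CREATE SOURCE ...                  shard s1
//     after:   u1 "kafka_src_source"  CREATE SOURCE ...                  fresh shard
//              u9 "kafka_src"         CREATE TABLE .. FROM SOURCE [u1]   shard s1
//
// That is, the new table takes over both the original name and the existing
// persist shard, while the renamed source keeps its GlobalId and continues with
// an empty shard.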
info!("migrate: updated source {} to {source}", item.create_sql); item.create_sql = Statement::CreateSource(source).to_ast_string_stable(); - tx.update_item(item.global_id, item)?; + tx.update_item(item.id, item)?; // Insert the new table statement into the catalog with a new id. let ids = tx.allocate_user_item_ids(1)?; - let new_table_id = ids[0]; + let (new_table_id, new_table_global_id) = ids[0]; info!("migrate: added table {new_table_id}: {table}"); tx.insert_user_item( new_table_id, + new_table_global_id, schema_id, &table_item_name, table.to_ast_string_stable(), owner_id, privileges, &Default::default(), + extra_versions, )?; // We need to move the shard currently attached to the source statement to the // table statement such that the existing data in the shard is preserved and can @@ -737,7 +741,7 @@ fn ast_rewrite_sources_to_tables( .pop() .expect("shard should exist"); tx.insert_collection_metadata(btreemap! { - new_table_id => existing_source_shard, + new_table_global_id => existing_source_shard, source_id => new_source_shard })?; @@ -761,7 +765,7 @@ fn ast_rewrite_sources_to_tables( // We also need to update any other statements that reference the source to use the new // table id/name instead. - changed_ids.insert(source_id, new_table_id); + changed_ids.insert(source_id, new_table_global_id); } // When we upgrade to > rust 1.81 we should use #[expect(unreachable_patterns)] @@ -787,7 +791,7 @@ fn ast_rewrite_sources_to_tables( if mz_sql::names::modify_dependency_item_ids(&mut statement, &changed_ids) { info!("migrate: updated dependency reference in statement {statement}"); item.create_sql = statement.to_ast_string_stable(); - updated_items.insert(item.global_id, item); + updated_items.insert(item.id, item); } } } From 30aa14b5f698db0257046b9bb3fe5d852858b911 Mon Sep 17 00:00:00 2001 From: Joseph Koshakow Date: Thu, 14 Nov 2024 14:17:41 -0500 Subject: [PATCH 26/52] Fix dependency tracking --- src/adapter/src/catalog/apply.rs | 60 ++++++++++++++++++++++++++------ src/sql/src/names.rs | 29 +++++++++------ 2 files changed, 69 insertions(+), 20 deletions(-) diff --git a/src/adapter/src/catalog/apply.rs b/src/adapter/src/catalog/apply.rs index 8157d7098c16a..07e69c12c226b 100644 --- a/src/adapter/src/catalog/apply.rs +++ b/src/adapter/src/catalog/apply.rs @@ -37,7 +37,7 @@ use mz_compute_types::config::ComputeReplicaConfig; use mz_controller::clusters::{ReplicaConfig, ReplicaLogging}; use mz_controller_types::ClusterId; use mz_expr::MirScalarExpr; -use mz_ore::collections::CollectionExt; +use mz_ore::collections::{CollectionExt, HashMap}; use mz_ore::tracing::OpenTelemetryContext; use mz_ore::{instrument, soft_assert_no_log}; use mz_pgrepr::oid::INVALID_OID; @@ -101,7 +101,7 @@ impl CatalogState { local_expression_cache: &mut LocalExpressionCache, ) -> Vec> { let mut builtin_table_updates = Vec::with_capacity(updates.len()); - let updates = sort_updates(updates); + let updates = sort_updates(updates, &self); let mut groups: Vec> = Vec::new(); for (_, updates) in &updates.into_iter().group_by(|update| update.ts) { @@ -143,7 +143,7 @@ impl CatalogState { updates: Vec, ) -> Result>, CatalogError> { let mut builtin_table_updates = Vec::with_capacity(updates.len()); - let updates = sort_updates(updates); + let updates = sort_updates(updates, self); for (_, updates) in &updates.into_iter().group_by(|update| update.ts) { let mut retractions = InProgressRetractions::default(); @@ -1740,12 +1740,12 @@ impl CatalogState { } /// Sort [`StateUpdate`]s in timestamp then dependency order -fn 
sort_updates(mut updates: Vec) -> Vec { +fn sort_updates(mut updates: Vec, state: &CatalogState) -> Vec { let mut sorted_updates = Vec::with_capacity(updates.len()); updates.sort_by_key(|update| update.ts); for (_, updates) in &updates.into_iter().group_by(|update| update.ts) { - let sorted_ts_updates = sort_updates_inner(updates.collect()); + let sorted_ts_updates = sort_updates_inner(updates.collect(), state); sorted_updates.extend(sorted_ts_updates); } @@ -1753,7 +1753,7 @@ fn sort_updates(mut updates: Vec) -> Vec { } /// Sort [`StateUpdate`]s in dependency order for a single timestamp. -fn sort_updates_inner(updates: Vec) -> Vec { +fn sort_updates_inner(updates: Vec, state: &CatalogState) -> Vec { fn push_update( update: T, diff: StateDiff, @@ -1889,7 +1889,16 @@ fn sort_updates_inner(updates: Vec) -> Vec { /// this set of updates and then performing a topological sort. fn sort_item_updates( item_updates: Vec<(mz_catalog::durable::Item, Timestamp, StateDiff)>, + state: &CatalogState, ) -> VecDeque<(mz_catalog::durable::Item, Timestamp, StateDiff)> { + let mut item_id_lookup: HashMap<_, HashMap<_, _>> = HashMap::new(); + for (item, _, _) in &item_updates { + item_id_lookup + .entry(item.schema_id) + .or_insert_with(|| HashMap::new()) + .insert(item.name.clone(), item.global_id); + } + let mut items_with_dependencies = item_updates .into_iter() .map(|(item, ts, diff)| { @@ -1897,9 +1906,40 @@ fn sort_updates_inner(updates: Vec) -> Vec { .expect("failed to parse persisted SQL") .into_element() .ast; - let deps = mz_sql::names::raw_item_dependency_ids(&parsed); + let (mut id_deps, name_deps) = mz_sql::names::raw_item_dependency_ids(&parsed); + + // Convert named deps into ID deps. Ideally this is empty and all dependencies are + // specified by ID. However, this is not the case and require some changes and a + // migration to fix. + for name in name_deps { + let (db, schema, item) = match name.0.len() { + 3 => ( + Some(name.0[0].as_str()), + name.0[1].as_str(), + name.0[2].as_str(), + ), + 2 => (None, name.0[1].as_str(), name.0[2].as_str()), + _ => panic!("Invalid item name: {name:?}"), + }; + let schema = state + .resolve_schema(None, db, schema, &SYSTEM_CONN_ID) + .expect("schema must be loaded before an item"); + // If `name` is not also being applied in this batch then the relative order of + // `item` and `name` doesn't matter, so we can ignore it. + let schema_id = match schema.id { + SchemaSpecifier::Id(id) => id, + SchemaSpecifier::Temporary => { + panic!("temporary item {name:?} persisted as dependency of {item:?}") + } + }; + if let Some(ids) = item_id_lookup.get(&schema_id) { + if let Some(id) = ids.get(item) { + id_deps.insert(*id); + } + } + } - (item.global_id, (deps, (item, ts, diff))) + (item.global_id, (id_deps, (item, ts, diff))) }) .collect::>(); let mut visited = BTreeSet::new(); @@ -1942,8 +1982,8 @@ fn sort_updates_inner(updates: Vec) -> Vec { .collect() } - let item_retractions = sort_item_updates(item_retractions); - let item_additions = sort_item_updates(item_additions); + let item_retractions = sort_item_updates(item_retractions, state); + let item_additions = sort_item_updates(item_additions, state); /// Sort temporary item updates by GlobalId. 
fn sort_temp_item_updates( diff --git a/src/sql/src/names.rs b/src/sql/src/names.rs index 658cc93bba9e4..2221d9257dd05 100644 --- a/src/sql/src/names.rs +++ b/src/sql/src/names.rs @@ -2388,29 +2388,38 @@ where } #[derive(Debug)] -pub struct RawItemDependencyIds { - pub ids: BTreeSet, +struct RawItemDependencyIds<'a> { + ids: BTreeSet, + names: BTreeSet<&'a UnresolvedItemName>, } -impl<'ast> Visit<'ast, Raw> for RawItemDependencyIds { - fn visit_item_name(&mut self, item_name: &RawItemName) { - if let RawItemName::Id(id, _, _) = item_name { - let parsed_id = id.parse::().unwrap(); - self.ids.insert(parsed_id); +impl<'ast> Visit<'ast, Raw> for RawItemDependencyIds<'ast> { + fn visit_item_name(&mut self, item_name: &'ast RawItemName) { + match item_name { + RawItemName::Name(name) => { + self.names.insert(name); + } + RawItemName::Id(id, _, _) => { + let parsed_id = id.parse::().unwrap(); + self.ids.insert(parsed_id); + } } } } -/// Collect any ID-based dependencies of the provided raw AST node. -pub fn raw_item_dependency_ids<'ast, N>(node: &'ast N) -> BTreeSet +/// Collect any dependencies of the provided raw AST node. +pub fn raw_item_dependency_ids<'ast, N>( + node: &'ast N, +) -> (BTreeSet, BTreeSet<&'ast UnresolvedItemName>) where N: VisitNode<'ast, Raw>, { let mut deps = RawItemDependencyIds { ids: BTreeSet::new(), + names: BTreeSet::new(), }; node.visit(&mut deps); - deps.ids + (deps.ids, deps.names) } #[derive(Debug)] From cc0a868f6658827f15330e7e0005efc865e597d5 Mon Sep 17 00:00:00 2001 From: Joseph Koshakow Date: Thu, 14 Nov 2024 14:18:24 -0500 Subject: [PATCH 27/52] Fix lint --- src/adapter/src/catalog/open.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/adapter/src/catalog/open.rs b/src/adapter/src/catalog/open.rs index 9f563aa3189ae..61131977252af 100644 --- a/src/adapter/src/catalog/open.rs +++ b/src/adapter/src/catalog/open.rs @@ -536,7 +536,9 @@ impl Catalog { diff: diff.try_into().expect("valid diff"), }) .collect(); - let builtin_table_update = state.apply_updates_for_bootstrap(post_item_updates, &mut local_expr_cache).await; + let builtin_table_update = state + .apply_updates_for_bootstrap(post_item_updates, &mut local_expr_cache) + .await; builtin_table_updates.extend(builtin_table_update); // We don't need to apply the audit logs in memory, yet apply can be expensive when the From 635e9883b08ebdecf5070101238fd98af2604bdb Mon Sep 17 00:00:00 2001 From: Joseph Koshakow Date: Thu, 14 Nov 2024 14:54:23 -0500 Subject: [PATCH 28/52] Fix some issues --- src/adapter/src/catalog/apply.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/adapter/src/catalog/apply.rs b/src/adapter/src/catalog/apply.rs index 07e69c12c226b..a8ba5800d7b19 100644 --- a/src/adapter/src/catalog/apply.rs +++ b/src/adapter/src/catalog/apply.rs @@ -101,7 +101,7 @@ impl CatalogState { local_expression_cache: &mut LocalExpressionCache, ) -> Vec> { let mut builtin_table_updates = Vec::with_capacity(updates.len()); - let updates = sort_updates(updates, &self); + let updates = sort_updates(updates, self); let mut groups: Vec> = Vec::new(); for (_, updates) in &updates.into_iter().group_by(|update| update.ts) { @@ -1895,7 +1895,7 @@ fn sort_updates_inner(updates: Vec, state: &CatalogState) -> Vec, state: &CatalogState) -> Vec (None, name.0[1].as_str(), name.0[2].as_str()), + 2 => (None, name.0[0].as_str(), name.0[1].as_str()), _ => panic!("Invalid item name: {name:?}"), }; let schema = state From 7e0428659266daafb9b45c5107043c4f3cee8d68 Mon Sep 17 00:00:00 
2001 From: Joseph Koshakow Date: Thu, 14 Nov 2024 16:50:25 -0500 Subject: [PATCH 29/52] Fix dependency tracking --- src/adapter/src/catalog/apply.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/adapter/src/catalog/apply.rs b/src/adapter/src/catalog/apply.rs index a8ba5800d7b19..e33c734eecfb5 100644 --- a/src/adapter/src/catalog/apply.rs +++ b/src/adapter/src/catalog/apply.rs @@ -1919,7 +1919,8 @@ fn sort_updates_inner(updates: Vec, state: &CatalogState) -> Vec (None, name.0[0].as_str(), name.0[1].as_str()), - _ => panic!("Invalid item name: {name:?}"), + // This must be a CTE. + _ => continue, }; let schema = state .resolve_schema(None, db, schema, &SYSTEM_CONN_ID) From 21b4d51234079f419af663a04f229e189f8da7b3 Mon Sep 17 00:00:00 2001 From: Joseph Koshakow Date: Fri, 15 Nov 2024 10:01:41 -0500 Subject: [PATCH 30/52] Fix merge skew --- src/adapter/src/catalog/open.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/adapter/src/catalog/open.rs b/src/adapter/src/catalog/open.rs index 61131977252af..926cebb6bd709 100644 --- a/src/adapter/src/catalog/open.rs +++ b/src/adapter/src/catalog/open.rs @@ -379,7 +379,6 @@ impl Catalog { let mut post_item_updates = Vec::new(); let mut audit_log_updates = Vec::new(); for (kind, ts, diff) in updates { - let diff = diff.try_into().expect("valid diff"); match kind { BootstrapStateUpdateKind::Role(_) | BootstrapStateUpdateKind::Database(_) @@ -393,7 +392,7 @@ impl Catalog { pre_item_updates.push(StateUpdate { kind: kind.into(), ts, - diff, + diff: diff.try_into().expect("valid diff"), }) } BootstrapStateUpdateKind::IntrospectionSourceIndex(_) @@ -401,13 +400,13 @@ impl Catalog { system_item_updates.push(StateUpdate { kind: kind.into(), ts, - diff, + diff: diff.try_into().expect("valid diff"), }) } BootstrapStateUpdateKind::Item(_) => item_updates.push(StateUpdate { kind: kind.into(), ts, - diff, + diff: diff.try_into().expect("valid diff"), }), BootstrapStateUpdateKind::Comment(_) | BootstrapStateUpdateKind::StorageCollectionMetadata(_) @@ -419,7 +418,7 @@ impl Catalog { audit_log_updates.push(StateUpdate { kind: kind.into(), ts, - diff, + diff: diff.try_into().expect("valid diff"), }); } } From a6c67e83d6371032970e60f33b403ac6cd41f7d7 Mon Sep 17 00:00:00 2001 From: Joseph Koshakow Date: Fri, 15 Nov 2024 12:11:40 -0500 Subject: [PATCH 31/52] Update test versions --- misc/python/materialize/source_table_migration.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/misc/python/materialize/source_table_migration.py b/misc/python/materialize/source_table_migration.py index 423443da73fb2..0f56806d2d9ba 100644 --- a/misc/python/materialize/source_table_migration.py +++ b/misc/python/materialize/source_table_migration.py @@ -59,12 +59,12 @@ def _verify_source( def check_source_table_migration_test_sensible() -> None: assert MzVersion.parse_cargo() < MzVersion.parse_mz( - "v0.130.0" + "v0.133.0" ), "migration test probably no longer needed" def get_old_image_for_source_table_migration_test() -> str: - return "materialize/materialized:v0.122.0" + return "materialize/materialized:v0.125.0" def get_new_image_for_source_table_migration_test() -> str | None: From 3144e921bb978ac5ad18dc7c9b8401ae6cbec3db Mon Sep 17 00:00:00 2001 From: Joseph Koshakow Date: Thu, 12 Dec 2024 16:05:38 -0500 Subject: [PATCH 32/52] More merge skew fixes --- .../materialize/checks/all_checks/source_tables.py | 1 + src/adapter-types/src/dyncfgs.rs | 1 - src/adapter/src/catalog/apply.rs | 2 +- 
src/adapter/src/catalog/migrate.rs | 11 +++++------ 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/misc/python/materialize/checks/all_checks/source_tables.py b/misc/python/materialize/checks/all_checks/source_tables.py index c36bb1eb8af12..279ede6a4022e 100644 --- a/misc/python/materialize/checks/all_checks/source_tables.py +++ b/misc/python/materialize/checks/all_checks/source_tables.py @@ -28,6 +28,7 @@ class TableFromPgSource(TableFromSourceBase): suffix = "tbl_from_pg_source" def initialize(self) -> Testdrive: + return Testdrive( self.generic_setup() + dedent( diff --git a/src/adapter-types/src/dyncfgs.rs b/src/adapter-types/src/dyncfgs.rs index 0ab56c9b41144..795a342f284a4 100644 --- a/src/adapter-types/src/dyncfgs.rs +++ b/src/adapter-types/src/dyncfgs.rs @@ -128,5 +128,4 @@ pub fn all_dyncfgs(configs: ConfigSet) -> ConfigSet { .add(&DEFAULT_SINK_PARTITION_STRATEGY) .add(&ENABLE_CONTINUAL_TASK_BUILTINS) .add(&ENABLE_EXPRESSION_CACHE) - .add(&ENABLE_SOURCE_TABLE_MIGRATION) } diff --git a/src/adapter/src/catalog/apply.rs b/src/adapter/src/catalog/apply.rs index e33c734eecfb5..f8b2e39b9c487 100644 --- a/src/adapter/src/catalog/apply.rs +++ b/src/adapter/src/catalog/apply.rs @@ -25,7 +25,7 @@ use mz_catalog::builtin::{ use mz_catalog::durable::objects::{ ClusterKey, DatabaseKey, DurableType, ItemKey, NetworkPolicyKey, RoleKey, SchemaKey, }; -use mz_catalog::durable::{CatalogError, SystemObjectMapping}; +use mz_catalog::durable::{CatalogError, SystemObjectMapping}; use mz_catalog::memory::error::{Error, ErrorKind}; use mz_catalog::memory::objects::{ CatalogEntry, CatalogItem, Cluster, ClusterReplica, DataSourceDesc, Database, Func, Index, Log, diff --git a/src/adapter/src/catalog/migrate.rs b/src/adapter/src/catalog/migrate.rs index 85b25f474f01c..bfadeaa3cd2bb 100644 --- a/src/adapter/src/catalog/migrate.rs +++ b/src/adapter/src/catalog/migrate.rs @@ -31,7 +31,7 @@ fn rewrite_ast_items(tx: &mut Transaction<'_>, mut f: F) -> Result<(), anyhow where F: for<'a> FnMut( &'a mut Transaction<'_>, - GlobalId, + CatalogItemId, &'a mut Statement, ) -> Result<(), anyhow::Error>, { @@ -39,8 +39,8 @@ where for mut item in tx.get_items() { let mut stmt = mz_sql::parse::parse(&item.create_sql)?.into_element().ast; - // TODO(alter_table): Switch this to CatalogItemId. - f(tx, item.global_id, &mut stmt).await?; + f(tx, item.id, &mut stmt)?; + item.create_sql = stmt.to_ast_string_stable(); updated_items.insert(item.id, item); @@ -110,8 +110,7 @@ pub(crate) async fn migrate( ast_rewrite_sources_to_tables(tx, now)?; } - rewrite_ast_items(tx, |tx, item, stmt, all_items_and_statements| { - let now = now.clone(); + rewrite_ast_items(tx, |_tx, _id, _stmt| { // Add per-item AST migrations below. // // Each migration should be a function that takes `stmt` (the AST @@ -121,7 +120,7 @@ pub(crate) async fn migrate( // Migration functions may also take `tx` as input to stage // arbitrary changes to the catalog. - Ok(()) + Ok(()) })?; // Load items into catalog. 
We make sure to consolidate the old updates with the new updates to From 01619dc65d5324dbe1ae6acfde9fe2b6fd85b2a7 Mon Sep 17 00:00:00 2001 From: Joseph Koshakow Date: Mon, 16 Dec 2024 14:06:13 -0500 Subject: [PATCH 33/52] Update item sorting to only sort within item groups --- src/adapter/src/catalog/apply.rs | 209 ++++++++++++++--------------- src/catalog/src/durable/objects.rs | 60 +++++---- src/sql/src/names.rs | 35 ----- 3 files changed, 139 insertions(+), 165 deletions(-) diff --git a/src/adapter/src/catalog/apply.rs b/src/adapter/src/catalog/apply.rs index f8b2e39b9c487..7be37cb11650a 100644 --- a/src/adapter/src/catalog/apply.rs +++ b/src/adapter/src/catalog/apply.rs @@ -37,7 +37,6 @@ use mz_compute_types::config::ComputeReplicaConfig; use mz_controller::clusters::{ReplicaConfig, ReplicaLogging}; use mz_controller_types::ClusterId; use mz_expr::MirScalarExpr; -use mz_ore::collections::{CollectionExt, HashMap}; use mz_ore::tracing::OpenTelemetryContext; use mz_ore::{instrument, soft_assert_no_log}; use mz_pgrepr::oid::INVALID_OID; @@ -101,7 +100,7 @@ impl CatalogState { local_expression_cache: &mut LocalExpressionCache, ) -> Vec> { let mut builtin_table_updates = Vec::with_capacity(updates.len()); - let updates = sort_updates(updates, self); + let updates = sort_updates(updates); let mut groups: Vec> = Vec::new(); for (_, updates) in &updates.into_iter().group_by(|update| update.ts) { @@ -143,7 +142,7 @@ impl CatalogState { updates: Vec, ) -> Result>, CatalogError> { let mut builtin_table_updates = Vec::with_capacity(updates.len()); - let updates = sort_updates(updates, self); + let updates = sort_updates(updates); for (_, updates) in &updates.into_iter().group_by(|update| update.ts) { let mut retractions = InProgressRetractions::default(); @@ -1740,12 +1739,12 @@ impl CatalogState { } /// Sort [`StateUpdate`]s in timestamp then dependency order -fn sort_updates(mut updates: Vec, state: &CatalogState) -> Vec { +fn sort_updates(mut updates: Vec) -> Vec { let mut sorted_updates = Vec::with_capacity(updates.len()); updates.sort_by_key(|update| update.ts); for (_, updates) in &updates.into_iter().group_by(|update| update.ts) { - let sorted_ts_updates = sort_updates_inner(updates.collect(), state); + let sorted_ts_updates = sort_updates_inner(updates.collect()); sorted_updates.extend(sorted_ts_updates); } @@ -1753,7 +1752,7 @@ fn sort_updates(mut updates: Vec, state: &CatalogState) -> Vec, state: &CatalogState) -> Vec { +fn sort_updates_inner(updates: Vec) -> Vec { fn push_update( update: T, diff: StateDiff, @@ -1889,117 +1888,117 @@ fn sort_updates_inner(updates: Vec, state: &CatalogState) -> Vec, - state: &CatalogState, ) -> VecDeque<(mz_catalog::durable::Item, Timestamp, StateDiff)> { - let mut item_id_lookup: HashMap<_, HashMap<_, _>> = HashMap::new(); - for (item, _, _) in &item_updates { - item_id_lookup - .entry(item.schema_id) - .or_insert_with(HashMap::new) - .insert(item.name.clone(), item.global_id); - } - - let mut items_with_dependencies = item_updates - .into_iter() - .map(|(item, ts, diff)| { - let parsed = mz_sql::parse::parse(&item.create_sql) - .expect("failed to parse persisted SQL") - .into_element() - .ast; - let (mut id_deps, name_deps) = mz_sql::names::raw_item_dependency_ids(&parsed); - - // Convert named deps into ID deps. Ideally this is empty and all dependencies are - // specified by ID. However, this is not the case and require some changes and a - // migration to fix. 
- for name in name_deps { - let (db, schema, item) = match name.0.len() { - 3 => ( - Some(name.0[0].as_str()), - name.0[1].as_str(), - name.0[2].as_str(), - ), - 2 => (None, name.0[0].as_str(), name.0[1].as_str()), - // This must be a CTE. - _ => continue, - }; - let schema = state - .resolve_schema(None, db, schema, &SYSTEM_CONN_ID) - .expect("schema must be loaded before an item"); - // If `name` is not also being applied in this batch then the relative order of - // `item` and `name` doesn't matter, so we can ignore it. - let schema_id = match schema.id { - SchemaSpecifier::Id(id) => id, - SchemaSpecifier::Temporary => { - panic!("temporary item {name:?} persisted as dependency of {item:?}") - } - }; - if let Some(ids) = item_id_lookup.get(&schema_id) { - if let Some(id) = ids.get(item) { - id_deps.insert(*id); - } - } - } - - (item.global_id, (id_deps, (item, ts, diff))) - }) - .collect::>(); - let mut visited = BTreeSet::new(); - let mut sorted = Vec::new(); - fn dfs( - id: GlobalId, - visited: &mut BTreeSet, - sorted: &mut Vec, - items_with_dependencies: &BTreeMap< - GlobalId, - ( - BTreeSet, - (mz_catalog::durable::Item, Timestamp, StateDiff), - ), - >, - ) { - visited.insert(id); - let deps = items_with_dependencies - .get(&id) - .map(|(deps, _)| deps) - .expect("item should be in the map"); - for dep in deps { - // We only want to visit dependencies that are in the current set of updates. - if !visited.contains(dep) && items_with_dependencies.contains_key(dep) { - dfs(*dep, visited, sorted, items_with_dependencies); - } + // Partition items into groups s.t. each item in one group has a predefined order with all + // items in other groups. For example, all sinks are ordered greater than all tables. + let mut types = Vec::new(); + let mut secrets = Vec::new(); + let mut connections = Vec::new(); + let mut sources = Vec::new(); + let mut tables = Vec::new(); + let mut derived_items = Vec::new(); + let mut sinks = Vec::new(); + let mut continual_tasks = Vec::new(); + + for update in item_updates { + match update.0.item_type() { + CatalogItemType::Type => types.push(update), + CatalogItemType::Secret => secrets.push(update), + CatalogItemType::Connection => connections.push(update), + CatalogItemType::Source => sources.push(update), + CatalogItemType::Table => tables.push(update), + CatalogItemType::View + | CatalogItemType::MaterializedView + | CatalogItemType::Index + | CatalogItemType::Func => derived_items.push(update), + CatalogItemType::Sink => sinks.push(update), + CatalogItemType::ContinualTask => continual_tasks.push(update), } - sorted.push(id); } - for id in items_with_dependencies.keys() { - if !visited.contains(id) { - dfs(*id, &mut visited, &mut sorted, &items_with_dependencies); - } + + // Within each group, sort by ID. 
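// (Within-group ID order is presumably still safe because the migration only
// redirects dependencies across these groups, e.g. a view now pointing at a
// newly allocated table, while same-type dependencies, such as views built on
// other views, continue to respect creation order and therefore ID order.)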
+ for group in [ + &mut types, + &mut secrets, + &mut connections, + &mut sources, + &mut tables, + &mut derived_items, + &mut sinks, + &mut continual_tasks, + ] { + group.sort_by_key(|(item, _, _)| item.id); } - // return the values from items_with_dependencies in the order of sorted - sorted - .into_iter() - .filter_map(|id| items_with_dependencies.remove(&id)) - .map(|item| item.1) + + iter::empty() + .chain(types) + .chain(secrets) + .chain(connections) + .chain(sources) + .chain(tables) + .chain(derived_items) + .chain(sinks) + .chain(continual_tasks) .collect() } - let item_retractions = sort_item_updates(item_retractions, state); - let item_additions = sort_item_updates(item_additions, state); + let item_retractions = sort_item_updates(item_retractions); + let item_additions = sort_item_updates(item_additions); /// Sort temporary item updates by GlobalId. fn sort_temp_item_updates( temp_item_updates: Vec<(TemporaryItem, Timestamp, StateDiff)>, ) -> VecDeque<(TemporaryItem, Timestamp, StateDiff)> { - temp_item_updates - .into_iter() - // HACK: due to `ALTER SINK`, sinks can appear before the objects they - // depend upon. Fortunately, because sinks can never have dependencies - // and can never depend upon one another, to fix the topological sort, - // we can just always move sinks to the end. - .sorted_by_key(|(item, _ts, _diff)| match item.item.typ() { - CatalogItemType::Sink => CatalogItemId::User(u64::MAX), - _ => item.id, - }) + // Partition items into groups s.t. each item in one group has a predefined order with all + // items in other groups. For example, all sinks are ordered greater than all tables. + let mut types = Vec::new(); + let mut secrets = Vec::new(); + let mut connections = Vec::new(); + let mut sources = Vec::new(); + let mut tables = Vec::new(); + let mut derived_items = Vec::new(); + let mut sinks = Vec::new(); + let mut continual_tasks = Vec::new(); + + for update in temp_item_updates { + match update.0.item.typ() { + CatalogItemType::Type => types.push(update), + CatalogItemType::Secret => secrets.push(update), + CatalogItemType::Connection => connections.push(update), + CatalogItemType::Source => sources.push(update), + CatalogItemType::Table => tables.push(update), + CatalogItemType::View + | CatalogItemType::MaterializedView + | CatalogItemType::Index + | CatalogItemType::Func => derived_items.push(update), + CatalogItemType::Sink => sinks.push(update), + CatalogItemType::ContinualTask => continual_tasks.push(update), + } + } + + // Within each group, sort by ID. 
+ for group in [ + &mut types, + &mut secrets, + &mut connections, + &mut sources, + &mut tables, + &mut derived_items, + &mut sinks, + &mut continual_tasks, + ] { + group.sort_by_key(|(item, _, _)| item.id); + } + + iter::empty() + .chain(types) + .chain(secrets) + .chain(connections) + .chain(sources) + .chain(tables) + .chain(derived_items) + .chain(sinks) + .chain(continual_tasks) .collect() } let temp_item_retractions = sort_temp_item_updates(temp_item_retractions); diff --git a/src/catalog/src/durable/objects.rs b/src/catalog/src/durable/objects.rs index e6bb869ce84fb..1d0002461d61f 100644 --- a/src/catalog/src/durable/objects.rs +++ b/src/catalog/src/durable/objects.rs @@ -509,6 +509,12 @@ pub struct Item { pub extra_versions: BTreeMap, } +impl Item { + pub fn item_type(&self) -> CatalogItemType { + item_type(&self.create_sql) + } +} + impl DurableType for Item { type Key = ItemKey; type Value = ItemValue; @@ -1289,32 +1295,36 @@ pub struct ItemValue { } impl ItemValue { - pub(crate) fn item_type(&self) -> CatalogItemType { - // NOTE(benesch): the implementation of this method is hideous, but is - // there a better alternative? Storing the object type alongside the - // `create_sql` would introduce the possibility of skew. - let mut tokens = self.create_sql.split_whitespace(); - assert_eq!(tokens.next(), Some("CREATE")); - match tokens.next() { - Some("TABLE") => CatalogItemType::Table, - Some("SOURCE") | Some("SUBSOURCE") => CatalogItemType::Source, - Some("SINK") => CatalogItemType::Sink, - Some("VIEW") => CatalogItemType::View, - Some("MATERIALIZED") => { - assert_eq!(tokens.next(), Some("VIEW")); - CatalogItemType::MaterializedView - } - Some("CONTINUAL") => { - assert_eq!(tokens.next(), Some("TASK")); - CatalogItemType::ContinualTask - } - Some("INDEX") => CatalogItemType::Index, - Some("TYPE") => CatalogItemType::Type, - Some("FUNCTION") => CatalogItemType::Func, - Some("SECRET") => CatalogItemType::Secret, - Some("CONNECTION") => CatalogItemType::Connection, - _ => panic!("unexpected create sql: {}", self.create_sql), + pub fn item_type(&self) -> CatalogItemType { + item_type(&self.create_sql) + } +} + +fn item_type(create_sql: &str) -> CatalogItemType { + // NOTE(benesch): the implementation of this method is hideous, but is + // there a better alternative? Storing the object type alongside the + // `create_sql` would introduce the possibility of skew. 
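// For illustration, the prefix matching below classifies statements such as the
// following (hypothetical SQL; only the leading keywords are inspected):
//
//     item_type("CREATE MATERIALIZED VIEW v AS SELECT 1")   // => MaterializedView
//     item_type("CREATE SUBSOURCE s (a int)")               // => Source
//     item_type("CREATE CONTINUAL TASK ct ...")             // => ContinualTask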
+ let mut tokens = create_sql.split_whitespace(); + assert_eq!(tokens.next(), Some("CREATE")); + match tokens.next() { + Some("TABLE") => CatalogItemType::Table, + Some("SOURCE") | Some("SUBSOURCE") => CatalogItemType::Source, + Some("SINK") => CatalogItemType::Sink, + Some("VIEW") => CatalogItemType::View, + Some("MATERIALIZED") => { + assert_eq!(tokens.next(), Some("VIEW")); + CatalogItemType::MaterializedView + } + Some("CONTINUAL") => { + assert_eq!(tokens.next(), Some("TASK")); + CatalogItemType::ContinualTask } + Some("INDEX") => CatalogItemType::Index, + Some("TYPE") => CatalogItemType::Type, + Some("FUNCTION") => CatalogItemType::Func, + Some("SECRET") => CatalogItemType::Secret, + Some("CONNECTION") => CatalogItemType::Connection, + _ => panic!("unexpected create sql: {}", create_sql), } } diff --git a/src/sql/src/names.rs b/src/sql/src/names.rs index 2221d9257dd05..2eb24322e83b1 100644 --- a/src/sql/src/names.rs +++ b/src/sql/src/names.rs @@ -2387,41 +2387,6 @@ where ResolvedIds::new(visitor.ids) } -#[derive(Debug)] -struct RawItemDependencyIds<'a> { - ids: BTreeSet, - names: BTreeSet<&'a UnresolvedItemName>, -} - -impl<'ast> Visit<'ast, Raw> for RawItemDependencyIds<'ast> { - fn visit_item_name(&mut self, item_name: &'ast RawItemName) { - match item_name { - RawItemName::Name(name) => { - self.names.insert(name); - } - RawItemName::Id(id, _, _) => { - let parsed_id = id.parse::().unwrap(); - self.ids.insert(parsed_id); - } - } - } -} - -/// Collect any dependencies of the provided raw AST node. -pub fn raw_item_dependency_ids<'ast, N>( - node: &'ast N, -) -> (BTreeSet, BTreeSet<&'ast UnresolvedItemName>) -where - N: VisitNode<'ast, Raw>, -{ - let mut deps = RawItemDependencyIds { - ids: BTreeSet::new(), - names: BTreeSet::new(), - }; - node.visit(&mut deps); - (deps.ids, deps.names) -} - #[derive(Debug)] pub struct ItemDependencyModifier<'a> { pub modified: bool, From ec3842d02fd07a5d21c74eb02740dc301211a1c9 Mon Sep 17 00:00:00 2001 From: Joseph Koshakow Date: Tue, 17 Dec 2024 12:12:01 -0500 Subject: [PATCH 34/52] Experiment for migrate --- src/adapter/src/catalog/open.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/adapter/src/catalog/open.rs b/src/adapter/src/catalog/open.rs index 926cebb6bd709..a8493292c1528 100644 --- a/src/adapter/src/catalog/open.rs +++ b/src/adapter/src/catalog/open.rs @@ -516,6 +516,11 @@ impl Catalog { // Include any post-item-updates generated by migrations, and then consolidate // them to ensure diffs are all positive. 
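// A minimal sketch of why the timestamps are normalized below before calling
// `consolidate_updates`, assuming plain `(data, ts, diff)` triples: consolidation
// only cancels entries that agree on both the data and the timestamp.
//
//     let mut updates = vec![("item", 1u64, 1i64), ("item", 2u64, -1i64)];
//     differential_dataflow::consolidation::consolidate_updates(&mut updates);
//     assert_eq!(updates.len(), 2);    // different timestamps: nothing cancels
//     let max_ts = updates.iter().map(|(_, ts, _)| *ts).max().unwrap();
//     updates.iter_mut().for_each(|(_, ts, _)| *ts = max_ts);
//     differential_dataflow::consolidation::consolidate_updates(&mut updates);
//     assert!(updates.is_empty());     // same timestamp: the pair cancels out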
post_item_updates.extend(migrate_result.post_item_updates); + if let Some(max_ts) = post_item_updates.iter().map(|(_, ts, _)| ts).max().cloned() { + for (_, ts, _) in &mut post_item_updates { + *ts = max_ts; + } + } differential_dataflow::consolidation::consolidate_updates(&mut post_item_updates); } From 622e63ece42251e3212fc42250a59cee01232411 Mon Sep 17 00:00:00 2001 From: Joseph Koshakow Date: Wed, 18 Dec 2024 11:14:41 -0500 Subject: [PATCH 35/52] Fix migration idempotency --- src/adapter/src/catalog/migrate.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/adapter/src/catalog/migrate.rs b/src/adapter/src/catalog/migrate.rs index bfadeaa3cd2bb..f03611ddc90ef 100644 --- a/src/adapter/src/catalog/migrate.rs +++ b/src/adapter/src/catalog/migrate.rs @@ -529,7 +529,7 @@ fn ast_rewrite_sources_to_tables( let tables_for_source = items_with_statements_copied .iter() - .any(|(item, statement)| match statement { + .any(|(_, statement)| match statement { Statement::CreateTableFromSource(stmt) => { let source: GlobalId = match &stmt.source { RawItemName::Name(_) => { From 5a1135a5685f7d33dcc38629add495fa7c33a6dd Mon Sep 17 00:00:00 2001 From: Joseph Koshakow Date: Wed, 18 Dec 2024 11:24:38 -0500 Subject: [PATCH 36/52] Fixup --- src/adapter/src/catalog/apply.rs | 16 ++++++++++++---- src/adapter/src/catalog/open.rs | 1 + 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/adapter/src/catalog/apply.rs b/src/adapter/src/catalog/apply.rs index 7be37cb11650a..cd6f3fea66a74 100644 --- a/src/adapter/src/catalog/apply.rs +++ b/src/adapter/src/catalog/apply.rs @@ -1892,6 +1892,8 @@ fn sort_updates_inner(updates: Vec) -> Vec { // Partition items into groups s.t. each item in one group has a predefined order with all // items in other groups. For example, all sinks are ordered greater than all tables. let mut types = Vec::new(); + // N.B. Functions can depend on system tables, but not user tables. + let mut funcs = Vec::new(); let mut secrets = Vec::new(); let mut connections = Vec::new(); let mut sources = Vec::new(); @@ -1903,14 +1905,14 @@ fn sort_updates_inner(updates: Vec) -> Vec { for update in item_updates { match update.0.item_type() { CatalogItemType::Type => types.push(update), + CatalogItemType::Func => funcs.push(update), CatalogItemType::Secret => secrets.push(update), CatalogItemType::Connection => connections.push(update), CatalogItemType::Source => sources.push(update), CatalogItemType::Table => tables.push(update), CatalogItemType::View | CatalogItemType::MaterializedView - | CatalogItemType::Index - | CatalogItemType::Func => derived_items.push(update), + | CatalogItemType::Index => derived_items.push(update), CatalogItemType::Sink => sinks.push(update), CatalogItemType::ContinualTask => continual_tasks.push(update), } @@ -1919,6 +1921,7 @@ fn sort_updates_inner(updates: Vec) -> Vec { // Within each group, sort by ID. for group in [ &mut types, + &mut funcs, &mut secrets, &mut connections, &mut sources, @@ -1932,6 +1935,7 @@ fn sort_updates_inner(updates: Vec) -> Vec { iter::empty() .chain(types) + .chain(funcs) .chain(secrets) .chain(connections) .chain(sources) @@ -1952,6 +1956,8 @@ fn sort_updates_inner(updates: Vec) -> Vec { // Partition items into groups s.t. each item in one group has a predefined order with all // items in other groups. For example, all sinks are ordered greater than all tables. let mut types = Vec::new(); + // N.B. Functions can depend on system tables, but not user tables. 
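// (Presumably this is why user-defined functions can be ordered immediately
// after types and ahead of all user sources and tables: any table a function
// references is a system object, which is loaded before user items entirely.)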
+ let mut funcs = Vec::new(); let mut secrets = Vec::new(); let mut connections = Vec::new(); let mut sources = Vec::new(); @@ -1963,14 +1969,14 @@ fn sort_updates_inner(updates: Vec) -> Vec { for update in temp_item_updates { match update.0.item.typ() { CatalogItemType::Type => types.push(update), + CatalogItemType::Func => funcs.push(update), CatalogItemType::Secret => secrets.push(update), CatalogItemType::Connection => connections.push(update), CatalogItemType::Source => sources.push(update), CatalogItemType::Table => tables.push(update), CatalogItemType::View | CatalogItemType::MaterializedView - | CatalogItemType::Index - | CatalogItemType::Func => derived_items.push(update), + | CatalogItemType::Index => derived_items.push(update), CatalogItemType::Sink => sinks.push(update), CatalogItemType::ContinualTask => continual_tasks.push(update), } @@ -1979,6 +1985,7 @@ fn sort_updates_inner(updates: Vec) -> Vec { // Within each group, sort by ID. for group in [ &mut types, + &mut funcs, &mut secrets, &mut connections, &mut sources, @@ -1992,6 +1999,7 @@ fn sort_updates_inner(updates: Vec) -> Vec { iter::empty() .chain(types) + .chain(funcs) .chain(secrets) .chain(connections) .chain(sources) diff --git a/src/adapter/src/catalog/open.rs b/src/adapter/src/catalog/open.rs index a8493292c1528..7a2e0c08c3a1e 100644 --- a/src/adapter/src/catalog/open.rs +++ b/src/adapter/src/catalog/open.rs @@ -516,6 +516,7 @@ impl Catalog { // Include any post-item-updates generated by migrations, and then consolidate // them to ensure diffs are all positive. post_item_updates.extend(migrate_result.post_item_updates); + // Push everything to the same timestamp so it consolidates cleanly. if let Some(max_ts) = post_item_updates.iter().map(|(_, ts, _)| ts).max().cloned() { for (_, ts, _) in &mut post_item_updates { *ts = max_ts; From a79183a2f535eaaecadf01873e6df2d879a8cb7b Mon Sep 17 00:00:00 2001 From: Joseph Koshakow Date: Thu, 2 Jan 2025 13:09:23 -0500 Subject: [PATCH 37/52] resolve merge conflicts --- src/adapter/src/catalog/migrate.rs | 35 +++++------------------------- 1 file changed, 6 insertions(+), 29 deletions(-) diff --git a/src/adapter/src/catalog/migrate.rs b/src/adapter/src/catalog/migrate.rs index f03611ddc90ef..4c3e2668b9b4f 100644 --- a/src/adapter/src/catalog/migrate.rs +++ b/src/adapter/src/catalog/migrate.rs @@ -10,8 +10,8 @@ use std::collections::BTreeMap; use mz_catalog::builtin::BuiltinTable; -use mz_catalog::durable::{Transaction, Item}; -use mz_catalog::memory::objects::{StateUpdate,BootstrapStateUpdateKind}; +use mz_catalog::durable::Transaction; +use mz_catalog::memory::objects::{BootstrapStateUpdateKind, StateUpdate}; use mz_ore::collections::CollectionExt; use mz_ore::now::NowFn; use mz_repr::{CatalogItemId, GlobalId, Timestamp}; @@ -237,8 +237,8 @@ fn ast_rewrite_sources_to_tables( use mz_persist_types::ShardId; use mz_proto::RustType; use mz_sql::ast::{ - CreateSourceConnection, CreateSourceOptionName, CreateSourceStatement, - CreateSubsourceOptionName, CreateSubsourceStatement, CreateTableFromSourceStatement, Ident, + CreateSourceConnection, CreateSourceStatement, CreateSubsourceOptionName, + CreateSubsourceStatement, CreateTableFromSourceStatement, Ident, KafkaSourceConfigOptionName, LoadGenerator, MySqlConfigOptionName, PgConfigOptionName, RawItemName, TableFromSourceColumns, TableFromSourceOption, TableFromSourceOptionName, UnresolvedItemName, Value, WithOptionValue, @@ -515,7 +515,7 @@ fn ast_rewrite_sources_to_tables( include_metadata, format, envelope, - mut with_options, + 
with_options, if_not_exists, in_cluster, progress_subsource, @@ -607,36 +607,13 @@ fn ast_rewrite_sources_to_tables( } _ => unreachable!("match determined above"), }; - let mut table_with_options = vec![TableFromSourceOption { + let table_with_options = vec![TableFromSourceOption { name: TableFromSourceOptionName::Details, value: Some(WithOptionValue::Value(Value::String(hex::encode( details.into_proto().encode_to_vec(), )))), }]; - // Move over the IgnoreKeys option if it exists. - if let Some(i) = with_options - .iter() - .position(|opt| opt.name == CreateSourceOptionName::IgnoreKeys) - { - let option = with_options.remove(i); - table_with_options.push(TableFromSourceOption { - name: TableFromSourceOptionName::IgnoreKeys, - value: option.value, - }); - }; - // Move over the Timeline option if it exists. - if let Some(i) = with_options - .iter() - .position(|opt| opt.name == CreateSourceOptionName::Timeline) - { - let option = with_options.remove(i); - table_with_options.push(TableFromSourceOption { - name: TableFromSourceOptionName::Timeline, - value: option.value, - }); - } - // Generate the same external-reference that would have been generated // during purification for single-output sources. let external_reference = match &conn { From a81d461abb9e7adea6aaa6fcc2e84bde4bb03528 Mon Sep 17 00:00:00 2001 From: Joseph Koshakow Date: Tue, 14 Jan 2025 09:53:59 -0500 Subject: [PATCH 38/52] Update versions --- misc/python/materialize/mzcompose/__init__.py | 2 +- misc/python/materialize/source_table_migration.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/misc/python/materialize/mzcompose/__init__.py b/misc/python/materialize/mzcompose/__init__.py index c54d577571f08..bb4fe0440da38 100644 --- a/misc/python/materialize/mzcompose/__init__.py +++ b/misc/python/materialize/mzcompose/__init__.py @@ -91,7 +91,7 @@ def get_default_system_parameters( "enable_0dt_deployment": "true" if zero_downtime else "false", "enable_0dt_deployment_panic_after_timeout": "true", "enable_0dt_deployment_sources": ( - "true" if version >= MzVersion.parse_mz("v0.125.0-dev") else "false" + "true" if version >= MzVersion.parse_mz("v0.130.0-dev") else "false" ), "enable_alter_swap": "true", "enable_columnation_lgalloc": "true", diff --git a/misc/python/materialize/source_table_migration.py b/misc/python/materialize/source_table_migration.py index 0f56806d2d9ba..45add7e76fd73 100644 --- a/misc/python/materialize/source_table_migration.py +++ b/misc/python/materialize/source_table_migration.py @@ -59,12 +59,12 @@ def _verify_source( def check_source_table_migration_test_sensible() -> None: assert MzVersion.parse_cargo() < MzVersion.parse_mz( - "v0.133.0" + "v0.137.0" ), "migration test probably no longer needed" def get_old_image_for_source_table_migration_test() -> str: - return "materialize/materialized:v0.125.0" + return "materialize/materialized:v0.129.0" def get_new_image_for_source_table_migration_test() -> str | None: From 67aec327d44d8d4a93d31daee1216ed7db4dadab Mon Sep 17 00:00:00 2001 From: Joseph Koshakow Date: Thu, 16 Jan 2025 13:35:57 -0500 Subject: [PATCH 39/52] Fix more merge skew --- Cargo.lock | 3 +++ misc/python/materialize/mzcompose/__init__.py | 2 +- .../materialize/source_table_migration.py | 4 +-- src/adapter/BUILD.bazel | 6 +++++ src/adapter/Cargo.toml | 3 +++ src/adapter/src/catalog/apply.rs | 5 ++-- src/adapter/src/catalog/migrate.rs | 27 ++++++++++--------- src/sql/src/names.rs | 6 ++--- test/mysql-cdc-old-syntax/mzcompose.py | 6 ++--- test/pg-cdc-old-syntax/mzcompose.py | 
6 ++--- .../mzcompose.py | 6 ++--- 11 files changed, 43 insertions(+), 31 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1e3c27b91b8f0..cb41f3cca09d9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4583,6 +4583,7 @@ dependencies = [ "fail", "futures", "governor", + "hex", "http 1.1.0", "ipnet", "itertools 0.12.1", @@ -4611,6 +4612,7 @@ dependencies = [ "mz-pgrepr", "mz-pgwire-common", "mz-postgres-util", + "mz-proto", "mz-repr", "mz-rocksdb-types", "mz-secrets", @@ -4626,6 +4628,7 @@ dependencies = [ "mz-transform", "opentelemetry", "prometheus", + "prost", "qcell", "rand 0.8.5", "rand_chacha 0.3.0", diff --git a/misc/python/materialize/mzcompose/__init__.py b/misc/python/materialize/mzcompose/__init__.py index bb4fe0440da38..bc66525fcdd7c 100644 --- a/misc/python/materialize/mzcompose/__init__.py +++ b/misc/python/materialize/mzcompose/__init__.py @@ -91,7 +91,7 @@ def get_default_system_parameters( "enable_0dt_deployment": "true" if zero_downtime else "false", "enable_0dt_deployment_panic_after_timeout": "true", "enable_0dt_deployment_sources": ( - "true" if version >= MzVersion.parse_mz("v0.130.0-dev") else "false" + "true" if version >= MzVersion.parse_mz("v0.132.0-dev") else "false" ), "enable_alter_swap": "true", "enable_columnation_lgalloc": "true", diff --git a/misc/python/materialize/source_table_migration.py b/misc/python/materialize/source_table_migration.py index 45add7e76fd73..e717dc0aedb2b 100644 --- a/misc/python/materialize/source_table_migration.py +++ b/misc/python/materialize/source_table_migration.py @@ -59,12 +59,12 @@ def _verify_source( def check_source_table_migration_test_sensible() -> None: assert MzVersion.parse_cargo() < MzVersion.parse_mz( - "v0.137.0" + "v0.139.0" ), "migration test probably no longer needed" def get_old_image_for_source_table_migration_test() -> str: - return "materialize/materialized:v0.129.0" + return "materialize/materialized:v0.131.0" def get_new_image_for_source_table_migration_test() -> str | None: diff --git a/src/adapter/BUILD.bazel b/src/adapter/BUILD.bazel index 65920e5ef9811..f833c66a2fd83 100644 --- a/src/adapter/BUILD.bazel +++ b/src/adapter/BUILD.bazel @@ -53,6 +53,7 @@ rust_library( "//src/pgrepr:mz_pgrepr", "//src/pgwire-common:mz_pgwire_common", "//src/postgres-util:mz_postgres_util", + "//src/proto:mz_proto", "//src/repr:mz_repr", "//src/rocksdb-types:mz_rocksdb_types", "//src/secrets:mz_secrets", @@ -119,6 +120,7 @@ rust_test( "//src/pgrepr:mz_pgrepr", "//src/pgwire-common:mz_pgwire_common", "//src/postgres-util:mz_postgres_util", + "//src/proto:mz_proto", "//src/repr:mz_repr", "//src/rocksdb-types:mz_rocksdb_types", "//src/secrets:mz_secrets", @@ -165,6 +167,7 @@ rust_doc_test( "//src/pgrepr:mz_pgrepr", "//src/pgwire-common:mz_pgwire_common", "//src/postgres-util:mz_postgres_util", + "//src/proto:mz_proto", "//src/repr:mz_repr", "//src/rocksdb-types:mz_rocksdb_types", "//src/secrets:mz_secrets", @@ -231,6 +234,7 @@ rust_test( "//src/pgrepr:mz_pgrepr", "//src/pgwire-common:mz_pgwire_common", "//src/postgres-util:mz_postgres_util", + "//src/proto:mz_proto", "//src/repr:mz_repr", "//src/rocksdb-types:mz_rocksdb_types", "//src/secrets:mz_secrets", @@ -297,6 +301,7 @@ rust_test( "//src/pgrepr:mz_pgrepr", "//src/pgwire-common:mz_pgwire_common", "//src/postgres-util:mz_postgres_util", + "//src/proto:mz_proto", "//src/repr:mz_repr", "//src/rocksdb-types:mz_rocksdb_types", "//src/secrets:mz_secrets", @@ -363,6 +368,7 @@ rust_test( "//src/pgrepr:mz_pgrepr", "//src/pgwire-common:mz_pgwire_common", 
"//src/postgres-util:mz_postgres_util", + "//src/proto:mz_proto", "//src/repr:mz_repr", "//src/rocksdb-types:mz_rocksdb_types", "//src/secrets:mz_secrets", diff --git a/src/adapter/Cargo.toml b/src/adapter/Cargo.toml index a4298b2fe1703..d84638fa3f525 100644 --- a/src/adapter/Cargo.toml +++ b/src/adapter/Cargo.toml @@ -23,6 +23,7 @@ enum-kinds = "0.5.1" fail = { version = "0.5.1", features = ["failpoints"] } futures = "0.3.25" governor = "0.6.0" +hex = "0.4.3" http = "1.1.0" ipnet = "2.5.0" itertools = "0.12.1" @@ -53,6 +54,7 @@ mz-pgcopy = { path = "../pgcopy" } mz-pgrepr = { path = "../pgrepr" } mz-pgwire-common = { path = "../pgwire-common" } mz-postgres-util = { path = "../postgres-util" } +mz-proto = { path = "../proto" } mz-repr = { path = "../repr", features = ["tracing_"] } mz-rocksdb-types = { path = "../rocksdb-types" } mz-secrets = { path = "../secrets" } @@ -68,6 +70,7 @@ mz-transform = { path = "../transform" } mz-timestamp-oracle = { path = "../timestamp-oracle" } opentelemetry = { version = "0.24.0", features = ["trace"] } prometheus = { version = "0.13.3", default-features = false } +prost = { version = "0.13.2", features = ["no-recursion-limit"] } qcell = "0.5" rand = "0.8.5" rand_chacha = "0.3" diff --git a/src/adapter/src/catalog/apply.rs b/src/adapter/src/catalog/apply.rs index cd6f3fea66a74..5add00b0caf40 100644 --- a/src/adapter/src/catalog/apply.rs +++ b/src/adapter/src/catalog/apply.rs @@ -1884,8 +1884,7 @@ fn sort_updates_inner(updates: Vec) -> Vec { } } - /// Sort item updates by parsing statements to identify any id-based dependencies within - /// this set of updates and then performing a topological sort. + /// Sort item updates by dependency. fn sort_item_updates( item_updates: Vec<(mz_catalog::durable::Item, Timestamp, StateDiff)>, ) -> VecDeque<(mz_catalog::durable::Item, Timestamp, StateDiff)> { @@ -1949,7 +1948,7 @@ fn sort_updates_inner(updates: Vec) -> Vec { let item_retractions = sort_item_updates(item_retractions); let item_additions = sort_item_updates(item_additions); - /// Sort temporary item updates by GlobalId. + /// Sort temporary item updates by dependency. fn sort_temp_item_updates( temp_item_updates: Vec<(TemporaryItem, Timestamp, StateDiff)>, ) -> VecDeque<(TemporaryItem, Timestamp, StateDiff)> { diff --git a/src/adapter/src/catalog/migrate.rs b/src/adapter/src/catalog/migrate.rs index 4c3e2668b9b4f..a4da5729d29f8 100644 --- a/src/adapter/src/catalog/migrate.rs +++ b/src/adapter/src/catalog/migrate.rs @@ -14,7 +14,7 @@ use mz_catalog::durable::Transaction; use mz_catalog::memory::objects::{BootstrapStateUpdateKind, StateUpdate}; use mz_ore::collections::CollectionExt; use mz_ore::now::NowFn; -use mz_repr::{CatalogItemId, GlobalId, Timestamp}; +use mz_repr::{CatalogItemId, Timestamp}; use mz_sql::ast::display::AstDisplay; use mz_sql::names::FullItemName; use mz_sql_parser::ast::{Raw, Statement}; @@ -267,7 +267,7 @@ fn ast_rewrite_sources_to_tables( acc }); - // Any GlobalId that should be changed to a new GlobalId in any statements that + // Any CatalogItemId that should be changed to a new CatalogItemId in any statements that // reference it. This is necessary for ensuring downstream statements (e.g. // mat views, indexes) that reference a single-output source (e.g. kafka) // will now reference the corresponding new table, with the same data, instead. 
@@ -276,7 +276,7 @@ fn ast_rewrite_sources_to_tables( for (mut item, stmt) in items_with_statements { match stmt { // Migrate each `CREATE SUBSOURCE` statement to an equivalent - // `CREATE TABLE .. FROM SOURCE` statement. + // `CREATE TABLE ... FROM SOURCE` statement. Statement::CreateSubsource(CreateSubsourceStatement { name, columns, @@ -294,7 +294,7 @@ fn ast_rewrite_sources_to_tables( let source = match raw_source_name { // Some legacy subsources have named-only references to their `of_source` // so we ensure we always use an ID-based reference in the stored - // `CREATE TABLE .. FROM SOURCE` statements. + // `CREATE TABLE ... FROM SOURCE` statements. RawItemName::Name(name) => { // Convert the name reference to an ID reference. let (source_item, _) = items_with_statements_copied @@ -304,7 +304,7 @@ fn ast_rewrite_sources_to_tables( _ => false, }) .expect("source must exist"); - RawItemName::Id(source_item.global_id.to_string(), name, None) + RawItemName::Id(source_item.id.to_string(), name, None) } RawItemName::Id(..) => raw_source_name, }; @@ -531,7 +531,7 @@ fn ast_rewrite_sources_to_tables( .iter() .any(|(_, statement)| match statement { Statement::CreateTableFromSource(stmt) => { - let source: GlobalId = match &stmt.source { + let source: CatalogItemId = match &stmt.source { RawItemName::Name(_) => { unreachable!("tables store source as ID") } @@ -539,7 +539,7 @@ fn ast_rewrite_sources_to_tables( source_id.parse().expect("valid id") } }; - source == item.global_id + source == item.id } _ => false, }); @@ -586,7 +586,7 @@ fn ast_rewrite_sources_to_tables( // A reference to the source that will be included in the table statement let source_ref = - RawItemName::Id(item.global_id.to_string(), new_source_name.clone(), None); + RawItemName::Id(item.id.to_string(), new_source_name.clone(), None); let columns = if col_names.is_empty() { TableFromSourceColumns::NotSpecified @@ -676,7 +676,8 @@ fn ast_rewrite_sources_to_tables( progress_subsource, }; - let source_id = item.global_id; + let source_id = item.id; + let source_global_id = item.global_id; let schema_id = item.schema_id.clone(); let schema = tx.get_schema(&item.schema_id).expect("schema must exist"); @@ -712,13 +713,13 @@ fn ast_rewrite_sources_to_tables( // external tools such as DBT and Terraform. We will insert a new shard for the source // statement which will be automatically created after the migration is complete. let new_source_shard = ShardId::new(); - let (source_id, existing_source_shard) = tx - .delete_collection_metadata(btreeset! {source_id}) + let (source_global_id, existing_source_shard) = tx + .delete_collection_metadata(btreeset! {source_global_id}) .pop() .expect("shard should exist"); tx.insert_collection_metadata(btreemap! { new_table_global_id => existing_source_shard, - source_id => new_source_shard + source_global_id => new_source_shard })?; add_to_audit_log( @@ -741,7 +742,7 @@ fn ast_rewrite_sources_to_tables( // We also need to update any other statements that reference the source to use the new // table id/name instead. 
- changed_ids.insert(source_id, new_table_global_id); + changed_ids.insert(source_id, new_table_id); } // When we upgrade to > rust 1.81 we should use #[expect(unreachable_patterns)] diff --git a/src/sql/src/names.rs b/src/sql/src/names.rs index 2eb24322e83b1..1401f83872040 100644 --- a/src/sql/src/names.rs +++ b/src/sql/src/names.rs @@ -2390,13 +2390,13 @@ where #[derive(Debug)] pub struct ItemDependencyModifier<'a> { pub modified: bool, - pub id_map: &'a BTreeMap, + pub id_map: &'a BTreeMap, } impl<'ast, 'a> VisitMut<'ast, Raw> for ItemDependencyModifier<'a> { fn visit_item_name_mut(&mut self, item_name: &mut RawItemName) { if let RawItemName::Id(id, _, _) = item_name { - let parsed_id = id.parse::().unwrap(); + let parsed_id = id.parse::().unwrap(); if let Some(new_id) = self.id_map.get(&parsed_id) { *id = new_id.to_string(); self.modified = true; @@ -2411,7 +2411,7 @@ impl<'ast, 'a> VisitMut<'ast, Raw> for ItemDependencyModifier<'a> { /// ids refer to an item of the same name, whose id has changed). pub fn modify_dependency_item_ids<'ast, N>( node: &'ast mut N, - id_map: &BTreeMap, + id_map: &BTreeMap, ) -> bool where N: VisitMutNode<'ast, Raw>, diff --git a/test/mysql-cdc-old-syntax/mzcompose.py b/test/mysql-cdc-old-syntax/mzcompose.py index 308cd4a9c2856..e472f1162fb22 100644 --- a/test/mysql-cdc-old-syntax/mzcompose.py +++ b/test/mysql-cdc-old-syntax/mzcompose.py @@ -56,7 +56,7 @@ def create_mysql_replica(mysql_version: str) -> MySql: SERVICES = [ Materialized( - external_minio=True, + external_blob_store=True, additional_system_parameter_defaults={ "log_filter": "mz_storage::source::mysql=trace,info" }, @@ -318,7 +318,7 @@ def workflow_migration(c: Composition, parser: WorkflowArgumentParser) -> None: name="materialized", image=get_old_image_for_source_table_migration_test(), external_metadata_store=True, - external_minio=True, + external_blob_store=True, additional_system_parameter_defaults={ "log_filter": "mz_storage::source::mysql=trace,info" }, @@ -328,7 +328,7 @@ def workflow_migration(c: Composition, parser: WorkflowArgumentParser) -> None: name="materialized", image=get_new_image_for_source_table_migration_test(), external_metadata_store=True, - external_minio=True, + external_blob_store=True, additional_system_parameter_defaults={ "log_filter": "mz_storage::source::mysql=trace,info", "force_source_table_syntax": "true", diff --git a/test/pg-cdc-old-syntax/mzcompose.py b/test/pg-cdc-old-syntax/mzcompose.py index 51ab5ed98d0e9..9f5ad1f7f7e9b 100644 --- a/test/pg-cdc-old-syntax/mzcompose.py +++ b/test/pg-cdc-old-syntax/mzcompose.py @@ -98,7 +98,7 @@ def create_postgres( additional_system_parameter_defaults={ "log_filter": "mz_storage::source::postgres=trace,debug,info,warn,error" }, - external_minio=True, + external_blob_store=True, ), Testdrive(), CockroachOrPostgresMetadata(), @@ -402,7 +402,7 @@ def workflow_migration(c: Composition, parser: WorkflowArgumentParser) -> None: image=get_old_image_for_source_table_migration_test(), volumes_extra=["secrets:/share/secrets"], external_metadata_store=True, - external_minio=True, + external_blob_store=True, additional_system_parameter_defaults={ "log_filter": "mz_storage::source::postgres=trace,debug,info,warn,error" }, @@ -413,7 +413,7 @@ def workflow_migration(c: Composition, parser: WorkflowArgumentParser) -> None: image=get_new_image_for_source_table_migration_test(), volumes_extra=["secrets:/share/secrets"], external_metadata_store=True, - external_minio=True, + external_blob_store=True, additional_system_parameter_defaults={ 
"log_filter": "mz_storage::source::postgres=trace,debug,info,warn,error", "force_source_table_syntax": "true", diff --git a/test/testdrive-old-kafka-src-syntax/mzcompose.py b/test/testdrive-old-kafka-src-syntax/mzcompose.py index 075ddd07ea6e5..c9a11b1462a51 100644 --- a/test/testdrive-old-kafka-src-syntax/mzcompose.py +++ b/test/testdrive-old-kafka-src-syntax/mzcompose.py @@ -331,7 +331,7 @@ def workflow_migration(c: Composition, parser: WorkflowArgumentParser) -> None: # validate_catalog_store=True, default_timeout=args.default_timeout, volumes_extra=["mzdata:/mzdata"], - external_minio=True, + external_blob_store=True, fivetran_destination=True, fivetran_destination_files_path="/share/tmp", entrypoint_extra=[f"--var=uses-redpanda={args.redpanda}"], @@ -356,14 +356,14 @@ def workflow_migration(c: Composition, parser: WorkflowArgumentParser) -> None: default_size=Materialized.Size.DEFAULT_SIZE, image=get_old_image_for_source_table_migration_test(), external_metadata_store=True, - external_minio=True, + external_blob_store=True, additional_system_parameter_defaults=additional_system_parameter_defaults, ) mz_new = Materialized( default_size=Materialized.Size.DEFAULT_SIZE, image=get_new_image_for_source_table_migration_test(), external_metadata_store=True, - external_minio=True, + external_blob_store=True, additional_system_parameter_defaults=additional_system_parameter_defaults, ) From a3f06d29deddcaa4f39affeadb2353fcd3849741 Mon Sep 17 00:00:00 2001 From: Joseph Koshakow Date: Tue, 28 Jan 2025 11:17:39 -0500 Subject: [PATCH 40/52] Remove prints --- misc/python/materialize/source_table_migration.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/misc/python/materialize/source_table_migration.py b/misc/python/materialize/source_table_migration.py index e717dc0aedb2b..474665977698f 100644 --- a/misc/python/materialize/source_table_migration.py +++ b/misc/python/materialize/source_table_migration.py @@ -36,11 +36,9 @@ def _verify_source( # must not crash statement = f"SELECT count(*) FROM {source_name};" - print(statement) c.sql_query(statement) statement = f"SHOW CREATE SOURCE {source_name};" - print(statement) result = c.sql_query(statement) sql = result[0][1] assert "FOR TABLE" not in sql, f"FOR TABLE found in: {sql}" From 7650c3997231a82e6b0bac2a473e8a9c9c2f3415 Mon Sep 17 00:00:00 2001 From: Joseph Koshakow Date: Tue, 28 Jan 2025 11:53:26 -0500 Subject: [PATCH 41/52] fixup --- src/adapter/src/catalog/apply.rs | 17 +++++++++++++++++ src/adapter/src/catalog/migrate.rs | 4 +--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/adapter/src/catalog/apply.rs b/src/adapter/src/catalog/apply.rs index 5add00b0caf40..521afcedb6525 100644 --- a/src/adapter/src/catalog/apply.rs +++ b/src/adapter/src/catalog/apply.rs @@ -1885,6 +1885,20 @@ fn sort_updates_inner(updates: Vec) -> Vec { } /// Sort item updates by dependency. + /// + /// First we group items into groups that are totally ordered by dependency. For example, when + /// sorting all items by dependency we know that all tables can come after all sources, because + /// a source can never depend on a table. Within these groups, the ID order matches the + /// dependency order. + /// + /// It used to be the case that the ID order of ALL items matched the dependency order. However, + /// certain migrations shuffled item IDs around s.t. this was no longer true. A much better + /// approach would be to investigate each item, discover their exact dependencies, and then + /// perform a topological sort. 
This is non-trivial because we only have the CREATE SQL of each + /// item here. Within the SQL the dependent items are sometimes referred to by ID and sometimes + /// referred to by name. + /// + /// The logic of this function should match [`sort_temp_item_updates`]. fn sort_item_updates( item_updates: Vec<(mz_catalog::durable::Item, Timestamp, StateDiff)>, ) -> VecDeque<(mz_catalog::durable::Item, Timestamp, StateDiff)> { @@ -1892,6 +1906,7 @@ fn sort_updates_inner(updates: Vec) -> Vec { // items in other groups. For example, all sinks are ordered greater than all tables. let mut types = Vec::new(); // N.B. Functions can depend on system tables, but not user tables. + // TODO(udf): This will change when UDFs are supported. let mut funcs = Vec::new(); let mut secrets = Vec::new(); let mut connections = Vec::new(); @@ -1949,6 +1964,8 @@ fn sort_updates_inner(updates: Vec) -> Vec { let item_additions = sort_item_updates(item_additions); /// Sort temporary item updates by dependency. + /// + /// The logic of this function should match [`sort_item_updates`]. fn sort_temp_item_updates( temp_item_updates: Vec<(TemporaryItem, Timestamp, StateDiff)>, ) -> VecDeque<(TemporaryItem, Timestamp, StateDiff)> { diff --git a/src/adapter/src/catalog/migrate.rs b/src/adapter/src/catalog/migrate.rs index a4da5729d29f8..e40cdc370531c 100644 --- a/src/adapter/src/catalog/migrate.rs +++ b/src/adapter/src/catalog/migrate.rs @@ -745,9 +745,7 @@ fn ast_rewrite_sources_to_tables( changed_ids.insert(source_id, new_table_id); } - // When we upgrade to > rust 1.81 we should use #[expect(unreachable_patterns)] - // to enforce that we have covered all CreateSourceStatement variants. - #[allow(unreachable_patterns)] + #[expect(unreachable_patterns)] Statement::CreateSource(_) => {} _ => (), } From e6c9316e67894c4e71d3efe5d75b397e85f83a9a Mon Sep 17 00:00:00 2001 From: Joseph Koshakow Date: Tue, 28 Jan 2025 13:43:32 -0500 Subject: [PATCH 42/52] Fixup --- src/adapter/src/catalog/migrate.rs | 44 ++++++++++++++++-------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/src/adapter/src/catalog/migrate.rs b/src/adapter/src/catalog/migrate.rs index e94951f3d8edf..c98bfa4d12efe 100644 --- a/src/adapter/src/catalog/migrate.rs +++ b/src/adapter/src/catalog/migrate.rs @@ -19,7 +19,7 @@ use mz_persist_types::ShardId; use mz_repr::{CatalogItemId, Timestamp}; use mz_sql::ast::display::AstDisplay; use mz_sql::names::FullItemName; -use mz_sql_parser::ast::{Raw, Statement}; +use mz_sql_parser::ast::{IdentError, Raw, Statement}; use mz_storage_client::controller::StorageTxn; use semver::Version; use tracing::info; @@ -560,32 +560,36 @@ fn ast_rewrite_sources_to_tables( // queries that reference the source name, since they will now need to query the // table instead. + assert_eq!( + item.name, + name.0 + .last() + .expect("at least one ident") + .to_ast_string_stable() + ); // First find an unused name within the same schema to avoid conflicts. 
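(Not part of the patch — a rough sketch of the renaming step that the hunk below introduces, assuming `Ident::try_generate_name` behaves like the hand-rolled loop it replaces: it keeps trying the base name plus the `_source` suffix and a numeric disambiguator until the supplied predicate reports the candidate as unused. The `taken` set and the helper name `pick_source_name` are illustrative assumptions.)

    use std::collections::BTreeSet;

    use mz_sql_parser::ast::{Ident, IdentError};

    fn pick_source_name(item_name: &str, taken: &BTreeSet<String>) -> Result<Ident, IdentError> {
        // A candidate is acceptable only if no sibling item in the schema already uses it.
        let is_valid =
            |candidate: &Ident| Ok::<_, IdentError>(!taken.contains(&candidate.to_string()));
        // E.g. "orders" -> "orders_source", or a numbered variant if that name is taken too.
        Ident::try_generate_name(item_name.to_string(), "_source", is_valid)
    }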
- let mut new_source_item_name = format!("{}_source", item.name); - let mut new_source_name_inner = - format!("{}_source", name.0.last().expect("at least one ident")); - let mut i = 0; - while item_names_per_schema - .get(&item.schema_id) - .expect("schema must exist") - .contains(&new_source_item_name) - { - new_source_item_name = format!("{}_source_{}", item.name, i); - new_source_name_inner = format!( - "{}_source_{}", - name.0.last().expect("at least one ident"), - i - ); - i += 1; - } + let is_valid = |new_source_ident: &Ident| { + if item_names_per_schema + .get(&item.schema_id) + .expect("schema must exist") + .contains(&new_source_ident.to_ast_string_stable()) + { + Ok::<_, IdentError>(false) + } else { + Ok(true) + } + }; + let new_source_ident = + Ident::try_generate_name(item.name.clone(), "_source", is_valid)?; + // We will use the original item name for the new table item. let table_item_name = item.name.clone(); // Update the source item/statement to use the new name. let mut new_source_name = name.clone(); *new_source_name.0.last_mut().expect("at least one ident") = - Ident::new_unchecked(new_source_name_inner); - item.name = new_source_item_name; + new_source_ident.clone(); + item.name = new_source_ident.to_ast_string_stable(); // A reference to the source that will be included in the table statement let source_ref = From 7acae30b14933c80d8033e9cc7c29acae4bbe5be Mon Sep 17 00:00:00 2001 From: Joseph Koshakow Date: Tue, 28 Jan 2025 14:01:58 -0500 Subject: [PATCH 43/52] Fix migration test --- test/testdrive-old-kafka-src-syntax/mzcompose.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/testdrive-old-kafka-src-syntax/mzcompose.py b/test/testdrive-old-kafka-src-syntax/mzcompose.py index c9a11b1462a51..38169c32a89df 100644 --- a/test/testdrive-old-kafka-src-syntax/mzcompose.py +++ b/test/testdrive-old-kafka-src-syntax/mzcompose.py @@ -328,7 +328,6 @@ def workflow_migration(c: Composition, parser: WorkflowArgumentParser) -> None: forward_buildkite_shard=True, kafka_default_partitions=args.kafka_default_partitions, aws_region=args.aws_region, - # validate_catalog_store=True, default_timeout=args.default_timeout, volumes_extra=["mzdata:/mzdata"], external_blob_store=True, @@ -350,8 +349,6 @@ def workflow_migration(c: Composition, parser: WorkflowArgumentParser) -> None: additional_system_parameter_defaults[key] = val - additional_system_parameter_defaults["force_source_table_syntax"] = "true" - mz_old = Materialized( default_size=Materialized.Size.DEFAULT_SIZE, image=get_old_image_for_source_table_migration_test(), @@ -359,6 +356,9 @@ def workflow_migration(c: Composition, parser: WorkflowArgumentParser) -> None: external_blob_store=True, additional_system_parameter_defaults=additional_system_parameter_defaults, ) + + additional_system_parameter_defaults["force_source_table_syntax"] = "true" + mz_new = Materialized( default_size=Materialized.Size.DEFAULT_SIZE, image=get_new_image_for_source_table_migration_test(), From 84360a91b14a025f75a0fda6b6abf63682021d46 Mon Sep 17 00:00:00 2001 From: Joseph Koshakow Date: Tue, 28 Jan 2025 14:12:12 -0500 Subject: [PATCH 44/52] Update test/testdrive-old-kafka-src-syntax/mzcompose.py Co-authored-by: Dennis Felsing --- test/testdrive-old-kafka-src-syntax/mzcompose.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/testdrive-old-kafka-src-syntax/mzcompose.py b/test/testdrive-old-kafka-src-syntax/mzcompose.py index 38169c32a89df..899a355644a99 100644 --- 
a/test/testdrive-old-kafka-src-syntax/mzcompose.py +++ b/test/testdrive-old-kafka-src-syntax/mzcompose.py @@ -354,7 +354,7 @@ def workflow_migration(c: Composition, parser: WorkflowArgumentParser) -> None: image=get_old_image_for_source_table_migration_test(), external_metadata_store=True, external_blob_store=True, - additional_system_parameter_defaults=additional_system_parameter_defaults, + additional_system_parameter_defaults=dict(additional_system_parameter_defaults), ) additional_system_parameter_defaults["force_source_table_syntax"] = "true" From 583408ad2ff85ee91a83187d1361fcae7b11e667 Mon Sep 17 00:00:00 2001 From: Joseph Koshakow Date: Tue, 28 Jan 2025 14:34:05 -0500 Subject: [PATCH 45/52] Fixup --- src/adapter/src/catalog/migrate.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/adapter/src/catalog/migrate.rs b/src/adapter/src/catalog/migrate.rs index c98bfa4d12efe..33823c4e7f816 100644 --- a/src/adapter/src/catalog/migrate.rs +++ b/src/adapter/src/catalog/migrate.rs @@ -562,17 +562,14 @@ fn ast_rewrite_sources_to_tables( assert_eq!( item.name, - name.0 - .last() - .expect("at least one ident") - .to_ast_string_stable() + name.0.last().expect("at least one ident").to_string() ); // First find an unused name within the same schema to avoid conflicts. let is_valid = |new_source_ident: &Ident| { if item_names_per_schema .get(&item.schema_id) .expect("schema must exist") - .contains(&new_source_ident.to_ast_string_stable()) + .contains(&new_source_ident.to_string()) { Ok::<_, IdentError>(false) } else { @@ -589,7 +586,7 @@ fn ast_rewrite_sources_to_tables( let mut new_source_name = name.clone(); *new_source_name.0.last_mut().expect("at least one ident") = new_source_ident.clone(); - item.name = new_source_ident.to_ast_string_stable(); + item.name = new_source_ident.to_string(); // A reference to the source that will be included in the table statement let source_ref = From 6cdddc995ce4fe29cd64348ddd6de1b1c6da88ab Mon Sep 17 00:00:00 2001 From: Dennis Felsing Date: Wed, 29 Jan 2025 16:57:20 +0000 Subject: [PATCH 46/52] Fix and work around test failures Based on https://buildkite.com/materialize/nightly/builds/10996 --- ci/nightly/pipeline.template.yml | 3 +-- .../lint-main/checks/check-mzcompose-files.sh | 1 + test/legacy-upgrade/mzcompose.py | 14 ++++++++++++++ test/mysql-cdc-old-syntax/mzcompose.py | 2 +- test/pg-cdc-old-syntax/mzcompose.py | 2 +- .../avro-resolution-no-publish-writer.td | 5 +++-- .../mzcompose.py | 19 +++++-------------- 7 files changed, 26 insertions(+), 20 deletions(-) diff --git a/ci/nightly/pipeline.template.yml b/ci/nightly/pipeline.template.yml index 7498277861dc4..9b8bd6bbc3041 100644 --- a/ci/nightly/pipeline.template.yml +++ b/ci/nightly/pipeline.template.yml @@ -700,10 +700,9 @@ steps: queue: hetzner-aarch64-8cpu-16gb - id: testdrive-kafka-migration - label: "Testdrive %N migration tests" + label: "Testdrive (before Kafka source versioning) migration tests" depends_on: build-aarch64 timeout_in_minutes: 180 - parallelism: 8 plugins: - ./ci/plugins/mzcompose: composition: testdrive-old-kafka-src-syntax diff --git a/ci/test/lint-main/checks/check-mzcompose-files.sh b/ci/test/lint-main/checks/check-mzcompose-files.sh index 0031f92c3dc3d..0ee2e916c074f 100755 --- a/ci/test/lint-main/checks/check-mzcompose-files.sh +++ b/ci/test/lint-main/checks/check-mzcompose-files.sh @@ -45,6 +45,7 @@ check_default_workflow_references_others() { -not -wholename "./test/canary-environment/mzcompose.py" `# Only run manually` \ -not -wholename 
"./test/ssh-connection/mzcompose.py" `# Handled differently` \ -not -wholename "./test/scalability/mzcompose.py" `# Other workflows are for manual usage` \ + -not -wholename "./test/testdrive-old-kafka-src-syntax/mzcompose.py" `# Other workflow is run separately` \ ) for file in "${MZCOMPOSE_TEST_FILES[@]}"; do diff --git a/test/legacy-upgrade/mzcompose.py b/test/legacy-upgrade/mzcompose.py index 1da734a1fc9e9..9b4eee9404ca4 100644 --- a/test/legacy-upgrade/mzcompose.py +++ b/test/legacy-upgrade/mzcompose.py @@ -152,6 +152,19 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: zero_downtime=True, force_source_table_syntax=False, ) + if args.lts_upgrade: + # Direct upgrade from latest LTS version without any inbetween versions + version = LTS_VERSIONS[-1] + priors = [v for v in all_versions if v <= version] + test_upgrade_from_version( + c, + f"{version}", + priors, + filter=args.filter, + zero_downtime=False, + force_source_table_syntax=True, + lts_upgrade=True, + ) if parallelism_count == 1 or parallelism_index == 1: test_upgrade_from_version( c, @@ -179,6 +192,7 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: priors, filter=args.filter, zero_downtime=False, + force_source_table_syntax=False, lts_upgrade=True, ) diff --git a/test/mysql-cdc-old-syntax/mzcompose.py b/test/mysql-cdc-old-syntax/mzcompose.py index e472f1162fb22..c92ec8e0a1ebc 100644 --- a/test/mysql-cdc-old-syntax/mzcompose.py +++ b/test/mysql-cdc-old-syntax/mzcompose.py @@ -96,7 +96,7 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: f"Workflows in shard with index {buildkite.get_parallelism_index()}: {sharded_workflows}" ) for name in sharded_workflows: - if name == "default": + if name in ("default", "migration"): continue with c.test_case(name): diff --git a/test/pg-cdc-old-syntax/mzcompose.py b/test/pg-cdc-old-syntax/mzcompose.py index 9f5ad1f7f7e9b..43a8ea2933ccc 100644 --- a/test/pg-cdc-old-syntax/mzcompose.py +++ b/test/pg-cdc-old-syntax/mzcompose.py @@ -347,7 +347,7 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: f"Workflows in shard with index {buildkite.get_parallelism_index()}: {sharded_workflows}" ) for name in sharded_workflows: - if name == "default": + if name in ("default", "migration"): continue # TODO: Flaky, reenable when database-issues#7611 is fixed diff --git a/test/testdrive-old-kafka-src-syntax/avro-resolution-no-publish-writer.td b/test/testdrive-old-kafka-src-syntax/avro-resolution-no-publish-writer.td index e0bb6d626c03c..19ce5236fcf66 100644 --- a/test/testdrive-old-kafka-src-syntax/avro-resolution-no-publish-writer.td +++ b/test/testdrive-old-kafka-src-syntax/avro-resolution-no-publish-writer.td @@ -43,5 +43,6 @@ GRANT CREATE, USAGE ON SCHEMA public TO materialize $ kafka-ingest format=bytes topic=resolution-no-publish-writer timestamp=1 \\x00\x00\x00\x00\x01\xf6\x01 -! SELECT * FROM resolution_no_publish_writer; -contains:to resolve +# TODO: Reenable when https://github.com/MaterializeInc/database-issues/issues/8933 is fixed +# ! 
SELECT * FROM resolution_no_publish_writer; +# contains:to resolve diff --git a/test/testdrive-old-kafka-src-syntax/mzcompose.py b/test/testdrive-old-kafka-src-syntax/mzcompose.py index 899a355644a99..a39c3bb16e35e 100644 --- a/test/testdrive-old-kafka-src-syntax/mzcompose.py +++ b/test/testdrive-old-kafka-src-syntax/mzcompose.py @@ -35,7 +35,6 @@ from materialize.mzcompose.services.zookeeper import Zookeeper from materialize.source_table_migration import ( get_new_image_for_source_table_migration_test, - get_old_image_for_source_table_migration_test, verify_sources_after_source_table_migration, ) @@ -56,18 +55,6 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: - for name in c.workflows: - if name == "default": - continue - - if name == "migration": - continue - - with c.test_case(name): - c.workflow(name) - - -def workflow_kafka(c: Composition, parser: WorkflowArgumentParser) -> None: """Run testdrive.""" parser.add_argument( "--redpanda", @@ -351,13 +338,17 @@ def workflow_migration(c: Composition, parser: WorkflowArgumentParser) -> None: mz_old = Materialized( default_size=Materialized.Size.DEFAULT_SIZE, - image=get_old_image_for_source_table_migration_test(), + image=get_new_image_for_source_table_migration_test(), external_metadata_store=True, external_blob_store=True, additional_system_parameter_defaults=dict(additional_system_parameter_defaults), ) + print(additional_system_parameter_defaults) + x = dict(additional_system_parameter_defaults) additional_system_parameter_defaults["force_source_table_syntax"] = "true" + print(additional_system_parameter_defaults) + print(x) mz_new = Materialized( default_size=Materialized.Size.DEFAULT_SIZE, From 853f226993bb36fb0d01302b8f8e5caf67a15ea3 Mon Sep 17 00:00:00 2001 From: Joseph Koshakow Date: Wed, 29 Jan 2025 14:21:22 -0500 Subject: [PATCH 47/52] Try and fix test --- test/mysql-cdc-old-syntax/table-in-mysql-schema.td | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/mysql-cdc-old-syntax/table-in-mysql-schema.td b/test/mysql-cdc-old-syntax/table-in-mysql-schema.td index 87490c15e78c6..24f1ec8836941 100644 --- a/test/mysql-cdc-old-syntax/table-in-mysql-schema.td +++ b/test/mysql-cdc-old-syntax/table-in-mysql-schema.td @@ -29,11 +29,13 @@ $ mysql-connect name=mysql url=mysql://root@mysql password=${arg.mysql-root-pass $ mysql-execute name=mysql DROP DATABASE IF EXISTS public; DROP DATABASE IF EXISTS another_schema; +DROP DATABASE IF EXISTS other; +DROP DATABASE IF EXISTS mysql; +CREATE DATABASE mysql; USE mysql; - # Insert data pre-snapshot -CREATE TABLE t_in_mysql (f1 INT); -INSERT INTO t_in_mysql VALUES (1), (2); +CREATE TABLE mysql.t_in_mysql (f1 INT); +INSERT INTO mysql.t_in_mysql VALUES (1), (2); ! 
CREATE SOURCE mz_source FROM MYSQL CONNECTION mysql_conn From 7b93cb3909cc435ad0365193afcca480304cbaa9 Mon Sep 17 00:00:00 2001 From: Joseph Koshakow Date: Wed, 29 Jan 2025 14:35:50 -0500 Subject: [PATCH 48/52] Try and fix test --- test/mysql-cdc-old-syntax/table-in-mysql-schema.td | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/mysql-cdc-old-syntax/table-in-mysql-schema.td b/test/mysql-cdc-old-syntax/table-in-mysql-schema.td index 24f1ec8836941..e7e992a9d4824 100644 --- a/test/mysql-cdc-old-syntax/table-in-mysql-schema.td +++ b/test/mysql-cdc-old-syntax/table-in-mysql-schema.td @@ -30,8 +30,7 @@ $ mysql-execute name=mysql DROP DATABASE IF EXISTS public; DROP DATABASE IF EXISTS another_schema; DROP DATABASE IF EXISTS other; -DROP DATABASE IF EXISTS mysql; -CREATE DATABASE mysql; +CREATE DATABASE IF NOT EXISTS mysql; USE mysql; # Insert data pre-snapshot CREATE TABLE mysql.t_in_mysql (f1 INT); From c033fe10cf2ef168cb90315bc0e78eccdf815ca0 Mon Sep 17 00:00:00 2001 From: Joseph Koshakow Date: Wed, 29 Jan 2025 15:25:49 -0500 Subject: [PATCH 49/52] Fix kafka tests --- .../testdrive-old-kafka-src-syntax/indexes.td | 107 +++++++++++++++++- 1 file changed, 106 insertions(+), 1 deletion(-) diff --git a/test/testdrive-old-kafka-src-syntax/indexes.td b/test/testdrive-old-kafka-src-syntax/indexes.td index afe535df1b2a9..5d2c2aa87ec64 100644 --- a/test/testdrive-old-kafka-src-syntax/indexes.td +++ b/test/testdrive-old-kafka-src-syntax/indexes.td @@ -7,6 +7,9 @@ # the Business Source License, use of this software will be governed # by the Apache License, Version 2.0. +$ set-sql-timeout duration=1s +$ set-max-tries max-tries=5 + $ set-arg-default single-replica-cluster=quickstart $ set-regex match=cluster1|quickstart replacement= @@ -279,7 +282,109 @@ bar_ind bar {a} "" $ postgres-execute connection=postgres://mz_system@${testdrive.materialize-internal-sql-addr}/materialize ALTER SYSTEM SET enable_rbac_checks TO true -> SHOW INDEXES IN CLUSTER mz_catalog_server +>[version<=13100] SHOW INDEXES IN CLUSTER mz_catalog_server +mz_active_peeks_per_worker_s2_primary_idx mz_active_peeks_per_worker mz_catalog_server {id,worker_id} "" +mz_arrangement_batches_raw_s2_primary_idx mz_arrangement_batches_raw mz_catalog_server {operator_id,worker_id} "" +mz_arrangement_records_raw_s2_primary_idx mz_arrangement_records_raw mz_catalog_server {operator_id,worker_id} "" +mz_arrangement_sharing_raw_s2_primary_idx mz_arrangement_sharing_raw mz_catalog_server {operator_id,worker_id} "" +mz_arrangement_heap_capacity_raw_s2_primary_idx mz_arrangement_heap_capacity_raw mz_catalog_server {operator_id,worker_id} "" +mz_arrangement_heap_allocations_raw_s2_primary_idx mz_arrangement_heap_allocations_raw mz_catalog_server {operator_id,worker_id} "" +mz_arrangement_heap_size_raw_s2_primary_idx mz_arrangement_heap_size_raw mz_catalog_server {operator_id,worker_id} "" +mz_arrangement_batcher_allocations_raw_s2_primary_idx mz_arrangement_batcher_allocations_raw mz_catalog_server {operator_id,worker_id} "" +mz_arrangement_batcher_capacity_raw_s2_primary_idx mz_arrangement_batcher_capacity_raw mz_catalog_server {operator_id,worker_id} "" +mz_arrangement_batcher_records_raw_s2_primary_idx mz_arrangement_batcher_records_raw mz_catalog_server {operator_id,worker_id} "" +mz_arrangement_batcher_size_raw_s2_primary_idx mz_arrangement_batcher_size_raw mz_catalog_server {operator_id,worker_id} "" +mz_cluster_deployment_lineage_ind mz_cluster_deployment_lineage mz_catalog_server {cluster_id} "" 
+mz_cluster_replica_history_ind mz_cluster_replica_history mz_catalog_server {dropped_at} "" +mz_cluster_replica_name_history_ind mz_cluster_replica_name_history mz_catalog_server {id} "" +mz_cluster_replica_metrics_ind mz_cluster_replica_metrics mz_catalog_server {replica_id} "" +mz_cluster_replica_metrics_history_ind mz_cluster_replica_metrics_history mz_catalog_server {replica_id} "" +mz_cluster_replica_sizes_ind mz_cluster_replica_sizes mz_catalog_server {size} "" +mz_cluster_replica_statuses_ind mz_cluster_replica_statuses mz_catalog_server {replica_id} "" +mz_cluster_replica_status_history_ind mz_cluster_replica_status_history mz_catalog_server {replica_id} "" +mz_cluster_replicas_ind mz_cluster_replicas mz_catalog_server {id} "" +mz_clusters_ind mz_clusters mz_catalog_server {id} "" +mz_columns_ind mz_columns mz_catalog_server {name} "" +mz_comments_ind mz_comments mz_catalog_server {id} "" +mz_compute_dependencies_ind mz_compute_dependencies mz_catalog_server {dependency_id} "" +mz_compute_dataflow_global_ids_per_worker_s2_primary_idx mz_compute_dataflow_global_ids_per_worker mz_catalog_server {id,worker_id} "" +mz_compute_error_counts_raw_s2_primary_idx mz_compute_error_counts_raw mz_catalog_server {export_id,worker_id} "" +mz_compute_exports_per_worker_s2_primary_idx mz_compute_exports_per_worker mz_catalog_server {export_id,worker_id} "" +mz_compute_frontiers_per_worker_s2_primary_idx mz_compute_frontiers_per_worker mz_catalog_server {export_id,worker_id} "" +mz_compute_hydration_times_per_worker_s2_primary_idx mz_compute_hydration_times_per_worker mz_catalog_server {export_id,worker_id} "" +mz_compute_import_frontiers_per_worker_s2_primary_idx mz_compute_import_frontiers_per_worker mz_catalog_server {export_id,import_id,worker_id} "" +mz_compute_lir_mapping_per_worker_s2_primary_idx mz_compute_lir_mapping_per_worker mz_catalog_server {global_id,lir_id,worker_id} "" +mz_compute_operator_durations_histogram_raw_s2_primary_idx mz_compute_operator_durations_histogram_raw mz_catalog_server {id,worker_id,duration_ns} "" +mz_connections_ind mz_connections mz_catalog_server {schema_id} "" +mz_console_cluster_utilization_overview_ind mz_console_cluster_utilization_overview mz_catalog_server {cluster_id} "" +mz_continual_tasks_ind mz_continual_tasks mz_catalog_server {id} "" +mz_databases_ind mz_databases mz_catalog_server {name} "" +mz_dataflow_addresses_per_worker_s2_primary_idx mz_dataflow_addresses_per_worker mz_catalog_server {id,worker_id} "" +mz_dataflow_channels_per_worker_s2_primary_idx mz_dataflow_channels_per_worker mz_catalog_server {id,worker_id} "" +mz_dataflow_operator_reachability_raw_s2_primary_idx mz_dataflow_operator_reachability_raw mz_catalog_server {address,port,worker_id,update_type,time} "" +mz_dataflow_operators_per_worker_s2_primary_idx mz_dataflow_operators_per_worker mz_catalog_server {id,worker_id} "" +mz_dataflow_shutdown_durations_histogram_raw_s2_primary_idx mz_dataflow_shutdown_durations_histogram_raw mz_catalog_server {worker_id,duration_ns} "" +mz_frontiers_ind mz_frontiers mz_catalog_server {object_id} "" +mz_indexes_ind mz_indexes mz_catalog_server {id} "" +mz_kafka_sources_ind mz_kafka_sources mz_catalog_server {id} "" +mz_materialized_views_ind mz_materialized_views mz_catalog_server {id} "" +mz_message_batch_counts_received_raw_s2_primary_idx mz_message_batch_counts_received_raw mz_catalog_server {channel_id,from_worker_id,to_worker_id} "" +mz_message_batch_counts_sent_raw_s2_primary_idx mz_message_batch_counts_sent_raw mz_catalog_server 
{channel_id,from_worker_id,to_worker_id} "" +mz_message_counts_received_raw_s2_primary_idx mz_message_counts_received_raw mz_catalog_server {channel_id,from_worker_id,to_worker_id} "" +mz_message_counts_sent_raw_s2_primary_idx mz_message_counts_sent_raw mz_catalog_server {channel_id,from_worker_id,to_worker_id} "" +mz_object_dependencies_ind mz_object_dependencies mz_catalog_server {object_id} "" +mz_object_history_ind mz_object_history mz_catalog_server {id} "" +mz_object_lifetimes_ind mz_object_lifetimes mz_catalog_server {id} "" +mz_object_transitive_dependencies_ind mz_object_transitive_dependencies mz_catalog_server {object_id} "" +mz_objects_ind mz_objects mz_catalog_server {schema_id} "" +mz_notices_ind mz_notices mz_catalog_server {id} "" +mz_peek_durations_histogram_raw_s2_primary_idx mz_peek_durations_histogram_raw mz_catalog_server {worker_id,type,duration_ns} "" +mz_recent_activity_log_thinned_ind mz_recent_activity_log_thinned mz_catalog_server {sql_hash} "" +mz_recent_storage_usage_ind mz_recent_storage_usage mz_catalog_server {object_id} "" +mz_roles_ind mz_roles mz_catalog_server {id} "" +mz_scheduling_elapsed_raw_s2_primary_idx mz_scheduling_elapsed_raw mz_catalog_server {id,worker_id} "" +mz_scheduling_parks_histogram_raw_s2_primary_idx mz_scheduling_parks_histogram_raw mz_catalog_server {worker_id,slept_for_ns,requested_ns} "" +mz_schemas_ind mz_schemas mz_catalog_server {database_id} "" +mz_secrets_ind mz_secrets mz_catalog_server {name} "" +mz_show_all_objects_ind mz_show_all_objects mz_catalog_server {schema_id} "" +mz_show_cluster_replicas_ind mz_show_cluster_replicas mz_catalog_server {cluster} "" +mz_show_clusters_ind mz_show_clusters mz_catalog_server {name} "" +mz_show_columns_ind mz_show_columns mz_catalog_server {id} "" +mz_show_connections_ind mz_show_connections mz_catalog_server {schema_id} "" +mz_show_databases_ind mz_show_databases mz_catalog_server {name} "" +mz_show_indexes_ind mz_show_indexes mz_catalog_server {schema_id} "" +mz_show_materialized_views_ind mz_show_materialized_views mz_catalog_server {schema_id} "" +mz_show_roles_ind mz_show_roles mz_catalog_server {name} "" +mz_show_schemas_ind mz_show_schemas mz_catalog_server {database_id} "" +mz_show_secrets_ind mz_show_secrets mz_catalog_server {schema_id} "" +mz_show_sinks_ind mz_show_sinks mz_catalog_server {schema_id} "" +mz_show_sources_ind mz_show_sources mz_catalog_server {schema_id} "" +mz_show_tables_ind mz_show_tables mz_catalog_server {schema_id} "" +mz_show_types_ind mz_show_types mz_catalog_server {schema_id} "" +mz_show_views_ind mz_show_views mz_catalog_server {schema_id} "" +mz_sink_statistics_ind mz_sink_statistics mz_catalog_server {id} "" +mz_sink_status_history_ind mz_sink_status_history mz_catalog_server {sink_id} "" +mz_source_statistics_with_history_ind mz_source_statistics_with_history mz_catalog_server {id} "" +mz_sink_statuses_ind mz_sink_statuses mz_catalog_server {id} "" +mz_sinks_ind mz_sinks mz_catalog_server {id} "" +mz_source_statistics_ind mz_source_statistics mz_catalog_server {id} "" +mz_source_status_history_ind mz_source_status_history mz_catalog_server {source_id} "" +mz_source_statuses_ind mz_source_statuses mz_catalog_server {id} "" +mz_sources_ind mz_sources mz_catalog_server {id} "" +mz_tables_ind mz_tables mz_catalog_server {schema_id} "" +mz_types_ind mz_types mz_catalog_server {schema_id} "" +mz_recent_sql_text_ind mz_recent_sql_text mz_catalog_server {sql_hash} "" +mz_views_ind mz_views mz_catalog_server {schema_id} "" 
+mz_wallclock_global_lag_recent_history_ind mz_wallclock_global_lag_recent_history mz_catalog_server {object_id} "" +mz_webhook_sources_ind mz_webhook_sources mz_catalog_server {id} "" +pg_attrdef_all_databases_ind pg_attrdef_all_databases mz_catalog_server {oid,adrelid,adnum,adbin,adsrc} "" +pg_attribute_all_databases_ind pg_attribute_all_databases mz_catalog_server {attrelid,attname,atttypid,attlen,attnum,atttypmod,attnotnull,atthasdef,attidentity,attgenerated,attisdropped,attcollation,database_name,pg_type_database_name} "" +pg_class_all_databases_ind pg_class_all_databases mz_catalog_server {relname} "" +pg_description_all_databases_ind pg_description_all_databases mz_catalog_server {objoid,classoid,objsubid,description,oid_database_name,class_database_name} "" +pg_namespace_all_databases_ind pg_namespace_all_databases mz_catalog_server {nspname} "" +pg_type_all_databases_ind pg_type_all_databases mz_catalog_server {oid} "" + +>[version>13100] SHOW INDEXES IN CLUSTER mz_catalog_server mz_active_peeks_per_worker_s2_primary_idx mz_active_peeks_per_worker mz_catalog_server {id,worker_id} "" mz_arrangement_batches_raw_s2_primary_idx mz_arrangement_batches_raw mz_catalog_server {operator_id,worker_id} "" mz_arrangement_records_raw_s2_primary_idx mz_arrangement_records_raw mz_catalog_server {operator_id,worker_id} "" From 3420277d0ad14489503d1e3ff5d40b6bcd2c1e8a Mon Sep 17 00:00:00 2001 From: Dennis Felsing Date: Thu, 30 Jan 2025 08:12:51 +0000 Subject: [PATCH 50/52] ci: Larger agent for td migration test --- ci/nightly/pipeline.template.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/nightly/pipeline.template.yml b/ci/nightly/pipeline.template.yml index b8aeb8c1d6bc5..2d418175e193f 100644 --- a/ci/nightly/pipeline.template.yml +++ b/ci/nightly/pipeline.template.yml @@ -708,7 +708,7 @@ steps: composition: testdrive-old-kafka-src-syntax run: migration agents: - queue: hetzner-aarch64-8cpu-16gb + queue: hetzner-aarch64-16cpu-32gb - group: AWS key: aws From e2b56b47c7d56e4fd309dd4dc397931c2f2e1be9 Mon Sep 17 00:00:00 2001 From: Joseph Koshakow Date: Thu, 30 Jan 2025 10:06:09 -0500 Subject: [PATCH 51/52] Fix mz config --- .../mzcompose.py | 56 ++++++++++++++----- 1 file changed, 41 insertions(+), 15 deletions(-) diff --git a/test/testdrive-old-kafka-src-syntax/mzcompose.py b/test/testdrive-old-kafka-src-syntax/mzcompose.py index 31ee0363afc7c..0f8565324e8c8 100644 --- a/test/testdrive-old-kafka-src-syntax/mzcompose.py +++ b/test/testdrive-old-kafka-src-syntax/mzcompose.py @@ -281,6 +281,17 @@ def workflow_migration(c: Composition, parser: WorkflowArgumentParser) -> None: type=str, help="set the default timeout for Testdrive", ) + + parser.add_argument( + "--rewrite-results", + action="store_true", + help="Rewrite results, disables junit reports", + ) + + parser.add_argument( + "--azurite", action="store_true", help="Use Azurite as blob store instead of S3" + ) + parser.add_argument( "files", nargs="*", @@ -312,18 +323,6 @@ def workflow_migration(c: Composition, parser: WorkflowArgumentParser) -> None: dependencies += kafka_deps - testdrive = Testdrive( - forward_buildkite_shard=True, - kafka_default_partitions=args.kafka_default_partitions, - aws_region=args.aws_region, - default_timeout=args.default_timeout, - volumes_extra=["mzdata:/mzdata"], - external_blob_store=True, - fivetran_destination=True, - fivetran_destination_files_path="/share/tmp", - entrypoint_extra=[f"--var=uses-redpanda={args.redpanda}"], - ) - sysparams = args.system_param if not args.system_param: 
sysparams = [] @@ -337,14 +336,41 @@ def workflow_migration(c: Composition, parser: WorkflowArgumentParser) -> None: additional_system_parameter_defaults[key] = val + leaves_tombstones = ( + "true" + if additional_system_parameter_defaults.get( + "storage_use_continual_feedback_upsert", + get_default_system_parameters()["storage_use_continual_feedback_upsert"], + ) + == "false" + else "false" + ) + mz_old = Materialized( default_size=Materialized.Size.DEFAULT_SIZE, - image=get_old_image_for_source_table_migration_test(), - external_metadata_store=True, + image=get_new_image_for_source_table_migration_test(), external_blob_store=True, + blob_store_is_azure=args.azurite, additional_system_parameter_defaults=dict(additional_system_parameter_defaults), ) + testdrive = Testdrive( + forward_buildkite_shard=True, + kafka_default_partitions=args.kafka_default_partitions, + aws_region=args.aws_region, + validate_catalog_store=True, + default_timeout=args.default_timeout, + volumes_extra=["mzdata:/mzdata"], + external_blob_store=True, + blob_store_is_azure=args.azurite, + fivetran_destination=True, + fivetran_destination_files_path="/share/tmp", + entrypoint_extra=[ + f"--var=uses-redpanda={args.redpanda}", + f"--var=leaves-tombstones={leaves_tombstones}", + ], + ) + print(additional_system_parameter_defaults) x = dict(additional_system_parameter_defaults) additional_system_parameter_defaults["force_source_table_syntax"] = "true" @@ -354,8 +380,8 @@ def workflow_migration(c: Composition, parser: WorkflowArgumentParser) -> None: mz_new = Materialized( default_size=Materialized.Size.DEFAULT_SIZE, image=get_new_image_for_source_table_migration_test(), - external_metadata_store=True, external_blob_store=True, + blob_store_is_azure=args.azurite, additional_system_parameter_defaults=additional_system_parameter_defaults, ) From 1286010677cfcbfe27cf361b41719827aa65f1e8 Mon Sep 17 00:00:00 2001 From: Joseph Koshakow Date: Thu, 30 Jan 2025 10:27:04 -0500 Subject: [PATCH 52/52] Fix kafka tests --- test/testdrive-old-kafka-src-syntax/mzcompose.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/testdrive-old-kafka-src-syntax/mzcompose.py b/test/testdrive-old-kafka-src-syntax/mzcompose.py index 0f8565324e8c8..3491e6b711218 100644 --- a/test/testdrive-old-kafka-src-syntax/mzcompose.py +++ b/test/testdrive-old-kafka-src-syntax/mzcompose.py @@ -342,13 +342,13 @@ def workflow_migration(c: Composition, parser: WorkflowArgumentParser) -> None: "storage_use_continual_feedback_upsert", get_default_system_parameters()["storage_use_continual_feedback_upsert"], ) - == "false" + == "false" else "false" ) mz_old = Materialized( default_size=Materialized.Size.DEFAULT_SIZE, - image=get_new_image_for_source_table_migration_test(), + image=get_old_image_for_source_table_migration_test(), external_blob_store=True, blob_store_is_azure=args.azurite, additional_system_parameter_defaults=dict(additional_system_parameter_defaults), @@ -358,7 +358,6 @@ def workflow_migration(c: Composition, parser: WorkflowArgumentParser) -> None: forward_buildkite_shard=True, kafka_default_partitions=args.kafka_default_partitions, aws_region=args.aws_region, - validate_catalog_store=True, default_timeout=args.default_timeout, volumes_extra=["mzdata:/mzdata"], external_blob_store=True,