-
Notifications
You must be signed in to change notification settings - Fork 494
mysql: add support for invisible columns #31239
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
6907869
bb7369e
b47d1d8
7ade1a6
2ddba16
c2cc53e
31f732a
0af5488
fe6897e
8f97a57
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -90,9 +90,12 @@ use std::sync::Arc; | |
|
|
||
| use differential_dataflow::AsCollection; | ||
| use futures::TryStreamExt; | ||
| use itertools::Itertools; | ||
| use mysql_async::prelude::Queryable; | ||
| use mysql_async::{IsolationLevel, Row as MySqlRow, TxOpts}; | ||
| use mz_mysql_util::{pack_mysql_row, query_sys_var, MySqlError, ER_NO_SUCH_TABLE}; | ||
| use mz_mysql_util::{ | ||
| pack_mysql_row, query_sys_var, quote_identifier, MySqlError, ER_NO_SUCH_TABLE, | ||
| }; | ||
| use mz_ore::cast::CastFrom; | ||
| use mz_ore::future::InTask; | ||
| use mz_ore::iter::IteratorExt; | ||
|
|
@@ -404,9 +407,8 @@ pub(crate) fn render<G: Scope<Timestamp = GtidPartition>>( | |
|
|
||
| let mut snapshot_staged = 0; | ||
| for (table, outputs) in &reader_snapshot_table_info { | ||
| let query = format!("SELECT * FROM {}", table); | ||
| trace!(%id, "timely-{worker_id} reading snapshot from \ | ||
| table '{table}'"); | ||
| let query = build_snapshot_query(outputs); | ||
| trace!(%id, "timely-{worker_id} reading snapshot query='{}'", query); | ||
| let mut results = tx.exec_stream(query, ()).await?; | ||
| let mut count = 0; | ||
| while let Some(row) = results.try_next().await? { | ||
|
|
@@ -517,6 +519,31 @@ where | |
| Ok(total) | ||
| } | ||
|
|
||
| /// Builds the SQL query to be used for creating the snapshot using the first entry in outputs. | ||
| /// | ||
| /// Expect `outputs` to contain entries for a single table, and to have at least 1 entry. | ||
| /// Expect that each MySqlTableDesc entry contains all columns described in information_schema.columns. | ||
| #[must_use] | ||
| fn build_snapshot_query(outputs: &[SourceOutputInfo]) -> String { | ||
| let info = outputs.first().expect("MySQL table info"); | ||
| if outputs.len() > 1 | ||
| && !outputs | ||
| .iter() | ||
| .skip(1) | ||
| .map(|other_info| &other_info.desc.columns) | ||
| .all(|other_columns| *other_columns == info.desc.columns) | ||
| { | ||
| panic!("Mismatch in table descriptions for {}", info.table_name); | ||
| } | ||
| let columns = info | ||
| .desc | ||
| .columns | ||
| .iter() | ||
| .map(|col| quote_identifier(&col.name)) | ||
| .join(", "); | ||
| format!("SELECT {} FROM {}", columns, info.table_name) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (sorry for the drive-by review) Do we have to worry about SQL injection here?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. mmm.. good point, I made an unfortunate assumption that the existing format! was good enough. I'll update the snapshot command and collect_table_statistics to use prepared statements.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I did wonder about that during review and concluded that we don't have to be worried as to perform an injection you have to somehow create weirdly named columns, which means you already admin level have access to the database. My understanding is that we can't use prepared statements here since with those you can put placeholders for values but here we want to put placeholders for column names. If it's possible and easy to do we should maybe do it
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have done this kind of thing in PG by building dynamic statements using virtual sets for the identifiers, but I'm not as familiar with MySQL, and this is proving challenging to try to implement via prepared statements. mysql_async client doesn't provide a
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nice! |
||
| } | ||
|
|
||
| #[derive(Default)] | ||
| struct TableStatistics { | ||
| count_latency: f64, | ||
|
|
@@ -541,3 +568,48 @@ where | |
|
|
||
| Ok(stats) | ||
| } | ||
|
|
||
| #[cfg(test)] | ||
| mod tests { | ||
| use super::*; | ||
| use mz_mysql_util::{MySqlColumnDesc, MySqlTableDesc}; | ||
| use timely::progress::Antichain; | ||
|
|
||
| #[mz_ore::test] | ||
| fn snapshot_query_duplicate_table() { | ||
| let schema_name = "myschema".to_string(); | ||
| let table_name = "mytable".to_string(); | ||
| let table = MySqlTableName(schema_name.clone(), table_name.clone()); | ||
| let columns = ["c1", "c2", "c3"] | ||
| .iter() | ||
| .map(|col| MySqlColumnDesc { | ||
| name: col.to_string(), | ||
| column_type: None, | ||
| meta: None, | ||
| }) | ||
| .collect::<Vec<_>>(); | ||
| let desc = MySqlTableDesc { | ||
| schema_name: schema_name.clone(), | ||
| name: table_name.clone(), | ||
| columns, | ||
| keys: BTreeSet::default(), | ||
| }; | ||
| let info = SourceOutputInfo { | ||
| output_index: 1, // ignored | ||
| table_name: table.clone(), | ||
| desc, | ||
| text_columns: vec![], | ||
| exclude_columns: vec![], | ||
| initial_gtid_set: Antichain::default(), | ||
| resume_upper: Antichain::default(), | ||
| }; | ||
| let query = build_snapshot_query(&[info.clone(), info]); | ||
| assert_eq!( | ||
| format!( | ||
| "SELECT `c1`, `c2`, `c3` FROM `{}`.`{}`", | ||
| &schema_name, &table_name | ||
| ), | ||
| query | ||
| ); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,7 +15,7 @@ | |
| import threading | ||
| from textwrap import dedent | ||
|
|
||
| from materialize import buildkite | ||
| from materialize import MZ_ROOT, buildkite | ||
| from materialize.mysql_util import ( | ||
| retrieve_invalid_ssl_context_for_mysql, | ||
| retrieve_ssl_context_for_mysql, | ||
|
|
@@ -99,7 +99,9 @@ def workflow_cdc(c: Composition, parser: WorkflowArgumentParser) -> None: | |
|
|
||
| matching_files = [] | ||
| for filter in args.filter: | ||
| matching_files.extend(glob.glob(filter, root_dir="test/mysql-cdc")) | ||
| matching_files.extend( | ||
| glob.glob(filter, root_dir=MZ_ROOT / "test" / "mysql-cdc") | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for this fix! I've been bitten by it! |
||
| ) | ||
| sharded_files: list[str] = sorted( | ||
| buildkite.shard_list(matching_files, lambda file: file) | ||
| ) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| # Copyright Materialize, Inc. and contributors. All rights reserved. | ||
| # | ||
| # Use of this software is governed by the Business Source License | ||
| # included in the LICENSE file at the root of this repository. | ||
| # | ||
| # As of the Change Date specified in that file, in accordance with | ||
| # the Business Source License, use of this software will be governed | ||
| # by the Apache License, Version 2.0. | ||
|
|
||
| # | ||
| # identifiers that need proper quoting | ||
| # | ||
|
|
||
| > CREATE SECRET mysqlpass AS '${arg.mysql-root-password}' | ||
| > CREATE CONNECTION mysql_conn TO MYSQL ( | ||
| HOST mysql, | ||
| USER root, | ||
| PASSWORD SECRET mysqlpass | ||
| ) | ||
|
|
||
| $ mysql-connect name=mysql url=mysql://root@mysql password=${arg.mysql-root-password} | ||
|
|
||
| $ mysql-execute name=mysql | ||
| DROP DATABASE IF EXISTS public; | ||
| CREATE DATABASE public; | ||
| USE public; | ||
| CREATE TABLE `t``1` (`f``1` INT) | ||
| INSERT INTO `t``1` VALUES(1); | ||
|
|
||
| > CREATE SOURCE mz_source FROM MYSQL CONNECTION mysql_conn; | ||
| > CREATE TABLE t1 FROM SOURCE mz_source (REFERENCE public."t`1"); | ||
|
|
||
| > SELECT * FROM t1; | ||
| 1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
small nit: in idiomatic Rust, at least in the MZ codebase, it usually better to avoid long chains of iterator transformations with either a short chain if possible or just a imperative for loop. This functional style becomes harder to read after a certain point.
So I would write this either as:
or:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
got it, thank you!