Skip to content

Commit f960792

Browse files
martykulmadef-
andauthored
mysql: add support for invisible columns (#31239)
- explicitly request columns in MySQL snapshot select statement - corrected the glob root path in mzcompose.py to get the tests to run ### Motivation Adds support for invisible columns in mysql - MaterializeInc/database-issues#7635 - MaterializeInc/database-issues#7782 ### Checklist - [x] This PR has adequate test coverage / QA involvement has been duly considered. ([trigger-ci for additional test/nightly runs](https://trigger-ci.dev.materialize.com/)) - [ ] This PR has an associated up-to-date [design doc](https://github.com/MaterializeInc/materialize/blob/main/doc/developer/design/README.md), is a design doc ([template](https://github.com/MaterializeInc/materialize/blob/main/doc/developer/design/00000000_template.md)), or is sufficiently small to not require a design. <!-- Reference the design in the description. --> - [ ] If this PR evolves [an existing `$T ⇔ Proto$T` mapping](https://github.com/MaterializeInc/materialize/blob/main/doc/developer/command-and-response-binary-encoding.md) (possibly in a backwards-incompatible way), then it is tagged with a `T-proto` label. - [ ] If this PR will require changes to cloud orchestration or tests, there is a companion cloud PR to account for those changes that is tagged with the release-blocker label ([example](MaterializeInc/cloud#5021)). <!-- Ask in #team-cloud on Slack if you need help preparing the cloud PR. --> - [x] If this PR includes major [user-facing behavior changes](https://github.com/MaterializeInc/materialize/blob/main/doc/developer/guide-changes.md#what-changes-require-a-release-note), I have pinged the relevant PM to schedule a changelog post. --------- Co-authored-by: Dennis Felsing <[email protected]>
1 parent 6e6960f commit f960792

File tree

9 files changed

+271
-24
lines changed

9 files changed

+271
-24
lines changed

ci/nightly/pipeline.template.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -959,7 +959,7 @@ steps:
959959
- id: checks-0dt-restart-entire-mz-forced-migrations-azurite
960960
label: "Checks 0dt restart of the entire Mz with forced migrations with :azure: blob store"
961961
depends_on: build-aarch64
962-
timeout_in_minutes: 120
962+
timeout_in_minutes: 180
963963
parallelism: 2
964964
agents:
965965
queue: hetzner-aarch64-16cpu-32gb

ci/test/pipeline.template.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -410,7 +410,7 @@ steps:
410410

411411
- id: mysql-cdc-resumption
412412
label: "MySQL CDC resumption tests"
413-
parallelism: 4
413+
parallelism: 6
414414
depends_on: build-aarch64
415415
timeout_in_minutes: 30
416416
inputs: [test/mysql-cdc-resumption]

misc/python/materialize/checks/all_checks/mysql_cdc.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,5 +480,106 @@ def validate(self) -> Testdrive:
480480
)
481481

482482

483+
@externally_idempotent(False)
484+
class MySqlInvisibleColumn(Check):
485+
def _can_run(self, e: Executor) -> bool:
486+
return self.base_version > MzVersion.parse_mz("v0.132.0-dev")
487+
488+
def initialize(self) -> Testdrive:
489+
return Testdrive(
490+
dedent(
491+
f"""
492+
$ mysql-connect name=mysql url=mysql://root@mysql password={MySql.DEFAULT_ROOT_PASSWORD}
493+
494+
$ mysql-execute name=mysql
495+
# create the database if it does not exist yet but do not drop it
496+
CREATE DATABASE IF NOT EXISTS public;
497+
USE public;
498+
499+
CREATE USER mysql4 IDENTIFIED BY 'mysql';
500+
GRANT REPLICATION SLAVE ON *.* TO mysql4;
501+
GRANT ALL ON public.* TO mysql4;
502+
503+
DROP TABLE IF EXISTS mysql_invisible_table;
504+
505+
CREATE TABLE mysql_invisible_table (f1 INT, f2 FLOAT INVISIBLE, f3 DATE INVISIBLE, f4 TEXT INVISIBLE);
506+
507+
INSERT INTO mysql_invisible_table (f1, f2, f3, f4) VALUES (1, 0.1, '2025-01-01', 'one');
508+
509+
> CREATE SECRET mysql_invisible_pass AS 'mysql';
510+
> CREATE CONNECTION mysql_invisible_conn TO MYSQL (
511+
HOST 'mysql',
512+
USER mysql4,
513+
PASSWORD SECRET mysql_invisible_pass
514+
)
515+
516+
> CREATE SOURCE mysql_invisible_source
517+
FROM MYSQL CONNECTION mysql_invisible_conn;
518+
> CREATE TABLE mysql_invisible_table FROM SOURCE mysql_invisible_source (REFERENCE public.mysql_invisible_table);
519+
520+
# Return all rows
521+
> CREATE MATERIALIZED VIEW mysql_invisible_view AS
522+
SELECT * FROM mysql_invisible_table
523+
"""
524+
)
525+
)
526+
527+
def manipulate(self) -> list[Testdrive]:
528+
return [
529+
Testdrive(dedent(s))
530+
for s in [
531+
f"""
532+
$ mysql-connect name=mysql url=mysql://root@mysql password={MySql.DEFAULT_ROOT_PASSWORD}
533+
534+
$ mysql-execute name=mysql
535+
USE public;
536+
INSERT INTO mysql_invisible_table (f1, f2, f3, f4) VALUES (2, 0.2, '2025-02-02', 'two');
537+
""",
538+
f"""
539+
$ mysql-connect name=mysql url=mysql://root@mysql password={MySql.DEFAULT_ROOT_PASSWORD}
540+
541+
$ mysql-execute name=mysql
542+
USE public;
543+
INSERT INTO mysql_invisible_table (f1, f2, f3, f4) VALUES (3, 0.3, '2025-03-03', 'three');
544+
""",
545+
]
546+
]
547+
548+
def validate(self) -> Testdrive:
549+
return Testdrive(
550+
dedent(
551+
f"""
552+
> SELECT * FROM mysql_invisible_table;
553+
1 0.1 2025-01-01 one
554+
2 0.2 2025-02-02 two
555+
3 0.3 2025-03-03 three
556+
557+
$ mysql-connect name=mysql url=mysql://root@mysql password={MySql.DEFAULT_ROOT_PASSWORD}
558+
559+
$ mysql-execute name=mysql
560+
USE public;
561+
ALTER TABLE mysql_invisible_table ALTER COLUMN f2 SET VISIBLE;
562+
INSERT INTO mysql_invisible_table (f1, f2, f3, f4) VALUES (4, 0.4, '2025-04-04', 'four');
563+
564+
> SELECT * FROM mysql_invisible_table;
565+
1 0.1 2025-01-01 one
566+
2 0.2 2025-02-02 two
567+
3 0.3 2025-03-03 three
568+
4 0.4 2025-04-04 four
569+
570+
# Rollback the last INSERTs so that validate() can be called multiple times
571+
$ mysql-execute name=mysql
572+
DELETE FROM mysql_invisible_table WHERE f1 = 4;
573+
ALTER TABLE mysql_invisible_table ALTER COLUMN f2 SET INVISIBLE;
574+
575+
> SELECT * FROM mysql_invisible_table;
576+
1 0.1 2025-01-01 one
577+
2 0.2 2025-02-02 two
578+
3 0.3 2025-03-03 three
579+
"""
580+
)
581+
)
582+
583+
483584
def remove_target_cluster_from_explain(sql: str) -> str:
484585
return re.sub(r"\n\s*Target cluster: \w+\n", "", sql)

src/mysql-util/src/lib.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,10 +114,33 @@ pub enum MySqlError {
114114
MySql(#[from] mysql_async::Error),
115115
}
116116

117+
/// Quotes MySQL identifiers. [See MySQL quote_identifier()](https://github.com/mysql/mysql-sys/blob/master/functions/quote_identifier.sql)
118+
pub fn quote_identifier(identifier: &str) -> String {
119+
let mut escaped = identifier.replace("`", "``");
120+
escaped.insert(0, '`');
121+
escaped.push('`');
122+
escaped
123+
}
124+
117125
// NOTE: this error was renamed between MySQL 5.7 and 8.0
118126
// https://dev.mysql.com/doc/mysql-errors/8.0/en/server-error-reference.html#error_er_source_fatal_error_reading_binlog
119127
// https://dev.mysql.com/doc/mysql-errors/5.7/en/server-error-reference.html#error_er_master_fatal_error_reading_binlog
120128
pub const ER_SOURCE_FATAL_ERROR_READING_BINLOG_CODE: u16 = 1236;
121129

122130
// https://dev.mysql.com/doc/mysql-errors/8.0/en/server-error-reference.html#error_er_no_such_table
123131
pub const ER_NO_SUCH_TABLE: u16 = 1146;
132+
133+
#[cfg(test)]
134+
mod tests {
135+
136+
use super::quote_identifier;
137+
#[mz_ore::test]
138+
fn test_identifier_quoting() {
139+
let expected = vec!["`a`", "`naughty``sql`", "```;naughty;sql;```"];
140+
let input = ["a", "naughty`sql", "`;naughty;sql;`"]
141+
.iter()
142+
.map(|raw_str| quote_identifier(raw_str))
143+
.collect::<Vec<_>>();
144+
assert_eq!(expected, input);
145+
}
146+
}

src/storage/src/source/mysql.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ use std::rc::Rc;
5858

5959
use differential_dataflow::AsCollection;
6060
use itertools::Itertools;
61+
use mz_mysql_util::quote_identifier;
6162
use mz_ore::cast::CastFrom;
6263
use mz_repr::Diff;
6364
use mz_repr::GlobalId;
@@ -338,7 +339,12 @@ impl MySqlTableName {
338339

339340
impl fmt::Display for MySqlTableName {
340341
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
341-
write!(f, "`{}`.`{}`", self.0, self.1)
342+
write!(
343+
f,
344+
"{}.{}",
345+
quote_identifier(&self.0),
346+
quote_identifier(&self.1)
347+
)
342348
}
343349
}
344350

src/storage/src/source/mysql/snapshot.rs

Lines changed: 74 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,12 @@ use std::sync::Arc;
9090

9191
use differential_dataflow::AsCollection;
9292
use futures::TryStreamExt;
93+
use itertools::Itertools;
9394
use mysql_async::prelude::Queryable;
9495
use mysql_async::{IsolationLevel, Row as MySqlRow, TxOpts};
95-
use mz_mysql_util::{pack_mysql_row, query_sys_var, MySqlError, ER_NO_SUCH_TABLE};
96+
use mz_mysql_util::{
97+
pack_mysql_row, query_sys_var, quote_identifier, MySqlError, ER_NO_SUCH_TABLE,
98+
};
9699
use mz_ore::cast::CastFrom;
97100
use mz_ore::future::InTask;
98101
use mz_ore::iter::IteratorExt;
@@ -404,9 +407,8 @@ pub(crate) fn render<G: Scope<Timestamp = GtidPartition>>(
404407

405408
let mut snapshot_staged = 0;
406409
for (table, outputs) in &reader_snapshot_table_info {
407-
let query = format!("SELECT * FROM {}", table);
408-
trace!(%id, "timely-{worker_id} reading snapshot from \
409-
table '{table}'");
410+
let query = build_snapshot_query(outputs);
411+
trace!(%id, "timely-{worker_id} reading snapshot query='{}'", query);
410412
let mut results = tx.exec_stream(query, ()).await?;
411413
let mut count = 0;
412414
while let Some(row) = results.try_next().await? {
@@ -517,6 +519,29 @@ where
517519
Ok(total)
518520
}
519521

522+
/// Builds the SQL query to be used for creating the snapshot using the first entry in outputs.
523+
///
524+
/// Expect `outputs` to contain entries for a single table, and to have at least 1 entry.
525+
/// Expect that each MySqlTableDesc entry contains all columns described in information_schema.columns.
526+
#[must_use]
527+
fn build_snapshot_query(outputs: &[SourceOutputInfo]) -> String {
528+
let info = outputs.first().expect("MySQL table info");
529+
for output in &outputs[1..] {
530+
assert!(
531+
info.desc.columns == output.desc.columns,
532+
"Mismatch in table descriptions for {}",
533+
info.table_name
534+
);
535+
}
536+
let columns = info
537+
.desc
538+
.columns
539+
.iter()
540+
.map(|col| quote_identifier(&col.name))
541+
.join(", ");
542+
format!("SELECT {} FROM {}", columns, info.table_name)
543+
}
544+
520545
#[derive(Default)]
521546
struct TableStatistics {
522547
count_latency: f64,
@@ -541,3 +566,48 @@ where
541566

542567
Ok(stats)
543568
}
569+
570+
#[cfg(test)]
571+
mod tests {
572+
use super::*;
573+
use mz_mysql_util::{MySqlColumnDesc, MySqlTableDesc};
574+
use timely::progress::Antichain;
575+
576+
#[mz_ore::test]
577+
fn snapshot_query_duplicate_table() {
578+
let schema_name = "myschema".to_string();
579+
let table_name = "mytable".to_string();
580+
let table = MySqlTableName(schema_name.clone(), table_name.clone());
581+
let columns = ["c1", "c2", "c3"]
582+
.iter()
583+
.map(|col| MySqlColumnDesc {
584+
name: col.to_string(),
585+
column_type: None,
586+
meta: None,
587+
})
588+
.collect::<Vec<_>>();
589+
let desc = MySqlTableDesc {
590+
schema_name: schema_name.clone(),
591+
name: table_name.clone(),
592+
columns,
593+
keys: BTreeSet::default(),
594+
};
595+
let info = SourceOutputInfo {
596+
output_index: 1, // ignored
597+
table_name: table.clone(),
598+
desc,
599+
text_columns: vec![],
600+
exclude_columns: vec![],
601+
initial_gtid_set: Antichain::default(),
602+
resume_upper: Antichain::default(),
603+
};
604+
let query = build_snapshot_query(&[info.clone(), info]);
605+
assert_eq!(
606+
format!(
607+
"SELECT `c1`, `c2`, `c3` FROM `{}`.`{}`",
608+
&schema_name, &table_name
609+
),
610+
query
611+
);
612+
}
613+
}

test/mysql-cdc/invisible-columns.td

Lines changed: 26 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -29,20 +29,31 @@ DROP DATABASE IF EXISTS public;
2929
CREATE DATABASE public;
3030
USE public;
3131

32-
CREATE TABLE t1 (f1 INT, f2 INT INVISIBLE, f3 INT INVISIBLE);
33-
INSERT INTO t1 (f1, f2, f3) VALUES (10, 20, 30);
32+
SET sql_generate_invisible_primary_key=ON;
33+
CREATE TABLE t1 (f1 INT, f2 INT INVISIBLE, f3 DATE INVISIBLE, f4 INT INVISIBLE);
34+
INSERT INTO t1 (f1, f2, f3, f4) VALUES (10, 20, '2025-01-28', 6);
3435
INSERT INTO t1 VALUES (11);
3536

36-
# TODO: database-issues#7782 (invisible columns not supported)
37-
# > CREATE SOURCE mz_source FROM MYSQL CONNECTION mysql_conn;
38-
# > CREATE TABLE t1 FROM SOURCE mz_source (REFERENCE public.t1);
39-
#
40-
# > SELECT * FROM t1;
41-
# 10
42-
# 11
43-
#
44-
# $ mysql-execute name=mysql
45-
# ALTER TABLE t1 ALTER COLUMN f2 SET VISIBLE;
46-
#
47-
# ! SELECT * FROM t1;
48-
# contains:incompatible schema change
37+
> CREATE SOURCE mz_source FROM MYSQL CONNECTION mysql_conn;
38+
> CREATE TABLE t1 FROM SOURCE mz_source (REFERENCE public.t1) WITH (TEXT COLUMNS (f3), EXCLUDE COLUMNS (f4));
39+
40+
> SELECT * FROM t1;
41+
1 10 20 "2025-01-28"
42+
2 11 <null> <null>
43+
44+
$ mysql-execute name=mysql
45+
ALTER TABLE t1 ALTER COLUMN f2 SET VISIBLE;
46+
INSERT INTO t1 (f1, f2, f3, f4) VALUES (111, 222, '2025-01-29', 6);
47+
INSERT INTO t1 (f1, f2) VALUES (1111, 2222);
48+
49+
> SELECT * from t1;
50+
1 10 20 "2025-01-28"
51+
2 11 <null> <null>
52+
3 111 222 "2025-01-29"
53+
4 1111 2222 <null>
54+
55+
$ mysql-execute name=mysql
56+
ALTER TABLE t1 DROP COLUMN f2;
57+
58+
! SELECT * FROM t1;
59+
contains:incompatible schema change

test/mysql-cdc/mzcompose.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import threading
1616
from textwrap import dedent
1717

18-
from materialize import buildkite
18+
from materialize import MZ_ROOT, buildkite
1919
from materialize.mysql_util import (
2020
retrieve_invalid_ssl_context_for_mysql,
2121
retrieve_ssl_context_for_mysql,
@@ -99,7 +99,9 @@ def workflow_cdc(c: Composition, parser: WorkflowArgumentParser) -> None:
9999

100100
matching_files = []
101101
for filter in args.filter:
102-
matching_files.extend(glob.glob(filter, root_dir="test/mysql-cdc"))
102+
matching_files.extend(
103+
glob.glob(filter, root_dir=MZ_ROOT / "test" / "mysql-cdc")
104+
)
103105
sharded_files: list[str] = sorted(
104106
buildkite.shard_list(matching_files, lambda file: file)
105107
)
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Copyright Materialize, Inc. and contributors. All rights reserved.
2+
#
3+
# Use of this software is governed by the Business Source License
4+
# included in the LICENSE file at the root of this repository.
5+
#
6+
# As of the Change Date specified in that file, in accordance with
7+
# the Business Source License, use of this software will be governed
8+
# by the Apache License, Version 2.0.
9+
10+
#
11+
# identifiers that need proper quoting
12+
#
13+
14+
> CREATE SECRET mysqlpass AS '${arg.mysql-root-password}'
15+
> CREATE CONNECTION mysql_conn TO MYSQL (
16+
HOST mysql,
17+
USER root,
18+
PASSWORD SECRET mysqlpass
19+
)
20+
21+
$ mysql-connect name=mysql url=mysql://root@mysql password=${arg.mysql-root-password}
22+
23+
$ mysql-execute name=mysql
24+
DROP DATABASE IF EXISTS public;
25+
CREATE DATABASE public;
26+
USE public;
27+
CREATE TABLE `t``1` (`f``1` INT)
28+
INSERT INTO `t``1` VALUES(1);
29+
30+
> CREATE SOURCE mz_source FROM MYSQL CONNECTION mysql_conn;
31+
> CREATE TABLE t1 FROM SOURCE mz_source (REFERENCE public."t`1");
32+
33+
> SELECT * FROM t1;
34+
1

0 commit comments

Comments
 (0)