Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions mysql-test/suite/rocksdb/r/recovery_after_alter_crash.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Include helper scripts to check if RocksDB and debug sync features are available
# 1. Disable query logging temporarily to suppress expected warnings during test
# 2. Create a test table with MyRocks engine, a primary key, and one secondary index
CREATE TABLE t (
i INT PRIMARY KEY,
c1 INT,
c2 INT,
KEY k1 (c1)
) ENGINE=ROCKSDB;
# 3. Insert sample data into the table
INSERT INTO t VALUES (1, 10, 100), (2, 20, 200), (3, 30, 300);
# 4. Set debug sync to simulate crash after RocksDB commits an ALTER operation
SET SESSION debug='+d,crash_commit_after_prepare';
# 5. Expect the server to crash when committing the ALTER statement
ALTER TABLE t ADD INDEX k2 (c2), ALGORITHM=INPLACE;
ERROR HY000: Lost connection to MySQL server during query
# 6. Wait until the server is fully disconnected due to the crash
# 7. Restart the MySQL server; MyRocks should automatically rebuild the corrupted table definition
# restart
# 8. Verify that the data in the table is intact after reconstruction
SELECT * FROM t ORDER BY i;
i c1 c2
1 10 100
2 20 200
3 30 300
# 9. Verify that the table structure is consistent and not corrupted
ANALYZE TABLE t;
Table Op Msg_type Msg_text
test.t analyze status OK
# 10. Check that the original index (k1) exists and can be used in queries
EXPLAIN SELECT * FROM t WHERE c1 = 200;
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t NULL ref k1 k1 5 const 1 100.00 NULL
Warnings:
Note 1003 /* select#1 */ select `test`.`t`.`i` AS `i`,`test`.`t`.`c1` AS `c1`,`test`.`t`.`c2` AS `c2` from `test`.`t` where (`test`.`t`.`c1` = 200)
# 11. Assert that RocksDB successfully reconstructed the corrupted table
include/assert_grep.inc [RocksDB table reconstruction finished]
# 12. Attempt to add a new index k2 on column c2 using the INPLACE algorithm.
# This command causes a server crash when `rocksdb_validate_tables=1`, which is the default setting
ALTER TABLE t ADD INDEX k2 (c2), ALGORITHM=INPLACE;
# 13. Clean up the test table
DROP TABLE t;
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
--loose-rocksdb_validate_tables=2
56 changes: 56 additions & 0 deletions mysql-test/suite/rocksdb/t/recovery_after_alter_crash.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
--echo # Include helper scripts to check if RocksDB and debug sync features are available
--source include/have_rocksdb.inc
--source include/have_debug_sync.inc

--echo # 1. Disable query logging temporarily to suppress expected warnings during test
--disable_query_log
CALL mtr.add_suppression("Plugin rocksdb reported: 'Table '.*' definition differs between MyRocks .* ADD INDEX'");
--enable_query_log

--echo # 2. Create a test table with MyRocks engine, a primary key, and one secondary index
CREATE TABLE t (
i INT PRIMARY KEY,
c1 INT,
c2 INT,
KEY k1 (c1)
) ENGINE=ROCKSDB;

--echo # 3. Insert sample data into the table
INSERT INTO t VALUES (1, 10, 100), (2, 20, 200), (3, 30, 300);

--echo # 4. Set debug sync to simulate crash after RocksDB commits an ALTER operation
SET SESSION debug='+d,crash_commit_after_prepare';

--echo # 5. Expect the server to crash when committing the ALTER statement
--source include/expect_crash.inc
--error CR_SERVER_LOST
ALTER TABLE t ADD INDEX k2 (c2), ALGORITHM=INPLACE;

--echo # 6. Wait until the server is fully disconnected due to the crash
--source include/wait_until_disconnected.inc

--echo # 7. Restart the MySQL server; MyRocks should automatically rebuild the corrupted table definition
--source include/start_mysqld.inc

--echo # 8. Verify that the data in the table is intact after reconstruction
SELECT * FROM t ORDER BY i;

--echo # 9. Verify that the table structure is consistent and not corrupted
ANALYZE TABLE t;

--echo # 10. Check that the original index (k1) exists and can be used in queries
EXPLAIN SELECT * FROM t WHERE c1 = 200;

--echo # 11. Assert that RocksDB successfully reconstructed the corrupted table
--let $assert_text = RocksDB table reconstruction finished
--let $assert_file = $MYSQLTEST_VARDIR/log/mysqld.1.err
--let $assert_select = Reconstruction of the corrupted table .* has finished.
--let $assert_count = 1
--source include/assert_grep.inc

--echo # 12. Attempt to add a new index k2 on column c2 using the INPLACE algorithm.
--echo # This command causes a server crash when `rocksdb_validate_tables=1`, which is the default setting
ALTER TABLE t ADD INDEX k2 (c2), ALGORITHM=INPLACE;

--echo # 13. Clean up the test table
DROP TABLE t;
5 changes: 5 additions & 0 deletions storage/rocksdb/ChangeLog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# MyRocks Changelog

## MyRocks 9.3.1-4
**This version was shipped with Percona Server 8.0.45-36 and 8.4.8-8.**
* PS-10287 – Add automatic table definition recovery after crash on ALTER
· [Jira](https://perconadev.atlassian.net/browse/PS-10287)

## MyRocks 9.3.1-3
**This version was shipped with Percona Server 8.0.44-35 and 8.4.7-7.**
* PS-9680 – Fix potential data corruption in MyRocks after RocksDB 7.10.0 changes
Expand Down
173 changes: 157 additions & 16 deletions storage/rocksdb/ha_rocksdb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8069,37 +8069,71 @@ int ha_rocksdb::open(const char *const name, int mode, uint test_if_locked,
uint n_keys = table->s->keys + (has_hidden_pk(*table) ? 1 : 0);

if (m_tbl_def->m_key_count != n_keys) {
const char *alter_op;
if (m_tbl_def->m_key_count > n_keys)
alter_op = "ALTER TABLE ... ADD INDEX";
else
alter_op = "ALTER TABLE ... DROP INDEX";

LogPluginErrMsg(ERROR_LEVEL, 0,
"Table '%s' definition mismatch between MyRocks "
"(m_key_count=%d) and data dictionary (n_keys=%d)",
"Table '%s' definition differs between MyRocks "
"(m_key_count=%d) and the data dictionary (n_keys=%d). "
"This was likely caused by a crash during '%s'.",
m_tbl_def->full_tablename().c_str(), m_tbl_def->m_key_count,
n_keys);
n_keys, alter_op);

for (uint i = 0; i < table->s->keys; i++) {
const char *key_name;
assert((*table).key_info != nullptr);
assert((*table).key_info[i].name != nullptr);
key_name = (*table).key_info[i].name;

LogPluginErrMsg(
ERROR_LEVEL, 0,
"ha_rocksdb::open TABLE table_name=%s i=%d/%d key_name=%s",
table->s->table_name.str, i, table->s->keys, key_name);
LogPluginErrMsg(INFORMATION_LEVEL, 0,
"TABLE table_name=%s i=%d/%d key_name=%s",
table->s->table_name.str, i, table->s->keys, key_name);
}

uint pk = table->s->primary_key;
LogPluginErrMsg(
ERROR_LEVEL, 0, "ha_rocksdb::open TABLE PK key_name=%s",
INFORMATION_LEVEL, 0, "TABLE PK key_name=%s",
pk == MAX_INDEXES ? HIDDEN_PK_NAME : (*table).key_info[pk].name);

for (uint i = 0; i < m_tbl_def->m_key_count; i++) {
const char *rdb_name = m_tbl_def->m_key_descr_arr[i]->m_name.c_str();
LogPluginErrMsg(
ERROR_LEVEL, 0,
"ha_rocksdb::open KEY_descr_arr table_name=%s i=%d/%d key_name=%s",
m_tbl_def->full_tablename().c_str(), i, m_tbl_def->m_key_count,
rdb_name);
LogPluginErrMsg(INFORMATION_LEVEL, 0,
"KEY_descr_arr table_name=%s i=%d/%d key_name=%s",
m_tbl_def->full_tablename().c_str(), i,
m_tbl_def->m_key_count, rdb_name);
}

#if defined(ROCKSDB_INCLUDE_VALIDATE_TABLES) && ROCKSDB_INCLUDE_VALIDATE_TABLES
if (m_tbl_def->m_key_count > n_keys && rocksdb_validate_tables > 0) {
if (rocksdb_validate_tables == 1) {
LogPluginErrMsg(
ERROR_LEVEL, 0,
"Start the server with \"rocksdb_validate_tables=2\" to attempt to "
"fix the problem.");
} else {
const std::string tbl_name = m_tbl_def->full_tablename();
LogPluginErrMsg(INFORMATION_LEVEL, 0,
"Starting reconstruction of the corrupted table %s.",
m_tbl_def->full_tablename().c_str());

if (rebuild_table_def_from_table(table) != HA_EXIT_SUCCESS) {
LogPluginErrMsg(ERROR_LEVEL, 0,
"Failed to rebuild table definition for table '%s'",
tbl_name.c_str());
my_error(ER_KEY_CREATE_DURING_ALTER, MYF(0));
return HA_EXIT_FAILURE;
}

LogPluginErrMsg(
INFORMATION_LEVEL, 0,
"Reconstruction of the corrupted table %s has finished.",
m_tbl_def->full_tablename().c_str());
}
}
#endif
}

// close() above has already called free_key_buffers(). No need to do it here.
Expand Down Expand Up @@ -8685,8 +8719,6 @@ std::unordered_map<std::string, uint> ha_rocksdb::get_old_key_positions(
const TABLE &old_table_arg, const Rdb_tbl_def &old_tbl_def_arg) const {
DBUG_ENTER_FUNC();

std::shared_ptr<Rdb_key_def> *const old_key_descr =
old_tbl_def_arg.m_key_descr_arr;
std::unordered_map<std::string, uint> old_key_pos;
std::unordered_map<std::string, uint> new_key_pos;
uint i;
Expand All @@ -8697,7 +8729,7 @@ std::unordered_map<std::string, uint> ha_rocksdb::get_old_key_positions(

for (i = 0; i < old_tbl_def_arg.m_key_count; i++) {
if (is_hidden_pk(i, old_table_arg, old_tbl_def_arg)) {
old_key_pos[old_key_descr[i]->m_name] = i;
old_key_pos[HIDDEN_PK_NAME] = i;
continue;
}

Expand All @@ -8710,6 +8742,9 @@ std::unordered_map<std::string, uint> ha_rocksdb::get_old_key_positions(
CREATE TABLE t1 (a INT, b INT, KEY ka(a)) ENGINE=RocksDB;
ALTER TABLE t1 DROP INDEX ka, ADD INDEX ka(b), ALGORITHM=INPLACE;
*/
if (i >= old_table_arg.s->keys) {
continue;
}
const KEY *const old_key = &old_table_arg.key_info[i];
const auto &it = new_key_pos.find(old_key->name);
if (it == new_key_pos.end()) {
Expand Down Expand Up @@ -13019,6 +13054,40 @@ int ha_rocksdb::delete_table(Rdb_tbl_def *const tbl) {
DBUG_RETURN(HA_EXIT_SUCCESS);
}

// A simplified version of ha_rocksdb::delete_table that removes only the
// table's entry from the MyRocks data dictionary while leaving the table's
// data intact. Used by rebuild_table_def_from_table() to unregister a
// corrupted Rdb_tbl_def before re-registering a freshly rebuilt one.
//
// @param tbl  table definition to unregister; must be non-null and, when this
//             handler has an associated definition, must equal m_tbl_def
// @return HA_EXIT_SUCCESS on success, otherwise the error code returned by
//         the dictionary-manager commit
int ha_rocksdb::delete_table_def(Rdb_tbl_def *const tbl) {
  DBUG_ENTER_FUNC();

  assert(tbl != nullptr);
  assert(m_tbl_def == nullptr || m_tbl_def == tbl);
  // Route the dictionary update through the manager shard selected by the
  // table's default column family (taken from the first key descriptor).
  uint table_default_cf_id = tbl->m_key_descr_arr[0]->get_gl_index_id().cf_id;
  auto local_dict_manager =
      dict_manager.get_dict_manager_selector_non_const(table_default_cf_id);
  const std::unique_ptr<rocksdb::WriteBatch> wb = local_dict_manager->begin();
  rocksdb::WriteBatch *const batch = wb.get();

  {
    /*
      Remove the table entry in data dictionary (this will also remove it from
      the persistent data dictionary).
    */
    ddl_manager.remove(tbl, batch, table_default_cf_id, true);

    int err = local_dict_manager->commit(batch);
    if (err) {
      DBUG_RETURN(err);
    }
  }

  // Clear cached handler state so no stale pointers into the removed
  // definition survive (avoid dangling pointer).
  m_tbl_def = nullptr;
  m_iterator = nullptr;
  m_pk_iterator = nullptr;
  inited = NONE;
  DBUG_RETURN(HA_EXIT_SUCCESS);
}

/*
Note: the following function is called when the table is not open. That is,
this->table==nullptr, pk_key_descr==nullptr, etc.
Expand Down Expand Up @@ -14402,6 +14471,78 @@ bool ha_rocksdb::prepare_inplace_alter_table(
DBUG_RETURN(HA_EXIT_SUCCESS);
}

// Reconstructs a corrupted m_tbl_def (Rdb_tbl_def) by removing the
// partially added index left after an incomplete "ALTER ... ADD INDEX".
// Derived from ha_rocksdb::prepare_inplace_alter_table.
//
// @param altered_table  server-side TABLE describing the expected (correct)
//                       table definition to rebuild from
// @return HA_EXIT_SUCCESS on success; HA_EXIT_FAILURE (with my_error() set)
//         when allocation, key-definition creation, or the dictionary
//         update fails
bool ha_rocksdb::rebuild_table_def_from_table(TABLE *altered_table) {
  DBUG_ENTER_FUNC();
  assert(altered_table != nullptr);

  std::unique_ptr<Rdb_tbl_def> new_tdef;
  std::unique_ptr<std::shared_ptr<Rdb_key_def>[]> new_key_descr;

  // Account for the hidden PK that MyRocks adds when the table has none.
  uint new_n_keys =
      altered_table->s->keys + (has_hidden_pk(*altered_table) ? 1 : 0);
  const std::string tbl_name = m_tbl_def->full_tablename();

  try {
    // Allocate the array and assign to the unique_ptr.
    new_key_descr.reset(new std::shared_ptr<Rdb_key_def>[new_n_keys]);
    new_tdef = std::make_unique<Rdb_tbl_def>(tbl_name);
  } catch (const std::bad_alloc &) {
    my_error(ER_OUTOFMEMORY, MYF(0));
    DBUG_RETURN(HA_EXIT_FAILURE);
  }

  // Hand the key-descriptor array to new_tdef and give up local ownership
  // immediately: Rdb_tbl_def's destructor frees m_key_descr_arr, so keeping
  // new_key_descr as a second owner would double-free the array whenever an
  // error path below destroys new_tdef.
  new_tdef->m_key_descr_arr = new_key_descr.release();
  new_tdef->m_key_count = new_n_keys;
  new_tdef->m_pk_index = altered_table->s->primary_key;
  new_tdef->m_auto_incr_val =
      m_tbl_def->m_auto_incr_val.load(std::memory_order_relaxed);
  new_tdef->m_hidden_pk_val =
      m_tbl_def->m_hidden_pk_val.load(std::memory_order_relaxed);

  if (create_key_defs(*altered_table, *new_tdef,
                      "" /* actual_user_table_name */, false /* is_dd_tbl */,
                      table, m_tbl_def)) {
    goto error;
  }

  // delete old table_def but keep data
  if (delete_table_def(m_tbl_def) != HA_EXIT_SUCCESS) {
    LogPluginErrMsg(ERROR_LEVEL, 0, "Failure when trying to drop table %s",
                    tbl_name.c_str());
    goto error;
  }

  {
    // NOTE(review): delete_table_def() above selects the dictionary manager
    // by the table's default cf id, while here `false` (cf id 0) is passed —
    // confirm this is intentional.
    auto local_dict_manager =
        dict_manager.get_dict_manager_selector_non_const(false);
    const std::unique_ptr<rocksdb::WriteBatch> wb = local_dict_manager->begin();
    rocksdb::WriteBatch *const batch = wb.get();

    std::lock_guard<Rdb_dict_manager> dm_lock(*local_dict_manager);

    // Register the rebuilt definition and persist it in one batch.
    int err = ddl_manager.put_and_write(new_tdef.get(), batch);
    if (err != HA_EXIT_SUCCESS) goto error;

    err = local_dict_manager->commit(batch);
    if (err != HA_EXIT_SUCCESS) goto error;
  }

  // Success: the handler takes ownership of the rebuilt definition and
  // refreshes its cached key-descriptor pointers.
  m_tbl_def = new_tdef.release();
  m_key_descr_arr = m_tbl_def->m_key_descr_arr;
  m_pk_descr = m_key_descr_arr[pk_index(*altered_table, *m_tbl_def)];

  DBUG_RETURN(HA_EXIT_SUCCESS);

error:
  my_error(ER_KEY_CREATE_DURING_ALTER, MYF(0));
  DBUG_RETURN(HA_EXIT_FAILURE);
}

/**
Alter the table structure in-place with operations specified using
HA_ALTER_FLAGS and Alter_inplace_info. The level of concurrency allowed
Expand Down
2 changes: 2 additions & 0 deletions storage/rocksdb/ha_rocksdb.h
Original file line number Diff line number Diff line change
Expand Up @@ -861,6 +861,7 @@ class ha_rocksdb : public my_core::handler, public blob_buffer {
key_range *const max_key) override
MY_ATTRIBUTE((__warn_unused_result__));

int delete_table_def(Rdb_tbl_def *const tbl);
int delete_table(Rdb_tbl_def *const tbl);
int delete_table(const char *const from, const dd::Table *table_def) override
MY_ATTRIBUTE((__warn_unused_result__));
Expand Down Expand Up @@ -900,6 +901,7 @@ class ha_rocksdb : public my_core::handler, public blob_buffer {
TABLE *altered_table,
my_core::Alter_inplace_info *ha_alter_info) override;

bool rebuild_table_def_from_table(TABLE *altered_table);
bool prepare_inplace_alter_table(TABLE *altered_table,
my_core::Alter_inplace_info *ha_alter_info,
const dd::Table *old_table_def,
Expand Down