Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#
# PS-9638 - Enable CHECK TABLE EXTENDED to detect InnoDB LOB corruptions
#
call mtr.add_suppression("\\[ERROR\\] .* Invalid record! External LOB first page cannot be shared between two records");
call mtr.add_suppression("\\[ERROR\\] .* The external LOB first page is \\[page id: space=\\d+, page number=\\d+\\]");
call mtr.add_suppression("\\[ERROR\\] .* The first occurence of the external LOB first page is in record : page_no: \\d+ with heap_no: \\d+");
call mtr.add_suppression("\\[ERROR\\] .* The second occurence of the external LOB first page is in record: page_no: \\d+ with heap no: \\d+");
call mtr.add_suppression("\\[ERROR\\] .* Apparent corruption in space \\d+ page \\d+ index `PRIMARY`");
call mtr.add_suppression("\\[ERROR\\] .* In page \\d+ of index `PRIMARY` of table `test`.`t1`");
call mtr.add_suppression("\\[Warning\\] .* Cannot open table test/t1Please refer to .*innodb-troubleshooting.html for how to resolve the issue.");
CREATE TABLE t1 (id INT PRIMARY KEY, make_big CHAR(200), val LONGBLOB, INDEX idx1(val(50)));
INSERT INTO t1 (id,val) VALUES (1,REPEAT('a',1000000));
INSERT INTO t1 (id,val) VALUES (2,REPEAT('b',1000000));
INSERT INTO t1 (id,val) VALUES (3,REPEAT('c',1000000));
INSERT INTO t1 (id,val) VALUES (4,REPEAT('d',1000000));
INSERT INTO t1 (id,val) VALUES (5,REPEAT('e',1000000));
INSERT INTO t1 (id,val) VALUES (6,REPEAT('f',1000000));
SET DEBUG='+d, simulate_lob_corruption';
CHECK TABLE t1;
Table Op Msg_type Msg_text
test.t1 check status OK
CHECK TABLE t1 EXTENDED;
Table Op Msg_type Msg_text
test.t1 check Warning InnoDB: The B-tree of index PRIMARY is corrupted.
test.t1 check error Corrupt
SELECT * FROM t1;
ERROR 42S02: Table 'test.t1' doesn't exist
DROP TABLE t1;
case 2: compressed table
CREATE TABLE t1 (id INT PRIMARY KEY, make_big CHAR(200), val LONGBLOB, INDEX idx1(val(50))) ROW_FORMAT=COMPRESSED;
INSERT INTO t1 (id,val) VALUES (1,REPEAT('a',1000000));
INSERT INTO t1 (id,val) VALUES (2,REPEAT('b',1000000));
INSERT INTO t1 (id,val) VALUES (3,REPEAT('c',1000000));
INSERT INTO t1 (id,val) VALUES (4,REPEAT('d',1000000));
INSERT INTO t1 (id,val) VALUES (5,REPEAT('e',1000000));
INSERT INTO t1 (id,val) VALUES (6,REPEAT('f',1000000));
SET DEBUG='+d, simulate_lob_corruption';
CHECK TABLE t1;
Table Op Msg_type Msg_text
test.t1 check status OK
CHECK TABLE t1 EXTENDED;
Table Op Msg_type Msg_text
test.t1 check Warning InnoDB: The B-tree of index PRIMARY is corrupted.
test.t1 check error Corrupt
SELECT * FROM t1;
ERROR 42S02: Table 'test.t1' doesn't exist
DROP TABLE t1;
51 changes: 51 additions & 0 deletions mysql-test/suite/innodb/t/percona_extended_check_table_debug.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Requires a debug build: the test depends on the simulate_lob_corruption
# debug keyword that is evaluated inside page_rec_blob_validate().
--source include/have_debug.inc

--echo #
--echo # PS-9638 - Enable CHECK TABLE EXTENDED to detect InnoDB LOB corruptions
--echo #
# Error-log messages that are expected once the simulated corruption is
# detected; without these suppressions the run would be flagged for
# unexpected errors/warnings in the server log.
call mtr.add_suppression("\\[ERROR\\] .* Invalid record! External LOB first page cannot be shared between two records");
call mtr.add_suppression("\\[ERROR\\] .* The external LOB first page is \\[page id: space=\\d+, page number=\\d+\\]");
call mtr.add_suppression("\\[ERROR\\] .* The first occurence of the external LOB first page is in record : page_no: \\d+ with heap_no: \\d+");
call mtr.add_suppression("\\[ERROR\\] .* The second occurence of the external LOB first page is in record: page_no: \\d+ with heap no: \\d+");
call mtr.add_suppression("\\[ERROR\\] .* Apparent corruption in space \\d+ page \\d+ index `PRIMARY`");
call mtr.add_suppression("\\[ERROR\\] .* In page \\d+ of index `PRIMARY` of table `test`.`t1`");
call mtr.add_suppression("\\[Warning\\] .* Cannot open table test/t1Please refer to .*innodb-troubleshooting.html for how to resolve the issue.");

# case 1: table created without an explicit ROW_FORMAT
CREATE TABLE t1 (id INT PRIMARY KEY, make_big CHAR(200), val LONGBLOB, INDEX idx1(val(50)));

# Six rows with a 1,000,000-byte LONGBLOB each (presumably stored externally).
# The debug hook only injects the fake duplicate once five LOB first pages
# have already been recorded, so at least six LOB rows are needed.
INSERT INTO t1 (id,val) VALUES (1,REPEAT('a',1000000));
INSERT INTO t1 (id,val) VALUES (2,REPEAT('b',1000000));
INSERT INTO t1 (id,val) VALUES (3,REPEAT('c',1000000));
INSERT INTO t1 (id,val) VALUES (4,REPEAT('d',1000000));
INSERT INTO t1 (id,val) VALUES (5,REPEAT('e',1000000));
INSERT INTO t1 (id,val) VALUES (6,REPEAT('f',1000000));

# Enable the simulated "LOB first page shared by two records" corruption.
# Plain CHECK TABLE does not track LOB references, so it must still report OK.
SET DEBUG='+d, simulate_lob_corruption';
CHECK TABLE t1;

# EXTENDED enables the LOB reference tracking and must report the PRIMARY
# index as corrupted.
CHECK TABLE t1 EXTENDED;

# After the clustered index has been flagged as corrupted the table can no
# longer be opened.
--error ER_NO_SUCH_TABLE
SELECT * FROM t1;

DROP TABLE t1;

# NOTE(review): the debug keyword is never removed with '-d' in this file, so
# it is still set when case 2 starts; the SET DEBUG below adds it again.
--echo case 2: compressed table
CREATE TABLE t1 (id INT PRIMARY KEY, make_big CHAR(200), val LONGBLOB, INDEX idx1(val(50))) ROW_FORMAT=COMPRESSED;

INSERT INTO t1 (id,val) VALUES (1,REPEAT('a',1000000));
INSERT INTO t1 (id,val) VALUES (2,REPEAT('b',1000000));
INSERT INTO t1 (id,val) VALUES (3,REPEAT('c',1000000));
INSERT INTO t1 (id,val) VALUES (4,REPEAT('d',1000000));
INSERT INTO t1 (id,val) VALUES (5,REPEAT('e',1000000));
INSERT INTO t1 (id,val) VALUES (6,REPEAT('f',1000000));

# Same sequence as case 1: plain CHECK reports OK, EXTENDED reports corruption.
SET DEBUG='+d, simulate_lob_corruption';
CHECK TABLE t1;

CHECK TABLE t1 EXTENDED;

--error ER_NO_SUCH_TABLE
SELECT * FROM t1;

DROP TABLE t1;
10 changes: 6 additions & 4 deletions storage/innobase/fsp/fsp0fsp.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3627,11 +3627,13 @@ bool fseg_page_is_free(fseg_header_t *seg_header, /*!< in: segment header */

const page_size_t page_size(space->flags);

seg_inode = fseg_inode_get(seg_header, space_id, page_size, &mtr);
if (seg_header != nullptr) {
seg_inode = fseg_inode_get(seg_header, space_id, page_size, &mtr);

ut_a(seg_inode);
ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
ut_a(seg_inode);
ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ bugprone-implicit-widening-of-multiplication-result ⚠️
performing an implicit widening conversion to type unsigned long of a multiplication performed in type unsigned int

}

descr = xdes_get_descriptor(space_id, page, page_size, &mtr);
ut_a(descr);
Expand Down
26 changes: 25 additions & 1 deletion storage/innobase/handler/ha_innodb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ this program; if not, write to the Free Software Foundation, Inc.,
#include "os0enc.h"
#include "os0file.h"

#include <scope_guard.h>
#include <mutex>
#include <sstream>
#include <string>
Expand Down Expand Up @@ -19081,11 +19082,25 @@ int ha_innobase::check(THD *thd, /*!< in: user thread handle */
continue;
}

/* true if user uses CHECK TABLE t1 EXTENDED */
const bool is_extended = check_opt->flags & T_EXTEND;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ readability-implicit-bool-conversion ⚠️
implicit conversion unsigned long -> bool

Suggested change
const bool is_extended = check_opt->flags & T_EXTEND;
const bool is_extended = (check_opt->flags & T_EXTEND) != 0u;


if (!(check_opt->flags & T_QUICK) && !index->is_corrupted()) {
/* Enlarge the fatal lock wait timeout during
CHECK TABLE. */
srv_fatal_semaphore_wait_extend.fetch_add(1);

if (is_extended && index->is_clustered()) {
// Setup the thread local map for clustered index only
thread_local_blob_map = new blob_ref_map();
}

auto blob_ref_clear_guard = create_scope_guard([]() {
if (!thread_local_blob_map) return;
delete thread_local_blob_map;
thread_local_blob_map = nullptr;
});

bool valid = btr_validate_index(index, m_prebuilt->trx, false);

/* Restore the fatal lock wait timeout after
Expand All @@ -19099,7 +19114,16 @@ int ha_innobase::check(THD *thd, /*!< in: user thread handle */
"InnoDB: The B-tree of"
" index %s is corrupted.",
index->name());
continue;

// with extended mode, if clustered index is corrupted, it is marked
// as corrupted. We skip checking other indexes. The table is not
// repairable and user has to drop it
if (is_extended && index->is_clustered()) {
dict_set_corrupted(index);
break;
} else {
continue;
}
Comment on lines +19124 to +19126

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ llvm-else-after-return ⚠️
do not use else after break

Suggested change
} else {
continue;
}
} continue;

}
}

Expand Down
18 changes: 18 additions & 0 deletions storage/innobase/include/page0page.h
Original file line number Diff line number Diff line change
Expand Up @@ -807,6 +807,24 @@ bool page_is_spatial_non_leaf(const rec_t *rec, dict_index_t *index);

page_t *page_create_low(buf_block_t *block, ulint comp, page_type_t page_type);

/** Map from the first page number of an external LOB to the record that
references it, identified as <page_no, heap_no>. It is used to detect an
external LOB first page that is shared between two records, which can happen
only on corruption (cause not yet known). CHECK TABLE t1 EXTENDED uses this
map to report such corruption and to mark the table as corrupted. */
using blob_ref_map = std::unordered_map<page_no_t, std::pair<page_no_t, ulint>>;
extern thread_local blob_ref_map *thread_local_blob_map;

/** Validate that the external LOB's first page is not shared between records of
a clustered index
@param[in] rec physical record
@param[in] index index of the table
@param[in] offsets the record offset array
@return true if the record is OK; false if an external LOB first page is found
to be shared between two records */
bool page_rec_blob_validate(const rec_t *rec, const dict_index_t *index,
const ulint *offsets);

#include "page0page.ic"

#endif
130 changes: 130 additions & 0 deletions storage/innobase/page/page0page.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,14 @@ this program; if not, write to the Free Software Foundation, Inc.,
#include "lock0lock.h"
#include "srv0srv.h"
#endif /* !UNIV_HOTBACKUP */
#include "lob0lob.h"

/** Map from the first page number of an external LOB to the record that
references it, identified as <page_no, heap_no>. It is used to detect an
external LOB first page that is shared between two records, which can happen
only on corruption (cause not yet known). CHECK TABLE t1 EXTENDED uses this
map to report such corruption and to mark the table as corrupted. */
thread_local blob_ref_map *thread_local_blob_map = nullptr;

/* THE INDEX PAGE
==============
Expand Down Expand Up @@ -1721,6 +1729,124 @@ bool page_rec_validate(
return true;
}

/** Validate that the external LOB's first page is not shared between records of
a clustered index
@param[in] rec physical record
@param[in] index index of the table
@param[in] offsets the record offset array
@return true If OK else false if external LOB is found to be shared between two
records, ie false on failure */
bool page_rec_blob_validate(const rec_t *rec, const dict_index_t *index,
const ulint *offsets) {
// this means reference check is not enabled. Enabled only via
// CHECK TABLE path
if (thread_local_blob_map == nullptr) {
return true;
}

// if index is not PRIMARY, return true
if (!index->is_clustered()) {
return true;
}

// if page-level is not zero, return true because blob exists only on leaf
// level
const page_t *page = page_align(rec);
if (!page_is_leaf(page)) {
return true;
}

// if rec is not user record, blobs dont exist, return true
if (!page_rec_is_user_rec(rec)) {
return true;
}

// if rec doesn't have any external LOB, return true
if (!rec_offs_any_extern(offsets)) {
return true;
}

// if rec is deleted marked, return true, we cannot validate the blob. the
// blob pages in the deleted marked records could be freed
if (rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
return true;
}

// if rec is not the owner of the blob, we cannot validate if blob page state
// now validate that the blob first page is not marked as free from page
// bitmap

ulint n_fields = rec_offs_n_fields(offsets);

for (ulint i = 0; i < n_fields; i++) {
if (rec_offs_nth_extern(index, offsets, i)) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ readability-implicit-bool-conversion ⚠️
implicit conversion ulint (aka unsigned long) -> bool

Suggested change
if (rec_offs_nth_extern(index, offsets, i)) {
if (rec_offs_nth_extern(index, offsets, i) != 0u) {

// We do const_cast to remove constness because lob::ref_t doesn't have a
// variant that takes const record pointer
byte *field_ref = const_cast<byte *>(

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ cppcoreguidelines-pro-type-const-cast ⚠️
do not use const_cast

lob::btr_rec_get_field_ref(index, rec, offsets, i));

lob::ref_t ref(field_ref);
if (!ref.is_owner() || ref.is_null() || ref.is_null_relaxed() ||
ref.is_being_modified()) {
continue;
}

if (ref.length() == 0) {
// LOB purged
continue;
}

space_id_t blob_space_id = ref.space_id();
page_no_t blob_page_no = ref.page_no();

page_id_t blob_page_id(blob_space_id, blob_page_no);
bool is_free = fseg_page_is_free(nullptr, blob_space_id, blob_page_no);
if (is_free) {
// This should not be possible. A record that owns the BLOB shouldn't
// have the first page marked as free in page bitmap
ut_ad(0);
ib::error() << "Invalid record. The record's blob reference is marked"
<< " as free although the record owns it "
<< " page_no: " << page_get_page_no(page)
<< " heap_no: " << page_rec_get_heap_no(rec);
ib::error() << "BLOB reference that is marked free " << blob_page_id;

return false;
}

DBUG_EXECUTE_IF(
"simulate_lob_corruption",
// introduce corruption after 5 external LOB entries
if (thread_local_blob_map->size() >= 5) {
// we introduce a fake entry in the map
(*thread_local_blob_map)[blob_page_no] = std::make_pair(
page_get_page_no(page) - 1, page_rec_get_heap_no(rec) - 1);
});

auto it = thread_local_blob_map->find(blob_page_no);
if (it == thread_local_blob_map->end()) {
(*thread_local_blob_map)[blob_page_no] =
std::make_pair(page_get_page_no(page), page_rec_get_heap_no(rec));
} else {
auto val = it->second;
ib::error() << "Invalid record! External LOB first page cannot be "
"shared between "
"two records";
ib::error() << "The external LOB first page is " << blob_page_id;
ib::error() << "The first occurence of the external LOB first page is "
"in record : page_no: "
<< val.first << " with heap_no: " << val.second;
ib::error() << "The second occurence of the external LOB first page is "
"in record: page_no: "
<< page_get_page_no(page)
<< " with heap no: " << page_rec_get_heap_no(rec);
return false;
}
}
}
return true;
}

#ifndef UNIV_HOTBACKUP
#ifdef UNIV_DEBUG
/** Checks that the first directory slot points to the infimum record and
Expand Down Expand Up @@ -2235,6 +2361,10 @@ bool page_validate(const page_t *page, dict_index_t *index,
goto func_exit;
}

if (!page_rec_blob_validate(const_cast<byte *>(rec), index, offsets)) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ cppcoreguidelines-pro-type-const-cast ⚠️
do not use const_cast

goto func_exit;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ cppcoreguidelines-avoid-goto ⚠️
avoid using goto for flow control

}

DBUG_EXECUTE_IF(
"check_table_set_wrong_min_bit",
if (page_rec_is_user_rec(rec) &&
Expand Down
Loading