Skip to content

Commit e22b2c5

Browse files
committed
Call populate_modsnap_state before grabbing table locks
Signed-off-by: mdouglas47 <[email protected]>
1 parent 340fa8e commit e22b2c5

File tree

9 files changed

+83
-25
lines changed

9 files changed

+83
-25
lines changed

db/db_tunables.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
/* Separator for composite tunable components. */
4242
#define COMPOSITE_TUNABLE_SEP '.'
4343

44+
extern int gbl_sleep_5s_after_caching_table_versions;
4445
extern int gbl_transactional_drop_plus_rename;
4546
extern int gbl_bulk_import_validation_werror;
4647
extern int gbl_debug_sleep_during_bulk_import;

db/db_tunables.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@
2424
at multiple places.
2525
*/
2626

27+
REGISTER_TUNABLE(
28+
"sleep_5s_after_caching_table_versions",
29+
"Sleep for 5 seconds between caching table versions and getting schema lock in get_prepared_stmt (Default OFF)",
30+
TUNABLE_BOOLEAN, &gbl_sleep_5s_after_caching_table_versions, 0, NULL, NULL, NULL, NULL);
2731
REGISTER_TUNABLE("abort_during_downgrade_if_scs_dont_stop", "Abort if scs don't stop within 60 seconds"
2832
"after starting a downgrade (default OFF)", TUNABLE_BOOLEAN,
2933
&gbl_abort_during_downgrade_if_scs_dont_stop, 0, NULL, NULL,

db/sqlglue.c

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4993,15 +4993,6 @@ int sqlite3BtreeBeginTrans(Vdbe *vdbe, Btree *pBt, int wrflag, int *pSchemaVersi
49934993
}
49944994
#endif
49954995

4996-
struct dbtable *db =
4997-
&thedb->static_table;
4998-
/* Latch last commit LSN */
4999-
if (clnt->dbtran.mode == TRANLEVEL_MODSNAP && !clnt->modsnap_in_progress && (db->handle != NULL) &&
5000-
(populate_modsnap_state(clnt) != 0)) {
5001-
rc = SQLITE_INTERNAL;
5002-
goto done;
5003-
}
5004-
50054996
/* already have a transaction, keep using it until it commits/aborts */
50064997
if (clnt->intrans || clnt->in_sqlite_init ||
50074998
(clnt->ctrl_sqlengine != SQLENG_STRT_STATE &&

db/sqlinterfaces.c

Lines changed: 33 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#include <sbuf2.h>
4242
#include <bdb_api.h>
4343
#include <bdb_int.h>
44+
#include <bdbglue.h>
4445

4546
#include "comdb2.h"
4647
#include "types.h"
@@ -183,6 +184,7 @@ int gbl_dump_history_on_too_many_verify_errors = 0;
183184
int gbl_thdpool_queue_only = 0;
184185
int gbl_random_sql_work_delayed = 0;
185186
int gbl_random_sql_work_rejected = 0;
187+
int gbl_sleep_5s_after_caching_table_versions = 0;
186188

187189
int gbl_eventlog_fullhintsql = 1;
188190

@@ -1730,41 +1732,51 @@ int cache_table_versions(struct sqlclntstate *clnt)
17301732
return 0;
17311733
}
17321734

1733-
int populate_modsnap_state(struct sqlclntstate *clnt)
1735+
static int populate_modsnap_state(struct sqlclntstate *clnt)
17341736
{
1737+
assert_no_schema_lk();
1738+
1739+
int rc = 0;
17351740
struct dbtable *db = &thedb->static_table;
17361741
assert(db->handle);
17371742

1743+
bdb_state_type *const bdb_state = thedb->bdb_env;
1744+
BDB_READLOCK(__func__);
1745+
rdlock_schema_lk();
1746+
17381747
if (clnt->is_hasql_retry) {
17391748
get_snapshot(clnt, (int *)&clnt->modsnap_start_lsn_file, (int *)&clnt->modsnap_start_lsn_offset);
17401749
}
17411750

1742-
rdlock_schema_lk();
1743-
if (bdb_get_modsnap_start_state(db->handle, thedb->bdb_attr, clnt->is_hasql_retry, clnt->snapshot,
1744-
&clnt->modsnap_start_lsn_file, &clnt->modsnap_start_lsn_offset,
1745-
&clnt->last_checkpoint_lsn_file, &clnt->last_checkpoint_lsn_offset)) {
1746-
unlock_schema_lk();
1751+
rc = bdb_get_modsnap_start_state(db->handle, thedb->bdb_attr, clnt->is_hasql_retry, clnt->snapshot,
1752+
&clnt->modsnap_start_lsn_file, &clnt->modsnap_start_lsn_offset,
1753+
&clnt->last_checkpoint_lsn_file, &clnt->last_checkpoint_lsn_offset);
1754+
if (rc) {
17471755
logmsg(LOGMSG_ERROR, "%s: Failed to get modsnap txn start state\n", __func__);
1748-
return 1;
1756+
goto done;
17491757
}
17501758

17511759
// TODO: Refactor cache_table_versions to not take in a clnt and then call it within bdb_get_modsnap_start_state
17521760
// so that we're holding the schema lock over less code.
1753-
if (cache_table_versions(clnt)) {
1754-
unlock_schema_lk();
1761+
rc = cache_table_versions(clnt);
1762+
if (rc) {
17551763
logmsg(LOGMSG_ERROR, "%s: Failed to cache table versions\n", __func__);
1756-
return 1;
1764+
goto done;
17571765
}
1758-
unlock_schema_lk();
17591766

1760-
if (bdb_register_modsnap(db->handle, clnt->modsnap_start_lsn_file, clnt->modsnap_start_lsn_offset,
1761-
clnt->last_checkpoint_lsn_file, clnt->last_checkpoint_lsn_offset,
1762-
&clnt->modsnap_registration)) {
1767+
rc = bdb_register_modsnap(db->handle, clnt->modsnap_start_lsn_file, clnt->modsnap_start_lsn_offset,
1768+
clnt->last_checkpoint_lsn_file, clnt->last_checkpoint_lsn_offset,
1769+
&clnt->modsnap_registration);
1770+
if (rc) {
17631771
logmsg(LOGMSG_ERROR, "%s: Failed to register modsnap txn\n", __func__);
1764-
return 1;
1772+
goto done;
17651773
}
17661774

17671775
clnt->modsnap_in_progress = 1;
1776+
1777+
done:
1778+
unlock_schema_lk();
1779+
BDB_RELLOCK();
17681780
return 0;
17691781
}
17701782

@@ -3355,6 +3367,12 @@ int get_prepared_stmt(struct sqlthdstate *thd, struct sqlclntstate *clnt,
33553367
/* sc-thread holds schema-lk in verify_dbstore */
33563368
if (!clnt->verify_dbstore) {
33573369
curtran_assert_nolocks();
3370+
if (clnt->dbtran.mode == TRANLEVEL_MODSNAP && !clnt->modsnap_in_progress && populate_modsnap_state(clnt)) {
3371+
return SQLITE_INTERNAL;
3372+
}
3373+
if (gbl_sleep_5s_after_caching_table_versions) {
3374+
sleep(5);
3375+
}
33583376
rdlock_schema_lk();
33593377
}
33603378
int rc = get_prepared_stmt_int(thd, clnt, rec, err,

db/sqlinterfaces.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,6 @@ struct param_data *clnt_find_param(struct sqlclntstate *clnt, const char *name,
8787
int index);
8888
int bind_parameters(struct reqlogger *logger, sqlite3_stmt *stmt, struct sqlclntstate *clnt, char **err,
8989
int sample_queries);
90-
int populate_modsnap_state(struct sqlclntstate *clnt);
9190
void clear_modsnap_state(struct sqlclntstate *clnt);
9291
int cache_table_versions(struct sqlclntstate *clnt);
9392

tests/si_sc_race.test/Makefile

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
ifeq ($(TESTSROOTDIR),)
2+
include ../testcase.mk
3+
else
4+
include $(TESTSROOTDIR)/testcase.mk
5+
endif
6+
ifeq ($(TEST_TIMEOUT),)
7+
export TEST_TIMEOUT=1m
8+
endif

tests/si_sc_race.test/lrl.options

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
enable_snapshot_isolation
2+
sql_tranlevel_default snapshot

tests/si_sc_race.test/runit

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#!/usr/bin/env bash
2+
bash -n "$0" | exit 1
3+
4+
dbname=$1
5+
6+
send_to_cluster() {
7+
local cmd="$1"
8+
if [ -n "${CLUSTER}" ]; then
9+
for node in ${CLUSTER}; do
10+
cdb2sql ${CDB2_OPTIONS} ${dbname} default --host "${node}" "$cmd"
11+
done
12+
else
13+
cdb2sql ${CDB2_OPTIONS} ${dbname} default --host "$(hostname)" "$cmd"
14+
fi
15+
}
16+
17+
test_select_fails_when_concurrent_sc() {
18+
# This test demonstrates that a standalone snapshot select can fail if a schema change
19+
# occurs between caching the table versions and executing the select.
20+
cdb2sql ${CDB2_OPTIONS} ${dbname} default "create table t(i int)"
21+
cdb2sql ${CDB2_OPTIONS} ${dbname} default "insert into t values(1)"
22+
send_to_cluster "exec procedure sys.cmd.send('sleep_5s_after_caching_table_versions on')"
23+
cdb2sql ${CDB2_OPTIONS} ${dbname} default "select * from t" &
24+
local -r pid=$!
25+
sleep 1
26+
send_to_cluster "exec procedure sys.cmd.send('sleep_5s_after_caching_table_versions off')"
27+
cdb2sql ${CDB2_OPTIONS} ${dbname} default "alter table t add column j int"
28+
if wait ${pid}; then
29+
echo "FAIL: expected select to fail, but it succeeded"
30+
return 1
31+
fi
32+
}
33+
34+
test_select_fails_when_concurrent_sc

tests/tunables.test/t00_all_tunables.expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -938,6 +938,7 @@
938938
(name='skip_sync_if_direct', description='Don't fsync files if directio enabled', type='BOOLEAN', value='ON', read_only='N')
939939
(name='skip_table_schema_check', description='skip_table_schema_check', type='BOOLEAN', value='OFF', read_only='N')
940940
(name='skipdelaybase', description='Delay commits by at least this much if forced to delay by incoherent nodes.', type='INTEGER', value='100', read_only='N')
941+
(name='sleep_5s_after_caching_table_versions', description='Sleep for 5 seconds between caching table versions and getting schema lock in get_prepared_stmt (Default OFF)', type='BOOLEAN', value='OFF', read_only='N')
941942
(name='slow_rep_log_get_loop', description='Add latency to the master's log-get loop for testing. (Default: off)', type='BOOLEAN', value='OFF', read_only='N')
942943
(name='slow_rep_process_txn_freq', description='', type='INTEGER', value='0', read_only='N')
943944
(name='slow_rep_process_txn_maxms', description='', type='INTEGER', value='0', read_only='N')

0 commit comments

Comments
 (0)