Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions db/db_tunables.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
/* Separator for composite tunable components. */
#define COMPOSITE_TUNABLE_SEP '.'

extern int gbl_sleep_5s_after_caching_table_versions;
extern int gbl_transactional_drop_plus_rename;
extern int gbl_bulk_import_validation_werror;
extern int gbl_debug_sleep_during_bulk_import;
Expand Down
4 changes: 4 additions & 0 deletions db/db_tunables.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@
at multiple places.
*/

REGISTER_TUNABLE(
"sleep_5s_after_caching_table_versions",
"Sleep for 5 seconds between caching table versions and getting schema lock in get_prepared_stmt (Default OFF)",
TUNABLE_BOOLEAN, &gbl_sleep_5s_after_caching_table_versions, 0, NULL, NULL, NULL, NULL);
REGISTER_TUNABLE("abort_during_downgrade_if_scs_dont_stop", "Abort if scs don't stop within 60 seconds"
"after starting a downgrade (default OFF)", TUNABLE_BOOLEAN,
&gbl_abort_during_downgrade_if_scs_dont_stop, 0, NULL, NULL,
Expand Down
9 changes: 0 additions & 9 deletions db/sqlglue.c
Original file line number Diff line number Diff line change
Expand Up @@ -4993,15 +4993,6 @@ int sqlite3BtreeBeginTrans(Vdbe *vdbe, Btree *pBt, int wrflag, int *pSchemaVersi
}
#endif

struct dbtable *db =
&thedb->static_table;
/* Latch last commit LSN */
if (clnt->dbtran.mode == TRANLEVEL_MODSNAP && !clnt->modsnap_in_progress && (db->handle != NULL) &&
(populate_modsnap_state(clnt) != 0)) {
rc = SQLITE_INTERNAL;
goto done;
}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Issue was that we already hold table locks at this point, and populate_modsnap_state then tried to grab the schema lock


/* already have a transaction, keep using it until it commits/aborts */
if (clnt->intrans || clnt->in_sqlite_init ||
(clnt->ctrl_sqlengine != SQLENG_STRT_STATE &&
Expand Down
48 changes: 33 additions & 15 deletions db/sqlinterfaces.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include <sbuf2.h>
#include <bdb_api.h>
#include <bdb_int.h>
#include <bdbglue.h>

#include "comdb2.h"
#include "types.h"
Expand Down Expand Up @@ -183,6 +184,7 @@ int gbl_dump_history_on_too_many_verify_errors = 0;
int gbl_thdpool_queue_only = 0;
int gbl_random_sql_work_delayed = 0;
int gbl_random_sql_work_rejected = 0;
int gbl_sleep_5s_after_caching_table_versions = 0;

int gbl_eventlog_fullhintsql = 1;

Expand Down Expand Up @@ -1730,41 +1732,51 @@ int cache_table_versions(struct sqlclntstate *clnt)
return 0;
}

int populate_modsnap_state(struct sqlclntstate *clnt)
static int populate_modsnap_state(struct sqlclntstate *clnt)
{
assert_no_schema_lk();

int rc = 0;
struct dbtable *db = &thedb->static_table;
assert(db->handle);

bdb_state_type *const bdb_state = thedb->bdb_env;
BDB_READLOCK(__func__);
rdlock_schema_lk();

if (clnt->is_hasql_retry) {
get_snapshot(clnt, (int *)&clnt->modsnap_start_lsn_file, (int *)&clnt->modsnap_start_lsn_offset);
}

rdlock_schema_lk();
if (bdb_get_modsnap_start_state(db->handle, thedb->bdb_attr, clnt->is_hasql_retry, clnt->snapshot,
&clnt->modsnap_start_lsn_file, &clnt->modsnap_start_lsn_offset,
&clnt->last_checkpoint_lsn_file, &clnt->last_checkpoint_lsn_offset)) {
unlock_schema_lk();
rc = bdb_get_modsnap_start_state(db->handle, thedb->bdb_attr, clnt->is_hasql_retry, clnt->snapshot,
&clnt->modsnap_start_lsn_file, &clnt->modsnap_start_lsn_offset,
&clnt->last_checkpoint_lsn_file, &clnt->last_checkpoint_lsn_offset);
if (rc) {
logmsg(LOGMSG_ERROR, "%s: Failed to get modsnap txn start state\n", __func__);
return 1;
goto done;
}

// TODO: Refactor cache_table_versions to not take in a clnt and then call it within bdb_get_modsnap_start_state
// so that we're holding the schema lock over less code.
if (cache_table_versions(clnt)) {
unlock_schema_lk();
rc = cache_table_versions(clnt);
if (rc) {
logmsg(LOGMSG_ERROR, "%s: Failed to cache table versions\n", __func__);
return 1;
goto done;
}
unlock_schema_lk();

if (bdb_register_modsnap(db->handle, clnt->modsnap_start_lsn_file, clnt->modsnap_start_lsn_offset,
clnt->last_checkpoint_lsn_file, clnt->last_checkpoint_lsn_offset,
&clnt->modsnap_registration)) {
rc = bdb_register_modsnap(db->handle, clnt->modsnap_start_lsn_file, clnt->modsnap_start_lsn_offset,
clnt->last_checkpoint_lsn_file, clnt->last_checkpoint_lsn_offset,
&clnt->modsnap_registration);
if (rc) {
logmsg(LOGMSG_ERROR, "%s: Failed to register modsnap txn\n", __func__);
return 1;
goto done;
}

clnt->modsnap_in_progress = 1;

done:
unlock_schema_lk();
BDB_RELLOCK();
return 0;
}

Expand Down Expand Up @@ -3355,6 +3367,12 @@ int get_prepared_stmt(struct sqlthdstate *thd, struct sqlclntstate *clnt,
/* sc-thread holds schema-lk in verify_dbstore */
if (!clnt->verify_dbstore) {
curtran_assert_nolocks();
if (clnt->dbtran.mode == TRANLEVEL_MODSNAP && !clnt->modsnap_in_progress && populate_modsnap_state(clnt)) {
return SQLITE_INTERNAL;
}
if (gbl_sleep_5s_after_caching_table_versions) {
sleep(5);
}
rdlock_schema_lk();
}
int rc = get_prepared_stmt_int(thd, clnt, rec, err,
Expand Down
1 change: 0 additions & 1 deletion db/sqlinterfaces.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ struct param_data *clnt_find_param(struct sqlclntstate *clnt, const char *name,
int index);
int bind_parameters(struct reqlogger *logger, sqlite3_stmt *stmt, struct sqlclntstate *clnt, char **err,
int sample_queries);
int populate_modsnap_state(struct sqlclntstate *clnt);
void clear_modsnap_state(struct sqlclntstate *clnt);
int cache_table_versions(struct sqlclntstate *clnt);

Expand Down
8 changes: 8 additions & 0 deletions tests/si_sc_race.test/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
ifeq ($(TESTSROOTDIR),)
include ../testcase.mk
else
include $(TESTSROOTDIR)/testcase.mk
endif
ifeq ($(TEST_TIMEOUT),)
export TEST_TIMEOUT=1m
endif
2 changes: 2 additions & 0 deletions tests/si_sc_race.test/lrl.options
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
enable_snapshot_isolation
sql_tranlevel_default snapshot
34 changes: 34 additions & 0 deletions tests/si_sc_race.test/runit
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/usr/bin/env bash
bash -n "$0" | exit 1

dbname=$1

send_to_cluster() {
local cmd="$1"
if [ -n "${CLUSTER}" ]; then
for node in ${CLUSTER}; do
cdb2sql ${CDB2_OPTIONS} ${dbname} default --host "${node}" "$cmd"
done
else
cdb2sql ${CDB2_OPTIONS} ${dbname} default --host "$(hostname)" "$cmd"
fi
}

test_select_fails_when_concurrent_sc() {
# This test demonstrates that a standalone snapshot select can fail if a schema change
# occurs between caching the table versions and executing the select.
cdb2sql ${CDB2_OPTIONS} ${dbname} default "create table t(i int)"
cdb2sql ${CDB2_OPTIONS} ${dbname} default "insert into t values(1)"
send_to_cluster "exec procedure sys.cmd.send('sleep_5s_after_caching_table_versions on')"
cdb2sql ${CDB2_OPTIONS} ${dbname} default "select * from t" &
local -r pid=$!
sleep 1
send_to_cluster "exec procedure sys.cmd.send('sleep_5s_after_caching_table_versions off')"
cdb2sql ${CDB2_OPTIONS} ${dbname} default "alter table t add column j int"
if wait ${pid}; then
echo "FAIL: expected select to fail, but it succeeded"
return 1
fi
}

test_select_fails_when_concurrent_sc
1 change: 1 addition & 0 deletions tests/tunables.test/t00_all_tunables.expected
Original file line number Diff line number Diff line change
Expand Up @@ -938,6 +938,7 @@
(name='skip_sync_if_direct', description='Don't fsync files if directio enabled', type='BOOLEAN', value='ON', read_only='N')
(name='skip_table_schema_check', description='skip_table_schema_check', type='BOOLEAN', value='OFF', read_only='N')
(name='skipdelaybase', description='Delay commits by at least this much if forced to delay by incoherent nodes.', type='INTEGER', value='100', read_only='N')
(name='sleep_5s_after_caching_table_versions', description='Sleep for 5 seconds between caching table versions and getting schema lock in get_prepared_stmt (Default OFF)', type='BOOLEAN', value='OFF', read_only='N')
(name='slow_rep_log_get_loop', description='Add latency to the master's log-get loop for testing. (Default: off)', type='BOOLEAN', value='OFF', read_only='N')
(name='slow_rep_process_txn_freq', description='', type='INTEGER', value='0', read_only='N')
(name='slow_rep_process_txn_maxms', description='', type='INTEGER', value='0', read_only='N')
Expand Down