Skip to content

Commit c236e6a

Browse files
committed
MB-20759: Fix false-positive race on DcpConnMap::numActiveSnoozingBackfills
The existing code was safe, however ThreadSanitizer doesn't know about our own Spinlocks. Given this shouldn't be a hot path, switch to normal std:mutex. ThreadSanitizer report: WARNING: ThreadSanitizer: data race (pid=23569) Read of size 2 at 0x7d840000eca2 by thread T8 (mutexes: write M294, read M27095, write M66205, write M102676, write M95235): #0 DcpConnMap::canAddBackfillToActiveQ() ep-engine/src/connmap.cc:1308 (ep.so+0x000000045ac5) #1 BackfillManager::schedule() ep-engine/src/dcp/backfill-manager.cc:142 (ep.so+0x00000005b0eb) #2 DcpProducer::scheduleBackfillManager() ep-engine/src/dcp/producer.cc:702 (ep.so+0x000000078fe3) #3 ActiveStream::scheduleBackfill_UNLOCKED(bool) ep-engine/src/dcp/stream.cc:1016 (ep.so+0x00000008f280) #4 ActiveStream::transitionState(stream_state_t) ep-engine/src/dcp/stream.cc:1145 (ep.so+0x000000090589) #5 ActiveStream::setActive() ep-engine/src/dcp/stream.h:204 (ep.so+0x00000009958e) #6 DcpProducer::streamRequest() ep-engine/src/dcp/producer.cc:327 (ep.so+0x00000007f85d) #7 EvpDcpStreamReq ep-engine/src/ep_engine.cc:1573 (ep.so+0x0000000cea78) #8 dcp_stream_req_executor memcached/daemon/mcbp_executors.cc:2272 (memcached+0x00000045925c) #9 process_bin_packet memcached/daemon/mcbp_executors.cc:4650 (memcached+0x00000046481d) #10 mcbp_complete_nread(McbpConnection*) memcached/daemon/mcbp_executors.cc:4759 (memcached+0x00000046481d) #11 conn_nread(McbpConnection*) memcached/daemon/statemachine_mcbp.cc:314 (memcached+0x000000472678) #12 McbpStateMachine::execute(McbpConnection&) memcached/daemon/statemachine_mcbp.h:43 (memcached+0x000000447054) #13 McbpConnection::runStateMachinery() memcached/daemon/connection_mcbp.cc:1003 (memcached+0x000000447054) #14 McbpConnection::runEventLoop(short) memcached/daemon/connection_mcbp.cc:1274 (memcached+0x0000004470dd) #15 run_event_loop memcached/daemon/connections.cc:147 (memcached+0x00000044b9e9) #16 event_handler(int, short, void*) memcached/daemon/memcached.cc:851 (memcached+0x00000041466c) #17 event_persist_closure src/libevent/event.c:1319 (libevent_core-2.0.so.5+0x00000000b6b7) #18 event_process_active_single_queue src/libevent/event.c:1363 (libevent_core-2.0.so.5+0x00000000b6b7) #19 event_process_active src/libevent/event.c:1438 (libevent_core-2.0.so.5+0x00000000b6b7) #20 event_base_loop src/libevent/event.c:1639 (libevent_core-2.0.so.5+0x00000000b6b7) #21 CouchbaseThread::run() platform/src/cb_pthreads.cc:54 (libplatform.so.0.1.0+0x0000000057a5) #22 platform_thread_wrap platform/src/cb_pthreads.cc:66 (libplatform.so.0.1.0+0x0000000057a5) Previous write of size 2 at 0x7d840000eca2 by thread T55: #0 DcpConnMap::decrNumActiveSnoozingBackfills() ep-engine/src/connmap.cc:1319 (ep.so+0x000000045b7b) #1 BackfillManager::backfill() ep-engine/src/dcp/backfill-manager.cc:273 (ep.so+0x00000005a783) #2 BackfillManagerTask::run() ep-engine/src/dcp/backfill-manager.cc:62 (ep.so+0x00000005ac1c) #3 ExecutorThread::run() ep-engine/src/executorthread.cc:115 (ep.so+0x000000108d96) #4 launch_executor_thread ep-engine/src/executorthread.cc:33 (ep.so+0x000000109675) #5 CouchbaseThread::run() platform/src/cb_pthreads.cc:54 (libplatform.so.0.1.0+0x0000000057a5) #6 platform_thread_wrap platform/src/cb_pthreads.cc:66 (libplatform.so.0.1.0+0x0000000057a5) Change-Id: I88df57b268c1e615b7c5d2b7caf5f038a53692ba Reviewed-on: http://review.couchbase.org/69235 Reviewed-by: Trond Norbye <[email protected]> Tested-by: buildbot <[email protected]>
1 parent 575919e commit c236e6a

File tree

2 files changed

+33
-22
lines changed

2 files changed

+33
-22
lines changed

src/connmap.cc

+21-15
Original file line numberDiff line numberDiff line change
@@ -924,7 +924,7 @@ void TAPSessionStats::clearStats(const std::string &name) {
924924
DcpConnMap::DcpConnMap(EventuallyPersistentEngine &e)
925925
: ConnMap(e),
926926
aggrDcpConsumerBufferSize(0) {
927-
numActiveSnoozingBackfills = 0;
927+
backfills.numActiveSnoozing = 0;
928928
updateMaxActiveSnoozingBackfills(engine.getEpStats().getMaxDataSize());
929929
minCompressionRatioForProducer.store(
930930
engine.getConfiguration().getDcpMinCompressionRatio());
@@ -1312,36 +1312,42 @@ void DcpConnMap::notifyBackfillManagerTasks() {
13121312

13131313
bool DcpConnMap::canAddBackfillToActiveQ()
13141314
{
1315-
SpinLockHolder lh(&numBackfillsLock);
1316-
if (numActiveSnoozingBackfills < maxActiveSnoozingBackfills) {
1317-
++numActiveSnoozingBackfills;
1315+
std::lock_guard<std::mutex> lh(backfills.mutex);
1316+
if (backfills.numActiveSnoozing < backfills.maxActiveSnoozing) {
1317+
++backfills.numActiveSnoozing;
13181318
return true;
13191319
}
13201320
return false;
13211321
}
13221322

13231323
void DcpConnMap::decrNumActiveSnoozingBackfills()
13241324
{
1325-
SpinLockHolder lh(&numBackfillsLock);
1326-
if (numActiveSnoozingBackfills > 0) {
1327-
--numActiveSnoozingBackfills;
1328-
} else {
1329-
LOG(EXTENSION_LOG_WARNING, "ActiveSnoozingBackfills already zero!!!");
1325+
{
1326+
std::lock_guard<std::mutex> lh(backfills.mutex);
1327+
if (backfills.numActiveSnoozing > 0) {
1328+
--backfills.numActiveSnoozing;
1329+
return;
1330+
}
13301331
}
1332+
LOG(EXTENSION_LOG_WARNING, "ActiveSnoozingBackfills already zero!!!");
13311333
}
13321334

13331335
void DcpConnMap::updateMaxActiveSnoozingBackfills(size_t maxDataSize)
13341336
{
13351337
double numBackfillsMemThresholdPercent =
13361338
static_cast<double>(numBackfillsMemThreshold)/100;
13371339
size_t max = maxDataSize * numBackfillsMemThresholdPercent / dbFileMem;
1338-
/* We must have atleast one active/snoozing backfill */
1339-
SpinLockHolder lh(&numBackfillsLock);
1340-
maxActiveSnoozingBackfills =
1341-
std::max(static_cast<size_t>(1),
1342-
std::min(max, static_cast<size_t>(numBackfillsThreshold)));
1340+
uint16_t newMaxActive;
1341+
{
1342+
std::lock_guard<std::mutex> lh(backfills.mutex);
1343+
/* We must have atleast one active/snoozing backfill */
1344+
backfills.maxActiveSnoozing =
1345+
std::max(static_cast<size_t>(1),
1346+
std::min(max, static_cast<size_t>(numBackfillsThreshold)));
1347+
newMaxActive = backfills.maxActiveSnoozing;
1348+
}
13431349
LOG(EXTENSION_LOG_DEBUG, "Max active snoozing backfills set to %d",
1344-
maxActiveSnoozingBackfills);
1350+
newMaxActive);
13451351
}
13461352

13471353
void DcpConnMap::addStats(ADD_STAT add_stat, const void *c) {

src/connmap.h

+12-7
Original file line numberDiff line numberDiff line change
@@ -506,13 +506,13 @@ class DcpConnMap : public ConnMap {
506506
void updateMaxActiveSnoozingBackfills(size_t maxDataSize);
507507

508508
uint16_t getNumActiveSnoozingBackfills () {
509-
SpinLockHolder lh(&numBackfillsLock);
510-
return numActiveSnoozingBackfills;
509+
std::lock_guard<std::mutex> lh(backfills.mutex);
510+
return backfills.numActiveSnoozing;
511511
}
512512

513513
uint16_t getMaxActiveSnoozingBackfills () {
514-
SpinLockHolder lh(&numBackfillsLock);
515-
return maxActiveSnoozingBackfills;
514+
std::lock_guard<std::mutex> lh(backfills.mutex);
515+
return backfills.maxActiveSnoozing;
516516
}
517517

518518
ENGINE_ERROR_CODE addPassiveStream(ConnHandler& conn, uint32_t opaque,
@@ -558,11 +558,16 @@ class DcpConnMap : public ConnMap {
558558
*/
559559
static void cancelTasks(CookieToConnectionMap& map);
560560

561-
SpinLock numBackfillsLock;
562561
/* Db file memory */
563562
static const uint32_t dbFileMem;
564-
uint16_t numActiveSnoozingBackfills;
565-
uint16_t maxActiveSnoozingBackfills;
563+
564+
// Current and maximum number of backfills which are snoozing.
565+
struct {
566+
std::mutex mutex;
567+
uint16_t numActiveSnoozing;
568+
uint16_t maxActiveSnoozing;
569+
} backfills;
570+
566571
/* Max num of backfills we want to have irrespective of memory */
567572
static const uint16_t numBackfillsThreshold;
568573
/* Max percentage of memory we want backfills to occupy */

0 commit comments

Comments
 (0)