Skip to content

Commit 1eb44b0

Browse files
clarissecheahMongoDB Bot
authored and
MongoDB Bot
committed
SERVER-97174 Validate command on secondaries reads unfinished oplog batch (#32610)
GitOrigin-RevId: 6f2b790441d0ad5256a8c81aee297a3f53514aa5
1 parent 7d8fa3a commit 1eb44b0

File tree

4 files changed

+120
-0
lines changed

4 files changed

+120
-0
lines changed
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
/**
2+
* Tests that foreground validation on the secondary occurs in between batch boundaries of oplog
3+
* application and that trying to proceed with validation while we have yet to finish applying a
4+
* batch will result in validate being blocked due to waiting for a lock.
5+
*/
6+
7+
import {configureFailPoint} from "jstests/libs/fail_point_util.js";
8+
import {ReplSetTest} from "jstests/libs/replsettest.js";
9+
10+
const rst = new ReplSetTest({nodes: 2});
11+
rst.startSet();
12+
rst.initiate();
13+
14+
const primary = rst.getPrimary();
15+
const secondary = rst.getSecondary();
16+
17+
const dbName = "test_validation";
18+
const collName = "test_coll_validation";
19+
const primaryDB = primary.getDB(dbName);
20+
const secDB = secondary.getDB(dbName);
21+
const primaryColl = primaryDB.getCollection(collName);
22+
23+
/*
24+
* Create some indexes and insert some data, so we can validate them more meaningfully.
25+
*/
26+
assert.commandWorked(primaryColl.createIndex({a: 1}));
27+
assert.commandWorked(primaryColl.createIndex({b: 1}));
28+
assert.commandWorked(primaryColl.createIndex({c: 1}));
29+
30+
const numDocs = 10;
31+
for (let i = 0; i < numDocs; ++i) {
32+
assert.commandWorked(primaryColl.insert({a: i, b: i, c: i}));
33+
}
34+
35+
rst.awaitReplication();
36+
/* Enable the failpoint to stop oplog batch application before completion. */
37+
let failPoint = "pauseBatchApplicationBeforeCompletion";
38+
const fp = configureFailPoint(secDB, failPoint);
39+
40+
// Start oplog batch application.
41+
const awaitOplog = startParallelShell(function() {
42+
const primaryDB = db.getSiblingDB('test_validation');
43+
const coll = primaryDB.getCollection('test_coll_validation');
44+
assert.commandWorked(coll.insert({a: 1, b: 1, c: 1}, {writeConcern: {w: 2}}));
45+
}, primary.port);
46+
47+
fp.wait();
48+
49+
// Start validation on the secondary on a parallel shell.
50+
const awaitValidate = startParallelShell(function() {
51+
const secDB = db.getSiblingDB('test_validation');
52+
const coll = secDB.getCollection('test_coll_validation');
53+
secDB.setSecondaryOk();
54+
const res = coll.validate();
55+
assert.commandWorked(res);
56+
assert(res.valid, "Validate cmd failed: " + tojson(res));
57+
}, secondary.port);
58+
59+
// Ensure that the validation call is blocked on waiting for the lock.
60+
assert.soon(() => {
61+
return secDB.currentOp().inprog.some(op => {
62+
return op.active && (op.command.validate === "test_coll_validation") && (op.waitingForLock);
63+
});
64+
});
65+
66+
fp.off();
67+
awaitOplog();
68+
awaitValidate();
69+
70+
rst.stopSet();

src/mongo/db/catalog/validate/validate_state.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,9 @@ ValidateState::ValidateState(OperationContext* opCtx,
6969
const NamespaceString& nss,
7070
ValidationOptions options)
7171
: ValidationOptions(std::move(options)),
72+
_validateLock(isBackground()
73+
? boost::none
74+
: boost::optional<Lock::SharedLock>{obtainSharedValidationLock(opCtx)}),
7275
_globalLock(opCtx, isBackground() ? MODE_IS : MODE_IX),
7376
_nss(nss),
7477
_dataThrottle(opCtx, [&]() { return gMaxValidateMBperSec.load(); }) {
@@ -224,6 +227,9 @@ Status ValidateState::initializeCollection(OperationContext* opCtx) {
224227
_dataThrottle.turnThrottlingOff();
225228
}
226229

230+
// We can release the validate lock here as we now have exclusive access to the collection and
231+
// no CRUD operations or fast count changes will occur.
232+
_validateLock.reset();
227233
return Status::OK();
228234
}
229235

@@ -258,5 +264,26 @@ void ValidateState::initializeCursors(OperationContext* opCtx) {
258264
_firstRecordId = record ? std::move(record->id) : RecordId();
259265
}
260266

267+
namespace {
268+
/*
269+
* Oplog Batch Applier takes this lock in exclusive mode when applying the
270+
* batch. Foreground validation waits on this lock to begin validation.
271+
* We must synchronise these operations as foreground validation involves opening a snapshot of the
272+
* most recent data and during oplog application, CRUD operations on the document are performed in a
273+
* transaction separate from the fast count updates. This could potentially lead to validation
274+
* opening a snapshot between these two transactions and result in an incorrectly reported fast
275+
* count discrepancy.
276+
*/
277+
Lock::ResourceMutex validateLock("validateLock");
278+
} // namespace
279+
280+
Lock::ExclusiveLock ValidateState::obtainExclusiveValidationLock(OperationContext* opCtx) {
281+
return Lock::ExclusiveLock(opCtx, validateLock);
282+
}
283+
284+
Lock::SharedLock ValidateState::obtainSharedValidationLock(OperationContext* opCtx) {
285+
return Lock::SharedLock(opCtx, validateLock);
286+
}
287+
261288
} // namespace CollectionValidation
262289
} // namespace mongo

src/mongo/db/catalog/validate/validate_state.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,9 +141,28 @@ class ValidateState : public ValidationOptions {
141141
return _validateTs;
142142
}
143143

144+
/**
145+
* Returns a scoped object, which holds the 'validateLock' in exclusive mode for
146+
* the given scope. It must only be used to coordinate validation with concurrent
147+
* oplog batch applications.
148+
*/
149+
static Lock::ExclusiveLock obtainExclusiveValidationLock(OperationContext* opCtx);
150+
151+
/**
152+
* Returns a scoped object, which holds the 'validateLock' in shared mode for
153+
* the given scope. It must only be used to coordinate validation with concurrent
154+
* oplog batch applications.
155+
*/
156+
static Lock::SharedLock obtainSharedValidationLock(OperationContext* opCtx);
157+
144158
private:
145159
ValidateState() = delete;
146160

161+
// This lock needs to be obtained before the global lock. Initialise in the validation
162+
// constructor. Oplog Batch Applier takes this lock in exclusive mode when applying the batch.
163+
// Foreground validation waits on this lock to begin.
164+
boost::optional<Lock::SharedLock> _validateLock;
165+
147166
// To avoid racing with shutdown/rollback, this lock must be initialized early.
148167
Lock::GlobalLock _globalLock;
149168

src/mongo/db/repl/oplog_applier_impl.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
#include "mongo/bson/bsonobj.h"
5252
#include "mongo/bson/timestamp.h"
5353
#include "mongo/db/admission/execution_admission_context.h"
54+
#include "mongo/db/catalog/validate/validate_state.h"
5455
#include "mongo/db/client.h"
5556
#include "mongo/db/collection_crud/collection_write_path.h"
5657
#include "mongo/db/commands/fsync.h"
@@ -565,6 +566,9 @@ void OplogApplierImpl::_run(OplogBuffer* oplogBuffer) {
565566
// Don't allow the fsync+lock thread to see intermediate states of batch application.
566567
stdx::lock_guard<stdx::mutex> fsynclk(oplogApplierLockedFsync);
567568

569+
// Obtain the validation lock to synchronise batch application with validation.
570+
auto lk = CollectionValidation::ValidateState::obtainExclusiveValidationLock(&opCtx);
571+
568572
// Apply the operations in this batch. '_applyOplogBatch' returns the optime of the
569573
// last op that was applied, which should be the last optime in the batch.
570574
auto swLastOpTimeAppliedInBatch = _applyOplogBatch(&opCtx, ops.releaseBatch());

0 commit comments

Comments
 (0)