Skip to content

Commit c3df175

Browse files
authored
[Blockstore] Correctly handle errors for unconfirmed blobs (#4588)
- Correctly handle errors for unconfirmed blobs - Add controller to define actor execution order in tests - Add test with Unconfirmed blobs and error from blob storage References #4415 #4414
1 parent e0de69c commit c3df175

File tree

3 files changed

+123
-3
lines changed

3 files changed

+123
-3
lines changed

.github/config/muted_ya.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,2 @@
11
cloud/filestore/tests/fio_index/mount-kikimr-test *
22
cloud/filestore/tests/fio_index/mount-local-test *
3-
cloud/blockstore/libs/storage/partition/ut TPartitionTest.ShouldSendCorrectBarriersInfoAfterReboot

cloud/blockstore/libs/storage/partition/part_actor_writeblocks.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,7 @@ void TPartitionActor::HandleWriteBlocksCompleted(
347347
ProfileLog->Write(std::move(record));
348348
}
349349

350-
if (msg->UnconfirmedBlobsAdded) {
350+
if (msg->UnconfirmedBlobsAdded && !HasError(msg->GetError())) {
351351
// blobs are confirmed, but AddBlobs request will be executed
352352
// (for this commit) later
353353
State->BlobsConfirmed(commitId, std::move(msg->BlobsToConfirm));

cloud/blockstore/libs/storage/partition/part_ut.cpp

Lines changed: 122 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,75 @@ class TDummyActor final
158158

159159
////////////////////////////////////////////////////////////////////////////////
160160

161+
class TEventExecutionOrderController
162+
{
163+
public:
164+
TEventExecutionOrderController(
165+
TTestActorRuntimeBase& runtime,
166+
const TVector<std::pair<ui32, ui32>>& eventOrders,
167+
TTestActorRuntimeBase::TEventFilter baseFilter = nullptr)
168+
{
169+
for (const auto& [prerequisiteEvent, dependentEvent]: eventOrders) {
170+
EventDependencies[dependentEvent] = prerequisiteEvent;
171+
}
172+
173+
runtime.SetEventFilter(
174+
[this, baseFilter, &runtime](
175+
TTestActorRuntimeBase& rt,
176+
TAutoPtr<IEventHandle>& ev) -> bool
177+
{
178+
Y_ABORT_UNLESS(ev);
179+
180+
TActorId recipient = ev->GetRecipientRewrite();
181+
ui32 eventType = ev->GetTypeRewrite();
182+
183+
bool baseFilterResult = baseFilter ? baseFilter(rt, ev) : false;
184+
185+
auto prerequisiteEventIt = EventDependencies.find(eventType);
186+
if (prerequisiteEventIt != EventDependencies.end()) {
187+
ui32 prerequisiteEvent = prerequisiteEventIt->second;
188+
auto& processedEventsByRecipient =
189+
ProcessedEvents[recipient];
190+
if (!processedEventsByRecipient.contains(
191+
prerequisiteEvent)) {
192+
DelayedEvents[recipient][prerequisiteEvent] =
193+
std::move(ev);
194+
return true;
195+
}
196+
}
197+
198+
auto& delayedEventsByRecipient = DelayedEvents[recipient];
199+
auto delayedEventIt = delayedEventsByRecipient.find(eventType);
200+
if (delayedEventIt != delayedEventsByRecipient.end()) {
201+
ProcessedEvents[recipient].insert(eventType);
202+
TAutoPtr<IEventHandle> delayedEvent =
203+
std::move(delayedEventIt->second);
204+
delayedEventsByRecipient.erase(delayedEventIt);
205+
206+
// Remove the recipient from the map if there are no more
207+
// delayed events
208+
if (delayedEventsByRecipient.empty()) {
209+
DelayedEvents.erase(recipient);
210+
}
211+
212+
runtime.Schedule(
213+
delayedEvent,
214+
TDuration::MilliSeconds(100));
215+
}
216+
217+
return baseFilterResult;
218+
});
219+
}
220+
221+
private:
222+
THashMap<ui32, ui32> EventDependencies;
223+
THashMap<TActorId, THashSet<ui32>> ProcessedEvents;
224+
// Map of delayed events by recipient and event type
225+
THashMap<TActorId, THashMap<ui32, TAutoPtr<IEventHandle>>> DelayedEvents;
226+
};
227+
228+
////////////////////////////////////////////////////////////////////////////////
229+
161230
void InitTestActorRuntime(
162231
TTestActorRuntime& runtime,
163232
const NProto::TStorageServiceConfig& config,
@@ -13010,9 +13079,61 @@ Y_UNIT_TEST_SUITE(TPartitionTest)
1301013079
}
1301113080
}
1301213081

13082+
Y_UNIT_TEST(ShouldCorrectlyHandleWriteBlocksErrorsWithUnconfirmedBlobs)
13083+
{
13084+
auto config = DefaultConfig();
13085+
config.SetWriteBlobThreshold(1);
13086+
config.SetAddingUnconfirmedBlobsEnabled(true);
13087+
auto runtime = PrepareTestActorRuntime(
13088+
config,
13089+
4096,
13090+
{},
13091+
{.MediaKind = NCloud::NProto::STORAGE_MEDIA_HYBRID});
13092+
13093+
TTestActorRuntimeBase::TEventFilter rejectWriteBlobFilter =
13094+
[&](TTestActorRuntimeBase&, TAutoPtr<IEventHandle>& ev)
13095+
{
13096+
switch (ev->GetTypeRewrite()) {
13097+
case TEvPartitionPrivate::EvWriteBlobResponse: {
13098+
auto* msg =
13099+
ev->Get<TEvPartitionPrivate::TEvWriteBlobResponse>();
13100+
auto& e = const_cast<NProto::TError&>(msg->Error);
13101+
e.SetCode(E_REJECTED);
13102+
return false;
13103+
}
13104+
};
13105+
return false;
13106+
};
13107+
13108+
// Set up event order controller to ensure that
13109+
// AddUnconfirmedBlobsResponse is processed before WriteBlobResponse
13110+
TEventExecutionOrderController orderController(
13111+
*runtime,
13112+
TVector<std::pair<ui32, ui32>>{
13113+
{TEvPartitionPrivate::EvAddUnconfirmedBlobsResponse,
13114+
TEvPartitionPrivate::EvWriteBlobResponse}},
13115+
rejectWriteBlobFilter);
13116+
13117+
TPartitionClient partition(*runtime);
13118+
partition.WaitReady();
13119+
13120+
partition.SendWriteBlocksRequest(TBlockRange32::WithLength(10, 1), 1);
13121+
13122+
runtime->DispatchEvents(TDispatchOptions(), TDuration::Seconds(1));
13123+
13124+
{
13125+
auto response = partition.StatPartition();
13126+
const auto& stats = response->Record.GetStats();
13127+
// Without proper error handling it crashes in BlobsConfirmed due to
13128+
// checksum verification, so execution never reaches this point. If
13129+
// we disable verification, we hit the `1 != 0` check — meaning we
13130+
// end up confirming an E_REJECTED blob without a checksum.
13131+
UNIT_ASSERT_VALUES_EQUAL(1, stats.GetUnconfirmedBlobCount());
13132+
}
13133+
}
13134+
1301313135
Y_UNIT_TEST(ShouldSendCorrectBarriersInfoAfterReboot)
1301413136
{
13015-
return; // TODO: fix this test. See issue #4414
1301613137
auto config = DefaultConfig();
1301713138
config.SetWriteBlobThreshold(1);
1301813139
config.SetAddingUnconfirmedBlobsEnabled(true);

0 commit comments

Comments
 (0)