Skip to content

Commit 56e2ccf

Browse files
author
yawzhang
committed
Fix issues found in Storage Hammer
1. concurrency issue between remove logdev and device truncate 2. log index→entry misalignment (issue: #857) 3. fix metric compute logic
1 parent af72d2f commit 56e2ccf

File tree

7 files changed

+398
-8
lines changed

7 files changed

+398
-8
lines changed

conanfile.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
class HomestoreConan(ConanFile):
1111
name = "homestore"
12-
version = "7.3.2"
12+
version = "7.3.3"
1313

1414
homepage = "https://github.com/eBay/Homestore"
1515
description = "HomeStore Storage Engine"
@@ -53,7 +53,7 @@ def build_requirements(self):
5353

5454
def requirements(self):
5555
self.requires("iomgr/[^12]@oss/master", transitive_headers=True)
56-
self.requires("sisl/[^13]@oss/master", transitive_headers=True)
56+
self.requires("sisl/[>=13.2.2]@oss/master", transitive_headers=True)
5757
self.requires("nuraft_mesg/[^4]@oss/main", transitive_headers=True)
5858

5959
self.requires("farmhash/cci.20190513@", transitive_headers=True)

src/lib/.DS_Store

6 KB
Binary file not shown.

src/lib/logstore/log_store.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -360,18 +360,21 @@ bool HomeLogStore::rollback(logstore_seq_num_t to_lsn) {
360360
incr_pending_request_num();
361361
// Fast path
362362
if (to_lsn == m_tail_lsn.load()) {
363+
THIS_LOGSTORE_LOG(INFO, "Rollback to the current tail_lsn {}, no-op", to_lsn);
363364
decr_pending_request_num();
364365
return true;
365366
}
366367

367-
if (to_lsn > m_tail_lsn.load() || to_lsn < m_start_lsn.load()) {
368+
// Special case: allow rollback to exactly start_lsn - 1.
369+
// This handles the scenario where all logs were truncated and a leader switch happens before new logs commit.
370+
if (to_lsn > m_tail_lsn.load() || to_lsn < m_start_lsn.load() - 1) {
368371
HS_LOG_ASSERT(false, "Attempted to rollback to {} which is not in the range of [{}, {}]", to_lsn,
369-
m_start_lsn.load(), m_tail_lsn.load());
372+
m_start_lsn.load() - 1, m_tail_lsn.load());
370373
decr_pending_request_num();
371374
return false;
372375
}
373376

374-
THIS_LOGSTORE_LOG(INFO, "Rolling back to {}, tail {}", to_lsn, m_tail_lsn.load());
377+
THIS_LOGSTORE_LOG(INFO, "Rolling back to {}, start {} tail {}", to_lsn, m_start_lsn.load(), m_tail_lsn.load());
375378
bool do_flush{false};
376379
do {
377380
{

src/lib/logstore/log_store_service.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,6 @@ void LogStoreService::remove_log_store(logdev_id_t logdev_id, logstore_id_t stor
309309
HS_LOG(INFO, logstore, "Removing logstore {} from logdev {}", store_id, logdev_id);
310310
incr_pending_request_num();
311311
folly::SharedMutexWritePriority::WriteHolder holder(m_logdev_map_mtx);
312-
COUNTER_INCREMENT(m_metrics, logstores_count, 1);
313312
const auto it = m_id_logdev_map.find(logdev_id);
314313
if (it == m_id_logdev_map.end()) {
315314
HS_LOG(WARN, logstore, "logdev id {} doesnt exist", logdev_id);
@@ -325,6 +324,7 @@ void LogStoreService::device_truncate() {
325324
// TODO: make device_truncate_under_lock return future and do collectAllFutures;
326325
if (is_stopping()) return;
327326
incr_pending_request_num();
327+
folly::SharedMutexWritePriority::ReadHolder holder(m_logdev_map_mtx);
328328
for (auto& [id, logdev] : m_id_logdev_map) {
329329
HS_LOG(DEBUG, logstore, "Truncating logdev {}", id);
330330
logdev->truncate();

src/lib/replication/log_store/home_raft_log_store.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,8 +187,11 @@ void HomeRaftLogStore::write_at(ulong index, nuraft::ptr< nuraft::log_entry >& e
187187
// calls, but it is dangerous to set higher number.
188188
m_last_durable_lsn = -1;
189189

190-
m_log_store->append_async(sisl::io_blob{buf->data_begin(), uint32_cast(buf->size()), false /* is_aligned */},
191-
nullptr /* cookie */, [buf](int64_t, sisl::io_blob&, logdev_key, void*) {});
190+
auto const appended_seq =
191+
m_log_store->append_async(sisl::io_blob{buf->data_begin(), uint32_cast(buf->size()), false /* is_aligned */},
192+
nullptr /* cookie */, [buf](int64_t, sisl::io_blob&, logdev_key, void*) {});
193+
HS_REL_ASSERT_EQ(to_repl_lsn(appended_seq), static_cast< repl_lsn_t >(index),
194+
"write_at appended lsn mismatch: expected {} actual {}", index, to_repl_lsn(appended_seq));
192195

193196
auto position_in_cache = index % m_log_entry_cache.size();
194197
{

src/tests/test_home_raft_logstore.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,15 @@ class RaftLogStoreClient {
6666
ASSERT_EQ(m_rls->start_index(), m_start_lsn) << "Start Index not expected to be updated after insertion";
6767
}
6868

69+
void write_at_start_bounday_test(nuraft::ptr< nuraft::log_entry >& le) {
70+
m_rls->write_at(m_start_lsn, le);
71+
ASSERT_EQ(m_rls->entry_at(m_start_lsn)->get_term(), le->get_term());
72+
m_next_lsn = m_start_lsn + 1;
73+
ASSERT_EQ(m_rls->start_index(), m_start_lsn);
74+
ASSERT_EQ(m_rls->last_index(), m_start_lsn);
75+
ASSERT_EQ(m_rls->next_slot(), m_next_lsn);
76+
}
77+
6978
void rollback_test() {
7079
m_next_lsn = (m_next_lsn - m_start_lsn) / 2; // Rollback half of the current logs
7180
++m_cur_term;
@@ -92,6 +101,13 @@ class RaftLogStoreClient {
92101
validate_all_logs();
93102
}
94103

104+
void compact_all_test() {
105+
m_start_lsn = m_next_lsn;
106+
m_rls->compact(m_next_lsn - 1);
107+
ASSERT_EQ(m_rls->start_index(), m_next_lsn) << "Post compaction, start_index is invalid";
108+
validate_all_logs();
109+
}
110+
95111
void pack_test(uint64_t from, int32_t cnt, pack_result_t& out_pack) {
96112
out_pack.actual_data = m_rls->pack(from, cnt);
97113
ASSERT_NE(out_pack.actual_data.get(), nullptr);
@@ -258,6 +274,24 @@ TEST_F(TestRaftLogStore, lifecycle_test) {
258274
this->m_follower_store.append_read_test(nrecords); // total_records in follower = 4000
259275
}
260276

277+
TEST_F(TestRaftLogStore, write_at_start_boundary_test) {
278+
LOGINFO("Step 1: Append some records and then compact all");
279+
this->m_leader_store.append_read_test(2);
280+
this->m_leader_store.compact_all_test();
281+
282+
LOGINFO("Step 2: Write at start boundary when no logs exist, expect allowed");
283+
// no more logs exist, write_at at start boundary should be allowed
284+
auto le = nuraft::cs_new< nuraft::log_entry >(5 /*term*/, nuraft::buffer::alloc(8));
285+
le->get_buf().put("ALLOW");
286+
this->m_leader_store.write_at_start_bounday_test(le);
287+
288+
LOGINFO("Step 3: Write at start boundary again, expect allowed");
289+
// write_at start boundary again to make sure rollback is allowed
290+
le = nuraft::cs_new< nuraft::log_entry >(6 /*term*/, nuraft::buffer::alloc(8));
291+
le->get_buf().put("ALLOW2");
292+
this->m_leader_store.write_at_start_bounday_test(le);
293+
}
294+
261295
SISL_OPTIONS_ENABLE(logging, test_home_raft_log_store, iomgr, test_common_setup)
262296
SISL_OPTION_GROUP(test_home_raft_log_store,
263297
(num_records, "", "num_records", "number of record to test",

0 commit comments

Comments
 (0)