From 3cd1714b4cb4c7b0416e223cffcbe126ea79ac84 Mon Sep 17 00:00:00 2001 From: Garand Tyson Date: Mon, 10 Mar 2025 22:36:01 -0700 Subject: [PATCH] Switch in-memory index to hash set --- src/bucket/InMemoryIndex.cpp | 35 +++------ src/bucket/InMemoryIndex.h | 147 ++++++++++++++++++++++++++++++++--- 2 files changed, 148 insertions(+), 34 deletions(-) diff --git a/src/bucket/InMemoryIndex.cpp b/src/bucket/InMemoryIndex.cpp index 1c69f4fe1e..9b22e8c102 100644 --- a/src/bucket/InMemoryIndex.cpp +++ b/src/bucket/InMemoryIndex.cpp @@ -4,49 +4,36 @@ #include "bucket/InMemoryIndex.h" #include "bucket/BucketManager.h" -#include "bucket/LedgerCmp.h" #include "bucket/LiveBucket.h" +#include "util/GlobalChecks.h" #include "util/XDRStream.h" #include "util/types.h" #include "xdr/Stellar-ledger-entries.h" -#include namespace stellar { void -InMemoryBucketState::pushBack(BucketEntry const& be) +InMemoryBucketState::insert(BucketEntry const& be) { - if (!mEntries.empty()) - { - if (!BucketEntryIdCmp{}(*mEntries.back(), be)) - { - throw std::runtime_error( - "InMemoryBucketState::push_back: Inserted out of order entry!"); - } - } - - mEntries.push_back(std::make_shared(be)); + auto [_, inserted] = mEntries.insert( + InternalInMemoryBucketEntry(std::make_shared(be))); + releaseAssertOrThrow(inserted); } // Perform a binary search using start iter as lower bound for search key. std::pair InMemoryBucketState::scan(IterT start, LedgerKey const& searchKey) const { - auto it = - std::lower_bound(start, mEntries.end(), searchKey, - [](std::shared_ptr const& element, - LedgerKey const& key) { - return getBucketLedgerKey(*element) < key; - }); - + ZoneScoped; + auto it = mEntries.find(InternalInMemoryBucketEntry(searchKey)); // If we found the key - if (it != mEntries.end() && getBucketLedgerKey(**it) == searchKey) + if (it != mEntries.end()) { - return {IndexReturnT(*it), it}; + return {IndexReturnT(it->get()), mEntries.begin()}; } - return {IndexReturnT(), it}; + return {IndexReturnT(), mEntries.begin()}; } InMemoryIndex::InMemoryIndex(BucketManager const& bm, @@ -99,7 +86,7 @@ InMemoryIndex::InMemoryIndex(BucketManager const& bm, } // Populate inMemoryState - mInMemoryState.pushBack(be); + mInMemoryState.insert(be); // Populate offerRange if (!firstOffer && lk.type() == OFFER) diff --git a/src/bucket/InMemoryIndex.h b/src/bucket/InMemoryIndex.h index bf5efa58de..2eb6ce0d2a 100644 --- a/src/bucket/InMemoryIndex.h +++ b/src/bucket/InMemoryIndex.h @@ -8,31 +8,158 @@ #include "bucket/BucketUtils.h" #include "xdr/Stellar-ledger-entries.h" -#include -#include +#include "ledger/LedgerHashUtils.h" +#include namespace stellar { class SHA256; +// LedgerKey sizes usually dominate LedgerEntry size, so we don't want to +// store a key-value map to be memory efficient. Instead, we store a set of +// InternalInMemoryBucketEntry objects, which is a wrapper around either a +// LedgerKey or cached BucketEntry. This allows us to use std::unordered_set to +// efficiently store cache entries, but allows lookup by key only. +// Note that C++20 allows heterogeneous lookup in unordered_set, so we can +// simplify this class once we upgrade. +class InternalInMemoryBucketEntry +{ + private: + struct AbstractEntry + { + virtual ~AbstractEntry() = default; + virtual LedgerKey copyKey() const = 0; + virtual size_t hash() const = 0; + virtual IndexPtrT const& get() const = 0; + + virtual bool + operator==(const AbstractEntry& other) const + { + return copyKey() == other.copyKey(); + } + }; + + // "Value" entry type used for storing BucketEntry in cache + struct ValueEntry : public AbstractEntry + { + private: + IndexPtrT entry; + + public: + ValueEntry(IndexPtrT entry) : entry(entry) + { + } + + LedgerKey + copyKey() const override + { + return getBucketLedgerKey(*entry); + } + + size_t + hash() const override + { + return std::hash{}(getBucketLedgerKey(*entry)); + } + + IndexPtrT const& + get() const override + { + return entry; + } + }; + + // "Key" entry type only used for querying the cache + struct QueryKey : public AbstractEntry + { + private: + LedgerKey ledgerKey; + + public: + QueryKey(LedgerKey const& ledgerKey) : ledgerKey(ledgerKey) + { + } + + LedgerKey + copyKey() const override + { + return ledgerKey; + } + + size_t + hash() const override + { + return std::hash{}(ledgerKey); + } + + IndexPtrT const& + get() const override + { + throw std::runtime_error("Called get() on QueryKey"); + } + }; + + std::unique_ptr impl; + + public: + InternalInMemoryBucketEntry(IndexPtrT entry) + : impl(std::make_unique(entry)) + { + } + + InternalInMemoryBucketEntry(LedgerKey const& ledgerKey) + : impl(std::make_unique(ledgerKey)) + { + } + + size_t + hash() const + { + return impl->hash(); + } + + bool + operator==(InternalInMemoryBucketEntry const& other) const + { + return impl->operator==(*other.impl); + } + + IndexPtrT const& + get() const + { + return impl->get(); + } +}; + +struct InternalInMemoryBucketEntryHash +{ + size_t + operator()(InternalInMemoryBucketEntry const& entry) const + { + return entry.hash(); + } +}; + // For small Buckets, we can cache all contents in memory. Because we cache all // entries, the index is just as large as the Bucket itself, so we never persist // this index type. It is always recreated on startup. class InMemoryBucketState : public NonMovableOrCopyable { - // Entries sorted by LedgerKey. INIT/LIVE entries stored as - // LedgerEntry, DEADENTRY stored as LedgerKey. - std::vector mEntries; + using InMemorySet = std::unordered_set; + + InMemorySet mEntries; public: - using IterT = std::vector::const_iterator; + using IterT = InMemorySet::const_iterator; - // Insert a LedgerEntry (INIT/LIVE) into the ordered container. Entries must - // be ordered. - void pushBack(BucketEntry const& be); + // Insert a LedgerEntry (INIT/LIVE) into the cache. + void insert(BucketEntry const& be); - // Find a LedgerEntry by key starting from the given iterator. + // Find a LedgerEntry. IterT::begin is always returned, and start is + // ignored. This interface just helps maintain consistency with + // DiskIndex::scan. std::pair scan(IterT start, LedgerKey const& searchKey) const;