stellar · marta-lokhova · Mar 24, 2025 · Mar 11, 2025
diff --git a/src/bucket/InMemoryIndex.cpp b/src/bucket/InMemoryIndex.cpp
@@ -4,49 +4,36 @@
 
 #include "bucket/InMemoryIndex.h"
 #include "bucket/BucketManager.h"
-#include "bucket/LedgerCmp.h"
 #include "bucket/LiveBucket.h"
+#include "util/GlobalChecks.h"
 #include "util/XDRStream.h"
 #include "util/types.h"
 #include "xdr/Stellar-ledger-entries.h"
-#include <algorithm>
 
 namespace stellar
 {
 
 void
-InMemoryBucketState::pushBack(BucketEntry const& be)
+InMemoryBucketState::insert(BucketEntry const& be)
 {
-    if (!mEntries.empty())
-    {
-        if (!BucketEntryIdCmp<LiveBucket>{}(*mEntries.back(), be))
-        {
-            throw std::runtime_error(
-                "InMemoryBucketState::push_back: Inserted out of order entry!");
-        }
-    }
-
-    mEntries.push_back(std::make_shared<BucketEntry>(be));
+    auto [_, inserted] = mEntries.insert(
+        InternalInMemoryBucketEntry(std::make_shared<BucketEntry const>(be)));
+    releaseAssertOrThrow(inserted);
 }
 
-// Perform a binary search using start iter as lower bound for search key.
+// Look up searchKey in the hash set. `start` is unused, and the iterator
+// returned is always mEntries.begin() whether or not the key is found.
 std::pair<IndexReturnT, InMemoryBucketState::IterT>
 InMemoryBucketState::scan(IterT start, LedgerKey const& searchKey) const
 {
-    auto it =
-        std::lower_bound(start, mEntries.end(), searchKey,
-                         [](std::shared_ptr<BucketEntry const> const& element,
-                            LedgerKey const& key) {
-                             return getBucketLedgerKey(*element) < key;
-                         });
-
+    ZoneScoped;
+    // Wrap the key in a query-only element so it can be compared against
+    // the stored entries.
+    auto it = mEntries.find(InternalInMemoryBucketEntry(searchKey));
     // If we found the key
-    if (it != mEntries.end() && getBucketLedgerKey(**it) == searchKey)
+    if (it != mEntries.end())
     {
-        return {IndexReturnT(*it), it};
+        return {IndexReturnT(it->get()), mEntries.begin()};
     }
 
-    return {IndexReturnT(), it};
+    return {IndexReturnT(), mEntries.begin()};
 }
 
 InMemoryIndex::InMemoryIndex(BucketManager const& bm,
@@ -99,7 +86,7 @@ InMemoryIndex::InMemoryIndex(BucketManager const& bm,
         }
 
         // Populate inMemoryState
-        mInMemoryState.pushBack(be);
+        mInMemoryState.insert(be);
 
         // Populate offerRange
         if (!firstOffer && lk.type() == OFFER)

diff --git a/src/bucket/InMemoryIndex.h b/src/bucket/InMemoryIndex.h
@@ -8,31 +8,158 @@
 #include "bucket/BucketUtils.h"
 #include "xdr/Stellar-ledger-entries.h"
 
-#include <variant>
-#include <vector>
+#include "ledger/LedgerHashUtils.h"
+#include <memory>
+#include <stdexcept>
+#include <unordered_set>
+#include <utility>
 
 namespace stellar
 {
 
 class SHA256;
 
+// LedgerKey sizes usually dominate LedgerEntry size, so we don't want to
+// store a key-value map to be memory efficient. Instead, we store a set of
+// InternalInMemoryBucketEntry objects, which is a wrapper around either a
+// LedgerKey or cached BucketEntry. This allows us to use std::unordered_set to
+// efficiently store cache entries, but allows lookup by key only.
+// Note that C++20 allows heterogeneous lookup in unordered_set, so we can
+// simplify this class once we upgrade.
+class InternalInMemoryBucketEntry
+{
+  private:
+    // Common interface over the two representations an element can take:
+    // a stored bucket entry (ValueEntry) or a bare lookup key (QueryKey).
+    struct AbstractEntry
+    {
+        virtual ~AbstractEntry() = default;
+
+        // Returns, by value, the LedgerKey that identifies this element.
+        virtual LedgerKey copyKey() const = 0;
+
+        // Hash of the identifying LedgerKey. Both implementations hash the
+        // key with std::hash<LedgerKey>, so a QueryKey and a ValueEntry
+        // for the same key land in the same bucket.
+        virtual size_t hash() const = 0;
+
+        // Returns the stored entry pointer. QueryKey has none and throws.
+        virtual IndexPtrT const& get() const = 0;
+
+        // Elements compare equal when their LedgerKeys are equal, whichever
+        // concrete type holds them. Non-virtual: no subclass overrides it.
+        bool
+        operator==(AbstractEntry const& other) const
+        {
+            return copyKey() == other.copyKey();
+        }
+    };
+
+    // "Value" entry type used for storing BucketEntry in cache
+    struct ValueEntry final : public AbstractEntry
+    {
+      private:
+        IndexPtrT entry;
+
+      public:
+        // Takes the pointer by value and moves it into place, so handing
+        // over a temporary does not cost an extra reference-count update.
+        explicit ValueEntry(IndexPtrT entry) : entry(std::move(entry))
+        {
+        }
+
+        LedgerKey
+        copyKey() const override
+        {
+            return getBucketLedgerKey(*entry);
+        }
+
+        size_t
+        hash() const override
+        {
+            return std::hash<LedgerKey>{}(getBucketLedgerKey(*entry));
+        }
+
+        IndexPtrT const&
+        get() const override
+        {
+            return entry;
+        }
+    };
+
+    // "Key" entry type only used for querying the cache
+    struct QueryKey final : public AbstractEntry
+    {
+      private:
+        LedgerKey ledgerKey;
+
+      public:
+        explicit QueryKey(LedgerKey const& ledgerKey) : ledgerKey(ledgerKey)
+        {
+        }
+
+        LedgerKey
+        copyKey() const override
+        {
+            return ledgerKey;
+        }
+
+        size_t
+        hash() const override
+        {
+            return std::hash<LedgerKey>{}(ledgerKey);
+        }
+
+        // A query key carries no entry; asking for one is a caller error.
+        IndexPtrT const&
+        get() const override
+        {
+            throw std::runtime_error("Called get() on QueryKey");
+        }
+    };
+
+    // Owns exactly one of the two representations above.
+    std::unique_ptr<AbstractEntry> impl;
+
+  public:
+    // Wraps a stored entry. Left implicit so existing call sites that rely
+    // on conversion from IndexPtrT keep compiling.
+    InternalInMemoryBucketEntry(IndexPtrT entry)
+        : impl(std::make_unique<ValueEntry>(std::move(entry)))
+    {
+    }
+
+    // Wraps a key for lookup only; get() on the result throws.
+    InternalInMemoryBucketEntry(LedgerKey const& ledgerKey)
+        : impl(std::make_unique<QueryKey>(ledgerKey))
+    {
+    }
+
+    // Hash of the wrapped element's LedgerKey.
+    size_t
+    hash() const
+    {
+        return impl->hash();
+    }
+
+    // True when both elements identify the same LedgerKey.
+    bool
+    operator==(InternalInMemoryBucketEntry const& other) const
+    {
+        return impl->operator==(*other.impl);
+    }
+
+    // Returns the stored entry pointer; throws std::runtime_error when this
+    // element was built from a LedgerKey.
+    IndexPtrT const&
+    get() const
+    {
+        return impl->get();
+    }
+};
+
+// Hasher plugged into std::unordered_set for InternalInMemoryBucketEntry
+// elements; it forwards to the element's own hash() member.
+struct InternalInMemoryBucketEntryHash
+{
+    size_t
+    operator()(InternalInMemoryBucketEntry const& entry) const
+    {
+        size_t const digest = entry.hash();
+        return digest;
+    }
+};
+
 // For small Buckets, we can cache all contents in memory. Because we cache all
 // entries, the index is just as large as the Bucket itself, so we never persist
 // this index type. It is always recreated on startup.
 class InMemoryBucketState : public NonMovableOrCopyable
 {
-    // Entries sorted by LedgerKey. INIT/LIVE entries stored as
-    // LedgerEntry, DEADENTRY stored as LedgerKey.
-    std::vector<IndexPtrT> mEntries;
+    using InMemorySet = std::unordered_set<InternalInMemoryBucketEntry,
+                                           InternalInMemoryBucketEntryHash>;
+
+    InMemorySet mEntries;
 
   public:
-    using IterT = std::vector<IndexPtrT>::const_iterator;
+    using IterT = InMemorySet::const_iterator;
 
-    // Insert a LedgerEntry (INIT/LIVE) into the ordered container. Entries must
-    // be ordered.
-    void pushBack(BucketEntry const& be);
+    // Insert a LedgerEntry (INIT/LIVE) into the cache.
+    void insert(BucketEntry const& be);
 
-    // Find a LedgerEntry by key starting from the given iterator.
+    // Find a LedgerEntry by key. The returned iterator is always
+    // mEntries.begin() and start is ignored; the signature only exists to
+    // stay consistent with DiskIndex::scan.
     std::pair<IndexReturnT, IterT> scan(IterT start,
                                         LedgerKey const& searchKey) const;