Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch in-memory index to hash set #4665

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 11 additions & 24 deletions src/bucket/InMemoryIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,49 +4,36 @@

#include "bucket/InMemoryIndex.h"
#include "bucket/BucketManager.h"
#include "bucket/LedgerCmp.h"
#include "bucket/LiveBucket.h"
#include "util/GlobalChecks.h"
#include "util/XDRStream.h"
#include "util/types.h"
#include "xdr/Stellar-ledger-entries.h"
#include <algorithm>

namespace stellar
{

// NOTE(review): this span is a rendered diff hunk without +/- markers, so the
// pushBack() lines and the insert() lines appear back to back.
//
// pushBack(): throws std::runtime_error when mEntries is non-empty and
// BucketEntryIdCmp<LiveBucket> does not order the current last entry before
// `be`; otherwise appends a shared_ptr copy of `be`.
//
// insert(): copies `be` into a shared_ptr<BucketEntry const>, wraps it in an
// InternalInMemoryBucketEntry and inserts it into mEntries. The `inserted`
// flag from the insertion is handed to releaseAssertOrThrow, so no ordering
// between successive calls is checked here.
void
InMemoryBucketState::pushBack(BucketEntry const& be)
InMemoryBucketState::insert(BucketEntry const& be)
{
if (!mEntries.empty())
{
if (!BucketEntryIdCmp<LiveBucket>{}(*mEntries.back(), be))
{
throw std::runtime_error(
"InMemoryBucketState::push_back: Inserted out of order entry!");
}
}

mEntries.push_back(std::make_shared<BucketEntry>(be));
// `inserted` is false when the set already holds an element that compares
// equal to the new one (std::unordered_set::insert semantics).
auto [_, inserted] = mEntries.insert(
InternalInMemoryBucketEntry(std::make_shared<BucketEntry const>(be)));
// Presumably aborts or throws when `inserted` is false -- confirm against
// releaseAssertOrThrow in util/GlobalChecks.h.
releaseAssertOrThrow(inserted);
}

// Perform a binary search using start iter as lower bound for search key.
// NOTE(review): this span is a rendered diff hunk without +/- markers. The
// sentence above matches the std::lower_bound lines below; the
// mEntries.find() lines perform a keyed lookup instead and never read
// `start`, so the sentence looks stale for that version -- confirm and
// reword.
std::pair<IndexReturnT, InMemoryBucketState::IterT>
InMemoryBucketState::scan(IterT start, LedgerKey const& searchKey) const
{
// Ordered-container version: first element in [start, end) whose key is not
// less than searchKey.
auto it =
std::lower_bound(start, mEntries.end(), searchKey,
[](std::shared_ptr<BucketEntry const> const& element,
LedgerKey const& key) {
return getBucketLedgerKey(*element) < key;
});

ZoneScoped;
// Set version: wrap searchKey in a query-only InternalInMemoryBucketEntry and
// look it up directly.
auto it = mEntries.find(InternalInMemoryBucketEntry(searchKey));
// If we found the key
if (it != mEntries.end() && getBucketLedgerKey(**it) == searchKey)
if (it != mEntries.end())
{
return {IndexReturnT(*it), it};
// Set version returns mEntries.begin() as the iterator, not the match
// position.
return {IndexReturnT(it->get()), mEntries.begin()};
}

// Not found: a default-constructed IndexReturnT is returned.
return {IndexReturnT(), it};
return {IndexReturnT(), mEntries.begin()};
}

InMemoryIndex::InMemoryIndex(BucketManager const& bm,
Expand Down Expand Up @@ -99,7 +86,7 @@ InMemoryIndex::InMemoryIndex(BucketManager const& bm,
}

// Populate inMemoryState
mInMemoryState.pushBack(be);
mInMemoryState.insert(be);

// Populate offerRange
if (!firstOffer && lk.type() == OFFER)
Expand Down
147 changes: 137 additions & 10 deletions src/bucket/InMemoryIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,31 +8,158 @@
#include "bucket/BucketUtils.h"
#include "ledger/LedgerHashUtils.h"
#include "xdr/Stellar-ledger-entries.h"

#include <memory>
#include <stdexcept>
#include <unordered_set>
#include <utility>
#include <variant>
#include <vector>

namespace stellar
{

class SHA256;

// LedgerKey sizes usually dominate LedgerEntry size, so we don't want to
// store a key-value map to be memory efficient. Instead, we store a set of
// InternalInMemoryBucketEntry objects, which is a wrapper around either a
// LedgerKey or cached BucketEntry. This allows us to use std::unordered_set to
// efficiently store cache entries, but allows lookup by key only.
// Note that C++20 allows heterogeneous lookup in unordered_set, so we can
// simplify this class once we upgrade.
class InternalInMemoryBucketEntry
{
private:
struct AbstractEntry
{
virtual ~AbstractEntry() = default;
virtual LedgerKey copyKey() const = 0;
virtual size_t hash() const = 0;
virtual IndexPtrT const& get() const = 0;

virtual bool
operator==(const AbstractEntry& other) const
{
return copyKey() == other.copyKey();
}
};

// "Value" entry type used for storing BucketEntry in cache
struct ValueEntry : public AbstractEntry
{
private:
IndexPtrT entry;

public:
ValueEntry(IndexPtrT entry) : entry(entry)
{
}

LedgerKey
copyKey() const override
{
return getBucketLedgerKey(*entry);
}

size_t
hash() const override
{
return std::hash<LedgerKey>{}(getBucketLedgerKey(*entry));
}

IndexPtrT const&
get() const override
{
return entry;
}
};

// "Key" entry type only used for querying the cache
struct QueryKey : public AbstractEntry
{
private:
LedgerKey ledgerKey;

public:
QueryKey(LedgerKey const& ledgerKey) : ledgerKey(ledgerKey)
{
}

LedgerKey
copyKey() const override
{
return ledgerKey;
}

size_t
hash() const override
{
return std::hash<LedgerKey>{}(ledgerKey);
}

IndexPtrT const&
get() const override
{
throw std::runtime_error("Called get() on QueryKey");
}
};

std::unique_ptr<AbstractEntry> impl;

public:
InternalInMemoryBucketEntry(IndexPtrT entry)
: impl(std::make_unique<ValueEntry>(entry))
{
}

InternalInMemoryBucketEntry(LedgerKey const& ledgerKey)
: impl(std::make_unique<QueryKey>(ledgerKey))
{
}

size_t
hash() const
{
return impl->hash();
}

bool
operator==(InternalInMemoryBucketEntry const& other) const
{
return impl->operator==(*other.impl);
}

IndexPtrT const&
get() const
{
return impl->get();
}
};

struct InternalInMemoryBucketEntryHash
{
size_t
operator()(InternalInMemoryBucketEntry const& entry) const
{
return entry.hash();
}
};

// For small Buckets, we can cache all contents in memory. Because we cache all
// entries, the index is just as large as the Bucket itself, so we never persist
// this index type. It is always recreated on startup.
class InMemoryBucketState : public NonMovableOrCopyable
{
// Entries sorted by LedgerKey. INIT/LIVE entries stored as
// LedgerEntry, DEADENTRY stored as LedgerKey.
std::vector<IndexPtrT> mEntries;
using InMemorySet = std::unordered_set<InternalInMemoryBucketEntry,
InternalInMemoryBucketEntryHash>;

InMemorySet mEntries;

public:
using IterT = std::vector<IndexPtrT>::const_iterator;
using IterT = InMemorySet::const_iterator;

// Insert a LedgerEntry (INIT/LIVE) into the ordered container. Entries must
// be ordered.
void pushBack(BucketEntry const& be);
// Insert a LedgerEntry (INIT/LIVE) into the cache.
void insert(BucketEntry const& be);

// Find a LedgerEntry by key starting from the given iterator.
// Find a LedgerEntry. IterT::begin is always returned, and start is
// ignored. This interface just helps maintain consistency with
// DiskIndex::scan.
std::pair<IndexReturnT, IterT> scan(IterT start,
LedgerKey const& searchKey) const;

Expand Down