-
Notifications
You must be signed in to change notification settings - Fork 7
Antalya: Cache the list objects operation on object storage using a TTL + prefix matching cache implementation #743
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
3dc33f3
7e182c7
ef985c9
43c1383
989cfe0
157450d
d4af4ae
727b64d
68cbad7
00f58b3
67ccaf0
2b37e0c
0d6e343
0f605c4
3ed2349
f345b33
9242843
4e19b09
7a6eaec
8c4ea48
74b980c
4f55a75
303ee27
e6b379e
d7b50f4
6cfa510
be8c6a1
b60cb95
d91bf00
c6e53a1
f1c3591
14973d2
55ac0bc
e0e19a2
45af8a5
7266d92
8e78b28
2ed102d
28bfcfb
aab089c
dd5934e
0f5057e
27c4dea
d789d1e
fef71c0
6bfcb86
f68725a
7597da0
e7940af
f863a6e
cbfe36d
9092aba
057b0b5
49748c9
96cf2d2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,213 @@ | ||
#include <Storages/Cache/ObjectStorageListObjectsCache.h> | ||
#include <boost/functional/hash.hpp> | ||
|
||
namespace DB | ||
{ | ||
|
||
template <typename Key, typename Mapped, typename HashFunction, typename WeightFunction, typename IsStaleFunction> | ||
class ObjectStorageListObjectsCachePolicy : public TTLCachePolicy<Key, Mapped, HashFunction, WeightFunction, IsStaleFunction> | ||
{ | ||
public: | ||
using BasePolicy = TTLCachePolicy<Key, Mapped, HashFunction, WeightFunction, IsStaleFunction>; | ||
using typename BasePolicy::MappedPtr; | ||
using typename BasePolicy::KeyMapped; | ||
using BasePolicy::cache; | ||
|
||
ObjectStorageListObjectsCachePolicy() | ||
: BasePolicy(std::make_unique<NoCachePolicyUserQuota>()) | ||
{ | ||
} | ||
|
||
MappedPtr get(const Key & key) override | ||
{ | ||
if (const auto it = cache.find(key); it != cache.end()) | ||
{ | ||
if (IsStaleFunction()(it->first)) | ||
{ | ||
BasePolicy::remove(it->first); | ||
return {}; | ||
} | ||
return it->second; | ||
} | ||
|
||
if (const auto it = findBestMatchingPrefix(key); it != cache.end()) | ||
{ | ||
if (IsStaleFunction()(it->first)) | ||
arthurpassos marked this conversation as resolved.
Show resolved
Hide resolved
|
||
{ | ||
BasePolicy::remove(it->first); | ||
return {}; | ||
} | ||
return it->second; | ||
} | ||
|
||
return {}; | ||
} | ||
|
||
std::optional<KeyMapped> getWithKey(const Key & key) override | ||
{ | ||
if (const auto it = cache.find(key); it != cache.end()) | ||
ianton-ru marked this conversation as resolved.
Show resolved
Hide resolved
|
||
{ | ||
if (IsStaleFunction()(it->first)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This case is interesting. In case we find an exact match, but it has expired. Should we try to find a prefix match or simply update the entry? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well, there can be a more up-to-date prefix entry, so why not try to reuse it There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The only reason is that this entry would cease to exist. It would never be cached again. And it would become a linear search forever. Actually, not forever, if the more up-to-date prefix entry gets evicted and this query is performed again, it would re-appear. But I think you are right. |
||
{ | ||
BasePolicy::remove(it->first); | ||
return std::nullopt; | ||
} | ||
return std::make_optional<KeyMapped>({it->first, it->second}); | ||
} | ||
|
||
if (const auto it = findBestMatchingPrefix(key); it != cache.end()) | ||
{ | ||
if (IsStaleFunction()(it->first)) | ||
arthurpassos marked this conversation as resolved.
Show resolved
Hide resolved
|
||
{ | ||
BasePolicy::remove(it->first); | ||
return std::nullopt; | ||
} | ||
return std::make_optional<KeyMapped>({it->first, it->second}); | ||
} | ||
|
||
return std::nullopt; | ||
} | ||
|
||
private: | ||
auto findBestMatchingPrefix(const Key & key) | ||
{ | ||
const auto & prefix = key.prefix; | ||
|
||
auto best_match = cache.end(); | ||
size_t best_length = 0; | ||
|
||
std::vector<Key> to_remove; | ||
|
||
for (auto it = cache.begin(); it != cache.end(); ++it) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't like this cycle.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you explain why it is better? And why do you assume the below?
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well, assuming this version you suggested works, the time complexity goes to O(key_path_size). Which should probably be better than O(N). But it won't find "the best match", tho. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just implemented it, can you please have a look? Btw, thanks for the suggestion, it's a great one. |
||
{ | ||
const auto & candidate_bucket = it->first.bucket; | ||
const auto & candidate_prefix = it->first.prefix; | ||
|
||
if (candidate_bucket == key.bucket && prefix.starts_with(candidate_prefix)) | ||
{ | ||
if (IsStaleFunction()(it->first)) | ||
{ | ||
to_remove.push_back(it->first); | ||
continue; | ||
} | ||
|
||
if (candidate_prefix.size() > best_length) | ||
{ | ||
best_match = it; | ||
best_length = candidate_prefix.size(); | ||
} | ||
} | ||
} | ||
|
||
for (const auto & k : to_remove) | ||
BasePolicy::remove(k); | ||
|
||
return best_match; | ||
} | ||
}; | ||
|
||
/// Builds a cache key from the bucket, the listing prefix, the expiration time point of the
/// entry and an optional user id. All arguments are copied into the key.
ObjectStorageListObjectsCache::Key::Key(
    const String & bucket_,
    const String & prefix_,
    const std::chrono::system_clock::time_point & expires_at_,
    std::optional<UUID> user_id_)
    : bucket(bucket_)
    , prefix(prefix_)
    , expires_at(expires_at_)
    , user_id(user_id_)
{
}
|
||
bool ObjectStorageListObjectsCache::Key::operator==(const Key & other) const | ||
{ | ||
return bucket == other.bucket && prefix == other.prefix; | ||
} | ||
|
||
size_t ObjectStorageListObjectsCache::KeyHasher::operator()(const Key & key) const | ||
{ | ||
std::size_t seed = 0; | ||
|
||
boost::hash_combine(seed, std::hash<String>()(key.bucket)); | ||
boost::hash_combine(seed, std::hash<String>()(key.prefix)); | ||
|
||
return seed; | ||
} | ||
|
||
bool ObjectStorageListObjectsCache::IsStale::operator()(const Key & key) const | ||
{ | ||
return key.expires_at < std::chrono::system_clock::now(); | ||
} | ||
|
||
size_t ObjectStorageListObjectsCache::WeightFunction::operator()(const Value & value) const | ||
{ | ||
std::size_t weight = 0; | ||
|
||
for (const auto & object : value) | ||
{ | ||
weight += object->relative_path.capacity() + sizeof(ObjectMetadata); | ||
} | ||
|
||
return weight; | ||
} | ||
|
||
/// Creates the cache backed by the prefix-matching TTL policy defined in this file.
ObjectStorageListObjectsCache::ObjectStorageListObjectsCache()
    : cache(
        std::make_unique<
            ObjectStorageListObjectsCachePolicy<Key, Value, KeyHasher, WeightFunction, IsStale>>())
{
}
|
||
void ObjectStorageListObjectsCache::set( | ||
const std::string & bucket, | ||
const std::string & prefix, | ||
const std::shared_ptr<Value> & value) | ||
{ | ||
const auto key = Key{bucket, prefix, std::chrono::system_clock::now() + std::chrono::seconds(ttl)}; | ||
|
||
cache.set(key, value); | ||
} | ||
|
||
/// Looks up a cached listing for (`bucket`, `prefix`).
///
/// Returns a null pointer on a cache miss. On an exact match the cached listing is returned
/// as is. On a prefix match (the hit entry was cached for a shorter prefix, so it may contain
/// more objects than requested):
///   - if `filter_by_prefix` is true, a new listing holding only the objects whose
///     `relative_path` starts with `prefix` is returned;
///   - if `filter_by_prefix` is false, the broader cached listing is returned unfiltered and
///     the caller is expected to do its own filtering.
ObjectStorageListObjectsCache::Cache::MappedPtr ObjectStorageListObjectsCache::get(const String & bucket, const String & prefix, bool filter_by_prefix)
{
    const auto input_key = Key{bucket, prefix};
    auto pair = cache.getWithKey(input_key);

    if (!pair)
        return {};

    /// Filtering is needed only for a prefix match and only when the caller asked for it.
    /// (The condition used to test `filter_by_prefix` without negation, which returned the
    /// unfiltered superset exactly when filtering was requested.)
    if (pair->key == input_key || !filter_by_prefix)
        return pair->mapped;

    auto filtered_objects = std::make_shared<std::vector<ObjectInfoPtr>>();
    /// Upper bound: the filtered listing cannot be larger than the cached one.
    filtered_objects->reserve(pair->mapped->size());

    for (const auto & object : *pair->mapped)
    {
        if (object->relative_path.starts_with(input_key.prefix))
            filtered_objects->push_back(object);
    }

    return filtered_objects;
}
|
||
void ObjectStorageListObjectsCache::setMaxSizeInBytes(std::size_t size_in_bytes_) | ||
{ | ||
cache.setMaxSizeInBytes(size_in_bytes_); | ||
} | ||
|
||
void ObjectStorageListObjectsCache::setMaxCount(std::size_t count) | ||
{ | ||
cache.setMaxCount(count); | ||
} | ||
|
||
void ObjectStorageListObjectsCache::setTTL(std::size_t ttl_) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is it in seconds/miliseconds/minutes/hours? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In seconds, will modify the argument name |
||
{ | ||
ttl = ttl_; | ||
} | ||
|
||
/// Returns the single cache object, a function-local static constructed on first call.
ObjectStorageListObjectsCache & ObjectStorageListObjectsCache::instance()
{
    static ObjectStorageListObjectsCache singleton;
    return singleton;
}
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
#pragma once | ||
|
||
#include <chrono> | ||
#include <Disks/ObjectStorages/IObjectStorage.h> | ||
#include <Storages/ObjectStorage/StorageObjectStorage.h> | ||
#include <Common/TTLCachePolicy.h> | ||
#include <Common/CacheBase.h> | ||
|
||
namespace DB | ||
{ | ||
|
||
class ObjectStorageListObjectsCache | ||
{ | ||
public: | ||
static ObjectStorageListObjectsCache & instance(); | ||
|
||
struct Key | ||
{ | ||
Key( | ||
const String & bucket_, | ||
const String & prefix_, | ||
const std::chrono::system_clock::time_point & expires_at_ = std::chrono::system_clock::now(), | ||
std::optional<UUID> user_id_ = std::nullopt); | ||
|
||
std::string bucket; | ||
std::string prefix; | ||
std::chrono::system_clock::time_point expires_at; | ||
std::optional<UUID> user_id; | ||
|
||
bool operator==(const Key & other) const; | ||
}; | ||
|
||
using Value = StorageObjectStorage::ObjectInfos; | ||
struct KeyHasher | ||
{ | ||
size_t operator()(const Key & key) const; | ||
}; | ||
|
||
struct IsStale | ||
{ | ||
bool operator()(const Key & key) const; | ||
}; | ||
|
||
struct WeightFunction | ||
{ | ||
size_t operator()(const Value & value) const; | ||
}; | ||
|
||
using Cache = CacheBase<Key, Value, KeyHasher, WeightFunction>; | ||
|
||
void set( | ||
const std::string & bucket, | ||
const std::string & prefix, | ||
const std::shared_ptr<Value> & value); | ||
|
||
Cache::MappedPtr get(const String & bucket, const String & prefix, bool filter_by_prefix = true); | ||
|
||
void setMaxSizeInBytes(std::size_t size_in_bytes_); | ||
void setMaxCount(std::size_t count); | ||
void setTTL(std::size_t ttl_); | ||
|
||
private: | ||
ObjectStorageListObjectsCache(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. just as a precaution, please declare copy and move constructors/assignment operators as deleted here, so there is not way to accidentally make a copy like that:
|
||
|
||
Cache cache; | ||
size_t ttl; | ||
}; | ||
|
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
please add it to the
src/Core/SettingsChangesHistory.cpp