Skip to content

Commit 40ad2bc

Browse files
[CAS] Cleanup chaining for UnifiedOnDiskCache
Previously, the chaining of KeyValueDB and OnDiskGraphDB was not consistent: some operations were implemented directly in the lowest layer, some in the UnifiedOnDiskCache layer, and some in the ActionCache/ObjectStore layer. This commit moves all of the chaining logic down into the OnDiskGraphDB and OnDiskKeyValueDB layers, with the exception that KeyValueDB chaining still needs the help of functions in the UnifiedOnDiskCache layer. This cleans up the UnifiedOnDiskCache interface so that it only contains database management functions. Old functions like `KVPut`/`KVGet` can now be done directly via the underlying database with a small wrapper around it (see the libCASPluginTest.dylib implementation for the simple wrapper needed).
1 parent 38b1a5f commit 40ad2bc

15 files changed

+265
-226
lines changed

llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//===- BuiltinUnifiedCASDatabases.h -----------------------------*- C++ -*-===//
1+
//===----------------------------------------------------------------------===//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.

llvm/include/llvm/CAS/ObjectStore.h

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -291,13 +291,9 @@ class ObjectStore {
291291
/// Reference to an abstract hierarchical node, with data and references.
292292
/// Reference is passed by value and is expected to be valid as long as the \a
293293
/// ObjectStore is.
294-
///
295-
/// TODO: Expose \a ObjectStore::readData() and only call \a
296-
/// ObjectStore::getDataString() when asked.
297294
class ObjectProxy {
298295
public:
299-
const ObjectStore &getCAS() const { return *CAS; }
300-
ObjectStore &getCAS() { return *CAS; }
296+
ObjectStore &getCAS() const { return *CAS; }
301297
CASID getID() const { return CAS->getID(Ref); }
302298
ObjectRef getRef() const { return Ref; }
303299
size_t getNumReferences() const { return CAS->getNumRefs(H); }

llvm/include/llvm/CAS/OnDiskGraphDB.h

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -341,13 +341,16 @@ class OnDiskGraphDB {
341341
/// \param HashByteSize Size for the object digest hash bytes.
342342
/// \param UpstreamDB Optional on-disk store to be used for faulting-in nodes
343343
/// if they don't exist in the primary store. The upstream store is only used
344-
/// for reading nodes, new nodes are only written to the primary store.
344+
/// for reading nodes, new nodes are only written to the primary store. The
345+
/// caller must make sure \p UpstreamDB outlives this instance of
346+
/// OnDiskGraphDB; the common usage is to have a \p UnifiedOnDiskCache
347+
/// manage both.
345348
/// \param Policy If \p UpstreamDB is provided, controls how nodes are copied
346349
/// to primary store. This is recorded at creation time and subsequent opens
347350
/// need to pass the same policy otherwise the \p open will fail.
348351
static Expected<std::unique_ptr<OnDiskGraphDB>>
349352
open(StringRef Path, StringRef HashName, unsigned HashByteSize,
350-
std::unique_ptr<OnDiskGraphDB> UpstreamDB = nullptr,
353+
OnDiskGraphDB *UpstreamDB = nullptr,
351354
std::shared_ptr<OnDiskCASLogger> Logger = nullptr,
352355
FaultInPolicy Policy = FaultInPolicy::FullTree);
353356

@@ -440,9 +443,8 @@ class OnDiskGraphDB {
440443

441444
// Private constructor.
442445
OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index,
443-
OnDiskDataAllocator DataPool,
444-
std::unique_ptr<OnDiskGraphDB> UpstreamDB, FaultInPolicy Policy,
445-
std::shared_ptr<OnDiskCASLogger> Logger);
446+
OnDiskDataAllocator DataPool, OnDiskGraphDB *UpstreamDB,
447+
FaultInPolicy Policy, std::shared_ptr<OnDiskCASLogger> Logger);
446448

447449
/// Mapping from hash to object reference.
448450
///
@@ -461,7 +463,7 @@ class OnDiskGraphDB {
461463
std::string RootPath;
462464

463465
/// Optional on-disk store to be used for faulting-in nodes.
464-
std::unique_ptr<OnDiskGraphDB> UpstreamDB;
466+
OnDiskGraphDB* UpstreamDB = nullptr;
465467

466468
/// The policy used to fault in data from upstream.
467469
FaultInPolicy FIPolicy;

llvm/include/llvm/CAS/OnDiskKeyValueDB.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919

2020
namespace llvm::cas::ondisk {
2121

22+
class UnifiedOnDiskCache;
23+
2224
/// An on-disk key-value data store with the following properties:
2325
/// * Keys are fixed length binary hashes with expected normal distribution.
2426
/// * Values are buffers of the same size, specified at creation time.
@@ -59,9 +61,13 @@ class OnDiskKeyValueDB {
5961
/// \param KeySize Size for the key hash bytes.
6062
/// \param ValueName Identifier name for the values.
6163
/// \param ValueSize Size for the value bytes.
64+
/// \param UnifiedCache An optional UnifiedOnDiskCache that manages the size
65+
/// and lifetime of the CAS instance; it must own the KeyValueDB being
66+
/// opened once initialization completes.
6267
static Expected<std::unique_ptr<OnDiskKeyValueDB>>
6368
open(StringRef Path, StringRef HashName, unsigned KeySize,
6469
StringRef ValueName, size_t ValueSize,
70+
UnifiedOnDiskCache *UnifiedCache = nullptr,
6571
std::shared_ptr<OnDiskCASLogger> Logger = nullptr);
6672

6773
using CheckValueT =
@@ -71,11 +77,14 @@ class OnDiskKeyValueDB {
7177
Error validate(CheckValueT CheckValue) const;
7278

7379
private:
74-
OnDiskKeyValueDB(size_t ValueSize, OnDiskTrieRawHashMap Cache)
75-
: ValueSize(ValueSize), Cache(std::move(Cache)) {}
80+
OnDiskKeyValueDB(size_t ValueSize, OnDiskTrieRawHashMap Cache,
81+
UnifiedOnDiskCache *UnifiedCache)
82+
: ValueSize(ValueSize), Cache(std::move(Cache)),
83+
UnifiedCache(UnifiedCache) {}
7684

7785
const size_t ValueSize;
7886
OnDiskTrieRawHashMap Cache;
87+
UnifiedOnDiskCache *UnifiedCache = nullptr;
7988
};
8089

8190
} // namespace llvm::cas::ondisk

llvm/include/llvm/CAS/UnifiedOnDiskCache.h

Lines changed: 13 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//===- UnifiedOnDiskCache.h -------------------------------------*- C++ -*-===//
1+
//===----------------------------------------------------------------------===//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.
@@ -43,28 +43,8 @@ class UnifiedOnDiskCache {
4343
/// The \p OnDiskGraphDB instance for the open directory.
4444
OnDiskGraphDB &getGraphDB() { return *PrimaryGraphDB; }
4545

46-
/// Associate an \p ObjectID, of the \p OnDiskGraphDB instance, with a key.
47-
///
48-
/// \param Key the hash bytes for the key.
49-
/// \param Value the \p ObjectID value.
50-
///
51-
/// \returns the \p ObjectID associated with the \p Key. It may be different
52-
/// than \p Value if another value was already associated with this key.
53-
Expected<ObjectID> KVPut(ArrayRef<uint8_t> Key, ObjectID Value);
54-
55-
/// Associate an \p ObjectID, of the \p OnDiskGraphDB instance, with a key.
56-
/// An \p ObjectID as a key is equivalent to its digest bytes.
57-
///
58-
/// \param Key the \p ObjectID for the key.
59-
/// \param Value the \p ObjectID value.
60-
///
61-
/// \returns the \p ObjectID associated with the \p Key. It may be different
62-
/// than \p Value if another value was already associated with this key.
63-
Expected<ObjectID> KVPut(ObjectID Key, ObjectID Value);
64-
65-
/// \returns the \p ObjectID, of the \p OnDiskGraphDB instance, associated
66-
/// with the \p Key, or \p std::nullopt if the key does not exist.
67-
Expected<std::optional<ObjectID>> KVGet(ArrayRef<uint8_t> Key);
46+
/// The \p OnDiskKeyValueDB instance for the open directory.
47+
OnDiskKeyValueDB &getKeyValueDB() { return *PrimaryKVDB; }
6848

6949
/// Open a \p UnifiedOnDiskCache instance for a directory.
7050
///
@@ -150,18 +130,23 @@ class UnifiedOnDiskCache {
150130
static Error collectGarbage(StringRef Path,
151131
ondisk::OnDiskCASLogger *Logger = nullptr);
152132

133+
/// Remove unused data from the current UnifiedOnDiskCache.
153134
Error collectGarbage();
154135

155-
~UnifiedOnDiskCache();
136+
/// Helpers to convert between a KeyValueDB stored value and an ObjectID.
137+
static ObjectID getObjectIDFromValue(ArrayRef<char> Value);
156138

157-
Error validateActionCache();
139+
using ValueBytes = std::array<char, sizeof(uint64_t)>;
140+
static ValueBytes getValueFromObjectID(ObjectID ID);
158141

159-
OnDiskGraphDB *getUpstreamGraphDB() const { return UpstreamGraphDB; }
142+
~UnifiedOnDiskCache();
160143

161144
private:
145+
friend class OnDiskGraphDB;
146+
friend class OnDiskKeyValueDB;
162147
UnifiedOnDiskCache();
163148

164-
Expected<std::optional<ObjectID>>
149+
Expected<std::optional<ArrayRef<char>>>
165150
faultInFromUpstreamKV(ArrayRef<uint8_t> Key);
166151

167152
/// \returns the storage size of the primary directory.
@@ -175,7 +160,7 @@ class UnifiedOnDiskCache {
175160
std::atomic<bool> NeedsGarbageCollection;
176161
std::string PrimaryDBDir;
177162

178-
OnDiskGraphDB *UpstreamGraphDB = nullptr;
163+
std::unique_ptr<OnDiskGraphDB> UpstreamGraphDB;
179164
std::unique_ptr<OnDiskGraphDB> PrimaryGraphDB;
180165

181166
std::unique_ptr<OnDiskKeyValueDB> UpstreamKVDB;

llvm/lib/CAS/ActionCaches.cpp

Lines changed: 36 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@
1919
#include "llvm/CAS/OnDiskKeyValueDB.h"
2020
#include "llvm/CAS/UnifiedOnDiskCache.h"
2121
#include "llvm/Config/llvm-config.h"
22-
#include "llvm/Support/Alignment.h"
2322
#include "llvm/Support/BLAKE3.h"
2423
#include "llvm/Support/Compiler.h"
24+
#include "llvm/Support/Errc.h"
2525
#include "llvm/Support/Path.h"
2626

2727
#define DEBUG_TYPE "cas-action-caches"
@@ -67,6 +67,7 @@ class InMemoryActionCache final : public ActionCache {
6767
InMemoryCacheT Cache;
6868
};
6969

70+
/// Builtin basic OnDiskActionCache that uses one underlying OnDiskKeyValueDB.
7071
class OnDiskActionCache final : public ActionCache {
7172
public:
7273
Error putImpl(ArrayRef<uint8_t> ActionKey, const CASID &Result,
@@ -87,6 +88,8 @@ class OnDiskActionCache final : public ActionCache {
8788
using DataT = CacheEntry<sizeof(HashType)>;
8889
};
8990

91+
/// Builtin unified ActionCache that wraps around UnifiedOnDiskCache to provide
92+
/// access to its ActionCache.
9093
class UnifiedOnDiskActionCache final : public ActionCache {
9194
public:
9295
Error putImpl(ArrayRef<uint8_t> ActionKey, const CASID &Result,
@@ -118,7 +121,8 @@ static Error createResultCachePoisonedError(ArrayRef<uint8_t> KeyHash,
118121
}
119122

120123
Expected<std::optional<CASID>>
121-
InMemoryActionCache::getImpl(ArrayRef<uint8_t> Key, bool /*CanBeDistributed*/) const {
124+
InMemoryActionCache::getImpl(ArrayRef<uint8_t> Key,
125+
bool /*CanBeDistributed*/) const {
122126
auto Result = Cache.find(Key);
123127
if (!Result)
124128
return std::nullopt;
@@ -169,17 +173,18 @@ OnDiskActionCache::create(StringRef AbsPath) {
169173
ondisk::OnDiskCASLogger::openIfEnabled(AbsPath).moveInto(Logger))
170174
return std::move(E);
171175
std::unique_ptr<ondisk::OnDiskKeyValueDB> DB;
172-
if (Error E = ondisk::OnDiskKeyValueDB::open(AbsPath, getHashName(),
173-
sizeof(HashType), getHashName(),
174-
sizeof(DataT), std::move(Logger))
176+
if (Error E = ondisk::OnDiskKeyValueDB::open(
177+
AbsPath, getHashName(), sizeof(HashType), getHashName(),
178+
sizeof(DataT), /*UnifiedCache=*/nullptr, std::move(Logger))
175179
.moveInto(DB))
176180
return std::move(E);
177181
return std::unique_ptr<OnDiskActionCache>(
178182
new OnDiskActionCache(std::move(DB)));
179183
}
180184

181185
Expected<std::optional<CASID>>
182-
OnDiskActionCache::getImpl(ArrayRef<uint8_t> Key, bool /*CanBeDistributed*/) const {
186+
OnDiskActionCache::getImpl(ArrayRef<uint8_t> Key,
187+
bool /*CanBeDistributed*/) const {
183188
std::optional<ArrayRef<char>> Val;
184189
if (Error E = DB->get(Key).moveInto(Val))
185190
return std::move(E);
@@ -218,13 +223,14 @@ UnifiedOnDiskActionCache::UnifiedOnDiskActionCache(
218223
Expected<std::optional<CASID>>
219224
UnifiedOnDiskActionCache::getImpl(ArrayRef<uint8_t> Key,
220225
bool /*CanBeDistributed*/) const {
221-
std::optional<ondisk::ObjectID> Val;
222-
if (Error E = UniDB->KVGet(Key).moveInto(Val))
226+
std::optional<ArrayRef<char>> Val;
227+
if (Error E = UniDB->getKeyValueDB().get(Key).moveInto(Val))
223228
return std::move(E);
224229
if (!Val)
225230
return std::nullopt;
231+
auto ID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(*Val);
226232
return CASID::create(&getContext(),
227-
toStringRef(UniDB->getGraphDB().getDigest(*Val)));
233+
toStringRef(UniDB->getGraphDB().getDigest(ID)));
228234
}
229235

230236
Error UnifiedOnDiskActionCache::putImpl(ArrayRef<uint8_t> Key,
@@ -233,20 +239,35 @@ Error UnifiedOnDiskActionCache::putImpl(ArrayRef<uint8_t> Key,
233239
auto Expected = UniDB->getGraphDB().getReference(Result.getHash());
234240
if (LLVM_UNLIKELY(!Expected))
235241
return Expected.takeError();
236-
std::optional<ondisk::ObjectID> Observed;
237-
if (Error E = UniDB->KVPut(Key, *Expected).moveInto(Observed))
242+
243+
auto Value = ondisk::UnifiedOnDiskCache::getValueFromObjectID(*Expected);
244+
std::optional<ArrayRef<char>> Observed;
245+
if (Error E = UniDB->getKeyValueDB().put(Key, Value).moveInto(Observed))
238246
return E;
239247

240-
if (*Expected == Observed)
248+
auto ObservedID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(*Observed);
249+
if (*Expected == ObservedID)
241250
return Error::success();
242251

243252
return createResultCachePoisonedError(
244-
Key, getContext(), Result,
245-
UniDB->getGraphDB().getDigest(*Observed));
253+
Key, getContext(), Result, UniDB->getGraphDB().getDigest(ObservedID));
246254
}
247255

248256
Error UnifiedOnDiskActionCache::validate() const {
249-
return UniDB->validateActionCache();
257+
auto ValidateRef = [](FileOffset Offset, ArrayRef<char> Value) -> Error {
258+
auto ID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(Value);
259+
auto formatError = [&](Twine Msg) {
260+
return createStringError(
261+
llvm::errc::illegal_byte_sequence,
262+
"bad record at 0x" +
263+
utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " +
264+
Msg.str());
265+
};
266+
if (ID.getOpaqueData() == 0)
267+
return formatError("zero is not a valid ref");
268+
return Error::success();
269+
};
270+
return UniDB->getKeyValueDB().validate(ValidateRef);
250271
}
251272

252273
Expected<std::unique_ptr<ActionCache>>

llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//===- BuiltinUnifiedCASDatabases.cpp ---------------------------*- C++ -*-===//
1+
//===----------------------------------------------------------------------===//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.
@@ -35,4 +35,4 @@ Expected<ValidationResult> cas::validateOnDiskUnifiedCASDatabasesIfNeeded(
3535
#else
3636
return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled");
3737
#endif
38-
}
38+
}

llvm/lib/CAS/OnDiskCAS.cpp

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@ class OnDiskCAS : public BuiltinCAS {
4343

4444
static Expected<std::unique_ptr<OnDiskCAS>> open(StringRef Path);
4545

46-
OnDiskCAS(std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB_)
47-
: UniDB(std::move(UniDB_)), DB(&UniDB->getGraphDB()) {}
46+
OnDiskCAS(std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB)
47+
: UnifiedDB(std::move(UniDB)), DB(&UnifiedDB->getGraphDB()) {}
4848

4949
private:
5050
ObjectHandle convertHandle(ondisk::ObjectHandle Node) const {
@@ -78,11 +78,11 @@ class OnDiskCAS : public BuiltinCAS {
7878
Expected<std::optional<uint64_t>> getStorageSize() const final;
7979
Error pruneStorageData() final;
8080

81-
OnDiskCAS(std::unique_ptr<ondisk::OnDiskGraphDB> DB_)
82-
: OwnedDB(std::move(DB_)), DB(OwnedDB.get()) {}
81+
OnDiskCAS(std::unique_ptr<ondisk::OnDiskGraphDB> GraphDB)
82+
: OwnedDB(std::move(GraphDB)), DB(OwnedDB.get()) {}
8383

8484
std::unique_ptr<ondisk::OnDiskGraphDB> OwnedDB;
85-
std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB;
85+
std::shared_ptr<ondisk::UnifiedOnDiskCache> UnifiedDB;
8686
ondisk::OnDiskGraphDB *DB;
8787
};
8888

@@ -99,8 +99,6 @@ Error OnDiskCAS::validate(bool CheckHash) const {
9999

100100
if (auto E = DB->validate(CheckHash, Hasher))
101101
return E;
102-
if (UniDB && UniDB->getUpstreamGraphDB())
103-
return UniDB->getUpstreamGraphDB()->validate(CheckHash, Hasher);
104102

105103
return Error::success();
106104
}
@@ -165,15 +163,15 @@ Error OnDiskCAS::forEachRef(ObjectHandle Node,
165163
}
166164

167165
Error OnDiskCAS::setSizeLimit(std::optional<uint64_t> SizeLimit) {
168-
UniDB->setSizeLimit(SizeLimit);
166+
UnifiedDB->setSizeLimit(SizeLimit);
169167
return Error::success();
170168
}
171169

172170
Expected<std::optional<uint64_t>> OnDiskCAS::getStorageSize() const {
173-
return UniDB->getStorageSize();
171+
return UnifiedDB->getStorageSize();
174172
}
175173

176-
Error OnDiskCAS::pruneStorageData() { return UniDB->collectGarbage(); }
174+
Error OnDiskCAS::pruneStorageData() { return UnifiedDB->collectGarbage(); }
177175

178176
Expected<std::unique_ptr<OnDiskCAS>> OnDiskCAS::open(StringRef AbsPath) {
179177
std::shared_ptr<ondisk::OnDiskCASLogger> Logger;

0 commit comments

Comments
 (0)