diff --git a/clang/tools/driver/cc1depscan_main.cpp b/clang/tools/driver/cc1depscan_main.cpp index 8fb45c8d4ad04..eca652027a6dd 100644 --- a/clang/tools/driver/cc1depscan_main.cpp +++ b/clang/tools/driver/cc1depscan_main.cpp @@ -941,7 +941,7 @@ void ScanServer::start(bool Exclusive, ArrayRef CASArgs) { return; SmallString<64> LLVMCasStorage; SmallString<64> CASPath; - CASOpts.getResolvedCASPath(CASPath); + ExitOnErr(CASOpts.getResolvedCASPath(CASPath)); ExitOnErr(llvm::cas::validateOnDiskUnifiedCASDatabasesIfNeeded( CASPath, /*CheckHash=*/true, /*AllowRecovery=*/true, @@ -961,7 +961,8 @@ void ScanServer::start(bool Exclusive, ArrayRef CASArgs) { // Try to lock; failure means there's another daemon running. if (std::error_code EC = llvm::sys::fs::tryLockFile( - PidFD, std::chrono::milliseconds(0), /*Exclusive=*/true)) { + PidFD, std::chrono::milliseconds(0), + llvm::sys::fs::LockKind::Exclusive)) { if (Exclusive) reportError("another daemon using the base path"); ::exit(0); diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index f3e878fe932b8..a86a577b3d73c 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -890,6 +890,7 @@ option (LLVM_ENABLE_SPHINX "Use Sphinx to generate llvm documentation." OFF) option (LLVM_ENABLE_OCAMLDOC "Build OCaml bindings documentation." ON) option (LLVM_ENABLE_BINDINGS "Build bindings." ON) option (LLVM_ENABLE_TELEMETRY "Enable the telemetry library. If set to OFF, library cannot be enabled after build (eg., at runtime)" ON) +option (LLVM_ENABLE_ONDISK_CAS "Build OnDiskCAS." 
ON) if(CMAKE_SIZEOF_VOID_P GREATER_EQUAL 8) set(LLVM_ENABLE_ONDISK_CAS_default ON) diff --git a/llvm/include/llvm/CAS/ActionCache.h b/llvm/include/llvm/CAS/ActionCache.h index 467c38171c960..c2222d4e88b00 100644 --- a/llvm/include/llvm/CAS/ActionCache.h +++ b/llvm/include/llvm/CAS/ActionCache.h @@ -1,13 +1,19 @@ -//===- llvm/CAS/ActionCache.h -----------------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the declaration of the ActionCache class, which is the +/// base class for ActionCache implementations. +/// +//===----------------------------------------------------------------------===// -#ifndef LLVM_CAS_CASACTIONCACHE_H -#define LLVM_CAS_CASACTIONCACHE_H +#ifndef LLVM_CAS_ACTIONCACHE_H +#define LLVM_CAS_ACTIONCACHE_H #include "llvm/ADT/FunctionExtras.h" #include "llvm/ADT/StringRef.h" @@ -27,15 +33,11 @@ class ObjectProxy; /// for CAS types. class CacheKey { public: - LLVM_ABI StringRef getKey() const { return Key; } + StringRef getKey() const { return Key; } - // TODO: Support CacheKey other than a CASID but rather any array of bytes. - // To do that, ActionCache need to be able to rehash the key into the index, - // which then `getOrCompute` method can be used to avoid multiple calls to - // has function. 
LLVM_ABI CacheKey(const CASID &ID); - LLVM_ABI CacheKey(const ObjectProxy &Proxy); - LLVM_ABI CacheKey(const ObjectStore &CAS, const ObjectRef &Ref); + LLVM_ABI_FOR_TEST CacheKey(const ObjectProxy &Proxy); + CacheKey(const ObjectStore &CAS, const ObjectRef &Ref); private: std::string Key; @@ -54,63 +56,67 @@ struct AsyncErrorValue { Error Value; }; -/// A cache from a key describing an action to the result of doing it. +/// A cache from a key (that describes an action) to the result of performing +/// that action. /// -/// Actions are expected to be pure (collision is an error). +/// Actions are expected to be pure. Storing mappings from one action to +/// multiple results will result in error (cache poisoning). class ActionCache { virtual void anchor(); public: /// Get a previously computed result for \p ActionKey. /// - /// \param Globally if true it is a hint to the underlying implementation that - /// the lookup is profitable to be done on a distributed caching level, not - /// just locally. The implementation is free to ignore this flag. + /// \param CanBeDistributed is a hint to the underlying implementation that if + /// it is true, the lookup is profitable to be done on a distributed caching + /// level, not just locally. The implementation is free to ignore this flag. Expected> get(const CacheKey &ActionKey, - bool Globally = false) const { - return getImpl(arrayRefFromStringRef(ActionKey.getKey()), Globally); + bool CanBeDistributed = false) const { + return getImpl(arrayRefFromStringRef(ActionKey.getKey()), CanBeDistributed); } /// Asynchronous version of \c get. std::future getFuture(const CacheKey &ActionKey, - bool Globally = false) const; + bool CanBeDistributed = false) const; /// Asynchronous version of \c get. 
- void getAsync(const CacheKey &ActionKey, bool Globally, + void getAsync(const CacheKey &ActionKey, bool CanBeDistributed, unique_function>)> Callback, std::unique_ptr *CancelObj = nullptr) const { - return getImplAsync(arrayRefFromStringRef(ActionKey.getKey()), Globally, + return getImplAsync(arrayRefFromStringRef(ActionKey.getKey()), CanBeDistributed, std::move(Callback), CancelObj); } /// Cache \p Result for the \p ActionKey computation. /// - /// \param Globally if true it is a hint to the underlying implementation that - /// the association is profitable to be done on a distributed caching level, - /// not just locally. The implementation is free to ignore this flag. + /// \param CanBeDistributed is a hint to the underlying implementation that if + /// it is true, the association is profitable to be done on a distributed + /// caching level, not just locally. The implementation is free to ignore this + /// flag. Error put(const CacheKey &ActionKey, const CASID &Result, - bool Globally = false) { + bool CanBeDistributed = false) { assert(Result.getContext().getHashSchemaIdentifier() == getContext().getHashSchemaIdentifier() && "Hash schema mismatch"); - return putImpl(arrayRefFromStringRef(ActionKey.getKey()), Result, Globally); + return putImpl(arrayRefFromStringRef(ActionKey.getKey()), Result, + CanBeDistributed); } /// Asynchronous version of \c put. std::future putFuture(const CacheKey &ActionKey, const CASID &Result, - bool Globally = false); + bool CanBeDistributed = false); /// Asynchronous version of \c put. /// \param[out] CancelObj Optional pointer to receive a cancellation object. 
- void putAsync(const CacheKey &ActionKey, const CASID &Result, bool Globally, + void putAsync(const CacheKey &ActionKey, const CASID &Result, bool CanBeDistributed, unique_function Callback, std::unique_ptr *CancelObj = nullptr) { assert(Result.getContext().getHashSchemaIdentifier() == getContext().getHashSchemaIdentifier() && "Hash schema mismatch"); return putImplAsync(arrayRefFromStringRef(ActionKey.getKey()), Result, - Globally, std::move(Callback), CancelObj); + CanBeDistributed, std::move(Callback), CancelObj); } /// Validate the ActionCache contents. @@ -119,17 +125,21 @@ class ActionCache { virtual ~ActionCache() = default; protected: - virtual Expected> getImpl(ArrayRef ResolvedKey, - bool Globally) const = 0; + // Implementation detail for \p get method. + virtual Expected> + getImpl(ArrayRef ResolvedKey, bool CanBeDistributed) const = 0; + virtual void - getImplAsync(ArrayRef ResolvedKey, bool Globally, + getImplAsync(ArrayRef ResolvedKey, bool CanBeDistributed, unique_function>)> Callback, std::unique_ptr *CancelObj) const; + // Implementation detail for \p put method. virtual Error putImpl(ArrayRef ResolvedKey, const CASID &Result, - bool Globally) = 0; + bool CanBeDistributed) = 0; + virtual void putImplAsync(ArrayRef ResolvedKey, const CASID &Result, - bool Globally, + bool CanBeDistributed, unique_function Callback, std::unique_ptr *CancelObj); @@ -146,7 +156,7 @@ LLVM_ABI std::unique_ptr createInMemoryActionCache(); /// Get a reasonable default on-disk path for a persistent ActionCache for the /// current user. -LLVM_ABI std::string getDefaultOnDiskActionCachePath(); +std::string getDefaultOnDiskActionCachePath(); /// Create an action cache on disk. 
LLVM_ABI Expected> @@ -154,4 +164,4 @@ createOnDiskActionCache(StringRef Path); } // end namespace llvm::cas -#endif // LLVM_CAS_CASACTIONCACHE_H +#endif // LLVM_CAS_ACTIONCACHE_H diff --git a/llvm/include/llvm/CAS/BuiltinCASContext.h b/llvm/include/llvm/CAS/BuiltinCASContext.h index ebc4ca8bd1f2e..e9a226a423e5a 100644 --- a/llvm/include/llvm/CAS/BuiltinCASContext.h +++ b/llvm/include/llvm/CAS/BuiltinCASContext.h @@ -54,6 +54,7 @@ namespace llvm::cas::builtin { using HasherT = BLAKE3; using HashType = decltype(HasherT::hash(std::declval &>())); +/// CASContext for LLVM builtin CAS using BLAKE3 hash type. class BuiltinCASContext : public CASContext { void printIDImpl(raw_ostream &OS, const CASID &ID) const final; void anchor() override; diff --git a/llvm/include/llvm/CAS/BuiltinObjectHasher.h b/llvm/include/llvm/CAS/BuiltinObjectHasher.h index 544167714c589..7079e5ec448c8 100644 --- a/llvm/include/llvm/CAS/BuiltinObjectHasher.h +++ b/llvm/include/llvm/CAS/BuiltinObjectHasher.h @@ -9,13 +9,12 @@ #ifndef LLVM_CAS_BUILTINOBJECTHASHER_H #define LLVM_CAS_BUILTINOBJECTHASHER_H -#include "llvm/ADT/StringRef.h" #include "llvm/CAS/ObjectStore.h" #include "llvm/Support/Endian.h" -namespace llvm { -namespace cas { +namespace llvm::cas { +/// Hasher for stored objects in builtin CAS. 
template class BuiltinObjectHasher { public: using HashT = decltype(HasherT::hash(std::declval &>())); @@ -66,13 +65,13 @@ template class BuiltinObjectHasher { void updateArray(ArrayRef Bytes) { updateArray(ArrayRef(reinterpret_cast(Bytes.data()), - Bytes.size())); + Bytes.size())); } void updateSize(uint64_t Size) { Size = support::endian::byte_swap(Size, endianness::little); - Hasher.update(ArrayRef(reinterpret_cast(&Size), - sizeof(Size))); + Hasher.update( + ArrayRef(reinterpret_cast(&Size), sizeof(Size))); } BuiltinObjectHasher() = default; @@ -80,7 +79,6 @@ template class BuiltinObjectHasher { HasherT Hasher; }; -} // namespace cas -} // namespace llvm +} // namespace llvm::cas #endif // LLVM_CAS_BUILTINOBJECTHASHER_H diff --git a/llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h b/llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h index 90da8d9825b0d..e95e50b46e863 100644 --- a/llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h +++ b/llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h @@ -1,4 +1,4 @@ -//===- BuiltinUnifiedCASDatabases.h -----------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -31,7 +31,7 @@ createOnDiskUnifiedCASDatabases(StringRef Path); /// marking the files for garbage collection. /// \param ForceValidation Whether to force validation to occur even if it /// should not be necessary. -/// \param LLVMCasBinary If provided, validation is performed out-of-process +/// \param LLVMCasBinaryPath If provided, validation is performed out-of-process /// using the given \c llvm-cas executable which protects against crashes /// during validation. Otherwise validation is performed in-process. 
/// @@ -42,7 +42,7 @@ createOnDiskUnifiedCASDatabases(StringRef Path); LLVM_ABI Expected validateOnDiskUnifiedCASDatabasesIfNeeded( StringRef Path, bool CheckHash, bool AllowRecovery, bool ForceValidation, - std::optional LLVMCasBinary); + std::optional LLVMCasBinaryPath); } // namespace llvm::cas diff --git a/llvm/include/llvm/CAS/CASConfiguration.h b/llvm/include/llvm/CAS/CASConfiguration.h index 0ac389500bbcc..5db739ff38072 100644 --- a/llvm/include/llvm/CAS/CASConfiguration.h +++ b/llvm/include/llvm/CAS/CASConfiguration.h @@ -51,11 +51,7 @@ class CASConfiguration { return !(LHS == RHS); } - // Get resolved CASPath. - void getResolvedCASPath(llvm::SmallVectorImpl &Result) const; - // Create CASDatabase from the CASConfiguration. - LLVM_ABI llvm::Expected, std::shared_ptr>> createDatabases() const; @@ -71,12 +67,14 @@ class CASConfiguration { /// /// Returns the path to configuration file and its corresponding /// CASConfiguration. - LLVM_ABI static std::optional> createFromSearchConfigFile( StringRef Path, llvm::IntrusiveRefCntPtr VFS = nullptr); + /// Get resolved CASPath. + Error getResolvedCASPath(llvm::SmallVectorImpl &Result) const; + /// DenseMap support \{ static cas::CASConfiguration getDenseMapEmptyKey() { return {}; } diff --git a/llvm/include/llvm/CAS/CASFileSystem.h b/llvm/include/llvm/CAS/CASFileSystem.h index 83ad3717ab0f8..eb328b11d85f8 100644 --- a/llvm/include/llvm/CAS/CASFileSystem.h +++ b/llvm/include/llvm/CAS/CASFileSystem.h @@ -20,7 +20,7 @@ class CASID; /// Abstract class represents an open file backed by a CAS. class CASBackedFile : public RTTIExtends { public: - LLVM_ABI static const char ID; + static const char ID; /// Get the CAS reference for the contents of the file. 
virtual cas::ObjectRef getObjectRefForContent() = 0; }; @@ -29,7 +29,7 @@ class CASBackedFile : public RTTIExtends { class CASBackedFileSystem : public llvm::RTTIExtends { public: - LLVM_ABI static const char ID; + static const char ID; /// This is a convenience method that opens a file, gets its content and then /// closes the file. It returns MemoryBuffer and ObjectRef in one call to avoid @@ -42,8 +42,7 @@ class CASBackedFileSystem bool IsVolatile = false, bool IsText = true); /// Get ObjectRef of a file from its path. - LLVM_ABI llvm::Expected - getObjectRefForFileContent(const Twine &Name); + llvm::Expected getObjectRefForFileContent(const Twine &Name); /// Implementation for openFileForRead using CASBackedFile. ErrorOr> @@ -63,11 +62,11 @@ class CASBackedFileSystem createThreadSafeProxyFS() = 0; }; -LLVM_ABI Expected> +Expected> createCASFileSystem(std::shared_ptr DB, const CASID &RootID, sys::path::Style PathStyle = sys::path::Style::native); -LLVM_ABI Expected> +Expected> createCASFileSystem(ObjectStore &DB, const CASID &RootID, sys::path::Style PathStyle = sys::path::Style::native); diff --git a/llvm/include/llvm/CAS/CASID.h b/llvm/include/llvm/CAS/CASID.h index c859060bdb62a..caf2b5687c615 100644 --- a/llvm/include/llvm/CAS/CASID.h +++ b/llvm/include/llvm/CAS/CASID.h @@ -55,18 +55,21 @@ class CASContext { /// compared directly. If they are, then \a /// CASIDContext::getHashSchemaIdentifier() is compared to see if they can be /// compared by hash, in which case the result of \a getHash() is compared. -/// -/// FIXME: Rename to ObjectID (and rename file to CASObjectID.h?). class CASID { public: void dump() const; - void print(raw_ostream &OS) const { - return getContext().printIDImpl(OS, *this); - } + friend raw_ostream &operator<<(raw_ostream &OS, const CASID &ID) { ID.print(OS); return OS; } + + /// Print CASID. + void print(raw_ostream &OS) const { + return getContext().printIDImpl(OS, *this); + } + + /// Return a printable string for CASID. 
LLVM_ABI std::string toString() const; ArrayRef getHash() const { @@ -92,8 +95,7 @@ class CASID { } friend hash_code hash_value(const CASID &ID) { - ArrayRef Hash = ID.getHash(); - return hash_combine_range(Hash.begin(), Hash.end()); + return hash_combine_range(ID.getHash()); } const CASContext &getContext() const { @@ -110,6 +112,7 @@ class CASID { CASID() = delete; + /// Create CASID from CASContext and raw hash bytes. static CASID create(const CASContext *Context, StringRef Hash) { return CASID(Context, Hash); } @@ -137,7 +140,7 @@ template struct AsyncValue { }; class Cancellable { - LLVM_ABI virtual void anchor(); + virtual void anchor(); public: virtual ~Cancellable() {} diff --git a/llvm/include/llvm/CAS/CASNodeSchema.h b/llvm/include/llvm/CAS/CASNodeSchema.h index 9f4b61189b5ca..e31b5af1cb807 100644 --- a/llvm/include/llvm/CAS/CASNodeSchema.h +++ b/llvm/include/llvm/CAS/CASNodeSchema.h @@ -10,28 +10,27 @@ #define LLVM_CAS_CASNODESCHEMA_H #include "llvm/CAS/CASReference.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/ExtensibleRTTI.h" -namespace llvm { -namespace cas { +namespace llvm::cas { class ObjectProxy; /// A base class for schemas built on top of CAS nodes. -/// -/// TODO: Build a FilesystemSchema on top of this for reimplementing Trees on -/// top of the CAS. -class NodeSchema : public RTTIExtends { - LLVM_ABI void anchor() override; +class LLVM_ABI NodeSchema : public RTTIExtends { + void anchor() override; public: - LLVM_ABI static char ID; + static char ID; /// Check if \a Node is a root (entry node) for the schema. This is a strong /// check, since it requires that the first reference matches a complete /// type-id DAG. virtual bool isRootNode(const cas::ObjectProxy &Node) const = 0; + /// Check if \a Node is a node for the schema. This can be any node that + /// belongs to the schema. 
virtual bool isNode(const cas::ObjectProxy &Node) const = 0; cas::ObjectStore &CAS; @@ -70,7 +69,6 @@ class SchemaPool { SmallVector> Schemas; }; -} // namespace cas -} // namespace llvm +} // namespace llvm::cas #endif // LLVM_CAS_CASNODESCHEMA_H diff --git a/llvm/include/llvm/CAS/CASOutputBackend.h b/llvm/include/llvm/CAS/CASOutputBackend.h index 2a35949a74c29..eb219d10c5460 100644 --- a/llvm/include/llvm/CAS/CASOutputBackend.h +++ b/llvm/include/llvm/CAS/CASOutputBackend.h @@ -33,7 +33,7 @@ class CASOutputBackend : public vfs::OutputBackend { SmallVector takeOutputs() { return std::move(Outputs); } /// Add a CAS object to the path in the output backend. - LLVM_ABI void addObject(StringRef Path, ObjectRef Object); + void addObject(StringRef Path, ObjectRef Object); private: Expected> @@ -47,8 +47,8 @@ class CASOutputBackend : public vfs::OutputBackend { } public: - LLVM_ABI CASOutputBackend(std::shared_ptr CAS); - LLVM_ABI CASOutputBackend(ObjectStore &CAS); + CASOutputBackend(std::shared_ptr CAS); + CASOutputBackend(ObjectStore &CAS); ~CASOutputBackend(); private: diff --git a/llvm/include/llvm/CAS/CASProvidingFileSystem.h b/llvm/include/llvm/CAS/CASProvidingFileSystem.h index 60301de73de08..a9fe6a034211e 100644 --- a/llvm/include/llvm/CAS/CASProvidingFileSystem.h +++ b/llvm/include/llvm/CAS/CASProvidingFileSystem.h @@ -23,7 +23,7 @@ class ObjectStore; /// Implements \p vfs::File::getObjectRefForContent() by ingesting the file /// buffer into the \p DB, unless the \p UnderlyingFS already supports \p /// vfs::File::getObjectRefForContent(). 
-LLVM_ABI std::unique_ptr createCASProvidingFileSystem( +std::unique_ptr createCASProvidingFileSystem( std::shared_ptr DB, IntrusiveRefCntPtr UnderlyingFS); diff --git a/llvm/include/llvm/CAS/CASReference.h b/llvm/include/llvm/CAS/CASReference.h index 1f435cf306c4c..15ce29e59ba71 100644 --- a/llvm/include/llvm/CAS/CASReference.h +++ b/llvm/include/llvm/CAS/CASReference.h @@ -20,7 +20,6 @@ class raw_ostream; namespace cas { class ObjectStore; - class ObjectHandle; class ObjectRef; @@ -41,8 +40,9 @@ class ReferenceBase { return InternalRef; } + /// Helper functions for DenseMapInfo. unsigned getDenseMapHash() const { - return (unsigned)llvm::hash_value(InternalRef); + return static_cast(llvm::hash_value(InternalRef)); } bool isDenseMapEmpty() const { return InternalRef == getDenseMapEmptyRef(); } bool isDenseMapTombstone() const { @@ -89,7 +89,7 @@ class ReferenceBase { #endif }; -/// Reference to an object in a \a ObjectStore instance. +/// Reference to an object in an \a ObjectStore instance. /// /// If you have an ObjectRef, you know the object exists, and you can point at /// it from new nodes with \a ObjectStore::store(), but you don't know anything @@ -105,12 +105,6 @@ class ReferenceBase { /// ObjectHandle, a variant that knows what kind of entity it is. \a /// ObjectStore::getReferenceKind() can expect the type of reference without /// asking for unloaded objects to be loaded. -/// -/// This is a wrapper around a \c uint64_t (and a \a ObjectStore instance when -/// assertions are on). If necessary, it can be deconstructed and reconstructed -/// using \a Reference::getInternalRef() and \a -/// Reference::getFromInternalRef(), but clients aren't expected to need to do -/// this. These both require the right \a ObjectStore instance. 
class ObjectRef : public ReferenceBase { struct DenseMapTag {}; diff --git a/llvm/include/llvm/CAS/CachingOnDiskFileSystem.h b/llvm/include/llvm/CAS/CachingOnDiskFileSystem.h index 3883fd7b32792..b124124aa3bdc 100644 --- a/llvm/include/llvm/CAS/CachingOnDiskFileSystem.h +++ b/llvm/include/llvm/CAS/CachingOnDiskFileSystem.h @@ -146,10 +146,10 @@ class CachingOnDiskFileSystem std::shared_ptr OwnedDB; }; -LLVM_ABI Expected> +Expected> createCachingOnDiskFileSystem(std::shared_ptr DB); -LLVM_ABI Expected> +Expected> createCachingOnDiskFileSystem(ObjectStore &DB); } // namespace cas diff --git a/llvm/include/llvm/CAS/FileOffset.h b/llvm/include/llvm/CAS/FileOffset.h new file mode 100644 index 0000000000000..a3dc06b38e996 --- /dev/null +++ b/llvm/include/llvm/CAS/FileOffset.h @@ -0,0 +1,39 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file declares interface for FileOffset that represent stored data at an +/// offset from the beginning of a file. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CAS_FILEOFFSET_H +#define LLVM_CAS_FILEOFFSET_H + +#include + +namespace llvm::cas { + +/// FileOffset is a wrapper around `uint64_t` to represent the offset of data +/// from the beginning of the file. 
+class FileOffset { +public: + uint64_t get() const { return Offset; } + + explicit operator bool() const { return Offset; } + + FileOffset() = default; + explicit FileOffset(uint64_t Offset) : Offset(Offset) {} + +private: + uint64_t Offset = 0; +}; + +} // namespace llvm::cas + +#endif // LLVM_CAS_FILEOFFSET_H diff --git a/llvm/include/llvm/CAS/FileSystemCache.h b/llvm/include/llvm/CAS/FileSystemCache.h index bfc5183b1af88..df2ff3834d7ec 100644 --- a/llvm/include/llvm/CAS/FileSystemCache.h +++ b/llvm/include/llvm/CAS/FileSystemCache.h @@ -14,7 +14,6 @@ #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/StringMap.h" #include "llvm/CAS/CASReference.h" -#include "llvm/CAS/HashMappedTrie.h" #include "llvm/CAS/ThreadSafeAllocator.h" #include "llvm/Support/AlignOf.h" #include "llvm/Support/Allocator.h" diff --git a/llvm/include/llvm/CAS/HashMappedTrie.h b/llvm/include/llvm/CAS/HashMappedTrie.h deleted file mode 100644 index c9430dec182a1..0000000000000 --- a/llvm/include/llvm/CAS/HashMappedTrie.h +++ /dev/null @@ -1,351 +0,0 @@ -//===- HashMappedTrie.h -----------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CAS_HASHMAPPEDTRIE_H -#define LLVM_CAS_HASHMAPPEDTRIE_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/Casting.h" -#include -#include - -namespace llvm { - -class MemoryBuffer; -class raw_ostream; - -namespace cas { - -/// Base class for a lock-free thread-safe hash-mapped trie. 
-class ThreadSafeHashMappedTrieBase { -public: - enum : size_t { TrieContentBaseSize = 4 }; - -private: - template struct AllocValueType { - char Base[TrieContentBaseSize]; - std::aligned_union_t Content; - }; - -protected: - template static constexpr size_t getContentAllocSize() { - return sizeof(AllocValueType); - } - template static constexpr size_t getContentAllocAlign() { - return alignof(AllocValueType); - } - template static constexpr size_t getContentOffset() { - return offsetof(AllocValueType, Content); - } - -public: - void operator delete(void *Ptr) { ::free(Ptr); } - - static constexpr size_t DefaultNumRootBits = 6; - static constexpr size_t DefaultNumSubtrieBits = 4; - - LLVM_DUMP_METHOD void dump() const; - void print(raw_ostream &OS) const; - -protected: - /// Result of a lookup. Suitable for an insertion hint. Maybe could be - /// expanded into an iterator of sorts, but likely not useful (visiting - /// everything in the trie should probably be done some way other than - /// through an iterator pattern). - class PointerBase { - protected: - void *get() const { return I == -2u ? P : nullptr; } - - public: - PointerBase() noexcept {} - PointerBase(PointerBase &&) = default; - PointerBase(const PointerBase &) = default; - PointerBase &operator=(PointerBase &&) = default; - PointerBase &operator=(const PointerBase &) = default; - - private: - friend class ThreadSafeHashMappedTrieBase; - explicit PointerBase(void *Content) : P(Content), I(-2u) {} - PointerBase(void *P, unsigned I, unsigned B) : P(P), I(I), B(B) {} - - bool isHint() const { return I != -1u && I != -2u; } - - void *P = nullptr; - unsigned I = -1u; - unsigned B = 0; - }; - - PointerBase find(ArrayRef Hash) const; - - /// Insert and return the stored content. 
- PointerBase - insert(PointerBase Hint, ArrayRef Hash, - function_ref Hash)> - Constructor); - - ThreadSafeHashMappedTrieBase() = delete; - - ThreadSafeHashMappedTrieBase(size_t ContentAllocSize, - size_t ContentAllocAlign, size_t ContentOffset, - std::optional NumRootBits = std::nullopt, - std::optional NumSubtrieBits = std::nullopt); - - /// Destructor, which asserts if there's anything to do. Subclasses should - /// call \a destroyImpl(). - /// - /// \pre \a destroyImpl() was already called. - ~ThreadSafeHashMappedTrieBase(); - void destroyImpl(function_ref Destructor); - - ThreadSafeHashMappedTrieBase(ThreadSafeHashMappedTrieBase &&RHS); - - // Move assignment can be implemented in a thread-safe way if NumRootBits and - // NumSubtrieBits are stored inside the Root. - ThreadSafeHashMappedTrieBase & - operator=(ThreadSafeHashMappedTrieBase &&RHS) = delete; - - // No copy. - ThreadSafeHashMappedTrieBase(const ThreadSafeHashMappedTrieBase &) = delete; - ThreadSafeHashMappedTrieBase & - operator=(const ThreadSafeHashMappedTrieBase &) = delete; - -private: - const unsigned short ContentAllocSize; - const unsigned short ContentAllocAlign; - const unsigned short ContentOffset; - unsigned short NumRootBits; - unsigned short NumSubtrieBits; - struct ImplType; - // ImplPtr is owned by ThreadSafeHashMappedTrieBase and needs to be freed in - // destoryImpl. - std::atomic ImplPtr; - ImplType &getOrCreateImpl(); - ImplType *getImpl() const; -}; - -/// Lock-free thread-safe hash-mapped trie. 
-template -class ThreadSafeHashMappedTrie : ThreadSafeHashMappedTrieBase { -public: - using HashT = std::array; - - class LazyValueConstructor; - struct value_type { - const HashT Hash; - T Data; - - value_type(value_type &&) = default; - value_type(const value_type &) = default; - - value_type(ArrayRef Hash, const T &Data) - : Hash(makeHash(Hash)), Data(Data) {} - value_type(ArrayRef Hash, T &&Data) - : Hash(makeHash(Hash)), Data(std::move(Data)) {} - - private: - friend class LazyValueConstructor; - - struct EmplaceTag {}; - template - value_type(ArrayRef Hash, EmplaceTag, ArgsT &&... Args) - : Hash(makeHash(Hash)), Data(std::forward(Args)...) {} - - static HashT makeHash(ArrayRef HashRef) { - HashT Hash; - std::copy(HashRef.begin(), HashRef.end(), Hash.data()); - return Hash; - } - }; - - using ThreadSafeHashMappedTrieBase::operator delete; - using HashType = HashT; - - using ThreadSafeHashMappedTrieBase::dump; - using ThreadSafeHashMappedTrieBase::print; - -private: - template class PointerImpl : PointerBase { - friend class ThreadSafeHashMappedTrie; - - ValueT *get() const { - if (void *B = PointerBase::get()) - return reinterpret_cast(B); - return nullptr; - } - - public: - ValueT &operator*() const { - assert(get()); - return *get(); - } - ValueT *operator->() const { - assert(get()); - return get(); - } - explicit operator bool() const { return get(); } - - PointerImpl() = default; - PointerImpl(PointerImpl &&) = default; - PointerImpl(const PointerImpl &) = default; - PointerImpl &operator=(PointerImpl &&) = default; - PointerImpl &operator=(const PointerImpl &) = default; - - protected: - PointerImpl(PointerBase Result) : PointerBase(Result) {} - }; - -public: - class pointer; - class const_pointer; - class pointer : public PointerImpl { - friend class ThreadSafeHashMappedTrie; - friend class const_pointer; - - public: - pointer() = default; - pointer(pointer &&) = default; - pointer(const pointer &) = default; - pointer &operator=(pointer &&) = default; 
- pointer &operator=(const pointer &) = default; - - private: - pointer(PointerBase Result) : pointer::PointerImpl(Result) {} - }; - - class const_pointer : public PointerImpl { - friend class ThreadSafeHashMappedTrie; - - public: - const_pointer() = default; - const_pointer(const_pointer &&) = default; - const_pointer(const const_pointer &) = default; - const_pointer &operator=(const_pointer &&) = default; - const_pointer &operator=(const const_pointer &) = default; - - const_pointer(const pointer &P) : const_pointer::PointerImpl(P) {} - - private: - const_pointer(PointerBase Result) : const_pointer::PointerImpl(Result) {} - }; - - class LazyValueConstructor { - public: - value_type &operator()(T &&RHS) { - assert(Mem && "Constructor already called, or moved away"); - return assign(::new (Mem) value_type(Hash, std::move(RHS))); - } - value_type &operator()(const T &RHS) { - assert(Mem && "Constructor already called, or moved away"); - return assign(::new (Mem) value_type(Hash, RHS)); - } - template value_type &emplace(ArgsT &&... Args) { - assert(Mem && "Constructor already called, or moved away"); - return assign(::new (Mem) - value_type(Hash, typename value_type::EmplaceTag{}, - std::forward(Args)...)); - } - - LazyValueConstructor(LazyValueConstructor &&RHS) - : Mem(RHS.Mem), Result(RHS.Result), Hash(RHS.Hash) { - RHS.Mem = nullptr; // Moved away, cannot call. - } - ~LazyValueConstructor() { - assert(!Mem && "Constructor never called!"); - } - - private: - value_type &assign(value_type *V) { - Mem = nullptr; - Result = V; - return *V; - } - friend class ThreadSafeHashMappedTrie; - LazyValueConstructor() = delete; - LazyValueConstructor(void *Mem, value_type *&Result, ArrayRef Hash) - : Mem(Mem), Result(Result), Hash(Hash) { - assert(Hash.size() == sizeof(HashT) && "Invalid hash"); - assert(Mem && "Invalid memory for construction"); - } - void *Mem; - value_type *&Result; - ArrayRef Hash; - }; - - /// Insert with a hint. 
Default-constructed hint will work, but it's - /// recommended to start with a lookup to avoid overhead in object creation - /// if it already exists. - pointer insertLazy(const_pointer Hint, ArrayRef Hash, - function_ref OnConstruct) { - return pointer(ThreadSafeHashMappedTrieBase::insert( - Hint, Hash, [&](void *Mem, ArrayRef Hash) { - value_type *Result = nullptr; - OnConstruct(LazyValueConstructor(Mem, Result, Hash)); - return Result->Hash.data(); - })); - } - - pointer insertLazy(ArrayRef Hash, - function_ref OnConstruct) { - return insertLazy(const_pointer(), Hash, OnConstruct); - } - - pointer insert(const_pointer Hint, value_type &&HashedData) { - return insertLazy(Hint, HashedData.Hash, - [&](LazyValueConstructor C) { - C(std::move(HashedData.Data)); - }); - } - - pointer insert(const_pointer Hint, const value_type &HashedData) { - return insertLazy(Hint, HashedData.Hash, - [&](LazyValueConstructor C) { - C(HashedData.Data); - }); - } - - pointer find(ArrayRef Hash) { - assert(Hash.size() == std::tuple_size::value); - return ThreadSafeHashMappedTrieBase::find(Hash); - } - - const_pointer find(ArrayRef Hash) const { - assert(Hash.size() == std::tuple_size::value); - return ThreadSafeHashMappedTrieBase::find(Hash); - } - - ThreadSafeHashMappedTrie(std::optional NumRootBits = std::nullopt, - std::optional NumSubtrieBits = std::nullopt) - : ThreadSafeHashMappedTrieBase(getContentAllocSize(), - getContentAllocAlign(), - getContentOffset(), - NumRootBits, NumSubtrieBits) {} - - ~ThreadSafeHashMappedTrie() { - if (std::is_trivially_destructible::value) - this->destroyImpl(nullptr); - else - this->destroyImpl( - [](void *P) { static_cast(P)->~value_type(); }); - } - - // Move constructor okay. - ThreadSafeHashMappedTrie(ThreadSafeHashMappedTrie &&) = default; - - // No move assignment or any copy. 
- ThreadSafeHashMappedTrie &operator=(ThreadSafeHashMappedTrie &&) = delete; - ThreadSafeHashMappedTrie(const ThreadSafeHashMappedTrie &) = delete; - ThreadSafeHashMappedTrie & - operator=(const ThreadSafeHashMappedTrie &) = delete; -}; - -} // namespace cas -} // namespace llvm - -#endif // LLVM_CAS_HASHMAPPEDTRIE_H diff --git a/llvm/include/llvm/CAS/HierarchicalTreeBuilder.h b/llvm/include/llvm/CAS/HierarchicalTreeBuilder.h index c3ed06164c0f8..2bde171aad31e 100644 --- a/llvm/include/llvm/CAS/HierarchicalTreeBuilder.h +++ b/llvm/include/llvm/CAS/HierarchicalTreeBuilder.h @@ -50,8 +50,8 @@ class HierarchicalTreeBuilder { SmallVector Entries; SmallVector TreeContents; - LLVM_ABI void pushImpl(std::optional Ref, - TreeEntry::EntryKind Kind, const Twine &Path); + void pushImpl(std::optional Ref, TreeEntry::EntryKind Kind, + const Twine &Path); public: HierarchicalTreeBuilder(sys::path::Style PathStyle = sys::path::Style::native) @@ -85,7 +85,7 @@ class HierarchicalTreeBuilder { /// Recursively create the trees implied by calls to \a push(), return the /// top-level \a CASID. - LLVM_ABI Expected create(ObjectStore &CAS); + Expected create(ObjectStore &CAS); }; } // namespace cas diff --git a/llvm/include/llvm/CAS/MappedFileRegionBumpPtr.h b/llvm/include/llvm/CAS/MappedFileRegionArena.h similarity index 58% rename from llvm/include/llvm/CAS/MappedFileRegionBumpPtr.h rename to llvm/include/llvm/CAS/MappedFileRegionArena.h index 71aaf438a43cb..a00bfa7306ef6 100644 --- a/llvm/include/llvm/CAS/MappedFileRegionBumpPtr.h +++ b/llvm/include/llvm/CAS/MappedFileRegionArena.h @@ -1,15 +1,20 @@ -//===- MappedFileRegionBumpPtr.h --------------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +// +/// \file +/// This file declares the interface for MappedFileRegionArena, a bump pointer +/// allocator backed by a memory-mapped file. +/// +//===----------------------------------------------------------------------===// -#ifndef LLVM_CAS_MAPPEDFILEREGIONBUMPPTR_H -#define LLVM_CAS_MAPPEDFILEREGIONBUMPPTR_H +#ifndef LLVM_CAS_MAPPEDFILEREGIONARENA_H +#define LLVM_CAS_MAPPEDFILEREGIONARENA_H -#include "llvm/Config/llvm-config.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/FileSystem.h" #include @@ -18,7 +23,7 @@ namespace llvm::cas { namespace ondisk { class OnDiskCASLogger; -} +} // namespace ondisk /// Allocator for an owned mapped file region that supports thread-safe and /// process-safe bump pointer allocation. @@ -31,33 +36,39 @@ class OnDiskCASLogger; /// Process-safe. Uses file locks when resizing the file during initialization /// and destruction. /// -/// Thread-safe, assuming all threads use the same instance to talk to a given -/// file/mapping. Unsafe to have multiple instances talking to the same file -/// in the same process since file locks will misbehave. Clients should -/// coordinate (somehow). -/// -/// \note Currently we allocate the whole file without sparseness on Windows. +/// Thread-safe. Requires the OS to support thread-safe file locks. /// /// Provides 8-byte alignment for all allocations. -class MappedFileRegionBumpPtr { +class MappedFileRegionArena { public: using RegionT = sys::fs::mapped_file_region; - /// Create a \c MappedFileRegionBumpPtr. + /// Header for MappedFileRegionArena. It can be configured to be located + /// at any location within the file and the allocation will be appended after + /// the header. + struct Header { + // BumpPtr for new allocation. + std::atomic BumpPtr; + // Allocated size on disk. + std::atomic AllocatedSize; + // Capacity of the file.
+ std::atomic Capacity; + // Offset from the beginning of the file to this header (for verification). + std::atomic HeaderOffset; + }; + + /// Create a \c MappedFileRegionArena. /// /// \param Path the path to open the mapped region. /// \param Capacity the maximum size for the mapped file region. - /// \param BumpPtrOffset the offset at which to store the bump pointer. + /// \param HeaderOffset the offset at which to store the header. This is so + /// that information can be stored before the header, like a file magic. /// \param NewFileConstructor is for constructing new files. It has exclusive /// access to the file. Must call \c initializeBumpPtr. - LLVM_ABI_FOR_TEST static Expected - create(const Twine &Path, uint64_t Capacity, int64_t BumpPtrOffset, + LLVM_ABI_FOR_TEST static Expected + create(const Twine &Path, uint64_t Capacity, uint64_t HeaderOffset, std::shared_ptr Logger, - function_ref NewFileConstructor); - - /// Finish initializing the bump pointer. Must be called by - /// \c NewFileConstructor. - void initializeBumpPtr(int64_t BumpPtrOffset); + function_ref NewFileConstructor); /// Minimum alignment for allocations, currently hardcoded to 8B. 
static constexpr Align getAlign() { @@ -83,22 +94,25 @@ class MappedFileRegionBumpPtr { RegionT &getRegion() { return Region; } - ~MappedFileRegionBumpPtr() { destroyImpl(); } + ~MappedFileRegionArena() { destroyImpl(); } - MappedFileRegionBumpPtr() = default; - MappedFileRegionBumpPtr(MappedFileRegionBumpPtr &&RHS) { moveImpl(RHS); } - MappedFileRegionBumpPtr &operator=(MappedFileRegionBumpPtr &&RHS) { + MappedFileRegionArena() = default; + MappedFileRegionArena(MappedFileRegionArena &&RHS) { moveImpl(RHS); } + MappedFileRegionArena &operator=(MappedFileRegionArena &&RHS) { destroyImpl(); moveImpl(RHS); return *this; } - MappedFileRegionBumpPtr(const MappedFileRegionBumpPtr &) = delete; - MappedFileRegionBumpPtr &operator=(const MappedFileRegionBumpPtr &) = delete; + MappedFileRegionArena(const MappedFileRegionArena &) = delete; + MappedFileRegionArena &operator=(const MappedFileRegionArena &) = delete; private: + // initialize header from offset. + Error initializeHeader(uint64_t HeaderOffset); + LLVM_ABI_FOR_TEST void destroyImpl(); - void moveImpl(MappedFileRegionBumpPtr &RHS) { + void moveImpl(MappedFileRegionArena &RHS) { std::swap(Region, RHS.Region); std::swap(H, RHS.H); std::swap(Path, RHS.Path); @@ -108,18 +122,16 @@ class MappedFileRegionBumpPtr { } private: - struct Header { - std::atomic BumpPtr; - std::atomic AllocatedSize; - }; RegionT Region; Header *H = nullptr; std::string Path; + // File descriptor for the main storage file. std::optional FD; + // File descriptor for the file used as reader/writer lock. 
std::optional SharedLockFD; std::shared_ptr Logger = nullptr; }; } // namespace llvm::cas -#endif // LLVM_CAS_MAPPEDFILEREGIONBUMPPTR_H +#endif // LLVM_CAS_MAPPEDFILEREGIONARENA_H diff --git a/llvm/include/llvm/CAS/NamedValuesSchema.h b/llvm/include/llvm/CAS/NamedValuesSchema.h new file mode 100644 index 0000000000000..cf9652a58a26b --- /dev/null +++ b/llvm/include/llvm/CAS/NamedValuesSchema.h @@ -0,0 +1,160 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the declarations for the NamedValuesSchema, a schema to +/// represent an array of named nodes inside CAS. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CAS_NAMEDVALUESSCHEMA_H +#define LLVM_CAS_NAMEDVALUESSCHEMA_H + +#include "llvm/CAS/CASNodeSchema.h" +#include "llvm/CAS/ObjectStore.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Compiler.h" + +namespace llvm::cas { + +class NamedValuesProxy; + +/// Represents an entry in NamedValuesSchema. +struct NamedValuesEntry { + NamedValuesEntry(StringRef Name, ObjectRef Ref) : Name(Name), Ref(Ref) {} + + StringRef Name; + ObjectRef Ref; + + friend bool operator==(const NamedValuesEntry &LHS, + const NamedValuesEntry &RHS) { + return LHS.Ref == RHS.Ref && LHS.Name == RHS.Name; + } + + /// Ordering the entries by name. Items should have unique names. + friend bool operator<(const NamedValuesEntry &LHS, + const NamedValuesEntry &RHS) { + return LHS.Name < RHS.Name; + } +}; + +/// A schema for representing an array of named nodes in a CAS. 
The names of the +/// nodes are stored in the root node so child nodes can be loaded on demand +/// based on name, and the names of all nodes need to be unique. +class LLVM_ABI NamedValuesSchema + : public RTTIExtends { + void anchor() override; + +public: + static char ID; + + bool isRootNode(const ObjectProxy &Node) const final { + // NamedValuesSchema only has one node, thus it is the root node. + return isNode(Node); + } + + /// Check if a proxy represents a valid node. + bool isNode(const ObjectProxy &Node) const final; + + /// Create a NamedValuesSchema. + static Expected create(ObjectStore &CAS); + + /// Load NamedValuesProxy from an ObjectRef. + Expected load(ObjectRef Object) const; + + /// Load NamedValuesProxy from an ObjectProxy. + Expected load(ObjectProxy Object) const; + + /// Construct a \c NamedValuesSchema CAS object with the given entries. + Expected construct(ArrayRef Entries); + + /// A builder class for creating nodes in NamedValuesSchema. + class Builder { + public: + Builder(ObjectStore &CAS) : CAS(CAS) {} + + /// Add an entry to the builder. + LLVM_ABI void add(StringRef Name, ObjectRef Ref); + + /// Build the node from added entries. + LLVM_ABI Expected build(); + + private: + ObjectStore &CAS; + SmallVector Nodes; + BumpPtrAllocator Alloc; + }; + +private: + friend class NamedValuesProxy; + + NamedValuesSchema(ObjectStore &CAS, Error &E); + + /// Get the number of entries. + size_t getNumEntries(NamedValuesProxy Values) const; + + /// Iterate over entries with a callback. + Error + forEachEntry(NamedValuesProxy Values, + function_ref Callback) const; + + /// Lookup an entry by name. + std::optional lookupEntry(NamedValuesProxy Values, + StringRef Name) const; + + /// Load an entry by index. + NamedValuesEntry loadEntry(NamedValuesProxy Values, size_t I) const; + + /// Name for the schema.
+ static constexpr StringLiteral SchemaName = + "llvm::cas::schema::namedvalues::v1"; + std::optional NamedValuesKindRef; +}; + +/// A proxy for a loaded CAS Object in NamedValuesSchema. +class NamedValuesProxy : public ObjectProxy { +public: + /// Get the schema associated with this proxy. + const NamedValuesSchema &getSchema() const { return *Schema; } + + /// Iterate over entries with a callback. + Error + forEachEntry(function_ref Callback) const { + return Schema->forEachEntry(*this, Callback); + } + + /// Check if the object is empty. + bool empty() const { return size() == 0; } + + /// Get the number of entries in the CAS object. + size_t size() const { return Schema->getNumEntries(*this); } + + /// Lookup an entry by name. + std::optional lookup(StringRef Name) const { + if (auto I = Schema->lookupEntry(*this, Name)) + return get(*I); + return std::nullopt; + } + + /// Get the name of an entry by index. + LLVM_ABI StringRef getName(size_t I) const; + + /// Get an entry by index. + NamedValuesEntry get(size_t I) const { return Schema->loadEntry(*this, I); } + +private: + NamedValuesProxy(const NamedValuesSchema &Schema, const ObjectProxy &Node) + : ObjectProxy(Node), Schema(&Schema) {} + + friend class NamedValuesSchema; + const NamedValuesSchema *Schema; +}; + +} // namespace llvm::cas + +#endif diff --git a/llvm/include/llvm/CAS/ObjectStore.h b/llvm/include/llvm/CAS/ObjectStore.h index 021c97d092bdd..6383b3f3094f3 100644 --- a/llvm/include/llvm/CAS/ObjectStore.h +++ b/llvm/include/llvm/CAS/ObjectStore.h @@ -5,6 +5,11 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the declaration of the ObjectStore class. 
+/// +//===----------------------------------------------------------------------===// #ifndef LLVM_CAS_OBJECTSTORE_H #define LLVM_CAS_OBJECTSTORE_H @@ -12,9 +17,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/CAS/CASID.h" #include "llvm/CAS/CASReference.h" -#include "llvm/CAS/TreeEntry.h" #include "llvm/Support/Error.h" -#include "llvm/Support/FileSystem.h" // FIXME: Split out sys::fs::file_status. +#include "llvm/Support/FileSystem.h" #include #include @@ -73,6 +77,9 @@ using AsyncProxyValue = AsyncValue; /// wraps access APIs to avoid having to pass extra parameters. It is the /// object used for accessing underlying data and refs by CAS users. /// +/// Both ObjectRef and ObjectHandle are lightweight, wrapping a `uint64_t` and +/// are only valid with the associated ObjectStore instance. +/// /// There are a few options for accessing content of objects, with different /// lifetime tradeoffs: /// @@ -83,50 +90,6 @@ using AsyncProxyValue = AsyncValue; /// long as \a ObjectStore. /// - \a readRef() and \a forEachRef() iterate through the references in an /// object. There is no lifetime assumption. -/// -/// Both ObjectRef and ObjectHandle are lightweight, wrapping a `uint64_t`. -/// Doing anything with them requires a ObjectStore. As a convenience: -/// -/// -/// TODO: Remove CASID. -/// -/// Here's how to remove CASID: -/// -/// - Add APIs for bypassing CASID when parsing: -/// - Validate an ID without doing anything else (current check done by -/// `parseID()`). -/// - Get the hash for an object or StringRef-based ID. -/// - Get an ObjectRef or load an ObjectHandle from a StringRef-based ID. -/// - Update existing code using CASID to use the new ObjectRef, -/// ObjectHandle, and StringRef APIs. -/// - Remove CASID, changing `getObjectID()` to return `std::string`. 
-/// -/// TODO: Consider optimizing small and/or string-like leaf objects: -/// -/// - \a NodeBuilder and \a NodeReader interfaces can bring some of the same -/// gains without adding complexity to \a ObjectStore. E.g., \a NodeBuilder -/// could have an API to add a named field to a node under construction; if -/// the name is small enough, it's stored locally in the node's own data, but -/// if it's bigger then it's outlined to a separate CAS object. \a NodeReader -/// could handle the complications of reading. -/// - Implementations can do fast lookups of small objects by adding a -/// content-based index for them (prefix tree / suffix tree of content), -/// amortizing overhead of hash computation in \a storeNode(). -/// - Implementations could remove small leaf objects from the main index, -/// indexing them separately with a partial hash (e.g., 4B prefix), to -/// optimize storage overhead (32B hash is big for small objects!). Lookups -/// by UID that miss the main index would get more expensive, requiring a -/// hash computation for each small object with a matching partial hash, but -/// maybe this would be rare. To mitigate this cost, small leaf objects could -/// get added to the main index lazily on first lookup-by-UID, lazily adding -/// the full overhead of the hash storage only when used by clients. -/// - NOTE: we tried adding an API to store "raw data" that can be optimized, -/// but it was very complicated to reason about. -/// - Introduced many opportunities for implementation bugs. -/// - Introduced many complications in the API. -/// -/// FIXME: Split out ActionCache as a separate concept, and rename this -/// ObjectStore. class ObjectStore { friend class ObjectProxy; void anchor(); @@ -202,7 +165,8 @@ class ObjectStore { /// Get the size of some data. virtual uint64_t getDataSize(ObjectHandle Node) const = 0; - /// Methods for handling objects. + /// Methods for handling objects. 
CAS implementations need to override to + /// provide functions to access stored CAS objects and references. virtual Error forEachRef(ObjectHandle Node, function_ref Callback) const = 0; virtual ObjectRef readRef(ObjectHandle Node, size_t I) const = 0; @@ -270,7 +234,7 @@ class ObjectStore { return storeFromOpenFileImpl(FD, Status); } - LLVM_ABI static Error createUnknownObjectError(const CASID &ID); + static Error createUnknownObjectError(const CASID &ID); /// Create ObjectProxy from CASID. If the object doesn't exist, get an error. LLVM_ABI Expected getProxy(const CASID &ID); @@ -279,10 +243,10 @@ class ObjectStore { LLVM_ABI Expected getProxy(ObjectRef Ref); /// \returns \c std::nullopt if the object is missing from the CAS. - LLVM_ABI Expected> getProxyIfExists(ObjectRef Ref); + Expected> getProxyIfExists(ObjectRef Ref); /// Asynchronous version of \c getProxyIfExists. - LLVM_ABI std::future getProxyFuture(ObjectRef Ref); + std::future getProxyFuture(ObjectRef Ref); /// Asynchronous version of \c getProxyIfExists using a callback. /// \param[out] CancelObj Optional pointer to receive a cancellation object. @@ -291,7 +255,7 @@ class ObjectStore { unique_function>)> Callback, std::unique_ptr *CancelObj = nullptr); /// Asynchronous version of \c getProxyIfExists using a callback. - LLVM_ABI void getProxyAsync( + void getProxyAsync( ObjectRef Ref, unique_function>)> Callback, std::unique_ptr *CancelObj = nullptr); @@ -309,7 +273,7 @@ class ObjectStore { /// Set the size for limiting growth of on-disk storage. This has an effect /// for when the instance is closed. /// - /// Implementations may be not have this implemented. + /// Implementations may leave this unimplemented. virtual Error setSizeLimit(std::optional SizeLimit) { return Error::success(); } @@ -325,7 +289,7 @@ class ObjectStore { /// Prune local storage to reduce its size according to the desired size /// limit. Pruning can happen concurrently with other operations. 
/// - /// Implementations may be not have this implemented. + /// Implementations may leave this unimplemented. virtual Error pruneStorageData() { return Error::success(); } /// Validate the whole node tree. @@ -355,19 +319,14 @@ class ObjectStore { /// Reference to an abstract hierarchical node, with data and references. /// Reference is passed by value and is expected to be valid as long as the \a /// ObjectStore is. -/// -/// TODO: Expose \a ObjectStore::readData() and only call \a -/// ObjectStore::getDataString() when asked. class ObjectProxy { public: - const ObjectStore &getCAS() const { return *CAS; } - ObjectStore &getCAS() { return *CAS; } + ObjectStore &getCAS() const { return *CAS; } CASID getID() const { return CAS->getID(Ref); } ObjectRef getRef() const { return Ref; } size_t getNumReferences() const { return CAS->getNumRefs(H); } ObjectRef getReference(size_t I) const { return CAS->readRef(H, I); } - // FIXME: Remove this. operator CASID() const { return getID(); } CASID getReferenceID(size_t I) const { std::optional ID = getCAS().getID(getReference(I)); @@ -381,7 +340,7 @@ class ObjectProxy { return CAS->forEachRef(H, Callback); } - LLVM_ABI std::unique_ptr + std::unique_ptr getMemoryBuffer(StringRef Name = "", bool RequiresNullTerminator = true) const; @@ -428,31 +387,21 @@ LLVM_ABI std::unique_ptr createInMemoryCAS(); /// \returns true if \c LLVM_ENABLE_ONDISK_CAS configuration was enabled. bool isOnDiskCASEnabled(); -/// Gets or creates a persistent on-disk path at \p Path. -/// -/// Deprecated: if \p Path resolves to \a getDefaultOnDiskCASStableID(), -/// automatically opens \a getDefaultOnDiskCASPath() instead. -/// -/// FIXME: Remove the special behaviour for getDefaultOnDiskCASStableID(). The -/// client should handle this logic, if/when desired. +/// Create a persistent on-disk path at \p Path. 
LLVM_ABI Expected> createOnDiskCAS(const Twine &Path); /// Set \p Path to a reasonable default on-disk path for a persistent CAS for /// the current user. -LLVM_ABI void getDefaultOnDiskCASPath(SmallVectorImpl &Path); +Error getDefaultOnDiskCASPath(SmallVectorImpl &Path); /// Get a reasonable default on-disk path for a persistent CAS for the current /// user. -LLVM_ABI std::string getDefaultOnDiskCASPath(); +llvm::Expected getDefaultOnDiskCASPath(); -/// FIXME: Remove. -void getDefaultOnDiskCASStableID(SmallVectorImpl &Path); - -/// FIXME: Remove. -std::string getDefaultOnDiskCASStableID(); +class ActionCache; -/// Create ObjectStore from a string identifier. +/// Create ObjectStore and ActionCache from a string identifier. /// Currently the string identifier is using URL scheme with following supported /// schemes: /// * InMemory CAS: mem:// @@ -467,18 +416,17 @@ std::string getDefaultOnDiskCASStableID(); /// on-disk directory that the plugin should use, otherwise the default /// OnDiskCAS location will be used. /// FIXME: Need to implement proper URL encoding scheme that allows "%". -Expected> createCASFromIdentifier(StringRef Path); +Expected, std::shared_ptr>> +createCASFromIdentifier(StringRef Path); /// Register a URL scheme to CAS Identifier. -using ObjectStoreCreateFuncTy = - Expected>(const Twine &); +using ObjectStoreCreateFuncTy = Expected< + std::pair, std::shared_ptr>>( + const Twine &); void registerCASURLScheme(StringRef Prefix, ObjectStoreCreateFuncTy *Func); -class ActionCache; - /// Create \c ObjectStore and \c ActionCache instances using the plugin /// interface. 
-LLVM_ABI Expected, std::shared_ptr>> createPluginCASDatabases( StringRef PluginPath, StringRef OnDiskPath, diff --git a/llvm/include/llvm/CAS/OnDiskCASLogger.h b/llvm/include/llvm/CAS/OnDiskCASLogger.h index e0c21b771b11d..1bea1e082c2f9 100644 --- a/llvm/include/llvm/CAS/OnDiskCASLogger.h +++ b/llvm/include/llvm/CAS/OnDiskCASLogger.h @@ -1,14 +1,21 @@ -//===- OnDiskCASLogger.h ----------------------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +// +/// \file +/// This file declares interface for OnDiskCASLogger, an interface that can be +/// used to log CAS events to help debugging CAS errors. +/// +//===----------------------------------------------------------------------===// #ifndef LLVM_CAS_ONDISKLOGGER_H #define LLVM_CAS_ONDISKLOGGER_H +#include "llvm/Support/Compiler.h" #include "llvm/Support/Error.h" #include @@ -30,46 +37,48 @@ class OnDiskCASLogger { /// \param Path The parent directory of the log file. /// \param LogAllocations Whether to log all low-level allocations. This is /// on the order of twice as expensive to log. - static Expected> open(const Twine &Path, - bool LogAllocations); + LLVM_ABI static Expected> + open(const Twine &Path, bool LogAllocations); /// Create or append to a log file inside the given CAS directory \p Path if /// logging is enabled by the environment variable \c LLVM_CAS_LOG. If /// LLVM_CAS_LOG is set >= 2 then also log allocations. - static Expected> + LLVM_ABI static Expected> openIfEnabled(const Twine &Path); - ~OnDiskCASLogger(); + LLVM_ABI ~OnDiskCASLogger(); - /// An offset into an \c OnDiskHashMappedTrie. + /// An offset into an \c OnDiskTrieRawHashMap. 
using TrieOffset = int64_t; - void log_compare_exchange_strong(void *Region, TrieOffset Trie, size_t SlotI, - TrieOffset Expected, TrieOffset New, - TrieOffset Previous); - void log_SubtrieHandle_create(void *Region, TrieOffset Trie, - uint32_t StartBit, uint32_t NumBits); - void log_HashMappedTrieHandle_createRecord(void *Region, - TrieOffset TrieOffset, - ArrayRef Hash); - void log_MappedFileRegionBumpPtr_resizeFile(StringRef Path, size_t Before, - size_t After); - void log_MappedFileRegionBumpPtr_create(StringRef Path, int FD, void *Region, - size_t Capacity, size_t Size); - void log_MappedFileRegionBumpPtr_oom(StringRef Path, size_t Capacity, - size_t Size, size_t AllocSize); - void log_MappedFileRegionBumpPtr_close(StringRef Path); - void log_MappedFileRegionBumpPtr_allocate(void *Region, TrieOffset Off, - size_t Size); - void log_UnifiedOnDiskCache_collectGarbage(StringRef Path); - void log_UnifiedOnDiskCache_validateIfNeeded( + LLVM_ABI void logSubtrieHandleCmpXchg(void *Region, TrieOffset Trie, + size_t SlotI, TrieOffset Expected, + TrieOffset New, TrieOffset Previous); + LLVM_ABI void logSubtrieHandleCreate(void *Region, TrieOffset Trie, + uint32_t StartBit, uint32_t NumBits); + LLVM_ABI void logHashMappedTrieHandleCreateRecord(void *Region, + TrieOffset TrieOffset, + ArrayRef Hash); + LLVM_ABI void logMappedFileRegionArenaResizeFile(StringRef Path, + size_t Before, size_t After); + LLVM_ABI void logMappedFileRegionArenaCreate(StringRef Path, int FD, + void *Region, size_t Capacity, + size_t Size); + LLVM_ABI void logMappedFileRegionArenaOom(StringRef Path, size_t Capacity, + size_t Size, size_t AllocSize); + LLVM_ABI void logMappedFileRegionArenaClose(StringRef Path); + LLVM_ABI void logMappedFileRegionArenaAllocate(void *Region, TrieOffset Off, + size_t Size); + LLVM_ABI void logUnifiedOnDiskCacheCollectGarbage(StringRef Path); + LLVM_ABI void logUnifiedOnDiskCacheValidateIfNeeded( StringRef Path, uint64_t BootTime, uint64_t ValidationTime, bool 
CheckHash, bool AllowRecovery, bool Force, std::optional LLVMCas, StringRef ValidationError, bool Skipped, bool Recovered); - void log_TempFile_create(StringRef Name); - void log_TempFile_keep(StringRef TmpName, StringRef Name, std::error_code EC); - void log_TempFile_remove(StringRef TmpName, std::error_code EC); + LLVM_ABI void logTempFileCreate(StringRef Name); + LLVM_ABI void logTempFileKeep(StringRef TmpName, StringRef Name, + std::error_code EC); + LLVM_ABI void logTempFileRemove(StringRef TmpName, std::error_code EC); private: OnDiskCASLogger(raw_fd_ostream &OS, bool LogAllocations); diff --git a/llvm/include/llvm/CAS/OnDiskDataAllocator.h b/llvm/include/llvm/CAS/OnDiskDataAllocator.h new file mode 100644 index 0000000000000..f4c8bcb05ce69 --- /dev/null +++ b/llvm/include/llvm/CAS/OnDiskDataAllocator.h @@ -0,0 +1,98 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file declares the interface for OnDiskDataAllocator, a file-backed data +/// pool that can be used to allocate space to store data packed in a single file. It +/// is based on MappedFileRegionArena and includes a header at the beginning to +/// provide metadata. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CAS_ONDISKDATAALLOCATOR_H +#define LLVM_CAS_ONDISKDATAALLOCATOR_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/CAS/FileOffset.h" +#include "llvm/CAS/OnDiskCASLogger.h" +#include "llvm/Support/Error.h" + +namespace llvm::cas { + +/// Sink for data. Stores variable length data with 8-byte alignment. Does not +/// track size of data, which is assumed to be known from context, or embedded.
+/// Uses 0-padding but does not guarantee 0-termination. +class OnDiskDataAllocator { +public: + using ValueProxy = MutableArrayRef; + + /// A pointer to data stored on disk. + class OnDiskPtr { + public: + FileOffset getOffset() const { return Offset; } + explicit operator bool() const { return bool(getOffset()); } + const ValueProxy &operator*() const { + assert(Offset && "Null dereference"); + return Value; + } + const ValueProxy *operator->() const { + assert(Offset && "Null dereference"); + return &Value; + } + + OnDiskPtr() = default; + + private: + friend class OnDiskDataAllocator; + OnDiskPtr(FileOffset Offset, ValueProxy Value) + : Offset(Offset), Value(Value) {} + FileOffset Offset; + ValueProxy Value; + }; + + /// Get the data of \p Size stored at the given \p Offset. Note the allocator + /// doesn't keep track of the allocation size, thus \p Size doesn't need to + /// match the size of allocation but needs to be smaller. + LLVM_ABI_FOR_TEST Expected> get(FileOffset Offset, + size_t Size) const; + + /// Allocate at least \p Size with 8-byte alignment. + LLVM_ABI_FOR_TEST Expected allocate(size_t Size); + + /// \returns the buffer that was allocated at \p create time, with size + /// \p UserHeaderSize. + MutableArrayRef getUserHeader() const; + + LLVM_ABI_FOR_TEST size_t size() const; + LLVM_ABI_FOR_TEST size_t capacity() const; + + LLVM_ABI_FOR_TEST static Expected + create(const Twine &Path, const Twine &TableName, uint64_t MaxFileSize, + std::optional NewFileInitialSize, + uint32_t UserHeaderSize = 0, + std::shared_ptr Logger = nullptr, + function_ref UserHeaderInit = nullptr); + + LLVM_ABI_FOR_TEST OnDiskDataAllocator(OnDiskDataAllocator &&RHS); + LLVM_ABI_FOR_TEST OnDiskDataAllocator &operator=(OnDiskDataAllocator &&RHS); + + // No copy. Just call \a create() again. 
+ OnDiskDataAllocator(const OnDiskDataAllocator &) = delete; + OnDiskDataAllocator &operator=(const OnDiskDataAllocator &) = delete; + + LLVM_ABI_FOR_TEST ~OnDiskDataAllocator(); + +private: + struct ImplType; + explicit OnDiskDataAllocator(std::unique_ptr Impl); + std::unique_ptr Impl; +}; + +} // namespace llvm::cas + +#endif // LLVM_CAS_ONDISKDATAALLOCATOR_H diff --git a/llvm/include/llvm/CAS/OnDiskGraphDB.h b/llvm/include/llvm/CAS/OnDiskGraphDB.h index 3d4991627dcf5..367917bf5df6c 100644 --- a/llvm/include/llvm/CAS/OnDiskGraphDB.h +++ b/llvm/include/llvm/CAS/OnDiskGraphDB.h @@ -1,29 +1,36 @@ -//===- OnDiskGraphDB.h ------------------------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +// +/// \file +/// This declares OnDiskGraphDB, an ondisk CAS database with a fixed length +/// hash. This is the class that implements the database storage scheme without +/// exposing the hashing algorithm. +// +//===----------------------------------------------------------------------===// #ifndef LLVM_CAS_ONDISKGRAPHDB_H #define LLVM_CAS_ONDISKGRAPHDB_H #include "llvm/ADT/PointerUnion.h" -#include "llvm/CAS/OnDiskHashMappedTrie.h" +#include "llvm/CAS/OnDiskCASLogger.h" +#include "llvm/CAS/OnDiskDataAllocator.h" +#include "llvm/CAS/OnDiskTrieRawHashMap.h" +#include namespace llvm::cas::ondisk { -/// 8B reference. +/// Standard 8 byte reference inside OnDiskGraphDB. 
class InternalRef { public: - FileOffset getFileOffset() const { return FileOffset(getRawOffset()); } - + FileOffset getFileOffset() const { return FileOffset(Data); } uint64_t getRawData() const { return Data; } - uint64_t getRawOffset() const { return Data; } static InternalRef getFromRawData(uint64_t Data) { return InternalRef(Data); } - static InternalRef getFromOffset(FileOffset Offset) { return InternalRef(Offset.get()); } @@ -38,19 +45,17 @@ class InternalRef { uint64_t Data; }; -/// 4B reference. +/// Compact 4 byte reference inside OnDiskGraphDB for smaller references. class InternalRef4B { public: FileOffset getFileOffset() const { return FileOffset(Data); } - uint32_t getRawData() const { return Data; } /// Shrink to 4B reference. static std::optional tryToShrink(InternalRef Ref) { - uint64_t Offset = Ref.getRawOffset(); + uint64_t Offset = Ref.getRawData(); if (Offset > UINT32_MAX) return std::nullopt; - return InternalRef4B(Offset); } @@ -146,10 +151,9 @@ class InternalRefArrayRef { if (is4B()) { auto *B = cast(Begin); return ArrayRef((const uint8_t *)B, sizeof(InternalRef4B) * Size); - } else { - auto *B = cast(Begin); - return ArrayRef((const uint8_t *)B, sizeof(InternalRef) * Size); } + auto *B = cast(Begin); + return ArrayRef((const uint8_t *)B, sizeof(InternalRef) * Size); } InternalRefArrayRef(std::nullopt_t = std::nullopt) { @@ -170,8 +174,6 @@ class InternalRefArrayRef { size_t Size = 0; }; -struct OnDiskContent; - /// Reference to a node. The node's data may not be stored in the database. /// An \p ObjectID instance can only be used with the \p OnDiskGraphDB instance /// it came from. \p ObjectIDs from different \p OnDiskGraphDB instances are not @@ -197,11 +199,11 @@ class ObjectID { /// Handle for a loaded node object. 
class ObjectHandle { public: + explicit ObjectHandle(uint64_t Opaque) : Opaque(Opaque) {} uint64_t getOpaqueData() const { return Opaque; } - static ObjectHandle fromOpaqueData(uint64_t Opaque) { - return ObjectHandle(Opaque); - } + static ObjectHandle fromFileOffset(FileOffset Offset); + static ObjectHandle fromMemory(uintptr_t Ptr); friend bool operator==(const ObjectHandle &LHS, const ObjectHandle &RHS) { return LHS.Opaque == RHS.Opaque; @@ -211,10 +213,10 @@ class ObjectHandle { } private: - explicit ObjectHandle(uint64_t Opaque) : Opaque(Opaque) {} uint64_t Opaque; }; +/// Iterator for ObjectID. class object_refs_iterator : public iterator_facade_base { @@ -280,7 +282,8 @@ class OnDiskGraphDB { /// \returns the hash bytes digest for the object reference. ArrayRef getDigest(ObjectID Ref) const { - return getDigest(getInternalRef(Ref)); + // ObjectID should be valid to fetch Digest. + return cantFail(getDigest(getInternalRef(Ref))); } /// Form a reference for the provided hash. The reference can be used as part @@ -300,7 +303,12 @@ class OnDiskGraphDB { /// Check whether the object associated with \p Ref is stored in the CAS. /// Note that this function does not fault-in. bool containsObject(ObjectID Ref, bool CheckUpstream = true) const { - switch (getObjectPresence(Ref, CheckUpstream)) { + auto Presence = getObjectPresence(Ref, CheckUpstream); + if (!Presence) { + consumeError(Presence.takeError()); + return false; + } + switch (*Presence) { case ObjectPresence::Missing: return false; case ObjectPresence::InPrimaryDB: @@ -308,11 +316,13 @@ class OnDiskGraphDB { case ObjectPresence::OnlyInUpstreamDB: return true; } + llvm_unreachable("Unknown ObjectPresence enum"); } /// \returns the data part of the provided object handle. LLVM_ABI_FOR_TEST ArrayRef getObjectData(ObjectHandle Node) const; + /// \returns the object referenced by the provided object handle. 
object_refs_range getObjectRefs(ObjectHandle Node) const { InternalRefArrayRef Refs = getInternalRefs(Node); return make_range(Refs.begin(), Refs.end()); } @@ -359,6 +369,13 @@ class OnDiskGraphDB { /// Hashing function type for validation. using HashingFuncT = function_ref>, ArrayRef, SmallVectorImpl &)>; + + /// Validate the OnDiskGraphDB. + /// + /// \param Deep if true, rehash all the objects to ensure no data + /// corruption in stored objects, otherwise just validate the structure of + /// CAS database. + /// \param Hasher is the hashing function used for objects inside CAS. Error validate(bool Deep, HashingFuncT Hasher) const; /// Checks that \p ID exists in the index. It is allowed to not have data @@ -382,22 +399,24 @@ class OnDiskGraphDB { /// \param HashByteSize Size for the object digest hash bytes. /// \param UpstreamDB Optional on-disk store to be used for faulting-in nodes /// if they don't exist in the primary store. The upstream store is only used - /// for reading nodes, new nodes are only written to the primary store. + /// for reading nodes, new nodes are only written to the primary store. Users + /// need to make sure \p UpstreamDB outlives the current instance of + /// OnDiskGraphDB and the common usage is to have a \p UnifiedOnDiskCache to + /// manage both. /// \param Policy If \p UpstreamDB is provided, controls how nodes are copied /// to primary store. This is recorded at creation time and subsequent opens /// need to pass the same policy otherwise the \p open will fail. LLVM_ABI_FOR_TEST static Expected> open(StringRef Path, StringRef HashName, unsigned HashByteSize, - std::unique_ptr UpstreamDB = nullptr, + OnDiskGraphDB *UpstreamDB = nullptr, std::shared_ptr Logger = nullptr, FaultInPolicy Policy = FaultInPolicy::FullTree); LLVM_ABI_FOR_TEST ~OnDiskGraphDB(); private: + /// Forward declaration for a proxy for an ondisk index record.
struct IndexProxy; - class TempFile; - class MappedTempFile; enum class ObjectPresence { Missing, @@ -406,13 +425,16 @@ class OnDiskGraphDB { }; /// Check if object exists and if it is on upstream only. - LLVM_ABI_FOR_TEST ObjectPresence + LLVM_ABI_FOR_TEST Expected getObjectPresence(ObjectID Ref, bool CheckUpstream) const; /// When \p load is called for a node that doesn't exist, this function tries /// to load it from the upstream store and copy it to the primary one. Expected> faultInFromUpstream(ObjectID PrimaryID); + + /// Import the entire tree from upstream with \p UpstreamNode as root. Error importFullTree(ObjectID PrimaryID, ObjectHandle UpstreamNode); + /// Import only the single node \p UpstreamNode. Error importSingleNode(ObjectID PrimaryID, ObjectHandle UpstreamNode); Error importUpstreamData(ObjectID PrimaryID, ArrayRef PrimaryRefs, ObjectHandle UpstreamNode); @@ -422,6 +444,7 @@ class OnDiskGraphDB { Error storeFile(ObjectID ID, StringRef FilePath, std::optional ImportKind); + /// Find the IndexProxy for the hash. Expected indexHash(ArrayRef Hash); /// Get path for creating standalone data file. @@ -430,64 +453,71 @@ class OnDiskGraphDB { /// Create a standalone leaf file. Error createStandaloneLeaf(IndexProxy &I, ArrayRef Data); - Expected createTempFile(StringRef FinalPath, uint64_t Size); - + /// \name Helper functions for internal data structures.
+ /// \{ static InternalRef getInternalRef(ObjectID Ref) { return InternalRef::getFromRawData(Ref.getOpaqueData()); } + static ObjectID getExternalReference(InternalRef Ref) { return ObjectID::fromOpaqueData(Ref.getRawData()); } static ObjectID getExternalReference(const IndexProxy &I); - LLVM_ABI_FOR_TEST ArrayRef + static InternalRef makeInternalRef(FileOffset IndexOffset); + + LLVM_ABI_FOR_TEST Expected> getDigest(InternalRef Ref) const; ArrayRef getDigest(const IndexProxy &I) const; - IndexProxy getIndexProxyFromRef(InternalRef Ref) const; - - // FIXME: on newer branches we have refactored getIndexProxyFromRef to return - // Expected. As a stop gap, provide a checked API. - Expected getIndexProxyFromRefChecked(InternalRef Ref) const; - - static InternalRef makeInternalRef(FileOffset IndexOffset); + Expected getIndexProxyFromRef(InternalRef Ref) const; IndexProxy - getIndexProxyFromPointer(OnDiskHashMappedTrie::const_pointer P) const; + getIndexProxyFromPointer(OnDiskTrieRawHashMap::ConstOnDiskPtr P) const; LLVM_ABI_FOR_TEST InternalRefArrayRef getInternalRefs(ObjectHandle Node) const; + /// \} - void recordStandaloneSizeIncrease(size_t SizeIncrease); + /// Get the atomic variable that keeps track of the standalone data storage + /// size. + std::atomic &standaloneStorageSize() const; - std::atomic &getStandaloneStorageSize(); + /// Increase the standalone data size. + void recordStandaloneSizeIncrease(size_t SizeIncrease); + /// Get the standalone data size. uint64_t getStandaloneStorageSize() const; - OnDiskGraphDB(StringRef RootPath, OnDiskHashMappedTrie Index, - OnDiskDataAllocator DataPool, - std::unique_ptr UpstreamDB, FaultInPolicy Policy, - std::shared_ptr Logger); + // Private constructor. + OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index, + OnDiskDataAllocator DataPool, OnDiskGraphDB *UpstreamDB, + FaultInPolicy Policy, std::shared_ptr Logger); /// Mapping from hash to object reference. /// /// Data type is TrieRecord. 
- OnDiskHashMappedTrie Index; + OnDiskTrieRawHashMap Index; /// Storage for most objects. /// /// Data type is DataRecordHandle. OnDiskDataAllocator DataPool; - void *StandaloneData; // a StandaloneDataMap. + /// A StandaloneDataMap. + void *StandaloneData = nullptr; + /// Path to the root directory. std::string RootPath; /// Optional on-disk store to be used for faulting-in nodes. - std::unique_ptr UpstreamDB; + OnDiskGraphDB* UpstreamDB = nullptr; + + /// The policy used to fault in data from upstream. FaultInPolicy FIPolicy; + /// Debug Logger. std::shared_ptr Logger; }; diff --git a/llvm/include/llvm/CAS/OnDiskHashMappedTrie.h b/llvm/include/llvm/CAS/OnDiskHashMappedTrie.h deleted file mode 100644 index c216baa85ab27..0000000000000 --- a/llvm/include/llvm/CAS/OnDiskHashMappedTrie.h +++ /dev/null @@ -1,357 +0,0 @@ -//===- OnDiskHashMappedTrie.h -----------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CAS_ONDISKHASHMAPPEDTRIE_H -#define LLVM_CAS_ONDISKHASHMAPPEDTRIE_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/STLFunctionalExtras.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Config/llvm-config.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/FileSystem.h" -#include -#include -#include - -namespace llvm { - -class MemoryBuffer; -class raw_ostream; - -namespace cas { - -namespace ondisk { -class OnDiskCASLogger; -} - -class FileOffset { -public: - int64_t get() const { return Offset; } - - explicit operator bool() const { return Offset; } - - FileOffset() = default; - explicit FileOffset(int64_t Offset) : Offset(Offset) { assert(Offset >= 0); } - -private: - int64_t Offset = 0; -}; - -/// On-disk hash-mapped trie. Thread-safe / lock-free. -/// -/// This is an on-disk, (mostly) thread-safe key-value store that is (mostly) -/// lock-free. The keys are fixed length, and are expected to be binary hashes -/// with a normal distribution. -/// -/// - Thread-safety is achieved through the use of atomics within a shared -/// memory mapping. Atomic access does not work on networked filesystems. -/// - Filesystem locks are used, but only sparingly: -/// - during initialization, for creating / opening an existing store; -/// - for the lifetime of the instance, a shared/reader lock is held -/// - during destruction, if there are no concurrent readers, to shrink the -/// files to their minimum size. -/// - Path is used as a directory: -/// - "index" stores the root trie and subtries. -/// - "data" stores (most of) the entries, like a bump-ptr-allocator. -/// - Large entries are stored externally in a file named by the key. 
-/// - Code is system-dependent (Windows not yet implemented), and binary format -/// itself is not portable. These are not artifacts that can/should be moved -/// between different systems; they are only appropriate for local storage. -/// -/// FIXME: Add support for storing top-level metadata or identifiers that can -/// be created / read during initialization. -/// -/// FIXME: Implement for Windows. See comment next to implementation of \a -/// OnDiskHashMappedTrie::MappedFileInfo::open(). -class OnDiskHashMappedTrie { -public: - LLVM_DUMP_METHOD void dump() const; - void - print(raw_ostream &OS, - function_ref)> PrintRecordData = nullptr) const; - -public: - struct ConstValueProxy { - ConstValueProxy() = default; - ConstValueProxy(ArrayRef Hash, ArrayRef Data) - : Hash(Hash), Data(Data) {} - ConstValueProxy(ArrayRef Hash, StringRef Data) - : Hash(Hash), Data(Data.begin(), Data.size()) {} - - ArrayRef Hash; - ArrayRef Data; - }; - - struct ValueProxy { - operator ConstValueProxy() const { return ConstValueProxy(Hash, Data); } - - ValueProxy() = default; - ValueProxy(ArrayRef Hash, MutableArrayRef Data) - : Hash(Hash), Data(Data) {} - - ArrayRef Hash; - MutableArrayRef Data; - }; - - /// Validate the trie data structure. - /// - /// Callback receives the file offset to the data entry and the data stored. 
- Error validate( - function_ref RecordVerifier) const; - -public: - template class PointerImpl { - public: - FileOffset getOffset() const { - return FileOffset(OffsetLow32 | (uint64_t)OffsetHigh16 << 32); - } - - explicit operator bool() const { return IsValue; } - - const ProxyT &operator*() const { - assert(IsValue); - return Value; - } - const ProxyT *operator->() const { - assert(IsValue); - return &Value; - } - - PointerImpl() = default; - - protected: - PointerImpl(FileOffset Offset, ProxyT Value) - : PointerImpl(Value, Offset, /*IsValue=*/true) {} - - PointerImpl(ProxyT Value, FileOffset Offset, bool IsValue) - : Value(Value), OffsetLow32((uint64_t)Offset.get()), - OffsetHigh16((uint64_t)Offset.get() >> 32), IsValue(IsValue) { - if (IsValue) - checkOffset(Offset); - } - - static void checkOffset(FileOffset Offset) { - assert(Offset.get() > 0); - assert((uint64_t)Offset.get() < (1LL << 48)); - } - - ProxyT Value; - uint32_t OffsetLow32 = 0; - uint16_t OffsetHigh16 = 0; - bool IsValue = false; - }; - - class pointer; - class const_pointer : public PointerImpl { - public: - const_pointer() = default; - - private: - friend class pointer; - friend class OnDiskHashMappedTrie; - using const_pointer::PointerImpl::PointerImpl; - }; - - class pointer : public PointerImpl { - public: - operator const_pointer() const { - return const_pointer(Value, getOffset(), IsValue); - } - - pointer() = default; - - private: - friend class OnDiskHashMappedTrie; - using pointer::PointerImpl::PointerImpl; - }; - - pointer getMutablePointer(const_pointer CP) { - if (!CP) - return pointer(); - ValueProxy V{CP->Hash, MutableArrayRef(const_cast(CP->Data.data()), - CP->Data.size())}; - return pointer(CP.getOffset(), V); - } - - const_pointer find(ArrayRef Hash) const; - pointer find(ArrayRef Hash) { - return getMutablePointer( - const_cast(this)->find(Hash)); - } - - const_pointer recoverFromHashPointer(const uint8_t *HashBegin) const; - pointer recoverFromHashPointer(const uint8_t 
*HashBegin) { - return getMutablePointer( - const_cast(this)->recoverFromHashPointer( - HashBegin)); - } - - const_pointer recoverFromFileOffset(FileOffset Offset) const; - pointer recoverFromFileOffset(FileOffset Offset) { - return getMutablePointer( - const_cast(this)->recoverFromFileOffset( - Offset)); - } - - using LazyInsertOnConstructCB = - function_ref; - using LazyInsertOnLeakCB = - function_ref; - - /// Insert lazily. - /// - /// \p OnConstruct is called when ready to insert a value, after allocating - /// space for the data. It is called at most once. - /// - /// \p OnLeak is called only if \p OnConstruct has been called and a race - /// occurred before insertion, causing the tentative offset and data to be - /// abandoned. This allows clients to clean up other results or update any - /// references. - /// - /// NOTE: Does *not* guarantee that \p OnConstruct is only called on success. - /// The in-memory \a HashMappedTrie uses LazyAtomicPointer to synchronize - /// simultaneous writes, but that seems dangerous to use in a memory-mapped - /// file in case a process crashes in the busy state. - Expected insertLazy(ArrayRef Hash, - LazyInsertOnConstructCB OnConstruct = nullptr, - LazyInsertOnLeakCB OnLeak = nullptr); - - Expected insert(const ConstValueProxy &Value) { - return insertLazy(Value.Hash, [&](FileOffset, ValueProxy Allocated) { - assert(Allocated.Hash == Value.Hash); - assert(Allocated.Data.size() == Value.Data.size()); - llvm::copy(Value.Data, Allocated.Data.begin()); - }); - } - - size_t size() const; - size_t capacity() const; - - /// Gets or creates a file at \p Path with a hash-mapped trie named \p - /// TrieName. The hash size is \p NumHashBits (in bits) and the records store - /// data of size \p DataSize (in bytes). - /// - /// \p MaxFileSize controls the maximum file size to support, limiting the - /// size of the \a mapped_file_region. \p NewFileInitialSize is the starting - /// size if a new file is created. 
- /// - /// \p NewTableNumRootBits and \p NewTableNumSubtrieBits are hints to - /// configure the trie, if it doesn't already exist. - /// - /// \pre NumHashBits is a multiple of 8 (byte-aligned). - /// - /// TODO: Expose the internal DatabaseFile abstraction and add support for - /// adding more tables to a single file. - /// - /// FIXME: Rename to getOrCreate(). - static Expected - create(const Twine &Path, const Twine &TrieName, size_t NumHashBits, - uint64_t DataSize, uint64_t MaxFileSize, - std::optional NewFileInitialSize, - std::shared_ptr Logger = nullptr, - std::optional NewTableNumRootBits = std::nullopt, - std::optional NewTableNumSubtrieBits = std::nullopt); - - OnDiskHashMappedTrie(OnDiskHashMappedTrie &&RHS); - OnDiskHashMappedTrie &operator=(OnDiskHashMappedTrie &&RHS); - ~OnDiskHashMappedTrie(); - -private: - struct ImplType; - explicit OnDiskHashMappedTrie(std::unique_ptr Impl); - std::unique_ptr Impl; -}; - -/// Sink for data. Stores variable length data with 8-byte alignment. Does not -/// track size of data, which is assumed to known from context, or embedded. -/// Uses 0-padding but does not guarantee 0-termination. -class OnDiskDataAllocator { -public: - using ValueProxy = MutableArrayRef; - - /// An iterator-like return value for data insertion. Maybe it should be - /// called \c iterator, but it has no increment. - class pointer { - public: - FileOffset getOffset() const { return Offset; } - explicit operator bool() const { return bool(getOffset()); } - const ValueProxy &operator*() const { - assert(Offset && "Null dereference"); - return Value; - } - const ValueProxy *operator->() const { - assert(Offset && "Null dereference"); - return &Value; - } - - pointer() = default; - - private: - friend class OnDiskDataAllocator; - pointer(FileOffset Offset, ValueProxy Value) - : Offset(Offset), Value(Value) {} - FileOffset Offset; - ValueProxy Value; - }; - - // Look up the data stored at the given offset. 
- const char *beginData(FileOffset Offset) const; - char *beginData(FileOffset Offset) { - return const_cast( - const_cast(this)->beginData(Offset)); - } - - Expected allocate(size_t Size); - Expected save(ArrayRef Data) { - auto P = allocate(Data.size()); - if (LLVM_UNLIKELY(!P)) - return P.takeError(); - llvm::copy(Data, (*P)->begin()); - return P; - } - Expected save(StringRef Data) { - return save(ArrayRef(Data.begin(), Data.size())); - } - - /// \returns the buffer that was allocated at \p create time, with size - /// \p UserHeaderSize. - MutableArrayRef getUserHeader(); - - size_t size() const; - size_t capacity() const; - - static Expected - create(const Twine &Path, const Twine &TableName, uint64_t MaxFileSize, - std::optional NewFileInitialSize, - uint32_t UserHeaderSize = 0, - std::shared_ptr Logger = nullptr, - function_ref UserHeaderInit = nullptr); - - OnDiskDataAllocator(OnDiskDataAllocator &&RHS); - OnDiskDataAllocator &operator=(OnDiskDataAllocator &&RHS); - - // No copy. Just call \a create() again. - OnDiskDataAllocator(const OnDiskDataAllocator &) = delete; - OnDiskDataAllocator &operator=(const OnDiskDataAllocator &) = delete; - - ~OnDiskDataAllocator(); - -private: - struct ImplType; - explicit OnDiskDataAllocator(std::unique_ptr Impl); - std::unique_ptr Impl; -}; - -} // namespace cas -} // namespace llvm - -#endif // LLVM_CAS_ONDISKHASHMAPPEDTRIE_H diff --git a/llvm/include/llvm/CAS/OnDiskKeyValueDB.h b/llvm/include/llvm/CAS/OnDiskKeyValueDB.h index facbe79770ccd..68cced665f28e 100644 --- a/llvm/include/llvm/CAS/OnDiskKeyValueDB.h +++ b/llvm/include/llvm/CAS/OnDiskKeyValueDB.h @@ -1,18 +1,26 @@ -//===- OnDiskKeyValueDB.h ---------------------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +// +/// \file +/// This declares OnDiskKeyValueDB, a key-value storage database of fixed-size +/// keys and values. +// +//===----------------------------------------------------------------------===// #ifndef LLVM_CAS_ONDISKKEYVALUEDB_H #define LLVM_CAS_ONDISKKEYVALUEDB_H -#include "llvm/CAS/OnDiskHashMappedTrie.h" +#include "llvm/CAS/OnDiskTrieRawHashMap.h" namespace llvm::cas::ondisk { +class UnifiedOnDiskCache; + /// An on-disk key-value data store with the following properties: /// * Keys are fixed length binary hashes with expected normal distribution. /// * Values are buffers of the same size, specified at creation time. @@ -37,9 +45,7 @@ class OnDiskKeyValueDB { get(ArrayRef Key); /// \returns Total size of stored data. - size_t getStorageSize() const { - return Cache.size(); - } + size_t getStorageSize() const { return Cache.size(); } /// \returns The precentage of space utilization of hard space limits. /// @@ -57,20 +63,27 @@ class OnDiskKeyValueDB { /// \param KeySize Size for the key hash bytes. /// \param ValueName Identifier name for the values. /// \param ValueSize Size for the value bytes. + /// \param UnifiedCache An optional UnifiedOnDiskCache that manages the size + /// and lifetime of the CAS instance; it must take ownership of this + /// KeyValueDB once it is initialized. LLVM_ABI_FOR_TEST static Expected> open(StringRef Path, StringRef HashName, unsigned KeySize, StringRef ValueName, size_t ValueSize, + UnifiedOnDiskCache *UnifiedCache = nullptr, std::shared_ptr Logger = nullptr); - using CheckValueT = function_ref)>; - LLVM_ABI_FOR_TEST Error validate(CheckValueT CheckValue) const; + /// Validate the storage.
+ LLVM_ABI_FOR_TEST Error validate() const; private: - OnDiskKeyValueDB(size_t ValueSize, OnDiskHashMappedTrie Cache) - : ValueSize(ValueSize), Cache(std::move(Cache)) {} + OnDiskKeyValueDB(size_t ValueSize, OnDiskTrieRawHashMap Cache, + UnifiedOnDiskCache *UnifiedCache) + : ValueSize(ValueSize), Cache(std::move(Cache)), + UnifiedCache(UnifiedCache) {} const size_t ValueSize; - OnDiskHashMappedTrie Cache; + OnDiskTrieRawHashMap Cache; + UnifiedOnDiskCache *UnifiedCache = nullptr; }; } // namespace llvm::cas::ondisk diff --git a/llvm/include/llvm/CAS/OnDiskTrieRawHashMap.h b/llvm/include/llvm/CAS/OnDiskTrieRawHashMap.h new file mode 100644 index 0000000000000..c6cfdd1f62f4d --- /dev/null +++ b/llvm/include/llvm/CAS/OnDiskTrieRawHashMap.h @@ -0,0 +1,242 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file declares interface for OnDiskTrieRawHashMap, a thread-safe and +/// (mostly) lock-free hash map stored as trie and backed by persistent files on +/// disk. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CAS_ONDISKTRIERAWHASHMAP_H +#define LLVM_CAS_ONDISKTRIERAWHASHMAP_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLFunctionalExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CAS/FileOffset.h" +#include "llvm/Support/Error.h" +#include + +namespace llvm { + +class raw_ostream; + +namespace cas { + +namespace ondisk { +class OnDiskCASLogger; +} // namespace ondisk +/// OnDiskTrieRawHashMap is a persistent trie data structure used as hash maps. 
+/// The keys are fixed length, and are expected to be binary hashes with a +/// normal distribution. +/// +/// - Thread-safety is achieved through the use of atomics within a shared +/// memory mapping. Atomic access does not work on networked filesystems. +/// - Filesystem locks are used, but only sparingly: +/// - during initialization, for creating / opening an existing store; +/// - for the lifetime of the instance, a shared/reader lock is held +/// - during destruction, if there are no concurrent readers, to shrink the +/// files to their minimum size. +/// - Path is used as a directory: +/// - "index" stores the root trie and subtries. +/// - "data" stores (most of) the entries, like a bump-ptr-allocator. +/// - Large entries are stored externally in a file named by the key. +/// - Code is system-dependent and binary format itself is not portable. These +/// are not artifacts that can/should be moved between different systems; they +/// are only appropriate for local storage. +class OnDiskTrieRawHashMap { +public: + LLVM_DUMP_METHOD void dump() const; + void + print(raw_ostream &OS, + function_ref)> PrintRecordData = nullptr) const; + +public: + /// Const value proxy to access the records stored in TrieRawHashMap. + struct ConstValueProxy { + ConstValueProxy() = default; + ConstValueProxy(ArrayRef Hash, ArrayRef Data) + : Hash(Hash), Data(Data) {} + ConstValueProxy(ArrayRef Hash, StringRef Data) + : Hash(Hash), Data(Data.begin(), Data.size()) {} + + ArrayRef Hash; + ArrayRef Data; + }; + + /// Value proxy to access the records stored in TrieRawHashMap. + struct ValueProxy { + operator ConstValueProxy() const { return ConstValueProxy(Hash, Data); } + + ValueProxy() = default; + ValueProxy(ArrayRef Hash, MutableArrayRef Data) + : Hash(Hash), Data(Data) {} + + ArrayRef Hash; + MutableArrayRef Data; + }; + + /// Validate the trie data structure. + /// + /// Callback receives the file offset to the data entry and the data stored. 
+ LLVM_ABI_FOR_TEST Error validate( + function_ref RecordVerifier) const; + + /// Check the valid range of file offset for OnDiskTrieRawHashMap. + static bool validOffset(FileOffset Offset) { + return Offset.get() < (1LL << 48); + } + +public: + /// Template class to implement a `pointer` type into the trie data structure. + /// + /// It provides pointer-like operation, e.g., dereference to get underlying + /// data. It also reserves the top 16 bits of the pointer value, which can be + /// used to pack additional information if needed. + template class PointerImpl { + public: + FileOffset getOffset() const { + return FileOffset(OffsetLow32 | (uint64_t)OffsetHigh16 << 32); + } + + explicit operator bool() const { return IsValue; } + + const ProxyT &operator*() const { + assert(IsValue); + return Value; + } + const ProxyT *operator->() const { + assert(IsValue); + return &Value; + } + + PointerImpl() = default; + + protected: + PointerImpl(ProxyT Value, FileOffset Offset, bool IsValue = true) + : Value(Value), OffsetLow32(Offset.get()), + OffsetHigh16(Offset.get() >> 32), IsValue(IsValue) { + if (IsValue) + assert(validOffset(Offset)); + } + + ProxyT Value; + uint32_t OffsetLow32 = 0; + uint16_t OffsetHigh16 = 0; + + // True if points to a value (not a "nullptr"). Use an extra field because + // 0 can be a valid offset. + bool IsValue = false; + }; + + class OnDiskPtr; + class ConstOnDiskPtr : public PointerImpl { + public: + ConstOnDiskPtr() = default; + + private: + friend class OnDiskPtr; + friend class OnDiskTrieRawHashMap; + using ConstOnDiskPtr::PointerImpl::PointerImpl; + }; + + class OnDiskPtr : public PointerImpl { + public: + operator ConstOnDiskPtr() const { + return ConstOnDiskPtr(Value, getOffset(), IsValue); + } + + OnDiskPtr() = default; + + private: + friend class OnDiskTrieRawHashMap; + using OnDiskPtr::PointerImpl::PointerImpl; + }; + + /// Find the value from hash. 
+ /// + /// \returns pointer to the value if exists, otherwise returns a non-value + /// pointer that evaluates to `false` when convert to boolean. + LLVM_ABI_FOR_TEST ConstOnDiskPtr find(ArrayRef Hash) const; + + /// Helper function to recover a pointer into the trie from file offset. + LLVM_ABI_FOR_TEST Expected + recoverFromFileOffset(FileOffset Offset) const; + + using LazyInsertOnConstructCB = + function_ref; + using LazyInsertOnLeakCB = + function_ref; + + /// Insert lazily. + /// + /// \p OnConstruct is called when ready to insert a value, after allocating + /// space for the data. It is called at most once. + /// + /// \p OnLeak is called only if \p OnConstruct has been called and a race + /// occurred before insertion, causing the tentative offset and data to be + /// abandoned. This allows clients to clean up other results or update any + /// references. + /// + /// NOTE: Does *not* guarantee that \p OnConstruct is only called on success. + /// The in-memory \a TrieRawHashMap uses LazyAtomicPointer to synchronize + /// simultaneous writes, but that seems dangerous to use in a memory-mapped + /// file in case a process crashes in the busy state. + LLVM_ABI_FOR_TEST Expected + insertLazy(ArrayRef Hash, + LazyInsertOnConstructCB OnConstruct = nullptr, + LazyInsertOnLeakCB OnLeak = nullptr); + + Expected insert(const ConstValueProxy &Value) { + return insertLazy(Value.Hash, [&](FileOffset, ValueProxy Allocated) { + assert(Allocated.Hash == Value.Hash); + assert(Allocated.Data.size() == Value.Data.size()); + llvm::copy(Value.Data, Allocated.Data.begin()); + }); + } + + LLVM_ABI_FOR_TEST size_t size() const; + LLVM_ABI_FOR_TEST size_t capacity() const; + + /// Gets or creates a file at \p Path with a hash-mapped trie named \p + /// TrieName. The hash size is \p NumHashBits (in bits) and the records store + /// data of size \p DataSize (in bytes). 
+ /// + /// \p MaxFileSize controls the maximum file size to support, limiting the + /// size of the \a mapped_file_region. \p NewFileInitialSize is the starting + /// size if a new file is created. + /// + /// \p NewTableNumRootBits and \p NewTableNumSubtrieBits are hints to + /// configure the trie, if it doesn't already exist. + /// + /// \pre NumHashBits is a multiple of 8 (byte-aligned). + LLVM_ABI_FOR_TEST static Expected + create(const Twine &Path, const Twine &TrieName, size_t NumHashBits, + uint64_t DataSize, uint64_t MaxFileSize, + std::optional NewFileInitialSize, + std::shared_ptr Logger = nullptr, + std::optional NewTableNumRootBits = std::nullopt, + std::optional NewTableNumSubtrieBits = std::nullopt); + + LLVM_ABI_FOR_TEST OnDiskTrieRawHashMap(OnDiskTrieRawHashMap &&RHS); + LLVM_ABI_FOR_TEST OnDiskTrieRawHashMap &operator=(OnDiskTrieRawHashMap &&RHS); + LLVM_ABI_FOR_TEST ~OnDiskTrieRawHashMap(); + +private: + struct ImplType; + explicit OnDiskTrieRawHashMap(std::unique_ptr Impl); + std::unique_ptr Impl; +}; + +} // namespace cas +} // namespace llvm + +#endif // LLVM_CAS_ONDISKTRIERAWHASHMAP_H diff --git a/llvm/include/llvm/CAS/TreeEntry.h b/llvm/include/llvm/CAS/TreeEntry.h index 997ef1b3121e0..f37eaddcde3ac 100644 --- a/llvm/include/llvm/CAS/TreeEntry.h +++ b/llvm/include/llvm/CAS/TreeEntry.h @@ -61,7 +61,7 @@ class NamedTreeEntry : public TreeEntry { NamedTreeEntry(ObjectRef Ref, EntryKind Kind, StringRef Name) : TreeEntry(Ref, Kind), Name(Name) {} - LLVM_ABI void print(raw_ostream &OS, ObjectStore &CAS) const; + void print(raw_ostream &OS, ObjectStore &CAS) const; private: StringRef Name; diff --git a/llvm/include/llvm/CAS/TreeSchema.h b/llvm/include/llvm/CAS/TreeSchema.h index 5dadaabc17612..3f54b1fd609e4 100644 --- a/llvm/include/llvm/CAS/TreeSchema.h +++ b/llvm/include/llvm/CAS/TreeSchema.h @@ -26,13 +26,13 @@ class TreeSchema : public RTTIExtends { bool isRootNode(const ObjectProxy &Node) const final { return false; // TreeSchema doesn't have 
a root node. } - LLVM_ABI bool isNode(const ObjectProxy &Node) const final; + bool isNode(const ObjectProxy &Node) const final; - LLVM_ABI TreeSchema(ObjectStore &CAS); + TreeSchema(ObjectStore &CAS); - LLVM_ABI size_t getNumTreeEntries(TreeProxy Tree) const; + size_t getNumTreeEntries(TreeProxy Tree) const; - LLVM_ABI Error + Error forEachTreeEntry(TreeProxy Tree, function_ref Callback) const; @@ -44,19 +44,18 @@ class TreeSchema : public RTTIExtends { /// /// Passes the \p TreeNodeProxy if the entry is a \p TreeEntry::Tree, /// otherwise passes \p None. - LLVM_ABI Error walkFileTreeRecursively( + Error walkFileTreeRecursively( ObjectStore &CAS, ObjectRef Root, function_ref)> Callback); - LLVM_ABI std::optional lookupTreeEntry(TreeProxy Tree, - StringRef Name) const; - LLVM_ABI NamedTreeEntry loadTreeEntry(TreeProxy Tree, size_t I) const; + std::optional lookupTreeEntry(TreeProxy Tree, StringRef Name) const; + NamedTreeEntry loadTreeEntry(TreeProxy Tree, size_t I) const; - LLVM_ABI Expected load(ObjectRef Object) const; - LLVM_ABI Expected load(ObjectProxy Object) const; + Expected load(ObjectRef Object) const; + Expected load(ObjectProxy Object) const; - LLVM_ABI Expected create(ArrayRef Entries = {}); + Expected create(ArrayRef Entries = {}); private: static constexpr StringLiteral SchemaName = "llvm::cas::schema::tree::v1"; diff --git a/llvm/include/llvm/CAS/UnifiedOnDiskCache.h b/llvm/include/llvm/CAS/UnifiedOnDiskCache.h index abc384ee5bdd0..54dd8c0571735 100644 --- a/llvm/include/llvm/CAS/UnifiedOnDiskCache.h +++ b/llvm/include/llvm/CAS/UnifiedOnDiskCache.h @@ -1,4 +1,4 @@ -//===- UnifiedOnDiskCache.h -------------------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -11,6 +11,7 @@ #include "llvm/CAS/OnDiskGraphDB.h" #include "llvm/CAS/ValidationResult.h" +#include namespace llvm::cas::ondisk { @@ -32,7 +33,7 @@ class OnDiskKeyValueDB; /// Usage patterns should be that an instance of \p UnifiedOnDiskCache is open /// for a limited period of time, e.g. for the duration of a build operation. /// For long-living processes that need periodic access to a -/// \p UnifiedOnDiskCache, the client should device a scheme where access is +/// \p UnifiedOnDiskCache, the client should devise a scheme where access is /// performed within some defined period. For example, if a service is designed /// to continuously wait for requests that access a \p UnifiedOnDiskCache, it /// could keep the instance alive while new requests are coming in but close it @@ -45,28 +46,11 @@ class UnifiedOnDiskCache { /// The \p OnDiskGraphDB instance for the open directory. const OnDiskGraphDB &getGraphDB() const { return *PrimaryGraphDB; } - /// Associate an \p ObjectID, of the \p OnDiskGraphDB instance, with a key. - /// - /// \param Key the hash bytes for the key. - /// \param Value the \p ObjectID value. - /// - /// \returns the \p ObjectID associated with the \p Key. It may be different - /// than \p Value if another value was already associated with this key. - Expected KVPut(ArrayRef Key, ObjectID Value); - - /// Associate an \p ObjectID, of the \p OnDiskGraphDB instance, with a key. - /// An \p ObjectID as a key is equivalent to its digest bytes. - /// - /// \param Key the \p ObjectID for the key. - /// \param Value the \p ObjectID value. - /// - /// \returns the \p ObjectID associated with the \p Key. It may be different - /// than \p Value if another value was already associated with this key. - Expected KVPut(ObjectID Key, ObjectID Value); + /// The \p OnDiskKeyValueDB instance for the open directory. 
+ OnDiskKeyValueDB &getKeyValueDB() { return *PrimaryKVDB; } - /// \returns the \p ObjectID, of the \p OnDiskGraphDB instance, associated - /// with the \p Key, or \p std::nullopt if the key does not exist. - Expected> KVGet(ArrayRef Key); + /// The \p OnDiskKeyValueDB instance for the open directory. + const OnDiskKeyValueDB &getKeyValueDB() const { return *PrimaryKVDB; } /// Open a \p UnifiedOnDiskCache instance for a directory. /// @@ -115,6 +99,9 @@ bool AllowRecovery, bool ForceValidation, std::optional LLVMCasBinary); + /// Validate the action cache only. + LLVM_ABI_FOR_TEST Error validateActionCache() const; + /// This is called implicitly at destruction time, so it is not required for a /// client to call this. After calling \p close the only method that is valid /// to call is \p needsGarbageCollection. @@ -153,18 +140,23 @@ LLVM_ABI_FOR_TEST static Error collectGarbage(StringRef Path, ondisk::OnDiskCASLogger *Logger = nullptr); + /// Remove unused data from the current UnifiedOnDiskCache. Error collectGarbage(); - LLVM_ABI_FOR_TEST ~UnifiedOnDiskCache(); + /// Helper function to convert between the value stored in KeyValueDB and ObjectID. + LLVM_ABI_FOR_TEST static ObjectID getObjectIDFromValue(ArrayRef Value); - Error validateActionCache(); + using ValueBytes = std::array; + LLVM_ABI_FOR_TEST static ValueBytes getValueFromObjectID(ObjectID ID); - OnDiskGraphDB *getUpstreamGraphDB() const { return UpstreamGraphDB; } + LLVM_ABI_FOR_TEST ~UnifiedOnDiskCache(); private: + friend class OnDiskGraphDB; + friend class OnDiskKeyValueDB; UnifiedOnDiskCache(); - Expected> + Expected>> faultInFromUpstreamKV(ArrayRef Key); /// \returns the storage size of the primary directory. 
@@ -178,7 +170,7 @@ class UnifiedOnDiskCache { std::atomic NeedsGarbageCollection; std::string PrimaryDBDir; - OnDiskGraphDB *UpstreamGraphDB = nullptr; + std::unique_ptr UpstreamGraphDB; std::unique_ptr PrimaryGraphDB; std::unique_ptr UpstreamKVDB; diff --git a/llvm/include/llvm/Config/llvm-config.h.cmake b/llvm/include/llvm/Config/llvm-config.h.cmake index a6832299e5b6b..b1f1087846cf0 100644 --- a/llvm/include/llvm/Config/llvm-config.h.cmake +++ b/llvm/include/llvm/Config/llvm-config.h.cmake @@ -143,4 +143,7 @@ coverage bugs, and to 0 otherwise. */ #cmakedefine01 LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN +/* Define to 1 to enable LLVM OnDisk Content Addressable Storage */ +#cmakedefine01 LLVM_ENABLE_ONDISK_CAS + #endif diff --git a/llvm/include/llvm/Support/FileSystem.h b/llvm/include/llvm/Support/FileSystem.h index ad396985c59de..9d0c33221dcee 100644 --- a/llvm/include/llvm/Support/FileSystem.h +++ b/llvm/include/llvm/Support/FileSystem.h @@ -1164,6 +1164,12 @@ LLVM_ABI Expected openNativeFileForRead(const Twine &Name, OpenFlags Flags = OF_None, SmallVectorImpl *RealPath = nullptr); +/// An enumeration for the lock kind. +enum class LockKind { + Exclusive, // Exclusive/writer lock + Shared // Shared/reader lock +}; + /// Try to locks the file during the specified time. /// /// This function implements advisory locking on entire file. If it returns @@ -1177,6 +1183,7 @@ openNativeFileForRead(const Twine &Name, OpenFlags Flags = OF_None, /// @param Timeout Time in milliseconds that the process should wait before /// reporting lock failure. Zero value means try to get lock only /// once. +/// @param Kind The kind of the lock used (exclusive/shared). /// @returns errc::success if lock is successfully obtained, /// errc::no_lock_available if the file cannot be locked, or platform-specific /// error_code otherwise. 
@@ -1188,7 +1195,7 @@ openNativeFileForRead(const Twine &Name, OpenFlags Flags = OF_None, LLVM_ABI std::error_code tryLockFile(int FD, std::chrono::milliseconds Timeout = std::chrono::milliseconds(0), - bool Exclusive = true); + LockKind Kind = LockKind::Exclusive); /// Get RealPath from file handle. /// @@ -1202,9 +1209,8 @@ std::error_code getRealPathFromHandle(file_t Handle, /// /// This function acts as @ref tryLockFile but it waits infinitely. /// \param FD file descriptor to use for locking. -/// \param Exclusive if \p true use exclusive/writer lock, otherwise use -/// shared/reader lock. -LLVM_ABI std::error_code lockFile(int FD, bool Exclusive = true); +/// \param Kind The kind of lock to use (exclusive/shared). +LLVM_ABI std::error_code lockFile(int FD, LockKind Kind = LockKind::Exclusive); /// Unlock the file. /// diff --git a/llvm/lib/CAS/ActionCache.cpp b/llvm/lib/CAS/ActionCache.cpp index ddddef04a27cc..03001a9e93d81 100644 --- a/llvm/lib/CAS/ActionCache.cpp +++ b/llvm/lib/CAS/ActionCache.cpp @@ -1,4 +1,4 @@ -//===- ActionCache.cpp ------------------------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -22,10 +22,10 @@ CacheKey::CacheKey(const ObjectStore &CAS, const ObjectRef &Ref) : Key(toStringRef(CAS.getID(Ref).getHash())) {} std::future ActionCache::getFuture(const CacheKey &ActionKey, - bool Globally) const { + bool CanBeDistributed) const { std::promise Promise; auto Future = Promise.get_future(); - getAsync(ActionKey, Globally, + getAsync(ActionKey, CanBeDistributed, [Promise = std::move(Promise)](Expected> ID) mutable { Promise.set_value(std::move(ID)); @@ -35,10 +35,10 @@ std::future ActionCache::getFuture(const CacheKey &ActionKey, std::future ActionCache::putFuture(const CacheKey &ActionKey, const CASID &Result, - bool Globally) { + bool CanBeDistributed) { std::promise Promise; auto Future = Promise.get_future(); - putAsync(ActionKey, Result, Globally, + putAsync(ActionKey, Result, CanBeDistributed, [Promise = std::move(Promise)](Error E) mutable { Promise.set_value(std::move(E)); }); @@ -46,17 +46,17 @@ std::future ActionCache::putFuture(const CacheKey &ActionKey, } void ActionCache::getImplAsync( - ArrayRef ResolvedKey, bool Globally, + ArrayRef ResolvedKey, bool CanBeDistributed, unique_function>)> Callback, std::unique_ptr *) const { // The default implementation is synchronous. - return Callback(getImpl(ResolvedKey, Globally)); + return Callback(getImpl(ResolvedKey, CanBeDistributed)); } void ActionCache::putImplAsync(ArrayRef ResolvedKey, - const CASID &Result, bool Globally, + const CASID &Result, bool CanBeDistributed, unique_function Callback, std::unique_ptr *) { // The default implementation is synchronous. 
- return Callback(putImpl(ResolvedKey, Result, Globally)); + return Callback(putImpl(ResolvedKey, Result, CanBeDistributed)); } diff --git a/llvm/lib/CAS/ActionCaches.cpp b/llvm/lib/CAS/ActionCaches.cpp index 83891b4215954..9da34b95265b0 100644 --- a/llvm/lib/CAS/ActionCaches.cpp +++ b/llvm/lib/CAS/ActionCaches.cpp @@ -1,27 +1,28 @@ -//===- ActionCaches.cpp -----------------------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +/// +/// \file This file implements the underlying ActionCache implementations. +/// +//===----------------------------------------------------------------------===// #include "BuiltinCAS.h" +#include "llvm/ADT/TrieRawHashMap.h" #include "llvm/CAS/ActionCache.h" -#include "llvm/CAS/HashMappedTrie.h" -#include "llvm/CAS/ObjectStore.h" #include "llvm/CAS/OnDiskCASLogger.h" -#include "llvm/CAS/OnDiskGraphDB.h" -#include "llvm/CAS/OnDiskHashMappedTrie.h" #include "llvm/CAS/OnDiskKeyValueDB.h" #include "llvm/CAS/UnifiedOnDiskCache.h" #include "llvm/Config/llvm-config.h" -#include "llvm/Support/Alignment.h" #include "llvm/Support/BLAKE3.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Errc.h" #include "llvm/Support/Path.h" -#define DEBUG_TYPE "action-caches" +#define DEBUG_TYPE "cas-action-caches" using namespace llvm; using namespace llvm::cas; @@ -42,15 +43,16 @@ template class CacheEntry { std::array Value; }; +/// Builtin InMemory ActionCache that stores the mapping in memory. 
class InMemoryActionCache final : public ActionCache { public: InMemoryActionCache() : ActionCache(builtin::BuiltinCASContext::getDefaultContext()) {} Error putImpl(ArrayRef ActionKey, const CASID &Result, - bool Globally) final; + bool CanBeDistributed) final; Expected> getImpl(ArrayRef ActionKey, - bool Globally) const final; + bool CanBeDistributed) const final; Error validate() const final { return createStringError("InMemoryActionCache doesn't support validate()"); @@ -58,17 +60,18 @@ class InMemoryActionCache final : public ActionCache { private: using DataT = CacheEntry; - using InMemoryCacheT = ThreadSafeHashMappedTrie; + using InMemoryCacheT = ThreadSafeTrieRawHashMap; InMemoryCacheT Cache; }; +/// Builtin basic OnDiskActionCache that uses one underlying OnDiskKeyValueDB. class OnDiskActionCache final : public ActionCache { public: Error putImpl(ArrayRef ActionKey, const CASID &Result, - bool Globally) final; + bool CanBeDistributed) final; Expected> getImpl(ArrayRef ActionKey, - bool Globally) const final; + bool CanBeDistributed) const final; static Expected> create(StringRef Path); @@ -83,12 +86,14 @@ class OnDiskActionCache final : public ActionCache { using DataT = CacheEntry; }; +/// Builtin unified ActionCache that wraps around UnifiedOnDiskCache to provide +/// access to its ActionCache. 
class UnifiedOnDiskActionCache final : public ActionCache { public: Error putImpl(ArrayRef ActionKey, const CASID &Result, - bool Globally) final; + bool CanBeDistributed) final; Expected> getImpl(ArrayRef ActionKey, - bool Globally) const final; + bool CanBeDistributed) const final; UnifiedOnDiskActionCache(std::shared_ptr UniDB); @@ -99,18 +104,14 @@ class UnifiedOnDiskActionCache final : public ActionCache { }; } // end namespace -static std::string hashToString(ArrayRef Hash) { - SmallString<64> Str; - toHex(Hash, /*LowerCase=*/true, Str); - return Str.str().str(); -} - -static Error createResultCachePoisonedError(StringRef Key, +static Error createResultCachePoisonedError(ArrayRef KeyHash, const CASContext &Context, CASID Output, ArrayRef ExistingOutput) { std::string Existing = CASID::create(&Context, toStringRef(ExistingOutput)).toString(); + SmallString<64> Key; + toHex(KeyHash, /*LowerCase=*/true, Key); return createStringError(std::make_error_code(std::errc::invalid_argument), "cache poisoned for '" + Key + "' (new='" + Output.toString() + "' vs. 
existing '" + @@ -118,7 +119,8 @@ static Error createResultCachePoisonedError(StringRef Key, } Expected> -InMemoryActionCache::getImpl(ArrayRef Key, bool /*Globally*/) const { +InMemoryActionCache::getImpl(ArrayRef Key, + bool /*CanBeDistributed*/) const { auto Result = Cache.find(Key); if (!Result) return std::nullopt; @@ -126,7 +128,7 @@ InMemoryActionCache::getImpl(ArrayRef Key, bool /*Globally*/) const { } Error InMemoryActionCache::putImpl(ArrayRef Key, const CASID &Result, - bool /*Globally*/) { + bool /*CanBeDistributed*/) { DataT Expected(Result.getHash()); const InMemoryCacheT::value_type &Cached = *Cache.insertLazy( Key, [&](auto ValueConstructor) { ValueConstructor.emplace(Expected); }); @@ -135,14 +137,13 @@ Error InMemoryActionCache::putImpl(ArrayRef Key, const CASID &Result, if (Expected.getValue() == Observed.getValue()) return Error::success(); - return createResultCachePoisonedError(hashToString(Key), getContext(), Result, + return createResultCachePoisonedError(Key, getContext(), Result, Observed.getValue()); } static constexpr StringLiteral DefaultName = "actioncache"; -namespace llvm { -namespace cas { +namespace llvm::cas { std::string getDefaultOnDiskActionCachePath() { SmallString<128> Path; @@ -156,8 +157,7 @@ std::unique_ptr createInMemoryActionCache() { return std::make_unique(); } -} // namespace cas -} // namespace llvm +} // namespace llvm::cas OnDiskActionCache::OnDiskActionCache( std::unique_ptr DB) @@ -167,13 +167,15 @@ OnDiskActionCache::OnDiskActionCache( Expected> OnDiskActionCache::create(StringRef AbsPath) { std::shared_ptr Logger; +#ifndef _WIN32 if (Error E = ondisk::OnDiskCASLogger::openIfEnabled(AbsPath).moveInto(Logger)) return std::move(E); +#endif std::unique_ptr DB; - if (Error E = ondisk::OnDiskKeyValueDB::open(AbsPath, getHashName(), - sizeof(HashType), getHashName(), - sizeof(DataT), std::move(Logger)) + if (Error E = ondisk::OnDiskKeyValueDB::open( + AbsPath, getHashName(), sizeof(HashType), getHashName(), + 
sizeof(DataT), /*UnifiedCache=*/nullptr, std::move(Logger)) .moveInto(DB)) return std::move(E); return std::unique_ptr( @@ -181,7 +183,8 @@ OnDiskActionCache::create(StringRef AbsPath) { } Expected> -OnDiskActionCache::getImpl(ArrayRef Key, bool /*Globally*/) const { +OnDiskActionCache::getImpl(ArrayRef Key, + bool /*CanBeDistributed*/) const { std::optional> Val; if (Error E = DB->get(Key).moveInto(Val)) return std::move(E); @@ -191,7 +194,7 @@ OnDiskActionCache::getImpl(ArrayRef Key, bool /*Globally*/) const { } Error OnDiskActionCache::putImpl(ArrayRef Key, const CASID &Result, - bool /*Globally*/) { + bool /*CanBeDistributed*/) { auto ResultHash = Result.getHash(); ArrayRef Expected((const char *)ResultHash.data(), ResultHash.size()); ArrayRef Observed; @@ -202,15 +205,11 @@ Error OnDiskActionCache::putImpl(ArrayRef Key, const CASID &Result, return Error::success(); return createResultCachePoisonedError( - hashToString(Key), getContext(), Result, + Key, getContext(), Result, ArrayRef((const uint8_t *)Observed.data(), Observed.size())); } -Error OnDiskActionCache::validate() const { - // FIXME: without the matching CAS there is nothing we can check about the - // cached values. The hash size is already validated by the DB validator. 
- return DB->validate(nullptr); -} +Error OnDiskActionCache::validate() const { return DB->validate(); } UnifiedOnDiskActionCache::UnifiedOnDiskActionCache( std::shared_ptr UniDB) @@ -219,32 +218,35 @@ UnifiedOnDiskActionCache::UnifiedOnDiskActionCache( Expected> UnifiedOnDiskActionCache::getImpl(ArrayRef Key, - bool /*Globally*/) const { - std::optional Val; - if (Error E = UniDB->KVGet(Key).moveInto(Val)) + bool /*CanBeDistributed*/) const { + std::optional> Val; + if (Error E = UniDB->getKeyValueDB().get(Key).moveInto(Val)) return std::move(E); if (!Val) return std::nullopt; + auto ID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(*Val); return CASID::create(&getContext(), - toStringRef(UniDB->getGraphDB().getDigest(*Val))); + toStringRef(UniDB->getGraphDB().getDigest(ID))); } Error UnifiedOnDiskActionCache::putImpl(ArrayRef Key, const CASID &Result, - bool /*Globally*/) { + bool /*CanBeDistributed*/) { auto Expected = UniDB->getGraphDB().getReference(Result.getHash()); if (LLVM_UNLIKELY(!Expected)) return Expected.takeError(); - std::optional Observed; - if (Error E = UniDB->KVPut(Key, *Expected).moveInto(Observed)) + + auto Value = ondisk::UnifiedOnDiskCache::getValueFromObjectID(*Expected); + std::optional> Observed; + if (Error E = UniDB->getKeyValueDB().put(Key, Value).moveInto(Observed)) return E; - if (*Expected == Observed) + auto ObservedID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(*Observed); + if (*Expected == ObservedID) return Error::success(); return createResultCachePoisonedError( - hashToString(Key), getContext(), Result, - UniDB->getGraphDB().getDigest(*Observed)); + Key, getContext(), Result, UniDB->getGraphDB().getDigest(ObservedID)); } Error UnifiedOnDiskActionCache::validate() const { diff --git a/llvm/lib/CAS/BuiltinCAS.cpp b/llvm/lib/CAS/BuiltinCAS.cpp index f66167edab8f1..2652cabf7097c 100644 --- a/llvm/lib/CAS/BuiltinCAS.cpp +++ b/llvm/lib/CAS/BuiltinCAS.cpp @@ -10,9 +10,6 @@ #include "llvm/ADT/StringExtras.h" #include 
"llvm/CAS/BuiltinObjectHasher.h" #include "llvm/CAS/UnifiedOnDiskCache.h" -#include "llvm/Support/Alignment.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Process.h" using namespace llvm; using namespace llvm::cas; diff --git a/llvm/lib/CAS/BuiltinCAS.h b/llvm/lib/CAS/BuiltinCAS.h index c07caab730d88..b800a19b7b5e9 100644 --- a/llvm/lib/CAS/BuiltinCAS.h +++ b/llvm/lib/CAS/BuiltinCAS.h @@ -14,14 +14,14 @@ #include "llvm/CAS/ObjectStore.h" #include -namespace llvm { -namespace cas { +namespace llvm::cas { class ActionCache; namespace ondisk { class UnifiedOnDiskCache; } namespace builtin { +/// Common base class for builtin CAS implementations using the same CASContext. class BuiltinCAS : public ObjectStore { public: BuiltinCAS() : ObjectStore(BuiltinCASContext::getDefaultContext()) {} @@ -95,7 +95,6 @@ constexpr StringLiteral DefaultDirProxy = "/^llvm::cas::builtin::default"; constexpr StringLiteral DefaultDir = "llvm.cas.builtin.default"; } // end namespace builtin -} // end namespace cas -} // end namespace llvm +} // end namespace llvm::cas #endif // LLVM_LIB_CAS_BUILTINCAS_H diff --git a/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp b/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp index 5cedf6f49abad..8283e981b1099 100644 --- a/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp +++ b/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp @@ -1,4 +1,4 @@ -//===- BuiltinUnifiedCASDatabases.cpp ---------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -35,4 +35,4 @@ Expected cas::validateOnDiskUnifiedCASDatabasesIfNeeded( #else return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled"); #endif -} \ No newline at end of file +} diff --git a/llvm/lib/CAS/CASConfiguration.cpp b/llvm/lib/CAS/CASConfiguration.cpp index c816f20ed5e4b..99b3c88cbd2e4 100644 --- a/llvm/lib/CAS/CASConfiguration.cpp +++ b/llvm/lib/CAS/CASConfiguration.cpp @@ -15,13 +15,13 @@ using namespace llvm; using namespace llvm::cas; -void CASConfiguration::getResolvedCASPath( +Error CASConfiguration::getResolvedCASPath( llvm::SmallVectorImpl &Result) const { - if (CASPath == "auto") { - getDefaultOnDiskCASPath(Result); - } else { - Result.assign(CASPath.begin(), CASPath.end()); - } + if (CASPath == "auto") + return getDefaultOnDiskCASPath(Result); + + Result.assign(CASPath.begin(), CASPath.end()); + return Error::success(); } Expected, std::shared_ptr>> @@ -34,7 +34,8 @@ CASConfiguration::createDatabases() const { } SmallString<128> PathBuf; - getResolvedCASPath(PathBuf); + if (auto E = getResolvedCASPath(PathBuf)) + return std::move(E); std::pair, std::unique_ptr> DBs; return createOnDiskUnifiedCASDatabases(PathBuf); diff --git a/llvm/lib/CAS/CASProvidingFileSystem.cpp b/llvm/lib/CAS/CASProvidingFileSystem.cpp index 80f516a572790..faef4bee47731 100644 --- a/llvm/lib/CAS/CASProvidingFileSystem.cpp +++ b/llvm/lib/CAS/CASProvidingFileSystem.cpp @@ -69,6 +69,19 @@ class CASProvidingFileSystem final : public CASBackedFileSystem { std::error_code isLocal(const Twine &Path, bool &Result) final { return FS->isLocal(Path, Result); } + void visitChildFileSystems(VisitCallbackTy Callback) override { + Callback(*FS); + FS->visitChildFileSystems(Callback); + } + void printImpl(raw_ostream &OS, PrintType Type, + unsigned IndentLevel) const override { + printIndent(OS, IndentLevel); + OS << "CASProvidingFilesystem\n"; + if (Type == PrintType::Summary) + return; + FS->print(OS, Type == PrintType::Contents ? 
PrintType::Summary : Type, + IndentLevel + 1); + } llvm::Expected> openCASBackedFileForRead(const Twine &Path) final { diff --git a/llvm/lib/CAS/CMakeLists.txt b/llvm/lib/CAS/CMakeLists.txt index 82f0e7184e689..67f107678af67 100644 --- a/llvm/lib/CAS/CMakeLists.txt +++ b/llvm/lib/CAS/CMakeLists.txt @@ -1,5 +1,5 @@ -if (LLVM_ENABLE_ONDISK_CAS) - add_definitions(-DLLVM_ENABLE_ONDISK_CAS=1) +if (UNIX AND "${CMAKE_SYSTEM_NAME}" MATCHES "AIX") + set(additional_libs bsd) endif() add_llvm_component_library(LLVMCAS @@ -15,17 +15,19 @@ add_llvm_component_library(LLVMCAS CASOutputBackend.cpp CASProvidingFileSystem.cpp CachingOnDiskFileSystem.cpp + DatabaseFile.cpp FileSystemCache.cpp - HashMappedTrie.cpp HierarchicalTreeBuilder.cpp InMemoryCAS.cpp - MappedFileRegionBumpPtr.cpp + MappedFileRegionArena.cpp + NamedValuesSchema.cpp ObjectStore.cpp OnDiskCAS.cpp OnDiskCASLogger.cpp OnDiskCommon.cpp + OnDiskDataAllocator.cpp OnDiskGraphDB.cpp - OnDiskHashMappedTrie.cpp + OnDiskTrieRawHashMap.cpp OnDiskKeyValueDB.cpp PluginCAS.cpp TreeSchema.cpp @@ -35,7 +37,10 @@ add_llvm_component_library(LLVMCAS ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/CAS + LINK_LIBS + ${LLVM_PTHREAD_LIB} + ${additional_libs} + LINK_COMPONENTS - BinaryFormat Support ) diff --git a/llvm/lib/CAS/CachingOnDiskFileSystem.cpp b/llvm/lib/CAS/CachingOnDiskFileSystem.cpp index 7c29d632225ea..8cf938994014b 100644 --- a/llvm/lib/CAS/CachingOnDiskFileSystem.cpp +++ b/llvm/lib/CAS/CachingOnDiskFileSystem.cpp @@ -9,15 +9,13 @@ #include "llvm/CAS/CachingOnDiskFileSystem.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/ScopeExit.h" -#include "llvm/ADT/StringMap.h" #include "llvm/CAS/FileSystemCache.h" #include "llvm/CAS/HierarchicalTreeBuilder.h" #include "llvm/CAS/ObjectStore.h" #include "llvm/CAS/TreePath.h" #include "llvm/Config/config.h" -#include "llvm/Support/AlignOf.h" -#include "llvm/Support/Allocator.h" #include "llvm/Support/FileSystem.h" + #include using namespace llvm; @@ -125,6 +123,9 @@ class 
CachingOnDiskFileSystemImpl final : public CachingOnDiskFileSystem { return makeIntrusiveRefCnt(*this); } + void printImpl(raw_ostream &OS, PrintType Type, + unsigned IndentLevel) const final; + CachingOnDiskFileSystemImpl(std::shared_ptr DB) : CachingOnDiskFileSystem(std::move(DB)) { initializeWorkingDirectory(); @@ -402,7 +403,7 @@ CachingOnDiskFileSystemImpl::makeEntry( if (!F) return F.takeError(); - auto CloseOnExit = make_scope_exit([&F]() { sys::fs::closeFile(*F); }); + llvm::scope_exit CloseOnExit([&F]() { sys::fs::closeFile(*F); }); return makeFile(Parent, TreePathStorage.Path, *F, Status); } @@ -546,6 +547,7 @@ CachingOnDiskFileSystemImpl::getDirectoryIterator(const Twine &Path) { Expected CachingOnDiskFileSystemImpl::preloadRealPath(DirectoryEntry &From, StringRef Remaining) { + PathStorage RemainingStorage(Remaining); SmallString<256> ExpectedRealTreePath; ExpectedRealTreePath = From.getTreePath(); @@ -629,7 +631,7 @@ CachingOnDiskFileSystemImpl::preloadRealPath(DirectoryEntry &From, llvm::consumeError(FD.takeError()); return nullptr; } - auto CloseOnExit = make_scope_exit([&FD]() { sys::fs::closeFile(*FD); }); + llvm::scope_exit CloseOnExit([&FD]() { sys::fs::closeFile(*FD); }); auto F = makeFile(*State.Entry, RealTreePath, *FD, Status); if (F) @@ -900,6 +902,13 @@ CachingOnDiskFileSystemImpl::createTreeBuilder() { return std::make_unique(*this); } +void CachingOnDiskFileSystemImpl::printImpl(raw_ostream &OS, PrintType Type, + unsigned IndentLevel) const { + printIndent(OS, IndentLevel); + OS << "CachingOnDiskFileSystem\n"; + // FIXME: print contents +} + void CachingOnDiskFileSystemImpl::TreeBuilder::pushSymlink( const DirectoryEntry &Entry) { assert(Entry.isSymlink()); diff --git a/llvm/lib/CAS/DatabaseFile.cpp b/llvm/lib/CAS/DatabaseFile.cpp new file mode 100644 index 0000000000000..b862eabe7f35f --- /dev/null +++ b/llvm/lib/CAS/DatabaseFile.cpp @@ -0,0 +1,130 @@ +//===----------------------------------------------------------------------===// +// 
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file This file implements the common abstractions for CAS database file. +/// +//===----------------------------------------------------------------------===// + +#include "DatabaseFile.h" + +using namespace llvm; +using namespace llvm::cas; +using namespace llvm::cas::ondisk; + +Error ondisk::createTableConfigError(std::errc ErrC, StringRef Path, + StringRef TableName, const Twine &Msg) { + return createStringError(make_error_code(ErrC), + Path + "[" + TableName + "]: " + Msg); +} + +Error ondisk::checkTable(StringRef Label, size_t Expected, size_t Observed, + StringRef Path, StringRef TrieName) { + if (Expected == Observed) + return Error::success(); + return createTableConfigError(std::errc::invalid_argument, Path, TrieName, + "mismatched " + Label + + " (expected: " + Twine(Expected) + + ", observed: " + Twine(Observed) + ")"); +} + +Expected +DatabaseFile::create(const Twine &Path, uint64_t Capacity, + std::shared_ptr Logger, + function_ref NewDBConstructor) { + // Constructor for if the file doesn't exist. + auto NewFileConstructor = [&](MappedFileRegionArena &Alloc) -> Error { + if (Alloc.capacity() < + sizeof(Header) + sizeof(MappedFileRegionArena::Header)) + return createTableConfigError(std::errc::argument_out_of_domain, + Path.str(), "datafile", + "Allocator too small for header"); + (void)new (Alloc.data()) Header{getMagic(), getVersion(), {0}}; + DatabaseFile DB(Alloc); + return NewDBConstructor(DB); + }; + + // Get or create the file. 
+ MappedFileRegionArena Alloc; + if (Error E = + MappedFileRegionArena::create(Path, Capacity, sizeof(Header), + std::move(Logger), NewFileConstructor) + .moveInto(Alloc)) + return std::move(E); + + return DatabaseFile::get( + std::make_unique(std::move(Alloc))); +} + +Error DatabaseFile::addTable(TableHandle Table) { + assert(Table); + assert(&Table.getRegion() == &getRegion()); + int64_t ExistingRootOffset = 0; + const int64_t NewOffset = + reinterpret_cast(&Table.getHeader()) - getRegion().data(); + if (H->RootTableOffset.compare_exchange_strong(ExistingRootOffset, NewOffset)) + return Error::success(); + + // Silently ignore attempts to set the root to itself. + if (ExistingRootOffset == NewOffset) + return Error::success(); + + // Return a proper error message. + TableHandle Root(getRegion(), ExistingRootOffset); + if (Root.getName() == Table.getName()) + return createStringError( + make_error_code(std::errc::not_supported), + "collision with existing table of the same name '" + Table.getName() + + "'"); + + return createStringError(make_error_code(std::errc::not_supported), + "cannot add new table '" + Table.getName() + + "'" + " to existing root '" + + Root.getName() + "'"); +} + +std::optional DatabaseFile::findTable(StringRef Name) { + int64_t RootTableOffset = H->RootTableOffset.load(); + if (!RootTableOffset) + return std::nullopt; + + TableHandle Root(getRegion(), RootTableOffset); + if (Root.getName() == Name) + return Root; + + return std::nullopt; +} + +Error DatabaseFile::validate(MappedFileRegion &Region) { + if (Region.size() < sizeof(Header)) + return createStringError(std::errc::invalid_argument, + "database: missing header"); + + // Check the magic and version. + auto *H = reinterpret_cast
(Region.data()); + if (H->Magic != getMagic()) + return createStringError(std::errc::invalid_argument, + "database: bad magic"); + if (H->Version != getVersion()) + return createStringError(std::errc::invalid_argument, + "database: wrong version"); + + if (H->RootTableOffset < 0 || + static_cast(H->RootTableOffset) > Region.size()) + return createStringError(std::errc::invalid_argument, + "database: root table offset out of bound"); + + auto *MFH = reinterpret_cast(Region.data() + + sizeof(Header)); + // Check the bump-ptr, which should point past the header. + if (MFH->BumpPtr.load() < (int64_t)sizeof(Header)) + return createStringError(std::errc::invalid_argument, + "database: corrupt bump-ptr"); + + return Error::success(); +} diff --git a/llvm/lib/CAS/DatabaseFile.h b/llvm/lib/CAS/DatabaseFile.h new file mode 100644 index 0000000000000..203d1eec5540b --- /dev/null +++ b/llvm/lib/CAS/DatabaseFile.h @@ -0,0 +1,155 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file declares the common interface for a DatabaseFile that is used to +/// implement OnDiskCAS. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CAS_DATABASEFILE_H +#define LLVM_LIB_CAS_DATABASEFILE_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/CAS/MappedFileRegionArena.h" +#include "llvm/CAS/OnDiskCASLogger.h" +#include "llvm/Support/Error.h" + +namespace llvm::cas::ondisk { + +using MappedFileRegion = MappedFileRegionArena::RegionT; + +/// Generic handle for a table. 
+/// +/// Generic table header layout: +/// - 2-bytes: TableKind +/// - 2-bytes: TableNameSize +/// - 4-bytes: TableNameRelOffset (relative to header) +class TableHandle { +public: + enum class TableKind : uint16_t { + TrieRawHashMap = 1, + DataAllocator = 2, + }; + struct Header { + TableKind Kind; + uint16_t NameSize; + int32_t NameRelOffset; ///< Relative to Header. + }; + + explicit operator bool() const { return H; } + const Header &getHeader() const { return *H; } + MappedFileRegion &getRegion() const { return *Region; } + + template static void check() { + static_assert( + std::is_same::value, + "T::GenericHeader should be of type TableHandle::Header"); + static_assert(offsetof(typename T::Header, GenericHeader) == 0, + "T::GenericHeader must be the head of T::Header"); + } + template bool is() const { return T::Kind == H->Kind; } + template T dyn_cast() const { + check(); + if (is()) + return T(*Region, *reinterpret_cast(H)); + return T(); + } + template T cast() const { + assert(is()); + return dyn_cast(); + } + + StringRef getName() const { + auto *Begin = reinterpret_cast(H) + H->NameRelOffset; + return StringRef(Begin, H->NameSize); + } + + TableHandle() = default; + TableHandle(MappedFileRegion &Region, Header &H) : Region(&Region), H(&H) {} + TableHandle(MappedFileRegion &Region, intptr_t HeaderOffset) + : TableHandle(Region, + *reinterpret_cast
(Region.data() + HeaderOffset)) { + } + +private: + MappedFileRegion *Region = nullptr; + Header *H = nullptr; +}; + +/// Encapsulate a database file, which: +/// - Sets/checks magic. +/// - Sets/checks version. +/// - Points at an arbitrary root table. +/// - Sets up a MappedFileRegionArena for allocation. +/// +/// Top-level layout: +/// - 4-bytes: Magic +/// - 4-bytes: Version +/// - 8-bytes: RootTableOffset (16-bits: Kind; 48-bits: Offset) +/// - 8-bytes: BumpPtr from MappedFileRegionArena +class DatabaseFile { +public: + static constexpr uint32_t getMagic() { return 0xDA7ABA53UL; } + static constexpr uint32_t getVersion() { return 1UL; } + struct Header { + uint32_t Magic; + uint32_t Version; + std::atomic RootTableOffset; + }; + + const Header &getHeader() { return *H; } + MappedFileRegionArena &getAlloc() { return Alloc; } + MappedFileRegion &getRegion() { return Alloc.getRegion(); } + + /// Add a table. This is currently not thread safe and should be called inside + /// NewDBConstructor. + Error addTable(TableHandle Table); + + /// Find a table. May return null. + std::optional findTable(StringRef Name); + + /// Create the DatabaseFile at Path with Capacity. + static Expected + create(const Twine &Path, uint64_t Capacity, + std::shared_ptr Logger, + function_ref NewDBConstructor); + + size_t size() const { return Alloc.size(); } + +private: + static Expected + get(std::unique_ptr Alloc) { + if (Error E = validate(Alloc->getRegion())) + return std::move(E); + return DatabaseFile(std::move(Alloc)); + } + + static Error validate(MappedFileRegion &Region); + + DatabaseFile(MappedFileRegionArena &Alloc) + : H(reinterpret_cast
(Alloc.data())), Alloc(Alloc) {} + DatabaseFile(std::unique_ptr Alloc) + : DatabaseFile(*Alloc) { + OwnedAlloc = std::move(Alloc); + } + + Header *H = nullptr; + MappedFileRegionArena &Alloc; + std::unique_ptr OwnedAlloc; +}; + +Error createTableConfigError(std::errc ErrC, StringRef Path, + StringRef TableName, const Twine &Msg); + +Error checkTable(StringRef Label, size_t Expected, size_t Observed, + StringRef Path, StringRef TrieName); + +} // namespace llvm::cas::ondisk + +#endif diff --git a/llvm/lib/CAS/FileSystemCache.cpp b/llvm/lib/CAS/FileSystemCache.cpp index ccee0327721db..e5c5c096f47bc 100644 --- a/llvm/lib/CAS/FileSystemCache.cpp +++ b/llvm/lib/CAS/FileSystemCache.cpp @@ -7,15 +7,8 @@ //===----------------------------------------------------------------------===// #include "llvm/CAS/FileSystemCache.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/ScopeExit.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/CAS/HashMappedTrie.h" -#include "llvm/CAS/ObjectStore.h" -#include "llvm/Support/AlignOf.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Path.h" -#include using namespace llvm; using namespace llvm::cas; diff --git a/llvm/lib/CAS/HashMappedTrie.cpp b/llvm/lib/CAS/HashMappedTrie.cpp deleted file mode 100644 index 2acac583b7985..0000000000000 --- a/llvm/lib/CAS/HashMappedTrie.cpp +++ /dev/null @@ -1,478 +0,0 @@ -//===- HashMappedTrie.cpp -------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/CAS/HashMappedTrie.h" -#include "HashMappedTrieIndexGenerator.h" -#include "llvm/ADT/LazyAtomicPointer.h" -#include "llvm/CAS/ThreadSafeAllocator.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include - -using namespace llvm; -using namespace llvm::cas; - -namespace { -struct TrieNode { - const bool IsSubtrie = false; - - TrieNode(bool IsSubtrie) : IsSubtrie(IsSubtrie) {} - - static void *operator new(size_t Size) { return ::malloc(Size); } - void operator delete(void *Ptr) { ::free(Ptr); } -}; - -struct TrieContent final : public TrieNode { - const uint8_t ContentOffset; - const uint8_t HashSize; - const uint8_t HashOffset; - - void *getValuePointer() const { - auto Content = reinterpret_cast(this) + ContentOffset; - return const_cast(Content); - } - - ArrayRef getHash() const { - auto *Begin = reinterpret_cast(this) + HashOffset; - return ArrayRef(Begin, Begin + HashSize); - } - - TrieContent(size_t ContentOffset, size_t HashSize, size_t HashOffset) - : TrieNode(/*IsSubtrie=*/false), ContentOffset(ContentOffset), - HashSize(HashSize), HashOffset(HashOffset) {} -}; -static_assert(sizeof(TrieContent) == - ThreadSafeHashMappedTrieBase::TrieContentBaseSize, - "Check header assumption!"); - -class TrieSubtrie final : public TrieNode { -public: - TrieNode *get(size_t I) const { return Slots[I].load(); } - - TrieSubtrie * - sink(size_t I, TrieContent &Content, size_t NumSubtrieBits, size_t NewI, - function_ref)> Saver); - - void printHash(raw_ostream &OS, ArrayRef Bytes) const; - void print(raw_ostream &OS) const { print(OS, std::nullopt); } - void print(raw_ostream &OS, std::optional Prefix) const; - void dump() const { print(dbgs()); } - - static std::unique_ptr create(size_t StartBit, size_t NumBits); - - explicit TrieSubtrie(size_t 
StartBit, size_t NumBits); - -private: - // FIXME: Use a bitset to speed up access: - // - // std::array, NumSlots/64> IsSet; - // - // This will avoid needing to visit sparsely filled slots in - // \a ThreadSafeHashMappedTrieBase::destroyImpl() when there's a non-trivial - // destructor. - // - // It would also greatly speed up iteration, if we add that some day, and - // allow get() to return one level sooner. - // - // This would be the algorithm for updating IsSet (after updating Slots): - // - // std::atomic &Bits = IsSet[I.High]; - // const uint64_t NewBit = 1ULL << I.Low; - // uint64_t Old = 0; - // while (!Bits.compare_exchange_weak(Old, Old | NewBit)) - // ; - - // For debugging. - unsigned StartBit = 0; - unsigned NumBits = 0; - -public: - /// Linked list for ownership of tries. The pointer is owned by TrieSubtrie. - std::atomic Next; - - /// The (co-allocated) slots of the subtrie. - MutableArrayRef> Slots; -}; -} // end namespace - -namespace llvm { -template <> struct isa_impl { - static inline bool doit(const TrieNode &TN) { return !TN.IsSubtrie; } -}; -template <> struct isa_impl { - static inline bool doit(const TrieNode &TN) { return TN.IsSubtrie; } -}; -} // end namespace llvm - -static size_t getTrieTailSize(size_t StartBit, size_t NumBits) { - assert(NumBits < 20 && "Tries should have fewer than ~1M slots"); - return sizeof(TrieNode *) * (1u << NumBits); -} - -std::unique_ptr TrieSubtrie::create(size_t StartBit, - size_t NumBits) { - size_t Size = sizeof(TrieSubtrie) + getTrieTailSize(StartBit, NumBits); - void *Memory = ::malloc(Size); - TrieSubtrie *S = ::new (Memory) TrieSubtrie(StartBit, NumBits); - return std::unique_ptr(S); -} - -TrieSubtrie::TrieSubtrie(size_t StartBit, size_t NumBits) - : TrieNode(true), StartBit(StartBit), NumBits(NumBits), Next(nullptr), - Slots(reinterpret_cast *>( - reinterpret_cast(this) + sizeof(TrieSubtrie)), - (1u << NumBits)) { - for (auto *I = Slots.begin(), *E = Slots.end(); I != E; ++I) - new (I) 
LazyAtomicPointer(nullptr); - - static_assert( - std::is_trivially_destructible>::value, - "Expected no work in destructor for TrieNode"); -} - -TrieSubtrie *TrieSubtrie::sink( - size_t I, TrieContent &Content, size_t NumSubtrieBits, size_t NewI, - function_ref)> Saver) { - assert(NumSubtrieBits > 0); - std::unique_ptr S = create(StartBit + NumBits, NumSubtrieBits); - - assert(NewI < S->Slots.size()); - S->Slots[NewI].store(&Content); - - TrieNode *ExistingNode = &Content; - assert(I < Slots.size()); - if (Slots[I].compare_exchange_strong(ExistingNode, S.get())) - return Saver(std::move(S)); - - // Another thread created a subtrie already. Return it and let "S" be - // destructed. - return cast(ExistingNode); -} - -struct ThreadSafeHashMappedTrieBase::ImplType { - static ImplType *create(size_t StartBit, size_t NumBits) { - size_t Size = sizeof(ImplType) + getTrieTailSize(StartBit, NumBits); - void *Memory = ::malloc(Size); - return ::new (Memory) ImplType(StartBit, NumBits); - } - - static void *operator new(size_t Size) { return ::malloc(Size); } - void operator delete(void *Ptr) { ::free(Ptr); } - - TrieSubtrie *save(std::unique_ptr S) { - assert(!S->Next && "Expected S to a freshly-constructed leaf"); - - TrieSubtrie *CurrentHead = nullptr; - // Add ownership of "S" to front of the list, so that Root -> S -> - // Root.Next. This works by repeatedly setting S->Next to a candidate value - // of Root.Next (initially nullptr), then setting Root.Next to S once the - // candidate matches reality. - while (!Root.Next.compare_exchange_weak(CurrentHead, S.get())) - S->Next.exchange(CurrentHead); - - // Ownership transferred to subtrie. - return S.release(); - } - - /// FIXME: This should take a function that allocates and constructs the - /// content lazily (taking the hash as a separate parameter), in case of - /// collision. - ThreadSafeAllocator ContentAlloc; - TrieSubtrie Root; // Must be last! Tail-allocated. 
- -private: - ImplType(size_t StartBit, size_t NumBits) : Root(StartBit, NumBits) {} -}; - -ThreadSafeHashMappedTrieBase::ImplType & -ThreadSafeHashMappedTrieBase::getOrCreateImpl() { - if (ImplType *Impl = ImplPtr.load()) - return *Impl; - - // Create a new ImplType and store it if another thread doesn't do so first. - // If another thread wins this one is destroyed locally. - std::unique_ptr Impl(ImplType::create(0, NumRootBits)); - ImplType *ExistingImpl = nullptr; - if (ImplPtr.compare_exchange_strong(ExistingImpl, Impl.get())) - return *Impl.release(); - - return *ExistingImpl; -} - -ThreadSafeHashMappedTrieBase::PointerBase -ThreadSafeHashMappedTrieBase::find(ArrayRef Hash) const { - assert(!Hash.empty() && "Uninitialized hash"); - - ImplType *Impl = ImplPtr.load(); - if (!Impl) - return PointerBase(); - - TrieSubtrie *S = &Impl->Root; - IndexGenerator IndexGen{NumRootBits, NumSubtrieBits, Hash}; - size_t Index = IndexGen.next(); - for (;;) { - // Try to set the content. - TrieNode *Existing = S->get(Index); - if (!Existing) - return PointerBase(S, Index, *IndexGen.StartBit); - - // Check for an exact match. - if (auto *ExistingContent = dyn_cast(Existing)) - return ExistingContent->getHash() == Hash - ? PointerBase(ExistingContent->getValuePointer()) - : PointerBase(S, Index, *IndexGen.StartBit); - - Index = IndexGen.next(); - S = cast(Existing); - } -} - -ThreadSafeHashMappedTrieBase::PointerBase ThreadSafeHashMappedTrieBase::insert( - PointerBase Hint, ArrayRef Hash, - function_ref Hash)> - Constructor) { - assert(!Hash.empty() && "Uninitialized hash"); - - ImplType &Impl = getOrCreateImpl(); - TrieSubtrie *S = &Impl.Root; - IndexGenerator IndexGen{NumRootBits, NumSubtrieBits, Hash}; - size_t Index; - if (Hint.isHint()) { - S = static_cast(Hint.P); - Index = IndexGen.hint(Hint.I, Hint.B); - } else { - Index = IndexGen.next(); - } - - for (;;) { - // Load the node from the slot, allocating and calling the constructor if - // the slot is empty. 
- bool Generated = false; - TrieNode &Existing = S->Slots[Index].loadOrGenerate([&]() { - Generated = true; - - // Construct the value itself at the tail. - uint8_t *Memory = reinterpret_cast( - Impl.ContentAlloc.Allocate(ContentAllocSize, ContentAllocAlign)); - const uint8_t *HashStorage = Constructor(Memory + ContentOffset, Hash); - - // Construct the TrieContent header, passing in the offset to the hash. - TrieContent *Content = ::new (Memory) - TrieContent(ContentOffset, Hash.size(), HashStorage - Memory); - assert(Hash == Content->getHash() && "Hash not properly initialized"); - return Content; - }); - // If we just generated it, return it! - if (Generated) - return PointerBase(cast(Existing).getValuePointer()); - - if (isa(Existing)) { - S = &cast(Existing); - Index = IndexGen.next(); - continue; - } - - // Return the existing content if it's an exact match! - auto &ExistingContent = cast(Existing); - if (ExistingContent.getHash() == Hash) - return PointerBase(ExistingContent.getValuePointer()); - - // Sink the existing content as long as the indexes match. - for (;;) { - size_t NextIndex = IndexGen.next(); - size_t NewIndexForExistingContent = - IndexGen.getCollidingBits(ExistingContent.getHash()); - S = S->sink(Index, ExistingContent, IndexGen.getNumBits(), - NewIndexForExistingContent, - [&Impl](std::unique_ptr S) { - return Impl.save(std::move(S)); - }); - Index = NextIndex; - - // Found the difference. - if (NextIndex != NewIndexForExistingContent) - break; - } - } -} - -static void printHexDigit(raw_ostream &OS, uint8_t Digit) { - if (Digit < 10) - OS << char(Digit + '0'); - else - OS << char(Digit - 10 + 'a'); -} - -static void printHexDigits(raw_ostream &OS, ArrayRef Bytes, - size_t StartBit, size_t NumBits) { - assert(StartBit % 4 == 0); - assert(NumBits % 4 == 0); - for (size_t I = StartBit, E = StartBit + NumBits; I != E; I += 4) { - uint8_t HexPair = Bytes[I / 8]; - uint8_t HexDigit = I % 8 == 0 ? 
HexPair >> 4 : HexPair & 0xf; - printHexDigit(OS, HexDigit); - } -} - -static void printBits(raw_ostream &OS, ArrayRef Bytes, size_t StartBit, - size_t NumBits) { - assert(StartBit + NumBits <= Bytes.size() * 8u); - for (size_t I = StartBit, E = StartBit + NumBits; I != E; ++I) { - uint8_t Byte = Bytes[I / 8]; - size_t ByteOffset = I % 8; - if (size_t ByteShift = 8 - ByteOffset - 1) - Byte >>= ByteShift; - OS << (Byte & 0x1 ? '1' : '0'); - } -} - -void TrieSubtrie::printHash(raw_ostream &OS, ArrayRef Bytes) const { - // afb[1c:00*01110*0]def - size_t EndBit = StartBit + NumBits; - size_t HashEndBit = Bytes.size() * 8u; - - size_t FirstBinaryBit = StartBit & ~0x3u; - printHexDigits(OS, Bytes, 0, FirstBinaryBit); - - size_t LastBinaryBit = (EndBit + 3u) & ~0x3u; - OS << "["; - printBits(OS, Bytes, FirstBinaryBit, LastBinaryBit - FirstBinaryBit); - OS << "]"; - - printHexDigits(OS, Bytes, LastBinaryBit, HashEndBit - LastBinaryBit); -} - -static void appendIndexBits(std::string &Prefix, size_t Index, - size_t NumSlots) { - std::string Bits; - for (size_t NumBits = 1u; NumBits < NumSlots; NumBits <<= 1) { - Bits.push_back('0' + (Index & 0x1)); - Index >>= 1; - } - for (char Ch : llvm::reverse(Bits)) - Prefix += Ch; -} - -static void printPrefix(raw_ostream &OS, StringRef Prefix) { - while (Prefix.size() >= 4) { - uint8_t Digit; - bool ErrorParsingBinary = Prefix.take_front(4).getAsInteger(2, Digit); - assert(!ErrorParsingBinary); - (void)ErrorParsingBinary; - printHexDigit(OS, Digit); - Prefix = Prefix.drop_front(4); - } - if (!Prefix.empty()) - OS << "[" << Prefix << "]"; -} - -void TrieSubtrie::print(raw_ostream &OS, - std::optional Prefix) const { - if (!Prefix) { - OS << "root"; - Prefix.emplace(); - } else { - OS << "subtrie="; - printPrefix(OS, *Prefix); - } - - OS << " num-slots=" << Slots.size() << "\n"; - SmallVector Subs; - SmallVector Prefixes; - for (size_t I = 0, E = Slots.size(); I != E; ++I) { - TrieNode *N = get(I); - if (!N) - continue; - OS << "- 
index=" << I << " "; - if (auto *S = dyn_cast(N)) { - std::string SubtriePrefix = *Prefix; - appendIndexBits(SubtriePrefix, I, Slots.size()); - OS << "subtrie="; - printPrefix(OS, SubtriePrefix); - OS << "\n"; - Subs.push_back(S); - Prefixes.push_back(SubtriePrefix); - continue; - } - auto *Content = cast(N); - OS << "content="; - printHash(OS, Content->getHash()); - OS << "\n"; - } - for (size_t I = 0, E = Subs.size(); I != E; ++I) - Subs[I]->print(OS, Prefixes[I]); -} - -void ThreadSafeHashMappedTrieBase::print(raw_ostream &OS) const { - OS << "root-bits=" << NumRootBits << " subtrie-bits=" << NumSubtrieBits - << "\n"; - if (ImplType *Impl = ImplPtr.load()) - Impl->Root.print(OS); - else - OS << "[no-root]\n"; -} - -LLVM_DUMP_METHOD void ThreadSafeHashMappedTrieBase::dump() const { - print(dbgs()); -} - -ThreadSafeHashMappedTrieBase::ThreadSafeHashMappedTrieBase( - size_t ContentAllocSize, size_t ContentAllocAlign, size_t ContentOffset, - std::optional NumRootBits, std::optional NumSubtrieBits) - : ContentAllocSize(ContentAllocSize), ContentAllocAlign(ContentAllocAlign), - ContentOffset(ContentOffset), - NumRootBits(NumRootBits ? *NumRootBits : DefaultNumRootBits), - NumSubtrieBits(NumSubtrieBits ? *NumSubtrieBits : DefaultNumSubtrieBits), - ImplPtr(nullptr) { - assert((!NumRootBits || *NumRootBits < 20) && - "Root should have fewer than ~1M slots"); - assert((!NumSubtrieBits || *NumSubtrieBits < 10) && - "Subtries should have fewer than ~1K slots"); -} - -ThreadSafeHashMappedTrieBase::ThreadSafeHashMappedTrieBase( - ThreadSafeHashMappedTrieBase &&RHS) - : ContentAllocSize(RHS.ContentAllocSize), - ContentAllocAlign(RHS.ContentAllocAlign), - ContentOffset(RHS.ContentOffset), NumRootBits(RHS.NumRootBits), - NumSubtrieBits(RHS.NumSubtrieBits) { - // Steal the root from RHS. 
- ImplPtr = RHS.ImplPtr.exchange(nullptr); -} - -ThreadSafeHashMappedTrieBase::~ThreadSafeHashMappedTrieBase() { - assert(!ImplPtr.load() && "Expected subclass to call destroyImpl()"); -} - -void ThreadSafeHashMappedTrieBase::destroyImpl( - function_ref Destructor) { - std::unique_ptr Impl(ImplPtr.exchange(nullptr)); - if (!Impl) - return; - - // Destroy content nodes throughout trie. Avoid destroying any subtries since - // we need TrieNode::classof() to find the content nodes. - // - // FIXME: Once we have bitsets (see FIXME in TrieSubtrie class), use them - // facilitate sparse iteration here. - if (Destructor) - for (TrieSubtrie *Trie = &Impl->Root; Trie; Trie = Trie->Next.load()) - for (auto &Slot : Trie->Slots) - if (auto *Content = dyn_cast_or_null(Slot.load())) - Destructor(Content->getValuePointer()); - - // Destroy the subtries. Incidentally, this destroys them in the reverse order - // of saving. - TrieSubtrie *Trie = Impl->Root.Next; - while (Trie) { - TrieSubtrie *Next = Trie->Next.exchange(nullptr); - delete Trie; - Trie = Next; - } -} diff --git a/llvm/lib/CAS/InMemoryCAS.cpp b/llvm/lib/CAS/InMemoryCAS.cpp index 58750f75378b1..1d1c9183f34f7 100644 --- a/llvm/lib/CAS/InMemoryCAS.cpp +++ b/llvm/lib/CAS/InMemoryCAS.cpp @@ -9,11 +9,11 @@ #include "BuiltinCAS.h" #include "llvm/ADT/LazyAtomicPointer.h" #include "llvm/ADT/PointerIntPair.h" -#include "llvm/ADT/PointerUnion.h" -#include "llvm/CAS/BuiltinObjectHasher.h" -#include "llvm/CAS/HashMappedTrie.h" -#include "llvm/CAS/ThreadSafeAllocator.h" +#include "llvm/ADT/TrieRawHashMap.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ThreadSafeAllocator.h" +#include "llvm/Support/TrailingObjects.h" using namespace llvm; using namespace llvm::cas; @@ -26,14 +26,14 @@ class InMemoryObject; /// Index of referenced IDs (map: Hash -> InMemoryObject*). Uses /// LazyAtomicPointer to coordinate creation of objects. 
using InMemoryIndexT = - ThreadSafeHashMappedTrie, + ThreadSafeTrieRawHashMap, sizeof(HashType)>; /// Values in \a InMemoryIndexT. \a InMemoryObject's point at this to access /// their hash. using InMemoryIndexValueT = InMemoryIndexT::value_type; - +/// Builtin InMemory CAS that stores CAS object in the memory. class InMemoryObject { public: enum class Kind { @@ -57,6 +57,9 @@ class InMemoryObject { InMemoryObject() = delete; InMemoryObject(InMemoryObject &&) = delete; InMemoryObject(const InMemoryObject &) = delete; + InMemoryObject &operator=(const InMemoryObject &) = delete; + InMemoryObject &operator=(InMemoryObject &&) = delete; + virtual ~InMemoryObject() = default; protected: InMemoryObject(Kind K, const InMemoryIndexValueT &I) : IndexAndKind(&I, K) {} @@ -71,12 +74,12 @@ class InMemoryObject { static_assert(((int)Kind::Max >> NumKindBits) == 0, "Kind will be truncated"); public: - inline ArrayRef getData() const; + ArrayRef getData() const; - inline ArrayRef getRefs() const; + ArrayRef getRefs() const; }; -class InMemoryRefObject : public InMemoryObject { +class InMemoryRefObject final : public InMemoryObject { public: static constexpr Kind KindValue = Kind::RefNode; static bool classof(const InMemoryObject *O) { @@ -109,7 +112,10 @@ class InMemoryRefObject : public InMemoryObject { ArrayRef Data; }; -class InMemoryInlineObject : public InMemoryObject { +class InMemoryInlineObject final + : public InMemoryObject, + public TrailingObjects { public: static constexpr Kind KindValue = Kind::InlineNode; static bool classof(const InMemoryObject *O) { @@ -118,15 +124,12 @@ class InMemoryInlineObject : public InMemoryObject { ArrayRef getRefs() const { return getRefsImpl(); } ArrayRef getRefsImpl() const { - return ArrayRef( - reinterpret_cast(this + 1), NumRefs); + return ArrayRef(getTrailingObjects(), NumRefs); } ArrayRef getData() const { return getDataImpl(); } ArrayRef getDataImpl() const { - ArrayRef Refs = getRefs(); - return ArrayRef( - 
reinterpret_cast(Refs.data() + Refs.size()), DataSize); + return ArrayRef(getTrailingObjects(), DataSize); } static InMemoryInlineObject & @@ -138,6 +141,10 @@ class InMemoryInlineObject : public InMemoryObject { return *new (Mem) InMemoryInlineObject(I, Refs, Data); } + size_t numTrailingObjects(OverloadToken) const { + return NumRefs; + } + private: InMemoryInlineObject(const InMemoryIndexValueT &I, ArrayRef Refs, @@ -295,7 +302,7 @@ InMemoryCAS::storeFromNullTerminatedRegion(ArrayRef ComputedHash, // Save Map if the winning node uses it. if (auto *RefNode = dyn_cast(&Node)) if (RefNode->getData().data() == Map.data()) - new (MemoryMaps.Allocate()) sys::fs::mapped_file_region(std::move(Map)); + new (MemoryMaps.Allocate(1)) sys::fs::mapped_file_region(std::move(Map)); return toReference(Node); } diff --git a/llvm/lib/CAS/MappedFileRegionArena.cpp b/llvm/lib/CAS/MappedFileRegionArena.cpp new file mode 100644 index 0000000000000..f2413b3ba6ecf --- /dev/null +++ b/llvm/lib/CAS/MappedFileRegionArena.cpp @@ -0,0 +1,441 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file Implements MappedFileRegionArena. +/// +/// A bump pointer allocator, backed by a memory-mapped file. +/// +/// The effect we want is: +/// +/// Step 1. If it doesn't exist, create the file with an initial size. +/// Step 2. Reserve virtual memory large enough for the max file size. +/// Step 3. Map the file into memory in the reserved region. +/// Step 4. Increase the file size and update the mapping when necessary. +/// +/// However, updating the mapping is challenging when it needs to work portably, +/// and across multiple processes without locking for every read. 
Our current +/// implementation handles the steps above in the following ways: +/// +/// Step 1. Use \ref sys::fs::resize_file_sparse to grow the file to its max +/// size (typically several GB). If the file system doesn't support +/// sparse file, this may return a fully allocated file. +/// Step 2. Call \ref sys::fs::mapped_file_region to map the entire file. +/// Step 3. [Automatic as part of step 2.] +/// Step 4. If supported, use \c fallocate or similar APIs to ensure the file +/// system storage for the sparse file so we won't end up with partial +/// file if the disk is out of space. +/// +/// Additionally, we attempt to resize the file to its actual data size when +/// closing the mapping, if this is the only concurrent instance. This is done +/// using file locks. Shrinking the file mitigates problems with having large +/// files: on filesystems without sparse files it avoids unnecessary space use; +/// it also avoids allocating the full size if another process copies the file, +/// which typically loses sparseness. These mitigations only work while the file +/// is not in use. +/// +/// The capacity and the header offset are determined by the first user of the +/// MappedFileRegionArena instance and any future mismatched value from the +/// original will result in an error on creation. +/// +/// To support resizing, we use two separate file locks: +/// 1. We use a shared reader lock on a ".shared" file until destruction. +/// 2. We use a lock on the main file during initialization - shared to check +/// the status, upgraded to exclusive to resize/initialize the file. +/// +/// Then during destruction we attempt to get exclusive access on (1), which +/// requires no concurrent readers. If so, we shrink the file. Using two +/// separate locks simplifies the implementation and enables it to work on +/// platforms (e.g. Windows) where a shared/reader lock prevents writing.
+//===----------------------------------------------------------------------===// + +#include "llvm/CAS/MappedFileRegionArena.h" +#include "OnDiskCommon.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CAS/OnDiskCASLogger.h" + +#if LLVM_ON_UNIX +#include +#if __has_include() +#include +#endif +#ifdef DEV_BSIZE +#define MAPPED_FILE_BSIZE DEV_BSIZE +#elif __linux__ +#define MAPPED_FILE_BSIZE 512 +#endif +#endif + +using namespace llvm; +using namespace llvm::cas; +using namespace llvm::cas::ondisk; + +namespace { +struct FileWithLock { + std::string Path; + int FD = -1; + std::optional Locked; + +private: + FileWithLock(std::string PathStr, Error &E) : Path(std::move(PathStr)) { + ErrorAsOutParameter EOP(&E); + if (std::error_code EC = sys::fs::openFileForReadWrite( + Path, FD, sys::fs::CD_OpenAlways, sys::fs::OF_None)) + E = createFileError(Path, EC); + } + +public: + FileWithLock(FileWithLock &) = delete; + FileWithLock(FileWithLock &&Other) { + Path = std::move(Other.Path); + FD = Other.FD; + Other.FD = -1; + Locked = Other.Locked; + Other.Locked = std::nullopt; + } + + ~FileWithLock() { consumeError(unlock()); } + + static Expected open(StringRef Path) { + Error E = Error::success(); + FileWithLock Result(Path.str(), E); + if (E) + return std::move(E); + return std::move(Result); + } + + Error lock(sys::fs::LockKind LK) { + assert(!Locked && "already locked"); + if (std::error_code EC = lockFileThreadSafe(FD, LK)) + return createFileError(Path, EC); + Locked = LK; + return Error::success(); + } + + Error switchLock(sys::fs::LockKind LK) { + assert(Locked && "not locked"); + if (auto E = unlock()) + return E; + + return lock(LK); + } + + Error unlock() { + if (Locked) { + Locked = std::nullopt; + if (std::error_code EC = unlockFileThreadSafe(FD)) + return createFileError(Path, EC); + } + return Error::success(); + } + + // Return true if succeed to lock the file exclusively. 
+ bool tryLockExclusive() { + assert(!Locked && "can only try to lock if not locked"); + if (tryLockFileThreadSafe(FD) == std::error_code()) { + Locked = sys::fs::LockKind::Exclusive; + return true; + } + + return false; + } + + // Release the lock so it will not be unlocked on destruction. + void release() { + Locked = std::nullopt; + FD = -1; + } +}; + +struct FileSizeInfo { + uint64_t Size; + uint64_t AllocatedSize; + + static ErrorOr get(sys::fs::file_t File); +}; +} // end anonymous namespace + +Expected MappedFileRegionArena::create( + const Twine &Path, uint64_t Capacity, uint64_t HeaderOffset, + std::shared_ptr Logger, + function_ref NewFileConstructor) { + uint64_t MinCapacity = HeaderOffset + sizeof(Header); + if (Capacity < MinCapacity) + return createStringError( + std::make_error_code(std::errc::invalid_argument), + "capacity is too small to hold MappedFileRegionArena"); + + MappedFileRegionArena Result; + Result.Path = Path.str(); + Result.Logger = std::move(Logger); + + // Open the support file. See file comment for details of locking scheme. + SmallString<128> SharedFilePath(Result.Path); + SharedFilePath.append(".shared"); + + auto SharedFileLock = FileWithLock::open(SharedFilePath); + if (!SharedFileLock) + return SharedFileLock.takeError(); + Result.SharedLockFD = SharedFileLock->FD; + + // Take shared/reader lock that will be held until destroyImpl if construction + // is successful. + if (auto E = SharedFileLock->lock(sys::fs::LockKind::Shared)) + return std::move(E); + + // Take shared/reader lock for initialization. 
+ auto MainFile = FileWithLock::open(Result.Path); + if (!MainFile) + return MainFile.takeError(); + if (Error E = MainFile->lock(sys::fs::LockKind::Shared)) + return std::move(E); + Result.FD = MainFile->FD; + + sys::fs::file_t File = sys::fs::convertFDToNativeFile(MainFile->FD); + auto FileSize = FileSizeInfo::get(File); + if (!FileSize) + return createFileError(Result.Path, FileSize.getError()); + + // If the size is smaller than the capacity, we need to initialize the file. + // It may be empty, or may have been shrunk during a previous close. + if (FileSize->Size < Capacity) { + // Lock the file exclusively so only one process will do the initialization. + if (Error E = MainFile->switchLock(sys::fs::LockKind::Exclusive)) + return std::move(E); + // Retrieve the current size now that we have exclusive access. + FileSize = FileSizeInfo::get(File); + if (!FileSize) + return createFileError(Result.Path, FileSize.getError()); + } + + if (FileSize->Size >= MinCapacity) { + // File is initialized. Read out the header to check for capacity and + // offset. + SmallVector HeaderContent(sizeof(Header)); + auto Size = sys::fs::readNativeFileSlice(File, HeaderContent, HeaderOffset); + if (!Size) + return Size.takeError(); + + Header H; + memcpy(&H, HeaderContent.data(), sizeof(H)); + if (H.HeaderOffset != HeaderOffset) + return createStringError( + std::make_error_code(std::errc::invalid_argument), + "specified header offset (" + utostr(HeaderOffset) + + ") does not match existing config (" + utostr(H.HeaderOffset) + + ")"); + + if (H.Capacity < MinCapacity) + return createStringError( + std::make_error_code(std::errc::bad_file_descriptor), + "capacity inside the MappedFileRegionArena is too small"); + + // If the capacity doesn't match, use the existing capacity instead. + if (H.Capacity != Capacity) + Capacity = H.Capacity; + } + + // If the size is smaller than capacity, we need to resize the file.
+ if (FileSize->Size < Capacity) { + // Acquire the exclusive lock before resizing the file. In the rare case + // when opening a large CAS using a small requested size, a shared lock + // needs to switch to an exclusive lock here. + if (MainFile->Locked != sys::fs::LockKind::Exclusive) { + if (Error E = MainFile->switchLock(sys::fs::LockKind::Exclusive)) + return std::move(E); + } + if (std::error_code EC = + sys::fs::resize_file_sparse(MainFile->FD, Capacity)) + return createFileError(Result.Path, EC); + if (Result.Logger) + Result.Logger->logMappedFileRegionArenaResizeFile( + Result.Path, FileSize->Size, Capacity); + } + + // Create the mapped region. + { + std::error_code EC; + const char *Name = nullptr; +#ifdef _WIN32 + // Give the file mapping a name to ensure the same mappings are + // shared across processes. + std::string MapName = Result.Path; + std::replace(MapName.begin(), MapName.end(), '\\', '/'); + MapName = "Local\\" + MapName; + Name = MapName.c_str(); +#endif + sys::fs::mapped_file_region Map( + File, sys::fs::mapped_file_region::readwrite, Capacity, 0, EC, Name); + if (EC) + return createFileError(Result.Path, EC); + Result.Region = std::move(Map); + } + + // Initialize the header. + if (Error E = Result.initializeHeader(HeaderOffset)) + return std::move(E); + + if (FileSize->Size < MinCapacity) { + assert(MainFile->Locked == sys::fs::LockKind::Exclusive); + // If we need to fully initialize the file, call NewFileConstructor. + if (Error E = NewFileConstructor(Result)) + return std::move(E); + + Result.H->HeaderOffset.exchange(HeaderOffset); + Result.H->Capacity.exchange(Capacity); + } + + if (MainFile->Locked == sys::fs::LockKind::Exclusive) { + // If holding an exclusive lock, we might have resized the file and + // performed some read/write to the file. Query the file size again to make + // sure everything is up-to-date. Otherwise, FileSize info is already + // up-to-date. 
+ FileSize = FileSizeInfo::get(File);
+ if (!FileSize)
+ return createFileError(Result.Path, FileSize.getError());
+ Result.H->AllocatedSize.exchange(FileSize->AllocatedSize);
+ }
+
+ // Release the shared lock so it can be closed in destroyImpl().
+ SharedFileLock->release();
+ return std::move(Result);
+}
+
+void MappedFileRegionArena::destroyImpl() {
+ if (!FD)
+ return;
+
+ // Drop the shared lock indicating we are no longer accessing the file.
+ if (SharedLockFD)
+ (void)unlockFileThreadSafe(*SharedLockFD);
+
+ // Attempt to truncate the file if we can get exclusive access. Ignore any
+ // errors.
+ if (H) {
+ assert(SharedLockFD && "Must have shared lock file open");
+ if (tryLockFileThreadSafe(*SharedLockFD) == std::error_code()) {
+ size_t Size = size();
+ size_t Capacity = capacity();
+ // sync to file system to make sure all contents are up-to-date.
+ (void)Region.sync();
+ // unmap the file before resizing since that is the requirement for
+ // some platforms.
+ Region.unmap();
+ (void)sys::fs::resize_file(*FD, Size);
+ (void)unlockFileThreadSafe(*SharedLockFD);
+ if (Logger)
+ Logger->logMappedFileRegionArenaResizeFile(Path, Capacity, Size);
+ }
+ }
+
+ auto Close = [](std::optional &FD) {
+ if (FD) {
+ sys::fs::file_t File = sys::fs::convertFDToNativeFile(*FD);
+ sys::fs::closeFile(File);
+ FD = std::nullopt;
+ }
+ };
+
+ // Close the file and shared lock.
+ Close(FD);
+ Close(SharedLockFD);
+
+ if (Logger)
+ Logger->logMappedFileRegionArenaClose(Path);
+}
+
+Error MappedFileRegionArena::initializeHeader(uint64_t HeaderOffset) {
+ if (capacity() >= static_cast(INT64_MAX))
+ return createStringError(make_error_code(std::errc::protocol_error),
+ "arena capacity does not fit in int64_t");
+ uint64_t HeaderEndOffset = HeaderOffset + sizeof(decltype(*H));
+ if (HeaderEndOffset > capacity())
+ return createStringError(make_error_code(std::errc::protocol_error),
+ "arena header extends past capacity");
+ if (!isAligned(Align::Of(), HeaderOffset))
+ return createStringError(make_error_code(std::errc::protocol_error),
+ "arena header offset is not aligned");
+ H = reinterpret_cast(data() + HeaderOffset);
+
+ uint64_t ExistingValue = 0;
+ if (!H->BumpPtr.compare_exchange_strong(ExistingValue, HeaderEndOffset))
+ if (ExistingValue < HeaderEndOffset)
+ return createStringError(
+ make_error_code(std::errc::protocol_error),
+ "arena bump pointer is corrupt: 0x" +
+ utohexstr(ExistingValue, /*LowerCase=*/true));
+ if (Logger)
+ Logger->logMappedFileRegionArenaCreate(Path, *FD, data(), capacity(),
+ size());
+ return Error::success();
+}
+
+static Error createAllocatorOutOfSpaceError() {
+ return createStringError(std::make_error_code(std::errc::not_enough_memory),
+ "memory mapped file allocator is out of space");
+}
+
+Expected MappedFileRegionArena::allocateOffset(uint64_t AllocSize) {
+ AllocSize = alignTo(AllocSize, getAlign());
+ uint64_t OldEnd = H->BumpPtr.fetch_add(AllocSize);
+ uint64_t NewEnd = OldEnd + AllocSize;
+ if (LLVM_UNLIKELY(NewEnd > capacity())) {
+ // Return the allocation. If the start already passed the end, that means
+ // some other concurrent allocations already consumed all the capacity.
+ // There is no need to return the original value. If the start was not
+ // past the end, the current allocation certainly bumped it past the end.
+ // All other allocations afterwards must have failed and the current
+ // allocation is in charge of returning the bump pointer to a valid value.
+ if (OldEnd <= capacity())
+ (void)H->BumpPtr.exchange(OldEnd);
+
+ if (Logger)
+ Logger->logMappedFileRegionArenaOom(Path, capacity(), OldEnd, AllocSize);
+
+ return createAllocatorOutOfSpaceError();
+ }
+
+ uint64_t DiskSize = H->AllocatedSize;
+ if (LLVM_UNLIKELY(NewEnd > DiskSize)) {
+ uint64_t NewSize;
+ // The minimum increment is a page, but allocate more to amortize the cost.
+ constexpr uint64_t Increment = 1 * 1024 * 1024; // 1 MB
+ if (Error E = preallocateFileTail(*FD, DiskSize, DiskSize + Increment)
+ .moveInto(NewSize))
+ return std::move(E);
+ assert(NewSize >= DiskSize + Increment);
+ // FIXME: on Darwin this can under-count the size if there is a race to
+ // preallocate disk, because the semantics of F_PREALLOCATE are to add bytes
+ // to the end of the file, not to allocate up to a fixed size.
+ // Any discrepancy will be resolved the next time the file is truncated and
+ // then reopened.
+ while (DiskSize < NewSize)
+ H->AllocatedSize.compare_exchange_strong(DiskSize, NewSize);
+ }
+
+ if (Logger)
+ Logger->logMappedFileRegionArenaAllocate(data(), OldEnd, AllocSize);
+
+ return OldEnd;
+}
+
+ErrorOr FileSizeInfo::get(sys::fs::file_t File) {
+#if LLVM_ON_UNIX && defined(MAPPED_FILE_BSIZE)
+ struct stat Status;
+ int StatRet = ::fstat(File, &Status);
+ if (StatRet)
+ return errnoAsErrorCode();
+ uint64_t AllocatedSize = uint64_t(Status.st_blksize) * MAPPED_FILE_BSIZE;
+ return FileSizeInfo{uint64_t(Status.st_size), AllocatedSize};
+#else
+ // Fallback: assume the file is fully allocated. Note: this may result in
+ // data loss on out-of-space.
+ sys::fs::file_status Status; + if (std::error_code EC = sys::fs::status(File, Status)) + return EC; + return FileSizeInfo{Status.getSize(), Status.getSize()}; +#endif +} diff --git a/llvm/lib/CAS/MappedFileRegionBumpPtr.cpp b/llvm/lib/CAS/MappedFileRegionBumpPtr.cpp deleted file mode 100644 index 79e40a0c0417f..0000000000000 --- a/llvm/lib/CAS/MappedFileRegionBumpPtr.cpp +++ /dev/null @@ -1,355 +0,0 @@ -//===- MappedFileRegionBumpPtr.cpp ------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// A bump pointer allocator, backed by a memory-mapped file. -/// -/// The effect we want is: -/// -/// 1. If it doesn't exist, create the file with an initial size. -/// 2. Reserve virtual memory large enough for the max file size. -/// 3. Map the file into memory in the reserved region. -/// 4. Increase the file size and update the mapping when necessary. -/// -/// However, updating the mapping is challenging when it needs to work portably, -/// and across multiple processes without locking for every read. Our current -/// implementation strategy is: -/// -/// 1. Use \c ftruncate (\c sys::fs::resize_file) to grow the file to its max -/// size (typically several GB). Many modern filesystems will create a sparse -/// file, so that the trailing unused pages do not take space on disk. -/// 2. Call \c mmap (\c sys::fs::mapped_file_region) -/// 3. [Automatic as part of 2.] -/// 4. [Automatic as part of 2.] -/// -/// Additionally, we attempt to resize the file to its actual data size when -/// closing the mapping, if this is the only concurrent instance. This is done -/// using file locks. 
Shrinking the file mitigates problems with having large -/// files: on filesystems without sparse files it avoids unnecessary space use; -/// it also avoids allocating the full size if another process copies the file, -/// which typically loses sparseness. These mitigations only work while the file -/// is not in use. -/// -/// FIXME: we assume that all concurrent users of the file will use the same -/// value for Capacity. Otherwise a process with a larger capacity can write -/// data that is "out of bounds" for processes with smaller capacity. Currently -/// this is true in the CAS. -/// -/// To support resizing, we use two separate file locks: -/// 1. We use a shared reader lock on a ".shared" file until destruction. -/// 2. We use a lock on the main file during initialization - shared to check -/// the status, upgraded to exclusive to resize/initialize the file. -/// -/// Then during destruction we attempt to get exclusive access on (1), which -/// requires no concurrent readers. If so, we shrink the file. Using two -/// separate locks simplifies the implementation and enables it to work on -/// platforms (e.g. Windows) where a shared/reader lock prevents writing. 
-//===----------------------------------------------------------------------===// - -#include "llvm/CAS/MappedFileRegionBumpPtr.h" -#include "OnDiskCommon.h" -#include "llvm/CAS/OnDiskCASLogger.h" -#include "llvm/Support/Compiler.h" - -#if LLVM_ON_UNIX -#include -#if __has_include() -#include -#endif -#ifdef DEV_BSIZE -#define MAPPED_FILE_BSIZE DEV_BSIZE -#elif __linux__ -#define MAPPED_FILE_BSIZE 512 -#endif -#endif - -using namespace llvm; -using namespace llvm::cas; -using namespace llvm::cas::ondisk; - -namespace { -struct FileLockRAII { - std::string Path; - int FD; - enum LockKind { Shared, Exclusive }; - std::optional Locked; - - FileLockRAII(std::string Path, int FD) : Path(std::move(Path)), FD(FD) {} - ~FileLockRAII() { consumeError(unlock()); } - - Error lock(LockKind LK) { - if (std::error_code EC = lockFileThreadSafe(FD, LK == Exclusive)) - return createFileError(Path, EC); - Locked = LK; - return Error::success(); - } - - Error unlock() { - if (Locked) { - Locked = std::nullopt; - if (std::error_code EC = unlockFileThreadSafe(FD)) - return createFileError(Path, EC); - } - return Error::success(); - } -}; - -struct FileSizeInfo { - uint64_t Size; - uint64_t AllocatedSize; - - static ErrorOr get(sys::fs::file_t File); -}; -} // end anonymous namespace - -Expected MappedFileRegionBumpPtr::create( - const Twine &Path, uint64_t Capacity, int64_t BumpPtrOffset, - std::shared_ptr Logger, - function_ref NewFileConstructor) { - MappedFileRegionBumpPtr Result; - Result.Path = Path.str(); - Result.Logger = std::move(Logger); - // Open the main file. - int FD; - if (std::error_code EC = sys::fs::openFileForReadWrite( - Result.Path, FD, sys::fs::CD_OpenAlways, sys::fs::OF_None)) - return createFileError(Path, EC); - Result.FD = FD; - - // Open the shared lock file. See file comment for details of locking scheme. 
- SmallString<128> SharedLockPath(Result.Path); - SharedLockPath.append(".shared"); - int SharedLockFD; - if (std::error_code EC = sys::fs::openFileForReadWrite( - SharedLockPath, SharedLockFD, sys::fs::CD_OpenAlways, - sys::fs::OF_None)) - return createFileError(SharedLockPath, EC); - Result.SharedLockFD = SharedLockFD; - - // Take shared/reader lock that will be held until we close the file; unlocked - // by destroyImpl. - if (std::error_code EC = - lockFileThreadSafe(SharedLockFD, /*Exclusive=*/false)) - return createFileError(Path, EC); - - // Take shared/reader lock for initialization. - FileLockRAII InitLock(Result.Path, FD); - if (Error E = InitLock.lock(FileLockRAII::Shared)) - return std::move(E); - - sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD); - auto FileSize = FileSizeInfo::get(File); - if (!FileSize) - return createFileError(Result.Path, FileSize.getError()); - - if (FileSize->Size < Capacity) { - // Lock the file exclusively so only one process will do the initialization. - if (Error E = InitLock.unlock()) - return std::move(E); - if (Error E = InitLock.lock(FileLockRAII::Exclusive)) - return std::move(E); - // Retrieve the current size now that we have exclusive access. - FileSize = FileSizeInfo::get(File); - if (!FileSize) - return createFileError(Result.Path, FileSize.getError()); - } - - // At this point either the file is still under-sized, or we have the size for - // the completely initialized file. - - if (FileSize->Size < Capacity) { - // We are initializing the file; it may be empty, or may have been shrunk - // during a previous close. - // FIXME: Detect a case where someone opened it with a smaller capacity. 
- assert(InitLock.Locked == FileLockRAII::Exclusive); - if (std::error_code EC = sys::fs::resize_file_sparse(FD, Capacity)) - return createFileError(Result.Path, EC); - - if (Result.Logger) - Result.Logger->log_MappedFileRegionBumpPtr_resizeFile( - Result.Path, FileSize->Size, Capacity); - } else { - // Someone else initialized it. - Capacity = FileSize->Size; - } - - // Create the mapped region. - { - std::error_code EC; - const char *Name = nullptr; -#ifdef _WIN32 - // Give the file mapping a name to ensure the same mappings are - // shared across processes. - std::string MapName = Result.Path; - std::replace(MapName.begin(), MapName.end(), '\\', '/'); - MapName = "Local\\" + MapName; - Name = MapName.c_str(); -#endif - sys::fs::mapped_file_region Map( - File, sys::fs::mapped_file_region::readwrite, Capacity, 0, EC, Name); - if (EC) - return createFileError(Result.Path, EC); - Result.Region = std::move(Map); - } - - if (FileSize->Size == 0) { - assert(InitLock.Locked == FileLockRAII::Exclusive); - // We are creating a new file; run the constructor. - if (Error E = NewFileConstructor(Result)) - return std::move(E); - } else { - Result.initializeBumpPtr(BumpPtrOffset); - } - - if (FileSize->Size < Capacity && FileSize->AllocatedSize < Capacity) { - // We are initializing the file; sync the allocated size in case it - // changed when truncating or during construction. - FileSize = FileSizeInfo::get(File); - if (!FileSize) - return createFileError(Result.Path, FileSize.getError()); - assert(InitLock.Locked == FileLockRAII::Exclusive); - Result.H->AllocatedSize.exchange(FileSize->AllocatedSize); - } - - return Result; -} - -void MappedFileRegionBumpPtr::destroyImpl() { - if (!FD) - return; - - // Drop the shared lock indicating we are no longer accessing the file. - if (SharedLockFD) - (void)unlockFileThreadSafe(*SharedLockFD); - - // Attempt to truncate the file if we can get exclusive access. Ignore any - // errors. 
- if (H) { - assert(SharedLockFD && "Must have shared lock file open"); - if (tryLockFileThreadSafe(*SharedLockFD) == std::error_code()) { - size_t Size = size(); - size_t Capacity = capacity(); - assert(Size <= Capacity); - // sync to file system to make sure all contents are up-to-date. - (void)Region.sync(); - Region.unmap(); - (void)sys::fs::resize_file(*FD, Size); - (void)unlockFileThreadSafe(*SharedLockFD); - - if (Logger) - Logger->log_MappedFileRegionBumpPtr_resizeFile(Path, Capacity, Size); - } else { -#if defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64)) - // A workaround for intermittent data corruption bug on Windows - // ARM64 https://github.com/swiftlang/llvm-project/issues/12605 - (void)Region.sync(); -#endif - } - } - - auto Close = [](std::optional &FD) { - if (FD) { - sys::fs::file_t File = sys::fs::convertFDToNativeFile(*FD); - sys::fs::closeFile(File); - FD = std::nullopt; - } - }; - - // Close the file and shared lock. - Close(FD); - Close(SharedLockFD); - - if (Logger) - Logger->log_MappedFileRegionBumpPtr_close(Path); -} - -void MappedFileRegionBumpPtr::initializeBumpPtr(int64_t BumpPtrOffset) { - assert(capacity() < (uint64_t)INT64_MAX && "capacity must fit in int64_t"); - int64_t BumpPtrEndOffset = BumpPtrOffset + sizeof(decltype(*H)); - assert(BumpPtrEndOffset <= (int64_t)capacity() && - "Expected end offset to be pre-allocated"); - assert(isAligned(Align::Of(), BumpPtrOffset) && - "Expected end offset to be aligned"); - H = reinterpret_cast(data() + BumpPtrOffset); - - int64_t ExistingValue = 0; - if (!H->BumpPtr.compare_exchange_strong(ExistingValue, BumpPtrEndOffset)) - assert(ExistingValue >= BumpPtrEndOffset && - "Expected 0, or past the end of the BumpPtr itself"); - - if (Logger) - Logger->log_MappedFileRegionBumpPtr_create(Path, *FD, data(), capacity(), - size()); -} - -static Error createAllocatorOutOfSpaceError() { - return createStringError(std::make_error_code(std::errc::not_enough_memory), - "memory mapped file 
allocator is out of space"); -} - -Expected MappedFileRegionBumpPtr::allocateOffset(uint64_t AllocSize) { - AllocSize = alignTo(AllocSize, getAlign()); - int64_t OldEnd = H->BumpPtr.fetch_add(AllocSize); - int64_t NewEnd = OldEnd + AllocSize; - if (LLVM_UNLIKELY(NewEnd > (int64_t)capacity())) { - // Return the allocation. If the start already passed the end, that means - // some other concurrent allocations already consumed all the capacity. - // There is no need to return the original value. If the start was not - // passed the end, current allocation certainly bumped it passed the end. - // All other allocation afterwards must have failed and current allocation - // is in charge of return the allocation back to a valid value. - if (OldEnd <= (int64_t)capacity()) - (void)H->BumpPtr.exchange(OldEnd); - - if (Logger) - Logger->log_MappedFileRegionBumpPtr_oom(Path, capacity(), OldEnd, - AllocSize); - - return createAllocatorOutOfSpaceError(); - } - - int64_t DiskSize = H->AllocatedSize; - if (LLVM_UNLIKELY(NewEnd > DiskSize)) { - int64_t NewSize; - // The minimum increment is a page, but allocate more to amortize the cost. - constexpr int64_t Increment = 1 * 1024 * 1024; // 1 MB - if (Error E = preallocateFileTail(*FD, DiskSize, DiskSize + Increment).moveInto(NewSize)) - return std::move(E); - assert(NewSize >= DiskSize + Increment); - // FIXME: on Darwin this can under-count the size if there is a race to - // preallocate disk, because the semantics of F_PREALLOCATE are to add bytes - // to the end of the file, not to allocate up to a fixed size. - // Any discrepancy will be resolved the next time the file is truncated and - // then reopend. 
- while (DiskSize < NewSize) - H->AllocatedSize.compare_exchange_strong(DiskSize, NewSize); - } - - if (Logger) - Logger->log_MappedFileRegionBumpPtr_allocate(data(), OldEnd, AllocSize); - - return OldEnd; -} - -ErrorOr FileSizeInfo::get(sys::fs::file_t File) { -#if LLVM_ON_UNIX && defined(MAPPED_FILE_BSIZE) - struct stat Status; - int StatRet = ::fstat(File, &Status); - if (StatRet) - return errnoAsErrorCode(); - uint64_t AllocatedSize = uint64_t(Status.st_blksize) * MAPPED_FILE_BSIZE; - return FileSizeInfo{uint64_t(Status.st_size), AllocatedSize}; -#else - // Fallback: assume the file is fully allocated. Note: this may result in - // data loss on out-of-space. - sys::fs::file_status Status; - if (std::error_code EC = sys::fs::status(File, Status)) - return EC; - return FileSizeInfo{Status.getSize(), Status.getSize()}; -#endif -} diff --git a/llvm/lib/CAS/NamedValuesSchema.cpp b/llvm/lib/CAS/NamedValuesSchema.cpp new file mode 100644 index 0000000000000..5c6886162c043 --- /dev/null +++ b/llvm/lib/CAS/NamedValuesSchema.cpp @@ -0,0 +1,190 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CAS/NamedValuesSchema.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/EndianStream.h" +#include "llvm/Support/StringSaver.h" + +using namespace llvm; +using namespace llvm::cas; + +char NamedValuesSchema::ID = 0; +constexpr StringLiteral NamedValuesSchema::SchemaName; + +void NamedValuesSchema::anchor() {} + +bool NamedValuesSchema::isNode(const ObjectProxy &Node) const { + // Load the first ref to check its content. 
+ if (Node.getNumReferences() < 1) + return false; + + auto FirstRef = Node.getReference(0); + return FirstRef == *NamedValuesKindRef; +} + +NamedValuesSchema::NamedValuesSchema(cas::ObjectStore &CAS, Error &E) + : NamedValuesSchema::RTTIExtends(CAS) { + ErrorAsOutParameter EAOP(E); + auto Kind = CAS.storeFromString({}, SchemaName); + if (!Kind) { + E = Kind.takeError(); + return; + } + NamedValuesKindRef = *Kind; +} + +Expected NamedValuesSchema::create(ObjectStore &CAS) { + Error E = Error::success(); + NamedValuesSchema S(CAS, E); + if (E) + return std::move(E); + return S; +} + +size_t NamedValuesSchema::getNumEntries(NamedValuesProxy Values) const { + return Values.getNumReferences() - 1; +} + +Error NamedValuesSchema::forEachEntry( + NamedValuesProxy Values, + function_ref Callback) const { + for (size_t I = 0, IE = getNumEntries(Values); I != IE; ++I) + if (Error E = Callback(loadEntry(Values, I))) + return E; + + return Error::success(); +} + +NamedValuesEntry NamedValuesSchema::loadEntry(NamedValuesProxy Values, + size_t I) const { + StringRef Name = Values.getName(I); + auto ObjectRef = Values.getReference(I + 1); + + return {Name, ObjectRef}; +} + +std::optional NamedValuesSchema::lookupEntry(NamedValuesProxy Values, + StringRef Name) const { + size_t NumNames = getNumEntries(Values); + if (!NumNames) + return std::nullopt; + + // Start with a binary search, if there are enough entries. + // FIXME: MaxLinearSearchSize is a heuristic and not optimized. + const size_t MaxLinearSearchSize = 4; + size_t Last = NumNames; + size_t First = 0; + while (Last - First > MaxLinearSearchSize) { + auto I = First + (Last - First) / 2; + StringRef NameI = Values.getName(I); + switch (Name.compare(NameI)) { + case 0: + return I; + case -1: + Last = I; + break; + case 1: + First = I + 1; + break; + } + } + + // Use a linear search for small list. 
+ for (; First != Last; ++First) + if (Name == Values.getName(First)) + return First; + + return std::nullopt; +} + +Expected NamedValuesSchema::load(ObjectRef Object) const { + auto Node = CAS.getProxy(Object); + if (!Node) + return Node.takeError(); + + return load(*Node); +} + +Expected NamedValuesSchema::load(ObjectProxy Object) const { + if (!isNode(Object)) + return createStringError(inconvertibleErrorCode(), + "object does not conform to NamedValuesSchema"); + + return NamedValuesProxy(*this, Object); +} + +Expected +NamedValuesSchema::construct(ArrayRef Entries) { + // ScratchPad for output. + SmallString<256> Data; + SmallVector Refs; + Refs.push_back(*NamedValuesKindRef); + + // Ensure a stable order for entries and ignore name collisions. + SmallVector Sorted(Entries); + llvm::stable_sort(Sorted); + + if (llvm::unique(Sorted) != Sorted.end()) + return createStringError("entry names are not unique"); + + raw_svector_ostream OS(Data); + support::endian::Writer Writer(OS, endianness::little); + // Encode the entries in the Data. The layout of the named values schema + // object is: + // * Name offset table: The offset of in the data blob for where to find the + // string. It has N + 1 entries and you can find the name of n-th entry at + // offset[n] -> offset[n+1]. Each offset is encoded as little-endian + // uint32_t. + // * Object: ObjectRef for each entry is at n + 1 refs for the object (with + // the first one being the named value kind ID). + + // Write Name. + // The start of the string table index. + uint32_t StrIdx = sizeof(uint32_t) * (Sorted.size() + 1); + for (auto &Entry : Sorted) { + Writer.write(StrIdx); + StrIdx += Entry.Name.size(); + + // Append refs. + Refs.push_back(Entry.Ref); + } + // Write the end index for the last string. + Writer.write(StrIdx); + + // Write names in the end of the block. 
+ for (auto &Entry : Sorted) + OS << Entry.Name; + + auto Proxy = CAS.createProxy(Refs, Data); + if (!Proxy) + return Proxy.takeError(); + + return NamedValuesProxy(*this, *Proxy); +} + +void NamedValuesSchema::Builder::add(StringRef Name, ObjectRef Ref) { + StringSaver Saver(Alloc); + Nodes.emplace_back(Saver.save(Name), Ref); +} + +Expected NamedValuesSchema::Builder::build() { + auto Schema = NamedValuesSchema::create(CAS); + if (!Schema) + return Schema.takeError(); + return Schema->construct(Nodes); +} + +StringRef NamedValuesProxy::getName(size_t I) const { + uint32_t StartIdx = + support::endian::read32le(getData().data() + sizeof(uint32_t) * I); + uint32_t EndIdx = + support::endian::read32le(getData().data() + sizeof(uint32_t) * (I + 1)); + + return StringRef(getData().data() + StartIdx, EndIdx - StartIdx); +} diff --git a/llvm/lib/CAS/ObjectStore.cpp b/llvm/lib/CAS/ObjectStore.cpp index a8da4434562e2..fe8dbebd64530 100644 --- a/llvm/lib/CAS/ObjectStore.cpp +++ b/llvm/lib/CAS/ObjectStore.cpp @@ -11,15 +11,16 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/FunctionExtras.h" #include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" -#include "llvm/CAS/UnifiedOnDiskCache.h" +#include "llvm/CAS/ActionCache.h" +#include "llvm/CAS/BuiltinUnifiedCASDatabases.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Errc.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" -#include "llvm/Support/SmallVectorMemoryBuffer.h" #include using namespace llvm; @@ -181,23 +182,18 @@ Expected ObjectStore::createProxy(ArrayRef Refs, Expected ObjectStore::storeFromOpenFileImpl(sys::fs::file_t FD, std::optional Status) { - // Copy the file into an immutable memory buffer and call \c store on that. 
- // Using \c mmap would be unsafe because there's a race window between when we - // get the digest hash for the \c mmap contents and when we store the data; if - // the file changes in-between we will create an invalid object. - - // FIXME: For the on-disk CAS implementation use cloning to store it as a + // TODO: For the on-disk CAS implementation use cloning to store it as a // standalone file if the file-system supports it and the file is large. + uint64_t Size = Status ? Status->getSize() : -1; + auto Buffer = MemoryBuffer::getOpenFile(FD, /*Filename=*/"", Size); + if (!Buffer) + return errorCodeToError(Buffer.getError()); - constexpr size_t ChunkSize = 4 * 4096; - SmallString<0> Data; - Data.reserve(ChunkSize * 2); - if (Error E = sys::fs::readNativeFileToEOF(FD, Data, ChunkSize)) - return std::move(E); - return store({}, ArrayRef(Data.data(), Data.size())); + return store({}, arrayRefFromStringRef((*Buffer)->getBuffer())); } Expected ObjectStore::storeFromFile(StringRef Path) { + sys::fs::file_t FD; if (Error E = sys::fs::openNativeFileForRead(Path).moveInto(FD)) return E; @@ -206,6 +202,7 @@ Expected ObjectStore::storeFromFile(StringRef Path) { } Error ObjectStore::exportDataToFile(ObjectHandle Node, StringRef Path) const { + SmallString<256> TmpPath; SmallString<256> Model; Model += sys::path::parent_path(Path); @@ -315,10 +312,13 @@ Expected ObjectStore::importObject(ObjectStore &Upstream, // Remove the current node and its IDs from the stack. PrimaryRefStack.truncate(PrimaryRefStack.size() - Cur.RefsCount); - CursorStack.pop_back(); + // Push new node into created objects. PrimaryRefStack.push_back(*NewNode); CreatedObjects.try_emplace(Cur.Ref, *NewNode); + + // Pop the cursor in the end after all uses. 
+ CursorStack.pop_back(); continue; } @@ -350,17 +350,21 @@ ObjectProxy::getMemoryBuffer(StringRef Name, return CAS->getMemoryBuffer(H, Name, RequiresNullTerminator); } -static Expected> +static Expected< + std::pair, std::shared_ptr>> createOnDiskCASImpl(const Twine &Path) { - return createOnDiskCAS(Path); + SmallString<128> Buffer; + return createOnDiskUnifiedCASDatabases(Path.toStringRef(Buffer)); } -static Expected> +static Expected< + std::pair, std::shared_ptr>> createInMemoryCASImpl(const Twine &) { - return createInMemoryCAS(); + return std::make_pair(createInMemoryCAS(), createInMemoryActionCache()); } -static Expected> +static Expected< + std::pair, std::shared_ptr>> createPluginCASImpl(const Twine &URL) { // Format used is // plugin://${PATH_TO_PLUGIN}?${OPT1}=${VAL1}&${OPT2}=${VAL2}.. @@ -381,15 +385,14 @@ createPluginCASImpl(const Twine &URL) { } } - if (OnDiskPath.empty()) - OnDiskPath = getDefaultOnDiskCASPath(); - - std::pair, std::shared_ptr> CASDBs; - if (Error E = createPluginCASDatabases(PluginPath, OnDiskPath, PluginArgs) - .moveInto(CASDBs)) - return std::move(E); + if (OnDiskPath.empty()) { + auto Path = getDefaultOnDiskCASPath(); + if (!Path) + return Path.takeError(); + OnDiskPath = *Path; + } - return std::move(CASDBs.first); + return createPluginCASDatabases(PluginPath, OnDiskPath, PluginArgs); } static ManagedStatic> RegisteredScheme; @@ -403,7 +406,7 @@ static StringMap &getRegisteredScheme() { return *RegisteredScheme; } -Expected> +Expected, std::shared_ptr>> cas::createCASFromIdentifier(StringRef Path) { for (auto &Scheme : getRegisteredScheme()) { if (Path.consume_front(Scheme.getKey())) @@ -417,15 +420,13 @@ cas::createCASFromIdentifier(StringRef Path) { // FIXME: some current default behavior. SmallString<256> PathBuf; if (Path == "auto") { - getDefaultOnDiskCASPath(PathBuf); + if (auto E = getDefaultOnDiskCASPath(PathBuf)) + return std::move(E); Path = PathBuf; } // Fallback is to create UnifiedOnDiskCache. 
- auto UniDB = builtin::createBuiltinUnifiedOnDiskCache(Path); - if (!UniDB) - return UniDB.takeError(); - return builtin::createObjectStoreFromUnifiedOnDiskCache(std::move(*UniDB)); + return createOnDiskUnifiedCASDatabases(Path); } void cas::registerCASURLScheme(StringRef Prefix, diff --git a/llvm/lib/CAS/OnDiskCAS.cpp b/llvm/lib/CAS/OnDiskCAS.cpp index 2e29d4d6b3b90..2c543534eeef9 100644 --- a/llvm/lib/CAS/OnDiskCAS.cpp +++ b/llvm/lib/CAS/OnDiskCAS.cpp @@ -1,4 +1,4 @@ -//===- OnDiskCAS.cpp --------------------------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -49,8 +49,8 @@ class OnDiskCAS : public BuiltinCAS { static Expected> open(StringRef Path); - OnDiskCAS(std::shared_ptr UniDB_) - : UniDB(std::move(UniDB_)), DB(&UniDB->getGraphDB()) {} + OnDiskCAS(std::shared_ptr UniDB) + : UnifiedDB(std::move(UniDB)), DB(&UnifiedDB->getGraphDB()) {} private: ObjectHandle convertHandle(ondisk::ObjectHandle Node) const { @@ -58,7 +58,7 @@ class OnDiskCAS : public BuiltinCAS { } ondisk::ObjectHandle convertHandle(ObjectHandle Node) const { - return ondisk::ObjectHandle::fromOpaqueData(Node.getInternalRef(*this)); + return ondisk::ObjectHandle(Node.getInternalRef(*this)); } ObjectRef convertRef(ondisk::ObjectID Ref) const { @@ -71,12 +71,14 @@ class OnDiskCAS : public BuiltinCAS { size_t getNumRefs(ObjectHandle Node) const final { auto RefsRange = DB->getObjectRefs(convertHandle(Node)); - return std::distance(RefsRange.begin(), RefsRange.end()); + return llvm::size(RefsRange); } + ObjectRef readRef(ObjectHandle Node, size_t I) const final { auto RefsRange = DB->getObjectRefs(convertHandle(Node)); return convertRef(RefsRange.begin()[I]); } + Error forEachRef(ObjectHandle Node, function_ref Callback) const final; @@ -84,11 +86,11 @@ class OnDiskCAS : public 
BuiltinCAS { Expected> getStorageSize() const final; Error pruneStorageData() final; - OnDiskCAS(std::unique_ptr DB_) - : OwnedDB(std::move(DB_)), DB(OwnedDB.get()) {} + OnDiskCAS(std::unique_ptr GraphDB) + : OwnedDB(std::move(GraphDB)), DB(OwnedDB.get()) {} std::unique_ptr OwnedDB; - std::shared_ptr UniDB; + std::shared_ptr UnifiedDB; ondisk::OnDiskGraphDB *DB; }; @@ -98,9 +100,6 @@ void OnDiskCAS::print(raw_ostream &OS) const { DB->print(OS); } Error OnDiskCAS::validate(bool CheckHash) const { if (auto E = DB->validate(CheckHash, builtin::hashingFunc)) return E; - if (UniDB && UniDB->getUpstreamGraphDB()) - return UniDB->getUpstreamGraphDB()->validate(CheckHash, - builtin::hashingFunc); return Error::success(); } @@ -174,6 +173,7 @@ Error OnDiskCAS::exportDataToFile(ObjectHandle Node, StringRef Path) const { // Optimized version using the underlying database file. assert(FBData.FileInfo.has_value()); + ondisk::UniqueTempFile UniqueTmp; auto ExpectedPath = UniqueTmp.createAndCopyFrom(sys::path::parent_path(Path), FBData.FileInfo->FilePath); @@ -212,21 +212,23 @@ Error OnDiskCAS::forEachRef(ObjectHandle Node, } Error OnDiskCAS::setSizeLimit(std::optional SizeLimit) { - UniDB->setSizeLimit(SizeLimit); + UnifiedDB->setSizeLimit(SizeLimit); return Error::success(); } Expected> OnDiskCAS::getStorageSize() const { - return UniDB->getStorageSize(); + return UnifiedDB->getStorageSize(); } -Error OnDiskCAS::pruneStorageData() { return UniDB->collectGarbage(); } +Error OnDiskCAS::pruneStorageData() { return UnifiedDB->collectGarbage(); } Expected> OnDiskCAS::open(StringRef AbsPath) { std::shared_ptr Logger; +#ifndef _WIN32 if (Error E = ondisk::OnDiskCASLogger::openIfEnabled(AbsPath).moveInto(Logger)) return std::move(E); +#endif Expected> DB = ondisk::OnDiskGraphDB::open(AbsPath, BuiltinCASContext::getHashName(), @@ -253,10 +255,6 @@ Expected> cas::createOnDiskCAS(const Twine &Path) { Path.toVector(AbsPath); sys::fs::make_absolute(AbsPath); - // FIXME: Remove this and 
update clients to do this logic. - if (AbsPath == getDefaultOnDiskCASStableID()) - AbsPath = StringRef(getDefaultOnDiskCASPath()); - return OnDiskCAS::open(AbsPath); #else return createStringError(inconvertibleErrorCode(), "OnDiskCAS is disabled"); @@ -271,26 +269,16 @@ cas::builtin::createObjectStoreFromUnifiedOnDiskCache( static constexpr StringLiteral DefaultName = "cas"; -void cas::getDefaultOnDiskCASStableID(SmallVectorImpl &Path) { - Path.assign(DefaultDirProxy.begin(), DefaultDirProxy.end()); - llvm::sys::path::append(Path, DefaultDir, DefaultName); -} - -std::string cas::getDefaultOnDiskCASStableID() { - SmallString<128> Path; - getDefaultOnDiskCASStableID(Path); - return Path.str().str(); -} - -void cas::getDefaultOnDiskCASPath(SmallVectorImpl &Path) { - // FIXME: Should this return 'Error' instead of hard-failing? +Error cas::getDefaultOnDiskCASPath(SmallVectorImpl &Path) { if (!llvm::sys::path::cache_directory(Path)) - report_fatal_error("cannot get default cache directory"); + return createStringError("cache directory is not available"); llvm::sys::path::append(Path, DefaultDir, DefaultName); + return Error::success(); } -std::string cas::getDefaultOnDiskCASPath() { +Expected cas::getDefaultOnDiskCASPath() { SmallString<128> Path; - getDefaultOnDiskCASPath(Path); + if (auto E = getDefaultOnDiskCASPath(Path)) + return std::move(E); return Path.str().str(); } diff --git a/llvm/lib/CAS/OnDiskCASLogger.cpp b/llvm/lib/CAS/OnDiskCASLogger.cpp index a39cbbd3e0b58..8ff63a8f85248 100644 --- a/llvm/lib/CAS/OnDiskCASLogger.cpp +++ b/llvm/lib/CAS/OnDiskCASLogger.cpp @@ -1,10 +1,20 @@ -//===- OnDiskCASLogger.cpp ------------------------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +// +/// \file +/// This file implements OnDiskCASLogger. The logger will write the timestamp +/// and events to a log file using filestream. The logger should be thread-safe +/// and process-safe because each write is small enough to atomically update the +/// file. +/// +/// The logger can be enabled via `LLVM_CAS_LOG` environmental variable. +// +//===----------------------------------------------------------------------===// #include "llvm/CAS/OnDiskCASLogger.h" @@ -78,16 +88,17 @@ OnDiskCASLogger::open(const Twine &Path, bool LogAllocations) { } static uint64_t getTimestampMillis() { - #ifdef __APPLE__ - // Using chrono is roughly 50% slower. - struct timeval T; - gettimeofday(&T, 0); - return T.tv_sec * 1000 + T.tv_usec / 1000; - #else - auto Time = std::chrono::system_clock::now(); - auto Millis = std::chrono::duration_cast(Time.time_since_epoch()); - return Millis.count(); - #endif +#ifdef __APPLE__ + // Using chrono is roughly 50% slower. 
+ struct timeval T; + gettimeofday(&T, 0); + return T.tv_sec * 1000 + T.tv_usec / 1000; +#else + auto Time = std::chrono::system_clock::now(); + auto Millis = std::chrono::duration_cast( + Time.time_since_epoch()); + return Millis.count(); +#endif } namespace { @@ -127,11 +138,10 @@ static void formatTrieOffset(raw_ostream &OS, int64_t Off) { OS << format_hex(Off, 0); } -void OnDiskCASLogger::log_compare_exchange_strong(void *Region, TrieOffset Trie, - size_t SlotI, - TrieOffset Expected, - TrieOffset New, - TrieOffset Previous) { +void OnDiskCASLogger::logSubtrieHandleCmpXchg(void *Region, TrieOffset Trie, + size_t SlotI, TrieOffset Expected, + TrieOffset New, + TrieOffset Previous) { TextLogLine Log(OS); Log << "cmpxcgh subtrie region=" << Region << " offset="; formatTrieOffset(Log, Trie); @@ -143,16 +153,16 @@ void OnDiskCASLogger::log_compare_exchange_strong(void *Region, TrieOffset Trie, formatTrieOffset(Log, Previous); } -void OnDiskCASLogger::log_SubtrieHandle_create(void *Region, TrieOffset Trie, - uint32_t StartBit, - uint32_t NumBits) { +void OnDiskCASLogger::logSubtrieHandleCreate(void *Region, TrieOffset Trie, + uint32_t StartBit, + uint32_t NumBits) { TextLogLine Log(OS); Log << "create subtrie region=" << Region << " offset="; formatTrieOffset(Log, Trie); Log << " start-bit=" << StartBit << " num-bits=" << NumBits; } -void OnDiskCASLogger::log_HashMappedTrieHandle_createRecord( +void OnDiskCASLogger::logHashMappedTrieHandleCreateRecord( void *Region, TrieOffset Off, ArrayRef Hash) { TextLogLine Log(OS); Log << "create record region=" << Region << " offset="; @@ -160,44 +170,46 @@ void OnDiskCASLogger::log_HashMappedTrieHandle_createRecord( Log << " hash=" << format_bytes(Hash, std::nullopt, 32, 32); } -void OnDiskCASLogger::log_MappedFileRegionBumpPtr_resizeFile(StringRef Path, - size_t Before, - size_t After) { +void OnDiskCASLogger::logMappedFileRegionArenaResizeFile(StringRef Path, + size_t Before, + size_t After) { TextLogLine Log(OS); Log << 
"resize mapped file '" << Path << "' from=" << Before << " to=" << After; } -void OnDiskCASLogger::log_MappedFileRegionBumpPtr_create(StringRef Path, int FD, - void *Region, - size_t Capacity, - size_t Size) { +void OnDiskCASLogger::logMappedFileRegionArenaCreate(StringRef Path, int FD, + void *Region, + size_t Capacity, + size_t Size) { sys::fs::file_status Stat; std::error_code EC = status(FD, Stat); TextLogLine Log(OS); Log << "mmap '" << Path << "' " << Region; - Log << " dev=" << (EC ? ~0ull : Stat.getUniqueID().getDevice()); - Log << " inode=" << (EC ? ~0ull : Stat.getUniqueID().getFile()); - ; Log << " size=" << Size << " capacity=" << Capacity; + if (EC) { + Log << " failed status with error: " << EC.message(); + return; + } + Log << " dev=" << format_hex(Stat.getUniqueID().getDevice(), 4); + Log << " inode=" << format_hex(Stat.getUniqueID().getFile(), 4); } -void OnDiskCASLogger::log_MappedFileRegionBumpPtr_oom(StringRef Path, - size_t Capacity, - size_t Size, - size_t AllocSize) { +void OnDiskCASLogger::logMappedFileRegionArenaOom(StringRef Path, + size_t Capacity, size_t Size, + size_t AllocSize) { TextLogLine Log(OS); Log << "oom '" << Path << "' old-size=" << Size << " capacity=" << Capacity << "alloc-size=" << AllocSize; } -void OnDiskCASLogger::log_MappedFileRegionBumpPtr_close(StringRef Path) { +void OnDiskCASLogger::logMappedFileRegionArenaClose(StringRef Path) { TextLogLine Log(OS); Log << "close mmap '" << Path << "'"; } -void OnDiskCASLogger::log_MappedFileRegionBumpPtr_allocate(void *Region, - TrieOffset Off, - size_t Size) { +void OnDiskCASLogger::logMappedFileRegionArenaAllocate(void *Region, + TrieOffset Off, + size_t Size) { if (!LogAllocations) return; TextLogLine Log(OS); @@ -206,12 +218,12 @@ void OnDiskCASLogger::log_MappedFileRegionBumpPtr_allocate(void *Region, Log << " size=" << Size; } -void OnDiskCASLogger::log_UnifiedOnDiskCache_collectGarbage(StringRef Path) { +void OnDiskCASLogger::logUnifiedOnDiskCacheCollectGarbage(StringRef 
Path) { TextLogLine Log(OS); Log << "collect garbage '" << Path << "'"; } -void OnDiskCASLogger::log_UnifiedOnDiskCache_validateIfNeeded( +void OnDiskCASLogger::logUnifiedOnDiskCacheValidateIfNeeded( StringRef Path, uint64_t BootTime, uint64_t ValidationTime, bool CheckHash, bool AllowRecovery, bool Force, std::optional LLVMCas, StringRef ValidationError, bool Skipped, bool Recovered) { @@ -230,21 +242,20 @@ void OnDiskCASLogger::log_UnifiedOnDiskCache_validateIfNeeded( Log << " data was invalid " << ValidationError; } -void OnDiskCASLogger::log_TempFile_create(StringRef Name) { +void OnDiskCASLogger::logTempFileCreate(StringRef Name) { TextLogLine Log(OS); Log << "standalone file create '" << Name << "'"; } -void OnDiskCASLogger::log_TempFile_keep(StringRef TmpName, StringRef Name, - std::error_code EC) { +void OnDiskCASLogger::logTempFileKeep(StringRef TmpName, StringRef Name, + std::error_code EC) { TextLogLine Log(OS); Log << "standalone file rename '" << TmpName << "' to '" << Name << "'"; if (EC) Log << " error: " << EC.message(); } -void OnDiskCASLogger::log_TempFile_remove(StringRef TmpName, - std::error_code EC) { +void OnDiskCASLogger::logTempFileRemove(StringRef TmpName, std::error_code EC) { TextLogLine Log(OS); Log << "standalone file remove '" << TmpName << "'"; if (EC) diff --git a/llvm/lib/CAS/OnDiskCommon.cpp b/llvm/lib/CAS/OnDiskCommon.cpp index a18604a27cfec..98b79369563a3 100644 --- a/llvm/lib/CAS/OnDiskCommon.cpp +++ b/llvm/lib/CAS/OnDiskCommon.cpp @@ -7,15 +7,12 @@ //===----------------------------------------------------------------------===// #include "OnDiskCommon.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Config/config.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" -#include +#include #include -#include #include #if __has_include() @@ -31,6 +28,21 @@ #include #endif +#if __has_include() +#include // statfs +#endif + +#ifdef __APPLE__ +#if 
__has_include() +#include +#endif +#endif + +#ifdef _WIN32 +#define NOMINMAX +#include +#endif + using namespace llvm; static uint64_t OnDiskCASMaxMappingSize = 0; @@ -65,14 +77,15 @@ void cas::ondisk::setMaxMappingSize(uint64_t Size) { OnDiskCASMaxMappingSize = Size; } -std::error_code cas::ondisk::lockFileThreadSafe(int FD, bool Exclusive) { +std::error_code cas::ondisk::lockFileThreadSafe(int FD, + sys::fs::LockKind Kind) { #if HAVE_FLOCK - if (flock(FD, Exclusive ? LOCK_EX : LOCK_SH) == 0) + if (flock(FD, Kind == sys::fs::LockKind::Exclusive ? LOCK_EX : LOCK_SH) == 0) return std::error_code(); return std::error_code(errno, std::generic_category()); #elif defined(_WIN32) // On Windows this implementation is thread-safe. - return sys::fs::lockFile(FD, Exclusive); + return sys::fs::lockFile(FD, Kind); #else return make_error_code(std::errc::no_lock_available); #endif @@ -93,12 +106,13 @@ std::error_code cas::ondisk::unlockFileThreadSafe(int FD) { std::error_code cas::ondisk::tryLockFileThreadSafe(int FD, std::chrono::milliseconds Timeout, - bool Exclusive) { + sys::fs::LockKind Kind) { #if HAVE_FLOCK auto Start = std::chrono::steady_clock::now(); auto End = Start + Timeout; do { - if (flock(FD, (Exclusive ? LOCK_EX : LOCK_SH) | LOCK_NB) == 0) + if (flock(FD, (Kind == sys::fs::LockKind::Exclusive ? LOCK_EX : LOCK_SH) | + LOCK_NB) == 0) return std::error_code(); int Error = errno; if (Error == EWOULDBLOCK) { @@ -113,27 +127,28 @@ cas::ondisk::tryLockFileThreadSafe(int FD, std::chrono::milliseconds Timeout, return make_error_code(std::errc::no_lock_available); #elif defined(_WIN32) // On Windows this implementation is thread-safe. 
- return sys::fs::tryLockFile(FD, Timeout, Exclusive); + return sys::fs::tryLockFile(FD, Timeout, Kind); #else return make_error_code(std::errc::no_lock_available); #endif } -Expected cas::ondisk::preallocateFileTail(int FD, size_t CurrentSize, size_t NewSize) { +Expected cas::ondisk::preallocateFileTail(int FD, size_t CurrentSize, + size_t NewSize) { auto CreateError = [&](std::error_code EC) -> Expected { if (EC == std::errc::not_supported) // Ignore ENOTSUP in case the filesystem cannot preallocate. return NewSize; #if defined(HAVE_POSIX_FALLOCATE) - if (EC == std::errc::invalid_argument && - CurrentSize < NewSize && // len > 0 + if (EC == std::errc::invalid_argument && CurrentSize < NewSize && // len > 0 NewSize < std::numeric_limits::max()) // 0 <= offset, len < max // Prior to 2024, POSIX required EINVAL for cases that should be ENOTSUP, // so handle it the same as above if it is not one of the other ways to // get EINVAL. return NewSize; #endif - return createStringError(EC, "failed to allocate to CAS file: " + EC.message()); + return createStringError(EC, + "failed to allocate to CAS file: " + EC.message()); }; #if defined(HAVE_POSIX_FALLOCATE) // Note: posix_fallocate returns its error directly, not via errno. @@ -142,7 +157,13 @@ Expected cas::ondisk::preallocateFileTail(int FD, size_t CurrentSize, si return NewSize; #elif defined(__APPLE__) fstore_t FAlloc; - FAlloc.fst_flags = F_ALLOCATEALL | F_ALLOCATEPERSIST; + FAlloc.fst_flags = F_ALLOCATEALL; +#if defined(F_ALLOCATEPERSIST) && \ + defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && \ + __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 130000 + // F_ALLOCATEPERSIST is introduced in macOS 13. 
+ FAlloc.fst_flags |= F_ALLOCATEPERSIST; +#endif FAlloc.fst_posmode = F_PEOFPOSMODE; FAlloc.fst_offset = 0; FAlloc.fst_length = NewSize - CurrentSize; @@ -152,7 +173,60 @@ Expected cas::ondisk::preallocateFileTail(int FD, size_t CurrentSize, si assert(CurrentSize + FAlloc.fst_bytesalloc >= NewSize); return CurrentSize + FAlloc.fst_bytesalloc; #else - return NewSize; // Pretend it worked. + (void)CreateError; // Silence unused variable. + return NewSize; // Pretend it worked. +#endif +} + +bool cas::ondisk::useSmallMappingSize(const Twine &P) { + // Add exceptions to use small database file here. +#if defined(__APPLE__) && __has_include() + // macOS tmpfs does not support sparse tails. + SmallString<128> PathStorage; + StringRef Path = P.toNullTerminatedStringRef(PathStorage); + struct statfs StatFS; + if (statfs(Path.data(), &StatFS) != 0) + return false; + + if (strcmp(StatFS.f_fstypename, "tmpfs") == 0) + return true; +#endif + // Default to use regular database file. + return false; +} + +Expected cas::ondisk::getBootTime() { +#ifdef __APPLE__ +#if __has_include() && defined(KERN_BOOTTIME) + struct timeval TV; + size_t TVLen = sizeof(TV); + int KernBoot[2] = {CTL_KERN, KERN_BOOTTIME}; + if (sysctl(KernBoot, 2, &TV, &TVLen, nullptr, 0) < 0) + return createStringError(llvm::errnoAsErrorCode(), + "failed to get boottime"); + if (TVLen != sizeof(TV)) + return createStringError("sysctl kern.boottime unexpected format"); + return TV.tv_sec; +#else + return 0; +#endif +#elif defined(__linux__) + // Use the mtime for /proc, which is recreated during system boot. + // We could also read /proc/stat and search for 'btime'. 
+ sys::fs::file_status Status; + if (std::error_code EC = sys::fs::status("/proc", Status)) + return createFileError("/proc", EC); + return Status.getLastModificationTime().time_since_epoch().count(); +#elif defined(_WIN32) + // Windows: Calculate boot time from current time minus uptime + // GetTickCount64() returns milliseconds since boot + auto now = std::chrono::system_clock::now(); + ULONGLONG uptimeMs = GetTickCount64(); + auto bootTime = now - std::chrono::milliseconds(uptimeMs); + return std::chrono::duration_cast( + bootTime.time_since_epoch()).count(); +#else + return 0; #endif } @@ -170,7 +244,7 @@ cas::ondisk::UniqueTempFile::createAndCopyFrom(StringRef ParentPath, sys::path::append(Model, "%%%%%%%.tmp"); if (std::error_code EC = sys::fs::createUniqueFile(Model, UniqueTmpPath)) return createFileError(Model, EC); - TmpPath = UniqueTmpPath; + TmpPath = std::move(UniqueTmpPath); TmpPath += ".tmp"; // modify so that there's no file at that path. // \c copy_file will use \c clonefile when applicable. if (std::error_code EC = sys::fs::copy_file(CopyFromPath, TmpPath)) diff --git a/llvm/lib/CAS/OnDiskCommon.h b/llvm/lib/CAS/OnDiskCommon.h index 0eb5538a3e832..4d2661c7b842e 100644 --- a/llvm/lib/CAS/OnDiskCommon.h +++ b/llvm/lib/CAS/OnDiskCommon.h @@ -9,13 +9,17 @@ #ifndef LLVM_LIB_CAS_ONDISKCOMMON_H #define LLVM_LIB_CAS_ONDISKCOMMON_H -#include "llvm/ADT/SmallString.h" #include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" #include #include namespace llvm::cas::ondisk { +/// The version for all the ondisk database files. It needs to be bumped when +/// compatibility breaking changes are introduced. +constexpr StringLiteral CASFormatVersion = "v1"; + /// Retrieves an overridden maximum mapping size for CAS files, if any, /// speicified by LLVM_CAS_MAX_MAPPING_SIZE in the environment or set by /// `setMaxMappingSize()`. If the value from environment is unreadable, returns @@ -27,10 +31,16 @@ Expected> getOverriddenMaxMappingSize(); /// created. 
Set value 0 to use default size. LLVM_ABI_FOR_TEST void setMaxMappingSize(uint64_t Size); +/// Whether to use a small file mapping for ondisk databases created in \p Path. +/// +/// For some file system that doesn't support sparse file, use a smaller file +/// mapping to avoid consuming too much disk space on creation. +bool useSmallMappingSize(const Twine &Path); + /// Thread-safe alternative to \c sys::fs::lockFile. This does not support all /// the platforms that \c sys::fs::lockFile does, so keep it in the CAS library /// for now. -std::error_code lockFileThreadSafe(int FD, bool Exclusive = true); +std::error_code lockFileThreadSafe(int FD, llvm::sys::fs::LockKind Kind); /// Thread-safe alternative to \c sys::fs::unlockFile. This does not support all /// the platforms that \c sys::fs::lockFile does, so keep it in the CAS library @@ -42,7 +52,7 @@ std::error_code unlockFileThreadSafe(int FD); /// library for now. std::error_code tryLockFileThreadSafe( int FD, std::chrono::milliseconds Timeout = std::chrono::milliseconds(0), - bool Exclusive = true); + llvm::sys::fs::LockKind Kind = llvm::sys::fs::LockKind::Exclusive); /// Allocate space for the file \p FD on disk, if the filesystem supports it. /// @@ -50,7 +60,15 @@ std::error_code tryLockFileThreadSafe( /// \c std::errc::no_space_on_device are detected before we write data. /// /// \returns the new size of the file, or an \c Error. -Expected preallocateFileTail(int FD, size_t CurrentSize, size_t NewSize); +Expected preallocateFileTail(int FD, size_t CurrentSize, + size_t NewSize); + +/// Get boot time for the OS. This can be used to check if the CAS has been +/// validated since boot. +/// +/// \returns the boot time in seconds (0 if operation not supported), or an \c +/// Error. +Expected getBootTime(); /// Helper RAII class for copying a file to a unique file path. At destruction /// time it will delete any new temporary files created. 
diff --git a/llvm/lib/CAS/OnDiskDataAllocator.cpp b/llvm/lib/CAS/OnDiskDataAllocator.cpp new file mode 100644 index 0000000000000..5f646ac74a2e6 --- /dev/null +++ b/llvm/lib/CAS/OnDiskDataAllocator.cpp @@ -0,0 +1,239 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file Implements OnDiskDataAllocator. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/CAS/OnDiskDataAllocator.h" +#include "DatabaseFile.h" +#include "llvm/CAS/OnDiskCASLogger.h" +#include "llvm/Config/llvm-config.h" + +using namespace llvm; +using namespace llvm::cas; +using namespace llvm::cas::ondisk; + +#if LLVM_ENABLE_ONDISK_CAS + +//===----------------------------------------------------------------------===// +// DataAllocator data structures. 
+//===----------------------------------------------------------------------===// + +namespace { +/// DataAllocator table layout: +/// - [8-bytes: Generic table header] +/// - 8-bytes: AllocatorOffset (reserved for implementing free lists) +/// - 8-bytes: Size for user data header +/// - +/// +/// Record layout: +/// - +class DataAllocatorHandle { +public: + static constexpr TableHandle::TableKind Kind = + TableHandle::TableKind::DataAllocator; + + struct Header { + TableHandle::Header GenericHeader; + std::atomic AllocatorOffset; + const uint64_t UserHeaderSize; + }; + + operator TableHandle() const { + if (!H) + return TableHandle(); + return TableHandle(*Region, H->GenericHeader); + } + + Expected> allocate(MappedFileRegionArena &Alloc, + size_t DataSize) { + assert(&Alloc.getRegion() == Region); + auto Ptr = Alloc.allocate(DataSize); + if (LLVM_UNLIKELY(!Ptr)) + return Ptr.takeError(); + return MutableArrayRef(*Ptr, DataSize); + } + + explicit operator bool() const { return H; } + const Header &getHeader() const { return *H; } + MappedFileRegion &getRegion() const { return *Region; } + + MutableArrayRef getUserHeader() { + return MutableArrayRef(reinterpret_cast(H + 1), + H->UserHeaderSize); + } + + static Expected + create(MappedFileRegionArena &Alloc, StringRef Name, uint32_t UserHeaderSize); + + DataAllocatorHandle() = default; + DataAllocatorHandle(MappedFileRegion &Region, Header &H) + : Region(&Region), H(&H) {} + DataAllocatorHandle(MappedFileRegion &Region, intptr_t HeaderOffset) + : DataAllocatorHandle( + Region, *reinterpret_cast
(Region.data() + HeaderOffset)) { + } + +private: + MappedFileRegion *Region = nullptr; + Header *H = nullptr; +}; + +} // end anonymous namespace + +struct OnDiskDataAllocator::ImplType { + DatabaseFile File; + DataAllocatorHandle Store; +}; + +Expected +DataAllocatorHandle::create(MappedFileRegionArena &Alloc, StringRef Name, + uint32_t UserHeaderSize) { + // Allocate. + auto Offset = + Alloc.allocateOffset(sizeof(Header) + UserHeaderSize + Name.size() + 1); + if (LLVM_UNLIKELY(!Offset)) + return Offset.takeError(); + + // Construct the header and the name. + assert(Name.size() <= UINT16_MAX && "Expected smaller table name"); + auto *H = new (Alloc.getRegion().data() + *Offset) + Header{{TableHandle::TableKind::DataAllocator, + static_cast(Name.size()), + static_cast(sizeof(Header) + UserHeaderSize)}, + /*AllocatorOffset=*/{0}, + /*UserHeaderSize=*/UserHeaderSize}; + // Memset UserHeader. + char *UserHeader = reinterpret_cast(H + 1); + memset(UserHeader, 0, UserHeaderSize); + // Write database file name (null-terminated). + char *NameStorage = UserHeader + UserHeaderSize; + llvm::copy(Name, NameStorage); + NameStorage[Name.size()] = 0; + return DataAllocatorHandle(Alloc.getRegion(), *H); +} + +Expected OnDiskDataAllocator::create( + const Twine &PathTwine, const Twine &TableNameTwine, uint64_t MaxFileSize, + std::optional NewFileInitialSize, uint32_t UserHeaderSize, + std::shared_ptr Logger, + function_ref UserHeaderInit) { + assert(!UserHeaderSize || UserHeaderInit); + SmallString<128> PathStorage; + StringRef Path = PathTwine.toStringRef(PathStorage); + SmallString<128> TableNameStorage; + StringRef TableName = TableNameTwine.toStringRef(TableNameStorage); + + // Constructor for if the file doesn't exist. 
+ auto NewDBConstructor = [&](DatabaseFile &DB) -> Error { + auto Store = + DataAllocatorHandle::create(DB.getAlloc(), TableName, UserHeaderSize); + if (LLVM_UNLIKELY(!Store)) + return Store.takeError(); + + if (auto E = DB.addTable(*Store)) + return E; + + if (UserHeaderSize) + UserHeaderInit(Store->getUserHeader().data()); + return Error::success(); + }; + + // Get or create the file. + Expected File = + DatabaseFile::create(Path, MaxFileSize, Logger, NewDBConstructor); + if (!File) + return File.takeError(); + + // Find the table and validate it. + std::optional Table = File->findTable(TableName); + if (!Table) + return createTableConfigError(std::errc::argument_out_of_domain, Path, + TableName, "table not found"); + if (Error E = checkTable("table kind", (size_t)DataAllocatorHandle::Kind, + (size_t)Table->getHeader().Kind, Path, TableName)) + return std::move(E); + auto Store = Table->cast(); + assert(Store && "Already checked the kind"); + + // Success. + OnDiskDataAllocator::ImplType Impl{DatabaseFile(std::move(*File)), Store}; + return OnDiskDataAllocator(std::make_unique(std::move(Impl))); +} + +Expected +OnDiskDataAllocator::allocate(size_t Size) { + auto Data = Impl->Store.allocate(Impl->File.getAlloc(), Size); + if (LLVM_UNLIKELY(!Data)) + return Data.takeError(); + + return OnDiskPtr(FileOffset(Data->data() - Impl->Store.getRegion().data()), + *Data); +} + +Expected> OnDiskDataAllocator::get(FileOffset Offset, + size_t Size) const { + assert(Offset); + assert(Impl); + if (Offset.get() + Size >= Impl->File.getAlloc().size()) + return createStringError(make_error_code(std::errc::protocol_error), + "requested size too large in allocator"); + return ArrayRef{Impl->File.getRegion().data() + Offset.get(), Size}; +} + +MutableArrayRef OnDiskDataAllocator::getUserHeader() const { + return Impl->Store.getUserHeader(); +} + +size_t OnDiskDataAllocator::size() const { return Impl->File.size(); } +size_t OnDiskDataAllocator::capacity() const { + return 
Impl->File.getRegion().size(); +} + +OnDiskDataAllocator::OnDiskDataAllocator(std::unique_ptr Impl) + : Impl(std::move(Impl)) {} + +#else // !LLVM_ENABLE_ONDISK_CAS + +struct OnDiskDataAllocator::ImplType {}; + +Expected OnDiskDataAllocator::create( + const Twine &Path, const Twine &TableName, uint64_t MaxFileSize, + std::optional NewFileInitialSize, uint32_t UserHeaderSize, + std::shared_ptr Logger, + function_ref UserHeaderInit) { + return createStringError(make_error_code(std::errc::not_supported), + "OnDiskDataAllocator is not supported"); +} + +Expected +OnDiskDataAllocator::allocate(size_t Size) { + return createStringError(make_error_code(std::errc::not_supported), + "OnDiskDataAllocator is not supported"); +} + +Expected> OnDiskDataAllocator::get(FileOffset Offset, + size_t Size) const { + return createStringError(make_error_code(std::errc::not_supported), + "OnDiskDataAllocator is not supported"); +} + +MutableArrayRef OnDiskDataAllocator::getUserHeader() const { + return {}; +} + +size_t OnDiskDataAllocator::size() const { return 0; } +size_t OnDiskDataAllocator::capacity() const { return 0; } + +#endif // LLVM_ENABLE_ONDISK_CAS + +OnDiskDataAllocator::OnDiskDataAllocator(OnDiskDataAllocator &&RHS) = default; +OnDiskDataAllocator & +OnDiskDataAllocator::operator=(OnDiskDataAllocator &&RHS) = default; +OnDiskDataAllocator::~OnDiskDataAllocator() = default; diff --git a/llvm/lib/CAS/OnDiskGraphDB.cpp b/llvm/lib/CAS/OnDiskGraphDB.cpp index 03fa8e4b1a875..cafe2ed70e34f 100644 --- a/llvm/lib/CAS/OnDiskGraphDB.cpp +++ b/llvm/lib/CAS/OnDiskGraphDB.cpp @@ -1,4 +1,4 @@ -//===- OnDiskGraphDB.cpp ----------------------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -6,44 +6,41 @@ // //===----------------------------------------------------------------------===// // -// On-disk CAS nodes database, independent of a particular hashing algorithm. -// -// Here's a top-level description of the current layout (could expose or make -// this configurable in the future). -// -// Files, each with a prefix set by \a FilePrefix: -// -// - db/.index: a file for the "index" table, named by \a -// IndexTableName and managed by \a HashMappedTrie. The contents are 8B -// that are accessed atomically, describing the object kind and where/how -// it's stored (including an optional file offset). See \a TrieRecord for -// more details. -// - db/.data: a file for the "data" table, named by \a -// DataPoolTableName and managed by \a DataStore. New objects within -// TrieRecord::MaxEmbeddedSize are inserted here as \a -// TrieRecord::StorageKind::DataPool. -// - db/..data: a file storing an object outside the main -// "data" table, named by its offset into the "index" table, with the -// format of \a TrieRecord::StorageKind::Standalone. -// - db/..leaf: a file storing a leaf node outside the -// main "data" table, named by its offset into the "index" table, with -// the format of \a TrieRecord::StorageKind::StandaloneLeaf. -// - db/..leaf+0: a file storing a leaf object outside the -// main "data" table, named by its offset into the "index" table, with -// the format of \a TrieRecord::StorageKind::StandaloneLeaf0. -// -// The "index", and "data" tables could be stored in a single file, -// (using a root record that points at the two types of stores), but splitting -// the files seems more convenient for now. -// -// ObjectID: this is a pointer to Trie record -// -// ObjectHandle: this is a pointer to Data record -// -// Eventually: consider creating a StringPool for strings instead of using -// RecordDataStore table. 
-// - Lookup by prefix tree -// - Store by suffix tree +/// \file +/// This file implements OnDiskGraphDB, an on-disk CAS nodes database, +/// independent of a particular hashing algorithm. It only needs to be +/// configured for the hash size and controls the schema of the storage. +/// +/// OnDiskGraphDB defines: +/// +/// - How the data is stored inside database, either as a standalone file, or +/// allocated inside a datapool. +/// - How references to other objects inside the same database is stored. They +/// are stored as internal references, instead of full hash value to save +/// space. +/// - How to chain databases together and import objects from upstream +/// databases. +/// +/// Here's a top-level description of the current layout: +/// +/// - db/index.: a file for the "index" table, named by \a +/// IndexTableName and managed by \a TrieRawHashMap. The contents are 8B +/// that are accessed atomically, describing the object kind and where/how +/// it's stored (including an optional file offset). See \a TrieRecord for +/// more details. +/// - db/data.: a file for the "data" table, named by \a +/// DataPoolTableName and managed by \a DataStore. New objects within +/// TrieRecord::MaxEmbeddedSize are inserted here as \a +/// TrieRecord::StorageKind::DataPool. +/// - db/obj..: a file storing an object outside the main +/// "data" table, named by its offset into the "index" table, with the +/// format of \a TrieRecord::StorageKind::Standalone. +/// - db/leaf..: a file storing a leaf node outside the +/// main "data" table, named by its offset into the "index" table, with +/// the format of \a TrieRecord::StorageKind::StandaloneLeaf. +/// - db/leaf+0..: a file storing a null-terminated leaf object +/// outside the main "data" table, named by its offset into the "index" table, +/// with the format of \a TrieRecord::StorageKind::StandaloneLeaf0. 
// //===----------------------------------------------------------------------===// @@ -53,24 +50,22 @@ #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CAS/OnDiskCASLogger.h" -#include "llvm/CAS/OnDiskHashMappedTrie.h" +#include "llvm/CAS/OnDiskDataAllocator.h" +#include "llvm/CAS/OnDiskTrieRawHashMap.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" -#include "llvm/Support/Format.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" +#include +#include #include #include -#if __has_include() -#include // statfs -#endif - #define DEBUG_TYPE "on-disk-cas" using namespace llvm; @@ -80,22 +75,24 @@ using namespace llvm::cas::ondisk; static constexpr StringLiteral IndexTableName = "llvm.cas.index"; static constexpr StringLiteral DataPoolTableName = "llvm.cas.data"; -static constexpr StringLiteral IndexFile = "index"; -static constexpr StringLiteral DataPoolFile = "data"; +static constexpr StringLiteral IndexFilePrefix = "index."; +static constexpr StringLiteral DataPoolFilePrefix = "data."; -static constexpr StringLiteral FilePrefix = "v9."; -static constexpr StringLiteral FileSuffixData = ".data"; -static constexpr StringLiteral FileSuffixLeaf = ".leaf"; -static constexpr StringLiteral FileSuffixLeaf0 = ".leaf+0"; +static constexpr StringLiteral FilePrefixObject = "obj."; +static constexpr StringLiteral FilePrefixLeaf = "leaf."; +static constexpr StringLiteral FilePrefixLeaf0 = "leaf+0."; + +static Error createCorruptObjectError(Expected> ID) { + if (!ID) + return ID.takeError(); -static Error createCorruptObjectError(ArrayRef ID) { return createStringError(llvm::errc::invalid_argument, - "corrupt object '" + toHex(ID) + "'"); + "corrupt object '" + toHex(*ID) + "'"); } namespace { -/// Trie record data: 8B, atomic +/// 
Trie record data: 8 bytes, atomic /// - 1-byte: StorageKind /// - 7-bytes: DataStoreOffset (offset into referenced file) class TrieRecord { @@ -104,37 +101,37 @@ class TrieRecord { /// Unknown object. Unknown = 0, - /// vX.data: main pool, full DataStore record. + /// data.vX: main pool, full DataStore record. DataPool = 1, - /// vX..data: standalone, with a full DataStore record. + /// obj..vX: standalone, with a full DataStore record. Standalone = 10, - /// vX..leaf: standalone, just the data. File contents + /// leaf..vX: standalone, just the data. File contents /// exactly the data content and file size matches the data size. No refs. StandaloneLeaf = 11, - /// vX..leaf+0: standalone, just the data plus an + /// leaf+0..vX: standalone, just the data plus an /// extra null character ('\0'). File size is 1 bigger than the data size. /// No refs. StandaloneLeaf0 = 12, }; - static StringRef getStandaloneFileSuffix(StorageKind SK) { + static StringRef getStandaloneFilePrefix(StorageKind SK) { switch (SK) { default: llvm_unreachable("Expected standalone storage kind"); case TrieRecord::StorageKind::Standalone: - return FileSuffixData; - case TrieRecord::StorageKind::StandaloneLeaf0: - return FileSuffixLeaf0; + return FilePrefixObject; case TrieRecord::StorageKind::StandaloneLeaf: - return FileSuffixLeaf; + return FilePrefixLeaf; + case TrieRecord::StorageKind::StandaloneLeaf0: + return FilePrefixLeaf0; } } enum Limits : int64_t { - // Saves files bigger than 64KB standalone instead of embedding them. + /// Saves files bigger than 64KB standalone instead of embedding them. MaxEmbeddedSize = 64LL * 1024LL - 1, }; @@ -143,6 +140,7 @@ class TrieRecord { FileOffset Offset; }; + /// Pack StorageKind and Offset from Data into 8 byte TrieRecord. static uint64_t pack(Data D) { assert(D.Offset.get() < (int64_t)(1ULL << 56)); uint64_t Packed = uint64_t(D.SK) << 56 | D.Offset.get(); @@ -155,6 +153,7 @@ class TrieRecord { return Packed; } + // Unpack TrieRecord into Data. 
static Data unpack(uint64_t Packed) { Data D; if (!Packed) @@ -226,6 +225,7 @@ struct DataRecordHandle { 0, "Not enough bits"); + /// Layout of the DataRecordHandle and how to decode it. struct LayoutFlags { NumRefsFlags NumRefs; DataSizeFlags DataSize; @@ -291,10 +291,12 @@ struct DataRecordHandle { return getDataRelOffset() + getDataSize() + 1; } + /// Describe the layout of data stored and how to decode from + /// DataRecordHandle. struct Layout { explicit Layout(const Input &I); - LayoutFlags Flags{}; + LayoutFlags Flags; uint64_t DataSize = 0; uint32_t NumRefs = 0; int64_t RefsRelOffset = 0; @@ -332,6 +334,8 @@ struct DataRecordHandle { return DataRecordHandle( *reinterpret_cast(Mem)); } + static Expected + getFromDataPool(const OnDiskDataAllocator &Pool, FileOffset Offset); explicit operator bool() const { return H; } const Header &getHeader() const { return *H; } @@ -345,6 +349,20 @@ struct DataRecordHandle { const Header *H = nullptr; }; +/// Proxy for any on-disk object or raw data. +struct OnDiskContent { + std::optional Record; + std::optional> Bytes; + + ArrayRef getData() const { + if (Bytes) + return *Bytes; + assert(Record && "Expected record or bytes"); + return Record->getData(); + } +}; + +/// Data loaded inside the memory from standalone file. class StandaloneDataInMemory { public: OnDiskContent getContent() const; @@ -352,12 +370,7 @@ class StandaloneDataInMemory { OnDiskGraphDB::FileBackedData getInternalFileBackedObjectData(StringRef RootPath) const; - /// FIXME: Should be mapped_file_region instead of MemoryBuffer to drop a - /// layer of indirection. 
- std::unique_ptr Region; - TrieRecord::StorageKind SK; - FileOffset IndexOffset; - StandaloneDataInMemory(std::unique_ptr Region, + StandaloneDataInMemory(std::unique_ptr Region, TrieRecord::StorageKind SK, FileOffset IndexOffset) : Region(std::move(Region)), SK(SK), IndexOffset(IndexOffset) { #ifndef NDEBUG @@ -374,17 +387,21 @@ class StandaloneDataInMemory { assert(IsStandalone); #endif } + +private: + std::unique_ptr Region; + TrieRecord::StorageKind SK; + FileOffset IndexOffset; }; -/// Container for "big" objects mapped in separately. +/// Container to lookup loaded standalone objects. template class StandaloneDataMap { static_assert(isPowerOf2_64(NumShards), "Expected power of 2"); public: - const StandaloneDataInMemory &insert(ArrayRef Hash, - TrieRecord::StorageKind SK, - std::unique_ptr Buffer, - FileOffset IndexOffset); + uintptr_t insert(ArrayRef Hash, TrieRecord::StorageKind SK, + std::unique_ptr Region, + FileOffset IndexOffset); const StandaloneDataInMemory *lookup(ArrayRef Hash) const; bool count(ArrayRef Hash) const { return bool(lookup(Hash)); } @@ -409,27 +426,7 @@ template class StandaloneDataMap { using StandaloneDataMapTy = StandaloneDataMap<16>; -struct InternalHandle { - FileOffset getAsFileOffset() const { return *DataOffset; } - - uint64_t getRawData() const { - if (DataOffset) { - uint64_t Raw = DataOffset->get(); - assert(!(Raw & 0x1)); - return Raw; - } - uint64_t Raw = reinterpret_cast(SDIM); - assert(!(Raw & 0x1)); - return Raw | 1; - } - - explicit InternalHandle(FileOffset DataOffset) : DataOffset(DataOffset) {} - explicit InternalHandle(uint64_t DataOffset) : DataOffset(DataOffset) {} - explicit InternalHandle(const StandaloneDataInMemory &SDIM) : SDIM(&SDIM) {} - std::optional DataOffset; - const StandaloneDataInMemory *SDIM = nullptr; -}; - +/// A vector of internal node references. 
class InternalRefVector { public: void push_back(InternalRef Ref) { @@ -460,19 +457,6 @@ class InternalRefVector { } // namespace -/// Proxy for any on-disk object or raw data. -struct ondisk::OnDiskContent { - std::optional Record; - std::optional> Bytes; - - ArrayRef getData() const { - if (Bytes) - return *Bytes; - assert(Record && "Expected record or bytes"); - return Record->getData(); - } -}; - Expected DataRecordHandle::createWithError( function_ref(size_t Size)> Alloc, const Input &I) { Layout L(I); @@ -482,11 +466,16 @@ Expected DataRecordHandle::createWithError( return Mem.takeError(); } -DataRecordHandle -DataRecordHandle::create(function_ref Alloc, - const Input &I) { - Layout L(I); - return constructImpl(Alloc(L.getTotalSize()), I, L); +ObjectHandle ObjectHandle::fromFileOffset(FileOffset Offset) { + // Store the file offset as it is. + assert(!(Offset.get() & 0x1)); + return ObjectHandle(Offset.get()); +} + +ObjectHandle ObjectHandle::fromMemory(uintptr_t Ptr) { + // Store the pointer from memory with lowest bit set. + assert(!(Ptr & 0x1)); + return ObjectHandle(Ptr | 1); } /// Proxy for an on-disk index record. @@ -497,17 +486,17 @@ struct OnDiskGraphDB::IndexProxy { }; template -const StandaloneDataInMemory & -StandaloneDataMap::insert(ArrayRef Hash, TrieRecord::StorageKind SK, - std::unique_ptr Buffer, - FileOffset IndexOffset) { +uintptr_t StandaloneDataMap::insert( + ArrayRef Hash, TrieRecord::StorageKind SK, + std::unique_ptr Region, + FileOffset IndexOffset) { auto &S = getShard(Hash); std::lock_guard Lock(S.Mutex); auto &V = S.Map[Hash.data()]; if (!V) - V = std::make_unique(std::move(Buffer), SK, + V = std::make_unique(std::move(Region), SK, IndexOffset); - return *V; + return reinterpret_cast(V.get()); } template @@ -521,12 +510,14 @@ StandaloneDataMap::lookup(ArrayRef Hash) const { return &*I->second; } +namespace { + /// Copy of \a sys::fs::TempFile that skips RemoveOnSignal, which is too /// expensive to register/unregister at this rate. 
/// /// FIXME: Add a TempFileManager that maintains a thread-safe list of open temp /// files and has a signal handler registerd that removes them all. -class OnDiskGraphDB::TempFile { +class TempFile { bool Done = false; TempFile(StringRef Name, int FD, OnDiskCASLogger *Logger) : TmpName(std::string(Name)), FD(FD), Logger(Logger) {} @@ -560,7 +551,7 @@ class OnDiskGraphDB::TempFile { ~TempFile() { consumeError(discard()); } }; -class OnDiskGraphDB::MappedTempFile { +class MappedTempFile { public: char *data() const { return Map.data(); } size_t size() const { return Map.size(); } @@ -584,8 +575,9 @@ class OnDiskGraphDB::MappedTempFile { TempFile Temp; sys::fs::mapped_file_region Map; }; +} // namespace -Error OnDiskGraphDB::TempFile::discard() { +Error TempFile::discard() { Done = true; if (FD != -1) { sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD); @@ -599,7 +591,7 @@ Error OnDiskGraphDB::TempFile::discard() { if (!TmpName.empty()) { std::error_code EC = sys::fs::remove(TmpName); if (Logger) - Logger->log_TempFile_remove(TmpName, EC); + Logger->logTempFileRemove(TmpName, EC); if (EC) return errorCodeToError(EC); } @@ -608,14 +600,14 @@ Error OnDiskGraphDB::TempFile::discard() { return Error::success(); } -Error OnDiskGraphDB::TempFile::keep(const Twine &Name) { +Error TempFile::keep(const Twine &Name) { assert(!Done); Done = true; // Always try to close and rename. 
std::error_code RenameEC = sys::fs::rename(TmpName, Name); if (Logger) - Logger->log_TempFile_keep(TmpName, Name.str(), RenameEC); + Logger->logTempFileKeep(TmpName, Name.str(), RenameEC); if (!RenameEC) TmpName = ""; @@ -628,15 +620,15 @@ Error OnDiskGraphDB::TempFile::keep(const Twine &Name) { return errorCodeToError(RenameEC); } -Expected -OnDiskGraphDB::TempFile::create(const Twine &Model, OnDiskCASLogger *Logger) { +Expected TempFile::create(const Twine &Model, + OnDiskCASLogger *Logger) { int FD; SmallString<128> ResultPath; if (std::error_code EC = sys::fs::createUniqueFile(Model, FD, ResultPath)) return errorCodeToError(EC); if (Logger) - Logger->log_TempFile_create(ResultPath); + Logger->logTempFileCreate(ResultPath); TempFile Ret(ResultPath, FD, Logger); return std::move(Ret); @@ -651,8 +643,20 @@ bool TrieRecord::compare_exchange_strong(Data &Existing, Data New) { return false; } -DataRecordHandle DataRecordHandle::construct(char *Mem, const Input &I) { - return constructImpl(Mem, I, Layout(I)); +Expected +DataRecordHandle::getFromDataPool(const OnDiskDataAllocator &Pool, + FileOffset Offset) { + auto HeaderData = Pool.get(Offset, sizeof(DataRecordHandle::Header)); + if (!HeaderData) + return HeaderData.takeError(); + + auto Record = DataRecordHandle::get(HeaderData->data()); + if (Record.getTotalSize() + Offset.get() > Pool.size()) + return createStringError( + make_error_code(std::errc::illegal_byte_sequence), + "data record span passed the end of the data pool"); + + return Record; } DataRecordHandle DataRecordHandle::constructImpl(char *Mem, const Input &I, @@ -850,6 +854,7 @@ uint64_t DataRecordHandle::getDataSize() const { case DataSizeFlags::Uses8B: return support::endian::read64le(DataSizePtr); } + llvm_unreachable("Unknown DataSizeFlags enum"); } void DataRecordHandle::skipDataSize(LayoutFlags LF, int64_t &RelOffset) const { @@ -877,6 +882,7 @@ uint32_t DataRecordHandle::getNumRefs() const { case NumRefsFlags::Uses8B: return 
support::endian::read64le(NumRefsPtr); } + llvm_unreachable("Unknown NumRefsFlags enum"); } void DataRecordHandle::skipNumRefs(LayoutFlags LF, int64_t &RelOffset) const { @@ -905,8 +911,15 @@ int64_t DataRecordHandle::getDataRelOffset() const { } Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const { + if (UpstreamDB) { + if (auto E = UpstreamDB->validate(Deep, Hasher)) + return E; + } + if (!isAligned(Align(8), DataPool.size())) + return createStringError(llvm::errc::illegal_byte_sequence, + "data pool bump pointer is not aligned"); return Index.validate([&](FileOffset Offset, - OnDiskHashMappedTrie::ConstValueProxy Record) + OnDiskTrieRawHashMap::ConstValueProxy Record) -> Error { auto formatError = [&](Twine Msg) { return createStringError( @@ -933,6 +946,8 @@ Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const { auto Ref = InternalRef::getFromOffset(Offset); auto I = getIndexProxyFromRef(Ref); + if (!I) + return I.takeError(); switch (D.SK) { case TrieRecord::StorageKind::Unknown: @@ -940,19 +955,33 @@ Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const { // the record. It can be reused by later insertion so just skip this entry // for now. return Error::success(); - case TrieRecord::StorageKind::DataPool: + case TrieRecord::StorageKind::DataPool: { // Check offset is a postive value, and large enough to hold the // header for the data record. if (D.Offset.get() <= 0 || - (uint64_t)D.Offset.get() + sizeof(DataRecordHandle::Header) >= - DataPool.size()) + D.Offset.get() + sizeof(DataRecordHandle::Header) >= DataPool.size()) return formatError("datapool record out of bound"); + + // DataRecord start needs to be aligned. + if (!isAligned(Align(8), D.Offset.get())) + return formatError("data record offset is not aligned"); + + // Validate the layout flags before getFromDataPool calls getTotalSize(). 
+ auto HeaderData = + DataPool.get(D.Offset, sizeof(DataRecordHandle::Header)); + if (!HeaderData) + return formatError(toString(HeaderData.takeError())); + auto LF = DataRecordHandle::get(HeaderData->data()).getLayoutFlags(); + if (LF.NumRefs > DataRecordHandle::NumRefsFlags::Max || + LF.DataSize > DataRecordHandle::DataSizeFlags::Max) + return formatError("data record has invalid layout flags"); break; + } case TrieRecord::StorageKind::Standalone: case TrieRecord::StorageKind::StandaloneLeaf: case TrieRecord::StorageKind::StandaloneLeaf0: SmallString<256> Path; - getStandalonePath(TrieRecord::getStandaloneFileSuffix(D.SK), I.Offset, + getStandalonePath(TrieRecord::getStandaloneFilePrefix(D.SK), I->Offset, Path); // If need to validate the content of the file later, just load the // buffer here. Otherwise, just check the existance of the file. @@ -972,7 +1001,7 @@ Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const { auto dataError = [&](Twine Msg) { return createStringError(llvm::errc::illegal_byte_sequence, - "bad data for digest \'" + toHex(I.Hash) + + "bad data for digest \'" + toHex(I->Hash) + "\': " + Msg.str()); }; SmallVector> Refs; @@ -982,14 +1011,19 @@ Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const { case TrieRecord::StorageKind::Unknown: llvm_unreachable("already handled"); case TrieRecord::StorageKind::DataPool: { - auto DataRecord = DataRecordHandle::get(DataPool.beginData(D.Offset)); - if (DataRecord.getTotalSize() + D.Offset.get() > DataPool.size()) - return dataError("data record span passed the end of the data pool"); - for (auto InternRef : DataRecord.getRefs()) { + auto DataRecord = DataRecordHandle::getFromDataPool(DataPool, D.Offset); + if (!DataRecord) + return dataError(toString(DataRecord.takeError())); + + for (auto InternRef : DataRecord->getRefs()) { + if (InternRef.getFileOffset().get() <= 0) + return dataError("invalid ref offset"); auto Index = getIndexProxyFromRef(InternRef); - 
Refs.push_back(Index.Hash); + if (!Index) + return Index.takeError(); + Refs.push_back(Index->Hash); } - StoredData = DataRecord.getData(); + StoredData = DataRecord->getData(); break; } case TrieRecord::StorageKind::Standalone: { @@ -1000,8 +1034,12 @@ Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const { return dataError( "data record span passed the end of the standalone file"); for (auto InternRef : DataRecord.getRefs()) { + if (InternRef.getFileOffset().get() <= 0) + return dataError("invalid ref offset"); auto Index = getIndexProxyFromRef(InternRef); - Refs.push_back(Index.Hash); + if (!Index) + return Index.takeError(); + Refs.push_back(Index->Hash); } StoredData = DataRecord.getData(); break; @@ -1020,7 +1058,7 @@ Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const { SmallVector ComputedHash; Hasher(Refs, StoredData, ComputedHash); - if (I.Hash != ArrayRef(ComputedHash)) + if (I->Hash != ArrayRef(ComputedHash)) return dataError("hash mismatch, got \'" + toHex(ComputedHash) + "\' instead"); @@ -1041,12 +1079,12 @@ Error OnDiskGraphDB::validateObjectID(ObjectID ExternalRef) const { return formatError("zero is not a valid ref"); InternalRef InternalRef = getInternalRef(ExternalRef); - auto I = getIndexProxyFromRefChecked(InternalRef); + auto I = getIndexProxyFromRef(InternalRef); if (!I) return formatError(llvm::toString(I.takeError())); auto Hash = getDigest(*I); - OnDiskHashMappedTrie::const_pointer P = Index.find(Hash); + OnDiskTrieRawHashMap::ConstOnDiskPtr P = Index.find(Hash); if (!P) return formatError("not found using hash " + toHex(Hash)); IndexProxy OtherI = getIndexProxyFromPointer(P); @@ -1062,7 +1100,7 @@ void OnDiskGraphDB::print(raw_ostream &OS) const { OS << "on-disk-root-path: " << RootPath << "\n"; struct PoolInfo { - int64_t Offset; + uint64_t Offset; }; SmallVector Pool; @@ -1103,11 +1141,15 @@ void OnDiskGraphDB::print(raw_ostream &OS) const { Pool, [](PoolInfo LHS, PoolInfo RHS) { return LHS.Offset < 
RHS.Offset; }); for (PoolInfo PI : Pool) { OS << "- addr=" << (void *)PI.Offset << " "; - DataRecordHandle D = - DataRecordHandle::get(DataPool.beginData(FileOffset(PI.Offset))); - OS << "record refs=" << D.getNumRefs() << " data=" << D.getDataSize() - << " size=" << D.getTotalSize() - << " end=" << (void *)(PI.Offset + D.getTotalSize()) << "\n"; + auto D = DataRecordHandle::getFromDataPool(DataPool, FileOffset(PI.Offset)); + if (!D) { + OS << "error: " << toString(D.takeError()); + return; + } + + OS << "record refs=" << D->getNumRefs() << " data=" << D->getDataSize() + << " size=" << D->getTotalSize() + << " end=" << (void *)(PI.Offset + D->getTotalSize()) << "\n"; } } @@ -1115,7 +1157,7 @@ Expected OnDiskGraphDB::indexHash(ArrayRef Hash) { auto P = Index.insertLazy( Hash, [](FileOffset TentativeOffset, - OnDiskHashMappedTrie::ValueProxy TentativeValue) { + OnDiskTrieRawHashMap::ValueProxy TentativeValue) { assert(TentativeValue.Data.size() == sizeof(TrieRecord)); assert( isAddrAligned(Align::Of(), TentativeValue.Data.data())); @@ -1129,7 +1171,7 @@ OnDiskGraphDB::indexHash(ArrayRef Hash) { } OnDiskGraphDB::IndexProxy OnDiskGraphDB::getIndexProxyFromPointer( - OnDiskHashMappedTrie::const_pointer P) const { + OnDiskTrieRawHashMap::ConstOnDiskPtr P) const { assert(P); assert(P.getOffset()); return IndexProxy{P.getOffset(), P->Hash, @@ -1167,7 +1209,7 @@ OnDiskGraphDB::getExistingReference(ArrayRef Digest, return getExternalReference(*I); }; - OnDiskHashMappedTrie::const_pointer P = Index.find(Digest); + OnDiskTrieRawHashMap::ConstOnDiskPtr P = Index.find(Digest); if (!P) return tryUpstream(std::nullopt); IndexProxy I = getIndexProxyFromPointer(P); @@ -1177,27 +1219,19 @@ OnDiskGraphDB::getExistingReference(ArrayRef Digest, return getExternalReference(makeInternalRef(I.Offset)); } -OnDiskGraphDB::IndexProxy -OnDiskGraphDB::getIndexProxyFromRef(InternalRef Ref) const { - OnDiskHashMappedTrie::const_pointer P = - Index.recoverFromFileOffset(Ref.getFileOffset()); - if 
(LLVM_UNLIKELY(!P)) - report_fatal_error("OnDiskCAS: corrupt internal reference"); - return getIndexProxyFromPointer(P); -} - Expected -OnDiskGraphDB::getIndexProxyFromRefChecked(InternalRef Ref) const { - OnDiskHashMappedTrie::const_pointer P = - Index.recoverFromFileOffset(Ref.getFileOffset()); +OnDiskGraphDB::getIndexProxyFromRef(InternalRef Ref) const { + auto P = Index.recoverFromFileOffset(Ref.getFileOffset()); if (LLVM_UNLIKELY(!P)) - return createStringError(make_error_code(std::errc::protocol_error), "corrupt internal reference"); - return getIndexProxyFromPointer(P); + return P.takeError(); + return getIndexProxyFromPointer(*P); } -ArrayRef OnDiskGraphDB::getDigest(InternalRef Ref) const { - IndexProxy I = getIndexProxyFromRef(Ref); - return I.Hash; +Expected> OnDiskGraphDB::getDigest(InternalRef Ref) const { + auto I = getIndexProxyFromRef(Ref); + if (!I) + return I.takeError(); + return I->Hash; } ArrayRef OnDiskGraphDB::getDigest(const IndexProxy &I) const { @@ -1207,20 +1241,16 @@ ArrayRef OnDiskGraphDB::getDigest(const IndexProxy &I) const { static std::variant getStandaloneDataOrDataRecord(const OnDiskDataAllocator &DataPool, ObjectHandle OH) { - auto getInternalHandle = [](ObjectHandle Handle) -> InternalHandle { - uint64_t Data = Handle.getOpaqueData(); - if (Data & 1) - return InternalHandle(*reinterpret_cast( - Data & (-1ULL << 1))); - return InternalHandle(Data); - }; - - InternalHandle Handle = getInternalHandle(OH); - if (Handle.SDIM) - return Handle.SDIM; + // Decode ObjectHandle to locate the stored content. 
+ uint64_t Data = OH.getOpaqueData(); + if (Data & 1) { + const auto *SDIM = + reinterpret_cast(Data & (-1ULL << 1)); + return SDIM; + } auto DataHandle = - DataRecordHandle::get(DataPool.beginData(Handle.getAsFileOffset())); + cantFail(DataRecordHandle::getFromDataPool(DataPool, FileOffset(Data))); assert(DataHandle.getData().end()[0] == 0 && "Null termination"); return DataHandle; } @@ -1263,21 +1293,16 @@ OnDiskGraphDB::getInternalFileBackedObjectData(ObjectHandle Node) const { Expected> OnDiskGraphDB::load(ObjectID ExternalRef) { InternalRef Ref = getInternalRef(ExternalRef); - IndexProxy I = getIndexProxyFromRef(Ref); - TrieRecord::Data Object = I.Ref.load(); + auto I = getIndexProxyFromRef(Ref); + if (!I) + return I.takeError(); + TrieRecord::Data Object = I->Ref.load(); - if (Object.SK == TrieRecord::StorageKind::Unknown) { - if (!UpstreamDB) - return std::nullopt; + if (Object.SK == TrieRecord::StorageKind::Unknown) return faultInFromUpstream(ExternalRef); - } - - auto toObjectHandle = [](InternalHandle H) -> ObjectHandle { - return ObjectHandle::fromOpaqueData(H.getRawData()); - }; if (Object.SK == TrieRecord::StorageKind::DataPool) - return toObjectHandle(InternalHandle(Object.Offset)); + return ObjectHandle::fromFileOffset(Object.Offset); // Only TrieRecord::StorageKind::Standalone (and variants) need to be // explicitly loaded. @@ -1285,7 +1310,7 @@ OnDiskGraphDB::load(ObjectID ExternalRef) { // There's corruption if standalone objects have offsets, or if we get here // for something that isn't standalone. if (Object.Offset) - return createCorruptObjectError(getDigest(I)); + return createCorruptObjectError(getDigest(*I)); switch (Object.SK) { case TrieRecord::StorageKind::Unknown: case TrieRecord::StorageKind::DataPool: @@ -1302,20 +1327,37 @@ OnDiskGraphDB::load(ObjectID ExternalRef) { // suitably 0-padded. Requiring null-termination here would be too expensive // for extremely large objects that happen to be page-aligned. 
SmallString<256> Path; - getStandalonePath(TrieRecord::getStandaloneFileSuffix(Object.SK), I.Offset, + getStandalonePath(TrieRecord::getStandaloneFilePrefix(Object.SK), I->Offset, Path); - ErrorOr> OwnedBuffer = MemoryBuffer::getFile( - Path, /*IsText=*/false, /*RequiresNullTerminator=*/false); - if (!OwnedBuffer) - return createCorruptObjectError(getDigest(I)); - return toObjectHandle(InternalHandle( + + auto File = sys::fs::openNativeFileForRead(Path); + if (!File) + return createFileError(Path, File.takeError()); + + llvm::scope_exit CloseFile([&]() { sys::fs::closeFile(*File); }); + + sys::fs::file_status Status; + if (std::error_code EC = sys::fs::status(*File, Status)) + return createCorruptObjectError(getDigest(*I)); + + std::error_code EC; + auto Region = std::make_unique( + *File, sys::fs::mapped_file_region::readonly, Status.getSize(), 0, EC); + if (EC) + return createCorruptObjectError(getDigest(*I)); + + return ObjectHandle::fromMemory( static_cast(StandaloneData) - ->insert(I.Hash, Object.SK, std::move(*OwnedBuffer), I.Offset))); + ->insert(I->Hash, Object.SK, std::move(Region), I->Offset)); } Expected OnDiskGraphDB::isMaterialized(ObjectID Ref) { - switch (getObjectPresence(Ref, /*CheckUpstream=*/true)) { + auto Presence = getObjectPresence(Ref, /*CheckUpstream=*/true); + if (!Presence) + return Presence.takeError(); + + switch (*Presence) { case ObjectPresence::Missing: return false; case ObjectPresence::InPrimaryDB: @@ -1325,23 +1367,24 @@ Expected OnDiskGraphDB::isMaterialized(ObjectID Ref) { return FaultInResult.takeError(); return true; } + llvm_unreachable("Unknown ObjectPresence enum"); } -OnDiskGraphDB::ObjectPresence +Expected OnDiskGraphDB::getObjectPresence(ObjectID ExternalRef, bool CheckUpstream) const { InternalRef Ref = getInternalRef(ExternalRef); - Expected I = getIndexProxyFromRefChecked(Ref); - if (!I) { - // FIXME: this decision should be migrated to callers. 
- consumeError(I.takeError()); - return ObjectPresence::Missing; - } + auto I = getIndexProxyFromRef(Ref); + if (!I) + return I.takeError(); + TrieRecord::Data Object = I->Ref.load(); if (Object.SK != TrieRecord::StorageKind::Unknown) return ObjectPresence::InPrimaryDB; + if (!CheckUpstream || !UpstreamDB) return ObjectPresence::Missing; + std::optional UpstreamID = UpstreamDB->getExistingReference(getDigest(*I)); return UpstreamID.has_value() ? ObjectPresence::OnlyInUpstreamDB @@ -1352,16 +1395,17 @@ InternalRef OnDiskGraphDB::makeInternalRef(FileOffset IndexOffset) { return InternalRef::getFromOffset(IndexOffset); } -static void getStandalonePath(StringRef RootPath, StringRef Suffix, +static void getStandalonePath(StringRef RootPath, StringRef Prefix, FileOffset IndexOffset, SmallVectorImpl &Path) { Path.assign(RootPath.begin(), RootPath.end()); - sys::path::append(Path, FilePrefix + Twine(IndexOffset.get()) + Suffix); + sys::path::append(Path, + Prefix + Twine(IndexOffset.get()) + "." 
+ CASFormatVersion); } -void OnDiskGraphDB::getStandalonePath(StringRef Suffix, FileOffset IndexOffset, +void OnDiskGraphDB::getStandalonePath(StringRef Prefix, FileOffset IndexOffset, SmallVectorImpl &Path) const { - return ::getStandalonePath(RootPath, Suffix, IndexOffset, Path); + return ::getStandalonePath(RootPath, Prefix, IndexOffset, Path); } OnDiskContent StandaloneDataInMemory::getContent() const { @@ -1381,14 +1425,14 @@ OnDiskContent StandaloneDataInMemory::getContent() const { } if (Leaf) { - assert(Region->getBuffer().drop_back(Leaf0).end()[0] == 0 && + StringRef Data(Region->data(), Region->size()); + assert(Data.drop_back(Leaf0).end()[0] == 0 && "Standalone node data missing null termination"); - return OnDiskContent{ - std::nullopt, - arrayRefFromStringRef(Region->getBuffer().drop_back(Leaf0))}; + return OnDiskContent{std::nullopt, + arrayRefFromStringRef(Data.drop_back(Leaf0))}; } - DataRecordHandle Record = DataRecordHandle::get(Region->getBuffer().data()); + DataRecordHandle Record = DataRecordHandle::get(Region->data()); assert(Record.getData().end()[0] == 0 && "Standalone object record missing null termination for data"); return OnDiskContent{Record, std::nullopt}; @@ -1408,19 +1452,20 @@ StandaloneDataInMemory::getInternalFileBackedObjectData( case TrieRecord::StorageKind::StandaloneLeaf: bool IsFileNulTerminated = SK == TrieRecord::StorageKind::StandaloneLeaf0; SmallString<256> Path; - ::getStandalonePath(RootPath, TrieRecord::getStandaloneFileSuffix(SK), + ::getStandalonePath(RootPath, TrieRecord::getStandaloneFilePrefix(SK), IndexOffset, Path); return OnDiskGraphDB::FileBackedData{ getContent().getData(), OnDiskGraphDB::FileBackedData::FileInfoTy{ std::string(Path), IsFileNulTerminated}}; } + llvm_unreachable("Unknown StorageKind enum"); } -Expected -OnDiskGraphDB::createTempFile(StringRef FinalPath, uint64_t Size) { +static Expected +createTempFile(StringRef FinalPath, uint64_t Size, OnDiskCASLogger *Logger) { + assert(Size && "Unexpected 
request for an empty temp file"); - Expected File = - TempFile::create(FinalPath + ".%%%%%%", Logger.get()); + Expected File = TempFile::create(FinalPath + ".%%%%%%", Logger); if (!File) return File.takeError(); @@ -1454,11 +1499,12 @@ Error OnDiskGraphDB::createStandaloneLeaf(IndexProxy &I, ArrayRef Data) { SmallString<256> Path; int64_t FileSize = Data.size() + Leaf0; - getStandalonePath(TrieRecord::getStandaloneFileSuffix(SK), I.Offset, Path); + getStandalonePath(TrieRecord::getStandaloneFilePrefix(SK), I.Offset, Path); + // Write the file. Don't reuse this mapped_file_region, which is read/write. // Let load() pull up one that's read-only. - Expected File = createTempFile(Path, FileSize); + Expected File = createTempFile(Path, FileSize, Logger.get()); if (!File) return File.takeError(); assert(File->size() == (uint64_t)FileSize); @@ -1488,18 +1534,20 @@ Error OnDiskGraphDB::createStandaloneLeaf(IndexProxy &I, ArrayRef Data) { Error OnDiskGraphDB::store(ObjectID ID, ArrayRef Refs, ArrayRef Data) { - IndexProxy I = getIndexProxyFromRef(getInternalRef(ID)); + auto I = getIndexProxyFromRef(getInternalRef(ID)); + if (LLVM_UNLIKELY(!I)) + return I.takeError(); // Early return in case the node exists. { - TrieRecord::Data Existing = I.Ref.load(); + TrieRecord::Data Existing = I->Ref.load(); if (Existing.SK != TrieRecord::StorageKind::Unknown) return Error::success(); } // Big leaf nodes. 
if (Refs.empty() && Data.size() > TrieRecord::MaxEmbeddedSize) - return createStandaloneLeaf(I, Data); + return createStandaloneLeaf(*I, Data); // TODO: Check whether it's worth checking the index for an already existing // object (like storeTreeImpl() does) before building up the @@ -1519,10 +1567,10 @@ Error OnDiskGraphDB::store(ObjectID ID, ArrayRef Refs, std::optional File; std::optional FileSize; auto AllocStandaloneFile = [&](size_t Size) -> Expected { - getStandalonePath(TrieRecord::getStandaloneFileSuffix( + getStandalonePath(TrieRecord::getStandaloneFilePrefix( TrieRecord::StorageKind::Standalone), - I.Offset, Path); - if (Error E = createTempFile(Path, Size).moveInto(File)) + I->Offset, Path); + if (Error E = createTempFile(Path, Size, Logger.get()).moveInto(File)) return std::move(E); assert(File->size() == Size); FileSize = Size; @@ -1570,7 +1618,7 @@ Error OnDiskGraphDB::store(ObjectID ID, ArrayRef Refs, // // Then decide what to do with the file. Better to discard than overwrite if // another thread/process has already added this. - TrieRecord::Data Existing = I.Ref.load(); + TrieRecord::Data Existing = I->Ref.load(); { TrieRecord::Data NewObject{SK, PoolOffset}; if (File) { @@ -1589,7 +1637,7 @@ Error OnDiskGraphDB::store(ObjectID ID, ArrayRef Refs, // TODO: Find a way to reuse the storage from the new-but-abandoned record // handle. if (Existing.SK == TrieRecord::StorageKind::Unknown) { - if (I.Ref.compare_exchange_strong(Existing, NewObject)) { + if (I->Ref.compare_exchange_strong(Existing, NewObject)) { if (FileSize) recordStandaloneSizeIncrease(*FileSize); return Error::success(); @@ -1598,7 +1646,7 @@ Error OnDiskGraphDB::store(ObjectID ID, ArrayRef Refs, } if (Existing.SK == TrieRecord::StorageKind::Unknown) - return createCorruptObjectError(getDigest(I)); + return createCorruptObjectError(getDigest(*I)); // Load existing object. 
return Error::success(); @@ -1611,7 +1659,7 @@ Error OnDiskGraphDB::storeFile(ObjectID ID, StringRef FilePath) { Error OnDiskGraphDB::storeFile( ObjectID ID, StringRef FilePath, std::optional ImportKind) { - auto I = getIndexProxyFromRefChecked(getInternalRef(ID)); + auto I = getIndexProxyFromRef(getInternalRef(ID)); if (LLVM_UNLIKELY(!I)) return I.takeError(); @@ -1622,6 +1670,7 @@ Error OnDiskGraphDB::storeFile( return Error::success(); } + uint64_t FileSize; if (std::error_code EC = sys::fs::file_size(FilePath, FileSize)) return createFileError(FilePath, EC); @@ -1670,7 +1719,7 @@ Error OnDiskGraphDB::storeFile( } SmallString<256> StandalonePath; - getStandalonePath(TrieRecord::getStandaloneFileSuffix(SK), I->Offset, + getStandalonePath(TrieRecord::getStandaloneFilePrefix(SK), I->Offset, StandalonePath); if (Error E = UniqueTmp.renameTo(StandalonePath)) return E; @@ -1693,10 +1742,10 @@ Error OnDiskGraphDB::storeFile( } void OnDiskGraphDB::recordStandaloneSizeIncrease(size_t SizeIncrease) { - getStandaloneStorageSize().fetch_add(SizeIncrease, std::memory_order_relaxed); + standaloneStorageSize().fetch_add(SizeIncrease, std::memory_order_relaxed); } -std::atomic &OnDiskGraphDB::getStandaloneStorageSize() { +std::atomic &OnDiskGraphDB::standaloneStorageSize() const { MutableArrayRef UserHeader = DataPool.getUserHeader(); assert(UserHeader.size() == sizeof(std::atomic)); assert(isAddrAligned(Align(8), UserHeader.data())); @@ -1704,8 +1753,7 @@ std::atomic &OnDiskGraphDB::getStandaloneStorageSize() { } uint64_t OnDiskGraphDB::getStandaloneStorageSize() const { - return const_cast(this)->getStandaloneStorageSize().load( - std::memory_order_relaxed); + return standaloneStorageSize().load(std::memory_order_relaxed); } size_t OnDiskGraphDB::getStorageSize() const { @@ -1718,37 +1766,21 @@ unsigned OnDiskGraphDB::getHardStorageLimitUtilization() const { return std::max(IndexPercent, DataPercent); } -static bool useSmallMappedFiles(const Twine &P) { - // macOS tmpfs does 
not support sparse tails. -#if defined(__APPLE__) && __has_include() - SmallString<128> PathStorage; - StringRef Path = P.toNullTerminatedStringRef(PathStorage); - struct statfs StatFS; - if (statfs(Path.data(), &StatFS) != 0) - return false; - - if (strcmp(StatFS.f_fstypename, "tmpfs") == 0) - return true; -#endif - - return false; -} - -Expected> OnDiskGraphDB::open( - StringRef AbsPath, StringRef HashName, unsigned HashByteSize, - std::unique_ptr UpstreamDB, - std::shared_ptr Logger, FaultInPolicy Policy) { +Expected> +OnDiskGraphDB::open(StringRef AbsPath, StringRef HashName, + unsigned HashByteSize, OnDiskGraphDB *UpstreamDB, + std::shared_ptr Logger, + FaultInPolicy Policy) { if (std::error_code EC = sys::fs::create_directories(AbsPath)) return createFileError(AbsPath, EC); - const StringRef Slash = sys::path::get_separator(); constexpr uint64_t MB = 1024ull * 1024ull; constexpr uint64_t GB = 1024ull * 1024ull * 1024ull; uint64_t MaxIndexSize = 12 * GB; uint64_t MaxDataPoolSize = 24 * GB; - if (useSmallMappedFiles(AbsPath)) { + if (useSmallMappingSize(AbsPath)) { MaxIndexSize = 1 * GB; MaxDataPoolSize = 2 * GB; } @@ -1759,22 +1791,26 @@ Expected> OnDiskGraphDB::open( if (*CustomSize) MaxIndexSize = MaxDataPoolSize = **CustomSize; - std::optional Index; - if (Error E = - OnDiskHashMappedTrie::create(AbsPath + Slash + FilePrefix + IndexFile, - IndexTableName + "[" + HashName + "]", - HashByteSize * CHAR_BIT, - /*DataSize=*/sizeof(TrieRecord), - MaxIndexSize, /*MinFileSize=*/MB, Logger) - .moveInto(Index)) + SmallString<256> IndexPath(AbsPath); + sys::path::append(IndexPath, IndexFilePrefix + CASFormatVersion); + std::optional Index; + if (Error E = OnDiskTrieRawHashMap::create( + IndexPath, IndexTableName + "[" + HashName + "]", + HashByteSize * CHAR_BIT, + /*DataSize=*/sizeof(TrieRecord), MaxIndexSize, + /*MinFileSize=*/MB, Logger) + .moveInto(Index)) return std::move(E); uint32_t UserHeaderSize = sizeof(std::atomic); + + SmallString<256> 
DataPoolPath(AbsPath); + sys::path::append(DataPoolPath, DataPoolFilePrefix + CASFormatVersion); std::optional DataPool; StringRef PolicyName = Policy == FaultInPolicy::SingleNode ? "single" : "full"; if (Error E = OnDiskDataAllocator::create( - AbsPath + Slash + FilePrefix + DataPoolFile, + DataPoolPath, DataPoolTableName + "[" + HashName + "]" + PolicyName, MaxDataPoolSize, /*MinFileSize=*/MB, UserHeaderSize, Logger, [](void *UserHeaderPtr) { @@ -1784,25 +1820,24 @@ Expected> OnDiskGraphDB::open( return std::move(E); if (DataPool->getUserHeader().size() != UserHeaderSize) return createStringError(llvm::errc::argument_out_of_domain, - "unexpected user header in '" + AbsPath + Slash + - FilePrefix + DataPoolFile + "'"); + "unexpected user header in '" + DataPoolPath + + "'"); return std::unique_ptr( new OnDiskGraphDB(AbsPath, std::move(*Index), std::move(*DataPool), - std::move(UpstreamDB), Policy, std::move(Logger))); + UpstreamDB, Policy, std::move(Logger))); } -OnDiskGraphDB::OnDiskGraphDB(StringRef RootPath, OnDiskHashMappedTrie Index, +OnDiskGraphDB::OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index, OnDiskDataAllocator DataPool, - std::unique_ptr UpstreamDB, - FaultInPolicy Policy, + OnDiskGraphDB *UpstreamDB, FaultInPolicy Policy, std::shared_ptr Logger) : Index(std::move(Index)), DataPool(std::move(DataPool)), - RootPath(RootPath.str()), UpstreamDB(std::move(UpstreamDB)), - FIPolicy(Policy), Logger(std::move(Logger)) { + RootPath(RootPath.str()), UpstreamDB(UpstreamDB), FIPolicy(Policy), + Logger(std::move(Logger)) { /// Lifetime for "big" objects not in DataPool. /// - /// NOTE: Could use ThreadSafeHashMappedTrie here. For now, doing something + /// NOTE: Could use ThreadSafeTrieRawHashMap here. For now, doing something /// simpler on the assumption there won't be much contention since most data /// is not big. 
If there is contention, and we've already fixed ObjectProxy /// object handles to be cheap enough to use consistently, the fix might be @@ -1822,7 +1857,6 @@ Error OnDiskGraphDB::importFullTree(ObjectID PrimaryID, // against the process dying during importing and leaving the database with an // incomplete tree. Note that if the upstream has missing nodes then the tree // will be copied with missing nodes as well, it won't be considered an error. - struct UpstreamCursor { ObjectHandle Node; size_t RefsCount; @@ -1845,9 +1879,8 @@ Error OnDiskGraphDB::importFullTree(ObjectID PrimaryID, if (!Node) return; auto Refs = UpstreamDB->getObjectRefs(*Node); - CursorStack.push_back({*Node, - (size_t)std::distance(Refs.begin(), Refs.end()), - Refs.begin(), Refs.end()}); + CursorStack.push_back( + {*Node, (size_t)llvm::size(Refs), Refs.begin(), Refs.end()}); }; enqueueNode(PrimaryID, UpstreamNode); @@ -1900,7 +1933,7 @@ Error OnDiskGraphDB::importSingleNode(ObjectID PrimaryID, auto UpstreamRefs = UpstreamDB->getObjectRefs(UpstreamNode); SmallVector Refs; - Refs.reserve(std::distance(UpstreamRefs.begin(), UpstreamRefs.end())); + Refs.reserve(llvm::size(UpstreamRefs)); for (ObjectID UpstreamRef : UpstreamRefs) { auto Ref = getReference(UpstreamDB->getDigest(UpstreamRef)); if (LLVM_UNLIKELY(!Ref)) @@ -1935,7 +1968,8 @@ Error OnDiskGraphDB::importUpstreamData(ObjectID PrimaryID, Expected> OnDiskGraphDB::faultInFromUpstream(ObjectID PrimaryID) { - assert(UpstreamDB); + if (!UpstreamDB) + return std::nullopt; auto UpstreamID = UpstreamDB->getReference(getDigest(PrimaryID)); if (LLVM_UNLIKELY(!UpstreamID)) diff --git a/llvm/lib/CAS/OnDiskKeyValueDB.cpp b/llvm/lib/CAS/OnDiskKeyValueDB.cpp index ba248281c414a..b8883f3d75082 100644 --- a/llvm/lib/CAS/OnDiskKeyValueDB.cpp +++ b/llvm/lib/CAS/OnDiskKeyValueDB.cpp @@ -5,10 +5,23 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +// +/// \file +/// 
This file implements OnDiskKeyValueDB, an ondisk key value database. +/// +/// The KeyValue database file is named `actions.` inside the CAS +/// directory. The database stores a mapping between a fixed-sized key and a +/// fixed-sized value, where the size of key and value can be configured when +/// opening the database. +/// +// +//===----------------------------------------------------------------------===// #include "llvm/CAS/OnDiskKeyValueDB.h" #include "OnDiskCommon.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/CAS/OnDiskTrieRawHashMap.h" +#include "llvm/CAS/UnifiedOnDiskCache.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Errc.h" @@ -18,8 +31,7 @@ using namespace llvm; using namespace llvm::cas; using namespace llvm::cas::ondisk; -static constexpr StringLiteral ActionCacheFile = "actions"; -static constexpr StringLiteral FilePrefix = "v4."; +static constexpr StringLiteral ActionCacheFile = "actions."; Expected> OnDiskKeyValueDB::put(ArrayRef Key, ArrayRef Value) { @@ -30,7 +42,7 @@ Expected> OnDiskKeyValueDB::put(ArrayRef Key, assert(Value.size() == ValueSize); auto ActionP = Cache.insertLazy( Key, [&](FileOffset TentativeOffset, - OnDiskHashMappedTrie::ValueProxy TentativeValue) { + OnDiskTrieRawHashMap::ValueProxy TentativeValue) { assert(TentativeValue.Data.size() == ValueSize); llvm::copy(Value, TentativeValue.Data.data()); }); @@ -42,22 +54,28 @@ Expected> OnDiskKeyValueDB::put(ArrayRef Key, Expected>> OnDiskKeyValueDB::get(ArrayRef Key) { // Check the result cache. 
- OnDiskHashMappedTrie::const_pointer ActionP = Cache.find(Key); - if (!ActionP) + OnDiskTrieRawHashMap::ConstOnDiskPtr ActionP = Cache.find(Key); + if (ActionP) { + assert(isAddrAligned(Align(8), ActionP->Data.data())); + return ActionP->Data; + } + if (!UnifiedCache || !UnifiedCache->UpstreamKVDB) return std::nullopt; - assert(isAddrAligned(Align(8), ActionP->Data.data())); - return ActionP->Data; + + // Try to fault in from upstream. + return UnifiedCache->faultInFromUpstreamKV(Key); } Expected> OnDiskKeyValueDB::open(StringRef Path, StringRef HashName, unsigned KeySize, StringRef ValueName, size_t ValueSize, + UnifiedOnDiskCache *Cache, std::shared_ptr Logger) { if (std::error_code EC = sys::fs::create_directories(Path)) return createFileError(Path, EC); SmallString<256> CachePath(Path); - sys::path::append(CachePath, FilePrefix + ActionCacheFile); + sys::path::append(CachePath, ActionCacheFile + CASFormatVersion); constexpr uint64_t MB = 1024ull * 1024ull; constexpr uint64_t GB = 1024ull * 1024ull * 1024ull; @@ -68,8 +86,8 @@ OnDiskKeyValueDB::open(StringRef Path, StringRef HashName, unsigned KeySize, if (*CustomSize) MaxFileSize = **CustomSize; - std::optional ActionCache; - if (Error E = OnDiskHashMappedTrie::create( + std::optional ActionCache; + if (Error E = OnDiskTrieRawHashMap::create( CachePath, "llvm.actioncache[" + HashName + "->" + ValueName + "]", KeySize * 8, @@ -79,13 +97,14 @@ OnDiskKeyValueDB::open(StringRef Path, StringRef HashName, unsigned KeySize, return std::move(E); return std::unique_ptr( - new OnDiskKeyValueDB(ValueSize, std::move(*ActionCache))); + new OnDiskKeyValueDB(ValueSize, std::move(*ActionCache), Cache)); } -Error OnDiskKeyValueDB::validate(CheckValueT CheckValue) const { +static Error validateOnDiskKeyValueDB(const OnDiskTrieRawHashMap &Cache, + size_t ValueSize, OnDiskGraphDB *CAS) { return Cache.validate( [&](FileOffset Offset, - OnDiskHashMappedTrie::ConstValueProxy Record) -> Error { + OnDiskTrieRawHashMap::ConstValueProxy 
Record) -> Error { auto formatError = [&](Twine Msg) { return createStringError( llvm::errc::illegal_byte_sequence, @@ -96,10 +115,28 @@ Error OnDiskKeyValueDB::validate(CheckValueT CheckValue) const { if (Record.Data.size() != ValueSize) return formatError("wrong cache value size"); - if (!isAligned(Align(8), Record.Data.size())) + if (!isAddrAligned(Align(8), Record.Data.data())) return formatError("wrong cache value alignment"); - if (CheckValue) - return CheckValue(Offset, Record.Data); + if (CAS) { + auto ID = + ondisk::UnifiedOnDiskCache::getObjectIDFromValue(Record.Data); + if (Error E = CAS->validateObjectID(ID)) + return formatError(llvm::toString(std::move(E))); + } return Error::success(); }); -} \ No newline at end of file +} + +Error OnDiskKeyValueDB::validate() const { + if (UnifiedCache && UnifiedCache->UpstreamKVDB) { + assert(UnifiedCache->UpstreamGraphDB && + "upstream cache and cas must be paired"); + if (auto E = validateOnDiskKeyValueDB(UnifiedCache->UpstreamKVDB->Cache, + UnifiedCache->UpstreamKVDB->ValueSize, + UnifiedCache->UpstreamGraphDB.get())) + return E; + } + return validateOnDiskKeyValueDB( + Cache, ValueSize, + UnifiedCache ? UnifiedCache->PrimaryGraphDB.get() : nullptr); +} diff --git a/llvm/lib/CAS/OnDiskHashMappedTrie.cpp b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp similarity index 56% rename from llvm/lib/CAS/OnDiskHashMappedTrie.cpp rename to llvm/lib/CAS/OnDiskTrieRawHashMap.cpp index 5ca85e0b0e39b..edb19d7e7c737 100644 --- a/llvm/lib/CAS/OnDiskHashMappedTrie.cpp +++ b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp @@ -1,295 +1,56 @@ -//===- OnDiskHashMappedTrie.cpp -------------------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +/// +/// \file Implements OnDiskTrieRawHashMap. +/// +//===----------------------------------------------------------------------===// -#include "llvm/CAS/OnDiskHashMappedTrie.h" -#include "HashMappedTrieIndexGenerator.h" -#include "llvm/ADT/ScopeExit.h" +#include "llvm/CAS/OnDiskTrieRawHashMap.h" +#include "DatabaseFile.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/CAS/MappedFileRegionBumpPtr.h" +#include "llvm/ADT/TrieHashIndexGenerator.h" +#include "llvm/CAS/MappedFileRegionArena.h" #include "llvm/CAS/OnDiskCASLogger.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Path.h" +#include "llvm/Config/llvm-config.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/Threading.h" #include "llvm/Support/raw_ostream.h" -#include using namespace llvm; using namespace llvm::cas; -using ondisk::OnDiskCASLogger; +using namespace llvm::cas::ondisk; #if LLVM_ENABLE_ONDISK_CAS -static_assert(sizeof(size_t) == sizeof(uint64_t), "64-bit only"); -static_assert(sizeof(std::atomic) == sizeof(uint64_t), - "Requires lock-free 64-bit atomics"); - //===----------------------------------------------------------------------===// -// Generic database data structures. -//===----------------------------------------------------------------------===// -namespace { -using MappedFileRegion = MappedFileRegionBumpPtr::RegionT; - -/// Generic handle for a table. -/// -/// Probably we want some table kinds for pointing at multiple tables. -/// - Probably a tree or trie type makes sense. -/// - Or a deque. Linear search is okay as long as there aren't many tables in -/// a file. 
-/// -/// Generic table header layout: -/// - 2-bytes: TableKind -/// - 2-bytes: TableNameSize -/// - 4-bytes: TableNameRelOffset (relative to header) -class TableHandle { -public: - enum class TableKind : uint16_t { - HashMappedTrie = 1, - DataAllocator = 2, - }; - struct Header { - TableKind Kind; - uint16_t NameSize; - int32_t NameRelOffset; // Relative to Header. - }; - - explicit operator bool() const { return H; } - const Header &getHeader() const { return *H; } - MappedFileRegion &getRegion() const { return *Region; } - - template static void check() { - static_assert( - std::is_same::value, - "T::GenericHeader should be of type TableHandle::Header"); - static_assert(offsetof(typename T::Header, GenericHeader) == 0, - "T::GenericHeader must be the head of T::Header"); - } - template bool is() const { return T::Kind == H->Kind; } - template T dyn_cast() const { - check(); - if (is()) - return T(*Region, *reinterpret_cast(H)); - return T(); - } - template T cast() const { - assert(is()); - return dyn_cast(); - } - - StringRef getName() const { - auto *Begin = reinterpret_cast(H) + H->NameRelOffset; - return StringRef(Begin, H->NameSize); - } - - TableHandle() = default; - TableHandle(MappedFileRegion &Region, Header &H) : Region(&Region), H(&H) {} - TableHandle(MappedFileRegion &Region, intptr_t HeaderOffset) - : TableHandle(Region, - *reinterpret_cast
(Region.data() + HeaderOffset)) { - } - -private: - MappedFileRegion *Region = nullptr; - Header *H = nullptr; -}; - -/// Encapsulate a database file, which: -/// - Sets/checks magic. -/// - Sets/checks version. -/// - Points at an arbitrary root table (can be changed later using a lock-free -/// algorithm). -/// - Sets up a BumpPtr for allocation. -/// -/// Top-level layout: -/// - 8-bytes: Magic -/// - 8-bytes: Version -/// - 8-bytes: RootTable (16-bits: Kind; 48-bits: Offset) -/// - 8-bytes: BumpPtr -class DatabaseFile { -public: - static constexpr uint64_t getMagic() { return 0x00FFDA7ABA53FF00ULL; } - static constexpr uint64_t getVersion() { return 1ULL; } - struct Header { - uint64_t Magic; - uint64_t Version; - std::atomic RootTableOffset; - std::atomic BumpPtr; - }; - - const Header &getHeader() { return *H; } - MappedFileRegionBumpPtr &getAlloc() { return Alloc; } - MappedFileRegion &getRegion() { return Alloc.getRegion(); } - - /// Add a table. - /// - /// TODO: Allow lazy construction via getOrCreate()-style API. - void addTable(TableHandle Table); - - /// Find a table. May return null. - std::optional findTable(StringRef Name); - - static Expected - create(const Twine &Path, uint64_t Capacity, - std::shared_ptr Logger, - function_ref NewDBConstructor); - - size_t size() const { return Alloc.size(); } - -private: - static Expected - get(std::unique_ptr Alloc) { - if (Error E = validate(Alloc->getRegion())) - return std::move(E); - return DatabaseFile(std::move(Alloc)); - } - - static Error validate(MappedFileRegion &Region); - - DatabaseFile(MappedFileRegionBumpPtr &Alloc) - : H(reinterpret_cast
(Alloc.data())), Alloc(Alloc) {} - DatabaseFile(std::unique_ptr Alloc) - : DatabaseFile(*Alloc) { - OwnedAlloc = std::move(Alloc); - } - - Header *H = nullptr; - MappedFileRegionBumpPtr &Alloc; - std::unique_ptr OwnedAlloc; -}; - -} // end anonymous namespace - -static Error createTableConfigError(std::errc ErrC, StringRef Path, - StringRef TableName, const Twine &Msg) { - return createStringError(make_error_code(ErrC), - Path + "[" + TableName + "]: " + Msg); -} - -Expected -DatabaseFile::create(const Twine &Path, uint64_t Capacity, - std::shared_ptr Logger, - function_ref NewDBConstructor) { - // Constructor for if the file doesn't exist. - auto NewFileConstructor = [&](MappedFileRegionBumpPtr &Alloc) -> Error { - if (Alloc.capacity() < sizeof(Header)) - return createTableConfigError(std::errc::argument_out_of_domain, - Path.str(), "datafile", - "Allocator too small for header"); - (void)new (Alloc.data()) Header{getMagic(), getVersion(), {0}, {0}}; - Alloc.initializeBumpPtr(offsetof(Header, BumpPtr)); - DatabaseFile DB(Alloc); - return NewDBConstructor(DB); - }; - - // Get or create the file. - MappedFileRegionBumpPtr Alloc; - if (Error E = MappedFileRegionBumpPtr::create( - Path, Capacity, offsetof(Header, BumpPtr), - std::move(Logger), NewFileConstructor) - .moveInto(Alloc)) - return std::move(E); - - return DatabaseFile::get( - std::make_unique(std::move(Alloc))); -} - -void DatabaseFile::addTable(TableHandle Table) { - assert(Table); - assert(&Table.getRegion() == &getRegion()); - int64_t ExistingRootOffset = 0; - const int64_t NewOffset = - reinterpret_cast(&Table.getHeader()) - getRegion().data(); - if (H->RootTableOffset.compare_exchange_strong(ExistingRootOffset, NewOffset)) - return; - - // Silently ignore attempts to set the root to itself. - if (ExistingRootOffset == NewOffset) - return; - - // FIXME: Fix the API so that having the same name is not an error. 
Instead, - // the colliding table should just be used as-is and the client can decide - // what to do with the new one. - // - // TODO: Add support for creating a chain or tree of tables (more than one at - // all!) to avoid this error. - TableHandle Root(getRegion(), ExistingRootOffset); - if (Root.getName() == Table.getName()) - report_fatal_error( - createStringError(make_error_code(std::errc::not_supported), - "table name collision '" + Table.getName() + "'")); - else - report_fatal_error( - createStringError(make_error_code(std::errc::not_supported), - "cannot add new table '" + Table.getName() + - "'" - " to existing root '" + - Root.getName() + "'")); -} - -std::optional DatabaseFile::findTable(StringRef Name) { - int64_t RootTableOffset = H->RootTableOffset.load(); - if (!RootTableOffset) - return std::nullopt; - - TableHandle Root(getRegion(), RootTableOffset); - if (Root.getName() == Name) - return Root; - - // TODO: Once multiple tables are supported, need to walk to find them. - return std::nullopt; -} - -Error DatabaseFile::validate(MappedFileRegion &Region) { - if (Region.size() < sizeof(Header)) - return createStringError(std::errc::invalid_argument, - "database: missing header"); - - // Check the magic and version. - auto *H = reinterpret_cast
(Region.data()); - if (H->Magic != getMagic()) - return createStringError(std::errc::invalid_argument, - "database: bad magic"); - if (H->Version != getVersion()) - return createStringError(std::errc::invalid_argument, - "database: wrong version"); - - // Check the bump-ptr, which should point past the header. - if (H->BumpPtr.load() < (int64_t)sizeof(Header)) - return createStringError(std::errc::invalid_argument, - "database: corrupt bump-ptr"); - - return Error::success(); -} - -//===----------------------------------------------------------------------===// -// HashMappedTrie data structures. +// TrieRawHashMap data structures. //===----------------------------------------------------------------------===// namespace { class SubtrieHandle; +class TrieRawHashMapHandle; class TrieVisitor; + +/// A value stored in the slots inside a SubTrie. A stored value can either be a +/// subtrie (encoded after negation) which is the file offset to another +/// subtrie, or it can be a fileset to a DataRecord. 
class SubtrieSlotValue { public: explicit operator bool() const { return !isEmpty(); } bool isEmpty() const { return !Offset; } bool isData() const { return Offset > 0; } bool isSubtrie() const { return Offset < 0; } - int64_t asData() const { + uint64_t asData() const { assert(isData()); return Offset; } - int64_t asSubtrie() const { + uint64_t asSubtrie() const { assert(isSubtrie()); return -Offset; } @@ -328,8 +89,6 @@ class SubtrieSlotValue { int64_t Offset = 0; }; -class HashMappedTrieHandle; - /// Subtrie layout: /// - 2-bytes: StartBit /// - 1-bytes: NumBits=lg(num-slots) @@ -356,7 +115,7 @@ class SubtrieHandle { using SlotT = std::atomic; static int64_t getSlotsSize(uint32_t NumBits) { - return sizeof(int64_t) * (1u << NumBits); + return sizeof(int64_t) * (1ull << NumBits); } static int64_t getSize(uint32_t NumBits) { @@ -381,9 +140,9 @@ class SubtrieHandle { SubtrieSlotValue SaveExpected(Expected); bool Result = Slots[I].compare_exchange_strong(Expected.Offset, New.Offset); if (Logger) - Logger->log_compare_exchange_strong(Region->data(), getOffset().Offset, I, - SaveExpected.Offset, New.Offset, - Expected.Offset); + Logger->logSubtrieHandleCmpXchg(Region->data(), getOffset().Offset, I, + SaveExpected.Offset, New.Offset, + Expected.Offset); return Result; } @@ -398,7 +157,7 @@ class SubtrieHandle { /// /// Returns the subtrie that now lives at \p I. 
Expected sink(size_t I, SubtrieSlotValue V, - MappedFileRegionBumpPtr &Alloc, + MappedFileRegionArena &Alloc, size_t NumSubtrieBits, SubtrieHandle &UnusedSubtrie, size_t NewI); @@ -418,7 +177,7 @@ class SubtrieHandle { uint32_t getStartBit() const { return H->StartBit; } uint32_t getNumBits() const { return H->NumBits; } - static Expected create(MappedFileRegionBumpPtr &Alloc, + static Expected create(MappedFileRegionArena &Alloc, uint32_t StartBit, uint32_t NumBits, OnDiskCASLogger *Logger); @@ -446,15 +205,16 @@ class SubtrieHandle { OnDiskCASLogger *Logger = nullptr; static MutableArrayRef getSlots(Header &H) { - return MutableArrayRef(reinterpret_cast(&H + 1), 1u << H.NumBits); + return MutableArrayRef(reinterpret_cast(&H + 1), + 1ull << H.NumBits); } }; -/// Handle for a HashMappedTrie table. +/// Handle for a TrieRawHashMap table. /// -/// HashMappedTrie table layout: +/// TrieRawHashMap table layout: /// - [8-bytes: Generic table header] -/// - 1-byte: NumSubtrieBits +/// - 1-byte: NumSubtrieBits /// - 1-byte: Flags (not used yet) /// - 2-bytes: NumHashBits /// - 4-bytes: RecordDataSize (in bytes) @@ -463,17 +223,17 @@ class SubtrieHandle { /// - '\0' /// /// Record layout: -/// - /// - -class HashMappedTrieHandle { +/// - +class TrieRawHashMapHandle { public: static constexpr TableHandle::TableKind Kind = - TableHandle::TableKind::HashMappedTrie; + TableHandle::TableKind::TrieRawHashMap; struct Header { TableHandle::Header GenericHeader; uint8_t NumSubtrieBits; - uint8_t Flags; // None used yet. + uint8_t Flags; ///< None used yet. 
uint16_t NumHashBits; uint32_t RecordDataSize; std::atomic RootTrieOffset; @@ -487,7 +247,7 @@ class HashMappedTrieHandle { } struct RecordData { - OnDiskHashMappedTrie::ValueProxy Proxy; + OnDiskTrieRawHashMap::ValueProxy Proxy; SubtrieSlotValue Offset; FileOffset getFileOffset() const { return Offset.asDataFileOffset(); } }; @@ -516,33 +276,35 @@ class HashMappedTrieHandle { } RecordData getRecord(SubtrieSlotValue Offset); - Expected createRecord(MappedFileRegionBumpPtr &Alloc, + Expected createRecord(MappedFileRegionArena &Alloc, ArrayRef Hash); explicit operator bool() const { return H; } const Header &getHeader() const { return *H; } SubtrieHandle getRoot() const; - Expected getOrCreateRoot(MappedFileRegionBumpPtr &Alloc); + int64_t getRootTrieOffset() const { return H->RootTrieOffset; } + Expected getOrCreateRoot(MappedFileRegionArena &Alloc); MappedFileRegion &getRegion() const { return *Region; } size_t getFlags() const { return H->Flags; } - uint64_t getNumSubtrieBits() const { return H->NumSubtrieBits; } - uint64_t getNumHashBits() const { return H->NumHashBits; } + size_t getNumSubtrieBits() const { return H->NumSubtrieBits; } + size_t getNumHashBits() const { return H->NumHashBits; } size_t getNumHashBytes() const { return getNumHashBytes(H->NumHashBits); } size_t getRecordDataSize() const { return H->RecordDataSize; } size_t getRecordSize() const { return getRecordSize(H->RecordDataSize, H->NumHashBits); } - IndexGenerator getIndexGen(SubtrieHandle Root, ArrayRef Hash) { + TrieHashIndexGenerator getIndexGen(SubtrieHandle Root, + ArrayRef Hash) { assert(Root.getStartBit() == 0); assert(getNumHashBytes() == Hash.size()); assert(getNumHashBits() == Hash.size() * 8); - return IndexGenerator{Root.getNumBits(), getNumSubtrieBits(), Hash}; + return TrieHashIndexGenerator{Root.getNumBits(), getNumSubtrieBits(), Hash}; } - static Expected - create(MappedFileRegionBumpPtr &Alloc, StringRef Name, + static Expected + create(MappedFileRegionArena &Alloc, StringRef 
Name, std::optional NumRootBits, uint64_t NumSubtrieBits, uint64_t NumHashBits, uint64_t RecordDataSize, std::shared_ptr Logger); @@ -552,16 +314,15 @@ class HashMappedTrieHandle { function_ref)> PrintRecordData = nullptr) const; Error validate( - function_ref + function_ref RecordVerifier) const; - - HashMappedTrieHandle() = default; - HashMappedTrieHandle(MappedFileRegion &Region, Header &H, + TrieRawHashMapHandle() = default; + TrieRawHashMapHandle(MappedFileRegion &Region, Header &H, std::shared_ptr Logger = nullptr) : Region(&Region), H(&H), Logger(std::move(Logger)) {} - HashMappedTrieHandle(MappedFileRegion &Region, intptr_t HeaderOffset, + TrieRawHashMapHandle(MappedFileRegion &Region, intptr_t HeaderOffset, std::shared_ptr Logger = nullptr) - : HashMappedTrieHandle( + : TrieRawHashMapHandle( Region, *reinterpret_cast
(Region.data() + HeaderOffset), std::move(Logger)) {} @@ -578,18 +339,18 @@ class HashMappedTrieHandle { } // end anonymous namespace -struct OnDiskHashMappedTrie::ImplType { +struct OnDiskTrieRawHashMap::ImplType { DatabaseFile File; - HashMappedTrieHandle Trie; + TrieRawHashMapHandle Trie; }; -Expected SubtrieHandle::create(MappedFileRegionBumpPtr &Alloc, +Expected SubtrieHandle::create(MappedFileRegionArena &Alloc, uint32_t StartBit, uint32_t NumBits, OnDiskCASLogger *Logger) { - assert(StartBit <= HashMappedTrieHandle::MaxNumHashBits); + assert(StartBit <= TrieRawHashMapHandle::MaxNumHashBits); assert(NumBits <= UINT8_MAX); - assert(NumBits <= HashMappedTrieHandle::MaxNumRootBits); + assert(NumBits <= TrieRawHashMapHandle::MaxNumRootBits); auto Mem = Alloc.allocate(getSize(NumBits)); if (LLVM_UNLIKELY(!Mem)) @@ -602,12 +363,12 @@ Expected SubtrieHandle::create(MappedFileRegionBumpPtr &Alloc, new (I) SlotT(0); if (Logger) - Logger->log_SubtrieHandle_create(Alloc.data(), S.getOffset().Offset, - StartBit, NumBits); + Logger->logSubtrieHandleCreate(Alloc.data(), S.getOffset().Offset, StartBit, + NumBits); return S; } -SubtrieHandle HashMappedTrieHandle::getRoot() const { +SubtrieHandle TrieRawHashMapHandle::getRoot() const { if (int64_t Root = H->RootTrieOffset) return SubtrieHandle(getRegion(), SubtrieSlotValue::getSubtrieOffset(Root), Logger.get()); @@ -615,7 +376,7 @@ SubtrieHandle HashMappedTrieHandle::getRoot() const { } Expected -HashMappedTrieHandle::getOrCreateRoot(MappedFileRegionBumpPtr &Alloc) { +TrieRawHashMapHandle::getOrCreateRoot(MappedFileRegionArena &Alloc) { assert(&Alloc.getRegion() == &getRegion()); if (SubtrieHandle Root = getRoot()) return Root; @@ -627,8 +388,7 @@ HashMappedTrieHandle::getOrCreateRoot(MappedFileRegionBumpPtr &Alloc) { return LazyRoot.takeError(); if (H->RootTrieOffset.compare_exchange_strong( - Race, LazyRoot->getOffset().asSubtrie()), - Logger.get()) + Race, LazyRoot->getOffset().asSubtrie())) return *LazyRoot; // There was 
a race. Return the other root. @@ -638,8 +398,8 @@ HashMappedTrieHandle::getOrCreateRoot(MappedFileRegionBumpPtr &Alloc) { Logger.get()); } -Expected -HashMappedTrieHandle::create(MappedFileRegionBumpPtr &Alloc, StringRef Name, +Expected +TrieRawHashMapHandle::create(MappedFileRegionArena &Alloc, StringRef Name, std::optional NumRootBits, uint64_t NumSubtrieBits, uint64_t NumHashBits, uint64_t RecordDataSize, @@ -655,7 +415,7 @@ HashMappedTrieHandle::create(MappedFileRegionBumpPtr &Alloc, StringRef Name, assert(NumHashBits <= UINT16_MAX && "Expected valid hash size"); assert(RecordDataSize <= UINT32_MAX && "Expected smaller table name"); auto *H = new (Alloc.getRegion().data() + *Offset) - Header{{TableHandle::TableKind::HashMappedTrie, (uint16_t)Name.size(), + Header{{TableHandle::TableKind::TrieRawHashMap, (uint16_t)Name.size(), (uint32_t)sizeof(Header)}, (uint8_t)NumSubtrieBits, /*Flags=*/0, @@ -668,7 +428,7 @@ HashMappedTrieHandle::create(MappedFileRegionBumpPtr &Alloc, StringRef Name, NameStorage[Name.size()] = 0; // Construct a root trie, if requested. 
- HashMappedTrieHandle Trie(Alloc.getRegion(), *H, Logger); + TrieRawHashMapHandle Trie(Alloc.getRegion(), *H, Logger); auto Sub = SubtrieHandle::create(Alloc, 0, *NumRootBits, Logger.get()); if (LLVM_UNLIKELY(!Sub)) return Sub.takeError(); @@ -677,18 +437,18 @@ HashMappedTrieHandle::create(MappedFileRegionBumpPtr &Alloc, StringRef Name, return Trie; } -HashMappedTrieHandle::RecordData -HashMappedTrieHandle::getRecord(SubtrieSlotValue Offset) { +TrieRawHashMapHandle::RecordData +TrieRawHashMapHandle::getRecord(SubtrieSlotValue Offset) { char *Begin = Region->data() + Offset.asData(); - OnDiskHashMappedTrie::ValueProxy Proxy; + OnDiskTrieRawHashMap::ValueProxy Proxy; Proxy.Data = MutableArrayRef(Begin, getRecordDataSize()); Proxy.Hash = ArrayRef(reinterpret_cast(Proxy.Data.end()), - getNumHashBytes()); + getNumHashBytes()); return RecordData{Proxy, Offset}; } -Expected -HashMappedTrieHandle::createRecord(MappedFileRegionBumpPtr &Alloc, +Expected +TrieRawHashMapHandle::createRecord(MappedFileRegionArena &Alloc, ArrayRef Hash) { assert(&Alloc.getRegion() == Region); assert(Hash.size() == getNumHashBytes()); @@ -700,75 +460,58 @@ HashMappedTrieHandle::createRecord(MappedFileRegionBumpPtr &Alloc, llvm::copy(Hash, const_cast(Record.Proxy.Hash.begin())); if (Logger) - Logger->log_HashMappedTrieHandle_createRecord( + Logger->logHashMappedTrieHandleCreateRecord( Alloc.data(), Record.Offset.getRawOffset(), Hash); return Record; } -OnDiskHashMappedTrie::const_pointer -OnDiskHashMappedTrie::recoverFromHashPointer( - const uint8_t *HashBeginPtr) const { - // Record hashes occur immediately after data. Compute the beginning of the - // record and check for overflow. - const uintptr_t HashBegin = reinterpret_cast(HashBeginPtr); - const uintptr_t RecordBegin = HashBegin - Impl->Trie.getRecordSize(); - if (HashBegin < RecordBegin) - return const_pointer(); - - // Check that it'll be a positive offset. 
- const uintptr_t FileBegin = - reinterpret_cast(Impl->File.getRegion().data()); - if (RecordBegin < FileBegin) - return const_pointer(); - - // Good enough to form an offset. Continue checking there. - return recoverFromFileOffset(FileOffset(RecordBegin - FileBegin)); -} - -OnDiskHashMappedTrie::const_pointer -OnDiskHashMappedTrie::recoverFromFileOffset(FileOffset Offset) const { +Expected +OnDiskTrieRawHashMap::recoverFromFileOffset(FileOffset Offset) const { // Check alignment. - if (!isAligned(MappedFileRegionBumpPtr::getAlign(), Offset.get())) - return const_pointer(); + if (!isAligned(MappedFileRegionArena::getAlign(), Offset.get())) + return createStringError(make_error_code(std::errc::protocol_error), + "unaligned file offset at 0x" + + utohexstr(Offset.get(), /*LowerCase=*/true)); // Check bounds. // // Note: There's no potential overflow when using \c uint64_t because Offset - // is in \c [0,INT64_MAX] and the record size is in \c [0,UINT32_MAX]. - assert(Offset.get() >= 0 && "Expected FileOffset constructor guarantee this"); - if ((uint64_t)Offset.get() + Impl->Trie.getRecordSize() > - Impl->File.getAlloc().size()) - return const_pointer(); + // is in valid offset range and the record size is in \c [0,UINT32_MAX]. + if (!validOffset(Offset) || + Offset.get() + Impl->Trie.getRecordSize() > Impl->File.getAlloc().size()) + return createStringError(make_error_code(std::errc::protocol_error), + "file offset too large: 0x" + + utohexstr(Offset.get(), /*LowerCase=*/true)); // Looks okay... 
- HashMappedTrieHandle::RecordData D = + TrieRawHashMapHandle::RecordData D = Impl->Trie.getRecord(SubtrieSlotValue::getDataOffset(Offset)); - return const_pointer(D.getFileOffset(), D.Proxy); + return ConstOnDiskPtr(D.Proxy, D.getFileOffset()); } -OnDiskHashMappedTrie::const_pointer -OnDiskHashMappedTrie::find(ArrayRef Hash) const { - HashMappedTrieHandle Trie = Impl->Trie; +OnDiskTrieRawHashMap::ConstOnDiskPtr +OnDiskTrieRawHashMap::find(ArrayRef Hash) const { + TrieRawHashMapHandle Trie = Impl->Trie; assert(Hash.size() == Trie.getNumHashBytes() && "Invalid hash"); SubtrieHandle S = Trie.getRoot(); if (!S) - return const_pointer(); + return ConstOnDiskPtr(); - IndexGenerator IndexGen = Trie.getIndexGen(S, Hash); + TrieHashIndexGenerator IndexGen = Trie.getIndexGen(S, Hash); size_t Index = IndexGen.next(); for (;;) { // Try to set the content. SubtrieSlotValue V = S.load(Index); if (!V) - return const_pointer(); + return ConstOnDiskPtr(); // Check for an exact match. if (V.isData()) { - HashMappedTrieHandle::RecordData D = Trie.getRecord(V); - return D.Proxy.Hash == Hash ? const_pointer(D.getFileOffset(), D.Proxy) - : const_pointer(); + TrieRawHashMapHandle::RecordData D = Trie.getRecord(V); + return D.Proxy.Hash == Hash ? 
ConstOnDiskPtr(D.Proxy, D.getFileOffset()) + : ConstOnDiskPtr(); } Index = IndexGen.next(); @@ -786,35 +529,24 @@ void SubtrieHandle::reinitialize(uint32_t StartBit, uint32_t NumBits) { H->NumBits = NumBits; } -Expected -OnDiskHashMappedTrie::insertLazy(ArrayRef Hash, +Expected +OnDiskTrieRawHashMap::insertLazy(ArrayRef Hash, LazyInsertOnConstructCB OnConstruct, LazyInsertOnLeakCB OnLeak) { - HashMappedTrieHandle Trie = Impl->Trie; + TrieRawHashMapHandle Trie = Impl->Trie; assert(Hash.size() == Trie.getNumHashBytes() && "Invalid hash"); - MappedFileRegionBumpPtr &Alloc = Impl->File.getAlloc(); + MappedFileRegionArena &Alloc = Impl->File.getAlloc(); std::optional S; auto Err = Trie.getOrCreateRoot(Alloc).moveInto(S); if (LLVM_UNLIKELY(Err)) return std::move(Err); - IndexGenerator IndexGen = Trie.getIndexGen(*S, Hash); + TrieHashIndexGenerator IndexGen = Trie.getIndexGen(*S, Hash); size_t Index = IndexGen.next(); - // FIXME: Add non-assertion based checks for data corruption that would - // otherwise cause infinite loops in release builds, instead calling - // report_fatal_error(). - // - // Two loops are possible: - // - All bits used up in the IndexGenerator because subtries are somehow - // linked in a cycle. Could confirm that each subtrie's start-bit - // follows from the start-bit and num-bits of its parent. Could also check - // that the generator doesn't run out of bits. - // - Existing data matches tail of Hash but not the head (stored in an - // invalid spot). Probably a cheap way to check this too, but needs - // thought. - std::optional NewRecord; + // Walk through the hash bytes and insert into correct trie position. 
+ std::optional NewRecord; SubtrieHandle UnusedSubtrie; for (;;) { SubtrieSlotValue Existing = S->load(Index); @@ -830,7 +562,8 @@ OnDiskHashMappedTrie::insertLazy(ArrayRef Hash, } if (S->compare_exchange_strong(Index, Existing, NewRecord->Offset)) - return pointer(NewRecord->Offset.asDataFileOffset(), NewRecord->Proxy); + return OnDiskPtr(NewRecord->Proxy, + NewRecord->Offset.asDataFileOffset()); // Race means that Existing is no longer empty; fall through... } @@ -842,13 +575,13 @@ OnDiskHashMappedTrie::insertLazy(ArrayRef Hash, } // Check for an exact match. - HashMappedTrieHandle::RecordData ExistingRecord = Trie.getRecord(Existing); + TrieRawHashMapHandle::RecordData ExistingRecord = Trie.getRecord(Existing); if (ExistingRecord.Proxy.Hash == Hash) { if (NewRecord && OnLeak) OnLeak(NewRecord->Offset.asDataFileOffset(), NewRecord->Proxy, ExistingRecord.Offset.asDataFileOffset(), ExistingRecord.Proxy); - return pointer(ExistingRecord.Offset.asDataFileOffset(), - ExistingRecord.Proxy); + return OnDiskPtr(ExistingRecord.Proxy, + ExistingRecord.Offset.asDataFileOffset()); } // Sink the existing content as long as the indexes match. 
@@ -872,7 +605,7 @@ OnDiskHashMappedTrie::insertLazy(ArrayRef Hash, } Expected SubtrieHandle::sink(size_t I, SubtrieSlotValue V, - MappedFileRegionBumpPtr &Alloc, + MappedFileRegionArena &Alloc, size_t NumSubtrieBits, SubtrieHandle &UnusedSubtrie, size_t NewI) { @@ -906,23 +639,23 @@ Expected SubtrieHandle::sink(size_t I, SubtrieSlotValue V, return SubtrieHandle(Alloc.getRegion(), V, Logger); } -void OnDiskHashMappedTrie::print( +void OnDiskTrieRawHashMap::print( raw_ostream &OS, function_ref)> PrintRecordData) const { Impl->Trie.print(OS, PrintRecordData); } -Error OnDiskHashMappedTrie::validate( +Error OnDiskTrieRawHashMap::validate( function_ref RecordVerifier) const { - return Impl->Trie.validate(RecordVerifier); -} + uint64_t BumpPtr = Impl->File.getAlloc().size(); + if (!isAligned(MappedFileRegionArena::getAlign(), BumpPtr)) + return createStringError(make_error_code(std::errc::protocol_error), + "arena bump pointer is not aligned: 0x" + + utohexstr(BumpPtr, /*LowerCase=*/true)); -static void printHexDigit(raw_ostream &OS, uint8_t Digit) { - if (Digit < 10) - OS << char(Digit + '0'); - else - OS << char(Digit - 10 + 'a'); + return Impl->Trie.validate(RecordVerifier); } +// Helper function that prints hexdigit and have a sub-byte starting position. static void printHexDigits(raw_ostream &OS, ArrayRef Bytes, size_t StartBit, size_t NumBits) { assert(StartBit % 4 == 0); @@ -930,7 +663,7 @@ static void printHexDigits(raw_ostream &OS, ArrayRef Bytes, for (size_t I = StartBit, E = StartBit + NumBits; I != E; I += 4) { uint8_t HexPair = Bytes[I / 8]; uint8_t HexDigit = I % 8 == 0 ? 
HexPair >> 4 : HexPair & 0xf; - printHexDigit(OS, HexDigit); + OS << hexdigit(HexDigit, /*LowerCase=*/true); } } @@ -979,14 +712,14 @@ static void printPrefix(raw_ostream &OS, StringRef Prefix) { bool ErrorParsingBinary = Prefix.take_front(4).getAsInteger(2, Digit); assert(!ErrorParsingBinary); (void)ErrorParsingBinary; - printHexDigit(OS, Digit); + OS << hexdigit(Digit, /*LowerCase=*/true); Prefix = Prefix.drop_front(4); } if (!Prefix.empty()) OS << "[" << Prefix << "]"; } -LLVM_DUMP_METHOD void OnDiskHashMappedTrie::dump() const { print(dbgs()); } +LLVM_DUMP_METHOD void OnDiskTrieRawHashMap::dump() const { print(dbgs()); } static Expected checkParameter(StringRef Label, size_t Max, std::optional Value, @@ -1004,23 +737,13 @@ static Expected checkParameter(StringRef Label, size_t Max, "invalid " + Label + ": " + Twine(*Value) + " (max: " + Twine(Max) + ")"); } -static Error checkTable(StringRef Label, size_t Expected, size_t Observed, - StringRef Path, StringRef TrieName) { - if (Expected == Observed) - return Error::success(); - return createTableConfigError(std::errc::invalid_argument, Path, TrieName, - "mismatched " + Label + - " (expected: " + Twine(Expected) + - ", observed: " + Twine(Observed) + ")"); -} - -size_t OnDiskHashMappedTrie::size() const { return Impl->File.size(); } -size_t OnDiskHashMappedTrie::capacity() const { +size_t OnDiskTrieRawHashMap::size() const { return Impl->File.size(); } +size_t OnDiskTrieRawHashMap::capacity() const { return Impl->File.getRegion().size(); } -Expected -OnDiskHashMappedTrie::create(const Twine &PathTwine, const Twine &TrieNameTwine, +Expected +OnDiskTrieRawHashMap::create(const Twine &PathTwine, const Twine &TrieNameTwine, size_t NumHashBits, uint64_t DataSize, uint64_t MaxFileSize, std::optional NewFileInitialSize, @@ -1032,19 +755,23 @@ OnDiskHashMappedTrie::create(const Twine &PathTwine, const Twine &TrieNameTwine, SmallString<128> TrieNameStorage; StringRef TrieName = TrieNameTwine.toStringRef(TrieNameStorage); 
+ if (MaxFileSize == 0) + return createTableConfigError(std::errc::invalid_argument, Path, TrieName, + "invalid size"); + constexpr size_t DefaultNumRootBits = 10; constexpr size_t DefaultNumSubtrieBits = 6; size_t NumRootBits; if (Error E = checkParameter( - "root bits", HashMappedTrieHandle::MaxNumRootBits, + "root bits", TrieRawHashMapHandle::MaxNumRootBits, NewTableNumRootBits, DefaultNumRootBits, Path, TrieName) .moveInto(NumRootBits)) return std::move(E); size_t NumSubtrieBits; if (Error E = checkParameter("subtrie bits", - HashMappedTrieHandle::MaxNumSubtrieBits, + TrieRawHashMapHandle::MaxNumSubtrieBits, NewTableNumSubtrieBits, DefaultNumSubtrieBits, Path, TrieName) .moveInto(NumSubtrieBits)) @@ -1052,7 +779,7 @@ OnDiskHashMappedTrie::create(const Twine &PathTwine, const Twine &TrieNameTwine, size_t NumHashBytes = NumHashBits >> 3; if (Error E = - checkParameter("hash size", HashMappedTrieHandle::MaxNumHashBits, + checkParameter("hash size", TrieRawHashMapHandle::MaxNumHashBits, NumHashBits, std::nullopt, Path, TrieName) .takeError()) return std::move(E); @@ -1065,14 +792,13 @@ OnDiskHashMappedTrie::create(const Twine &PathTwine, const Twine &TrieNameTwine, // Constructor for if the file doesn't exist. auto NewDBConstructor = [&](DatabaseFile &DB) -> Error { - auto Trie = HashMappedTrieHandle::create(DB.getAlloc(), TrieName, + auto Trie = TrieRawHashMapHandle::create(DB.getAlloc(), TrieName, NumRootBits, NumSubtrieBits, NumHashBits, DataSize, Logger); if (LLVM_UNLIKELY(!Trie)) return Trie.takeError(); - DB.addTable(*Trie); - return Error::success(); + return DB.addTable(*Trie); }; // Get or create the file. @@ -1082,16 +808,14 @@ OnDiskHashMappedTrie::create(const Twine &PathTwine, const Twine &TrieNameTwine, return File.takeError(); // Find the trie and validate it. - // - // TODO: Add support for creating/adding a table to an existing file. 
std::optional Table = File->findTable(TrieName); if (!Table) return createTableConfigError(std::errc::argument_out_of_domain, Path, TrieName, "table not found"); - if (Error E = checkTable("table kind", (size_t)HashMappedTrieHandle::Kind, + if (Error E = checkTable("table kind", (size_t)TrieRawHashMapHandle::Kind, (size_t)Table->getHeader().Kind, Path, TrieName)) return std::move(E); - auto Trie = Table->cast(); + auto Trie = Table->cast(); Trie.setLogger(Logger); assert(Trie && "Already checked the kind"); @@ -1110,8 +834,8 @@ OnDiskHashMappedTrie::create(const Twine &PathTwine, const Twine &TrieNameTwine, "unsupported flags: " + Twine(Flags)); // Success. - OnDiskHashMappedTrie::ImplType Impl{DatabaseFile(std::move(*File)), Trie}; - return OnDiskHashMappedTrie(std::make_unique(std::move(Impl))); + OnDiskTrieRawHashMap::ImplType Impl{DatabaseFile(std::move(*File)), Trie}; + return OnDiskTrieRawHashMap(std::make_unique(std::move(Impl))); } static Error createInvalidTrieError(uint64_t Offset, const Twine &Msg) { @@ -1126,10 +850,15 @@ static Error createInvalidTrieError(uint64_t Offset, const Twine &Msg) { //===----------------------------------------------------------------------===// namespace { -// A vistior to traverse the Trie. +/// A multi-threaded vistior to traverse the Trie. +/// +/// TODO: add more sanity checks that isn't just plain data corruption. For +/// example, some ill-formed data can be constructed to form a cycle using +/// Sub-Tries and it can lead to inifinite loop when visiting (or inserting +/// data). class TrieVisitor { public: - TrieVisitor(HashMappedTrieHandle Trie, unsigned ThreadCount = 0, + TrieVisitor(TrieRawHashMapHandle Trie, unsigned ThreadCount = 0, unsigned ErrorLimit = 50) : Trie(Trie), ErrorLimit(ErrorLimit), Threads(hardware_concurrency(ThreadCount)) {} @@ -1137,23 +866,28 @@ class TrieVisitor { Error visit(); private: + // Virtual method to implement the action when visiting a sub-trie. 
virtual Error visitSubTrie(StringRef Prefix, SubtrieHandle SubTrie) { return Error::success(); } + // Virtual method to implement the action when visiting a slot in a trie node. virtual Error visitSlot(unsigned I, SubtrieHandle Subtrie, StringRef Prefix, SubtrieSlotValue Slot) { return Error::success(); } protected: - HashMappedTrieHandle Trie; + TrieRawHashMapHandle Trie; private: Error traverseTrieNode(SubtrieHandle Node, StringRef Prefix); Error validateSubTrie(SubtrieHandle Node, bool IsRoot); + Error validateSubtrieHeader(uint64_t Offset, bool IsRoot); + + // Helper function to capture errors when visiting the trie nodes. void addError(Error NewError) { assert(NewError && "not an error"); std::lock_guard ErrorLock(Lock); @@ -1182,9 +916,10 @@ class TrieVisitor { DefaultThreadPool Threads; }; +/// A visitor that traverse and print the Trie. class TriePrinter : public TrieVisitor { public: - TriePrinter(HashMappedTrieHandle Trie, raw_ostream &OS, + TriePrinter(TrieRawHashMapHandle Trie, raw_ostream &OS, function_ref)> PrintRecordData) : TrieVisitor(Trie, /*ThreadCount=*/1), OS(OS), PrintRecordData(PrintRecordData) {} @@ -1196,7 +931,7 @@ class TriePrinter : public TrieVisitor { OS << "records\n"; llvm::sort(Records); for (int64_t Offset : Records) { - HashMappedTrieHandle::RecordData Record = + TrieRawHashMapHandle::RecordData Record = Trie.getRecord(SubtrieSlotValue::getDataOffset(Offset)); if (auto Err = printRecord(Record)) return Err; @@ -1204,8 +939,8 @@ class TriePrinter : public TrieVisitor { return Error::success(); } - Error printRecord(HashMappedTrieHandle::RecordData &Record) { - OS << "- addr=" << (void*)Record.getFileOffset().get() << " "; + Error printRecord(TrieRawHashMapHandle::RecordData &Record) { + OS << "- addr=" << (void *)Record.getFileOffset().get() << " "; if (PrintRecordData) { PrintRecordData(Record.Proxy.Data); } else { @@ -1248,7 +983,7 @@ class TriePrinter : public TrieVisitor { OS << "\n"; return Error::success(); } - 
HashMappedTrieHandle::RecordData Record = Trie.getRecord(Slot); + TrieRawHashMapHandle::RecordData Record = Trie.getRecord(Slot); OS << "addr=" << (void *)Record.getFileOffset().get(); OS << " content="; Subtrie.printHash(OS, Record.Proxy.Hash); @@ -1263,12 +998,12 @@ class TriePrinter : public TrieVisitor { SmallVector Records; }; -// TrieVerifier that adds additional verification on top of the basic visitor. +/// TrieVerifier that adds additional verification on top of the basic visitor. class TrieVerifier : public TrieVisitor { public: TrieVerifier( - HashMappedTrieHandle Trie, - function_ref + TrieRawHashMapHandle Trie, + function_ref RecordVerifier) : TrieVisitor(Trie), RecordVerifier(RecordVerifier) {} @@ -1280,24 +1015,35 @@ class TrieVerifier : public TrieVisitor { Error visitSlot(unsigned I, SubtrieHandle Subtrie, StringRef Prefix, SubtrieSlotValue Slot) final { if (RecordVerifier && Slot.isData()) { - if (!isAligned(MappedFileRegionBumpPtr::getAlign(), Slot.asData())) + if (!isAligned(MappedFileRegionArena::getAlign(), Slot.asData())) return createInvalidTrieError(Slot.asData(), "mis-aligned data entry"); - HashMappedTrieHandle::RecordData Record = + uint64_t DataOffset = Slot.asData(); + uint64_t RecordEnd = DataOffset + Trie.getRecordSize(); + if (RecordEnd > (uint64_t)Trie.getRegion().size()) + return createInvalidTrieError(DataOffset, + "data entry extends past end of file"); + + TrieRawHashMapHandle::RecordData Record = Trie.getRecord(SubtrieSlotValue::getDataOffset(Slot.asData())); return RecordVerifier(Slot.asDataFileOffset(), - OnDiskHashMappedTrie::ConstValueProxy{ + OnDiskTrieRawHashMap::ConstValueProxy{ Record.Proxy.Hash, Record.Proxy.Data}); } return Error::success(); } - function_ref + function_ref RecordVerifier; }; } // namespace Error TrieVisitor::visit() { + if (int64_t RootOffset = Trie.getRootTrieOffset()) { + if (auto Err = validateSubtrieHeader(RootOffset, /*IsRoot=*/true)) + return Err; + } + auto Root = Trie.getRoot(); if (!Root) 
return Error::success(); @@ -1321,6 +1067,8 @@ Error TrieVisitor::visit() { std::string SubtriePrefix; appendIndexBits(SubtriePrefix, I, NumSlots); if (Slot.isSubtrie()) { + if (auto Err = validateSubtrieHeader(Slot.asSubtrie(), /*IsRoot=*/false)) + return Err; SubtrieHandle S(Trie.getRegion(), Slot, Trie.getLogger()); Subs.push_back(S); Prefixes.push_back(SubtriePrefix); @@ -1354,9 +1102,11 @@ Error TrieVisitor::validateSubTrie(SubtrieHandle Node, bool IsRoot) { Trie.getRegion().data() + Trie.getRegion().size()) return createInvalidTrieError(Offset, "subtrie node spans out of bound"); - if (Node.getStartBit() + Node.getNumBits() > Trie.getNumHashBits()) + if (!IsRoot && + Node.getStartBit() + Node.getNumBits() > Trie.getNumHashBits()) { return createInvalidTrieError(Offset, "subtrie represents too many hash bits"); + } if (IsRoot) { if (Node.getStartBit() != 0) @@ -1372,6 +1122,26 @@ Error TrieVisitor::validateSubTrie(SubtrieHandle Node, bool IsRoot) { return Error::success(); } +Error TrieVisitor::validateSubtrieHeader(uint64_t Offset, bool IsRoot) { + uint64_t RegionSize = Trie.getRegion().size(); + if (Offset + sizeof(SubtrieHandle::Header) > RegionSize) + return createInvalidTrieError(Offset, "subtrie header out of bound"); + + auto *H = reinterpret_cast( + Trie.getRegion().data() + Offset); + if (H->NumBits == 0) + return createInvalidTrieError(Offset, "invalid subtrie NumBits"); + + if (!IsRoot && H->NumBits > Trie.getNumSubtrieBits()) + return createInvalidTrieError(Offset, "subtrie has corrupt NumBits"); + + if (Offset + static_cast(SubtrieHandle::getSize(H->NumBits)) > + RegionSize) + return createInvalidTrieError(Offset, "subtrie node spans out of bound"); + + return Error::success(); +} + Error TrieVisitor::traverseTrieNode(SubtrieHandle Node, StringRef Prefix) { if (auto Err = validateSubTrie(Node, /*IsRoot=*/false)) return Err; @@ -1392,6 +1162,8 @@ Error TrieVisitor::traverseTrieNode(SubtrieHandle Node, StringRef Prefix) { std::string SubtriePrefix = 
Prefix.str(); appendIndexBits(SubtriePrefix, I, NumSlots); if (Slot.isSubtrie()) { + if (auto Err = validateSubtrieHeader(Slot.asSubtrie(), /*IsRoot=*/false)) + return Err; SubtrieHandle S(Trie.getRegion(), Slot, Trie.getLogger()); Subs.push_back(S); Prefixes.push_back(SubtriePrefix); @@ -1406,7 +1178,7 @@ Error TrieVisitor::traverseTrieNode(SubtrieHandle Node, StringRef Prefix) { return Error::success(); } -void HashMappedTrieHandle::print( +void TrieRawHashMapHandle::print( raw_ostream &OS, function_ref)> PrintRecordData) const { OS << "hash-num-bits=" << getNumHashBits() << " hash-size=" << getNumHashBytes() @@ -1418,12 +1190,10 @@ void HashMappedTrieHandle::print( if (auto Err = Printer.printRecords()) OS << "error: " << toString(std::move(Err)) << "\n"; - - return; } -Error HashMappedTrieHandle::validate( - function_ref +Error TrieRawHashMapHandle::validate( + function_ref RecordVerifier) const { // Use the base TrieVisitor to identify the errors inside trie first. TrieVisitor BasicVerifier(*this); @@ -1437,271 +1207,61 @@ Error HashMappedTrieHandle::validate( return Verifier.visit(); } -//===----------------------------------------------------------------------===// -// DataAllocator data structures. 
-//===----------------------------------------------------------------------===// - -namespace { -/// DataAllocator table layout: -/// - [8-bytes: Generic table header] -/// - 8-bytes: AllocatorOffset (reserved for implementing free lists) -/// - 8-bytes: Size for user data header -/// - -/// -/// Record layout: -/// - -class DataAllocatorHandle { -public: - static constexpr TableHandle::TableKind Kind = - TableHandle::TableKind::DataAllocator; - - struct Header { - TableHandle::Header GenericHeader; - std::atomic AllocatorOffset; - const uint64_t UserHeaderSize; - }; - - operator TableHandle() const { - if (!H) - return TableHandle(); - return TableHandle(*Region, H->GenericHeader); - } - - Expected> allocate(MappedFileRegionBumpPtr &Alloc, - size_t DataSize) { - assert(&Alloc.getRegion() == Region); - auto Ptr = Alloc.allocate(DataSize); - if (LLVM_UNLIKELY(!Ptr)) - return Ptr.takeError(); - return MutableArrayRef(*Ptr, DataSize); - } - - explicit operator bool() const { return H; } - const Header &getHeader() const { return *H; } - MappedFileRegion &getRegion() const { return *Region; } - - MutableArrayRef getUserHeader() { - return MutableArrayRef(reinterpret_cast(H + 1), - H->UserHeaderSize); - } - - static Expected create(MappedFileRegionBumpPtr &Alloc, - StringRef Name, - uint32_t UserHeaderSize); - - DataAllocatorHandle() = default; - DataAllocatorHandle(MappedFileRegion &Region, Header &H) - : Region(&Region), H(&H) {} - DataAllocatorHandle(MappedFileRegion &Region, intptr_t HeaderOffset) - : DataAllocatorHandle( - Region, *reinterpret_cast
(Region.data() + HeaderOffset)) { - } - -private: - MappedFileRegion *Region = nullptr; - Header *H = nullptr; -}; - -} // end anonymous namespace - -struct OnDiskDataAllocator::ImplType { - DatabaseFile File; - DataAllocatorHandle Store; -}; - -Expected -DataAllocatorHandle::create(MappedFileRegionBumpPtr &Alloc, StringRef Name, - uint32_t UserHeaderSize) { - // Allocate. - auto Offset = - Alloc.allocateOffset(sizeof(Header) + UserHeaderSize + Name.size() + 1); - if (LLVM_UNLIKELY(!Offset)) - return Offset.takeError(); - - // Construct the header and the name. - assert(Name.size() <= UINT16_MAX && "Expected smaller table name"); - auto *H = new (Alloc.getRegion().data() + *Offset) - Header{{TableHandle::TableKind::DataAllocator, (uint16_t)Name.size(), - (int32_t)(sizeof(Header) + UserHeaderSize)}, - /*AllocatorOffset=*/{0}, - /*UserHeaderSize=*/UserHeaderSize}; - memset(H + 1, 0, UserHeaderSize); - char *NameStorage = reinterpret_cast(H + 1) + UserHeaderSize; - llvm::copy(Name, NameStorage); - NameStorage[Name.size()] = 0; - return DataAllocatorHandle(Alloc.getRegion(), *H); -} - -Expected OnDiskDataAllocator::create( - const Twine &PathTwine, const Twine &TableNameTwine, uint64_t MaxFileSize, - std::optional NewFileInitialSize, uint32_t UserHeaderSize, - std::shared_ptr Logger, - function_ref UserHeaderInit) { - assert(!UserHeaderSize || UserHeaderInit); - SmallString<128> PathStorage; - StringRef Path = PathTwine.toStringRef(PathStorage); - SmallString<128> TableNameStorage; - StringRef TableName = TableNameTwine.toStringRef(TableNameStorage); - - // Constructor for if the file doesn't exist. - auto NewDBConstructor = [&](DatabaseFile &DB) -> Error { - auto Store = - DataAllocatorHandle::create(DB.getAlloc(), TableName, UserHeaderSize); - if (LLVM_UNLIKELY(!Store)) - return Store.takeError(); - - DB.addTable(*Store); - if (UserHeaderSize) - UserHeaderInit(Store->getUserHeader().data()); - return Error::success(); - }; - - // Get or create the file. 
- Expected File = - DatabaseFile::create(Path, MaxFileSize, Logger, NewDBConstructor); - if (!File) - return File.takeError(); - - // Find the table and validate it. - // - // TODO: Add support for creating/adding a table to an existing file. - std::optional Table = File->findTable(TableName); - if (!Table) - return createTableConfigError(std::errc::argument_out_of_domain, Path, - TableName, "table not found"); - if (Error E = checkTable("table kind", (size_t)DataAllocatorHandle::Kind, - (size_t)Table->getHeader().Kind, Path, TableName)) - return std::move(E); - auto Store = Table->cast(); - assert(Store && "Already checked the kind"); - - // Success. - OnDiskDataAllocator::ImplType Impl{DatabaseFile(std::move(*File)), Store}; - return OnDiskDataAllocator(std::make_unique(std::move(Impl))); -} - -Expected -OnDiskDataAllocator::allocate(size_t Size) { - auto Data = Impl->Store.allocate(Impl->File.getAlloc(), Size); - if (LLVM_UNLIKELY(!Data)) - return Data.takeError(); - - return pointer(FileOffset(Data->data() - Impl->Store.getRegion().data()), - *Data); -} - -const char *OnDiskDataAllocator::beginData(FileOffset Offset) const { - assert(Offset); - assert(Impl); - assert(Offset.get() < (int64_t)Impl->File.getAlloc().size()); - return Impl->File.getRegion().data() + Offset.get(); -} - -MutableArrayRef OnDiskDataAllocator::getUserHeader() { - return Impl->Store.getUserHeader(); -} - -size_t OnDiskDataAllocator::size() const { return Impl->File.size(); } -size_t OnDiskDataAllocator::capacity() const { - return Impl->File.getRegion().size(); -} - -OnDiskDataAllocator::OnDiskDataAllocator(std::unique_ptr Impl) - : Impl(std::move(Impl)) {} - #else // !LLVM_ENABLE_ONDISK_CAS -struct OnDiskHashMappedTrie::ImplType {}; +struct OnDiskTrieRawHashMap::ImplType {}; -Expected -OnDiskHashMappedTrie::create(const Twine &PathTwine, const Twine &TrieNameTwine, +Expected +OnDiskTrieRawHashMap::create(const Twine &PathTwine, const Twine &TrieNameTwine, size_t NumHashBits, uint64_t 
DataSize, uint64_t MaxFileSize, std::optional NewFileInitialSize, std::shared_ptr Logger, std::optional NewTableNumRootBits, std::optional NewTableNumSubtrieBits) { - report_fatal_error("not supported"); + return createStringError(make_error_code(std::errc::not_supported), + "OnDiskTrieRawHashMap is not supported"); } -Expected -OnDiskHashMappedTrie::insertLazy(ArrayRef Hash, +Expected +OnDiskTrieRawHashMap::insertLazy(ArrayRef Hash, LazyInsertOnConstructCB OnConstruct, LazyInsertOnLeakCB OnLeak) { - report_fatal_error("not supported"); + return createStringError(make_error_code(std::errc::not_supported), + "OnDiskTrieRawHashMap is not supported"); } -OnDiskHashMappedTrie::const_pointer -OnDiskHashMappedTrie::recoverFromFileOffset(FileOffset Offset) const { - report_fatal_error("not supported"); +Expected +OnDiskTrieRawHashMap::recoverFromFileOffset(FileOffset Offset) const { + return createStringError(make_error_code(std::errc::not_supported), + "OnDiskTrieRawHashMap is not supported"); } -OnDiskHashMappedTrie::const_pointer -OnDiskHashMappedTrie::find(ArrayRef Hash) const { - report_fatal_error("not supported"); +OnDiskTrieRawHashMap::ConstOnDiskPtr +OnDiskTrieRawHashMap::find(ArrayRef Hash) const { + return ConstOnDiskPtr(); } -void OnDiskHashMappedTrie::print( +void OnDiskTrieRawHashMap::print( raw_ostream &OS, function_ref)> PrintRecordData) const { - report_fatal_error("not supported"); } -Error OnDiskHashMappedTrie::validate( - function_ref +Error OnDiskTrieRawHashMap::validate( + function_ref RecordVerifier) const { - report_fatal_error("not supported"); + return createStringError(make_error_code(std::errc::not_supported), + "OnDiskTrieRawHashMap is not supported"); } -size_t OnDiskHashMappedTrie::size() const { - report_fatal_error("not supported"); -} - -size_t OnDiskHashMappedTrie::capacity() const { - report_fatal_error("not supported"); -} - -size_t OnDiskDataAllocator::capacity() const { - report_fatal_error("not supported"); -} - -struct 
OnDiskDataAllocator::ImplType {}; - -Expected OnDiskDataAllocator::create( - const Twine &Path, const Twine &TableName, uint64_t MaxFileSize, - std::optional NewFileInitialSize, uint32_t UserHeaderSize, - std::shared_ptr Logger, - function_ref UserHeaderInit) { - report_fatal_error("not supported"); -} - -Expected -OnDiskDataAllocator::allocate(size_t Size) { - report_fatal_error("not supported"); -} - -const char *OnDiskDataAllocator::beginData(FileOffset Offset) const { - report_fatal_error("not supported"); -} - -MutableArrayRef OnDiskDataAllocator::getUserHeader() { - report_fatal_error("not supported"); -} - -size_t OnDiskDataAllocator::size() const { - report_fatal_error("not supported"); -} +size_t OnDiskTrieRawHashMap::size() const { return 0; } +size_t OnDiskTrieRawHashMap::capacity() const { return 0; } #endif // LLVM_ENABLE_ONDISK_CAS -OnDiskHashMappedTrie::OnDiskHashMappedTrie(std::unique_ptr Impl) +OnDiskTrieRawHashMap::OnDiskTrieRawHashMap(std::unique_ptr Impl) : Impl(std::move(Impl)) {} -OnDiskHashMappedTrie::OnDiskHashMappedTrie(OnDiskHashMappedTrie &&RHS) = +OnDiskTrieRawHashMap::OnDiskTrieRawHashMap(OnDiskTrieRawHashMap &&RHS) = default; -OnDiskHashMappedTrie & -OnDiskHashMappedTrie::operator=(OnDiskHashMappedTrie &&RHS) = default; -OnDiskHashMappedTrie::~OnDiskHashMappedTrie() = default; - -OnDiskDataAllocator::OnDiskDataAllocator(OnDiskDataAllocator &&RHS) = default; -OnDiskDataAllocator & -OnDiskDataAllocator::operator=(OnDiskDataAllocator &&RHS) = default; -OnDiskDataAllocator::~OnDiskDataAllocator() = default; +OnDiskTrieRawHashMap & +OnDiskTrieRawHashMap::operator=(OnDiskTrieRawHashMap &&RHS) = default; +OnDiskTrieRawHashMap::~OnDiskTrieRawHashMap() = default; diff --git a/llvm/lib/CAS/PluginCAS.cpp b/llvm/lib/CAS/PluginCAS.cpp index 029a33833971b..5f02548850137 100644 --- a/llvm/lib/CAS/PluginCAS.cpp +++ b/llvm/lib/CAS/PluginCAS.cpp @@ -87,7 +87,7 @@ Expected> PluginCASContext::create( #undef CASPLUGINAPI_FUNCTION llcas_cas_options_t c_opts 
= Functions.cas_options_create(); - auto _ = make_scope_exit([&]() { Functions.cas_options_dispose(c_opts); }); + llvm::scope_exit _([&]() { Functions.cas_options_dispose(c_opts); }); Functions.cas_options_set_client_version(c_opts, LLCAS_VERSION_MAJOR, LLCAS_VERSION_MINOR); @@ -329,7 +329,7 @@ void PluginObjectStore::loadIfExistsAsync( llcas_loaded_object_t c_obj, char *c_err) { auto getObjAndDispose = [&](LoadObjCtx *Ctx) -> Expected> { - auto _ = make_scope_exit([Ctx]() { delete Ctx; }); + llvm::scope_exit _([Ctx]() { delete Ctx; }); switch (c_result) { case LLCAS_LOOKUP_RESULT_SUCCESS: return Ctx->CAS->makeObjectHandle(c_obj.opaque); @@ -533,7 +533,7 @@ void PluginActionCache::getImplAsync( llcas_objectid_t c_value, char *c_err) { auto getValueAndDispose = [&](CacheGetCtx *Ctx) -> Expected> { - auto _ = make_scope_exit([Ctx]() { delete Ctx; }); + llvm::scope_exit _([Ctx]() { delete Ctx; }); switch (c_result) { case LLCAS_LOOKUP_RESULT_SUCCESS: { llcas_digest_t c_digest = Ctx->CASCtx->Functions.objectid_get_digest( @@ -601,7 +601,7 @@ void PluginActionCache::putImplAsync(ArrayRef ResolvedKey, }; auto CachePutCB = [](void *c_ctx, bool failed, char *c_err) { auto checkForErrorAndDispose = [&](CachePutCtx *Ctx) -> Error { - auto _ = make_scope_exit([Ctx]() { delete Ctx; }); + llvm::scope_exit _([Ctx]() { delete Ctx; }); if (failed) return Ctx->CASCtx->errorAndDispose(c_err); return Error::success(); diff --git a/llvm/lib/CAS/UnifiedOnDiskCache.cpp b/llvm/lib/CAS/UnifiedOnDiskCache.cpp index bd19dadb4cb59..e948b13db037a 100644 --- a/llvm/lib/CAS/UnifiedOnDiskCache.cpp +++ b/llvm/lib/CAS/UnifiedOnDiskCache.cpp @@ -1,4 +1,4 @@ -//===- UnifiedOnDiskCache.cpp -----------------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -6,60 +6,62 @@ // //===----------------------------------------------------------------------===// // -// Encapsulates \p OnDiskGraphDB and \p OnDiskKeyValueDB instances within one -// directory while also restricting storage growth with a scheme of chaining the -// two most recent directories (primary & upstream), where the primary -// "faults-in" data from the upstream one. When the primary (most recent) -// directory exceeds its intended limit a new empty directory becomes the -// primary one. -// -// Within the top-level directory (the path that \p UnifiedOnDiskCache::open -// receives) there are directories named like this: -// -// 'v.' -// 'v..' -// ... -// -// 'version' is the version integer for this \p UnifiedOnDiskCache's scheme and -// the part after the dot is an increasing integer. The primary directory is the -// one with the highest integer and the upstream one is the directory before it. -// For example, if the sub-directories contained are: -// -// 'v1.5', 'v1.6', 'v1.7', 'v1.8' -// -// Then the primary one is 'v1.8', the upstream one is 'v1.7', and the rest are -// unused directories that can be safely deleted at any time and by any process. -// -// Contained within the top-level directory is a file named "lock" which is used -// for processes to take shared or exclusive locks for the contents of the top -// directory. While a \p UnifiedOnDiskCache is open it keeps a shared lock for -// the top-level directory; when it closes, if the primary sub-directory -// exceeded its limit, it attempts to get an exclusive lock in order to create a -// new empty primary directory; if it can't get the exclusive lock it gives up -// and lets the next \p UnifiedOnDiskCache instance that closes to attempt -// again. -// -// The downside of this scheme is that while \p UnifiedOnDiskCache is open on a -// directory, by any process, the storage size in that directory will keep -// growing unrestricted. 
But the major benefit is that garbage-collection can be -// triggered on a directory concurrently, at any time and by any process, -// without affecting any active readers/writers in the same process or other -// processes. -// -// The \c UnifiedOnDiskCache also provides validation and recovery on top of the -// underlying on-disk storage. The low-level storage is designed to remain -// coherent across regular process crashes, but may be invalid after power loss -// or similar system failures. \c UnifiedOnDiskCache::validateIfNeeded allows -// validating the contents once per boot and can recover by marking invalid -// data for garbage collection. -// -// The data recovery described above requires exclusive access to the CAS, and -// it is an error to attempt recovery if the CAS is open in any process/thread. -// In order to maximize backwards compatibility with tools that do not perform -// validation before opening the CAS, we do not attempt to get exclusive access -// until recovery is actually performed, meaning as long as the data is valid -// it will not conflict with concurrent use. +/// \file +/// Encapsulates \p OnDiskGraphDB and \p OnDiskKeyValueDB instances within one +/// directory while also restricting storage growth with a scheme of chaining +/// the two most recent directories (primary & upstream), where the primary +/// "faults-in" data from the upstream one. When the primary (most recent) +/// directory exceeds its intended limit a new empty directory becomes the +/// primary one. +/// +/// Within the top-level directory (the path that \p UnifiedOnDiskCache::open +/// receives) there are directories named like this: +/// +/// 'v.' +/// 'v.' +/// 'v.' +/// ... +/// +/// 'version' is the version integer for this \p UnifiedOnDiskCache's scheme and +/// the part after the dot is an increasing integer. The primary directory is +/// the one with the highest integer and the upstream one is the directory +/// before it. 
For example, if the sub-directories contained are: +/// +/// 'v1.5', 'v1.6', 'v1.7', 'v1.8' +/// +/// Then the primary one is 'v1.8', the upstream one is 'v1.7', and the rest are +/// unused directories that can be safely deleted at any time and by any +/// process. +/// +/// Contained within the top-level directory is a file named "lock" which is +/// used for processes to take shared or exclusive locks for the contents of the +/// top directory. While a \p UnifiedOnDiskCache is open it keeps a shared lock +/// for the top-level directory; when it closes, if the primary sub-directory +/// exceeded its limit, it attempts to get an exclusive lock in order to create +/// a new empty primary directory; if it can't get the exclusive lock it gives +/// up and lets the next \p UnifiedOnDiskCache instance that closes to attempt +/// again. +/// +/// The downside of this scheme is that while \p UnifiedOnDiskCache is open on a +/// directory, by any process, the storage size in that directory will keep +/// growing unrestricted. But the major benefit is that garbage-collection can +/// be triggered on a directory concurrently, at any time and by any process, +/// without affecting any active readers/writers in the same process or other +/// processes. +/// +/// The \c UnifiedOnDiskCache also provides validation and recovery on top of +/// the underlying on-disk storage. The low-level storage is designed to remain +/// coherent across regular process crashes, but may be invalid after power loss +/// or similar system failures. \c UnifiedOnDiskCache::validateIfNeeded allows +/// validating the contents once per boot and can recover by marking invalid +/// data for garbage collection. +/// +/// The data recovery described above requires exclusive access to the CAS, and +/// it is an error to attempt recovery if the CAS is open in any process/thread. 
+/// In order to maximize backwards compatibility with tools that do not perform +/// validation before opening the CAS, we do not attempt to get exclusive access +/// until recovery is actually performed, meaning as long as the data is valid +/// it will not conflict with concurrent use. // //===----------------------------------------------------------------------===// @@ -83,18 +85,8 @@ #include "llvm/Support/Path.h" #include "llvm/Support/Program.h" #include "llvm/Support/raw_ostream.h" -#include #include -#if __has_include() -#include -#endif - -#ifdef _WIN32 -#define NOMINMAX -#include -#endif - using namespace llvm; using namespace llvm::cas; using namespace llvm::cas::ondisk; @@ -107,38 +99,22 @@ static constexpr StringLiteral DBDirPrefix = "v1."; static constexpr StringLiteral ValidationFilename = "v1.validation"; static constexpr StringLiteral CorruptPrefix = "corrupt."; -Expected UnifiedOnDiskCache::KVPut(ObjectID Key, ObjectID Value) { - return KVPut(PrimaryGraphDB->getDigest(Key), Value); -} - -Expected UnifiedOnDiskCache::KVPut(ArrayRef Key, - ObjectID Value) { - static_assert(sizeof(Value.getOpaqueData()) == sizeof(uint64_t), - "unexpected return opaque type"); - std::array ValBytes; - support::endian::write64le(ValBytes.data(), Value.getOpaqueData()); - Expected> Existing = PrimaryKVDB->put(Key, ValBytes); - if (!Existing) - return Existing.takeError(); - assert(Existing->size() == sizeof(uint64_t)); - return ObjectID::fromOpaqueData(support::endian::read64le(Existing->data())); +ObjectID UnifiedOnDiskCache::getObjectIDFromValue(ArrayRef Value) { + // little endian encoded. 
+ assert(Value.size() == sizeof(uint64_t)); + return ObjectID::fromOpaqueData(support::endian::read64le(Value.data())); } -Expected> -UnifiedOnDiskCache::KVGet(ArrayRef Key) { - std::optional> Value; - if (Error E = PrimaryKVDB->get(Key).moveInto(Value)) - return std::move(E); - if (!Value) { - if (UpstreamKVDB) - return faultInFromUpstreamKV(Key); - return std::nullopt; - } - assert(Value->size() == sizeof(uint64_t)); - return ObjectID::fromOpaqueData(support::endian::read64le(Value->data())); +UnifiedOnDiskCache::ValueBytes +UnifiedOnDiskCache::getValueFromObjectID(ObjectID ID) { + // little endian encoded. + UnifiedOnDiskCache::ValueBytes ValBytes; + static_assert(ValBytes.size() == sizeof(ID.getOpaqueData())); + support::endian::write64le(ValBytes.data(), ID.getOpaqueData()); + return ValBytes; } -Expected> +Expected>> UnifiedOnDiskCache::faultInFromUpstreamKV(ArrayRef Key) { assert(UpstreamGraphDB); assert(UpstreamKVDB); @@ -152,55 +128,24 @@ UnifiedOnDiskCache::faultInFromUpstreamKV(ArrayRef Key) { // The value is the \p ObjectID in the context of the upstream // \p OnDiskGraphDB instance. Translate it to the context of the primary // \p OnDiskGraphDB instance. 
- assert(UpstreamValue->size() == sizeof(uint64_t)); - ObjectID UpstreamID = ObjectID::fromOpaqueData( - support::endian::read64le(UpstreamValue->data())); + ObjectID UpstreamID = getObjectIDFromValue(*UpstreamValue); auto PrimaryID = PrimaryGraphDB->getReference(UpstreamGraphDB->getDigest(UpstreamID)); if (LLVM_UNLIKELY(!PrimaryID)) return PrimaryID.takeError(); - return KVPut(Key, *PrimaryID); -} - -Error UnifiedOnDiskCache::validateActionCache() { - SmallVector> CachePairs; - CachePairs.emplace_back(PrimaryKVDB.get(), PrimaryGraphDB.get()); - if (UpstreamKVDB && UpstreamGraphDB) { - CachePairs.emplace_back(UpstreamKVDB.get(), UpstreamGraphDB); - } - - for (auto &CachePair : CachePairs) { - auto ValidateRef = [&](FileOffset Offset, ArrayRef Value) -> Error { - assert(Value.size() == sizeof(uint64_t) && "should be validated already"); - auto ID = ObjectID::fromOpaqueData(support::endian::read64le(Value.data())); - auto formatError = [&](Twine Msg) { - return createStringError( - llvm::errc::illegal_byte_sequence, - "bad record at 0x" + - utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " + - Msg.str()); - }; - if (Error E = CachePair.second->validateObjectID(ID)) - return formatError(llvm::toString(std::move(E))); - return Error::success(); - }; - - if (Error E = CachePair.first->validate(ValidateRef)) - return E; - } - return Error::success(); + return PrimaryKVDB->put(Key, getValueFromObjectID(*PrimaryID)); } /// \returns all the 'v.' names of sub-directories, sorted with /// ascending order of the integer after the dot. Corrupt directories, if /// included, will come first. 
-static Error getAllDBDirs(StringRef Path, SmallVectorImpl &DBDirs, - bool IncludeCorrupt = false) { +static Expected> +getAllDBDirs(StringRef Path, bool IncludeCorrupt = false) { struct DBDir { uint64_t Order; std::string Name; }; - SmallVector FoundDBDirs; + SmallVector FoundDBDirs; std::error_code EC; for (sys::fs::directory_iterator DirI(Path, EC), DirE; !EC && DirI != DirE; @@ -224,28 +169,30 @@ static Error getAllDBDirs(StringRef Path, SmallVectorImpl &DBDirs, return createFileError(Path, EC); llvm::sort(FoundDBDirs, [](const DBDir &LHS, const DBDir &RHS) -> bool { - return LHS.Order <= RHS.Order; + return LHS.Order < RHS.Order; }); + + SmallVector DBDirs; for (DBDir &Dir : FoundDBDirs) DBDirs.push_back(std::move(Dir.Name)); - return Error::success(); + return DBDirs; } -static Error getAllGarbageDirs(StringRef Path, - SmallVectorImpl &DBDirs) { - if (Error E = getAllDBDirs(Path, DBDirs, /*IncludeCorrupt=*/true)) - return E; +static Expected> getAllGarbageDirs(StringRef Path) { + auto DBDirs = getAllDBDirs(Path, /*IncludeCorrupt=*/true); + if (!DBDirs) + return DBDirs.takeError(); // FIXME: When the version of \p DBDirPrefix is bumped up we need to figure // out how to handle the leftover sub-directories of the previous version. 
- for (unsigned Keep = 2; Keep > 0 && !DBDirs.empty(); --Keep) { - StringRef Back(DBDirs.back()); + for (unsigned Keep = 2; Keep > 0 && !DBDirs->empty(); --Keep) { + StringRef Back(DBDirs->back()); if (Back.starts_with(CorruptPrefix)) break; - DBDirs.pop_back(); + DBDirs->pop_back(); } - return Error::success(); + return *DBDirs; } /// \returns Given a sub-directory named 'v.', it outputs the @@ -303,6 +250,10 @@ static Error validateOutOfProcess(StringRef LLVMCasBinary, StringRef RootPath, return Error::success(); } +Error UnifiedOnDiskCache::validateActionCache() const { + return getKeyValueDB().validate(); +} + static Error validateInProcess(StringRef RootPath, StringRef HashName, unsigned HashByteSize, bool CheckHash, OnDiskGraphDB::HashingFuncT HashFn) { @@ -318,42 +269,10 @@ static Error validateInProcess(StringRef RootPath, StringRef HashName, return Error::success(); } -static Expected getBootTime() { -#if __has_include() && defined(KERN_BOOTTIME) - // macOS/BSD: Use sysctl to get boot time - struct timeval TV; - size_t TVLen = sizeof(TV); - int KernBoot[2] = {CTL_KERN, KERN_BOOTTIME}; - if (sysctl(KernBoot, 2, &TV, &TVLen, nullptr, 0) < 0) - return createStringError(llvm::errnoAsErrorCode(), - "failed to get boottime"); - if (TVLen != sizeof(TV)) - return createStringError("sysctl kern.boottime unexpected format"); - return TV.tv_sec; -#elif defined(__linux__) - // Linux: Use the mtime for /proc, which is recreated during system boot. - // We could also read /proc/stat and search for 'btime'. 
- sys::fs::file_status Status; - if (std::error_code EC = sys::fs::status("/proc", Status)) - return createFileError("/proc", EC); - return Status.getLastModificationTime().time_since_epoch().count(); -#elif defined(_WIN32) - // Windows: Calculate boot time from current time minus uptime - // GetTickCount64() returns milliseconds since boot - auto now = std::chrono::system_clock::now(); - ULONGLONG uptimeMs = GetTickCount64(); - auto bootTime = now - std::chrono::milliseconds(uptimeMs); - return std::chrono::duration_cast( - bootTime.time_since_epoch()).count(); -#else - llvm::report_fatal_error("getBootTime unimplemented for this platform"); -#endif -} - Expected UnifiedOnDiskCache::validateIfNeeded( StringRef RootPath, StringRef HashName, unsigned HashByteSize, bool CheckHash, OnDiskGraphDB::HashingFuncT HashFn, bool AllowRecovery, - bool ForceValidation, std::optional LLVMCasBinary) { + bool ForceValidation, std::optional LLVMCasBinaryPath) { if (std::error_code EC = sys::fs::create_directories(RootPath)) return createFileError(RootPath, EC); @@ -366,16 +285,18 @@ Expected UnifiedOnDiskCache::validateIfNeeded( assert(FD != -1); sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD); - auto CloseFile = make_scope_exit([&]() { sys::fs::closeFile(File); }); + llvm::scope_exit CloseFile([&]() { sys::fs::closeFile(File); }); - if (std::error_code EC = lockFileThreadSafe(FD, /*Exclusive=*/true)) + if (std::error_code EC = lockFileThreadSafe(FD, sys::fs::LockKind::Exclusive)) return createFileError(PathBuf, EC); - auto UnlockFD = make_scope_exit([&]() { unlockFileThreadSafe(FD); }); + llvm::scope_exit UnlockFD([&]() { unlockFileThreadSafe(FD); }); std::shared_ptr Logger; +#ifndef _WIN32 if (Error E = ondisk::OnDiskCASLogger::openIfEnabled(RootPath).moveInto(Logger)) return std::move(E); +#endif SmallString<8> Bytes; if (Error E = sys::fs::readNativeFileToEOF(File, Bytes)) @@ -396,12 +317,13 @@ Expected UnifiedOnDiskCache::validateIfNeeded( bool Skipped = false; 
std::string LogValidationError; - auto Log = llvm::make_scope_exit([&] { + llvm::scope_exit Log([&] { if (!Logger) return; - Logger->log_UnifiedOnDiskCache_validateIfNeeded( + Logger->logUnifiedOnDiskCacheValidateIfNeeded( RootPath, BootTime, ValidationBootTime, CheckHash, AllowRecovery, - ForceValidation, LLVMCasBinary, LogValidationError, Skipped, Recovered); + ForceValidation, LLVMCasBinaryPath, LogValidationError, Skipped, + Recovered); }); if (ValidationBootTime == BootTime && !ForceValidation) { @@ -411,8 +333,8 @@ Expected UnifiedOnDiskCache::validateIfNeeded( // Validate! bool NeedsRecovery = false; - Error E = LLVMCasBinary - ? validateOutOfProcess(*LLVMCasBinary, RootPath, CheckHash) + Error E = LLVMCasBinaryPath + ? validateOutOfProcess(*LLVMCasBinaryPath, RootPath, CheckHash) : validateInProcess(RootPath, HashName, HashByteSize, CheckHash, HashFn); if (E) { @@ -435,7 +357,7 @@ Expected UnifiedOnDiskCache::validateIfNeeded( PathBuf, LockFD, sys::fs::CD_OpenAlways, sys::fs::OF_None)) return createFileError(PathBuf, EC); sys::fs::file_t LockFile = sys::fs::convertFDToNativeFile(LockFD); - auto CloseLock = make_scope_exit([&]() { sys::fs::closeFile(LockFile); }); + llvm::scope_exit CloseLock([&]() { sys::fs::closeFile(LockFile); }); if (std::error_code EC = tryLockFileThreadSafe(LockFD)) { if (EC == std::errc::no_lock_available) return createFileError( @@ -443,13 +365,13 @@ Expected UnifiedOnDiskCache::validateIfNeeded( "CAS validation requires exclusive access but CAS was in use"); return createFileError(PathBuf, EC); } - auto UnlockFD = make_scope_exit([&]() { unlockFileThreadSafe(LockFD); }); + llvm::scope_exit UnlockFD([&]() { unlockFileThreadSafe(LockFD); }); - SmallVector DBDirs; - if (Error E = getAllDBDirs(RootPath, DBDirs)) - return std::move(E); + auto DBDirs = getAllDBDirs(RootPath); + if (!DBDirs) + return DBDirs.takeError(); - for (StringRef DBDir : DBDirs) { + for (StringRef DBDir : *DBDirs) { sys::path::remove_filename(PathBuf); 
sys::path::append(PathBuf, DBDir); std::error_code EC; @@ -488,14 +410,14 @@ Expected UnifiedOnDiskCache::validateIfNeeded( return createFileError(PathBuf, OS.error()); } - return NeedsRecovery ? ValidationResult::Recovered - : ValidationResult::Valid; + return NeedsRecovery ? ValidationResult::Recovered : ValidationResult::Valid; } Expected> UnifiedOnDiskCache::open(StringRef RootPath, std::optional SizeLimit, StringRef HashName, unsigned HashByteSize, OnDiskGraphDB::FaultInPolicy FaultInPolicy) { + if (std::error_code EC = sys::fs::create_directories(RootPath)) return createFileError(RootPath, EC); @@ -510,30 +432,32 @@ UnifiedOnDiskCache::open(StringRef RootPath, std::optional SizeLimit, // from creating a new chain (essentially while a \p UnifiedOnDiskCache // instance holds a shared lock the storage for the primary directory will // grow unrestricted). - if (std::error_code EC = lockFileThreadSafe(LockFD, /*Exclusive=*/false)) + if (std::error_code EC = + lockFileThreadSafe(LockFD, sys::fs::LockKind::Shared)) return createFileError(PathBuf, EC); - SmallVector DBDirs; - if (Error E = getAllDBDirs(RootPath, DBDirs)) - return std::move(E); - if (DBDirs.empty()) - DBDirs.push_back((Twine(DBDirPrefix) + "1").str()); - - assert(!DBDirs.empty()); + auto DBDirs = getAllDBDirs(RootPath); + if (!DBDirs) + return DBDirs.takeError(); + if (DBDirs->empty()) + DBDirs->push_back((Twine(DBDirPrefix) + "1").str()); std::shared_ptr Logger; +#ifndef _WIN32 if (Error E = ondisk::OnDiskCASLogger::openIfEnabled(RootPath).moveInto(Logger)) return std::move(E); +#endif /// If there is only one directory open databases on it. If there are 2 or /// more directories, get the most recent directories and chain them, with the /// most recent being the primary one. The remaining directories are unused /// data than can be garbage-collected. 
+ auto UniDB = std::unique_ptr(new UnifiedOnDiskCache()); std::unique_ptr UpstreamGraphDB; std::unique_ptr UpstreamKVDB; - if (DBDirs.size() > 1) { - StringRef UpstreamDir = *(DBDirs.end() - 2); + if (DBDirs->size() > 1) { + StringRef UpstreamDir = *(DBDirs->end() - 2); PathBuf = RootPath; sys::path::append(PathBuf, UpstreamDir); if (Error E = @@ -543,19 +467,19 @@ UnifiedOnDiskCache::open(StringRef RootPath, std::optional SizeLimit, return std::move(E); if (Error E = OnDiskKeyValueDB::open(PathBuf, HashName, HashByteSize, /*ValueName=*/"objectid", - /*ValueSize=*/sizeof(uint64_t), Logger) + /*ValueSize=*/sizeof(uint64_t), + /*UnifiedCache=*/nullptr, Logger) .moveInto(UpstreamKVDB)) return std::move(E); } - OnDiskGraphDB *UpstreamGraphDBPtr = UpstreamGraphDB.get(); - StringRef PrimaryDir = *(DBDirs.end() - 1); + StringRef PrimaryDir = *(DBDirs->end() - 1); PathBuf = RootPath; sys::path::append(PathBuf, PrimaryDir); std::unique_ptr PrimaryGraphDB; if (Error E = OnDiskGraphDB::open(PathBuf, HashName, HashByteSize, - std::move(UpstreamGraphDB), Logger, FaultInPolicy) + UpstreamGraphDB.get(), Logger, FaultInPolicy) .moveInto(PrimaryGraphDB)) return std::move(E); std::unique_ptr PrimaryKVDB; @@ -563,17 +487,17 @@ UnifiedOnDiskCache::open(StringRef RootPath, std::optional SizeLimit, // including an extra translation step of the value during fault-in. 
if (Error E = OnDiskKeyValueDB::open(PathBuf, HashName, HashByteSize, /*ValueName=*/"objectid", - /*ValueSize=*/sizeof(uint64_t), Logger) + /*ValueSize=*/sizeof(uint64_t), + UniDB.get(), Logger) .moveInto(PrimaryKVDB)) return std::move(E); - auto UniDB = std::unique_ptr(new UnifiedOnDiskCache()); UniDB->RootPath = RootPath; UniDB->SizeLimit = SizeLimit.value_or(0); UniDB->LockFD = LockFD; - UniDB->NeedsGarbageCollection = DBDirs.size() > 2; + UniDB->NeedsGarbageCollection = DBDirs->size() > 2; UniDB->PrimaryDBDir = PrimaryDir; - UniDB->UpstreamGraphDB = UpstreamGraphDBPtr; + UniDB->UpstreamGraphDB = std::move(UpstreamGraphDB); UniDB->PrimaryGraphDB = std::move(PrimaryGraphDB); UniDB->UpstreamKVDB = std::move(UpstreamKVDB); UniDB->PrimaryKVDB = std::move(PrimaryKVDB); @@ -605,10 +529,10 @@ bool UnifiedOnDiskCache::hasExceededSizeLimit() const { return false; // If the hard limit is beyond 85%, declare above limit and request clean up. - unsigned CurrentPrecent = + unsigned CurrentPercent = std::max(PrimaryGraphDB->getHardStorageLimitUtilization(), PrimaryKVDB->getHardStorageLimitUtilization()); - if (CurrentPrecent > 85) + if (CurrentPercent > 85) return true; // We allow each of the directories in the chain to reach up to half the @@ -625,9 +549,10 @@ bool UnifiedOnDiskCache::hasExceededSizeLimit() const { } Error UnifiedOnDiskCache::close(bool CheckSizeLimit) { + if (LockFD == -1) return Error::success(); // already closed. - auto _1 = make_scope_exit([&]() { + llvm::scope_exit CloseLock([&]() { assert(LockFD >= 0); sys::fs::file_t LockFile = sys::fs::convertFDToNativeFile(LockFD); sys::fs::closeFile(LockFile); @@ -635,10 +560,10 @@ Error UnifiedOnDiskCache::close(bool CheckSizeLimit) { }); bool ExceededSizeLimit = CheckSizeLimit ? 
hasExceededSizeLimit() : false; - PrimaryKVDB.reset(); UpstreamKVDB.reset(); + PrimaryKVDB.reset(); + UpstreamGraphDB.reset(); PrimaryGraphDB.reset(); - UpstreamGraphDB = nullptr; if (std::error_code EC = unlockFileThreadSafe(LockFD)) return createFileError(RootPath, EC); @@ -650,12 +575,12 @@ Error UnifiedOnDiskCache::close(bool CheckSizeLimit) { // this \p UnifiedOnDiskCache path is opened. if (std::error_code EC = tryLockFileThreadSafe( - LockFD, std::chrono::milliseconds(0), /*Exclusive=*/true)) { + LockFD, std::chrono::milliseconds(0), sys::fs::LockKind::Exclusive)) { if (EC == errc::no_lock_available) return Error::success(); // couldn't get exclusive lock, give up. return createFileError(RootPath, EC); } - auto _2 = make_scope_exit([&]() { unlockFileThreadSafe(LockFD); }); + llvm::scope_exit UnlockFile([&]() { unlockFileThreadSafe(LockFD); }); // Managed to get an exclusive lock which means there are no other open // \p UnifiedOnDiskCache instances for the same path, so we can safely start a @@ -681,15 +606,15 @@ UnifiedOnDiskCache::~UnifiedOnDiskCache() { consumeError(close()); } Error UnifiedOnDiskCache::collectGarbage(StringRef Path, ondisk::OnDiskCASLogger *Logger) { - SmallVector DBDirs; - if (Error E = getAllGarbageDirs(Path, DBDirs)) - return E; + auto DBDirs = getAllGarbageDirs(Path); + if (!DBDirs) + return DBDirs.takeError(); SmallString<256> PathBuf(Path); - for (StringRef UnusedSubDir : DBDirs) { + for (StringRef UnusedSubDir : *DBDirs) { sys::path::append(PathBuf, UnusedSubDir); if (Logger) - Logger->log_UnifiedOnDiskCache_collectGarbage(PathBuf); + Logger->logUnifiedOnDiskCacheCollectGarbage(PathBuf); if (std::error_code EC = sys::fs::remove_directories(PathBuf)) return createFileError(PathBuf, EC); sys::path::remove_filename(PathBuf); diff --git a/llvm/lib/RemoteCachingService/CAS/GRPCRelayCAS.cpp b/llvm/lib/RemoteCachingService/CAS/GRPCRelayCAS.cpp index d4a428de87760..677c9cca04ff0 100644 --- 
a/llvm/lib/RemoteCachingService/CAS/GRPCRelayCAS.cpp +++ b/llvm/lib/RemoteCachingService/CAS/GRPCRelayCAS.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/LazyAtomicPointer.h" +#include "llvm/ADT/TrieRawHashMap.h" #include "llvm/CAS/CASID.h" -#include "llvm/CAS/HashMappedTrie.h" #include "llvm/CAS/ObjectStore.h" #include "llvm/CAS/ThreadSafeAllocator.h" #include "llvm/Config/config.h" @@ -31,7 +31,7 @@ class InMemoryCASData; // The in memory HashMappedTrie to store CASData from Service. // This implementation assumes 80 byte hash max. using InMemoryIndexT = - ThreadSafeHashMappedTrie, 80>; + ThreadSafeTrieRawHashMap, 80>; using InMemoryIndexValueT = InMemoryIndexT::value_type; // InMemoryCASData. @@ -405,7 +405,7 @@ GRPCRelayCAS::storeFromOpenFileImpl(sys::fs::file_t FD, auto &I = indexHash(arrayRefFromStringRef(*Response)); // TODO: we can avoid the copy by implementing InMemoryRef object like // InMemoryCAS. - return toReference(storeObjectImpl(I, std::nullopt, Data)); + return toReference(storeObjectImpl(I, {}, Data)); } GRPCActionCache::GRPCActionCache(StringRef Path, Error &Err) diff --git a/llvm/lib/RemoteCachingService/RemoteCachingService.cpp b/llvm/lib/RemoteCachingService/RemoteCachingService.cpp index b6493149cfa5b..d50a44cf09f7c 100644 --- a/llvm/lib/RemoteCachingService/RemoteCachingService.cpp +++ b/llvm/lib/RemoteCachingService/RemoteCachingService.cpp @@ -11,6 +11,20 @@ using namespace llvm; +static Expected, + std::shared_ptr>> +createGRPCRelayDBs(const llvm::Twine &Path) { + std::shared_ptr CAS; + std::shared_ptr AC; + SmallString<128> Buffer; + Path.toVector(Buffer); + if (Error E = cas::createGRPCRelayCAS(Buffer).moveInto(CAS)) + return std::move(E); + if (Error E = cas::createGRPCActionCache(Buffer).moveInto(AC)) + return std::move(E); + return std::make_pair(std::move(CAS), std::move(AC)); +} + cas::RegisterGRPCCAS::RegisterGRPCCAS() { - cas::registerCASURLScheme("grpc://", 
&cas::createGRPCRelayCAS); + cas::registerCASURLScheme("grpc://", createGRPCRelayDBs); } diff --git a/llvm/lib/Support/TrieRawHashMap.cpp b/llvm/lib/Support/TrieRawHashMap.cpp index 2719ab5b315ee..e5af3d65873c7 100644 --- a/llvm/lib/Support/TrieRawHashMap.cpp +++ b/llvm/lib/Support/TrieRawHashMap.cpp @@ -62,8 +62,10 @@ class TrieSubtrie final public: using Slot = LazyAtomicPointer; + const Slot &get(size_t I) const { return getTrailingObjects()[I]; } Slot &get(size_t I) { return getTrailingObjects()[I]; } TrieNode *load(size_t I) { return get(I).load(); } + const TrieNode *load(size_t I) const { return get(I).load(); } unsigned size() const { return Size; } @@ -71,6 +73,11 @@ class TrieSubtrie final sink(size_t I, TrieContent &Content, size_t NumSubtrieBits, size_t NewI, function_ref)> Saver); + void printHash(raw_ostream &OS, ArrayRef Bytes) const; + void print(raw_ostream &OS) const { print(OS, std::nullopt); } + void print(raw_ostream &OS, std::optional Prefix) const; + void dump() const { print(dbgs()); } + static std::unique_ptr create(size_t StartBit, size_t NumBits); explicit TrieSubtrie(size_t StartBit, size_t NumBits); @@ -328,6 +335,128 @@ ThreadSafeTrieRawHashMapBase::PointerBase ThreadSafeTrieRawHashMapBase::insert( llvm_unreachable("failed to insert the node after consuming all hash bytes"); } +static void printHexDigit(raw_ostream &OS, uint8_t Digit) { + if (Digit < 10) + OS << char(Digit + '0'); + else + OS << char(Digit - 10 + 'a'); +} + +static void printHexDigits(raw_ostream &OS, ArrayRef Bytes, + size_t StartBit, size_t NumBits) { + assert(StartBit % 4 == 0); + assert(NumBits % 4 == 0); + for (size_t I = StartBit, E = StartBit + NumBits; I != E; I += 4) { + uint8_t HexPair = Bytes[I / 8]; + uint8_t HexDigit = I % 8 == 0 ? 
HexPair >> 4 : HexPair & 0xf; + printHexDigit(OS, HexDigit); + } +} + +static void printBits(raw_ostream &OS, ArrayRef Bytes, size_t StartBit, + size_t NumBits) { + assert(StartBit + NumBits <= Bytes.size() * 8u); + for (size_t I = StartBit, E = StartBit + NumBits; I != E; ++I) { + uint8_t Byte = Bytes[I / 8]; + size_t ByteOffset = I % 8; + if (size_t ByteShift = 8 - ByteOffset - 1) + Byte >>= ByteShift; + OS << (Byte & 0x1 ? '1' : '0'); + } +} + +void TrieSubtrie::printHash(raw_ostream &OS, ArrayRef Bytes) const { + // afb[1c:00*01110*0]def + size_t EndBit = StartBit + NumBits; + size_t HashEndBit = Bytes.size() * 8u; + + size_t FirstBinaryBit = StartBit & ~0x3u; + printHexDigits(OS, Bytes, 0, FirstBinaryBit); + + size_t LastBinaryBit = (EndBit + 3u) & ~0x3u; + OS << "["; + printBits(OS, Bytes, FirstBinaryBit, LastBinaryBit - FirstBinaryBit); + OS << "]"; + + printHexDigits(OS, Bytes, LastBinaryBit, HashEndBit - LastBinaryBit); +} + +static void appendIndexBits(std::string &Prefix, size_t Index, + size_t NumSlots) { + std::string Bits; + for (size_t NumBits = 1u; NumBits < NumSlots; NumBits <<= 1) { + Bits.push_back('0' + (Index & 0x1)); + Index >>= 1; + } + for (char Ch : llvm::reverse(Bits)) + Prefix += Ch; +} + +static void printPrefix(raw_ostream &OS, StringRef Prefix) { + while (Prefix.size() >= 4) { + uint8_t Digit; + bool ErrorParsingBinary = Prefix.take_front(4).getAsInteger(2, Digit); + assert(!ErrorParsingBinary); + (void)ErrorParsingBinary; + printHexDigit(OS, Digit); + Prefix = Prefix.drop_front(4); + } + if (!Prefix.empty()) + OS << "[" << Prefix << "]"; +} + +void TrieSubtrie::print(raw_ostream &OS, + std::optional Prefix) const { + if (!Prefix) { + OS << "root"; + Prefix.emplace(); + } else { + OS << "subtrie="; + printPrefix(OS, *Prefix); + } + + OS << " num-slots=" << Size << "\n"; + SmallVector Subs; + SmallVector Prefixes; + for (size_t I = 0, E = Size; I != E; ++I) { + const TrieNode *N = load(I); + if (!N) + continue; + OS << "- index=" << I 
<< " "; + if (const auto *S = dyn_cast(N)) { + std::string SubtriePrefix = *Prefix; + appendIndexBits(SubtriePrefix, I, Size); + OS << "subtrie="; + printPrefix(OS, SubtriePrefix); + OS << "\n"; + Subs.push_back(S); + Prefixes.push_back(SubtriePrefix); + continue; + } + auto *Content = cast(N); + OS << "content="; + printHash(OS, Content->getHash()); + OS << "\n"; + } + for (size_t I = 0, E = Subs.size(); I != E; ++I) + Subs[I]->print(OS, Prefixes[I]); +} + +void ThreadSafeTrieRawHashMapBase::print(raw_ostream &OS) const { + OS << "root-bits=" << NumRootBits << " subtrie-bits=" << NumSubtrieBits + << "\n"; + if (ImplType *Impl = ImplPtr.load()) + Impl->getRoot()->print(OS); + else + OS << "[no-root]\n"; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void ThreadSafeTrieRawHashMapBase::dump() const { + print(dbgs()); +} +#endif + ThreadSafeTrieRawHashMapBase::ThreadSafeTrieRawHashMapBase( size_t ContentAllocSize, size_t ContentAllocAlign, size_t ContentOffset, std::optional NumRootBits, std::optional NumSubtrieBits) diff --git a/llvm/lib/Support/Unix/Path.inc b/llvm/lib/Support/Unix/Path.inc index 305401213104f..755dd22b8a5cf 100644 --- a/llvm/lib/Support/Unix/Path.inc +++ b/llvm/lib/Support/Unix/Path.inc @@ -601,6 +601,7 @@ std::error_code resize_file(int FD, uint64_t Size) { } std::error_code resize_file_sparse(int FD, uint64_t Size) { + // On Unix, this is the same as `resize_file`. return resize_file(FD, Size); } @@ -1266,13 +1267,21 @@ Expected readNativeFileSlice(file_t FD, MutableArrayRef Buf, return NumRead; } -std::error_code tryLockFile(int FD, std::chrono::milliseconds Timeout, bool Exclusive) { +std::error_code tryLockFile(int FD, std::chrono::milliseconds Timeout, + LockKind Kind) { auto Start = std::chrono::steady_clock::now(); auto End = Start + Timeout; do { struct flock Lock; memset(&Lock, 0, sizeof(Lock)); - Lock.l_type = Exclusive ? 
F_WRLCK : F_RDLCK; + switch (Kind) { + case LockKind::Exclusive: + Lock.l_type = F_WRLCK; + break; + case LockKind::Shared: + Lock.l_type = F_RDLCK; + break; + } Lock.l_whence = SEEK_SET; Lock.l_start = 0; Lock.l_len = 0; @@ -1288,10 +1297,17 @@ std::error_code tryLockFile(int FD, std::chrono::milliseconds Timeout, bool Excl return make_error_code(errc::no_lock_available); } -std::error_code lockFile(int FD, bool Exclusive) { +std::error_code lockFile(int FD, LockKind Kind) { struct flock Lock; memset(&Lock, 0, sizeof(Lock)); - Lock.l_type = Exclusive ? F_WRLCK : F_RDLCK; + switch (Kind) { + case LockKind::Exclusive: + Lock.l_type = F_WRLCK; + break; + case LockKind::Shared: + Lock.l_type = F_RDLCK; + break; + } Lock.l_whence = SEEK_SET; Lock.l_start = 0; Lock.l_len = 0; diff --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc index e0882aa04aaca..6c52cb06ab97c 100644 --- a/llvm/lib/Support/Windows/Path.inc +++ b/llvm/lib/Support/Windows/Path.inc @@ -1378,8 +1378,9 @@ std::error_code getRealPathFromHandle(file_t Handle, return realPathFromHandle(Handle, RealPath); } -std::error_code tryLockFile(int FD, std::chrono::milliseconds Timeout, bool Exclusive) { - DWORD Flags = Exclusive ? LOCKFILE_EXCLUSIVE_LOCK : 0; +std::error_code tryLockFile(int FD, std::chrono::milliseconds Timeout, + LockKind Kind) { + DWORD Flags = Kind == LockKind::Exclusive ? LOCKFILE_EXCLUSIVE_LOCK : 0; Flags |= LOCKFILE_FAIL_IMMEDIATELY; OVERLAPPED OV = {}; file_t File = convertFDToNativeFile(FD); @@ -1400,8 +1401,8 @@ std::error_code tryLockFile(int FD, std::chrono::milliseconds Timeout, bool Excl return mapWindowsError(ERROR_LOCK_VIOLATION); } -std::error_code lockFile(int FD, bool Exclusive) { - DWORD Flags = Exclusive ? LOCKFILE_EXCLUSIVE_LOCK : 0; +std::error_code lockFile(int FD, LockKind Kind) { + DWORD Flags = Kind == LockKind::Exclusive ? 
LOCKFILE_EXCLUSIVE_LOCK : 0; OVERLAPPED OV = {}; file_t File = convertFDToNativeFile(FD); if (::LockFileEx(File, Flags, 0, MAXDWORD, MAXDWORD, &OV)) diff --git a/llvm/test/CAS/LEB-mccas.ll b/llvm/test/CAS/LEB-mccas.ll index 54f2db390c7c0..6392bb8819b98 100644 --- a/llvm/test/CAS/LEB-mccas.ll +++ b/llvm/test/CAS/LEB-mccas.ll @@ -6,6 +6,8 @@ ; RUN: rm -rf %t && mkdir -p %t ; RUN: llc --filetype=obj --mccas-verify --cas-backend --cas-friendly-debug-info --cas=%t/cas %s -o %t/LEB.o +; REQUIRES: aarch64-registered-target + target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" target triple = "arm64-apple-macosx14.0.0" @@ -25,4 +27,4 @@ define { ptr, i32 } @appendAnimation() #0 personality ptr @__objc_personality_v0 declare i32 @__objc_personality_v0(...) -attributes #0 = { noinline optnone } +attributes #0 = { noinline optnone } \ No newline at end of file diff --git a/llvm/test/CAS/databasefile-concurrent-creation.c b/llvm/test/CAS/databasefile-concurrent-creation.c index 6e980a946f614..97f2a2468f845 100644 --- a/llvm/test/CAS/databasefile-concurrent-creation.c +++ b/llvm/test/CAS/databasefile-concurrent-creation.c @@ -1,5 +1,7 @@ -// REQUIRES: ondisk_cas, shell +// REQUIRES: ondisk_cas +// UNSUPPORTED: system-windows +// RUN: rm -rf %t // RUN: mkdir -p %t // This uses a script that triggers parallel `llvm-cas` invocations on an empty directory. 
diff --git a/llvm/test/CAS/logging.test b/llvm/test/CAS/logging.test index b4159869c7b92..bad9919cc46d4 100644 --- a/llvm/test/CAS/logging.test +++ b/llvm/test/CAS/logging.test @@ -8,29 +8,29 @@ RUN: FileCheck %s --input-file %t/cas/v1.log RUN: FileCheck %s --input-file %t/cas/v1.log --check-prefix=STANDALONE -// CHECK: resize mapped file '{{.*}}v9.index' -// CHECK: mmap '{{.*}}v9.index' [[INDEX:0x[0-9a-f]+]] -// CHECK: resize mapped file '{{.*}}v9.data' -// CHECK: mmap '{{.*}}v9.data' [[DATA:0x[0-9a-f]+]] -// CHECK: resize mapped file '{{.*}}v4.actions' -// CHECK: mmap '{{.*}}v4.actions' [[ACTIONS:0x[0-9a-f]+]] +// CHECK: resize mapped file '{{.*}}index.v{{[0-9]+}}' +// CHECK: mmap '{{.*}}index.v{{[0-9]+}}' [[INDEX:0x[0-9a-f]+]] +// CHECK: resize mapped file '{{.*}}data.v{{[0-9]+}}' +// CHECK: mmap '{{.*}}data.v{{[0-9]+}}' [[DATA:0x[0-9a-f]+]] +// CHECK: resize mapped file '{{.*}}actions.v{{[0-9]+}}' +// CHECK: mmap '{{.*}}actions.v{{[0-9]+}}' [[ACTIONS:0x[0-9a-f]+]] // store input/a contents into the datapool // CHECK: create record region=[[INDEX]] offset=[[INPUT_A_OFF:0x[0-9a-f]+]] hash=9b096cd140f119 // CHECK: cmpxcgh subtrie region=[[INDEX]] offset={{.*}} slot={{.*}} expected=0x0 new=[[INPUT_A_OFF]] prev=0x0 // CHECK: alloc [[DATA]] -// CHECK: resize mapped file '{{.*}}v4.actions' -// CHECK: close mmap '{{.*}}v4.actions' -// CHECK: resize mapped file '{{.*}}v9.data' -// CHECK: close mmap '{{.*}}v9.data' -// CHECK: resize mapped file '{{.*}}v9.index' -// CHECK: close mmap '{{.*}}v9.index' +// CHECK: resize mapped file '{{.*}}actions.v{{[0-9]+}}' +// CHECK: close mmap '{{.*}}actions.v{{[0-9]+}}' +// CHECK: resize mapped file '{{.*}}data.v{{[0-9]+}}' +// CHECK: close mmap '{{.*}}data.v{{[0-9]+}}' +// CHECK: resize mapped file '{{.*}}index.v{{[0-9]+}}' +// CHECK: close mmap '{{.*}}index.v{{[0-9]+}}' // CHECK: validate-if-needed '{{.*}}cas' boot=[[BOOT:[0-9]+]] last-valid=0 check-hash=1 allow-recovery=0 force=0 llvm-cas={{.*}}llvm-cas // CHECK: validate-if-needed 
'{{.*}}cas' boot=[[BOOT]] last-valid=[[BOOT]] check-hash=0 allow-recovery=1 force=1 llvm-cas={{.*}}llvm-cas -// STANDALONE: standalone file create '[[PATH:.*v9.[0-9a-f]*.leaf]].[[SUFFIX:[0-9a-f]*]]' +// STANDALONE: standalone file create '[[PATH:.*leaf.[0-9a-f]*.v[0-9]+]].[[SUFFIX:[0-9a-f]*]]' // STANDALONE: standalone file rename '[[PATH]].[[SUFFIX]]' to '[[PATH]]' //--- input/a diff --git a/llvm/test/CAS/validate-if-needed.test b/llvm/test/CAS/validate-if-needed.test index 0e8c28a23a36e..a5b6163357191 100644 --- a/llvm/test/CAS/validate-if-needed.test +++ b/llvm/test/CAS/validate-if-needed.test @@ -1,6 +1,6 @@ RUN: rm -rf %t && mkdir %t RUN: llvm-cas --cas %t/cas --ingest %S/Inputs > %t/cas.id -RUN: mv %t/cas/v1.1/v9.data %t/cas/v1.1/v9.data.bak +RUN: mv %t/cas/v1.1/data.v1 %t/cas/v1.1/data.v1.bak # INVALID: bad record # VALID: validated successfully @@ -12,7 +12,7 @@ RUN: not llvm-cas --cas %t/cas --validate-if-needed 2>&1 | FileCheck %s -check-p RUN: not llvm-cas --cas %t/cas --validate-if-needed 2>&1 | FileCheck %s -check-prefix=INVALID # Validation happens once per boot. -RUN: mv %t/cas/v1.1/v9.data.bak %t/cas/v1.1/v9.data +RUN: mv %t/cas/v1.1/data.v1.bak %t/cas/v1.1/data.v1 RUN: llvm-cas --cas %t/cas --validate-if-needed | FileCheck %s -check-prefix=VALID RUN: llvm-cas --cas %t/cas --validate-if-needed | FileCheck %s -check-prefix=SKIPPED # Wrong timestamp triggers re-validation. @@ -20,7 +20,7 @@ RUN: echo '123' > %t/cas/v1.validation RUN: llvm-cas --cas %t/cas --validate-if-needed | FileCheck %s -check-prefix=VALID RUN: llvm-cas --cas %t/cas --validate-if-needed | FileCheck %s -check-prefix=SKIPPED # Skipped validation does not catch errors. -RUN: mv %t/cas/v1.1/v9.data %t/cas/v1.1/v9.data.bak +RUN: mv %t/cas/v1.1/data.v1 %t/cas/v1.1/data.v1.bak RUN: llvm-cas --cas %t/cas --validate-if-needed | FileCheck %s -check-prefix=SKIPPED # Unless forced. 
@@ -33,7 +33,7 @@ RUN: llvm-cas --cas %t/cas --validate-if-needed --allow-recovery | FileCheck %s RUN: llvm-cas --cas %t/cas --validate-if-needed --force | FileCheck %s -check-prefix=VALID RUN: rm -rf %t/cas/v1.1 RUN: cp -r %t/cas/corrupt.0.v1.1 %t/cas/v1.1 -RUN: mv %t/cas/v1.1/v9.data %t/cas/v1.1/v9.data.bak +RUN: mv %t/cas/v1.1/data.v1 %t/cas/v1.1/data.v1.bak RUN: llvm-cas --cas %t/cas --validate-if-needed --allow-recovery --force | FileCheck %s -check-prefix=RECOVERED RUN: ls %t/cas/corrupt.1.v1.1 diff --git a/llvm/test/tools/llvm-cas/action-cache.test b/llvm/test/tools/llvm-cas/action-cache.test new file mode 100644 index 0000000000000..fcb212c24e215 --- /dev/null +++ b/llvm/test/tools/llvm-cas/action-cache.test @@ -0,0 +1,14 @@ +RUN: rm -rf %t %t.cas +RUN: mkdir %t + +RUN: llvm-cas --cas %t.cas --make-blob \ +RUN: --data %S/Inputs/oneline >%t/oneline.casid +RUN: llvm-cas --cas %t.cas --make-blob \ +RUN: --data %S/Inputs/oneline-nonewline >%t/oneline-nonewline.casid + +RUN: llvm-cas --cas %t.cas --put-cache-key @%t/oneline.casid @%t/oneline-nonewline.casid +RUN: llvm-cas --cas %t.cas --get-cache-result @%t/oneline.casid > %t/result.casid +RUN: diff %t/oneline-nonewline.casid %t/result.casid + +RUN: not llvm-cas --cas %t.cas --get-cache-result @%t/oneline-nonewline.casid 2>&1 | FileCheck %s +CHECK: result not found diff --git a/llvm/test/tools/llvm-cas/ingest.test b/llvm/test/tools/llvm-cas/ingest.test index 99e51e066f06f..690566ff4aef1 100644 --- a/llvm/test/tools/llvm-cas/ingest.test +++ b/llvm/test/tools/llvm-cas/ingest.test @@ -7,9 +7,9 @@ RUN: llvm-cas --cas %t/cas --ingest %S/Inputs > %t/cas.id RUN: llvm-cas --cas %t/cas --ls-tree-recursive @%t/cas.id | FileCheck %s // Using the plugin. 
-RUN: llvm-cas --cas plugin://%llvmshlibdir/%pluginpreCASPluginTest%pluginext?ondisk-path=%t/cas-plugin --ingest %S/Inputs > %t/cas-plugin.id -RUN: llvm-cas --cas plugin://%llvmshlibdir/%pluginpreCASPluginTest%pluginext?ondisk-path=%t/cas-plugin --ls-tree-recursive @%t/cas-plugin.id | FileCheck %s -RUN: llvm-cas --cas %t/cas-plugin -fcas-plugin-path %llvmshlibdir/%pluginpreCASPluginTest%pluginext --ls-tree-recursive @%t/cas-plugin.id | FileCheck %s +RUN: llvm-cas --cas plugin://%llvmshlibdir/libCASPluginTest%pluginext?ondisk-path=%t/cas-plugin --ingest %S/Inputs > %t/cas-plugin.id +RUN: llvm-cas --cas plugin://%llvmshlibdir/libCASPluginTest%pluginext?ondisk-path=%t/cas-plugin --ls-tree-recursive @%t/cas-plugin.id | FileCheck %s +RUN: llvm-cas --cas %t/cas-plugin -fcas-plugin-path %llvmshlibdir/libCASPluginTest%pluginext --ls-tree-recursive @%t/cas-plugin.id | FileCheck %s CHECK: syml CHECK-SAME: broken_symlink -> missing @@ -27,7 +27,7 @@ CHECK: syml CHECK-SAME: sym_dir -> directory RUN: llvm-cas --cas %t/cas --get-cas-id --data %S/Inputs/directory/file @%t/cas.id > %t/file.casid -RUN: llvm-cas --cas %t/cas --cat-blob @%t/file.casid | FileCheck %s --check-prefix=CHECK-TEST-FILE +RUN: llvm-cas --cas %t/cas --cat-node-data @%t/file.casid | FileCheck %s --check-prefix=CHECK-TEST-FILE CHECK-TEST-FILE: test @@ -40,5 +40,5 @@ CHECK-NODE-REFS: llvmcas:// CHECK-NODE-REFS: llvmcas:// // Test exporting the entire tree. 
-RUN: llvm-cas --cas %t/new-cas --fcas-plugin-path %llvmshlibdir/%pluginpreCASPluginTest%pluginext --upstream-cas %t/cas --import @%t/cas.id > %t/plugin.id -RUN: llvm-cas --cas %t/new-cas --fcas-plugin-path %llvmshlibdir/%pluginpreCASPluginTest%pluginext --ls-tree-recursive @%t/plugin.id | FileCheck %s +RUN: llvm-cas --cas %t/new-cas --fcas-plugin-path %llvmshlibdir/libCASPluginTest%pluginext --upstream-cas %t/cas --import @%t/cas.id > %t/plugin.id +RUN: llvm-cas --cas %t/new-cas --fcas-plugin-path %llvmshlibdir/libCASPluginTest%pluginext --ls-tree-recursive @%t/plugin.id | FileCheck %s diff --git a/llvm/test/tools/llvm-cas/logging.test b/llvm/test/tools/llvm-cas/logging.test new file mode 100644 index 0000000000000..5dc0955eabcb8 --- /dev/null +++ b/llvm/test/tools/llvm-cas/logging.test @@ -0,0 +1,41 @@ +REQUIRES: !system-windows, !system-cygwin + +RUN: rm -rf %t +RUN: split-file %s %t +RUN: %python -c "with open(r'%t/input/large', 'w') as file: file.truncate(100000)" +RUN: env LLVM_CAS_LOG=2 llvm-cas --cas %t/cas --make-blob --data %t/input/a +RUN: env LLVM_CAS_LOG=2 llvm-cas --cas %t/cas --make-blob --data %t/input/large +RUN: env LLVM_CAS_LOG=2 llvm-cas --cas %t/cas --validate-if-needed -check-hash +RUN: env LLVM_CAS_LOG=2 llvm-cas --cas %t/cas --validate-if-needed -force -allow-recovery +RUN: FileCheck %s --input-file %t/cas/v1.log +RUN: FileCheck %s --input-file %t/cas/v1.log --check-prefix=STANDALONE + + +// CHECK: resize mapped file '{{.*}}index.v{{[0-9]+}}' +// CHECK: mmap '{{.*}}index.v{{[0-9]+}}' [[INDEX:0x[0-9a-f]+]] +// CHECK: resize mapped file '{{.*}}data.v{{[0-9]+}}' +// CHECK: mmap '{{.*}}data.v{{[0-9]+}}' [[DATA:0x[0-9a-f]+]] +// CHECK: resize mapped file '{{.*}}actions.v{{[0-9]+}}' +// CHECK: mmap '{{.*}}actions.v{{[0-9]+}}' [[ACTIONS:0x[0-9a-f]+]] + +// store input/a contents into the datapool +// CHECK: create record region=[[INDEX]] offset=[[INPUT_A_OFF:0x[0-9a-f]+]] hash=9b096cd140f119 +// CHECK: cmpxcgh subtrie region=[[INDEX]] offset={{.*}} 
slot={{.*}} expected=0x0 new=[[INPUT_A_OFF]] prev=0x0 +// CHECK: alloc [[DATA]] + +// CHECK: resize mapped file '{{.*}}actions.v{{[0-9]+}}' +// CHECK: close mmap '{{.*}}actions.v{{[0-9]+}}' +// CHECK: resize mapped file '{{.*}}data.v{{[0-9]+}}' +// CHECK: close mmap '{{.*}}data.v{{[0-9]+}}' +// CHECK: resize mapped file '{{.*}}index.v{{[0-9]+}}' +// CHECK: close mmap '{{.*}}index.v{{[0-9]+}}' + +// CHECK: validate-if-needed '{{.*}}cas' boot=[[BOOT:[0-9]+]] last-valid=0 check-hash=1 allow-recovery=0 force=0 llvm-cas={{.*}}llvm-cas +// CHECK: validate-if-needed '{{.*}}cas' boot=[[BOOT]] last-valid=[[BOOT]] check-hash=0 allow-recovery=1 force=1 llvm-cas={{.*}}llvm-cas + +// STANDALONE: standalone file create '[[PATH:.*leaf.[0-9a-f]*.v[0-9]+]].[[SUFFIX:[0-9a-f]*]]' +// STANDALONE: standalone file rename '[[PATH]].[[SUFFIX]]' to '[[PATH]]' + +//--- input/a +Input 1 + diff --git a/llvm/test/tools/llvm-cas/make-blob.test b/llvm/test/tools/llvm-cas/make-blob.test index 10c64732ceb90..f34d51fed908e 100644 --- a/llvm/test/tools/llvm-cas/make-blob.test +++ b/llvm/test/tools/llvm-cas/make-blob.test @@ -14,29 +14,24 @@ RUN: --data %S/Inputs/oneline >%t/oneline.casid RUN: llvm-cas --cas %t.cas --make-blob \ RUN: --data %S/Inputs/oneline-nonewline >%t/oneline-nonewline.casid -RUN: llvm-cas --cas %t.cas --cat-blob @%t/empty.casid |\ +RUN: llvm-cas --cas %t.cas --cat-node-data @%t/empty.casid |\ RUN: FileCheck %s -check-prefix CHECK-EMPTY -allow-empty -RUN: llvm-cas --cas %t.cas --print-kind @%t/empty.casid |\ -RUN: FileCheck %s -check-prefix CHECK-KIND CHECK-EMPTY-NOT: {{.}} -CHECK-KIND: object -RUN: llvm-cas --cas %t.cas --cat-blob @%t/abc.casid |\ +RUN: llvm-cas --cas %t.cas --cat-node-data @%t/abc.casid |\ RUN: FileCheck %s -check-prefix CHECK-ABC -RUN: llvm-cas --cas %t.cas --print-kind @%t/abc.casid |\ -RUN: FileCheck %s -check-prefix CHECK-KIND CHECK-ABC: abc -RUN: llvm-cas --cas %t.cas --cat-blob @%t/oneline-nonewline.casid |\ +RUN: llvm-cas --cas %t.cas --cat-node-data 
@%t/oneline-nonewline.casid |\ RUN: FileCheck %s -check-prefix CHECK-ONELINE -RUN: llvm-cas --cas %t.cas --cat-blob @%t/oneline.casid |\ +RUN: llvm-cas --cas %t.cas --cat-node-data @%t/oneline.casid |\ RUN: FileCheck %s -check-prefix CHECK-ONELINE CHECK-ONELINE: content # Double-check newlines. -RUN: llvm-cas --cas %t.cas --cat-blob @%t/oneline-nonewline.casid \ +RUN: llvm-cas --cas %t.cas --cat-node-data @%t/oneline-nonewline.casid \ RUN: >%t/oneline-nonewline RUN: diff %S/Inputs/oneline-nonewline %t/oneline-nonewline -RUN: llvm-cas --cas %t.cas --cat-blob @%t/oneline.casid \ +RUN: llvm-cas --cas %t.cas --cat-node-data @%t/oneline.casid \ RUN: >%t/oneline RUN: diff %S/Inputs/oneline %t/oneline diff --git a/llvm/test/tools/llvm-cas/make-node.test b/llvm/test/tools/llvm-cas/make-node.test index 876afd89c6962..e083dbdd438ac 100644 --- a/llvm/test/tools/llvm-cas/make-node.test +++ b/llvm/test/tools/llvm-cas/make-node.test @@ -11,11 +11,6 @@ RUN: llvm-cas --cas %t/cas --ls-node-refs @%t/empty.casid |\ RUN: FileCheck %s -check-prefix CHECK-EMPTY -allow-empty CHECK-EMPTY-NOT: {{.}} -RUN: llvm-cas --cas %t/cas --print-kind @%t/empty.casid |\ -RUN: FileCheck %s -check-prefix CHECK-NO-KIND -### FIXME: Node ObjectKind with no reference is Blob kind in BuiltinCAS. -CHECK-NO-KIND: object - # Make a complex object, which references existing ones. Reference a blob and # other objects, and reference one of them twice to be sure they don't get # deduped. 
@@ -27,13 +22,10 @@ RUN: cat %t/complex.refs | sed -e 's,^.,CHECK: ,' > %t/complex.check RUN: llvm-cas --cas %t/cas --make-node \ RUN: --data %S/Inputs/oneline @%t/complex.refs \ RUN: >%t/complex.casid -RUN: llvm-cas --cas %t/cas --print-kind \ -RUN: @%t/complex.casid | FileCheck %s -check-prefix COMPLEX-KIND RUN: llvm-cas --cas %t/cas --cat-node-data \ RUN: @%t/complex.casid | FileCheck %s -check-prefix COMPLEX-DATA RUN: llvm-cas --cas %t/cas --ls-node-refs @%t/complex.casid |\ RUN: FileCheck %t/complex.check -COMPLEX-KIND: object COMPLEX-DATA: content RUN: not llvm-cas --cas %t/cas --ls-tree @%t/complex.casid 2>&1 | FileCheck %s --check-prefix=CHECK-WRONG-TYPE diff --git a/llvm/test/CAS/mapping-size-too-small.test b/llvm/test/tools/llvm-cas/mapping-size-too-small.test similarity index 94% rename from llvm/test/CAS/mapping-size-too-small.test rename to llvm/test/tools/llvm-cas/mapping-size-too-small.test index fc3fd2ef3c3b4..20a2d2c66f1a6 100644 --- a/llvm/test/CAS/mapping-size-too-small.test +++ b/llvm/test/tools/llvm-cas/mapping-size-too-small.test @@ -3,7 +3,7 @@ RUN: split-file %s %t # Check that if we start with a larger CAS it does not blow up when read with a smaller size. 
RUN: env LLVM_CAS_MAX_MAPPING_SIZE=10240 llvm-cas -cas %t/cas -make-blob -data %t/input > %t/casid -RUN: env LLVM_CAS_MAX_MAPPING_SIZE=1024 llvm-cas -cas %t/cas -cat-blob @%t/casid | FileCheck %s +RUN: env LLVM_CAS_MAX_MAPPING_SIZE=1024 llvm-cas -cas %t/cas -cat-node-data @%t/casid | FileCheck %s # CHECK: 01234567890 RUN: rm -rf %t/cas diff --git a/llvm/test/tools/llvm-cas/validation.test b/llvm/test/tools/llvm-cas/validation.test index a3cc7b179e4a9..2f0c8c6f3f10f 100644 --- a/llvm/test/tools/llvm-cas/validation.test +++ b/llvm/test/tools/llvm-cas/validation.test @@ -12,7 +12,10 @@ RUN: llvm-cas --cas %t/cas --ingest %S/Inputs > %t/cas.id RUN: llvm-cas --cas %t/cas --validate RUN: llvm-cas --cas %t/cas --validate --check-hash -RUN: rm %t/cas/v1.1/v9.data +# Check that validation works with a relative path. +RUN: cd %t && llvm-cas --cas cas --validate --check-hash + +RUN: rm %t/cas/v1.1/data.v1 RUN: not llvm-cas --cas %t/cas --validate RUN: not llvm-cas --cas %t/cas --validate --check-hash @@ -35,13 +38,13 @@ RUN: llvm-cas --cas %t/ac --get-cache-result @%t/abc.casid RUN: llvm-cas --cas %t/ac --validate # Check that validation fails if the objects referenced are missing. -RUN: mv %t/ac/v1.1/v9.index %t/tmp.v9.index +RUN: mv %t/ac/v1.1/index.v1 %t/tmp.index.v1 RUN: not llvm-cas --cas %t/ac --validate -RUN: mv %t/tmp.v9.index %t/ac/v1.1/v9.index +RUN: mv %t/tmp.index.v1 %t/ac/v1.1/index.v1 RUN: llvm-cas --cas %t/ac --validate # Note: records are 40 bytes (32 hash bytes + 8 byte value), so trim the last # allocated record, leaving it invalid. 
-RUN: truncate -s -40 %t/ac/v1.1/v4.actions +RUN: truncate -s -40 %t/ac/v1.1/actions.v1 RUN: not llvm-cas --cas %t/ac --validate diff --git a/llvm/tools/libCASPluginTest/libCASPluginTest.cpp b/llvm/tools/libCASPluginTest/libCASPluginTest.cpp index 5c526ff91bade..726390a7acce5 100644 --- a/llvm/tools/libCASPluginTest/libCASPluginTest.cpp +++ b/llvm/tools/libCASPluginTest/libCASPluginTest.cpp @@ -13,6 +13,7 @@ #include "llvm-c/CAS/PluginAPI_functions.h" #include "llvm/CAS/BuiltinObjectHasher.h" #include "llvm/CAS/CASID.h" +#include "llvm/CAS/OnDiskKeyValueDB.h" #include "llvm/CAS/UnifiedOnDiskCache.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/Errc.h" @@ -318,13 +319,33 @@ Expected CASWrapper::downstreamNode(ObjectID Node) { return importNode(Node, FromDB, ToDB); } +static Expected cachePut(OnDiskKeyValueDB &DB, ArrayRef Key, + ObjectID ID) { + auto Value = UnifiedOnDiskCache::getValueFromObjectID(ID); + auto Result = DB.put(Key, Value); + if (!Result) + return Result.takeError(); + return UnifiedOnDiskCache::getObjectIDFromValue(*Result); +} + +static Expected> cacheGet(OnDiskKeyValueDB &DB, + ArrayRef Key) { + auto Result = DB.get(Key); + if (!Result) + return Result.takeError(); + if (!*Result) + return std::nullopt; + return UnifiedOnDiskCache::getObjectIDFromValue(**Result); +} + Error CASWrapper::upstreamKey(ArrayRef Key, ObjectID Value) { if (!UpstreamDB) return Error::success(); Expected UpstreamVal = upstreamNode(Value); if (!UpstreamVal) return UpstreamVal.takeError(); - Expected PutValue = UpstreamDB->KVPut(Key, *UpstreamVal); + Expected PutValue = + cachePut(UpstreamDB->getKeyValueDB(), Key, *UpstreamVal); if (!PutValue) return PutValue.takeError(); assert(*PutValue == *UpstreamVal); @@ -336,7 +357,8 @@ CASWrapper::downstreamKey(ArrayRef Key) { if (!UpstreamDB) return std::nullopt; std::optional UpstreamValue; - if (Error E = UpstreamDB->KVGet(Key).moveInto(UpstreamValue)) + if (Error E = + cacheGet(UpstreamDB->getKeyValueDB(), 
Key).moveInto(UpstreamValue)) return std::move(E); if (!UpstreamValue) return std::nullopt; @@ -345,7 +367,7 @@ CASWrapper::downstreamKey(ArrayRef Key) { UpstreamDB->getGraphDB().getDigest(*UpstreamValue)); if (!Value) return Value.takeError(); - Expected PutValue = DB->KVPut(Key, *Value); + Expected PutValue = cachePut(DB->getKeyValueDB(), Key, *Value); if (!PutValue) return PutValue.takeError(); assert(*PutValue == *Value); @@ -591,7 +613,7 @@ bool llcas_cas_store_object(llcas_cas_t c_cas, llcas_data_t c_data, llcas_data_t llcas_loaded_object_get_data(llcas_cas_t c_cas, llcas_loaded_object_t c_obj) { auto &CAS = unwrap(c_cas)->DB->getGraphDB(); - ondisk::ObjectHandle Obj = ondisk::ObjectHandle::fromOpaqueData(c_obj.opaque); + ondisk::ObjectHandle Obj = ondisk::ObjectHandle(c_obj.opaque); auto Data = CAS.getObjectData(Obj); return llcas_data_t{Data.data(), Data.size()}; } @@ -599,7 +621,7 @@ llcas_data_t llcas_loaded_object_get_data(llcas_cas_t c_cas, llcas_object_refs_t llcas_loaded_object_get_refs(llcas_cas_t c_cas, llcas_loaded_object_t c_obj) { auto &CAS = unwrap(c_cas)->DB->getGraphDB(); - ondisk::ObjectHandle Obj = ondisk::ObjectHandle::fromOpaqueData(c_obj.opaque); + ondisk::ObjectHandle Obj = ondisk::ObjectHandle(c_obj.opaque); auto Refs = CAS.getObjectRefs(Obj); return llcas_object_refs_t{Refs.begin().getOpaqueData(), Refs.end().getOpaqueData()}; @@ -628,7 +650,7 @@ llcas_actioncache_get_for_digest(llcas_cas_t c_cas, llcas_digest_t c_key, auto &DB = *Wrap.DB; ArrayRef Key(c_key.data, c_key.size); std::optional Value; - if (Error E = DB.KVGet(Key).moveInto(Value)) + if (Error E = cacheGet(DB.getKeyValueDB(), Key).moveInto(Value)) return reportError(std::move(E), error, LLCAS_LOOKUP_RESULT_ERROR); if (!Value) { if (!globally) @@ -684,7 +706,7 @@ bool llcas_actioncache_put_for_digest(llcas_cas_t c_cas, llcas_digest_t c_key, auto &DB = *Wrap.DB; ObjectID Value = ObjectID::fromOpaqueData(c_value.opaque); ArrayRef Key(c_key.data, c_key.size); - Expected Ret = 
DB.KVPut(Key, Value); + Expected Ret = cachePut(DB.getKeyValueDB(), Key, Value); if (!Ret) return reportError(Ret.takeError(), error, true); if (*Ret != Value) diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp index f71930d7f958a..54d5e29568890 100644 --- a/llvm/tools/llc/llc.cpp +++ b/llvm/tools/llc/llc.cpp @@ -350,12 +350,9 @@ static std::unique_ptr GetOutputStream(Triple::OSType OS) { static std::shared_ptr getCAS() { if (CASPath.empty()) return cas::createInMemoryCAS(); - auto MaybeCAS = - CASPath == "auto" - ? cas::createCASFromIdentifier(cas::getDefaultOnDiskCASPath()) - : cas::createCASFromIdentifier(CASPath); + auto MaybeCAS = cas::createCASFromIdentifier(CASPath); if (MaybeCAS) - return std::move(*MaybeCAS); + return std::move(MaybeCAS->first); reportError(toString(MaybeCAS.takeError())); } diff --git a/llvm/tools/llvm-cas-dump/llvm-cas-dump.cpp b/llvm/tools/llvm-cas-dump/llvm-cas-dump.cpp index f01ee512ebeab..e93bccec46753 100644 --- a/llvm/tools/llvm-cas-dump/llvm-cas-dump.cpp +++ b/llvm/tools/llvm-cas-dump/llvm-cas-dump.cpp @@ -165,7 +165,7 @@ int main(int argc, char *argv[]) { HexDumpOneLine, Verbose, DIERefs}; std::shared_ptr CAS = - ExitOnErr(createCASFromIdentifier(CASPath)); + ExitOnErr(createCASFromIdentifier(CASPath)).first; MCCASPrinter Printer(Options, *CAS, llvm::outs()); StringMap Files; diff --git a/llvm/tools/llvm-cas-object-format/llvm-cas-object-format.cpp b/llvm/tools/llvm-cas-object-format/llvm-cas-object-format.cpp index 589d2ad358c9c..baf68ce6f7bff 100644 --- a/llvm/tools/llvm-cas-object-format/llvm-cas-object-format.cpp +++ b/llvm/tools/llvm-cas-object-format/llvm-cas-object-format.cpp @@ -44,7 +44,7 @@ int main(int argc, char *argv[]) { cl::ParseCommandLineOptions(argc, argv); std::shared_ptr CAS = - ExitOnErr(createCASFromIdentifier(CASPath)); + ExitOnErr(createCASFromIdentifier(CASPath)).first; for (StringRef IF : InputFiles) { ExitOnError ExitOnErr; diff --git a/llvm/tools/llvm-cas-test/Config.def 
b/llvm/tools/llvm-cas-test/Config.def new file mode 100644 index 0000000000000..f689a7e7b8e5b --- /dev/null +++ b/llvm/tools/llvm-cas-test/Config.def @@ -0,0 +1,18 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Define CONFIG before including. +// +// #define CONFIG(NAME, TYPE, DEFAULT_VAL, MIN_VAL, MAX_VAL, OPT_NAME, OPT_DESC) + +CONFIG(NumShards, uint8_t, 10, 1, 20, num-shards, "number of shards") +CONFIG(NumChildren, uint8_t, 8, 1, 32, num-children, "number of child nodes") +CONFIG(TreeDepth, uint8_t, 4, 1, 8, tree-depth, "tree depth") +CONFIG(DataLength, uint16_t, 1024, 1, 4096, data-length, "data length") +CONFIG(PrecentFile, uint8_t, 10, 0, 100, precent-file, + "percentage of nodes that is long enough to be file based") diff --git a/llvm/tools/llvm-cas-test/llvm-cas-test.cpp b/llvm/tools/llvm-cas-test/llvm-cas-test.cpp index bb2c73373a219..45364b838e6bc 100644 --- a/llvm/tools/llvm-cas-test/llvm-cas-test.cpp +++ b/llvm/tools/llvm-cas-test/llvm-cas-test.cpp @@ -10,6 +10,8 @@ #include "llvm/CAS/BuiltinUnifiedCASDatabases.h" #include "llvm/CAS/ObjectStore.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/CrashRecoveryContext.h" +#include "llvm/Support/InitLLVM.h" #include "llvm/Support/Path.h" #include "llvm/Support/Program.h" #include "llvm/Support/RandomNumberGenerator.h" @@ -36,9 +38,7 @@ static cl::opt // CAS configuration. 
static cl::opt CASPath("cas", cl::desc("CAS path on disk for testing"), cl::Required); -static cl::opt - PrintConfig("print-config", - cl::desc("print randomly generated configuration")); +static cl::opt Verbose("v", cl::desc("verbose output")); static cl::opt ForceKill("force-kill", cl::desc("force kill subprocess to test termination")); @@ -46,19 +46,12 @@ static cl::opt KeepLog("keep-log", cl::desc("keep log and do not rotate the log")); // CAS stress test parameters. -static cl::opt - OptNumShards("num-shards", cl::desc("number of shards"), cl::init(0)); -static cl::opt OptTreeDepth("tree-depth", cl::desc("tree depth"), - cl::init(0)); -static cl::opt OptNumChildren("num-children", - cl::desc("number of child nodes"), - cl::init(0)); -static cl::opt OptDataLength("data-length", cl::desc("data length"), - cl::init(0)); -static cl::opt OptPrecentFile( - "precent-file", - cl::desc("percentage of nodes that is long enough to be file based"), - cl::init(0)); +#define CONFIG(NAME, TYPE, DEFAULT_VAL, MIN_VAL, MAX_VAL, OPT_NAME, OPT_DESC) \ + static cl::opt Opt##NAME(#OPT_NAME, cl::desc(OPT_DESC), \ + cl::init(0)); +#include "Config.def" +#undef CONFIG + // Default size to be 100MB. static cl::opt SizeLimit("size-limit", cl::desc("CAS size limit (in MB)"), cl::init(100)); @@ -77,24 +70,19 @@ enum CASFuzzingSettings : uint8_t { struct Config { CASFuzzingSettings Settings = Default; - uint8_t NumShards; - uint8_t NumChildren; - uint8_t TreeDepth; - uint16_t DataLength; - uint16_t PrecentFile; - - static constexpr unsigned MaxShards = 20; - static constexpr unsigned MaxChildren = 32; - static constexpr unsigned MaxDepth = 8; - static constexpr unsigned MaxDataLength = 1024 * 4; +#define CONFIG(NAME, TYPE, DEFAULT_VAL, MIN_VAL, MAX_VAL, OPT_NAME, OPT_DESC) \ + TYPE NAME; +#include "Config.def" +#undef CONFIG void constrainParameters() { - // reduce the size of parameter if they are too big. 
- NumShards = NumShards % MaxShards; - NumChildren = NumChildren % MaxChildren; - TreeDepth = TreeDepth % MaxDepth; - DataLength = DataLength % MaxDataLength; - PrecentFile = PrecentFile % 100; + // Reduce the size of parameter if they are too big. If the value is not + // passed in as parameter, constrain the value between MIN_VAL and MAX_VAL. +#define CONFIG(NAME, TYPE, DEFAULT_VAL, MIN_VAL, MAX_VAL, OPT_NAME, OPT_DESC) \ + NAME = Opt##NAME >= MIN_VAL ? Opt##NAME \ + : (NAME % (MAX_VAL - MIN_VAL) + MIN_VAL); +#include "Config.def" +#undef CONFIG if (ForceKill) { Settings |= Fork; @@ -103,34 +91,32 @@ struct Config { } bool extendToFile(uint8_t Seed) const { - return ((float)Seed / (float)UINT8_MAX) > ((float)PrecentFile / 100.0f); + return ((float)Seed / (float)UINT8_MAX) < ((float)PrecentFile / 100.0f); } void init() { - NumShards = OptNumShards ? OptNumShards : MaxShards; - NumChildren = OptNumChildren ? OptNumChildren : MaxChildren; - TreeDepth = OptTreeDepth ? OptTreeDepth : MaxDepth; - DataLength = OptDataLength ? OptDataLength : MaxDataLength; - PrecentFile = OptPrecentFile; +#define CONFIG(NAME, TYPE, DEFAULT_VAL, MIN_VAL, MAX_VAL, OPT_NAME, OPT_DESC) \ + NAME = Opt##NAME >= MIN_VAL ? 
Opt##NAME : DEFAULT_VAL; +#include "Config.def" +#undef CONFIG } void appendCommandLineOpts(std::vector &Cmd) { - Cmd.push_back("--num-shards=" + utostr(NumShards)); - Cmd.push_back("--num-children=" + utostr(NumChildren)); - Cmd.push_back("--tree-depth=" + utostr(TreeDepth)); - Cmd.push_back("--data-length=" + utostr(DataLength)); - Cmd.push_back("--precent-file=" + utostr(PrecentFile)); +#define CONFIG(NAME, TYPE, DEFAULT_VAL, MIN_VAL, MAX_VAL, OPT_NAME, OPT_DESC) \ + Cmd.push_back(std::string("--") + #OPT_NAME + "=" + utostr(NAME)); +#include "Config.def" +#undef CONFIG } void dump() { llvm::errs() << "## Configuration:" << " Fork: " << (bool)(Settings & Fork) << " Kill: " << (bool)(Settings & CheckTermination) - << " NumShards: " << (unsigned)NumShards - << " TreeDepth: " << (unsigned)TreeDepth - << " NumChildren: " << (unsigned)NumChildren - << " DataLength: " << (unsigned)DataLength - << " PrecentFile: " << (unsigned)PrecentFile << "\n"; +#define CONFIG(NAME, TYPE, DEFAULT_VAL, MIN_VAL, MAX_VAL, OPT_NAME, OPT_DESC) \ + << " " << #NAME << ": " << (unsigned)NAME +#include "Config.def" +#undef CONFIG + << "\n"; } }; @@ -140,34 +126,42 @@ static void fillData(ObjectStore &CAS, ActionCache &AC, const Config &Conf) { DefaultThreadPool ThreadPool(hardware_concurrency()); for (size_t I = 0; I != Conf.NumShards; ++I) { ThreadPool.async([&] { - std::vector Refs; - for (unsigned Depth = 0; Depth < Conf.TreeDepth; ++Depth) { - unsigned NumNodes = (Conf.TreeDepth - Depth + 1) * Conf.NumChildren + 1; - std::vector Created; - Created.reserve(NumNodes); - ArrayRef PreviouslyCreated(Refs); - for (unsigned I = 0; I < NumNodes; ++I) { - std::vector Data(Conf.DataLength); - getRandomBytes(Data.data(), Data.size()); - // Use the first byte that generated to decide if we should make it - // 64KB bigger and force that into a file based storage. 
- if (Conf.extendToFile(Data[0])) - Data.resize(64LL * 1024LL + Conf.DataLength); - - if (Depth == 0) { - auto Ref = ExitOnErr(CAS.store({}, Data)); - Created.push_back(Ref); - } else { - auto Parent = PreviouslyCreated.slice(I, Conf.NumChildren); - auto Ref = ExitOnErr(CAS.store(Parent, Data)); - Created.push_back(Ref); + CrashRecoveryContext CRC; + auto Success = CRC.RunSafely([&]() { + std::vector Refs; + for (unsigned Depth = 0; Depth < Conf.TreeDepth; ++Depth) { + unsigned NumNodes = + (Conf.TreeDepth - Depth + 1) * Conf.NumChildren + 1; + std::vector Created; + Created.reserve(NumNodes); + ArrayRef PreviouslyCreated(Refs); + for (unsigned I = 0; I < NumNodes; ++I) { + assert(Conf.DataLength > 0); + std::vector Data(Conf.DataLength); + getRandomBytes(Data.data(), Data.size()); + // Use the first byte that generated to decide if we should make it + // 64KB bigger and force that into a file based storage. + if (Conf.extendToFile(Data[0])) + Data.resize(64LL * 1024LL + Conf.DataLength); + + if (Depth == 0) { + auto Ref = ExitOnErr(CAS.store({}, Data)); + Created.push_back(Ref); + } else { + auto Parent = PreviouslyCreated.slice(I, Conf.NumChildren); + auto Ref = ExitOnErr(CAS.store(Parent, Data)); + Created.push_back(Ref); + } } + // Put a self mapping in action cache to avoid cache poisoning. + if (!Created.empty()) + ExitOnErr( + AC.put(CAS.getID(Created.back()), CAS.getID(Created.back()))); + Refs.swap(Created); } - // Put a self mapping in action cache to avoid cache poisoning. 
- if (!Created.empty()) - ExitOnErr( - AC.put(CAS.getID(Created.back()), CAS.getID(Created.back()))); - Refs.swap(Created); + }); + if (!Success) { + ExitOnErr(createStringError("fillData crashed")); } }); } @@ -182,7 +176,6 @@ static int genData() { auto DB = ExitOnErr(cas::createOnDiskUnifiedCASDatabases(CASPath)); fillData(*DB.first, *DB.second, Conf); - return 0; } @@ -193,7 +186,7 @@ static int runOneTest(const char *Argv0) { getRandomBytes(&Conf, sizeof(Conf)); Conf.constrainParameters(); - if (PrintConfig) + if (Verbose) Conf.dump(); // Start with fresh log if --keep-log is not used. @@ -201,7 +194,7 @@ static int runOneTest(const char *Argv0) { static constexpr StringLiteral LogFile = "v1.log"; SmallString<256> LogPath(CASPath); llvm::sys::path::append(LogPath, LogFile); - llvm::sys::fs::remove(LogPath); + llvm::sys::fs::rename(LogPath, LogPath + ".old"); } auto DB = ExitOnErr(cas::createOnDiskUnifiedCASDatabases(CASPath)); @@ -226,25 +219,52 @@ static int runOneTest(const char *Argv0) { Subprocesses.push_back(SP); } - if (Conf.Settings & CheckTermination) { - for_each(Subprocesses, [](auto &P) { - // Wait 1 second and killed the process. - auto WP = sys::Wait(P, 1); - if (WP.ReturnCode) - llvm::errs() << "subprocess killed successfully\n"; - }); - } else { - for_each(Subprocesses, [](auto &P) { sys::Wait(P, std::nullopt); }); + std::optional Timeout; + // Wait 1 second and killed the process if CheckTermination. 
+ if (Conf.Settings & CheckTermination) + Timeout = 1; + + auto HasError = any_of(Subprocesses, [&](auto &P) { + std::string ErrMsg; + auto WP = sys::Wait(P, Timeout, /*ErrMsg=*/&ErrMsg); + if (WP.ReturnCode == 0) + return false; + if (Timeout) { + if (WP.ReturnCode == -2 && + StringRef(ErrMsg).starts_with("Child timed out")) { + if (Verbose) + llvm::errs() << "subprocess killed successfully\n"; + return false; + } + if (WP.ReturnCode == -1 && + StringRef(ErrMsg).ends_with("No child processes")) { + // The child process ended in the window between check and kill. + return false; + } + } + llvm::errs() << "subprocess failed with error code (" << WP.ReturnCode + << "): " << ErrMsg << "\n"; + return true; + }); + if (HasError) { + llvm::errs() << "end of stress test due to an error in subprocess\n"; + return 1; } - } else { // in-process fill data. fillData(CAS, AC, Conf); } + if (Verbose) + llvm::errs() << "Finished filling data, start validating\n"; // validate and prune in the end. ExitOnErr(CAS.validate(true)); + if (Verbose) + llvm::errs() << "Finished validating, start pruning storage if needed\n"; + ExitOnErr(CAS.pruneStorageData()); + if (Verbose) + llvm::errs() << "Finished pruning, end of iteration\n"; return 0; } @@ -265,7 +285,7 @@ static int checkLockFiles() { ExitOnError ExitOnErr("llvm-cas-test: check-lock-files: "); SmallString<128> DataPoolPath(CASPath); - sys::path::append(DataPoolPath, "v1.1/v9.data"); + sys::path::append(DataPoolPath, "v1.1/data.v1"); auto OpenCASAndGetDataPoolSize = [&]() -> Expected { auto Result = createOnDiskUnifiedCASDatabases(CASPath); @@ -300,6 +320,7 @@ static int checkLockFiles() { } int main(int argc, char **argv) { + InitLLVM X(argc, argv); cl::ParseCommandLineOptions(argc, argv, "llvm-cas-test CAS testing tool\n"); switch (Command) { diff --git a/llvm/tools/llvm-cas/CMakeLists.txt b/llvm/tools/llvm-cas/CMakeLists.txt index ac852e8e33d2f..f4458c32e61d0 100644 --- a/llvm/tools/llvm-cas/CMakeLists.txt +++ 
b/llvm/tools/llvm-cas/CMakeLists.txt @@ -1,10 +1,19 @@ +set(LLVM_TARGET_DEFINITIONS Options.td) +tablegen(LLVM Options.inc -gen-opt-parser-defs) +add_public_tablegen_target(LLVMCASToolTableGen) + set(LLVM_LINK_COMPONENTS Support CAS + Option RemoteCachingService CASUtil ) add_llvm_tool(llvm-cas llvm-cas.cpp + + DEPENDS + ${tablegen_deps} + LLVMCASToolTableGen ) diff --git a/llvm/tools/llvm-cas/Options.td b/llvm/tools/llvm-cas/Options.td new file mode 100644 index 0000000000000..5b1851f4a046a --- /dev/null +++ b/llvm/tools/llvm-cas/Options.td @@ -0,0 +1,95 @@ +include "llvm/Option/OptParser.td" + +class F : Flag<["--", "-"], name>; + +def grp_action : OptionGroup<"Actions">, HelpText<"llvm-cas actions">; + +def help : F<"help">, HelpText<"Prints this help output">; +def : Flag<["-"], "h">, Alias, HelpText<"Alias for --help">; + +// Tool actions + +def cas_dump : F<"dump">, HelpText<"Dump internal contents">, Group; +def cat_node_data : F<"cat-node-data">, + HelpText<"Cat node data">, + Group; +def diff_graph : F<"diff-graphs">, HelpText<"diff graphs">, Group; +def traverse_graph : F<"traverse-graph">, + HelpText<"traverse graph">, + Group; +def make_blob : F<"make-blob">, HelpText<"Make blob">, Group; +def make_node : F<"make-node">, HelpText<"Make node">, Group; +def ls_node_refs : F<"ls-node-refs">, + HelpText<"List node refs">, + Group; +def ls_tree : F<"ls-tree">, HelpText<"list tree">, Group; +def ls_tree_recursive : F<"ls-tree-recursive">, + HelpText<"list tree recursive">, + Group; +def ingest : F<"ingest">, HelpText<"ingest file system">, Group; +def merge_tree : F<"merge">, HelpText<"merge paths/cas-ids">, Group; +def get_cas_id : F<"get-cas-id">, + HelpText<"get cas id for file">, + Group; +def import : F<"import">, + HelpText<"Import objects from another CAS">, + Group; +def put_cache_key : F<"put-cache-key">, + HelpText<"Set a value for a cache key">, + Group; +def get_cache_result : F<"get-cache-result">, + HelpText<"Get the result value from a cache 
key">, + Group; +def validate : F<"validate">, + HelpText<"Validate ObjectStore">, + Group; +def validate_object : F<"validate-object">, + HelpText<"Validate the object for CASID">, + Group; +def validate_if_needed : F<"validate-if-needed">, + HelpText<"Validate cas contents if needed">, + Group; +def prune : F<"prune">, HelpText<"Prune local cas storage">, Group; + +// Tool options + +def cas_path : Separate<["-", "--"], "cas">, + MetaVarName<"">, + HelpText<"Path to CAS on disk">; + +def cas_plugin_path : Separate<["-", "--"], "fcas-plugin-path">, + MetaVarName<"">, + HelpText<"Path to plugin CAS library">; + +def cas_plugin_option : Separate<["-", "--"], "fcas-plugin-option">, + MetaVarName<"