Skip to content

[6.36][ntuple] Move RCluster/RClusterPool out of Experimental #18482

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 6 additions & 11 deletions tree/ntuple/inc/ROOT/RCluster.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,11 @@
#include <vector>

namespace ROOT {
namespace Experimental {
namespace Internal {

// clang-format off
/**
\class ROnDiskPage
\class ROOT::Internal::ROnDiskPage
\ingroup NTuple
\brief A page as it is stored on disk, i.e. packed and compressed

Expand Down Expand Up @@ -68,16 +67,15 @@ public:
}; // class ROnDiskPage

} // namespace Internal
} // namespace Experimental
} // namespace ROOT

// For hash maps ROnDiskPage::Key --> ROnDiskPage
namespace std
{
template <>
struct hash<ROOT::Experimental::Internal::ROnDiskPage::Key> {
struct hash<ROOT::Internal::ROnDiskPage::Key> {
// TODO(jblomer): quick and dirty hash, likely very sub-optimal, to be revised later.
size_t operator()(const ROOT::Experimental::Internal::ROnDiskPage::Key &key) const
size_t operator()(const ROOT::Internal::ROnDiskPage::Key &key) const
{
return (
(std::hash<ROOT::DescriptorId_t>()(key.fPhysicalColumnId) ^ (hash<ROOT::NTupleSize_t>()(key.fPageNo) << 1)) >>
Expand All @@ -86,14 +84,12 @@ struct hash<ROOT::Experimental::Internal::ROnDiskPage::Key> {
};
}


namespace ROOT {
namespace Experimental {
namespace Internal {

// clang-format off
/**
\class ROOT::Experimental::Internal::ROnDiskPageMap
\class ROOT::Internal::ROnDiskPageMap
\ingroup NTuple
\brief A memory region that contains packed and compressed pages

Expand Down Expand Up @@ -122,7 +118,7 @@ public:

// clang-format off
/**
\class ROOT::Experimental::Internal::ROnDiskPageMapHeap
\class ROOT::Internal::ROnDiskPageMapHeap
\ingroup NTuple
\brief An ROnDiskPageMap that is used for an fMemory allocated as an array of unsigned char.
*/
Expand All @@ -142,7 +138,7 @@ public:

// clang-format off
/**
\class ROOT::Experimental::Internal::RCluster
\class ROOT::Internal::RCluster
\ingroup NTuple
\brief An in-memory subset of the packed and compressed pages of a cluster

Expand Down Expand Up @@ -199,7 +195,6 @@ public:
}; // class RCluster

} // namespace Internal
} // namespace Experimental
} // namespace ROOT

#endif
4 changes: 1 addition & 3 deletions tree/ntuple/inc/ROOT/RClusterPool.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,11 @@ namespace Internal {
class RPageSource;
}

namespace Experimental {
namespace Internal {

// clang-format off
/**
\class ROOT::Experimental::Internal::RClusterPool
\class ROOT::Internal::RClusterPool
\ingroup NTuple
\brief Manages a set of clusters containing compressed and packed pages

Expand Down Expand Up @@ -142,7 +141,6 @@ public:
}; // class RClusterPool

} // namespace Internal
} // namespace Experimental
} // namespace ROOT

#endif
2 changes: 1 addition & 1 deletion tree/ntuple/inc/ROOT/RField/RFieldSequenceContainer.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ public:
////////////////////////////////////////////////////////////////////////////////

/**
\class ROOT::Experimental::RArrayAsRVecField
\class ROOT::RArrayAsRVecField
\brief A field for fixed-size arrays that are represented as RVecs in memory.
\ingroup NTuple
This class is used only for reading. In particular, it helps exposing
Expand Down
2 changes: 1 addition & 1 deletion tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ class RClusterDescriptor final {
public:
// clang-format off
/**
\class ROOT::Experimental::RClusterDescriptor::RColumnRange
\class ROOT::RClusterDescriptor::RColumnRange
\ingroup NTuple
\brief The window of element indexes of a particular column in a particular cluster
*/
Expand Down
11 changes: 5 additions & 6 deletions tree/ntuple/inc/ROOT/RNTupleMerger.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class RNTuple;

namespace Internal {
class RPageAllocator;
class RClusterPool;
}

namespace Experimental::Internal {
Expand Down Expand Up @@ -59,8 +60,6 @@ struct RColumnMergeInfo;
struct RNTupleMergeData;
struct RSealedPageMergeData;

class RClusterPool;

/// Set of merging options to pass to RNTupleMerger.
/// If you're using the merger through TFileMerger you need to give it string-based options instead.
/// Here is the mapping for the TFileMerger options:
Expand Down Expand Up @@ -99,11 +98,11 @@ class RNTupleMerger final {
std::optional<TTaskGroup> fTaskGroup;
std::unique_ptr<ROOT::RNTupleModel> fModel;

void MergeCommonColumns(RClusterPool &clusterPool, const ROOT::RClusterDescriptor &clusterDesc,
void MergeCommonColumns(ROOT::Internal::RClusterPool &clusterPool, const ROOT::RClusterDescriptor &clusterDesc,
std::span<const RColumnMergeInfo> commonColumns,
const RCluster::ColumnSet_t &commonColumnSet, std::size_t nCommonColumnsInCluster,
RSealedPageMergeData &sealedPageData, const RNTupleMergeData &mergeData,
ROOT::Internal::RPageAllocator &pageAlloc);
const ROOT::Internal::RCluster::ColumnSet_t &commonColumnSet,
std::size_t nCommonColumnsInCluster, RSealedPageMergeData &sealedPageData,
const RNTupleMergeData &mergeData, ROOT::Internal::RPageAllocator &pageAlloc);

void MergeSourceClusters(ROOT::Internal::RPageSource &source, std::span<const RColumnMergeInfo> commonColumns,
std::span<const RColumnMergeInfo> extraDstColumns, RNTupleMergeData &mergeData);
Expand Down
13 changes: 6 additions & 7 deletions tree/ntuple/inc/ROOT/RPageStorage.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -667,7 +667,7 @@ protected:
public:
void Insert(ROOT::DescriptorId_t physicalColumnId, ROOT::Internal::RColumnElementBase::RIdentifier elementId);
void Erase(ROOT::DescriptorId_t physicalColumnId, ROOT::Internal::RColumnElementBase::RIdentifier elementId);
ROOT::Experimental::Internal::RCluster::ColumnSet_t ToColumnSet() const;
ROOT::Internal::RCluster::ColumnSet_t ToColumnSet() const;
bool HasColumnInfos(ROOT::DescriptorId_t physicalColumnId) const
{
return fColumnInfos.count(physicalColumnId) > 0;
Expand Down Expand Up @@ -703,7 +703,7 @@ protected:
/// Returns a new, unattached page source for the same data set
virtual std::unique_ptr<RPageSource> CloneImpl() const = 0;
// Only called if a task scheduler is set. No-op by default.
virtual void UnzipClusterImpl(ROOT::Experimental::Internal::RCluster *cluster);
virtual void UnzipClusterImpl(ROOT::Internal::RCluster *cluster);
// Returns a page from storage if not found in the page pool. Should be able to handle zero page locators.
virtual ROOT::Internal::RPageRef
LoadPageImpl(ColumnHandle_t columnHandle, const RClusterInfo &clusterInfo, ROOT::NTupleSize_t idxInCluster) = 0;
Expand All @@ -712,8 +712,7 @@ protected:
/// `kTypePageZero` locator are filled in `pageZeroMap`; otherwise, `perPageFunc` is called for each page. This is
/// commonly used as part of `LoadClusters()` in derived classes.
void PrepareLoadCluster(
const ROOT::Experimental::Internal::RCluster::RKey &clusterKey,
ROOT::Experimental::Internal::ROnDiskPageMap &pageZeroMap,
const ROOT::Internal::RCluster::RKey &clusterKey, ROOT::Internal::ROnDiskPageMap &pageZeroMap,
std::function<void(ROOT::DescriptorId_t, ROOT::NTupleSize_t, const ROOT::RClusterDescriptor::RPageInfo &)>
perPageFunc);

Expand Down Expand Up @@ -805,15 +804,15 @@ public:
/// for the cluster would assume an incomplete cluster and trigger loading again.
/// `LoadClusters()` is typically called from the I/O thread of a cluster pool, i.e. the method runs
/// concurrently to other methods of the page source.
virtual std::vector<std::unique_ptr<ROOT::Experimental::Internal::RCluster>>
LoadClusters(std::span<ROOT::Experimental::Internal::RCluster::RKey> clusterKeys) = 0;
virtual std::vector<std::unique_ptr<ROOT::Internal::RCluster>>
LoadClusters(std::span<ROOT::Internal::RCluster::RKey> clusterKeys) = 0;

/// Parallel decompression and unpacking of the pages in the given cluster. The unzipped pages are supposed
/// to be preloaded in a page pool attached to the source. The method is triggered by the cluster pool's
/// unzip thread. It is an optional optimization, the method can safely do nothing. In particular, the
/// actual implementation will only run if a task scheduler is set. In practice, a task scheduler is set
/// if implicit multi-threading is turned on.
void UnzipCluster(ROOT::Experimental::Internal::RCluster *cluster);
void UnzipCluster(ROOT::Internal::RCluster *cluster);

// TODO(gparolini): for symmetry with SealPage(), we should either make this private or SealPage() public.
RResult<ROOT::Internal::RPage>
Expand Down
14 changes: 9 additions & 5 deletions tree/ntuple/inc/ROOT/RPageStorageDaos.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,15 @@
#include <optional>

namespace ROOT {
namespace Experimental {

namespace Internal {
using ntuple_index_t = std::uint32_t;
class RCluster;
class RClusterPool;
} // namespace Internal

namespace Experimental {
namespace Internal {
using ntuple_index_t = std::uint32_t;
class RDaosPool;
class RDaosContainer;
class RPageAllocatorHeap;
Expand Down Expand Up @@ -151,13 +154,13 @@ private:
ntuple_index_t fNTupleIndex{0};

/// The last cluster from which a page got loaded. Points into fClusterPool->fPool
RCluster *fCurrentCluster = nullptr;
ROOT::Internal::RCluster *fCurrentCluster = nullptr;
/// A container that stores object data (header/footer, pages, etc.)
std::unique_ptr<RDaosContainer> fDaosContainer;
/// A URI to a DAOS pool of the form 'daos://pool-label/container-label'
std::string fURI;
/// The cluster pool asynchronously preloads the next few clusters
std::unique_ptr<RClusterPool> fClusterPool;
std::unique_ptr<ROOT::Internal::RClusterPool> fClusterPool;

ROOT::Internal::RNTupleDescriptorBuilder fDescriptorBuilder;

Expand All @@ -177,7 +180,8 @@ public:
void
LoadSealedPage(ROOT::DescriptorId_t physicalColumnId, RNTupleLocalIndex localIndex, RSealedPage &sealedPage) final;

std::vector<std::unique_ptr<RCluster>> LoadClusters(std::span<RCluster::RKey> clusterKeys) final;
std::vector<std::unique_ptr<ROOT::Internal::RCluster>>
LoadClusters(std::span<ROOT::Internal::RCluster::RKey> clusterKeys) final;

/// Return the object class used for user data OIDs in this ntuple.
std::string GetObjectClass() const;
Expand Down
18 changes: 7 additions & 11 deletions tree/ntuple/inc/ROOT/RPageStorageFile.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,8 @@ namespace ROOT {
class RNTuple; // for making RPageSourceFile a friend of RNTuple
class RNTupleLocator;

namespace Experimental::Internal {
class RClusterPool;
}

namespace Internal {
class RClusterPool;
class RRawFile;
class RPageAllocatorHeap;

Expand Down Expand Up @@ -131,15 +128,15 @@ private:
/// Either provided by CreateFromAnchor, or read from the ROOT file given the ntuple name
std::optional<RNTuple> fAnchor;
/// The last cluster from which a page got loaded. Points into fClusterPool->fPool
ROOT::Experimental::Internal::RCluster *fCurrentCluster = nullptr;
ROOT::Internal::RCluster *fCurrentCluster = nullptr;
/// An RRawFile is used to request the necessary byte ranges from a local or a remote file
std::unique_ptr<RRawFile> fFile;
/// Takes the fFile to read ntuple blobs from it
ROOT::Internal::RMiniFileReader fReader;
/// The descriptor is created from the header and footer either in AttachImpl or in CreateFromAnchor
RNTupleDescriptorBuilder fDescriptorBuilder;
/// The cluster pool asynchronously preloads the next few clusters
std::unique_ptr<ROOT::Experimental::Internal::RClusterPool> fClusterPool;
std::unique_ptr<ROOT::Internal::RClusterPool> fClusterPool;
/// Populated by LoadStructureImpl(), reset at the end of Attach()
RStructureBuffer fStructureBuffer;

Expand All @@ -149,9 +146,8 @@ private:
/// read requests for a given cluster and columns. The read requests are appended to
/// the provided vector. This way, requests can be collected for multiple clusters before
/// sending them to RRawFile::ReadV().
std::unique_ptr<ROOT::Experimental::Internal::RCluster>
PrepareSingleCluster(const ROOT::Experimental::Internal::RCluster::RKey &clusterKey,
std::vector<RRawFile::RIOVec> &readRequests);
std::unique_ptr<ROOT::Internal::RCluster>
PrepareSingleCluster(const ROOT::Internal::RCluster::RKey &clusterKey, std::vector<RRawFile::RIOVec> &readRequests);

protected:
void LoadStructureImpl() final;
Expand Down Expand Up @@ -180,8 +176,8 @@ public:
void
LoadSealedPage(ROOT::DescriptorId_t physicalColumnId, RNTupleLocalIndex localIndex, RSealedPage &sealedPage) final;

std::vector<std::unique_ptr<ROOT::Experimental::Internal::RCluster>>
LoadClusters(std::span<ROOT::Experimental::Internal::RCluster::RKey> clusterKeys) final;
std::vector<std::unique_ptr<ROOT::Internal::RCluster>>
LoadClusters(std::span<ROOT::Internal::RCluster::RKey> clusterKeys) final;
}; // class RPageSourceFile

} // namespace Internal
Expand Down
13 changes: 6 additions & 7 deletions tree/ntuple/src/RCluster.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -20,32 +20,31 @@
#include <iterator>
#include <utility>

ROOT::Experimental::Internal::ROnDiskPageMap::~ROnDiskPageMap() = default;
ROOT::Internal::ROnDiskPageMap::~ROnDiskPageMap() = default;

////////////////////////////////////////////////////////////////////////////////

ROOT::Experimental::Internal::ROnDiskPageMapHeap::~ROnDiskPageMapHeap() = default;
ROOT::Internal::ROnDiskPageMapHeap::~ROnDiskPageMapHeap() = default;

////////////////////////////////////////////////////////////////////////////////

const ROOT::Experimental::Internal::ROnDiskPage *
ROOT::Experimental::Internal::RCluster::GetOnDiskPage(const ROnDiskPage::Key &key) const
const ROOT::Internal::ROnDiskPage *ROOT::Internal::RCluster::GetOnDiskPage(const ROnDiskPage::Key &key) const
{
const auto itr = fOnDiskPages.find(key);
if (itr != fOnDiskPages.end())
return &(itr->second);
return nullptr;
}

void ROOT::Experimental::Internal::RCluster::Adopt(std::unique_ptr<ROnDiskPageMap> pageMap)
void ROOT::Internal::RCluster::Adopt(std::unique_ptr<ROnDiskPageMap> pageMap)
{
auto &pages = pageMap->fOnDiskPages;
fOnDiskPages.insert(std::make_move_iterator(pages.begin()), std::make_move_iterator(pages.end()));
pageMap->fOnDiskPages.clear();
fPageMaps.emplace_back(std::move(pageMap));
}

void ROOT::Experimental::Internal::RCluster::Adopt(RCluster &&other)
void ROOT::Internal::RCluster::Adopt(RCluster &&other)
{
R__ASSERT(fClusterId == other.fClusterId);

Expand All @@ -60,7 +59,7 @@ void ROOT::Experimental::Internal::RCluster::Adopt(RCluster &&other)
other.fPageMaps.clear();
}

void ROOT::Experimental::Internal::RCluster::SetColumnAvailable(ROOT::DescriptorId_t physicalColumnId)
void ROOT::Internal::RCluster::SetColumnAvailable(ROOT::DescriptorId_t physicalColumnId)
{
fAvailPhysicalColumns.insert(physicalColumnId);
}
Loading
Loading