From a50d770dc9b35a23bab89a8b035c5c74e9efc92a Mon Sep 17 00:00:00 2001 From: silverweed Date: Mon, 17 Mar 2025 11:07:47 +0100 Subject: [PATCH] [ntuple] Move RPageInfo(Extended) up one level of nesting They are now inner classes of RClusterDescriptor directly rather than of RClusterDescriptor::RPageRange --- tree/ntuple/v7/inc/ROOT/RNTupleDescriptor.hxx | 119 +++++++++--------- tree/ntuple/v7/inc/ROOT/RPageStorage.hxx | 5 +- tree/ntuple/v7/src/RNTupleDescriptor.cxx | 2 +- tree/ntuple/v7/src/RPageStorage.cxx | 9 +- tree/ntuple/v7/src/RPageStorageDaos.cxx | 4 +- tree/ntuple/v7/src/RPageStorageFile.cxx | 4 +- tree/ntuple/v7/test/ntuple_serialize.cxx | 16 +-- 7 files changed, 81 insertions(+), 78 deletions(-) diff --git a/tree/ntuple/v7/inc/ROOT/RNTupleDescriptor.hxx b/tree/ntuple/v7/inc/ROOT/RNTupleDescriptor.hxx index e944f8547eeec..d200a6656b597 100644 --- a/tree/ntuple/v7/inc/ROOT/RNTupleDescriptor.hxx +++ b/tree/ntuple/v7/inc/ROOT/RNTupleDescriptor.hxx @@ -302,6 +302,68 @@ public: } }; + // clang-format off + /** + \class ROOT::Experimental::RClusterDescriptor::RPageInfo + \ingroup NTuple + \brief Information about a single page in the context of a cluster's page range. + */ + // clang-format on + // NOTE: We do not need to store the element size / uncompressed page size because we know to which column + // the page belongs + struct RPageInfo { + protected: + /// The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRanges + std::uint32_t fNElements = std::uint32_t(-1); + /// The meaning of fLocator depends on the storage backend. + RNTupleLocator fLocator; + /// If true, the 8 bytes following the serialized page are an xxhash of the on-disk page data + bool fHasChecksum = false; + + public: + RPageInfo() = default; + RPageInfo(std::uint32_t nElements, const RNTupleLocator &locator, bool hasChecksum) + : fNElements(nElements), fLocator(locator), fHasChecksum(hasChecksum) + { + } + + bool operator==(const RPageInfo &other) const + { + return fNElements == other.fNElements && fLocator == other.fLocator; + } + + std::uint32_t GetNElements() const { return fNElements; } + void SetNElements(std::uint32_t n) { fNElements = n; } + + const RNTupleLocator &GetLocator() const { return fLocator; } + RNTupleLocator &GetLocator() { return fLocator; } + void SetLocator(const RNTupleLocator &locator) { fLocator = locator; } + + bool HasChecksum() const { return fHasChecksum; } + void SetHasChecksum(bool hasChecksum) { fHasChecksum = hasChecksum; } + }; + + struct RPageInfoExtended : RPageInfo { + private: + /// Index (in cluster) of the first element in page. + ROOT::NTupleSize_t fFirstElementIndex = 0; + /// Page number in the corresponding RPageRange. + ROOT::NTupleSize_t fPageNumber = 0; + + public: + RPageInfoExtended() = default; + RPageInfoExtended(const RPageInfo &pageInfo, ROOT::NTupleSize_t firstElementIndex, ROOT::NTupleSize_t pageNumber) + : RPageInfo(pageInfo), fFirstElementIndex(firstElementIndex), fPageNumber(pageNumber) + { + } + + ROOT::NTupleSize_t GetFirstElementIndex() const { return fFirstElementIndex; } + void SetFirstElementIndex(ROOT::NTupleSize_t firstInPage) { fFirstElementIndex = firstInPage; } + + ROOT::NTupleSize_t GetPageNumber() const { return fPageNumber; } + void SetPageNumber(ROOT::NTupleSize_t pageNumber) { fPageNumber = pageNumber; } + }; + // clang-format off /** \class ROOT::Experimental::RClusterDescriptor::RPageRange @@ -312,63 +374,6 @@ public: class RPageRange { friend class Internal::RClusterDescriptorBuilder; - public: - /// We do not need to store the element size / uncompressed page size because we know to which column - /// the page belongs - struct RPageInfo { - protected: - /// The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRanges - std::uint32_t fNElements = std::uint32_t(-1); - /// The meaning of fLocator depends on the storage backend. - RNTupleLocator fLocator; - /// If true, the 8 bytes following the serialized page are an xxhash of the on-disk page data - bool fHasChecksum = false; - - public: - RPageInfo() = default; - RPageInfo(std::uint32_t nElements, const RNTupleLocator &locator, bool hasChecksum) - : fNElements(nElements), fLocator(locator), fHasChecksum(hasChecksum) - { - } - - bool operator==(const RPageInfo &other) const - { - return fNElements == other.fNElements && fLocator == other.fLocator; - } - - std::uint32_t GetNElements() const { return fNElements; } - void SetNElements(std::uint32_t n) { fNElements = n; } - - const RNTupleLocator &GetLocator() const { return fLocator; } - RNTupleLocator &GetLocator() { return fLocator; } - void SetLocator(const RNTupleLocator &locator) { fLocator = locator; } - - bool HasChecksum() const { return fHasChecksum; } - void SetHasChecksum(bool hasChecksum) { fHasChecksum = hasChecksum; } - }; - - struct RPageInfoExtended : RPageInfo { - private: - /// Index (in cluster) of the first element in page. - ROOT::NTupleSize_t fFirstElementIndex = 0; - /// Page number in the corresponding RPageRange. - ROOT::NTupleSize_t fPageNumber = 0; - - public: - RPageInfoExtended() = default; - RPageInfoExtended(const RPageInfo &pageInfo, ROOT::NTupleSize_t firstElementIndex, - ROOT::NTupleSize_t pageNumber) - : RPageInfo(pageInfo), fFirstElementIndex(firstElementIndex), fPageNumber(pageNumber) - { - } - - ROOT::NTupleSize_t GetFirstElementIndex() const { return fFirstElementIndex; } - void SetFirstElementIndex(ROOT::NTupleSize_t firstInPage) { fFirstElementIndex = firstInPage; } - - ROOT::NTupleSize_t GetPageNumber() const { return fPageNumber; } - void SetPageNumber(ROOT::NTupleSize_t pageNumber) { fPageNumber = pageNumber; } - }; - private: /// Extend this RPageRange to fit the given RColumnRange, i.e. prepend as many synthetic RPageInfos as needed to /// cover the range in `columnRange`. `RPageInfo`s are constructed to contain as many elements of type `element` diff --git a/tree/ntuple/v7/inc/ROOT/RPageStorage.hxx b/tree/ntuple/v7/inc/ROOT/RPageStorage.hxx index 84f79a371011e..cb69d656dc2e2 100644 --- a/tree/ntuple/v7/inc/ROOT/RPageStorage.hxx +++ b/tree/ntuple/v7/inc/ROOT/RPageStorage.hxx @@ -687,7 +687,7 @@ protected: struct RClusterInfo { ROOT::DescriptorId_t fClusterId = 0; /// Location of the page on disk - RClusterDescriptor::RPageRange::RPageInfoExtended fPageInfo; + RClusterDescriptor::RPageInfoExtended fPageInfo; /// The first element number of the page's column in the given cluster std::uint64_t fColumnOffset = 0; }; @@ -717,8 +717,7 @@ protected: /// commonly used as part of `LoadClusters()` in derived classes. void PrepareLoadCluster( const RCluster::RKey &clusterKey, ROnDiskPageMap &pageZeroMap, - std::function - perPageFunc); + std::function perPageFunc); /// Enables the default set of metrics provided by RPageSource. `prefix` will be used as the prefix for /// the counters registered in the internal RNTupleMetrics object. diff --git a/tree/ntuple/v7/src/RNTupleDescriptor.cxx b/tree/ntuple/v7/src/RNTupleDescriptor.cxx index 0108cc4eefceb..bfeee865343f3 100644 --- a/tree/ntuple/v7/src/RNTupleDescriptor.cxx +++ b/tree/ntuple/v7/src/RNTupleDescriptor.cxx @@ -194,7 +194,7 @@ ROOT::Experimental::RColumnDescriptor ROOT::Experimental::RColumnDescriptor::Clo //////////////////////////////////////////////////////////////////////////////// -ROOT::Experimental::RClusterDescriptor::RPageRange::RPageInfoExtended +ROOT::Experimental::RClusterDescriptor::RPageInfoExtended ROOT::Experimental::RClusterDescriptor::RPageRange::Find(ROOT::NTupleSize_t idxInCluster) const { const auto N = fCumulativeNElements.size(); diff --git a/tree/ntuple/v7/src/RPageStorage.cxx b/tree/ntuple/v7/src/RPageStorage.cxx index ba04809b6f46b..b852677e1ab46 100644 --- a/tree/ntuple/v7/src/RPageStorage.cxx +++ b/tree/ntuple/v7/src/RPageStorage.cxx @@ -312,8 +312,7 @@ void ROOT::Experimental::Internal::RPageSource::UnzipClusterImpl(RCluster *clust void ROOT::Experimental::Internal::RPageSource::PrepareLoadCluster( const RCluster::RKey &clusterKey, ROnDiskPageMap &pageZeroMap, - std::function - perPageFunc) + std::function perPageFunc) { auto descriptorGuard = GetSharedDescriptorGuard(); const auto &clusterDesc = descriptorGuard->GetClusterDescriptor(clusterKey.fClusterId); @@ -1010,7 +1009,7 @@ void ROOT::Experimental::Internal::RPagePersistentSink::CommitPage(ColumnHandle_ { fOpenColumnRanges.at(columnHandle.fPhysicalId).IncrementNElements(page.GetNElements()); - RClusterDescriptor::RPageRange::RPageInfo pageInfo; + RClusterDescriptor::RPageInfo pageInfo; pageInfo.SetNElements(page.GetNElements()); pageInfo.SetLocator(CommitPageImpl(columnHandle, page)); pageInfo.SetHasChecksum(GetWriteOptions().GetEnablePageChecksums()); @@ -1022,7 +1021,7 @@ void ROOT::Experimental::Internal::RPagePersistentSink::CommitSealedPage(ROOT::D { fOpenColumnRanges.at(physicalColumnId).IncrementNElements(sealedPage.GetNElements()); - RClusterDescriptor::RPageRange::RPageInfo pageInfo; + RClusterDescriptor::RPageInfo pageInfo; pageInfo.SetNElements(sealedPage.GetNElements()); pageInfo.SetLocator(CommitSealedPageImpl(physicalColumnId, sealedPage)); pageInfo.SetHasChecksum(sealedPage.GetHasChecksum()); @@ -1106,7 +1105,7 @@ void ROOT::Experimental::Internal::RPagePersistentSink::CommitSealedPageV( for (auto sealedPageIt = range.fFirst; sealedPageIt != range.fLast; ++sealedPageIt) { fOpenColumnRanges.at(range.fPhysicalColumnId).IncrementNElements(sealedPageIt->GetNElements()); - RClusterDescriptor::RPageRange::RPageInfo pageInfo; + RClusterDescriptor::RPageInfo pageInfo; pageInfo.SetNElements(sealedPageIt->GetNElements()); pageInfo.SetLocator(locators[locatorIndexes[i++]]); pageInfo.SetHasChecksum(sealedPageIt->GetHasChecksum()); diff --git a/tree/ntuple/v7/src/RPageStorageDaos.cxx b/tree/ntuple/v7/src/RPageStorageDaos.cxx index 06ba6d52ffcaa..b77d57d15ef37 100644 --- a/tree/ntuple/v7/src/RPageStorageDaos.cxx +++ b/tree/ntuple/v7/src/RPageStorageDaos.cxx @@ -550,7 +550,7 @@ void ROOT::Experimental::Internal::RPageSourceDaos::LoadSealedPage(ROOT::Descrip { const auto clusterId = localIndex.GetClusterId(); - RClusterDescriptor::RPageRange::RPageInfo pageInfo; + RClusterDescriptor::RPageInfo pageInfo; { auto descriptorGuard = GetSharedDescriptorGuard(); const auto &clusterDescriptor = descriptorGuard->GetClusterDescriptor(clusterId); @@ -699,7 +699,7 @@ ROOT::Experimental::Internal::RPageSourceDaos::LoadClusters(std::span()); diff --git a/tree/ntuple/v7/src/RPageStorageFile.cxx b/tree/ntuple/v7/src/RPageStorageFile.cxx index 2a115dd77ed29..7c0cd3ae96193 100644 --- a/tree/ntuple/v7/src/RPageStorageFile.cxx +++ b/tree/ntuple/v7/src/RPageStorageFile.cxx @@ -397,7 +397,7 @@ void ROOT::Experimental::Internal::RPageSourceFile::LoadSealedPage(ROOT::Descrip { const auto clusterId = localIndex.GetClusterId(); - RClusterDescriptor::RPageRange::RPageInfo pageInfo; + RClusterDescriptor::RPageInfo pageInfo; { auto descriptorGuard = GetSharedDescriptorGuard(); const auto &clusterDescriptor = descriptorGuard->GetClusterDescriptor(clusterId); @@ -514,7 +514,7 @@ ROOT::Experimental::Internal::RPageSourceFile::PrepareSingleCluster( auto pageZeroMap = std::make_unique(); PrepareLoadCluster(clusterKey, *pageZeroMap, [&](ROOT::DescriptorId_t physicalColumnId, ROOT::NTupleSize_t pageNo, - const RClusterDescriptor::RPageRange::RPageInfo &pageInfo) { + const RClusterDescriptor::RPageInfo &pageInfo) { const auto &pageLocator = pageInfo.GetLocator(); if (pageLocator.GetType() == RNTupleLocator::kTypeUnknown) throw RException(R__FAIL("tried to read a page with an unknown locator")); diff --git a/tree/ntuple/v7/test/ntuple_serialize.cxx b/tree/ntuple/v7/test/ntuple_serialize.cxx index afc5e1890f994..674e17d4b264b 100644 --- a/tree/ntuple/v7/test/ntuple_serialize.cxx +++ b/tree/ntuple/v7/test/ntuple_serialize.cxx @@ -672,7 +672,7 @@ TEST(RNTuple, SerializeFooter) .Unwrap()); ROOT::Experimental::RClusterDescriptor::RColumnRange columnRange; - ROOT::Experimental::RClusterDescriptor::RPageRange::RPageInfo pageInfo; + ROOT::Experimental::RClusterDescriptor::RPageInfo pageInfo; RClusterDescriptorBuilder clusterBuilder; clusterBuilder.ClusterId(84).FirstEntryIndex(0).NEntries(100); ROOT::Experimental::RClusterDescriptor::RPageRange pageRange; @@ -994,7 +994,7 @@ TEST(RNTuple, SerializeMultiColumnRepresentation) RClusterDescriptorBuilder clusterBuilder; ROOT::Experimental::RClusterDescriptor::RPageRange pageRange; - ROOT::Experimental::RClusterDescriptor::RPageRange::RPageInfo pageInfo; + ROOT::Experimental::RClusterDescriptor::RPageInfo pageInfo; // First cluster clusterBuilder.ClusterId(13).FirstEntryIndex(0).NEntries(1); clusterBuilder.MarkSuppressedColumnRange(0); @@ -1190,7 +1190,7 @@ TEST(RNTuple, SerializeMultiColumnRepresentationProjection) RClusterDescriptorBuilder clusterBuilder; ROOT::Experimental::RClusterDescriptor::RPageRange pageRange; - ROOT::Experimental::RClusterDescriptor::RPageRange::RPageInfo pageInfo; + ROOT::Experimental::RClusterDescriptor::RPageInfo pageInfo; // First cluster clusterBuilder.ClusterId(13).FirstEntryIndex(0).NEntries(1); clusterBuilder.MarkSuppressedColumnRange(0); @@ -1315,7 +1315,7 @@ TEST(RNTuple, SerializeMultiColumnRepresentationDeferred) context.MapSchema(builder.GetDescriptor(), /*forHeaderExtension=*/true); ROOT::Experimental::RClusterDescriptor::RPageRange pageRange; - ROOT::Experimental::RClusterDescriptor::RPageRange::RPageInfo pageInfo; + ROOT::Experimental::RClusterDescriptor::RPageInfo pageInfo; // Second cluster clusterBuilder.ClusterId(17).FirstEntryIndex(1).NEntries(2); clusterBuilder.MarkSuppressedColumnRange(1); @@ -1425,7 +1425,7 @@ TEST(RNTuple, SerializeMultiColumnRepresentationIncremental) // First cluster RClusterDescriptorBuilder clusterBuilder; ROOT::Experimental::RClusterDescriptor::RPageRange pageRange; - ROOT::Experimental::RClusterDescriptor::RPageRange::RPageInfo pageInfo; + ROOT::Experimental::RClusterDescriptor::RPageInfo pageInfo; clusterBuilder.ClusterId(13).FirstEntryIndex(0).NEntries(1); pageRange.SetPhysicalColumnId(0); pageInfo.SetNElements(1); @@ -1558,7 +1558,7 @@ TEST(RNTuple, DeserializeDescriptorModes) sizeHeader = context.GetHeaderSize(); ROOT::Experimental::RClusterDescriptor::RPageRange pageRange; - ROOT::Experimental::RClusterDescriptor::RPageRange::RPageInfo pageInfo; + ROOT::Experimental::RClusterDescriptor::RPageInfo pageInfo; // First cluster RClusterDescriptorBuilder clusterBuilder; @@ -1833,7 +1833,7 @@ TEST(RNTuple, SerializeMultiColumnRepresentationDeferred_HeaderExtBeforeSerializ builder.AddCluster(clusterBuilder.MoveDescriptor().Unwrap()); ROOT::Experimental::RClusterDescriptor::RPageRange pageRange; - ROOT::Experimental::RClusterDescriptor::RPageRange::RPageInfo pageInfo; + ROOT::Experimental::RClusterDescriptor::RPageInfo pageInfo; // Second cluster clusterBuilder.ClusterId(17).FirstEntryIndex(1).NEntries(2); clusterBuilder.MarkSuppressedColumnRange(1); @@ -1959,7 +1959,7 @@ TEST(RNTuple, SerializeMultiColumnRepresentationDeferredInMainHeader) builder.AddCluster(clusterBuilder.MoveDescriptor().Unwrap()); ROOT::Experimental::RClusterDescriptor::RPageRange pageRange; - ROOT::Experimental::RClusterDescriptor::RPageRange::RPageInfo pageInfo; + ROOT::Experimental::RClusterDescriptor::RPageInfo pageInfo; // Second cluster clusterBuilder.ClusterId(17).FirstEntryIndex(1).NEntries(2); clusterBuilder.MarkSuppressedColumnRange(1);