From 14a24e52c992e9606bb732934996e78cc34c8867 Mon Sep 17 00:00:00 2001 From: Florine de Geus Date: Mon, 3 Mar 2025 14:15:56 +0100 Subject: [PATCH 1/4] [ntuple] Add partitions to `RNTupleJoinTable` With this addition, the join table can be split up into several mappings from join values to entry numbers, according to some (numeric) partition key. It has several use cases, but the immediate one is that with this approach, the join table is not restricted to one page source anymore. This is useful for the integration into the `RNTupleProcessor`, where we want to be able to create joins of chains of RNTuples and have to deal with more than one page source. As a side-effect, the state management of the join table and the notion of lazy building has changed. There is no single `isBuilt` state anymore, and the `Add` function eagerly builds the mapping for the provided page source and adds it to the join table. As such, the responsibility of deciding whether to eagerly or lazily build the join table is moved to the application using the join table (i.e. by strategically calling `Add`). --- tree/ntuple/v7/inc/ROOT/RNTupleJoinTable.hxx | 153 +++++++++++-------- tree/ntuple/v7/inc/ROOT/RNTupleProcessor.hxx | 1 + tree/ntuple/v7/src/RNTupleJoinTable.cxx | 110 +++++++++---- tree/ntuple/v7/src/RNTupleProcessor.cxx | 4 +- tree/ntuple/v7/test/ntuple_join_table.cxx | 123 ++++++++++++--- 5 files changed, 273 insertions(+), 118 deletions(-) diff --git a/tree/ntuple/v7/inc/ROOT/RNTupleJoinTable.hxx b/tree/ntuple/v7/inc/ROOT/RNTupleJoinTable.hxx index a022658d0afb7..2adde77826a86 100644 --- a/tree/ntuple/v7/inc/ROOT/RNTupleJoinTable.hxx +++ b/tree/ntuple/v7/inc/ROOT/RNTupleJoinTable.hxx @@ -36,49 +36,79 @@ namespace Internal { class RNTupleJoinTable { public: using NTupleJoinValue_t = std::uint64_t; + using PartitionKey_t = std::uint64_t; + static constexpr PartitionKey_t kDefaultPartitionKey = PartitionKey_t(-1); private: - ///////////////////////////////////////////////////////////////////////////// - /// Container for the hashes of the join fields. - class RCombinedJoinFieldValue { - public: - std::vector fFieldValues; - RCombinedJoinFieldValue(const std::vector &fieldValues) - { - fFieldValues.reserve(fieldValues.size()); - fFieldValues = fieldValues; - } - inline bool operator==(const RCombinedJoinFieldValue &other) const { return other.fFieldValues == fFieldValues; } - }; + // clang-format off + /** + \class ROOT::Experimental::Internal::RNTupleJoinTable::REntryMapping + \ingroup NTuple + \brief Provides a mapping from one or several join field values to an entry index. + */ + // clang-format on + class REntryMapping { + private: + ////////////////////////////////////////////////////////////////////////// + /// Container for the combined hashes of join field values. + struct RCombinedJoinFieldValue { + std::vector fJoinFieldValues; + + RCombinedJoinFieldValue(const std::vector &joinFieldValues) + : fJoinFieldValues(joinFieldValues) + { + } - ///////////////////////////////////////////////////////////////////////////// - /// Hash combining the individual join field value hashes from RCombinedJoinFieldValue. Uses the implementation from - /// `boost::hash_combine` (see - /// https://www.boost.org/doc/libs/1_55_0/doc/html/hash/reference.html#boost.hash_combine). - struct RCombinedJoinFieldValueHash { - inline std::size_t operator()(const RCombinedJoinFieldValue &joinFieldValue) const - { - std::size_t combinedHash = 0; - for (const auto &fieldVal : joinFieldValue.fFieldValues) { - combinedHash ^= fieldVal + 0x9e3779b9 + (fieldVal << 6) + (fieldVal >> 2); + inline bool operator==(const RCombinedJoinFieldValue &other) const + { + return other.fJoinFieldValues == fJoinFieldValues; } - return combinedHash; - } - }; + }; + + ////////////////////////////////////////////////////////////////////////// + /// Hash combining the individual join field value hashes from RCombinedJoinFieldValue. Uses the implementation + /// from `boost::hash_combine` (see + /// https://www.boost.org/doc/libs/1_55_0/doc/html/hash/reference.html#boost.hash_combine). + struct RCombinedJoinFieldValueHash { + inline std::size_t operator()(const RCombinedJoinFieldValue &joinFieldValue) const + { + std::size_t combinedHash = 0; + for (const auto &fieldVal : joinFieldValue.fJoinFieldValues) { + combinedHash ^= fieldVal + 0x9e3779b9 + (fieldVal << 6) + (fieldVal >> 2); + } + return combinedHash; + } + }; + + /// The mapping itself. Maps field values (or combinations thereof in case the join key is composed of multiple + /// fields) to their respective entry numbers. + std::unordered_map, RCombinedJoinFieldValueHash> + fMapping; + + /// Names of the join fields used for the mapping to their respective entry indexes. + std::vector fJoinFieldNames; - /// The join table itself. Maps field values (or combinations thereof in case the join table is defined for multiple - /// fields) to their respective entry indexes. - std::unordered_map, RCombinedJoinFieldValueHash> fJoinTable; + /// The size (in bytes) for each join field, corresponding to `fJoinFieldNames`. This information is stored to be + /// able to properly cast incoming void pointers to the join field values in `GetEntryIndexes`. + std::vector fJoinFieldValueSizes; + public: + ////////////////////////////////////////////////////////////////////////// + /// \brief Get the entry indexes for this entry mapping. + const std::vector *GetEntryIndexes(std::vector valuePtrs) const; + + ////////////////////////////////////////////////////////////////////////// + /// \brief Create a new entry mapping. + /// + /// \param[in] pageSource The page source of the RNTuple with the entries to map. + /// \param[in] joinFieldNames Names of the join fields to use in the mapping. + REntryMapping(RPageSource &pageSource, const std::vector &joinFieldNames); + }; /// Names of the join fields used for the mapping to their respective entry indexes. std::vector fJoinFieldNames; - /// The size (in bytes) for each join field, corresponding to `fJoinFieldNames`. This information is stored to be - /// able to properly cast incoming void pointers to the join field values in `GetEntryIndexes`. - std::vector fJoinFieldValueSizes; - - /// Only built join tables can be queried. - bool fIsBuilt = false; + /// Partitions of one or multiple entry mappings. + std::unordered_map>> fPartitions; ///////////////////////////////////////////////////////////////////////////// /// \brief Create an a new RNTupleJoinTable for the RNTuple represented by the provided page source. @@ -87,12 +117,6 @@ private: /// allowed. RNTupleJoinTable(const std::vector &fieldNames) : fJoinFieldNames(fieldNames) {} - ///////////////////////////////////////////////////////////////////////////// - /// \brief Ensure the RNTupleJoinTable has been built. - /// - /// \throws RException If the join table has not been built, and can therefore not be used yet. - void EnsureBuilt() const; - public: RNTupleJoinTable(const RNTupleJoinTable &other) = delete; RNTupleJoinTable &operator=(const RNTupleJoinTable &other) = delete; @@ -110,42 +134,49 @@ public: static std::unique_ptr Create(const std::vector &fieldNames); ///////////////////////////////////////////////////////////////////////////// - /// \brief Build the join table. + /// \brief Add an entry mapping to the join table. + /// /// - /// \param[in] pageSource The page source of the RNTuple for which to build the join table. + /// \param[in] pageSource The page source of the RNTuple with the entries to map. + /// \param[in] partitionKey Which partition to add the mapping to. If not provided, it will be added to the default + /// partition. /// - /// Only a built join table can be queried (with RNTupleJoinTable::GetEntryIndexes). - void Build(RPageSource &pageSource); + /// \return A reference to the updated join table. + RNTupleJoinTable &Add(RPageSource &pageSource, PartitionKey_t partitionKey = kDefaultPartitionKey); ///////////////////////////////////////////////////////////////////////////// - /// \brief Get the number of entries in the join table. + /// \brief Get all entry indexes for the given join field value(s) within a partition. /// - /// \return The number of entries in the join table. + /// \param[in] valuePtrs A vector of pointers to the join field values to look up. + /// \param[in] partitionKey The partition key to use for the lookup. If not provided, it will use the default + /// partition key. /// - /// \note This does not have to correspond to the number of entries in the original RNTuple. If the original RNTuple - /// contains duplicate join field values, they are counted as one. - std::size_t GetSize() const - { - EnsureBuilt(); - return fJoinTable.size(); - } + /// \return The entry numbers that correspond to `valuePtrs`. When there are no corresponding entries, an empty + /// vector is returned. + std::vector + GetEntryIndexes(const std::vector &valuePtrs, PartitionKey_t partitionKey = kDefaultPartitionKey) const; ///////////////////////////////////////////////////////////////////////////// - /// \brief Whether the join table has been built (and therefore ready to be used). + /// \brief Get all entry indexes for the given join field value(s) for a specific set of partitions. /// - /// \return `true` if the join table has been built. + /// \param[in] valuePtrs A vector of pointers to the join field values to look up. + /// \param[in] partitionKeys The partition keys to use for the lookup. /// - /// Only built join tables can be queried. - bool IsBuilt() const { return fIsBuilt; } + /// \return The entry numbers that correspond to `valuePtrs`, grouped by partition. When there are no corresponding + /// entries, an empty map is returned. + std::unordered_map> + GetPartitionedEntryIndexes(const std::vector &valuePtrs, + const std::vector &partitionKeys) const; ///////////////////////////////////////////////////////////////////////////// - /// \brief Get all entry indexes for the given join field value(s). + /// \brief Get all entry indexes for the given join field value(s) for all partitions. /// /// \param[in] valuePtrs A vector of pointers to the join field values to look up. /// - /// \return The entry indexes that correspond to `valuePtrs`. An empty vector is returned when there are no matching - /// indexes. - std::vector GetEntryIndexes(const std::vector &valuePtrs) const; + /// \return The entry numbers that correspond to `valuePtrs`, grouped by partition. When there are no corresponding + /// entries, an empty map is returned. + std::unordered_map> + GetPartitionedEntryIndexes(const std::vector &valuePtrs) const; }; } // namespace Internal } // namespace Experimental diff --git a/tree/ntuple/v7/inc/ROOT/RNTupleProcessor.hxx b/tree/ntuple/v7/inc/ROOT/RNTupleProcessor.hxx index 7f9cbda45fe35..491a371c0b352 100644 --- a/tree/ntuple/v7/inc/ROOT/RNTupleProcessor.hxx +++ b/tree/ntuple/v7/inc/ROOT/RNTupleProcessor.hxx @@ -503,6 +503,7 @@ private: /// Tokens representing the join fields present in the main RNTuple std::vector fJoinFieldTokens; std::vector> fJoinTables; + bool fJoinTablesAreBuilt = false; bool HasJoinTable() const { return fJoinTables.size() > 0; } diff --git a/tree/ntuple/v7/src/RNTupleJoinTable.cxx b/tree/ntuple/v7/src/RNTupleJoinTable.cxx index 65396364fa2e1..7fe7f20bf14b2 100644 --- a/tree/ntuple/v7/src/RNTupleJoinTable.cxx +++ b/tree/ntuple/v7/src/RNTupleJoinTable.cxx @@ -33,25 +33,10 @@ CastValuePtr(void *valuePtr, std::size_t fieldValueSize) } } // anonymous namespace -void ROOT::Experimental::Internal::RNTupleJoinTable::EnsureBuilt() const +ROOT::Experimental::Internal::RNTupleJoinTable::REntryMapping::REntryMapping( + RPageSource &pageSource, const std::vector &joinFieldNames) + : fJoinFieldNames(joinFieldNames) { - if (!fIsBuilt) - throw RException(R__FAIL("join table has not been built yet")); -} - -std::unique_ptr -ROOT::Experimental::Internal::RNTupleJoinTable::Create(const std::vector &fieldNames) -{ - auto joinTable = std::unique_ptr(new RNTupleJoinTable(fieldNames)); - - return joinTable; -} - -void ROOT::Experimental::Internal::RNTupleJoinTable::Build(RPageSource &pageSource) -{ - if (fIsBuilt) - return; - static const std::unordered_set allowedTypes = {"std::int8_t", "std::int16_t", "std::int32_t", "std::int64_t", "std::uint8_t", "std::uint16_t", "std::uint32_t", "std::uint64_t"}; @@ -59,7 +44,6 @@ void ROOT::Experimental::Internal::RNTupleJoinTable::Build(RPageSource &pageSour pageSource.Attach(); auto desc = pageSource.GetSharedDescriptorGuard(); - fJoinFieldValueSizes.reserve(fJoinFieldNames.size()); std::vector> fields; std::vector fieldValues; fieldValues.reserve(fJoinFieldNames.size()); @@ -78,7 +62,6 @@ void ROOT::Experimental::Internal::RNTupleJoinTable::Build(RPageSource &pageSour } auto field = fieldDesc.CreateField(desc.GetRef()); - ROOT::Internal::CallConnectPageSourceOnField(*field, pageSource); fieldValues.emplace_back(field->CreateValue()); @@ -98,17 +81,14 @@ void ROOT::Experimental::Internal::RNTupleJoinTable::Build(RPageSource &pageSour auto valuePtr = fieldValue.GetPtr(); joinFieldValues.push_back(CastValuePtr(valuePtr.get(), fieldValue.GetField().GetValueSize())); } - fJoinTable[RCombinedJoinFieldValue(joinFieldValues)].push_back(i); - } - fIsBuilt = true; + fMapping[RCombinedJoinFieldValue(joinFieldValues)].push_back(i); + } } -std::vector -ROOT::Experimental::Internal::RNTupleJoinTable::GetEntryIndexes(const std::vector &valuePtrs) const +const std::vector * +ROOT ::Experimental::Internal::RNTupleJoinTable::REntryMapping::GetEntryIndexes(std::vector valuePtrs) const { - EnsureBuilt(); - if (valuePtrs.size() != fJoinFieldNames.size()) throw RException(R__FAIL("number of value pointers must match number of join fields")); @@ -119,10 +99,80 @@ ROOT::Experimental::Internal::RNTupleJoinTable::GetEntryIndexes(const std::vecto joinFieldValues.push_back(CastValuePtr(valuePtrs[i], fJoinFieldValueSizes[i])); } - auto entryIdxs = fJoinTable.find(RCombinedJoinFieldValue(joinFieldValues)); + if (const auto &entries = fMapping.find(RCombinedJoinFieldValue(joinFieldValues)); entries != fMapping.end()) { + return &entries->second; + } + + return nullptr; +} + +//------------------------------------------------------------------------------ + +std::unique_ptr +ROOT::Experimental::Internal::RNTupleJoinTable::Create(const std::vector &fieldNames) +{ + return std::unique_ptr(new RNTupleJoinTable(fieldNames)); +} + +ROOT::Experimental::Internal::RNTupleJoinTable & +ROOT::Experimental::Internal::RNTupleJoinTable::Add(RPageSource &pageSource, PartitionKey_t partitionKey) +{ + auto joinMapping = std::unique_ptr(new REntryMapping(pageSource, fJoinFieldNames)); + fPartitions[partitionKey].emplace_back(std::move(joinMapping)); + + return *this; +} - if (entryIdxs == fJoinTable.end()) +std::vector +ROOT::Experimental::Internal::RNTupleJoinTable::GetEntryIndexes(const std::vector &valuePtrs, + PartitionKey_t partitionKey) const +{ + auto partition = fPartitions.find(partitionKey); + if (partition == fPartitions.end()) return {}; - return entryIdxs->second; + std::vector entryIdxs{}; + + for (const auto &joinMapping : partition->second) { + auto entriesForMapping = joinMapping->GetEntryIndexes(valuePtrs); + if (entriesForMapping) + entryIdxs.insert(entryIdxs.end(), entriesForMapping->begin(), entriesForMapping->end()); + } + + return entryIdxs; +} + +std::unordered_map> +ROOT::Experimental::Internal::RNTupleJoinTable::GetPartitionedEntryIndexes( + const std::vector &valuePtrs, const std::vector &partitionKeys) const +{ + std::unordered_map> entryIdxs{}; + + for (const auto &partitionKey : partitionKeys) { + auto entriesForPartition = GetEntryIndexes(valuePtrs, partitionKey); + if (!entriesForPartition.empty()) { + entryIdxs[partitionKey].insert(entryIdxs[partitionKey].end(), entriesForPartition.begin(), + entriesForPartition.end()); + } + } + + return entryIdxs; +} + +std::unordered_map> +ROOT::Experimental::Internal::RNTupleJoinTable::GetPartitionedEntryIndexes(const std::vector &valuePtrs) const +{ + std::unordered_map> entryIdxs{}; + + for (const auto &partition : fPartitions) { + for (const auto &joinMapping : partition.second) { + auto entriesForMapping = joinMapping->GetEntryIndexes(valuePtrs); + if (entriesForMapping) { + entryIdxs[partition.first].insert(entryIdxs[partition.first].end(), entriesForMapping->begin(), + entriesForMapping->end()); + } + } + } + + return entryIdxs; } diff --git a/tree/ntuple/v7/src/RNTupleProcessor.cxx b/tree/ntuple/v7/src/RNTupleProcessor.cxx index c60641f559ee4..a4beea5770ff7 100644 --- a/tree/ntuple/v7/src/RNTupleProcessor.cxx +++ b/tree/ntuple/v7/src/RNTupleProcessor.cxx @@ -538,8 +538,8 @@ ROOT::NTupleSize_t ROOT::Experimental::RNTupleJoinProcessor::LoadEntry(ROOT::NTu auxEntryIdxs.reserve(fJoinTables.size()); for (unsigned i = 0; i < fJoinTables.size(); ++i) { auto &joinTable = fJoinTables[i]; - if (!joinTable->IsBuilt()) - joinTable->Build(*fAuxiliaryPageSources[i]); + if (!fJoinTablesAreBuilt) + joinTable->Add(*fAuxiliaryPageSources[i]); auto entryIdxs = joinTable->GetEntryIndexes(valPtrs); diff --git a/tree/ntuple/v7/test/ntuple_join_table.cxx b/tree/ntuple/v7/test/ntuple_join_table.cxx index c15670d037445..8a10835b374c8 100644 --- a/tree/ntuple/v7/test/ntuple_join_table.cxx +++ b/tree/ntuple/v7/test/ntuple_join_table.cxx @@ -18,25 +18,20 @@ TEST(RNTupleJoinTable, Basic) auto pageSource = RPageSource::Create("ntuple", fileGuard.GetPath()); auto joinTable = RNTupleJoinTable::Create({"fld"}); - EXPECT_FALSE(joinTable->IsBuilt()); - try { - joinTable->GetEntryIndexes({nullptr}); - FAIL() << "querying an unbuilt join table should not be possible"; - } catch (const ROOT::RException &err) { - EXPECT_THAT(err.what(), testing::HasSubstr("join table has not been built yet")); - } + std::uint64_t fldValue = 0; - joinTable->Build(*pageSource); - EXPECT_TRUE(joinTable->IsBuilt()); + // No entry mappings have been added to the join table yet + EXPECT_EQ(std::vector{}, joinTable->GetEntryIndexes({&fldValue})); - EXPECT_EQ(10UL, joinTable->GetSize()); + // Now add the entry mapping for the page source + joinTable->Add(*pageSource); auto ntuple = RNTupleReader::Open("ntuple", fileGuard.GetPath()); auto fld = ntuple->GetView("fld"); for (unsigned i = 0; i < ntuple->GetNEntries(); ++i) { - auto fldValue = fld(i); + fldValue = fld(i); EXPECT_EQ(fldValue, i * 2); EXPECT_EQ(joinTable->GetEntryIndexes({&fldValue}), std::vector{static_cast(i)}); } @@ -59,12 +54,8 @@ TEST(RNTupleJoinTable, InvalidTypes) auto pageSource = RPageSource::Create("ntuple", fileGuard.GetPath()); - auto joinTable = RNTupleJoinTable::Create({"fldInt"}); - joinTable->Build(*pageSource); - EXPECT_EQ(1UL, joinTable->GetSize()); - try { - RNTupleJoinTable::Create({"fldFloat"})->Build(*pageSource); + RNTupleJoinTable::Create({"fldFloat"})->Add(*pageSource); FAIL() << "non-integral-type field should not be allowed as join fields"; } catch (const ROOT::RException &err) { EXPECT_THAT( @@ -74,7 +65,7 @@ TEST(RNTupleJoinTable, InvalidTypes) } try { - RNTupleJoinTable::Create({"fldString"})->Build(*pageSource); + RNTupleJoinTable::Create({"fldString"})->Add(*pageSource); FAIL() << "non-integral-type field should not be allowed as join fields"; } catch (const ROOT::RException &err) { EXPECT_THAT( @@ -84,7 +75,7 @@ TEST(RNTupleJoinTable, InvalidTypes) } try { - RNTupleJoinTable::Create({"fldStruct"})->Build(*pageSource); + RNTupleJoinTable::Create({"fldStruct"})->Add(*pageSource); FAIL() << "non-integral-type field should not be allowed as join fields"; } catch (const ROOT::RException &err) { EXPECT_THAT(err.what(), testing::HasSubstr("cannot use field \"fldStruct\" with type \"CustomStruct\" in " @@ -127,7 +118,7 @@ TEST(RNTupleJoinTable, SparseSecondary) auto secondaryPageSource = RPageSource::Create("secondary", fileGuardSecondary.GetPath()); auto joinTable = RNTupleJoinTable::Create({"event"}); - joinTable->Build(*secondaryPageSource); + joinTable->Add(*secondaryPageSource); auto secondaryNTuple = RNTupleReader::Open("secondary", fileGuardSecondary.GetPath()); auto fldX = secondaryNTuple->GetView("x"); @@ -170,9 +161,7 @@ TEST(RNTupleJoinTable, MultipleFields) auto pageSource = RPageSource::Create("ntuple", fileGuard.GetPath()); auto joinTable = RNTupleJoinTable::Create({"run", "event"}); - joinTable->Build(*pageSource); - - EXPECT_EQ(15ULL, joinTable->GetSize()); + joinTable->Add(*pageSource); auto ntuple = RNTupleReader::Open("ntuple", fileGuard.GetPath()); auto fld = ntuple->GetView("x"); @@ -229,9 +218,7 @@ TEST(RNTupleJoinTable, MultipleMatches) auto pageSource = RPageSource::Create("ntuple", fileGuard.GetPath()); auto joinTable = RNTupleJoinTable::Create({"run"}); - joinTable->Build(*pageSource); - - EXPECT_EQ(3ULL, joinTable->GetSize()); + joinTable->Add(*pageSource); std::uint64_t run = 1; auto entryIdxs = joinTable->GetEntryIndexes({&run}); @@ -246,3 +233,89 @@ TEST(RNTupleJoinTable, MultipleMatches) entryIdxs = joinTable->GetEntryIndexes({&(++run)}); EXPECT_EQ(std::vector{}, entryIdxs); } + +TEST(RNTupleJoinTable, Partitions) +{ + auto fnWriteNTuple = [](const FileRaii &fileGuard, std::uint16_t run) { + auto model = RNTupleModel::Create(); + *model->MakeField("run") = run; + auto fldI = model->MakeField("i"); + + auto ntuple = RNTupleWriter::Recreate(std::move(model), "ntuple", fileGuard.GetPath()); + + for (int i = 0; i < 5; ++i) { + *fldI = i; + ntuple->Fill(); + } + }; + + auto joinTable = RNTupleJoinTable::Create({"i"}); + + std::vector fileGuards; + fileGuards.emplace_back("test_ntuple_join_partition1.root"); + fileGuards.emplace_back("test_ntuple_join_partition2.root"); + fileGuards.emplace_back("test_ntuple_join_partition3.root"); + fileGuards.emplace_back("test_ntuple_join_partition4.root"); + + std::int16_t runNumbers[4] = {1, 2, 3, 3}; + std::vector> pageSources; + + // Create four ntuples where with their corresponding run numbers and add them to the join table using this run + // number as the partition key. + for (unsigned i = 0; i < fileGuards.size(); ++i) { + fnWriteNTuple(fileGuards[i], runNumbers[i]); + pageSources.emplace_back(RPageSource::Create("ntuple", fileGuards[i].GetPath())); + joinTable->Add(*pageSources.back(), runNumbers[i]); + } + + std::vector openSpec; + for (const auto &fileGuard : fileGuards) { + openSpec.emplace_back("ntuple", fileGuard.GetPath()); + } + auto proc = RNTupleProcessor::CreateChain(openSpec); + + auto i = proc->GetEntry().GetPtr("i"); + auto run = proc->GetEntry().GetPtr("run"); + + // When getting the entry indexes for all partitions, we expect multiple resulting entry indexes (i.e., one entry + // index for each ntuple in the chain). + *i = 0; + std::unordered_map> expectedEntryIdxMap = { + {1, {0}}, + {2, {0}}, + {3, {0, 0}}, + }; + EXPECT_EQ(expectedEntryIdxMap, joinTable->GetPartitionedEntryIndexes({i.get()})); + EXPECT_EQ(expectedEntryIdxMap, joinTable->GetPartitionedEntryIndexes({i.get()}, {1, 2, 3})); + + expectedEntryIdxMap = { + {1, {0}}, + {3, {0, 0}}, + }; + EXPECT_EQ(expectedEntryIdxMap, joinTable->GetPartitionedEntryIndexes({i.get()}, {1, 3})); + + // Calling GetEntryIndexes with a partition key not present in the join table shouldn't fail; it should just return + // an empty vector. + EXPECT_EQ(std::vector{}, joinTable->GetEntryIndexes({i.get()}, 4)); + + // Similarly, calling GetEntryIndexes with a partition key that is present in the join table but a join value that + // isn't shouldn't fail; it should just return an empty vector. + *i = 99; + EXPECT_EQ(std::vector{}, joinTable->GetEntryIndexes({i.get()}, 3)); + + expectedEntryIdxMap.clear(); + EXPECT_EQ(expectedEntryIdxMap, joinTable->GetPartitionedEntryIndexes({i.get()})); + EXPECT_EQ(expectedEntryIdxMap, joinTable->GetPartitionedEntryIndexes({i.get()}, {1, 2, 3})); + EXPECT_EQ(expectedEntryIdxMap, joinTable->GetPartitionedEntryIndexes({i.get()}, {1, 3})); + + for (auto it = proc->begin(); it != proc->end(); it++) { + auto entryIdxs = joinTable->GetEntryIndexes({i.get()}, *run); + + // Because two ntuples store their events under run number 3 and their entries for `i` are identical, two entry + // indexes are expected. For the other case (run == 1 and run == 2), only one entry index is expected. + if (*run == 3) + EXPECT_EQ(entryIdxs.size(), 2ul); + else + EXPECT_EQ(entryIdxs.size(), 1ul); + } +} From 156f69ed405e9299e0cf56b255d5c99574f2f339 Mon Sep 17 00:00:00 2001 From: Florine de Geus Date: Fri, 7 Mar 2025 17:01:34 +0100 Subject: [PATCH 2/4] [ntuple][NFC] Rename `fieldNames` to `joinFieldNames` --- tree/ntuple/v7/inc/ROOT/RNTupleJoinTable.hxx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tree/ntuple/v7/inc/ROOT/RNTupleJoinTable.hxx b/tree/ntuple/v7/inc/ROOT/RNTupleJoinTable.hxx index 2adde77826a86..63d03b329515d 100644 --- a/tree/ntuple/v7/inc/ROOT/RNTupleJoinTable.hxx +++ b/tree/ntuple/v7/inc/ROOT/RNTupleJoinTable.hxx @@ -113,9 +113,9 @@ private: ///////////////////////////////////////////////////////////////////////////// /// \brief Create an a new RNTupleJoinTable for the RNTuple represented by the provided page source. /// - /// \param[in] fieldNames The names of the join fields to use for the join table. Only integral-type fields are + /// \param[in] joinFieldNames The names of the join fields to use for the join table. Only integral-type fields are /// allowed. - RNTupleJoinTable(const std::vector &fieldNames) : fJoinFieldNames(fieldNames) {} + RNTupleJoinTable(const std::vector &joinFieldNames) : fJoinFieldNames(joinFieldNames) {} public: RNTupleJoinTable(const RNTupleJoinTable &other) = delete; @@ -127,11 +127,11 @@ public: ///////////////////////////////////////////////////////////////////////////// /// \brief Create an RNTupleJoinTable from an existing RNTuple. /// - /// \param[in] fieldNames The names of the join fields to use for the join table. Only integral-type fields are + /// \param[in] joinFieldNames The names of the join fields to use for the join table. Only integral-type fields are /// allowed. /// /// \return A pointer to the newly-created join table. - static std::unique_ptr Create(const std::vector &fieldNames); + static std::unique_ptr Create(const std::vector &joinFieldNames); ///////////////////////////////////////////////////////////////////////////// /// \brief Add an entry mapping to the join table. From d3e81317677fb7386fa6811db92bd713d44be615 Mon Sep 17 00:00:00 2001 From: Florine de Geus Date: Tue, 18 Mar 2025 15:16:41 +0100 Subject: [PATCH 3/4] [ntuple][NFC] Rename `joinFieldValues` to `castJoinValues` --- tree/ntuple/v7/src/RNTupleJoinTable.cxx | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/tree/ntuple/v7/src/RNTupleJoinTable.cxx b/tree/ntuple/v7/src/RNTupleJoinTable.cxx index 7fe7f20bf14b2..0ed9204604f8d 100644 --- a/tree/ntuple/v7/src/RNTupleJoinTable.cxx +++ b/tree/ntuple/v7/src/RNTupleJoinTable.cxx @@ -69,20 +69,21 @@ ROOT::Experimental::Internal::RNTupleJoinTable::REntryMapping::REntryMapping( fields.emplace_back(std::move(field)); } - std::vector joinFieldValues; - joinFieldValues.reserve(fJoinFieldNames.size()); + std::vector castJoinValues; + castJoinValues.reserve(fJoinFieldNames.size()); for (unsigned i = 0; i < pageSource.GetNEntries(); ++i) { - joinFieldValues.clear(); + castJoinValues.clear(); + for (auto &fieldValue : fieldValues) { // TODO(fdegeus): use bulk reading fieldValue.Read(i); auto valuePtr = fieldValue.GetPtr(); - joinFieldValues.push_back(CastValuePtr(valuePtr.get(), fieldValue.GetField().GetValueSize())); + castJoinValues.push_back(CastValuePtr(valuePtr.get(), fieldValue.GetField().GetValueSize())); } - fMapping[RCombinedJoinFieldValue(joinFieldValues)].push_back(i); + fMapping[RCombinedJoinFieldValue(castJoinValues)].push_back(i); } } @@ -92,14 +93,14 @@ ROOT ::Experimental::Internal::RNTupleJoinTable::REntryMapping::GetEntryIndexes( if (valuePtrs.size() != fJoinFieldNames.size()) throw RException(R__FAIL("number of value pointers must match number of join fields")); - std::vector joinFieldValues; - joinFieldValues.reserve(valuePtrs.size()); + std::vector castJoinValues; + castJoinValues.reserve(valuePtrs.size()); for (unsigned i = 0; i < valuePtrs.size(); ++i) { - joinFieldValues.push_back(CastValuePtr(valuePtrs[i], fJoinFieldValueSizes[i])); + castJoinValues.push_back(CastValuePtr(valuePtrs[i], fJoinFieldValueSizes[i])); } - if (const auto &entries = fMapping.find(RCombinedJoinFieldValue(joinFieldValues)); entries != fMapping.end()) { + if (const auto &entries = fMapping.find(RCombinedJoinFieldValue(castJoinValues)); entries != fMapping.end()) { return &entries->second; } From 14daddbb18850687c35176c96d83c14f6d8bd9e7 Mon Sep 17 00:00:00 2001 From: Florine de Geus Date: Tue, 18 Mar 2025 15:17:42 +0100 Subject: [PATCH 4/4] [ntuple][NFC] Rename `NTupleJoinValue_t` to `JoinValue_t` Since it is declared within `RNTupleJoinTable`, it is clear that this type belongs to RNTuple from the context. --- tree/ntuple/v7/inc/ROOT/RNTupleJoinTable.hxx | 9 +++------ tree/ntuple/v7/src/RNTupleJoinTable.cxx | 9 ++++----- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/tree/ntuple/v7/inc/ROOT/RNTupleJoinTable.hxx b/tree/ntuple/v7/inc/ROOT/RNTupleJoinTable.hxx index 63d03b329515d..ccf9bc3e2956c 100644 --- a/tree/ntuple/v7/inc/ROOT/RNTupleJoinTable.hxx +++ b/tree/ntuple/v7/inc/ROOT/RNTupleJoinTable.hxx @@ -35,7 +35,7 @@ namespace Internal { // clang-format on class RNTupleJoinTable { public: - using NTupleJoinValue_t = std::uint64_t; + using JoinValue_t = std::uint64_t; using PartitionKey_t = std::uint64_t; static constexpr PartitionKey_t kDefaultPartitionKey = PartitionKey_t(-1); @@ -52,12 +52,9 @@ private: ////////////////////////////////////////////////////////////////////////// /// Container for the combined hashes of join field values. struct RCombinedJoinFieldValue { - std::vector fJoinFieldValues; + std::vector fJoinFieldValues; - RCombinedJoinFieldValue(const std::vector &joinFieldValues) - : fJoinFieldValues(joinFieldValues) - { - } + RCombinedJoinFieldValue(const std::vector &joinFieldValues) : fJoinFieldValues(joinFieldValues) {} inline bool operator==(const RCombinedJoinFieldValue &other) const { diff --git a/tree/ntuple/v7/src/RNTupleJoinTable.cxx b/tree/ntuple/v7/src/RNTupleJoinTable.cxx index 0ed9204604f8d..ac6896fd318ea 100644 --- a/tree/ntuple/v7/src/RNTupleJoinTable.cxx +++ b/tree/ntuple/v7/src/RNTupleJoinTable.cxx @@ -16,10 +16,9 @@ #include namespace { -ROOT::Experimental::Internal::RNTupleJoinTable::NTupleJoinValue_t -CastValuePtr(void *valuePtr, std::size_t fieldValueSize) +ROOT::Experimental::Internal::RNTupleJoinTable::JoinValue_t CastValuePtr(void *valuePtr, std::size_t fieldValueSize) { - ROOT::Experimental::Internal::RNTupleJoinTable::NTupleJoinValue_t value; + ROOT::Experimental::Internal::RNTupleJoinTable::JoinValue_t value; switch (fieldValueSize) { case 1: value = *reinterpret_cast(valuePtr); break; @@ -69,7 +68,7 @@ ROOT::Experimental::Internal::RNTupleJoinTable::REntryMapping::REntryMapping( fields.emplace_back(std::move(field)); } - std::vector castJoinValues; + std::vector castJoinValues; castJoinValues.reserve(fJoinFieldNames.size()); for (unsigned i = 0; i < pageSource.GetNEntries(); ++i) { @@ -93,7 +92,7 @@ ROOT ::Experimental::Internal::RNTupleJoinTable::REntryMapping::GetEntryIndexes( if (valuePtrs.size() != fJoinFieldNames.size()) throw RException(R__FAIL("number of value pointers must match number of join fields")); - std::vector castJoinValues; + std::vector castJoinValues; castJoinValues.reserve(valuePtrs.size()); for (unsigned i = 0; i < valuePtrs.size(); ++i) {