From 30cf055c2b3d075404377e7a24e7ddc1075351d5 Mon Sep 17 00:00:00 2001 From: Vincenzo Eduardo Padulano Date: Mon, 10 Mar 2025 07:27:36 +0100 Subject: [PATCH] [df] Improve semantics of data member Change the data member holding the column names which should be ignored in case of TTreeReader errors regarding missing branches, from std::vector to std::set. The column names should not be repeated to avoid having multiple times the same column name left in the data member, which results in leftover names even after the corresponding operation of the computation graph has finished. Once a column is found, it will be available for all nodes of the computation graph anyway. --- .../inc/ROOT/RDF/RDefaultValueFor.hxx | 4 ++-- .../inc/ROOT/RDF/RFilterWithMissingValues.hxx | 4 ++-- tree/dataframe/inc/ROOT/RDF/RLoopManager.hxx | 21 ++++++++++++------- tree/dataframe/src/RDFInterfaceUtils.cxx | 5 ++--- tree/treeplayer/inc/ROOT/TTreeProcessorMT.hxx | 8 +++---- tree/treeplayer/inc/TTreeReader.h | 5 +++-- tree/treeplayer/src/TTreeProcessorMT.cxx | 6 +++--- tree/treeplayer/src/TTreeReader.cxx | 7 +++---- tree/treeplayer/src/TTreeReaderArray.cxx | 6 ++---- tree/treeplayer/src/TTreeReaderValue.cxx | 3 +-- 10 files changed, 36 insertions(+), 33 deletions(-) diff --git a/tree/dataframe/inc/ROOT/RDF/RDefaultValueFor.hxx b/tree/dataframe/inc/ROOT/RDF/RDefaultValueFor.hxx index 6275cd3fef505..c4de1f6779bc7 100644 --- a/tree/dataframe/inc/ROOT/RDF/RDefaultValueFor.hxx +++ b/tree/dataframe/inc/ROOT/RDF/RDefaultValueFor.hxx @@ -76,7 +76,7 @@ public: { fLoopManager->Register(this); // We suppress errors that TTreeReader prints regarding the missing branch - fLoopManager->GetSuppressErrorsForMissingBranches().push_back(fColumnNames[0]); + fLoopManager->InsertSuppressErrorsForMissingBranch(fColumnNames[0]); } RDefaultValueFor(const RDefaultValueFor &) = delete; @@ -86,7 +86,7 @@ public: ~RDefaultValueFor() { fLoopManager->Deregister(this); - ROOT::Internal::RDF::Erase(fColumnNames[0], fLoopManager->GetSuppressErrorsForMissingBranches()); + fLoopManager->EraseSuppressErrorsForMissingBranch(fColumnNames[0]); } void InitSlot(TTreeReader *r, unsigned int slot) final diff --git a/tree/dataframe/inc/ROOT/RDF/RFilterWithMissingValues.hxx b/tree/dataframe/inc/ROOT/RDF/RFilterWithMissingValues.hxx index 9a7494a7f9120..34a0466d6c486 100644 --- a/tree/dataframe/inc/ROOT/RDF/RFilterWithMissingValues.hxx +++ b/tree/dataframe/inc/ROOT/RDF/RFilterWithMissingValues.hxx @@ -81,7 +81,7 @@ public: { fLoopManager->Register(this); // We suppress errors that TTreeReader prints regarding the missing branch - fLoopManager->GetSuppressErrorsForMissingBranches().push_back(fColumnNames[0]); + fLoopManager->InsertSuppressErrorsForMissingBranch(fColumnNames[0]); } RFilterWithMissingValues(const RFilterWithMissingValues &) = delete; @@ -93,7 +93,7 @@ public: // must Deregister objects from the RLoopManager here, before the fPrevNodePtr data member is destroyed: // otherwise if fPrevNodePtr is the RLoopManager, it will be destroyed before the calls to Deregister happen. fLoopManager->Deregister(this); - ROOT::Internal::RDF::Erase(fColumnNames[0], fLoopManager->GetSuppressErrorsForMissingBranches()); + fLoopManager->EraseSuppressErrorsForMissingBranch(fColumnNames[0]); } bool CheckFilters(unsigned int slot, Long64_t entry) final diff --git a/tree/dataframe/inc/ROOT/RDF/RLoopManager.hxx b/tree/dataframe/inc/ROOT/RDF/RLoopManager.hxx index 0343b51930e66..d3d55520351d8 100644 --- a/tree/dataframe/inc/ROOT/RDF/RLoopManager.hxx +++ b/tree/dataframe/inc/ROOT/RDF/RLoopManager.hxx @@ -196,7 +196,7 @@ class RLoopManager : public RNodeBase { // List of branches for which we want to suppress the printed error about // missing branch when switching to a new tree. This is modified by readers, // so must be declared before them in this class. - std::vector fSuppressErrorsForMissingBranches{}; + std::set fSuppressErrorsForMissingBranches{}; ROOT::Internal::RDF::RStringCache fCachedColNames; std::set>> fUniqueDefinesWithReaders; @@ -293,15 +293,22 @@ public: return fUniqueVariationsWithReaders; } - std::vector &GetSuppressErrorsForMissingBranches() { return fSuppressErrorsForMissingBranches; } - const std::vector &GetSuppressErrorsForMissingBranches() const - { - return fSuppressErrorsForMissingBranches; - } - void SetTTreeLifeline(std::any lifeline); void SetDataSource(std::unique_ptr dataSource); + + void InsertSuppressErrorsForMissingBranch(const std::string &branchName) + { + fSuppressErrorsForMissingBranches.insert(branchName); + } + void EraseSuppressErrorsForMissingBranch(const std::string &branchName) + { + fSuppressErrorsForMissingBranches.erase(branchName); + } + const std::set &GetSuppressErrorsForMissingBranches() const + { + return fSuppressErrorsForMissingBranches; + } }; /// \brief Create an RLoopManager that reads a TChain. diff --git a/tree/dataframe/src/RDFInterfaceUtils.cxx b/tree/dataframe/src/RDFInterfaceUtils.cxx index 113fce2d127c2..ae88fbb86d4b0 100644 --- a/tree/dataframe/src/RDFInterfaceUtils.cxx +++ b/tree/dataframe/src/RDFInterfaceUtils.cxx @@ -931,10 +931,9 @@ ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColu // Look for a possible overlap between the unknown columns and the // columns we should ignore for the purpose of the following exception std::set intersection; - auto colsToIgnore = lm.GetSuppressErrorsForMissingBranches(); + const auto &colsToIgnore = lm.GetSuppressErrorsForMissingBranches(); std::sort(unknownColumns.begin(), unknownColumns.end()); - std::sort(colsToIgnore.begin(), colsToIgnore.end()); - std::set_intersection(unknownColumns.begin(), unknownColumns.end(), colsToIgnore.begin(), colsToIgnore.end(), + std::set_intersection(unknownColumns.cbegin(), unknownColumns.cend(), colsToIgnore.cbegin(), colsToIgnore.cend(), std::inserter(intersection, intersection.begin())); if (intersection.empty()) { std::string errMsg = std::string("Unknown column") + (unknownColumns.size() > 1 ? "s: " : ": "); diff --git a/tree/treeplayer/inc/ROOT/TTreeProcessorMT.hxx b/tree/treeplayer/inc/ROOT/TTreeProcessorMT.hxx index da76aa44f14e3..168df106d3682 100644 --- a/tree/treeplayer/inc/ROOT/TTreeProcessorMT.hxx +++ b/tree/treeplayer/inc/ROOT/TTreeProcessorMT.hxx @@ -72,7 +72,7 @@ public: const std::vector &fileNames, const ROOT::TreeUtils::RFriendInfo &friendInfo, const TEntryList &entryList, const std::vector &nEntries, - const std::vector &suppressErrorsForMissingBranches); + const std::set &suppressErrorsForMissingBranches); void Reset(); }; } // End of namespace Internal @@ -97,7 +97,7 @@ private: // List of branches for which we want to suppress the printed error about // missing branch when switching to a new tree - std::vector fSuppressErrorsForMissingBranches{}; + std::set fSuppressErrorsForMissingBranches{}; public: TTreeProcessorMT(std::string_view filename, std::string_view treename = "", UInt_t nThreads = 0u, @@ -109,10 +109,10 @@ public: const std::pair &globalRange = {0, std::numeric_limits::max()}): TTreeProcessorMT(std::vector(filenames), treename, nThreads, globalRange) {} TTreeProcessorMT(TTree &tree, const TEntryList &entries, UInt_t nThreads = 0u, - const std::vector &suppressErrorsForMissingBranches = {}); + const std::set &suppressErrorsForMissingBranches = {}); TTreeProcessorMT(TTree &tree, UInt_t nThreads = 0u, const std::pair &globalRange = {0, std::numeric_limits::max()}, - const std::vector &suppressErrorsForMissingBranches = {}); + const std::set &suppressErrorsForMissingBranches = {}); void Process(std::function func); diff --git a/tree/treeplayer/inc/TTreeReader.h b/tree/treeplayer/inc/TTreeReader.h index ccf9215942953..5ba3ed17e3690 100644 --- a/tree/treeplayer/inc/TTreeReader.h +++ b/tree/treeplayer/inc/TTreeReader.h @@ -29,6 +29,7 @@ #include #include #include +#include #include class TDictionary; @@ -184,7 +185,7 @@ class TTreeReader : public TObject { TTreeReader(); TTreeReader(TTree *tree, TEntryList *entryList = nullptr, bool warnAboutLongerFriends = true, - const std::vector &suppressErrorsForMissingBranches = {}); + const std::set &suppressErrorsForMissingBranches = {}); TTreeReader(const char *keyname, TDirectory *dir, TEntryList *entryList = nullptr); TTreeReader(const char *keyname, TEntryList *entryList = nullptr) : TTreeReader(keyname, nullptr, entryList) {} @@ -338,7 +339,7 @@ class TTreeReader : public TObject { // List of branches for which we want to suppress the printed error about // missing branch when switching to a new tree - std::vector fSuppressErrorsForMissingBranches{}; + std::set fSuppressErrorsForMissingBranches{}; std::vector fMissingProxies{}; friend class ROOT::Internal::TTreeReaderValueBase; diff --git a/tree/treeplayer/src/TTreeProcessorMT.cxx b/tree/treeplayer/src/TTreeProcessorMT.cxx index 9acd1b7653046..f7c6fa9a266c9 100644 --- a/tree/treeplayer/src/TTreeProcessorMT.cxx +++ b/tree/treeplayer/src/TTreeProcessorMT.cxx @@ -288,7 +288,7 @@ std::unique_ptr TTreeView::GetTreeReader(Long64_t start, Long64_t end, const std::vector &treeNames, const std::vector &fileNames, const ROOT::TreeUtils::RFriendInfo &friendInfo, const TEntryList &entryList, const std::vector &nEntries, - const std::vector &suppressErrorsForMissingBranches) + const std::set &suppressErrorsForMissingBranches) { const bool hasEntryList = entryList.GetN() > 0; const bool usingLocalEntries = friendInfo.fFriendNames.empty() && !hasEntryList; @@ -412,7 +412,7 @@ TTreeProcessorMT::TTreeProcessorMT(const std::vector &filename /// \param[in] nThreads Number of threads to create in the underlying thread-pool. The semantics of this argument are /// the same as for TThreadExecutor. TTreeProcessorMT::TTreeProcessorMT(TTree &tree, const TEntryList &entries, UInt_t nThreads, - const std::vector &suppressErrorsForMissingBranches) + const std::set &suppressErrorsForMissingBranches) : fFileNames(Internal::TreeUtils::GetFileNamesFromTree(tree)), fTreeNames(Internal::TreeUtils::GetTreeFullPaths(tree)), fEntryList(entries), @@ -430,7 +430,7 @@ TTreeProcessorMT::TTreeProcessorMT(TTree &tree, const TEntryList &entries, UInt_ /// the same as for TThreadExecutor. /// \param[in] globalRange Global entry range to process, {begin (inclusive), end (exclusive)}. TTreeProcessorMT::TTreeProcessorMT(TTree &tree, UInt_t nThreads, const EntryRange &globalRange, - const std::vector &suppressErrorsForMissingBranches) + const std::set &suppressErrorsForMissingBranches) : fFileNames(Internal::TreeUtils::GetFileNamesFromTree(tree)), fTreeNames(Internal::TreeUtils::GetTreeFullPaths(tree)), fFriendInfo(Internal::TreeUtils::GetFriendInfo(tree, /*retrieveEntries*/ true)), diff --git a/tree/treeplayer/src/TTreeReader.cxx b/tree/treeplayer/src/TTreeReader.cxx index c881bb246ceb8..3a867213d62c8 100644 --- a/tree/treeplayer/src/TTreeReader.cxx +++ b/tree/treeplayer/src/TTreeReader.cxx @@ -201,7 +201,7 @@ TTreeReader::TTreeReader() : fNotify(this), fFriendProxies() {} /// per chain.SetEntryList(&entryList). TTreeReader::TTreeReader(TTree *tree, TEntryList *entryList /*= nullptr*/, bool warnAboutLongerFriends, - const std::vector &suppressErrorsForMissingBranches) + const std::set &suppressErrorsForMissingBranches) : fTree(tree), fEntryList(entryList), fNotify(this), @@ -384,9 +384,8 @@ bool TTreeReader::SetProxies() // where the first tree of the chain does not contain that branch. In such // case, we need to postpone the creation of the corresponding proxy until // we find the branch in a following tree of the chain. - const bool suppressErrorsForThisBranch = - (std::find(fSuppressErrorsForMissingBranches.cbegin(), fSuppressErrorsForMissingBranches.cend(), - reader->fBranchName.View()) != fSuppressErrorsForMissingBranches.cend()); + const bool suppressErrorsForThisBranch = (fSuppressErrorsForMissingBranches.find(reader->fBranchName.Data()) != + fSuppressErrorsForMissingBranches.cend()); // Because of the situation described above, we may have some proxies // already created and some not, if their branch was not available so far. // Make sure we do not recreate the proxy unnecessarily, unless the diff --git a/tree/treeplayer/src/TTreeReaderArray.cxx b/tree/treeplayer/src/TTreeReaderArray.cxx index 5ae74ae188267..e0278bbb9c8d5 100644 --- a/tree/treeplayer/src/TTreeReaderArray.cxx +++ b/tree/treeplayer/src/TTreeReaderArray.cxx @@ -493,10 +493,8 @@ void ROOT::Internal::TTreeReaderArrayBase::CreateProxy() // Tell the branch proxy to suppress the errors for missing branch if this // branch name is found in the list of suppressions - const bool suppressErrorsForThisBranch = - (std::find(fTreeReader->fSuppressErrorsForMissingBranches.cbegin(), - fTreeReader->fSuppressErrorsForMissingBranches.cend(), - fBranchName.Data()) != fTreeReader->fSuppressErrorsForMissingBranches.cend()); + const bool suppressErrorsForThisBranch = (fTreeReader->fSuppressErrorsForMissingBranches.find(fBranchName.Data()) != + fTreeReader->fSuppressErrorsForMissingBranches.cend()); TDictionary *branchActualType = nullptr; TBranch *branch = nullptr; diff --git a/tree/treeplayer/src/TTreeReaderValue.cxx b/tree/treeplayer/src/TTreeReaderValue.cxx index 8ab1b681e3949..b36989b0aae57 100644 --- a/tree/treeplayer/src/TTreeReaderValue.cxx +++ b/tree/treeplayer/src/TTreeReaderValue.cxx @@ -544,8 +544,7 @@ void ROOT::Internal::TTreeReaderValueBase::CreateProxy() // one tree in the chain has that branch const auto &suppressErrorsForMissingBranches = fTreeReader->fSuppressErrorsForMissingBranches; const bool suppressErrorsForThisBranch = - (std::find(suppressErrorsForMissingBranches.cbegin(), suppressErrorsForMissingBranches.cend(), - originalBranchName) != suppressErrorsForMissingBranches.cend()); + (suppressErrorsForMissingBranches.find(originalBranchName) != suppressErrorsForMissingBranches.cend()); if (!branch) { // We had an error, the branch name had no "." or we simply did not find anything.