Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ntuple] Extend RNTupleOpenSpec to allow creating processors from a TDirectory #17934

Merged
merged 4 commits into from
Mar 21, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 32 additions & 13 deletions tree/ntuple/v7/inc/ROOT/RNTupleProcessor.hxx
Original file line number Diff line number Diff line change
@@ -36,12 +36,32 @@ namespace Internal {
struct RNTupleProcessorEntryLoader;
} // namespace Internal

/// Used to specify the underlying RNTuples in RNTupleProcessor
struct RNTupleOpenSpec {
// clang-format off
/**
\class ROOT::Experimental::RNTupleOpenSpec
\ingroup NTuple
\brief Specification of the name and location of an RNTuple, used for creating a new RNTupleProcessor.

An RNTupleOpenSpec can be created by providing either a string with a path to the ROOT file or a pointer to the
TDirectory (or any of its subclasses) that contains the RNTuple.

Note that the RNTupleOpenSpec is *write-only*, to prevent usability issues with Python.
*/
// clang-format on
class RNTupleOpenSpec {
friend class RNTupleProcessor;
friend class RNTupleSingleProcessor;
friend class RNTupleJoinProcessor;

private:
std::string fNTupleName;
std::string fStorage;
std::variant<std::string, TDirectory *> fStorage;

public:
RNTupleOpenSpec(std::string_view n, TDirectory *s) : fNTupleName(n), fStorage(s) {}
RNTupleOpenSpec(std::string_view n, const std::string &s) : fNTupleName(n), fStorage(s) {}

RNTupleOpenSpec(std::string_view n, std::string_view s) : fNTupleName(n), fStorage(s) {}
std::unique_ptr<Internal::RPageSource> CreatePageSource() const;
};

// clang-format off
@@ -282,7 +302,7 @@ public:
///
/// \return A pointer to the newly created RNTupleProcessor.
static std::unique_ptr<RNTupleProcessor>
Create(const RNTupleOpenSpec &ntuple, std::unique_ptr<RNTupleModel> model = nullptr);
Create(RNTupleOpenSpec ntuple, std::unique_ptr<RNTupleModel> model = nullptr);

/////////////////////////////////////////////////////////////////////////////
/// \brief Create an RNTupleProcessor for a single RNTuple.
@@ -296,7 +316,7 @@ public:
///
/// \return A pointer to the newly created RNTupleProcessor.
static std::unique_ptr<RNTupleProcessor>
Create(const RNTupleOpenSpec &ntuple, std::string_view processorName, std::unique_ptr<RNTupleModel> model = nullptr);
Create(RNTupleOpenSpec ntuple, std::string_view processorName, std::unique_ptr<RNTupleModel> model = nullptr);

/////////////////////////////////////////////////////////////////////////////
/// \brief Create an RNTupleProcessor for a *chain* (i.e., a vertical combination) of RNTuples.
@@ -307,7 +327,7 @@ public:
///
/// \return A pointer to the newly created RNTupleProcessor.
static std::unique_ptr<RNTupleProcessor>
CreateChain(const std::vector<RNTupleOpenSpec> &ntuples, std::unique_ptr<RNTupleModel> model = nullptr);
CreateChain(std::vector<RNTupleOpenSpec> ntuples, std::unique_ptr<RNTupleModel> model = nullptr);

/////////////////////////////////////////////////////////////////////////////
/// \brief Create an RNTupleProcessor for a *chain* (i.e., a vertical combination) of RNTuples.
@@ -320,7 +340,7 @@ public:
/// one will be created based on the descriptor of the first RNTuple specified.
///
/// \return A pointer to the newly created RNTupleProcessor.
static std::unique_ptr<RNTupleProcessor> CreateChain(const std::vector<RNTupleOpenSpec> &ntuples,
static std::unique_ptr<RNTupleProcessor> CreateChain(std::vector<RNTupleOpenSpec> ntuples,
std::string_view processorName,
std::unique_ptr<RNTupleModel> model = nullptr);

@@ -367,7 +387,7 @@ public:
///
/// \return A pointer to the newly created RNTupleProcessor.
static std::unique_ptr<RNTupleProcessor>
CreateJoin(const RNTupleOpenSpec &primaryNTuple, const std::vector<RNTupleOpenSpec> &auxNTuples,
CreateJoin(RNTupleOpenSpec primaryNTuple, std::vector<RNTupleOpenSpec> auxNTuples,
const std::vector<std::string> &joinFields, std::vector<std::unique_ptr<RNTupleModel>> models = {});

/////////////////////////////////////////////////////////////////////////////
@@ -390,7 +410,7 @@ public:
///
/// \return A pointer to the newly created RNTupleProcessor.
static std::unique_ptr<RNTupleProcessor>
CreateJoin(const RNTupleOpenSpec &primaryNTuple, const std::vector<RNTupleOpenSpec> &auxNTuples,
CreateJoin(RNTupleOpenSpec primaryNTuple, std::vector<RNTupleOpenSpec> auxNTuples,
const std::vector<std::string> &joinFields, std::string_view processorName,
std::vector<std::unique_ptr<RNTupleModel>> models = {});
};
@@ -438,8 +458,7 @@ private:
/// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::Create, this is
/// the name of the underlying RNTuple.
/// \param[in] model The model that specifies which fields should be read by the processor.
RNTupleSingleProcessor(const RNTupleOpenSpec &ntuple, std::string_view processorName,
std::unique_ptr<RNTupleModel> model);
RNTupleSingleProcessor(RNTupleOpenSpec ntuple, std::string_view processorName, std::unique_ptr<RNTupleModel> model);
};

// clang-format off
@@ -534,7 +553,7 @@ private:
/// {mainNTuple, auxNTuple[0], ...}. The pointer returned by RNTupleModel::MakeField can be used to access a field's
/// value during the processor iteration. When an empty list is passed, the models are created from the descriptor of
/// each RNTuple specified in `mainNTuple` and `auxNTuple`.
RNTupleJoinProcessor(const RNTupleOpenSpec &mainNTuple, const std::vector<RNTupleOpenSpec> &auxNTuples,
RNTupleJoinProcessor(RNTupleOpenSpec mainNTuple, std::vector<RNTupleOpenSpec> auxNTuples,
const std::vector<std::string> &joinFields, std::string_view processorName,
std::vector<std::unique_ptr<RNTupleModel>> models = {});

87 changes: 48 additions & 39 deletions tree/ntuple/v7/src/RNTupleProcessor.cxx
Original file line number Diff line number Diff line change
@@ -16,47 +16,50 @@
#include <ROOT/RNTupleProcessor.hxx>

#include <ROOT/RFieldBase.hxx>
#include <ROOT/RNTuple.hxx>
#include <ROOT/RPageStorageFile.hxx>

namespace {
using ROOT::Experimental::RNTupleOpenSpec;
void EnsureUniqueNTupleNames(const RNTupleOpenSpec &primaryNTuple, const std::vector<RNTupleOpenSpec> &auxNTuples)
#include <TDirectory.h>

std::unique_ptr<ROOT::Experimental::Internal::RPageSource> ROOT::Experimental::RNTupleOpenSpec::CreatePageSource() const
{
std::unordered_set<std::string> uniqueNTupleNames{primaryNTuple.fNTupleName};
for (const auto &ntuple : auxNTuples) {
auto res = uniqueNTupleNames.emplace(ntuple.fNTupleName);
if (!res.second) {
throw ROOT::RException(R__FAIL("joining RNTuples with the same name is not allowed"));
}
}
if (const std::string *storagePath = std::get_if<std::string>(&fStorage))
return ROOT::Experimental::Internal::RPageSource::Create(fNTupleName, *storagePath);

auto dir = std::get<TDirectory *>(fStorage);
auto ntuple = std::unique_ptr<ROOT::RNTuple>(dir->Get<ROOT::RNTuple>(fNTupleName.c_str()));
return ROOT::Experimental::Internal::RPageSourceFile::CreateFromAnchor(*ntuple);
}
} // anonymous namespace

std::unique_ptr<ROOT::Experimental::RNTupleProcessor>
ROOT::Experimental::RNTupleProcessor::Create(const RNTupleOpenSpec &ntuple, std::unique_ptr<RNTupleModel> model)
ROOT::Experimental::RNTupleProcessor::Create(RNTupleOpenSpec ntuple, std::unique_ptr<RNTupleModel> model)
{
return Create(ntuple, ntuple.fNTupleName, std::move(model));
auto processorName = ntuple.fNTupleName;
return Create(std::move(ntuple), processorName, std::move(model));
}

std::unique_ptr<ROOT::Experimental::RNTupleProcessor>
ROOT::Experimental::RNTupleProcessor::Create(const RNTupleOpenSpec &ntuple, std::string_view processorName,
ROOT::Experimental::RNTupleProcessor::Create(RNTupleOpenSpec ntuple, std::string_view processorName,
std::unique_ptr<RNTupleModel> model)
{
return std::unique_ptr<RNTupleSingleProcessor>(new RNTupleSingleProcessor(ntuple, processorName, std::move(model)));
return std::unique_ptr<RNTupleSingleProcessor>(
new RNTupleSingleProcessor(std::move(ntuple), processorName, std::move(model)));
}

std::unique_ptr<ROOT::Experimental::RNTupleProcessor>
ROOT::Experimental::RNTupleProcessor::CreateChain(const std::vector<RNTupleOpenSpec> &ntuples,
ROOT::Experimental::RNTupleProcessor::CreateChain(std::vector<RNTupleOpenSpec> ntuples,
std::unique_ptr<RNTupleModel> model)
{
if (ntuples.empty())
throw RException(R__FAIL("at least one RNTuple must be provided"));

return CreateChain(ntuples, ntuples[0].fNTupleName, std::move(model));
auto processorName = ntuples[0].fNTupleName;
return CreateChain(std::move(ntuples), processorName, std::move(model));
}

std::unique_ptr<ROOT::Experimental::RNTupleProcessor>
ROOT::Experimental::RNTupleProcessor::CreateChain(const std::vector<RNTupleOpenSpec> &ntuples,
std::string_view processorName, std::unique_ptr<RNTupleModel> model)
ROOT::Experimental::RNTupleProcessor::CreateChain(std::vector<RNTupleOpenSpec> ntuples, std::string_view processorName,
std::unique_ptr<RNTupleModel> model)
{
if (ntuples.empty())
throw RException(R__FAIL("at least one RNTuple must be provided"));
@@ -66,13 +69,13 @@ ROOT::Experimental::RNTupleProcessor::CreateChain(const std::vector<RNTupleOpenS

// If no model is provided, infer it from the first ntuple.
if (!model) {
auto firstPageSource = Internal::RPageSource::Create(ntuples[0].fNTupleName, ntuples[0].fStorage);
auto firstPageSource = ntuples[0].CreatePageSource();
firstPageSource->Attach();
model = firstPageSource->GetSharedDescriptorGuard()->CreateModel();
}

for (const auto &ntuple : ntuples) {
innerProcessors.emplace_back(Create(ntuple, model->Clone()));
for (auto &ntuple : ntuples) {
innerProcessors.emplace_back(Create(std::move(ntuple), model->Clone()));
}

return CreateChain(std::move(innerProcessors), processorName, std::move(model));
@@ -106,17 +109,16 @@ ROOT::Experimental::RNTupleProcessor::CreateChain(std::vector<std::unique_ptr<RN
}

std::unique_ptr<ROOT::Experimental::RNTupleProcessor>
ROOT::Experimental::RNTupleProcessor::CreateJoin(const RNTupleOpenSpec &primaryNTuple,
const std::vector<RNTupleOpenSpec> &auxNTuples,
ROOT::Experimental::RNTupleProcessor::CreateJoin(RNTupleOpenSpec primaryNTuple, std::vector<RNTupleOpenSpec> auxNTuples,
const std::vector<std::string> &joinFields,
std::vector<std::unique_ptr<RNTupleModel>> models)
{
return CreateJoin(primaryNTuple, auxNTuples, joinFields, primaryNTuple.fNTupleName, std::move(models));
auto processorName = primaryNTuple.fNTupleName;
return CreateJoin(std::move(primaryNTuple), std::move(auxNTuples), joinFields, processorName, std::move(models));
}

std::unique_ptr<ROOT::Experimental::RNTupleProcessor>
ROOT::Experimental::RNTupleProcessor::CreateJoin(const RNTupleOpenSpec &primaryNTuple,
const std::vector<RNTupleOpenSpec> &auxNTuples,
ROOT::Experimental::RNTupleProcessor::CreateJoin(RNTupleOpenSpec primaryNTuple, std::vector<RNTupleOpenSpec> auxNTuples,
const std::vector<std::string> &joinFields,
std::string_view processorName,
std::vector<std::unique_ptr<RNTupleModel>> models)
@@ -133,17 +135,24 @@ ROOT::Experimental::RNTupleProcessor::CreateJoin(const RNTupleOpenSpec &primaryN
throw RException(R__FAIL("join fields must be unique"));
}

// Ensure that all ntuples are uniquely named to prevent name clashes.
// TODO(fdegeus) allow for the provision of aliases for ntuples with the same name, removing the constraint of
// uniquely-named ntuples.
EnsureUniqueNTupleNames(primaryNTuple, auxNTuples);
std::unordered_set<std::string> uniqueNTupleNames{primaryNTuple.fNTupleName};
for (const auto &ntuple : auxNTuples) {
auto res = uniqueNTupleNames.emplace(ntuple.fNTupleName);
if (!res.second) {
throw ROOT::RException(R__FAIL("joining RNTuples with the same name is not allowed"));
}
}

std::unique_ptr<RNTupleJoinProcessor> processor;
if (!models.empty()) {
processor = std::unique_ptr<RNTupleJoinProcessor>(
new RNTupleJoinProcessor(primaryNTuple, auxNTuples, joinFields, processorName, std::move(models)));
processor = std::unique_ptr<RNTupleJoinProcessor>(new RNTupleJoinProcessor(
std::move(primaryNTuple), std::move(auxNTuples), joinFields, processorName, std::move(models)));
} else {
processor = std::unique_ptr<RNTupleJoinProcessor>(
new RNTupleJoinProcessor(primaryNTuple, auxNTuples, joinFields, processorName));
new RNTupleJoinProcessor(std::move(primaryNTuple), std::move(auxNTuples), joinFields, processorName));
}

processor->SetJoinFieldTokens(joinFields);
@@ -174,13 +183,13 @@ void ROOT::Experimental::RNTupleProcessor::ConnectField(RFieldContext &fieldCont

//------------------------------------------------------------------------------

ROOT::Experimental::RNTupleSingleProcessor::RNTupleSingleProcessor(const RNTupleOpenSpec &ntuple,
ROOT::Experimental::RNTupleSingleProcessor::RNTupleSingleProcessor(RNTupleOpenSpec ntuple,
std::string_view processorName,
std::unique_ptr<RNTupleModel> model)
: RNTupleProcessor(processorName, std::move(model)), fNTupleSpec(ntuple)
: RNTupleProcessor(processorName, std::move(model)), fNTupleSpec(std::move(ntuple))
{
if (!fModel) {
fPageSource = Internal::RPageSource::Create(fNTupleSpec.fNTupleName, fNTupleSpec.fStorage);
fPageSource = fNTupleSpec.CreatePageSource();
fPageSource->Attach();
fModel = fPageSource->GetSharedDescriptorGuard()->CreateModel();
}
@@ -236,7 +245,7 @@ void ROOT::Experimental::RNTupleSingleProcessor::Connect()
return;

if (!fPageSource)
fPageSource = Internal::RPageSource::Create(fNTupleSpec.fNTupleName, fNTupleSpec.fStorage);
fPageSource = fNTupleSpec.CreatePageSource();
fPageSource->Attach();
fNEntries = fPageSource->GetNEntries();

@@ -335,15 +344,15 @@ ROOT::NTupleSize_t ROOT::Experimental::RNTupleChainProcessor::LoadEntry(ROOT::NT

//------------------------------------------------------------------------------

ROOT::Experimental::RNTupleJoinProcessor::RNTupleJoinProcessor(const RNTupleOpenSpec &mainNTuple,
const std::vector<RNTupleOpenSpec> &auxNTuples,
ROOT::Experimental::RNTupleJoinProcessor::RNTupleJoinProcessor(RNTupleOpenSpec mainNTuple,
std::vector<RNTupleOpenSpec> auxNTuples,
const std::vector<std::string> &joinFields,
std::string_view processorName,
std::vector<std::unique_ptr<RNTupleModel>> models)
: RNTupleProcessor(processorName, nullptr)
{
fNTuples.emplace_back(mainNTuple);
fPageSource = Internal::RPageSource::Create(mainNTuple.fNTupleName, mainNTuple.fStorage);
fPageSource = mainNTuple.CreatePageSource();
fPageSource->Attach();

if (fPageSource->GetNEntries() == 0) {
@@ -396,7 +405,7 @@ void ROOT::Experimental::RNTupleJoinProcessor::AddAuxiliary(const RNTupleOpenSpe

fNTuples.emplace_back(auxNTuple);

auto pageSource = Internal::RPageSource::Create(auxNTuple.fNTupleName, auxNTuple.fStorage);
auto pageSource = auxNTuple.CreatePageSource();
pageSource->Attach();

if (pageSource->GetNEntries() == 0) {
Loading
Loading