diff --git a/tree/ntuple/v7/inc/ROOT/RNTupleModel.hxx b/tree/ntuple/v7/inc/ROOT/RNTupleModel.hxx index 2fab4fca0354a..5488f912a0ee4 100644 --- a/tree/ntuple/v7/inc/ROOT/RNTupleModel.hxx +++ b/tree/ntuple/v7/inc/ROOT/RNTupleModel.hxx @@ -337,6 +337,9 @@ public: const std::string &GetDescription() const { return fDescription; } void SetDescription(std::string_view description); + /// Get the names of the fields currently present in the model, including projected fields. Registered subfields + /// are not included; use GetRegisteredSubfieldNames() for this. + const std::unordered_set<std::string> &GetFieldNames() const { return fFieldNames; } /// Get the (qualified) names of subfields that have been registered to be included in entries from this model. const std::unordered_set<std::string> &GetRegisteredSubfieldNames() const { return fRegisteredSubfields; } diff --git a/tree/ntuple/v7/inc/ROOT/RNTupleProcessor.hxx b/tree/ntuple/v7/inc/ROOT/RNTupleProcessor.hxx index 978894608f223..7f4515fcf55c7 100644 --- a/tree/ntuple/v7/inc/ROOT/RNTupleProcessor.hxx +++ b/tree/ntuple/v7/inc/ROOT/RNTupleProcessor.hxx @@ -381,14 +381,18 @@ public: /// \param[in] joinFields The names of the fields on which to join, in case the specified RNTuples are unaligned. /// The join is made based on the combined join field values, and therefore each field has to be present in each /// specified RNTuple. If an empty list is provided, it is assumed that the specified ntuple are fully aligned. - /// \param[in] models A list of models for the RNTuples. This list must either contain a model for the primary - /// RNTuple and each auxiliary RNTuple (following the specification order), or be empty. When the list is empty, the - /// default model (i.e. containing all fields) will be used for each RNTuple. + /// \param[in] primaryModel An RNTupleModel specifying which fields from the primary RNTuple can be read by the + /// processor.
If no model is provided, one will be created based on the descriptor of the primary RNTuple. + /// \param[in] auxModels A list of RNTupleModels specifying which fields from the corresponding auxiliary RNTuple + /// (according to the order of `auxNTuples`) can be read by the processor. If this vector is empty, the models will + /// be created based on the descriptors of their corresponding RNTuples. This also applies to individual auxiliary + /// RNTuples for which the provided model is a `nullptr`. /// /// \return A pointer to the newly created RNTupleProcessor. static std::unique_ptr<RNTupleProcessor> - CreateJoin(RNTupleOpenSpec primaryNTuple, std::vector<RNTupleOpenSpec> auxNTuples, - const std::vector<std::string> &joinFields, std::vector<std::unique_ptr<RNTupleModel>> models = {}); + CreateJoin(const RNTupleOpenSpec &primaryNTuple, const std::vector<RNTupleOpenSpec> &auxNTuples, + const std::vector<std::string> &joinFields, std::unique_ptr<RNTupleModel> primaryModel = nullptr, + std::vector<std::unique_ptr<RNTupleModel>> auxModels = {}); ///////////////////////////////////////////////////////////////////////////// /// \brief Create an RNTupleProcessor for a *join* (i.e., a horizontal combination) of RNTuples. @@ -403,16 +407,22 @@ public: /// specified RNTuple. If an empty list is provided, it is assumed that the specified RNTuple are fully aligned. /// \param[in] processorName The name to give to the processor. Use /// CreateJoin(const RNTupleOpenSpec &, const std::vector<RNTupleOpenSpec> &, const std::vector<std::string> &, - /// std::vector<std::unique_ptr<RNTupleModel>>) to automatically use the name of the input RNTuple instead. - /// \param[in] models A list of models for the RNTuples. This list must either contain a model for the primary - /// RNTuple and each auxiliary RNTuple (following the specification order), or be empty. When the list is empty, the - /// default model (i.e. containing all fields) will be used for each RNTuple. 
+ /// std::unique_ptr<RNTupleModel>, std::vector<std::unique_ptr<RNTupleModel>>) to automatically use the name of the + /// input RNTuple instead. + /// \param[in] primaryModel An RNTupleModel specifying which fields from the primary RNTuple + /// can be read by the processor. If no model is provided, one will be created based on the descriptor of the primary + /// RNTuple. + /// \param[in] auxModels A list of RNTupleModels specifying which fields from the corresponding auxiliary + /// RNTuple (according to the order of `auxNTuples`) can be read by the processor. If this vector is empty, the + /// models will be created based on the descriptors of their corresponding RNTuples. This also applies to individual + /// auxiliary RNTuples for which the provided model is a `nullptr`. /// /// \return A pointer to the newly created RNTupleProcessor. static std::unique_ptr<RNTupleProcessor> - CreateJoin(RNTupleOpenSpec primaryNTuple, std::vector<RNTupleOpenSpec> auxNTuples, + CreateJoin(const RNTupleOpenSpec &primaryNTuple, const std::vector<RNTupleOpenSpec> &auxNTuples, const std::vector<std::string> &joinFields, std::string_view processorName, - std::vector<std::unique_ptr<RNTupleModel>> models = {}); + std::unique_ptr<RNTupleModel> primaryModel = nullptr, + std::vector<std::unique_ptr<RNTupleModel>> auxModels = {}); }; // clang-format off @@ -541,33 +551,15 @@ private: ROOT::NTupleSize_t GetNEntries() final { return fNEntries; } ///////////////////////////////////////////////////////////////////////////// - /// \brief Construct a new RNTupleJoinProcessor. + /// \brief Set fModel by combining the primary and auxiliary models. /// - /// \param[in] mainNTuple The source specification (name and storage location) of the primary RNTuple. - /// \param[in] auxNTUples The source specifications (name and storage location) of the auxiliary RNTuples. - /// \param[in] joinFields The names of the fields on which to join, in case the specified RNTuples are unaligned. 
- /// The join is made based on the combined join field values, and therefore each field has to be present in each - /// specified RNTuple. If an empty list is provided, it is assumed that the RNTuples are fully aligned. - /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::CreateJoin, this - /// is the name of the main RNTuple. - /// \param[in] models The models that specify which fields should be read by the processor, ordered according to - /// {mainNTuple, auxNTuple[0], ...}. The pointer returned by RNTupleModel::MakeField can be used to access a field's - /// value during the processor iteration. When an empty list is passed, the models are created from the descriptor of - /// each RNTuple specified in `mainNTuple` and `auxNTuple`. - RNTupleJoinProcessor(RNTupleOpenSpec mainNTuple, std::vector<RNTupleOpenSpec> auxNTuples, - const std::vector<std::string> &joinFields, std::string_view processorName, - std::vector<std::unique_ptr<RNTupleModel>> models = {}); - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Add an auxiliary RNTuple to the processor. + /// \param[in] primaryModel The model of the primary RNTuple. + /// \param[in] auxModels Models of the auxiliary RNTuples. /// - /// \param[in] auxNTuple The source specification (name and storage location) of the auxiliary RNTuple. - /// \param[in] joinFields The names of the fields used in the join. - /// \param[in] model The model that specifies which fields should be read by the processor. The pointer returned by - /// RNTupleModel::MakeField can be used to access a field's value during the processor iteration. When no model is - /// specified, it is created from the RNTuple's descriptor. 
- void AddAuxiliary(const RNTupleOpenSpec &auxNTuple, const std::vector<std::string> &joinFields, - std::unique_ptr<RNTupleModel> model = nullptr); + /// To prevent field name clashes when one or more models have fields with duplicate names, fields from each + /// auxiliary model are stored as an anonymous record, and subsequently registered as subfields in the join model. + /// This way, they can be accessed from the processor's entry as `auxNTupleName.fieldName`. + void SetModel(std::unique_ptr<RNTupleModel> primaryModel, std::vector<std::unique_ptr<RNTupleModel>> auxModels); ///////////////////////////////////////////////////////////////////////////// /// \brief Connect all fields, once the primary and all auxiliary RNTuples have been added. @@ -585,6 +577,27 @@ private: } } + ///////////////////////////////////////////////////////////////////////////// + /// \brief Construct a new RNTupleJoinProcessor. + /// + /// \param[in] mainNTuple The source specification (name and storage location) of the primary RNTuple. + /// \param[in] auxNTuples The source specifications (name and storage location) of the auxiliary RNTuples. + /// \param[in] joinFields The names of the fields on which to join, in case the specified RNTuples are unaligned. + /// The join is made based on the combined join field values, and therefore each field has to be present in each + /// specified RNTuple. If an empty list is provided, it is assumed that the RNTuples are fully aligned. + /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::CreateJoin, this + /// is the name of the main RNTuple. + /// \param[in] primaryModel An RNTupleModel specifying which fields from the primary RNTuple can be read by the + /// processor. If no model is provided, one will be created based on the descriptor of the primary RNTuple.
+ /// \param[in] auxModels A list of RNTupleModels specifying which fields from the corresponding auxiliary RNTuple + /// (according to the order of `auxNTuples`) can be read by the processor. If this vector is empty, the models will + /// be created based on the descriptors of their corresponding RNTuples. This also applies to individual auxiliary + /// RNTuples for which the provided model is a `nullptr`. + RNTupleJoinProcessor(const RNTupleOpenSpec &mainNTuple, const std::vector<RNTupleOpenSpec> &auxNTuples, + const std::vector<std::string> &joinFields, std::string_view processorName, + std::unique_ptr<RNTupleModel> primaryModel = nullptr, + std::vector<std::unique_ptr<RNTupleModel>> auxModels = {}); + public: RNTupleJoinProcessor(const RNTupleJoinProcessor &) = delete; RNTupleJoinProcessor operator=(const RNTupleJoinProcessor &) = delete; diff --git a/tree/ntuple/v7/src/RNTupleProcessor.cxx b/tree/ntuple/v7/src/RNTupleProcessor.cxx index 63cd9bb147998..ac70a081db3fd 100644 --- a/tree/ntuple/v7/src/RNTupleProcessor.cxx +++ b/tree/ntuple/v7/src/RNTupleProcessor.cxx @@ -109,23 +109,23 @@ ROOT::Experimental::RNTupleProcessor::CreateChain(std::vector<std::unique_ptr<RN } std::unique_ptr<ROOT::Experimental::RNTupleProcessor> -ROOT::Experimental::RNTupleProcessor::CreateJoin(RNTupleOpenSpec primaryNTuple, std::vector<RNTupleOpenSpec> auxNTuples, +ROOT::Experimental::RNTupleProcessor::CreateJoin(const RNTupleOpenSpec &primaryNTuple, + const std::vector<RNTupleOpenSpec> &auxNTuples, const std::vector<std::string> &joinFields, - std::vector<std::unique_ptr<RNTupleModel>> models) + std::unique_ptr<RNTupleModel> primaryModel, + std::vector<std::unique_ptr<RNTupleModel>> auxModels) { - auto processorName = primaryNTuple.fNTupleName; - return CreateJoin(std::move(primaryNTuple), std::move(auxNTuples), joinFields, processorName, std::move(models)); + return CreateJoin(primaryNTuple, auxNTuples, joinFields, primaryNTuple.fNTupleName, std::move(primaryModel), + 
std::move(auxModels)); } -std::unique_ptr<ROOT::Experimental::RNTupleProcessor> -ROOT::Experimental::RNTupleProcessor::CreateJoin(RNTupleOpenSpec primaryNTuple, std::vector<RNTupleOpenSpec> auxNTuples, - const std::vector<std::string> &joinFields, - std::string_view processorName, - std::vector<std::unique_ptr<RNTupleModel>> models) +std::unique_ptr<ROOT::Experimental::RNTupleProcessor> ROOT::Experimental::RNTupleProcessor::CreateJoin( + const RNTupleOpenSpec &primaryNTuple, const std::vector<RNTupleOpenSpec> &auxNTuples, + const std::vector<std::string> &joinFields, std::string_view processorName, + std::unique_ptr<RNTupleModel> primaryModel, std::vector<std::unique_ptr<RNTupleModel>> auxModels) { - if (!models.empty() && models.size() != (auxNTuples.size() + 1)) { - throw RException(R__FAIL("number of provided models must match number of specified ntuples")); - } + if (!auxModels.empty() && auxModels.size() != auxNTuples.size()) + throw RException(R__FAIL("number of auxiliary models and auxiliary RNTuples does not match")); if (joinFields.size() > 4) { throw RException(R__FAIL("a maximum of four join fields is allowed")); @@ -146,14 +146,8 @@ ROOT::Experimental::RNTupleProcessor::CreateJoin(RNTupleOpenSpec primaryNTuple, } } - std::unique_ptr<RNTupleJoinProcessor> processor; - if (!models.empty()) { - processor = std::unique_ptr<RNTupleJoinProcessor>(new RNTupleJoinProcessor( - std::move(primaryNTuple), std::move(auxNTuples), joinFields, processorName, std::move(models))); - } else { - processor = std::unique_ptr<RNTupleJoinProcessor>( - new RNTupleJoinProcessor(std::move(primaryNTuple), std::move(auxNTuples), joinFields, processorName)); - } + std::unique_ptr<RNTupleJoinProcessor> processor = std::unique_ptr<RNTupleJoinProcessor>(new RNTupleJoinProcessor( + primaryNTuple, auxNTuples, joinFields, processorName, std::move(primaryModel), std::move(auxModels))); processor->SetJoinFieldTokens(joinFields); processor->ConnectFields(); @@ -164,6 +158,7 @@ 
ROOT::Experimental::RNTupleProcessor::CreateJoin(RNTupleOpenSpec primaryNTuple, void ROOT::Experimental::RNTupleProcessor::ConnectField(RFieldContext &fieldContext, Internal::RPageSource &pageSource, REntry &entry) { + pageSource.Attach(); auto desc = pageSource.GetSharedDescriptorGuard(); const auto fieldId = desc->FindFieldId(fieldContext.GetProtoField().GetFieldName()); @@ -344,14 +339,17 @@ ROOT::NTupleSize_t ROOT::Experimental::RNTupleChainProcessor::LoadEntry(ROOT::NT //------------------------------------------------------------------------------ -ROOT::Experimental::RNTupleJoinProcessor::RNTupleJoinProcessor(RNTupleOpenSpec mainNTuple, - std::vector<RNTupleOpenSpec> auxNTuples, +ROOT::Experimental::RNTupleJoinProcessor::RNTupleJoinProcessor(const RNTupleOpenSpec &mainNTuple, + const std::vector<RNTupleOpenSpec> &auxNTuples, const std::vector<std::string> &joinFields, std::string_view processorName, - std::vector<std::unique_ptr<RNTupleModel>> models) + std::unique_ptr<RNTupleModel> primaryModel, + std::vector<std::unique_ptr<RNTupleModel>> auxModels) : RNTupleProcessor(processorName, nullptr) { fNTuples.emplace_back(mainNTuple); + fNTuples.insert(fNTuples.end(), auxNTuples.begin(), auxNTuples.end()); + fPageSource = mainNTuple.CreatePageSource(); fPageSource->Attach(); @@ -361,10 +359,25 @@ ROOT::Experimental::RNTupleJoinProcessor::RNTupleJoinProcessor(RNTupleOpenSpec m fNEntries = fPageSource->GetNEntries(); - if (models.empty()) - fModel = fPageSource->GetSharedDescriptorGuard()->CreateModel(); - else - fModel = models[0]->Clone(); + for (const auto &auxNTuple : auxNTuples) { + fAuxiliaryPageSources.emplace_back(auxNTuple.CreatePageSource()); + if (!joinFields.empty()) + fJoinTables.emplace_back(Internal::RNTupleJoinTable::Create(joinFields)); + } + + if (!primaryModel) + primaryModel = fPageSource->GetSharedDescriptorGuard()->CreateModel(); + if (auxModels.empty()) { + auxModels.resize(fAuxiliaryPageSources.size()); + } + for (unsigned i = 0; i < 
fAuxiliaryPageSources.size(); ++i) { + if (!auxModels[i]) { + fAuxiliaryPageSources[i]->Attach(); + auxModels[i] = fAuxiliaryPageSources[i]->GetSharedDescriptorGuard()->CreateModel(); + } + } + + SetModel(std::move(primaryModel), std::move(auxModels)); fModel->Freeze(); fEntry = fModel->CreateEntry(); @@ -376,120 +389,71 @@ ROOT::Experimental::RNTupleJoinProcessor::RNTupleJoinProcessor(RNTupleOpenSpec m // If the model provided by the user has a default entry, use the value pointers from the default entry of the // model that was passed to this constructor. This way, the pointers returned by RNTupleModel::MakeField can be // used in the processor loop to access the corresponding field values. - if (!models.empty() && !models[0]->IsBare()) { - auto valuePtr = models[0]->GetDefaultEntry().GetPtr<void>(fieldName); + if (!fModel->IsBare()) { + auto valuePtr = fModel->GetDefaultEntry().GetPtr<void>(fieldName); fEntry->BindValue(fieldName, valuePtr); } - const auto &[fieldContext, _] = - fFieldContexts.try_emplace(fieldName, field.Clone(fieldName), fEntry->GetToken(fieldName)); - ConnectField(fieldContext->second, *fPageSource, *fEntry); - } - - for (unsigned i = 0; i < auxNTuples.size(); ++i) { - if (models.empty()) { - AddAuxiliary(auxNTuples[i], joinFields); - } else { - // The size of `models` is checked in `CreateJoin`; at this point we can safely assume that `models.size() == - // auxNTuples.size() + 1`. - AddAuxiliary(auxNTuples[i], joinFields, std::move(models[i + 1])); + auto auxNTupleName = std::find_if(auxNTuples.cbegin(), auxNTuples.cend(), [&fieldName](const RNTupleOpenSpec &n) { + return fieldName.substr(0, n.fNTupleName.size()) == n.fNTupleName; + }); + + // If the current field name does not begin with the name of one of the auxiliary ntuples, we are dealing with a + // field from the primary ntuple, so it can be added as a field context. Otherwise, if it does begin with the + // name, but is not equal to just the name (e.g. 
it is a subfield of `auxNTupleName`, which means it is a proper + // field in the corresponding auxiliary ntuple) we also need to add it as a field context. If it is exactly equal + // to an auxiliary ntuple name, it is the untyped record field containing the auxiliary fields itself. This one we + // don't want to add as a field context, because there is nothing to read from. + // TODO(fdegeus) handle the case where a primary field has the name of an auxiliary ntuple. + if (auxNTupleName == auxNTuples.end()) { + fFieldContexts.try_emplace(fieldName, field.Clone(field.GetFieldName()), fEntry->GetToken(fieldName)); + } else if (fieldName != auxNTupleName->fNTupleName) { + // Add 1 because we also have to take into account the primary ntuple. + auto ntupleIdx = std::distance(auxNTuples.begin(), auxNTupleName) + 1; + fFieldContexts.try_emplace(fieldName, field.Clone(field.GetFieldName()), fEntry->GetToken(fieldName), + ntupleIdx); } } } -void ROOT::Experimental::RNTupleJoinProcessor::AddAuxiliary(const RNTupleOpenSpec &auxNTuple, - const std::vector<std::string> &joinFields, - std::unique_ptr<RNTupleModel> model) +void ROOT::Experimental::RNTupleJoinProcessor::SetModel(std::unique_ptr<RNTupleModel> primaryModel, + std::vector<std::unique_ptr<RNTupleModel>> auxModels) { - assert(fNEntriesProcessed == 0 && "cannot add auxiliary ntuples after processing has started"); - - fNTuples.emplace_back(auxNTuple); - - auto pageSource = auxNTuple.CreatePageSource(); - pageSource->Attach(); - - if (pageSource->GetNEntries() == 0) { - throw RException(R__FAIL("provided RNTuple is empty")); - } - - if (!model) - model = pageSource->GetSharedDescriptorGuard()->CreateModel(); - - model->Freeze(); - auto entry = model->CreateBareEntry(); - - // Append the auxiliary fields to the join model + fModel = std::move(primaryModel); fModel->Unfreeze(); - // The fields of the auxiliary ntuple are contained in an anonymous record field and subsequently registered as - // subfields to the join 
model. This way they can be accessed through the processor as `auxNTupleName.fieldName`, - // which is necessary in case there are duplicate field names between the main ntuple and (any of the) auxiliary - // ntuples. - std::vector<std::unique_ptr<ROOT::RFieldBase>> auxFields; - auxFields.reserve(entry->fValues.size()); - for (const auto &val : *entry) { - auto &field = val.GetField(); + // Create an anonymous record field for each auxiliary ntuple, containing their top-level fields. These original + // top-level fields are registered as subfields in the join model, such that they can be accessed as + // `auxNTupleName.fieldName`. + for (unsigned i = 0; i < auxModels.size(); ++i) { + std::vector<std::unique_ptr<ROOT::RFieldBase>> auxFields; + auxFields.reserve(auxModels[i]->GetFieldNames().size()); - auxFields.emplace_back(field.Clone(field.GetQualifiedFieldName())); - } - std::unique_ptr<ROOT::RFieldBase> auxParentField = - std::make_unique<ROOT::RRecordField>(auxNTuple.fNTupleName, std::move(auxFields)); - - if (!auxParentField) { - throw RException(R__FAIL("could not create auxiliary RNTuple parent field")); - } - - const auto &subfields = auxParentField->GetConstSubfields(); - fModel->AddField(std::move(auxParentField)); - for (const auto &field : subfields) { - fModel->RegisterSubfield(field->GetQualifiedFieldName()); - } - - fModel->Freeze(); - // After modifying the join model, we need to create a new entry since the old one is invalidated. However, we do - // want to carry over the value pointers, so the pointers returned by `MakeField` during the creation of the original - // model by the user can be used in the processor loop. 
- auto newEntry = fModel->CreateEntry(); + for (const auto &fieldName : auxModels[i]->GetFieldNames()) { + auxFields.emplace_back(auxModels[i]->GetConstField(fieldName).Clone(fieldName)); + } - for (const auto &value : *newEntry) { - const auto &field = value.GetField(); + auto auxParentField = std::make_unique<ROOT::RRecordField>(fNTuples[i + 1].fNTupleName, std::move(auxFields)); + const auto &subFields = auxParentField->GetConstSubfields(); + fModel->AddField(std::move(auxParentField)); - // Skip if the field is the untyped record that holds the fields of auxiliary ntuples. - const auto fnIsNTuple = [&field](RNTupleOpenSpec n) { return n.fNTupleName == field.GetFieldName(); }; - if (std::find_if(fNTuples.cbegin(), fNTuples.cend(), fnIsNTuple) != fNTuples.end()) { - continue; + for (const auto &field : subFields) { + fModel->RegisterSubfield(field->GetQualifiedFieldName()); } - auto fieldContext = fFieldContexts.find(field.GetQualifiedFieldName()); - // If the field belongs to the auxiliary ntuple currently being added, apart from assigning its entry value the - // correct pointer, we also have to create a field context for it. - if (fieldContext == fFieldContexts.end()) { - // If the model has a default entry, use the value pointers from the entry in the entry managed by the - // processor. This way, the pointers returned by RNTupleModel::MakeField can be used in the processor loop to - // access the corresponding field values. - if (!model->IsBare()) { - auto valuePtr = model->GetDefaultEntry().GetPtr<void>(field.GetFieldName()); - newEntry->BindValue(field.GetQualifiedFieldName(), valuePtr); + // If the model has a default entry, adopt its value pointers. This way, the pointers returned by + // RNTupleModel::MakeField can be used in the processor loop to access the corresponding field values. 
+ if (!auxModels[i]->IsBare()) { + const auto &auxDefaultEntry = auxModels[i]->GetDefaultEntry(); + auto &joinDefaultEntry = fModel->GetDefaultEntry(); + for (const auto &fieldName : auxModels[i]->GetFieldNames()) { + auto valuePtr = auxDefaultEntry.GetPtr<void>(fieldName); + joinDefaultEntry.BindValue(fNTuples[i + 1].fNTupleName + "." + fieldName, valuePtr); } - - auto token = newEntry->GetToken(field.GetQualifiedFieldName()); - fFieldContexts.try_emplace(field.GetQualifiedFieldName(), field.Clone(field.GetFieldName()), token, - fNTuples.size() - 1); - } else { - auto valuePtr = fEntry->GetPtr<void>(fieldContext->second.fToken); - auto newToken = newEntry->GetToken(field.GetQualifiedFieldName()); - newEntry->BindValue(newToken, valuePtr); - fieldContext->second.fToken = std::move(newToken); } } - fEntry.swap(newEntry); - - // If no join fields have been specified, an aligned join is assumed and an join table won't be created. - if (!joinFields.empty()) - fJoinTables.emplace_back(Internal::RNTupleJoinTable::Create(joinFields)); - - fAuxiliaryPageSources.emplace_back(std::move(pageSource)); + fModel->Freeze(); } void ROOT::Experimental::RNTupleJoinProcessor::ConnectFields() diff --git a/tree/ntuple/v7/test/ntuple_processor_join.cxx b/tree/ntuple/v7/test/ntuple_processor_join.cxx index ab7ac46a13324..02c11e455a8fd 100644 --- a/tree/ntuple/v7/test/ntuple_processor_join.cxx +++ b/tree/ntuple/v7/test/ntuple_processor_join.cxx @@ -243,24 +243,21 @@ TEST_F(RNTupleJoinProcessorTest, MissingEntries) TEST_F(RNTupleJoinProcessorTest, WithModel) { - auto model1 = RNTupleModel::Create(); - auto i = model1->MakeField<int>("i"); - auto x = model1->MakeField<float>("x"); + auto primaryModel = RNTupleModel::Create(); + auto i = primaryModel->MakeField<int>("i"); + auto x = primaryModel->MakeField<float>("x"); - auto model2 = RNTupleModel::Create(); - auto y = model2->MakeField<std::vector<float>>("y"); + std::vector<std::unique_ptr<RNTupleModel>> auxModels; - auto model3 = 
RNTupleModel::Create(); - auto z = model3->MakeField<float>("z"); + auxModels.push_back(RNTupleModel::Create()); + auto y = auxModels.back()->MakeField<std::vector<float>>("y"); - std::vector<std::unique_ptr<RNTupleModel>> models; - models.push_back(std::move(model1)); - models.push_back(std::move(model2)); - models.push_back(std::move(model3)); + auxModels.push_back(RNTupleModel::Create()); + auto z = auxModels.back()->MakeField<float>("z"); auto proc = RNTupleProcessor::CreateJoin({fNTupleNames[0], fFileNames[0]}, {{fNTupleNames[1], fFileNames[1]}, {fNTupleNames[2], fFileNames[2]}}, {"i"}, - std::move(models)); + std::move(primaryModel), std::move(auxModels)); int nEntries = 0; std::vector<float> yExpected; @@ -290,6 +287,147 @@ TEST_F(RNTupleJoinProcessorTest, WithModel) EXPECT_EQ(5, proc->GetNEntriesProcessed()); } +TEST_F(RNTupleJoinProcessorTest, WithBareModel) +{ + auto primaryModel = RNTupleModel::CreateBare(); + primaryModel->MakeField<int>("i"); + primaryModel->MakeField<float>("x"); + + std::vector<std::unique_ptr<RNTupleModel>> auxModels; + + auxModels.push_back(RNTupleModel::CreateBare()); + auxModels.back()->MakeField<std::vector<float>>("y"); + + auxModels.push_back(RNTupleModel::CreateBare()); + auxModels.back()->MakeField<float>("z"); + + auto proc = RNTupleProcessor::CreateJoin({fNTupleNames[0], fFileNames[0]}, + {{fNTupleNames[1], fFileNames[1]}, {fNTupleNames[2], fFileNames[2]}}, {"i"}, + std::move(primaryModel), std::move(auxModels)); + + auto i = proc->GetEntry().GetPtr<int>("i"); + auto x = proc->GetEntry().GetPtr<float>("x"); + auto y = proc->GetEntry().GetPtr<std::vector<float>>("ntuple2.y"); + auto z = proc->GetEntry().GetPtr<float>("ntuple3.z"); + + int nEntries = 0; + std::vector<float> yExpected; + for (auto &entry : *proc) { + EXPECT_EQ(proc->GetCurrentEntryNumber(), nEntries++); + + EXPECT_EQ(proc->GetCurrentEntryNumber() * 2, *i); + + EXPECT_FLOAT_EQ(*i * 0.5f, *x); + + yExpected = {static_cast<float>(*i * 0.2), 3.14, 
static_cast<float>(*i * 1.3)}; + EXPECT_EQ(yExpected, *y); + EXPECT_FLOAT_EQ(static_cast<float>(*i * 2.f), *z); + + try { + entry.GetPtr<float>("ntuple2.z"); + FAIL() << "should not be able to access values from fields not present in the provided models"; + } catch (const ROOT::RException &err) { + EXPECT_THAT(err.what(), testing::HasSubstr("invalid field name: ntuple2.z")); + } + } + + EXPECT_EQ(5, proc->GetNEntriesProcessed()); +} + +TEST_F(RNTupleJoinProcessorTest, PartialModels) +{ + { + std::vector<std::unique_ptr<RNTupleModel>> auxModels; + + auxModels.emplace_back(RNTupleModel::Create()); + auto y = auxModels.back()->MakeField<std::vector<float>>("y"); + + auxModels.emplace_back(RNTupleModel::Create()); + auto z = auxModels.back()->MakeField<float>("z"); + + // no primary model provided, aux models have been provided + auto procNoPrimaryModel = RNTupleProcessor::CreateJoin( + {fNTupleNames[0], fFileNames[0]}, {{fNTupleNames[1], fFileNames[1]}, {fNTupleNames[2], fFileNames[2]}}, {"i"}, + nullptr, std::move(auxModels)); + + auto i = procNoPrimaryModel->GetEntry().GetPtr<int>("i"); + std::vector<float> yExpected; + for (auto &entry : *procNoPrimaryModel) { + EXPECT_EQ(procNoPrimaryModel->GetCurrentEntryNumber() * 2, *i); + EXPECT_FLOAT_EQ(*i * 0.5f, *entry.GetPtr<float>("x")); + yExpected = {static_cast<float>(*i * 0.2), 3.14, static_cast<float>(*i * 1.3)}; + EXPECT_EQ(yExpected, *y); + EXPECT_FLOAT_EQ(static_cast<float>(*i * 2.f), *z); + + try { + entry.GetPtr<float>("ntuple2.z"); + FAIL() << "should not be able to access values from fields not present in the provided models"; + } catch (const ROOT::RException &err) { + EXPECT_THAT(err.what(), testing::HasSubstr("invalid field name: ntuple2.z")); + } + } + } + { + // primary model provided, no aux models have been provided + auto primaryModel = RNTupleModel::Create(); + auto i = primaryModel->MakeField<int>("i"); + auto x = primaryModel->MakeField<float>("x"); + + auto procNoAuxModels = 
RNTupleProcessor::CreateJoin( + {fNTupleNames[0], fFileNames[0]}, {{fNTupleNames[1], fFileNames[1]}, {fNTupleNames[2], fFileNames[2]}}, {"i"}, + std::move(primaryModel)); + + std::vector<float> yExpected; + for (auto &entry : *procNoAuxModels) { + EXPECT_EQ(procNoAuxModels->GetCurrentEntryNumber() * 2, *i); + EXPECT_FLOAT_EQ(*i * 0.5f, *x); + yExpected = {static_cast<float>(*i * 0.2), 3.14, static_cast<float>(*i * 1.3)}; + EXPECT_EQ(yExpected, *entry.GetPtr<std::vector<float>>("ntuple2.y")); + EXPECT_FLOAT_EQ(static_cast<float>(*i * 2.f), *entry.GetPtr<float>("ntuple3.z")); + + try { + entry.GetPtr<float>("ntuple2.z"); + FAIL() << "should not be able to access values from fields not present in the provided models"; + } catch (const ROOT::RException &err) { + EXPECT_THAT(err.what(), testing::HasSubstr("invalid field name: ntuple2.z")); + } + } + } + { + // primary model and model for first aux ntuple has been provided, but not for the second + auto primaryModel = RNTupleModel::Create(); + auto i = primaryModel->MakeField<int>("i"); + auto x = primaryModel->MakeField<float>("x"); + + std::vector<std::unique_ptr<RNTupleModel>> partialAuxModels; + + partialAuxModels.emplace_back(RNTupleModel::Create()); + auto y = partialAuxModels.back()->MakeField<std::vector<float>>("y"); + + partialAuxModels.emplace_back(nullptr); + + auto procPartialAuxModels = RNTupleProcessor::CreateJoin( + {fNTupleNames[0], fFileNames[0]}, {{fNTupleNames[1], fFileNames[1]}, {fNTupleNames[2], fFileNames[2]}}, {"i"}, + std::move(primaryModel), std::move(partialAuxModels)); + + std::vector<float> yExpected; + for (auto &entry : *procPartialAuxModels) { + EXPECT_EQ(procPartialAuxModels->GetCurrentEntryNumber() * 2, *i); + EXPECT_FLOAT_EQ(*i * 0.5f, *x); + yExpected = {static_cast<float>(*i * 0.2), 3.14, static_cast<float>(*i * 1.3)}; + EXPECT_EQ(yExpected, *y); + EXPECT_FLOAT_EQ(static_cast<float>(*i * 2.f), *entry.GetPtr<float>("ntuple3.z")); + + try { + entry.GetPtr<float>("ntuple2.z"); + FAIL() 
<< "should not be able to access values from fields not present in the provided models"; + } catch (const ROOT::RException &err) { + EXPECT_THAT(err.what(), testing::HasSubstr("invalid field name: ntuple2.z")); + } + } + } +} + TEST_F(RNTupleJoinProcessorTest, TMemFile) { TMemFile memFile("test_ntuple_processor_join_tmemfile.root", "RECREATE");