From 8a5718288f72c134c16655da257ca55d4e333c68 Mon Sep 17 00:00:00 2001 From: martinfoell Date: Wed, 16 Apr 2025 16:08:43 +0200 Subject: [PATCH 01/12] Enhance machine learning tutorial presentation --- tutorials/machine_learning/index.md | 140 ++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 tutorials/machine_learning/index.md diff --git a/tutorials/machine_learning/index.md b/tutorials/machine_learning/index.md new file mode 100644 index 0000000000000..475d5ba6c8474 --- /dev/null +++ b/tutorials/machine_learning/index.md @@ -0,0 +1,140 @@ +\addtogroup tutorial_ml + + +## Table of contents +- [Basic TMVA tutorials](\ref basic) + - [Training](\ref training) + - [Applications](\ref application) + - [Others](\ref other) +- [Cross validation](\ref cross_val) +- [New TMVA interfaces](\ref new_interface) + - [RBDT](\ref rbdt) +- [Deep learning in TMVA](\ref deep_learing) +- [TMVA Keras tutorials](\ref keras) +- [TMVA PyTorch tutorials](\ref pytorch) +- [Interference with SOFIE](\ref interference) +- [RBatchGenerator](\ref rbatchgen) + + +\anchor basic +## Basic TMVA tutorials + + +\anchor training +### Training + +| **Tutorial** | **Description** | +|--------------|-----------------| +| TMVAMinimalClassification.C | Minimal self-contained example for setting up TMVA with binary classification. | +| TMVAMulticlass.C | This macro provides a simple example for the training and testing of the TMVA multiclass classification. | +| TMVAClassification.C | This macro provides examples for the training and testing of the TMVA classifiers. | +| TMVAClassificationCategory.C | This macro provides examples for the training and testing of the TMVA classifiers in categorisation mode. | +| TMVARegression.C | This macro provides examples for the training and testing of the TMVA classifiers. 
| +| classification.C | | + +\anchor application +### Applications + +| **Tutorial** | **Description** | +|--------------|-----------------| +| TMVAMulticlassApplication.C | This macro provides a simple example on how to use the trained multiclass classifiers within an analysis module. | +| TMVAClassificationApplication.C | This macro provides a simple example on how to use the trained classifiers within an analysis module. | +| TMVAClassificationCategoryApplication.C | This macro provides a simple example on how to use the trained classifiers (with categories) within an analysis module. | +| TMVACrossValidationApplication.C | This macro provides an example of how to use TMVA for k-folds cross evaluation in application. | +| TMVAMulticlassApplication.C | This macro provides a simple example on how to use the trained multiclass classifiers within an analysis module. | +| TMVARegressionApplication.C | This macro provides a simple example on how to use the trained regression MVAs within an analysis module. | + +\anchor other +### Others + +| **Tutorial** | **Description** | +|--------------|-----------------| +| TMVAGAexample.C | This executable gives an example of a very simple use of the genetic algorithm of TMVA. | +| TMVAGAexample2.C | This executable gives an example of a very simple use of the genetic algorithm of TMVA. | +| TMVAMultipleBackgroundExample.C | This example shows the training of signal with three different backgrounds. | + +\anchor cross_val +## Cross validation +| **Tutorial** | **Description** | +|--------------|-----------------| +| TMVACrossValidation.C | This macro provides an example of how to use TMVA for k-folds cross evaluation. | +| TMVACrossValidationApplication.C | This macro provides an example of how to use TMVA for k-folds cross evaluation in application. | +| TMVACrossValidationRegression.C | This macro provides an example of how to use TMVA for k-folds cross evaluation. 
| + +\anchor new_interface +## New TMVA interfaces + +| **Tutorial** | **Description** | +|--------------|-----------------| +| createData.C | Plot the variables. | +| tmva001_RTensor.C | This tutorial illustrates the basic features of the RTensor class, RTensor is a std::vector-like container with additional shape information. | +| tmva002_RDataFrameAsTensor.C | This tutorial shows how the content of an RDataFrame can be converted to an RTensor object. | +| tmva003_RReader.C | This tutorial shows how to apply with the modern interfaces models saved in TMVA XML files. | + + +\anchor rbdt +### RBDT +| **Tutorial** | **Description** | +|--------------|-----------------| +| tmva100_DataPreparation.py | This tutorial illustrates how to prepare ROOT datasets to be nicely readable by most machine learning methods. | +| tmva101_Training.py | This tutorial show how you can train a machine learning model with any package reading the training data directly from ROOT files. | +| tmva102_Testing.py | This tutorial illustrates how you can test a trained BDT model using the fast tree inference engine offered by TMVA and external tools such as scikit-learn. | +| tmva103_Application.C | This tutorial illustrates how you can conveniently apply BDTs in C++ using the fast tree inference engine offered by TMVA. | + +\anchor deep_learing +## Deep learning in TMVA + +| **Tutorial** || **Description** | +|---------------|-----------------|-----------------| +| TMVA_CNN_Classification.C | TMVA_CNN_Classification.py | TMVA Classification Example Using a Convolutional Neural Network. | +| TMVA_Higgs_Classification.C | TMVA_Higgs_Classification.py | Classification example of TMVA based on public Higgs UCI dataset. | +| TMVA_RNN_Classification.C | TMVA_RNN_Classification.py | TMVA Classification Example Using a Recurrent Neural Network. 
| + +\anchor keras +## TMVA Keras tutorials + +| **Tutorial** | **Description** | +|--------------|-----------------| +| ApplicationClassificationKeras.py | This tutorial shows how to apply a trained model to new data. | +| ApplicationRegressionKeras.py | This tutorial shows how to apply a trained model to new data (regression). | +| ClassificationKeras.py | This tutorial shows how to do classification in TMVA with neural networks trained with keras. | +| GenerateModel.py | This tutorial shows how to define and generate a keras model for use with TMVA. | +| MulticlassKeras.py | This tutorial shows how to do multiclass classification in TMVA with neural networks trained with keras. | +| RegressionKeras.py | This tutorial shows how to do regression in TMVA with neural networks trained with keras. | + +\anchor pytorch +## TMVA PyTorch tutorials +| **Tutorial** | **Description** | +|--------------|-----------------| +| ApplicationClassificationPyTorch.py | This tutorial shows how to apply a trained model to new data. | +| ApplicationRegressionPyTorch.py | This tutorial shows how to apply a trained model to new data (regression). | +| ClassificationPyTorch.py | This tutorial shows how to do classification in TMVA with neural networks trained with PyTorch. | +| MulticlassPyTorch.py | This tutorial shows how to do multiclass classification in TMVA with neural networks trained with PyTorch. | +| RegressionPyTorch.py | This tutorial shows how to do regression in TMVA with neural networks trained with PyTorch. | + + + +\anchor interference +## Interference with SOFIE + +| **Tutorial** || **Description** | +|---------------|-----------------|-----------------| +| | TMVA_SOFIE_Inference.py | This macro provides an example of using a trained model with Keras and make inference using SOFIE directly from Numpy. 
| +| TMVA_SOFIE_Keras.C | | This macro provides a simple example for the parsing of Keras .h5 file into RModel object and further generating the .hxx header files for inference. | +| TMVA_SOFIE_Keras_HiggsModel.C | | This macro run the SOFIE parser on the Keras model obtaining running TMVA_Higgs_Classification.C You need to run that macro before this one. | +| | TMVA_SOFIE_Models.py | Example of inference with SOFIE using a set of models trained with Keras. | +| TMVA_SOFIE_ONNX.C | | This macro provides a simple example for the parsing of ONNX files into RModel object and further generating the .hxx header files for inference. | +| TMVA_SOFIE_PyTorch.C | | This macro provides a simple example for the parsing of PyTorch .pt file into RModel object and further generating the .hxx header files for inference. | +| TMVA_SOFIE_RDataFrame.C | TMVA_SOFIE_RDataFrame.py | Example of inference with SOFIE and RDataFrame, of a model trained with Keras. | +| TMVA_SOFIE_RDataFrame_JIT.C | | This macro provides an example of using a trained model with Keras and make inference using SOFIE and RDataFrame. | +| TMVA_SOFIE_RSofieReader.C | | This macro provides an example of using a trained model with Keras and make inference using SOFIE with the RSofieReader class. | + +\anchor rbatchgen +## RBatchGenerator + +| **Tutorial** | **Description** | +|--------------|-----------------| +| RBatchGenerator_NumPy.py | Example of getting batches of events from a ROOT dataset as Python generators of numpy arrays. | +| RBatchGenerator_PyTorch.py | Example of getting batches of events from a ROOT dataset into a basic PyTorch workflow. | +| RBatchGenerator_TensorFlow.py | Example of getting batches of events from a ROOT dataset into a basic TensorFlow workflow. 
| + From b98d49fe257f99afcb241031ff2a38342065f918 Mon Sep 17 00:00:00 2001 From: martinfoell Date: Thu, 17 Apr 2025 11:28:34 +0200 Subject: [PATCH 02/12] Change the description of the tutorials --- tutorials/machine_learning/index.md | 110 ++++++++++++++-------------- 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/tutorials/machine_learning/index.md b/tutorials/machine_learning/index.md index 475d5ba6c8474..126f367f3bd85 100644 --- a/tutorials/machine_learning/index.md +++ b/tutorials/machine_learning/index.md @@ -12,8 +12,8 @@ - [Deep learning in TMVA](\ref deep_learing) - [TMVA Keras tutorials](\ref keras) - [TMVA PyTorch tutorials](\ref pytorch) -- [Interference with SOFIE](\ref interference) -- [RBatchGenerator](\ref rbatchgen) +- [Inference with SOFIE](\ref inference) +- [Data loading for training](\ref data_loading) \anchor basic @@ -26,10 +26,10 @@ | **Tutorial** | **Description** | |--------------|-----------------| | TMVAMinimalClassification.C | Minimal self-contained example for setting up TMVA with binary classification. | -| TMVAMulticlass.C | This macro provides a simple example for the training and testing of the TMVA multiclass classification. | -| TMVAClassification.C | This macro provides examples for the training and testing of the TMVA classifiers. | -| TMVAClassificationCategory.C | This macro provides examples for the training and testing of the TMVA classifiers in categorisation mode. | -| TMVARegression.C | This macro provides examples for the training and testing of the TMVA classifiers. | +| TMVAMulticlass.C | Training and testing of the TMVA multiclass classification. | +| TMVAClassification.C | Training and testing of the TMVA classifiers. | +| TMVAClassificationCategory.C | Training and testing of the TMVA classifiers in categorisation mode. | +| TMVARegression.C | Training and testing of the TMVA regression methods. 
| | classification.C | | \anchor application @@ -37,29 +37,29 @@ | **Tutorial** | **Description** | |--------------|-----------------| -| TMVAMulticlassApplication.C | This macro provides a simple example on how to use the trained multiclass classifiers within an analysis module. | -| TMVAClassificationApplication.C | This macro provides a simple example on how to use the trained classifiers within an analysis module. | -| TMVAClassificationCategoryApplication.C | This macro provides a simple example on how to use the trained classifiers (with categories) within an analysis module. | -| TMVACrossValidationApplication.C | This macro provides an example of how to use TMVA for k-folds cross evaluation in application. | -| TMVAMulticlassApplication.C | This macro provides a simple example on how to use the trained multiclass classifiers within an analysis module. | -| TMVARegressionApplication.C | This macro provides a simple example on how to use the trained regression MVAs within an analysis module. | +| TMVAMulticlassApplication.C | Using the trained multiclass classifiers within an analysis module. | +| TMVAClassificationApplication.C | Using the trained classifiers within an analysis module. | +| TMVAClassificationCategoryApplication.C | Using the trained classifiers (with categories) within an analysis module. | +| TMVACrossValidationApplication.C | Using TMVA for k-folds cross evaluation in application. | +| TMVAMulticlassApplication.C | Using trained multiclass classifiers within an analysis module. | +| TMVARegressionApplication.C | Using the trained regression MVAs within an analysis module. | \anchor other ### Others | **Tutorial** | **Description** | |--------------|-----------------| -| TMVAGAexample.C | This executable gives an example of a very simple use of the genetic algorithm of TMVA. | -| TMVAGAexample2.C | This executable gives an example of a very simple use of the genetic algorithm of TMVA. 
| -| TMVAMultipleBackgroundExample.C | This example shows the training of signal with three different backgrounds. | +| TMVAGAexample.C | Using the genetic algorithm of TMVA. | +| TMVAGAexample2.C | Using the genetic algorithm of TMVA. | +| TMVAMultipleBackgroundExample.C | Training of signal with three different backgrounds. | \anchor cross_val ## Cross validation | **Tutorial** | **Description** | |--------------|-----------------| -| TMVACrossValidation.C | This macro provides an example of how to use TMVA for k-folds cross evaluation. | -| TMVACrossValidationApplication.C | This macro provides an example of how to use TMVA for k-folds cross evaluation in application. | -| TMVACrossValidationRegression.C | This macro provides an example of how to use TMVA for k-folds cross evaluation. | +| TMVACrossValidation.C | Using the TMVA k-folds cross evaluation. | +| TMVACrossValidationApplication.C | Using the TMVA k-folds cross evaluation in application. | +| TMVACrossValidationRegression.C | Using the TMVA k-folds cross evaluation. | \anchor new_interface ## New TMVA interfaces @@ -67,74 +67,74 @@ | **Tutorial** | **Description** | |--------------|-----------------| | createData.C | Plot the variables. | -| tmva001_RTensor.C | This tutorial illustrates the basic features of the RTensor class, RTensor is a std::vector-like container with additional shape information. | -| tmva002_RDataFrameAsTensor.C | This tutorial shows how the content of an RDataFrame can be converted to an RTensor object. | -| tmva003_RReader.C | This tutorial shows how to apply with the modern interfaces models saved in TMVA XML files. | +| tmva001_RTensor.C | Illustrate the basic features of the RTensor class, RTensor is a std::vector-like container with additional shape information. | +| tmva002_RDataFrameAsTensor.C | Convert the content of an RDataFrame to an RTensor object. | +| tmva003_RReader.C | Apply models saved in TMVA XML files using the modern interfaces. 
| \anchor rbdt ### RBDT | **Tutorial** | **Description** | |--------------|-----------------| -| tmva100_DataPreparation.py | This tutorial illustrates how to prepare ROOT datasets to be nicely readable by most machine learning methods. | -| tmva101_Training.py | This tutorial show how you can train a machine learning model with any package reading the training data directly from ROOT files. | -| tmva102_Testing.py | This tutorial illustrates how you can test a trained BDT model using the fast tree inference engine offered by TMVA and external tools such as scikit-learn. | -| tmva103_Application.C | This tutorial illustrates how you can conveniently apply BDTs in C++ using the fast tree inference engine offered by TMVA. | +| tmva100_DataPreparation.py | Prepare ROOT datasets to be nicely readable by most machine learning methods. | +| tmva101_Training.py | Train a machine learning model with any package reading the training data directly from ROOT files. | +| tmva102_Testing.py | Test a trained BDT model using the fast tree inference engine offered by TMVA and external tools such as scikit-learn. | +| tmva103_Application.C | Apply BDTs in C++ using the fast tree inference engine offered by TMVA. | \anchor deep_learing ## Deep learning in TMVA | **Tutorial** || **Description** | |---------------|-----------------|-----------------| -| TMVA_CNN_Classification.C | TMVA_CNN_Classification.py | TMVA Classification Example Using a Convolutional Neural Network. | +| TMVA_CNN_Classification.C | TMVA_CNN_Classification.py | TMVA Classification example using a Convolutional Neural Network. | | TMVA_Higgs_Classification.C | TMVA_Higgs_Classification.py | Classification example of TMVA based on public Higgs UCI dataset. | -| TMVA_RNN_Classification.C | TMVA_RNN_Classification.py | TMVA Classification Example Using a Recurrent Neural Network. | +| TMVA_RNN_Classification.C | TMVA_RNN_Classification.py | TMVA Classification example using a Recurrent Neural Network. 
| \anchor keras ## TMVA Keras tutorials | **Tutorial** | **Description** | |--------------|-----------------| -| ApplicationClassificationKeras.py | This tutorial shows how to apply a trained model to new data. | -| ApplicationRegressionKeras.py | This tutorial shows how to apply a trained model to new data (regression). | -| ClassificationKeras.py | This tutorial shows how to do classification in TMVA with neural networks trained with keras. | -| GenerateModel.py | This tutorial shows how to define and generate a keras model for use with TMVA. | -| MulticlassKeras.py | This tutorial shows how to do multiclass classification in TMVA with neural networks trained with keras. | -| RegressionKeras.py | This tutorial shows how to do regression in TMVA with neural networks trained with keras. | +| ApplicationClassificationKeras.py | Apply a trained model to new data. | +| ApplicationRegressionKeras.py | Apply a trained model to new data (regression). | +| ClassificationKeras.py | Classification in TMVA with neural networks trained with keras. | +| GenerateModel.py | Define and generate a keras model for use with TMVA. | +| MulticlassKeras.py | Multiclass classification in TMVA with neural networks trained with keras. | +| RegressionKeras.py | Regression in TMVA with neural networks trained with keras. | \anchor pytorch ## TMVA PyTorch tutorials | **Tutorial** | **Description** | |--------------|-----------------| -| ApplicationClassificationPyTorch.py | This tutorial shows how to apply a trained model to new data. | -| ApplicationRegressionPyTorch.py | This tutorial shows how to apply a trained model to new data (regression). | -| ClassificationPyTorch.py | This tutorial shows how to do classification in TMVA with neural networks trained with PyTorch. | -| MulticlassPyTorch.py | This tutorial shows how to do multiclass classification in TMVA with neural networks trained with PyTorch. 
| -| RegressionPyTorch.py | This tutorial shows how to do regression in TMVA with neural networks trained with PyTorch. | +| ApplicationClassificationPyTorch.py | Apply a trained model to new data. | +| ApplicationRegressionPyTorch.py | Apply a trained model to new data (regression). | +| ClassificationPyTorch.py | Classification in TMVA with neural networks trained with PyTorch. | +| MulticlassPyTorch.py | Multiclass classification in TMVA with neural networks trained with PyTorch. | +| RegressionPyTorch.py | Regression in TMVA with neural networks trained with PyTorch. | -\anchor interference -## Interference with SOFIE +\anchor inference +## Inference with SOFIE | **Tutorial** || **Description** | |---------------|-----------------|-----------------| -| | TMVA_SOFIE_Inference.py | This macro provides an example of using a trained model with Keras and make inference using SOFIE directly from Numpy. | -| TMVA_SOFIE_Keras.C | | This macro provides a simple example for the parsing of Keras .h5 file into RModel object and further generating the .hxx header files for inference. | -| TMVA_SOFIE_Keras_HiggsModel.C | | This macro run the SOFIE parser on the Keras model obtaining running TMVA_Higgs_Classification.C You need to run that macro before this one. | -| | TMVA_SOFIE_Models.py | Example of inference with SOFIE using a set of models trained with Keras. | -| TMVA_SOFIE_ONNX.C | | This macro provides a simple example for the parsing of ONNX files into RModel object and further generating the .hxx header files for inference. | -| TMVA_SOFIE_PyTorch.C | | This macro provides a simple example for the parsing of PyTorch .pt file into RModel object and further generating the .hxx header files for inference. | -| TMVA_SOFIE_RDataFrame.C | TMVA_SOFIE_RDataFrame.py | Example of inference with SOFIE and RDataFrame, of a model trained with Keras. 
| -| TMVA_SOFIE_RDataFrame_JIT.C | | This macro provides an example of using a trained model with Keras and make inference using SOFIE and RDataFrame. | -| TMVA_SOFIE_RSofieReader.C | | This macro provides an example of using a trained model with Keras and make inference using SOFIE with the RSofieReader class. | - -\anchor rbatchgen -## RBatchGenerator +| | TMVA_SOFIE_Inference.py | Using a trained model with Keras and make inference using SOFIE directly from Numpy. | +| TMVA_SOFIE_Keras.C | | Parsing of Keras .h5 file into RModel object and further generating the .hxx header files for inference. | +| TMVA_SOFIE_Keras_HiggsModel.C | | Run the SOFIE parser on the Keras model obtained by running TMVA_Higgs_Classification.C. You need to run that macro before this one. | +| | TMVA_SOFIE_Models.py | Inference with SOFIE using a set of models trained with Keras. | +| TMVA_SOFIE_ONNX.C | | Parsing of ONNX files into RModel object and further generating the .hxx header files for inference. | +| TMVA_SOFIE_PyTorch.C | | Parsing of PyTorch .pt file into RModel object and further generating the .hxx header files for inference. | +| TMVA_SOFIE_RDataFrame.C | TMVA_SOFIE_RDataFrame.py | Inference with SOFIE and RDataFrame, of a model trained with Keras. | +| TMVA_SOFIE_RDataFrame_JIT.C | | Using a trained model with Keras and make inference using SOFIE and RDataFrame. | +| TMVA_SOFIE_RSofieReader.C | | Using a trained model with Keras and make inference using SOFIE with the RSofieReader class. | + +\anchor data_loading +## Data loading for training | **Tutorial** | **Description** | |--------------|-----------------| -| RBatchGenerator_NumPy.py | Example of getting batches of events from a ROOT dataset as Python generators of numpy arrays. | -| RBatchGenerator_PyTorch.py | Example of getting batches of events from a ROOT dataset into a basic PyTorch workflow. 
| -| RBatchGenerator_TensorFlow.py | Example of getting batches of events from a ROOT dataset into a basic TensorFlow workflow. | +| RBatchGenerator_NumPy.py | Loading batches of events from a ROOT dataset as Python generators of numpy arrays. | +| RBatchGenerator_PyTorch.py | Loading batches of events from a ROOT dataset into a basic PyTorch workflow. | +| RBatchGenerator_TensorFlow.py | Loading batches of events from a ROOT dataset into a basic TensorFlow workflow. | From cf2983020f765d5b85906b9382aa180a0ca1d298 Mon Sep 17 00:00:00 2001 From: Silia Taider Date: Thu, 17 Apr 2025 10:17:03 +0200 Subject: [PATCH 03/12] [skip-ci][python] Update release notes to include UHI --- README/ReleaseNotes/v636/index.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README/ReleaseNotes/v636/index.md b/README/ReleaseNotes/v636/index.md index 2ab81e102ab80..c13135d4e1169 100644 --- a/README/ReleaseNotes/v636/index.md +++ b/README/ReleaseNotes/v636/index.md @@ -29,6 +29,7 @@ The following people have contributed to this new version: Manuel Tobias Schiller, University of Glasgow,\ Surya Somayyajula, UMass Amherst,\ Petr Stepanov, @petrstepanov,\ + Silia Taider, CERN/EP-SFT,\ Dongliang Zhang, University of Science and Technology of China ## Deprecation and Removal @@ -46,6 +47,14 @@ The following people have contributed to this new version: ## Python Interface +### UHI +* ROOT histograms now comply with the [Unified Histogram Interface (UHI)](https://uhi.readthedocs.io/en/latest/index.html) specification, enhancing interoperability with other UHI-compatible libraries and standardizing histogram operations. + The following features were added: + * Implemented the UHI `PlottableHistogram` protocol enabling ROOT histograms to be plotted by any library supporting `PlottableHistogram` objects. + * Introduced UHI-style indexing for access and setting bin values. + * Introduced UHI-style slicing for selecting histogram ranges. 
+ * Implemented the `ROOT.uhi.loc`, `ROOT.uhi.underflow`, `ROOT.uhi.overflow`, `ROOT.uhi.rebin`, and `ROOT.uhi.sum` tags. + ## RDataFrame - When running multiple computation graphs run concurrently using [`RunGraphs()`](https://root.cern/doc/master/namespaceROOT_1_1RDF.html#a526d77d018bf69462d736bbdd1a695c4), the pool of slot numbers that a thread can pick from is now shared across all graphs. This enables use cases where a single resource, which may be expensive to create or copy, From 72cb70139db8db7c8bd0e60f310eda1134cbb9eb Mon Sep 17 00:00:00 2001 From: silverweed Date: Thu, 17 Apr 2025 11:50:52 +0200 Subject: [PATCH 04/12] [ntuple][RDF] remove more Experimental/ROOT7 leftovers from RDF and RNTuple --- bindings/pyroot/pythonizations/CMakeLists.txt | 6 +----- .../python/ROOT/_pythonization/_rntuple.py | 4 ++-- .../pyroot/pythonizations/test/root_module.py | 4 ++-- tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx | 4 ---- tree/dataframe/inc/ROOT/RDF/InterfaceUtils.hxx | 6 ------ tree/dataframe/inc/ROOT/RDF/RInterface.hxx | 12 ------------ tree/dataframe/src/RDataFrame.cxx | 15 --------------- tutorials/CMakeLists.txt | 7 +++++++ 8 files changed, 12 insertions(+), 46 deletions(-) diff --git a/bindings/pyroot/pythonizations/CMakeLists.txt b/bindings/pyroot/pythonizations/CMakeLists.txt index 2e9ecb425e972..e222e04c0ef99 100644 --- a/bindings/pyroot/pythonizations/CMakeLists.txt +++ b/bindings/pyroot/pythonizations/CMakeLists.txt @@ -67,11 +67,6 @@ if(tmva) endif() endif() -if(root7) - list(APPEND PYROOT_EXTRA_PYTHON_SOURCES - ROOT/_pythonization/_rntuple.py) -endif() - list(APPEND PYROOT_EXTRA_HEADERS inc/TPyDispatcher.h) @@ -93,6 +88,7 @@ set(py_sources ROOT/_pythonization/_generic.py ROOT/_pythonization/_memory_utils.py ROOT/_pythonization/_pyz_utils.py + ROOT/_pythonization/_rntuple.py ROOT/_pythonization/_runtime_error.py ROOT/_pythonization/_rvec.py ROOT/_pythonization/_stl_vector.py diff --git 
a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_rntuple.py b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_rntuple.py index 5a91fcc8f62b7..7a9e7f47104ca 100644 --- a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_rntuple.py +++ b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_rntuple.py @@ -96,7 +96,7 @@ def _RNTupleReader_Open(maybe_model, *args): maybe_model = maybe_model.Clone() import ROOT - return ROOT.Experimental.RNTupleReader._Open(maybe_model, *args) + return ROOT.RNTupleReader._Open(maybe_model, *args) def _RNTupleReader_LoadEntry(self, *args): @@ -105,7 +105,7 @@ def _RNTupleReader_LoadEntry(self, *args): return self._LoadEntry(*args) -@pythonization("RNTupleReader", ns="ROOT::Experimental") +@pythonization("RNTupleReader", ns="ROOT") def pythonize_RNTupleReader(klass): klass._Open = klass.Open klass.Open = _RNTupleReader_Open diff --git a/bindings/pyroot/pythonizations/test/root_module.py b/bindings/pyroot/pythonizations/test/root_module.py index 2ebc8dfaadd9e..fb66d13054a8a 100644 --- a/bindings/pyroot/pythonizations/test/root_module.py +++ b/bindings/pyroot/pythonizations/test/root_module.py @@ -99,8 +99,8 @@ def test_import_nested_submodules(self): if root_module_has("RDF.Experimental.Distributed"): import ROOT.RDF.Experimental.Distributed - if root_module_has("Experimental.RNTuple"): - from ROOT.Experimental import RNTuple + if root_module_has("RNTuple"): + from ROOT import RNTuple if root_module_has("RooFit.Evaluator"): from ROOT.RooFit import Evaluator diff --git a/tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx b/tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx index 00d064abef252..7eed1cfef66c4 100644 --- a/tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx +++ b/tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx @@ -48,10 +48,8 @@ #include "ROOT/RDF/RMergeableValue.hxx" #include "ROOT/RDF/RLoopManager.hxx" -#ifdef R__HAS_ROOT7 #include "ROOT/RNTupleDS.hxx" #include "ROOT/RNTupleWriter.hxx" // for 
SnapshotRNTupleHelper -#endif #include "ROOT/RTTreeDS.hxx" #include @@ -1968,7 +1966,6 @@ public: } }; -#ifdef R__HAS_ROOT7 /// Ensure that the RNTuple with the resulting snapshot can be written to the target TFile. This means checking that the /// TFile can be opened in the mode specified in `opts`, deleting any existing RNTuples in case /// `opts.fOverwriteIfExists = true`, or throwing an error otherwise. @@ -2118,7 +2115,6 @@ public: std::vector(fIsDefine)}; } }; -#endif template ::value> diff --git a/tree/dataframe/inc/ROOT/RDF/InterfaceUtils.hxx b/tree/dataframe/inc/ROOT/RDF/InterfaceUtils.hxx index 373c526496058..0cdc6735808c2 100644 --- a/tree/dataframe/inc/ROOT/RDF/InterfaceUtils.hxx +++ b/tree/dataframe/inc/ROOT/RDF/InterfaceUtils.hxx @@ -277,7 +277,6 @@ BuildAction(const ColumnNames_t &colNames, const std::shared_ptr actionPtr; if (snapHelperArgs->fToNTuple) { -#ifdef R__HAS_ROOT7 if (!ROOT::IsImplicitMTEnabled()) { // single-thread snapshot using Helper_t = SnapshotRNTupleHelper; @@ -293,11 +292,6 @@ BuildAction(const ColumnNames_t &colNames, const std::shared_ptr> resPtr; if (options.fOutputFormat == ESnapshotOutputFormat::kRNTuple) { -#ifdef R__HAS_ROOT7 if (RDFInternal::GetDataSourceLabel(*this) == "TTreeDS") { throw std::runtime_error("Snapshotting from TTree to RNTuple is not yet supported. 
The current recommended " "way to convert TTrees to RNTuple is through the RNTupleImporter."); @@ -1366,11 +1365,6 @@ public: resPtr = CreateAction( colListNoAliasesWithSizeBranches, newRDF, snapHelperArgs, fProxiedPtr, colListNoAliasesWithSizeBranches.size()); -#else - throw std::runtime_error( - "RDataFrame: Cannot snapshot to RNTuple - this installation of ROOT has not been build with ROOT7 " - "components enabled."); -#endif } else { if (RDFInternal::GetDataSourceLabel(*this) == "RNTupleDS" && options.fOutputFormat == ESnapshotOutputFormat::kDefault) { @@ -3235,7 +3229,6 @@ private: RResultPtr> resPtr; if (options.fOutputFormat == ESnapshotOutputFormat::kRNTuple) { -#ifdef R__HAS_ROOT7 if (RDFInternal::GetDataSourceLabel(*this) == "TTreeDS") { throw std::runtime_error("Snapshotting from TTree to RNTuple is not yet supported. The current recommended " "way to convert TTrees to RNTuple is through the RNTupleImporter."); @@ -3253,11 +3246,6 @@ private: // names. resPtr = CreateAction(validCols, newRDF, snapHelperArgs, fProxiedPtr); -#else - throw std::runtime_error( - "RDataFrame: Cannot snapshot to RNTuple - this installation of ROOT has not been build with ROOT7 " - "components enabled."); -#endif } else { if (RDFInternal::GetDataSourceLabel(*this) == "RNTupleDS" && options.fOutputFormat == ESnapshotOutputFormat::kDefault) { diff --git a/tree/dataframe/src/RDataFrame.cxx b/tree/dataframe/src/RDataFrame.cxx index 41b124787d3ca..1c7435943e92b 100644 --- a/tree/dataframe/src/RDataFrame.cxx +++ b/tree/dataframe/src/RDataFrame.cxx @@ -1870,17 +1870,10 @@ RDataFrame::RDataFrame(std::string_view treeName, TDirectory *dirPtr, const Colu /// The default columns are looked at in case no column is specified in the /// booking of actions or transformations. /// \note see ROOT::RDF::RInterface for the documentation of the methods available. 
-#ifdef R__HAS_ROOT7 RDataFrame::RDataFrame(std::string_view treeName, std::string_view fileNameGlob, const ColumnNames_t &defaultColumns) : RInterface(ROOT::Detail::RDF::CreateLMFromFile(treeName, fileNameGlob, defaultColumns)) { } -#else -RDataFrame::RDataFrame(std::string_view treeName, std::string_view fileNameGlob, const ColumnNames_t &defaultColumns) - : RInterface(ROOT::Detail::RDF::CreateLMFromTTree(treeName, fileNameGlob, defaultColumns)) -{ -} -#endif //////////////////////////////////////////////////////////////////////////// /// \brief Build the dataframe. @@ -1893,19 +1886,11 @@ RDataFrame::RDataFrame(std::string_view treeName, std::string_view fileNameGlob, /// /// The default columns are looked at in case no column is specified in the booking of actions or transformations. /// \note see ROOT::RDF::RInterface for the documentation of the methods available. -#ifdef R__HAS_ROOT7 RDataFrame::RDataFrame(std::string_view datasetName, const std::vector &fileNameGlobs, const ColumnNames_t &defaultColumns) : RInterface(ROOT::Detail::RDF::CreateLMFromFile(datasetName, fileNameGlobs, defaultColumns)) { } -#else -RDataFrame::RDataFrame(std::string_view datasetName, const std::vector &fileNameGlobs, - const ColumnNames_t &defaultColumns) - : RInterface(ROOT::Detail::RDF::CreateLMFromTTree(datasetName, fileNameGlobs, defaultColumns)) -{ -} -#endif //////////////////////////////////////////////////////////////////////////// /// \brief Build the dataframe. 
diff --git a/tutorials/CMakeLists.txt b/tutorials/CMakeLists.txt index 5380b6f0f30d6..4c597d4964271 100644 --- a/tutorials/CMakeLists.txt +++ b/tutorials/CMakeLists.txt @@ -403,6 +403,13 @@ else() endif() file(GLOB v7_veto_files RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}/ experimental/*.py experimental/*.cxx experimental/*/*.cxx experimental/*.C experimental/*/*.C experimental/rcanvas/*.py experimental/rcanvas/*.cxx) list(APPEND root7_veto ${v7_veto_files}) + # This depends on ntupleutil + list(APPEND root7_veto io/ntuple/ntpl008_import.C) + # This depends on RCanvas + list(APPEND root7_veto io/ntuple/ntpl011_global_temperatures.C) + if((NOT davix) OR (NOT dataframe)) + list(APPEND root7_veto io/ntuple/ntpl004_dimuon.C) + endif() endif() if (APPLE AND CMAKE_SYSTEM_PROCESSOR MATCHES arm64) From 5e71b75c961e8c4b49af0e854567d4ff4cb8147d Mon Sep 17 00:00:00 2001 From: Vincenzo Eduardo Padulano Date: Thu, 17 Apr 2025 12:39:23 +0200 Subject: [PATCH 05/12] [df] Move distributed RDataFrame out of experimental The distributed RDataFrame Python package has been moved out of experimental mode and it is now called `ROOT.RDF.Distributed`. The functionality has already been merged with the traditional dataframe constructor so that users can type `ROOT.RDataFrame(..., executor=...)` to get a distributed RDataFrame. For the moment, we keep the old naming with the Experimental keyword and raise a warning to the user when they access the module through the old name. This will be completely removed in ROOT 6.38. 
--- bindings/CMakeLists.txt | 2 +- .../{experimental => }/distrdf/.gitignore | 0 .../{experimental => }/distrdf/CMakeLists.txt | 8 ++-- .../distrdf/python/DistRDF/Backends/Base.py | 0 .../python/DistRDF/Backends/Dask/Backend.py | 0 .../python/DistRDF/Backends/Dask/__init__.py | 0 .../python/DistRDF/Backends/Spark/Backend.py | 0 .../python/DistRDF/Backends/Spark/__init__.py | 0 .../distrdf/python/DistRDF/Backends/Utils.py | 0 .../python/DistRDF/Backends/__init__.py | 4 +- .../DistRDF/ComputationGraphGenerator.py | 0 .../distrdf/python/DistRDF/DataFrame.py | 0 .../distrdf/python/DistRDF/HeadNode.py | 2 +- .../distrdf/python/DistRDF/LiveVisualize.py | 0 .../distrdf/python/DistRDF/Node.py | 0 .../distrdf/python/DistRDF/Operation.py | 0 .../distrdf/python/DistRDF/Proxy.py | 0 .../python/DistRDF/PythonMergeables.py | 0 .../distrdf/python/DistRDF/Ranges.py | 0 .../distrdf/python/DistRDF/__init__.py | 41 +++++++++++++++--- .../distrdf/python/DistRDF/_graph_cache.py | 0 .../distrdf/test/CMakeLists.txt | 0 .../distrdf/test/__init__.py | 0 .../distrdf/test/backend/1000clusters.root | Bin .../test/backend/1cluster_20entries.root | Bin .../distrdf/test/backend/2clusters.root | Bin .../distrdf/test/backend/4clusters.root | Bin .../distrdf/test/backend/CMakeLists.txt | 0 .../distrdf/test/backend/Slimmed_ntuple.root | Bin .../distrdf/test/backend/test_common.py | 0 .../distrdf/test/backend/test_dist.py | 0 .../test/backend/test_graph_caching.py | 0 .../test/backend/test_headers/header1.hxx | 5 ++- .../test/backend/test_headers/header2.hxx | 16 +++++++ .../test/backend/test_headers/header3.hxx | 5 ++- .../test/backend/test_headers/header4.hxx | 5 ++- .../distrdf/test/test_callable_generator.py | 0 .../distrdf/test/test_friendinfo.py | 0 .../distrdf/test/test_headnode.py | 0 .../distrdf/test/test_init.py | 0 .../distrdf/test/test_node.py | 0 .../distrdf/test/test_operation.py | 0 .../distrdf/test/test_proxy.py | 0 .../distrdf/test/test_ranges.py | 0 
.../test/backend/test_headers/header2.hxx | 14 ------ .../pythonizations/python/ROOT/_facade.py | 12 ++--- .../ROOT/_pythonization/_rdf_namespace.py | 8 ++-- .../pyroot/pythonizations/test/root_module.py | 2 +- tree/dataframe/src/RDataFrame.cxx | 27 +++++------- .../distrdf003_live_visualization.py | 4 +- 50 files changed, 94 insertions(+), 61 deletions(-) rename bindings/{experimental => }/distrdf/.gitignore (100%) rename bindings/{experimental => }/distrdf/CMakeLists.txt (97%) rename bindings/{experimental => }/distrdf/python/DistRDF/Backends/Base.py (100%) rename bindings/{experimental => }/distrdf/python/DistRDF/Backends/Dask/Backend.py (100%) rename bindings/{experimental => }/distrdf/python/DistRDF/Backends/Dask/__init__.py (100%) rename bindings/{experimental => }/distrdf/python/DistRDF/Backends/Spark/Backend.py (100%) rename bindings/{experimental => }/distrdf/python/DistRDF/Backends/Spark/__init__.py (100%) rename bindings/{experimental => }/distrdf/python/DistRDF/Backends/Utils.py (100%) rename bindings/{experimental => }/distrdf/python/DistRDF/Backends/__init__.py (91%) rename bindings/{experimental => }/distrdf/python/DistRDF/ComputationGraphGenerator.py (100%) rename bindings/{experimental => }/distrdf/python/DistRDF/DataFrame.py (100%) rename bindings/{experimental => }/distrdf/python/DistRDF/HeadNode.py (99%) rename bindings/{experimental => }/distrdf/python/DistRDF/LiveVisualize.py (100%) rename bindings/{experimental => }/distrdf/python/DistRDF/Node.py (100%) rename bindings/{experimental => }/distrdf/python/DistRDF/Operation.py (100%) rename bindings/{experimental => }/distrdf/python/DistRDF/Proxy.py (100%) rename bindings/{experimental => }/distrdf/python/DistRDF/PythonMergeables.py (100%) rename bindings/{experimental => }/distrdf/python/DistRDF/Ranges.py (100%) rename bindings/{experimental => }/distrdf/python/DistRDF/__init__.py (86%) rename bindings/{experimental => }/distrdf/python/DistRDF/_graph_cache.py (100%) rename bindings/{experimental 
=> }/distrdf/test/CMakeLists.txt (100%) rename bindings/{experimental => }/distrdf/test/__init__.py (100%) rename bindings/{experimental => }/distrdf/test/backend/1000clusters.root (100%) rename bindings/{experimental => }/distrdf/test/backend/1cluster_20entries.root (100%) rename bindings/{experimental => }/distrdf/test/backend/2clusters.root (100%) rename bindings/{experimental => }/distrdf/test/backend/4clusters.root (100%) rename bindings/{experimental => }/distrdf/test/backend/CMakeLists.txt (100%) rename bindings/{experimental => }/distrdf/test/backend/Slimmed_ntuple.root (100%) rename bindings/{experimental => }/distrdf/test/backend/test_common.py (100%) rename bindings/{experimental => }/distrdf/test/backend/test_dist.py (100%) rename bindings/{experimental => }/distrdf/test/backend/test_graph_caching.py (100%) rename bindings/{experimental => }/distrdf/test/backend/test_headers/header1.hxx (58%) create mode 100644 bindings/distrdf/test/backend/test_headers/header2.hxx rename bindings/{experimental => }/distrdf/test/backend/test_headers/header3.hxx (58%) rename bindings/{experimental => }/distrdf/test/backend/test_headers/header4.hxx (58%) rename bindings/{experimental => }/distrdf/test/test_callable_generator.py (100%) rename bindings/{experimental => }/distrdf/test/test_friendinfo.py (100%) rename bindings/{experimental => }/distrdf/test/test_headnode.py (100%) rename bindings/{experimental => }/distrdf/test/test_init.py (100%) rename bindings/{experimental => }/distrdf/test/test_node.py (100%) rename bindings/{experimental => }/distrdf/test/test_operation.py (100%) rename bindings/{experimental => }/distrdf/test/test_proxy.py (100%) rename bindings/{experimental => }/distrdf/test/test_ranges.py (100%) delete mode 100644 bindings/experimental/distrdf/test/backend/test_headers/header2.hxx diff --git a/bindings/CMakeLists.txt b/bindings/CMakeLists.txt index c00837fa42876..e370865c2c2c4 100644 --- a/bindings/CMakeLists.txt +++ b/bindings/CMakeLists.txt @@ 
-10,7 +10,7 @@ if(pyroot) if(dataframe) message(STATUS "Distributed RDataFrame enabled") - add_subdirectory(experimental/distrdf) + add_subdirectory(distrdf) else() message(STATUS "Requirements to enable distributed RDataFrame:") message(STATUS " dataframe: required:ON, actual:${dataframe}") diff --git a/bindings/experimental/distrdf/.gitignore b/bindings/distrdf/.gitignore similarity index 100% rename from bindings/experimental/distrdf/.gitignore rename to bindings/distrdf/.gitignore diff --git a/bindings/experimental/distrdf/CMakeLists.txt b/bindings/distrdf/CMakeLists.txt similarity index 97% rename from bindings/experimental/distrdf/CMakeLists.txt rename to bindings/distrdf/CMakeLists.txt index b23481b3beccb..78b95dcdbbee0 100644 --- a/bindings/experimental/distrdf/CMakeLists.txt +++ b/bindings/distrdf/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 1995-2023, Rene Brun and Fons Rademakers. +# Copyright (C) 1995-2025, Rene Brun and Fons Rademakers. # All rights reserved. # # For the licensing terms see $ROOTSYS/LICENSE. 
@@ -14,6 +14,7 @@ set(py_sources DistRDF/ComputationGraphGenerator.py DistRDF/DataFrame.py DistRDF/HeadNode.py + DistRDF/LiveVisualize.py DistRDF/Node.py DistRDF/Operation.py DistRDF/Proxy.py @@ -22,11 +23,10 @@ set(py_sources DistRDF/Backends/__init__.py DistRDF/Backends/Base.py DistRDF/Backends/Utils.py - DistRDF/Backends/Spark/__init__.py - DistRDF/Backends/Spark/Backend.py DistRDF/Backends/Dask/__init__.py DistRDF/Backends/Dask/Backend.py - DistRDF/LiveVisualize.py + DistRDF/Backends/Spark/__init__.py + DistRDF/Backends/Spark/Backend.py ) # Add custom rules to copy the Python sources into the build directory diff --git a/bindings/experimental/distrdf/python/DistRDF/Backends/Base.py b/bindings/distrdf/python/DistRDF/Backends/Base.py similarity index 100% rename from bindings/experimental/distrdf/python/DistRDF/Backends/Base.py rename to bindings/distrdf/python/DistRDF/Backends/Base.py diff --git a/bindings/experimental/distrdf/python/DistRDF/Backends/Dask/Backend.py b/bindings/distrdf/python/DistRDF/Backends/Dask/Backend.py similarity index 100% rename from bindings/experimental/distrdf/python/DistRDF/Backends/Dask/Backend.py rename to bindings/distrdf/python/DistRDF/Backends/Dask/Backend.py diff --git a/bindings/experimental/distrdf/python/DistRDF/Backends/Dask/__init__.py b/bindings/distrdf/python/DistRDF/Backends/Dask/__init__.py similarity index 100% rename from bindings/experimental/distrdf/python/DistRDF/Backends/Dask/__init__.py rename to bindings/distrdf/python/DistRDF/Backends/Dask/__init__.py diff --git a/bindings/experimental/distrdf/python/DistRDF/Backends/Spark/Backend.py b/bindings/distrdf/python/DistRDF/Backends/Spark/Backend.py similarity index 100% rename from bindings/experimental/distrdf/python/DistRDF/Backends/Spark/Backend.py rename to bindings/distrdf/python/DistRDF/Backends/Spark/Backend.py diff --git a/bindings/experimental/distrdf/python/DistRDF/Backends/Spark/__init__.py b/bindings/distrdf/python/DistRDF/Backends/Spark/__init__.py 
similarity index 100% rename from bindings/experimental/distrdf/python/DistRDF/Backends/Spark/__init__.py rename to bindings/distrdf/python/DistRDF/Backends/Spark/__init__.py diff --git a/bindings/experimental/distrdf/python/DistRDF/Backends/Utils.py b/bindings/distrdf/python/DistRDF/Backends/Utils.py similarity index 100% rename from bindings/experimental/distrdf/python/DistRDF/Backends/Utils.py rename to bindings/distrdf/python/DistRDF/Backends/Utils.py diff --git a/bindings/experimental/distrdf/python/DistRDF/Backends/__init__.py b/bindings/distrdf/python/DistRDF/Backends/__init__.py similarity index 91% rename from bindings/experimental/distrdf/python/DistRDF/Backends/__init__.py rename to bindings/distrdf/python/DistRDF/Backends/__init__.py index 2954ee4953133..61a891a36f609 100644 --- a/bindings/experimental/distrdf/python/DistRDF/Backends/__init__.py +++ b/bindings/distrdf/python/DistRDF/Backends/__init__.py @@ -26,11 +26,11 @@ def build_backends_submodules(parentmodule: types.ModuleType) -> types.ModuleTyp # The actual python package with the backend implementation actual = importlib.import_module(__name__ + "." + module_name) # A dummy module to inject in the parent module - fullmodulename = "ROOT.RDF.Experimental.Distributed." + module_name + fullmodulename = "ROOT.RDF.Distributed." 
+ module_name dummy = types.ModuleType(fullmodulename) # PEP302 attributes - dummy.__file__ = "" + dummy.__file__ = "" # dummy.__name__ is the constructor argument dummy.__path__ = [] # this makes it a package # dummy.__loader__ is not defined diff --git a/bindings/experimental/distrdf/python/DistRDF/ComputationGraphGenerator.py b/bindings/distrdf/python/DistRDF/ComputationGraphGenerator.py similarity index 100% rename from bindings/experimental/distrdf/python/DistRDF/ComputationGraphGenerator.py rename to bindings/distrdf/python/DistRDF/ComputationGraphGenerator.py diff --git a/bindings/experimental/distrdf/python/DistRDF/DataFrame.py b/bindings/distrdf/python/DistRDF/DataFrame.py similarity index 100% rename from bindings/experimental/distrdf/python/DistRDF/DataFrame.py rename to bindings/distrdf/python/DistRDF/DataFrame.py diff --git a/bindings/experimental/distrdf/python/DistRDF/HeadNode.py b/bindings/distrdf/python/DistRDF/HeadNode.py similarity index 99% rename from bindings/experimental/distrdf/python/DistRDF/HeadNode.py rename to bindings/distrdf/python/DistRDF/HeadNode.py index 9a5809831dc9b..ae0f62472f57f 100644 --- a/bindings/experimental/distrdf/python/DistRDF/HeadNode.py +++ b/bindings/distrdf/python/DistRDF/HeadNode.py @@ -841,7 +841,7 @@ def build_rdf_from_range(current_range: Ranges.get_ntuple_ranges) -> TaskObjects if not filenames: return TaskObjects(None, None) - return TaskObjects(ROOT.RDF.Experimental.FromRNTuple(ntuplename, filenames), None) + return TaskObjects(ROOT.RDF.FromRNTuple(ntuplename, filenames), None) return build_rdf_from_range diff --git a/bindings/experimental/distrdf/python/DistRDF/LiveVisualize.py b/bindings/distrdf/python/DistRDF/LiveVisualize.py similarity index 100% rename from bindings/experimental/distrdf/python/DistRDF/LiveVisualize.py rename to bindings/distrdf/python/DistRDF/LiveVisualize.py diff --git a/bindings/experimental/distrdf/python/DistRDF/Node.py b/bindings/distrdf/python/DistRDF/Node.py similarity index 100% 
rename from bindings/experimental/distrdf/python/DistRDF/Node.py rename to bindings/distrdf/python/DistRDF/Node.py diff --git a/bindings/experimental/distrdf/python/DistRDF/Operation.py b/bindings/distrdf/python/DistRDF/Operation.py similarity index 100% rename from bindings/experimental/distrdf/python/DistRDF/Operation.py rename to bindings/distrdf/python/DistRDF/Operation.py diff --git a/bindings/experimental/distrdf/python/DistRDF/Proxy.py b/bindings/distrdf/python/DistRDF/Proxy.py similarity index 100% rename from bindings/experimental/distrdf/python/DistRDF/Proxy.py rename to bindings/distrdf/python/DistRDF/Proxy.py diff --git a/bindings/experimental/distrdf/python/DistRDF/PythonMergeables.py b/bindings/distrdf/python/DistRDF/PythonMergeables.py similarity index 100% rename from bindings/experimental/distrdf/python/DistRDF/PythonMergeables.py rename to bindings/distrdf/python/DistRDF/PythonMergeables.py diff --git a/bindings/experimental/distrdf/python/DistRDF/Ranges.py b/bindings/distrdf/python/DistRDF/Ranges.py similarity index 100% rename from bindings/experimental/distrdf/python/DistRDF/Ranges.py rename to bindings/distrdf/python/DistRDF/Ranges.py diff --git a/bindings/experimental/distrdf/python/DistRDF/__init__.py b/bindings/distrdf/python/DistRDF/__init__.py similarity index 86% rename from bindings/experimental/distrdf/python/DistRDF/__init__.py rename to bindings/distrdf/python/DistRDF/__init__.py index d6476b41764b3..e66d923a02d47 100644 --- a/bindings/experimental/distrdf/python/DistRDF/__init__.py +++ b/bindings/distrdf/python/DistRDF/__init__.py @@ -12,8 +12,9 @@ from __future__ import annotations import logging -import os import types +import warnings +import textwrap import concurrent.futures @@ -192,17 +193,42 @@ def FromSpec(jsonfile : str, *args, **kwargs) -> RDataFrame: f"The client object of type '{type(executor)}' is not a supported " "connection type for distributed RDataFrame.") -def create_distributed_module(parentmodule): + +class 
_DeprecatedModule(types.ModuleType): + """A simple module type to raise a warning before usage.""" + + def __getattribute__(self, name): + msg_warng = textwrap.dedent( + """ + In ROOT 6.36, the ROOT.RDF.Experimental.Distributed module has become just ROOT.RDF.Distributed. ROOT 6.38 + will remove the 'Experimental' keyword completely, so it is suggested to move to the stable API in user + code. You can now change lines such as: + ``` + connection = ... # your distributed Dask client or SparkContext + RDataFrame = ROOT.RDF.Experimental.Distributed.[Backend].RDataFrame + df = RDataFrame(..., [daskclient,sparkcontext] = connection) + ``` + to simply: + ``` + connection = ... # your distributed Dask client or SparkContext + df = ROOT.RDataFrame(..., executor = connection) + ``` + """ + ) + warnings.warn(msg_warng, FutureWarning) + return super().__getattribute__(name) + +def create_distributed_module(parentmodule, experimental: bool = False): """ - Helper function to create the ROOT.RDF.Experimental.Distributed module. + Helper function to create the ROOT.RDF.Distributed module. Users will see this module as the entry point of functions to create and run an RDataFrame computation distributedly. 
""" - distributed = types.ModuleType("ROOT.RDF.Experimental.Distributed") + distributed = types.ModuleType("ROOT.RDF.Distributed") # PEP302 attributes - distributed.__file__ = "" + distributed.__file__ = "" # distributed.__name__ is the constructor argument distributed.__path__ = [] # this makes it a package # distributed.__loader__ is not defined @@ -220,7 +246,10 @@ def create_distributed_module(parentmodule): distributed.DistributeSharedLibs = DistributeSharedLibs distributed.DistributeCppCode = DistributeCppCode distributed.FromSpec = FromSpec - + + if experimental: + distributed.__class__ = _DeprecatedModule + return distributed def RDataFrame(*args, **kwargs): diff --git a/bindings/experimental/distrdf/python/DistRDF/_graph_cache.py b/bindings/distrdf/python/DistRDF/_graph_cache.py similarity index 100% rename from bindings/experimental/distrdf/python/DistRDF/_graph_cache.py rename to bindings/distrdf/python/DistRDF/_graph_cache.py diff --git a/bindings/experimental/distrdf/test/CMakeLists.txt b/bindings/distrdf/test/CMakeLists.txt similarity index 100% rename from bindings/experimental/distrdf/test/CMakeLists.txt rename to bindings/distrdf/test/CMakeLists.txt diff --git a/bindings/experimental/distrdf/test/__init__.py b/bindings/distrdf/test/__init__.py similarity index 100% rename from bindings/experimental/distrdf/test/__init__.py rename to bindings/distrdf/test/__init__.py diff --git a/bindings/experimental/distrdf/test/backend/1000clusters.root b/bindings/distrdf/test/backend/1000clusters.root similarity index 100% rename from bindings/experimental/distrdf/test/backend/1000clusters.root rename to bindings/distrdf/test/backend/1000clusters.root diff --git a/bindings/experimental/distrdf/test/backend/1cluster_20entries.root b/bindings/distrdf/test/backend/1cluster_20entries.root similarity index 100% rename from bindings/experimental/distrdf/test/backend/1cluster_20entries.root rename to bindings/distrdf/test/backend/1cluster_20entries.root diff --git 
a/bindings/experimental/distrdf/test/backend/2clusters.root b/bindings/distrdf/test/backend/2clusters.root similarity index 100% rename from bindings/experimental/distrdf/test/backend/2clusters.root rename to bindings/distrdf/test/backend/2clusters.root diff --git a/bindings/experimental/distrdf/test/backend/4clusters.root b/bindings/distrdf/test/backend/4clusters.root similarity index 100% rename from bindings/experimental/distrdf/test/backend/4clusters.root rename to bindings/distrdf/test/backend/4clusters.root diff --git a/bindings/experimental/distrdf/test/backend/CMakeLists.txt b/bindings/distrdf/test/backend/CMakeLists.txt similarity index 100% rename from bindings/experimental/distrdf/test/backend/CMakeLists.txt rename to bindings/distrdf/test/backend/CMakeLists.txt diff --git a/bindings/experimental/distrdf/test/backend/Slimmed_ntuple.root b/bindings/distrdf/test/backend/Slimmed_ntuple.root similarity index 100% rename from bindings/experimental/distrdf/test/backend/Slimmed_ntuple.root rename to bindings/distrdf/test/backend/Slimmed_ntuple.root diff --git a/bindings/experimental/distrdf/test/backend/test_common.py b/bindings/distrdf/test/backend/test_common.py similarity index 100% rename from bindings/experimental/distrdf/test/backend/test_common.py rename to bindings/distrdf/test/backend/test_common.py diff --git a/bindings/experimental/distrdf/test/backend/test_dist.py b/bindings/distrdf/test/backend/test_dist.py similarity index 100% rename from bindings/experimental/distrdf/test/backend/test_dist.py rename to bindings/distrdf/test/backend/test_dist.py diff --git a/bindings/experimental/distrdf/test/backend/test_graph_caching.py b/bindings/distrdf/test/backend/test_graph_caching.py similarity index 100% rename from bindings/experimental/distrdf/test/backend/test_graph_caching.py rename to bindings/distrdf/test/backend/test_graph_caching.py diff --git a/bindings/experimental/distrdf/test/backend/test_headers/header1.hxx 
b/bindings/distrdf/test/backend/test_headers/header1.hxx similarity index 58% rename from bindings/experimental/distrdf/test/backend/test_headers/header1.hxx rename to bindings/distrdf/test/backend/test_headers/header1.hxx index 42f965e80db24..2fa63d1a9f18f 100644 --- a/bindings/experimental/distrdf/test/backend/test_headers/header1.hxx +++ b/bindings/distrdf/test/backend/test_headers/header1.hxx @@ -1,8 +1,9 @@ #ifndef HEADER_1 #define HEADER_1 -bool f(int x) { - return true; +bool f(int x) +{ + return true; } #endif diff --git a/bindings/distrdf/test/backend/test_headers/header2.hxx b/bindings/distrdf/test/backend/test_headers/header2.hxx new file mode 100644 index 0000000000000..eb773b778d720 --- /dev/null +++ b/bindings/distrdf/test/backend/test_headers/header2.hxx @@ -0,0 +1,16 @@ +#include <string> + +#ifndef HEADER_2 +#define HEADER_2 + +int f1(int x) +{ + return x; +} + +std::string f2(std::string s) +{ + return s; +} + +#endif diff --git a/bindings/experimental/distrdf/test/backend/test_headers/header3.hxx b/bindings/distrdf/test/backend/test_headers/header3.hxx similarity index 58% rename from bindings/experimental/distrdf/test/backend/test_headers/header3.hxx rename to bindings/distrdf/test/backend/test_headers/header3.hxx index 37ffad5848bcd..4cad1af8920e8 100644 --- a/bindings/experimental/distrdf/test/backend/test_headers/header3.hxx +++ b/bindings/distrdf/test/backend/test_headers/header3.hxx @@ -1,8 +1,9 @@ #ifndef HEADER_3 #define HEADER_3 -bool a(int x) { - return true; +bool a(int x) +{ + return true; } #endif diff --git a/bindings/experimental/distrdf/test/backend/test_headers/header4.hxx b/bindings/distrdf/test/backend/test_headers/header4.hxx similarity index 58% rename from bindings/experimental/distrdf/test/backend/test_headers/header4.hxx rename to bindings/distrdf/test/backend/test_headers/header4.hxx index 69a8572c6225f..c27a988aa63bb 100644 --- a/bindings/experimental/distrdf/test/backend/test_headers/header4.hxx +++
b/bindings/distrdf/test/backend/test_headers/header4.hxx @@ -1,8 +1,9 @@ #ifndef HEADER_4 #define HEADER_4 -bool b(int x) { - return true; +bool b(int x) +{ + return true; } #endif diff --git a/bindings/experimental/distrdf/test/test_callable_generator.py b/bindings/distrdf/test/test_callable_generator.py similarity index 100% rename from bindings/experimental/distrdf/test/test_callable_generator.py rename to bindings/distrdf/test/test_callable_generator.py diff --git a/bindings/experimental/distrdf/test/test_friendinfo.py b/bindings/distrdf/test/test_friendinfo.py similarity index 100% rename from bindings/experimental/distrdf/test/test_friendinfo.py rename to bindings/distrdf/test/test_friendinfo.py diff --git a/bindings/experimental/distrdf/test/test_headnode.py b/bindings/distrdf/test/test_headnode.py similarity index 100% rename from bindings/experimental/distrdf/test/test_headnode.py rename to bindings/distrdf/test/test_headnode.py diff --git a/bindings/experimental/distrdf/test/test_init.py b/bindings/distrdf/test/test_init.py similarity index 100% rename from bindings/experimental/distrdf/test/test_init.py rename to bindings/distrdf/test/test_init.py diff --git a/bindings/experimental/distrdf/test/test_node.py b/bindings/distrdf/test/test_node.py similarity index 100% rename from bindings/experimental/distrdf/test/test_node.py rename to bindings/distrdf/test/test_node.py diff --git a/bindings/experimental/distrdf/test/test_operation.py b/bindings/distrdf/test/test_operation.py similarity index 100% rename from bindings/experimental/distrdf/test/test_operation.py rename to bindings/distrdf/test/test_operation.py diff --git a/bindings/experimental/distrdf/test/test_proxy.py b/bindings/distrdf/test/test_proxy.py similarity index 100% rename from bindings/experimental/distrdf/test/test_proxy.py rename to bindings/distrdf/test/test_proxy.py diff --git a/bindings/experimental/distrdf/test/test_ranges.py b/bindings/distrdf/test/test_ranges.py similarity index 100% 
rename from bindings/experimental/distrdf/test/test_ranges.py rename to bindings/distrdf/test/test_ranges.py diff --git a/bindings/experimental/distrdf/test/backend/test_headers/header2.hxx b/bindings/experimental/distrdf/test/backend/test_headers/header2.hxx deleted file mode 100644 index 58630180cb72e..0000000000000 --- a/bindings/experimental/distrdf/test/backend/test_headers/header2.hxx +++ /dev/null @@ -1,14 +0,0 @@ -#include <string> - -#ifndef HEADER_2 -#define HEADER_2 - -int f1(int x) { - return x; -} - -std::string f2(std::string s) { - return s; -} - -#endif diff --git a/bindings/pyroot/pythonizations/python/ROOT/_facade.py b/bindings/pyroot/pythonizations/python/ROOT/_facade.py index b7bcb0e5d66de..3cebe36002244 100644 --- a/bindings/pyroot/pythonizations/python/ROOT/_facade.py +++ b/bindings/pyroot/pythonizations/python/ROOT/_facade.py @@ -363,10 +363,12 @@ def MakePandasDataFrame(df): try: # Inject Pythonizations to interact between local and distributed RDF package from ._pythonization._rdf_namespace import _create_distributed_module, _rungraphs, _variationsfor, _fromspec - ns.Experimental.Distributed = _create_distributed_module(ns.Experimental) - ns.RunGraphs = _rungraphs(ns.Experimental.Distributed.RunGraphs, ns.RunGraphs) - ns.Experimental.VariationsFor = _variationsfor(ns.Experimental.Distributed.VariationsFor, ns.Experimental.VariationsFor) - ns.Experimental.FromSpec = _fromspec(ns.Experimental.Distributed.FromSpec, ns.Experimental.FromSpec) + ns.Distributed = _create_distributed_module(ns) + # Inject the experimental package which shows a warning before usage + ns.Experimental.Distributed = _create_distributed_module(ns, True) + ns.RunGraphs = _rungraphs(ns.Distributed.RunGraphs, ns.RunGraphs) + ns.Experimental.VariationsFor = _variationsfor(ns.Distributed.VariationsFor, ns.Experimental.VariationsFor) + ns.Experimental.FromSpec = _fromspec(ns.Distributed.FromSpec, ns.Experimental.FromSpec) except ImportError: pass @@ -465,4 +467,4 @@ def uhi(self):
_add_module_level_uhi_helpers(uhi_module) except ImportError: raise Exception("Failed to pythonize the namespace uhi") - return uhi_module \ No newline at end of file + return uhi_module diff --git a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_rdf_namespace.py b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_rdf_namespace.py index 184e2798699e7..7af4e26b732f7 100644 --- a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_rdf_namespace.py +++ b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_rdf_namespace.py @@ -14,9 +14,9 @@ """ -def _create_distributed_module(parent): +def _create_distributed_module(parent, experimental: bool = False): """ - Create the ROOT.RDF.Experimental.Distributed python module. + Create the ROOT.RDF.Distributed python module. This module will be injected into the ROOT.RDF namespace. @@ -24,11 +24,11 @@ def _create_distributed_module(parent): parent: The ROOT.RDF namespace. Needed to define __package__. Returns: - types.ModuleType: The ROOT.RDF.Experimental.Distributed submodule. + types.ModuleType: The ROOT.RDF.Distributed submodule. 
""" import DistRDF - return DistRDF.create_distributed_module(parent) + return DistRDF.create_distributed_module(parent, experimental) def _rungraphs(distrdf_rungraphs, rdf_rungraphs): diff --git a/bindings/pyroot/pythonizations/test/root_module.py b/bindings/pyroot/pythonizations/test/root_module.py index fb66d13054a8a..31e7216ed2a99 100644 --- a/bindings/pyroot/pythonizations/test/root_module.py +++ b/bindings/pyroot/pythonizations/test/root_module.py @@ -97,7 +97,7 @@ def test_import_nested_submodules(self): # if root_module_has("RDF.Experimental.Distributed"): - import ROOT.RDF.Experimental.Distributed + import ROOT.RDF.Distributed if root_module_has("RNTuple"): from ROOT import RNTuple diff --git a/tree/dataframe/src/RDataFrame.cxx b/tree/dataframe/src/RDataFrame.cxx index 1c7435943e92b..de4faa905885d 100644 --- a/tree/dataframe/src/RDataFrame.cxx +++ b/tree/dataframe/src/RDataFrame.cxx @@ -703,11 +703,10 @@ of execution of the operations of nodes in this branch of the computation graph ## Distributed execution RDataFrame applications can be executed in parallel through distributed computing frameworks on a set of remote machines -thanks to the Python package `ROOT.RDF.Experimental.Distributed`. This experimental, **Python-only** package allows to scale the +thanks to the Python package `ROOT.RDF.Distributed`. This **Python-only** package allows to scale the optimized performance RDataFrame can achieve on a single machine to multiple nodes at the same time. It is designed so that different backends can be easily plugged in, currently supporting [Apache Spark](http://spark.apache.org/) and -[Dask](https://dask.org/). To make use of distributed RDataFrame, you only need to switch `ROOT.RDataFrame` with -the backend-specific `RDataFrame` of your choice, for example: +[Dask](https://dask.org/). Here is a minimal example usage of distributed RDataFrame: ~~~{.py} import ROOT @@ -755,8 +754,6 @@ parts of the RDataFrame API currently work with this package. 
The subset that is with support for more operations coming in the future. Currently, to the supported data sources belong TTree, TChain, RNTuple and RDatasetSpec. -\note The distributed RDataFrame module requires at least Python version 3.8. - ### Connecting to a Spark cluster In order to distribute the RDataFrame workload, you can connect to a Spark cluster you have access to through the @@ -773,11 +770,11 @@ sc = SparkContext(conf=conf) # The Spark RDataFrame constructor accepts an optional "sparkcontext" parameter # and it will distribute the application to the connected cluster -df = RDataFrame("mytree", "myfile.root", executor = sc) +df = ROOT.RDataFrame("mytree", "myfile.root", executor = sc) ~~~ Note that with the usage above the case of `executor = None` is not supported. One -can explicitly create a `ROOT.RDF.Experimental.Distributed.Spark.RDataFrame` object +can explicitly create a `ROOT.RDF.Distributed.Spark.RDataFrame` object in order to get a default instance of [SparkContext](https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.SparkContext.html) in case it is not already provided as argument. @@ -798,13 +795,13 @@ if __name__ == "__main__": client = Client("dask_scheduler.domain.com:8786") # The Dask RDataFrame constructor accepts the Dask Client object as an optional argument - df = RDataFrame("mytree","myfile.root", executor=client) + df = ROOT.RDataFrame("mytree","myfile.root", executor=client) # Proceed as usual df.Define("x","someoperation").Histo1D(("name", "title", 10, 0, 10), "x") ~~~ Note that with the usage above the case of `executor = None` is not supported. One -can explicitly create a `ROOT.RDF.Experimental.Distributed.Dask.RDataFrame` object +can explicitly create a `ROOT.RDF.Distributed.Dask.RDataFrame` object in order to get a default instance of [distributed.Client](http://distributed.dask.org/en/stable/api.html#distributed.Client) in case it is not already provided as argument. 
This will run multiple processes @@ -937,12 +934,12 @@ of a distributed RDataFrame application. It enables visualizing intermediate results as they are computed across multiple nodes of a Dask cluster by creating a canvas and continuously updating it as partial results become available. -The LiveVisualize() function can be imported from the Python package **ROOT.RDF.Experimental.Distributed**: +The LiveVisualize() function can be imported from the Python package **ROOT.RDF.Distributed**: ~~~{.py} import ROOT -LiveVisualize = ROOT.RDF.Experimental.Distributed.LiveVisualize +LiveVisualize = ROOT.RDF.Distributed.LiveVisualize ~~~ The function takes drawable objects (e.g. histograms) and optional callback functions as argument, it accepts 4 different input formats: @@ -1008,7 +1005,7 @@ ROOT.gInterpreter.AddIncludePath("myheader.hxx") df.Define(...) # Distributed RDF script -ROOT.RDF.Experimental.Distributed.DistributeHeaders("myheader.hxx") +ROOT.RDF.Distributed.DistributeHeaders("myheader.hxx") df.Define(...) ~~~ @@ -1020,7 +1017,7 @@ ROOT.gSystem.Load("my_library.so") df.Define(...) # Distributed RDF script -ROOT.RDF.Experimental.Distributed.DistributeSharedLibs("my_library.so") +ROOT.RDF.Distributed.DistributeSharedLibs("my_library.so") df.Define(...) ~~~ @@ -1034,7 +1031,7 @@ ROOT.gInterpreter.Declare("my_code") df.Define(...) # Distributed RDF script -ROOT.RDF.Experimental.Distributed.DistributeCppCode("my_code") +ROOT.RDF.Distributed.DistributeCppCode("my_code") df.Define(...) ~~~ @@ -1044,7 +1041,7 @@ df.Define(...) # Local RDataFrame script is not applicable here as local RDF application can simply access the external files it needs. # Distributed RDF script -ROOT.RDF.Experimental.Distributed.DistributeFiles("my_file") +ROOT.RDF.Distributed.DistributeFiles("my_file") df.Define(...) 
~~~ diff --git a/tutorials/analysis/dataframe/distrdf003_live_visualization.py b/tutorials/analysis/dataframe/distrdf003_live_visualization.py index 7fe24239bd2c8..87e0a2b56bded 100644 --- a/tutorials/analysis/dataframe/distrdf003_live_visualization.py +++ b/tutorials/analysis/dataframe/distrdf003_live_visualization.py @@ -19,10 +19,10 @@ import ROOT # Import the live visualization function -LiveVisualize = ROOT.RDF.Experimental.Distributed.LiveVisualize +LiveVisualize = ROOT.RDF.Distributed.LiveVisualize # Point RDataFrame calls to Dask RDataFrame object -RDataFrame = ROOT.RDF.Experimental.Distributed.Dask.RDataFrame +RDataFrame = ROOT.RDF.Distributed.Dask.RDataFrame # Function to create a Dask cluster and return the client def create_connection(): From 7faff5652df492ead8924a94f5bb6e9326f2a433 Mon Sep 17 00:00:00 2001 From: Vincenzo Eduardo Padulano Date: Thu, 17 Apr 2025 12:45:44 +0200 Subject: [PATCH 06/12] [df] Lint and format distributed __init__.py --- bindings/distrdf/python/DistRDF/__init__.py | 73 +++++++++++++-------- 1 file changed, 46 insertions(+), 27 deletions(-) diff --git a/bindings/distrdf/python/DistRDF/__init__.py b/bindings/distrdf/python/DistRDF/__init__.py index e66d923a02d47..a37223eca9ddf 100644 --- a/bindings/distrdf/python/DistRDF/__init__.py +++ b/bindings/distrdf/python/DistRDF/__init__.py @@ -11,21 +11,18 @@ ################################################################################ from __future__ import annotations +import concurrent.futures import logging +import textwrap import types import warnings -import textwrap - -import concurrent.futures - -from typing import Iterable, TYPE_CHECKING +from typing import TYPE_CHECKING, Iterable from DistRDF.Backends import build_backends_submodules from DistRDF.LiveVisualize import LiveVisualize - if TYPE_CHECKING: - from DistRDF.Proxy import ResultPtrProxy, ResultMapProxy + from DistRDF.Proxy import ResultMapProxy, ResultPtrProxy logger = logging.getLogger(__name__) @@ -48,56 +45,64 @@ 
def initialize(fun, *args, **kwargs): **kwargs (dict): Keyword arguments used to execute the function. """ from DistRDF.Backends import Base + Base.BaseBackend.register_initialization(fun, *args, **kwargs) + def DistributeCppCode(code_to_declare: str) -> None: """ - Declare the C++ code that has to be processed on each worker. + Declare the C++ code that has to be processed on each worker. Args: codeToDeclare (str): cpp code to be declared on the workers - + """ from DistRDF.Backends import Base + Base.BaseBackend.register_declaration(code_to_declare) - + def DistributeHeaders(paths_to_headers: Iterable[str]): """ - This function allows users to directly load C++ custom headers + This function allows users to directly load C++ custom headers onto the workers. The headers are declared locally first. Args: paths_to_headers (list): list of paths to headers to be distributed to each worker - """ + """ from DistRDF.Backends import Base - Base.BaseBackend.register_headers(paths_to_headers) + + Base.BaseBackend.register_headers(paths_to_headers) + def DistributeFiles(paths_to_files: Iterable[str]): """ This function allows users to directly load arbitrary files - onto the workers. + onto the workers. Args: paths_to_files (list): list of paths to files to be distributed - + """ from DistRDF.Backends import Base + Base.BaseBackend.register_files(paths_to_files) - + def DistributeSharedLibs(paths_to_shared_libraries: Iterable[str]) -> None: """ - This function allows users to directly load pre-compiled shared libraries - onto the workers. The shared libraries are loaded locally first. + This function allows users to directly load pre-compiled shared libraries + onto the workers. The shared libraries are loaded locally first. 
Args: paths_to_shared_libraries (list): list of paths to shared libraries to be distributed - + """ from DistRDF.Backends import Base + Base.BaseBackend.register_shared_lib(paths_to_shared_libraries) + def RunGraphs(proxies: Iterable) -> int: """ Trigger the execution of multiple RDataFrame computation graphs on a certain @@ -137,6 +142,7 @@ def RunGraphs(proxies: Iterable) -> int: """ # Import here to avoid circular dependencies in main module from DistRDF.Proxy import execute_graph + if not proxies: logger.warning("RunGraphs: Got an empty list of handles, now quitting.") return 0 @@ -152,6 +158,7 @@ def RunGraphs(proxies: Iterable) -> int: return len(uniqueproxies) + def VariationsFor(actionproxy: ResultPtrProxy) -> ResultMapProxy: """ Equivalent of ROOT.RDF.Experimental.VariationsFor in distributed mode. @@ -159,15 +166,17 @@ def VariationsFor(actionproxy: ResultPtrProxy) -> ResultMapProxy: # similar to resPtr.fActionPtr->MakeVariedAction() return actionproxy.create_variations() -def FromSpec(jsonfile : str, *args, **kwargs) -> RDataFrame: + +def FromSpec(jsonfile: str, *args, **kwargs) -> RDataFrame: """ Equivalent of ROOT.RDF.Experimental.FromSpec in distributed mode. - """ + """ import ROOT + spec = ROOT.Internal.RDF.RetrieveSpecFromJson(jsonfile) - + executor = kwargs.get("executor", None) - if executor is None: + if executor is None: raise ValueError( "Missing keyword argument 'executor'. Please provide a connection object " "to one of the schedulers supported by distributed RDataFrame." 
@@ -175,7 +184,9 @@ def FromSpec(jsonfile : str, *args, **kwargs) -> RDataFrame: # Try to dispatch to the correct distributed scheduler implementation try: from distributed import Client + from DistRDF.Backends.Dask import RDataFrame + if isinstance(executor, Client): return RDataFrame(spec, *args, **kwargs) except ImportError: @@ -183,15 +194,17 @@ def FromSpec(jsonfile : str, *args, **kwargs) -> RDataFrame: try: from pyspark import SparkContext + from DistRDF.Backends.Spark import RDataFrame + if isinstance(executor, SparkContext): return RDataFrame(spec, *args, **kwargs) except ImportError: pass raise TypeError( - f"The client object of type '{type(executor)}' is not a supported " - "connection type for distributed RDataFrame.") + f"The client object of type '{type(executor)}' is not a supported connection type for distributed RDataFrame." + ) class _DeprecatedModule(types.ModuleType): @@ -218,6 +231,7 @@ def __getattribute__(self, name): warnings.warn(msg_warng, FutureWarning) return super().__getattribute__(name) + def create_distributed_module(parentmodule, experimental: bool = False): """ Helper function to create the ROOT.RDF.Distributed module. 
@@ -252,6 +266,7 @@ def create_distributed_module(parentmodule, experimental: bool = False): return distributed + def RDataFrame(*args, **kwargs): executor = kwargs.get("executor", None) if executor is None: @@ -263,7 +278,9 @@ def RDataFrame(*args, **kwargs): # Try to dispatch to the correct distributed scheduler implementation try: from distributed import Client + from DistRDF.Backends.Dask import RDataFrame + if isinstance(executor, Client): return RDataFrame(*args, **kwargs) except ImportError: @@ -271,12 +288,14 @@ def RDataFrame(*args, **kwargs): try: from pyspark import SparkContext + from DistRDF.Backends.Spark import RDataFrame + if isinstance(executor, SparkContext): return RDataFrame(*args, **kwargs) except ImportError: pass raise TypeError( - f"The client object of type '{type(executor)}' is not a supported " - "connection type for distributed RDataFrame.") + f"The client object of type '{type(executor)}' is not a supported connection type for distributed RDataFrame." + ) From aa645d7ca7be1d1174c504b964c154021e8ac66b Mon Sep 17 00:00:00 2001 From: silverweed Date: Thu, 17 Apr 2025 16:18:00 +0200 Subject: [PATCH 07/12] [base] Move RFloat16.hxx out of ROOT7 --- core/base/CMakeLists.txt | 2 +- core/base/{v7 => }/inc/ROOT/RFloat16.hxx | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename core/base/{v7 => }/inc/ROOT/RFloat16.hxx (100%) diff --git a/core/base/CMakeLists.txt b/core/base/CMakeLists.txt index 3a6ee89386abe..33ad00aae5e05 100644 --- a/core/base/CMakeLists.txt +++ b/core/base/CMakeLists.txt @@ -16,6 +16,7 @@ if(MSVC AND MSVC_VERSION GREATER_EQUAL 1925 AND MSVC_VERSION LESS 1929) endif() set(BASE_HEADERS + ROOT/RFloat16.hxx ROOT/TErrorDefaultHandler.hxx ROOT/TExecutorCRTP.hxx ROOT/TSequentialExecutor.hxx @@ -200,7 +201,6 @@ set(BASE_SOURCES if(root7) set(BASE_HEADER_DIRS inc/ v7/inc/) list(APPEND BASE_HEADERS - ROOT/RFloat16.hxx ROOT/RIndexIter.hxx) endif() diff --git a/core/base/v7/inc/ROOT/RFloat16.hxx b/core/base/inc/ROOT/RFloat16.hxx 
similarity index 100% rename from core/base/v7/inc/ROOT/RFloat16.hxx rename to core/base/inc/ROOT/RFloat16.hxx From a82fb68b1404f5ffaf4af75c2b378b6925795902 Mon Sep 17 00:00:00 2001 From: Philippe Canal Date: Wed, 9 Apr 2025 17:25:37 -0500 Subject: [PATCH 08/12] meta: Allow global disabling of AutoParsing during TClass::GetClass If TClass.cxx is built with the cpp macro: ROOT_DISABLE_TCLASS_GET_CLASS_AUTOPARSING defined, it will no longer do any auto-parsing during the execution of `TClass::GetClass`. This will result in not being able to find TClass-es when the name requires not-already loaded interpreted information (e.g. a typedef to be resolved). Comments include additional possible interfaces to turn on this feature. --- core/meta/src/TClass.cxx | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/core/meta/src/TClass.cxx b/core/meta/src/TClass.cxx index bc34a3e6323e7..13f2db0b37a32 100644 --- a/core/meta/src/TClass.cxx +++ b/core/meta/src/TClass.cxx @@ -3153,6 +3153,21 @@ TClass *TClass::GetClass(const char *name, Bool_t load, Bool_t silent, size_t hi // continue as before ... } + bool disableAutoParsing = gInterpreter->IsAutoParsingSuspended(); + // FIXME: We need to decide on the interface to disable auto-parsing only during TClass::GetClass. 
+#ifdef ROOT_DISABLE_TCLASS_GET_CLASS_AUTOPARSING + constexpr bool requestDisableAutoLoading = true; +#else + // We could get the user choice from: + // - environment variable ROOT_DISABLE_TCLASS_GET_CLASS_AUTOPARSING + // - rootrc key Root.TClass.GetClass.AutoParsing + // - TClass::SetGetClassAutoParsing + constexpr bool requestDisableAutoLoading = false; +#endif + if (requestDisableAutoLoading) + disableAutoParsing = true; + TInterpreter::SuspendAutoParsing autoparseFence(gInterpreter, disableAutoParsing); + // Note: this variable does not always holds the fully normalized name // as there is information from a not yet loaded library or from header // not yet parsed that may be needed to fully normalize the name. From e7454e8520923d394507571a694121c865a2bd34 Mon Sep 17 00:00:00 2001 From: Philippe Canal Date: Wed, 9 Apr 2025 17:26:39 -0500 Subject: [PATCH 09/12] TCling: Register, give access to and print list of autoparsed classes. Use `gInterpreter->Print("autoparsed");` to print a list of the class names that directly lead to auto-parsing. Use `gCling->GetAutoParseClasses()` to programmatically get a set of the class names that directly lead to auto-parsing. --- core/metacling/src/TCling.cxx | 29 +++++++++++++++++++++++++++++ core/metacling/src/TCling.h | 3 +++ 2 files changed, 32 insertions(+) diff --git a/core/metacling/src/TCling.cxx b/core/metacling/src/TCling.cxx index 867c6d9386533..9db881ce23343 100644 --- a/core/metacling/src/TCling.cxx +++ b/core/metacling/src/TCling.cxx @@ -2662,6 +2662,29 @@ void TCling::PrintIntro() { } +//////////////////////////////////////////////////////////////////////////////// +/// Print information about the interpreter. +///\param[in] option Selects the type of information to print. +/// +/// List of currently supported options: +/// - autoparsed: Print the list of classes that triggered autoparsing. 
+void TCling::Print(Option_t *option) const +{ + if (option && *option) { + if (!strcmp(option, "autoparsed")) { + std::cout << "Auto parsed classes:" << std::endl; + for (auto & cls : fAutoParseClasses) { + std::cout << " " << cls << std::endl; + } + } else { + ::Error("TCling::Print", "Unknown option '%s'", option); + } + } else { + ::Info("TCling::Print", "No options specified"); + } +} + + //////////////////////////////////////////////////////////////////////////////// /// \brief Add a directory to the list of directories in which the /// interpreter looks for include files. @@ -6536,6 +6559,12 @@ UInt_t TCling::AutoParseImplRecurse(const char *cls, bool topLevel) } } + if (nHheadersParsed) { + // Register that we did autoparsing for this class. + fAutoParseClasses.insert(cls); + if (gDebug) + Info("AutoParse", "Parsed %d headers for %s", nHheadersParsed, cls); + } return nHheadersParsed; } diff --git a/core/metacling/src/TCling.h b/core/metacling/src/TCling.h index 3fafddd266049..3d037f2f06876 100644 --- a/core/metacling/src/TCling.h +++ b/core/metacling/src/TCling.h @@ -121,6 +121,7 @@ class TCling final : public TInterpreter { std::set fLookedUpClasses; // Set of classes for which headers were looked up already std::set fPayloads; // Set of payloads std::set fParsedPayloadsAddresses; // Set of payloads which were parsed + std::set fAutoParseClasses; // Set of classes for which we autoparsed a header std::hash fStringHashFunction; // A simple hashing function std::unordered_set fNSFromRootmaps; // Collection of namespaces fwd declared in the rootmaps TObjArray* fRootmapFiles; // Loaded rootmap files. 
@@ -200,6 +201,7 @@ class TCling final : public TInterpreter { Int_t AutoLoad(const char *classname, Bool_t knowDictNotLoaded = kFALSE) final; Int_t AutoLoad(const std::type_info& typeinfo, Bool_t knowDictNotLoaded = kFALSE) final; Int_t AutoParse(const char* cls) final; + const std::set& GetAutoParseClasses() const { return fAutoParseClasses; } void* LazyFunctionCreatorAutoload(const std::string& mangled_name); bool LibraryLoadingFailed(const std::string&, const std::string&, bool, bool); Bool_t IsAutoLoadNamespaceCandidate(const clang::NamespaceDecl* nsDecl); @@ -240,6 +242,7 @@ class TCling final : public TInterpreter { Longptr_t ProcessLineAsynch(const char* line, EErrorCode* error = nullptr); Longptr_t ProcessLineSynch(const char* line, EErrorCode* error = nullptr) final; void PrintIntro() final; + void Print(Option_t *option="") const final; bool RegisterPrebuiltModulePath(const std::string& FullPath, const std::string& ModuleMapName = "module.modulemap") const final; void RegisterModule(const char* modulename, From 3067cfee7b57a222af6abc05e0f5e7db0a0dc139 Mon Sep 17 00:00:00 2001 From: Philippe Canal Date: Fri, 11 Apr 2025 12:19:23 -0500 Subject: [PATCH 10/12] Remove use of obsolete gccxmlpath --- cmake/modules/RootMacros.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/modules/RootMacros.cmake b/cmake/modules/RootMacros.cmake index f8f02be27b490..34a7459b95da5 100644 --- a/cmake/modules/RootMacros.cmake +++ b/cmake/modules/RootMacros.cmake @@ -187,7 +187,7 @@ function(REFLEX_GENERATE_DICTIONARY dictionary) OUTPUT ${gensrcdict} ${rootmapname} COMMAND ${ROOT_genreflex_CMD} ARGS ${headerfiles} -o ${gensrcdict} ${rootmapopts} --select=${selectionfile} - --gccxmlpath=${GCCXML_home}/bin ${ARG_OPTIONS} + ${ARG_OPTIONS} "-I$>,;-I>" "$<$>:-D$>" DEPENDS ${headerfiles} ${selectionfile} ${ARG_DEPENDS} From 885f558353353b4524135923b99da75f106ac96a Mon Sep 17 00:00:00 2001 From: Philippe Canal Date: Mon, 14 Apr 2025 15:55:05 -0500 Subject: 
[PATCH 11/12] [meta] Add `Root.TClass.GetClass.AutoParsing` rootrc key. This allows disabling auto-parsing during `TClass::GetClass` for debugging purposes. --- config/rootrc.in | 8 ++++++++ core/meta/src/TClass.cxx | 7 ++----- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/config/rootrc.in b/config/rootrc.in index f1bfd4c775200..0b57c1808fc45 100644 --- a/config/rootrc.in +++ b/config/rootrc.in @@ -644,3 +644,11 @@ Rint.Canvas.HighLightColor: 5 # 1 All Branches (default) # Can be overridden by the environment variable ROOT_TTREECACHE_PREFILL # TTreeCache.Prefill: 1 + +# Advanced Debug Settings +# Setting Root.TClass.GetClass.AutoParsing to false +# will disable any auto-parsing during the execution of `TClass::GetClass`. This will +# result in not being able to find TClass-es when the name requires not-already +# loaded interpreted information (e.g. a typedef to be resolved). +# +# Root.TClass.GetClass.AutoParsing: true diff --git a/core/meta/src/TClass.cxx b/core/meta/src/TClass.cxx index 13f2db0b37a32..e66d4211d9f32 100644 --- a/core/meta/src/TClass.cxx +++ b/core/meta/src/TClass.cxx @@ -55,6 +55,7 @@ In order to access the name of a class within the ROOT type system, the method T #include "TDataType.h" #include "TDatime.h" #include "TEnum.h" +#include "TEnv.h" #include "TError.h" #include "TExMap.h" #include "TFunctionTemplate.h" @@ -3158,11 +3159,7 @@ TClass *TClass::GetClass(const char *name, Bool_t load, Bool_t silent, size_t hi #ifdef ROOT_DISABLE_TCLASS_GET_CLASS_AUTOPARSING constexpr bool requestDisableAutoLoading = true; #else - // We could get the user choice from: - // - environment variable ROOT_DISABLE_TCLASS_GET_CLASS_AUTOPARSING - // - rootrc key Root.TClass.GetClass.AutoParsing - // - TClass::SetGetClassAutoParsing - constexpr bool requestDisableAutoLoading = false; + static const bool requestDisableAutoLoading = !gEnv->GetValue("Root.TClass.GetClass.AutoParsing", true); #endif if (requestDisableAutoLoading) disableAutoParsing = true; From 
214f51314e026ae40b0a677039d7f22e2fc94e04 Mon Sep 17 00:00:00 2001 From: Philippe Canal Date: Mon, 14 Apr 2025 17:19:07 -0500 Subject: [PATCH 12/12] TCling: Register and print list of autoloaded libraries. Use `gInterpreter->Print("autoloaded");` to print a list of the libraries that have been automatically loaded during TClass::GetClass and due to a symbol requested during code interpretation. --- core/base/src/TROOT.cxx | 3 +++ core/meta/inc/TInterpreter.h | 1 + core/metacling/src/TCling.cxx | 14 ++++++++++++++ core/metacling/src/TCling.h | 3 +++ 4 files changed, 21 insertions(+) diff --git a/core/base/src/TROOT.cxx b/core/base/src/TROOT.cxx index b24925bf966e9..5760dc3438b9e 100644 --- a/core/base/src/TROOT.cxx +++ b/core/base/src/TROOT.cxx @@ -2202,6 +2202,9 @@ Int_t TROOT::LoadClass(const char * /*classname*/, const char *libname, // TSystem::Load returns 1 when the library was already loaded, return success in this case. if (err == 1) err = 0; + if (err == 0) + // Register the Autoloading of the library + gCling->RegisterAutoLoadedLibrary(libname); return err; } } else { diff --git a/core/meta/inc/TInterpreter.h b/core/meta/inc/TInterpreter.h index 59f2a09b91039..94980fdb0a7fc 100644 --- a/core/meta/inc/TInterpreter.h +++ b/core/meta/inc/TInterpreter.h @@ -194,6 +194,7 @@ class TInterpreter : public TNamed { virtual void AddAvailableIndentifiers(TSeqCollection&) = 0; virtual void RegisterTClassUpdate(TClass *oldcl,DictFuncPtr_t dict) = 0; virtual void UnRegisterTClassUpdate(const TClass *oldcl) = 0; + virtual void RegisterAutoLoadedLibrary(const char *libname) = 0; virtual Int_t SetClassSharedLibs(const char *cls, const char *libs) = 0; virtual void SetGetline(const char*(*getlineFunc)(const char* prompt), void (*histaddFunc)(const char* line)) = 0; diff --git a/core/metacling/src/TCling.cxx b/core/metacling/src/TCling.cxx index 9db881ce23343..991b52cda289b 100644 --- a/core/metacling/src/TCling.cxx +++ b/core/metacling/src/TCling.cxx @@ -2676,6 +2676,11 @@ void 
TCling::Print(Option_t *option) const for (auto & cls : fAutoParseClasses) { std::cout << " " << cls << std::endl; } + } else if (!strcmp(option, "autoloaded")) { + std::cout << "Auto loaded libraries:" << std::endl; + for (auto & lib : fAutoLoadedLibraries) { + std::cout << " " << lib << std::endl; + } } else { ::Error("TCling::Print", "Unknown option '%s'", option); } @@ -3455,6 +3460,14 @@ static bool StartsWithStrLit(const char *haystack, const char (&needle)[N]) { } } +//////////////////////////////////////////////////////////////////////////////// +/// Register that a library was autoloaded either to provide a 'missing' symbol +/// or to provide a class (see TClass::GetClass and TROOT::LoadClass). +void TCling::RegisterAutoLoadedLibrary(const char *libname) +{ + fAutoLoadedLibraries.insert(libname); +} + //////////////////////////////////////////////////////////////////////////////// /// Register a new shared library name with the interpreter; add it to /// fSharedLibs. @@ -6671,6 +6684,7 @@ void* TCling::LazyFunctionCreatorAutoload(const std::string& mangled_name) { if (!LibLoader(libName)) return nullptr; + fAutoLoadedLibraries.insert(libName); return llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(dlsym_mangled_name); } diff --git a/core/metacling/src/TCling.h b/core/metacling/src/TCling.h index 3d037f2f06876..5f9ee0b43bee7 100644 --- a/core/metacling/src/TCling.h +++ b/core/metacling/src/TCling.h @@ -122,6 +122,7 @@ class TCling final : public TInterpreter { std::set fPayloads; // Set of payloads std::set fParsedPayloadsAddresses; // Set of payloads which were parsed std::set fAutoParseClasses; // Set of classes for which we autoparsed a header + std::set fAutoLoadedLibraries; // Set of libraries that were autoloaded std::hash fStringHashFunction; // A simple hashing function std::unordered_set fNSFromRootmaps; // Collection of namespaces fwd declared in the rootmaps TObjArray* fRootmapFiles; // Loaded rootmap files. 
@@ -259,6 +260,8 @@ class TCling final : public TInterpreter { void RegisterTClassUpdate(TClass *oldcl,DictFuncPtr_t dict) final; void UnRegisterTClassUpdate(const TClass *oldcl) final; + void RegisterAutoLoadedLibrary(const char *libname) final; + Int_t SetClassSharedLibs(const char *cls, const char *libs) final; void SetGetline(const char * (*getlineFunc)(const char* prompt), void (*histaddFunc)(const char* line)) final;