meta: Allow global disabling of AutoParsing during TClass::GetClass #222

Status: Open. Wants to merge 12 commits into base branch cms/master/96292fedbc.

9 changes: 9 additions & 0 deletions README/ReleaseNotes/v636/index.md
@@ -29,6 +29,7 @@ The following people have contributed to this new version:
Manuel Tobias Schiller, University of Glasgow,\
Surya Somayyajula, UMass Amherst,\
Petr Stepanov, @petrstepanov,\
Silia Taider, CERN/EP-SFT,\
Dongliang Zhang, University of Science and Technology of China

## Deprecation and Removal
@@ -46,6 +47,14 @@ The following people have contributed to this new version:

## Python Interface

### UHI
* ROOT histograms now comply with the [Unified Histogram Interface (UHI)](https://uhi.readthedocs.io/en/latest/index.html) specification, enhancing interoperability with other UHI-compatible libraries and standardizing histogram operations.
The following features were added (see the sketch after this list):
* Implemented the UHI `PlottableHistogram` protocol enabling ROOT histograms to be plotted by any library supporting `PlottableHistogram` objects.
* Introduced UHI-style indexing for accessing and setting bin values.
* Introduced UHI-style slicing for selecting histogram ranges.
* Implemented the `ROOT.uhi.loc`, `ROOT.uhi.underflow`, `ROOT.uhi.overflow`, `ROOT.uhi.rebin`, and `ROOT.uhi.sum` tags.
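
A minimal sketch of the new UHI-style access, assuming the tag names listed above; slicing and flow-bin semantics follow the UHI specification:

```python
import ROOT

h = ROOT.TH1D("h", "demo", 10, 0.0, 1.0)
h.Fill(0.25)

v = h[2]                           # bin value by index
v = h[ROOT.uhi.loc(0.25)]          # bin value by coordinate
h[ROOT.uhi.underflow] = 0.0        # set a flow bin

h_sub = h[2:8]                     # slice: restrict to a bin range
h_coarse = h[::ROOT.uhi.rebin(2)]  # slice with rebinning: merge pairs of bins
total = h[::ROOT.uhi.sum]          # reduce: sum of bins (flow included, per the UHI spec)
```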

## RDataFrame
- When multiple computation graphs are run concurrently using [`RunGraphs()`](https://root.cern/doc/master/namespaceROOT_1_1RDF.html#a526d77d018bf69462d736bbdd1a695c4),
the pool of slot numbers that a thread can pick from is now shared across all graphs. This enables use cases where a single resource, which may be expensive to create or copy, …
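
A minimal sketch of running two graphs concurrently as described above; the column definitions and entry counts are illustrative:

```python
import ROOT

ROOT.EnableImplicitMT()  # RunGraphs draws threads from the shared pool

df1 = ROOT.RDataFrame(1000).Define("x", "rdfentry_")
df2 = ROOT.RDataFrame(1000).Define("y", "2.0 * rdfentry_")

# Booking returns lazy result handles; nothing runs yet.
h1 = df1.Histo1D("x")
h2 = df2.Histo1D("y")

# Run both computation graphs concurrently over one pool of slots.
ROOT.RDF.RunGraphs([h1, h2])
print(h1.GetMean(), h2.GetMean())
```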
2 changes: 1 addition & 1 deletion bindings/CMakeLists.txt
@@ -10,7 +10,7 @@ if(pyroot)

if(dataframe)
message(STATUS "Distributed RDataFrame enabled")
add_subdirectory(experimental/distrdf)
add_subdirectory(distrdf)
else()
message(STATUS "Requirements to enable distributed RDataFrame:")
message(STATUS " dataframe: required:ON, actual:${dataframe}")
File renamed without changes.
@@ -1,4 +1,4 @@
# Copyright (C) 1995-2023, Rene Brun and Fons Rademakers.
# Copyright (C) 1995-2025, Rene Brun and Fons Rademakers.
# All rights reserved.
#
# For the licensing terms see $ROOTSYS/LICENSE.
@@ -14,6 +14,7 @@ set(py_sources
DistRDF/ComputationGraphGenerator.py
DistRDF/DataFrame.py
DistRDF/HeadNode.py
DistRDF/LiveVisualize.py
DistRDF/Node.py
DistRDF/Operation.py
DistRDF/Proxy.py
@@ -22,11 +23,10 @@ set(py_sources
DistRDF/Backends/__init__.py
DistRDF/Backends/Base.py
DistRDF/Backends/Utils.py
DistRDF/Backends/Spark/__init__.py
DistRDF/Backends/Spark/Backend.py
DistRDF/Backends/Dask/__init__.py
DistRDF/Backends/Dask/Backend.py
DistRDF/LiveVisualize.py
DistRDF/Backends/Spark/__init__.py
DistRDF/Backends/Spark/Backend.py
)

# Add custom rules to copy the Python sources into the build directory
@@ -26,11 +26,11 @@ def build_backends_submodules(parentmodule: types.ModuleType) -> types.ModuleType:
# The actual python package with the backend implementation
actual = importlib.import_module(__name__ + "." + module_name)
# A dummy module to inject in the parent module
fullmodulename = "ROOT.RDF.Experimental.Distributed." + module_name
fullmodulename = "ROOT.RDF.Distributed." + module_name
dummy = types.ModuleType(fullmodulename)

# PEP302 attributes
dummy.__file__ = "<module ROOT.RDF.Experimental.Distributed>"
dummy.__file__ = "<module ROOT.RDF.Distributed>"
# dummy.__name__ is the constructor argument
dummy.__path__ = [] # this makes it a package
# dummy.__loader__ is not defined
@@ -841,7 +841,7 @@ def build_rdf_from_range(current_range: Ranges.get_ntuple_ranges) -> TaskObjects:
if not filenames:
return TaskObjects(None, None)

return TaskObjects(ROOT.RDF.Experimental.FromRNTuple(ntuplename, filenames), None)
return TaskObjects(ROOT.RDF.FromRNTuple(ntuplename, filenames), None)

return build_rdf_from_range

@@ -11,20 +11,18 @@
################################################################################
from __future__ import annotations

import concurrent.futures
import logging
import os
import textwrap
import types

import concurrent.futures

from typing import Iterable, TYPE_CHECKING
import warnings
from typing import TYPE_CHECKING, Iterable

from DistRDF.Backends import build_backends_submodules
from DistRDF.LiveVisualize import LiveVisualize


if TYPE_CHECKING:
from DistRDF.Proxy import ResultPtrProxy, ResultMapProxy
from DistRDF.Proxy import ResultMapProxy, ResultPtrProxy

logger = logging.getLogger(__name__)

@@ -47,56 +45,64 @@ def initialize(fun, *args, **kwargs):
**kwargs (dict): Keyword arguments used to execute the function.
"""
from DistRDF.Backends import Base

Base.BaseBackend.register_initialization(fun, *args, **kwargs)


def DistributeCppCode(code_to_declare: str) -> None:
"""
Declare the C++ code that has to be processed on each worker.
Declare the C++ code that has to be processed on each worker.
Args:
code_to_declare (str): C++ code to be declared on the workers

"""
from DistRDF.Backends import Base

Base.BaseBackend.register_declaration(code_to_declare)


def DistributeHeaders(paths_to_headers: Iterable[str]):
"""
This function allows users to directly load C++ custom headers
This function allows users to directly load C++ custom headers
onto the workers. The headers are declared locally first.

Args:
paths_to_headers (list): list of paths to headers to be distributed to each worker

"""
"""
from DistRDF.Backends import Base
Base.BaseBackend.register_headers(paths_to_headers)

Base.BaseBackend.register_headers(paths_to_headers)


def DistributeFiles(paths_to_files: Iterable[str]):
"""
This function allows users to directly load arbitrary files
onto the workers.
onto the workers.

Args:
paths_to_files (list): list of paths to files to be distributed

"""
from DistRDF.Backends import Base

Base.BaseBackend.register_files(paths_to_files)


def DistributeSharedLibs(paths_to_shared_libraries: Iterable[str]) -> None:
"""
This function allows users to directly load pre-compiled shared libraries
onto the workers. The shared libraries are loaded locally first.
This function allows users to directly load pre-compiled shared libraries
onto the workers. The shared libraries are loaded locally first.

Args:
paths_to_shared_libraries (list): list of paths to shared libraries to be distributed

"""
from DistRDF.Backends import Base

Base.BaseBackend.register_shared_lib(paths_to_shared_libraries)
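
A minimal usage sketch of the four distribution helpers above, assuming a configured distributed RDataFrame session; the header, library, file, and function names are hypothetical:

```python
import ROOT

Distributed = ROOT.RDF.Distributed

Distributed.DistributeHeaders(["my_helpers.hxx"])                     # hypothetical header
Distributed.DistributeSharedLibs(["libMyAnalysis.so"])                # hypothetical library
Distributed.DistributeFiles(["calibration.txt"])                      # hypothetical data file
Distributed.DistributeCppCode("int square(int x) { return x * x; }")  # declared on every worker
```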


def RunGraphs(proxies: Iterable) -> int:
"""
Trigger the execution of multiple RDataFrame computation graphs on a certain
@@ -136,6 +142,7 @@ def RunGraphs(proxies: Iterable) -> int:
"""
# Import here to avoid circular dependencies in main module
from DistRDF.Proxy import execute_graph

if not proxies:
logger.warning("RunGraphs: Got an empty list of handles, now quitting.")
return 0
@@ -151,58 +158,91 @@ def RunGraphs(proxies: Iterable) -> int:

return len(uniqueproxies)


def VariationsFor(actionproxy: ResultPtrProxy) -> ResultMapProxy:
"""
Equivalent of ROOT.RDF.Experimental.VariationsFor in distributed mode.
"""
# similar to resPtr.fActionPtr->MakeVariedAction()
return actionproxy.create_variations()

def FromSpec(jsonfile : str, *args, **kwargs) -> RDataFrame:

def FromSpec(jsonfile: str, *args, **kwargs) -> RDataFrame:
"""
Equivalent of ROOT.RDF.Experimental.FromSpec in distributed mode.
"""
"""
import ROOT

spec = ROOT.Internal.RDF.RetrieveSpecFromJson(jsonfile)

executor = kwargs.get("executor", None)
if executor is None:
if executor is None:
raise ValueError(
"Missing keyword argument 'executor'. Please provide a connection object "
"to one of the schedulers supported by distributed RDataFrame."
)
# Try to dispatch to the correct distributed scheduler implementation
try:
from distributed import Client

from DistRDF.Backends.Dask import RDataFrame

if isinstance(executor, Client):
return RDataFrame(spec, *args, **kwargs)
except ImportError:
pass

try:
from pyspark import SparkContext

from DistRDF.Backends.Spark import RDataFrame

if isinstance(executor, SparkContext):
return RDataFrame(spec, *args, **kwargs)
except ImportError:
pass

raise TypeError(
f"The client object of type '{type(executor)}' is not a supported "
"connection type for distributed RDataFrame.")
f"The client object of type '{type(executor)}' is not a supported connection type for distributed RDataFrame."
)
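
A short sketch of calling `FromSpec` with a Dask executor, as dispatched above; "spec.json" is a hypothetical dataset-specification file:

```python
import ROOT
from distributed import Client

client = Client()  # local Dask cluster; a pyspark.SparkContext would select the Spark backend
df = ROOT.RDF.Distributed.FromSpec("spec.json", executor=client)
print(df.Count().GetValue())  # triggers the distributed event loop
```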


class _DeprecatedModule(types.ModuleType):
"""A simple module type to raise a warning before usage."""

def __getattribute__(self, name):
msg_warning = textwrap.dedent(
"""
In ROOT 6.36, the ROOT.RDF.Experimental.Distributed module has become just ROOT.RDF.Distributed. ROOT 6.38
will remove the 'Experimental' keyword completely, so it is suggested to move to the stable API in user
code. You can now change lines such as:
```
connection = ... # your distributed Dask client or SparkContext
RDataFrame = ROOT.RDF.Experimental.Distributed.[Backend].RDataFrame
df = RDataFrame(..., [daskclient,sparkcontext] = connection)
```
to simply:
```
connection = ... # your distributed Dask client or SparkContext
df = ROOT.RDataFrame(..., executor = connection)
```
"""
)
warnings.warn(msg_warning, FutureWarning)
return super().__getattribute__(name)
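
A self-contained sketch of the `__class__` swap used here: assigning a `ModuleType` subclass to a module's `__class__` routes every attribute access through the subclass's `__getattribute__`:

```python
import types
import warnings

class Noisy(types.ModuleType):
    def __getattribute__(self, name):
        warnings.warn("this module is deprecated", FutureWarning)
        return super().__getattribute__(name)

demo = types.ModuleType("demo")
demo.answer = 42
demo.__class__ = Noisy  # same mechanism as `distributed.__class__ = _DeprecatedModule`
print(demo.answer)      # emits a FutureWarning, then prints 42
```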


def create_distributed_module(parentmodule):
def create_distributed_module(parentmodule, experimental: bool = False):
"""
Helper function to create the ROOT.RDF.Experimental.Distributed module.
Helper function to create the ROOT.RDF.Distributed module.

Users will see this module as the entry point of functions to create and
run an RDataFrame computation distributedly.
"""
distributed = types.ModuleType("ROOT.RDF.Experimental.Distributed")
distributed = types.ModuleType("ROOT.RDF.Distributed")

# PEP302 attributes
distributed.__file__ = "<module ROOT.RDF.Experimental>"
distributed.__file__ = "<module ROOT.RDF>"
# distributed.__name__ is the constructor argument
distributed.__path__ = [] # this makes it a package
# distributed.__loader__ is not defined
@@ -220,9 +260,13 @@ def create_distributed_module(parentmodule):
distributed.DistributeSharedLibs = DistributeSharedLibs
distributed.DistributeCppCode = DistributeCppCode
distributed.FromSpec = FromSpec


if experimental:
distributed.__class__ = _DeprecatedModule

return distributed


def RDataFrame(*args, **kwargs):
executor = kwargs.get("executor", None)
if executor is None:
@@ -234,20 +278,24 @@ def RDataFrame(*args, **kwargs):
# Try to dispatch to the correct distributed scheduler implementation
try:
from distributed import Client

from DistRDF.Backends.Dask import RDataFrame

if isinstance(executor, Client):
return RDataFrame(*args, **kwargs)
except ImportError:
pass

try:
from pyspark import SparkContext

from DistRDF.Backends.Spark import RDataFrame

if isinstance(executor, SparkContext):
return RDataFrame(*args, **kwargs)
except ImportError:
pass

raise TypeError(
f"The client object of type '{type(executor)}' is not a supported "
"connection type for distributed RDataFrame.")
f"The client object of type '{type(executor)}' is not a supported connection type for distributed RDataFrame."
)
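
The stable entry point then reads as in the deprecation message above; a short sketch assuming Dask is installed, with hypothetical tree and file names:

```python
import ROOT
from distributed import Client

client = Client(n_workers=2)  # or a pyspark.SparkContext for the Spark backend
df = ROOT.RDataFrame("events", "data.root", executor=client)  # hypothetical dataset
print(df.Count().GetValue())
```
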
@@ -1,8 +1,9 @@
#ifndef HEADER_1
#define HEADER_1

bool f(int x) {
return true;
bool f(int x)
{
return true;
}

#endif
16 changes: 16 additions & 0 deletions bindings/distrdf/test/backend/test_headers/header2.hxx
@@ -0,0 +1,16 @@
#include <string>

#ifndef HEADER_2
#define HEADER_2

int f1(int x)
{
return x;
}

std::string f2(std::string s)
{
return s;
}

#endif
@@ -1,8 +1,9 @@
#ifndef HEADER_3
#define HEADER_3

bool a(int x) {
return true;
bool a(int x)
{
return true;
}

#endif
@@ -1,8 +1,9 @@
#ifndef HEADER_4
#define HEADER_4

bool b(int x) {
return true;
bool b(int x)
{
return true;
}

#endif