Merged

24 commits
25aca25  Run cudf-polars conda unit test with more than 1 process (mroeschke, Sep 16, 2025)
816a133  Merge remote-tracking branch 'upstream/branch-25.10' into ci/cudf_pol… (mroeschke, Sep 16, 2025)
534883e  Use a dask_cluster session fixture instead of session_start/end confi… (mroeschke, Sep 16, 2025)
f09d147  Merge remote-tracking branch 'upstream/branch-25.10' into ci/cudf_pol… (mroeschke, Sep 17, 2025)
a1458cf  Use less processes for OOMs (mroeschke, Sep 17, 2025)
fa9a77f  Move autouse fixture to experimental conftest.py, allocate less data … (mroeschke, Sep 17, 2025)
ba0179b  Merge remote-tracking branch 'upstream/branch-25.10' into ci/cudf_pol… (mroeschke, Sep 17, 2025)
bb33390  Allocate less memory in test_dask_serialization_roundtrip (mroeschke, Sep 17, 2025)
6df7cf9  Merge remote-tracking branch 'upstream/branch-25.10' into ci/cudf_pol… (mroeschke, Sep 17, 2025)
5e37c93  Undo test changes to test_dask_serialize (mroeschke, Sep 17, 2025)
bdc912a  Try only 2 processes? (mroeschke, Sep 17, 2025)
f3372ad  Try function scoping to cleanup after tests (mroeschke, Sep 18, 2025)
ad26cd3  Merge remote-tracking branch 'upstream/branch-25.10' into ci/cudf_pol… (mroeschke, Sep 18, 2025)
e5c7b38  Merge remote-tracking branch 'upstream/branch-25.10' into ci/cudf_pol… (mroeschke, Sep 18, 2025)
399a730  Merge remote-tracking branch 'upstream/branch-25.10' into ci/cudf_pol… (mroeschke, Sep 19, 2025)
017bf31  Disable some jobs to isolate polars conda jobs (mroeschke, Sep 19, 2025)
187c946  Comment out also on pr builder (mroeschke, Sep 19, 2025)
59d144d  try hardcoding free memory for OOMs (mroeschke, Sep 19, 2025)
29b3b41  Add fixture to cleare MR cache (TomAugspurger, Sep 24, 2025)
4c51a08  Undo limiting free_memory initial pool size, try 8 processes (mroeschke, Sep 25, 2025)
840d206  Revert "Comment out also on pr builder" (mroeschke, Sep 25, 2025)
2e99088  Revert "Disable some jobs to isolate polars conda jobs" (mroeschke, Sep 25, 2025)
5d8c909  Merge remote-tracking branch 'upstream/branch-25.12' into ci/cudf_pol… (mroeschke, Sep 25, 2025)
814a8ac  Try using 6 processes (mroeschke, Sep 25, 2025)
ci/test_python_other.sh (1 addition, 5 deletions)
@@ -40,14 +40,10 @@ rapids-logger "pytest custreamz"
   --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/custreamz-coverage.xml" \
   --cov-report=term
 
-# Note that cudf-polars uses rmm.mr.CudaAsyncMemoryResource() which allocates
-# half the available memory. This doesn't play well with multiple workers, so
-# we keep --numprocesses=1 for now. This should be resolved by
-# https://github.com/rapidsai/cudf/issues/16723.
 rapids-logger "pytest cudf-polars"
 ./ci/run_cudf_polars_pytests.sh \
   --junitxml="${RAPIDS_TESTS_DIR}/junit-cudf-polars.xml" \
-  --numprocesses=1 \
+  --numprocesses=6 \
   --dist=worksteal \
   --cov-config=./pyproject.toml \
   --cov=cudf_polars \
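The comment deleted above records the original constraint: rmm.mr.CudaAsyncMemoryResource() reserves roughly half of the available device memory, which does not coexist well with several pytest-xdist workers sharing one GPU (rapidsai/cudf#16723). For background only, a rough sketch of per-worker pool sizing under that constraint might look like the following; it is not what this PR does (the PR clears the cached memory resource between tests instead), and the cupy free-memory query plus the initial_pool_size argument are assumptions about the available APIs:

```python
# Illustrative sketch only: size an RMM async pool per pytest-xdist worker so that
# N workers sharing one GPU do not each try to reserve half of device memory.
# Assumes cupy is installed and CudaAsyncMemoryResource accepts initial_pool_size.
import os

import cupy
import rmm


def make_worker_pool(fraction_of_free: float = 0.8) -> rmm.mr.CudaAsyncMemoryResource:
    n_workers = int(os.environ.get("PYTEST_XDIST_WORKER_COUNT", "1")) or 1
    free_bytes, _total_bytes = cupy.cuda.runtime.memGetInfo()
    # Give each worker an equal slice of (most of) the currently free memory,
    # rounded down to a 256-byte multiple to keep the pool size aligned.
    pool_size = int(free_bytes * fraction_of_free / n_workers) // 256 * 256
    mr = rmm.mr.CudaAsyncMemoryResource(initial_pool_size=pool_size)
    rmm.mr.set_current_device_resource(mr)
    return mr
```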
python/cudf_polars/tests/conftest.py (15 additions, 33 deletions)
@@ -2,18 +2,30 @@
 # SPDX-License-Identifier: Apache-2.0
 from __future__ import annotations
 
-import os
-
 import pytest
 
-DISTRIBUTED_CLUSTER_KEY = pytest.StashKey[dict]()
+import cudf_polars.callback
 
 
 @pytest.fixture(params=[False, True], ids=["no_nulls", "nulls"], scope="session")
 def with_nulls(request):
     return request.param
 
 
+@pytest.fixture
+def clear_memory_resource_cache():
+    """
+    Clear the cudf_polars.callback.default_memory_resource cache before and after a test.
+
+    This function caches memory resources for the duration of the process. Any test that
+    creates a pool (e.g. ``CudaAsyncMemoryResource``) should use this fixture to ensure that
+    the pool is freed after the test.
+    """
+    cudf_polars.callback.default_memory_resource.cache_clear()
+    yield
+    cudf_polars.callback.default_memory_resource.cache_clear()
+
+
 def pytest_addoption(parser):
     parser.addoption(
         "--executor",
@@ -57,33 +69,3 @@ def pytest_configure(config):
     cudf_polars.testing.asserts.DEFAULT_BLOCKSIZE_MODE = config.getoption(
         "--blocksize-mode"
     )
-
-
-def pytest_sessionstart(session):
-    if (
-        session.config.getoption("--scheduler") == "distributed"
-        and session.config.getoption("--executor") == "streaming"
-    ):
-        from dask import config
-        from dask.distributed import Client
-        from dask_cuda import LocalCUDACluster
-
-        # Avoid "Sending large graph of size ..." warnings
-        # (We expect these for tests using literal/random arrays)
-        config.set({"distributed.admin.large-graph-warning-threshold": "20MB"})
-
-        n_workers = int(os.environ.get("CUDF_POLARS_NUM_WORKERS", "1"))
-        cluster = LocalCUDACluster(n_workers=n_workers)
-        client = Client(cluster)
-        session.stash[DISTRIBUTED_CLUSTER_KEY] = {"cluster": cluster, "client": client}
-
-
-def pytest_sessionfinish(session):
-    if DISTRIBUTED_CLUSTER_KEY in session.stash:
-        cluster_info = session.stash[DISTRIBUTED_CLUSTER_KEY]
-        client = cluster_info.get("client")
-        cluster = cluster_info.get("cluster")
-        if client is not None:
-            client.shutdown()
-        if cluster is not None:
-            cluster.close()
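The clear_memory_resource_cache fixture above works because cudf_polars.callback.default_memory_resource is a memoized factory (it exposes cache_clear()), so a pool built during one test would otherwise stay alive for the rest of the pytest process. A minimal, self-contained analogue of that pattern, with made-up names (Pool, make_resource) rather than the real cudf_polars API:

```python
# Analogue of a memoized memory-resource factory plus a cache-clearing fixture.
# Pool and make_resource are illustrative stand-ins, not cudf_polars names.
import functools

import pytest


class Pool:
    """Stand-in for a GPU memory pool that holds memory until it is dropped."""

    def __init__(self, size: int) -> None:
        self.size = size


@functools.cache
def make_resource(size: int = 1024) -> Pool:
    # Memoized: repeated calls with the same argument return the same Pool,
    # so the pool lives for the whole process unless the cache is cleared.
    return Pool(size)


@pytest.fixture
def clear_resource_cache():
    # Mirrors the fixture above: drop cached pools before and after each test
    # so one test's allocation cannot starve the tests that follow it.
    make_resource.cache_clear()
    yield
    make_resource.cache_clear()


@pytest.mark.usefixtures("clear_resource_cache")
def test_uses_a_fresh_pool():
    assert make_resource().size == 1024
```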
python/cudf_polars/tests/experimental/conftest.py (new file, 44 additions)
@@ -0,0 +1,44 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+from __future__ import annotations
+
+import os
+
+import pytest
+
+
+@pytest.fixture(autouse=True)
+def dask_cluster(pytestconfig, worker_id):
+    if (
+        pytestconfig.getoption("--scheduler") == "distributed"
+        and pytestconfig.getoption("--executor") == "streaming"
+    ):
+        worker_count = int(os.environ.get("PYTEST_XDIST_WORKER_COUNT", "0"))
+        from dask import config
+        from dask_cuda import LocalCUDACluster
+
+        # Avoid "Sending large graph of size ..." warnings
+        # (We expect these for tests using literal/random arrays)
+        config.set({"distributed.admin.large-graph-warning-threshold": "20MB"})
+        if worker_count > 0:
+            # Avoid port conflicts with multiple test runners
+            worker_index = int(worker_id.removeprefix("gw"))
+            scheduler_port = 8800 + worker_index
+            dashboard_address = 8900 + worker_index
+        else:
+            scheduler_port = None
+            dashboard_address = None
+
+        n_workers = int(os.environ.get("CUDF_POLARS_NUM_WORKERS", "1"))
+
+        with (
+            LocalCUDACluster(
+                n_workers=n_workers,
+                scheduler_port=scheduler_port,
+                dashboard_address=dashboard_address,
+            ) as cluster,
+            cluster.get_client(),
+        ):
+            yield
+    else:
+        yield

Review thread on the @pytest.fixture(autouse=True) line:

mroeschke (Contributor, author): I put this fixture in the experimental directory since currently these are the only tests (with the CI script) that purposefully test with the distributed executor. Is that OK @TomAugspurger given your thoughts on reorganizing the cudf_polars test suite in the future?

Contributor: Yep, that sounds good to me.
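The port arithmetic in dask_cluster keeps concurrent pytest-xdist workers from competing for the same Dask scheduler and dashboard ports: each worker id (gw0, gw1, ...) maps to its own pair of ports, while a run without xdist falls back to letting Dask pick free ports. A standalone sketch of that mapping, using the same 8800/8900 bases as the fixture (the "master" id for non-xdist runs is an assumption about pytest-xdist's labeling):

```python
# Sketch of per-worker port selection for a LocalCUDACluster under pytest-xdist.
# Worker ids look like "gw0", "gw1", ...; without xdist the id is "master" and
# PYTEST_XDIST_WORKER_COUNT is unset, so None lets Dask choose any free port.
from __future__ import annotations

import os


def cluster_ports(worker_id: str) -> tuple[int | None, int | None]:
    worker_count = int(os.environ.get("PYTEST_XDIST_WORKER_COUNT", "0"))
    if worker_count > 0 and worker_id.startswith("gw"):
        index = int(worker_id.removeprefix("gw"))
        return 8800 + index, 8900 + index  # (scheduler port, dashboard port)
    return None, None


if __name__ == "__main__":
    os.environ["PYTEST_XDIST_WORKER_COUNT"] = "6"
    print(cluster_ports("gw2"))  # the third xdist worker gets (8802, 8902)
```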
python/cudf_polars/tests/test_config.py (1 addition)
@@ -101,6 +101,7 @@ def test_invalid_memory_resource_raises(mr):
     reason="managed memory not supported",
 )
 @pytest.mark.parametrize("enable_managed_memory", ["1", "0"])
+@pytest.mark.usefixtures("clear_memory_resource_cache")
 def test_cudf_polars_enable_disable_managed_memory(monkeypatch, enable_managed_memory):
     q = pl.LazyFrame({"a": [1, 2, 3]})
 
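The test gaining the marker here parametrizes an on/off value and applies it with monkeypatch so the change never leaks past the test; enabling managed memory presumably caches a memory resource, hence the new clear_memory_resource_cache marker. A generic sketch of that monkeypatch/parametrize pattern, with a made-up FEATURE_FLAG variable rather than the real cudf-polars setting:

```python
# Generic sketch of an env-var toggle tested under pytest's monkeypatch,
# parametrized over "1"/"0" like test_cudf_polars_enable_disable_managed_memory.
# FEATURE_FLAG is a hypothetical name, not a cudf-polars environment variable.
import os

import pytest


def feature_enabled() -> bool:
    return os.environ.get("FEATURE_FLAG", "0") == "1"


@pytest.mark.parametrize("flag_value", ["1", "0"])
def test_feature_flag(monkeypatch, flag_value):
    monkeypatch.setenv("FEATURE_FLAG", flag_value)  # scoped to this test only
    assert feature_enabled() is (flag_value == "1")
```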