Skip to content

Commit 319a372

Browse files
committed
merge with main for ruff
1 parent fd71ced commit 319a372

File tree

2 files changed

+45
-4
lines changed

2 files changed

+45
-4
lines changed

cuda_core/cuda/core/experimental/_device.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from cuda import cuda, cudart
99
from cuda.core.experimental._context import Context, ContextOptions
10-
from cuda.core.experimental._memory import Buffer, MemoryResource, _DefaultAsyncMempool
10+
from cuda.core.experimental._memory import Buffer, MemoryResource, _AsyncMemoryResource, _DefaultAsyncMempool
1111
from cuda.core.experimental._stream import Stream, StreamOptions, default_stream
1212
from cuda.core.experimental._utils import ComputeCapability, CUDAError, handle_return, precondition
1313

@@ -62,15 +62,21 @@ def __new__(cls, device_id=None):
6262
for dev_id in range(total):
6363
dev = super().__new__(cls)
6464
dev._id = dev_id
65-
dev._mr = _DefaultAsyncMempool(dev_id)
65+
# If the device is in TCC mode, or does not support memory pools for some other reason,
66+
# use the AsyncMemoryResource which does not use memory pools.
67+
if (handle_return(cudart.cudaGetDeviceProperties(dev_id))).memoryPoolsSupported == 0:
68+
dev._mr = _AsyncMemoryResource(dev_id)
69+
else:
70+
dev._mr = _DefaultAsyncMempool(dev_id)
71+
6672
dev._has_inited = False
6773
_tls.devices.append(dev)
6874

6975
return _tls.devices[device_id]
7076

7177
def _check_context_initialized(self, *args, **kwargs):
7278
if not self._has_inited:
73-
raise CUDAError("the device is not yet initialized, perhaps you forgot to call .set_current() first?")
79+
raise CUDAError("the device is not yet initialized, " "perhaps you forgot to call .set_current() first?")
7480

7581
@property
7682
def device_id(self) -> int:

cuda_core/cuda/core/experimental/_memory.py

+36-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,11 @@ class Buffer:
4242
"""
4343

4444
# TODO: handle ownership? (_mr could be None)
45-
__slots__ = ("_ptr", "_size", "_mr")
45+
__slots__ = (
46+
"_ptr",
47+
"_size",
48+
"_mr",
49+
)
4650

4751
def __init__(self, ptr, size, mr: MemoryResource = None):
4852
self._ptr = ptr
@@ -286,3 +290,34 @@ def is_host_accessible(self) -> bool:
286290
@property
def device_id(self) -> int:
    """Always raises RuntimeError.

    Pinned (host) memory is not associated with any single GPU, so this
    resource has no meaningful device ordinal to report.
    """
    raise RuntimeError("the pinned memory resource is not bound to any GPU")
293+
294+
295+
class _AsyncMemoryResource(MemoryResource):
    """Fallback memory resource for devices without memory-pool support
    (e.g. devices in TCC mode on Windows).

    NOTE(review): this class is selected by Device.__new__ exactly when
    ``cudaDeviceProp.memoryPoolsSupported == 0``, but the original
    implementation allocated with ``cuMemAllocAsync`` — the stream-ordered
    allocator that itself requires memory-pool support — so allocation
    would fail with CUDA_ERROR_NOT_SUPPORTED on precisely the devices this
    resource targets. Fixed to use the classic synchronous
    ``cuMemAlloc``/``cuMemFree``, synchronizing the stream before freeing
    to preserve the stream-ordered deallocation contract callers expect.
    """

    __slots__ = ("_dev_id",)

    def __init__(self, dev_id):
        # ``_handle`` is assumed to be a slot declared on the MemoryResource
        # base class (not visible here) — TODO confirm.
        self._handle = None
        self._dev_id = dev_id

    def allocate(self, size, stream=None) -> Buffer:
        """Allocate ``size`` bytes of device memory and wrap it in a Buffer.

        ``stream`` is accepted for interface compatibility with the other
        memory resources, but cuMemAlloc is synchronous so it is unused.
        """
        ptr = handle_return(cuda.cuMemAlloc(size))
        return Buffer(ptr, size, self)

    def deallocate(self, ptr, size, stream=None):
        """Free ``ptr``, first synchronizing ``stream`` so that any
        in-flight work using the buffer completes before release."""
        if stream is None:
            stream = default_stream()
        stream.sync()
        handle_return(cuda.cuMemFree(ptr))

    @property
    def is_device_accessible(self) -> bool:
        # Plain device memory: visible to the GPU...
        return True

    @property
    def is_host_accessible(self) -> bool:
        # ...but not directly addressable from the host.
        return False

    @property
    def device_id(self) -> int:
        # Ordinal of the device this resource allocates on.
        return self._dev_id

0 commit comments

Comments
 (0)