diff --git a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in index d102901cf..2aed45e64 100644 --- a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in +++ b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in @@ -9,13 +9,12 @@ # This code was automatically generated with version 12.9.0. Do not modify it directly. {{if 'Windows' == platform.system()}} import os -import site -import struct import win32api -from pywintypes import error {{else}} cimport cuda.bindings._lib.dlfcn as dlfcn +from libc.stdint cimport uintptr_t {{endif}} +from cuda.bindings import path_finder from libc.stdint cimport intptr_t @@ -48,65 +47,18 @@ cdef bint __cuPythonInit = False {{if 'nvrtcSetFlowCallback' in found_functions}}cdef void *__nvrtcSetFlowCallback = NULL{{endif}} cdef int cuPythonInit() except -1 nogil: + {{if 'Windows' != platform.system()}} + cdef void* handle = NULL + {{endif}} + global __cuPythonInit if __cuPythonInit: return 0 __cuPythonInit = True - # Load library - {{if 'Windows' == platform.system()}} - with gil: - # First check if the DLL has been loaded by 3rd parties - try: - handle = win32api.GetModuleHandle("nvrtc64_120_0.dll") - except: - handle = None - - # Check if DLLs can be found within pip installations - if not handle: - LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 - LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100 - site_packages = [site.getusersitepackages()] + site.getsitepackages() - for sp in site_packages: - mod_path = os.path.join(sp, "nvidia", "cuda_nvrtc", "bin") - if os.path.isdir(mod_path): - os.add_dll_directory(mod_path) - try: - handle = win32api.LoadLibraryEx( - # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path... - os.path.join(mod_path, "nvrtc64_120_0.dll"), - 0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR) - - # Note: nvrtc64_120_0.dll calls into nvrtc-builtins64_*.dll which is - # located in the same mod_path. - # Update PATH environ so that the two dlls can find each other - os.environ["PATH"] = os.pathsep.join((os.environ.get("PATH", ""), mod_path)) - except: - pass - else: - break - else: - # Else try default search - # Only reached if DLL wasn't found in any site-package path - LOAD_LIBRARY_SAFE_CURRENT_DIRS = 0x00002000 - try: - handle = win32api.LoadLibraryEx("nvrtc64_120_0.dll", 0, LOAD_LIBRARY_SAFE_CURRENT_DIRS) - except: - pass - - if not handle: - raise RuntimeError('Failed to LoadLibraryEx nvrtc64_120_0.dll') - {{else}} - handle = dlfcn.dlopen('libnvrtc.so.12', dlfcn.RTLD_NOW) - if handle == NULL: - with gil: - raise RuntimeError('Failed to dlopen libnvrtc.so.12') - {{endif}} - - - # Load function {{if 'Windows' == platform.system()}} with gil: + handle = path_finder._load_nvidia_dynamic_library("nvrtc").handle {{if 'nvrtcGetErrorString' in found_functions}} try: global __nvrtcGetErrorString @@ -291,6 +243,8 @@ cdef int cuPythonInit() except -1 nogil: {{endif}} {{else}} + with gil: + handle = path_finder._load_nvidia_dynamic_library("nvrtc").handle {{if 'nvrtcGetErrorString' in found_functions}} global __nvrtcGetErrorString __nvrtcGetErrorString = dlfcn.dlsym(handle, 'nvrtcGetErrorString') diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx index bb61a3e22..36bdcb4f4 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx @@ -4,12 +4,12 @@ # # This code was automatically generated across versions from 12.0.1 to 12.9.0. Do not modify it directly. -from libc.stdint cimport intptr_t - -from .utils cimport get_nvjitlink_dso_version_suffix +from libc.stdint cimport intptr_t, uintptr_t from .utils import FunctionNotFoundError, NotSupportedError +from cuda.bindings import path_finder + ############################################################################### # Extern ############################################################################### @@ -52,17 +52,9 @@ cdef void* __nvJitLinkGetInfoLog = NULL cdef void* __nvJitLinkVersion = NULL -cdef void* load_library(const int driver_ver) except* with gil: - cdef void* handle - for suffix in get_nvjitlink_dso_version_suffix(driver_ver): - so_name = "libnvJitLink.so" + (f".{suffix}" if suffix else suffix) - handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) - if handle != NULL: - break - else: - err_msg = dlerror() - raise RuntimeError(f'Failed to dlopen libnvJitLink ({err_msg.decode()})') - return handle +cdef void* load_library(int driver_ver) except* with gil: + cdef uintptr_t handle = path_finder._load_nvidia_dynamic_library("nvJitLink").handle + return handle cdef int _check_or_init_nvjitlink() except -1 nogil: diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx index a2f77ca2e..fb29ac8b7 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx @@ -6,12 +6,9 @@ from libc.stdint cimport intptr_t -from .utils cimport get_nvjitlink_dso_version_suffix - from .utils import FunctionNotFoundError, NotSupportedError -import os -import site +from cuda.bindings import path_finder import win32api @@ -42,44 +39,9 @@ cdef void* __nvJitLinkGetInfoLog = NULL cdef void* __nvJitLinkVersion = NULL -cdef inline list get_site_packages(): - return [site.getusersitepackages()] + site.getsitepackages() - - -cdef load_library(const int driver_ver): - handle = 0 - - for suffix in get_nvjitlink_dso_version_suffix(driver_ver): - if len(suffix) == 0: - continue - dll_name = f"nvJitLink_{suffix}0_0.dll" - - # First check if the DLL has been loaded by 3rd parties - try: - return win32api.GetModuleHandle(dll_name) - except: - pass - - # Next, check if DLLs are installed via pip - for sp in get_site_packages(): - mod_path = os.path.join(sp, "nvidia", "nvJitLink", "bin") - if os.path.isdir(mod_path): - os.add_dll_directory(mod_path) - try: - return win32api.LoadLibraryEx( - # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path... - os.path.join(mod_path, dll_name), - 0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR) - except: - pass - # Finally, try default search - # Only reached if DLL wasn't found in any site-package path - try: - return win32api.LoadLibrary(dll_name) - except: - pass - - raise RuntimeError('Failed to load nvJitLink') +cdef void* load_library(int driver_ver) except* with gil: + cdef intptr_t handle = path_finder._load_nvidia_dynamic_library("nvJitLink").handle + return handle cdef int _check_or_init_nvjitlink() except -1 nogil: @@ -88,15 +50,16 @@ cdef int _check_or_init_nvjitlink() except -1 nogil: return 0 cdef int err, driver_ver + cdef intptr_t handle with gil: # Load driver to check version try: - handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32) + nvcuda_handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32) except Exception as e: raise NotSupportedError(f'CUDA driver is not found ({e})') global __cuDriverGetVersion if __cuDriverGetVersion == NULL: - __cuDriverGetVersion = win32api.GetProcAddress(handle, 'cuDriverGetVersion') + __cuDriverGetVersion = win32api.GetProcAddress(nvcuda_handle, 'cuDriverGetVersion') if __cuDriverGetVersion == NULL: raise RuntimeError('something went wrong') err = (__cuDriverGetVersion)(&driver_ver) @@ -104,7 +67,7 @@ cdef int _check_or_init_nvjitlink() except -1 nogil: raise RuntimeError('something went wrong') # Load library - handle = load_library(driver_ver) + handle = load_library(driver_ver) # Load function global __nvJitLinkCreate diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx index 53675b094..8759096a4 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx @@ -4,12 +4,12 @@ # # This code was automatically generated across versions from 11.0.3 to 12.9.0. Do not modify it directly. -from libc.stdint cimport intptr_t - -from .utils cimport get_nvvm_dso_version_suffix +from libc.stdint cimport intptr_t, uintptr_t from .utils import FunctionNotFoundError, NotSupportedError +from cuda.bindings import path_finder + ############################################################################### # Extern ############################################################################### @@ -51,16 +51,8 @@ cdef void* __nvvmGetProgramLog = NULL cdef void* load_library(const int driver_ver) except* with gil: - cdef void* handle - for suffix in get_nvvm_dso_version_suffix(driver_ver): - so_name = "libnvvm.so" + (f".{suffix}" if suffix else suffix) - handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) - if handle != NULL: - break - else: - err_msg = dlerror() - raise RuntimeError(f'Failed to dlopen libnvvm ({err_msg.decode()})') - return handle + cdef uintptr_t handle = path_finder._load_nvidia_dynamic_library("nvvm").handle + return handle cdef int _check_or_init_nvvm() except -1 nogil: diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx index 3f9f54a4d..abf75ba1d 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx @@ -6,12 +6,9 @@ from libc.stdint cimport intptr_t -from .utils cimport get_nvvm_dso_version_suffix - from .utils import FunctionNotFoundError, NotSupportedError -import os -import site +from cuda.bindings import path_finder import win32api @@ -40,52 +37,9 @@ cdef void* __nvvmGetProgramLogSize = NULL cdef void* __nvvmGetProgramLog = NULL -cdef inline list get_site_packages(): - return [site.getusersitepackages()] + site.getsitepackages() + ["conda"] - - -cdef load_library(const int driver_ver): - handle = 0 - - for suffix in get_nvvm_dso_version_suffix(driver_ver): - if len(suffix) == 0: - continue - dll_name = "nvvm64_40_0.dll" - - # First check if the DLL has been loaded by 3rd parties - try: - return win32api.GetModuleHandle(dll_name) - except: - pass - - # Next, check if DLLs are installed via pip or conda - for sp in get_site_packages(): - if sp == "conda": - # nvvm is not under $CONDA_PREFIX/lib, so it's not in the default search path - conda_prefix = os.environ.get("CONDA_PREFIX") - if conda_prefix is None: - continue - mod_path = os.path.join(conda_prefix, "Library", "nvvm", "bin") - else: - mod_path = os.path.join(sp, "nvidia", "cuda_nvcc", "nvvm", "bin") - if os.path.isdir(mod_path): - os.add_dll_directory(mod_path) - try: - return win32api.LoadLibraryEx( - # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path... - os.path.join(mod_path, dll_name), - 0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR) - except: - pass - - # Finally, try default search - # Only reached if DLL wasn't found in any site-package path - try: - return win32api.LoadLibrary(dll_name) - except: - pass - - raise RuntimeError('Failed to load nvvm') +cdef void* load_library(int driver_ver) except* with gil: + cdef intptr_t handle = path_finder._load_nvidia_dynamic_library("nvvm").handle + return handle cdef int _check_or_init_nvvm() except -1 nogil: @@ -94,15 +48,16 @@ cdef int _check_or_init_nvvm() except -1 nogil: return 0 cdef int err, driver_ver + cdef intptr_t handle with gil: # Load driver to check version try: - handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32) + nvcuda_handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32) except Exception as e: raise NotSupportedError(f'CUDA driver is not found ({e})') global __cuDriverGetVersion if __cuDriverGetVersion == NULL: - __cuDriverGetVersion = win32api.GetProcAddress(handle, 'cuDriverGetVersion') + __cuDriverGetVersion = win32api.GetProcAddress(nvcuda_handle, 'cuDriverGetVersion') if __cuDriverGetVersion == NULL: raise RuntimeError('something went wrong') err = (__cuDriverGetVersion)(&driver_ver) @@ -110,7 +65,7 @@ cdef int _check_or_init_nvvm() except -1 nogil: raise RuntimeError('something went wrong') # Load library - handle = load_library(driver_ver) + handle = load_library(driver_ver) # Load function global __nvvmVersion diff --git a/cuda_bindings/cuda/bindings/_internal/utils.pxd b/cuda_bindings/cuda/bindings/_internal/utils.pxd index cac7846ff..a4b71c531 100644 --- a/cuda_bindings/cuda/bindings/_internal/utils.pxd +++ b/cuda_bindings/cuda/bindings/_internal/utils.pxd @@ -165,6 +165,3 @@ cdef int get_nested_resource_ptr(nested_resource[ResT] &in_out_ptr, object obj, cdef bint is_nested_sequence(data) cdef void* get_buffer_pointer(buf, Py_ssize_t size, readonly=*) except* - -cdef tuple get_nvjitlink_dso_version_suffix(int driver_ver) -cdef tuple get_nvvm_dso_version_suffix(int driver_ver) diff --git a/cuda_bindings/cuda/bindings/_internal/utils.pyx b/cuda_bindings/cuda/bindings/_internal/utils.pyx index 0a693c052..7fc77b22c 100644 --- a/cuda_bindings/cuda/bindings/_internal/utils.pyx +++ b/cuda_bindings/cuda/bindings/_internal/utils.pyx @@ -127,17 +127,3 @@ cdef int get_nested_resource_ptr(nested_resource[ResT] &in_out_ptr, object obj, class FunctionNotFoundError(RuntimeError): pass class NotSupportedError(RuntimeError): pass - - -cdef tuple get_nvjitlink_dso_version_suffix(int driver_ver): - if 12000 <= driver_ver < 13000: - return ('12', '') - raise NotSupportedError(f'CUDA driver version {driver_ver} is not supported') - - -cdef tuple get_nvvm_dso_version_suffix(int driver_ver): - if 11000 <= driver_ver < 11020: - return ('3', '') - if 11020 <= driver_ver < 13000: - return ('4', '') - raise NotSupportedError(f'CUDA driver version {driver_ver} is not supported') diff --git a/cuda_bindings/cuda/bindings/_path_finder/README.md b/cuda_bindings/cuda/bindings/_path_finder/README.md index 94b80499f..fa51b56fa 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/README.md +++ b/cuda_bindings/cuda/bindings/_path_finder/README.md @@ -24,52 +24,27 @@ strategy for locating NVIDIA shared libraries: The absolute path of the already loaded library will be returned, along with the handle to the library. -1. **Python Package Ecosystem** - - Scans `sys.path` to find libraries installed via NVIDIA Python wheels. +1. **NVIDIA Python wheels** + - Scans all site-packages to find libraries installed via NVIDIA Python wheels. -2. **Conda Environments** - - Leverages Conda-specific paths through our fork of `get_cuda_paths()` - from numba-cuda. - -3. **Environment variables** - - Relies on `CUDA_HOME`/`CUDA_PATH` environment variables if set. - -4. **System Installations** - - Checks traditional system locations through these paths: - - Linux: `/usr/local/cuda/lib64` - - Windows: `C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y\bin` - (where X.Y is the CTK version) - - **Notably does NOT search**: - - Versioned CUDA directories like `/usr/local/cuda-12.3` - - Distribution-specific packages (RPM/DEB) - EXCEPT Debian's `nvidia-cuda-toolkit` - -5. **OS Default Mechanisms** +2. **OS default mechanisms / Conda environments** - Falls back to native loader: - `dlopen()` on Linux - `LoadLibraryW()` on Windows + - CTK installations with system config updates are expected to be discovered: + - Linux: Via `/etc/ld.so.conf.d/*cuda*.conf` + - Windows: Via `C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y\bin` on system `PATH` + - Conda installations are expected to be discovered: + - Linux: Via `$ORIGIN/../lib` on `RPATH` (of the `python` binary) + - Windows: Via `%CONDA_PREFIX%\Library\bin` on system `PATH` + +3. **Environment variables** + - Relies on `CUDA_HOME` or `CUDA_PATH` environment variables if set + (in that order). Note that the search is done on a per-library basis. There is no centralized mechanism that ensures all libraries are found in the same way. -## Implementation Philosophy - -The current implementation balances stability and evolution: - -- **Baseline Foundation:** Uses a fork of numba-cuda's `cuda_paths.py` that has been - battle-tested in production environments. - -- **Validation Infrastructure:** Comprehensive CI testing matrix being developed to cover: - - Various Linux/Windows environments - - Python packaging formats (wheels, conda) - - CUDA Toolkit versions - -- **Roadmap:** Planned refactoring to: - - Unify library discovery logic - - Improve maintainability - - Better enforce search priority - - Expand platform support - ## Maintenance Requirements These key components must be updated for new CUDA Toolkit releases: diff --git a/cuda_bindings/cuda/bindings/_path_finder/cuda_paths.py b/cuda_bindings/cuda/bindings/_path_finder/cuda_paths.py deleted file mode 100644 index 80f4e0149..000000000 --- a/cuda_bindings/cuda/bindings/_path_finder/cuda_paths.py +++ /dev/null @@ -1,573 +0,0 @@ -# Copyright 2025 NVIDIA Corporation. All rights reserved. -# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE - -# Forked from: -# https://github.com/NVIDIA/numba-cuda/blob/8c9c9d0cb901c06774a9abea6d12b6a4b0287e5e/numba_cuda/numba/cuda/cuda_paths.py - -# The numba-cuda version in turn was forked from: -# https://github.com/numba/numba/blob/6c8a71ffc3eaa1c68e1bac927b80ee7469002b3f/numba/cuda/cuda_paths.py -# SPDX-License-Identifier: BSD-2-Clause -# -# Original Numba LICENSE: -# Copyright (c) 2012, Anaconda, Inc. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# -# Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import ctypes -import os -import platform -import re -import site -import sys -import traceback -import warnings -from collections import namedtuple -from pathlib import Path - -from cuda.bindings._path_finder.findlib import find_lib - -IS_WIN32 = sys.platform.startswith("win32") - -_env_path_tuple = namedtuple("_env_path_tuple", ["by", "info"]) - - -def _get_numba_CUDA_INCLUDE_PATH(): - # From numba/numba/core/config.py - - def _readenv(name, ctor, default): - value = os.environ.get(name) - if value is None: - return default() if callable(default) else default - try: - return ctor(value) - except Exception: - warnings.warn( # noqa: B028 - f"Environment variable '{name}' is defined but " - f"its associated value '{value}' could not be " - "parsed.\nThe parse failed with exception:\n" - f"{traceback.format_exc()}", - RuntimeWarning, - ) - return default - - if IS_WIN32: - cuda_path = os.environ.get("CUDA_PATH") - if cuda_path: # noqa: SIM108 - default_cuda_include_path = os.path.join(cuda_path, "include") - else: - default_cuda_include_path = "cuda_include_not_found" - else: - default_cuda_include_path = os.path.join(os.sep, "usr", "local", "cuda", "include") - CUDA_INCLUDE_PATH = _readenv("NUMBA_CUDA_INCLUDE_PATH", str, default_cuda_include_path) - return CUDA_INCLUDE_PATH - - -config_CUDA_INCLUDE_PATH = _get_numba_CUDA_INCLUDE_PATH() - -SEARCH_PRIORITY = [ - "Conda environment", - "Conda environment (NVIDIA package)", - "NVIDIA NVCC Wheel", - "CUDA_HOME", - "System", - "Debian package", -] - - -def _priority_index(label): - if label in SEARCH_PRIORITY: - return SEARCH_PRIORITY.index(label) - else: - raise ValueError(f"Can't determine search priority for {label}") - - -def _find_first_valid_lazy(options): - sorted_options = sorted(options, key=lambda x: _priority_index(x[0])) - for label, fn in sorted_options: - value = fn() - if value: - return label, value - return "", None - - -def _build_options(pairs): - """Sorts and returns a list of (label, value) tuples according to SEARCH_PRIORITY.""" - priority_index = {label: i for i, label in enumerate(SEARCH_PRIORITY)} - return sorted(pairs, key=lambda pair: priority_index.get(pair[0], float("inf"))) - - -def _find_valid_path(options): - """Find valid path from *options*, which is a list of 2-tuple of - (name, path). Return first pair where *path* is not None. - If no valid path is found, return ('', None) - """ - for by, data in options: - if data is not None: - return by, data - else: - return "", None - - -def _get_libdevice_path_decision(): - options = _build_options( - [ - ("Conda environment", get_conda_ctk), - ("Conda environment (NVIDIA package)", get_nvidia_libdevice_ctk), - ("CUDA_HOME", lambda: get_cuda_home("nvvm", "libdevice")), - ("NVIDIA NVCC Wheel", get_libdevice_wheel), - ("System", lambda: get_system_ctk("nvvm", "libdevice")), - ("Debian package", get_debian_pkg_libdevice), - ] - ) - return _find_first_valid_lazy(options) - - -def _nvvm_lib_dir(): - if IS_WIN32: - return "nvvm", "bin" - else: - return "nvvm", "lib64" - - -def _get_nvvm_path_decision(): - options = [ - ("Conda environment", get_conda_ctk), - ("Conda environment (NVIDIA package)", get_nvidia_nvvm_ctk), - ("NVIDIA NVCC Wheel", _get_nvvm_wheel), - ("CUDA_HOME", lambda: get_cuda_home(*_nvvm_lib_dir())), - ("System", lambda: get_system_ctk(*_nvvm_lib_dir())), - ] - return _find_first_valid_lazy(options) - - -def _get_nvrtc_system_ctk(): - sys_path = get_system_ctk("bin" if IS_WIN32 else "lib64") - candidates = find_lib("nvrtc", sys_path) - if candidates: - return max(candidates) - - -def _get_nvrtc_path_decision(): - options = _build_options( - [ - ("CUDA_HOME", lambda: get_cuda_home("nvrtc")), - ("Conda environment", get_conda_ctk), - ("Conda environment (NVIDIA package)", get_nvidia_cudalib_ctk), - ("NVIDIA NVCC Wheel", _get_nvrtc_wheel), - ("System", _get_nvrtc_system_ctk), - ] - ) - return _find_first_valid_lazy(options) - - -def _get_nvvm_wheel(): - platform_map = { - "linux": ("lib64", "libnvvm.so"), - "win32": ("bin", "nvvm64_40_0.dll"), - } - - for plat, (dso_dir, dso_path) in platform_map.items(): - if sys.platform.startswith(plat): - break - else: - raise NotImplementedError("Unsupported platform") - - site_paths = [site.getusersitepackages()] + site.getsitepackages() - - for sp in filter(None, site_paths): - nvvm_path = Path(sp, "nvidia", "cuda_nvcc", "nvvm", dso_dir, dso_path) - if nvvm_path.exists(): - return str(nvvm_path.parent) - - return None - - -def get_nvrtc_dso_path(): - site_paths = [site.getusersitepackages()] + site.getsitepackages() - for sp in site_paths: - lib_dir = os.path.join( - sp, - "nvidia", - "cuda_nvrtc", - ("bin" if IS_WIN32 else "lib") if sp else None, - ) - if lib_dir and os.path.exists(lib_dir): - for major in (12, 11): - if major == 11: - cu_ver = "112" if IS_WIN32 else "11.2" - elif major == 12: - cu_ver = "120" if IS_WIN32 else "12" - else: - raise NotImplementedError(f"CUDA {major} is not supported") - - dso_path = os.path.join( - lib_dir, - f"nvrtc64_{cu_ver}_0.dll" if IS_WIN32 else f"libnvrtc.so.{cu_ver}", - ) - if os.path.isfile(dso_path): - return dso_path - return None - - -def _get_nvrtc_wheel(): - dso_path = get_nvrtc_dso_path() - if dso_path: - try: - result = ctypes.CDLL(dso_path, mode=ctypes.RTLD_GLOBAL) - except OSError: - pass - else: - if IS_WIN32: - import win32api - - # This absolute path will - # always be correct regardless of the package source - nvrtc_path = win32api.GetModuleFileNameW(result._handle) - dso_dir = os.path.dirname(nvrtc_path) - builtins_path = os.path.join( - dso_dir, - [f for f in os.listdir(dso_dir) if re.match("^nvrtc-builtins.*.dll$", f)][0], - ) - if not os.path.exists(builtins_path): - raise RuntimeError(f'Path does not exist: "{builtins_path}"') - return Path(dso_path) - - -def _get_libdevice_paths(): - by, libdir = _get_libdevice_path_decision() - if not libdir: - return _env_path_tuple(by, None) - out = os.path.join(libdir, "libdevice.10.bc") - return _env_path_tuple(by, out) - - -def _cudalib_path(): - if IS_WIN32: - return "bin" - else: - return "lib64" - - -def _cuda_home_static_cudalib_path(): - if IS_WIN32: - return ("lib", "x64") - else: - return ("lib64",) - - -def _get_cudalib_wheel(): - """Get the cudalib path from the NVCC wheel.""" - site_paths = [site.getusersitepackages()] + site.getsitepackages() - libdir = "bin" if IS_WIN32 else "lib" - for sp in filter(None, site_paths): - cudalib_path = Path(sp, "nvidia", "cuda_runtime", libdir) - if cudalib_path.exists(): - return str(cudalib_path) - return None - - -def _get_cudalib_dir_path_decision(): - options = _build_options( - [ - ("Conda environment", get_conda_ctk), - ("Conda environment (NVIDIA package)", get_nvidia_cudalib_ctk), - ("NVIDIA NVCC Wheel", _get_cudalib_wheel), - ("CUDA_HOME", lambda: get_cuda_home(_cudalib_path())), - ("System", lambda: get_system_ctk(_cudalib_path())), - ] - ) - return _find_first_valid_lazy(options) - - -def _get_static_cudalib_dir_path_decision(): - options = _build_options( - [ - ("Conda environment", get_conda_ctk), - ( - "Conda environment (NVIDIA package)", - get_nvidia_static_cudalib_ctk, - ), - ( - "CUDA_HOME", - lambda: get_cuda_home(*_cuda_home_static_cudalib_path()), - ), - ("System", lambda: get_system_ctk(_cudalib_path())), - ] - ) - return _find_first_valid_lazy(options) - - -def _get_cudalib_dir(): - by, libdir = _get_cudalib_dir_path_decision() - return _env_path_tuple(by, libdir) - - -def _get_static_cudalib_dir(): - by, libdir = _get_static_cudalib_dir_path_decision() - return _env_path_tuple(by, libdir) - - -def get_system_ctk(*subdirs): - """Return path to system-wide cudatoolkit; or, None if it doesn't exist.""" - # Linux? - if not IS_WIN32: - # Is cuda alias to /usr/local/cuda? - # We are intentionally not getting versioned cuda installation. - result = os.path.join("/usr/local/cuda", *subdirs) - if os.path.exists(result): - return result - - -def get_conda_ctk(): - """Return path to directory containing the shared libraries of cudatoolkit.""" - is_conda_env = os.path.exists(os.path.join(sys.prefix, "conda-meta")) - if not is_conda_env: - return - # Assume the existence of NVVM to imply cudatoolkit installed - paths = find_lib("nvvm") - if not paths: - return - # Use the directory name of the max path - return os.path.dirname(max(paths)) - - -def get_nvidia_nvvm_ctk(): - """Return path to directory containing the NVVM shared library.""" - is_conda_env = os.path.exists(os.path.join(sys.prefix, "conda-meta")) - if not is_conda_env: - return - - # Assume the existence of NVVM in the conda env implies that a CUDA toolkit - # conda package is installed. - - # First, try the location used on Linux and the Windows 11.x packages - libdir = os.path.join(sys.prefix, "nvvm", _cudalib_path()) - if not os.path.exists(libdir) or not os.path.isdir(libdir): - # If that fails, try the location used for Windows 12.x packages - libdir = os.path.join(sys.prefix, "Library", "nvvm", _cudalib_path()) - if not os.path.exists(libdir) or not os.path.isdir(libdir): - # If that doesn't exist either, assume we don't have the NVIDIA - # conda package - return - - paths = find_lib("nvvm", libdir=libdir) - if not paths: - return - # Use the directory name of the max path - return os.path.dirname(max(paths)) - - -def get_nvidia_libdevice_ctk(): - """Return path to directory containing the libdevice library.""" - nvvm_ctk = get_nvidia_nvvm_ctk() - if not nvvm_ctk: - return - nvvm_dir = os.path.dirname(nvvm_ctk) - return os.path.join(nvvm_dir, "libdevice") - - -def get_nvidia_cudalib_ctk(): - """Return path to directory containing the shared libraries of cudatoolkit.""" - nvvm_ctk = get_nvidia_nvvm_ctk() - if not nvvm_ctk: - return - env_dir = os.path.dirname(os.path.dirname(nvvm_ctk)) - subdir = "bin" if IS_WIN32 else "lib" - return os.path.join(env_dir, subdir) - - -def get_nvidia_static_cudalib_ctk(): - """Return path to directory containing the static libraries of cudatoolkit.""" - nvvm_ctk = get_nvidia_nvvm_ctk() - if not nvvm_ctk: - return - - if IS_WIN32 and ("Library" not in nvvm_ctk): # noqa: SIM108 - # Location specific to CUDA 11.x packages on Windows - dirs = ("Lib", "x64") - else: - # Linux, or Windows with CUDA 12.x packages - dirs = ("lib",) - - env_dir = os.path.dirname(os.path.dirname(nvvm_ctk)) - return os.path.join(env_dir, *dirs) - - -def get_cuda_home(*subdirs): - """Get paths of CUDA_HOME. - If *subdirs* are the subdirectory name to be appended in the resulting - path. - """ - cuda_home = os.environ.get("CUDA_HOME") - if cuda_home is None: - # Try Windows CUDA installation without Anaconda - cuda_home = os.environ.get("CUDA_PATH") - if cuda_home is not None: - return os.path.join(cuda_home, *subdirs) - - -def _get_nvvm_path(): - by, path = _get_nvvm_path_decision() - - if by == "NVIDIA NVCC Wheel": - platform_map = { - "linux": "libnvvm.so", - "win32": "nvvm64_40_0.dll", - } - - for plat, dso_name in platform_map.items(): - if sys.platform.startswith(plat): - break - else: - raise NotImplementedError("Unsupported platform") - - path = os.path.join(path, dso_name) - else: - candidates = find_lib("nvvm", path) - path = max(candidates) if candidates else None - return _env_path_tuple(by, path) - - -def _get_nvrtc_path(): - by, path = _get_nvrtc_path_decision() - if by == "NVIDIA NVCC Wheel": - path = str(path) - elif by == "System": - return _env_path_tuple(by, path) - else: - candidates = find_lib("nvrtc", path) - path = max(candidates) if candidates else None - return _env_path_tuple(by, path) - - -def get_cuda_paths(): - """Returns a dictionary mapping component names to a 2-tuple - of (source_variable, info). - - The returned dictionary will have the following keys and infos: - - "nvvm": file_path - - "libdevice": List[Tuple[arch, file_path]] - - "cudalib_dir": directory_path - - Note: The result of the function is cached. - """ - # Check cache - if hasattr(get_cuda_paths, "_cached_result"): - return get_cuda_paths._cached_result - else: - # Not in cache - d = { - "nvvm": _get_nvvm_path(), - "nvrtc": _get_nvrtc_path(), - "libdevice": _get_libdevice_paths(), - "cudalib_dir": _get_cudalib_dir(), - "static_cudalib_dir": _get_static_cudalib_dir(), - "include_dir": _get_include_dir(), - } - # Cache result - get_cuda_paths._cached_result = d - return d - - -def get_debian_pkg_libdevice(): - """ - Return the Debian NVIDIA Maintainers-packaged libdevice location, if it - exists. - """ - pkg_libdevice_location = "/usr/lib/nvidia-cuda-toolkit/libdevice" - if not os.path.exists(pkg_libdevice_location): - return None - return pkg_libdevice_location - - -def get_libdevice_wheel(): - nvvm_path = _get_nvvm_wheel() - if nvvm_path is None: - return None - nvvm_path = Path(nvvm_path) - libdevice_path = nvvm_path.parent / "libdevice" - - return str(libdevice_path) - - -def get_current_cuda_target_name(): - """Determine conda's CTK target folder based on system and machine arch. - - CTK's conda package delivers headers based on its architecture type. For example, - `x86_64` machine places header under `$CONDA_PREFIX/targets/x86_64-linux`, and - `aarch64` places under `$CONDA_PREFIX/targets/sbsa-linux`. Read more about the - nuances at cudart's conda feedstock: - https://github.com/conda-forge/cuda-cudart-feedstock/blob/main/recipe/meta.yaml#L8-L11 # noqa: E501 - """ - system = platform.system() - machine = platform.machine() - - if system == "Linux": - arch_to_targets = {"x86_64": "x86_64-linux", "aarch64": "sbsa-linux"} - elif system == "Windows": - arch_to_targets = { - "AMD64": "x64", - } - else: - arch_to_targets = {} - - return arch_to_targets.get(machine, None) - - -def get_conda_include_dir(): - """ - Return the include directory in the current conda environment, if one - is active and it exists. - """ - is_conda_env = os.path.exists(os.path.join(sys.prefix, "conda-meta")) - if not is_conda_env: - return - - if platform.system() == "Windows": - include_dir = os.path.join(sys.prefix, "Library", "include") - elif target_name := get_current_cuda_target_name(): - include_dir = os.path.join(sys.prefix, "targets", target_name, "include") - else: - # A fallback when target cannot determined - # though usually it shouldn't. - include_dir = os.path.join(sys.prefix, "include") - - if ( - os.path.exists(include_dir) - and os.path.isdir(include_dir) - and os.path.exists(os.path.join(include_dir, "cuda_device_runtime_api.h")) - ): - return include_dir - return - - -def _get_include_dir(): - """Find the root include directory.""" - options = [ - ("Conda environment (NVIDIA package)", get_conda_include_dir()), - ("CUDA_INCLUDE_PATH Config Entry", config_CUDA_INCLUDE_PATH), - # TODO: add others - ] - by, include_dir = _find_valid_path(options) - return _env_path_tuple(by, include_dir) diff --git a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py index af9f42fbf..9835b72d0 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py @@ -4,11 +4,9 @@ import functools import glob import os -import sys -from cuda.bindings._path_finder.cuda_paths import get_cuda_paths from cuda.bindings._path_finder.find_sub_dirs import find_sub_dirs_all_sitepackages -from cuda.bindings._path_finder.supported_libs import is_suppressed_dll_file +from cuda.bindings._path_finder.supported_libs import IS_WINDOWS, is_suppressed_dll_file def _no_such_file_in_sub_dirs(sub_dirs, file_wild, error_messages, attachments): @@ -20,10 +18,7 @@ def _no_such_file_in_sub_dirs(sub_dirs, file_wild, error_messages, attachments): def _find_so_using_nvidia_lib_dirs(libname, so_basename, error_messages, attachments): - if libname == "nvvm": # noqa: SIM108 - nvidia_sub_dirs = ("nvidia", "*", "nvvm", "lib64") - else: - nvidia_sub_dirs = ("nvidia", "*", "lib") + nvidia_sub_dirs = ("nvidia", "*", "nvvm", "lib64") if libname == "nvvm" else ("nvidia", "*", "lib") file_wild = so_basename + "*" for lib_dir in find_sub_dirs_all_sitepackages(nvidia_sub_dirs): # First look for an exact match @@ -49,10 +44,7 @@ def _find_dll_under_dir(dirpath, file_wild): def _find_dll_using_nvidia_bin_dirs(libname, lib_searched_for, error_messages, attachments): - if libname == "nvvm": # noqa: SIM108 - nvidia_sub_dirs = ("nvidia", "*", "nvvm", "bin") - else: - nvidia_sub_dirs = ("nvidia", "*", "bin") + nvidia_sub_dirs = ("nvidia", "*", "nvvm", "bin") if libname == "nvvm" else ("nvidia", "*", "bin") for bin_dir in find_sub_dirs_all_sitepackages(nvidia_sub_dirs): dll_name = _find_dll_under_dir(bin_dir, lib_searched_for) if dll_name is not None: @@ -61,55 +53,57 @@ def _find_dll_using_nvidia_bin_dirs(libname, lib_searched_for, error_messages, a return None -def _get_cuda_paths_info(key, error_messages): - env_path_tuple = get_cuda_paths()[key] - if not env_path_tuple: - error_messages.append(f'Failure obtaining get_cuda_paths()["{key}"]') - return None - if not env_path_tuple.info: - error_messages.append(f'Failure obtaining get_cuda_paths()["{key}"].info') - return None - return env_path_tuple.info +def _get_cuda_home(): + cuda_home = os.environ.get("CUDA_HOME") + if cuda_home is None: + cuda_home = os.environ.get("CUDA_PATH") + return cuda_home -def _find_so_using_cudalib_dir(so_basename, error_messages, attachments): - cudalib_dir = _get_cuda_paths_info("cudalib_dir", error_messages) - if cudalib_dir is None: +def _find_lib_dir_using_cuda_home(libname): + cuda_home = _get_cuda_home() + if cuda_home is None: return None - primary_so_dir = cudalib_dir + "/" - candidate_so_dirs = [primary_so_dir] - libs = ["/lib/", "/lib64/"] - for _ in range(2): - alt_dir = libs[0].join(primary_so_dir.rsplit(libs[1], 1)) - if alt_dir not in candidate_so_dirs: - candidate_so_dirs.append(alt_dir) - libs.reverse() - candidate_so_names = [so_dirname + so_basename for so_dirname in candidate_so_dirs] - for so_name in candidate_so_names: - if os.path.isfile(so_name): - return so_name - error_messages.append(f"No such file: {so_name}") - for so_dirname in candidate_so_dirs: - attachments.append(f' listdir("{so_dirname}"):') - if not os.path.isdir(so_dirname): - attachments.append(" DIRECTORY DOES NOT EXIST") - else: - for node in sorted(os.listdir(so_dirname)): - attachments.append(f" {node}") + if IS_WINDOWS: + subdirs = (os.path.join("nvvm", "bin"),) if libname == "nvvm" else ("bin",) + else: + subdirs = ( + (os.path.join("nvvm", "lib64"),) + if libname == "nvvm" + else ( + "lib64", # CTK + "lib", # Conda + ) + ) + for subdir in subdirs: + dirname = os.path.join(cuda_home, subdir) + if os.path.isdir(dirname): + return dirname return None -def _find_dll_using_cudalib_dir(libname, error_messages, attachments): - cudalib_dir = _get_cuda_paths_info("cudalib_dir", error_messages) - if cudalib_dir is None: - return None +def _find_so_using_lib_dir(lib_dir, so_basename, error_messages, attachments): + so_name = os.path.join(lib_dir, so_basename) + if os.path.isfile(so_name): + return so_name + error_messages.append(f"No such file: {so_name}") + attachments.append(f' listdir("{lib_dir}"):') + if not os.path.isdir(lib_dir): + attachments.append(" DIRECTORY DOES NOT EXIST") + else: + for node in sorted(os.listdir(lib_dir)): + attachments.append(f" {node}") + return None + + +def _find_dll_using_lib_dir(lib_dir, libname, error_messages, attachments): file_wild = libname + "*.dll" - dll_name = _find_dll_under_dir(cudalib_dir, file_wild) + dll_name = _find_dll_under_dir(lib_dir, file_wild) if dll_name is not None: return dll_name error_messages.append(f"No such file: {file_wild}") - attachments.append(f' listdir("{cudalib_dir}"):') - for node in sorted(os.listdir(cudalib_dir)): + attachments.append(f' listdir("{lib_dir}"):') + for node in sorted(os.listdir(lib_dir)): attachments.append(f" {node}") return None @@ -121,28 +115,30 @@ def __init__(self, libname: str): self.attachments = [] self.abs_path = None - if sys.platform == "win32": + if IS_WINDOWS: self.lib_searched_for = f"{libname}*.dll" - self.abs_path = _find_dll_using_nvidia_bin_dirs( - libname, self.lib_searched_for, self.error_messages, self.attachments - ) if self.abs_path is None: - if libname == "nvvm": - self.abs_path = _get_cuda_paths_info("nvvm", self.error_messages) - else: - self.abs_path = _find_dll_using_cudalib_dir(libname, self.error_messages, self.attachments) + self.abs_path = _find_dll_using_nvidia_bin_dirs( + libname, self.lib_searched_for, self.error_messages, self.attachments + ) else: self.lib_searched_for = f"lib{libname}.so" - self.abs_path = _find_so_using_nvidia_lib_dirs( - libname, self.lib_searched_for, self.error_messages, self.attachments - ) if self.abs_path is None: - if libname == "nvvm": - self.abs_path = _get_cuda_paths_info("nvvm", self.error_messages) - else: - self.abs_path = _find_so_using_cudalib_dir( - self.lib_searched_for, self.error_messages, self.attachments - ) + self.abs_path = _find_so_using_nvidia_lib_dirs( + libname, self.lib_searched_for, self.error_messages, self.attachments + ) + + def retry_with_cuda_home_priority_last(self): + cuda_home_lib_dir = _find_lib_dir_using_cuda_home(self.libname) + if cuda_home_lib_dir is not None: + if IS_WINDOWS: + self.abs_path = _find_dll_using_lib_dir( + cuda_home_lib_dir, self.libname, self.error_messages, self.attachments + ) + else: + self.abs_path = _find_so_using_lib_dir( + cuda_home_lib_dir, self.lib_searched_for, self.error_messages, self.attachments + ) def raise_if_abs_path_is_None(self): if self.abs_path: diff --git a/cuda_bindings/cuda/bindings/_path_finder/findlib.py b/cuda_bindings/cuda/bindings/_path_finder/findlib.py deleted file mode 100644 index 992a3940e..000000000 --- a/cuda_bindings/cuda/bindings/_path_finder/findlib.py +++ /dev/null @@ -1,97 +0,0 @@ -# SPDX-License-Identifier: BSD-2-Clause -# -# Forked from: -# https://github.com/numba/numba/blob/f0d24824fcd6a454827e3c108882395d00befc04/numba/misc/findlib.py -# -# Original LICENSE: -# Copyright (c) 2012, Anaconda, Inc. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# -# Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import os -import re -import sys - - -def get_lib_dirs(): - """ - Anaconda specific - """ - if sys.platform == "win32": - # on windows, historically `DLLs` has been used for CUDA libraries, - # since approximately CUDA 9.2, `Library\bin` has been used. - dirnames = ["DLLs", os.path.join("Library", "bin")] - else: - dirnames = [ - "lib", - ] - libdirs = [os.path.join(sys.prefix, x) for x in dirnames] - return libdirs - - -DLLNAMEMAP = { - "linux": r"lib%(name)s\.so\.%(ver)s$", - "linux2": r"lib%(name)s\.so\.%(ver)s$", - "linux-static": r"lib%(name)s\.a$", - "darwin": r"lib%(name)s\.%(ver)s\.dylib$", - "win32": r"%(name)s%(ver)s\.dll$", - "win32-static": r"%(name)s\.lib$", - "bsd": r"lib%(name)s\.so\.%(ver)s$", -} - -RE_VER = r"[0-9]*([_\.][0-9]+)*" - - -def find_lib(libname, libdir=None, platform=None, static=False): - platform = platform or sys.platform - platform = "bsd" if "bsd" in platform else platform - if static: - platform = f"{platform}-static" - if platform not in DLLNAMEMAP: - # Return empty list if platform name is undefined. - # Not all platforms define their static library paths. - return [] - pat = DLLNAMEMAP[platform] % {"name": libname, "ver": RE_VER} - regex = re.compile(pat) - return find_file(regex, libdir) - - -def find_file(pat, libdir=None): - if libdir is None: - libdirs = get_lib_dirs() - elif isinstance(libdir, str): - libdirs = [ - libdir, - ] - else: - libdirs = list(libdir) - files = [] - for ldir in libdirs: - try: - entries = os.listdir(ldir) - except FileNotFoundError: - continue - candidates = [os.path.join(ldir, ent) for ent in entries if pat.match(ent)] - files.extend([c for c in candidates if os.path.isfile(c)]) - return files diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py b/cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py index 4592f6c33..034b9d433 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py +++ b/cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py @@ -4,23 +4,19 @@ from dataclasses import dataclass from typing import Callable, Optional -from cuda.bindings._path_finder.supported_libs import DIRECT_DEPENDENCIES +from cuda.bindings._path_finder.supported_libs import DIRECT_DEPENDENCIES, IS_WINDOWS +if IS_WINDOWS: + import pywintypes -@dataclass -class LoadedDL: - """Represents a loaded dynamic library. + HandleType = pywintypes.HANDLE +else: + HandleType = int - Attributes: - handle: The library handle (can be converted to void* in Cython) - abs_path: The absolute path to the library file - was_already_loaded_from_elsewhere: Whether the library was already loaded - """ - # ATTENTION: To convert `handle` back to `void*` in cython: - # Linux: `cdef void* ptr = ` - # Windows: `cdef void* ptr = ` - handle: int +@dataclass +class LoadedDL: + handle: HandleType abs_path: Optional[str] was_already_loaded_from_elsewhere: bool diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_dl_windows.py b/cuda_bindings/cuda/bindings/_path_finder/load_dl_windows.py index 1f0c9c7e2..ec305be92 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/load_dl_windows.py +++ b/cuda_bindings/cuda/bindings/_path_finder/load_dl_windows.py @@ -35,7 +35,7 @@ def add_dll_directory(dll_abs_path: str) -> None: os.environ["PATH"] = dirpath if curr_path is None else os.pathsep.join((curr_path, dirpath)) -def abs_path_for_dynamic_library(handle: int) -> str: +def abs_path_for_dynamic_library(libname: str, handle: pywintypes.HANDLE) -> str: """Get the absolute path of a loaded dynamic library on Windows. Args: @@ -57,7 +57,8 @@ def abs_path_for_dynamic_library(handle: int) -> str: if n_chars == 0: raise OSError( - "GetModuleFileNameW failed. Long paths may require enabling the " + f"GetModuleFileNameW failed ({libname=!r}, {buf_size=}). " + "Long paths may require enabling the " "Windows 10+ long path registry setting. See: " "https://docs.python.org/3/using/windows.html#removing-the-max-path-limitation" ) @@ -99,7 +100,7 @@ def check_if_already_loaded_from_elsewhere(libname: str) -> Optional[LoadedDL]: except pywintypes.error: continue else: - return LoadedDL(handle, abs_path_for_dynamic_library(handle), True) + return LoadedDL(handle, abs_path_for_dynamic_library(libname, handle), True) return None @@ -116,9 +117,12 @@ def load_with_system_search(libname: str, _unused: str) -> Optional[LoadedDL]: from cuda.bindings._path_finder.supported_libs import SUPPORTED_WINDOWS_DLLS for dll_name in SUPPORTED_WINDOWS_DLLS.get(libname, ()): - handle = ctypes.windll.kernel32.LoadLibraryW(ctypes.c_wchar_p(dll_name)) - if handle: - return LoadedDL(handle, abs_path_for_dynamic_library(handle), False) + try: + handle = win32api.LoadLibraryEx(dll_name, 0, 0) + except pywintypes.error: + continue + else: + return LoadedDL(handle, abs_path_for_dynamic_library(libname, handle), False) return None diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py index 015c4cdf8..f8fe5ce4a 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py @@ -2,12 +2,12 @@ # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE import functools -import sys from cuda.bindings._path_finder.find_nvidia_dynamic_library import _find_nvidia_dynamic_library from cuda.bindings._path_finder.load_dl_common import LoadedDL, load_dependencies +from cuda.bindings._path_finder.supported_libs import IS_WINDOWS -if sys.platform == "win32": +if IS_WINDOWS: from cuda.bindings._path_finder.load_dl_windows import ( check_if_already_loaded_from_elsewhere, load_with_abs_path, @@ -38,6 +38,7 @@ def _load_nvidia_dynamic_library_no_cache(libname: str) -> LoadedDL: loaded = load_with_system_search(libname, found.lib_searched_for) if loaded is not None: return loaded + found.retry_with_cuda_home_priority_last() found.raise_if_abs_path_is_None() # Load the library from the found path diff --git a/cuda_bindings/cuda/bindings/_path_finder/supported_libs.py b/cuda_bindings/cuda/bindings/_path_finder/supported_libs.py index 6852c7fce..14dc98a96 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/supported_libs.py +++ b/cuda_bindings/cuda/bindings/_path_finder/supported_libs.py @@ -5,6 +5,8 @@ import sys +IS_WINDOWS = sys.platform == "win32" + SUPPORTED_LIBNAMES = ( # Core CUDA Runtime and Compiler "nvJitLink", @@ -65,7 +67,7 @@ + PARTIALLY_SUPPORTED_LIBNAMES_WINDOWS_ONLY ) -if sys.platform == "win32": +if IS_WINDOWS: PARTIALLY_SUPPORTED_LIBNAMES = PARTIALLY_SUPPORTED_LIBNAMES_WINDOWS else: PARTIALLY_SUPPORTED_LIBNAMES = PARTIALLY_SUPPORTED_LIBNAMES_LINUX @@ -109,6 +111,7 @@ # cuda_12.5.1_555.42.06_linux.run # cuda_12.6.2_560.35.03_linux.run # cuda_12.8.0_570.86.10_linux.run +# cuda_12.9.0_575.51.03_linux.run # Generated with toolshed/build_path_finder_sonames.py SUPPORTED_LINUX_SONAMES = { "cublas": ( @@ -230,6 +233,7 @@ # cuda_12.5.1_555.85_windows.exe # cuda_12.6.2_560.94_windows.exe # cuda_12.8.1_572.61_windows.exe +# cuda_12.9.0_576.02_windows.txt # Generated with toolshed/build_path_finder_dlls.py (WITH MANUAL EDITS) SUPPORTED_WINDOWS_DLLS = { "cublas": ( @@ -337,6 +341,7 @@ "nvvm64.dll", "nvvm64_33_0.dll", "nvvm64_40_0.dll", + "nvvm70.dll", ), } diff --git a/cuda_bindings/tests/run_python_code_safely.py b/cuda_bindings/tests/run_python_code_safely.py new file mode 100644 index 000000000..316cb3885 --- /dev/null +++ b/cuda_bindings/tests/run_python_code_safely.py @@ -0,0 +1,86 @@ +import multiprocessing +import queue # for Empty +import subprocess # nosec B404 +import sys +import traceback +from io import StringIO + + +class Worker: + def __init__(self, python_code, result_queue): + self.python_code = python_code + self.result_queue = result_queue + + def __call__(self): + # Capture stdout/stderr + old_stdout = sys.stdout + old_stderr = sys.stderr + sys.stdout = StringIO() + sys.stderr = StringIO() + + try: + exec(self.python_code, {"__name__": "__main__"}) # nosec B102 + returncode = 0 + except SystemExit as e: # Handle sys.exit() + returncode = e.code if isinstance(e.code, int) else 0 + except BaseException: + traceback.print_exc() + returncode = 1 + finally: + # Collect outputs and restore streams + stdout = sys.stdout.getvalue() + stderr = sys.stderr.getvalue() + sys.stdout = old_stdout + sys.stderr = old_stderr + try: # noqa: SIM105 + self.result_queue.put((returncode, stdout, stderr)) + except Exception: # nosec B110 + # If the queue is broken (e.g., parent gone), best effort logging + pass + + +def run_python_code_safely(python_code, *, timeout=None): + """Run Python code in a spawned subprocess, capturing stdout/stderr/output.""" + ctx = multiprocessing.get_context("spawn") + result_queue = ctx.Queue() + process = ctx.Process(target=Worker(python_code, result_queue)) + process.start() + + try: + process.join(timeout) + if process.is_alive(): + process.terminate() + process.join() + return subprocess.CompletedProcess( + args=[sys.executable, "-c", python_code], + returncode=-9, + stdout="", + stderr=f"Process timed out after {timeout} seconds and was terminated.", + ) + + try: + returncode, stdout, stderr = result_queue.get(timeout=1.0) + except (queue.Empty, EOFError): + return subprocess.CompletedProcess( + args=[sys.executable, "-c", python_code], + returncode=-999, + stdout="", + stderr="Process exited or crashed before returning results.", + ) + + return subprocess.CompletedProcess( + args=[sys.executable, "-c", python_code], + returncode=returncode, + stdout=stdout, + stderr=stderr, + ) + + finally: + try: + result_queue.close() + result_queue.join_thread() + except Exception: # nosec B110 + pass + if process.is_alive(): + process.kill() + process.join() diff --git a/cuda_bindings/tests/test_path_finder_find_load.py b/cuda_bindings/tests/test_path_finder_load.py similarity index 78% rename from cuda_bindings/tests/test_path_finder_find_load.py rename to cuda_bindings/tests/test_path_finder_load.py index 2a5f887fd..5c21e8a05 100644 --- a/cuda_bindings/tests/test_path_finder_find_load.py +++ b/cuda_bindings/tests/test_path_finder_load.py @@ -2,10 +2,10 @@ # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE import os -import subprocess # nosec B404 import sys import pytest +from run_python_code_safely import run_python_code_safely from cuda.bindings import path_finder from cuda.bindings._path_finder import supported_libs @@ -38,7 +38,7 @@ def test_all_libnames_expected_lib_symbols_consistency(): assert tuple(sorted(ALL_LIBNAMES)) == tuple(sorted(supported_libs.EXPECTED_LIB_SYMBOLS.keys())) -def _build_subprocess_failed_for_libname_message(libname, result): +def build_subprocess_failed_for_libname_message(libname, result): return ( f"Subprocess failed for {libname=!r} with exit code {result.returncode}\n" f"--- stdout-from-subprocess ---\n{result.stdout}\n" @@ -46,9 +46,8 @@ def _build_subprocess_failed_for_libname_message(libname, result): ) -@pytest.mark.parametrize("api", ("find", "load")) @pytest.mark.parametrize("libname", TEST_FIND_OR_LOAD_LIBNAMES) -def test_find_or_load_nvidia_dynamic_library(info_summary_append, api, libname): +def test_find_or_load_nvidia_dynamic_library(info_summary_append, libname): # We intentionally run each dynamic library operation in a subprocess # to ensure isolation of global dynamic linking state (e.g., dlopen handles). # Without subprocesses, loading/unloading libraries during testing could @@ -56,14 +55,8 @@ def test_find_or_load_nvidia_dynamic_library(info_summary_append, api, libname): # # Defining the subprocess code snippets as strings ensures each subprocess # runs a minimal, independent script tailored to the specific libname and API being tested. - if api == "find": - code = f"""\ -from cuda.bindings._path_finder.find_nvidia_dynamic_library import find_nvidia_dynamic_library -abs_path = find_nvidia_dynamic_library({libname!r}) -print(f"{{abs_path!r}}") -""" - else: - code = f"""\ + code = f"""\ +import os from cuda.bindings.path_finder import _load_nvidia_dynamic_library from cuda.bindings._path_finder.load_nvidia_dynamic_library import _load_nvidia_dynamic_library_no_cache @@ -78,19 +71,13 @@ def test_find_or_load_nvidia_dynamic_library(info_summary_append, api, libname): loaded_dl_no_cache = _load_nvidia_dynamic_library_no_cache({libname!r}) if not loaded_dl_no_cache.was_already_loaded_from_elsewhere: raise RuntimeError("loaded_dl_no_cache.was_already_loaded_from_elsewhere") -if loaded_dl_no_cache.abs_path != loaded_dl_fresh.abs_path: - raise RuntimeError(f"{{loaded_dl_no_cache.abs_path=!r}} != {{loaded_dl_fresh.abs_path=!r}}") +if not os.path.samefile(loaded_dl_no_cache.abs_path, loaded_dl_fresh.abs_path): + raise RuntimeError(f"not os.path.samefile({{loaded_dl_no_cache.abs_path=!r}}, {{loaded_dl_fresh.abs_path=!r}})") print(f"{{loaded_dl_fresh.abs_path!r}}") """ - result = subprocess.run( # nosec B603 - [sys.executable, "-c", code], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - encoding="utf-8", - timeout=30, # Ensure CI testing does not hang for an excessive amount of time. - ) + result = run_python_code_safely(code, timeout=30) if result.returncode == 0: info_summary_append(f"abs_path={result.stdout.rstrip()}") else: - raise RuntimeError(_build_subprocess_failed_for_libname_message(libname, result)) + raise RuntimeError(build_subprocess_failed_for_libname_message(libname, result)) diff --git a/toolshed/run_cuda_bindings_path_finder.py b/toolshed/run_cuda_bindings_path_finder.py index 19f43c288..ca2193a81 100644 --- a/toolshed/run_cuda_bindings_path_finder.py +++ b/toolshed/run_cuda_bindings_path_finder.py @@ -6,7 +6,7 @@ import traceback from cuda.bindings import path_finder -from cuda.bindings._path_finder import cuda_paths, supported_libs +from cuda.bindings._path_finder import supported_libs ALL_LIBNAMES = ( path_finder._SUPPORTED_LIBNAMES + supported_libs.PARTIALLY_SUPPORTED_LIBNAMES @@ -14,14 +14,12 @@ def run(args): - assert len(args) == 0 + if args: + libnames = args + else: + libnames = ALL_LIBNAMES - paths = cuda_paths.get_cuda_paths() - for k, v in paths.items(): - print(f"{k}: {v}", flush=True) - print() - - for libname in ALL_LIBNAMES: + for libname in libnames: print(f"{libname=}") try: loaded_dl = path_finder._load_nvidia_dynamic_library(libname)