Skip to content

Commit 67bc43c

Browse files
lwfacebook-github-bot
authored and committed
Handle case of CUDA driver available but no GPUs (#399)
Summary: Pull Request resolved: #399 Fixes #393. Fixes pytorch/pytorch#60578. Reviewed By: beauby Differential Revision: D29363848 fbshipit-source-id: 9007cdbdbb76ec81842965a6c48dbf525f69f185
1 parent 12ec726 commit 67bc43c

File tree

1 file changed

+15
-1
lines changed

1 file changed

+15
-1
lines changed

tensorpipe/common/cuda_lib.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,13 @@
3333

3434
namespace tensorpipe {
3535

36+
// Error type created when the CUDA driver's init call (`lib.init(0)`) returns
// CUDA_ERROR_NO_DEVICE, i.e. the driver is present but found no devices.
class NoDevicesError final : public BaseError {
37+
public:
38+
// Returns the fixed, human-readable description of this error.
std::string what() const override {
39+
return "The CUDA driver failed to init because it didn't find any device";
40+
}
41+
};
42+
3643
// Master list of all symbols we care about from libcuda.
3744

3845
#define TP_FORALL_CUDA_SYMBOLS(_) \
@@ -114,7 +121,14 @@ class CudaLib {
114121
}
115122
TP_FORALL_CUDA_SYMBOLS(TP_LOAD_SYMBOL)
116123
#undef TP_LOAD_SYMBOL
117-
TP_CUDA_DRIVER_CHECK(lib, lib.init(0));
124+
CUresult result = lib.init(0);
125+
// If the driver doesn't find any devices it fails to init (beats me why)
126+
// but we must support this case, by disabling the channels, rather than
127+
// throwing. Hence we treat it as if we couldn't find the driver.
128+
if (result == CUDA_ERROR_NO_DEVICE) {
129+
return std::make_tuple(TP_CREATE_ERROR(NoDevicesError), CudaLib());
130+
}
131+
TP_CUDA_DRIVER_CHECK(lib, result);
118132
return std::make_tuple(Error::kSuccess, std::move(lib));
119133
}
120134

0 commit comments

Comments
 (0)