Skip to content

Commit d1cfad6

Browse files
committed
[Code] Cuda backend clear && minor renaming
1 parent dcfb18e commit d1cfad6

23 files changed

+108
-646
lines changed

.gitignore

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
# IDE files
2-
/.idea
2+
/**/.idea/
33

44
# Build files
5-
/cmake-build-debug
6-
/build
5+
/**/cmake-build-debug/
6+
/**/*build*/
77

88
# Python cache
9-
/**/__pycache__/
9+
/**/__pycache__/

cubool/CMakeLists.txt

+13-22
Original file line numberDiff line numberDiff line change
@@ -69,19 +69,19 @@ if (CUBOOL_WITH_CUDA)
6969
set(CUBOOL_CUDA_SOURCES
7070
sources/cuda/cuda_backend.hpp
7171
sources/cuda/cuda_backend.cu
72-
sources/cuda/instance.hpp
73-
sources/cuda/instance.cu
74-
sources/cuda/instance.cpp
75-
sources/cuda/matrix_csr.hpp
76-
sources/cuda/matrix_csr.cu
77-
sources/cuda/matrix_csr_build.cu
78-
sources/cuda/matrix_csr_extract.cu
79-
sources/cuda/matrix_csr_ewiseadd.cu
80-
sources/cuda/matrix_csr_kronecker.cu
81-
sources/cuda/matrix_csr_multiply.cu
82-
sources/cuda/matrix_csr_transpose.cu
83-
sources/cuda/matrix_csr_reduce.cu
84-
sources/cuda/matrix_csr_extract_sub_matrix.cu
72+
sources/cuda/cuda_instance.hpp
73+
sources/cuda/cuda_instance.cu
74+
sources/cuda/cuda_instance.cpp
75+
sources/cuda/cuda_matrix.hpp
76+
sources/cuda/cuda_matrix.cu
77+
sources/cuda/cuda_matrix_build.cu
78+
sources/cuda/cuda_matrix_extract.cu
79+
sources/cuda/cuda_matrix_ewiseadd.cu
80+
sources/cuda/cuda_matrix_kronecker.cu
81+
sources/cuda/cuda_matrix_multiply.cu
82+
sources/cuda/cuda_matrix_transpose.cu
83+
sources/cuda/cuda_matrix_reduce.cu
84+
sources/cuda/cuda_matrix_extract_sub_matrix.cu
8585
sources/cuda/kernels/slow_sort.cuh
8686
sources/cuda/kernels/bin_search.cuh
8787
sources/cuda/kernels/sptranspose.cuh
@@ -146,15 +146,6 @@ if (CUBOOL_WITH_CUDA)
146146
set_target_properties(cubool PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
147147

148148
# Settings: https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
149-
#target_compile_options(cubool PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
150-
# # todo: fix this flag later -arch=sm_30 ?
151-
# # todo: can we omit arch flag?
152-
# -gencode=arch=compute_30,code=sm_30
153-
# -gencode=arch=compute_50,code=sm_50
154-
# -gencode=arch=compute_52,code=sm_52
155-
# -gencode=arch=compute_60,code=sm_60
156-
# -gencode=arch=compute_61,code=sm_61
157-
# -gencode=arch=compute_61,code=compute_61>)
158149

159150
target_compile_options(cubool PRIVATE $<$<COMPILE_LANGUAGE:CUDA>: -use_fast_math -Xptxas -O2>)
160151

cubool/sources/cuda/cuda_backend.cu

+6-6
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,15 @@
2323
/**********************************************************************************/
2424

2525
#include <cuda/cuda_backend.hpp>
26-
#include <cuda/matrix_csr.hpp>
26+
#include <cuda/cuda_matrix.hpp>
2727
#include <core/library.hpp>
2828
#include <io/logger.hpp>
2929

3030
namespace cubool {
3131

3232
void CudaBackend::initialize(hints initHints) {
33-
if (Instance::isCudaDeviceSupported()) {
34-
mInstance = new Instance(initHints & CUBOOL_HINT_GPU_MEM_MANAGED);
33+
if (CudaInstance::isCudaDeviceSupported()) {
34+
mInstance = new CudaInstance(initHints & CUBOOL_HINT_GPU_MEM_MANAGED);
3535
}
3636

3737
// No device. Cannot init this backend
@@ -58,7 +58,7 @@ namespace cubool {
5858

5959
MatrixBase *CudaBackend::createMatrix(size_t nrows, size_t ncols) {
6060
mMatCount++;
61-
return new MatrixCsr(nrows, ncols, getInstance());
61+
return new CudaMatrix(nrows, ncols, getInstance());
6262
}
6363

6464
void CudaBackend::releaseMatrix(MatrixBase *matrixBase) {
@@ -67,10 +67,10 @@ namespace cubool {
6767
}
6868

6969
void CudaBackend::queryCapabilities(cuBool_DeviceCaps &caps) {
70-
Instance::queryDeviceCapabilities(caps);
70+
CudaInstance::queryDeviceCapabilities(caps);
7171
}
7272

73-
Instance & CudaBackend::getInstance() {
73+
CudaInstance & CudaBackend::getInstance() {
7474
return *mInstance;
7575
}
7676

cubool/sources/cuda/cuda_backend.hpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
#define CUBOOL_CUDA_BACKEND_HPP
2727

2828
#include <backend/backend_base.hpp>
29-
#include <cuda/instance.hpp>
29+
#include <cuda/cuda_instance.hpp>
3030

3131
namespace cubool {
3232

@@ -45,10 +45,10 @@ namespace cubool {
4545
void releaseMatrix(MatrixBase *matrixBase) override;
4646
void queryCapabilities(cuBool_DeviceCaps& caps) override;
4747

48-
Instance& getInstance();
48+
CudaInstance& getInstance();
4949

5050
private:
51-
Instance* mInstance;
51+
CudaInstance* mInstance;
5252
size_t mMatCount = 0;
5353
};
5454

cubool/sources/cuda/instance.cpp cubool/sources/cuda/cuda_instance.cpp

+10-10
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,16 @@
2222
/* SOFTWARE. */
2323
/**********************************************************************************/
2424

25-
#include <cuda/instance.hpp>
25+
#include <cuda/cuda_instance.hpp>
2626
#include <core/error.hpp>
2727
#include <string>
2828
#include <cstdlib>
2929

3030
namespace cubool {
3131

32-
volatile Instance* Instance::gInstance = nullptr;
32+
volatile CudaInstance* CudaInstance::gInstance = nullptr;
3333

34-
Instance::Instance(bool useManagedMemory) {
34+
CudaInstance::CudaInstance(bool useManagedMemory) {
3535
gInstance = this;
3636
mMemoryType = useManagedMemory? Managed: Default;
3737

@@ -41,28 +41,28 @@ namespace cubool {
4141
#endif // CUBOOL_DEBUG
4242
}
4343

44-
void Instance::allocate(void* &ptr, size_t size) const {
44+
void CudaInstance::allocate(void* &ptr, size_t size) const {
4545
ptr = malloc(size);
4646
CHECK_RAISE_ERROR(ptr != nullptr, MemOpFailed, "Failed to allocate memory on the CPU");
4747
mHostAllocCount++;
4848
}
4949

50-
void Instance::deallocate(void* ptr) const {
50+
void CudaInstance::deallocate(void* ptr) const {
5151
CHECK_RAISE_ERROR(ptr != nullptr, InvalidArgument, "Passed null ptr to free");
5252
free(ptr);
5353
mHostAllocCount--;
5454
}
5555

56-
Instance& Instance::getInstanceRef() {
56+
CudaInstance& CudaInstance::getInstanceRef() {
5757
CHECK_RAISE_ERROR(gInstance != nullptr, InvalidState, "No instance in the system");
58-
return (Instance&) *gInstance;
58+
return (CudaInstance&) *gInstance;
5959
}
6060

61-
Instance* Instance::getInstancePtr() {
62-
return (Instance* ) gInstance;
61+
CudaInstance* CudaInstance::getInstancePtr() {
62+
return (CudaInstance* ) gInstance;
6363
}
6464

65-
bool Instance::isInstancePresent() {
65+
bool CudaInstance::isInstancePresent() {
6666
return gInstance != nullptr;
6767
}
6868

cubool/sources/cuda/instance.cu cubool/sources/cuda/cuda_instance.cu

+8-8
Original file line numberDiff line numberDiff line change
@@ -22,22 +22,22 @@
2222
/* SOFTWARE. */
2323
/**********************************************************************************/
2424

25-
#include <cuda/instance.hpp>
26-
#include <cuda/matrix_dense.hpp>
25+
#include <cuda/cuda_instance.hpp>
2726
#include <core/error.hpp>
2827
#include <string>
28+
#include <cassert>
2929
#include <cstring>
3030

3131
namespace cubool {
3232

33-
Instance::~Instance() {
33+
CudaInstance::~CudaInstance() {
3434
assert(mHostAllocCount == 0);
3535
assert(mDeviceAllocCount == 0);
3636

3737
gInstance = nullptr;
3838
}
3939

40-
void Instance::allocateOnGpu(void* &ptr, size_t size) const {
40+
void CudaInstance::allocateOnGpu(void* &ptr, size_t size) const {
4141
cudaError error;
4242

4343
switch (mMemoryType) {
@@ -59,7 +59,7 @@ namespace cubool {
5959
mDeviceAllocCount++;
6060
}
6161

62-
void Instance::deallocateOnGpu(void* ptr) const {
62+
void CudaInstance::deallocateOnGpu(void* ptr) const {
6363
cudaError error = cudaFree(ptr);
6464

6565
if (error != cudaSuccess) {
@@ -70,7 +70,7 @@ namespace cubool {
7070
mDeviceAllocCount--;
7171
}
7272

73-
void Instance::syncHostDevice() const {
73+
void CudaInstance::syncHostDevice() const {
7474
cudaError error = cudaDeviceSynchronize();
7575

7676
if (error != cudaSuccess) {
@@ -79,13 +79,13 @@ namespace cubool {
7979
}
8080
}
8181

82-
bool Instance::isCudaDeviceSupported() {
82+
bool CudaInstance::isCudaDeviceSupported() {
8383
int device;
8484
cudaError error = cudaGetDevice(&device);
8585
return error == cudaSuccess;
8686
}
8787

88-
void Instance::queryDeviceCapabilities(cuBool_DeviceCaps &deviceCaps) {
88+
void CudaInstance::queryDeviceCapabilities(cuBool_DeviceCaps &deviceCaps) {
8989
const unsigned long long KiB = 1024;
9090

9191
int device;

cubool/sources/cuda/instance.hpp cubool/sources/cuda/cuda_instance.hpp

+11-11
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@
2222
/* SOFTWARE. */
2323
/**********************************************************************************/
2424

25-
#ifndef CUBOOL_INSTANCE_HPP
26-
#define CUBOOL_INSTANCE_HPP
25+
#ifndef CUBOOL_CUDA_INSTANCE_HPP
26+
#define CUBOOL_CUDA_INSTANCE_HPP
2727

2828
#include <core/config.hpp>
2929
#include <unordered_set>
@@ -33,17 +33,17 @@ namespace cubool {
3333
/**
3434
* Manages global state for various internal operations.
3535
*/
36-
class Instance {
36+
class CudaInstance {
3737
public:
3838
enum MemType {
3939
Default,
4040
Managed
4141
};
4242

43-
explicit Instance(bool useManagedMemory);
44-
Instance(const Instance& other) = delete;
45-
Instance(Instance&& other) noexcept = delete;
46-
~Instance();
43+
explicit CudaInstance(bool useManagedMemory);
44+
CudaInstance(const CudaInstance& other) = delete;
45+
CudaInstance(CudaInstance&& other) noexcept = delete;
46+
~CudaInstance();
4747

4848
// For custom host & device allocators
4949
void allocate(void* &ptr, size_t s) const;
@@ -55,18 +55,18 @@ namespace cubool {
5555

5656
static bool isCudaDeviceSupported();
5757
static void queryDeviceCapabilities(cuBool_DeviceCaps& deviceCaps);
58-
static Instance& getInstanceRef();
59-
static Instance* getInstancePtr();
58+
static CudaInstance& getInstanceRef();
59+
static CudaInstance* getInstancePtr();
6060
static bool isInstancePresent();
6161

6262
private:
6363
MemType mMemoryType = Default;
6464
mutable size_t mHostAllocCount = 0;
6565
mutable size_t mDeviceAllocCount = 0;
6666

67-
static volatile Instance* gInstance;
67+
static volatile CudaInstance* gInstance;
6868
};
6969

7070
}
7171

72-
#endif //CUBOOL_INSTANCE_HPP
72+
#endif //CUBOOL_CUDA_INSTANCE_HPP

cubool/sources/cuda/matrix_csr.cu cubool/sources/cuda/cuda_matrix.cu

+14-14
Original file line numberDiff line numberDiff line change
@@ -22,24 +22,24 @@
2222
/* SOFTWARE. */
2323
/**********************************************************************************/
2424

25-
#include <cuda/matrix_csr.hpp>
25+
#include <cuda/cuda_matrix.hpp>
2626
#include <core/error.hpp>
2727
#include <utils/timer.hpp>
2828
#include <algorithm>
2929

3030
namespace cubool {
3131

32-
MatrixCsr::MatrixCsr(size_t nrows, size_t ncols, Instance &instance) : mInstance(instance) {
32+
CudaMatrix::CudaMatrix(size_t nrows, size_t ncols, CudaInstance &instance) : mInstance(instance) {
3333
mNrows = nrows;
3434
mNcols = ncols;
3535
}
3636

37-
void MatrixCsr::setElement(index i, index j) {
37+
void CudaMatrix::setElement(index i, index j) {
3838
RAISE_ERROR(NotImplemented, "This function is not supported for this matrix class");
3939
}
4040

41-
void MatrixCsr::clone(const MatrixBase &otherBase) {
42-
auto other = dynamic_cast<const MatrixCsr*>(&otherBase);
41+
void CudaMatrix::clone(const MatrixBase &otherBase) {
42+
auto other = dynamic_cast<const CudaMatrix*>(&otherBase);
4343

4444
CHECK_RAISE_ERROR(other != nullptr, InvalidArgument, "Passed matrix does not belong to csr matrix class");
4545
CHECK_RAISE_ERROR(other != this, InvalidArgument, "Matrices must differ");
@@ -58,14 +58,14 @@ namespace cubool {
5858
this->mMatrixImpl = other->mMatrixImpl;
5959
}
6060

61-
void MatrixCsr::resizeStorageToDim() const {
61+
void CudaMatrix::resizeStorageToDim() const {
6262
if (mMatrixImpl.is_zero_dim()) {
6363
// If actual storage was not allocated, allocate one for an empty matrix
6464
mMatrixImpl = std::move(MatrixImplType(mNrows, mNcols));
6565
}
6666
}
6767

68-
void MatrixCsr::clearAndResizeStorageToDim() const {
68+
void CudaMatrix::clearAndResizeStorageToDim() const {
6969
if (mMatrixImpl.m_vals > 0) {
7070
// Release only if have some nnz values
7171
mMatrixImpl.zero_dim();
@@ -75,27 +75,27 @@ namespace cubool {
7575
this->resizeStorageToDim();
7676
}
7777

78-
index MatrixCsr::getNrows() const {
78+
index CudaMatrix::getNrows() const {
7979
return mNrows;
8080
}
8181

82-
index MatrixCsr::getNcols() const {
82+
index CudaMatrix::getNcols() const {
8383
return mNcols;
8484
}
8585

86-
index MatrixCsr::getNvals() const {
86+
index CudaMatrix::getNvals() const {
8787
return mMatrixImpl.m_vals;
8888
}
8989

90-
bool MatrixCsr::isStorageEmpty() const {
90+
bool CudaMatrix::isStorageEmpty() const {
9191
return mMatrixImpl.is_zero_dim();
9292
}
9393

94-
bool MatrixCsr::isMatrixEmpty() const {
94+
bool CudaMatrix::isMatrixEmpty() const {
9595
return mMatrixImpl.m_vals == 0;
9696
}
9797

98-
void MatrixCsr::transferToDevice(const std::vector<index> &rowOffsets, const std::vector<index> &colIndices) const {
98+
void CudaMatrix::transferToDevice(const std::vector<index> &rowOffsets, const std::vector<index> &colIndices) const {
9999
// Create device buffers and copy data from the cpu side
100100
thrust::device_vector<index, DeviceAlloc<index>> rowsDeviceVec(rowOffsets.size());
101101
thrust::device_vector<index, DeviceAlloc<index>> colsDeviceVec(colIndices.size());
@@ -107,7 +107,7 @@ namespace cubool {
107107
mMatrixImpl = std::move(MatrixImplType(std::move(colsDeviceVec), std::move(rowsDeviceVec), getNrows(), getNcols(), colIndices.size()));
108108
}
109109

110-
void MatrixCsr::transferFromDevice(std::vector<index> &rowOffsets, std::vector<index> &colIndices) const {
110+
void CudaMatrix::transferFromDevice(std::vector<index> &rowOffsets, std::vector<index> &colIndices) const {
111111
rowOffsets.resize(mMatrixImpl.m_row_index.size());
112112
colIndices.resize(mMatrixImpl.m_col_index.size());
113113

0 commit comments

Comments
 (0)