diff --git a/src/kmcuda.cc b/src/kmcuda.cc index df8cbbe..46b32b2 100644 --- a/src/kmcuda.cc +++ b/src/kmcuda.cc @@ -51,7 +51,7 @@ static KMCUDAResult check_kmeans_args( if (yinyang_t < 0 || yinyang_t > 0.5) { return kmcudaInvalidArguments; } -#if CUDA_ARCH < 60 +#if defined(__CUDA_ARCH__) && CUDA_ARCH < 60 if (fp16x2) { INFO("CUDA device arch %d does not support fp16\n", CUDA_ARCH); return kmcudaInvalidArguments; @@ -83,14 +83,18 @@ static std::vector setup_devices(uint32_t device, int device_ptrs, int verb dev, cudaGetErrorString(err)); devs.pop_back(); } - if (props.major != (CUDA_ARCH / 10) || props.minor != (CUDA_ARCH % 10)) { - INFO("compute capability mismatch for device %d: wanted %d.%d, have " - "%d.%d\n>>>> you may want to build kmcuda with -DCUDA_ARCH=%d " - "(refer to \"Building\" in README.md)\n", - dev, CUDA_ARCH / 10, CUDA_ARCH % 10, props.major, props.minor, - props.major * 10 + props.minor); - devs.pop_back(); - } + +#if defined(__CUDA_ARCH__) + if (props.major != (CUDA_ARCH / 10) || props.minor != (CUDA_ARCH % 10)) { + INFO("compute capability mismatch for device %d: wanted %d.%d, have " + "%d.%d\n>>>> you may want to build kmcuda with -DCUDA_ARCH=%d " + "(refer to \"Building\" in README.md)\n", + dev, CUDA_ARCH / 10, CUDA_ARCH % 10, props.major, props.minor, + props.major * 10 + props.minor); + devs.pop_back(); + } +#endif + } device >>= 1; } @@ -559,7 +563,7 @@ static KMCUDAResult check_knn_args( neighbors == nullptr) { return kmcudaInvalidArguments; } -#if CUDA_ARCH < 60 +#if defined(__CUDA_ARCH__) && CUDA_ARCH < 60 if (fp16x2) { INFO("CUDA device arch %d does not support fp16\n", CUDA_ARCH); return kmcudaInvalidArguments; diff --git a/src/kmcuda.h b/src/kmcuda.h index 8b013b9..72f2bf2 100644 --- a/src/kmcuda.h +++ b/src/kmcuda.h @@ -37,6 +37,8 @@ #include +#include + /// All possible error codes in public API. typedef enum { /// Everything's all right. diff --git a/src/knn.cu b/src/knn.cu index a25c7b1..c26b1da 100644 --- a/src/knn.cu +++ b/src/knn.cu @@ -1,4 +1,5 @@ #include +#include #include "private.h" #include "metric_abstraction.h" diff --git a/src/metric_abstraction.h b/src/metric_abstraction.h index 38ea3e8..7668a95 100644 --- a/src/metric_abstraction.h +++ b/src/metric_abstraction.h @@ -2,6 +2,9 @@ // distance and normalization functions. // +#define _USE_MATH_DEFINES +#include + #ifndef KMCUDA_METRIC_ABSTRACTION_H #define KMCUDA_METRIC_ABSTRACTION_H @@ -169,7 +172,7 @@ struct METRIC { } FPATTR static typename HALF::type distance( - F sqr1 __attribute__((unused)), F sqr2 __attribute__((unused)), F prod) { + F sqr1, F sqr2, F prod) { float fp = _float(_fin(prod)); if (fp >= 1.f) return _half(0.f); if (fp <= -1.f) return _half(M_PI); @@ -252,7 +255,7 @@ struct METRIC { return acos(partial); } - FPATTR static void normalize(uint32_t count __attribute__((unused)), float *vec) { + FPATTR static void normalize(uint32_t count, float *vec) { // Kahan summation with inverted c float norm = 0, corr = 0; #pragma unroll 4 @@ -272,7 +275,7 @@ struct METRIC { } #if CUDA_ARCH >= 60 - FPATTR static void normalize(uint32_t count __attribute__((unused)), half2 *vec) { + FPATTR static void normalize(uint32_t count, half2 *vec) { // We really have to calculate norm in 32-bit floats because the maximum // value which 16-bit float may represent is 2^16. float norm = 0, corr = 0; diff --git a/src/wrappers.h b/src/wrappers.h index cec03e1..d083160 100644 --- a/src/wrappers.h +++ b/src/wrappers.h @@ -17,7 +17,10 @@ template class unique_devptr : public unique_devptr_parent { public: explicit unique_devptr(T *ptr, bool fake = false) : unique_devptr_parent( - ptr, fake? [](T*){} : [](T *p){ cudaFree(p); }) {} + ptr, + (fake == true) + ? (std::function)([](T*) {}) + : [](T *p) { cudaFree(p); }) {} }; /// std::vector of unique_devptr-s. Used to pass device arrays inside .cu