Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Kmcuda for windows bugfixes #62

Open
wants to merge 24 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
6ad81ee
added Visual Studio solution that becomes a basis for next changes
pavlexander Dec 6, 2018
4a5d640
rearranged files to correct projects
pavlexander Dec 6, 2018
2f79ba5
solution reorganization
pavlexander Dec 6, 2018
0feeb60
solution reorganization
pavlexander Dec 6, 2018
229b494
Revert "solution reorganization"
pavlexander Dec 6, 2018
a63db99
Revert "solution reorganization"
pavlexander Dec 6, 2018
ace13d7
Revert "rearranged files to correct projects"
pavlexander Dec 6, 2018
ff01ede
Revert "added Visual Studio solution that becomes a basis for next cha…
pavlexander Dec 6, 2018
3f94191
added solution to support further changes
pavlexander Dec 6, 2018
88f8beb
added fix for blockIdx being an invalid identifier
pavlexander Dec 6, 2018
8adc482
removed " __attribute__((unused))" as it is not being supported on Win
pavlexander Dec 6, 2018
f2d24c5
added fix for ternary operator, which is not being compiled on Win
pavlexander Dec 6, 2018
b2eca87
applied a fix for Pi math function
pavlexander Dec 6, 2018
4675d43
fix for "min" not being a part of std
pavlexander Dec 6, 2018
2cab6ba
added a fix (temporary?), for cases when CUDA_ARCH variable is not be…
pavlexander Dec 6, 2018
024b630
remove obsolete files
pavlexander Dec 6, 2018
1d3ec12
applied VisualStudio solution related changes
pavlexander Dec 6, 2018
d4525d6
undone changes to files, which are irrelevant to kmcuda patch for win…
pavlexander Dec 6, 2018
6235c4b
added support for building solution on Windows
pavlexander Dec 10, 2018
ffc0ce8
Merge branch 'cuda-windows-bugfixes-release' of https://github.com/pa…
pavlexander Dec 10, 2018
4f6deca
added support for building solution on Windows
Dec 10, 2018
4ea50fe
Merge branch 'cuda-windows-bugfixes-release' of https://github.com/pa…
Dec 10, 2018
ff707ab
added support for building solution on Windows (3)
Dec 10, 2018
681e0fc
added support for building solution on Windows (4)
pavlexander Dec 10, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 14 additions & 10 deletions src/kmcuda.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ static KMCUDAResult check_kmeans_args(
if (yinyang_t < 0 || yinyang_t > 0.5) {
return kmcudaInvalidArguments;
}
#if CUDA_ARCH < 60
#if defined(__CUDA_ARCH__) && CUDA_ARCH < 60
if (fp16x2) {
INFO("CUDA device arch %d does not support fp16\n", CUDA_ARCH);
return kmcudaInvalidArguments;
Expand Down Expand Up @@ -83,14 +83,18 @@ static std::vector<int> setup_devices(uint32_t device, int device_ptrs, int verb
dev, cudaGetErrorString(err));
devs.pop_back();
}
if (props.major != (CUDA_ARCH / 10) || props.minor != (CUDA_ARCH % 10)) {
INFO("compute capability mismatch for device %d: wanted %d.%d, have "
"%d.%d\n>>>> you may want to build kmcuda with -DCUDA_ARCH=%d "
"(refer to \"Building\" in README.md)\n",
dev, CUDA_ARCH / 10, CUDA_ARCH % 10, props.major, props.minor,
props.major * 10 + props.minor);
devs.pop_back();
}

#if defined(__CUDA_ARCH__)
if (props.major != (CUDA_ARCH / 10) || props.minor != (CUDA_ARCH % 10)) {
INFO("compute capability mismatch for device %d: wanted %d.%d, have "
"%d.%d\n>>>> you may want to build kmcuda with -DCUDA_ARCH=%d "
"(refer to \"Building\" in README.md)\n",
dev, CUDA_ARCH / 10, CUDA_ARCH % 10, props.major, props.minor,
props.major * 10 + props.minor);
devs.pop_back();
}
#endif

}
device >>= 1;
}
Expand Down Expand Up @@ -559,7 +563,7 @@ static KMCUDAResult check_knn_args(
neighbors == nullptr) {
return kmcudaInvalidArguments;
}
#if CUDA_ARCH < 60
#if defined(__CUDA_ARCH__) && CUDA_ARCH < 60
if (fp16x2) {
INFO("CUDA device arch %d does not support fp16\n", CUDA_ARCH);
return kmcudaInvalidArguments;
Expand Down
2 changes: 2 additions & 0 deletions src/kmcuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@

#include <stdint.h>

#include<device_launch_parameters.h>

/// All possible error codes in public API.
typedef enum {
/// Everything's all right.
Expand Down
1 change: 1 addition & 0 deletions src/knn.cu
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <cfloat>
#include <algorithm>

#include "private.h"
#include "metric_abstraction.h"
Expand Down
9 changes: 6 additions & 3 deletions src/metric_abstraction.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
// distance and normalization functions.
//

#define _USE_MATH_DEFINES
#include <math.h>

#ifndef KMCUDA_METRIC_ABSTRACTION_H
#define KMCUDA_METRIC_ABSTRACTION_H

Expand Down Expand Up @@ -169,7 +172,7 @@ struct METRIC<kmcudaDistanceMetricCosine, F> {
}

FPATTR static typename HALF<F>::type distance(
F sqr1 __attribute__((unused)), F sqr2 __attribute__((unused)), F prod) {
F sqr1, F sqr2, F prod) {
float fp = _float(_fin(prod));
if (fp >= 1.f) return _half<F>(0.f);
if (fp <= -1.f) return _half<F>(M_PI);
Expand Down Expand Up @@ -252,7 +255,7 @@ struct METRIC<kmcudaDistanceMetricCosine, F> {
return acos(partial);
}

FPATTR static void normalize(uint32_t count __attribute__((unused)), float *vec) {
FPATTR static void normalize(uint32_t count, float *vec) {
// Kahan summation with inverted c
float norm = 0, corr = 0;
#pragma unroll 4
Expand All @@ -272,7 +275,7 @@ struct METRIC<kmcudaDistanceMetricCosine, F> {
}

#if CUDA_ARCH >= 60
FPATTR static void normalize(uint32_t count __attribute__((unused)), half2 *vec) {
FPATTR static void normalize(uint32_t count, half2 *vec) {
// We really have to calculate norm in 32-bit floats because the maximum
// value which 16-bit float may represent is 2^16.
float norm = 0, corr = 0;
Expand Down
5 changes: 4 additions & 1 deletion src/wrappers.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@ template <typename T>
class unique_devptr : public unique_devptr_parent<T> {
public:
explicit unique_devptr(T *ptr, bool fake = false) : unique_devptr_parent<T>(
ptr, fake? [](T*){} : [](T *p){ cudaFree(p); }) {}
ptr,
(fake == true)
? (std::function<void(T*)>)([](T*) {})
: [](T *p) { cudaFree(p); }) {}
};

/// std::vector of unique_devptr-s. Used to pass device arrays inside .cu
Expand Down