fix windows build problem
FindDefinition committed Sep 24, 2022
1 parent 19a599e commit 77f1cf0
Showing 5 changed files with 29 additions and 20 deletions.
2 changes: 2 additions & 0 deletions spconv/algo.py
@@ -164,6 +164,7 @@ def cached_get_nvrtc_params(self, desp: GemmAlgoDesp, arch: Tuple[int, int], str
if key in self._nvrtc_caches:
return self._nvrtc_caches[key]
mod, ker = self._compile_nvrtc_module(desp)
print(f"Can't find algo {desp} in prebuilt. compile with nvrtc...")
nvrtc_params = _get_nvrtc_params(mod, ker, "gemm_kernel")
self._nvrtc_caches[key] = nvrtc_params
return nvrtc_params
@@ -288,6 +289,7 @@ def _cached_get_nvrtc_params(self, desp: GemmAlgoDesp, arch: Tuple[int,
if key in self._nvrtc_caches:
return self._nvrtc_caches[key]
mod, ker = self._compile_nvrtc_module(desp)
print(f"Can't find algo {desp} in prebuilt. compile with nvrtc...")
nvrtc_params = _get_nvrtc_params(mod, ker, "gemm_kernel")
self._nvrtc_caches[key] = nvrtc_params
return nvrtc_params
5 changes: 3 additions & 2 deletions spconv/csrc/sparse/all.py
@@ -126,6 +126,7 @@ def __init__(self):
defines.append(f"#define SPCONV_ALLOC_{to_snake_case(name).upper()} {pccm.literal(v)}")
define_str = "\n".join(defines)
self.add_global_code(define_str)
self.build_meta.add_global_cflags("cl", "/DNOMINMAX")
# for name in dir(AllocKeys):
# if not name.startswith("__"):
# v = getattr(AllocKeys, name)
@@ -1580,10 +1581,10 @@ def get_indice_gen_tensors_from_workspace(self):
}}
if (!subm){{
size_t pair_single_size = kv * int64_t(num_act_in);
auto ten = tv::from_blob(workspace, {{pair_single_size + 1}}, use_int64_hash_k ? tv::int64 : tv::int32, 0);
auto ten = tv::from_blob(workspace, {{int64_t(pair_single_size + 1)}}, use_int64_hash_k ? tv::int64 : tv::int32, 0);
res.insert({{{pccm.literal(AllocKeys.IndicePairsUniq)}, ten}});
workspace += ten.nbytes();
auto ten2 = tv::from_blob(workspace, {{pair_single_size + 1}}, use_int64_hash_k ? tv::int64 : tv::int32, 0);
auto ten2 = tv::from_blob(workspace, {{int64_t(pair_single_size + 1)}}, use_int64_hash_k ? tv::int64 : tv::int32, 0);
res.insert({{{pccm.literal(AllocKeys.IndicePairsUniqBackup)}, ten2}});
workspace += ten2.nbytes();
}}
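
Note on this file's changes: both are Windows (MSVC) build fixes. The new "/DNOMINMAX" cflag for cl keeps <windows.h> from defining its min/max function-style macros, which otherwise break std::min/std::max and std::numeric_limits<T>::max() anywhere in the generated C++; the int64_t(...) casts avoid a narrowing error in the brace-initialized tv::from_blob shape (see the note after the next file). Below is a minimal, illustrative C++ sketch of the macro clash only — it is not spconv code, and the _WIN32 guard and sample calls are assumptions:

// Why /DNOMINMAX (equivalently, defining NOMINMAX before <windows.h>) matters:
// without it, <windows.h> defines function-style `min`/`max` macros that
// break the std calls below at preprocessing time.
#define NOMINMAX
#include <algorithm>
#include <limits>
#ifdef _WIN32
#include <windows.h>
#endif

int main() {
    int a = std::min(1, 2);                   // fails to parse if the `min` macro is active
    int b = std::numeric_limits<int>::max();  // same problem for `max`
    (void)b;
    return a;
}
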
2 changes: 1 addition & 1 deletion spconv/csrc/sparse/alloc.py
@@ -220,7 +220,7 @@ def deallocate(self):
code.arg("ptr", "char *")
code.arg("num_bytes", "size_t")
code.raw(f"""
return allocator_.free_noexcept(tv::from_blob(ptr, {{num_bytes}}, tv::uint8, 0));
return allocator_.free_noexcept(tv::from_blob(ptr, {{int64_t(num_bytes)}}, tv::uint8, 0));
""")
return code

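Same fix as in all.py: num_bytes is a size_t, and using it directly in the brace-initialized shape of tv::from_blob is a narrowing conversion to int64_t. MSVC rejects that narrowing outright, which is presumably why the build only broke on Windows; the explicit int64_t(...) cast makes the shape entry the right type to begin with. A minimal sketch of the pattern — the Shape type here is a stand-in, not tv's actual shape class:

#include <cstddef>
#include <cstdint>
#include <initializer_list>
#include <vector>

// Stand-in for a shape type constructed from an initializer list of int64_t,
// loosely resembling how the generated code passes a shape to tv::from_blob.
struct Shape {
    std::vector<int64_t> dims;
    Shape(std::initializer_list<int64_t> d) : dims(d) {}
};

int main() {
    size_t num_bytes = 128;
    // Shape bad{num_bytes};          // narrowing size_t -> int64_t in braced
                                      // init; MSVC rejects this as an error
    Shape ok{int64_t(num_bytes)};     // explicit cast, as in the commit
    return int(ok.dims[0]) - 128;
}
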
17 changes: 8 additions & 9 deletions spconv/csrc/utils/pcc.py
@@ -64,7 +64,6 @@ def encode_with_order(self):
auto point_stride = points.stride(0);
int64_t final_size = sizeof(int64_t) * 5 + sizeof(float) * 3;
tv::Tensor res;
tv::ssprint(1);
tv::dispatch<float, double>(points.dtype(), [&](auto IP){{
using TPoint = TV_DECLTYPE(IP);
@@ -88,13 +87,13 @@ def encode_with_order(self):
auto pos_int = op::apply(floorf, pos_unit_voxel).cast<int32_t>();
auto pos_enc = (point / errors - pos_int.cast<float>() * float(256)).cast<uint8_t>();
tv::array<uint8_t, kEncodeDim> enc;
tv::if_constexpr<(kEncodeDim > 3)>([&](auto _){{
enc[0] = pos_enc[0];
enc[1] = pos_enc[1];
enc[2] = pos_enc[2];
if (kEncodeDim > 3){{
TInten inten = intensity_data[0];
enc = _(tv::array<uint8_t, kEncodeDim>{{pos_enc[0], pos_enc[1], pos_enc[2], uint8_t(inten)}});
intensity_data += inten_stride;
}}, [&](auto _){{
enc = _(tv::array<uint8_t, kEncodeDim>{{pos_enc[0], pos_enc[1], pos_enc[2]}});
}});
enc[3] = uint8_t(inten);
}}
auto pos_uint = pos_int + hash_t::direct_hash_offset();
uint64_t scalar = hash_t::encode(pos_int[0], pos_int[1], pos_int[2]);
auto iter = hash.find(scalar);
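
The hunk above replaces the tv::if_constexpr generic-lambda branch with a plain if (kEncodeDim > 3), sidestepping whatever MSVC disliked about the lambda-based static-if emulation. The trade-off is that both branches must now compile for every kEncodeDim, which works because enc[3] only has to be well-formed, not executed, when kEncodeDim == 3. A self-contained sketch of the rewritten shape (the function name and the std::array stand-in for tv::array are assumptions):

#include <array>
#include <cstdint>

template <int kEncodeDim>
std::array<uint8_t, kEncodeDim> make_enc(const std::array<uint8_t, 3>& pos_enc,
                                         uint8_t intensity) {
    std::array<uint8_t, kEncodeDim> enc{};
    enc[0] = pos_enc[0];
    enc[1] = pos_enc[1];
    enc[2] = pos_enc[2];
    if (kEncodeDim > 3) {
        enc[3] = intensity;   // dead branch when kEncodeDim == 3, but it still compiles
    }
    return enc;
}

int main() {
    auto xyz  = make_enc<3>({1, 2, 3}, 0);
    auto xyzi = make_enc<4>({1, 2, 3}, 7);
    return int(xyz[0] + xyzi[3]) - 8;
}
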
@@ -225,7 +224,7 @@ def decode(self):
error[2] = error_header[2];
res_ptr += sizeof(float) * 3;
tv::Tensor points;
tv::dispatch_int<static_cast<int>(EncodeType::XYZI_8), static_cast<int>(EncodeType::XYZ_8)>(static_cast<int>(type), [&](auto I){{
tv::dispatch_int<static_cast<int>(EncodeType::XYZI_8), static_cast<int>(EncodeType::XYZ_8)>(static_cast<int>(type), [&, error](auto I){{
constexpr int kTypeInt = TV_DECLTYPE(I)::value;
constexpr int kEncodeDim = kTypeInt == static_cast<int>(EncodeType::XYZI_8) ? 4 : 3;
points = tv::empty({{N, kEncodeDim}}, tv::float32);
@@ -241,7 +240,7 @@ def decode(self):
auto point_cur_ptr = points_ptr;
for (int j = 0; j < cluster_size; ++j){{
auto& enc = enc_ptr[j];
auto point = op::slice<0, 3>(enc).template cast<float>() * error + offset;
tv::array<float, 3> point = op::slice<0, 3>(enc).template cast<float>() * error + offset;
point_cur_ptr[0] = point[0];
point_cur_ptr[1] = point[1];
point_cur_ptr[2] = point[2];
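
The decode() changes look like MSVC workarounds as well: the dispatch_int lambda now captures error explicitly by value ([&, error] instead of [&]), and the decoded point gets an explicit tv::array<float, 3> type instead of auto. A hedged sketch of the capture pattern — dispatch_int below is a simplified stand-in, not spconv's tv::dispatch_int, and the MSVC rationale is an inference from the commit title:

#include <array>
#include <cstdio>
#include <type_traits>

// Simplified stand-in: forward a runtime value to a lambda as a compile-time constant.
template <class F>
void dispatch_int(int value, F&& f) {
    if (value == 3) f(std::integral_constant<int, 3>{});
    else            f(std::integral_constant<int, 4>{});
}

int main() {
    std::array<float, 3> error{0.1f, 0.2f, 0.3f};
    // Capturing `error` explicitly by value while keeping `&` for everything else
    // mirrors the `[&, error]` change; explicit captures in generic lambdas are a
    // common workaround when MSVC mishandles implicit captures.
    dispatch_int(4, [&, error](auto I) {
        constexpr int kEncodeDim = decltype(I)::value;
        std::array<float, 3> point{};            // explicit type instead of `auto`
        for (int i = 0; i < 3; ++i) point[i] = error[i] * float(kEncodeDim);
        std::printf("dim=%d point0=%f\n", kEncodeDim, point[0]);
    });
    return 0;
}
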
23 changes: 15 additions & 8 deletions test/test_all_algo.py
@@ -640,6 +640,7 @@ def _test_native_conv_cuda(subm: bool):

arch = torch.cuda.get_device_capability()
stream = get_current_stream()
force_nvrtc = False
for shape, bs, C, K, k, s, p, d, dtype in tqdm.tqdm(params_grid(
shapes, batchsizes, in_channels, out_channels, ksizes,
strides, paddings, dilations, dtypes)):
@@ -718,7 +719,8 @@ def _test_native_conv_cuda(subm: bool):
c_inds=out_indices,
hint=AlgoHint.Fowrard.value,
alpha=1.0,
beta=beta)
beta=beta,
force_nvrtc=force_nvrtc)
else:
GEMM.run_with_tuned_result(
BestAlgoByProfile(desp, tester.arch, 1),
@@ -735,7 +737,8 @@ def _test_native_conv_cuda(subm: bool):
c_inds=out_indices,
hint=AlgoHint.Fowrard.value,
alpha=1.0,
beta=beta)
beta=beta,
force_nvrtc=force_nvrtc)
inited = True
if bias is not None and tester.check_act:
InferenceOps.bias_add_act_inplace(output_tv, bias, tv.gemm.Activation.ReLU, 0, 0)
Expand Down Expand Up @@ -801,7 +804,8 @@ def _test_native_conv_cuda(subm: bool):
c_inds=inp_indices,
hint=AlgoHint.Fowrard.value,
alpha=1.0,
beta=beta)
beta=beta,
force_nvrtc=force_nvrtc)
else:
GEMM.run_with_tuned_result(
BestAlgoByProfile(desp, tester.arch, 1),
@@ -818,7 +822,8 @@ def _test_native_conv_cuda(subm: bool):
c_inds=inp_indices,
hint=AlgoHint.Fowrard.value,
alpha=1.0,
beta=beta)
beta=beta,
force_nvrtc=force_nvrtc)

inited = True
din_my = inp_tv.cpu().numpy()
Expand Down Expand Up @@ -879,7 +884,8 @@ def _test_native_conv_cuda(subm: bool):
c_inds=tv.Tensor(),
hint=AlgoHint.BackwardWeight.value,
alpha=1.0,
beta=beta)
beta=beta,
force_nvrtc=force_nvrtc)

else:
GEMM.run_with_tuned_result(BestAlgoByProfile(desp, tester.arch, 32),
@@ -896,7 +902,8 @@ def _test_native_conv_cuda(subm: bool):
b_inds=b_inds,
hint=AlgoHint.BackwardWeight.value,
alpha=1.0,
beta=beta)
beta=beta,
force_nvrtc=force_nvrtc)

dw_my = weight_tv.cpu().numpy()
if dtype != np.float16:
@@ -909,8 +916,8 @@ def _test_native_conv_cuda(subm: bool):

def test_all_algo_unit():
# for i in range(5):
_test_impgemm_conv_cuda(True)
_test_impgemm_conv_cuda(False)
# _test_impgemm_conv_cuda(True)
# _test_impgemm_conv_cuda(False)
_test_native_conv_cuda(True)
_test_native_conv_cuda(False)
