fix windows build problem
FindDefinition committed Sep 24, 2022
1 parent 19a599e commit 77f1cf0
Showing 5 changed files with 29 additions and 20 deletions.
2 changes: 2 additions & 0 deletions spconv/algo.py
@@ -164,6 +164,7 @@ def cached_get_nvrtc_params(self, desp: GemmAlgoDesp, arch: Tuple[int, int], str
if key in self._nvrtc_caches:
return self._nvrtc_caches[key]
mod, ker = self._compile_nvrtc_module(desp)
print(f"Can't find algo {desp} in prebuilt. compile with nvrtc...")
nvrtc_params = _get_nvrtc_params(mod, ker, "gemm_kernel")
self._nvrtc_caches[key] = nvrtc_params
return nvrtc_params
@@ -288,6 +289,7 @@ def _cached_get_nvrtc_params(self, desp: GemmAlgoDesp, arch: Tuple[int,
if key in self._nvrtc_caches:
return self._nvrtc_caches[key]
mod, ker = self._compile_nvrtc_module(desp)
print(f"Can't find algo {desp} in prebuilt. compile with nvrtc...")
nvrtc_params = _get_nvrtc_params(mod, ker, "gemm_kernel")
self._nvrtc_caches[key] = nvrtc_params
return nvrtc_params
5 changes: 3 additions & 2 deletions spconv/csrc/sparse/all.py
@@ -126,6 +126,7 @@ def __init__(self):
defines.append(f"#define SPCONV_ALLOC_{to_snake_case(name).upper()} {pccm.literal(v)}")
define_str = "\n".join(defines)
self.add_global_code(define_str)
self.build_meta.add_global_cflags("cl", "/DNOMINMAX")
# for name in dir(AllocKeys):
# if not name.startswith("__"):
# v = getattr(AllocKeys, name)
@@ -1580,10 +1581,10 @@ def get_indice_gen_tensors_from_workspace(self):
}}
if (!subm){{
size_t pair_single_size = kv * int64_t(num_act_in);
auto ten = tv::from_blob(workspace, {{pair_single_size + 1}}, use_int64_hash_k ? tv::int64 : tv::int32, 0);
auto ten = tv::from_blob(workspace, {{int64_t(pair_single_size + 1)}}, use_int64_hash_k ? tv::int64 : tv::int32, 0);
res.insert({{{pccm.literal(AllocKeys.IndicePairsUniq)}, ten}});
workspace += ten.nbytes();
auto ten2 = tv::from_blob(workspace, {{pair_single_size + 1}}, use_int64_hash_k ? tv::int64 : tv::int32, 0);
auto ten2 = tv::from_blob(workspace, {{int64_t(pair_single_size + 1)}}, use_int64_hash_k ? tv::int64 : tv::int32, 0);
res.insert({{{pccm.literal(AllocKeys.IndicePairsUniqBackup)}, ten2}});
workspace += ten2.nbytes();
}}
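
Note on this file's changes: both are Windows (MSVC) build fixes. The new "/DNOMINMAX" cflag for cl keeps <windows.h> from defining its min/max function-style macros, which otherwise break std::min/std::max and std::numeric_limits<T>::max() anywhere in the generated C++; the int64_t(...) casts avoid a narrowing error in the brace-initialized tv::from_blob shape (see the note after the next file). Below is a minimal, illustrative C++ sketch of the macro clash only — it is not spconv code, and the _WIN32 guard and sample calls are assumptions:

// Why /DNOMINMAX (equivalently, defining NOMINMAX before <windows.h>) matters:
// without it, <windows.h> defines function-style `min`/`max` macros that
// break the std calls below at preprocessing time.
#define NOMINMAX
#include <algorithm>
#include <limits>
#ifdef _WIN32
#include <windows.h>
#endif

int main() {
    int a = std::min(1, 2);                   // fails to parse if the `min` macro is active
    int b = std::numeric_limits<int>::max();  // same problem for `max`
    (void)b;
    return a;
}
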
2 changes: 1 addition & 1 deletion spconv/csrc/sparse/alloc.py
@@ -220,7 +220,7 @@ def deallocate(self):
code.arg("ptr", "char *")
code.arg("num_bytes", "size_t")
code.raw(f"""
return allocator_.free_noexcept(tv::from_blob(ptr, {{num_bytes}}, tv::uint8, 0));
return allocator_.free_noexcept(tv::from_blob(ptr, {{int64_t(num_bytes)}}, tv::uint8, 0));
""")
return code

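Same fix as in all.py: num_bytes is a size_t, and using it directly in the brace-initialized shape of tv::from_blob is a narrowing conversion to int64_t. MSVC rejects that narrowing outright, which is presumably why the build only broke on Windows; the explicit int64_t(...) cast makes the shape entry the right type to begin with. A minimal sketch of the pattern — the Shape type here is a stand-in, not tv's actual shape class:

#include <cstddef>
#include <cstdint>
#include <initializer_list>
#include <vector>

// Stand-in for a shape type constructed from an initializer list of int64_t,
// loosely resembling how the generated code passes a shape to tv::from_blob.
struct Shape {
    std::vector<int64_t> dims;
    Shape(std::initializer_list<int64_t> d) : dims(d) {}
};

int main() {
    size_t num_bytes = 128;
    // Shape bad{num_bytes};          // narrowing size_t -> int64_t in braced
                                      // init; MSVC rejects this as an error
    Shape ok{int64_t(num_bytes)};     // explicit cast, as in the commit
    return int(ok.dims[0]) - 128;
}
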
17 changes: 8 additions & 9 deletions spconv/csrc/utils/pcc.py
@@ -64,7 +64,6 @@ def encode_with_order(self):
auto point_stride = points.stride(0);
int64_t final_size = sizeof(int64_t) * 5 + sizeof(float) * 3;
tv::Tensor res;
tv::ssprint(1);
tv::dispatch<float, double>(points.dtype(), [&](auto IP){{
using TPoint = TV_DECLTYPE(IP);
@@ -88,13 +87,13 @@ def encode_with_order(self):
auto pos_int = op::apply(floorf, pos_unit_voxel).cast<int32_t>();
auto pos_enc = (point / errors - pos_int.cast<float>() * float(256)).cast<uint8_t>();
tv::array<uint8_t, kEncodeDim> enc;
tv::if_constexpr<(kEncodeDim > 3)>([&](auto _){{
enc[0] = pos_enc[0];
enc[1] = pos_enc[1];
enc[2] = pos_enc[2];
if (kEncodeDim > 3){{
TInten inten = intensity_data[0];
enc = _(tv::array<uint8_t, kEncodeDim>{{pos_enc[0], pos_enc[1], pos_enc[2], uint8_t(inten)}});
intensity_data += inten_stride;
}}, [&](auto _){{
enc = _(tv::array<uint8_t, kEncodeDim>{{pos_enc[0], pos_enc[1], pos_enc[2]}});
}});
enc[3] = uint8_t(inten);
}}
auto pos_uint = pos_int + hash_t::direct_hash_offset();
uint64_t scalar = hash_t::encode(pos_int[0], pos_int[1], pos_int[2]);
auto iter = hash.find(scalar);
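
The hunk above replaces the tv::if_constexpr generic-lambda branch with a plain if (kEncodeDim > 3), sidestepping whatever MSVC disliked about the lambda-based static-if emulation. The trade-off is that both branches must now compile for every kEncodeDim, which works because enc[3] only has to be well-formed, not executed, when kEncodeDim == 3. A self-contained sketch of the rewritten shape (the function name and the std::array stand-in for tv::array are assumptions):

#include <array>
#include <cstdint>

template <int kEncodeDim>
std::array<uint8_t, kEncodeDim> make_enc(const std::array<uint8_t, 3>& pos_enc,
                                         uint8_t intensity) {
    std::array<uint8_t, kEncodeDim> enc{};
    enc[0] = pos_enc[0];
    enc[1] = pos_enc[1];
    enc[2] = pos_enc[2];
    if (kEncodeDim > 3) {
        enc[3] = intensity;   // dead branch when kEncodeDim == 3, but it still compiles
    }
    return enc;
}

int main() {
    auto xyz  = make_enc<3>({1, 2, 3}, 0);
    auto xyzi = make_enc<4>({1, 2, 3}, 7);
    return int(xyz[0] + xyzi[3]) - 8;
}
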
@@ -225,7 +224,7 @@ def decode(self):
error[2] = error_header[2];
res_ptr += sizeof(float) * 3;
tv::Tensor points;
tv::dispatch_int<static_cast<int>(EncodeType::XYZI_8), static_cast<int>(EncodeType::XYZ_8)>(static_cast<int>(type), [&](auto I){{
tv::dispatch_int<static_cast<int>(EncodeType::XYZI_8), static_cast<int>(EncodeType::XYZ_8)>(static_cast<int>(type), [&, error](auto I){{
constexpr int kTypeInt = TV_DECLTYPE(I)::value;
constexpr int kEncodeDim = kTypeInt == static_cast<int>(EncodeType::XYZI_8) ? 4 : 3;
points = tv::empty({{N, kEncodeDim}}, tv::float32);
@@ -241,7 +240,7 @@ def decode(self):
auto point_cur_ptr = points_ptr;
for (int j = 0; j < cluster_size; ++j){{
auto& enc = enc_ptr[j];
auto point = op::slice<0, 3>(enc).template cast<float>() * error + offset;
tv::array<float, 3> point = op::slice<0, 3>(enc).template cast<float>() * error + offset;
point_cur_ptr[0] = point[0];
point_cur_ptr[1] = point[1];
point_cur_ptr[2] = point[2];
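
The decode() changes look like MSVC workarounds as well: the dispatch_int lambda now captures error explicitly by value ([&, error] instead of [&]), and the decoded point gets an explicit tv::array<float, 3> type instead of auto. A hedged sketch of the capture pattern — dispatch_int below is a simplified stand-in, not spconv's tv::dispatch_int, and the MSVC rationale is an inference from the commit title:

#include <array>
#include <cstdio>
#include <type_traits>

// Simplified stand-in: forward a runtime value to a lambda as a compile-time constant.
template <class F>
void dispatch_int(int value, F&& f) {
    if (value == 3) f(std::integral_constant<int, 3>{});
    else            f(std::integral_constant<int, 4>{});
}

int main() {
    std::array<float, 3> error{0.1f, 0.2f, 0.3f};
    // Capturing `error` explicitly by value while keeping `&` for everything else
    // mirrors the `[&, error]` change; explicit captures in generic lambdas are a
    // common workaround when MSVC mishandles implicit captures.
    dispatch_int(4, [&, error](auto I) {
        constexpr int kEncodeDim = decltype(I)::value;
        std::array<float, 3> point{};            // explicit type instead of `auto`
        for (int i = 0; i < 3; ++i) point[i] = error[i] * float(kEncodeDim);
        std::printf("dim=%d point0=%f\n", kEncodeDim, point[0]);
    });
    return 0;
}
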
23 changes: 15 additions & 8 deletions test/test_all_algo.py
@@ -640,6 +640,7 @@ def _test_native_conv_cuda(subm: bool):

arch = torch.cuda.get_device_capability()
stream = get_current_stream()
force_nvrtc = False
for shape, bs, C, K, k, s, p, d, dtype in tqdm.tqdm(params_grid(
shapes, batchsizes, in_channels, out_channels, ksizes,
strides, paddings, dilations, dtypes)):
@@ -718,7 +719,8 @@ def _test_native_conv_cuda(subm: bool):
c_inds=out_indices,
hint=AlgoHint.Fowrard.value,
alpha=1.0,
beta=beta)
beta=beta,
force_nvrtc=force_nvrtc)
else:
GEMM.run_with_tuned_result(
BestAlgoByProfile(desp, tester.arch, 1),
@@ -735,7 +737,8 @@ def _test_native_conv_cuda(subm: bool):
c_inds=out_indices,
hint=AlgoHint.Fowrard.value,
alpha=1.0,
beta=beta)
beta=beta,
force_nvrtc=force_nvrtc)
inited = True
if bias is not None and tester.check_act:
InferenceOps.bias_add_act_inplace(output_tv, bias, tv.gemm.Activation.ReLU, 0, 0)
Expand Down Expand Up @@ -801,7 +804,8 @@ def _test_native_conv_cuda(subm: bool):
c_inds=inp_indices,
hint=AlgoHint.Fowrard.value,
alpha=1.0,
beta=beta)
beta=beta,
force_nvrtc=force_nvrtc)
else:
GEMM.run_with_tuned_result(
BestAlgoByProfile(desp, tester.arch, 1),
@@ -818,7 +822,8 @@ def _test_native_conv_cuda(subm: bool):
c_inds=inp_indices,
hint=AlgoHint.Fowrard.value,
alpha=1.0,
beta=beta)
beta=beta,
force_nvrtc=force_nvrtc)

inited = True
din_my = inp_tv.cpu().numpy()
Expand Down Expand Up @@ -879,7 +884,8 @@ def _test_native_conv_cuda(subm: bool):
c_inds=tv.Tensor(),
hint=AlgoHint.BackwardWeight.value,
alpha=1.0,
beta=beta)
beta=beta,
force_nvrtc=force_nvrtc)

else:
GEMM.run_with_tuned_result(BestAlgoByProfile(desp, tester.arch, 32),
@@ -896,7 +902,8 @@ def _test_native_conv_cuda(subm: bool):
b_inds=b_inds,
hint=AlgoHint.BackwardWeight.value,
alpha=1.0,
beta=beta)
beta=beta,
force_nvrtc=force_nvrtc)

dw_my = weight_tv.cpu().numpy()
if dtype != np.float16:
@@ -909,8 +916,8 @@ def _test_native_conv_cuda(subm: bool):

def test_all_algo_unit():
# for i in range(5):
_test_impgemm_conv_cuda(True)
_test_impgemm_conv_cuda(False)
# _test_impgemm_conv_cuda(True)
# _test_impgemm_conv_cuda(False)
_test_native_conv_cuda(True)
_test_native_conv_cuda(False)
