Merge pull request #75 from junjihashimoto/fix/pybind

austinvhuang · web-flow · commit 6447d851581d · 2025-02-08T08:28:41.000-05:00
Fix pybind: link against webgpu_dawn and return Kernel by value from create_kernel
diff --git a/bindings/haskell/gpu-cpp.cabal b/bindings/haskell/gpu-cpp.cabal
@@ -26,7 +26,7 @@ library
     hs-source-dirs:   src
     default-language: Haskell2010
     ghc-options:      -optcxx-std=c++17
-    extra-libraries:  dawn
+    extra-libraries:  webgpu_dawn
 
 executable gpu-cpp
     import:           warnings
diff --git a/bindings/python/Makefile b/bindings/python/Makefile
@@ -10,15 +10,14 @@ else
     STDLIB := -stdlib=libc++
 endif
 
-FLAGS=-shared -fPIC -std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -L$(GPUCPP)/third_party/lib -ldawn \
+FLAGS=-shared -fPIC -std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -L$(GPUCPP)/third_party/lib -lwebgpu_dawn \
   `python3 -m pybind11 --includes` \
-  `python3-config --include --ldflags --embed`
+  `python3-config --includes --ldflags`
 
 SUFFIX=$(shell $(PYTHON)-config --extension-suffix)
 
 gpu_cpp$(SUFFIX): gpu_cpp.cpp 
 	$(CXX) $(FLAGS) -o $@ $<
-	install_name_tool -change @rpath/libdawn.dylib $(LIBDIR)/libdawn.dylib gpu_cpp$(SUFFIX)
 
 test: test_gpu_cpp.py gpu_cpp$(SUFFIX)
 	$(PYTHON) test_gpu_cpp.py
diff --git a/bindings/python/gpu_cpp.cpp b/bindings/python/gpu_cpp.cpp
@@ -40,7 +40,7 @@ KernelCode* py_createKernelCode(const std::string &pData, size_t workgroupSize,
   return new KernelCode(pData, workgroupSize, (NumType)precision);
 }
 
-Kernel* py_createKernel(Context *ctx, const KernelCode *code,
+Kernel py_createKernel(Context *ctx, const KernelCode *code,
                         // const Tensor *dataBindings, size_t numTensors,
                         const py::list& dataBindings_py,
                         // const size_t *viewOffsets,
@@ -54,7 +54,7 @@ Kernel* py_createKernel(Context *ctx, const KernelCode *code,
   for (auto item : viewOffsets_py) {
     viewOffsets.push_back(item.cast<size_t>());
   }
-  return new Kernel(createKernel(*ctx, *code, bindings.data(), bindings.size(), viewOffsets.data(), vector_to_shape(totalWorkgroups)));
+  return createKernel(*ctx, *code, bindings.data(), bindings.size(), viewOffsets.data(), vector_to_shape(totalWorkgroups));
 }
 
 Tensor* py_createTensor(Context *ctx, const std::vector<int> &dims, int dtype) {
@@ -82,9 +82,9 @@ struct GpuAsync {
   }
 };
 
-GpuAsync* py_dispatchKernel(Context *ctx, Kernel *kernel) {
+GpuAsync* py_dispatchKernel(Context *ctx, Kernel kernel) {
   auto async = new GpuAsync();
-  dispatchKernel(*ctx, *kernel, async->promise);
+  dispatchKernel(*ctx, kernel, async->promise);
   return async;
 }
 
@@ -96,12 +96,12 @@ PYBIND11_MODULE(gpu_cpp, m) {
     m.doc() = "gpu.cpp plugin";
     py::class_<Context>(m, "Context");
     py::class_<Tensor>(m, "Tensor");
-    py::class_<Kernel>(m, "Kernel");
+    py::class_<RawKernel, std::shared_ptr<RawKernel>>(m, "Kernel");
     py::class_<KernelCode>(m, "KernelCode");
     py::class_<GpuAsync>(m, "GpuAsync");
     m.def("create_context", &py_createContext, py::return_value_policy::take_ownership);
     m.def("create_tensor", &py_createTensor, py::return_value_policy::take_ownership);
-    m.def("create_kernel", &py_createKernel, py::return_value_policy::take_ownership);
+    m.def("create_kernel", &py_createKernel);
     m.def("create_kernel_code", &py_createKernelCode, py::return_value_policy::take_ownership);
     m.def("dispatch_kernel", &py_dispatchKernel, py::return_value_policy::take_ownership);
     m.def("wait", &py_wait, "Wait for GPU");