diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5d1e691..d515d1e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,6 +11,7 @@ sofa_find_package(Sofa.GL QUIET)
set(PLUGIN_SPH_SRC_DIR src/SofaSphFluid)
set(HEADER_FILES
${PLUGIN_SPH_SRC_DIR}/config.h.in
+ ${PLUGIN_SPH_SRC_DIR}/initSPHFluid.h
${PLUGIN_SPH_SRC_DIR}/ParticleSink.h
${PLUGIN_SPH_SRC_DIR}/ParticleSink.inl
${PLUGIN_SPH_SRC_DIR}/ParticleSource.h
@@ -74,3 +75,5 @@ sofa_create_package_with_targets(
INCLUDE_INSTALL_DIR ${PROJECT_NAME}
RELOCATABLE "plugins"
)
+
+sofa_add_subdirectory(plugin extensions/CUDA SofaSphFluid.CUDA)
diff --git a/extensions/CUDA/CMakeLists.txt b/extensions/CUDA/CMakeLists.txt
new file mode 100644
index 0000000..8acc8bd
--- /dev/null
+++ b/extensions/CUDA/CMakeLists.txt
@@ -0,0 +1,48 @@
+cmake_minimum_required(VERSION 3.12)
+project(SofaSphFluid.CUDA CUDA CXX)
+
+set(HEADER_FILES
+ src/SofaSphFluid/CUDA/init.h
+ src/SofaSphFluid/CUDA/config.h.in
+
+ src/SofaSphFluid/CUDA/CudaParticleSource.h
+ src/SofaSphFluid/CUDA/CudaParticleSource.inl
+ src/SofaSphFluid/CUDA/CudaSPHFluidForceField.h
+ src/SofaSphFluid/CUDA/CudaSPHFluidForceField.inl
+ src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.h
+ src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.inl
+ src/SofaSphFluid/CUDA/CudaSpatialGridContainer.h
+ src/SofaSphFluid/CUDA/CudaSpatialGridContainer.inl
+)
+
+set(SOURCE_FILES
+ src/SofaSphFluid/CUDA/init.cpp
+
+ src/SofaSphFluid/CUDA/CudaParticleSource.cpp
+ src/SofaSphFluid/CUDA/CudaSPHFluidForceField.cpp
+ src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.cpp
+ src/SofaSphFluid/CUDA/CudaSpatialGridContainer.cpp
+)
+
+set(CUDA_SOURCES
+ src/SofaSphFluid/CUDA/CudaParticleSource.cu
+ src/SofaSphFluid/CUDA/CudaSPHFluidForceField.cu
+ src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.cu
+ src/SofaSphFluid/CUDA/CudaSpatialGridContainer.cu
+)
+
+sofa_find_package(SofaSphFluid REQUIRED)
+sofa_find_package(SofaCUDA REQUIRED)
+
+add_library(${PROJECT_NAME} SHARED ${HEADER_FILES} ${SOURCE_FILES} ${CUDA_SOURCES})
+target_link_libraries(${PROJECT_NAME} SofaSphFluid)
+target_link_libraries(${PROJECT_NAME} SofaCUDA)
+
+sofa_create_package_with_targets(
+ PACKAGE_NAME ${PROJECT_NAME}
+ PACKAGE_VERSION ${Sofa_VERSION}
+ TARGETS ${PROJECT_NAME} AUTO_SET_TARGET_PROPERTIES
+ INCLUDE_SOURCE_DIR "src"
+ INCLUDE_INSTALL_DIR "${PROJECT_NAME}"
+ RELOCATABLE "plugins"
+)
diff --git a/extensions/CUDA/SofaSphFluid.CUDAConfig.cmake.in b/extensions/CUDA/SofaSphFluid.CUDAConfig.cmake.in
new file mode 100644
index 0000000..67b3bbe
--- /dev/null
+++ b/extensions/CUDA/SofaSphFluid.CUDAConfig.cmake.in
@@ -0,0 +1,9 @@
+# CMake package configuration file for the SofaSphFluid.CUDA library
+
+@PACKAGE_GUARD@
+@PACKAGE_INIT@
+
+find_package(SofaSphFluid QUIET REQUIRED)
+find_package(SofaCUDA QUIET REQUIRED)
+
+check_required_components(SofaSphFluid.CUDA)
diff --git a/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticleSource.cpp b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticleSource.cpp
new file mode 100644
index 0000000..a7d1714
--- /dev/null
+++ b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticleSource.cpp
@@ -0,0 +1,56 @@
+/******************************************************************************
+* SOFA, Simulation Open-Framework Architecture *
+* (c) 2006 INRIA, USTL, UJF, CNRS, MGH *
+* *
+* This program is free software; you can redistribute it and/or modify it *
+* under the terms of the GNU Lesser General Public License as published by *
+* the Free Software Foundation; either version 2.1 of the License, or (at *
+* your option) any later version. *
+* *
+* This program is distributed in the hope that it will be useful, but WITHOUT *
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or *
+* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License *
+* for more details. *
+* *
+* You should have received a copy of the GNU Lesser General Public License *
+* along with this program. If not, see . *
+*******************************************************************************
+* Authors: The SOFA Team and external contributors (see Authors.txt) *
+* *
+* Contact information: contact@sofa-framework.org *
+******************************************************************************/
+#include
+#include "CudaParticleSource.inl"
+#include
+#include
+#include
+
+namespace sofa
+{
+
+
+namespace core::behavior
+{
+template class ProjectiveConstraintSet;
+#ifdef SOFA_GPU_CUDA_DOUBLE
+template class ProjectiveConstraintSet;
+#endif
+} // namespace core::behavior
+
+
+namespace gpu::cuda
+{
+
+
+
+int ParticleSourceCudaClass = core::RegisterObject("Supports GPU-side computations using CUDA")
+ .add< component::misc::ParticleSource >()
+#ifdef SOFA_GPU_CUDA_DOUBLE
+ .add< component::misc::ParticleSource >()
+#endif // SOFA_GPU_CUDA_DOUBLE
+ ;
+
+} // namespace gpu::cuda
+
+
+} // namespace sofa
diff --git a/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticleSource.cu b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticleSource.cu
new file mode 100644
index 0000000..bca3121
--- /dev/null
+++ b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticleSource.cu
@@ -0,0 +1,132 @@
+/******************************************************************************
+* SOFA, Simulation Open-Framework Architecture *
+* (c) 2006 INRIA, USTL, UJF, CNRS, MGH *
+* *
+* This program is free software; you can redistribute it and/or modify it *
+* under the terms of the GNU Lesser General Public License as published by *
+* the Free Software Foundation; either version 2.1 of the License, or (at *
+* your option) any later version. *
+* *
+* This program is distributed in the hope that it will be useful, but WITHOUT *
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or *
+* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License *
+* for more details. *
+* *
+* You should have received a copy of the GNU Lesser General Public License *
+* along with this program. If not, see . *
+*******************************************************************************
+* Authors: The SOFA Team and external contributors (see Authors.txt) *
+* *
+* Contact information: contact@sofa-framework.org *
+******************************************************************************/
+#include
+#include
+#include "cuda.h"
+
+#if defined(__cplusplus) && CUDA_VERSION < 2000
+namespace sofa
+{
+namespace gpu
+{
+namespace cuda
+{
+#endif
+
+extern "C"
+{
+
+ void ParticleSourceCuda3f_fillValues(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, float fx, float fy, float fz);
+ void ParticleSourceCuda3f_copyValuesWithOffset(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, const void* src, float fx, float fy, float fz);
+
+#ifdef SOFA_GPU_CUDA_DOUBLE
+
+ void ParticleSourceCuda3d_fillValues(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, double fx, double fy, double fz);
+ void ParticleSourceCuda3d_copyValuesWithOffset(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, const void* src, double fx, double fy, double fz);
+
+#endif // SOFA_GPU_CUDA_DOUBLE
+}
+
+//////////////////////
+// GPU-side methods //
+//////////////////////
+
+
+template
+__global__ void ParticleSourceCuda3t_fillValues_kernel(unsigned int totalsize, unsigned int subsetsize, real* dest, const unsigned int* indices, real fx, real fy, real fz)
+{
+ unsigned int index0 = blockIdx.x * BSIZE;
+ unsigned int index = index0+threadIdx.x;
+ if (index < subsetsize)
+ {
+ unsigned int dindex = indices[index];
+ if (dindex < totalsize)
+ {
+ unsigned int dindex3 = dindex * 3;
+ dest[dindex3+0] = fx;
+ dest[dindex3+1] = fy;
+ dest[dindex3+2] = fz;
+ }
+ }
+}
+
+template
+__global__ void ParticleSourceCuda3t_copyValuesWithOffset_kernel(unsigned int totalsize, unsigned int subsetsize, real* dest, const unsigned int* indices, const real* src, real fx, real fy, real fz)
+{
+ unsigned int index0 = blockIdx.x * BSIZE;
+ unsigned int index = index0+threadIdx.x;
+ if (index < subsetsize)
+ {
+ unsigned int dindex = indices[index];
+ if (dindex < totalsize)
+ {
+ unsigned int dindex3 = dindex * 3;
+ unsigned int index3 = index * 3;
+ dest[dindex3+0] = src[index3+0]+fx;
+ dest[dindex3+1] = src[index3+1]+fy;
+ dest[dindex3+2] = src[index3+2]+fz;
+ }
+ }
+}
+
+//////////////////////
+// CPU-side methods //
+//////////////////////
+
+
+void ParticleSourceCuda3f_fillValues(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, float fx, float fy, float fz)
+{
+ dim3 threads(BSIZE,1);
+ dim3 grid((subsetsize+BSIZE-1)/BSIZE,1);
+ {ParticleSourceCuda3t_fillValues_kernel<<< grid, threads >>>(totalsize, subsetsize, (float*)dest, (const unsigned int*)indices, fx, fy, fz); mycudaDebugError("ParticleSourceCuda3t_fillValues_kernel");}
+}
+
+void ParticleSourceCuda3f_copyValuesWithOffset(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, const void* src, float fx, float fy, float fz)
+{
+ dim3 threads(BSIZE,1);
+ dim3 grid((subsetsize+BSIZE-1)/BSIZE,1);
+ {ParticleSourceCuda3t_copyValuesWithOffset_kernel<<< grid, threads >>>(totalsize, subsetsize, (float*)dest, (const unsigned int*)indices, (const float*)src, fx, fy, fz); mycudaDebugError("ParticleSourceCuda3t_copyValuesWithOffset_kernel");}
+}
+
+#ifdef SOFA_GPU_CUDA_DOUBLE
+
+void ParticleSourceCuda3d_fillValues(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, double fx, double fy, double fz)
+{
+ dim3 threads(BSIZE,1);
+ dim3 grid((subsetsize+BSIZE-1)/BSIZE,1);
+ {ParticleSourceCuda3t_fillValues_kernel<<< grid, threads >>>(totalsize, subsetsize, (double*)dest, (const unsigned int*)indices, fx, fy, fz); mycudaDebugError("ParticleSourceCuda3t_fillValues_kernel");}
+}
+
+void ParticleSourceCuda3d_copyValuesWithOffset(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, const void* src, double fx, double fy, double fz)
+{
+ dim3 threads(BSIZE,1);
+ dim3 grid((subsetsize+BSIZE-1)/BSIZE,1);
+ {ParticleSourceCuda3t_copyValuesWithOffset_kernel<<< grid, threads >>>(totalsize, subsetsize, (double*)dest, (const unsigned int*)indices, (const double*)src, fx, fy, fz); mycudaDebugError("ParticleSourceCuda3t_copyValuesWithOffset_kernel");}
+}
+
+#endif // SOFA_GPU_CUDA_DOUBLE
+
+#if defined(__cplusplus) && CUDA_VERSION < 2000
+} // namespace cuda
+} // namespace gpu
+} // namespace sofa
+#endif
diff --git a/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticleSource.h b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticleSource.h
new file mode 100644
index 0000000..37847f0
--- /dev/null
+++ b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticleSource.h
@@ -0,0 +1,27 @@
+/******************************************************************************
+* SOFA, Simulation Open-Framework Architecture *
+* (c) 2006 INRIA, USTL, UJF, CNRS, MGH *
+* *
+* This program is free software; you can redistribute it and/or modify it *
+* under the terms of the GNU Lesser General Public License as published by *
+* the Free Software Foundation; either version 2.1 of the License, or (at *
+* your option) any later version. *
+* *
+* This program is distributed in the hope that it will be useful, but WITHOUT *
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or *
+* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License *
+* for more details. *
+* *
+* You should have received a copy of the GNU Lesser General Public License *
+* along with this program. If not, see . *
+*******************************************************************************
+* Authors: The SOFA Team and external contributors (see Authors.txt) *
+* *
+* Contact information: contact@sofa-framework.org *
+******************************************************************************/
+#ifndef SOFA_GPU_CUDA_CUDAPARTICLESOURCE_H
+#define SOFA_GPU_CUDA_CUDAPARTICLESOURCE_H
+
+#include
+
+#endif
diff --git a/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticleSource.inl b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticleSource.inl
new file mode 100644
index 0000000..9a66fdb
--- /dev/null
+++ b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticleSource.inl
@@ -0,0 +1,138 @@
+/******************************************************************************
+* SOFA, Simulation Open-Framework Architecture *
+* (c) 2006 INRIA, USTL, UJF, CNRS, MGH *
+* *
+* This program is free software; you can redistribute it and/or modify it *
+* under the terms of the GNU Lesser General Public License as published by *
+* the Free Software Foundation; either version 2.1 of the License, or (at *
+* your option) any later version. *
+* *
+* This program is distributed in the hope that it will be useful, but WITHOUT *
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or *
+* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License *
+* for more details. *
+* *
+* You should have received a copy of the GNU Lesser General Public License *
+* along with this program. If not, see . *
+*******************************************************************************
+* Authors: The SOFA Team and external contributors (see Authors.txt) *
+* *
+* Contact information: contact@sofa-framework.org *
+******************************************************************************/
+#ifndef SOFA_GPU_CUDA_CUDAPARTICLESOURCE_INL
+#define SOFA_GPU_CUDA_CUDAPARTICLESOURCE_INL
+
+#include "CudaParticleSource.h"
+#include
+
+namespace sofa
+{
+
+
+namespace gpu::cuda
+{
+
+extern "C"
+{
+
+ void ParticleSourceCuda3f_fillValues(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, float fx, float fy, float fz);
+ void ParticleSourceCuda3f_copyValuesWithOffset(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, const void* src, float fx, float fy, float fz);
+
+
+#ifdef SOFA_GPU_CUDA_DOUBLE
+
+ void ParticleSourceCuda3d_fillValues(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, double fx, double fy, double fz);
+ void ParticleSourceCuda3d_copyValuesWithOffset(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, const void* src, double fx, double fy, double fz);
+
+#endif // SOFA_GPU_CUDA_DOUBLE
+}
+
+} // namespace gpu::cuda
+
+
+namespace component::misc
+{
+
+using namespace gpu::cuda;
+
+// template <>
+// void ParticleSource::projectResponse(VecDeriv& res)
+// {
+// if (!this->mstate) return;
+// const VecIndex& lastparticles = this->lastparticles.getValue();
+// if (lastparticles.empty()) return;
+// double time = getContext()->getTime();
+// if (time < f_start.getValue() || time > f_stop.getValue()) return;
+// // constraint the last values
+// //mycudaMemset(((Deriv*)res.deviceWrite())+lastparticles[0], 0, lastparticles.size()*sizeof(Deriv));
+// ParticleSourceCuda3f_fillValues(res.size(), lastparticles.size(), res.deviceWrite(), lastparticles.deviceRead(), 0, 0, 0);
+// }
+
+// template <>
+// void ParticleSource::projectVelocity(VecDeriv& res)
+// {
+// if (!this->mstate) return;
+// const VecIndex& lastparticles = this->lastparticles.getValue();
+// if (lastparticles.empty()) return;
+// double time = getContext()->getTime();
+// if (time < f_start.getValue() || time > f_stop.getValue()) return;
+// // constraint the last values
+// Deriv vel = f_velocity.getValue();
+// #if 1
+// //mycudaMemset(((Deriv*)res.deviceWrite())+lastparticles[0], 0, lastparticles.size()*sizeof(Coord));
+// ParticleSourceCuda3f_fillValues(res.size(), lastparticles.size(), res.deviceWrite(), lastparticles.deviceRead(), vel[0], vel[1], vel[2]);
+// #else
+// for (unsigned int s=0; s
+// void ParticleSource::projectPosition(VecCoord& res)
+// {
+// if (!this->mstate) return;
+// const VecIndex& lastparticles = this->lastparticles.getValue();
+// if (lastparticles.empty()) return;
+// double time = getContext()->getTime();
+// if (time < f_start.getValue() || time > f_stop.getValue()) return;
+// // constraint the last values
+// Deriv vel = f_velocity.getValue();
+// vel *= (time-lasttime);
+// //mycudaMemset(((Deriv*)res.deviceWrite())+lastparticles[0], 0, lastparticles.size()*sizeof(Coord));
+// ParticleSourceCuda3f_copyValuesWithOffset(res.size(), lastparticles.size(), res.deviceWrite(), lastparticles.deviceRead(), lastpos.deviceRead(), vel[0], vel[1], vel[2]);
+// }
+
+
+// #ifdef SOFA_GPU_CUDA_DOUBLE
+
+// template <>
+// void ParticleSource::projectResponse(VecDeriv& res)
+// {
+// if (!this->mstate) return;
+// const VecIndex& lastparticles = this->lastparticles.getValue();
+// if (lastparticles.empty()) return;
+// double time = getContext()->getTime();
+// if (time < f_start.getValue() || time > f_stop.getValue()) return;
+// // constraint the last values
+// mycudaMemset(((Deriv*)res.deviceWrite())+lastparticles[0], 0, lastparticles.size()*sizeof(Coord));
+// }
+
+// template <>
+// void ParticleSource::projectVelocity(VecDeriv& /*res*/)
+// {
+// }
+
+// template <>
+// void ParticleSource::projectPosition(VecDeriv& /*res*/)
+// {
+// }
+
+// #endif // SOFA_GPU_CUDA_DOUBLE
+
+} // namespace component::misc
+
+
+} // namespace sofa
+
+#endif
diff --git a/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.cpp b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.cpp
new file mode 100644
index 0000000..68d2d48
--- /dev/null
+++ b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.cpp
@@ -0,0 +1,43 @@
+/******************************************************************************
+* SOFA, Simulation Open-Framework Architecture *
+* (c) 2006 INRIA, USTL, UJF, CNRS, MGH *
+* *
+* This program is free software; you can redistribute it and/or modify it *
+* under the terms of the GNU Lesser General Public License as published by *
+* the Free Software Foundation; either version 2.1 of the License, or (at *
+* your option) any later version. *
+* *
+* This program is distributed in the hope that it will be useful, but WITHOUT *
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or *
+* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License *
+* for more details. *
+* *
+* You should have received a copy of the GNU Lesser General Public License *
+* along with this program. If not, see . *
+*******************************************************************************
+* Authors: The SOFA Team and external contributors (see Authors.txt) *
+* *
+* Contact information: contact@sofa-framework.org *
+******************************************************************************/
+#include
+#include "CudaParticlesRepulsionForceField.inl"
+#include
+#include
+#include
+
+
+namespace sofa::gpu::cuda
+{
+
+int ParticlesRepulsionForceFieldCudaClass = core::RegisterObject("Supports GPU-side computations using CUDA")
+ .add< component::forcefield::ParticlesRepulsionForceField >()
+#ifdef SOFA_GPU_CUDA_DOUBLE
+ .add< component::forcefield::ParticlesRepulsionForceField >()
+#endif // SOFA_GPU_CUDA_DOUBLE
+ ;
+
+} // namespace sofa::gpu::cuda
+
+
+
+
diff --git a/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.cu b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.cu
new file mode 100644
index 0000000..cafd2e8
--- /dev/null
+++ b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.cu
@@ -0,0 +1,320 @@
+/******************************************************************************
+* SOFA, Simulation Open-Framework Architecture *
+* (c) 2006 INRIA, USTL, UJF, CNRS, MGH *
+* *
+* This program is free software; you can redistribute it and/or modify it *
+* under the terms of the GNU Lesser General Public License as published by *
+* the Free Software Foundation; either version 2.1 of the License, or (at *
+* your option) any later version. *
+* *
+* This program is distributed in the hope that it will be useful, but WITHOUT *
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or *
+* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License *
+* for more details. *
+* *
+* You should have received a copy of the GNU Lesser General Public License *
+* along with this program. If not, see . *
+*******************************************************************************
+* Authors: The SOFA Team and external contributors (see Authors.txt) *
+* *
+* Contact information: contact@sofa-framework.org *
+******************************************************************************/
+#include
+#include
+#include "cuda.h"
+
+#if defined(__cplusplus) && CUDA_VERSION < 2000
+namespace sofa
+{
+namespace gpu
+{
+namespace cuda
+{
+#endif
+
+template
+class GPURepulsion
+{
+public:
+ real d;
+ real d2;
+ real stiffness;
+ real damping;
+};
+
+typedef GPURepulsion GPURepulsion3f;
+typedef GPURepulsion GPURepulsion3d;
+
+extern "C"
+{
+ void ParticlesRepulsionForceFieldCuda3f_addForce (unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3f* repulsion, void* f, const void* x, const void* v );
+ void ParticlesRepulsionForceFieldCuda3f_addDForce(unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3f* repulsion, void* f, const void* x, const void* dx);
+
+#ifdef SOFA_GPU_CUDA_DOUBLE
+
+ void ParticlesRepulsionForceFieldCuda3d_addForce (unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3d* repulsion, void* f, const void* x, const void* v );
+ void ParticlesRepulsionForceFieldCuda3d_addDForce(unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3d* repulsion, void* f, const void* x, const void* dx);
+
+#endif // SOFA_GPU_CUDA_DOUBLE
+}
+
+//////////////////////
+// GPU-side methods //
+//////////////////////
+
+template
+__device__ void ParticlesRepulsionCalcForce(CudaVec3 x1, CudaVec3 v1, CudaVec3 x2, CudaVec3 v2, CudaVec3& force, GPURepulsion& rep)
+{
+ CudaVec3 n = x2-x1;
+ real d2 = norm2(n);
+ if (d2 < rep.d2)
+ {
+ CudaVec3 vi = v2-v1;
+ real inv_d = rsqrtf(d2);
+ n *= inv_d;
+ real d = d2*inv_d - rep.d;
+ real forceIntensity = rep.stiffness*d;
+ real dampingIntensity = rep.damping*d;
+ force += n*forceIntensity - vi*dampingIntensity;
+ }
+}
+
+template
+__device__ void ParticlesRepulsionCalcDForce(CudaVec3 x1, CudaVec3 dx1, CudaVec3 x2, CudaVec3 dx2, CudaVec3& dforce, GPURepulsion& rep)
+{
+ CudaVec3 n = x2-x1;
+ real d2 = norm2(n);
+ if (d2 < rep.d2)
+ {
+ CudaVec3 dxi = dx2-dx1;
+ //real inv_d = rsqrtf(d2);
+ //n *= inv_d;
+ //dforce = n * (rep.stiffness * dot(dxi, n));
+
+ //dforce = (n * (rep.stiffness * dot(dxi, n)))/d2;
+
+ dforce += n * (real)__fdividef((rep.stiffness * dot(dxi, n)),d2);
+ }
+}
+
+template
+__global__ void ParticlesRepulsionForceFieldCuda3t_addForce_kernel(int size, const int *cells, const int *cellGhost, GPURepulsion repulsion, real* f, const real* x, const real* v)
+{
+ __shared__ int2 range;
+ __shared__ int ghost;
+ __shared__ real temp_x[BSIZE*3];
+ __shared__ real temp_v[BSIZE*3];
+ int tx3 = threadIdx.x * 3;
+ for (int cell = blockIdx.x; cell < size; cell += gridDim.x)
+ {
+ if (!threadIdx.x)
+ {
+ //range = *(const int2*)(cells+cell);
+ range.x = cells[cell];
+ range.y = cells[cell+1];
+ range.y &= ~(1U<<31);
+ ghost = cellGhost[cell];
+ }
+ __syncthreads();
+ if (range.x <= 0) continue; // no actual particle in this cell
+ for (int px0 = range.x; px0 < ghost; px0 += BSIZE)
+ {
+ int px = px0 + threadIdx.x;
+ CudaVec3 xi;
+ CudaVec3 vi;
+ CudaVec3 force;
+ int index;
+ if (px < range.y)
+ {
+ index = cells[px];
+ xi = ((const CudaVec3*)x)[index];
+ temp_x[tx3 ] = xi.x;
+ temp_x[tx3+1] = xi.y;
+ temp_x[tx3+2] = xi.z;
+ vi = ((const CudaVec3*)v)[index];
+ temp_v[tx3 ] = vi.x;
+ temp_v[tx3+1] = vi.y;
+ temp_v[tx3+2] = vi.z;
+ }
+ __syncthreads();
+ if (px < ghost)
+ {
+ // actual particle -> compute interactions
+ force = CudaVec3::make(0,0,0);
+ int np = min(range.y-px0,BSIZE);
+ for (int i=0; i < np; ++i)
+ {
+ if (i != threadIdx.x)
+ ParticlesRepulsionCalcForce(xi, vi, ((const CudaVec3*)temp_x)[i], ((const CudaVec3*)temp_v)[i], force, repulsion);
+ }
+ }
+ __syncthreads();
+ // loop through other groups of particles
+ for (int py0 = range.x; py0 < range.y; py0 += BSIZE)
+ {
+ if (py0 == px0) continue;
+ int py = py0 + threadIdx.x;
+ if (py < range.y)
+ {
+ int index2 = cells[py];
+ CudaVec3 xj = ((const CudaVec3*)x)[index2];
+ temp_x[tx3 ] = xj.x;
+ temp_x[tx3+1] = xj.y;
+ temp_x[tx3+2] = xj.z;
+ CudaVec3 vj = ((const CudaVec3*)v)[index2];
+ temp_v[tx3 ] = vj.x;
+ temp_v[tx3+1] = vj.y;
+ temp_v[tx3+2] = vj.z;
+ }
+ __syncthreads();
+ if (px < ghost)
+ {
+ // actual particle -> compute interactions
+ int np = min(range.y-py0,BSIZE);
+ for (int i=0; i < np; ++i)
+ {
+ ParticlesRepulsionCalcForce(xi, vi, ((const CudaVec3*)temp_x)[i], ((const CudaVec3*)temp_v)[i], force, repulsion);
+ }
+ }
+ __syncthreads();
+ }
+ if (px < ghost)
+ {
+ // actual particle -> write computed force
+ ((CudaVec3*)f)[index] += force;
+ }
+ }
+ }
+}
+
+template
+__global__ void ParticlesRepulsionForceFieldCuda3t_addDForce_kernel(int size, const int *cells, const int *cellGhost, GPURepulsion repulsion, real* df, const real* x, const real* dx)
+{
+ __shared__ int2 range;
+ __shared__ int ghost;
+ __shared__ real temp_x[BSIZE*3];
+ __shared__ real temp_dx[BSIZE*3];
+ int tx3 = threadIdx.x * 3;
+ for (int cell = blockIdx.x; cell < size; cell += gridDim.x)
+ {
+ if (!threadIdx.x)
+ {
+ //range = *(const int2*)(cells+cell);
+ range.x = cells[cell];
+ range.y = cells[cell+1];
+ range.y &= ~(1U<<31);
+ ghost = cellGhost[cell];
+ }
+ __syncthreads();
+ if (range.x <= 0) continue; // no actual particle in this cell
+ for (int px0 = range.x; px0 < ghost; px0 += BSIZE)
+ {
+ int px = px0 + threadIdx.x;
+ CudaVec3 xi;
+ CudaVec3 dxi;
+ CudaVec3 dforce;
+ int index;
+ if (px < range.y)
+ {
+ index = cells[px];
+ xi = ((const CudaVec3*)x)[index];
+ temp_x[tx3 ] = xi.x;
+ temp_x[tx3+1] = xi.y;
+ temp_x[tx3+2] = xi.z;
+ dxi = ((const CudaVec3*)dx)[index];
+ temp_dx[tx3 ] = dxi.x;
+ temp_dx[tx3+1] = dxi.y;
+ temp_dx[tx3+2] = dxi.z;
+ }
+ __syncthreads();
+ if (px < ghost)
+ {
+ // actual particle -> compute interactions
+ dforce = CudaVec3::make(0,0,0);
+ int np = min(range.y-px0,BSIZE);
+ for (int i=0; i < np; ++i)
+ {
+ if (i != threadIdx.x)
+ ParticlesRepulsionCalcDForce(xi, dxi, ((const CudaVec3*)temp_x)[i], ((const CudaVec3*)temp_dx)[i], dforce, repulsion);
+ }
+ }
+ __syncthreads();
+ // loop through other groups of particles
+ for (int py0 = range.x; py0 < range.y; py0 += BSIZE)
+ {
+ if (py0 == px0) continue;
+ int py = py0 + threadIdx.x;
+ if (py < range.y)
+ {
+ int index2 = cells[py];
+ CudaVec3 xj = ((const CudaVec3*)x)[index2];
+ temp_x[tx3 ] = xj.x;
+ temp_x[tx3+1] = xj.y;
+ temp_x[tx3+2] = xj.z;
+ CudaVec3 dxj = ((const CudaVec3*)dx)[index2];
+ temp_dx[tx3 ] = dxj.x;
+ temp_dx[tx3+1] = dxj.y;
+ temp_dx[tx3+2] = dxj.z;
+ }
+ __syncthreads();
+ if (px < ghost)
+ {
+ // actual particle -> compute interactions
+ int np = min(range.y-py0,BSIZE);
+ for (int i=0; i < np; ++i)
+ {
+ ParticlesRepulsionCalcDForce(xi, dxi, ((const CudaVec3*)temp_x)[i], ((const CudaVec3*)temp_dx)[i], dforce, repulsion);
+ }
+ }
+ __syncthreads();
+ }
+ if (px < ghost)
+ {
+ // actual particle -> write computed force
+ ((CudaVec3*)df)[index] += dforce;
+ }
+ }
+ }
+}
+
+//////////////////////
+// CPU-side methods //
+//////////////////////
+
+void ParticlesRepulsionForceFieldCuda3f_addForce(unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3f* repulsion, void* f, const void* x, const void* v)
+{
+ dim3 threads(BSIZE,1);
+ dim3 grid(60,1);
+ {ParticlesRepulsionForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *repulsion, (float*)f, (const float*)x, (const float*)v); mycudaDebugError("ParticlesRepulsionForceFieldCuda3t_addForce_kernel");}
+}
+
+void ParticlesRepulsionForceFieldCuda3f_addDForce(unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3f* repulsion, void* df, const void* x, const void* dx)
+{
+ dim3 threads(BSIZE,1);
+ dim3 grid(60/BSIZE,1);
+ {ParticlesRepulsionForceFieldCuda3t_addDForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *repulsion, (float*)df, (const float*)x, (const float*)dx); mycudaDebugError("ParticlesRepulsionForceFieldCuda3t_addDForce_kernel");}
+}
+
+#ifdef SOFA_GPU_CUDA_DOUBLE
+
+void ParticlesRepulsionForceFieldCuda3d_addForce(unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3d* repulsion, void* f, const void* x, const void* v)
+{
+ dim3 threads(BSIZE,1);
+ dim3 grid(60/BSIZE,1);
+ {ParticlesRepulsionForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *repulsion, (double*)f, (const double*)x, (const double*)v); mycudaDebugError("ParticlesRepulsionForceFieldCuda3t_addForce_kernel");}
+}
+
+void ParticlesRepulsionForceFieldCuda3d_addDForce(unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3d* repulsion, void* df, const void* x, const void* dx)
+{
+ dim3 threads(BSIZE,1);
+ dim3 grid(60/BSIZE,1);
+ {ParticlesRepulsionForceFieldCuda3t_addDForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *repulsion, (double*)df, (const double*)x, (const double*)dx); mycudaDebugError("ParticlesRepulsionForceFieldCuda3t_addDForce_kernel");}
+}
+
+#endif // SOFA_GPU_CUDA_DOUBLE
+
+#if defined(__cplusplus) && CUDA_VERSION < 2000
+} // namespace cuda
+} // namespace gpu
+} // namespace sofa
+#endif
diff --git a/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.h b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.h
new file mode 100644
index 0000000..7631366
--- /dev/null
+++ b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.h
@@ -0,0 +1,75 @@
+/******************************************************************************
+* SOFA, Simulation Open-Framework Architecture *
+* (c) 2006 INRIA, USTL, UJF, CNRS, MGH *
+* *
+* This program is free software; you can redistribute it and/or modify it *
+* under the terms of the GNU Lesser General Public License as published by *
+* the Free Software Foundation; either version 2.1 of the License, or (at *
+* your option) any later version. *
+* *
+* This program is distributed in the hope that it will be useful, but WITHOUT *
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or *
+* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License *
+* for more details. *
+* *
+* You should have received a copy of the GNU Lesser General Public License *
+* along with this program. If not, see . *
+*******************************************************************************
+* Authors: The SOFA Team and external contributors (see Authors.txt) *
+* *
+* Contact information: contact@sofa-framework.org *
+******************************************************************************/
+#ifndef SOFA_GPU_CUDA_CUDAPARTICLESREPULSIONFORCEFIELD_H
+#define SOFA_GPU_CUDA_CUDAPARTICLESREPULSIONFORCEFIELD_H
+
+#include
+#include
+#include
+
+namespace sofa
+{
+
+
+namespace gpu::cuda
+{
+
+template
+struct GPURepulsion
+{
+ real d;
+ real d2;
+ real stiffness;
+ real damping;
+};
+
+typedef GPURepulsion GPURepulsion3f;
+typedef GPURepulsion GPURepulsion3d;
+
+} // namespace gpu::cuda
+
+
+namespace component::forcefield
+{
+
+template <>
+void ParticlesRepulsionForceField::addForce(const core::MechanicalParams* mparams, DataVecDeriv& d_f, const DataVecCoord& d_x, const DataVecDeriv& d_v);
+
+template <>
+void ParticlesRepulsionForceField::addDForce(const core::MechanicalParams* mparams, DataVecDeriv& d_df, const DataVecDeriv& d_dx);
+
+#ifdef SOFA_GPU_CUDA_DOUBLE
+
+template <>
+void ParticlesRepulsionForceField::addForce(const core::MechanicalParams* mparams, DataVecDeriv& d_f, const DataVecCoord& d_x, const DataVecDeriv& d_v);
+
+template <>
+void ParticlesRepulsionForceField::addDForce(const core::MechanicalParams* mparams, DataVecDeriv& d_df, const DataVecDeriv& d_dx);
+
+#endif // SOFA_GPU_CUDA_DOUBLE
+
+} // namespace component::forcefield
+
+
+} // namespace sofa
+
+#endif
diff --git a/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.inl b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.inl
new file mode 100644
index 0000000..2931e07
--- /dev/null
+++ b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.inl
@@ -0,0 +1,168 @@
+/******************************************************************************
+* SOFA, Simulation Open-Framework Architecture *
+* (c) 2006 INRIA, USTL, UJF, CNRS, MGH *
+* *
+* This program is free software; you can redistribute it and/or modify it *
+* under the terms of the GNU Lesser General Public License as published by *
+* the Free Software Foundation; either version 2.1 of the License, or (at *
+* your option) any later version. *
+* *
+* This program is distributed in the hope that it will be useful, but WITHOUT *
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or *
+* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License *
+* for more details. *
+* *
+* You should have received a copy of the GNU Lesser General Public License *
+* along with this program. If not, see . *
+*******************************************************************************
+* Authors: The SOFA Team and external contributors (see Authors.txt) *
+* *
+* Contact information: contact@sofa-framework.org *
+******************************************************************************/
+#ifndef SOFA_GPU_CUDA_CUDAPARTICLESREPULSIONFORCEFIELD_INL
+#define SOFA_GPU_CUDA_CUDAPARTICLESREPULSIONFORCEFIELD_INL
+
+#include "CudaParticlesRepulsionForceField.h"
+#include
+//#include
+
+namespace sofa
+{
+
+
+namespace gpu::cuda
+{
+
+extern "C"
+{
+
+ void ParticlesRepulsionForceFieldCuda3f_addForce (unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3f* repulsion, void* f, const void* x, const void* v );
+ void ParticlesRepulsionForceFieldCuda3f_addDForce(unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3f* repulsion, void* f, const void* x, const void* dx);
+
+#ifdef SOFA_GPU_CUDA_DOUBLE
+
+ void ParticlesRepulsionForceFieldCuda3d_addForce (unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3d* repulsion, void* f, const void* x, const void* v );
+ void ParticlesRepulsionForceFieldCuda3d_addDForce(unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3d* repulsion, void* f, const void* x, const void* dx);
+
+#endif // SOFA_GPU_CUDA_DOUBLE
+}
+
+} // namespace gpu::cuda
+
+
+namespace component::forcefield
+{
+
+using namespace gpu::cuda;
+
+
+template <>
+void ParticlesRepulsionForceField::addForce(const core::MechanicalParams* /*mparams*/, DataVecDeriv& d_f, const DataVecCoord& d_x, const DataVecDeriv& d_v)
+{
+ if (grid == NULL) return;
+
+ VecDeriv& f = *d_f.beginEdit();
+ const VecCoord& x = d_x.getValue();
+ const VecDeriv& v = d_v.getValue();
+
+ grid->updateGrid(x);
+ GPURepulsion3f repulsion;
+ repulsion.d = distance.getValue();
+ repulsion.d2 = repulsion.d*repulsion.d;
+ repulsion.stiffness = stiffness.getValue();
+ repulsion.damping = damping.getValue();
+ f.resize(x.size());
+ Grid::Grid* g = grid->getGrid();
+ ParticlesRepulsionForceFieldCuda3f_addForce(
+ g->getNbCells(), g->getCellsVector().deviceRead(), g->getCellGhostVector().deviceRead(),
+ &repulsion, f.deviceWrite(), x.deviceRead(), v.deviceRead());
+
+ d_f.endEdit();
+}
+
+template <>
+void ParticlesRepulsionForceField::addDForce(const core::MechanicalParams* mparams, DataVecDeriv& d_df, const DataVecDeriv& d_dx)
+{
+ if (grid == NULL) return;
+
+ VecDeriv& df = *d_df.beginEdit();
+ const VecDeriv& dx = d_dx.getValue();
+ Real kFactor = (Real)sofa::core::mechanicalparams::kFactorIncludingRayleighDamping(mparams,this->rayleighStiffness.getValue());
+ Real bFactor = (Real)sofa::core::mechanicalparams::bFactor(mparams);
+
+ const VecCoord& x = this->mstate->read(sofa::core::vec_id::read_access::position)->getValue();
+ GPURepulsion3f repulsion;
+ repulsion.d = distance.getValue();
+ repulsion.d2 = repulsion.d*repulsion.d;
+ repulsion.stiffness = (float)(stiffness.getValue()*kFactor);
+ repulsion.damping = (float)(damping.getValue()*bFactor);
+ df.resize(dx.size());
+ Grid::Grid* g = grid->getGrid();
+ ParticlesRepulsionForceFieldCuda3f_addDForce(
+ g->getNbCells(), g->getCellsVector().deviceRead(), g->getCellGhostVector().deviceRead(),
+ &repulsion, df.deviceWrite(), x.deviceRead(), dx.deviceRead());
+
+ d_df.endEdit();
+}
+
+
+#ifdef SOFA_GPU_CUDA_DOUBLE
+
+template <>
+void ParticlesRepulsionForceField::addForce(const core::MechanicalParams* /*mparams*/, DataVecDeriv& d_f, const DataVecCoord& d_x, const DataVecDeriv& d_v)
+{
+ if (grid == NULL) return;
+
+ VecDeriv& f = *d_f.beginEdit();
+ const VecCoord& x = d_x.getValue();
+ const VecDeriv& v = d_v.getValue();
+
+ grid->updateGrid(x);
+ GPURepulsion3d repulsion;
+ repulsion.d = distance.getValue();
+ repulsion.d2 = repulsion.d*repulsion.d;
+ repulsion.stiffness = stiffness.getValue();
+ repulsion.damping = damping.getValue();
+ f.resize(x.size());
+ Grid::Grid* g = grid->getGrid();
+ ParticlesRepulsionForceFieldCuda3d_addForce(
+ g->getNbCells(), g->getCellsVector().deviceRead(), g->getCellGhostVector().deviceRead(),
+ &repulsion, f.deviceWrite(), x.deviceRead(), v.deviceRead());
+
+ d_f.endEdit();
+}
+
+template <>
+void ParticlesRepulsionForceField::addDForce(const core::MechanicalParams* mparams, DataVecDeriv& d_df, const DataVecDeriv& d_dx)
+{
+ if (grid == NULL) return;
+
+ VecDeriv& df = *d_df.beginEdit();
+ const VecDeriv& dx = d_dx.getValue();
+ Real kFactor = (Real)sofa::core::mechanicalparams::kFactorIncludingRayleighDamping(mparams,this->rayleighStiffness.getValue());
+ Real bFactor = (Real)sofa::core::mechanicalparams::bFactor(mparams);
+
+ const VecCoord& x = this->mstate->read(sofa::core::vec_id::read_access::position)->getValue();
+ GPURepulsion3d repulsion;
+ repulsion.d = distance.getValue();
+ repulsion.d2 = repulsion.d*repulsion.d;
+ repulsion.stiffness = stiffness.getValue()*kFactor;
+ repulsion.damping = damping.getValue()*bFactor;
+ df.resize(dx.size());
+ Grid::Grid* g = grid->getGrid();
+ ParticlesRepulsionForceFieldCuda3d_addDForce(
+ g->getNbCells(), g->getCellsVector().deviceRead(), g->getCellGhostVector().deviceRead(),
+ &repulsion, df.deviceWrite(), x.deviceRead(), dx.deviceRead());
+
+ d_df.endEdit();
+}
+
+#endif // SOFA_GPU_CUDA_DOUBLE
+
+
+} // namespace component::forcefield
+
+
+} // namespace sofa
+
+#endif
diff --git a/extensions/CUDA/src/SofaSphFluid/CUDA/CudaSPHFluidForceField.cpp b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaSPHFluidForceField.cpp
new file mode 100644
index 0000000..c6dd42f
--- /dev/null
+++ b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaSPHFluidForceField.cpp
@@ -0,0 +1,43 @@
+/******************************************************************************
+* SOFA, Simulation Open-Framework Architecture *
+* (c) 2006 INRIA, USTL, UJF, CNRS, MGH *
+* *
+* This program is free software; you can redistribute it and/or modify it *
+* under the terms of the GNU Lesser General Public License as published by *
+* the Free Software Foundation; either version 2.1 of the License, or (at *
+* your option) any later version. *
+* *
+* This program is distributed in the hope that it will be useful, but WITHOUT *
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or *
+* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License *
+* for more details. *
+* *
+* You should have received a copy of the GNU Lesser General Public License *
+* along with this program. If not, see . *
+*******************************************************************************
+* Authors: The SOFA Team and external contributors (see Authors.txt) *
+* *
+* Contact information: contact@sofa-framework.org *
+******************************************************************************/
+#include
+#include "CudaSPHFluidForceField.inl"
+#include
+#include
+#include
+
+
+namespace sofa::gpu::cuda
+{
+
+int SPHFluidForceFieldCudaClass = core::RegisterObject("Supports GPU-side computations using CUDA")
+ .add< component::forcefield::SPHFluidForceField >()
+#ifdef SOFA_GPU_CUDA_DOUBLE
+ .add< component::forcefield::SPHFluidForceField >()
+#endif // SOFA_GPU_CUDA_DOUBLE
+ ;
+
+} // namespace sofa::gpu::cuda
+
+
+
+
diff --git a/extensions/CUDA/src/SofaSphFluid/CUDA/CudaSPHFluidForceField.cu b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaSPHFluidForceField.cu
new file mode 100644
index 0000000..1ab6b88
--- /dev/null
+++ b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaSPHFluidForceField.cu
@@ -0,0 +1,1246 @@
+/******************************************************************************
+* SOFA, Simulation Open-Framework Architecture *
+* (c) 2006 INRIA, USTL, UJF, CNRS, MGH *
+* *
+* This program is free software; you can redistribute it and/or modify it *
+* under the terms of the GNU Lesser General Public License as published by *
+* the Free Software Foundation; either version 2.1 of the License, or (at *
+* your option) any later version. *
+* *
+* This program is distributed in the hope that it will be useful, but WITHOUT *
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or *
+* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License *
+* for more details. *
+* *
+* You should have received a copy of the GNU Lesser General Public License *
+* along with this program. If not, see . *
+*******************************************************************************
+* Authors: The SOFA Team and external contributors (see Authors.txt) *
+* *
+* Contact information: contact@sofa-framework.org *
+******************************************************************************/
+#include
+#include
+#include "cuda.h"
+
+#if defined(__cplusplus) && CUDA_VERSION < 2000
+namespace sofa
+{
+namespace gpu
+{
+namespace cuda
+{
+#endif
+
+template
+class GPUSPHFluid
+{
+public:
+ real h; ///< particles radius
+ real h2; ///< particles radius squared
+ real inv_h2; ///< particles radius squared inverse
+ real stiffness; ///< pressure stiffness
+ real mass; ///< particles mass
+ real mass2; ///< particles mass squared
+ real density0; ///< 1000 kg/m3 for water
+ real viscosity;
+ real surfaceTension;
+
+ // Precomputed constants for smoothing kernels
+ real CWd; ///< = constWd(h)
+ //real CgradWd; ///< = constGradWd(h)
+ real CgradWp; ///< = constGradWp(h)
+ real ClaplacianWv; ///< = constLaplacianWv(h)
+};
+
+typedef GPUSPHFluid GPUSPHFluid3f;
+typedef GPUSPHFluid GPUSPHFluid3d;
+
+extern "C"
+{
+
+ void SPHFluidForceFieldCuda3f_computeDensity(int kernelType, int pressureType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3f* params, void* pos4, const void* x);
+ void SPHFluidForceFieldCuda3f_addForce (int kernelType, int pressureType, int viscosityType, int surfaceTensionType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3f* params, void* f, const void* pos4, const void* v);
+//void SPHFluidForceFieldCuda3f_addDForce(int kernelType, int pressureType, int viscosityType, int surfaceTensionType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3f* params, void* f, const void* pos4, const void* v, const void* dx);
+
+#ifdef SOFA_GPU_CUDA_DOUBLE
+
+ void SPHFluidForceFieldCuda3d_computeDensity(int kernelType, int pressureType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3d* params, void* pos4, const void* x);
+ void SPHFluidForceFieldCuda3d_addForce (int kernelType, int pressureType, int viscosityType, int surfaceTensionType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3d* params, void* f, const void* pos4, const void* v);
+//void SPHFluidForceFieldCuda3d_addDForce(int kernelType, int pressureType, int viscosityType, int surfaceTensionType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3d* params, void* f, const void* pos4, const void* v, const void* dx);
+
+#endif // SOFA_GPU_CUDA_DOUBLE
+
+}
+
+//////////////////////
+// GPU-side methods //
+//////////////////////
+
+#ifndef R_PI
+#ifdef M_PI
+#define R_PI M_PI
+#else
+#define R_PI 3.141592653589793238462
+#endif
+#endif
+
+template
+class SPH;
+
+template
+class SPH
+{
+public:
+ typedef real Real;
+ typedef CudaVec3 Deriv;
+ static Real constW(Real h)
+ {
+ return (Real) (48/R_PI)/(h*h*h);
+ }
+
+ static __device__ Real W0(Real C)
+ {
+ return C*((Real)(1.0/6.0));
+ }
+ static __device__ Real W0()
+ {
+ return ((Real)(1.0/6.0));
+ }
+ static __device__ Real W(Real r_h, Real C)
+ {
+ Real s = 1-r_h;
+ if (r_h < (Real)0.5) return C*((Real)(1.0/6.0) - r_h*r_h*s);
+ else return C*((Real)(1.0/3.0) * (s*s*s));
+ }
+ static __device__ Real W2(Real r2_h2, Real C)
+ {
+ Real r_h = sqrtf(r2_h2);
+ Real s = 1-r_h;
+ if (r2_h2 < (Real)0.25) return C*((Real)(1.0/6.0) - r2_h2*s);
+ else return C*((Real)(1.0/3.0) * (s*s*s));
+ }
+ static __device__ Real W2(Real r2_h2)
+ {
+ Real r_h = sqrtf(r2_h2);
+ Real s = 1-r_h;
+ if (r2_h2 < (Real)0.25) return ((Real)(1.0/6.0) - r2_h2*s);
+ else return ((Real)(1.0/3.0) * (s*s*s));
+ }
+ static Real constGradW(Real h)
+ {
+ return constW(h)/(h*h);
+ }
+ static __device__ Deriv gradW(const Deriv& d, Real r_h, Real C)
+ {
+ Real g;
+ if (r_h < (Real)0.5) g = 3*r_h - 2;
+ else { Real s = 1-r_h; g = -s*s/r_h; }
+ return d*(C*g);
+ }
+ static Real constLaplacianW(Real h)
+ {
+ return constW(h)/(h*h);
+ }
+ static __device__ Real laplacianW(Real r_h, Real C)
+ {
+ if (r_h < (Real)0.5) return C*(12*r_h-6);
+ else if (r_h < (Real)1) return C*(6-4*r_h-2/r_h);
+ else return 0;
+ }
+ /// Density Smoothing Kernel
+ static Real constWd(Real h)
+ {
+ return constW(h);
+ }
+ static __device__ Real Wd(Real r_h, Real C)
+ {
+ return W(r_h,C);
+ }
+ static __device__ Real Wd2(Real r2_h2, Real C)
+ {
+ return W2(r2_h2,C);
+ }
+ static __device__ Real Wd2(Real r2_h2)
+ {
+ return W2(r2_h2);
+ }
+ static __device__ Real Wd0()
+ {
+ return W0();
+ }
+ static Real constGradWd(Real h)
+ {
+ return constGradW(h);
+ }
+ static __device__ Deriv gradWd(const Deriv& d, Real r_h, Real C)
+ {
+ return gradW(d,r_h,C);
+ }
+ static __device__ Deriv gradWd2(const Deriv& d, Real r2_h2, Real C)
+ {
+ return gradW(d,sqrtf(r2_h2),C);
+ }
+ static Real constLaplacianWd(Real h)
+ {
+ return constLaplacianW(h);
+ }
+ static __device__ Real laplacianWd(Real r_h, Real C)
+ {
+ return laplacianW(r_h,C);
+ }
+ static __device__ Real laplacianWd2(Real r2_h2, Real C)
+ {
+ return laplacianW(sqrtf(r2_h2),C);
+ }
+
+ /// Pressure Smoothing Kernel
+ static Real constWp(Real h)
+ {
+ return constW(h);
+ }
+ static __device__ Real Wp(Real r_h, Real C)
+ {
+ return W(r_h,C);
+ }
+ static Real constGradWp(Real h)
+ {
+ return constGradW(h);
+ }
+ static __device__ Deriv gradWp(const Deriv& d, Real r_h, Real C)
+ {
+ return gradW(d,r_h,C);
+ }
+
+ /// Viscosity Smoothing Kernel
+ static Real constWv(Real h)
+ {
+ return constW(h);
+ }
+ static __device__ Real Wv(Real r_h, Real C)
+ {
+ return W(r_h,C);
+ }
+ static __device__ Real Wv2(Real r2_h2, Real r_h, Real C)
+ {
+ return W2(r2_h2,r_h,C);
+ }
+ static Real constGradWv(Real h)
+ {
+ return constGradW(h);
+ }
+ static __device__ Deriv gradWv(const Deriv& d, Real r_h, Real C)
+ {
+ return gradW(d,r_h,C);
+ }
+ static __device__ Deriv gradWv2(const Deriv& d, Real r2_h2, Real r_h, Real C)
+ {
+ return gradW(d,r_h,C);
+ }
+ static Real constLaplacianWv(Real h)
+ {
+ return constLaplacianW(h);
+ }
+
+ static __device__ Real laplacianWv(Real r_h, Real C)
+ {
+ return laplacianW(r_h,C);
+ }
+};
+
+template
+class SPH
+{
+public:
+ typedef real Real;
+ typedef CudaVec3 Deriv;
+ /// Density Smoothing Kernel: W = 315 / 64pih9 * (h2 - r2)3 = 315 / 64pih3 * (1 - (r/h)2)3
+ static Real constWd(Real h)
+ {
+ return (Real)(315 / (64*R_PI*h*h*h));
+ }
+ static __device__ Real Wd(Real r_h, Real C)
+ {
+ Real a = (1-r_h*r_h);
+ return C*a*a*a;
+ }
+ static __device__ Real Wd2(Real r2_h2, Real C)
+ {
+ Real a = (1-r2_h2);
+ return C*a*a*a;
+ }
+ static __device__ Real Wd2(Real r2_h2)
+ {
+ Real a = (1-r2_h2);
+ return a*a*a;
+ }
+ static __device__ Real Wd0(Real C)
+ {
+ return C;
+ }
+ static __device__ Real Wd0()
+ {
+ return 1;
+ }
+ static Real constGradWd(Real h)
+ {
+ return -6*constWd(h)/(h*h);
+ }
+ static __device__ Deriv gradWd(const Deriv& d, Real r_h, Real C)
+ {
+ Real a = (1-r_h*r_h);
+ return d*(C*a*a);
+ }
+ static __device__ Deriv gradWd2(const Deriv& d, Real r2_h2, Real C)
+ {
+ Real a = (1-r2_h2);
+ return d*(C*a*a);
+ }
+ static Real constLaplacianWd(Real h)
+ {
+ return -6*constWd(h)/(h*h);
+ }
+ static __device__ Real laplacianWd(Real r_h, Real C)
+ {
+ Real r2_h2 = r_h*r_h;
+ return C*((1-r2_h2)*(3-7*r2_h2));
+ }
+ static __device__ Real laplacianWd2(Real r2_h2, Real C)
+ {
+ return C*((1-r2_h2)*(3-7*r2_h2));
+ }
+
+ /// Pressure Smoothing Kernel: W = 15 / pih6 (h - r)3 = 15 / pih3 (1 - r/h)3
+ static Real constWp(Real h)
+ {
+ return (Real)(15 / (R_PI*h*h*h));
+ }
+ static __device__ Real Wp(Real r_h, Real C)
+ {
+ Real a = (1-r_h);
+ return C*a*a*a;
+ }
+ static Real constGradWp(Real h)
+ {
+ return (-3*constWp(h)) / (h*h);
+ }
+ static __device__ Deriv gradWp(const Deriv& d, Real r_h, Real C)
+ {
+ Real a = (1-r_h);
+ return d * (C*a*a/r_h);
+ }
+
+ /// Viscosity Smoothing Kernel: W = 15/(2pih3) (-r3/2h3 + r2/h2 + h/2r - 1)
+ static Real constWv(Real h)
+ {
+ return (Real)(15/(2*R_PI*h*h*h));
+ }
+ static __device__ Real Wv(Real r_h, Real C)
+ {
+ Real r2_h2 = r_h*r_h;
+ Real r3_h3 = r2_h2*r_h;
+ return C*(-0.5f*r3_h3 + r2_h2 + 0.5f/r_h - 1);
+ }
+ static __device__ Real Wv2(Real r2_h2, Real r_h, Real C)
+ {
+ Real r3_h3 = r2_h2*r_h;
+ return C*(-0.5f*r3_h3 + r2_h2 + 0.5f/r_h - 1);
+ }
+ static Real constGradWv(Real h)
+ {
+ return constWv(h)/(h*h);
+ }
+ static __device__ Deriv gradWv(const Deriv& d, Real r_h, Real C)
+ {
+ Real r3_h3 = r_h*r_h*r_h;
+ return d * (C*(2.0f -3.0f*r_h - 0.5f/r3_h3));
+ }
+ static __device__ Deriv gradWv2(const Deriv& d, Real r2_h2, Real r_h, Real C)
+ {
+ Real r3_h3 = r2_h2*r_h;
+ return d * (C*(-3*r_h + 4 - 1/r3_h3));
+ }
+ static Real constLaplacianWv(Real h)
+ {
+ return 6*constWv(h)/(h*h);
+ }
+
+ static __device__ Real laplacianWv(Real r_h, Real C)
+ {
+ return C*(1-r_h);
+ }
+};
+
+template
+__device__ void SPHFluidInitDensity(CudaVec3 /*x1*/, real& density, GPUSPHFluid& params)
+{
+ density = 0; //params.mass * params.CWd; //SPH::Wd(0,params.CWd);
+}
+
+template
+__device__ void SPHFluidCalcDensity(CudaVec3 x1, CudaVec3 x2, real& density, GPUSPHFluid& params)
+{
+ CudaVec3 n = x2-x1;
+ real d2 = norm2(n);
+ if (sqrtf(d2) < params.h)
+ //if (d2 < params.h2)
+ {
+ //real r_h = rsqrtf(params.h2/d2);
+ //real r_h = sqrtf(d2/params.h2);
+ real r2_h2 = (d2/params.h2);
+ real d = SPH::Wd2(r2_h2);
+ density += d;
+ }
+}
+
+template
+__device__ void SPHFluidFinalDensity(CudaVec3 /*x1*/, real& density, GPUSPHFluid& params)
+{
+ density = (SPH::Wd0()+density) * params.CWd * params.mass; //SPH::Wd(0,params.CWd);
+}
+
+template
+__device__ void SPHFluidCalcForce(CudaVec4 x1, CudaVec3 v1, CudaVec4 x2, CudaVec3 v2, CudaVec3& force, GPUSPHFluid& params)
+{
+ CudaVec3 n = CudaVec3::make(x2.x-x1.x,x2.y-x1.y,x2.z-x1.z);
+ real d2 = norm2(n);
+ if (d2 < params.h2)
+ {
+ //real r_h = rsqrtf(params.h2/d2);
+ //real r_h = sqrtf(d2/params.h2);
+ real r2_h2 = (d2/params.h2);
+ real r_h = sqrtf(r2_h2);
+ real density1 = x1.w;
+ real density2 = x2.w;
+
+ // Pressure
+ real pressure1 = params.stiffness * (density1 - params.density0);
+ real pressure2 = params.stiffness * (density2 - params.density0);
+ real pressureFV = params.mass2 * (pressure1 / (density1*density1) + pressure2 / (density2*density2) );
+
+ // Viscosity
+ if (viscosityType == 1)
+ {
+ force += ( v2 - v1 ) * ( params.mass2 * params.viscosity * SPH::laplacianWv(r_h,params.ClaplacianWv) / (density1 * density2) );
+ }
+ else if (viscosityType == 2)
+ {
+ real vx = dot(n,v2-v1);
+ if (vx < 0)
+ pressureFV += - (vx * params.viscosity * params.h * params.mass / ((r2_h2 + 0.01f*params.h2)*(density1+density2)*0.5f));
+ }
+
+ force += SPH::gradWp(n, r_h, params.CgradWp) * pressureFV;
+ }
+}
+/*
+template
+__device__ void SPHFluidCalcDForce(CudaVec4 x1, CudaVec3 v1, CudaVec3 dx1, CudaVec4 x2, CudaVec3 v2, CudaVec3 dx2, CudaVec3& dforce, GPUSPHFluid& params)
+{
+ CudaVec3 n = CudaVec3::make(x2.x-x1.x,x2.y-x1.y,x2.z-x1.z);
+ real d2 = norm2(n);
+ if (d2 < params.h2)
+ {
+ CudaVec3 dxi = dx2-dx1;
+
+ real inv_d = rsqrtf(d2);
+
+ real r2_h2 = (d2/params.h2);
+ real r_h = sqrtf(r2_h2);
+
+ real density1 = x1.w;
+ real density2 = x2.w;
+
+ // Pressure
+ real pressure1 = params.stiffness * (density1 - params.density0);
+ real pressure2 = params.stiffness * (density2 - params.density0);
+
+ real fpressure = params.mass2 * (pressure1 / (density1*density1) + pressure2 / (density2*density2) );
+
+ // Derivatives
+
+ CudaVec3 dn = dxi * inv_d;
+
+ real dr_h = dot(dn,n)/params.h;
+ //real dr2_h2 = 2*r_h*dr_h;
+
+ real ddensity = dot(dxi,SPH::gradWd2(n, r2_h2, params.CgradWd));
+ real dpressure = params.stiffness*ddensity;
+ // d(a/b^2) = (d(a)*b-2*a*d(b))/b^3
+ real dfpressure = params.mass2 * ( (dpressure*density1-2*pressure1*ddensity)/(density1*density1*density1)
+ +(dpressure*density2-2*pressure2*ddensity)/(density2*density2*density2) );
+ real a = (1-r_h);
+ real dWp = params.CgradWp*a*a;
+ real ddWp = -2*params.CgradWp*a*dr_h;
+
+ // f = n * dWp * fpressure
+ // df = dn * (dWp * fpressure) + n * (ddWp * fpressure + dWp * dfpressure);
+
+ dforce += dn * (dWp * fpressure) + n * (ddWp * fpressure + dWp * dfpressure);
+
+ // Viscosity
+ // force += ( v2 - v1 ) * ( params.mass2 * params.viscosity * params.ClaplacianWv * (1-r_h) / (density1 * density2) );
+ real d1d2 = density1*density2;
+ dforce += dxi * ( params.mass2 * params.viscosity * params.ClaplacianWv * (1-r_h) / (density1 * density2) )
+ + (v2-v1) * ( params.mass2 * params.viscosity * params.ClaplacianWv * (-dr_h*d1d2 - (1-r_h)*ddensity*(density1+density2))/(d1d2*d1d2));
+
+ }
+}
+*/
+template
+__global__ void SPHFluidForceFieldCuda3t_computeDensity_kernel(int size, const int *cells, const int *cellGhost, GPUSPHFluid params, real* pos4, const real* x)
+{
+ __shared__ int2 range;
+ __shared__ int ghost;
+ __shared__ real temp_x[BSIZE*3];
+ int tx3 = threadIdx.x * 3;
+ for (int cell = blockIdx.x; cell < size; cell += gridDim.x)
+ {
+ __syncthreads();
+ if (!threadIdx.x)
+ {
+ //range = *(const int2*)(cells+cell);
+ range.x = cells[cell];
+ range.y = cells[cell+1];
+ range.y &= ~(1U<<31);
+ ghost = cellGhost[cell];
+ }
+ __syncthreads();
+ if (range.x <= 0) continue; // no actual particle in this cell
+ for (int px0 = range.x; px0 < ghost; px0 += BSIZE)
+ {
+ int px = px0 + threadIdx.x;
+ CudaVec3 xi;
+ real density;
+ int index;
+ if (px < range.y)
+ {
+ index = cells[px];
+ xi = ((const CudaVec3*)x)[index];
+ temp_x[tx3 ] = xi.x;
+ temp_x[tx3+1] = xi.y;
+ temp_x[tx3+2] = xi.z;
+ }
+ __syncthreads();
+ if (px < ghost)
+ {
+ // actual particle -> compute interactions
+ SPHFluidInitDensity(xi, density, params);
+
+ int np = min(range.y-px0,BSIZE);
+ for (int i=0; i < np; ++i)
+ {
+ if (i != threadIdx.x)
+ SPHFluidCalcDensity(xi, ((const CudaVec3*)temp_x)[i], density, params);
+ }
+ }
+ __syncthreads();
+ // loop through other groups of particles
+ for (int py0 = range.x; py0 < range.y; py0 += BSIZE)
+ {
+ if (py0 == px0) continue;
+ int py = py0 + threadIdx.x;
+ if (py < range.y)
+ {
+ int index2 = cells[py];
+ CudaVec3 xj = ((const CudaVec3*)x)[index2];
+ temp_x[tx3 ] = xj.x;
+ temp_x[tx3+1] = xj.y;
+ temp_x[tx3+2] = xj.z;
+ }
+ __syncthreads();
+ if (px < ghost)
+ {
+ // actual particle -> compute interactions
+ int np = min(range.y-py0,BSIZE);
+ for (int i=0; i < np; ++i)
+ {
+ SPHFluidCalcDensity(xi, ((const CudaVec3*)temp_x)[i], density, params);
+ }
+ }
+ __syncthreads();
+ }
+ if (px < ghost)
+ {
+ // actual particle -> write computed density
+ SPHFluidFinalDensity(xi, density, params);
+ CudaVec4 res = CudaVec4::make(xi.x,xi.y,xi.z,density);
+ ((CudaVec4*)pos4)[index] = res;
+ }
+ }
+ }
+}
+
+template
+__global__ void SPHFluidForceFieldCuda3t_addForce_kernel(int size, const int *cells, const int *cellGhost, GPUSPHFluid params, real* f, const real* pos4, const real* v)
+{
+ __shared__ int2 range;
+ __shared__ int ghost;
+ __shared__ real temp_x[BSIZE*4];
+ __shared__ real temp_v[BSIZE*3];
+ int tx3 = threadIdx.x * 3;
+ int tx4 = threadIdx.x << 2;
+ for (int cell = blockIdx.x; cell < size; cell += gridDim.x)
+ {
+ if (!threadIdx.x)
+ {
+ //range = *(const int2*)(cells+cell);
+ range.x = cells[cell];
+ range.y = cells[cell+1];
+ range.y &= ~(1U<<31);
+ ghost = cellGhost[cell];
+ }
+ __syncthreads();
+ if (range.x <= 0) continue; // no actual particle in this cell
+ for (int px0 = range.x; px0 < ghost; px0 += BSIZE)
+ {
+ int px = px0 + threadIdx.x;
+ CudaVec4 xi;
+ CudaVec3 vi;
+ CudaVec3 force;
+ int index;
+ if (px < range.y)
+ {
+ index = cells[px];
+ xi = ((const CudaVec4*)pos4)[index];
+ temp_x[tx4 ] = xi.x;
+ temp_x[tx4+1] = xi.y;
+ temp_x[tx4+2] = xi.z;
+ temp_x[tx4+3] = xi.w;
+ vi = ((const CudaVec3*)v)[index];
+ temp_v[tx3 ] = vi.x;
+ temp_v[tx3+1] = vi.y;
+ temp_v[tx3+2] = vi.z;
+ }
+ __syncthreads();
+ if (px < ghost)
+ {
+ // actual particle -> compute interactions
+ force = CudaVec3::make(0,0,0);
+ int np = min(range.y-px0,BSIZE);
+ for (int i=0; i < np; ++i)
+ {
+ if (i != threadIdx.x)
+ SPHFluidCalcForce(xi, vi, ((const CudaVec4*)temp_x)[i], ((const CudaVec3*)temp_v)[i], force, params);
+ }
+ }
+ __syncthreads();
+ // loop through other groups of particles
+ for (int py0 = range.x; py0 < range.y; py0 += BSIZE)
+ {
+ if (py0 == px0) continue;
+ int py = py0 + threadIdx.x;
+ if (py < range.y)
+ {
+ int index2 = cells[py];
+ CudaVec4 xj = ((const CudaVec4*)pos4)[index2];
+ temp_x[tx4 ] = xj.x;
+ temp_x[tx4+1] = xj.y;
+ temp_x[tx4+2] = xj.z;
+ temp_x[tx4+3] = xj.w;
+ CudaVec3 vj = ((const CudaVec3*)v)[index2];
+ temp_v[tx3 ] = vj.x;
+ temp_v[tx3+1] = vj.y;
+ temp_v[tx3+2] = vj.z;
+ }
+ __syncthreads();
+ if (px < ghost)
+ {
+ // actual particle -> compute interactions
+ int np = min(range.y-py0,BSIZE);
+ for (int i=0; i < np; ++i)
+ {
+ SPHFluidCalcForce(xi, vi, ((const CudaVec4*)temp_x)[i], ((const CudaVec3*)temp_v)[i], force, params);
+ }
+ }
+ __syncthreads();
+ }
+ if (px < ghost)
+ {
+ // actual particle -> write computed force
+ ((CudaVec3*)f)[index] += force;
+ }
+ }
+ }
+}
+/*
+template
+__global__ void SPHFluidForceFieldCuda3t_addDForce_kernel(int size, const int *cells, const int *cellGhost, GPUSPHFluid params, real* df, const real* pos4, const real* v, const real* dx)
+{
+ __shared__ int2 range;
+ __shared__ int ghost;
+ __shared__ real temp_x[BSIZE*4];
+ __shared__ real temp_v[BSIZE*3];
+ __shared__ real temp_dx[BSIZE*3];
+ int tx3 = threadIdx.x * 3;
+ int tx4 = threadIdx.x << 2;
+ for (int cell = blockIdx.x; cell < size; cell += gridDim.x)
+ {
+ if (!threadIdx.x)
+ {
+ //range = *(const int2*)(cells+cell);
+ range.x = cells[cell];
+ range.y = cells[cell+1];
+ range.y &= ~(1U<<31);
+ ghost = cellGhost[cell];
+ }
+ __syncthreads();
+ if (range.x <= 0) continue; // no actual particle in this cell
+ for (int px0 = range.x; px0 < ghost; px0 += BSIZE)
+ {
+ int px = px0 + threadIdx.x;
+ CudaVec4 xi;
+ CudaVec3 vi;
+ CudaVec3 dxi;
+ CudaVec3 dforce;
+ int index;
+ if (px < range.y)
+ {
+ index = cells[px];
+ xi = ((const CudaVec4*)pos4)[index];
+ temp_x[tx4 ] = xi.x;
+ temp_x[tx4+1] = xi.y;
+ temp_x[tx4+2] = xi.z;
+ temp_x[tx4+3] = xi.w;
+ vi = ((const CudaVec3*)v)[index];
+ temp_v[tx3 ] = vi.x;
+ temp_v[tx3+1] = vi.y;
+ temp_v[tx3+2] = vi.z;
+ dxi = ((const CudaVec3*)dx)[index];
+ temp_dx[tx3 ] = dxi.x;
+ temp_dx[tx3+1] = dxi.y;
+ temp_dx[tx3+2] = dxi.z;
+ }
+ __syncthreads();
+ if (px < ghost)
+ { // actual particle -> compute interactions
+ dforce = CudaVec3::make(0,0,0);
+ int np = min(range.y-px0,BSIZE);
+ for (int i=0; i < np; ++i)
+ {
+ if (i != threadIdx.x)
+ SPHFluidCalcDForce(xi, vi, dxi, ((const CudaVec4*)temp_x)[i], ((const CudaVec3*)temp_v)[i], ((const CudaVec3*)temp_dx)[i], dforce, params);
+ }
+ }
+ __syncthreads();
+ // loop through other groups of particles
+ for (int py0 = range.x; py0 < range.y; py0 += BSIZE)
+ {
+ if (py0 == px0) continue;
+ int py = py0 + threadIdx.x;
+ if (py < range.y)
+ {
+ int index2 = cells[py];
+ CudaVec4 xj = ((const CudaVec4*)pos4)[index2];
+ temp_x[tx4 ] = xj.x;
+ temp_x[tx4+1] = xj.y;
+ temp_x[tx4+2] = xj.z;
+ temp_x[tx4+3] = xj.w;
+ CudaVec3 vj = ((const CudaVec3*)v)[index2];
+ temp_v[tx3 ] = vj.x;
+ temp_v[tx3+1] = vj.y;
+ temp_v[tx3+2] = vj.z;
+ CudaVec3 dxj = ((const CudaVec3*)dx)[index2];
+ temp_dx[tx3 ] = dxj.x;
+ temp_dx[tx3+1] = dxj.y;
+ temp_dx[tx3+2] = dxj.z;
+ }
+ __syncthreads();
+ if (px < ghost)
+ { // actual particle -> compute interactions
+ int np = min(range.y-py0,BSIZE);
+ for (int i=0; i < np; ++i)
+ {
+ SPHFluidCalcDForce(xi, vi, dxi, ((const CudaVec4*)temp_x)[i], ((const CudaVec3*)temp_v)[i], ((const CudaVec3*)temp_dx)[i], dforce, params);
+ }
+ }
+ __syncthreads();
+ }
+ if (px < ghost)
+ { // actual particle -> write computed force
+ ((CudaVec3*)df)[index] += dforce;
+ }
+ }
+ }
+}
+*/
+//////////////////////
+// CPU-side methods //
+//////////////////////
+
+void SPHFluidForceFieldCuda3f_computeDensity(int kernelType, int pressureType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3f* params, void* pos4, const void* x)
+{
+ dim3 threads(BSIZE,1);
+ dim3 grid(60,1);
+ switch(kernelType)
+ {
+ case 0:
+ switch(pressureType)
+ {
+ case 0: //SPHFluidForceFieldCuda3t_computeDensity_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)pos4, (const float*)x);
+ break;
+ case 1: SPHFluidForceFieldCuda3t_computeDensity_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)pos4, (const float*)x);
+ break;
+ default: break;
+ }
+ break;
+ case 1:
+ switch(pressureType)
+ {
+ case 0: //SPHFluidForceFieldCuda3t_computeDensity_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)pos4, (const float*)x);
+ break;
+ case 1: SPHFluidForceFieldCuda3t_computeDensity_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)pos4, (const float*)x);
+ break;
+ default: break;
+ }
+ break;
+ default: break;
+ }
+ mycudaDebugError("SPHFluidForceFieldCuda3t_computeDensity_kernel");
+}
+
+void SPHFluidForceFieldCuda3f_addForce(int kernelType, int pressureType, int viscosityType, int surfaceTensionType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3f* params, void* f, const void* x, const void* v)
+{
+ dim3 threads(BSIZE,1);
+ dim3 grid(60,1);
+ switch(kernelType)
+ {
+ case 0:
+ switch(pressureType)
+ {
+ case 0:
+ switch(viscosityType)
+ {
+ case 0:
+ switch(surfaceTensionType)
+ {
+ case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v);
+ break;
+ case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v);
+ break;
+ case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v);
+ break;
+ default: break;
+ }
+ break;
+ case 1:
+ switch(surfaceTensionType)
+ {
+ case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v);
+ break;
+ case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v);
+ break;
+ case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v);
+ break;
+ default: break;
+ }
+ break;
+ case 2:
+ switch(surfaceTensionType)
+ {
+ case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v);
+ break;
+ case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v);
+ break;
+ case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v);
+ break;
+ default: break;
+ }
+ break;
+ default: break;
+ }
+ break;
+ case 1:
+ switch(viscosityType)
+ {
+ case 0:
+ switch(surfaceTensionType)
+ {
+ case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v);
+ break;
+ case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v);
+ break;
+ case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v);
+ break;
+ default: break;
+ }
+ break;
+ case 1:
+ switch(surfaceTensionType)
+ {
+ case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v);
+ break;
+ case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v);
+ break;
+ case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v);
+ break;
+ default: break;
+ }
+ break;
+ case 2:
+ switch(surfaceTensionType)
+ {
+ case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v);
+ break;
+ case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v);
+ break;
+ case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v);
+ break;
+ default: break;
+ }
+ break;
+ default: break;
+ }
+ break;
+ default: break;
+ }
+ break;
+ case 1:
+ switch(pressureType)
+ {
+ case 0:
+ switch(viscosityType)
+ {
+ case 0:
+ switch(surfaceTensionType)
+ {
+ case 0: SPHFluidForceFieldCuda3t_addForce_kernel