diff --git a/CMakeLists.txt b/CMakeLists.txt index 5d1e691..d515d1e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,6 +11,7 @@ sofa_find_package(Sofa.GL QUIET) set(PLUGIN_SPH_SRC_DIR src/SofaSphFluid) set(HEADER_FILES ${PLUGIN_SPH_SRC_DIR}/config.h.in + ${PLUGIN_SPH_SRC_DIR}/initSPHFluid.h ${PLUGIN_SPH_SRC_DIR}/ParticleSink.h ${PLUGIN_SPH_SRC_DIR}/ParticleSink.inl ${PLUGIN_SPH_SRC_DIR}/ParticleSource.h @@ -74,3 +75,5 @@ sofa_create_package_with_targets( INCLUDE_INSTALL_DIR ${PROJECT_NAME} RELOCATABLE "plugins" ) + +sofa_add_subdirectory(plugin extensions/CUDA SofaSphFluid.CUDA) diff --git a/extensions/CUDA/CMakeLists.txt b/extensions/CUDA/CMakeLists.txt new file mode 100644 index 0000000..8acc8bd --- /dev/null +++ b/extensions/CUDA/CMakeLists.txt @@ -0,0 +1,48 @@ +cmake_minimum_required(VERSION 3.12) +project(SofaSphFluid.CUDA CUDA CXX) + +set(HEADER_FILES + src/SofaSphFluid/CUDA/init.h + src/SofaSphFluid/CUDA/config.h.in + + src/SofaSphFluid/CUDA/CudaParticleSource.h + src/SofaSphFluid/CUDA/CudaParticleSource.inl + src/SofaSphFluid/CUDA/CudaSPHFluidForceField.h + src/SofaSphFluid/CUDA/CudaSPHFluidForceField.inl + src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.h + src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.inl + src/SofaSphFluid/CUDA/CudaSpatialGridContainer.h + src/SofaSphFluid/CUDA/CudaSpatialGridContainer.inl +) + +set(SOURCE_FILES + src/SofaSphFluid/CUDA/init.cpp + + src/SofaSphFluid/CUDA/CudaParticleSource.cpp + src/SofaSphFluid/CUDA/CudaSPHFluidForceField.cpp + src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.cpp + src/SofaSphFluid/CUDA/CudaSpatialGridContainer.cpp +) + +set(CUDA_SOURCES + src/SofaSphFluid/CUDA/CudaParticleSource.cu + src/SofaSphFluid/CUDA/CudaSPHFluidForceField.cu + src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.cu + src/SofaSphFluid/CUDA/CudaSpatialGridContainer.cu +) + +sofa_find_package(SofaSphFluid REQUIRED) +sofa_find_package(SofaCUDA REQUIRED) + +add_library(${PROJECT_NAME} SHARED ${HEADER_FILES} ${SOURCE_FILES} ${CUDA_SOURCES}) +target_link_libraries(${PROJECT_NAME} SofaSphFluid) +target_link_libraries(${PROJECT_NAME} SofaCUDA) + +sofa_create_package_with_targets( + PACKAGE_NAME ${PROJECT_NAME} + PACKAGE_VERSION ${Sofa_VERSION} + TARGETS ${PROJECT_NAME} AUTO_SET_TARGET_PROPERTIES + INCLUDE_SOURCE_DIR "src" + INCLUDE_INSTALL_DIR "${PROJECT_NAME}" + RELOCATABLE "plugins" +) diff --git a/extensions/CUDA/SofaSphFluid.CUDAConfig.cmake.in b/extensions/CUDA/SofaSphFluid.CUDAConfig.cmake.in new file mode 100644 index 0000000..67b3bbe --- /dev/null +++ b/extensions/CUDA/SofaSphFluid.CUDAConfig.cmake.in @@ -0,0 +1,9 @@ +# CMake package configuration file for the SofaSphFluid.CUDA library + +@PACKAGE_GUARD@ +@PACKAGE_INIT@ + +find_package(SofaSphFluid QUIET REQUIRED) +find_package(SofaCUDA QUIET REQUIRED) + +check_required_components(SofaSphFluid.CUDA) diff --git a/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticleSource.cpp b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticleSource.cpp new file mode 100644 index 0000000..a7d1714 --- /dev/null +++ b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticleSource.cpp @@ -0,0 +1,56 @@ +/****************************************************************************** +* SOFA, Simulation Open-Framework Architecture * +* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU Lesser General Public License as published by * +* the Free Software Foundation; either version 2.1 of the License, or (at * +* your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, but WITHOUT * +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * +* for more details. * +* * +* You should have received a copy of the GNU Lesser General Public License * +* along with this program. If not, see . * +******************************************************************************* +* Authors: The SOFA Team and external contributors (see Authors.txt) * +* * +* Contact information: contact@sofa-framework.org * +******************************************************************************/ +#include +#include "CudaParticleSource.inl" +#include +#include +#include + +namespace sofa +{ + + +namespace core::behavior +{ +template class ProjectiveConstraintSet; +#ifdef SOFA_GPU_CUDA_DOUBLE +template class ProjectiveConstraintSet; +#endif +} // namespace core::behavior + + +namespace gpu::cuda +{ + + + +int ParticleSourceCudaClass = core::RegisterObject("Supports GPU-side computations using CUDA") + .add< component::misc::ParticleSource >() +#ifdef SOFA_GPU_CUDA_DOUBLE + .add< component::misc::ParticleSource >() +#endif // SOFA_GPU_CUDA_DOUBLE + ; + +} // namespace gpu::cuda + + +} // namespace sofa diff --git a/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticleSource.cu b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticleSource.cu new file mode 100644 index 0000000..bca3121 --- /dev/null +++ b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticleSource.cu @@ -0,0 +1,132 @@ +/****************************************************************************** +* SOFA, Simulation Open-Framework Architecture * +* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU Lesser General Public License as published by * +* the Free Software Foundation; either version 2.1 of the License, or (at * +* your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, but WITHOUT * +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * +* for more details. * +* * +* You should have received a copy of the GNU Lesser General Public License * +* along with this program. If not, see . * +******************************************************************************* +* Authors: The SOFA Team and external contributors (see Authors.txt) * +* * +* Contact information: contact@sofa-framework.org * +******************************************************************************/ +#include +#include +#include "cuda.h" + +#if defined(__cplusplus) && CUDA_VERSION < 2000 +namespace sofa +{ +namespace gpu +{ +namespace cuda +{ +#endif + +extern "C" +{ + + void ParticleSourceCuda3f_fillValues(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, float fx, float fy, float fz); + void ParticleSourceCuda3f_copyValuesWithOffset(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, const void* src, float fx, float fy, float fz); + +#ifdef SOFA_GPU_CUDA_DOUBLE + + void ParticleSourceCuda3d_fillValues(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, double fx, double fy, double fz); + void ParticleSourceCuda3d_copyValuesWithOffset(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, const void* src, double fx, double fy, double fz); + +#endif // SOFA_GPU_CUDA_DOUBLE +} + +////////////////////// +// GPU-side methods // +////////////////////// + + +template +__global__ void ParticleSourceCuda3t_fillValues_kernel(unsigned int totalsize, unsigned int subsetsize, real* dest, const unsigned int* indices, real fx, real fy, real fz) +{ + unsigned int index0 = blockIdx.x * BSIZE; + unsigned int index = index0+threadIdx.x; + if (index < subsetsize) + { + unsigned int dindex = indices[index]; + if (dindex < totalsize) + { + unsigned int dindex3 = dindex * 3; + dest[dindex3+0] = fx; + dest[dindex3+1] = fy; + dest[dindex3+2] = fz; + } + } +} + +template +__global__ void ParticleSourceCuda3t_copyValuesWithOffset_kernel(unsigned int totalsize, unsigned int subsetsize, real* dest, const unsigned int* indices, const real* src, real fx, real fy, real fz) +{ + unsigned int index0 = blockIdx.x * BSIZE; + unsigned int index = index0+threadIdx.x; + if (index < subsetsize) + { + unsigned int dindex = indices[index]; + if (dindex < totalsize) + { + unsigned int dindex3 = dindex * 3; + unsigned int index3 = index * 3; + dest[dindex3+0] = src[index3+0]+fx; + dest[dindex3+1] = src[index3+1]+fy; + dest[dindex3+2] = src[index3+2]+fz; + } + } +} + +////////////////////// +// CPU-side methods // +////////////////////// + + +void ParticleSourceCuda3f_fillValues(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, float fx, float fy, float fz) +{ + dim3 threads(BSIZE,1); + dim3 grid((subsetsize+BSIZE-1)/BSIZE,1); + {ParticleSourceCuda3t_fillValues_kernel<<< grid, threads >>>(totalsize, subsetsize, (float*)dest, (const unsigned int*)indices, fx, fy, fz); mycudaDebugError("ParticleSourceCuda3t_fillValues_kernel");} +} + +void ParticleSourceCuda3f_copyValuesWithOffset(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, const void* src, float fx, float fy, float fz) +{ + dim3 threads(BSIZE,1); + dim3 grid((subsetsize+BSIZE-1)/BSIZE,1); + {ParticleSourceCuda3t_copyValuesWithOffset_kernel<<< grid, threads >>>(totalsize, subsetsize, (float*)dest, (const unsigned int*)indices, (const float*)src, fx, fy, fz); mycudaDebugError("ParticleSourceCuda3t_copyValuesWithOffset_kernel");} +} + +#ifdef SOFA_GPU_CUDA_DOUBLE + +void ParticleSourceCuda3d_fillValues(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, double fx, double fy, double fz) +{ + dim3 threads(BSIZE,1); + dim3 grid((subsetsize+BSIZE-1)/BSIZE,1); + {ParticleSourceCuda3t_fillValues_kernel<<< grid, threads >>>(totalsize, subsetsize, (double*)dest, (const unsigned int*)indices, fx, fy, fz); mycudaDebugError("ParticleSourceCuda3t_fillValues_kernel");} +} + +void ParticleSourceCuda3d_copyValuesWithOffset(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, const void* src, double fx, double fy, double fz) +{ + dim3 threads(BSIZE,1); + dim3 grid((subsetsize+BSIZE-1)/BSIZE,1); + {ParticleSourceCuda3t_copyValuesWithOffset_kernel<<< grid, threads >>>(totalsize, subsetsize, (double*)dest, (const unsigned int*)indices, (const double*)src, fx, fy, fz); mycudaDebugError("ParticleSourceCuda3t_copyValuesWithOffset_kernel");} +} + +#endif // SOFA_GPU_CUDA_DOUBLE + +#if defined(__cplusplus) && CUDA_VERSION < 2000 +} // namespace cuda +} // namespace gpu +} // namespace sofa +#endif diff --git a/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticleSource.h b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticleSource.h new file mode 100644 index 0000000..37847f0 --- /dev/null +++ b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticleSource.h @@ -0,0 +1,27 @@ +/****************************************************************************** +* SOFA, Simulation Open-Framework Architecture * +* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU Lesser General Public License as published by * +* the Free Software Foundation; either version 2.1 of the License, or (at * +* your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, but WITHOUT * +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * +* for more details. * +* * +* You should have received a copy of the GNU Lesser General Public License * +* along with this program. If not, see . * +******************************************************************************* +* Authors: The SOFA Team and external contributors (see Authors.txt) * +* * +* Contact information: contact@sofa-framework.org * +******************************************************************************/ +#ifndef SOFA_GPU_CUDA_CUDAPARTICLESOURCE_H +#define SOFA_GPU_CUDA_CUDAPARTICLESOURCE_H + +#include + +#endif diff --git a/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticleSource.inl b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticleSource.inl new file mode 100644 index 0000000..9a66fdb --- /dev/null +++ b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticleSource.inl @@ -0,0 +1,138 @@ +/****************************************************************************** +* SOFA, Simulation Open-Framework Architecture * +* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU Lesser General Public License as published by * +* the Free Software Foundation; either version 2.1 of the License, or (at * +* your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, but WITHOUT * +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * +* for more details. * +* * +* You should have received a copy of the GNU Lesser General Public License * +* along with this program. If not, see . * +******************************************************************************* +* Authors: The SOFA Team and external contributors (see Authors.txt) * +* * +* Contact information: contact@sofa-framework.org * +******************************************************************************/ +#ifndef SOFA_GPU_CUDA_CUDAPARTICLESOURCE_INL +#define SOFA_GPU_CUDA_CUDAPARTICLESOURCE_INL + +#include "CudaParticleSource.h" +#include + +namespace sofa +{ + + +namespace gpu::cuda +{ + +extern "C" +{ + + void ParticleSourceCuda3f_fillValues(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, float fx, float fy, float fz); + void ParticleSourceCuda3f_copyValuesWithOffset(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, const void* src, float fx, float fy, float fz); + + +#ifdef SOFA_GPU_CUDA_DOUBLE + + void ParticleSourceCuda3d_fillValues(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, double fx, double fy, double fz); + void ParticleSourceCuda3d_copyValuesWithOffset(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, const void* src, double fx, double fy, double fz); + +#endif // SOFA_GPU_CUDA_DOUBLE +} + +} // namespace gpu::cuda + + +namespace component::misc +{ + +using namespace gpu::cuda; + +// template <> +// void ParticleSource::projectResponse(VecDeriv& res) +// { +// if (!this->mstate) return; +// const VecIndex& lastparticles = this->lastparticles.getValue(); +// if (lastparticles.empty()) return; +// double time = getContext()->getTime(); +// if (time < f_start.getValue() || time > f_stop.getValue()) return; +// // constraint the last values +// //mycudaMemset(((Deriv*)res.deviceWrite())+lastparticles[0], 0, lastparticles.size()*sizeof(Deriv)); +// ParticleSourceCuda3f_fillValues(res.size(), lastparticles.size(), res.deviceWrite(), lastparticles.deviceRead(), 0, 0, 0); +// } + +// template <> +// void ParticleSource::projectVelocity(VecDeriv& res) +// { +// if (!this->mstate) return; +// const VecIndex& lastparticles = this->lastparticles.getValue(); +// if (lastparticles.empty()) return; +// double time = getContext()->getTime(); +// if (time < f_start.getValue() || time > f_stop.getValue()) return; +// // constraint the last values +// Deriv vel = f_velocity.getValue(); +// #if 1 +// //mycudaMemset(((Deriv*)res.deviceWrite())+lastparticles[0], 0, lastparticles.size()*sizeof(Coord)); +// ParticleSourceCuda3f_fillValues(res.size(), lastparticles.size(), res.deviceWrite(), lastparticles.deviceRead(), vel[0], vel[1], vel[2]); +// #else +// for (unsigned int s=0; s +// void ParticleSource::projectPosition(VecCoord& res) +// { +// if (!this->mstate) return; +// const VecIndex& lastparticles = this->lastparticles.getValue(); +// if (lastparticles.empty()) return; +// double time = getContext()->getTime(); +// if (time < f_start.getValue() || time > f_stop.getValue()) return; +// // constraint the last values +// Deriv vel = f_velocity.getValue(); +// vel *= (time-lasttime); +// //mycudaMemset(((Deriv*)res.deviceWrite())+lastparticles[0], 0, lastparticles.size()*sizeof(Coord)); +// ParticleSourceCuda3f_copyValuesWithOffset(res.size(), lastparticles.size(), res.deviceWrite(), lastparticles.deviceRead(), lastpos.deviceRead(), vel[0], vel[1], vel[2]); +// } + + +// #ifdef SOFA_GPU_CUDA_DOUBLE + +// template <> +// void ParticleSource::projectResponse(VecDeriv& res) +// { +// if (!this->mstate) return; +// const VecIndex& lastparticles = this->lastparticles.getValue(); +// if (lastparticles.empty()) return; +// double time = getContext()->getTime(); +// if (time < f_start.getValue() || time > f_stop.getValue()) return; +// // constraint the last values +// mycudaMemset(((Deriv*)res.deviceWrite())+lastparticles[0], 0, lastparticles.size()*sizeof(Coord)); +// } + +// template <> +// void ParticleSource::projectVelocity(VecDeriv& /*res*/) +// { +// } + +// template <> +// void ParticleSource::projectPosition(VecDeriv& /*res*/) +// { +// } + +// #endif // SOFA_GPU_CUDA_DOUBLE + +} // namespace component::misc + + +} // namespace sofa + +#endif diff --git a/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.cpp b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.cpp new file mode 100644 index 0000000..68d2d48 --- /dev/null +++ b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.cpp @@ -0,0 +1,43 @@ +/****************************************************************************** +* SOFA, Simulation Open-Framework Architecture * +* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU Lesser General Public License as published by * +* the Free Software Foundation; either version 2.1 of the License, or (at * +* your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, but WITHOUT * +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * +* for more details. * +* * +* You should have received a copy of the GNU Lesser General Public License * +* along with this program. If not, see . * +******************************************************************************* +* Authors: The SOFA Team and external contributors (see Authors.txt) * +* * +* Contact information: contact@sofa-framework.org * +******************************************************************************/ +#include +#include "CudaParticlesRepulsionForceField.inl" +#include +#include +#include + + +namespace sofa::gpu::cuda +{ + +int ParticlesRepulsionForceFieldCudaClass = core::RegisterObject("Supports GPU-side computations using CUDA") + .add< component::forcefield::ParticlesRepulsionForceField >() +#ifdef SOFA_GPU_CUDA_DOUBLE + .add< component::forcefield::ParticlesRepulsionForceField >() +#endif // SOFA_GPU_CUDA_DOUBLE + ; + +} // namespace sofa::gpu::cuda + + + + diff --git a/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.cu b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.cu new file mode 100644 index 0000000..cafd2e8 --- /dev/null +++ b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.cu @@ -0,0 +1,320 @@ +/****************************************************************************** +* SOFA, Simulation Open-Framework Architecture * +* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU Lesser General Public License as published by * +* the Free Software Foundation; either version 2.1 of the License, or (at * +* your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, but WITHOUT * +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * +* for more details. * +* * +* You should have received a copy of the GNU Lesser General Public License * +* along with this program. If not, see . * +******************************************************************************* +* Authors: The SOFA Team and external contributors (see Authors.txt) * +* * +* Contact information: contact@sofa-framework.org * +******************************************************************************/ +#include +#include +#include "cuda.h" + +#if defined(__cplusplus) && CUDA_VERSION < 2000 +namespace sofa +{ +namespace gpu +{ +namespace cuda +{ +#endif + +template +class GPURepulsion +{ +public: + real d; + real d2; + real stiffness; + real damping; +}; + +typedef GPURepulsion GPURepulsion3f; +typedef GPURepulsion GPURepulsion3d; + +extern "C" +{ + void ParticlesRepulsionForceFieldCuda3f_addForce (unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3f* repulsion, void* f, const void* x, const void* v ); + void ParticlesRepulsionForceFieldCuda3f_addDForce(unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3f* repulsion, void* f, const void* x, const void* dx); + +#ifdef SOFA_GPU_CUDA_DOUBLE + + void ParticlesRepulsionForceFieldCuda3d_addForce (unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3d* repulsion, void* f, const void* x, const void* v ); + void ParticlesRepulsionForceFieldCuda3d_addDForce(unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3d* repulsion, void* f, const void* x, const void* dx); + +#endif // SOFA_GPU_CUDA_DOUBLE +} + +////////////////////// +// GPU-side methods // +////////////////////// + +template +__device__ void ParticlesRepulsionCalcForce(CudaVec3 x1, CudaVec3 v1, CudaVec3 x2, CudaVec3 v2, CudaVec3& force, GPURepulsion& rep) +{ + CudaVec3 n = x2-x1; + real d2 = norm2(n); + if (d2 < rep.d2) + { + CudaVec3 vi = v2-v1; + real inv_d = rsqrtf(d2); + n *= inv_d; + real d = d2*inv_d - rep.d; + real forceIntensity = rep.stiffness*d; + real dampingIntensity = rep.damping*d; + force += n*forceIntensity - vi*dampingIntensity; + } +} + +template +__device__ void ParticlesRepulsionCalcDForce(CudaVec3 x1, CudaVec3 dx1, CudaVec3 x2, CudaVec3 dx2, CudaVec3& dforce, GPURepulsion& rep) +{ + CudaVec3 n = x2-x1; + real d2 = norm2(n); + if (d2 < rep.d2) + { + CudaVec3 dxi = dx2-dx1; + //real inv_d = rsqrtf(d2); + //n *= inv_d; + //dforce = n * (rep.stiffness * dot(dxi, n)); + + //dforce = (n * (rep.stiffness * dot(dxi, n)))/d2; + + dforce += n * (real)__fdividef((rep.stiffness * dot(dxi, n)),d2); + } +} + +template +__global__ void ParticlesRepulsionForceFieldCuda3t_addForce_kernel(int size, const int *cells, const int *cellGhost, GPURepulsion repulsion, real* f, const real* x, const real* v) +{ + __shared__ int2 range; + __shared__ int ghost; + __shared__ real temp_x[BSIZE*3]; + __shared__ real temp_v[BSIZE*3]; + int tx3 = threadIdx.x * 3; + for (int cell = blockIdx.x; cell < size; cell += gridDim.x) + { + if (!threadIdx.x) + { + //range = *(const int2*)(cells+cell); + range.x = cells[cell]; + range.y = cells[cell+1]; + range.y &= ~(1U<<31); + ghost = cellGhost[cell]; + } + __syncthreads(); + if (range.x <= 0) continue; // no actual particle in this cell + for (int px0 = range.x; px0 < ghost; px0 += BSIZE) + { + int px = px0 + threadIdx.x; + CudaVec3 xi; + CudaVec3 vi; + CudaVec3 force; + int index; + if (px < range.y) + { + index = cells[px]; + xi = ((const CudaVec3*)x)[index]; + temp_x[tx3 ] = xi.x; + temp_x[tx3+1] = xi.y; + temp_x[tx3+2] = xi.z; + vi = ((const CudaVec3*)v)[index]; + temp_v[tx3 ] = vi.x; + temp_v[tx3+1] = vi.y; + temp_v[tx3+2] = vi.z; + } + __syncthreads(); + if (px < ghost) + { + // actual particle -> compute interactions + force = CudaVec3::make(0,0,0); + int np = min(range.y-px0,BSIZE); + for (int i=0; i < np; ++i) + { + if (i != threadIdx.x) + ParticlesRepulsionCalcForce(xi, vi, ((const CudaVec3*)temp_x)[i], ((const CudaVec3*)temp_v)[i], force, repulsion); + } + } + __syncthreads(); + // loop through other groups of particles + for (int py0 = range.x; py0 < range.y; py0 += BSIZE) + { + if (py0 == px0) continue; + int py = py0 + threadIdx.x; + if (py < range.y) + { + int index2 = cells[py]; + CudaVec3 xj = ((const CudaVec3*)x)[index2]; + temp_x[tx3 ] = xj.x; + temp_x[tx3+1] = xj.y; + temp_x[tx3+2] = xj.z; + CudaVec3 vj = ((const CudaVec3*)v)[index2]; + temp_v[tx3 ] = vj.x; + temp_v[tx3+1] = vj.y; + temp_v[tx3+2] = vj.z; + } + __syncthreads(); + if (px < ghost) + { + // actual particle -> compute interactions + int np = min(range.y-py0,BSIZE); + for (int i=0; i < np; ++i) + { + ParticlesRepulsionCalcForce(xi, vi, ((const CudaVec3*)temp_x)[i], ((const CudaVec3*)temp_v)[i], force, repulsion); + } + } + __syncthreads(); + } + if (px < ghost) + { + // actual particle -> write computed force + ((CudaVec3*)f)[index] += force; + } + } + } +} + +template +__global__ void ParticlesRepulsionForceFieldCuda3t_addDForce_kernel(int size, const int *cells, const int *cellGhost, GPURepulsion repulsion, real* df, const real* x, const real* dx) +{ + __shared__ int2 range; + __shared__ int ghost; + __shared__ real temp_x[BSIZE*3]; + __shared__ real temp_dx[BSIZE*3]; + int tx3 = threadIdx.x * 3; + for (int cell = blockIdx.x; cell < size; cell += gridDim.x) + { + if (!threadIdx.x) + { + //range = *(const int2*)(cells+cell); + range.x = cells[cell]; + range.y = cells[cell+1]; + range.y &= ~(1U<<31); + ghost = cellGhost[cell]; + } + __syncthreads(); + if (range.x <= 0) continue; // no actual particle in this cell + for (int px0 = range.x; px0 < ghost; px0 += BSIZE) + { + int px = px0 + threadIdx.x; + CudaVec3 xi; + CudaVec3 dxi; + CudaVec3 dforce; + int index; + if (px < range.y) + { + index = cells[px]; + xi = ((const CudaVec3*)x)[index]; + temp_x[tx3 ] = xi.x; + temp_x[tx3+1] = xi.y; + temp_x[tx3+2] = xi.z; + dxi = ((const CudaVec3*)dx)[index]; + temp_dx[tx3 ] = dxi.x; + temp_dx[tx3+1] = dxi.y; + temp_dx[tx3+2] = dxi.z; + } + __syncthreads(); + if (px < ghost) + { + // actual particle -> compute interactions + dforce = CudaVec3::make(0,0,0); + int np = min(range.y-px0,BSIZE); + for (int i=0; i < np; ++i) + { + if (i != threadIdx.x) + ParticlesRepulsionCalcDForce(xi, dxi, ((const CudaVec3*)temp_x)[i], ((const CudaVec3*)temp_dx)[i], dforce, repulsion); + } + } + __syncthreads(); + // loop through other groups of particles + for (int py0 = range.x; py0 < range.y; py0 += BSIZE) + { + if (py0 == px0) continue; + int py = py0 + threadIdx.x; + if (py < range.y) + { + int index2 = cells[py]; + CudaVec3 xj = ((const CudaVec3*)x)[index2]; + temp_x[tx3 ] = xj.x; + temp_x[tx3+1] = xj.y; + temp_x[tx3+2] = xj.z; + CudaVec3 dxj = ((const CudaVec3*)dx)[index2]; + temp_dx[tx3 ] = dxj.x; + temp_dx[tx3+1] = dxj.y; + temp_dx[tx3+2] = dxj.z; + } + __syncthreads(); + if (px < ghost) + { + // actual particle -> compute interactions + int np = min(range.y-py0,BSIZE); + for (int i=0; i < np; ++i) + { + ParticlesRepulsionCalcDForce(xi, dxi, ((const CudaVec3*)temp_x)[i], ((const CudaVec3*)temp_dx)[i], dforce, repulsion); + } + } + __syncthreads(); + } + if (px < ghost) + { + // actual particle -> write computed force + ((CudaVec3*)df)[index] += dforce; + } + } + } +} + +////////////////////// +// CPU-side methods // +////////////////////// + +void ParticlesRepulsionForceFieldCuda3f_addForce(unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3f* repulsion, void* f, const void* x, const void* v) +{ + dim3 threads(BSIZE,1); + dim3 grid(60,1); + {ParticlesRepulsionForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *repulsion, (float*)f, (const float*)x, (const float*)v); mycudaDebugError("ParticlesRepulsionForceFieldCuda3t_addForce_kernel");} +} + +void ParticlesRepulsionForceFieldCuda3f_addDForce(unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3f* repulsion, void* df, const void* x, const void* dx) +{ + dim3 threads(BSIZE,1); + dim3 grid(60/BSIZE,1); + {ParticlesRepulsionForceFieldCuda3t_addDForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *repulsion, (float*)df, (const float*)x, (const float*)dx); mycudaDebugError("ParticlesRepulsionForceFieldCuda3t_addDForce_kernel");} +} + +#ifdef SOFA_GPU_CUDA_DOUBLE + +void ParticlesRepulsionForceFieldCuda3d_addForce(unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3d* repulsion, void* f, const void* x, const void* v) +{ + dim3 threads(BSIZE,1); + dim3 grid(60/BSIZE,1); + {ParticlesRepulsionForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *repulsion, (double*)f, (const double*)x, (const double*)v); mycudaDebugError("ParticlesRepulsionForceFieldCuda3t_addForce_kernel");} +} + +void ParticlesRepulsionForceFieldCuda3d_addDForce(unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3d* repulsion, void* df, const void* x, const void* dx) +{ + dim3 threads(BSIZE,1); + dim3 grid(60/BSIZE,1); + {ParticlesRepulsionForceFieldCuda3t_addDForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *repulsion, (double*)df, (const double*)x, (const double*)dx); mycudaDebugError("ParticlesRepulsionForceFieldCuda3t_addDForce_kernel");} +} + +#endif // SOFA_GPU_CUDA_DOUBLE + +#if defined(__cplusplus) && CUDA_VERSION < 2000 +} // namespace cuda +} // namespace gpu +} // namespace sofa +#endif diff --git a/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.h b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.h new file mode 100644 index 0000000..7631366 --- /dev/null +++ b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.h @@ -0,0 +1,75 @@ +/****************************************************************************** +* SOFA, Simulation Open-Framework Architecture * +* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU Lesser General Public License as published by * +* the Free Software Foundation; either version 2.1 of the License, or (at * +* your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, but WITHOUT * +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * +* for more details. * +* * +* You should have received a copy of the GNU Lesser General Public License * +* along with this program. If not, see . * +******************************************************************************* +* Authors: The SOFA Team and external contributors (see Authors.txt) * +* * +* Contact information: contact@sofa-framework.org * +******************************************************************************/ +#ifndef SOFA_GPU_CUDA_CUDAPARTICLESREPULSIONFORCEFIELD_H +#define SOFA_GPU_CUDA_CUDAPARTICLESREPULSIONFORCEFIELD_H + +#include +#include +#include + +namespace sofa +{ + + +namespace gpu::cuda +{ + +template +struct GPURepulsion +{ + real d; + real d2; + real stiffness; + real damping; +}; + +typedef GPURepulsion GPURepulsion3f; +typedef GPURepulsion GPURepulsion3d; + +} // namespace gpu::cuda + + +namespace component::forcefield +{ + +template <> +void ParticlesRepulsionForceField::addForce(const core::MechanicalParams* mparams, DataVecDeriv& d_f, const DataVecCoord& d_x, const DataVecDeriv& d_v); + +template <> +void ParticlesRepulsionForceField::addDForce(const core::MechanicalParams* mparams, DataVecDeriv& d_df, const DataVecDeriv& d_dx); + +#ifdef SOFA_GPU_CUDA_DOUBLE + +template <> +void ParticlesRepulsionForceField::addForce(const core::MechanicalParams* mparams, DataVecDeriv& d_f, const DataVecCoord& d_x, const DataVecDeriv& d_v); + +template <> +void ParticlesRepulsionForceField::addDForce(const core::MechanicalParams* mparams, DataVecDeriv& d_df, const DataVecDeriv& d_dx); + +#endif // SOFA_GPU_CUDA_DOUBLE + +} // namespace component::forcefield + + +} // namespace sofa + +#endif diff --git a/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.inl b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.inl new file mode 100644 index 0000000..2931e07 --- /dev/null +++ b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaParticlesRepulsionForceField.inl @@ -0,0 +1,168 @@ +/****************************************************************************** +* SOFA, Simulation Open-Framework Architecture * +* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU Lesser General Public License as published by * +* the Free Software Foundation; either version 2.1 of the License, or (at * +* your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, but WITHOUT * +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * +* for more details. * +* * +* You should have received a copy of the GNU Lesser General Public License * +* along with this program. If not, see . * +******************************************************************************* +* Authors: The SOFA Team and external contributors (see Authors.txt) * +* * +* Contact information: contact@sofa-framework.org * +******************************************************************************/ +#ifndef SOFA_GPU_CUDA_CUDAPARTICLESREPULSIONFORCEFIELD_INL +#define SOFA_GPU_CUDA_CUDAPARTICLESREPULSIONFORCEFIELD_INL + +#include "CudaParticlesRepulsionForceField.h" +#include +//#include + +namespace sofa +{ + + +namespace gpu::cuda +{ + +extern "C" +{ + + void ParticlesRepulsionForceFieldCuda3f_addForce (unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3f* repulsion, void* f, const void* x, const void* v ); + void ParticlesRepulsionForceFieldCuda3f_addDForce(unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3f* repulsion, void* f, const void* x, const void* dx); + +#ifdef SOFA_GPU_CUDA_DOUBLE + + void ParticlesRepulsionForceFieldCuda3d_addForce (unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3d* repulsion, void* f, const void* x, const void* v ); + void ParticlesRepulsionForceFieldCuda3d_addDForce(unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3d* repulsion, void* f, const void* x, const void* dx); + +#endif // SOFA_GPU_CUDA_DOUBLE +} + +} // namespace gpu::cuda + + +namespace component::forcefield +{ + +using namespace gpu::cuda; + + +template <> +void ParticlesRepulsionForceField::addForce(const core::MechanicalParams* /*mparams*/, DataVecDeriv& d_f, const DataVecCoord& d_x, const DataVecDeriv& d_v) +{ + if (grid == NULL) return; + + VecDeriv& f = *d_f.beginEdit(); + const VecCoord& x = d_x.getValue(); + const VecDeriv& v = d_v.getValue(); + + grid->updateGrid(x); + GPURepulsion3f repulsion; + repulsion.d = distance.getValue(); + repulsion.d2 = repulsion.d*repulsion.d; + repulsion.stiffness = stiffness.getValue(); + repulsion.damping = damping.getValue(); + f.resize(x.size()); + Grid::Grid* g = grid->getGrid(); + ParticlesRepulsionForceFieldCuda3f_addForce( + g->getNbCells(), g->getCellsVector().deviceRead(), g->getCellGhostVector().deviceRead(), + &repulsion, f.deviceWrite(), x.deviceRead(), v.deviceRead()); + + d_f.endEdit(); +} + +template <> +void ParticlesRepulsionForceField::addDForce(const core::MechanicalParams* mparams, DataVecDeriv& d_df, const DataVecDeriv& d_dx) +{ + if (grid == NULL) return; + + VecDeriv& df = *d_df.beginEdit(); + const VecDeriv& dx = d_dx.getValue(); + Real kFactor = (Real)sofa::core::mechanicalparams::kFactorIncludingRayleighDamping(mparams,this->rayleighStiffness.getValue()); + Real bFactor = (Real)sofa::core::mechanicalparams::bFactor(mparams); + + const VecCoord& x = this->mstate->read(sofa::core::vec_id::read_access::position)->getValue(); + GPURepulsion3f repulsion; + repulsion.d = distance.getValue(); + repulsion.d2 = repulsion.d*repulsion.d; + repulsion.stiffness = (float)(stiffness.getValue()*kFactor); + repulsion.damping = (float)(damping.getValue()*bFactor); + df.resize(dx.size()); + Grid::Grid* g = grid->getGrid(); + ParticlesRepulsionForceFieldCuda3f_addDForce( + g->getNbCells(), g->getCellsVector().deviceRead(), g->getCellGhostVector().deviceRead(), + &repulsion, df.deviceWrite(), x.deviceRead(), dx.deviceRead()); + + d_df.endEdit(); +} + + +#ifdef SOFA_GPU_CUDA_DOUBLE + +template <> +void ParticlesRepulsionForceField::addForce(const core::MechanicalParams* /*mparams*/, DataVecDeriv& d_f, const DataVecCoord& d_x, const DataVecDeriv& d_v) +{ + if (grid == NULL) return; + + VecDeriv& f = *d_f.beginEdit(); + const VecCoord& x = d_x.getValue(); + const VecDeriv& v = d_v.getValue(); + + grid->updateGrid(x); + GPURepulsion3d repulsion; + repulsion.d = distance.getValue(); + repulsion.d2 = repulsion.d*repulsion.d; + repulsion.stiffness = stiffness.getValue(); + repulsion.damping = damping.getValue(); + f.resize(x.size()); + Grid::Grid* g = grid->getGrid(); + ParticlesRepulsionForceFieldCuda3d_addForce( + g->getNbCells(), g->getCellsVector().deviceRead(), g->getCellGhostVector().deviceRead(), + &repulsion, f.deviceWrite(), x.deviceRead(), v.deviceRead()); + + d_f.endEdit(); +} + +template <> +void ParticlesRepulsionForceField::addDForce(const core::MechanicalParams* mparams, DataVecDeriv& d_df, const DataVecDeriv& d_dx) +{ + if (grid == NULL) return; + + VecDeriv& df = *d_df.beginEdit(); + const VecDeriv& dx = d_dx.getValue(); + Real kFactor = (Real)sofa::core::mechanicalparams::kFactorIncludingRayleighDamping(mparams,this->rayleighStiffness.getValue()); + Real bFactor = (Real)sofa::core::mechanicalparams::bFactor(mparams); + + const VecCoord& x = this->mstate->read(sofa::core::vec_id::read_access::position)->getValue(); + GPURepulsion3d repulsion; + repulsion.d = distance.getValue(); + repulsion.d2 = repulsion.d*repulsion.d; + repulsion.stiffness = stiffness.getValue()*kFactor; + repulsion.damping = damping.getValue()*bFactor; + df.resize(dx.size()); + Grid::Grid* g = grid->getGrid(); + ParticlesRepulsionForceFieldCuda3d_addDForce( + g->getNbCells(), g->getCellsVector().deviceRead(), g->getCellGhostVector().deviceRead(), + &repulsion, df.deviceWrite(), x.deviceRead(), dx.deviceRead()); + + d_df.endEdit(); +} + +#endif // SOFA_GPU_CUDA_DOUBLE + + +} // namespace component::forcefield + + +} // namespace sofa + +#endif diff --git a/extensions/CUDA/src/SofaSphFluid/CUDA/CudaSPHFluidForceField.cpp b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaSPHFluidForceField.cpp new file mode 100644 index 0000000..c6dd42f --- /dev/null +++ b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaSPHFluidForceField.cpp @@ -0,0 +1,43 @@ +/****************************************************************************** +* SOFA, Simulation Open-Framework Architecture * +* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU Lesser General Public License as published by * +* the Free Software Foundation; either version 2.1 of the License, or (at * +* your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, but WITHOUT * +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * +* for more details. * +* * +* You should have received a copy of the GNU Lesser General Public License * +* along with this program. If not, see . * +******************************************************************************* +* Authors: The SOFA Team and external contributors (see Authors.txt) * +* * +* Contact information: contact@sofa-framework.org * +******************************************************************************/ +#include +#include "CudaSPHFluidForceField.inl" +#include +#include +#include + + +namespace sofa::gpu::cuda +{ + +int SPHFluidForceFieldCudaClass = core::RegisterObject("Supports GPU-side computations using CUDA") + .add< component::forcefield::SPHFluidForceField >() +#ifdef SOFA_GPU_CUDA_DOUBLE + .add< component::forcefield::SPHFluidForceField >() +#endif // SOFA_GPU_CUDA_DOUBLE + ; + +} // namespace sofa::gpu::cuda + + + + diff --git a/extensions/CUDA/src/SofaSphFluid/CUDA/CudaSPHFluidForceField.cu b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaSPHFluidForceField.cu new file mode 100644 index 0000000..1ab6b88 --- /dev/null +++ b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaSPHFluidForceField.cu @@ -0,0 +1,1246 @@ +/****************************************************************************** +* SOFA, Simulation Open-Framework Architecture * +* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU Lesser General Public License as published by * +* the Free Software Foundation; either version 2.1 of the License, or (at * +* your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, but WITHOUT * +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * +* for more details. * +* * +* You should have received a copy of the GNU Lesser General Public License * +* along with this program. If not, see . * +******************************************************************************* +* Authors: The SOFA Team and external contributors (see Authors.txt) * +* * +* Contact information: contact@sofa-framework.org * +******************************************************************************/ +#include +#include +#include "cuda.h" + +#if defined(__cplusplus) && CUDA_VERSION < 2000 +namespace sofa +{ +namespace gpu +{ +namespace cuda +{ +#endif + +template +class GPUSPHFluid +{ +public: + real h; ///< particles radius + real h2; ///< particles radius squared + real inv_h2; ///< particles radius squared inverse + real stiffness; ///< pressure stiffness + real mass; ///< particles mass + real mass2; ///< particles mass squared + real density0; ///< 1000 kg/m3 for water + real viscosity; + real surfaceTension; + + // Precomputed constants for smoothing kernels + real CWd; ///< = constWd(h) + //real CgradWd; ///< = constGradWd(h) + real CgradWp; ///< = constGradWp(h) + real ClaplacianWv; ///< = constLaplacianWv(h) +}; + +typedef GPUSPHFluid GPUSPHFluid3f; +typedef GPUSPHFluid GPUSPHFluid3d; + +extern "C" +{ + + void SPHFluidForceFieldCuda3f_computeDensity(int kernelType, int pressureType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3f* params, void* pos4, const void* x); + void SPHFluidForceFieldCuda3f_addForce (int kernelType, int pressureType, int viscosityType, int surfaceTensionType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3f* params, void* f, const void* pos4, const void* v); +//void SPHFluidForceFieldCuda3f_addDForce(int kernelType, int pressureType, int viscosityType, int surfaceTensionType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3f* params, void* f, const void* pos4, const void* v, const void* dx); + +#ifdef SOFA_GPU_CUDA_DOUBLE + + void SPHFluidForceFieldCuda3d_computeDensity(int kernelType, int pressureType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3d* params, void* pos4, const void* x); + void SPHFluidForceFieldCuda3d_addForce (int kernelType, int pressureType, int viscosityType, int surfaceTensionType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3d* params, void* f, const void* pos4, const void* v); +//void SPHFluidForceFieldCuda3d_addDForce(int kernelType, int pressureType, int viscosityType, int surfaceTensionType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3d* params, void* f, const void* pos4, const void* v, const void* dx); + +#endif // SOFA_GPU_CUDA_DOUBLE + +} + +////////////////////// +// GPU-side methods // +////////////////////// + +#ifndef R_PI +#ifdef M_PI +#define R_PI M_PI +#else +#define R_PI 3.141592653589793238462 +#endif +#endif + +template +class SPH; + +template +class SPH +{ +public: + typedef real Real; + typedef CudaVec3 Deriv; + static Real constW(Real h) + { + return (Real) (48/R_PI)/(h*h*h); + } + + static __device__ Real W0(Real C) + { + return C*((Real)(1.0/6.0)); + } + static __device__ Real W0() + { + return ((Real)(1.0/6.0)); + } + static __device__ Real W(Real r_h, Real C) + { + Real s = 1-r_h; + if (r_h < (Real)0.5) return C*((Real)(1.0/6.0) - r_h*r_h*s); + else return C*((Real)(1.0/3.0) * (s*s*s)); + } + static __device__ Real W2(Real r2_h2, Real C) + { + Real r_h = sqrtf(r2_h2); + Real s = 1-r_h; + if (r2_h2 < (Real)0.25) return C*((Real)(1.0/6.0) - r2_h2*s); + else return C*((Real)(1.0/3.0) * (s*s*s)); + } + static __device__ Real W2(Real r2_h2) + { + Real r_h = sqrtf(r2_h2); + Real s = 1-r_h; + if (r2_h2 < (Real)0.25) return ((Real)(1.0/6.0) - r2_h2*s); + else return ((Real)(1.0/3.0) * (s*s*s)); + } + static Real constGradW(Real h) + { + return constW(h)/(h*h); + } + static __device__ Deriv gradW(const Deriv& d, Real r_h, Real C) + { + Real g; + if (r_h < (Real)0.5) g = 3*r_h - 2; + else { Real s = 1-r_h; g = -s*s/r_h; } + return d*(C*g); + } + static Real constLaplacianW(Real h) + { + return constW(h)/(h*h); + } + static __device__ Real laplacianW(Real r_h, Real C) + { + if (r_h < (Real)0.5) return C*(12*r_h-6); + else if (r_h < (Real)1) return C*(6-4*r_h-2/r_h); + else return 0; + } + /// Density Smoothing Kernel + static Real constWd(Real h) + { + return constW(h); + } + static __device__ Real Wd(Real r_h, Real C) + { + return W(r_h,C); + } + static __device__ Real Wd2(Real r2_h2, Real C) + { + return W2(r2_h2,C); + } + static __device__ Real Wd2(Real r2_h2) + { + return W2(r2_h2); + } + static __device__ Real Wd0() + { + return W0(); + } + static Real constGradWd(Real h) + { + return constGradW(h); + } + static __device__ Deriv gradWd(const Deriv& d, Real r_h, Real C) + { + return gradW(d,r_h,C); + } + static __device__ Deriv gradWd2(const Deriv& d, Real r2_h2, Real C) + { + return gradW(d,sqrtf(r2_h2),C); + } + static Real constLaplacianWd(Real h) + { + return constLaplacianW(h); + } + static __device__ Real laplacianWd(Real r_h, Real C) + { + return laplacianW(r_h,C); + } + static __device__ Real laplacianWd2(Real r2_h2, Real C) + { + return laplacianW(sqrtf(r2_h2),C); + } + + /// Pressure Smoothing Kernel + static Real constWp(Real h) + { + return constW(h); + } + static __device__ Real Wp(Real r_h, Real C) + { + return W(r_h,C); + } + static Real constGradWp(Real h) + { + return constGradW(h); + } + static __device__ Deriv gradWp(const Deriv& d, Real r_h, Real C) + { + return gradW(d,r_h,C); + } + + /// Viscosity Smoothing Kernel + static Real constWv(Real h) + { + return constW(h); + } + static __device__ Real Wv(Real r_h, Real C) + { + return W(r_h,C); + } + static __device__ Real Wv2(Real r2_h2, Real r_h, Real C) + { + return W2(r2_h2,r_h,C); + } + static Real constGradWv(Real h) + { + return constGradW(h); + } + static __device__ Deriv gradWv(const Deriv& d, Real r_h, Real C) + { + return gradW(d,r_h,C); + } + static __device__ Deriv gradWv2(const Deriv& d, Real r2_h2, Real r_h, Real C) + { + return gradW(d,r_h,C); + } + static Real constLaplacianWv(Real h) + { + return constLaplacianW(h); + } + + static __device__ Real laplacianWv(Real r_h, Real C) + { + return laplacianW(r_h,C); + } +}; + +template +class SPH +{ +public: + typedef real Real; + typedef CudaVec3 Deriv; + /// Density Smoothing Kernel: W = 315 / 64pih9 * (h2 - r2)3 = 315 / 64pih3 * (1 - (r/h)2)3 + static Real constWd(Real h) + { + return (Real)(315 / (64*R_PI*h*h*h)); + } + static __device__ Real Wd(Real r_h, Real C) + { + Real a = (1-r_h*r_h); + return C*a*a*a; + } + static __device__ Real Wd2(Real r2_h2, Real C) + { + Real a = (1-r2_h2); + return C*a*a*a; + } + static __device__ Real Wd2(Real r2_h2) + { + Real a = (1-r2_h2); + return a*a*a; + } + static __device__ Real Wd0(Real C) + { + return C; + } + static __device__ Real Wd0() + { + return 1; + } + static Real constGradWd(Real h) + { + return -6*constWd(h)/(h*h); + } + static __device__ Deriv gradWd(const Deriv& d, Real r_h, Real C) + { + Real a = (1-r_h*r_h); + return d*(C*a*a); + } + static __device__ Deriv gradWd2(const Deriv& d, Real r2_h2, Real C) + { + Real a = (1-r2_h2); + return d*(C*a*a); + } + static Real constLaplacianWd(Real h) + { + return -6*constWd(h)/(h*h); + } + static __device__ Real laplacianWd(Real r_h, Real C) + { + Real r2_h2 = r_h*r_h; + return C*((1-r2_h2)*(3-7*r2_h2)); + } + static __device__ Real laplacianWd2(Real r2_h2, Real C) + { + return C*((1-r2_h2)*(3-7*r2_h2)); + } + + /// Pressure Smoothing Kernel: W = 15 / pih6 (h - r)3 = 15 / pih3 (1 - r/h)3 + static Real constWp(Real h) + { + return (Real)(15 / (R_PI*h*h*h)); + } + static __device__ Real Wp(Real r_h, Real C) + { + Real a = (1-r_h); + return C*a*a*a; + } + static Real constGradWp(Real h) + { + return (-3*constWp(h)) / (h*h); + } + static __device__ Deriv gradWp(const Deriv& d, Real r_h, Real C) + { + Real a = (1-r_h); + return d * (C*a*a/r_h); + } + + /// Viscosity Smoothing Kernel: W = 15/(2pih3) (-r3/2h3 + r2/h2 + h/2r - 1) + static Real constWv(Real h) + { + return (Real)(15/(2*R_PI*h*h*h)); + } + static __device__ Real Wv(Real r_h, Real C) + { + Real r2_h2 = r_h*r_h; + Real r3_h3 = r2_h2*r_h; + return C*(-0.5f*r3_h3 + r2_h2 + 0.5f/r_h - 1); + } + static __device__ Real Wv2(Real r2_h2, Real r_h, Real C) + { + Real r3_h3 = r2_h2*r_h; + return C*(-0.5f*r3_h3 + r2_h2 + 0.5f/r_h - 1); + } + static Real constGradWv(Real h) + { + return constWv(h)/(h*h); + } + static __device__ Deriv gradWv(const Deriv& d, Real r_h, Real C) + { + Real r3_h3 = r_h*r_h*r_h; + return d * (C*(2.0f -3.0f*r_h - 0.5f/r3_h3)); + } + static __device__ Deriv gradWv2(const Deriv& d, Real r2_h2, Real r_h, Real C) + { + Real r3_h3 = r2_h2*r_h; + return d * (C*(-3*r_h + 4 - 1/r3_h3)); + } + static Real constLaplacianWv(Real h) + { + return 6*constWv(h)/(h*h); + } + + static __device__ Real laplacianWv(Real r_h, Real C) + { + return C*(1-r_h); + } +}; + +template +__device__ void SPHFluidInitDensity(CudaVec3 /*x1*/, real& density, GPUSPHFluid& params) +{ + density = 0; //params.mass * params.CWd; //SPH::Wd(0,params.CWd); +} + +template +__device__ void SPHFluidCalcDensity(CudaVec3 x1, CudaVec3 x2, real& density, GPUSPHFluid& params) +{ + CudaVec3 n = x2-x1; + real d2 = norm2(n); + if (sqrtf(d2) < params.h) + //if (d2 < params.h2) + { + //real r_h = rsqrtf(params.h2/d2); + //real r_h = sqrtf(d2/params.h2); + real r2_h2 = (d2/params.h2); + real d = SPH::Wd2(r2_h2); + density += d; + } +} + +template +__device__ void SPHFluidFinalDensity(CudaVec3 /*x1*/, real& density, GPUSPHFluid& params) +{ + density = (SPH::Wd0()+density) * params.CWd * params.mass; //SPH::Wd(0,params.CWd); +} + +template +__device__ void SPHFluidCalcForce(CudaVec4 x1, CudaVec3 v1, CudaVec4 x2, CudaVec3 v2, CudaVec3& force, GPUSPHFluid& params) +{ + CudaVec3 n = CudaVec3::make(x2.x-x1.x,x2.y-x1.y,x2.z-x1.z); + real d2 = norm2(n); + if (d2 < params.h2) + { + //real r_h = rsqrtf(params.h2/d2); + //real r_h = sqrtf(d2/params.h2); + real r2_h2 = (d2/params.h2); + real r_h = sqrtf(r2_h2); + real density1 = x1.w; + real density2 = x2.w; + + // Pressure + real pressure1 = params.stiffness * (density1 - params.density0); + real pressure2 = params.stiffness * (density2 - params.density0); + real pressureFV = params.mass2 * (pressure1 / (density1*density1) + pressure2 / (density2*density2) ); + + // Viscosity + if (viscosityType == 1) + { + force += ( v2 - v1 ) * ( params.mass2 * params.viscosity * SPH::laplacianWv(r_h,params.ClaplacianWv) / (density1 * density2) ); + } + else if (viscosityType == 2) + { + real vx = dot(n,v2-v1); + if (vx < 0) + pressureFV += - (vx * params.viscosity * params.h * params.mass / ((r2_h2 + 0.01f*params.h2)*(density1+density2)*0.5f)); + } + + force += SPH::gradWp(n, r_h, params.CgradWp) * pressureFV; + } +} +/* +template +__device__ void SPHFluidCalcDForce(CudaVec4 x1, CudaVec3 v1, CudaVec3 dx1, CudaVec4 x2, CudaVec3 v2, CudaVec3 dx2, CudaVec3& dforce, GPUSPHFluid& params) +{ + CudaVec3 n = CudaVec3::make(x2.x-x1.x,x2.y-x1.y,x2.z-x1.z); + real d2 = norm2(n); + if (d2 < params.h2) + { + CudaVec3 dxi = dx2-dx1; + + real inv_d = rsqrtf(d2); + + real r2_h2 = (d2/params.h2); + real r_h = sqrtf(r2_h2); + + real density1 = x1.w; + real density2 = x2.w; + + // Pressure + real pressure1 = params.stiffness * (density1 - params.density0); + real pressure2 = params.stiffness * (density2 - params.density0); + + real fpressure = params.mass2 * (pressure1 / (density1*density1) + pressure2 / (density2*density2) ); + + // Derivatives + + CudaVec3 dn = dxi * inv_d; + + real dr_h = dot(dn,n)/params.h; + //real dr2_h2 = 2*r_h*dr_h; + + real ddensity = dot(dxi,SPH::gradWd2(n, r2_h2, params.CgradWd)); + real dpressure = params.stiffness*ddensity; + // d(a/b^2) = (d(a)*b-2*a*d(b))/b^3 + real dfpressure = params.mass2 * ( (dpressure*density1-2*pressure1*ddensity)/(density1*density1*density1) + +(dpressure*density2-2*pressure2*ddensity)/(density2*density2*density2) ); + real a = (1-r_h); + real dWp = params.CgradWp*a*a; + real ddWp = -2*params.CgradWp*a*dr_h; + + // f = n * dWp * fpressure + // df = dn * (dWp * fpressure) + n * (ddWp * fpressure + dWp * dfpressure); + + dforce += dn * (dWp * fpressure) + n * (ddWp * fpressure + dWp * dfpressure); + + // Viscosity + // force += ( v2 - v1 ) * ( params.mass2 * params.viscosity * params.ClaplacianWv * (1-r_h) / (density1 * density2) ); + real d1d2 = density1*density2; + dforce += dxi * ( params.mass2 * params.viscosity * params.ClaplacianWv * (1-r_h) / (density1 * density2) ) + + (v2-v1) * ( params.mass2 * params.viscosity * params.ClaplacianWv * (-dr_h*d1d2 - (1-r_h)*ddensity*(density1+density2))/(d1d2*d1d2)); + + } +} +*/ +template +__global__ void SPHFluidForceFieldCuda3t_computeDensity_kernel(int size, const int *cells, const int *cellGhost, GPUSPHFluid params, real* pos4, const real* x) +{ + __shared__ int2 range; + __shared__ int ghost; + __shared__ real temp_x[BSIZE*3]; + int tx3 = threadIdx.x * 3; + for (int cell = blockIdx.x; cell < size; cell += gridDim.x) + { + __syncthreads(); + if (!threadIdx.x) + { + //range = *(const int2*)(cells+cell); + range.x = cells[cell]; + range.y = cells[cell+1]; + range.y &= ~(1U<<31); + ghost = cellGhost[cell]; + } + __syncthreads(); + if (range.x <= 0) continue; // no actual particle in this cell + for (int px0 = range.x; px0 < ghost; px0 += BSIZE) + { + int px = px0 + threadIdx.x; + CudaVec3 xi; + real density; + int index; + if (px < range.y) + { + index = cells[px]; + xi = ((const CudaVec3*)x)[index]; + temp_x[tx3 ] = xi.x; + temp_x[tx3+1] = xi.y; + temp_x[tx3+2] = xi.z; + } + __syncthreads(); + if (px < ghost) + { + // actual particle -> compute interactions + SPHFluidInitDensity(xi, density, params); + + int np = min(range.y-px0,BSIZE); + for (int i=0; i < np; ++i) + { + if (i != threadIdx.x) + SPHFluidCalcDensity(xi, ((const CudaVec3*)temp_x)[i], density, params); + } + } + __syncthreads(); + // loop through other groups of particles + for (int py0 = range.x; py0 < range.y; py0 += BSIZE) + { + if (py0 == px0) continue; + int py = py0 + threadIdx.x; + if (py < range.y) + { + int index2 = cells[py]; + CudaVec3 xj = ((const CudaVec3*)x)[index2]; + temp_x[tx3 ] = xj.x; + temp_x[tx3+1] = xj.y; + temp_x[tx3+2] = xj.z; + } + __syncthreads(); + if (px < ghost) + { + // actual particle -> compute interactions + int np = min(range.y-py0,BSIZE); + for (int i=0; i < np; ++i) + { + SPHFluidCalcDensity(xi, ((const CudaVec3*)temp_x)[i], density, params); + } + } + __syncthreads(); + } + if (px < ghost) + { + // actual particle -> write computed density + SPHFluidFinalDensity(xi, density, params); + CudaVec4 res = CudaVec4::make(xi.x,xi.y,xi.z,density); + ((CudaVec4*)pos4)[index] = res; + } + } + } +} + +template +__global__ void SPHFluidForceFieldCuda3t_addForce_kernel(int size, const int *cells, const int *cellGhost, GPUSPHFluid params, real* f, const real* pos4, const real* v) +{ + __shared__ int2 range; + __shared__ int ghost; + __shared__ real temp_x[BSIZE*4]; + __shared__ real temp_v[BSIZE*3]; + int tx3 = threadIdx.x * 3; + int tx4 = threadIdx.x << 2; + for (int cell = blockIdx.x; cell < size; cell += gridDim.x) + { + if (!threadIdx.x) + { + //range = *(const int2*)(cells+cell); + range.x = cells[cell]; + range.y = cells[cell+1]; + range.y &= ~(1U<<31); + ghost = cellGhost[cell]; + } + __syncthreads(); + if (range.x <= 0) continue; // no actual particle in this cell + for (int px0 = range.x; px0 < ghost; px0 += BSIZE) + { + int px = px0 + threadIdx.x; + CudaVec4 xi; + CudaVec3 vi; + CudaVec3 force; + int index; + if (px < range.y) + { + index = cells[px]; + xi = ((const CudaVec4*)pos4)[index]; + temp_x[tx4 ] = xi.x; + temp_x[tx4+1] = xi.y; + temp_x[tx4+2] = xi.z; + temp_x[tx4+3] = xi.w; + vi = ((const CudaVec3*)v)[index]; + temp_v[tx3 ] = vi.x; + temp_v[tx3+1] = vi.y; + temp_v[tx3+2] = vi.z; + } + __syncthreads(); + if (px < ghost) + { + // actual particle -> compute interactions + force = CudaVec3::make(0,0,0); + int np = min(range.y-px0,BSIZE); + for (int i=0; i < np; ++i) + { + if (i != threadIdx.x) + SPHFluidCalcForce(xi, vi, ((const CudaVec4*)temp_x)[i], ((const CudaVec3*)temp_v)[i], force, params); + } + } + __syncthreads(); + // loop through other groups of particles + for (int py0 = range.x; py0 < range.y; py0 += BSIZE) + { + if (py0 == px0) continue; + int py = py0 + threadIdx.x; + if (py < range.y) + { + int index2 = cells[py]; + CudaVec4 xj = ((const CudaVec4*)pos4)[index2]; + temp_x[tx4 ] = xj.x; + temp_x[tx4+1] = xj.y; + temp_x[tx4+2] = xj.z; + temp_x[tx4+3] = xj.w; + CudaVec3 vj = ((const CudaVec3*)v)[index2]; + temp_v[tx3 ] = vj.x; + temp_v[tx3+1] = vj.y; + temp_v[tx3+2] = vj.z; + } + __syncthreads(); + if (px < ghost) + { + // actual particle -> compute interactions + int np = min(range.y-py0,BSIZE); + for (int i=0; i < np; ++i) + { + SPHFluidCalcForce(xi, vi, ((const CudaVec4*)temp_x)[i], ((const CudaVec3*)temp_v)[i], force, params); + } + } + __syncthreads(); + } + if (px < ghost) + { + // actual particle -> write computed force + ((CudaVec3*)f)[index] += force; + } + } + } +} +/* +template +__global__ void SPHFluidForceFieldCuda3t_addDForce_kernel(int size, const int *cells, const int *cellGhost, GPUSPHFluid params, real* df, const real* pos4, const real* v, const real* dx) +{ + __shared__ int2 range; + __shared__ int ghost; + __shared__ real temp_x[BSIZE*4]; + __shared__ real temp_v[BSIZE*3]; + __shared__ real temp_dx[BSIZE*3]; + int tx3 = threadIdx.x * 3; + int tx4 = threadIdx.x << 2; + for (int cell = blockIdx.x; cell < size; cell += gridDim.x) + { + if (!threadIdx.x) + { + //range = *(const int2*)(cells+cell); + range.x = cells[cell]; + range.y = cells[cell+1]; + range.y &= ~(1U<<31); + ghost = cellGhost[cell]; + } + __syncthreads(); + if (range.x <= 0) continue; // no actual particle in this cell + for (int px0 = range.x; px0 < ghost; px0 += BSIZE) + { + int px = px0 + threadIdx.x; + CudaVec4 xi; + CudaVec3 vi; + CudaVec3 dxi; + CudaVec3 dforce; + int index; + if (px < range.y) + { + index = cells[px]; + xi = ((const CudaVec4*)pos4)[index]; + temp_x[tx4 ] = xi.x; + temp_x[tx4+1] = xi.y; + temp_x[tx4+2] = xi.z; + temp_x[tx4+3] = xi.w; + vi = ((const CudaVec3*)v)[index]; + temp_v[tx3 ] = vi.x; + temp_v[tx3+1] = vi.y; + temp_v[tx3+2] = vi.z; + dxi = ((const CudaVec3*)dx)[index]; + temp_dx[tx3 ] = dxi.x; + temp_dx[tx3+1] = dxi.y; + temp_dx[tx3+2] = dxi.z; + } + __syncthreads(); + if (px < ghost) + { // actual particle -> compute interactions + dforce = CudaVec3::make(0,0,0); + int np = min(range.y-px0,BSIZE); + for (int i=0; i < np; ++i) + { + if (i != threadIdx.x) + SPHFluidCalcDForce(xi, vi, dxi, ((const CudaVec4*)temp_x)[i], ((const CudaVec3*)temp_v)[i], ((const CudaVec3*)temp_dx)[i], dforce, params); + } + } + __syncthreads(); + // loop through other groups of particles + for (int py0 = range.x; py0 < range.y; py0 += BSIZE) + { + if (py0 == px0) continue; + int py = py0 + threadIdx.x; + if (py < range.y) + { + int index2 = cells[py]; + CudaVec4 xj = ((const CudaVec4*)pos4)[index2]; + temp_x[tx4 ] = xj.x; + temp_x[tx4+1] = xj.y; + temp_x[tx4+2] = xj.z; + temp_x[tx4+3] = xj.w; + CudaVec3 vj = ((const CudaVec3*)v)[index2]; + temp_v[tx3 ] = vj.x; + temp_v[tx3+1] = vj.y; + temp_v[tx3+2] = vj.z; + CudaVec3 dxj = ((const CudaVec3*)dx)[index2]; + temp_dx[tx3 ] = dxj.x; + temp_dx[tx3+1] = dxj.y; + temp_dx[tx3+2] = dxj.z; + } + __syncthreads(); + if (px < ghost) + { // actual particle -> compute interactions + int np = min(range.y-py0,BSIZE); + for (int i=0; i < np; ++i) + { + SPHFluidCalcDForce(xi, vi, dxi, ((const CudaVec4*)temp_x)[i], ((const CudaVec3*)temp_v)[i], ((const CudaVec3*)temp_dx)[i], dforce, params); + } + } + __syncthreads(); + } + if (px < ghost) + { // actual particle -> write computed force + ((CudaVec3*)df)[index] += dforce; + } + } + } +} +*/ +////////////////////// +// CPU-side methods // +////////////////////// + +void SPHFluidForceFieldCuda3f_computeDensity(int kernelType, int pressureType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3f* params, void* pos4, const void* x) +{ + dim3 threads(BSIZE,1); + dim3 grid(60,1); + switch(kernelType) + { + case 0: + switch(pressureType) + { + case 0: //SPHFluidForceFieldCuda3t_computeDensity_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)pos4, (const float*)x); + break; + case 1: SPHFluidForceFieldCuda3t_computeDensity_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)pos4, (const float*)x); + break; + default: break; + } + break; + case 1: + switch(pressureType) + { + case 0: //SPHFluidForceFieldCuda3t_computeDensity_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)pos4, (const float*)x); + break; + case 1: SPHFluidForceFieldCuda3t_computeDensity_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)pos4, (const float*)x); + break; + default: break; + } + break; + default: break; + } + mycudaDebugError("SPHFluidForceFieldCuda3t_computeDensity_kernel"); +} + +void SPHFluidForceFieldCuda3f_addForce(int kernelType, int pressureType, int viscosityType, int surfaceTensionType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3f* params, void* f, const void* x, const void* v) +{ + dim3 threads(BSIZE,1); + dim3 grid(60,1); + switch(kernelType) + { + case 0: + switch(pressureType) + { + case 0: + switch(viscosityType) + { + case 0: + switch(surfaceTensionType) + { + case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + default: break; + } + break; + case 1: + switch(surfaceTensionType) + { + case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + default: break; + } + break; + case 2: + switch(surfaceTensionType) + { + case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + default: break; + } + break; + default: break; + } + break; + case 1: + switch(viscosityType) + { + case 0: + switch(surfaceTensionType) + { + case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + default: break; + } + break; + case 1: + switch(surfaceTensionType) + { + case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + default: break; + } + break; + case 2: + switch(surfaceTensionType) + { + case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + default: break; + } + break; + default: break; + } + break; + default: break; + } + break; + case 1: + switch(pressureType) + { + case 0: + switch(viscosityType) + { + case 0: + switch(surfaceTensionType) + { + case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + default: break; + } + break; + case 1: + switch(surfaceTensionType) + { + case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + default: break; + } + break; + case 2: + switch(surfaceTensionType) + { + case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + default: break; + } + break; + default: break; + } + break; + case 1: + switch(viscosityType) + { + case 0: + switch(surfaceTensionType) + { + case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + default: break; + } + break; + case 1: + switch(surfaceTensionType) + { + case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + default: break; + } + break; + case 2: + switch(surfaceTensionType) + { + case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); + break; + default: break; + } + break; + default: break; + } + break; + default: break; + } + break; + default: break; + } + mycudaDebugError("SPHFluidForceFieldCuda3t_addForce_kernel"); +} +/* +void SPHFluidForceFieldCuda3f_addDForce(unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3f* params, void* df, const void* x, const void* v, const void* dx) +{ + dim3 threads(BSIZE,1); + dim3 grid(60/BSIZE,1); + {SPHFluidForceFieldCuda3t_addDForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)df, (const float*)x, (const float*)v, (const float*)dx); mycudaDebugError("SPHFluidForceFieldCuda3t_addDForce_kernel");} +} +*/ +#ifdef SOFA_GPU_CUDA_DOUBLE + +void SPHFluidForceFieldCuda3d_computeDensity(int kernelType, int pressureType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3d* params, void* pos4, const void* x) +{ + dim3 threads(BSIZE,1); + dim3 grid(60,1); + switch(kernelType) + { + case 0: + switch(pressureType) + { + case 0: //SPHFluidForceFieldCuda3t_computeDensity_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)pos4, (const double*)x); + break; + case 1: SPHFluidForceFieldCuda3t_computeDensity_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)pos4, (const double*)x); + break; + default: break; + } + break; + case 1: + switch(pressureType) + { + case 0: //SPHFluidForceFieldCuda3t_computeDensity_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)pos4, (const double*)x); + break; + case 1: SPHFluidForceFieldCuda3t_computeDensity_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)pos4, (const double*)x); + break; + default: break; + } + break; + default: break; + } + mycudaDebugError("SPHFluidForceFieldCuda3t_computeDensity_kernel"); + +} + +void SPHFluidForceFieldCuda3d_addForce (int kernelType, int pressureType, int viscosityType, int surfaceTensionType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3d* params, void* f, const void* x, const void* v) +{ + dim3 threads(BSIZE,1); + dim3 grid(60,1); + switch(kernelType) + { + case 0: + switch(pressureType) + { + case 0: + switch(viscosityType) + { + case 0: + switch(surfaceTensionType) + { + case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + default: break; + } + break; + case 1: + switch(surfaceTensionType) + { + case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + default: break; + } + break; + case 2: + switch(surfaceTensionType) + { + case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + default: break; + } + break; + default: break; + } + break; + case 1: + switch(viscosityType) + { + case 0: + switch(surfaceTensionType) + { + case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + default: break; + } + break; + case 1: + switch(surfaceTensionType) + { + case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + default: break; + } + break; + case 2: + switch(surfaceTensionType) + { + case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + default: break; + } + break; + default: break; + } + break; + default: break; + } + break; + case 1: + switch(pressureType) + { + case 0: + switch(viscosityType) + { + case 0: + switch(surfaceTensionType) + { + case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + default: break; + } + break; + case 1: + switch(surfaceTensionType) + { + case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + default: break; + } + break; + case 2: + switch(surfaceTensionType) + { + case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + default: break; + } + break; + default: break; + } + break; + case 1: + switch(viscosityType) + { + case 0: + switch(surfaceTensionType) + { + case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + default: break; + } + break; + case 1: + switch(surfaceTensionType) + { + case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + default: break; + } + break; + case 2: + switch(surfaceTensionType) + { + case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); + break; + default: break; + } + break; + default: break; + } + break; + default: break; + } + break; + default: break; + } + mycudaDebugError("SPHFluidForceFieldCuda3t_addForce_kernel"); + /* + dim3 threads(BSIZE,1); + dim3 grid(60/BSIZE,1); + if (params->surfaceTension > 0) + {SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); mycudaDebugError("SPHFluidForceFieldCuda3t_addForce_kernel");} + else + {SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); mycudaDebugError("SPHFluidForceFieldCuda3t_addForce_kernel");} + */ +} +/* +void SPHFluidForceFieldCuda3d_addDForce(unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3d* params, void* df, const void* x, const void* v, const void* dx) +{ + dim3 threads(BSIZE,1); + dim3 grid(60/BSIZE,1); + {SPHFluidForceFieldCuda3t_addDForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)df, (const double*)x, (const double*)v, (const double*)dx); mycudaDebugError("SPHFluidForceFieldCuda3t_addDForce_kernel");} +} +*/ +#endif // SOFA_GPU_CUDA_DOUBLE + +#if defined(__cplusplus) && CUDA_VERSION < 2000 +} // namespace cuda +} // namespace gpu +} // namespace sofa +#endif diff --git a/extensions/CUDA/src/SofaSphFluid/CUDA/CudaSPHFluidForceField.h b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaSPHFluidForceField.h new file mode 100644 index 0000000..7b9ead1 --- /dev/null +++ b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaSPHFluidForceField.h @@ -0,0 +1,136 @@ +/****************************************************************************** +* SOFA, Simulation Open-Framework Architecture * +* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU Lesser General Public License as published by * +* the Free Software Foundation; either version 2.1 of the License, or (at * +* your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, but WITHOUT * +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * +* for more details. * +* * +* You should have received a copy of the GNU Lesser General Public License * +* along with this program. If not, see . * +******************************************************************************* +* Authors: The SOFA Team and external contributors (see Authors.txt) * +* * +* Contact information: contact@sofa-framework.org * +******************************************************************************/ +#ifndef SOFA_GPU_CUDA_CUDASPHFLUIDFORCEFIELD_H +#define SOFA_GPU_CUDA_CUDASPHFLUIDFORCEFIELD_H + +#include +#include +#include +#include + +namespace sofa +{ + + +namespace gpu::cuda +{ + +template +struct GPUSPHFluid +{ + real h; ///< particles radius + real h2; ///< particles radius squared + real inv_h2; ///< particles radius squared inverse + real stiffness; ///< pressure stiffness + real mass; ///< particles mass + real mass2; ///< particles mass squared + real density0; ///< 1000 kg/m3 for water + real viscosity; + real surfaceTension; + + // Precomputed constants for smoothing kernels + real CWd; ///< = constWd(h) + //real CgradWd; ///< = constGradWd(h) + real CgradWp; ///< = constGradWp(h) + real ClaplacianWv; ///< = constLaplacianWv(h) +}; + +typedef GPUSPHFluid GPUSPHFluid3f; +typedef GPUSPHFluid GPUSPHFluid3d; + +} // namespace gpu::cuda + + +namespace component::forcefield +{ + +template +class SPHFluidForceFieldInternalData< gpu::cuda::CudaVectorTypes > +{ +public: + typedef gpu::cuda::CudaVectorTypes DataTypes; + typedef SPHFluidForceFieldInternalData Data; + typedef SPHFluidForceField Main; + typedef typename DataTypes::Coord Coord; + typedef typename DataTypes::Real Real; + gpu::cuda::GPUSPHFluid params; + gpu::cuda::CudaVector pos4; + + void fillParams(Main* m, int kernelType, double kFactor=1.0, double bFactor=1.0) + { + Real h = m->d_particleRadius.getValue(); + params.h = h; + params.h2 = h*h; + params.inv_h2 = 1/(h*h); + params.stiffness = (Real)(kFactor*m->d_pressureStiffness.getValue()); + params.mass = m->d_particleMass.getValue(); + params.mass2 = params.mass*params.mass; + params.density0 = m->d_density0.getValue(); + params.viscosity = (Real)(bFactor*m->d_viscosity.getValue()); + params.surfaceTension = (Real)(kFactor*m->d_surfaceTension.getValue()); + if (kernelType == 1) + { + params.CWd = SPHKernel::constW(h); + //params.CgradWd = SPHKernel::constGradW(h); + params.CgradWp = SPHKernel::constGradW(h); + params.ClaplacianWv = SPHKernel::constLaplacianW(h); + } + else + { + params.CWd = SPHKernel::constW(h); + //params.CgradWd = SPHKernel::constGradW(h); + params.CgradWp = SPHKernel::constGradW(h); + params.ClaplacianWv = SPHKernel::constLaplacianW(h); + } + } + + void Kernels_computeDensity(int kernelType, int pressureType, int gsize, const void* cells, const void* cellGhost, void* pos4, const void* x); + void Kernels_addForce(int kernelType, int pressureType, int viscosityType, int surfaceTensionType, int gsize, const void* cells, const void* cellGhost, void* f, const void* pos4, const void* vel); + //void Kernels_addDForce(int kernelType, int pressureType, int viscosityType, int surfaceTensionType, int gsize, const void* cells, const void* cellGhost, void* f, const void* pos4, const void* dx, const void* vel); +}; + + +template <> +void SPHFluidForceField::addForce(const core::MechanicalParams* mparams, DataVecDeriv& d_f, const DataVecCoord& d_x, const DataVecDeriv& d_v); + +template <> +void SPHFluidForceField::addDForce(const core::MechanicalParams* mparams, DataVecDeriv& d_df, const DataVecDeriv& d_dx); + +template <> +void SPHFluidForceField::draw(const core::visual::VisualParams* vparams); + +#ifdef SOFA_GPU_CUDA_DOUBLE + +template <> +void SPHFluidForceField::addForce(const core::MechanicalParams* mparams, DataVecDeriv& d_f, const DataVecCoord& d_x, const DataVecDeriv& d_v); + +template <> +void SPHFluidForceField::addDForce(const core::MechanicalParams* mparams, DataVecDeriv& d_df, const DataVecDeriv& d_dx); + +#endif // SOFA_GPU_CUDA_DOUBLE + +} // namespace component::forcefield + + +} // namespace sofa + +#endif diff --git a/extensions/CUDA/src/SofaSphFluid/CUDA/CudaSPHFluidForceField.inl b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaSPHFluidForceField.inl new file mode 100644 index 0000000..752ea1f --- /dev/null +++ b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaSPHFluidForceField.inl @@ -0,0 +1,273 @@ +/****************************************************************************** +* SOFA, Simulation Open-Framework Architecture * +* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU Lesser General Public License as published by * +* the Free Software Foundation; either version 2.1 of the License, or (at * +* your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, but WITHOUT * +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * +* for more details. * +* * +* You should have received a copy of the GNU Lesser General Public License * +* along with this program. If not, see . * +******************************************************************************* +* Authors: The SOFA Team and external contributors (see Authors.txt) * +* * +* Contact information: contact@sofa-framework.org * +******************************************************************************/ +#ifndef SOFA_GPU_CUDA_CUDASPHFLUIDFORCEFIELD_INL +#define SOFA_GPU_CUDA_CUDASPHFLUIDFORCEFIELD_INL + +#include "CudaSPHFluidForceField.h" +#include +#include +#include + +namespace sofa +{ + + +namespace gpu::cuda +{ + +extern "C" +{ + +void SPHFluidForceFieldCuda3f_computeDensity(int kernelType, int pressureType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3f* params, void* pos4, const void* x); +void SPHFluidForceFieldCuda3f_addForce (int kernelType, int pressureType, int viscosityType, int surfaceTensionType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3f* params, void* f, const void* pos4, const void* v); +//void SPHFluidForceFieldCuda3f_addDForce(int kernelType, int pressureType, int viscosityType, int surfaceTensionType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3f* params, void* f, const void* pos4, const void* v, const void* dx); + +#ifdef SOFA_GPU_CUDA_DOUBLE + +void SPHFluidForceFieldCuda3d_computeDensity(int kernelType, int pressureType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3d* params, void* pos4, const void* x); +void SPHFluidForceFieldCuda3d_addForce (int kernelType, int pressureType, int viscosityType, int surfaceTensionType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3d* params, void* f, const void* pos4, const void* v); +//void SPHFluidForceFieldCuda3d_addDForce(int kernelType, int pressureType, int viscosityType, int surfaceTensionType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3d* params, void* f, const void* pos4, const void* v, const void* dx); + +#endif // SOFA_GPU_CUDA_DOUBLE +} + +} // namespace gpu::cuda + + +namespace component::forcefield +{ + +using namespace gpu::cuda; + +template<> +void SPHFluidForceFieldInternalData::Kernels_computeDensity(int kernelType, int pressureType, int gsize, const void* cells, const void* cellGhost, void* pos4, const void* x) +{ + SPHFluidForceFieldCuda3f_computeDensity(kernelType, pressureType, gsize, cells, cellGhost, ¶ms, pos4, x); +} + +template<> +void SPHFluidForceFieldInternalData::Kernels_addForce(int kernelType, int pressureType, int viscosityType, int surfaceTensionType, int gsize, const void* cells, const void* cellGhost, void* f, const void* pos4, const void* v) +{ + SPHFluidForceFieldCuda3f_addForce (kernelType, pressureType, viscosityType, surfaceTensionType, gsize, cells, cellGhost, ¶ms, f, pos4, v); +} +/* +template<> +void SPHFluidForceFieldInternalData::Kernels_addDForce(int kernelType, int pressureType, int viscosityType, int surfaceTensionType, int gsize, const void* cells, const void* cellGhost, void* f, const void* pos4, const void* v, const void* dx) +{ + SPHFluidForceFieldCuda3f_addDForce(kernelType, pressureType, viscosityType, surfaceTensionType, gsize, cells, cellGhost, ¶ms, f, pos4, v, dx); +} +*/ +template <> +void SPHFluidForceField::addForce(const core::MechanicalParams* /*mparams*/, DataVecDeriv& d_f, const DataVecCoord& d_x, const DataVecDeriv& d_v) +{ + if (m_grid == NULL) return; + + const int kernelT = d_kernelType.getValue(); + const int pressureT = d_pressureType.getValue(); + const Real viscosity = this->d_viscosity.getValue(); + const int viscosityT = (viscosity == 0) ? 0 : d_viscosityType.getValue(); + const Real surfaceTension = this->d_surfaceTension.getValue(); + const int surfaceTensionT = (surfaceTension <= 0) ? 0 : d_surfaceTensionType.getValue(); + + VecDeriv& f = *d_f.beginEdit(); + const VecCoord& x = d_x.getValue(); + const VecDeriv& v = d_v.getValue(); + + m_grid->updateGrid(x); + data.fillParams(this, kernelT); + f.resize(x.size()); + Grid::Grid* g = m_grid->getGrid(); + data.pos4.recreate(x.size()); + data.Kernels_computeDensity( kernelT, pressureT, + g->getNbCells(), g->getCellsVector().deviceRead(), g->getCellGhostVector().deviceRead(), + data.pos4.deviceWrite(), x.deviceRead()); + + msg_info() << "density[" << 0 << "] = " << data.pos4[0][3] + << "density[" << data.pos4.size()/2 << "] = " << data.pos4[data.pos4.size()/2][3]; + + data.Kernels_addForce( kernelT, pressureT, viscosityT, surfaceTensionT, + g->getNbCells(), g->getCellsVector().deviceRead(), g->getCellGhostVector().deviceRead(), + f.deviceWrite(), data.pos4.deviceRead(), v.deviceRead()); + + d_f.endEdit(); +} + +template <> +void SPHFluidForceField::addDForce(const core::MechanicalParams* mparams, DataVecDeriv& /*d_df*/, const DataVecDeriv& /*d_dx*/) +{ + mparams->setKFactorUsed(true); +#if 0 + if (m_grid == NULL) return; + + const int kernelT = d_kernelType.getValue(); + const int pressureT = d_pressureType.getValue(); + const Real viscosity = this->d_viscosity.getValue(); + const int viscosityT = (viscosity == 0) ? 0 : d_viscosityType.getValue(); + const Real surfaceTension = this->d_surfaceTension.getValue(); + const int surfaceTensionT = (surfaceTension <= 0) ? 0 : d_surfaceTensionType.getValue(); + + VecDeriv& df = *d_df.beginEdit(); + const VecDeriv& dx = d_dx.getValue(); + + const VecDeriv& v = this->mstate->read(core::vec_id::read_access::velocity)->getValue(); + data.fillParams(this, kernelT, mparams->kFactor(), sofa::core::mechanicalparams::bFactor(mparams)); + df.resize(dx.size()); + Grid::Grid* g = m_grid->getGrid(); + data.Kernels_addDForce( kernelT, pressureT, viscosityT, surfaceTensionT, + g->getNbCells(), g->getCellsVector().deviceRead(), g->getCellGhostVector().deviceRead(), + df.deviceWrite(), data.pos4.deviceRead(), v.deviceRead(), dx.deviceRead()); + + d_df.endEdit(); +#endif +} + + +#ifdef SOFA_GPU_CUDA_DOUBLE + + +template<> +void SPHFluidForceFieldInternalData::Kernels_computeDensity(int kernelType, int pressureType, int gsize, const void* cells, const void* cellGhost, void* pos4, const void* x) +{ + SPHFluidForceFieldCuda3d_computeDensity(kernelType, pressureType, gsize, cells, cellGhost, ¶ms, pos4, x); +} + +template<> +void SPHFluidForceFieldInternalData::Kernels_addForce(int kernelType, int pressureType, int viscosityType, int surfaceTensionType, int gsize, const void* cells, const void* cellGhost, void* f, const void* pos4, const void* v) +{ + SPHFluidForceFieldCuda3d_addForce (kernelType, pressureType, viscosityType, surfaceTensionType, gsize, cells, cellGhost, ¶ms, f, pos4, v); +} +/* +template<> +void SPHFluidForceFieldInternalData::Kernels_addDForce(int kernelType, int pressureType, int viscosityType, int surfaceTensionType, int gsize, const void* cells, const void* cellGhost, void* f, const void* pos4, const void* v, const void* dx) +{ + SPHFluidForceFieldCuda3d_addDForce(kernelType, pressureType, viscosityType, surfaceTensionType, gsize, cells, cellGhost, ¶ms, f, pos4, v, dx); +} +*/ +template <> +void SPHFluidForceField::addForce(const core::MechanicalParams* /*mparams*/, DataVecDeriv& d_f, const DataVecCoord& d_x, const DataVecDeriv& d_v) +{ + if (m_grid == NULL) return; + + const int kernelT = d_kernelType.getValue(); + const int pressureT = d_pressureType.getValue(); + const Real viscosity = this->d_viscosity.getValue(); + const int viscosityT = (viscosity == 0) ? 0 : d_viscosityType.getValue(); + const Real surfaceTension = this->d_surfaceTension.getValue(); + const int surfaceTensionT = (surfaceTension <= 0) ? 0 : d_surfaceTensionType.getValue(); + + VecDeriv& f = *d_f.beginEdit(); + const VecCoord& x = d_x.getValue(); + const VecDeriv& v = d_v.getValue(); + + m_grid->updateGrid(x); + data.fillParams(this, kernelT); + f.resize(x.size()); + Grid::Grid* g = m_grid->getGrid(); + data.pos4.recreate(x.size()); + data.Kernels_computeDensity( kernelT, pressureT, + g->getNbCells(), g->getCellsVector().deviceRead(), g->getCellGhostVector().deviceRead(), + data.pos4.deviceWrite(), x.deviceRead()); + msg_info() << "density[" << 0 << "] = " << data.pos4[0][3] + << "density[" << data.pos4.size()/2 << "] = " << data.pos4[data.pos4.size()/2][3]; + data.Kernels_addForce( kernelT, pressureT, viscosityT, surfaceTensionT, + g->getNbCells(), g->getCellsVector().deviceRead(), g->getCellGhostVector().deviceRead(), + f.deviceWrite(), data.pos4.deviceRead(), v.deviceRead()); + + d_f.endEdit(); +} + +template <> +void SPHFluidForceField::addDForce(const core::MechanicalParams* mparams, DataVecDeriv& /*d_df*/, const DataVecDeriv& /*d_dx*/) +{ + mparams->setKFactorUsed(true); +#if 0 + if (m_grid == NULL) return; + + const int kernelT = d_kernelType.getValue(); + const int pressureT = d_pressureType.getValue(); + const Real viscosity = this->d_viscosity.getValue(); + const int viscosityT = (viscosity == 0) ? 0 : d_viscosityType.getValue(); + const Real surfaceTension = this->d_surfaceTension.getValue(); + const int surfaceTensionT = (surfaceTension <= 0) ? 0 : d_surfaceTensionType.getValue(); + + VecDeriv& df = *d_df.beginEdit(); + const VecDeriv& dx = d_dx.getValue(); + //const VecCoord& x = this->mstate->read(core::ConstVecCoordId::position())->getValue(); + const VecDeriv& v = this->mstate->read(core::vec_id::read_access::velocity)->getValue(); + data.fillParams(this, mparams->kFactor(), sofa::core::mechanicalparams::bFactor(mparams)); + df.resize(dx.size()); + Grid::Grid* g = m_grid->getGrid(); + data.Kernels_addDForce( kernelT, pressureT, viscosityT, surfaceTensionT, + g->getNbCells(), g->getCellsVector().deviceRead(), g->getCellGhostVector().deviceRead(), + df.deviceWrite(), data.pos4.deviceRead(), v.deviceRead(), dx.deviceRead()); + d_df.endEdit(); +#endif +} + +#endif // SOFA_GPU_CUDA_DOUBLE + + + +template <> +void SPHFluidForceField::draw(const core::visual::VisualParams* vparams) +{ +#if SOFACUDA_HAVE_SOFA_GL == 1 + if (!vparams->displayFlags().getShowForceFields()) return; + //if (m_grid != NULL) + // grid->draw(vparams); + helper::ReadAccessor x = this->mstate->read(sofa::core::vec_id::read_access::position)->getValue(); + helper::ReadAccessor > pos4 = this->data.pos4; + if (pos4.empty()) return; + glDisable(GL_LIGHTING); + glColor3f(0,1,1); + glDisable(GL_BLEND); + glDepthMask(1); + glPointSize(5); + glBegin(GL_POINTS); + for (unsigned int i=0; i. * +******************************************************************************* +* Authors: The SOFA Team and external contributors (see Authors.txt) * +* * +* Contact information: contact@sofa-framework.org * +******************************************************************************/ +#define SOFASPHFLUID_CUDA_CUDASPATIALGRIDCONTAINER_CPP +#include +#include +#include + + +namespace sofa::component::container +{ + +using namespace sofa::defaulttype; +using namespace sofa::gpu::cuda; +using namespace core::behavior; + + +int SpatialGridContainerCudaClass = core::RegisterObject("GPU support using CUDA.") + .add< SpatialGridContainer >() + ; + +template class SOFA_SOFASPHFLUID_CUDA_API SpatialGridContainer< CudaVec3fTypes >; +template class SOFA_SOFASPHFLUID_CUDA_API SpatialGrid< SpatialGridTypes< CudaVec3fTypes > >; + +#ifdef SOFA_GPU_CUDA_DOUBLE + +template class SOFA_SOFASPHFLUID_CUDA_API SpatialGridContainer< CudaVec3dTypes >; +template class SOFA_SOFASPHFLUID_CUDA_API SpatialGrid< SpatialGridTypes< CudaVec3dTypes > >; + +#endif // SOFA_GPU_CUDA_DOUBLE + +} // namespace sofa::component::container + + + + diff --git a/extensions/CUDA/src/SofaSphFluid/CUDA/CudaSpatialGridContainer.cu b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaSpatialGridContainer.cu new file mode 100644 index 0000000..83af602 --- /dev/null +++ b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaSpatialGridContainer.cu @@ -0,0 +1,377 @@ +/****************************************************************************** +* SOFA, Simulation Open-Framework Architecture * +* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU Lesser General Public License as published by * +* the Free Software Foundation; either version 2.1 of the License, or (at * +* your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, but WITHOUT * +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * +* for more details. * +* * +* You should have received a copy of the GNU Lesser General Public License * +* along with this program. If not, see . * +******************************************************************************* +* Authors: The SOFA Team and external contributors (see Authors.txt) * +* * +* Contact information: contact@sofa-framework.org * +******************************************************************************/ +/* PART OF THIS FILE IS FROM NVIDIA CUDA SDK particles demo: + * + * Copyright 1993-2006 NVIDIA Corporation. All rights reserved. + * + * NOTICE TO USER: + * + * This source code is subject to NVIDIA ownership rights under U.S. and + * international Copyright laws. + * + * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE + * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR + * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH + * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. + * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, + * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS + * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE + * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE + * OR PERFORMANCE OF THIS SOURCE CODE. + * + * U.S. Government End Users. This source code is a "commercial item" as + * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of + * "commercial computer software" and "commercial computer software + * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) + * and is provided to the U.S. Government only as a commercial end item. + * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through + * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the + * source code with only those rights set forth herein. + */ + + +#include +#include +#include +#include + +#if defined(__cplusplus) && CUDA_VERSION < 2000 +namespace sofa +{ +namespace gpu +{ +namespace cuda +{ +#endif + +extern "C" +{ + void SpatialGridContainer3f_computeHash(int cellBits, float cellWidth, int nbPoints, void* particleIndex8, void* particleHash8, const void* x); + void SpatialGridContainer3f1_computeHash(int cellBits, float cellWidth, int nbPoints, void* particleIndex8, void* particleHash8, const void* x); + void SpatialGridContainer_findCellRange(int cellBits, int index0, float cellWidth, int nbPoints, const void* particleHash8, void* cellRange, void* cellGhost); +//void SpatialGridContainer3f_reorderData(int nbPoints, const void* particleHash, void* sorted, const void* x); +//void SpatialGridContainer3f1_reorderData(int nbPoints, const void* particleHash, void* sorted, const void* x); +} + +#define USE_TEX 0 + +struct GridParams +{ + float cellWidth; + float invCellWidth; + int cellMask; + float halfCellWidth; + float invHalfCellWidth; +}; + +// large prime numbers +#define HASH_PX 73856093 +#define HASH_PY 19349663 +#define HASH_PZ 83492791 + +////////////////////// +// GPU-side methods // +////////////////////// + +#if USE_TEX +texture cellRangeTex; +#endif + +__constant__ GridParams gridParams; + +// calculate cell in grid from position +template +__device__ int3 calcGridPos(T p) +{ + int3 i; + i.x = __float2int_rd(p.x * gridParams.invCellWidth); + i.y = __float2int_rd(p.y * gridParams.invCellWidth); + i.z = __float2int_rd(p.z * gridParams.invCellWidth); + return i; +} + +// calculate address in grid from position +__device__ unsigned int calcGridHashI(int3 p) +{ + //return ((p.x<<10)^(p.y<<5)^(p.z)) & gridParams.cellMask; + //return ((p.x)^(p.y)^(p.z)) & gridParams.cellMask; + return (__mul24(HASH_PX,p.x)^__mul24(HASH_PY,p.y)^__mul24(HASH_PZ,p.z)) & gridParams.cellMask; + //return (p.x) & gridParams.cellMask; +} + +// calculate address in grid from position +template +__device__ unsigned int calcGridHash(T p) +{ + return calcGridHashI(calcGridPos(p)); +} + + +__device__ __inline__ float3 getPos3(const float4* pos, int index0, int index) +{ + float4 p = pos[index]; + return make_float3(p.x,p.y,p.z); +} + +__shared__ float ftemp[BSIZE*3]; + +__device__ __inline__ float3 getPos3(const float3* pos, int index0, int index) +{ + //return pos[index]; + + int index03 = index0 * 3; + int index3 = threadIdx.x * 3; + ftemp[threadIdx.x] = ((const float*)pos)[index03+threadIdx.x]; + ftemp[threadIdx.x+BSIZE] = ((const float*)pos)[index03+threadIdx.x+BSIZE]; + ftemp[threadIdx.x+2*BSIZE] = ((const float*)pos)[index03+threadIdx.x+2*BSIZE]; + __syncthreads(); + return make_float3(ftemp[index3],ftemp[index3+1],ftemp[index3+2]); +} + +__device__ __inline__ float4 getPos4(const float4* pos, int index0, int index) +{ + return pos[index]; +} + +__device__ __inline__ float4 getPos4(const float3* pos, int index0, int index) +{ + int index3 = threadIdx.x * 3; + pos += index0; + ftemp[threadIdx.x] = ((const float*)pos)[threadIdx.x]; + ftemp[threadIdx.x+BSIZE] = ((const float*)pos)[threadIdx.x+BSIZE]; + ftemp[threadIdx.x+2*BSIZE] = ((const float*)pos)[threadIdx.x+2*BSIZE]; + __syncthreads(); + return make_float4(ftemp[index3],ftemp[index3+1],ftemp[index3+2],0.0f); +} + +__device__ __inline__ float4 getPos4(const float4* pos, int index) +{ + return pos[index]; +} + +__device__ __inline__ float4 getPos4(const float3* pos, int index) +{ + float3 p = pos[index]; + return make_float4(p.x,p.y,p.z,1.0f); +} + +// calculate grid hash value for each particle +template +__global__ void +computeHashD(const TIn* pos, + unsigned int* particleIndex8, unsigned int* particleHash8, int n) +{ + int index0 = (blockIdx.x*BSIZE); + int index = index0 + threadIdx.x; + int nt = n - index0; if (nt > BSIZE) nt = BSIZE; + float3 p = getPos3(pos,index0,index); + + int3 hgpos; + hgpos.x = __float2int_rd(p.x * gridParams.invHalfCellWidth); + hgpos.y = __float2int_rd(p.y * gridParams.invHalfCellWidth); + hgpos.z = __float2int_rd(p.z * gridParams.invHalfCellWidth); + int halfcell = ((hgpos.x&1) + ((hgpos.y&1)<<1) + ((hgpos.z&1)<<2))^7; + // compute the first cell to be influenced by the particle + hgpos.x = (hgpos.x-1) >> 1; + hgpos.y = (hgpos.y-1) >> 1; + hgpos.z = (hgpos.z-1) >> 1; + + __syncthreads(); + + __shared__ int hx[3*BSIZE]; + int x = threadIdx.x; + +// hx[x] = (__mul24(HASH_PX,hgpos.x) << 3)+halfcell; +// hy[x] = __mul24(HASH_PY,hgpos.y); +// hz[x] = __mul24(HASH_PZ,hgpos.z); + hx[x] = ((HASH_PX*hgpos.x) << 3)+halfcell; + hx[BSIZE+x] = (HASH_PY*hgpos.y); + hx[2*BSIZE+x] = (HASH_PZ*hgpos.z); + __syncthreads(); + int3 dH; + dH.x = (x&1 ? HASH_PX : 0); + dH.y = (x&2 ? HASH_PY : 0); + dH.z = (x&4 ? HASH_PZ : 0); + int x_7 = x&7; + int index0_8_x_7 = (index0 << 3) + x_7; + for (unsigned int lx = x>>3; lx < nt; lx+=(BSIZE>>3)) + { + particleIndex8[index0_8_x_7 + (lx<<3)] = index0 + lx; + int3 h; + h.x = hx[lx]; + h.y = hx[BSIZE+lx]; + h.z = hx[2*BSIZE+lx]; + int hc = h.x & 7; + h.x = (h.x>>3) + dH.x; + h.y += dH.y; + h.z += dH.z; + unsigned int hash = ((h.x ^ h.y ^ h.z) & gridParams.cellMask)<<1; + if (hc != x_7) ++hash; + particleHash8[index0_8_x_7 + (lx<<3)] = hash; + } +} + +// find start of each cell in sorted particle list by comparing with previous hash value +// one thread per particle +__global__ void +findCellRangeD(int index0, const unsigned int* particleHash, + int * cellRange, int* cellGhost, int n) +{ + unsigned int i = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; + __shared__ unsigned int hash[BSIZE]; + if (i < n) + hash[threadIdx.x] = particleHash[i]; + + __syncthreads(); + + if (i < n) + { + bool firstInCell; + bool firstGhost; + unsigned int cur = hash[threadIdx.x]; + if (i == 0) + { + firstInCell = true; + firstGhost = cur&1; + } + else + { + unsigned int prev; + if (threadIdx.x > 0) + prev = hash[threadIdx.x-1]; + else + prev = particleHash[i-1]; + firstInCell = ((prev>>1) != (cur>>1)); + firstGhost = ((prev != cur) && (cur&1)); + if (firstInCell) + { + if ((prev>>1) < (cur>>1)-1) + cellRange[ (prev>>1)+1 ] = (index0+i) | (1U<<31); + if (!(prev&1)) // no ghost particles in previous cell + cellGhost[ prev>>1 ] = index0+i; + } + } + if (firstInCell) + cellRange[ cur>>1 ] = index0+i; + if (firstGhost) + cellGhost[ cur>>1 ] = index0+i; + if (i == n-1) + { + cellRange[ (cur>>1)+1 ] = (index0+n) | (1U<<31); + if (!(cur&1)) + cellGhost[ cur>>1 ] = index0+n; + } + } +} + +// rearrange particle data into sorted order +template +__global__ void +reorderDataD(const uint2* particleHash, // particle id sorted by hash + const TIn* oldPos, + float4* sortedPos, int n + ) +{ + int index0 = __mul24(blockIdx.x, blockDim.x); + int index = index0 + threadIdx.x; + if (index < n) + { + volatile uint2 sortedData = particleHash[index]; + //float4 pos = getPos4(oldPos,index0,index); + float4 pos = getPos4(oldPos,sortedData.y); + sortedPos[index] = pos; + } +} + + +////////////////////// +// CPU-side methods // +////////////////////// + +void SpatialGridContainer3f_computeHash(int cellBits, float cellWidth, int nbPoints, void* particleIndex8, void* particleHash8, const void* x) +{ + GridParams p; + p.cellWidth = cellWidth; + p.invCellWidth = 1.0f/cellWidth; + p.cellMask = (1<<<< grid, threads >>>((const float3*)x, (unsigned int*)particleIndex8, (unsigned int*)particleHash8, nbPoints); mycudaDebugError("computeHashD");} + } +} + +void SpatialGridContainer3f1_computeHash(int cellBits, float cellWidth, int nbPoints, void* particleIndex8, void* particleHash8, const void* x) +{ + GridParams p; + p.cellWidth = cellWidth; + p.invCellWidth = 1.0f/cellWidth; + p.cellMask = (1<<<< grid, threads >>>((const float4*)x, (unsigned int*)particleIndex8, (unsigned int*)particleHash8, nbPoints); mycudaDebugError("computeHashD");} + } +} + +void SpatialGridContainer_findCellRange(int cellBits, int index0, float cellWidth, int nbPoints, const void* particleHash8, void* cellRange, void* cellGhost) +{ + cudaMemset(cellRange, 0, ((1<>>(index0, (const unsigned int*)particleHash8, (int*)cellRange, (int*)cellGhost, 8*nbPoints); mycudaDebugError("findCellRangeD");} + } +} +/* +void SpatialGridContainer3f_reorderData(int nbPoints, const void* particleHash, void* sorted, const void* x) +{ + dim3 threads(BSIZE,1); + dim3 grid((nbPoints+BSIZE-1)/BSIZE,1); + {reorderDataD<<< grid, threads >>>((const uint2*)particleHash, (const float3*)x, (float4*)sorted, nbPoints); mycudaDebugError("reorderDataD");} +} + +void SpatialGridContainer3f1_reorderData(int nbPoints, const void* particleHash, void* sorted, const void* x) +{ + dim3 threads(BSIZE,1); + dim3 grid((nbPoints+BSIZE-1)/BSIZE,1); + {reorderDataD<<< grid, threads >>>((const uint2*)particleHash, (const float4*)x, (float4*)sorted, nbPoints); mycudaDebugError("reorderDataD");} +} +*/ +#if defined(__cplusplus) && CUDA_VERSION < 2000 +} // namespace cuda +} // namespace gpu +} // namespace sofa +#endif diff --git a/extensions/CUDA/src/SofaSphFluid/CUDA/CudaSpatialGridContainer.h b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaSpatialGridContainer.h new file mode 100644 index 0000000..da55f21 --- /dev/null +++ b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaSpatialGridContainer.h @@ -0,0 +1,141 @@ +/****************************************************************************** +* SOFA, Simulation Open-Framework Architecture * +* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU Lesser General Public License as published by * +* the Free Software Foundation; either version 2.1 of the License, or (at * +* your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, but WITHOUT * +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * +* for more details. * +* * +* You should have received a copy of the GNU Lesser General Public License * +* along with this program. If not, see . * +******************************************************************************* +* Authors: The SOFA Team and external contributors (see Authors.txt) * +* * +* Contact information: contact@sofa-framework.org * +******************************************************************************/ +// +// C++ Interface: SpatialGridContainer +// +// Description: +// +// +// Author: The SOFA team , (C) 2006 +// +// Copyright: See COPYING file that comes with this distribution +// +// + +#ifndef SOFA_GPU_CUDA_CUDASPATIALGRIDCONTAINER_H +#define SOFA_GPU_CUDA_CUDASPATIALGRIDCONTAINER_H + +#include +#include +#include +#include + + +namespace sofa::component::container +{ + +using namespace sofa::defaulttype; + +template +class SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVectorTypes > > +{ +public: + typedef SpatialGridTypes < gpu::cuda::CudaVectorTypes > DataTypes; + typedef typename DataTypes::Real Real; + typedef typename DataTypes::Coord Coord; + typedef typename DataTypes::VecCoord VecCoord; + typedef typename DataTypes::CellData CellData; + typedef typename DataTypes::GridData GridData; + //typedef typename DataTypes::NeighborListener NeighborListener; + typedef typename DataTypes::ParticleField ParticleField; + + enum + { + HASH_PX = 73856093, + HASH_PY = 19349663, + HASH_PZ = 83492791, + }; + +public: + SpatialGrid(Real cellWidth); + + ~SpatialGrid(); + + void update(const VecCoord& x); + + void draw(const core::visual::VisualParams*); + + template + void findNeighbors(NeighborListener* dest, Real dist); + + void computeField(ParticleField* field, Real dist); + + /// Change particles ordering inside a given cell have contiguous indices + /// + /// Fill the old2new and new2old arrays giving the permutation to apply + void reorderIndices(type::vector* old2new, type::vector* new2old); + GridData data; + + Real getCellWidth() const { return cellWidth; } + Real getInvCellWidth() const { return invCellWidth; } + + int getCellBits() const { return cellBits; } + int getNbCells() const { return nbCells; } + + int getCell(const Coord& c) const + { + return ( (helper::rfloor(c[0]*invCellWidth*0.5f) * HASH_PX) ^ + (helper::rfloor(c[1]*invCellWidth*0.5f) * HASH_PY) ^ + (helper::rfloor(c[2]*invCellWidth*0.5f) * HASH_PZ) ) & ((1 << cellBits)-1); + } + + //const sofa::gpu::cuda::CudaVector< unsigned int >& getParticleIndexVector() const { return particleIndex; } + const sofa::gpu::cuda::CudaVector< int >& getCellsVector() const { return cells; } + const sofa::gpu::cuda::CudaVector< int >& getCellGhostVector() const { return cellGhost; } + +protected: + const Real cellWidth; + const Real invCellWidth; + int cellBits, nbCells; + sofa::gpu::cuda::CudaVector< unsigned int > /*particleIndex,*/ particleHash; + //sofa::gpu::cuda::CudaVector< int > cellRange; + sofa::gpu::cuda::CudaVector< int > cells; + sofa::gpu::cuda::CudaVector< int > cellGhost; + sofa::gpu::cuda::CudaVector< sofa::gpu::cuda::Vec3f1 > sortedPos; + const VecCoord* lastX; + + void kernel_computeHash( + int cellBits, Real cellWidth, int nbPoints, void* particleIndex, void* particleHash, const void* x); + + void kernel_updateGrid( + int cellBits, int index0, Real cellWidth, int nbPoints, const void* particleHash, + void* cells, void* cellGhost); + //void kernel_reorderData(int nbPoints, const void* particleIndex, const void* particleHash, void* sorted, const void* x); + +}; + +#if !defined(SOFASPHFLUID_CUDA_CUDASPATIALGRIDCONTAINER_CPP) +extern template class SOFA_SOFASPHFLUID_CUDA_API SpatialGridContainer; +extern template class SOFA_SOFASPHFLUID_CUDA_API SpatialGrid >; + +#ifdef SOFA_GPU_CUDA_DOUBLE + +extern template class SOFA_SOFASPHFLUID_CUDA_API SpatialGridContainer< sofa::gpu::cuda::CudaVec3dTypes >; +extern template class SOFA_SOFASPHFLUID_CUDA_API SpatialGrid< sofa::component::container::SpatialGridTypes< sofa::gpu::cuda::CudaVec3dTypes > >; + +#endif // SOFA_GPU_CUDA_DOUBLE + +#endif + +} // namespace sofa::component::container + +#endif diff --git a/extensions/CUDA/src/SofaSphFluid/CUDA/CudaSpatialGridContainer.inl b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaSpatialGridContainer.inl new file mode 100644 index 0000000..e75986a --- /dev/null +++ b/extensions/CUDA/src/SofaSphFluid/CUDA/CudaSpatialGridContainer.inl @@ -0,0 +1,336 @@ +/****************************************************************************** +* SOFA, Simulation Open-Framework Architecture * +* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU Lesser General Public License as published by * +* the Free Software Foundation; either version 2.1 of the License, or (at * +* your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, but WITHOUT * +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * +* for more details. * +* * +* You should have received a copy of the GNU Lesser General Public License * +* along with this program. If not, see . * +******************************************************************************* +* Authors: The SOFA Team and external contributors (see Authors.txt) * +* * +* Contact information: contact@sofa-framework.org * +******************************************************************************/ +// +// C++ Interface: SpatialGridContainer +// +// Description: +// +// +// Author: The SOFA team , (C) 2006 +// +// Copyright: See COPYING file that comes with this distribution +// +// + +#ifndef SOFA_GPU_CUDA_CUDASPATIALGRIDCONTAINER_INL +#define SOFA_GPU_CUDA_CUDASPATIALGRIDCONTAINER_INL + +#include +#include +#include +#include + +namespace sofa +{ + + +namespace gpu::cuda +{ + +extern "C" +{ + void SpatialGridContainer3f_computeHash(int cellBits, float cellWidth, int nbPoints, void* particleIndex8, void* particleHash8, const void* x); + void SpatialGridContainer3f1_computeHash(int cellBits, float cellWidth, int nbPoints, void* particleIndex8, void* particleHash8, const void* x); + void SpatialGridContainer_findCellRange(int cellBits, int index0, float cellWidth, int nbPoints, const void* particleHash8, void* cellRange, void* cellGhost); +//void SpatialGridContainer3f_reorderData(int nbPoints, const void* particleHash, void* sorted, const void* x); +//void SpatialGridContainer3f1_reorderData(int nbPoints, const void* particleHash, void* sorted, const void* x); +} + +} // namespace gpu::cuda + + +namespace component::container +{ + +using namespace sofa::helper; + +// template +// typename SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVectorTypes > >::Grid SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVectorTypes > >::emptyGrid; + +template +SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVectorTypes > >::SpatialGrid(Real cellWidth) + : cellWidth(cellWidth), invCellWidth(1/cellWidth), lastX(NULL) +{ + cellBits = 15; + nbCells = 1< +SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVectorTypes > >::~SpatialGrid() +{ +} + +template template +void SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVectorTypes > >::findNeighbors(NeighborListener* /*dest*/, Real /*dist*/) +{ + msg_error("SpatialGrid") << "TODO: SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVectorTypes > >::findNeighbors(NeighborListener* dest, Real dist)"; +} + +template +void SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVectorTypes > >::computeField(ParticleField* /*field*/, Real /*dist*/) +{ + msg_error("SpatialGrid") << "TODO: SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVectorTypes > >::computeField(ParticleField* field, Real dist)"; +} + +template +void SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVectorTypes > >::reorderIndices(type::vector* /*old2new*/, type::vector* /*new2old*/) +{ + msg_error("SpatialGrid") << "TODO: SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVectorTypes > >::reorderIndices(type::vector* old2new, type::vector* new2old)"; +} + + +template<> +void SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVec3fTypes > >::kernel_computeHash( + int cellBits, Real cellWidth, int nbPoints, void* particleIndex, void* particleHash, + const void* x) +{ + /* + { + helper::ReadAccessor< sofa::gpu::cuda::CudaVector< unsigned int > > pparticleHash = this->particleHash; + for (int i=0;iparticleHash.deviceWrite(); + { + helper::ReadAccessor< sofa::gpu::cuda::CudaVector< unsigned int > > pparticleHash = this->particleHash; + for (int i=0;i +void SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVec3fTypes > >::kernel_updateGrid( + int cellBits, int index0, Real cellWidth, int nbPoints, const void* particleHash, + void* cells, void* cellGhost) +{ + //int nbbits = 8; + //while (nbbits < cellBits + 1) nbbits+=8; + //CudaSort(particleHash,particleIndex,nbbits,nbPoints*8); + gpu::cuda::SpatialGridContainer_findCellRange(cellBits, index0, cellWidth, nbPoints, particleHash, cells, cellGhost); +} + +template<> +void SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVec3f1Types > >::kernel_computeHash( + int cellBits, Real cellWidth, int nbPoints, void* particleIndex, void* particleHash, + const void* x) +{ + gpu::cuda::SpatialGridContainer3f1_computeHash(cellBits, cellWidth, nbPoints, particleIndex, particleHash, x); +} + +template<> +void SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVec3f1Types > >::kernel_updateGrid( + int cellBits, int index0, Real cellWidth, int nbPoints, const void* particleHash, + void* cells, void* cellGhost) +{ + gpu::cuda::SpatialGridContainer_findCellRange(cellBits, index0, cellWidth, nbPoints, particleHash, cells, cellGhost); +} + +#ifdef SOFA_GPU_CUDA_DOUBLE + +template<> +void SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVec3dTypes > >::kernel_computeHash( + int /*cellBits*/, Real /*cellWidth*/, int /*nbPoints*/, void* /*particleIndex*/, void* /*particleHash*/, + const void* /*x*/) +{ + msg_error("SpatialGrid") << "TODO: SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVec3dTypes > >::kernel_computeHash()"; +} + +template<> +void SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVec3dTypes > >::kernel_updateGrid( + int /*cellBits*/, int /*index0*/, Real /*cellWidth*/, int /*nbPoints*/, const void* /*particleHash*/, + void* /*cells*/, void* /*cellGhost*/) +{ + msg_error("SpatialGrid") << "TODO: SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVec3dTypes > >::kernel_updateGrid()"; +} + +#endif // SOFA_GPU_CUDA_DOUBLE + +/* +template<> +void SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVec3fTypes > >::kernel_reorderData(int nbPoints, const void* particleHash, void* sorted, const void* x) +{ + gpu::cuda::SpatialGridContainer3f_reorderData(nbPoints, particleHash, sorted, x); +} + +template<> +void SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVec3f1Types > >::kernel_reorderData(int nbPoints, const void* particleHash, void* sorted, const void* x) +{ + gpu::cuda::SpatialGridContainer3f1_reorderData(nbPoints, particleHash, sorted, x); +} +*/ + +template +void SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVectorTypes > >::update(const VecCoord& x) +{ + lastX = &x; + data.clear(); + int nbPoints = x.size(); + int index0 = nbCells+BSIZE; + /*particleIndex*/ cells.recreate(index0+nbPoints*8,8*BSIZE); + particleHash.recreate(nbPoints*8,8*BSIZE); + + { + unsigned int numElements = (unsigned int)nbPoints*8; + sofa::gpu::cuda::CudaSortPrepare(numElements); + } + + //cells.recreate(nbCells+1); + cellGhost.recreate(nbCells); + //sortedPos.recreate(nbPoints); +#if 0 + { + helper::WriteAccessor< sofa::gpu::cuda::CudaVector< int > > pcells = cells; + helper::WriteAccessor< sofa::gpu::cuda::CudaVector< unsigned int > > pparticleHash = particleHash; + helper::ReadAccessor< VecCoord > px = x; + const Real pcellWidth = cellWidth*2.0f; + //const Real pinvCellWidth = 1.0f/pcellWidth; + const int pcellMask = (1<> 1; + hgpos_y = (hgpos_y-1) >> 1; + hgpos_z = (hgpos_z-1) >> 1; + unsigned int hx = (HASH_PX*hgpos_x); + unsigned int hy = (HASH_PY*hgpos_y); + unsigned int hz = (HASH_PZ*hgpos_z); + for (int x=0; x<8; ++x) + { + unsigned int h_x = hx; if (x&1) h_x += HASH_PX; + unsigned int h_y = hy; if (x&2) h_y += HASH_PY; + unsigned int h_z = hz; if (x&4) h_z += HASH_PZ; + unsigned int hash = ((h_x ^ h_y ^ h_z) & pcellMask)<<1; + if (halfcell != x) ++hash; + pcells[index0 + i*8 + x] = i; + pparticleHash[i*8 + x] = hash; + } + } + } +#endif +#if 0 + type::vector< std::pair > cpusort; + cpusort.resize(8*nbPoints); + for (int i=0; i<8*nbPoints; ++i) + cpusort[i] = std::make_pair(pparticleHash[i],pcells[index0+i]); + std::sort(cpusort.begin(),cpusort.end(),compare_pair_first); + + for (int i=0; i<8*nbPoints; ++i) + { + pparticleHash[i] = cpusort[i].first; + pcells[index0+i] = cpusort[i].second; + } +#endif + + kernel_computeHash( + cellBits, cellWidth*2, nbPoints, cells.deviceWriteAt(index0), particleHash.deviceWrite(), + x.deviceRead()); + + { + int nbbits = 8; + while (nbbits < cellBits + 1) nbbits+=8; + sofa::gpu::cuda::CudaSort(&particleHash,0, &cells,index0, nbPoints*8, nbbits); + } + + kernel_updateGrid( + cellBits, index0, cellWidth*2, nbPoints, particleHash.deviceRead(), + cells.deviceWrite(), cellGhost.deviceWrite()); +#if 0 + std::cout << nbPoints*8 << " entries in " << nbCells << " cells." << std::endl; + int nfill = 0; + for (int c=0; c cellBegin + nbPoints/2) // || nfill >= 100 && nfill < 110) + std::cout << "Cell " << c << ": range = " << cellBegin-index0 << " - " << cellEnd-index0 << " ghost = " << cellGhost[c]-index0 << std::endl; + ++nfill; + } + std::cout << ((1000*nfill)/nbCells) * 0.1 << " % cells with particles." << std::endl; +#endif + + //kernel_reorderData(nbPoints, particleHash.deviceRead(), sortedPos.deviceWrite(), x.deviceRead()); +} + +template +void SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVectorTypes > >::draw(const core::visual::VisualParams* ) +{ +#if SOFACUDA_HAVE_SOFA_GL == 1 + if (!lastX) return; + int nbPoints = particleHash.size(); + int index0 = nbCells+BSIZE; + glDisable(GL_LIGHTING); + glColor4f(1,1,1,1); + glPointSize(3); + glBegin(GL_POINTS); + unsigned int last = 0; + for (int i=0; i>=1; + //if (cell != 0 && cell != 65535) + // std::cout << i << ": "< "< "<>2)&3; + int b = (cell>>4)&3; + glColor4ub(63+r*64,63+g*64,63+b*64,255); + //glVertex3fv(sortedPos[i].ptr()); + sofa::gl::glVertexT((*lastX)[p]); + } + glEnd(); + glPointSize(1); +#endif // SOFACUDA_HAVE_SOFA_GL == 1 +} + +} // namespace component::container + + +} // namespace sofa + +#endif diff --git a/extensions/CUDA/src/SofaSphFluid/CUDA/config.h.in b/extensions/CUDA/src/SofaSphFluid/CUDA/config.h.in new file mode 100644 index 0000000..3c937ec --- /dev/null +++ b/extensions/CUDA/src/SofaSphFluid/CUDA/config.h.in @@ -0,0 +1,37 @@ +/****************************************************************************** +* SOFA, Simulation Open-Framework Architecture * +* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU Lesser General Public License as published by * +* the Free Software Foundation; either version 2.1 of the License, or (at * +* your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, but WITHOUT * +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * +* for more details. * +* * +* You should have received a copy of the GNU Lesser General Public License * +* along with this program. If not, see . * +******************************************************************************* +* Authors: The SOFA Team and external contributors (see Authors.txt) * +* * +* Contact information: contact@sofa-framework.org * +******************************************************************************/ +#pragma once + +#include + +#ifdef SOFA_BUILD_SOFASPHFLUID_CUDA +# define SOFA_TARGET @PROJECT_NAME@ +# define SOFA_SOFASPHFLUID_CUDA_API SOFA_EXPORT_DYNAMIC_LIBRARY +#else +# define SOFA_SOFASPHFLUID_CUDA_API SOFA_IMPORT_DYNAMIC_LIBRARY +#endif + +namespace sofasphfluid::cuda +{ + constexpr const char* MODULE_NAME = "@PROJECT_NAME@"; + constexpr const char* MODULE_VERSION = "@PROJECT_VERSION@"; +} // namespace sofasphfluid::cuda diff --git a/extensions/CUDA/src/SofaSphFluid/CUDA/init.cpp b/extensions/CUDA/src/SofaSphFluid/CUDA/init.cpp new file mode 100644 index 0000000..93bd1e6 --- /dev/null +++ b/extensions/CUDA/src/SofaSphFluid/CUDA/init.cpp @@ -0,0 +1,61 @@ +/****************************************************************************** +* SofaSphFluid plugin * +* (c) 2006 Inria, University of Lille, CNRS * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU Lesser General Public License as published by * +* the Free Software Foundation; either version 2.1 of the License, or (at * +* your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, but WITHOUT * +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * +* for more details. * +* * +* You should have received a copy of the GNU Lesser General Public License * +* along with this program. If not, see . * +******************************************************************************* +* Authors: see Authors.md * +* * +* Contact information: contact@sofa-framework.org * +******************************************************************************/ +#include +#include +#include + +namespace sofasphfluid::cuda +{ + +extern "C" { + SOFA_EXPORT_DYNAMIC_LIBRARY void initExternalModule(); + SOFA_EXPORT_DYNAMIC_LIBRARY const char* getModuleName(); + SOFA_EXPORT_DYNAMIC_LIBRARY const char* getModuleVersion(); +} + +void initExternalModule() +{ + init(); +} + +const char* getModuleName() +{ + return MODULE_NAME; +} + +const char* getModuleVersion() +{ + return MODULE_VERSION; +} + +void init() +{ + static bool first = true; + if (first) + { + sofasphfluid::init(); + sofa::gpu::cuda::init(); + first = false; + } +} + +} // namespace sofasphfluid::cuda diff --git a/extensions/CUDA/src/SofaSphFluid/CUDA/init.h b/extensions/CUDA/src/SofaSphFluid/CUDA/init.h new file mode 100644 index 0000000..3267619 --- /dev/null +++ b/extensions/CUDA/src/SofaSphFluid/CUDA/init.h @@ -0,0 +1,29 @@ +/****************************************************************************** +* SOFA, Simulation Open-Framework Architecture * +* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU Lesser General Public License as published by * +* the Free Software Foundation; either version 2.1 of the License, or (at * +* your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, but WITHOUT * +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * +* for more details. * +* * +* You should have received a copy of the GNU Lesser General Public License * +* along with this program. If not, see . * +******************************************************************************* +* Authors: The SOFA Team and external contributors (see Authors.txt) * +* * +* Contact information: contact@sofa-framework.org * +******************************************************************************/ +#pragma once + +#include + +namespace sofasphfluid::cuda +{ +SOFA_SOFASPHFLUID_CUDA_API void init(); +} // namespace sofasphdluid::cuda diff --git a/src/SofaSphFluid/initSPHFluid.cpp b/src/SofaSphFluid/initSPHFluid.cpp index 0c146b3..de55649 100644 --- a/src/SofaSphFluid/initSPHFluid.cpp +++ b/src/SofaSphFluid/initSPHFluid.cpp @@ -20,6 +20,7 @@ * Contact information: contact@sofa-framework.org * ******************************************************************************/ #include +#include #include #include @@ -65,7 +66,7 @@ SOFA_SPH_FLUID_API const char* getModuleDescription(); SOFA_SPH_FLUID_API void registerObjects(sofa::core::ObjectFactory* factory); } -void initExternalModule() +void init() { static bool first = true; if (first) @@ -77,6 +78,11 @@ void initExternalModule() } } +void initExternalModule() +{ + init(); +} + const char* getModuleName() { return MODULE_NAME; diff --git a/src/SofaSphFluid/initSPHFluid.h b/src/SofaSphFluid/initSPHFluid.h new file mode 100644 index 0000000..a0a7b4a --- /dev/null +++ b/src/SofaSphFluid/initSPHFluid.h @@ -0,0 +1,30 @@ +/****************************************************************************** +* SOFA, Simulation Open-Framework Architecture * +* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU Lesser General Public License as published by * +* the Free Software Foundation; either version 2.1 of the License, or (at * +* your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, but WITHOUT * +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * +* for more details. * +* * +* You should have received a copy of the GNU Lesser General Public License * +* along with this program. If not, see . * +******************************************************************************* +* Authors: The SOFA Team and external contributors (see Authors.txt) * +* * +* Contact information: contact@sofa-framework.org * +******************************************************************************/ +#pragma once +#include + +namespace sofasphfluid +{ + +SOFA_SPH_FLUID_API void init(); + +}