diff --git a/applications/plugins/SofaCUDA/CMakeLists.txt b/applications/plugins/SofaCUDA/CMakeLists.txt index 26634413007..c409df1ee05 100644 --- a/applications/plugins/SofaCUDA/CMakeLists.txt +++ b/applications/plugins/SofaCUDA/CMakeLists.txt @@ -273,35 +273,6 @@ if(Sofa.GUI.Qt_FOUND) list(APPEND SOURCE_FILES sofa/gpu/gui/CudaDataWidget.cpp) endif() -find_package(SofaSphFluid QUIET) -if(SofaSphFluid_FOUND) - list(APPEND HEADER_FILES - sofa/gpu/cuda/CudaParticleSource.h - sofa/gpu/cuda/CudaParticleSource.inl - sofa/gpu/cuda/CudaSPHFluidForceField.h - sofa/gpu/cuda/CudaSPHFluidForceField.inl - sofa/gpu/cuda/CudaParticlesRepulsionForceField.h - sofa/gpu/cuda/CudaParticlesRepulsionForceField.inl - sofa/gpu/cuda/CudaSpatialGridContainer.h - sofa/gpu/cuda/CudaSpatialGridContainer.inl - ) - list(APPEND SOURCE_FILES - sofa/gpu/cuda/CudaParticleSource.cpp - sofa/gpu/cuda/CudaSPHFluidForceField.cpp - sofa/gpu/cuda/CudaParticlesRepulsionForceField.cpp - sofa/gpu/cuda/CudaSpatialGridContainer.cpp - ) - list(APPEND CUDA_SOURCES - sofa/gpu/cuda/CudaParticleSource.cu - sofa/gpu/cuda/CudaSPHFluidForceField.cu - sofa/gpu/cuda/CudaParticlesRepulsionForceField.cu - sofa/gpu/cuda/CudaSpatialGridContainer.cu - ) - message(STATUS "SofaCUDA: optional dependency to SofaSphFluid found. ") -else() - message(STATUS "SofaCUDA: optional dependency SofaSphFluid not found. ") -endif() - sofa_find_package(SofaValidation QUIET) if(SofaValidation_FOUND) list(APPEND SOURCE_FILES @@ -416,10 +387,6 @@ if(SofaValidation_FOUND) target_link_libraries(${PROJECT_NAME} SofaValidation) endif() -if(SofaSphFluid_FOUND) - target_link_libraries(${PROJECT_NAME} SofaSphFluid) -endif() - if(SOFACUDA_CUBLAS) cuda_add_cublas_to_target(${PROJECT_NAME}) target_link_libraries(${PROJECT_NAME} ${CUDA_SPARSE_LIBRARY}) diff --git a/applications/plugins/SofaCUDA/SofaCUDAConfig.cmake.in b/applications/plugins/SofaCUDA/SofaCUDAConfig.cmake.in index 14f09c7c6ca..2687c0dd184 100644 --- a/applications/plugins/SofaCUDA/SofaCUDAConfig.cmake.in +++ b/applications/plugins/SofaCUDA/SofaCUDAConfig.cmake.in @@ -45,11 +45,6 @@ if(SOFACUDA_HAVE_SOFADISTANCEGRID) endif() endif() -set(SOFACUDA_HAVE_SOFASPHFLUID @SOFACUDA_HAVE_SOFASPHFLUID@) -if(SOFACUDA_HAVE_SOFASPHFLUID) - find_package(SofaSphFluid QUIET REQUIRED) -endif() - set(SOFACUDA_HAVE_SOFAVALIDATION @SOFACUDA_HAVE_SOFAVALIDATION@) if(SOFACUDA_HAVE_SOFAVALIDATION) find_package(SofaValidation QUIET REQUIRED) diff --git a/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaParticleSource.cpp b/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaParticleSource.cpp deleted file mode 100644 index 9b7441f4ee4..00000000000 --- a/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaParticleSource.cpp +++ /dev/null @@ -1,56 +0,0 @@ -/****************************************************************************** -* SOFA, Simulation Open-Framework Architecture * -* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU Lesser General Public License as published by * -* the Free Software Foundation; either version 2.1 of the License, or (at * -* your option) any later version. * -* * -* This program is distributed in the hope that it will be useful, but WITHOUT * -* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * -* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * -* for more details. * -* * -* You should have received a copy of the GNU Lesser General Public License * -* along with this program. If not, see . * -******************************************************************************* -* Authors: The SOFA Team and external contributors (see Authors.txt) * -* * -* Contact information: contact@sofa-framework.org * -******************************************************************************/ -#include "CudaTypes.h" -#include "CudaParticleSource.inl" -#include -#include -#include - -namespace sofa -{ - - -namespace core::behavior -{ -template class ProjectiveConstraintSet; -#ifdef SOFA_GPU_CUDA_DOUBLE -template class ProjectiveConstraintSet; -#endif -} // namespace core::behavior - - -namespace gpu::cuda -{ - - - -int ParticleSourceCudaClass = core::RegisterObject("Supports GPU-side computations using CUDA") - .add< component::misc::ParticleSource >() -#ifdef SOFA_GPU_CUDA_DOUBLE - .add< component::misc::ParticleSource >() -#endif // SOFA_GPU_CUDA_DOUBLE - ; - -} // namespace gpu::cuda - - -} // namespace sofa diff --git a/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaParticleSource.cu b/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaParticleSource.cu deleted file mode 100644 index 6f03d33de69..00000000000 --- a/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaParticleSource.cu +++ /dev/null @@ -1,132 +0,0 @@ -/****************************************************************************** -* SOFA, Simulation Open-Framework Architecture * -* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU Lesser General Public License as published by * -* the Free Software Foundation; either version 2.1 of the License, or (at * -* your option) any later version. * -* * -* This program is distributed in the hope that it will be useful, but WITHOUT * -* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * -* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * -* for more details. * -* * -* You should have received a copy of the GNU Lesser General Public License * -* along with this program. If not, see . * -******************************************************************************* -* Authors: The SOFA Team and external contributors (see Authors.txt) * -* * -* Contact information: contact@sofa-framework.org * -******************************************************************************/ -#include "CudaCommon.h" -#include "CudaMath.h" -#include "cuda.h" - -#if defined(__cplusplus) && CUDA_VERSION < 2000 -namespace sofa -{ -namespace gpu -{ -namespace cuda -{ -#endif - -extern "C" -{ - - void ParticleSourceCuda3f_fillValues(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, float fx, float fy, float fz); - void ParticleSourceCuda3f_copyValuesWithOffset(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, const void* src, float fx, float fy, float fz); - -#ifdef SOFA_GPU_CUDA_DOUBLE - - void ParticleSourceCuda3d_fillValues(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, double fx, double fy, double fz); - void ParticleSourceCuda3d_copyValuesWithOffset(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, const void* src, double fx, double fy, double fz); - -#endif // SOFA_GPU_CUDA_DOUBLE -} - -////////////////////// -// GPU-side methods // -////////////////////// - - -template -__global__ void ParticleSourceCuda3t_fillValues_kernel(unsigned int totalsize, unsigned int subsetsize, real* dest, const unsigned int* indices, real fx, real fy, real fz) -{ - unsigned int index0 = blockIdx.x * BSIZE; - unsigned int index = index0+threadIdx.x; - if (index < subsetsize) - { - unsigned int dindex = indices[index]; - if (dindex < totalsize) - { - unsigned int dindex3 = dindex * 3; - dest[dindex3+0] = fx; - dest[dindex3+1] = fy; - dest[dindex3+2] = fz; - } - } -} - -template -__global__ void ParticleSourceCuda3t_copyValuesWithOffset_kernel(unsigned int totalsize, unsigned int subsetsize, real* dest, const unsigned int* indices, const real* src, real fx, real fy, real fz) -{ - unsigned int index0 = blockIdx.x * BSIZE; - unsigned int index = index0+threadIdx.x; - if (index < subsetsize) - { - unsigned int dindex = indices[index]; - if (dindex < totalsize) - { - unsigned int dindex3 = dindex * 3; - unsigned int index3 = index * 3; - dest[dindex3+0] = src[index3+0]+fx; - dest[dindex3+1] = src[index3+1]+fy; - dest[dindex3+2] = src[index3+2]+fz; - } - } -} - -////////////////////// -// CPU-side methods // -////////////////////// - - -void ParticleSourceCuda3f_fillValues(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, float fx, float fy, float fz) -{ - dim3 threads(BSIZE,1); - dim3 grid((subsetsize+BSIZE-1)/BSIZE,1); - {ParticleSourceCuda3t_fillValues_kernel<<< grid, threads >>>(totalsize, subsetsize, (float*)dest, (const unsigned int*)indices, fx, fy, fz); mycudaDebugError("ParticleSourceCuda3t_fillValues_kernel");} -} - -void ParticleSourceCuda3f_copyValuesWithOffset(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, const void* src, float fx, float fy, float fz) -{ - dim3 threads(BSIZE,1); - dim3 grid((subsetsize+BSIZE-1)/BSIZE,1); - {ParticleSourceCuda3t_copyValuesWithOffset_kernel<<< grid, threads >>>(totalsize, subsetsize, (float*)dest, (const unsigned int*)indices, (const float*)src, fx, fy, fz); mycudaDebugError("ParticleSourceCuda3t_copyValuesWithOffset_kernel");} -} - -#ifdef SOFA_GPU_CUDA_DOUBLE - -void ParticleSourceCuda3d_fillValues(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, double fx, double fy, double fz) -{ - dim3 threads(BSIZE,1); - dim3 grid((subsetsize+BSIZE-1)/BSIZE,1); - {ParticleSourceCuda3t_fillValues_kernel<<< grid, threads >>>(totalsize, subsetsize, (double*)dest, (const unsigned int*)indices, fx, fy, fz); mycudaDebugError("ParticleSourceCuda3t_fillValues_kernel");} -} - -void ParticleSourceCuda3d_copyValuesWithOffset(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, const void* src, double fx, double fy, double fz) -{ - dim3 threads(BSIZE,1); - dim3 grid((subsetsize+BSIZE-1)/BSIZE,1); - {ParticleSourceCuda3t_copyValuesWithOffset_kernel<<< grid, threads >>>(totalsize, subsetsize, (double*)dest, (const unsigned int*)indices, (const double*)src, fx, fy, fz); mycudaDebugError("ParticleSourceCuda3t_copyValuesWithOffset_kernel");} -} - -#endif // SOFA_GPU_CUDA_DOUBLE - -#if defined(__cplusplus) && CUDA_VERSION < 2000 -} // namespace cuda -} // namespace gpu -} // namespace sofa -#endif diff --git a/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaParticleSource.h b/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaParticleSource.h deleted file mode 100644 index 9bd87e8a94a..00000000000 --- a/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaParticleSource.h +++ /dev/null @@ -1,27 +0,0 @@ -/****************************************************************************** -* SOFA, Simulation Open-Framework Architecture * -* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU Lesser General Public License as published by * -* the Free Software Foundation; either version 2.1 of the License, or (at * -* your option) any later version. * -* * -* This program is distributed in the hope that it will be useful, but WITHOUT * -* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * -* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * -* for more details. * -* * -* You should have received a copy of the GNU Lesser General Public License * -* along with this program. If not, see . * -******************************************************************************* -* Authors: The SOFA Team and external contributors (see Authors.txt) * -* * -* Contact information: contact@sofa-framework.org * -******************************************************************************/ -#ifndef SOFA_GPU_CUDA_CUDAPARTICLESOURCE_H -#define SOFA_GPU_CUDA_CUDAPARTICLESOURCE_H - -#include - -#endif diff --git a/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaParticleSource.inl b/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaParticleSource.inl deleted file mode 100644 index 6ee83d77af5..00000000000 --- a/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaParticleSource.inl +++ /dev/null @@ -1,138 +0,0 @@ -/****************************************************************************** -* SOFA, Simulation Open-Framework Architecture * -* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU Lesser General Public License as published by * -* the Free Software Foundation; either version 2.1 of the License, or (at * -* your option) any later version. * -* * -* This program is distributed in the hope that it will be useful, but WITHOUT * -* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * -* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * -* for more details. * -* * -* You should have received a copy of the GNU Lesser General Public License * -* along with this program. If not, see . * -******************************************************************************* -* Authors: The SOFA Team and external contributors (see Authors.txt) * -* * -* Contact information: contact@sofa-framework.org * -******************************************************************************/ -#ifndef SOFA_GPU_CUDA_CUDAPARTICLESOURCE_INL -#define SOFA_GPU_CUDA_CUDAPARTICLESOURCE_INL - -#include "CudaParticleSource.h" -#include - -namespace sofa -{ - - -namespace gpu::cuda -{ - -extern "C" -{ - - void ParticleSourceCuda3f_fillValues(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, float fx, float fy, float fz); - void ParticleSourceCuda3f_copyValuesWithOffset(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, const void* src, float fx, float fy, float fz); - - -#ifdef SOFA_GPU_CUDA_DOUBLE - - void ParticleSourceCuda3d_fillValues(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, double fx, double fy, double fz); - void ParticleSourceCuda3d_copyValuesWithOffset(unsigned int totalsize, unsigned int subsetsize, void* dest, const void* indices, const void* src, double fx, double fy, double fz); - -#endif // SOFA_GPU_CUDA_DOUBLE -} - -} // namespace gpu::cuda - - -namespace component::misc -{ - -using namespace gpu::cuda; - -// template <> -// void ParticleSource::projectResponse(VecDeriv& res) -// { -// if (!this->mstate) return; -// const VecIndex& lastparticles = this->lastparticles.getValue(); -// if (lastparticles.empty()) return; -// double time = getContext()->getTime(); -// if (time < f_start.getValue() || time > f_stop.getValue()) return; -// // constraint the last values -// //mycudaMemset(((Deriv*)res.deviceWrite())+lastparticles[0], 0, lastparticles.size()*sizeof(Deriv)); -// ParticleSourceCuda3f_fillValues(res.size(), lastparticles.size(), res.deviceWrite(), lastparticles.deviceRead(), 0, 0, 0); -// } - -// template <> -// void ParticleSource::projectVelocity(VecDeriv& res) -// { -// if (!this->mstate) return; -// const VecIndex& lastparticles = this->lastparticles.getValue(); -// if (lastparticles.empty()) return; -// double time = getContext()->getTime(); -// if (time < f_start.getValue() || time > f_stop.getValue()) return; -// // constraint the last values -// Deriv vel = f_velocity.getValue(); -// #if 1 -// //mycudaMemset(((Deriv*)res.deviceWrite())+lastparticles[0], 0, lastparticles.size()*sizeof(Coord)); -// ParticleSourceCuda3f_fillValues(res.size(), lastparticles.size(), res.deviceWrite(), lastparticles.deviceRead(), vel[0], vel[1], vel[2]); -// #else -// for (unsigned int s=0; s -// void ParticleSource::projectPosition(VecCoord& res) -// { -// if (!this->mstate) return; -// const VecIndex& lastparticles = this->lastparticles.getValue(); -// if (lastparticles.empty()) return; -// double time = getContext()->getTime(); -// if (time < f_start.getValue() || time > f_stop.getValue()) return; -// // constraint the last values -// Deriv vel = f_velocity.getValue(); -// vel *= (time-lasttime); -// //mycudaMemset(((Deriv*)res.deviceWrite())+lastparticles[0], 0, lastparticles.size()*sizeof(Coord)); -// ParticleSourceCuda3f_copyValuesWithOffset(res.size(), lastparticles.size(), res.deviceWrite(), lastparticles.deviceRead(), lastpos.deviceRead(), vel[0], vel[1], vel[2]); -// } - - -// #ifdef SOFA_GPU_CUDA_DOUBLE - -// template <> -// void ParticleSource::projectResponse(VecDeriv& res) -// { -// if (!this->mstate) return; -// const VecIndex& lastparticles = this->lastparticles.getValue(); -// if (lastparticles.empty()) return; -// double time = getContext()->getTime(); -// if (time < f_start.getValue() || time > f_stop.getValue()) return; -// // constraint the last values -// mycudaMemset(((Deriv*)res.deviceWrite())+lastparticles[0], 0, lastparticles.size()*sizeof(Coord)); -// } - -// template <> -// void ParticleSource::projectVelocity(VecDeriv& /*res*/) -// { -// } - -// template <> -// void ParticleSource::projectPosition(VecDeriv& /*res*/) -// { -// } - -// #endif // SOFA_GPU_CUDA_DOUBLE - -} // namespace component::misc - - -} // namespace sofa - -#endif diff --git a/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaParticlesRepulsionForceField.cpp b/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaParticlesRepulsionForceField.cpp deleted file mode 100644 index 042d77da5b5..00000000000 --- a/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaParticlesRepulsionForceField.cpp +++ /dev/null @@ -1,43 +0,0 @@ -/****************************************************************************** -* SOFA, Simulation Open-Framework Architecture * -* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU Lesser General Public License as published by * -* the Free Software Foundation; either version 2.1 of the License, or (at * -* your option) any later version. * -* * -* This program is distributed in the hope that it will be useful, but WITHOUT * -* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * -* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * -* for more details. * -* * -* You should have received a copy of the GNU Lesser General Public License * -* along with this program. If not, see . * -******************************************************************************* -* Authors: The SOFA Team and external contributors (see Authors.txt) * -* * -* Contact information: contact@sofa-framework.org * -******************************************************************************/ -#include -#include "CudaParticlesRepulsionForceField.inl" -#include -#include -#include - - -namespace sofa::gpu::cuda -{ - -int ParticlesRepulsionForceFieldCudaClass = core::RegisterObject("Supports GPU-side computations using CUDA") - .add< component::forcefield::ParticlesRepulsionForceField >() -#ifdef SOFA_GPU_CUDA_DOUBLE - .add< component::forcefield::ParticlesRepulsionForceField >() -#endif // SOFA_GPU_CUDA_DOUBLE - ; - -} // namespace sofa::gpu::cuda - - - - diff --git a/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaParticlesRepulsionForceField.cu b/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaParticlesRepulsionForceField.cu deleted file mode 100644 index 639ac3ed8a1..00000000000 --- a/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaParticlesRepulsionForceField.cu +++ /dev/null @@ -1,320 +0,0 @@ -/****************************************************************************** -* SOFA, Simulation Open-Framework Architecture * -* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU Lesser General Public License as published by * -* the Free Software Foundation; either version 2.1 of the License, or (at * -* your option) any later version. * -* * -* This program is distributed in the hope that it will be useful, but WITHOUT * -* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * -* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * -* for more details. * -* * -* You should have received a copy of the GNU Lesser General Public License * -* along with this program. If not, see . * -******************************************************************************* -* Authors: The SOFA Team and external contributors (see Authors.txt) * -* * -* Contact information: contact@sofa-framework.org * -******************************************************************************/ -#include -#include -#include "cuda.h" - -#if defined(__cplusplus) && CUDA_VERSION < 2000 -namespace sofa -{ -namespace gpu -{ -namespace cuda -{ -#endif - -template -class GPURepulsion -{ -public: - real d; - real d2; - real stiffness; - real damping; -}; - -typedef GPURepulsion GPURepulsion3f; -typedef GPURepulsion GPURepulsion3d; - -extern "C" -{ - void ParticlesRepulsionForceFieldCuda3f_addForce (unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3f* repulsion, void* f, const void* x, const void* v ); - void ParticlesRepulsionForceFieldCuda3f_addDForce(unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3f* repulsion, void* f, const void* x, const void* dx); - -#ifdef SOFA_GPU_CUDA_DOUBLE - - void ParticlesRepulsionForceFieldCuda3d_addForce (unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3d* repulsion, void* f, const void* x, const void* v ); - void ParticlesRepulsionForceFieldCuda3d_addDForce(unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3d* repulsion, void* f, const void* x, const void* dx); - -#endif // SOFA_GPU_CUDA_DOUBLE -} - -////////////////////// -// GPU-side methods // -////////////////////// - -template -__device__ void ParticlesRepulsionCalcForce(CudaVec3 x1, CudaVec3 v1, CudaVec3 x2, CudaVec3 v2, CudaVec3& force, GPURepulsion& rep) -{ - CudaVec3 n = x2-x1; - real d2 = norm2(n); - if (d2 < rep.d2) - { - CudaVec3 vi = v2-v1; - real inv_d = rsqrtf(d2); - n *= inv_d; - real d = d2*inv_d - rep.d; - real forceIntensity = rep.stiffness*d; - real dampingIntensity = rep.damping*d; - force += n*forceIntensity - vi*dampingIntensity; - } -} - -template -__device__ void ParticlesRepulsionCalcDForce(CudaVec3 x1, CudaVec3 dx1, CudaVec3 x2, CudaVec3 dx2, CudaVec3& dforce, GPURepulsion& rep) -{ - CudaVec3 n = x2-x1; - real d2 = norm2(n); - if (d2 < rep.d2) - { - CudaVec3 dxi = dx2-dx1; - //real inv_d = rsqrtf(d2); - //n *= inv_d; - //dforce = n * (rep.stiffness * dot(dxi, n)); - - //dforce = (n * (rep.stiffness * dot(dxi, n)))/d2; - - dforce += n * (real)__fdividef((rep.stiffness * dot(dxi, n)),d2); - } -} - -template -__global__ void ParticlesRepulsionForceFieldCuda3t_addForce_kernel(int size, const int *cells, const int *cellGhost, GPURepulsion repulsion, real* f, const real* x, const real* v) -{ - __shared__ int2 range; - __shared__ int ghost; - __shared__ real temp_x[BSIZE*3]; - __shared__ real temp_v[BSIZE*3]; - int tx3 = threadIdx.x * 3; - for (int cell = blockIdx.x; cell < size; cell += gridDim.x) - { - if (!threadIdx.x) - { - //range = *(const int2*)(cells+cell); - range.x = cells[cell]; - range.y = cells[cell+1]; - range.y &= ~(1U<<31); - ghost = cellGhost[cell]; - } - __syncthreads(); - if (range.x <= 0) continue; // no actual particle in this cell - for (int px0 = range.x; px0 < ghost; px0 += BSIZE) - { - int px = px0 + threadIdx.x; - CudaVec3 xi; - CudaVec3 vi; - CudaVec3 force; - int index; - if (px < range.y) - { - index = cells[px]; - xi = ((const CudaVec3*)x)[index]; - temp_x[tx3 ] = xi.x; - temp_x[tx3+1] = xi.y; - temp_x[tx3+2] = xi.z; - vi = ((const CudaVec3*)v)[index]; - temp_v[tx3 ] = vi.x; - temp_v[tx3+1] = vi.y; - temp_v[tx3+2] = vi.z; - } - __syncthreads(); - if (px < ghost) - { - // actual particle -> compute interactions - force = CudaVec3::make(0,0,0); - int np = min(range.y-px0,BSIZE); - for (int i=0; i < np; ++i) - { - if (i != threadIdx.x) - ParticlesRepulsionCalcForce(xi, vi, ((const CudaVec3*)temp_x)[i], ((const CudaVec3*)temp_v)[i], force, repulsion); - } - } - __syncthreads(); - // loop through other groups of particles - for (int py0 = range.x; py0 < range.y; py0 += BSIZE) - { - if (py0 == px0) continue; - int py = py0 + threadIdx.x; - if (py < range.y) - { - int index2 = cells[py]; - CudaVec3 xj = ((const CudaVec3*)x)[index2]; - temp_x[tx3 ] = xj.x; - temp_x[tx3+1] = xj.y; - temp_x[tx3+2] = xj.z; - CudaVec3 vj = ((const CudaVec3*)v)[index2]; - temp_v[tx3 ] = vj.x; - temp_v[tx3+1] = vj.y; - temp_v[tx3+2] = vj.z; - } - __syncthreads(); - if (px < ghost) - { - // actual particle -> compute interactions - int np = min(range.y-py0,BSIZE); - for (int i=0; i < np; ++i) - { - ParticlesRepulsionCalcForce(xi, vi, ((const CudaVec3*)temp_x)[i], ((const CudaVec3*)temp_v)[i], force, repulsion); - } - } - __syncthreads(); - } - if (px < ghost) - { - // actual particle -> write computed force - ((CudaVec3*)f)[index] += force; - } - } - } -} - -template -__global__ void ParticlesRepulsionForceFieldCuda3t_addDForce_kernel(int size, const int *cells, const int *cellGhost, GPURepulsion repulsion, real* df, const real* x, const real* dx) -{ - __shared__ int2 range; - __shared__ int ghost; - __shared__ real temp_x[BSIZE*3]; - __shared__ real temp_dx[BSIZE*3]; - int tx3 = threadIdx.x * 3; - for (int cell = blockIdx.x; cell < size; cell += gridDim.x) - { - if (!threadIdx.x) - { - //range = *(const int2*)(cells+cell); - range.x = cells[cell]; - range.y = cells[cell+1]; - range.y &= ~(1U<<31); - ghost = cellGhost[cell]; - } - __syncthreads(); - if (range.x <= 0) continue; // no actual particle in this cell - for (int px0 = range.x; px0 < ghost; px0 += BSIZE) - { - int px = px0 + threadIdx.x; - CudaVec3 xi; - CudaVec3 dxi; - CudaVec3 dforce; - int index; - if (px < range.y) - { - index = cells[px]; - xi = ((const CudaVec3*)x)[index]; - temp_x[tx3 ] = xi.x; - temp_x[tx3+1] = xi.y; - temp_x[tx3+2] = xi.z; - dxi = ((const CudaVec3*)dx)[index]; - temp_dx[tx3 ] = dxi.x; - temp_dx[tx3+1] = dxi.y; - temp_dx[tx3+2] = dxi.z; - } - __syncthreads(); - if (px < ghost) - { - // actual particle -> compute interactions - dforce = CudaVec3::make(0,0,0); - int np = min(range.y-px0,BSIZE); - for (int i=0; i < np; ++i) - { - if (i != threadIdx.x) - ParticlesRepulsionCalcDForce(xi, dxi, ((const CudaVec3*)temp_x)[i], ((const CudaVec3*)temp_dx)[i], dforce, repulsion); - } - } - __syncthreads(); - // loop through other groups of particles - for (int py0 = range.x; py0 < range.y; py0 += BSIZE) - { - if (py0 == px0) continue; - int py = py0 + threadIdx.x; - if (py < range.y) - { - int index2 = cells[py]; - CudaVec3 xj = ((const CudaVec3*)x)[index2]; - temp_x[tx3 ] = xj.x; - temp_x[tx3+1] = xj.y; - temp_x[tx3+2] = xj.z; - CudaVec3 dxj = ((const CudaVec3*)dx)[index2]; - temp_dx[tx3 ] = dxj.x; - temp_dx[tx3+1] = dxj.y; - temp_dx[tx3+2] = dxj.z; - } - __syncthreads(); - if (px < ghost) - { - // actual particle -> compute interactions - int np = min(range.y-py0,BSIZE); - for (int i=0; i < np; ++i) - { - ParticlesRepulsionCalcDForce(xi, dxi, ((const CudaVec3*)temp_x)[i], ((const CudaVec3*)temp_dx)[i], dforce, repulsion); - } - } - __syncthreads(); - } - if (px < ghost) - { - // actual particle -> write computed force - ((CudaVec3*)df)[index] += dforce; - } - } - } -} - -////////////////////// -// CPU-side methods // -////////////////////// - -void ParticlesRepulsionForceFieldCuda3f_addForce(unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3f* repulsion, void* f, const void* x, const void* v) -{ - dim3 threads(BSIZE,1); - dim3 grid(60,1); - {ParticlesRepulsionForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *repulsion, (float*)f, (const float*)x, (const float*)v); mycudaDebugError("ParticlesRepulsionForceFieldCuda3t_addForce_kernel");} -} - -void ParticlesRepulsionForceFieldCuda3f_addDForce(unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3f* repulsion, void* df, const void* x, const void* dx) -{ - dim3 threads(BSIZE,1); - dim3 grid(60/BSIZE,1); - {ParticlesRepulsionForceFieldCuda3t_addDForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *repulsion, (float*)df, (const float*)x, (const float*)dx); mycudaDebugError("ParticlesRepulsionForceFieldCuda3t_addDForce_kernel");} -} - -#ifdef SOFA_GPU_CUDA_DOUBLE - -void ParticlesRepulsionForceFieldCuda3d_addForce(unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3d* repulsion, void* f, const void* x, const void* v) -{ - dim3 threads(BSIZE,1); - dim3 grid(60/BSIZE,1); - {ParticlesRepulsionForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *repulsion, (double*)f, (const double*)x, (const double*)v); mycudaDebugError("ParticlesRepulsionForceFieldCuda3t_addForce_kernel");} -} - -void ParticlesRepulsionForceFieldCuda3d_addDForce(unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3d* repulsion, void* df, const void* x, const void* dx) -{ - dim3 threads(BSIZE,1); - dim3 grid(60/BSIZE,1); - {ParticlesRepulsionForceFieldCuda3t_addDForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *repulsion, (double*)df, (const double*)x, (const double*)dx); mycudaDebugError("ParticlesRepulsionForceFieldCuda3t_addDForce_kernel");} -} - -#endif // SOFA_GPU_CUDA_DOUBLE - -#if defined(__cplusplus) && CUDA_VERSION < 2000 -} // namespace cuda -} // namespace gpu -} // namespace sofa -#endif diff --git a/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaParticlesRepulsionForceField.h b/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaParticlesRepulsionForceField.h deleted file mode 100644 index 632e35283e9..00000000000 --- a/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaParticlesRepulsionForceField.h +++ /dev/null @@ -1,75 +0,0 @@ -/****************************************************************************** -* SOFA, Simulation Open-Framework Architecture * -* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU Lesser General Public License as published by * -* the Free Software Foundation; either version 2.1 of the License, or (at * -* your option) any later version. * -* * -* This program is distributed in the hope that it will be useful, but WITHOUT * -* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * -* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * -* for more details. * -* * -* You should have received a copy of the GNU Lesser General Public License * -* along with this program. If not, see . * -******************************************************************************* -* Authors: The SOFA Team and external contributors (see Authors.txt) * -* * -* Contact information: contact@sofa-framework.org * -******************************************************************************/ -#ifndef SOFA_GPU_CUDA_CUDAPARTICLESREPULSIONFORCEFIELD_H -#define SOFA_GPU_CUDA_CUDAPARTICLESREPULSIONFORCEFIELD_H - -#include -#include -#include - -namespace sofa -{ - - -namespace gpu::cuda -{ - -template -struct GPURepulsion -{ - real d; - real d2; - real stiffness; - real damping; -}; - -typedef GPURepulsion GPURepulsion3f; -typedef GPURepulsion GPURepulsion3d; - -} // namespace gpu::cuda - - -namespace component::forcefield -{ - -template <> -void ParticlesRepulsionForceField::addForce(const core::MechanicalParams* mparams, DataVecDeriv& d_f, const DataVecCoord& d_x, const DataVecDeriv& d_v); - -template <> -void ParticlesRepulsionForceField::addDForce(const core::MechanicalParams* mparams, DataVecDeriv& d_df, const DataVecDeriv& d_dx); - -#ifdef SOFA_GPU_CUDA_DOUBLE - -template <> -void ParticlesRepulsionForceField::addForce(const core::MechanicalParams* mparams, DataVecDeriv& d_f, const DataVecCoord& d_x, const DataVecDeriv& d_v); - -template <> -void ParticlesRepulsionForceField::addDForce(const core::MechanicalParams* mparams, DataVecDeriv& d_df, const DataVecDeriv& d_dx); - -#endif // SOFA_GPU_CUDA_DOUBLE - -} // namespace component::forcefield - - -} // namespace sofa - -#endif diff --git a/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaParticlesRepulsionForceField.inl b/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaParticlesRepulsionForceField.inl deleted file mode 100644 index 7d8ea0fd4b1..00000000000 --- a/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaParticlesRepulsionForceField.inl +++ /dev/null @@ -1,168 +0,0 @@ -/****************************************************************************** -* SOFA, Simulation Open-Framework Architecture * -* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU Lesser General Public License as published by * -* the Free Software Foundation; either version 2.1 of the License, or (at * -* your option) any later version. * -* * -* This program is distributed in the hope that it will be useful, but WITHOUT * -* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * -* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * -* for more details. * -* * -* You should have received a copy of the GNU Lesser General Public License * -* along with this program. If not, see . * -******************************************************************************* -* Authors: The SOFA Team and external contributors (see Authors.txt) * -* * -* Contact information: contact@sofa-framework.org * -******************************************************************************/ -#ifndef SOFA_GPU_CUDA_CUDAPARTICLESREPULSIONFORCEFIELD_INL -#define SOFA_GPU_CUDA_CUDAPARTICLESREPULSIONFORCEFIELD_INL - -#include "CudaParticlesRepulsionForceField.h" -#include -//#include - -namespace sofa -{ - - -namespace gpu::cuda -{ - -extern "C" -{ - - void ParticlesRepulsionForceFieldCuda3f_addForce (unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3f* repulsion, void* f, const void* x, const void* v ); - void ParticlesRepulsionForceFieldCuda3f_addDForce(unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3f* repulsion, void* f, const void* x, const void* dx); - -#ifdef SOFA_GPU_CUDA_DOUBLE - - void ParticlesRepulsionForceFieldCuda3d_addForce (unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3d* repulsion, void* f, const void* x, const void* v ); - void ParticlesRepulsionForceFieldCuda3d_addDForce(unsigned int size, const void* cells, const void* cellGhost, GPURepulsion3d* repulsion, void* f, const void* x, const void* dx); - -#endif // SOFA_GPU_CUDA_DOUBLE -} - -} // namespace gpu::cuda - - -namespace component::forcefield -{ - -using namespace gpu::cuda; - - -template <> -void ParticlesRepulsionForceField::addForce(const core::MechanicalParams* /*mparams*/, DataVecDeriv& d_f, const DataVecCoord& d_x, const DataVecDeriv& d_v) -{ - if (grid == NULL) return; - - VecDeriv& f = *d_f.beginEdit(); - const VecCoord& x = d_x.getValue(); - const VecDeriv& v = d_v.getValue(); - - grid->updateGrid(x); - GPURepulsion3f repulsion; - repulsion.d = distance.getValue(); - repulsion.d2 = repulsion.d*repulsion.d; - repulsion.stiffness = stiffness.getValue(); - repulsion.damping = damping.getValue(); - f.resize(x.size()); - Grid::Grid* g = grid->getGrid(); - ParticlesRepulsionForceFieldCuda3f_addForce( - g->getNbCells(), g->getCellsVector().deviceRead(), g->getCellGhostVector().deviceRead(), - &repulsion, f.deviceWrite(), x.deviceRead(), v.deviceRead()); - - d_f.endEdit(); -} - -template <> -void ParticlesRepulsionForceField::addDForce(const core::MechanicalParams* mparams, DataVecDeriv& d_df, const DataVecDeriv& d_dx) -{ - if (grid == NULL) return; - - VecDeriv& df = *d_df.beginEdit(); - const VecDeriv& dx = d_dx.getValue(); - Real kFactor = (Real)sofa::core::mechanicalparams::kFactorIncludingRayleighDamping(mparams,this->rayleighStiffness.getValue()); - Real bFactor = (Real)sofa::core::mechanicalparams::bFactor(mparams); - - const VecCoord& x = this->mstate->read(sofa::core::vec_id::read_access::position)->getValue(); - GPURepulsion3f repulsion; - repulsion.d = distance.getValue(); - repulsion.d2 = repulsion.d*repulsion.d; - repulsion.stiffness = (float)(stiffness.getValue()*kFactor); - repulsion.damping = (float)(damping.getValue()*bFactor); - df.resize(dx.size()); - Grid::Grid* g = grid->getGrid(); - ParticlesRepulsionForceFieldCuda3f_addDForce( - g->getNbCells(), g->getCellsVector().deviceRead(), g->getCellGhostVector().deviceRead(), - &repulsion, df.deviceWrite(), x.deviceRead(), dx.deviceRead()); - - d_df.endEdit(); -} - - -#ifdef SOFA_GPU_CUDA_DOUBLE - -template <> -void ParticlesRepulsionForceField::addForce(const core::MechanicalParams* /*mparams*/, DataVecDeriv& d_f, const DataVecCoord& d_x, const DataVecDeriv& d_v) -{ - if (grid == NULL) return; - - VecDeriv& f = *d_f.beginEdit(); - const VecCoord& x = d_x.getValue(); - const VecDeriv& v = d_v.getValue(); - - grid->updateGrid(x); - GPURepulsion3d repulsion; - repulsion.d = distance.getValue(); - repulsion.d2 = repulsion.d*repulsion.d; - repulsion.stiffness = stiffness.getValue(); - repulsion.damping = damping.getValue(); - f.resize(x.size()); - Grid::Grid* g = grid->getGrid(); - ParticlesRepulsionForceFieldCuda3d_addForce( - g->getNbCells(), g->getCellsVector().deviceRead(), g->getCellGhostVector().deviceRead(), - &repulsion, f.deviceWrite(), x.deviceRead(), v.deviceRead()); - - d_f.endEdit(); -} - -template <> -void ParticlesRepulsionForceField::addDForce(const core::MechanicalParams* mparams, DataVecDeriv& d_df, const DataVecDeriv& d_dx) -{ - if (grid == NULL) return; - - VecDeriv& df = *d_df.beginEdit(); - const VecDeriv& dx = d_dx.getValue(); - Real kFactor = (Real)sofa::core::mechanicalparams::kFactorIncludingRayleighDamping(mparams,this->rayleighStiffness.getValue()); - Real bFactor = (Real)sofa::core::mechanicalparams::bFactor(mparams); - - const VecCoord& x = this->mstate->read(sofa::core::vec_id::read_access::position)->getValue(); - GPURepulsion3d repulsion; - repulsion.d = distance.getValue(); - repulsion.d2 = repulsion.d*repulsion.d; - repulsion.stiffness = stiffness.getValue()*kFactor; - repulsion.damping = damping.getValue()*bFactor; - df.resize(dx.size()); - Grid::Grid* g = grid->getGrid(); - ParticlesRepulsionForceFieldCuda3d_addDForce( - g->getNbCells(), g->getCellsVector().deviceRead(), g->getCellGhostVector().deviceRead(), - &repulsion, df.deviceWrite(), x.deviceRead(), dx.deviceRead()); - - d_df.endEdit(); -} - -#endif // SOFA_GPU_CUDA_DOUBLE - - -} // namespace component::forcefield - - -} // namespace sofa - -#endif diff --git a/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaSPHFluidForceField.cpp b/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaSPHFluidForceField.cpp deleted file mode 100644 index 68a52f54bd7..00000000000 --- a/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaSPHFluidForceField.cpp +++ /dev/null @@ -1,43 +0,0 @@ -/****************************************************************************** -* SOFA, Simulation Open-Framework Architecture * -* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU Lesser General Public License as published by * -* the Free Software Foundation; either version 2.1 of the License, or (at * -* your option) any later version. * -* * -* This program is distributed in the hope that it will be useful, but WITHOUT * -* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * -* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * -* for more details. * -* * -* You should have received a copy of the GNU Lesser General Public License * -* along with this program. If not, see . * -******************************************************************************* -* Authors: The SOFA Team and external contributors (see Authors.txt) * -* * -* Contact information: contact@sofa-framework.org * -******************************************************************************/ -#include -#include "CudaSPHFluidForceField.inl" -#include -#include -#include - - -namespace sofa::gpu::cuda -{ - -int SPHFluidForceFieldCudaClass = core::RegisterObject("Supports GPU-side computations using CUDA") - .add< component::forcefield::SPHFluidForceField >() -#ifdef SOFA_GPU_CUDA_DOUBLE - .add< component::forcefield::SPHFluidForceField >() -#endif // SOFA_GPU_CUDA_DOUBLE - ; - -} // namespace sofa::gpu::cuda - - - - diff --git a/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaSPHFluidForceField.cu b/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaSPHFluidForceField.cu deleted file mode 100644 index 283ec7a7f2d..00000000000 --- a/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaSPHFluidForceField.cu +++ /dev/null @@ -1,1246 +0,0 @@ -/****************************************************************************** -* SOFA, Simulation Open-Framework Architecture * -* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU Lesser General Public License as published by * -* the Free Software Foundation; either version 2.1 of the License, or (at * -* your option) any later version. * -* * -* This program is distributed in the hope that it will be useful, but WITHOUT * -* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * -* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * -* for more details. * -* * -* You should have received a copy of the GNU Lesser General Public License * -* along with this program. If not, see . * -******************************************************************************* -* Authors: The SOFA Team and external contributors (see Authors.txt) * -* * -* Contact information: contact@sofa-framework.org * -******************************************************************************/ -#include -#include -#include "cuda.h" - -#if defined(__cplusplus) && CUDA_VERSION < 2000 -namespace sofa -{ -namespace gpu -{ -namespace cuda -{ -#endif - -template -class GPUSPHFluid -{ -public: - real h; ///< particles radius - real h2; ///< particles radius squared - real inv_h2; ///< particles radius squared inverse - real stiffness; ///< pressure stiffness - real mass; ///< particles mass - real mass2; ///< particles mass squared - real density0; ///< 1000 kg/m3 for water - real viscosity; - real surfaceTension; - - // Precomputed constants for smoothing kernels - real CWd; ///< = constWd(h) - //real CgradWd; ///< = constGradWd(h) - real CgradWp; ///< = constGradWp(h) - real ClaplacianWv; ///< = constLaplacianWv(h) -}; - -typedef GPUSPHFluid GPUSPHFluid3f; -typedef GPUSPHFluid GPUSPHFluid3d; - -extern "C" -{ - - void SPHFluidForceFieldCuda3f_computeDensity(int kernelType, int pressureType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3f* params, void* pos4, const void* x); - void SPHFluidForceFieldCuda3f_addForce (int kernelType, int pressureType, int viscosityType, int surfaceTensionType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3f* params, void* f, const void* pos4, const void* v); -//void SPHFluidForceFieldCuda3f_addDForce(int kernelType, int pressureType, int viscosityType, int surfaceTensionType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3f* params, void* f, const void* pos4, const void* v, const void* dx); - -#ifdef SOFA_GPU_CUDA_DOUBLE - - void SPHFluidForceFieldCuda3d_computeDensity(int kernelType, int pressureType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3d* params, void* pos4, const void* x); - void SPHFluidForceFieldCuda3d_addForce (int kernelType, int pressureType, int viscosityType, int surfaceTensionType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3d* params, void* f, const void* pos4, const void* v); -//void SPHFluidForceFieldCuda3d_addDForce(int kernelType, int pressureType, int viscosityType, int surfaceTensionType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3d* params, void* f, const void* pos4, const void* v, const void* dx); - -#endif // SOFA_GPU_CUDA_DOUBLE - -} - -////////////////////// -// GPU-side methods // -////////////////////// - -#ifndef R_PI -#ifdef M_PI -#define R_PI M_PI -#else -#define R_PI 3.141592653589793238462 -#endif -#endif - -template -class SPH; - -template -class SPH -{ -public: - typedef real Real; - typedef CudaVec3 Deriv; - static Real constW(Real h) - { - return (Real) (48/R_PI)/(h*h*h); - } - - static __device__ Real W0(Real C) - { - return C*((Real)(1.0/6.0)); - } - static __device__ Real W0() - { - return ((Real)(1.0/6.0)); - } - static __device__ Real W(Real r_h, Real C) - { - Real s = 1-r_h; - if (r_h < (Real)0.5) return C*((Real)(1.0/6.0) - r_h*r_h*s); - else return C*((Real)(1.0/3.0) * (s*s*s)); - } - static __device__ Real W2(Real r2_h2, Real C) - { - Real r_h = sqrtf(r2_h2); - Real s = 1-r_h; - if (r2_h2 < (Real)0.25) return C*((Real)(1.0/6.0) - r2_h2*s); - else return C*((Real)(1.0/3.0) * (s*s*s)); - } - static __device__ Real W2(Real r2_h2) - { - Real r_h = sqrtf(r2_h2); - Real s = 1-r_h; - if (r2_h2 < (Real)0.25) return ((Real)(1.0/6.0) - r2_h2*s); - else return ((Real)(1.0/3.0) * (s*s*s)); - } - static Real constGradW(Real h) - { - return constW(h)/(h*h); - } - static __device__ Deriv gradW(const Deriv& d, Real r_h, Real C) - { - Real g; - if (r_h < (Real)0.5) g = 3*r_h - 2; - else { Real s = 1-r_h; g = -s*s/r_h; } - return d*(C*g); - } - static Real constLaplacianW(Real h) - { - return constW(h)/(h*h); - } - static __device__ Real laplacianW(Real r_h, Real C) - { - if (r_h < (Real)0.5) return C*(12*r_h-6); - else if (r_h < (Real)1) return C*(6-4*r_h-2/r_h); - else return 0; - } - /// Density Smoothing Kernel - static Real constWd(Real h) - { - return constW(h); - } - static __device__ Real Wd(Real r_h, Real C) - { - return W(r_h,C); - } - static __device__ Real Wd2(Real r2_h2, Real C) - { - return W2(r2_h2,C); - } - static __device__ Real Wd2(Real r2_h2) - { - return W2(r2_h2); - } - static __device__ Real Wd0() - { - return W0(); - } - static Real constGradWd(Real h) - { - return constGradW(h); - } - static __device__ Deriv gradWd(const Deriv& d, Real r_h, Real C) - { - return gradW(d,r_h,C); - } - static __device__ Deriv gradWd2(const Deriv& d, Real r2_h2, Real C) - { - return gradW(d,sqrtf(r2_h2),C); - } - static Real constLaplacianWd(Real h) - { - return constLaplacianW(h); - } - static __device__ Real laplacianWd(Real r_h, Real C) - { - return laplacianW(r_h,C); - } - static __device__ Real laplacianWd2(Real r2_h2, Real C) - { - return laplacianW(sqrtf(r2_h2),C); - } - - /// Pressure Smoothing Kernel - static Real constWp(Real h) - { - return constW(h); - } - static __device__ Real Wp(Real r_h, Real C) - { - return W(r_h,C); - } - static Real constGradWp(Real h) - { - return constGradW(h); - } - static __device__ Deriv gradWp(const Deriv& d, Real r_h, Real C) - { - return gradW(d,r_h,C); - } - - /// Viscosity Smoothing Kernel - static Real constWv(Real h) - { - return constW(h); - } - static __device__ Real Wv(Real r_h, Real C) - { - return W(r_h,C); - } - static __device__ Real Wv2(Real r2_h2, Real r_h, Real C) - { - return W2(r2_h2,r_h,C); - } - static Real constGradWv(Real h) - { - return constGradW(h); - } - static __device__ Deriv gradWv(const Deriv& d, Real r_h, Real C) - { - return gradW(d,r_h,C); - } - static __device__ Deriv gradWv2(const Deriv& d, Real r2_h2, Real r_h, Real C) - { - return gradW(d,r_h,C); - } - static Real constLaplacianWv(Real h) - { - return constLaplacianW(h); - } - - static __device__ Real laplacianWv(Real r_h, Real C) - { - return laplacianW(r_h,C); - } -}; - -template -class SPH -{ -public: - typedef real Real; - typedef CudaVec3 Deriv; - /// Density Smoothing Kernel: W = 315 / 64pih9 * (h2 - r2)3 = 315 / 64pih3 * (1 - (r/h)2)3 - static Real constWd(Real h) - { - return (Real)(315 / (64*R_PI*h*h*h)); - } - static __device__ Real Wd(Real r_h, Real C) - { - Real a = (1-r_h*r_h); - return C*a*a*a; - } - static __device__ Real Wd2(Real r2_h2, Real C) - { - Real a = (1-r2_h2); - return C*a*a*a; - } - static __device__ Real Wd2(Real r2_h2) - { - Real a = (1-r2_h2); - return a*a*a; - } - static __device__ Real Wd0(Real C) - { - return C; - } - static __device__ Real Wd0() - { - return 1; - } - static Real constGradWd(Real h) - { - return -6*constWd(h)/(h*h); - } - static __device__ Deriv gradWd(const Deriv& d, Real r_h, Real C) - { - Real a = (1-r_h*r_h); - return d*(C*a*a); - } - static __device__ Deriv gradWd2(const Deriv& d, Real r2_h2, Real C) - { - Real a = (1-r2_h2); - return d*(C*a*a); - } - static Real constLaplacianWd(Real h) - { - return -6*constWd(h)/(h*h); - } - static __device__ Real laplacianWd(Real r_h, Real C) - { - Real r2_h2 = r_h*r_h; - return C*((1-r2_h2)*(3-7*r2_h2)); - } - static __device__ Real laplacianWd2(Real r2_h2, Real C) - { - return C*((1-r2_h2)*(3-7*r2_h2)); - } - - /// Pressure Smoothing Kernel: W = 15 / pih6 (h - r)3 = 15 / pih3 (1 - r/h)3 - static Real constWp(Real h) - { - return (Real)(15 / (R_PI*h*h*h)); - } - static __device__ Real Wp(Real r_h, Real C) - { - Real a = (1-r_h); - return C*a*a*a; - } - static Real constGradWp(Real h) - { - return (-3*constWp(h)) / (h*h); - } - static __device__ Deriv gradWp(const Deriv& d, Real r_h, Real C) - { - Real a = (1-r_h); - return d * (C*a*a/r_h); - } - - /// Viscosity Smoothing Kernel: W = 15/(2pih3) (-r3/2h3 + r2/h2 + h/2r - 1) - static Real constWv(Real h) - { - return (Real)(15/(2*R_PI*h*h*h)); - } - static __device__ Real Wv(Real r_h, Real C) - { - Real r2_h2 = r_h*r_h; - Real r3_h3 = r2_h2*r_h; - return C*(-0.5f*r3_h3 + r2_h2 + 0.5f/r_h - 1); - } - static __device__ Real Wv2(Real r2_h2, Real r_h, Real C) - { - Real r3_h3 = r2_h2*r_h; - return C*(-0.5f*r3_h3 + r2_h2 + 0.5f/r_h - 1); - } - static Real constGradWv(Real h) - { - return constWv(h)/(h*h); - } - static __device__ Deriv gradWv(const Deriv& d, Real r_h, Real C) - { - Real r3_h3 = r_h*r_h*r_h; - return d * (C*(2.0f -3.0f*r_h - 0.5f/r3_h3)); - } - static __device__ Deriv gradWv2(const Deriv& d, Real r2_h2, Real r_h, Real C) - { - Real r3_h3 = r2_h2*r_h; - return d * (C*(-3*r_h + 4 - 1/r3_h3)); - } - static Real constLaplacianWv(Real h) - { - return 6*constWv(h)/(h*h); - } - - static __device__ Real laplacianWv(Real r_h, Real C) - { - return C*(1-r_h); - } -}; - -template -__device__ void SPHFluidInitDensity(CudaVec3 /*x1*/, real& density, GPUSPHFluid& params) -{ - density = 0; //params.mass * params.CWd; //SPH::Wd(0,params.CWd); -} - -template -__device__ void SPHFluidCalcDensity(CudaVec3 x1, CudaVec3 x2, real& density, GPUSPHFluid& params) -{ - CudaVec3 n = x2-x1; - real d2 = norm2(n); - if (sqrtf(d2) < params.h) - //if (d2 < params.h2) - { - //real r_h = rsqrtf(params.h2/d2); - //real r_h = sqrtf(d2/params.h2); - real r2_h2 = (d2/params.h2); - real d = SPH::Wd2(r2_h2); - density += d; - } -} - -template -__device__ void SPHFluidFinalDensity(CudaVec3 /*x1*/, real& density, GPUSPHFluid& params) -{ - density = (SPH::Wd0()+density) * params.CWd * params.mass; //SPH::Wd(0,params.CWd); -} - -template -__device__ void SPHFluidCalcForce(CudaVec4 x1, CudaVec3 v1, CudaVec4 x2, CudaVec3 v2, CudaVec3& force, GPUSPHFluid& params) -{ - CudaVec3 n = CudaVec3::make(x2.x-x1.x,x2.y-x1.y,x2.z-x1.z); - real d2 = norm2(n); - if (d2 < params.h2) - { - //real r_h = rsqrtf(params.h2/d2); - //real r_h = sqrtf(d2/params.h2); - real r2_h2 = (d2/params.h2); - real r_h = sqrtf(r2_h2); - real density1 = x1.w; - real density2 = x2.w; - - // Pressure - real pressure1 = params.stiffness * (density1 - params.density0); - real pressure2 = params.stiffness * (density2 - params.density0); - real pressureFV = params.mass2 * (pressure1 / (density1*density1) + pressure2 / (density2*density2) ); - - // Viscosity - if (viscosityType == 1) - { - force += ( v2 - v1 ) * ( params.mass2 * params.viscosity * SPH::laplacianWv(r_h,params.ClaplacianWv) / (density1 * density2) ); - } - else if (viscosityType == 2) - { - real vx = dot(n,v2-v1); - if (vx < 0) - pressureFV += - (vx * params.viscosity * params.h * params.mass / ((r2_h2 + 0.01f*params.h2)*(density1+density2)*0.5f)); - } - - force += SPH::gradWp(n, r_h, params.CgradWp) * pressureFV; - } -} -/* -template -__device__ void SPHFluidCalcDForce(CudaVec4 x1, CudaVec3 v1, CudaVec3 dx1, CudaVec4 x2, CudaVec3 v2, CudaVec3 dx2, CudaVec3& dforce, GPUSPHFluid& params) -{ - CudaVec3 n = CudaVec3::make(x2.x-x1.x,x2.y-x1.y,x2.z-x1.z); - real d2 = norm2(n); - if (d2 < params.h2) - { - CudaVec3 dxi = dx2-dx1; - - real inv_d = rsqrtf(d2); - - real r2_h2 = (d2/params.h2); - real r_h = sqrtf(r2_h2); - - real density1 = x1.w; - real density2 = x2.w; - - // Pressure - real pressure1 = params.stiffness * (density1 - params.density0); - real pressure2 = params.stiffness * (density2 - params.density0); - - real fpressure = params.mass2 * (pressure1 / (density1*density1) + pressure2 / (density2*density2) ); - - // Derivatives - - CudaVec3 dn = dxi * inv_d; - - real dr_h = dot(dn,n)/params.h; - //real dr2_h2 = 2*r_h*dr_h; - - real ddensity = dot(dxi,SPH::gradWd2(n, r2_h2, params.CgradWd)); - real dpressure = params.stiffness*ddensity; - // d(a/b^2) = (d(a)*b-2*a*d(b))/b^3 - real dfpressure = params.mass2 * ( (dpressure*density1-2*pressure1*ddensity)/(density1*density1*density1) - +(dpressure*density2-2*pressure2*ddensity)/(density2*density2*density2) ); - real a = (1-r_h); - real dWp = params.CgradWp*a*a; - real ddWp = -2*params.CgradWp*a*dr_h; - - // f = n * dWp * fpressure - // df = dn * (dWp * fpressure) + n * (ddWp * fpressure + dWp * dfpressure); - - dforce += dn * (dWp * fpressure) + n * (ddWp * fpressure + dWp * dfpressure); - - // Viscosity - // force += ( v2 - v1 ) * ( params.mass2 * params.viscosity * params.ClaplacianWv * (1-r_h) / (density1 * density2) ); - real d1d2 = density1*density2; - dforce += dxi * ( params.mass2 * params.viscosity * params.ClaplacianWv * (1-r_h) / (density1 * density2) ) - + (v2-v1) * ( params.mass2 * params.viscosity * params.ClaplacianWv * (-dr_h*d1d2 - (1-r_h)*ddensity*(density1+density2))/(d1d2*d1d2)); - - } -} -*/ -template -__global__ void SPHFluidForceFieldCuda3t_computeDensity_kernel(int size, const int *cells, const int *cellGhost, GPUSPHFluid params, real* pos4, const real* x) -{ - __shared__ int2 range; - __shared__ int ghost; - __shared__ real temp_x[BSIZE*3]; - int tx3 = threadIdx.x * 3; - for (int cell = blockIdx.x; cell < size; cell += gridDim.x) - { - __syncthreads(); - if (!threadIdx.x) - { - //range = *(const int2*)(cells+cell); - range.x = cells[cell]; - range.y = cells[cell+1]; - range.y &= ~(1U<<31); - ghost = cellGhost[cell]; - } - __syncthreads(); - if (range.x <= 0) continue; // no actual particle in this cell - for (int px0 = range.x; px0 < ghost; px0 += BSIZE) - { - int px = px0 + threadIdx.x; - CudaVec3 xi; - real density; - int index; - if (px < range.y) - { - index = cells[px]; - xi = ((const CudaVec3*)x)[index]; - temp_x[tx3 ] = xi.x; - temp_x[tx3+1] = xi.y; - temp_x[tx3+2] = xi.z; - } - __syncthreads(); - if (px < ghost) - { - // actual particle -> compute interactions - SPHFluidInitDensity(xi, density, params); - - int np = min(range.y-px0,BSIZE); - for (int i=0; i < np; ++i) - { - if (i != threadIdx.x) - SPHFluidCalcDensity(xi, ((const CudaVec3*)temp_x)[i], density, params); - } - } - __syncthreads(); - // loop through other groups of particles - for (int py0 = range.x; py0 < range.y; py0 += BSIZE) - { - if (py0 == px0) continue; - int py = py0 + threadIdx.x; - if (py < range.y) - { - int index2 = cells[py]; - CudaVec3 xj = ((const CudaVec3*)x)[index2]; - temp_x[tx3 ] = xj.x; - temp_x[tx3+1] = xj.y; - temp_x[tx3+2] = xj.z; - } - __syncthreads(); - if (px < ghost) - { - // actual particle -> compute interactions - int np = min(range.y-py0,BSIZE); - for (int i=0; i < np; ++i) - { - SPHFluidCalcDensity(xi, ((const CudaVec3*)temp_x)[i], density, params); - } - } - __syncthreads(); - } - if (px < ghost) - { - // actual particle -> write computed density - SPHFluidFinalDensity(xi, density, params); - CudaVec4 res = CudaVec4::make(xi.x,xi.y,xi.z,density); - ((CudaVec4*)pos4)[index] = res; - } - } - } -} - -template -__global__ void SPHFluidForceFieldCuda3t_addForce_kernel(int size, const int *cells, const int *cellGhost, GPUSPHFluid params, real* f, const real* pos4, const real* v) -{ - __shared__ int2 range; - __shared__ int ghost; - __shared__ real temp_x[BSIZE*4]; - __shared__ real temp_v[BSIZE*3]; - int tx3 = threadIdx.x * 3; - int tx4 = threadIdx.x << 2; - for (int cell = blockIdx.x; cell < size; cell += gridDim.x) - { - if (!threadIdx.x) - { - //range = *(const int2*)(cells+cell); - range.x = cells[cell]; - range.y = cells[cell+1]; - range.y &= ~(1U<<31); - ghost = cellGhost[cell]; - } - __syncthreads(); - if (range.x <= 0) continue; // no actual particle in this cell - for (int px0 = range.x; px0 < ghost; px0 += BSIZE) - { - int px = px0 + threadIdx.x; - CudaVec4 xi; - CudaVec3 vi; - CudaVec3 force; - int index; - if (px < range.y) - { - index = cells[px]; - xi = ((const CudaVec4*)pos4)[index]; - temp_x[tx4 ] = xi.x; - temp_x[tx4+1] = xi.y; - temp_x[tx4+2] = xi.z; - temp_x[tx4+3] = xi.w; - vi = ((const CudaVec3*)v)[index]; - temp_v[tx3 ] = vi.x; - temp_v[tx3+1] = vi.y; - temp_v[tx3+2] = vi.z; - } - __syncthreads(); - if (px < ghost) - { - // actual particle -> compute interactions - force = CudaVec3::make(0,0,0); - int np = min(range.y-px0,BSIZE); - for (int i=0; i < np; ++i) - { - if (i != threadIdx.x) - SPHFluidCalcForce(xi, vi, ((const CudaVec4*)temp_x)[i], ((const CudaVec3*)temp_v)[i], force, params); - } - } - __syncthreads(); - // loop through other groups of particles - for (int py0 = range.x; py0 < range.y; py0 += BSIZE) - { - if (py0 == px0) continue; - int py = py0 + threadIdx.x; - if (py < range.y) - { - int index2 = cells[py]; - CudaVec4 xj = ((const CudaVec4*)pos4)[index2]; - temp_x[tx4 ] = xj.x; - temp_x[tx4+1] = xj.y; - temp_x[tx4+2] = xj.z; - temp_x[tx4+3] = xj.w; - CudaVec3 vj = ((const CudaVec3*)v)[index2]; - temp_v[tx3 ] = vj.x; - temp_v[tx3+1] = vj.y; - temp_v[tx3+2] = vj.z; - } - __syncthreads(); - if (px < ghost) - { - // actual particle -> compute interactions - int np = min(range.y-py0,BSIZE); - for (int i=0; i < np; ++i) - { - SPHFluidCalcForce(xi, vi, ((const CudaVec4*)temp_x)[i], ((const CudaVec3*)temp_v)[i], force, params); - } - } - __syncthreads(); - } - if (px < ghost) - { - // actual particle -> write computed force - ((CudaVec3*)f)[index] += force; - } - } - } -} -/* -template -__global__ void SPHFluidForceFieldCuda3t_addDForce_kernel(int size, const int *cells, const int *cellGhost, GPUSPHFluid params, real* df, const real* pos4, const real* v, const real* dx) -{ - __shared__ int2 range; - __shared__ int ghost; - __shared__ real temp_x[BSIZE*4]; - __shared__ real temp_v[BSIZE*3]; - __shared__ real temp_dx[BSIZE*3]; - int tx3 = threadIdx.x * 3; - int tx4 = threadIdx.x << 2; - for (int cell = blockIdx.x; cell < size; cell += gridDim.x) - { - if (!threadIdx.x) - { - //range = *(const int2*)(cells+cell); - range.x = cells[cell]; - range.y = cells[cell+1]; - range.y &= ~(1U<<31); - ghost = cellGhost[cell]; - } - __syncthreads(); - if (range.x <= 0) continue; // no actual particle in this cell - for (int px0 = range.x; px0 < ghost; px0 += BSIZE) - { - int px = px0 + threadIdx.x; - CudaVec4 xi; - CudaVec3 vi; - CudaVec3 dxi; - CudaVec3 dforce; - int index; - if (px < range.y) - { - index = cells[px]; - xi = ((const CudaVec4*)pos4)[index]; - temp_x[tx4 ] = xi.x; - temp_x[tx4+1] = xi.y; - temp_x[tx4+2] = xi.z; - temp_x[tx4+3] = xi.w; - vi = ((const CudaVec3*)v)[index]; - temp_v[tx3 ] = vi.x; - temp_v[tx3+1] = vi.y; - temp_v[tx3+2] = vi.z; - dxi = ((const CudaVec3*)dx)[index]; - temp_dx[tx3 ] = dxi.x; - temp_dx[tx3+1] = dxi.y; - temp_dx[tx3+2] = dxi.z; - } - __syncthreads(); - if (px < ghost) - { // actual particle -> compute interactions - dforce = CudaVec3::make(0,0,0); - int np = min(range.y-px0,BSIZE); - for (int i=0; i < np; ++i) - { - if (i != threadIdx.x) - SPHFluidCalcDForce(xi, vi, dxi, ((const CudaVec4*)temp_x)[i], ((const CudaVec3*)temp_v)[i], ((const CudaVec3*)temp_dx)[i], dforce, params); - } - } - __syncthreads(); - // loop through other groups of particles - for (int py0 = range.x; py0 < range.y; py0 += BSIZE) - { - if (py0 == px0) continue; - int py = py0 + threadIdx.x; - if (py < range.y) - { - int index2 = cells[py]; - CudaVec4 xj = ((const CudaVec4*)pos4)[index2]; - temp_x[tx4 ] = xj.x; - temp_x[tx4+1] = xj.y; - temp_x[tx4+2] = xj.z; - temp_x[tx4+3] = xj.w; - CudaVec3 vj = ((const CudaVec3*)v)[index2]; - temp_v[tx3 ] = vj.x; - temp_v[tx3+1] = vj.y; - temp_v[tx3+2] = vj.z; - CudaVec3 dxj = ((const CudaVec3*)dx)[index2]; - temp_dx[tx3 ] = dxj.x; - temp_dx[tx3+1] = dxj.y; - temp_dx[tx3+2] = dxj.z; - } - __syncthreads(); - if (px < ghost) - { // actual particle -> compute interactions - int np = min(range.y-py0,BSIZE); - for (int i=0; i < np; ++i) - { - SPHFluidCalcDForce(xi, vi, dxi, ((const CudaVec4*)temp_x)[i], ((const CudaVec3*)temp_v)[i], ((const CudaVec3*)temp_dx)[i], dforce, params); - } - } - __syncthreads(); - } - if (px < ghost) - { // actual particle -> write computed force - ((CudaVec3*)df)[index] += dforce; - } - } - } -} -*/ -////////////////////// -// CPU-side methods // -////////////////////// - -void SPHFluidForceFieldCuda3f_computeDensity(int kernelType, int pressureType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3f* params, void* pos4, const void* x) -{ - dim3 threads(BSIZE,1); - dim3 grid(60,1); - switch(kernelType) - { - case 0: - switch(pressureType) - { - case 0: //SPHFluidForceFieldCuda3t_computeDensity_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)pos4, (const float*)x); - break; - case 1: SPHFluidForceFieldCuda3t_computeDensity_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)pos4, (const float*)x); - break; - default: break; - } - break; - case 1: - switch(pressureType) - { - case 0: //SPHFluidForceFieldCuda3t_computeDensity_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)pos4, (const float*)x); - break; - case 1: SPHFluidForceFieldCuda3t_computeDensity_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)pos4, (const float*)x); - break; - default: break; - } - break; - default: break; - } - mycudaDebugError("SPHFluidForceFieldCuda3t_computeDensity_kernel"); -} - -void SPHFluidForceFieldCuda3f_addForce(int kernelType, int pressureType, int viscosityType, int surfaceTensionType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3f* params, void* f, const void* x, const void* v) -{ - dim3 threads(BSIZE,1); - dim3 grid(60,1); - switch(kernelType) - { - case 0: - switch(pressureType) - { - case 0: - switch(viscosityType) - { - case 0: - switch(surfaceTensionType) - { - case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - default: break; - } - break; - case 1: - switch(surfaceTensionType) - { - case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - default: break; - } - break; - case 2: - switch(surfaceTensionType) - { - case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - default: break; - } - break; - default: break; - } - break; - case 1: - switch(viscosityType) - { - case 0: - switch(surfaceTensionType) - { - case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - default: break; - } - break; - case 1: - switch(surfaceTensionType) - { - case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - default: break; - } - break; - case 2: - switch(surfaceTensionType) - { - case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - default: break; - } - break; - default: break; - } - break; - default: break; - } - break; - case 1: - switch(pressureType) - { - case 0: - switch(viscosityType) - { - case 0: - switch(surfaceTensionType) - { - case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - default: break; - } - break; - case 1: - switch(surfaceTensionType) - { - case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - default: break; - } - break; - case 2: - switch(surfaceTensionType) - { - case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - default: break; - } - break; - default: break; - } - break; - case 1: - switch(viscosityType) - { - case 0: - switch(surfaceTensionType) - { - case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - default: break; - } - break; - case 1: - switch(surfaceTensionType) - { - case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - default: break; - } - break; - case 2: - switch(surfaceTensionType) - { - case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)f, (const float*)x, (const float*)v); - break; - default: break; - } - break; - default: break; - } - break; - default: break; - } - break; - default: break; - } - mycudaDebugError("SPHFluidForceFieldCuda3t_addForce_kernel"); -} -/* -void SPHFluidForceFieldCuda3f_addDForce(unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3f* params, void* df, const void* x, const void* v, const void* dx) -{ - dim3 threads(BSIZE,1); - dim3 grid(60/BSIZE,1); - {SPHFluidForceFieldCuda3t_addDForce_kernel<<< grid, threads, BSIZE*3*sizeof(float) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (float*)df, (const float*)x, (const float*)v, (const float*)dx); mycudaDebugError("SPHFluidForceFieldCuda3t_addDForce_kernel");} -} -*/ -#ifdef SOFA_GPU_CUDA_DOUBLE - -void SPHFluidForceFieldCuda3d_computeDensity(int kernelType, int pressureType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3d* params, void* pos4, const void* x) -{ - dim3 threads(BSIZE,1); - dim3 grid(60,1); - switch(kernelType) - { - case 0: - switch(pressureType) - { - case 0: //SPHFluidForceFieldCuda3t_computeDensity_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)pos4, (const double*)x); - break; - case 1: SPHFluidForceFieldCuda3t_computeDensity_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)pos4, (const double*)x); - break; - default: break; - } - break; - case 1: - switch(pressureType) - { - case 0: //SPHFluidForceFieldCuda3t_computeDensity_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)pos4, (const double*)x); - break; - case 1: SPHFluidForceFieldCuda3t_computeDensity_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)pos4, (const double*)x); - break; - default: break; - } - break; - default: break; - } - mycudaDebugError("SPHFluidForceFieldCuda3t_computeDensity_kernel"); - -} - -void SPHFluidForceFieldCuda3d_addForce (int kernelType, int pressureType, int viscosityType, int surfaceTensionType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3d* params, void* f, const void* x, const void* v) -{ - dim3 threads(BSIZE,1); - dim3 grid(60,1); - switch(kernelType) - { - case 0: - switch(pressureType) - { - case 0: - switch(viscosityType) - { - case 0: - switch(surfaceTensionType) - { - case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - default: break; - } - break; - case 1: - switch(surfaceTensionType) - { - case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - default: break; - } - break; - case 2: - switch(surfaceTensionType) - { - case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - default: break; - } - break; - default: break; - } - break; - case 1: - switch(viscosityType) - { - case 0: - switch(surfaceTensionType) - { - case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - default: break; - } - break; - case 1: - switch(surfaceTensionType) - { - case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - default: break; - } - break; - case 2: - switch(surfaceTensionType) - { - case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - default: break; - } - break; - default: break; - } - break; - default: break; - } - break; - case 1: - switch(pressureType) - { - case 0: - switch(viscosityType) - { - case 0: - switch(surfaceTensionType) - { - case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - default: break; - } - break; - case 1: - switch(surfaceTensionType) - { - case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - default: break; - } - break; - case 2: - switch(surfaceTensionType) - { - case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - default: break; - } - break; - default: break; - } - break; - case 1: - switch(viscosityType) - { - case 0: - switch(surfaceTensionType) - { - case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - default: break; - } - break; - case 1: - switch(surfaceTensionType) - { - case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - default: break; - } - break; - case 2: - switch(surfaceTensionType) - { - case 0: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - case 1: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - case 2: SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); - break; - default: break; - } - break; - default: break; - } - break; - default: break; - } - break; - default: break; - } - mycudaDebugError("SPHFluidForceFieldCuda3t_addForce_kernel"); - /* - dim3 threads(BSIZE,1); - dim3 grid(60/BSIZE,1); - if (params->surfaceTension > 0) - {SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); mycudaDebugError("SPHFluidForceFieldCuda3t_addForce_kernel");} - else - {SPHFluidForceFieldCuda3t_addForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)f, (const double*)x, (const double*)v); mycudaDebugError("SPHFluidForceFieldCuda3t_addForce_kernel");} - */ -} -/* -void SPHFluidForceFieldCuda3d_addDForce(unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3d* params, void* df, const void* x, const void* v, const void* dx) -{ - dim3 threads(BSIZE,1); - dim3 grid(60/BSIZE,1); - {SPHFluidForceFieldCuda3t_addDForce_kernel<<< grid, threads, BSIZE*3*sizeof(double) >>>(size, (const int*) cells, (const int*) cellGhost, *params, (double*)df, (const double*)x, (const double*)v, (const double*)dx); mycudaDebugError("SPHFluidForceFieldCuda3t_addDForce_kernel");} -} -*/ -#endif // SOFA_GPU_CUDA_DOUBLE - -#if defined(__cplusplus) && CUDA_VERSION < 2000 -} // namespace cuda -} // namespace gpu -} // namespace sofa -#endif diff --git a/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaSPHFluidForceField.h b/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaSPHFluidForceField.h deleted file mode 100644 index 9f59ad575e7..00000000000 --- a/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaSPHFluidForceField.h +++ /dev/null @@ -1,135 +0,0 @@ -/****************************************************************************** -* SOFA, Simulation Open-Framework Architecture * -* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU Lesser General Public License as published by * -* the Free Software Foundation; either version 2.1 of the License, or (at * -* your option) any later version. * -* * -* This program is distributed in the hope that it will be useful, but WITHOUT * -* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * -* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * -* for more details. * -* * -* You should have received a copy of the GNU Lesser General Public License * -* along with this program. If not, see . * -******************************************************************************* -* Authors: The SOFA Team and external contributors (see Authors.txt) * -* * -* Contact information: contact@sofa-framework.org * -******************************************************************************/ -#ifndef SOFA_GPU_CUDA_CUDASPHFLUIDFORCEFIELD_H -#define SOFA_GPU_CUDA_CUDASPHFLUIDFORCEFIELD_H - -#include -#include -#include - -namespace sofa -{ - - -namespace gpu::cuda -{ - -template -struct GPUSPHFluid -{ - real h; ///< particles radius - real h2; ///< particles radius squared - real inv_h2; ///< particles radius squared inverse - real stiffness; ///< pressure stiffness - real mass; ///< particles mass - real mass2; ///< particles mass squared - real density0; ///< 1000 kg/m3 for water - real viscosity; - real surfaceTension; - - // Precomputed constants for smoothing kernels - real CWd; ///< = constWd(h) - //real CgradWd; ///< = constGradWd(h) - real CgradWp; ///< = constGradWp(h) - real ClaplacianWv; ///< = constLaplacianWv(h) -}; - -typedef GPUSPHFluid GPUSPHFluid3f; -typedef GPUSPHFluid GPUSPHFluid3d; - -} // namespace gpu::cuda - - -namespace component::forcefield -{ - -template -class SPHFluidForceFieldInternalData< gpu::cuda::CudaVectorTypes > -{ -public: - typedef gpu::cuda::CudaVectorTypes DataTypes; - typedef SPHFluidForceFieldInternalData Data; - typedef SPHFluidForceField Main; - typedef typename DataTypes::Coord Coord; - typedef typename DataTypes::Real Real; - gpu::cuda::GPUSPHFluid params; - gpu::cuda::CudaVector pos4; - - void fillParams(Main* m, int kernelType, double kFactor=1.0, double bFactor=1.0) - { - Real h = m->d_particleRadius.getValue(); - params.h = h; - params.h2 = h*h; - params.inv_h2 = 1/(h*h); - params.stiffness = (Real)(kFactor*m->d_pressureStiffness.getValue()); - params.mass = m->d_particleMass.getValue(); - params.mass2 = params.mass*params.mass; - params.density0 = m->d_density0.getValue(); - params.viscosity = (Real)(bFactor*m->d_viscosity.getValue()); - params.surfaceTension = (Real)(kFactor*m->d_surfaceTension.getValue()); - if (kernelType == 1) - { - params.CWd = SPHKernel::constW(h); - //params.CgradWd = SPHKernel::constGradW(h); - params.CgradWp = SPHKernel::constGradW(h); - params.ClaplacianWv = SPHKernel::constLaplacianW(h); - } - else - { - params.CWd = SPHKernel::constW(h); - //params.CgradWd = SPHKernel::constGradW(h); - params.CgradWp = SPHKernel::constGradW(h); - params.ClaplacianWv = SPHKernel::constLaplacianW(h); - } - } - - void Kernels_computeDensity(int kernelType, int pressureType, int gsize, const void* cells, const void* cellGhost, void* pos4, const void* x); - void Kernels_addForce(int kernelType, int pressureType, int viscosityType, int surfaceTensionType, int gsize, const void* cells, const void* cellGhost, void* f, const void* pos4, const void* vel); - //void Kernels_addDForce(int kernelType, int pressureType, int viscosityType, int surfaceTensionType, int gsize, const void* cells, const void* cellGhost, void* f, const void* pos4, const void* dx, const void* vel); -}; - - -template <> -void SPHFluidForceField::addForce(const core::MechanicalParams* mparams, DataVecDeriv& d_f, const DataVecCoord& d_x, const DataVecDeriv& d_v); - -template <> -void SPHFluidForceField::addDForce(const core::MechanicalParams* mparams, DataVecDeriv& d_df, const DataVecDeriv& d_dx); - -template <> -void SPHFluidForceField::draw(const core::visual::VisualParams* vparams); - -#ifdef SOFA_GPU_CUDA_DOUBLE - -template <> -void SPHFluidForceField::addForce(const core::MechanicalParams* mparams, DataVecDeriv& d_f, const DataVecCoord& d_x, const DataVecDeriv& d_v); - -template <> -void SPHFluidForceField::addDForce(const core::MechanicalParams* mparams, DataVecDeriv& d_df, const DataVecDeriv& d_dx); - -#endif // SOFA_GPU_CUDA_DOUBLE - -} // namespace component::forcefield - - -} // namespace sofa - -#endif diff --git a/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaSPHFluidForceField.inl b/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaSPHFluidForceField.inl deleted file mode 100644 index 752ea1f1502..00000000000 --- a/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaSPHFluidForceField.inl +++ /dev/null @@ -1,273 +0,0 @@ -/****************************************************************************** -* SOFA, Simulation Open-Framework Architecture * -* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU Lesser General Public License as published by * -* the Free Software Foundation; either version 2.1 of the License, or (at * -* your option) any later version. * -* * -* This program is distributed in the hope that it will be useful, but WITHOUT * -* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * -* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * -* for more details. * -* * -* You should have received a copy of the GNU Lesser General Public License * -* along with this program. If not, see . * -******************************************************************************* -* Authors: The SOFA Team and external contributors (see Authors.txt) * -* * -* Contact information: contact@sofa-framework.org * -******************************************************************************/ -#ifndef SOFA_GPU_CUDA_CUDASPHFLUIDFORCEFIELD_INL -#define SOFA_GPU_CUDA_CUDASPHFLUIDFORCEFIELD_INL - -#include "CudaSPHFluidForceField.h" -#include -#include -#include - -namespace sofa -{ - - -namespace gpu::cuda -{ - -extern "C" -{ - -void SPHFluidForceFieldCuda3f_computeDensity(int kernelType, int pressureType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3f* params, void* pos4, const void* x); -void SPHFluidForceFieldCuda3f_addForce (int kernelType, int pressureType, int viscosityType, int surfaceTensionType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3f* params, void* f, const void* pos4, const void* v); -//void SPHFluidForceFieldCuda3f_addDForce(int kernelType, int pressureType, int viscosityType, int surfaceTensionType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3f* params, void* f, const void* pos4, const void* v, const void* dx); - -#ifdef SOFA_GPU_CUDA_DOUBLE - -void SPHFluidForceFieldCuda3d_computeDensity(int kernelType, int pressureType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3d* params, void* pos4, const void* x); -void SPHFluidForceFieldCuda3d_addForce (int kernelType, int pressureType, int viscosityType, int surfaceTensionType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3d* params, void* f, const void* pos4, const void* v); -//void SPHFluidForceFieldCuda3d_addDForce(int kernelType, int pressureType, int viscosityType, int surfaceTensionType, unsigned int size, const void* cells, const void* cellGhost, GPUSPHFluid3d* params, void* f, const void* pos4, const void* v, const void* dx); - -#endif // SOFA_GPU_CUDA_DOUBLE -} - -} // namespace gpu::cuda - - -namespace component::forcefield -{ - -using namespace gpu::cuda; - -template<> -void SPHFluidForceFieldInternalData::Kernels_computeDensity(int kernelType, int pressureType, int gsize, const void* cells, const void* cellGhost, void* pos4, const void* x) -{ - SPHFluidForceFieldCuda3f_computeDensity(kernelType, pressureType, gsize, cells, cellGhost, ¶ms, pos4, x); -} - -template<> -void SPHFluidForceFieldInternalData::Kernels_addForce(int kernelType, int pressureType, int viscosityType, int surfaceTensionType, int gsize, const void* cells, const void* cellGhost, void* f, const void* pos4, const void* v) -{ - SPHFluidForceFieldCuda3f_addForce (kernelType, pressureType, viscosityType, surfaceTensionType, gsize, cells, cellGhost, ¶ms, f, pos4, v); -} -/* -template<> -void SPHFluidForceFieldInternalData::Kernels_addDForce(int kernelType, int pressureType, int viscosityType, int surfaceTensionType, int gsize, const void* cells, const void* cellGhost, void* f, const void* pos4, const void* v, const void* dx) -{ - SPHFluidForceFieldCuda3f_addDForce(kernelType, pressureType, viscosityType, surfaceTensionType, gsize, cells, cellGhost, ¶ms, f, pos4, v, dx); -} -*/ -template <> -void SPHFluidForceField::addForce(const core::MechanicalParams* /*mparams*/, DataVecDeriv& d_f, const DataVecCoord& d_x, const DataVecDeriv& d_v) -{ - if (m_grid == NULL) return; - - const int kernelT = d_kernelType.getValue(); - const int pressureT = d_pressureType.getValue(); - const Real viscosity = this->d_viscosity.getValue(); - const int viscosityT = (viscosity == 0) ? 0 : d_viscosityType.getValue(); - const Real surfaceTension = this->d_surfaceTension.getValue(); - const int surfaceTensionT = (surfaceTension <= 0) ? 0 : d_surfaceTensionType.getValue(); - - VecDeriv& f = *d_f.beginEdit(); - const VecCoord& x = d_x.getValue(); - const VecDeriv& v = d_v.getValue(); - - m_grid->updateGrid(x); - data.fillParams(this, kernelT); - f.resize(x.size()); - Grid::Grid* g = m_grid->getGrid(); - data.pos4.recreate(x.size()); - data.Kernels_computeDensity( kernelT, pressureT, - g->getNbCells(), g->getCellsVector().deviceRead(), g->getCellGhostVector().deviceRead(), - data.pos4.deviceWrite(), x.deviceRead()); - - msg_info() << "density[" << 0 << "] = " << data.pos4[0][3] - << "density[" << data.pos4.size()/2 << "] = " << data.pos4[data.pos4.size()/2][3]; - - data.Kernels_addForce( kernelT, pressureT, viscosityT, surfaceTensionT, - g->getNbCells(), g->getCellsVector().deviceRead(), g->getCellGhostVector().deviceRead(), - f.deviceWrite(), data.pos4.deviceRead(), v.deviceRead()); - - d_f.endEdit(); -} - -template <> -void SPHFluidForceField::addDForce(const core::MechanicalParams* mparams, DataVecDeriv& /*d_df*/, const DataVecDeriv& /*d_dx*/) -{ - mparams->setKFactorUsed(true); -#if 0 - if (m_grid == NULL) return; - - const int kernelT = d_kernelType.getValue(); - const int pressureT = d_pressureType.getValue(); - const Real viscosity = this->d_viscosity.getValue(); - const int viscosityT = (viscosity == 0) ? 0 : d_viscosityType.getValue(); - const Real surfaceTension = this->d_surfaceTension.getValue(); - const int surfaceTensionT = (surfaceTension <= 0) ? 0 : d_surfaceTensionType.getValue(); - - VecDeriv& df = *d_df.beginEdit(); - const VecDeriv& dx = d_dx.getValue(); - - const VecDeriv& v = this->mstate->read(core::vec_id::read_access::velocity)->getValue(); - data.fillParams(this, kernelT, mparams->kFactor(), sofa::core::mechanicalparams::bFactor(mparams)); - df.resize(dx.size()); - Grid::Grid* g = m_grid->getGrid(); - data.Kernels_addDForce( kernelT, pressureT, viscosityT, surfaceTensionT, - g->getNbCells(), g->getCellsVector().deviceRead(), g->getCellGhostVector().deviceRead(), - df.deviceWrite(), data.pos4.deviceRead(), v.deviceRead(), dx.deviceRead()); - - d_df.endEdit(); -#endif -} - - -#ifdef SOFA_GPU_CUDA_DOUBLE - - -template<> -void SPHFluidForceFieldInternalData::Kernels_computeDensity(int kernelType, int pressureType, int gsize, const void* cells, const void* cellGhost, void* pos4, const void* x) -{ - SPHFluidForceFieldCuda3d_computeDensity(kernelType, pressureType, gsize, cells, cellGhost, ¶ms, pos4, x); -} - -template<> -void SPHFluidForceFieldInternalData::Kernels_addForce(int kernelType, int pressureType, int viscosityType, int surfaceTensionType, int gsize, const void* cells, const void* cellGhost, void* f, const void* pos4, const void* v) -{ - SPHFluidForceFieldCuda3d_addForce (kernelType, pressureType, viscosityType, surfaceTensionType, gsize, cells, cellGhost, ¶ms, f, pos4, v); -} -/* -template<> -void SPHFluidForceFieldInternalData::Kernels_addDForce(int kernelType, int pressureType, int viscosityType, int surfaceTensionType, int gsize, const void* cells, const void* cellGhost, void* f, const void* pos4, const void* v, const void* dx) -{ - SPHFluidForceFieldCuda3d_addDForce(kernelType, pressureType, viscosityType, surfaceTensionType, gsize, cells, cellGhost, ¶ms, f, pos4, v, dx); -} -*/ -template <> -void SPHFluidForceField::addForce(const core::MechanicalParams* /*mparams*/, DataVecDeriv& d_f, const DataVecCoord& d_x, const DataVecDeriv& d_v) -{ - if (m_grid == NULL) return; - - const int kernelT = d_kernelType.getValue(); - const int pressureT = d_pressureType.getValue(); - const Real viscosity = this->d_viscosity.getValue(); - const int viscosityT = (viscosity == 0) ? 0 : d_viscosityType.getValue(); - const Real surfaceTension = this->d_surfaceTension.getValue(); - const int surfaceTensionT = (surfaceTension <= 0) ? 0 : d_surfaceTensionType.getValue(); - - VecDeriv& f = *d_f.beginEdit(); - const VecCoord& x = d_x.getValue(); - const VecDeriv& v = d_v.getValue(); - - m_grid->updateGrid(x); - data.fillParams(this, kernelT); - f.resize(x.size()); - Grid::Grid* g = m_grid->getGrid(); - data.pos4.recreate(x.size()); - data.Kernels_computeDensity( kernelT, pressureT, - g->getNbCells(), g->getCellsVector().deviceRead(), g->getCellGhostVector().deviceRead(), - data.pos4.deviceWrite(), x.deviceRead()); - msg_info() << "density[" << 0 << "] = " << data.pos4[0][3] - << "density[" << data.pos4.size()/2 << "] = " << data.pos4[data.pos4.size()/2][3]; - data.Kernels_addForce( kernelT, pressureT, viscosityT, surfaceTensionT, - g->getNbCells(), g->getCellsVector().deviceRead(), g->getCellGhostVector().deviceRead(), - f.deviceWrite(), data.pos4.deviceRead(), v.deviceRead()); - - d_f.endEdit(); -} - -template <> -void SPHFluidForceField::addDForce(const core::MechanicalParams* mparams, DataVecDeriv& /*d_df*/, const DataVecDeriv& /*d_dx*/) -{ - mparams->setKFactorUsed(true); -#if 0 - if (m_grid == NULL) return; - - const int kernelT = d_kernelType.getValue(); - const int pressureT = d_pressureType.getValue(); - const Real viscosity = this->d_viscosity.getValue(); - const int viscosityT = (viscosity == 0) ? 0 : d_viscosityType.getValue(); - const Real surfaceTension = this->d_surfaceTension.getValue(); - const int surfaceTensionT = (surfaceTension <= 0) ? 0 : d_surfaceTensionType.getValue(); - - VecDeriv& df = *d_df.beginEdit(); - const VecDeriv& dx = d_dx.getValue(); - //const VecCoord& x = this->mstate->read(core::ConstVecCoordId::position())->getValue(); - const VecDeriv& v = this->mstate->read(core::vec_id::read_access::velocity)->getValue(); - data.fillParams(this, mparams->kFactor(), sofa::core::mechanicalparams::bFactor(mparams)); - df.resize(dx.size()); - Grid::Grid* g = m_grid->getGrid(); - data.Kernels_addDForce( kernelT, pressureT, viscosityT, surfaceTensionT, - g->getNbCells(), g->getCellsVector().deviceRead(), g->getCellGhostVector().deviceRead(), - df.deviceWrite(), data.pos4.deviceRead(), v.deviceRead(), dx.deviceRead()); - d_df.endEdit(); -#endif -} - -#endif // SOFA_GPU_CUDA_DOUBLE - - - -template <> -void SPHFluidForceField::draw(const core::visual::VisualParams* vparams) -{ -#if SOFACUDA_HAVE_SOFA_GL == 1 - if (!vparams->displayFlags().getShowForceFields()) return; - //if (m_grid != NULL) - // grid->draw(vparams); - helper::ReadAccessor x = this->mstate->read(sofa::core::vec_id::read_access::position)->getValue(); - helper::ReadAccessor > pos4 = this->data.pos4; - if (pos4.empty()) return; - glDisable(GL_LIGHTING); - glColor3f(0,1,1); - glDisable(GL_BLEND); - glDepthMask(1); - glPointSize(5); - glBegin(GL_POINTS); - for (unsigned int i=0; i. * -******************************************************************************* -* Authors: The SOFA Team and external contributors (see Authors.txt) * -* * -* Contact information: contact@sofa-framework.org * -******************************************************************************/ -#include -#include -#include - - -namespace sofa::component::container -{ - -using namespace sofa::defaulttype; -using namespace sofa::gpu::cuda; -using namespace core::behavior; - - -int SpatialGridContainerCudaClass = core::RegisterObject("GPU support using CUDA.") - .add< SpatialGridContainer >() - ; - -template class SOFA_GPU_CUDA_API SpatialGridContainer< CudaVec3fTypes >; -template class SOFA_GPU_CUDA_API SpatialGrid< SpatialGridTypes< CudaVec3fTypes > >; - -#ifdef SOFA_GPU_CUDA_DOUBLE - -template class SpatialGridContainer< CudaVec3dTypes >; -template class SpatialGrid< SpatialGridTypes< CudaVec3dTypes > >; - -#endif // SOFA_GPU_CUDA_DOUBLE - -} // namespace sofa::component::container - - - - diff --git a/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaSpatialGridContainer.cu b/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaSpatialGridContainer.cu deleted file mode 100644 index 9f459e141f3..00000000000 --- a/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaSpatialGridContainer.cu +++ /dev/null @@ -1,377 +0,0 @@ -/****************************************************************************** -* SOFA, Simulation Open-Framework Architecture * -* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU Lesser General Public License as published by * -* the Free Software Foundation; either version 2.1 of the License, or (at * -* your option) any later version. * -* * -* This program is distributed in the hope that it will be useful, but WITHOUT * -* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * -* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * -* for more details. * -* * -* You should have received a copy of the GNU Lesser General Public License * -* along with this program. If not, see . * -******************************************************************************* -* Authors: The SOFA Team and external contributors (see Authors.txt) * -* * -* Contact information: contact@sofa-framework.org * -******************************************************************************/ -/* PART OF THIS FILE IS FROM NVIDIA CUDA SDK particles demo: - * - * Copyright 1993-2006 NVIDIA Corporation. All rights reserved. - * - * NOTICE TO USER: - * - * This source code is subject to NVIDIA ownership rights under U.S. and - * international Copyright laws. - * - * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE - * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR - * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH - * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. - * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, - * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS - * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE - * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE - * OR PERFORMANCE OF THIS SOURCE CODE. - * - * U.S. Government End Users. This source code is a "commercial item" as - * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of - * "commercial computer software" and "commercial computer software - * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) - * and is provided to the U.S. Government only as a commercial end item. - * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through - * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the - * source code with only those rights set forth herein. - */ - - -#include -#include -#include -#include - -#if defined(__cplusplus) && CUDA_VERSION < 2000 -namespace sofa -{ -namespace gpu -{ -namespace cuda -{ -#endif - -extern "C" -{ - void SpatialGridContainer3f_computeHash(int cellBits, float cellWidth, int nbPoints, void* particleIndex8, void* particleHash8, const void* x); - void SpatialGridContainer3f1_computeHash(int cellBits, float cellWidth, int nbPoints, void* particleIndex8, void* particleHash8, const void* x); - void SpatialGridContainer_findCellRange(int cellBits, int index0, float cellWidth, int nbPoints, const void* particleHash8, void* cellRange, void* cellGhost); -//void SpatialGridContainer3f_reorderData(int nbPoints, const void* particleHash, void* sorted, const void* x); -//void SpatialGridContainer3f1_reorderData(int nbPoints, const void* particleHash, void* sorted, const void* x); -} - -#define USE_TEX 0 - -struct GridParams -{ - float cellWidth; - float invCellWidth; - int cellMask; - float halfCellWidth; - float invHalfCellWidth; -}; - -// large prime numbers -#define HASH_PX 73856093 -#define HASH_PY 19349663 -#define HASH_PZ 83492791 - -////////////////////// -// GPU-side methods // -////////////////////// - -#if USE_TEX -texture cellRangeTex; -#endif - -__constant__ GridParams gridParams; - -// calculate cell in grid from position -template -__device__ int3 calcGridPos(T p) -{ - int3 i; - i.x = __float2int_rd(p.x * gridParams.invCellWidth); - i.y = __float2int_rd(p.y * gridParams.invCellWidth); - i.z = __float2int_rd(p.z * gridParams.invCellWidth); - return i; -} - -// calculate address in grid from position -__device__ unsigned int calcGridHashI(int3 p) -{ - //return ((p.x<<10)^(p.y<<5)^(p.z)) & gridParams.cellMask; - //return ((p.x)^(p.y)^(p.z)) & gridParams.cellMask; - return (__mul24(HASH_PX,p.x)^__mul24(HASH_PY,p.y)^__mul24(HASH_PZ,p.z)) & gridParams.cellMask; - //return (p.x) & gridParams.cellMask; -} - -// calculate address in grid from position -template -__device__ unsigned int calcGridHash(T p) -{ - return calcGridHashI(calcGridPos(p)); -} - - -__device__ __inline__ float3 getPos3(const float4* pos, int index0, int index) -{ - float4 p = pos[index]; - return make_float3(p.x,p.y,p.z); -} - -__shared__ float ftemp[BSIZE*3]; - -__device__ __inline__ float3 getPos3(const float3* pos, int index0, int index) -{ - //return pos[index]; - - int index03 = index0 * 3; - int index3 = threadIdx.x * 3; - ftemp[threadIdx.x] = ((const float*)pos)[index03+threadIdx.x]; - ftemp[threadIdx.x+BSIZE] = ((const float*)pos)[index03+threadIdx.x+BSIZE]; - ftemp[threadIdx.x+2*BSIZE] = ((const float*)pos)[index03+threadIdx.x+2*BSIZE]; - __syncthreads(); - return make_float3(ftemp[index3],ftemp[index3+1],ftemp[index3+2]); -} - -__device__ __inline__ float4 getPos4(const float4* pos, int index0, int index) -{ - return pos[index]; -} - -__device__ __inline__ float4 getPos4(const float3* pos, int index0, int index) -{ - int index3 = threadIdx.x * 3; - pos += index0; - ftemp[threadIdx.x] = ((const float*)pos)[threadIdx.x]; - ftemp[threadIdx.x+BSIZE] = ((const float*)pos)[threadIdx.x+BSIZE]; - ftemp[threadIdx.x+2*BSIZE] = ((const float*)pos)[threadIdx.x+2*BSIZE]; - __syncthreads(); - return make_float4(ftemp[index3],ftemp[index3+1],ftemp[index3+2],0.0f); -} - -__device__ __inline__ float4 getPos4(const float4* pos, int index) -{ - return pos[index]; -} - -__device__ __inline__ float4 getPos4(const float3* pos, int index) -{ - float3 p = pos[index]; - return make_float4(p.x,p.y,p.z,1.0f); -} - -// calculate grid hash value for each particle -template -__global__ void -computeHashD(const TIn* pos, - unsigned int* particleIndex8, unsigned int* particleHash8, int n) -{ - int index0 = (blockIdx.x*BSIZE); - int index = index0 + threadIdx.x; - int nt = n - index0; if (nt > BSIZE) nt = BSIZE; - float3 p = getPos3(pos,index0,index); - - int3 hgpos; - hgpos.x = __float2int_rd(p.x * gridParams.invHalfCellWidth); - hgpos.y = __float2int_rd(p.y * gridParams.invHalfCellWidth); - hgpos.z = __float2int_rd(p.z * gridParams.invHalfCellWidth); - int halfcell = ((hgpos.x&1) + ((hgpos.y&1)<<1) + ((hgpos.z&1)<<2))^7; - // compute the first cell to be influenced by the particle - hgpos.x = (hgpos.x-1) >> 1; - hgpos.y = (hgpos.y-1) >> 1; - hgpos.z = (hgpos.z-1) >> 1; - - __syncthreads(); - - __shared__ int hx[3*BSIZE]; - int x = threadIdx.x; - -// hx[x] = (__mul24(HASH_PX,hgpos.x) << 3)+halfcell; -// hy[x] = __mul24(HASH_PY,hgpos.y); -// hz[x] = __mul24(HASH_PZ,hgpos.z); - hx[x] = ((HASH_PX*hgpos.x) << 3)+halfcell; - hx[BSIZE+x] = (HASH_PY*hgpos.y); - hx[2*BSIZE+x] = (HASH_PZ*hgpos.z); - __syncthreads(); - int3 dH; - dH.x = (x&1 ? HASH_PX : 0); - dH.y = (x&2 ? HASH_PY : 0); - dH.z = (x&4 ? HASH_PZ : 0); - int x_7 = x&7; - int index0_8_x_7 = (index0 << 3) + x_7; - for (unsigned int lx = x>>3; lx < nt; lx+=(BSIZE>>3)) - { - particleIndex8[index0_8_x_7 + (lx<<3)] = index0 + lx; - int3 h; - h.x = hx[lx]; - h.y = hx[BSIZE+lx]; - h.z = hx[2*BSIZE+lx]; - int hc = h.x & 7; - h.x = (h.x>>3) + dH.x; - h.y += dH.y; - h.z += dH.z; - unsigned int hash = ((h.x ^ h.y ^ h.z) & gridParams.cellMask)<<1; - if (hc != x_7) ++hash; - particleHash8[index0_8_x_7 + (lx<<3)] = hash; - } -} - -// find start of each cell in sorted particle list by comparing with previous hash value -// one thread per particle -__global__ void -findCellRangeD(int index0, const unsigned int* particleHash, - int * cellRange, int* cellGhost, int n) -{ - unsigned int i = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; - __shared__ unsigned int hash[BSIZE]; - if (i < n) - hash[threadIdx.x] = particleHash[i]; - - __syncthreads(); - - if (i < n) - { - bool firstInCell; - bool firstGhost; - unsigned int cur = hash[threadIdx.x]; - if (i == 0) - { - firstInCell = true; - firstGhost = cur&1; - } - else - { - unsigned int prev; - if (threadIdx.x > 0) - prev = hash[threadIdx.x-1]; - else - prev = particleHash[i-1]; - firstInCell = ((prev>>1) != (cur>>1)); - firstGhost = ((prev != cur) && (cur&1)); - if (firstInCell) - { - if ((prev>>1) < (cur>>1)-1) - cellRange[ (prev>>1)+1 ] = (index0+i) | (1U<<31); - if (!(prev&1)) // no ghost particles in previous cell - cellGhost[ prev>>1 ] = index0+i; - } - } - if (firstInCell) - cellRange[ cur>>1 ] = index0+i; - if (firstGhost) - cellGhost[ cur>>1 ] = index0+i; - if (i == n-1) - { - cellRange[ (cur>>1)+1 ] = (index0+n) | (1U<<31); - if (!(cur&1)) - cellGhost[ cur>>1 ] = index0+n; - } - } -} - -// rearrange particle data into sorted order -template -__global__ void -reorderDataD(const uint2* particleHash, // particle id sorted by hash - const TIn* oldPos, - float4* sortedPos, int n - ) -{ - int index0 = __mul24(blockIdx.x, blockDim.x); - int index = index0 + threadIdx.x; - if (index < n) - { - volatile uint2 sortedData = particleHash[index]; - //float4 pos = getPos4(oldPos,index0,index); - float4 pos = getPos4(oldPos,sortedData.y); - sortedPos[index] = pos; - } -} - - -////////////////////// -// CPU-side methods // -////////////////////// - -void SpatialGridContainer3f_computeHash(int cellBits, float cellWidth, int nbPoints, void* particleIndex8, void* particleHash8, const void* x) -{ - GridParams p; - p.cellWidth = cellWidth; - p.invCellWidth = 1.0f/cellWidth; - p.cellMask = (1<<<< grid, threads >>>((const float3*)x, (unsigned int*)particleIndex8, (unsigned int*)particleHash8, nbPoints); mycudaDebugError("computeHashD");} - } -} - -void SpatialGridContainer3f1_computeHash(int cellBits, float cellWidth, int nbPoints, void* particleIndex8, void* particleHash8, const void* x) -{ - GridParams p; - p.cellWidth = cellWidth; - p.invCellWidth = 1.0f/cellWidth; - p.cellMask = (1<<<< grid, threads >>>((const float4*)x, (unsigned int*)particleIndex8, (unsigned int*)particleHash8, nbPoints); mycudaDebugError("computeHashD");} - } -} - -void SpatialGridContainer_findCellRange(int cellBits, int index0, float cellWidth, int nbPoints, const void* particleHash8, void* cellRange, void* cellGhost) -{ - cudaMemset(cellRange, 0, ((1<>>(index0, (const unsigned int*)particleHash8, (int*)cellRange, (int*)cellGhost, 8*nbPoints); mycudaDebugError("findCellRangeD");} - } -} -/* -void SpatialGridContainer3f_reorderData(int nbPoints, const void* particleHash, void* sorted, const void* x) -{ - dim3 threads(BSIZE,1); - dim3 grid((nbPoints+BSIZE-1)/BSIZE,1); - {reorderDataD<<< grid, threads >>>((const uint2*)particleHash, (const float3*)x, (float4*)sorted, nbPoints); mycudaDebugError("reorderDataD");} -} - -void SpatialGridContainer3f1_reorderData(int nbPoints, const void* particleHash, void* sorted, const void* x) -{ - dim3 threads(BSIZE,1); - dim3 grid((nbPoints+BSIZE-1)/BSIZE,1); - {reorderDataD<<< grid, threads >>>((const uint2*)particleHash, (const float4*)x, (float4*)sorted, nbPoints); mycudaDebugError("reorderDataD");} -} -*/ -#if defined(__cplusplus) && CUDA_VERSION < 2000 -} // namespace cuda -} // namespace gpu -} // namespace sofa -#endif diff --git a/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaSpatialGridContainer.h b/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaSpatialGridContainer.h deleted file mode 100644 index f1f3697f3e0..00000000000 --- a/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaSpatialGridContainer.h +++ /dev/null @@ -1,128 +0,0 @@ -/****************************************************************************** -* SOFA, Simulation Open-Framework Architecture * -* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU Lesser General Public License as published by * -* the Free Software Foundation; either version 2.1 of the License, or (at * -* your option) any later version. * -* * -* This program is distributed in the hope that it will be useful, but WITHOUT * -* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * -* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * -* for more details. * -* * -* You should have received a copy of the GNU Lesser General Public License * -* along with this program. If not, see . * -******************************************************************************* -* Authors: The SOFA Team and external contributors (see Authors.txt) * -* * -* Contact information: contact@sofa-framework.org * -******************************************************************************/ -// -// C++ Interface: SpatialGridContainer -// -// Description: -// -// -// Author: The SOFA team , (C) 2006 -// -// Copyright: See COPYING file that comes with this distribution -// -// - -#ifndef SOFA_GPU_CUDA_CUDASPATIALGRIDCONTAINER_H -#define SOFA_GPU_CUDA_CUDASPATIALGRIDCONTAINER_H - -#include -#include -#include - - -namespace sofa::component::container -{ - -using namespace sofa::defaulttype; - -template -class SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVectorTypes > > -{ -public: - typedef SpatialGridTypes < gpu::cuda::CudaVectorTypes > DataTypes; - typedef typename DataTypes::Real Real; - typedef typename DataTypes::Coord Coord; - typedef typename DataTypes::VecCoord VecCoord; - typedef typename DataTypes::CellData CellData; - typedef typename DataTypes::GridData GridData; - //typedef typename DataTypes::NeighborListener NeighborListener; - typedef typename DataTypes::ParticleField ParticleField; - - enum - { - HASH_PX = 73856093, - HASH_PY = 19349663, - HASH_PZ = 83492791, - }; - -public: - SpatialGrid(Real cellWidth); - - ~SpatialGrid(); - - void update(const VecCoord& x); - - void draw(const core::visual::VisualParams*); - - template - void findNeighbors(NeighborListener* dest, Real dist); - - void computeField(ParticleField* field, Real dist); - - /// Change particles ordering inside a given cell have contiguous indices - /// - /// Fill the old2new and new2old arrays giving the permutation to apply - void reorderIndices(type::vector* old2new, type::vector* new2old); - GridData data; - - Real getCellWidth() const { return cellWidth; } - Real getInvCellWidth() const { return invCellWidth; } - - int getCellBits() const { return cellBits; } - int getNbCells() const { return nbCells; } - - int getCell(const Coord& c) const - { - return ( (helper::rfloor(c[0]*invCellWidth*0.5f) * HASH_PX) ^ - (helper::rfloor(c[1]*invCellWidth*0.5f) * HASH_PY) ^ - (helper::rfloor(c[2]*invCellWidth*0.5f) * HASH_PZ) ) & ((1 << cellBits)-1); - } - - //const sofa::gpu::cuda::CudaVector< unsigned int >& getParticleIndexVector() const { return particleIndex; } - const sofa::gpu::cuda::CudaVector< int >& getCellsVector() const { return cells; } - const sofa::gpu::cuda::CudaVector< int >& getCellGhostVector() const { return cellGhost; } - -protected: - const Real cellWidth; - const Real invCellWidth; - int cellBits, nbCells; - sofa::gpu::cuda::CudaVector< unsigned int > /*particleIndex,*/ particleHash; - //sofa::gpu::cuda::CudaVector< int > cellRange; - sofa::gpu::cuda::CudaVector< int > cells; - sofa::gpu::cuda::CudaVector< int > cellGhost; - sofa::gpu::cuda::CudaVector< sofa::gpu::cuda::Vec3f1 > sortedPos; - const VecCoord* lastX; - - void kernel_computeHash( - int cellBits, Real cellWidth, int nbPoints, void* particleIndex, void* particleHash, const void* x); - - void kernel_updateGrid( - int cellBits, int index0, Real cellWidth, int nbPoints, const void* particleHash, - void* cells, void* cellGhost); - //void kernel_reorderData(int nbPoints, const void* particleIndex, const void* particleHash, void* sorted, const void* x); - -}; - -} // namespace sofa::component::container - - -#endif diff --git a/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaSpatialGridContainer.inl b/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaSpatialGridContainer.inl deleted file mode 100644 index 4f2bb3f424b..00000000000 --- a/applications/plugins/SofaCUDA/sofa/gpu/cuda/CudaSpatialGridContainer.inl +++ /dev/null @@ -1,336 +0,0 @@ -/****************************************************************************** -* SOFA, Simulation Open-Framework Architecture * -* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * -* * -* This program is free software; you can redistribute it and/or modify it * -* under the terms of the GNU Lesser General Public License as published by * -* the Free Software Foundation; either version 2.1 of the License, or (at * -* your option) any later version. * -* * -* This program is distributed in the hope that it will be useful, but WITHOUT * -* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * -* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * -* for more details. * -* * -* You should have received a copy of the GNU Lesser General Public License * -* along with this program. If not, see . * -******************************************************************************* -* Authors: The SOFA Team and external contributors (see Authors.txt) * -* * -* Contact information: contact@sofa-framework.org * -******************************************************************************/ -// -// C++ Interface: SpatialGridContainer -// -// Description: -// -// -// Author: The SOFA team , (C) 2006 -// -// Copyright: See COPYING file that comes with this distribution -// -// - -#ifndef SOFA_GPU_CUDA_CUDASPATIALGRIDCONTAINER_INL -#define SOFA_GPU_CUDA_CUDASPATIALGRIDCONTAINER_INL - -#include -#include -#include -#include - -namespace sofa -{ - - -namespace gpu::cuda -{ - -extern "C" -{ - void SpatialGridContainer3f_computeHash(int cellBits, float cellWidth, int nbPoints, void* particleIndex8, void* particleHash8, const void* x); - void SpatialGridContainer3f1_computeHash(int cellBits, float cellWidth, int nbPoints, void* particleIndex8, void* particleHash8, const void* x); - void SpatialGridContainer_findCellRange(int cellBits, int index0, float cellWidth, int nbPoints, const void* particleHash8, void* cellRange, void* cellGhost); -//void SpatialGridContainer3f_reorderData(int nbPoints, const void* particleHash, void* sorted, const void* x); -//void SpatialGridContainer3f1_reorderData(int nbPoints, const void* particleHash, void* sorted, const void* x); -} - -} // namespace gpu::cuda - - -namespace component::container -{ - -using namespace sofa::helper; - -// template -// typename SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVectorTypes > >::Grid SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVectorTypes > >::emptyGrid; - -template -SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVectorTypes > >::SpatialGrid(Real cellWidth) - : cellWidth(cellWidth), invCellWidth(1/cellWidth), lastX(NULL) -{ - cellBits = 15; - nbCells = 1< -SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVectorTypes > >::~SpatialGrid() -{ -} - -template template -void SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVectorTypes > >::findNeighbors(NeighborListener* /*dest*/, Real /*dist*/) -{ - msg_error("SpatialGrid") << "TODO: SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVectorTypes > >::findNeighbors(NeighborListener* dest, Real dist)"; -} - -template -void SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVectorTypes > >::computeField(ParticleField* /*field*/, Real /*dist*/) -{ - msg_error("SpatialGrid") << "TODO: SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVectorTypes > >::computeField(ParticleField* field, Real dist)"; -} - -template -void SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVectorTypes > >::reorderIndices(type::vector* /*old2new*/, type::vector* /*new2old*/) -{ - msg_error("SpatialGrid") << "TODO: SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVectorTypes > >::reorderIndices(type::vector* old2new, type::vector* new2old)"; -} - - -template<> -void SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVec3fTypes > >::kernel_computeHash( - int cellBits, Real cellWidth, int nbPoints, void* particleIndex, void* particleHash, - const void* x) -{ - /* - { - helper::ReadAccessor< sofa::gpu::cuda::CudaVector< unsigned int > > pparticleHash = this->particleHash; - for (int i=0;iparticleHash.deviceWrite(); - { - helper::ReadAccessor< sofa::gpu::cuda::CudaVector< unsigned int > > pparticleHash = this->particleHash; - for (int i=0;i -void SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVec3fTypes > >::kernel_updateGrid( - int cellBits, int index0, Real cellWidth, int nbPoints, const void* particleHash, - void* cells, void* cellGhost) -{ - //int nbbits = 8; - //while (nbbits < cellBits + 1) nbbits+=8; - //CudaSort(particleHash,particleIndex,nbbits,nbPoints*8); - gpu::cuda::SpatialGridContainer_findCellRange(cellBits, index0, cellWidth, nbPoints, particleHash, cells, cellGhost); -} - -template<> -void SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVec3f1Types > >::kernel_computeHash( - int cellBits, Real cellWidth, int nbPoints, void* particleIndex, void* particleHash, - const void* x) -{ - gpu::cuda::SpatialGridContainer3f1_computeHash(cellBits, cellWidth, nbPoints, particleIndex, particleHash, x); -} - -template<> -void SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVec3f1Types > >::kernel_updateGrid( - int cellBits, int index0, Real cellWidth, int nbPoints, const void* particleHash, - void* cells, void* cellGhost) -{ - gpu::cuda::SpatialGridContainer_findCellRange(cellBits, index0, cellWidth, nbPoints, particleHash, cells, cellGhost); -} - -#ifdef SOFA_GPU_CUDA_DOUBLE - -template<> -void SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVec3dTypes > >::kernel_computeHash( - int /*cellBits*/, Real /*cellWidth*/, int /*nbPoints*/, void* /*particleIndex*/, void* /*particleHash*/, - const void* /*x*/) -{ - msg_error("SpatialGrid") << "TODO: SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVec3dTypes > >::kernel_computeHash()"; -} - -template<> -void SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVec3dTypes > >::kernel_updateGrid( - int /*cellBits*/, int /*index0*/, Real /*cellWidth*/, int /*nbPoints*/, const void* /*particleHash*/, - void* /*cells*/, void* /*cellGhost*/) -{ - msg_error("SpatialGrid") << "TODO: SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVec3dTypes > >::kernel_updateGrid()"; -} - -#endif // SOFA_GPU_CUDA_DOUBLE - -/* -template<> -void SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVec3fTypes > >::kernel_reorderData(int nbPoints, const void* particleHash, void* sorted, const void* x) -{ - gpu::cuda::SpatialGridContainer3f_reorderData(nbPoints, particleHash, sorted, x); -} - -template<> -void SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVec3f1Types > >::kernel_reorderData(int nbPoints, const void* particleHash, void* sorted, const void* x) -{ - gpu::cuda::SpatialGridContainer3f1_reorderData(nbPoints, particleHash, sorted, x); -} -*/ - -template -void SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVectorTypes > >::update(const VecCoord& x) -{ - lastX = &x; - data.clear(); - int nbPoints = x.size(); - int index0 = nbCells+BSIZE; - /*particleIndex*/ cells.recreate(index0+nbPoints*8,8*BSIZE); - particleHash.recreate(nbPoints*8,8*BSIZE); - - { - unsigned int numElements = (unsigned int)nbPoints*8; - sofa::gpu::cuda::CudaSortPrepare(numElements); - } - - //cells.recreate(nbCells+1); - cellGhost.recreate(nbCells); - //sortedPos.recreate(nbPoints); -#if 0 - { - helper::WriteAccessor< sofa::gpu::cuda::CudaVector< int > > pcells = cells; - helper::WriteAccessor< sofa::gpu::cuda::CudaVector< unsigned int > > pparticleHash = particleHash; - helper::ReadAccessor< VecCoord > px = x; - const Real pcellWidth = cellWidth*2.0f; - //const Real pinvCellWidth = 1.0f/pcellWidth; - const int pcellMask = (1<> 1; - hgpos_y = (hgpos_y-1) >> 1; - hgpos_z = (hgpos_z-1) >> 1; - unsigned int hx = (HASH_PX*hgpos_x); - unsigned int hy = (HASH_PY*hgpos_y); - unsigned int hz = (HASH_PZ*hgpos_z); - for (int x=0; x<8; ++x) - { - unsigned int h_x = hx; if (x&1) h_x += HASH_PX; - unsigned int h_y = hy; if (x&2) h_y += HASH_PY; - unsigned int h_z = hz; if (x&4) h_z += HASH_PZ; - unsigned int hash = ((h_x ^ h_y ^ h_z) & pcellMask)<<1; - if (halfcell != x) ++hash; - pcells[index0 + i*8 + x] = i; - pparticleHash[i*8 + x] = hash; - } - } - } -#endif -#if 0 - type::vector< std::pair > cpusort; - cpusort.resize(8*nbPoints); - for (int i=0; i<8*nbPoints; ++i) - cpusort[i] = std::make_pair(pparticleHash[i],pcells[index0+i]); - std::sort(cpusort.begin(),cpusort.end(),compare_pair_first); - - for (int i=0; i<8*nbPoints; ++i) - { - pparticleHash[i] = cpusort[i].first; - pcells[index0+i] = cpusort[i].second; - } -#endif - - kernel_computeHash( - cellBits, cellWidth*2, nbPoints, cells.deviceWriteAt(index0), particleHash.deviceWrite(), - x.deviceRead()); - - { - int nbbits = 8; - while (nbbits < cellBits + 1) nbbits+=8; - sofa::gpu::cuda::CudaSort(&particleHash,0, &cells,index0, nbPoints*8, nbbits); - } - - kernel_updateGrid( - cellBits, index0, cellWidth*2, nbPoints, particleHash.deviceRead(), - cells.deviceWrite(), cellGhost.deviceWrite()); -#if 0 - std::cout << nbPoints*8 << " entries in " << nbCells << " cells." << std::endl; - int nfill = 0; - for (int c=0; c cellBegin + nbPoints/2) // || nfill >= 100 && nfill < 110) - std::cout << "Cell " << c << ": range = " << cellBegin-index0 << " - " << cellEnd-index0 << " ghost = " << cellGhost[c]-index0 << std::endl; - ++nfill; - } - std::cout << ((1000*nfill)/nbCells) * 0.1 << " % cells with particles." << std::endl; -#endif - - //kernel_reorderData(nbPoints, particleHash.deviceRead(), sortedPos.deviceWrite(), x.deviceRead()); -} - -template -void SpatialGrid< SpatialGridTypes < gpu::cuda::CudaVectorTypes > >::draw(const core::visual::VisualParams* ) -{ -#if SOFACUDA_HAVE_SOFA_GL == 1 - if (!lastX) return; - int nbPoints = particleHash.size(); - int index0 = nbCells+BSIZE; - glDisable(GL_LIGHTING); - glColor4f(1,1,1,1); - glPointSize(3); - glBegin(GL_POINTS); - unsigned int last = 0; - for (int i=0; i>=1; - //if (cell != 0 && cell != 65535) - // std::cout << i << ": "< "< "<>2)&3; - int b = (cell>>4)&3; - glColor4ub(63+r*64,63+g*64,63+b*64,255); - //glVertex3fv(sortedPos[i].ptr()); - sofa::gl::glVertexT((*lastX)[p]); - } - glEnd(); - glPointSize(1); -#endif // SOFACUDA_HAVE_SOFA_GL == 1 -} - -} // namespace component::container - - -} // namespace sofa - -#endif