BlueBrain
diff --git a/‎CMakeLists.txt‎
Lines changed: 4 additions & 0 deletions b/‎CMakeLists.txt‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎coreneuron/nrnconf.h‎
Lines changed: 1 addition & 1 deletion b/‎coreneuron/nrnconf.h‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎coreneuron/nrniv/cellorder.cpp‎
Lines changed: 6 additions & 5 deletions b/‎coreneuron/nrniv/cellorder.cpp‎
Lines changed: 6 additions & 5 deletions
diff --git a/‎coreneuron/nrniv/cellorder1.cpp‎
Lines changed: 11 additions & 9 deletions b/‎coreneuron/nrniv/cellorder1.cpp‎
Lines changed: 11 additions & 9 deletions
diff --git a/‎coreneuron/nrniv/main1.cpp‎
Lines changed: 42 additions & 41 deletions b/‎coreneuron/nrniv/main1.cpp‎
Lines changed: 42 additions & 41 deletions
diff --git a/‎coreneuron/nrniv/memory.h‎
Lines changed: 52 additions & 7 deletions b/‎coreneuron/nrniv/memory.h‎
Lines changed: 52 additions & 7 deletions
diff --git a/‎coreneuron/nrniv/mk_mech.cpp‎
Lines changed: 3 additions & 1 deletion b/‎coreneuron/nrniv/mk_mech.cpp‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎coreneuron/nrniv/nrn2core_direct.h‎
Lines changed: 10 additions & 18 deletions b/‎coreneuron/nrniv/nrn2core_direct.h‎
Lines changed: 10 additions & 18 deletions
@@ -63,6 +63,7 @@ option(ENABLE_ISPC_TARGET "Enable ispc interoperability structs and data" OFF)
 option(ENABLE_NMODL "Enable external nmodl source-to-source compiler" OFF)
 option(ENABLE_CALIPER "Enable Caliper instrumentation" OFF)
 option(CORENEURON_ENABLE_LIKWID "Enable LIKWID instrumentation" OFF)
+option(ENABLE_UNIFIED "Enable Unified Memory implementation of GPU" OFF)
 
 ## set C++11 standard to be default
 set(CMAKE_CXX_STANDARD 11)
@@ -264,6 +265,9 @@ endif()
 
 if(ENABLE_OPENACC)
     set(COMPILE_LIBRARY_TYPE "STATIC")
+    if (ENABLE_UNIFIED)
+        add_definitions( -DUNIFIED_MEMORY)
+    endif()
     if(${CMAKE_C_COMPILER_ID} STREQUAL "PGI")
         add_definitions( -DPG_ACC_BUGS)
         set(ACC_FLAGS "-acc -Minline=size:200,levels:10")
 
@@ -78,7 +78,7 @@ extern void* emalloc(size_t size);
 extern void* ecalloc(size_t n, size_t size);
 extern void* erealloc(void* ptr, size_t size);
 extern void* emalloc_align(size_t size, size_t alignment);
-extern void* ecalloc_align(size_t n, size_t alignment, size_t size);
+extern void* ecalloc_align(size_t n, size_t size, size_t alignment);
 extern double hoc_Exp(double x);
 extern void check_bbcore_write_version(const char*);
 
 
@@ -5,6 +5,7 @@
 #include "coreneuron/nrniv/cellorder.h"
 #include "coreneuron/nrniv/tnode.h"
 #include "coreneuron/nrniv/lpt.h"
+#include "coreneuron/nrniv/memory.h"
 
 #include "coreneuron/nrniv/node_permute.h"  // for print_quality
 #include <set>
@@ -80,13 +81,13 @@ InterleaveInfo& InterleaveInfo::operator=(const InterleaveInfo& info) {
 
 InterleaveInfo::~InterleaveInfo() {
     if (stride) {
-        delete[] stride;
-        delete[] firstnode;
-        delete[] lastnode;
-        delete[] cellsize;
+        free_memory(stride);
+        free_memory(firstnode);
+        free_memory(lastnode);
+        free_memory(cellsize);
     }
     if (stridedispl) {
-        delete[] stridedispl;
+        free_memory(stridedispl);
     }
     if (idle) {
         delete[] nnode;
 
@@ -5,6 +5,7 @@
 
 // just for use_interleave_permute
 #include "coreneuron/nrniv/nrniv_decl.h"
+#include "coreneuron/nrniv/memory.h"
 
 #include <map>
 #include <set>
@@ -531,9 +532,9 @@ static void admin1(int ncell,
     // cellsize is the number of nodes in the cell not counting root.
     // nstride is the maximum cell size (not counting root)
     // stride[i] is the number of cells with an ith node.
-    firstnode = new int[ncell];
-    lastnode = new int[ncell];
-    cellsize = new int[ncell];
+    firstnode = (int*)ecalloc_align(ncell, sizeof(int));
+    lastnode = (int*)ecalloc_align(ncell, sizeof(int));
+    cellsize = (int*)ecalloc_align(ncell, sizeof(int));
 
     nwarp = (ncell % warpsize == 0) ? (ncell / warpsize) : (ncell / warpsize + 1);
 
@@ -557,7 +558,7 @@ static void admin1(int ncell,
         }
     }
 
-    stride = new int[nstride + 1];  // in case back substitution accesses this
+    stride = (int*)ecalloc_align(nstride + 1, sizeof(int));
     for (int i = 0; i <= nstride; ++i) {
         stride[i] = 0;
     }
@@ -617,10 +618,11 @@ static void admin2(int ncell,
     // ncore is the number of warps * warpsize
     nwarp = nodevec[ncell - 1]->groupindex + 1;
 
-    ncycles = new int[nwarp];
-    stridedispl = new int[nwarp + 1];  // running sum of ncycles (start at 0)
-    rootbegin = new int[nwarp + 1];    // index (+1) of first root in warp.
-    nodebegin = new int[nwarp + 1];    // index (+1) of first node in warp.
+    ncycles = (int*)ecalloc_align(nwarp, sizeof(int));
+    stridedispl =
+        (int*)ecalloc_align(nwarp + 1, sizeof(int));          // running sum of ncycles (start at 0)
+    rootbegin = (int*)ecalloc_align(nwarp + 1, sizeof(int));  // index (+1) of first root in warp.
+    nodebegin = (int*)ecalloc_align(nwarp + 1, sizeof(int));  // index (+1) of first node in warp.
 
     // rootbegin and nodebegin are the root index values + 1 of the last of
     // the sequence of constant groupindex
@@ -650,7 +652,7 @@ static void admin2(int ncell,
     }
 
     // strides
-    strides = new int[nstride];
+    strides = (int*)ecalloc_align(nstride, sizeof(int));
     nstride = 0;
     for (size_t iwarp = 0; iwarp < (size_t)nwarp; ++iwarp) {
         size_t j = size_t(nodebegin[iwarp + 1]);
 
@@ -364,52 +364,53 @@ const char* nrn_version(int) {
 // bsize = 0 then per step transfer
 // bsize > 1 then full trajectory save into arrays.
 void get_nrn_trajectory_requests(int bsize) {
-  if (nrn2core_get_trajectory_requests_) {
-    for (int tid=0; tid < nrn_nthread; ++tid) {
-      NrnThread& nt = nrn_threads[tid];
-      int n_pr;
-      int n_trajec;
-      int* types;
-      int* indices;
-      void** vpr;
-      double** varrays;
-      double** pvars;
-
-      // bsize is passed by reference, the return value will determine if
-      // per step return or entire trajectory return.
-      (*nrn2core_get_trajectory_requests_)(tid, bsize, n_pr, vpr, n_trajec, types, indices, pvars, varrays);
-      delete_trajectory_requests(nt);
-      if (n_trajec) {
-        TrajectoryRequests* tr = new TrajectoryRequests;
-        nt.trajec_requests = tr;
-        tr->bsize = bsize;
-        tr->n_pr = n_pr;
-        tr->n_trajec = n_trajec;
-        tr->vsize = 0;
-        tr->vpr = vpr;
-        tr->gather = new double*[n_trajec];
-        tr->varrays = varrays;
-        tr->scatter = pvars;
-        for (int i=0; i < n_trajec; ++i) {
-          tr->gather[i] = stdindex2ptr(types[i], indices[i], nt);
+    // Rebuilds the trajectory requests of every thread from what the
+    // nrn2core_get_trajectory_requests_ callback reports; does nothing when
+    // that callback pointer is not set.
+    if (nrn2core_get_trajectory_requests_) {
+        for (int tid = 0; tid < nrn_nthread; ++tid) {
+            NrnThread& nt = nrn_threads[tid];
+            // Outputs of the callback below. It takes them by reference, so
+            // they hold no defined value until the call has returned.
+            int n_pr;
+            int n_trajec;
+            int* types;
+            int* indices;
+            void** vpr;
+            double** varrays;
+            double** pvars;
+
+            // bsize is passed by reference, the return value will determine if
+            // per step return or entire trajectory return.
+            // Because bsize is this function's own parameter, the value written
+            // back for one thread is also the value passed in for the next one.
+            (*nrn2core_get_trajectory_requests_)(tid, bsize, n_pr, vpr, n_trajec, types, indices,
+                                                 pvars, varrays);
+            // presumably releases the request set this thread had before -- TODO confirm
+            delete_trajectory_requests(nt);
+            if (n_trajec) {
+                TrajectoryRequests* tr = new TrajectoryRequests;
+                nt.trajec_requests = tr;
+                tr->bsize = bsize;
+                tr->n_pr = n_pr;
+                tr->n_trajec = n_trajec;
+                tr->vsize = 0;
+                tr->vpr = vpr;
+                tr->gather = new double*[n_trajec];
+                tr->varrays = varrays;
+                tr->scatter = pvars;
+                // Turn every (type, index) pair into the pointer stdindex2ptr
+                // returns for this thread.
+                for (int i = 0; i < n_trajec; ++i) {
+                    tr->gather[i] = stdindex2ptr(types[i], indices[i], nt);
+                }
+                // types and indices are released only on this n_trajec != 0 path.
+                // NOTE(review): confirm the callback allocates neither array when
+                // it reports n_trajec == 0, otherwise they are leaked.
+                delete[] types;
+                delete[] indices;
+            }
         }
-        delete [] types;
-        delete [] indices;
-      }
     }
-  }
 }
 
 static void trajectory_return() {
-  if (nrn2core_trajectory_return_) {
-    for (int tid=0; tid < nrn_nthread; ++tid) {
-      NrnThread& nt = nrn_threads[tid];
-      TrajectoryRequests* tr = nt.trajec_requests;
-      if (tr && tr->varrays) {
-        (*nrn2core_trajectory_return_)(tid, tr->n_pr, tr->vsize, tr->vpr, nt._t);
-      }
+    // Invokes the nrn2core_trajectory_return_ callback once per thread, passing
+    // that thread's n_pr, vsize, vpr and current time nt._t; does nothing when
+    // the callback pointer is not set.
+    if (nrn2core_trajectory_return_) {
+        for (int tid = 0; tid < nrn_nthread; ++tid) {
+            NrnThread& nt = nrn_threads[tid];
+            TrajectoryRequests* tr = nt.trajec_requests;
+            // Threads without a request object, or whose request has a null
+            // varrays, are skipped.
+            if (tr && tr->varrays) {
+                (*nrn2core_trajectory_return_)(tid, tr->n_pr, tr->vsize, tr->vpr, nt._t);
+            }
+        }
     }
-  }
 }
 
 }  // namespace coreneuron
@@ -493,7 +494,7 @@ extern "C" int run_solve_core(int argc, char** argv) {
         if (corenrn_embedded) {
             // arg is vector size required but NEURON can instead
             // specify that returns will be on a per time step basis.
-            get_nrn_trajectory_requests(int(tstop/dt) + 2);
+            get_nrn_trajectory_requests(int(tstop / dt) + 2);
             (*nrn2core_part2_clean_)();
         }
 
 
@@ -29,11 +29,57 @@ THE POSSIBILITY OF SUCH DAMAGE.
 #ifndef _H_MEMORY_
 #define _H_MEMORY_
 
-#include <stdlib.h>
 #include <string.h>
+
 #include "coreneuron/nrniv/nrn_assert.h"
 
+#if !defined(NRN_SOA_BYTE_ALIGN)
+// for layout 0, every range variable array must be aligned by at least 16 bytes (the size of the
+// simd memory bus)
+#define NRN_SOA_BYTE_ALIGN (8 * sizeof(double))
+#endif
+
+/// for gpu builds with unified memory support
+#if (defined(__CUDACC__) || defined(UNIFIED_MEMORY))
+
+#include <cuda_runtime_api.h>
+
+// TODO : error handling for CUDA routines
+/** Allocate num_bytes of CUDA managed memory and store its address in pointer.
+ *
+ *  The alignment argument is accepted for signature parity with the
+ *  non-CUDA implementation and is not used here.
+ *  A failed allocation is reported through nrn_assert instead of leaving
+ *  pointer without a defined value.
+ */
+inline void alloc_memory(void*& pointer, size_t num_bytes, size_t /*alignment*/) {
+    // The CUDA runtime documents a zero-byte request as cudaErrorInvalidValue;
+    // hand back a null pointer for it so that empty requests stay harmless.
+    if (num_bytes == 0) {
+        pointer = NULL;
+        return;
+    }
+    // Call outside of the assertion macro so the allocation itself never
+    // depends on how nrn_assert expands.
+    const cudaError_t status = cudaMallocManaged(&pointer, num_bytes);
+    nrn_assert(status == cudaSuccess);
+}
+
+/** Allocate num_bytes through alloc_memory and set them to zero.
+ *
+ *  The caller's alignment is forwarded unchanged to alloc_memory. A failing
+ *  cudaMemset is reported through nrn_assert.
+ */
+inline void calloc_memory(void*& pointer, size_t num_bytes, size_t alignment) {
+    alloc_memory(pointer, num_bytes, alignment);
+    // Nothing to clear for an empty request, so the runtime is not called.
+    if (num_bytes != 0) {
+        const cudaError_t status = cudaMemset(pointer, 0, num_bytes);
+        nrn_assert(status == cudaSuccess);
+    }
+}
+
+/** Release pointer with cudaFree, the counterpart of the cudaMallocManaged
+ *  call used by alloc_memory in this branch.
+ */
+inline void free_memory(void* pointer) {
+    // NOTE(review): the status returned by cudaFree is not inspected.
+    cudaFree(pointer);
+}
+
+/// for cpu builds use posix memalign
+#else
+
+#include <stdlib.h>
+
+/** Allocate num_bytes aligned to alignment with posix_memalign and store the
+ *  address in pointer. A non-zero posix_memalign status is reported through
+ *  nrn_assert.
+ */
+inline void alloc_memory(void*& pointer, size_t num_bytes, size_t alignment) {
+    // Keep the allocating call outside of the assertion macro so that the
+    // allocation never depends on how nrn_assert expands.
+    const int status = posix_memalign(&pointer, alignment, num_bytes);
+    nrn_assert(status == 0);
+}
+
+/** Allocate num_bytes through alloc_memory (same alignment argument) and set
+ *  every byte of the result to zero.
+ */
+inline void calloc_memory(void*& pointer, size_t num_bytes, size_t alignment) {
+    alloc_memory(pointer, num_bytes, alignment);
+    memset(pointer, 0, num_bytes);
+}
+
+/** Release pointer with free, the counterpart of the posix_memalign call used
+ *  by alloc_memory in this branch.
+ */
+inline void free_memory(void* pointer) {
+    free(pointer);
+}
+
+#endif
+
 namespace coreneuron {
+
 /** Independent function to compute the needed padding,
     the chunk argument is the number of doubles the chunk is padded upon.
 */
@@ -57,23 +103,22 @@ inline bool is_aligned(void* pointer, size_t alignment) {
 
 /** Allocate the aligned memory.
  */
-inline void* emalloc_align(size_t size, size_t alignment) {
+inline void* emalloc_align(size_t size, size_t alignment = NRN_SOA_BYTE_ALIGN) {
+    // Returns size bytes obtained from alloc_memory; alignment falls back to
+    // NRN_SOA_BYTE_ALIGN when the caller omits it. The contents are not
+    // initialised here.
+    // memptr has no value until alloc_memory, which takes it by reference,
+    // has filled it in.
     void* memptr;
-    nrn_assert(posix_memalign(&memptr, alignment, size) == 0);
+    alloc_memory(memptr, size, alignment);
+    // Checked here because the CUDA flavour of alloc_memory does not use the
+    // alignment argument.
     nrn_assert(is_aligned(memptr, alignment));
     return memptr;
 }
 
-/** Allocate the aligned memory and set it to 1.
+/** Allocate the aligned memory and set it to 0.
  */
-inline void* ecalloc_align(size_t n, size_t alignment, size_t size) {
+inline void* ecalloc_align(size_t n, size_t size, size_t alignment = NRN_SOA_BYTE_ALIGN) {
+    // Returns n elements of size bytes each, zero-filled by calloc_memory and
+    // aligned to alignment (NRN_SOA_BYTE_ALIGN when omitted).
+    // A request for zero elements yields a null pointer without allocating.
     void* p;
     if (n == 0) {
         return (void*)0;
     }
-    nrn_assert(posix_memalign(&p, alignment, n * size) == 0);
+    // Refuse requests whose byte total n * size would wrap around size_t and
+    // silently produce a buffer smaller than the caller expects.
+    nrn_assert(size == 0 || n <= ((size_t)-1) / size);
+    calloc_memory(p, n * size, alignment);
     nrn_assert(is_aligned(p, alignment));
-    memset(p, 1, n * size);  // Avoid native division by zero (cyme...)
     return p;
 }
 }  // namespace coreneuron
 
@@ -119,7 +119,9 @@ void mk_mech(const char* datpath) {
 // we are embedded in NEURON, get info as stringstream from nrnbbcore_write.cpp
 static void mk_mech() {
     static bool done = false;
-    if (done) { return; }
+    if (done) {
+        return;
+    }
     nrn_need_byteswap = 0;
     std::stringstream ss;
     nrn_assert(nrn2core_mkmech_info_);
 
@@ -92,28 +92,20 @@ extern void (*nrn2core_part2_clean_)();
 
 /* what variables to send back to NEURON on each time step */
 extern void (*nrn2core_get_trajectory_requests_)(int tid,
-                                                int& bsize,
-                                                int& n_pr,
-                                                void**& vpr,
-                                                int& n_trajec,
-                                                int*& types,
-                                                int*& indices,
-                                                double**& pvars,
-                                                double**& varrays);
+                                                 int& bsize,
+                                                 int& n_pr,
+                                                 void**& vpr,
+                                                 int& n_trajec,
+                                                 int*& types,
+                                                 int*& indices,
+                                                 double**& pvars,
+                                                 double**& varrays);
 
 /* send values to NEURON on each time step */
-extern void (*nrn2core_trajectory_values_)(int tid,
-                                          int n_pr,
-                                          void** vpr,
-                                          double t);
+extern void (*nrn2core_trajectory_values_)(int tid, int n_pr, void** vpr, double t);
 
 /* Fill the Vector data arrays and send back the sizes at end of run */
-extern void (*nrn2core_trajectory_return_)(int tid,
-                                          int n_pr,
-                                          int vecsz,
-                                          void** vpr,
-                                          double t);
-
+extern void (*nrn2core_trajectory_return_)(int tid, int n_pr, int vecsz, void** vpr, double t);
 }
 
 #endif /* nrn2core_direct_h */