Skip to content
This repository was archived by the owner on Mar 20, 2023. It is now read-only.

Commit 67a05f3

Browse files
authored
OpenACC pragma annotation was missing for kernels using euler (#85)
* cudaMemset wrong pointer fix
* User-provided flags should take precedence
1 parent 11f5622 commit 67a05f3

File tree

3 files changed

+7
-3
lines changed

3 files changed

+7
-3
lines changed

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -206,8 +206,8 @@ if(ENABLE_OPENACC)
206206
if (ENABLE_OPENACC_INFO)
207207
set(ACC_FLAGS "${ACC_FLAGS} -Minfo=acc")
208208
endif()
209-
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ACC_FLAGS}")
210-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ACC_FLAGS}")
209+
set(CMAKE_C_FLAGS "${ACC_FLAGS} ${CMAKE_C_FLAGS}")
210+
set(CMAKE_CXX_FLAGS "${ACC_FLAGS} ${CMAKE_CXX_FLAGS}")
211211
else()
212212
message(WARNING "OpenACC implementation is only supported and tested using only PGI")
213213
message(WARNING "Add required compiler flags to enabled OpenACC")

coreneuron/kinderiv.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,10 @@
5757
fout.write('#pragma acc routine seq\n')
5858
fout.write('extern int %s%s(void*, double*, _threadargsproto_);\n' % (item[0], item[1]))
5959

60+
for item in euler:
61+
fout.write('#pragma acc routine seq\n')
62+
fout.write('extern int %s%s(_threadargsproto_);\n' % (item[0], item[1]))
63+
6064
fout.write("\n/* callback indices */\n")
6165
derivoffset = 1
6266
kinoffset = 1

coreneuron/utils/randoms/nrnran123.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ nrnran123_State* nrnran123_newstream3(uint32_t id1, uint32_t id2, uint32_t id3)
149149
nrnran123_State* s;
150150

151151
cudaMalloc((void**)&s, sizeof(nrnran123_State));
152-
cudaMemset((void**)&s, 0, sizeof(nrnran123_State));
152+
cudaMemset((void*)s, 0, sizeof(nrnran123_State));
153153

154154
nrnran123_setup_cuda_newstream<<<1, 1>>>(s, id1, id2, id3);
155155
cudaDeviceSynchronize();

0 commit comments

Comments
 (0)