From cb12f2ead749ebdbc6eb0eb48b766ff02558aeba Mon Sep 17 00:00:00 2001 From: Patrick Seewald Date: Thu, 12 Mar 2020 12:41:07 +0100 Subject: [PATCH 01/34] Tensors: remove timeset/timestop for often called subroutines --- src/tas/dbcsr_tas_base.F | 31 ++++--------------------------- src/tas/dbcsr_tas_reshape_ops.F | 3 --- src/tensors/dbcsr_tensor_block.F | 25 ------------------------- 3 files changed, 4 insertions(+), 55 deletions(-) diff --git a/src/tas/dbcsr_tas_base.F b/src/tas/dbcsr_tas_base.F index 0954fe55549..aa27d0b5f8e 100644 --- a/src/tas/dbcsr_tas_base.F +++ b/src/tas/dbcsr_tas_base.F @@ -701,9 +701,7 @@ SUBROUTINE dbcsr_tas_iterator_next_area_block(iterator, row, column, block, tran CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_tas_iterator_next_area_block', & routineP = moduleN//':'//routineN - INTEGER :: column_group, handle, row_group - - CALL timeset(routineN, handle) + INTEGER :: column_group, row_group CALL dbcsr_iterator_next_block(iterator%iter, row_group, column_group, block, transposed, block_number, & row_size, col_size) @@ -711,7 +709,6 @@ SUBROUTINE dbcsr_tas_iterator_next_area_block(iterator, row, column, block, tran CALL block_index_local_to_global(iterator%dist%info, iterator%dist, row_group=row_group, column_group=column_group, & row=row, column=column) - CALL timestop(handle) END SUBROUTINE SUBROUTINE dbcsr_tas_iterator_next_block_index(iterator, row, column, block_number, & @@ -729,17 +726,14 @@ SUBROUTINE dbcsr_tas_iterator_next_block_index(iterator, row, column, block_numb CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_tas_iterator_next_block_index', & routineP = moduleN//':'//routineN - INTEGER :: column_group, handle, row_group + INTEGER :: column_group, row_group - CALL timeset(routineN, handle) CALL dbcsr_iterator_next_block(iterator%iter, row_group, column_group, block_number, transposed, blk_p, & row_size, col_size) CALL block_index_local_to_global(iterator%dist%info, iterator%dist, row_group=row_group, column_group=column_group, & row=row, column=column) - CALL timestop(handle) - END SUBROUTINE SUBROUTINE dbcsr_tas_reserve_blocks_index(matrix, rows, columns) @@ -787,15 +781,12 @@ SUBROUTINE dbcsr_tas_put_block_area(matrix, row, col, block, transposed, summati CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_tas_put_block_area', & routineP = moduleN//':'//routineN - INTEGER :: col_group, handle, row_group - - CALL timeset(routineN, handle) + INTEGER :: col_group, row_group CALL block_index_global_to_local(dbcsr_tas_info(matrix), matrix%dist, row=row, column=col, & row_group=row_group, column_group=col_group) CALL dbcsr_put_block(matrix%matrix, row_group, col_group, block, transposed=transposed, summation=summation, scale=scale) - CALL timestop(handle) END SUBROUTINE SUBROUTINE dbcsr_tas_get_block_p_area(matrix, row, col, block, transposed, found, row_size, col_size) @@ -809,17 +800,13 @@ SUBROUTINE dbcsr_tas_get_block_p_area(matrix, row, col, block, transposed, found CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_tas_get_block_p_area', & routineP = moduleN//':'//routineN - INTEGER :: col_group, handle, row_group - - CALL timeset(routineN, handle) + INTEGER :: col_group, row_group CALL block_index_global_to_local(dbcsr_tas_info(matrix), matrix%dist, row=row, column=col, & row_group=row_group, column_group=col_group) CALL dbcsr_get_block_p(matrix%matrix, row_group, col_group, block, transposed, found, row_size=row_size, col_size=col_size) - CALL timestop(handle) - END SUBROUTINE SUBROUTINE dbcsr_tas_copy_distribution(dist_in, dist_out, own_dist) @@ 
-1074,16 +1061,13 @@ SUBROUTINE dbcsr_tas_iterator_next_block_${dsuffix}$ (iterator, row, column, blo INTEGER :: row_group, column_group CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_tas_iterator_next_block_${dsuffix}$', & routineP = moduleN//':'//routineN - INTEGER :: handle - CALL timeset(routineN, handle) CALL dbcsr_iterator_next_block(iterator%iter, row_group, column_group, block, transposed, block_number, & row_size, col_size) CALL block_index_local_to_global(iterator%dist%info, iterator%dist, row_group=row_group, column_group=column_group, & row=row, column=column) - CALL timestop(handle) END SUBROUTINE #:endfor @@ -1100,15 +1084,12 @@ SUBROUTINE dbcsr_tas_put_block_${dsuffix}$ (matrix, row, col, block, transposed, INTEGER :: col_group, row_group CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_tas_put_block_${dsuffix}$', & routineP = moduleN//':'//routineN - INTEGER :: handle - CALL timeset(routineN, handle) CALL block_index_global_to_local(matrix%dist%info, matrix%dist, row=row, column=col, & row_group=row_group, column_group=col_group) CALL dbcsr_put_block(matrix%matrix, row_group, col_group, block, transposed=transposed, summation=summation, scale=scale) - CALL timestop(handle) END SUBROUTINE #:endfor @@ -1125,16 +1106,12 @@ SUBROUTINE dbcsr_tas_get_block_p_${dsuffix}$ (matrix, row, col, block, transpose INTEGER :: col_group, row_group CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_tas_get_block_p_${dsuffix}$', & routineP = moduleN//':'//routineN - INTEGER :: handle - - CALL timeset(routineN, handle) CALL block_index_global_to_local(matrix%dist%info, matrix%dist, row=row, column=col, & row_group=row_group, column_group=col_group) CALL dbcsr_get_block_p(matrix%matrix, row_group, col_group, block, transposed, found, row_size=row_size, col_size=col_size) - CALL timestop(handle) END SUBROUTINE #:endfor diff --git a/src/tas/dbcsr_tas_reshape_ops.F b/src/tas/dbcsr_tas_reshape_ops.F index 17364215d27..c80e113cfac 100644 --- a/src/tas/dbcsr_tas_reshape_ops.F +++ b/src/tas/dbcsr_tas_reshape_ops.F @@ -715,12 +715,10 @@ SUBROUTINE block_buffer_get_next_area_block(buffer, ndata, index, block, advance #:for dparam, dtype, dsuffix in dtype_float_list ${dtype}$, DIMENSION(:, :), POINTER :: data_${dsuffix}$ => NULL() #:endfor - INTEGER :: handle CHARACTER(LEN=*), PARAMETER :: routineN = 'block_buffer_get_next_area_block', & routineP = moduleN//':'//routineN - CALL timeset(routineN, handle) IF (PRESENT(block)) THEN CALL dbcsr_data_get_sizes(block, sizes, valid) DBCSR_ASSERT(valid) @@ -739,7 +737,6 @@ SUBROUTINE block_buffer_get_next_area_block(buffer, ndata, index, block, advance ENDIF #:endfor END SELECT - CALL timestop(handle) END SUBROUTINE #:for dparam, dtype, dsuffix in dtype_float_list diff --git a/src/tensors/dbcsr_tensor_block.F b/src/tensors/dbcsr_tensor_block.F index eeba52254cb..29fb8717165 100644 --- a/src/tensors/dbcsr_tensor_block.F +++ b/src/tensors/dbcsr_tensor_block.F @@ -189,11 +189,9 @@ SUBROUTINE dbcsr_t_iterator_start(iterator, tensor) !! Generalization of dbcsr_iterator_start for tensors. 
TYPE(dbcsr_t_iterator_type), INTENT(OUT) :: iterator TYPE(dbcsr_t_type), INTENT(IN) :: tensor - INTEGER :: handle CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_t_iterator_start', & routineP = moduleN//':'//routineN - CALL timeset(routineN, handle) DBCSR_ASSERT(tensor%valid) CALL dbcsr_tas_iterator_start(iterator%iter, tensor%matrix_rep) @@ -202,24 +200,20 @@ SUBROUTINE dbcsr_t_iterator_start(iterator, tensor) iterator%blk_sizes = tensor%blk_sizes iterator%blk_offsets = tensor%blk_offsets - CALL timestop(handle) END SUBROUTINE SUBROUTINE dbcsr_t_iterator_stop(iterator) !! Generalization of dbcsr_iterator_stop for tensors. TYPE(dbcsr_t_iterator_type), INTENT(INOUT) :: iterator - INTEGER :: handle CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_t_iterator_stop', & routineP = moduleN//':'//routineN - CALL timeset(routineN, handle) CALL dbcsr_tas_iterator_stop(iterator%iter) CALL destroy_nd_to_2d_mapping(iterator%nd_index) CALL destroy_nd_to_2d_mapping(iterator%nd_index_blk) CALL destroy_array_list(iterator%blk_sizes) CALL destroy_array_list(iterator%blk_offsets) - CALL timestop(handle) END SUBROUTINE PURE FUNCTION ndims_iterator(iterator) @@ -252,11 +246,9 @@ SUBROUTINE dbcsr_t_iterator_next_block(iterator, ind_nd, blk, blk_p, blk_size, b !! blk offset in each dimension INTEGER(KIND=int_8), DIMENSION(2) :: ind_2d - INTEGER :: handle CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_t_iterator_next_block', & routineP = moduleN//':'//routineN - CALL timeset(routineN, handle) CALL dbcsr_tas_iterator_next_block(iterator%iter, ind_2d(1), ind_2d(2), blk, blk_p=blk_p) ind_nd(:) = get_nd_indices(iterator%nd_index_blk, ind_2d) @@ -265,21 +257,17 @@ SUBROUTINE dbcsr_t_iterator_next_block(iterator, ind_nd, blk, blk_p, blk_size, b ! offset since block index mapping is not consistent with element index mapping IF (PRESENT(blk_offset)) blk_offset(:) = get_array_elements(iterator%blk_offsets, ind_nd) - CALL timestop(handle) END SUBROUTINE FUNCTION dbcsr_t_iterator_blocks_left(iterator) !! Generalization of dbcsr_iterator_blocks_left for tensors. TYPE(dbcsr_t_iterator_type), INTENT(IN) :: iterator LOGICAL :: dbcsr_t_iterator_blocks_left - INTEGER :: handle CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_t_iterator_blocks_left', & routineP = moduleN//':'//routineN - CALL timeset(routineN, handle) dbcsr_t_iterator_blocks_left = dbcsr_tas_iterator_blocks_left(iterator%iter) - CALL timestop(handle) END FUNCTION SUBROUTINE dbcsr_t_reserve_blocks_index_array(tensor, blk_ind) @@ -501,18 +489,15 @@ SUBROUTINE dbcsr_t_get_anyd_block(tensor, ind, block, found) !! block to get LOGICAL, INTENT(OUT) :: found !! whether block was found - INTEGER :: handle CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_t_get_anyd_block', & routineP = moduleN//':'//routineN - CALL timeset(routineN, handle) SELECT CASE (dbcsr_t_get_data_type(tensor)) #:for dparam, dtype, dsuffix in dtype_float_list CASE (${dparam}$) CALL dbcsr_t_get_anyd_block_${dsuffix}$ (tensor, ind, block, found) #:endfor END SELECT - CALL timestop(handle) END SUBROUTINE SUBROUTINE dbcsr_t_put_anyd_block(tensor, ind, block, summation, scale) @@ -528,11 +513,9 @@ SUBROUTINE dbcsr_t_put_anyd_block(tensor, ind, block, summation, scale) !! whether block should be summed to existing block TYPE(dbcsr_scalar_type), INTENT(IN), OPTIONAL :: scale !! 
scaling factor - INTEGER :: handle CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_t_put_anyd_block', & routineP = moduleN//':'//routineN - CALL timeset(routineN, handle) SELECT CASE (block%data_type) #:for dparam, dtype, dsuffix in dtype_float_list CASE (${dparam}$) @@ -544,7 +527,6 @@ SUBROUTINE dbcsr_t_put_anyd_block(tensor, ind, block, summation, scale) #:endfor END SELECT - CALL timestop(handle) END SUBROUTINE #:for dparam, dtype, dsuffix in dtype_float_list @@ -622,7 +604,6 @@ SUBROUTINE dbcsr_t_put_${ndim}$d_block_${dsuffix}$ (tensor, ind, sizes, block, s INTEGER, DIMENSION(2) :: dims_2d ${dtype}$, ALLOCATABLE, DIMENSION(:, :) :: block_2d TYPE(nd_to_2d_mapping) :: map_blk - INTEGER :: handle CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_t_put_${ndim}$d_block_${dsuffix}$', & routineP = moduleN//':'//routineN LOGICAL :: found @@ -635,7 +616,6 @@ SUBROUTINE dbcsr_t_put_${ndim}$d_block_${dsuffix}$ (tensor, ind, sizes, block, s DBCSR_ASSERT(found) ENDIF - CALL timeset(routineN, handle) ! reshape block CALL dbcsr_t_get_mapping_info(tensor%nd_index_blk, map1_2d=map1_2d, map2_2d=map2_2d) CALL create_nd_to_2d_mapping(map_blk, sizes, map1_2d, map2_2d) @@ -649,7 +629,6 @@ SUBROUTINE dbcsr_t_put_${ndim}$d_block_${dsuffix}$ (tensor, ind, sizes, block, s CALL dbcsr_tas_put_block(tensor%matrix_rep, ind_2d(1), ind_2d(2), block_2d, summation=summation, & scale=scale) - CALL timestop(handle) END SUBROUTINE #:endfor #:endfor @@ -699,12 +678,9 @@ SUBROUTINE dbcsr_t_get_${ndim}$d_block_${dsuffix}$ (tensor, ind, sizes, block, f ${dtype}$, DIMENSION(:, :), ALLOCATABLE :: block_2d TYPE(nd_to_2d_mapping) :: map_blk LOGICAL :: tr - INTEGER :: handle CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_t_get_${ndim}$d_block_${dsuffix}$', & routineP = moduleN//':'//routineN - CALL timeset(routineN, handle) - NULLIFY (block_2d_ptr) ! 
convert block index @@ -721,7 +697,6 @@ SUBROUTINE dbcsr_t_get_${ndim}$d_block_${dsuffix}$ (tensor, ind, sizes, block, f CALL reshape_2d_to_nd_block(map_blk, block_2d, block) ENDIF - CALL timestop(handle) END SUBROUTINE #:endfor #:endfor From 68220c4be9e8bf3918bf17d4538152b11c52a22a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tiziano=20M=C3=BCller?= Date: Mon, 16 Mar 2020 15:55:20 +0100 Subject: [PATCH 02/34] fix typo in filetypes fixes #315 --- .pre-commit/check_header.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit/check_header.py b/.pre-commit/check_header.py index 55e8279a064..c005af68546 100755 --- a/.pre-commit/check_header.py +++ b/.pre-commit/check_header.py @@ -17,7 +17,7 @@ from contextlib import contextmanager TYPES = { - "c_cpp": [".c", "h", ".cc", ".hh", ".cxx", ".cpp", ".hpp", ".cu"], + "c_cpp": [".c", ".h", ".cc", ".hh", ".cxx", ".hxx", ".cpp", ".hpp", ".cu"], "python": [".py"], "fortran": [".F", ".f", ".f90", ".f03"], } From 2256de62f746e5f15483653d3c2909ebfdb5dec5 Mon Sep 17 00:00:00 2001 From: alazzaro Date: Wed, 18 Mar 2020 03:22:07 -0500 Subject: [PATCH 03/34] Add output of the memory pool for CPU --- src/core/dbcsr_config.F | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/core/dbcsr_config.F b/src/core/dbcsr_config.F index a599c7a9f8e..53ed4dd9f5b 100644 --- a/src/core/dbcsr_config.F +++ b/src/core/dbcsr_config.F @@ -385,6 +385,9 @@ SUBROUTINE dbcsr_print_config(unit_nr) "DBCSR| Multiplication size k stacks", dbcsr_cfg%nk_stacks ENDIF + WRITE (UNIT=unit_nr, FMT='(1X,A,T80,L1)') & + "DBCSR| Use memory pool for CPU allocation", dbcsr_cfg%use_mempools_cpu + IF (has_mpi) THEN IF (dbcsr_cfg%num_layers_3D < 2) THEN WRITE (UNIT=unit_nr, FMT='(1X,A,T75,A)') & From 7132ee1082b0297e7dd50417447f5ec331122b07 Mon Sep 17 00:00:00 2001 From: alazzaro Date: Wed, 18 Mar 2020 03:27:27 -0500 Subject: [PATCH 04/34] Promote CCE warning to an error --- cmake/CompilerConfiguration.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/CompilerConfiguration.cmake b/cmake/CompilerConfiguration.cmake index 03b0b436b7a..b51064b7e6f 100644 --- a/cmake/CompilerConfiguration.cmake +++ b/cmake/CompilerConfiguration.cmake @@ -20,9 +20,9 @@ elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "NAG") set(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -C=all") # some checks are not available with OpenMP endif () elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray") - set(CMAKE_Fortran_FLAGS "-f free -M3105") # -M3105: hide a false-positive warning about modified loop variables due to loop fusing + set(CMAKE_Fortran_FLAGS "-f free -M3105 -ME7212") # -M3105: hide a false-positive warning about modified loop variables due to loop fusing, promote warning 7212 to an error set(CMAKE_Fortran_FLAGS_RELEASE "-O2") - set(CMAKE_Fortran_FLAGS_DEBUG "-G2 -ME7212") # promote warning 7212 to an error + set(CMAKE_Fortran_FLAGS_DEBUG "-G2") set(CMAKE_Fortran_MODOUT_FLAG "-ef") # override to get lower-case module file names else () message(WARNING "\ From f3558fd4e7307f93a41fabfadc03a57ce74ad140 Mon Sep 17 00:00:00 2001 From: Patrick Seewald Date: Wed, 18 Mar 2020 20:57:51 +0100 Subject: [PATCH 05/34] MPI wrapper: remove timeset/timestop that cause significant overhead --- src/mpi/dbcsr_mpiwrap.F | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/src/mpi/dbcsr_mpiwrap.F b/src/mpi/dbcsr_mpiwrap.F index 1122511d29b..df9c32e87be 100644 --- a/src/mpi/dbcsr_mpiwrap.F +++ b/src/mpi/dbcsr_mpiwrap.F @@ -747,7 +747,7 @@ SUBROUTINE 
mp_reordering(mp_comm, mp_new_comm, ranks_order) ierr = 0 #if defined(__parallel) - CALL mpi_comm_group(mp_comm, oldgroup, ierr); + CALL mpi_comm_group(mp_comm, oldgroup, ierr); IF (ierr /= 0) CALL mp_stop(ierr, "mpi_comm_group @ mp_reordering") CALL mpi_group_incl(oldgroup, SIZE(ranks_order), ranks_order, newgroup, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_group_incl @ mp_reordering") @@ -1091,10 +1091,9 @@ RECURSIVE SUBROUTINE mp_environ_l(numtask, taskid, groupid) CHARACTER(LEN=*), PARAMETER :: routineN = 'mp_environ_l', routineP = moduleN//':'//routineN - INTEGER :: handle, ierr + INTEGER :: ierr ierr = 0 - CALL timeset(routineN, handle) IF (PRESENT(numtask)) numtask = 1 IF (PRESENT(taskid)) taskid = 0 @@ -1111,7 +1110,6 @@ RECURSIVE SUBROUTINE mp_environ_l(numtask, taskid, groupid) #else MARK_USED(groupid) #endif - CALL timestop(handle) END SUBROUTINE mp_environ_l @@ -1124,13 +1122,12 @@ SUBROUTINE mp_environ_c(numtask, dims, task_coor, groupid) CHARACTER(LEN=*), PARAMETER :: routineN = 'mp_environ_c', & routineP = moduleN//':'//routineN - INTEGER :: handle, ierr + INTEGER :: ierr #if defined(__parallel) LOGICAL, DIMENSION(2) :: periods #endif ierr = 0 - CALL timeset(routineN, handle) numtask = 1 task_coor = 0 dims = 1 @@ -1143,7 +1140,6 @@ SUBROUTINE mp_environ_c(numtask, dims, task_coor, groupid) #else MARK_USED(groupid) #endif - CALL timestop(handle) END SUBROUTINE mp_environ_c @@ -1155,10 +1151,9 @@ SUBROUTINE mp_environ_c2(comm, ndims, dims, task_coor, periods) CHARACTER(LEN=*), PARAMETER :: routineN = 'mp_environ_c2', routineP = moduleN//':'//routineN - INTEGER :: handle, ierr + INTEGER :: ierr ierr = 0 - CALL timeset(routineN, handle) task_coor = 0 dims = 1 @@ -1169,7 +1164,6 @@ SUBROUTINE mp_environ_c2(comm, ndims, dims, task_coor, periods) #else MARK_USED(comm) #endif - CALL timestop(handle) END SUBROUTINE mp_environ_c2 @@ -1235,10 +1229,9 @@ SUBROUTINE mp_cart_coords(comm, rank, coords) CHARACTER(LEN=*), PARAMETER :: routineN = 'mp_cart_coords', routineP = moduleN//':'//routineN - INTEGER :: handle, ierr, m + INTEGER :: ierr, m ierr = 0 - CALL timeset(routineN, handle) m = SIZE(coords) #if defined(__parallel) @@ -1249,7 +1242,6 @@ SUBROUTINE mp_cart_coords(comm, rank, coords) MARK_USED(rank) MARK_USED(comm) #endif - CALL timestop(handle) END SUBROUTINE mp_cart_coords @@ -1452,10 +1444,9 @@ SUBROUTINE mp_cart_rank(group, pos, rank) CHARACTER(LEN=*), PARAMETER :: routineN = 'mp_cart_rank', routineP = moduleN//':'//routineN - INTEGER :: handle, ierr + INTEGER :: ierr ierr = 0 - CALL timeset(routineN, handle) #if defined(__parallel) CALL mpi_cart_rank(group, pos, rank, ierr) @@ -1465,7 +1456,6 @@ SUBROUTINE mp_cart_rank(group, pos, rank) MARK_USED(group) MARK_USED(pos) #endif - CALL timestop(handle) END SUBROUTINE mp_cart_rank From 6cef0701a227675db8a8dbd0ad8ef25b0b7792db Mon Sep 17 00:00:00 2001 From: Patrick Seewald Date: Thu, 19 Mar 2020 11:16:01 +0100 Subject: [PATCH 06/34] MPI wrapper: clean up add_perf - make message size mandatory and remove all calls without message size - remove count argument, assuming it's always 1 --- src/mpi/dbcsr_mpiwrap.F | 145 ++++++++++++++++------------------------ 1 file changed, 56 insertions(+), 89 deletions(-) diff --git a/src/mpi/dbcsr_mpiwrap.F b/src/mpi/dbcsr_mpiwrap.F index df9c32e87be..f28c361b80e 100644 --- a/src/mpi/dbcsr_mpiwrap.F +++ b/src/mpi/dbcsr_mpiwrap.F @@ -764,7 +764,6 @@ SUBROUTINE mp_reordering(mp_comm, mp_new_comm, ranks_order) mp_new_comm = newcomm debug_comm_count = debug_comm_count + 1 - CALL add_perf(perf_id=1, 
count=1) #else MARK_USED(ranks_order) mp_new_comm = mp_comm @@ -949,11 +948,10 @@ SUBROUTINE describe_mp_perf_env(scr) CALL mp_perf_env_describe(perf_env, scr) END SUBROUTINE describe_mp_perf_env - SUBROUTINE add_perf(perf_id, count, msg_size) + SUBROUTINE add_perf(perf_id, msg_size) !! adds the performance informations of one call INTEGER, INTENT(in) :: perf_id - INTEGER, INTENT(in), OPTIONAL :: count - INTEGER, INTENT(in), OPTIONAL :: msg_size + INTEGER, INTENT(in) :: msg_size #if defined(__parallel) TYPE(mp_perf_type), POINTER :: mp_perf @@ -962,15 +960,10 @@ SUBROUTINE add_perf(perf_id, count, msg_size) IF (.NOT. ASSOCIATED(mp_perf_stack(stack_pointer)%mp_perf_env)) return mp_perf => mp_perf_stack(stack_pointer)%mp_perf_env%mp_perfs(perf_id) - IF (PRESENT(count)) THEN - mp_perf%count = mp_perf%count + count - END IF - IF (PRESENT(msg_size)) THEN - mp_perf%msg_size = mp_perf%msg_size + REAL(msg_size, dp) - END IF + mp_perf%count = mp_perf%count + 1 + mp_perf%msg_size = mp_perf%msg_size + REAL(msg_size, dp) #else MARK_USED(perf_id) - MARK_USED(count) MARK_USED(msg_size) #endif @@ -1039,7 +1032,6 @@ SUBROUTINE mp_sync(group) #if defined(__parallel) CALL mpi_barrier(group, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_barrier @ mp_sync") - CALL add_perf(perf_id=5, count=1) #else MARK_USED(group) #endif @@ -1065,7 +1057,6 @@ SUBROUTINE mp_isync(group, request) #if __MPI_VERSION > 2 CALL mpi_ibarrier(group, request, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_ibarrier @ mp_isync") - CALL add_perf(perf_id=26, count=1) #else MARK_USED(group) MARK_USED(request) @@ -1211,7 +1202,6 @@ SUBROUTINE mp_cart_create(comm_old, ndims, dims, pos, comm_cart) CALL mpi_cart_get(comm_cart, ndims, dims, period, pos, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_cart_get @ mp_cart_create") END IF - CALL add_perf(perf_id=1, count=1) #else pos(1:ndims) = 0 dims = 1 @@ -1474,11 +1464,8 @@ SUBROUTINE mp_wait(request) CALL timeset(routineN, handle) #if defined(__parallel) - CALL mpi_wait(request, MPI_STATUS_IGNORE, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_wait @ mp_wait") - - CALL add_perf(perf_id=9, count=1) #else MARK_USED(request) #endif @@ -1510,7 +1497,6 @@ SUBROUTINE mp_waitall_1(requests) CALL mpi_waitall_internal(count, requests, status, ierr) ! MPI_STATUSES_IGNORE openmpi workaround IF (ierr /= 0) CALL mp_stop(ierr, "mpi_waitall @ mp_waitall_1") DEALLOCATE (status) - CALL add_perf(perf_id=9, count=1) #else MARK_USED(requests) #endif @@ -1540,8 +1526,6 @@ SUBROUTINE mp_waitall_2(requests) CALL mpi_waitall_internal(count, requests, status, ierr) ! 
MPI_STATUSES_IGNORE openmpi workaround IF (ierr /= 0) CALL mp_stop(ierr, "mpi_waitall @ mp_waitall_2") DEALLOCATE (status) - - CALL add_perf(perf_id=9, count=1) #else MARK_USED(requests) #endif @@ -1585,8 +1569,6 @@ SUBROUTINE mp_waitany(requests, completed) CALL mpi_waitany(count, requests, completed, MPI_STATUS_IGNORE, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_waitany @ mp_waitany") - - CALL add_perf(perf_id=9, count=1) #else MARK_USED(requests) completed = 1 @@ -1751,7 +1733,6 @@ SUBROUTINE mp_comm_split_direct(comm, sub_comm, color, key) CALL mpi_comm_split(comm, color, my_key, sub_comm, ierr) debug_comm_count = debug_comm_count + 1 IF (ierr /= mpi_success) CALL mp_stop(ierr, routineN) - CALL add_perf(perf_id=10, count=1) #else CALL mp_comm_dup(comm, sub_comm) MARK_USED(color) @@ -1870,8 +1851,6 @@ SUBROUTINE mp_comm_split(comm, sub_comm, ngroups, group_distribution, & CALL mpi_comm_split(comm, color, 0, sub_comm, ierr) debug_comm_count = debug_comm_count + 1 IF (ierr /= mpi_success) CALL mp_stop(ierr, "in "//routineP//" split") - - CALL add_perf(perf_id=10, count=1) #else CALL mp_comm_dup(comm, sub_comm) group_distribution(0) = 0 @@ -1953,7 +1932,7 @@ SUBROUTINE mp_bcast_b(msg, source, gid) #if defined(__parallel) CALL mpi_bcast(msg, msglen, MPI_LOGICAL, source, gid, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_bcast @ "//routineN) - CALL add_perf(perf_id=2, count=1, msg_size=msglen*loglen) + CALL add_perf(perf_id=2, msg_size=msglen*loglen) #else MARK_USED(msg) MARK_USED(source) @@ -1977,7 +1956,7 @@ SUBROUTINE mp_bcast_bv(msg, source, gid) #if defined(__parallel) CALL mpi_bcast(msg, msglen, MPI_LOGICAL, source, gid, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_bcast @ "//routineN) - CALL add_perf(perf_id=2, count=1, msg_size=msglen*loglen) + CALL add_perf(perf_id=2, msg_size=msglen*loglen) #else MARK_USED(source) MARK_USED(gid) @@ -2028,7 +2007,7 @@ SUBROUTINE mp_isend_bv(msgin, dest, comm, request, tag) END IF IF (ierr /= 0) CALL mp_stop(ierr, "mpi_isend @ "//routineN) - CALL add_perf(perf_id=11, count=1, msg_size=msglen*loglen) + CALL add_perf(perf_id=11, msg_size=msglen*loglen) #else DBCSR_ABORT("mp_isend called in non parallel case") MARK_USED(msgin) @@ -2084,7 +2063,7 @@ SUBROUTINE mp_irecv_bv(msgout, source, comm, request, tag) END IF IF (ierr /= 0) CALL mp_stop(ierr, "mpi_ircv @ "//routineN) - CALL add_perf(perf_id=12, count=1, msg_size=msglen*loglen) + CALL add_perf(perf_id=12, msg_size=msglen*loglen) #else DBCSR_ABORT("mp_irecv called in non parallel case") MARK_USED(msgout) @@ -2135,7 +2114,7 @@ SUBROUTINE mp_bcast_av(msg, source, gid) msg(i:i) = CHAR(imsg(i)) END DO DEALLOCATE (imsg) - CALL add_perf(perf_id=2, count=1, msg_size=msglen*charlen) + CALL add_perf(perf_id=2, msg_size=msglen*charlen) #else MARK_USED(msg) MARK_USED(source) @@ -2196,7 +2175,7 @@ SUBROUTINE mp_bcast_am(msg, source, gid) END DO DEALLOCATE (imsg) DEALLOCATE (imsglen) - CALL add_perf(perf_id=2, count=1, msg_size=msglen*charlen*msgsiz) + CALL add_perf(perf_id=2, msg_size=msglen*charlen*msgsiz) #else MARK_USED(msg) MARK_USED(source) @@ -2243,7 +2222,7 @@ SUBROUTINE mp_minloc_dv(msg, gid) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_allreduce @ "//routineN) msg = res DEALLOCATE (res) - CALL add_perf(perf_id=3, count=1, msg_size=msglen*real_8_size) + CALL add_perf(perf_id=3, msg_size=msglen*real_8_size) #else MARK_USED(msg) MARK_USED(gid) @@ -2287,7 +2266,7 @@ SUBROUTINE mp_maxloc_dv(msg, gid) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_allreduce @ "//routineN) msg = res DEALLOCATE (res) - CALL 
add_perf(perf_id=3, count=1, msg_size=msglen*real_8_size) + CALL add_perf(perf_id=3, msg_size=msglen*real_8_size) #else MARK_USED(msg) MARK_USED(gid) @@ -2899,8 +2878,6 @@ SUBROUTINE mp_win_free(win) CALL mpi_win_free(win, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_win_free @ "//routineN) - - CALL add_perf(perf_id=21, count=1) #else MARK_USED(win) win = mp_win_null @@ -2955,8 +2932,6 @@ SUBROUTINE mp_win_lock_all(win) DBCSR_ABORT("mp_win_lock_all requires MPI-3 standard") #endif IF (ierr /= 0) CALL mp_stop(ierr, "mpi_win_lock_all @ "//routineN) - - CALL add_perf(perf_id=19, count=1) #else MARK_USED(win) #endif @@ -2984,8 +2959,6 @@ SUBROUTINE mp_win_unlock_all(win) DBCSR_ABORT("mp_win_unlock_all requires MPI-3 standard") #endif IF (ierr /= 0) CALL mp_stop(ierr, "mpi_win_unlock_all @ "//routineN) - - CALL add_perf(perf_id=19, count=1) #else MARK_USED(win) #endif @@ -3038,7 +3011,7 @@ SUBROUTINE mp_alltoall_${nametype1}$11v(sb, scount, sdispl, rb, rcount, rdispl, rb, rcount, rdispl, ${mpi_type1}$, group, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_alltoallv @ "//routineN) msglen = SUM(scount) + SUM(rcount) - CALL add_perf(perf_id=6, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=6, msg_size=msglen*${bytes1}$) #else MARK_USED(group) MARK_USED(scount) @@ -3092,7 +3065,7 @@ SUBROUTINE mp_alltoall_${nametype1}$ (sb, rb, count, group) CALL mpi_comm_size(group, np, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_comm_size @ "//routineN) msglen = 2*count*np - CALL add_perf(perf_id=6, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=6, msg_size=msglen*${bytes1}$) #else MARK_USED(count) MARK_USED(group) @@ -3128,7 +3101,7 @@ SUBROUTINE mp_alltoall_${nametype1}$22(sb, rb, count, group) CALL mpi_comm_size(group, np, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_comm_size @ "//routineN) msglen = 2*SIZE(sb)*np - CALL add_perf(perf_id=6, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=6, msg_size=msglen*${bytes1}$) #else MARK_USED(count) MARK_USED(group) @@ -3166,7 +3139,7 @@ SUBROUTINE mp_alltoall_${nametype1}$44(sb, rb, count, group) CALL mpi_comm_size(group, np, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_comm_size @ "//routineN) msglen = 2*count*np - CALL add_perf(perf_id=6, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=6, msg_size=msglen*${bytes1}$) #else MARK_USED(count) MARK_USED(group) @@ -3201,7 +3174,7 @@ SUBROUTINE mp_send_${nametype1}$ (msg, dest, tag, gid) #if defined(__parallel) CALL mpi_send(msg, msglen, ${mpi_type1}$, dest, tag, gid, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_send @ "//routineN) - CALL add_perf(perf_id=13, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=13, msg_size=msglen*${bytes1}$) #else MARK_USED(msg) MARK_USED(dest) @@ -3233,7 +3206,7 @@ SUBROUTINE mp_send_${nametype1}$v(msg, dest, tag, gid) #if defined(__parallel) CALL mpi_send(msg, msglen, ${mpi_type1}$, dest, tag, gid, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_send @ "//routineN) - CALL add_perf(perf_id=13, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=13, msg_size=msglen*${bytes1}$) #else MARK_USED(msg) MARK_USED(dest) @@ -3275,7 +3248,7 @@ SUBROUTINE mp_recv_${nametype1}$ (msg, source, tag, gid) ALLOCATE (status(MPI_STATUS_SIZE)) CALL mpi_recv(msg, msglen, ${mpi_type1}$, source, tag, gid, status, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_recv @ "//routineN) - CALL add_perf(perf_id=14, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=14, msg_size=msglen*${bytes1}$) source = status(MPI_SOURCE) tag = status(MPI_TAG) 
DEALLOCATE (status) @@ -3315,7 +3288,7 @@ SUBROUTINE mp_recv_${nametype1}$v(msg, source, tag, gid) ALLOCATE (status(MPI_STATUS_SIZE)) CALL mpi_recv(msg, msglen, ${mpi_type1}$, source, tag, gid, status, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_recv @ "//routineN) - CALL add_perf(perf_id=14, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=14, msg_size=msglen*${bytes1}$) source = status(MPI_SOURCE) tag = status(MPI_TAG) DEALLOCATE (status) @@ -3354,7 +3327,7 @@ SUBROUTINE mp_bcast_${nametype1}$ (msg, source, gid) #if defined(__parallel) CALL mpi_bcast(msg, msglen, ${mpi_type1}$, source, gid, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_bcast @ "//routineN) - CALL add_perf(perf_id=2, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=2, msg_size=msglen*${bytes1}$) #else MARK_USED(msg) MARK_USED(source) @@ -3389,7 +3362,7 @@ SUBROUTINE mp_ibcast_${nametype1}$ (msg, source, gid, request) #if __MPI_VERSION > 2 CALL mpi_ibcast(msg, msglen, ${mpi_type1}$, source, gid, request, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_ibcast @ "//routineN) - CALL add_perf(perf_id=22, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=22, msg_size=msglen*${bytes1}$) #else MARK_USED(msg) MARK_USED(source) @@ -3426,7 +3399,7 @@ SUBROUTINE mp_bcast_${nametype1}$v(msg, source, gid) #if defined(__parallel) CALL mpi_bcast(msg, msglen, ${mpi_type1}$, source, gid, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_bcast @ "//routineN) - CALL add_perf(perf_id=2, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=2, msg_size=msglen*${bytes1}$) #else MARK_USED(source) MARK_USED(gid) @@ -3456,7 +3429,7 @@ SUBROUTINE mp_ibcast_${nametype1}$v(msg, source, gid, request) #if __MPI_VERSION > 2 CALL mpi_ibcast(msg, msglen, ${mpi_type1}$, source, gid, request, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_ibcast @ "//routineN) - CALL add_perf(perf_id=22, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=22, msg_size=msglen*${bytes1}$) #else MARK_USED(source) MARK_USED(gid) @@ -3491,7 +3464,7 @@ SUBROUTINE mp_bcast_${nametype1}$m(msg, source, gid) #if defined(__parallel) CALL mpi_bcast(msg, msglen, ${mpi_type1}$, source, gid, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_bcast @ "//routineN) - CALL add_perf(perf_id=2, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=2, msg_size=msglen*${bytes1}$) #else MARK_USED(source) MARK_USED(gid) @@ -3519,7 +3492,7 @@ SUBROUTINE mp_bcast_${nametype1}$3(msg, source, gid) #if defined(__parallel) CALL mpi_bcast(msg, msglen, ${mpi_type1}$, source, gid, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_bcast @ "//routineN) - CALL add_perf(perf_id=2, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=2, msg_size=msglen*${bytes1}$) #else MARK_USED(source) MARK_USED(gid) @@ -3550,7 +3523,7 @@ SUBROUTINE mp_sum_${nametype1}$ (msg, gid) #if defined(__parallel) CALL mpi_allreduce(MPI_IN_PLACE, msg, msglen, ${mpi_type1}$, MPI_SUM, gid, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_allreduce @ "//routineN) - CALL add_perf(perf_id=3, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=3, msg_size=msglen*${bytes1}$) #else MARK_USED(msg) MARK_USED(gid) @@ -3583,7 +3556,7 @@ SUBROUTINE mp_sum_${nametype1}$v(msg, gid) CALL mpi_allreduce(MPI_IN_PLACE, msg, msglen, ${mpi_type1}$, MPI_SUM, gid, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_allreduce @ "//routineN) END IF - CALL add_perf(perf_id=3, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=3, msg_size=msglen*${bytes1}$) #else MARK_USED(msg) MARK_USED(gid) @@ -3620,7 +3593,7 
@@ SUBROUTINE mp_isum_${nametype1}$v(msg, gid, request) ELSE request = mp_request_null ENDIF - CALL add_perf(perf_id=23, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=23, msg_size=msglen*${bytes1}$) #else MARK_USED(msg) MARK_USED(msglen) @@ -3668,7 +3641,7 @@ SUBROUTINE mp_sum_${nametype1}$m(msg, gid) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_allreduce @ "//routineN) END IF ENDDO - CALL add_perf(perf_id=3, count=1, msg_size=msglensum*${bytes1}$) + CALL add_perf(perf_id=3, msg_size=msglensum*${bytes1}$) #else MARK_USED(msg) MARK_USED(gid) @@ -3698,7 +3671,7 @@ SUBROUTINE mp_sum_${nametype1}$m3(msg, gid) CALL mpi_allreduce(MPI_IN_PLACE, msg, msglen, ${mpi_type1}$, MPI_SUM, gid, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_allreduce @ "//routineN) END IF - CALL add_perf(perf_id=3, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=3, msg_size=msglen*${bytes1}$) #else MARK_USED(gid) #endif @@ -3728,7 +3701,7 @@ SUBROUTINE mp_sum_${nametype1}$m4(msg, gid) CALL mpi_allreduce(MPI_IN_PLACE, msg, msglen, ${mpi_type1}$, MPI_SUM, gid, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_allreduce @ "//routineN) END IF - CALL add_perf(perf_id=3, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=3, msg_size=msglen*${bytes1}$) #else MARK_USED(gid) #endif @@ -3774,7 +3747,7 @@ SUBROUTINE mp_sum_root_${nametype1}$v(msg, root, gid) END IF DEALLOCATE (res) END IF - CALL add_perf(perf_id=3, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=3, msg_size=msglen*${bytes1}$) #else MARK_USED(root) MARK_USED(gid) @@ -3818,7 +3791,7 @@ SUBROUTINE mp_sum_root_${nametype1}$m(msg, root, gid) END IF DEALLOCATE (res) END IF - CALL add_perf(perf_id=3, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=3, msg_size=msglen*${bytes1}$) #else MARK_USED(root) MARK_USED(gid) @@ -3855,7 +3828,7 @@ SUBROUTINE mp_sum_partial_${nametype1}$m(msg, res, gid) CALL mpi_scan(msg, res, msglen, ${mpi_type1}$, MPI_SUM, gid, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_scan @ "//routineN) END IF - CALL add_perf(perf_id=3, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=3, msg_size=msglen*${bytes1}$) ! 
perf_id is same as for other summation routines #else res = msg @@ -3887,7 +3860,7 @@ SUBROUTINE mp_max_${nametype1}$ (msg, gid) #if defined(__parallel) CALL mpi_allreduce(MPI_IN_PLACE, msg, msglen, ${mpi_type1}$, MPI_MAX, gid, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_allreduce @ "//routineN) - CALL add_perf(perf_id=3, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=3, msg_size=msglen*${bytes1}$) #else MARK_USED(msg) MARK_USED(gid) @@ -3916,7 +3889,7 @@ SUBROUTINE mp_max_${nametype1}$v(msg, gid) #if defined(__parallel) CALL mpi_allreduce(MPI_IN_PLACE, msg, msglen, ${mpi_type1}$, MPI_MAX, gid, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_allreduce @ "//routineN) - CALL add_perf(perf_id=3, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=3, msg_size=msglen*${bytes1}$) #else MARK_USED(gid) #endif @@ -3946,7 +3919,7 @@ SUBROUTINE mp_min_${nametype1}$ (msg, gid) #if defined(__parallel) CALL mpi_allreduce(MPI_IN_PLACE, msg, msglen, ${mpi_type1}$, MPI_MIN, gid, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_allreduce @ "//routineN) - CALL add_perf(perf_id=3, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=3, msg_size=msglen*${bytes1}$) #else MARK_USED(msg) MARK_USED(gid) @@ -3978,7 +3951,7 @@ SUBROUTINE mp_min_${nametype1}$v(msg, gid) #if defined(__parallel) CALL mpi_allreduce(MPI_IN_PLACE, msg, msglen, ${mpi_type1}$, MPI_MIN, gid, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_allreduce @ "//routineN) - CALL add_perf(perf_id=3, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=3, msg_size=msglen*${bytes1}$) #else MARK_USED(gid) #endif @@ -4009,7 +3982,7 @@ SUBROUTINE mp_prod_${nametype1}$ (msg, gid) #if defined(__parallel) CALL mpi_allreduce(MPI_IN_PLACE, msg, msglen, ${mpi_type1}$, MPI_PROD, gid, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_allreduce @ "//routineN) - CALL add_perf(perf_id=3, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=3, msg_size=msglen*${bytes1}$) #else MARK_USED(msg) MARK_USED(gid) @@ -4044,7 +4017,7 @@ SUBROUTINE mp_scatter_${nametype1}$v(msg_scatter, msg, root, gid) CALL mpi_scatter(msg_scatter, msglen, ${mpi_type1}$, msg, & msglen, ${mpi_type1}$, root, gid, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_scatter @ "//routineN) - CALL add_perf(perf_id=4, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=4, msg_size=msglen*${bytes1}$) #else MARK_USED(root) MARK_USED(gid) @@ -4081,7 +4054,7 @@ SUBROUTINE mp_iscatter_${nametype1}$ (msg_scatter, msg, root, gid, request) CALL mpi_iscatter(msg_scatter, msglen, ${mpi_type1}$, msg, & msglen, ${mpi_type1}$, root, gid, request, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_iscatter @ "//routineN) - CALL add_perf(perf_id=24, count=1, msg_size=1*${bytes1}$) + CALL add_perf(perf_id=24, msg_size=1*${bytes1}$) #else MARK_USED(msg_scatter) MARK_USED(msg) @@ -4127,7 +4100,7 @@ SUBROUTINE mp_iscatter_${nametype1}$v2(msg_scatter, msg, root, gid, request) CALL mpi_iscatter(msg_scatter, msglen, ${mpi_type1}$, msg, & msglen, ${mpi_type1}$, root, gid, request, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_iscatter @ "//routineN) - CALL add_perf(perf_id=24, count=1, msg_size=1*${bytes1}$) + CALL add_perf(perf_id=24, msg_size=1*${bytes1}$) #else MARK_USED(msg_scatter) MARK_USED(msg) @@ -4173,7 +4146,7 @@ SUBROUTINE mp_iscatterv_${nametype1}$v(msg_scatter, sendcounts, displs, msg, rec CALL mpi_iscatterv(msg_scatter, sendcounts, displs, ${mpi_type1}$, msg, & recvcount, ${mpi_type1}$, root, gid, request, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_iscatterv @ "//routineN) - CALL 
add_perf(perf_id=24, count=1, msg_size=1*${bytes1}$) + CALL add_perf(perf_id=24, msg_size=1*${bytes1}$) #else MARK_USED(msg_scatter) MARK_USED(sendcounts) @@ -4224,7 +4197,7 @@ SUBROUTINE mp_gather_${nametype1}$ (msg, msg_gather, root, gid) CALL mpi_gather(msg, msglen, ${mpi_type1}$, msg_gather, & msglen, ${mpi_type1}$, root, gid, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_gather @ "//routineN) - CALL add_perf(perf_id=4, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=4, msg_size=msglen*${bytes1}$) #else MARK_USED(root) MARK_USED(gid) @@ -4261,7 +4234,7 @@ SUBROUTINE mp_gather_${nametype1}$v(msg, msg_gather, root, gid) CALL mpi_gather(msg, msglen, ${mpi_type1}$, msg_gather, & msglen, ${mpi_type1}$, root, gid, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_gather @ "//routineN) - CALL add_perf(perf_id=4, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=4, msg_size=msglen*${bytes1}$) #else MARK_USED(root) MARK_USED(gid) @@ -4298,7 +4271,7 @@ SUBROUTINE mp_gather_${nametype1}$m(msg, msg_gather, root, gid) CALL mpi_gather(msg, msglen, ${mpi_type1}$, msg_gather, & msglen, ${mpi_type1}$, root, gid, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_gather @ "//routineN) - CALL add_perf(perf_id=4, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=4, msg_size=msglen*${bytes1}$) #else MARK_USED(root) MARK_USED(gid) @@ -4348,7 +4321,6 @@ SUBROUTINE mp_gatherv_${nametype1}$v(sendbuf, recvbuf, recvcounts, displs, root, root, comm, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_gatherv @ "//routineN) CALL add_perf(perf_id=4, & - count=1, & msg_size=sendcount*${bytes1}$) #else MARK_USED(recvcounts) @@ -4398,7 +4370,6 @@ SUBROUTINE mp_igatherv_${nametype1}$v(sendbuf, sendcount, recvbuf, recvcounts, d root, comm, request, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_igatherv @ "//routineN) CALL add_perf(perf_id=24, & - count=1, & msg_size=sendcount*${bytes1}$) #else MARK_USED(sendbuf) @@ -5158,7 +5129,7 @@ SUBROUTINE mp_sum_scatter_${nametype1}$v(msgout, msgin, rcount, gid) gid, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_reduce_scatter @ "//routineN) - CALL add_perf(perf_id=3, count=1, & + CALL add_perf(perf_id=3, & msg_size=rcount(1)*2*${bytes1}$) #else MARK_USED(rcount) @@ -5201,7 +5172,7 @@ SUBROUTINE mp_sendrecv_${nametype1}$v(msgin, dest, msgout, source, comm) CALL mpi_sendrecv(msgin, msglen_in, ${mpi_type1}$, dest, send_tag, msgout, & msglen_out, ${mpi_type1}$, source, recv_tag, comm, MPI_STATUS_IGNORE, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_sendrecv @ "//routineN) - CALL add_perf(perf_id=7, count=1, & + CALL add_perf(perf_id=7, & msg_size=(msglen_in + msglen_out)*${bytes1}$/2) #else MARK_USED(dest) @@ -5257,7 +5228,7 @@ SUBROUTINE mp_isendrecv_${nametype1}$ (msgin, dest, msgout, source, comm, send_r comm, send_request, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_isend @ "//routineN) - CALL add_perf(perf_id=8, count=1, msg_size=2*${bytes1}$) + CALL add_perf(perf_id=8, msg_size=2*${bytes1}$) #else MARK_USED(dest) MARK_USED(source) @@ -5331,7 +5302,7 @@ SUBROUTINE mp_isendrecv_${nametype1}$v(msgin, dest, msgout, source, comm, send_r IF (ierr /= 0) CALL mp_stop(ierr, "mpi_isend @ "//routineN) msglen = (msglen + SIZE(msgout, 1) + 1)/2 - CALL add_perf(perf_id=8, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=8, msg_size=msglen*${bytes1}$) #else MARK_USED(dest) MARK_USED(source) @@ -5382,7 +5353,7 @@ SUBROUTINE mp_isend_${nametype1}$v(msgin, dest, comm, request, tag) END IF IF (ierr /= 0) CALL mp_stop(ierr, "mpi_isend @ "//routineN) - CALL 
add_perf(perf_id=11, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=11, msg_size=msglen*${bytes1}$) #else MARK_USED(msgin) MARK_USED(dest) @@ -5436,7 +5407,7 @@ SUBROUTINE mp_isend_${nametype1}$m2(msgin, dest, comm, request, tag) END IF IF (ierr /= 0) CALL mp_stop(ierr, "mpi_isend @ "//routineN) - CALL add_perf(perf_id=11, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=11, msg_size=msglen*${bytes1}$) #else MARK_USED(msgin) MARK_USED(dest) @@ -5488,7 +5459,7 @@ SUBROUTINE mp_irecv_${nametype1}$v(msgout, source, comm, request, tag) END IF IF (ierr /= 0) CALL mp_stop(ierr, "mpi_irecv @ "//routineN) - CALL add_perf(perf_id=12, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=12, msg_size=msglen*${bytes1}$) #else DBCSR_ABORT("mp_irecv called in non parallel case") MARK_USED(msgout) @@ -5541,7 +5512,7 @@ SUBROUTINE mp_irecv_${nametype1}$m2(msgout, source, comm, request, tag) END IF IF (ierr /= 0) CALL mp_stop(ierr, "mpi_irecv @ "//routineN) - CALL add_perf(perf_id=12, count=1, msg_size=msglen*${bytes1}$) + CALL add_perf(perf_id=12, msg_size=msglen*${bytes1}$) #else MARK_USED(msgout) MARK_USED(source) @@ -5584,8 +5555,6 @@ SUBROUTINE mp_win_create_${nametype1}$v(base, comm, win) CALL mpi_win_create(foo, len, ${bytes1}$, MPI_INFO_NULL, comm, win, ierr) ENDIF IF (ierr /= 0) CALL mp_stop(ierr, "mpi_win_create @ "//routineN) - - CALL add_perf(perf_id=20, count=1) #else MARK_USED(base) MARK_USED(comm) @@ -5675,7 +5644,7 @@ SUBROUTINE mp_rget_${nametype1}$v(base, source, win, win_data, myproc, disp, req #endif IF (ierr /= 0) CALL mp_stop(ierr, "mpi_rget @ "//routineN) - CALL add_perf(perf_id=25, count=1, msg_size=SIZE(base)*${bytes1}$) + CALL add_perf(perf_id=25, msg_size=SIZE(base)*${bytes1}$) #else MARK_USED(source) MARK_USED(win) @@ -5756,7 +5725,6 @@ SUBROUTINE mp_allocate_${nametype1}$ (DATA, len, stat) CALL mp_alloc_mem(DATA, len, stat=ierr) IF (ierr /= 0 .AND. .NOT. PRESENT(stat)) & CALL mp_stop(ierr, "mpi_alloc_mem @ "//routineN) - CALL add_perf(perf_id=15, count=1) #else ALLOCATE (DATA(len), stat=ierr) IF (ierr /= 0 .AND. .NOT. 
PRESENT(stat)) & @@ -5789,7 +5757,6 @@ SUBROUTINE mp_deallocate_${nametype1}$ (DATA, stat) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_free_mem @ "//routineN) ENDIF NULLIFY (DATA) - CALL add_perf(perf_id=15, count=1) #else DEALLOCATE (DATA) IF (PRESENT(stat)) stat = 0 From 13db0e2a6a9168481f7f0e985c64585fc25bc267 Mon Sep 17 00:00:00 2001 From: Patrick Seewald Date: Mon, 16 Mar 2020 16:14:30 +0100 Subject: [PATCH 07/34] Tensors: optimize block and index operations - if no index transpositions take place, tensor blocks have same representation in memory as matrix blocks and reshape can be avoided - split blocks before index permutation in dbcsr_t_copy to avoid reshape - Replace costly reshape in dbcsr_put_block2d by pointer bounds remapping - specialized instead of generic functions for index mapping - Reduce overhead of often-used operations on small arrays by using static instead of dynamic arrays, and by using loops instead of vector subscripts - Remove unnecessary asserts --- src/block/dbcsr_block_access.f90 | 27 ++-- src/tas/dbcsr_tas_split.F | 35 +---- src/tensors/dbcsr_allocate_wrap.F | 37 +++-- src/tensors/dbcsr_tensor.F | 29 ++-- src/tensors/dbcsr_tensor.fypp | 5 + src/tensors/dbcsr_tensor_block.F | 152 ++++++++------------ src/tensors/dbcsr_tensor_index.F | 217 ++++++++++++++++------------- src/tensors/dbcsr_tensor_reshape.F | 2 +- src/tensors/dbcsr_tensor_split.F | 38 +++-- src/tensors/dbcsr_tensor_test.F | 4 +- src/tensors/dbcsr_tensor_types.F | 49 +++---- 11 files changed, 293 insertions(+), 302 deletions(-) diff --git a/src/block/dbcsr_block_access.f90 b/src/block/dbcsr_block_access.f90 index 2be8513f7c9..2338e18364c 100644 --- a/src/block/dbcsr_block_access.f90 +++ b/src/block/dbcsr_block_access.f90 @@ -252,7 +252,8 @@ SUBROUTINE dbcsr_put_block2d_${nametype1}$ (matrix, row, col, block, lb_row_col, INTEGER, INTENT(IN) :: row, col !! the row !! the column - ${type1}$, DIMENSION(:, :), INTENT(IN) :: block + ${type1}$, DIMENSION(:, :), INTENT(IN), & + CONTIGUOUS, TARGET :: block !! the block to put INTEGER, DIMENSION(2), OPTIONAL, INTENT(INOUT) :: lb_row_col LOGICAL, INTENT(IN), OPTIONAL :: transposed, summation @@ -262,28 +263,16 @@ SUBROUTINE dbcsr_put_block2d_${nametype1}$ (matrix, row, col, block, lb_row_col, ${type1}$, INTENT(IN), OPTIONAL :: scale !! scale the block being added + ${type1}$, DIMENSION(:), POINTER :: block_1d + CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_put_block2d_${nametype1}$', & routineP = moduleN//':'//routineN - LOGICAL :: tr, do_sum + NULLIFY(block_1d) - IF (PRESENT(transposed)) THEN - tr = transposed - ELSE - tr = .FALSE. - ENDIF - IF (PRESENT(summation)) THEN - do_sum = summation - ELSE - do_sum = .FALSE. 
- ENDIF - IF (PRESENT(scale)) THEN - CALL dbcsr_put_block(matrix, row, col, & - RESHAPE(block, (/SIZE(block)/)), lb_row_col, tr, do_sum, flop, scale) - ELSE - CALL dbcsr_put_block(matrix, row, col, & - RESHAPE(block, (/SIZE(block)/)), lb_row_col, tr, do_sum, flop) - ENDIF + block_1d(1:SIZE(block)) => block + + CALL dbcsr_put_block(matrix, row, col, block_1d, lb_row_col, transposed, summation, flop, scale) END SUBROUTINE dbcsr_put_block2d_${nametype1}$ SUBROUTINE dbcsr_put_block_${nametype1}$ (matrix, row, col, block, lb_row_col, transposed, & diff --git a/src/tas/dbcsr_tas_split.F b/src/tas/dbcsr_tas_split.F index 9e80b0a7e28..95a2e139d2f 100644 --- a/src/tas/dbcsr_tas_split.F +++ b/src/tas/dbcsr_tas_split.F @@ -43,7 +43,6 @@ MODULE dbcsr_tas_split dbcsr_tas_create_split_rows_or_cols, & group_to_mrowcol, & group_to_world_proc_map, & - mrowcol_to_group, & rowsplit, & world_to_group_proc_map, & accept_pgrid_dims, & @@ -550,13 +549,6 @@ SUBROUTINE block_index_local_to_global(info, dist, row_group, column_group, & !! global block row !! global block column - IF (PRESENT(row)) THEN - DBCSR_ASSERT(PRESENT(row_group)) - ENDIF - IF (PRESENT(column)) THEN - DBCSR_ASSERT(PRESENT(column_group)) - ENDIF - SELECT CASE (info%split_rowcol) CASE (rowsplit) ASSOCIATE (rows=>dist%local_rowcols) @@ -580,40 +572,17 @@ SUBROUTINE block_index_global_to_local(info, dist, row, column, row_group, colum CHARACTER(LEN=*), PARAMETER :: routineN = 'block_index_global_to_local', & routineP = moduleN//':'//routineN - IF (PRESENT(row_group)) THEN - DBCSR_ASSERT(PRESENT(row)) - ENDIF - IF (PRESENT(column_group)) THEN - DBCSR_ASSERT(PRESENT(column)) - ENDIF - SELECT CASE (info%split_rowcol) CASE (rowsplit) - IF (PRESENT(row_group)) CALL mrowcol_to_group(dist, row, row_group) + IF (PRESENT(row_group)) row_group = i8_bsearch(dist%local_rowcols, row) IF (PRESENT(column_group)) column_group = INT(column) CASE (colsplit) IF (PRESENT(row_group)) row_group = INT(row) - IF (PRESENT(column_group)) CALL mrowcol_to_group(dist, column, column_group) + IF (PRESENT(column_group)) column_group = i8_bsearch(dist%local_rowcols, column) END SELECT END SUBROUTINE - SUBROUTINE mrowcol_to_group(dist, rowcol, rowcol_group) - !! map matrix rows/column to group local rows/column - - CLASS(dbcsr_tas_distribution_type), INTENT(IN) :: dist - INTEGER(KIND=int_8), INTENT(IN) :: rowcol - !! global row/column to map - INTEGER, INTENT(OUT) :: rowcol_group - !! group local row/column - CHARACTER(LEN=*), PARAMETER :: routineN = 'mrowcol_to_group', & - routineP = moduleN//':'//routineN - - ASSOCIATE (rowcols=>dist%local_rowcols) - rowcol_group = i8_bsearch(rowcols, rowcol) - END ASSOCIATE - END SUBROUTINE - FUNCTION i8_bsearch(array, el, l_index, u_index) result(res) !! binary search for 8-byte integers INTEGER(KIND=int_8), intent(in) :: array(:) diff --git a/src/tensors/dbcsr_allocate_wrap.F b/src/tensors/dbcsr_allocate_wrap.F index f8591582c6e..c659c507517 100644 --- a/src/tensors/dbcsr_allocate_wrap.F +++ b/src/tensors/dbcsr_allocate_wrap.F @@ -36,6 +36,7 @@ MODULE dbcsr_allocate_wrap #:for dparam, dtype, dsuffix in dtype_all_list #:for dim in range(1, maxdim+1) + SUBROUTINE allocate_${dim}$d_${dsuffix}$ (array, shape_spec, source, order) !! Allocate array according to shape_spec. Possibly assign array from source. !! @note this does not fully replace Fortran RESHAPE intrinsic since source and target array must @@ -49,29 +50,35 @@ SUBROUTINE allocate_${dim}$d_${dsuffix}$ (array, shape_spec, source, order) !! 
source array to be copied to target array, must have same rank as target array. INTEGER, DIMENSION(${dim}$), INTENT(IN), OPTIONAL :: order !! in which order to copy source to array (same convention as RESHAPE intrinsic). - INTEGER, DIMENSION(${dim}$) :: order_prv INTEGER, DIMENSION(${dim}$) :: shape_prv - INTEGER :: i - - DBCSR_ASSERT(PRESENT(shape_spec) .OR. PRESENT(source)) - IF (PRESENT(order)) THEN - order_prv(:) = order(:) + IF (PRESENT(shape_spec)) THEN + IF (PRESENT(order)) THEN + shape_prv(order) = shape_spec + ELSE + shape_prv = shape_spec + ENDIF + ELSEIF (PRESENT(source)) THEN + IF (PRESENT(order)) THEN + shape_prv(order) = SHAPE(source) + ELSE + shape_prv = SHAPE(source) + ENDIF ELSE - order_prv(:) = (/(i, i=1, ${dim}$)/) + DBCSR_ABORT("either source or shape_spec must be present") ENDIF - IF (PRESENT(source) .AND. .NOT. PRESENT(shape_spec)) THEN - shape_prv(order_prv) = SHAPE(source) + IF (PRESENT(source)) THEN + IF (PRESENT(order)) THEN + ALLOCATE (array(${arrlist("shape_prv", nmax=dim)}$)) + array(${shape_colon(dim)}$) = RESHAPE(source, shape_prv, order=order) + ELSE + ALLOCATE (array(${arrlist("shape_prv", nmax=dim)}$), source=source) + ENDIF ELSE - shape_prv(order_prv) = shape_spec + ALLOCATE (array(${arrlist("shape_prv", nmax=dim)}$)) ENDIF - ALLOCATE (array(${arrlist("shape_prv", nmax=dim)}$)) - - IF (PRESENT(source)) THEN - array(${shape_colon(dim)}$) = RESHAPE(source, shape_prv, order=order_prv) - ENDIF END SUBROUTINE #:endfor #:endfor diff --git a/src/tensors/dbcsr_tensor.F b/src/tensors/dbcsr_tensor.F index 7a0c0eb248e..b5ccf67dc3a 100644 --- a/src/tensors/dbcsr_tensor.F +++ b/src/tensors/dbcsr_tensor.F @@ -39,7 +39,7 @@ MODULE dbcsr_tensor dbcsr_t_iterator_blocks_left, dbcsr_t_iterator_stop, dbcsr_t_iterator_next_block, & ndims_iterator, dbcsr_t_reserve_blocks, block_nd, destroy_block USE dbcsr_tensor_index, ONLY: & - dbcsr_t_get_mapping_info, nd_to_2d_mapping, dbcsr_t_inverse_order, permute_index, get_nd_indices + dbcsr_t_get_mapping_info, nd_to_2d_mapping, dbcsr_t_inverse_order, permute_index, get_nd_indices_tensor USE dbcsr_tensor_types, ONLY: & dbcsr_t_create, dbcsr_t_get_data_type, dbcsr_t_type, ndims_tensor, dims_tensor, & dbcsr_t_distribution_type, dbcsr_t_distribution, dbcsr_t_nd_mp_comm, dbcsr_t_destroy, & @@ -141,6 +141,7 @@ SUBROUTINE dbcsr_t_copy(tensor_in, tensor_out, order, summation, bounds, move_da summation_prv, new_in_1, new_in_2, & new_in_3, new_out_1, block_compatible, & move_prv + TYPE(array_list) :: blk_sizes_in CALL timeset(routineN, handle) @@ -176,25 +177,31 @@ SUBROUTINE dbcsr_t_copy(tensor_in, tensor_out, order, summation, bounds, move_da ENDIF IF (PRESENT(order)) THEN - ALLOCATE (in_tmp_2) - CALL dbcsr_t_permute_index(in_tmp_1, in_tmp_2, order) - new_in_2 = .TRUE. + CALL reorder_arrays(in_tmp_1%blk_sizes, blk_sizes_in, order=order) + block_compatible = check_equal(blk_sizes_in, tensor_out%blk_sizes) ELSE - in_tmp_2 => in_tmp_1 + block_compatible = check_equal(in_tmp_1%blk_sizes, tensor_out%blk_sizes) ENDIF - block_compatible = check_equal(in_tmp_2%blk_sizes, tensor_out%blk_sizes) IF (.NOT. block_compatible) THEN - ALLOCATE (in_tmp_3, out_tmp_1) - CALL dbcsr_t_make_compatible_blocks(in_tmp_2, tensor_out, in_tmp_3, out_tmp_1, & + ALLOCATE (in_tmp_2, out_tmp_1) + CALL dbcsr_t_make_compatible_blocks(in_tmp_1, tensor_out, in_tmp_2, out_tmp_1, order=order, & nodata2=.NOT. summation_prv, move_data=move_prv) - new_in_3 = .TRUE.; new_out_1 = .TRUE. + new_in_2 = .TRUE.; new_out_1 = .TRUE. move_prv = .TRUE. 
ELSE - in_tmp_3 => in_tmp_2 + in_tmp_2 => in_tmp_1 out_tmp_1 => tensor_out ENDIF + IF (PRESENT(order)) THEN + ALLOCATE (in_tmp_3) + CALL dbcsr_t_permute_index(in_tmp_2, in_tmp_3, order) + new_in_3 = .TRUE. + ELSE + in_tmp_3 => in_tmp_2 + ENDIF + CALL dbcsr_t_get_mapping_info(in_tmp_3%nd_index, map1_2d=map1_in_1, map2_2d=map1_in_2) CALL dbcsr_t_get_mapping_info(out_tmp_1%nd_index, map1_2d=map2_in_1, map2_2d=map2_in_2) @@ -820,7 +827,7 @@ SUBROUTINE dbcsr_t_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, & nblk = SIZE(result_index_2d,1) ALLOCATE(result_index(nblk, dbcsr_t_ndims(tensor_contr_3))) DO iblk = 1, nblk - result_index(iblk,:) = get_nd_indices(tensor_contr_3%nd_index_blk, result_index_2d(iblk,:)) + result_index(iblk,:) = get_nd_indices_tensor(tensor_contr_3%nd_index_blk, result_index_2d(iblk,:)) ENDDO IF (new_1) THEN diff --git a/src/tensors/dbcsr_tensor.fypp b/src/tensors/dbcsr_tensor.fypp index 8249cb709b5..fa96e8089b3 100644 --- a/src/tensors/dbcsr_tensor.fypp +++ b/src/tensors/dbcsr_tensor.fypp @@ -57,6 +57,11 @@ $: ", ".join([name + "_" + str(i) + suffix for i in range(nmin, nmax+1)]) $: ','.join([':']*n) #:enddef +#:def shape_explicit(name, n=maxrank) +#! explicit shape for pointer bounds remapping +$: ", ".join(['LBOUND('+name+', '+ str(i) + '):UBOUND('+name+', '+str(i)+')' for i in range(1,n+1)]) +#:enddef + #:def uselist(list_in) #! comma-separated list of unique entries of list_in $: ", ".join(list(set(list_in))) diff --git a/src/tensors/dbcsr_tensor_block.F b/src/tensors/dbcsr_tensor_block.F index 29fb8717165..0645a65cb5b 100644 --- a/src/tensors/dbcsr_tensor_block.F +++ b/src/tensors/dbcsr_tensor_block.F @@ -30,8 +30,8 @@ MODULE dbcsr_tensor_block USE dbcsr_kinds, ONLY: & ${uselist(dtype_float_prec)}$, int_8 USE dbcsr_tensor_index, ONLY: & - nd_to_2d_mapping, ndims_mapping, get_nd_indices, destroy_nd_to_2d_mapping, get_2d_indices, & - dbcsr_t_get_mapping_info, create_nd_to_2d_mapping + nd_to_2d_mapping, ndims_mapping, get_nd_indices_tensor, destroy_nd_to_2d_mapping, get_2d_indices_tensor, & + create_nd_to_2d_mapping USE dbcsr_array_list_methods, ONLY: & array_list, get_array_elements, destroy_array_list, sizes_of_arrays, create_array_list, & get_arrays @@ -61,9 +61,7 @@ MODULE dbcsr_tensor_block dbcsr_t_reserve_blocks, & dbcsr_t_reserved_block_indices, & destroy_block, & - ndims_iterator, & - reshape_2d_to_nd_block, & - reshape_nd_to_2d_block + ndims_iterator TYPE dbcsr_t_iterator_type TYPE(dbcsr_tas_iterator) :: iter @@ -98,22 +96,6 @@ MODULE dbcsr_tensor_block MODULE PROCEDURE create_block_nodata END INTERFACE - INTERFACE reshape_nd_to_2d_block -#:for dparam, dtype, dsuffix in dtype_float_list -#:for ndim in ndims - MODULE PROCEDURE reshape_block_t2m_${ndim}$d_${dsuffix}$ -#:endfor -#:endfor - END INTERFACE reshape_nd_to_2d_block - - INTERFACE reshape_2d_to_nd_block -#:for dparam, dtype, dsuffix in dtype_float_list -#:for ndim in ndims - MODULE PROCEDURE reshape_block_m2t_${ndim}$d_${dsuffix}$ -#:endfor -#:endfor - END INTERFACE reshape_2d_to_nd_block - INTERFACE dbcsr_t_put_block #:for dparam, dtype, dsuffix in dtype_float_list #:for ndim in ndims @@ -251,7 +233,7 @@ SUBROUTINE dbcsr_t_iterator_next_block(iterator, ind_nd, blk, blk_p, blk_size, b CALL dbcsr_tas_iterator_next_block(iterator%iter, ind_2d(1), ind_2d(2), blk, blk_p=blk_p) - ind_nd(:) = get_nd_indices(iterator%nd_index_blk, ind_2d) + ind_nd(:) = get_nd_indices_tensor(iterator%nd_index_blk, ind_2d) IF (PRESENT(blk_size)) blk_size(:) = get_array_elements(iterator%blk_sizes, ind_nd) ! 
note: blk_offset needs to be determined by tensor metadata, can not be derived from 2d row/col ! offset since block index mapping is not consistent with element index mapping @@ -313,7 +295,7 @@ SUBROUTINE dbcsr_t_reserve_blocks_index(tensor, ${varlist("blk_ind")}$) DO iblk = 1, nblk iblk_nd(:) = iblk ind_nd(:) = get_array_elements(blks, iblk_nd) - ind_2d(:) = get_2d_indices(tensor%nd_index_blk, ind_nd) + ind_2d(:) = get_2d_indices_tensor(tensor%nd_index_blk, ind_nd) rows(iblk) = ind_2d(1); cols(iblk) = ind_2d(2) ENDDO @@ -587,44 +569,56 @@ SUBROUTINE dbcsr_t_put_${ndim}$d_block_${dsuffix}$ (tensor, ind, sizes, block, s !! Template for dbcsr_t_put_block. TYPE(dbcsr_t_type), INTENT(INOUT) :: tensor - INTEGER, DIMENSION(ndims_tensor(tensor)), INTENT(IN) :: ind + INTEGER, DIMENSION(${ndim}$), INTENT(IN) :: ind !! block index - INTEGER, DIMENSION(ndims_tensor(tensor)), INTENT(IN) :: sizes + INTEGER, DIMENSION(${ndim}$), INTENT(IN) :: sizes !! block size ${dtype}$, DIMENSION(${arrlist("sizes", nmax=ndim)}$), & - INTENT(IN) :: block + INTENT(IN), TARGET :: block !! block to put LOGICAL, INTENT(IN), OPTIONAL :: summation !! whether block should be summed to existing block ${dtype}$, INTENT(IN), OPTIONAL :: scale !! scaling factor - INTEGER, ALLOCATABLE, DIMENSION(:) :: map1_2d, map2_2d INTEGER(KIND=int_8), DIMENSION(2) :: ind_2d - INTEGER, DIMENSION(2) :: dims_2d - ${dtype}$, ALLOCATABLE, DIMENSION(:, :) :: block_2d - TYPE(nd_to_2d_mapping) :: map_blk + INTEGER, DIMENSION(2) :: shape_2d + ${dtype}$, POINTER, DIMENSION(:, :) :: block_2d + INTEGER, DIMENSION(${ndim}$) :: shape_nd CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_t_put_${ndim}$d_block_${dsuffix}$', & routineP = moduleN//':'//routineN LOGICAL :: found - ${dtype}$, DIMENSION(${arrlist("sizes", nmax=ndim)}$) & - :: block_check + ${dtype}$, DIMENSION(${arrlist("sizes", nmax=ndim)}$) :: block_check + LOGICAL, PARAMETER :: debug = .FALSE. + INTEGER :: i + + NULLIFY (block_2d) IF (debug) THEN CALL dbcsr_t_get_block(tensor, ind, sizes, block_check, found=found) DBCSR_ASSERT(found) ENDIF - ! reshape block - CALL dbcsr_t_get_mapping_info(tensor%nd_index_blk, map1_2d=map1_2d, map2_2d=map2_2d) - CALL create_nd_to_2d_mapping(map_blk, sizes, map1_2d, map2_2d) - CALL dbcsr_t_get_mapping_info(map_blk, dims_2d=dims_2d) - CALL allocate_any(block_2d, shape_spec=dims_2d) - CALL reshape_nd_to_2d_block(map_blk, block, block_2d) + ASSOCIATE (map_nd=>tensor%nd_index_blk%map_nd, & + map1_2d=>tensor%nd_index_blk%map1_2d, & + map2_2d=>tensor%nd_index_blk%map2_2d) + + shape_2d = [PRODUCT(sizes(map1_2d)), PRODUCT(sizes(map2_2d))] + + IF (ALL([map1_2d, map2_2d] == (/(i, i=1, ${ndim}$)/))) THEN + ! to avoid costly reshape can do pointer bounds remapping as long as arrays are equivalent in memory + block_2d(1:shape_2d(1), 1:shape_2d(2)) => block(${shape_colon(ndim)}$) + ELSE + ! need reshape due to rank reordering + ALLOCATE (block_2d(shape_2d(1), shape_2d(2))) + shape_nd(map_nd) = sizes + block_2d(:, :) = RESHAPE(RESHAPE(block, SHAPE=shape_nd, order=map_nd), SHAPE=shape_2d) + ENDIF + + ind_2d(:) = get_2d_indices_tensor(tensor%nd_index_blk, ind) - ! convert block index - ind_2d(:) = get_2d_indices(tensor%nd_index_blk, ind) + END ASSOCIATE CALL dbcsr_tas_put_block(tensor%matrix_rep, ind_2d(1), ind_2d(2), block_2d, summation=summation, & scale=scale) @@ -639,7 +633,7 @@ SUBROUTINE dbcsr_t_allocate_and_get_${ndim}$d_block_${dsuffix}$ (tensor, ind, bl !! 
allocate and get block TYPE(dbcsr_t_type), INTENT(INOUT) :: tensor - INTEGER, DIMENSION(ndims_tensor(tensor)), INTENT(IN) :: ind + INTEGER, DIMENSION(${ndim}$), INTENT(IN) :: ind !! block index ${dtype}$, DIMENSION(${shape_colon(ndim)}$), & ALLOCATABLE, INTENT(OUT) :: block @@ -662,9 +656,9 @@ SUBROUTINE dbcsr_t_get_${ndim}$d_block_${dsuffix}$ (tensor, ind, sizes, block, f !! Template for dbcsr_t_get_block. TYPE(dbcsr_t_type), INTENT(INOUT) :: tensor - INTEGER, DIMENSION(ndims_tensor(tensor)), INTENT(IN) :: ind + INTEGER, DIMENSION(${ndim}$), INTENT(IN) :: ind !! block index - INTEGER, DIMENSION(ndims_tensor(tensor)), INTENT(IN) :: sizes + INTEGER, DIMENSION(${ndim}$), INTENT(IN) :: sizes !! block size ${dtype}$, DIMENSION(${arrlist("sizes", nmax=ndim)}$), & INTENT(OUT) :: block @@ -672,67 +666,37 @@ SUBROUTINE dbcsr_t_get_${ndim}$d_block_${dsuffix}$ (tensor, ind, sizes, block, f LOGICAL, INTENT(OUT) :: found !! whether block was found - INTEGER, ALLOCATABLE, DIMENSION(:) :: map1_2d, map2_2d - INTEGER(KIND=int_8), DIMENSION(2) :: ind_2d - ${dtype}$, DIMENSION(:, :), POINTER :: block_2d_ptr => NULL() - ${dtype}$, DIMENSION(:, :), ALLOCATABLE :: block_2d - TYPE(nd_to_2d_mapping) :: map_blk + INTEGER(KIND=int_8), DIMENSION(2) :: ind_2d + ${dtype}$, DIMENSION(:, :), POINTER, CONTIGUOUS :: block_2d_ptr => NULL() LOGICAL :: tr + INTEGER :: i + ${dtype}$, DIMENSION(${shape_colon(ndim)}$), POINTER :: block_ptr CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_t_get_${ndim}$d_block_${dsuffix}$', & routineP = moduleN//':'//routineN - NULLIFY (block_2d_ptr) + NULLIFY (block_2d_ptr, block_ptr) - ! convert block index - ind_2d(:) = get_2d_indices(tensor%nd_index_blk, ind) - CALL dbcsr_tas_get_block_p(tensor%matrix_rep, ind_2d(1), ind_2d(2), block_2d_ptr, tr, found) - DBCSR_ASSERT(.NOT. tr) + ind_2d(:) = get_2d_indices_tensor(tensor%nd_index_blk, ind) - IF (found) THEN - ! convert pointer to allocatable - CALL allocate_any(block_2d, source=block_2d_ptr) + ASSOCIATE (map1_2d=>tensor%nd_index_blk%map1_2d, & + map2_2d=>tensor%nd_index_blk%map2_2d) - CALL dbcsr_t_get_mapping_info(tensor%nd_index_blk, map1_2d=map1_2d, map2_2d=map2_2d) - CALL create_nd_to_2d_mapping(map_blk, sizes, map1_2d, map2_2d) - CALL reshape_2d_to_nd_block(map_blk, block_2d, block) - ENDIF + CALL dbcsr_tas_get_block_p(tensor%matrix_rep, ind_2d(1), ind_2d(2), block_2d_ptr, tr, found) + DBCSR_ASSERT(.NOT. tr) - END SUBROUTINE -#:endfor -#:endfor - -#:for dparam, dtype, dsuffix in dtype_float_list -#:for ndim in ndims - - SUBROUTINE reshape_block_m2t_${ndim}$d_${dsuffix}$ (map, block_2d, block) - !! reshape matrix to tensor block - TYPE(nd_to_2d_mapping), INTENT(IN) :: map - ${dtype}$, DIMENSION(:, :), INTENT(IN) :: block_2d - ${dtype}$, DIMENSION(${arrlist("map%dims_nd", nmax=ndim)}$), & - INTENT(OUT) :: block - INTEGER, DIMENSION(ndims_mapping(map)) :: map_tmp + IF (found) THEN + IF (ALL([map1_2d, map2_2d] == (/(i, i=1, ${ndim}$)/))) THEN + ! to avoid costly reshape can do pointer bounds remapping as long as arrays are equivalent in memory + block_ptr(${shape_explicit('block', ndim)}$) => block_2d_ptr(:, :) + block(${shape_colon(ndim)}$) = block_ptr(${shape_colon(ndim)}$) + ELSE + ! 
need reshape due to rank reordering + block(${shape_colon(ndim)}$) = RESHAPE(block_2d_ptr, SHAPE=SHAPE(block), ORDER=[map1_2d, map2_2d]) + ENDIF + ENDIF - map_tmp(:) = [map%map1_2d, map%map2_2d] - block(${shape_colon(ndim)}$) = RESHAPE(block_2d, SHAPE=SHAPE(block), ORDER=map_tmp) - END SUBROUTINE -#:endfor -#:endfor + END ASSOCIATE -#:for dparam, dtype, dsuffix in dtype_float_list -#:for ndim in ndims - SUBROUTINE reshape_block_t2m_${ndim}$d_${dsuffix}$ (map, block, block_2d) - !! reshape tensor to matrix block - TYPE(nd_to_2d_mapping), INTENT(IN) :: map - ${dtype}$, DIMENSION(${arrlist("map%dims_nd", nmax=ndim)}$), INTENT(IN) :: block - ${dtype}$, DIMENSION(map%dims_2d(1), map%dims_2d(2)), INTENT(OUT) :: block_2d - - INTEGER, DIMENSION(ndims_mapping(map)) :: shape_in, shape_reordered - ${dtype}$, ALLOCATABLE, DIMENSION(${shape_colon(ndim)}$) :: block_tmp - - shape_in = SHAPE(block) - shape_reordered(map%map_nd) = shape_in - CALL allocate_any(block_tmp, source=block, order=map%map_nd) - block_2d(:, :) = RESHAPE(block_tmp, SHAPE=SHAPE(block_2d)) END SUBROUTINE #:endfor #:endfor diff --git a/src/tensors/dbcsr_tensor_index.F b/src/tensors/dbcsr_tensor_index.F index 6b38cd18667..5d7ef18a3dc 100644 --- a/src/tensors/dbcsr_tensor_index.F +++ b/src/tensors/dbcsr_tensor_index.F @@ -12,21 +12,26 @@ MODULE dbcsr_tensor_index USE dbcsr_allocate_wrap, ONLY: allocate_any USE dbcsr_kinds, ONLY: int_8 #include "base/dbcsr_base_uses.f90" +#:include "dbcsr_tensor.fypp" IMPLICIT NONE PRIVATE CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'dbcsr_tensor_index' PUBLIC :: & - combine_index, & + combine_tensor_index, & + combine_pgrid_index, & create_nd_to_2d_mapping, & destroy_nd_to_2d_mapping, & - get_2d_indices, & + get_2d_indices_tensor, & + get_2d_indices_pgrid, & dbcsr_t_get_mapping_info, & - get_nd_indices, & + get_nd_indices_tensor, & + get_nd_indices_pgrid, & nd_to_2d_mapping, & ndims_mapping, & - split_index, & + split_tensor_index, & + split_pgrid_index, & dbcsr_t_inverse_order, & permute_index @@ -176,142 +181,168 @@ SUBROUTINE dbcsr_t_get_mapping_info(map, ndim_nd, ndim1_2d, ndim2_2d, dims_2d_i8 END SUBROUTINE dbcsr_t_get_mapping_info - FUNCTION combine_index(ind_in, dims, base, col_major) RESULT(ind_out) + PURE FUNCTION combine_tensor_index(ind_in, dims) RESULT(ind_out) !! transform nd index to flat index - INTEGER, DIMENSION(:), INTENT(IN) :: ind_in, dims !! nd index !! nd dimensions - INTEGER, INTENT(IN), OPTIONAL :: base - !! base index, default 1 (Fortran style) - LOGICAL, INTENT(IN), OPTIONAL :: col_major - !! column major ordering, default .TRUE. (Fortran style) INTEGER(KIND=int_8) :: ind_out !! flat index + INTEGER :: i_dim - INTEGER :: i_dim, my_base - LOGICAL :: my_col_major + ind_out = ind_in(SIZE(dims)) + DO i_dim = SIZE(dims) - 1, 1, -1 + ind_out = (ind_out - 1)*dims(i_dim) + ind_in(i_dim) + ENDDO - DBCSR_ASSERT(SIZE(ind_in) .EQ. SIZE(dims)) + END FUNCTION - IF (PRESENT(base)) THEN - my_base = base - ELSE - my_base = 1 - ENDIF + PURE FUNCTION combine_pgrid_index(ind_in, dims) RESULT(ind_out) + !! transform nd index to flat index - IF (PRESENT(col_major)) THEN - my_col_major = col_major - ELSE - my_col_major = .TRUE. - ENDIF + INTEGER, DIMENSION(:), INTENT(IN) :: ind_in, dims + !! nd index + !! nd dimensions + INTEGER :: ind_out + !! flat index - IF (my_col_major) THEN - i_dim = SIZE(dims) - 1 - ind_out = ind_in(i_dim + 1) - DO WHILE (i_dim .GE. 
1) - ind_out = (ind_out - my_base)*dims(i_dim) + (ind_in(i_dim) - my_base) + my_base - i_dim = i_dim - 1 - ENDDO - ELSE - i_dim = 2 - ind_out = ind_in(i_dim - 1) - DO WHILE (i_dim .LE. SIZE(dims)) - ind_out = (ind_out - my_base)*dims(i_dim) + (ind_in(i_dim) - my_base) + my_base - i_dim = i_dim + 1 - ENDDO - ENDIF - END FUNCTION combine_index + INTEGER :: i_dim - FUNCTION split_index(ind_in, dims, base, col_major) RESULT(ind_out) + ind_out = ind_in(1) + DO i_dim = 2, SIZE(dims) + ind_out = ind_out*dims(i_dim) + ind_in(i_dim) + ENDDO + END FUNCTION + + PURE FUNCTION split_tensor_index(ind_in, dims) RESULT(ind_out) !! transform flat index to nd index INTEGER(KIND=int_8), INTENT(IN) :: ind_in !! flat index INTEGER, DIMENSION(:), INTENT(IN) :: dims !! nd dimensions - INTEGER, INTENT(IN), OPTIONAL :: base - !! base index, default 1 (Fortran style) - LOGICAL, INTENT(IN), OPTIONAL :: col_major - !! column major ordering, default .TRUE. (Fortran style) INTEGER, DIMENSION(SIZE(dims)) :: ind_out !! nd index INTEGER(KIND=int_8) :: tmp - INTEGER :: i_dim, my_base - LOGICAL :: my_col_major + INTEGER :: i_dim - IF (PRESENT(base)) THEN - my_base = base - ELSE - my_base = 1 - ENDIF + tmp = ind_in + DO i_dim=1,SIZE(dims) + ind_out(i_dim) = INT(MOD(tmp - 1, INT(dims(i_dim), int_8)) + 1) + tmp = (tmp - 1)/dims(i_dim) + 1 + END DO - IF (PRESENT(col_major)) THEN - my_col_major = col_major - ELSE - my_col_major = .TRUE. - ENDIF + END FUNCTION - IF (my_col_major) THEN - i_dim = 1 - tmp = ind_in - DO WHILE (i_dim .LE. SIZE(dims)) - ind_out(i_dim) = INT(MOD(tmp - my_base, INT(dims(i_dim), int_8)) + my_base) - tmp = (tmp - my_base)/dims(i_dim) + my_base - i_dim = i_dim + 1 - END DO - ELSE - i_dim = SIZE(dims) - tmp = ind_in - DO WHILE (i_dim .GE. 1) - ind_out(i_dim) = INT(MOD(tmp - my_base, INT(dims(i_dim), int_8)) + my_base) - tmp = (tmp - my_base)/dims(i_dim) + my_base - i_dim = i_dim - 1 - END DO - ENDIF - END FUNCTION split_index + PURE FUNCTION split_pgrid_index(ind_in, dims) RESULT(ind_out) + !! transform flat index to nd index + + INTEGER, INTENT(IN) :: ind_in + !! flat index + INTEGER, DIMENSION(:), INTENT(IN) :: dims + !! nd dimensions + INTEGER, DIMENSION(SIZE(dims)) :: ind_out + !! nd index - FUNCTION get_2d_indices(map, ind_in) RESULT(ind_out) + INTEGER :: tmp + INTEGER :: i_dim + + tmp = ind_in + DO i_dim = SIZE(dims), 1, -1 + ind_out(i_dim) = MOD(tmp, dims(i_dim)) + tmp = tmp/dims(i_dim) + END DO + END FUNCTION + + PURE FUNCTION get_2d_indices_tensor(map, ind_in) RESULT(ind_out) !! transform nd index to 2d index, using info from index mapping. TYPE(nd_to_2d_mapping), INTENT(IN) :: map !! index mapping - INTEGER, DIMENSION(ndims_mapping(map)), INTENT(IN) :: ind_in + INTEGER, DIMENSION(map%ndim_nd), INTENT(IN) :: ind_in !! nd index INTEGER(KIND=int_8), DIMENSION(2) :: ind_out !! 2d index + INTEGER :: i + INTEGER, DIMENSION(${maxrank}$) :: ind_tmp + + DO i=1, map%ndim1_2d + ind_tmp(i) = ind_in(map%map1_2d(i)) + ENDDO + ind_out(1) = combine_tensor_index(ind_tmp(:map%ndim1_2d), map%dims1_2d) + + DO i=1, map%ndim2_2d + ind_tmp(i) = ind_in(map%map2_2d(i)) + ENDDO + ind_out(2) = combine_tensor_index(ind_tmp(:map%ndim2_2d), map%dims2_2d) + END FUNCTION - INTEGER, DIMENSION(map%ndim2_2d) :: ind_2 - INTEGER, DIMENSION(map%ndim1_2d) :: ind_1 + PURE FUNCTION get_2d_indices_pgrid(map, ind_in) RESULT(ind_out) + !! transform nd index to 2d index, using info from index mapping. 
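      ! Illustrative sketch only (all names below are local to this example, not part of the patch):
      ! combine_tensor_index/split_tensor_index use Fortran-style column-major, 1-based flattening,
      ! whereas combine_pgrid_index/split_pgrid_index follow the 0-based, row-major ordering of the
      ! process grid. A small round trip checks the tensor variant:
      PROGRAM tensor_index_roundtrip
         IMPLICIT NONE
         INTEGER, DIMENSION(3), PARAMETER :: dims = [3, 4, 5]
         INTEGER, DIMENSION(3) :: ind = [2, 1, 4], back
         INTEGER :: flat, i
         ! combine (column-major, 1-based): flat = 2 + (1 - 1)*3 + (4 - 1)*3*4 = 38
         flat = ind(SIZE(dims))
         DO i = SIZE(dims) - 1, 1, -1
            flat = (flat - 1)*dims(i) + ind(i)
         END DO
         ! split the flat index back into the nd index
         DO i = 1, SIZE(dims)
            back(i) = MOD(flat - 1, dims(i)) + 1
            flat = (flat - 1)/dims(i) + 1
         END DO
         WRITE (*, *) back ! prints 2 1 4
      END PROGRAM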
- ind_1(:) = ind_in(map%map1_2d) - ind_2(:) = ind_in(map%map2_2d) - ind_out(1) = combine_index(ind_1, map%dims1_2d, base=map%base, col_major=map%col_major) - ind_out(2) = combine_index(ind_2, map%dims2_2d, base=map%base, col_major=map%col_major) - END FUNCTION get_2d_indices + TYPE(nd_to_2d_mapping), INTENT(IN) :: map + !! index mapping + INTEGER, DIMENSION(map%ndim_nd), INTENT(IN) :: ind_in + !! nd index + INTEGER, DIMENSION(2) :: ind_out + !! 2d index + INTEGER :: i + INTEGER, DIMENSION(${maxrank}$) :: ind_tmp + + DO i=1, map%ndim1_2d + ind_tmp(i) = ind_in(map%map1_2d(i)) + ENDDO + ind_out(1) = combine_pgrid_index(ind_tmp(:map%ndim1_2d), map%dims1_2d) + + DO i=1, map%ndim2_2d + ind_tmp(i) = ind_in(map%map2_2d(i)) + ENDDO + ind_out(2) = combine_pgrid_index(ind_tmp(:map%ndim2_2d), map%dims2_2d) + END FUNCTION - FUNCTION get_nd_indices(map, ind_in) RESULT(ind_out) + PURE FUNCTION get_nd_indices_tensor(map, ind_in) RESULT(ind_out) !! transform 2d index to nd index, using info from index mapping. TYPE(nd_to_2d_mapping), INTENT(IN) :: map !! index mapping INTEGER(KIND=int_8), DIMENSION(2), INTENT(IN) :: ind_in !! 2d index - INTEGER, DIMENSION(ndims_mapping(map)) :: ind_out + INTEGER, DIMENSION(map%ndim_nd) :: ind_out !! nd index + INTEGER, DIMENSION(${maxrank}$) :: ind_tmp + INTEGER :: i + + ind_tmp(:map%ndim1_2d) = split_tensor_index(ind_in(1), map%dims1_2d) + + DO i=1, map%ndim1_2d + ind_out(map%map1_2d(i)) = ind_tmp(i) + ENDDO + + ind_tmp(:map%ndim2_2d) = split_tensor_index(ind_in(2), map%dims2_2d) + + DO i=1, map%ndim2_2d + ind_out(map%map2_2d(i)) = ind_tmp(i) + ENDDO + + END FUNCTION + + PURE FUNCTION get_nd_indices_pgrid(map, ind_in) RESULT(ind_out) + !! transform 2d index to nd index, using info from index mapping. - INTEGER, DIMENSION(map%ndim2_2d) :: ind_2 - INTEGER, DIMENSION(map%ndim1_2d) :: ind_1 + TYPE(nd_to_2d_mapping), INTENT(IN) :: map + !! index mapping + INTEGER, DIMENSION(2), INTENT(IN) :: ind_in + !! 2d index + INTEGER, DIMENSION(map%ndim_nd) :: ind_out + !! nd index - ind_1(:) = split_index(ind_in(1), map%dims1_2d, base=map%base, col_major=map%col_major) - ind_2(:) = split_index(ind_in(2), map%dims2_2d, base=map%base, col_major=map%col_major) + ind_out(map%map1_2d) = split_pgrid_index(ind_in(1), map%dims1_2d) + ind_out(map%map2_2d) = split_pgrid_index(ind_in(2), map%dims2_2d) - ind_out(map%map1_2d) = ind_1 - ind_out(map%map2_2d) = ind_2 - END FUNCTION get_nd_indices + END FUNCTION - FUNCTION dbcsr_t_inverse_order(order) + PURE FUNCTION dbcsr_t_inverse_order(order) !! 
Invert order INTEGER, DIMENSION(:), INTENT(IN) :: order INTEGER, DIMENSION(SIZE(order)) :: dbcsr_t_inverse_order diff --git a/src/tensors/dbcsr_tensor_reshape.F b/src/tensors/dbcsr_tensor_reshape.F index c52fbfb9aa8..8e92dcb28ba 100644 --- a/src/tensors/dbcsr_tensor_reshape.F +++ b/src/tensors/dbcsr_tensor_reshape.F @@ -19,7 +19,7 @@ MODULE dbcsr_tensor_reshape USE dbcsr_tensor_block, ONLY: & block_nd, create_block, destroy_block, dbcsr_t_iterator_type, dbcsr_t_iterator_next_block, & dbcsr_t_iterator_blocks_left, dbcsr_t_iterator_start, dbcsr_t_iterator_stop, dbcsr_t_get_block, & - dbcsr_t_reserve_blocks, dbcsr_t_put_block, reshape_2d_to_nd_block + dbcsr_t_reserve_blocks, dbcsr_t_put_block USE dbcsr_tensor_types, ONLY: dbcsr_t_blk_sizes, & dbcsr_t_create, & dbcsr_t_get_data_type, & diff --git a/src/tensors/dbcsr_tensor_split.F b/src/tensors/dbcsr_tensor_split.F index 2b3d2b6ac96..005fc7eb71f 100644 --- a/src/tensors/dbcsr_tensor_split.F +++ b/src/tensors/dbcsr_tensor_split.F @@ -25,7 +25,8 @@ MODULE dbcsr_tensor_split dbcsr_t_iterator_next_block, & dbcsr_t_reserve_blocks, & dbcsr_t_reserved_block_indices - USE dbcsr_tensor_index, ONLY: dbcsr_t_get_mapping_info + USE dbcsr_tensor_index, ONLY: dbcsr_t_get_mapping_info, & + dbcsr_t_inverse_order USE dbcsr_tensor_types, ONLY: dbcsr_t_create, & dbcsr_t_get_data_type, & dbcsr_t_type, & @@ -475,7 +476,7 @@ SUBROUTINE dbcsr_t_split_copyback(tensor_split_in, tensor_out, summation) END SUBROUTINE - SUBROUTINE dbcsr_t_make_compatible_blocks(tensor1, tensor2, tensor1_split, tensor2_split, nodata1, nodata2, move_data) + SUBROUTINE dbcsr_t_make_compatible_blocks(tensor1, tensor2, tensor1_split, tensor2_split, order, nodata1, nodata2, move_data) !! split two tensors with same total sizes but different block sizes such that they have equal !! block sizes !! \move_data memory optimization: transfer data s.t. tensor1 and tensor2 may be empty on return @@ -486,13 +487,17 @@ SUBROUTINE dbcsr_t_make_compatible_blocks(tensor1, tensor2, tensor1_split, tenso TYPE(dbcsr_t_type), INTENT(OUT) :: tensor1_split, tensor2_split !! tensor 1 with split blocks !! tensor 2 with split blocks + INTEGER, DIMENSION(ndims_tensor(tensor1)), & + INTENT(IN), OPTIONAL :: order + LOGICAL, INTENT(IN), OPTIONAL :: nodata1, nodata2, move_data !! don't copy data of tensor 1 !! don't copy data of tensor 2 - INTEGER, DIMENSION(:), ALLOCATABLE :: ${varlist("blk_size_split")}$, & + INTEGER, DIMENSION(:), ALLOCATABLE :: ${varlist("blk_size_split_1")}$, ${varlist("blk_size_split_2")}$, & blk_size_d_1, blk_size_d_2, blk_size_d_split - INTEGER :: size_sum_1, size_sum_2, size_sum, bind_1, bind_2, isplit, bs, idim + INTEGER :: size_sum_1, size_sum_2, size_sum, bind_1, bind_2, isplit, bs, idim, i LOGICAL :: move_prv, nodata1_prv, nodata2_prv + INTEGER, DIMENSION(ndims_tensor(tensor1)) :: order_prv IF (PRESENT(move_data)) THEN move_prv = move_data @@ -511,8 +516,14 @@ SUBROUTINE dbcsr_t_make_compatible_blocks(tensor1, tensor2, tensor1_split, tenso nodata2_prv = .FALSE. 
ENDIF - DO idim = 1, ndims_tensor(tensor1) - CALL get_ith_array(tensor1%blk_sizes, idim, blk_size_d_1) + IF (PRESENT(order)) THEN + order_prv(:) = dbcsr_t_inverse_order(order) + ELSE + order_prv(:) = (/(i, i=1, ndims_tensor(tensor1))/) + ENDIF + + DO idim = 1, ndims_tensor(tensor2) + CALL get_ith_array(tensor1%blk_sizes, order_prv(idim), blk_size_d_1) CALL get_ith_array(tensor2%blk_sizes, idim, blk_size_d_2) ALLOCATE (blk_size_d_split(SIZE(blk_size_d_1) + SIZE(blk_size_d_2))) size_sum_1 = 0 @@ -563,20 +574,27 @@ SUBROUTINE dbcsr_t_make_compatible_blocks(tensor1, tensor2, tensor1_split, tenso blk_size_d_split(isplit) = bs ENDIF +#:for idim in range(1, maxdim+1) + IF (order_prv(idim) == ${idim}$) THEN + CALL allocate_any(blk_size_split_1_${idim}$, source=blk_size_d_split(:isplit)) + ENDIF +#:endfor + #:for idim in range(1, maxdim+1) IF (idim == ${idim}$) THEN - CALL allocate_any(blk_size_split_${idim}$, source=blk_size_d_split(:isplit)) + CALL allocate_any(blk_size_split_2_${idim}$, source=blk_size_d_split(:isplit)) ENDIF #:endfor - DEALLOCATE (blk_size_d_split) + + DEALLOCATE (blk_size_d_split, blk_size_d_1, blk_size_d_2) ENDDO #:for ndim in ndims IF (ndims_tensor(tensor1) == ${ndim}$) THEN - CALL dbcsr_t_split_blocks_generic(tensor1, tensor1_split, ${varlist("blk_size_split", nmax=ndim)}$, nodata=nodata1) + CALL dbcsr_t_split_blocks_generic(tensor1, tensor1_split, ${varlist("blk_size_split_1", nmax=ndim)}$, nodata=nodata1) IF (move_prv .AND. .NOT. nodata1_prv) CALL dbcsr_t_clear(tensor1) - CALL dbcsr_t_split_blocks_generic(tensor2, tensor2_split, ${varlist("blk_size_split", nmax=ndim)}$, nodata=nodata2) + CALL dbcsr_t_split_blocks_generic(tensor2, tensor2_split, ${varlist("blk_size_split_2", nmax=ndim)}$, nodata=nodata2) IF (move_prv .AND. .NOT. nodata2_prv) CALL dbcsr_t_clear(tensor2) ENDIF #:endfor diff --git a/src/tensors/dbcsr_tensor_test.F b/src/tensors/dbcsr_tensor_test.F index 2fae73e91f5..e782af2cd5a 100644 --- a/src/tensors/dbcsr_tensor_test.F +++ b/src/tensors/dbcsr_tensor_test.F @@ -48,7 +48,7 @@ MODULE dbcsr_tensor_test mp_sum, & mp_cart_create USE dbcsr_allocate_wrap, ONLY: allocate_any - USE dbcsr_tensor_index, ONLY: combine_index, & + USE dbcsr_tensor_index, ONLY: combine_tensor_index, & dbcsr_t_get_mapping_info USE dbcsr_tas_test, ONLY: dbcsr_tas_checksum USE dbcsr_data_types, ONLY: dbcsr_scalar_type @@ -503,7 +503,7 @@ SUBROUTINE enumerate_block_elements(blk_size, blk_offset, tensor_size, ${varlist #:endfor arr_ind(:) = [${varlist("i", nmax=ndim)}$] tens_ind(:) = arr_ind(:) + blk_offset(:) - 1 - blk_${ndim}$ (${arrlist("arr_ind", nmax=ndim)}$) = combine_index(tens_ind, tensor_size) + blk_${ndim}$ (${arrlist("arr_ind", nmax=ndim)}$) = combine_tensor_index(tens_ind, tensor_size) #:for idim in range(ndim,0,-1) ENDDO #:endfor diff --git a/src/tensors/dbcsr_tensor_types.F b/src/tensors/dbcsr_tensor_types.F index b5c3996c490..ef77a3f80d8 100644 --- a/src/tensors/dbcsr_tensor_types.F +++ b/src/tensors/dbcsr_tensor_types.F @@ -33,8 +33,9 @@ MODULE dbcsr_tensor_types USE dbcsr_tas_types, ONLY: & dbcsr_tas_type, dbcsr_tas_distribution_type, dbcsr_tas_split_info USE dbcsr_tensor_index, ONLY: & - get_2d_indices, get_nd_indices, create_nd_to_2d_mapping, destroy_nd_to_2d_mapping, & - dbcsr_t_get_mapping_info, nd_to_2d_mapping, split_index, combine_index, ndims_mapping + get_2d_indices_tensor, get_nd_indices_pgrid, create_nd_to_2d_mapping, destroy_nd_to_2d_mapping, & + dbcsr_t_get_mapping_info, nd_to_2d_mapping, split_tensor_index, combine_tensor_index, combine_pgrid_index, & + 
split_pgrid_index, ndims_mapping USE dbcsr_tas_split, ONLY: & dbcsr_tas_create_split_rows_or_cols, dbcsr_tas_release_info, dbcsr_tas_info_hold, & dbcsr_tas_create_split, dbcsr_tas_get_split_info @@ -132,9 +133,9 @@ MODULE dbcsr_tensor_types TYPE(array_list) :: nd_dist CONTAINS ! map matrix index to process grid: - PROCEDURE :: dist => r_dist_t + PROCEDURE :: dist => tas_dist_t ! map process grid to matrix index: - PROCEDURE :: rowcols => r_rowcols_t + PROCEDURE :: rowcols => tas_rowcols_t END TYPE ! block size object for one matrix index @@ -144,7 +145,7 @@ MODULE dbcsr_tensor_types ! block size only for this matrix dimension: TYPE(array_list) :: blk_size CONTAINS - PROCEDURE :: data => r_blk_size_t + PROCEDURE :: data => tas_blk_size_t END TYPE INTERFACE dbcsr_t_create @@ -217,29 +218,29 @@ FUNCTION new_dbcsr_tas_dist_t(nd_dist, map_blks, map_grid, which_dim) new_dbcsr_tas_dist_t%nmrowcol = matrix_dims(which_dim) END FUNCTION - FUNCTION r_dist_t(t, rowcol) + FUNCTION tas_dist_t(t, rowcol) CLASS(dbcsr_tas_dist_t), INTENT(IN) :: t INTEGER(KIND=int_8), INTENT(IN) :: rowcol - INTEGER, DIMENSION(SIZE(t%dims)) :: ind_blk - INTEGER, DIMENSION(SIZE(t%dims)) :: dist_blk - INTEGER :: r_dist_t + INTEGER, DIMENSION(${maxrank}$) :: ind_blk + INTEGER, DIMENSION(${maxrank}$) :: dist_blk + INTEGER :: tas_dist_t - ind_blk(:) = split_index(rowcol, t%dims, base=1, col_major=.TRUE.) - dist_blk(:) = get_array_elements(t%nd_dist, ind_blk) - r_dist_t = INT(combine_index(dist_blk, t%dims_grid, base=0, col_major=.FALSE.)) + ind_blk(:SIZE(t%dims)) = split_tensor_index(rowcol, t%dims) + dist_blk(:SIZE(t%dims)) = get_array_elements(t%nd_dist, ind_blk(:SIZE(t%dims))) + tas_dist_t = combine_pgrid_index(dist_blk(:SIZE(t%dims)), t%dims_grid) END FUNCTION - FUNCTION r_rowcols_t(t, dist) + FUNCTION tas_rowcols_t(t, dist) CLASS(dbcsr_tas_dist_t), INTENT(IN) :: t INTEGER, INTENT(IN) :: dist - INTEGER(KIND=int_8), DIMENSION(:), ALLOCATABLE :: r_rowcols_t - INTEGER, DIMENSION(SIZE(t%dims)) :: dist_blk + INTEGER(KIND=int_8), DIMENSION(:), ALLOCATABLE :: tas_rowcols_t + INTEGER, DIMENSION(${maxrank}$) :: dist_blk INTEGER, DIMENSION(:), ALLOCATABLE :: ${varlist("dist")}$, ${varlist("blks")}$, blks_tmp, nd_ind INTEGER :: ${varlist("i")}$, i, iblk, iblk_count, nblks INTEGER(KIND=int_8) :: nrowcols TYPE(array_list) :: blks - dist_blk(:) = split_index(INT(dist, int_8), t%dims_grid, base=0, col_major=.FALSE.) + dist_blk(:SIZE(t%dims)) = split_pgrid_index(dist, t%dims_grid) #:for ndim in range(1, maxdim+1) IF (SIZE(t%dims) == ${ndim}$) THEN @@ -271,7 +272,7 @@ FUNCTION r_rowcols_t(t, dist) #:endfor nrowcols = PRODUCT(INT(sizes_of_arrays(blks), int_8)) - ALLOCATE (r_rowcols_t(nrowcols)) + ALLOCATE (tas_rowcols_t(nrowcols)) #:for ndim in range(1, maxdim+1) IF (SIZE(t%dims) == ${ndim}$) THEN @@ -283,7 +284,7 @@ FUNCTION r_rowcols_t(t, dist) i = i + 1 nd_ind(:) = get_array_elements(blks, [${varlist("i", nmax=ndim)}$]) - r_rowcols_t(i) = combine_index(nd_ind, t%dims, base=1, col_major=.TRUE.) 
+ tas_rowcols_t(i) = combine_tensor_index(nd_ind, t%dims) #:for idim in range(1,ndim+1) ENDDO #:endfor @@ -327,16 +328,16 @@ FUNCTION new_dbcsr_tas_blk_size_t(blk_size, map_blks, which_dim) KIND=int_8)) END FUNCTION - FUNCTION r_blk_size_t(t, rowcol) + FUNCTION tas_blk_size_t(t, rowcol) CLASS(dbcsr_tas_blk_size_t), INTENT(IN) :: t INTEGER(KIND=int_8), INTENT(IN) :: rowcol - INTEGER :: r_blk_size_t + INTEGER :: tas_blk_size_t INTEGER, DIMENSION(SIZE(t%dims)) :: ind_blk INTEGER, DIMENSION(SIZE(t%dims)) :: blk_size - ind_blk(:) = split_index(rowcol, t%dims, base=1, col_major=.TRUE.) + ind_blk(:) = split_tensor_index(rowcol, t%dims) blk_size(:) = get_array_elements(t%blk_size, ind_blk) - r_blk_size_t = PRODUCT(blk_size) + tas_blk_size_t = PRODUCT(blk_size) END FUNCTION @@ -1105,7 +1106,7 @@ SUBROUTINE dbcsr_t_get_stored_coordinates(tensor, ind_nd, processor) INTEGER(KIND=int_8), DIMENSION(2) :: ind_2d - ind_2d(:) = get_2d_indices(tensor%nd_index_blk, ind_nd) + ind_2d(:) = get_2d_indices_tensor(tensor%nd_index_blk, ind_nd) CALL dbcsr_tas_get_stored_coordinates(tensor%matrix_rep, ind_2d(1), ind_2d(2), processor) END SUBROUTINE @@ -1285,7 +1286,7 @@ SUBROUTINE mp_environ_pgrid(pgrid, dims, task_coor) CALL mp_environ(nproc, dims_2d, task_coor_2d, pgrid%mp_comm_2d) CALL mp_environ(nproc, dims_2d, task_coor_2d, pgrid%mp_comm_2d) CALL dbcsr_t_get_mapping_info(pgrid%nd_index_grid, dims_nd=dims) - task_coor = get_nd_indices(pgrid%nd_index_grid, INT(task_coor_2d, KIND=int_8)) + task_coor = get_nd_indices_pgrid(pgrid%nd_index_grid, task_coor_2d) END SUBROUTINE #:for dparam, dtype, dsuffix in dtype_float_list From 37ffadbf2c7976ee4a5d4d9c7c12047ee2ac4c3a Mon Sep 17 00:00:00 2001 From: Patrick Seewald Date: Mon, 16 Mar 2020 20:05:07 +0100 Subject: [PATCH 08/34] Tensors: Get data_type directly instead of going through costly dbcsr_get_info --- src/tas/dbcsr_tas_base.F | 15 ++++++++------- src/tensors/dbcsr_tensor_types.F | 4 ++-- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/tas/dbcsr_tas_base.F b/src/tas/dbcsr_tas_base.F index aa27d0b5f8e..6bc4e610523 100644 --- a/src/tas/dbcsr_tas_base.F +++ b/src/tas/dbcsr_tas_base.F @@ -858,13 +858,6 @@ FUNCTION dbcsr_tas_info(matrix) dbcsr_tas_info = matrix%dist%info END FUNCTION - FUNCTION dbcsr_tas_get_data_type(matrix) RESULT(data_type) - TYPE(dbcsr_tas_type), INTENT(IN) :: matrix - INTEGER :: data_type - - CALL dbcsr_get_info(matrix%matrix, data_type=data_type) - END FUNCTION - FUNCTION dbcsr_tas_nblkrows_total(matrix) RESULT(nblkrows_total) TYPE(dbcsr_tas_type), INTENT(IN) :: matrix INTEGER(KIND=int_8) :: nblkrows_total @@ -960,6 +953,14 @@ FUNCTION dbcsr_tas_get_nze_total(matrix) CALL mp_sum(dbcsr_tas_get_nze_total, info%mp_comm) END FUNCTION + FUNCTION dbcsr_tas_get_data_type(matrix) RESULT(data_type) + !! As dbcsr_get_data_type + TYPE(dbcsr_tas_type), INTENT(IN) :: matrix + INTEGER :: data_type + + data_type = dbcsr_get_data_type(matrix%matrix) + END FUNCTION + FUNCTION dbcsr_tas_get_data_size(matrix) RESULT(data_size) !! 
As dbcsr_get_data_size TYPE(dbcsr_tas_type), INTENT(IN) :: matrix diff --git a/src/tensors/dbcsr_tensor_types.F b/src/tensors/dbcsr_tensor_types.F index ef77a3f80d8..fe2d26b977e 100644 --- a/src/tensors/dbcsr_tensor_types.F +++ b/src/tensors/dbcsr_tensor_types.F @@ -29,7 +29,7 @@ MODULE dbcsr_tensor_types dbcsr_tas_distribution_destroy, dbcsr_tas_finalize, dbcsr_tas_get_info, & dbcsr_tas_destroy, dbcsr_tas_get_stored_coordinates, dbcsr_tas_set, dbcsr_tas_filter, & dbcsr_tas_get_num_blocks, dbcsr_tas_get_num_blocks_total, dbcsr_tas_get_data_size, dbcsr_tas_get_nze, & - dbcsr_tas_get_nze_total, dbcsr_tas_clear + dbcsr_tas_get_nze_total, dbcsr_tas_clear, dbcsr_tas_get_data_type USE dbcsr_tas_types, ONLY: & dbcsr_tas_type, dbcsr_tas_distribution_type, dbcsr_tas_split_info USE dbcsr_tensor_index, ONLY: & @@ -1068,7 +1068,7 @@ FUNCTION dbcsr_t_get_data_type(tensor) RESULT(data_type) TYPE(dbcsr_t_type), INTENT(IN) :: tensor INTEGER :: data_type - CALL dbcsr_tas_get_info(tensor%matrix_rep, data_type=data_type) + data_type = dbcsr_tas_get_data_type(tensor%matrix_rep) END FUNCTION SUBROUTINE dbcsr_t_blk_sizes(tensor, ind, blk_size) From fff560334567f358c563c4bc0a63ca256eaf8237 Mon Sep 17 00:00:00 2001 From: Patrick Seewald Date: Mon, 23 Mar 2020 20:06:06 +0100 Subject: [PATCH 09/34] TAS: calculate process ID directly instead of invoking mpi_cart_rank --- src/tas/dbcsr_tas_base.F | 4 +++- src/tas/dbcsr_tas_split.F | 3 +++ src/tas/dbcsr_tas_types.F | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/tas/dbcsr_tas_base.F b/src/tas/dbcsr_tas_base.F index 6bc4e610523..c9cc5177ce8 100644 --- a/src/tas/dbcsr_tas_base.F +++ b/src/tas/dbcsr_tas_base.F @@ -476,7 +476,9 @@ SUBROUTINE dbcsr_tas_get_stored_coordinates(matrix, row, column, processor) pcoord(1) = matrix%dist%row_dist%dist(row) pcoord(2) = matrix%dist%col_dist%dist(column) info = dbcsr_tas_info(matrix) - CALL mp_cart_rank(info%mp_comm, pcoord, processor) + + ! workaround for inefficient mpi_cart_rank + processor = pcoord(1)*info%pdims(2) + pcoord(2) END SUBROUTINE diff --git a/src/tas/dbcsr_tas_split.F b/src/tas/dbcsr_tas_split.F index 95a2e139d2f..37491748509 100644 --- a/src/tas/dbcsr_tas_split.F +++ b/src/tas/dbcsr_tas_split.F @@ -105,6 +105,7 @@ SUBROUTINE dbcsr_tas_create_split_rows_or_cols(split_info, mp_comm, ngroup, igro CALL mp_environ(numproc, iproc, mp_comm) CALL mp_environ(numproc, pdims, pcoord, mp_comm) + split_info%pdims = pdims CALL mp_environ(numproc_group, iproc_group, mp_comm_group) @@ -448,6 +449,8 @@ SUBROUTINE dbcsr_tas_release_info(split_info) CALL mp_comm_free(split_info%mp_comm) DEALLOCATE (split_info%refcount) ENDIF + + split_info%pdims = 0 END SUBROUTINE SUBROUTINE dbcsr_tas_info_hold(split_info) diff --git a/src/tas/dbcsr_tas_types.F b/src/tas/dbcsr_tas_types.F index a9d846d2548..0f01c81aa67 100644 --- a/src/tas/dbcsr_tas_types.F +++ b/src/tas/dbcsr_tas_types.F @@ -34,6 +34,7 @@ MODULE dbcsr_tas_types ! info on distribution of matrix rows / columns to different subgroups. TYPE dbcsr_tas_split_info INTEGER :: mp_comm ! global communicator + INTEGER, DIMENSION(2) :: pdims ! dimensions of process grid INTEGER :: igroup ! which subgroup do I belong to INTEGER :: ngroup ! how many groups in total INTEGER :: split_rowcol ! split row or column? From f299de7ef0a6d67c126a09da6577b48b1298b103 Mon Sep 17 00:00:00 2001 From: Hans Pabst Date: Wed, 25 Mar 2020 08:34:27 +0100 Subject: [PATCH 10/34] More compact version-checks: revised TO_VERSION macro and provided variants (TO_VERSION2, TO_VERSION3). 
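For illustration, the revised encoding is MAJOR*10000 + MINOR*100 (TO_VERSION3 adds the
update number on top), so TO_VERSION(1, 10) evaluates to 11000 and a guard of the form
used below in dbcsr_block_operations.F,

#if defined(__LIBXSMM) && TO_VERSION(1, 10) <= TO_VERSION(LIBXSMM_CONFIG_VERSION_MAJOR, LIBXSMM_CONFIG_VERSION_MINOR)
! compiled only against LIBXSMM 1.10 or newer
#endif

holds exactly when the LIBXSMM major/minor version is at least 1.10.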
Introduced libxsmm_diff to implement family of array_eq functions. Prettify. --- src/base/dbcsr_base_uses.f90 | 9 ++++++--- src/block/dbcsr_block_operations.F | 4 ++-- src/mm/dbcsr_mm_hostdrv.F | 2 +- src/mm/dbcsr_mm_hostdrv.f90 | 20 ++++++++++---------- src/tas/dbcsr_tas_util.F | 23 ++++++++++++++++------- src/utils/dbcsr_toollib.F | 2 +- 6 files changed, 36 insertions(+), 24 deletions(-) diff --git a/src/base/dbcsr_base_uses.f90 b/src/base/dbcsr_base_uses.f90 index 6b4b49622ed..dbe832f69a6 100644 --- a/src/base/dbcsr_base_uses.f90 +++ b/src/base/dbcsr_base_uses.f90 @@ -42,9 +42,12 @@ ! and will be optimized away completely by the compiler #define MARK_USED(foo) IF(.FALSE.)THEN; DO ; IF(SIZE(SHAPE(foo))==-1) EXIT ; END DO ; ENDIF -! Calculate version number from 3-components. Can be used for comparison e.g., -! TO_VERSION(4, 9, 0) <= TO_VERSION(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) -#define TO_VERSION(MAJOR, MINOR, UPDATE) ((MAJOR) * 10000 + (MINOR) * 100 + (UPDATE)) +! Calculate version number from 2 or 3 components. Can be used for comparison e.g., +! TO_VERSION3(4, 9, 0) <= TO_VERSION3(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +! TO_VERSION(8, 0) <= TO_VERSION(__GNUC__, __GNUC_MINOR__) +#define TO_VERSION2(MAJOR, MINOR) ((MAJOR) * 10000 + (MINOR) * 100) +#define TO_VERSION3(MAJOR, MINOR, UPDATE) (TO_VERSION2(MAJOR, MINOR) + (UPDATE)) +#define TO_VERSION TO_VERSION2 ! LIBXSMM has a FORTRAN-suitable header with macro/version definitions (since v1.8.2). ! Allows macro-toggles (in addition to parameters). diff --git a/src/block/dbcsr_block_operations.F b/src/block/dbcsr_block_operations.F index 8500afe20fd..a9688a8fb8e 100644 --- a/src/block/dbcsr_block_operations.F +++ b/src/block/dbcsr_block_operations.F @@ -32,10 +32,10 @@ MODULE dbcsr_block_operations !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads IMPLICIT NONE -#if defined(__LIBXSMM) && TO_VERSION(1, 10, 0) < TO_VERSION(LIBXSMM_CONFIG_VERSION_MAJOR, LIBXSMM_CONFIG_VERSION_MINOR, LIBXSMM_CONFIG_VERSION_UPDATE) +#if defined(__LIBXSMM) && TO_VERSION(1, 10) < TO_VERSION(LIBXSMM_CONFIG_VERSION_MAJOR, LIBXSMM_CONFIG_VERSION_MINOR) # define __LIBXSMM_BLOCKOPS #endif -#if defined(__LIBXSMM) && TO_VERSION(1, 10, 0) <= TO_VERSION(LIBXSMM_CONFIG_VERSION_MAJOR, LIBXSMM_CONFIG_VERSION_MINOR, LIBXSMM_CONFIG_VERSION_UPDATE) +#if defined(__LIBXSMM) && TO_VERSION(1, 10) <= TO_VERSION(LIBXSMM_CONFIG_VERSION_MAJOR, LIBXSMM_CONFIG_VERSION_MINOR) # define __LIBXSMM_TRANS #endif #if defined(__MKL) || defined(__LIBXSMM_TRANS) || defined(__LIBXSMM_BLOCKOPS) || !defined(NDEBUG) diff --git a/src/mm/dbcsr_mm_hostdrv.F b/src/mm/dbcsr_mm_hostdrv.F index dd72eb56ba5..225ce1d5302 100644 --- a/src/mm/dbcsr_mm_hostdrv.F +++ b/src/mm/dbcsr_mm_hostdrv.F @@ -182,7 +182,7 @@ SUBROUTINE dbcsr_mm_hostdrv_process(this, left, right, params, stack_size, & #if defined(__LIBXSMM) CASE (mm_driver_xsmm) SELECT CASE (this%data_area%d%data_type) -#if TO_VERSION(1, 10, 0) < TO_VERSION(LIBXSMM_CONFIG_VERSION_MAJOR, LIBXSMM_CONFIG_VERSION_MINOR, LIBXSMM_CONFIG_VERSION_UPDATE) +#if TO_VERSION(1, 10) < TO_VERSION(LIBXSMM_CONFIG_VERSION_MAJOR, LIBXSMM_CONFIG_VERSION_MINOR) CASE (dbcsr_type_real_4) CALL xsmm_process_mm_batch_s(stack_descr, params, stack_size, & left%data_area%d%r_sp, right%data_area%d%r_sp, this%data_area%d%r_sp, used_smm) diff --git a/src/mm/dbcsr_mm_hostdrv.f90 b/src/mm/dbcsr_mm_hostdrv.f90 index 338c7a5ca13..0949788a5f0 100644 --- a/src/mm/dbcsr_mm_hostdrv.f90 +++ b/src/mm/dbcsr_mm_hostdrv.f90 @@ -140,7 +140,7 @@ 
SUBROUTINE smm_process_mm_stack_${nametype1}$ (stack_descr, params, & MARK_USED(stack_descr) END SUBROUTINE smm_process_mm_stack_${nametype1}$ -#if defined(__LIBXSMM) && TO_VERSION(1, 10, 0) < TO_VERSION(LIBXSMM_CONFIG_VERSION_MAJOR, LIBXSMM_CONFIG_VERSION_MINOR, LIBXSMM_CONFIG_VERSION_UPDATE) +#if defined(__LIBXSMM) && TO_VERSION(1, 10) < TO_VERSION(LIBXSMM_CONFIG_VERSION_MAJOR, LIBXSMM_CONFIG_VERSION_MINOR) SUBROUTINE xsmm_process_mm_batch_${nametype1}$ (stack_descr, params, & stack_size, a_data, b_data, c_data, used_smm) !! Processes MM stack and issues libxsmm calls @@ -177,24 +177,24 @@ SUBROUTINE xsmm_process_mm_batch_${nametype1}$ (stack_descr, params, & IF (stack_descr%defined_mnk) THEN ! homogeneous stack CALL libxsmm_gemm_batch(LIBXSMM_GEMM_PRECISION, LIBXSMM_GEMM_PRECISION, 'N', 'N', & m=stack_descr%m, n=stack_descr%n, k=stack_descr%k, & - alpha=libxsmm_ptr0(one), a=libxsmm_ptr0(a_data(LBOUND(a_data,1))), & + alpha=libxsmm_ptr0(one), a=libxsmm_ptr0(a_data(LBOUND(a_data, 1))), & lda=stack_descr%m, & - b=libxsmm_ptr0(b_data(LBOUND(b_data,1))), & + b=libxsmm_ptr0(b_data(LBOUND(b_data, 1))), & ldb=stack_descr%k, & - beta=libxsmm_ptr0(one), c=libxsmm_ptr0(c_data(LBOUND(c_data,1))), & + beta=libxsmm_ptr0(one), c=libxsmm_ptr0(c_data(LBOUND(c_data, 1))), & ldc=stack_descr%m, index_base=1, & index_stride=KIND(params)*dbcsr_ps_width, & - stride_a=libxsmm_ptr0(params(p_a_first,1)), & - stride_b=libxsmm_ptr0(params(p_b_first,1)), & - stride_c=libxsmm_ptr0(params(p_c_first,1)), & + stride_a=libxsmm_ptr0(params(p_a_first, 1)), & + stride_b=libxsmm_ptr0(params(p_b_first, 1)), & + stride_c=libxsmm_ptr0(params(p_c_first, 1)), & batchsize=stack_size) used_smm = .TRUE. ELSE ! Dispatch for every (different) matrix DO sp = 1, stack_size CALL libxsmm_gemm(m=params(p_m, sp), n=params(p_n, sp), k=params(p_k, sp), & - a=a_data(params(p_a_first,sp)), & - b=b_data(params(p_b_first,sp)), & - c=c_data(params(p_c_first,sp)), & + a=a_data(params(p_a_first, sp)), & + b=b_data(params(p_b_first, sp)), & + c=c_data(params(p_c_first, sp)), & alpha=one, beta=one) ENDDO used_smm = .FALSE. diff --git a/src/tas/dbcsr_tas_util.F b/src/tas/dbcsr_tas_util.F index b436156685f..c44840c96d1 100644 --- a/src/tas/dbcsr_tas_util.F +++ b/src/tas/dbcsr_tas_util.F @@ -19,6 +19,12 @@ MODULE dbcsr_tas_util USE dbcsr_mpiwrap, ONLY: mp_cart_rank, & mp_environ USE dbcsr_index_operations, ONLY: dbcsr_sort_indices +#if TO_VERSION(1, 11) < TO_VERSION(LIBXSMM_CONFIG_VERSION_MAJOR, LIBXSMM_CONFIG_VERSION_MINOR) + USE libxsmm, ONLY: libxsmm_diff +# define PURE_ARRAY_EQ +#else +# define PURE_ARRAY_EQ PURE +#endif #include "base/dbcsr_base_uses.f90" IMPLICIT NONE @@ -116,7 +122,6 @@ SUBROUTINE index_unique(index_in, index_out) ALLOCATE (index_out(count, 2)) index_out(:, :) = index_tmp(1:count, :) - END SUBROUTINE SUBROUTINE invert_transpose_flag(trans_flag) @@ -129,22 +134,26 @@ SUBROUTINE invert_transpose_flag(trans_flag) ENDIF END SUBROUTINE - PURE FUNCTION array_eq_i(arr1, arr2) + PURE_ARRAY_EQ FUNCTION array_eq_i(arr1, arr2) INTEGER, DIMENSION(:), INTENT(IN) :: arr1, arr2 LOGICAL :: array_eq_i - +#if TO_VERSION(1, 11) < TO_VERSION(LIBXSMM_CONFIG_VERSION_MAJOR, LIBXSMM_CONFIG_VERSION_MINOR) + array_eq_i = .NOT. libxsmm_diff(arr1, arr2) +#else array_eq_i = .FALSE. IF (SIZE(arr1) .EQ. 
SIZE(arr2)) array_eq_i = ALL(arr1 == arr2) - +#endif END FUNCTION - PURE FUNCTION array_eq_i8(arr1, arr2) + PURE_ARRAY_EQ FUNCTION array_eq_i8(arr1, arr2) INTEGER(KIND=int_8), DIMENSION(:), INTENT(IN) :: arr1, arr2 LOGICAL :: array_eq_i8 - +#if TO_VERSION(1, 11) < TO_VERSION(LIBXSMM_CONFIG_VERSION_MAJOR, LIBXSMM_CONFIG_VERSION_MINOR) + array_eq_i8 = .NOT. libxsmm_diff(arr1, arr2) +#else array_eq_i8 = .FALSE. IF (SIZE(arr1) .EQ. SIZE(arr2)) array_eq_i8 = ALL(arr1 == arr2) - +#endif END FUNCTION END MODULE diff --git a/src/utils/dbcsr_toollib.F b/src/utils/dbcsr_toollib.F index 2a595e1ee97..49796dd4cb2 100644 --- a/src/utils/dbcsr_toollib.F +++ b/src/utils/dbcsr_toollib.F @@ -167,7 +167,7 @@ FUNCTION joaat_hash(key) RESULT(hash_index) !! we return already the index in the table as a final result ! LIBXSMM: at least v1.9.0-6 is required -#if defined(__LIBXSMM) && TO_VERSION(1, 10, 0) <= TO_VERSION(LIBXSMM_CONFIG_VERSION_MAJOR, LIBXSMM_CONFIG_VERSION_MINOR, LIBXSMM_CONFIG_VERSION_UPDATE) +#if defined(__LIBXSMM) && TO_VERSION(1, 10) <= TO_VERSION(LIBXSMM_CONFIG_VERSION_MAJOR, LIBXSMM_CONFIG_VERSION_MINOR) USE libxsmm, ONLY: libxsmm_hash INTEGER, PARAMETER :: seed = 0 INTEGER, DIMENSION(:), INTENT(IN) :: key From d172997a9f353deaed3316203dafbd500f747ffe Mon Sep 17 00:00:00 2001 From: Hans Pabst Date: Wed, 25 Mar 2020 08:54:54 +0100 Subject: [PATCH 11/34] Make base-uses available prior to using TO_VERSION macro. Adjusted version check. --- src/tas/dbcsr_tas_util.F | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/tas/dbcsr_tas_util.F b/src/tas/dbcsr_tas_util.F index c44840c96d1..dd526df1065 100644 --- a/src/tas/dbcsr_tas_util.F +++ b/src/tas/dbcsr_tas_util.F @@ -19,13 +19,14 @@ MODULE dbcsr_tas_util USE dbcsr_mpiwrap, ONLY: mp_cart_rank, & mp_environ USE dbcsr_index_operations, ONLY: dbcsr_sort_indices -#if TO_VERSION(1, 11) < TO_VERSION(LIBXSMM_CONFIG_VERSION_MAJOR, LIBXSMM_CONFIG_VERSION_MINOR) + +#include "base/dbcsr_base_uses.f90" +#if TO_VERSION(1, 11) <= TO_VERSION(LIBXSMM_CONFIG_VERSION_MAJOR, LIBXSMM_CONFIG_VERSION_MINOR) USE libxsmm, ONLY: libxsmm_diff # define PURE_ARRAY_EQ #else # define PURE_ARRAY_EQ PURE #endif -#include "base/dbcsr_base_uses.f90" IMPLICIT NONE PRIVATE @@ -137,7 +138,7 @@ SUBROUTINE invert_transpose_flag(trans_flag) PURE_ARRAY_EQ FUNCTION array_eq_i(arr1, arr2) INTEGER, DIMENSION(:), INTENT(IN) :: arr1, arr2 LOGICAL :: array_eq_i -#if TO_VERSION(1, 11) < TO_VERSION(LIBXSMM_CONFIG_VERSION_MAJOR, LIBXSMM_CONFIG_VERSION_MINOR) +#if TO_VERSION(1, 11) <= TO_VERSION(LIBXSMM_CONFIG_VERSION_MAJOR, LIBXSMM_CONFIG_VERSION_MINOR) array_eq_i = .NOT. libxsmm_diff(arr1, arr2) #else array_eq_i = .FALSE. @@ -148,7 +149,7 @@ PURE_ARRAY_EQ FUNCTION array_eq_i(arr1, arr2) PURE_ARRAY_EQ FUNCTION array_eq_i8(arr1, arr2) INTEGER(KIND=int_8), DIMENSION(:), INTENT(IN) :: arr1, arr2 LOGICAL :: array_eq_i8 -#if TO_VERSION(1, 11) < TO_VERSION(LIBXSMM_CONFIG_VERSION_MAJOR, LIBXSMM_CONFIG_VERSION_MINOR) +#if TO_VERSION(1, 11) <= TO_VERSION(LIBXSMM_CONFIG_VERSION_MAJOR, LIBXSMM_CONFIG_VERSION_MINOR) array_eq_i8 = .NOT. libxsmm_diff(arr1, arr2) #else array_eq_i8 = .FALSE. From 6ba885dbc6555c7be431ef46d9282661d7c6c473 Mon Sep 17 00:00:00 2001 From: Hans Pabst Date: Wed, 25 Mar 2020 09:53:03 +0100 Subject: [PATCH 12/34] Implemented array_equality functions using libxsmm_diff (module dbcsr_array_types). 
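A minimal sketch of the fallback branch for reference (the name arrays_equal is illustrative
only): without libxsmm_diff the equality test has to compare sizes before using ALL, since
ALL(a == b) requires conformable arrays; with LIBXSMM >= 1.11 the body reduces to
are_equal = .NOT. libxsmm_diff(a, b), as libxsmm_diff reports whether the two arrays differ.

   PURE FUNCTION arrays_equal(a, b) RESULT(are_equal)
      ! stand-in for the fallback branch of the array_equality_* functions
      INTEGER, DIMENSION(:), INTENT(IN) :: a, b
      LOGICAL :: are_equal
      are_equal = .FALSE.
      IF (SIZE(a) == SIZE(b)) are_equal = ALL(a == b)
   END FUNCTION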
--- src/core/dbcsr_array_types.F | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/src/core/dbcsr_array_types.F b/src/core/dbcsr_array_types.F index bdbffb368fa..6c0f100357c 100644 --- a/src/core/dbcsr_array_types.F +++ b/src/core/dbcsr_array_types.F @@ -10,6 +10,14 @@ MODULE dbcsr_array_types !! Array objects with reference counting. +#include "base/dbcsr_base_uses.f90" +#if TO_VERSION(1, 11) <= TO_VERSION(LIBXSMM_CONFIG_VERSION_MAJOR, LIBXSMM_CONFIG_VERSION_MINOR) + USE libxsmm, ONLY: libxsmm_diff +# define PURE_ARRAY_EQUALITY +#else +# define PURE_ARRAY_EQUALITY PURE +#endif + IMPLICIT NONE PRIVATE @@ -45,12 +53,10 @@ MODULE dbcsr_array_types INTERFACE array_size MODULE PROCEDURE array_size_i1d END INTERFACE - INTERFACE array_equality MODULE PROCEDURE array_equality_i1 MODULE PROCEDURE array_equality_i1d END INTERFACE - INTERFACE array_get MODULE PROCEDURE array_get_i1d MODULE PROCEDURE array_get_i1 @@ -60,6 +66,7 @@ MODULE dbcsr_array_types INTEGER, DIMENSION(:), POINTER :: DATA => Null() INTEGER :: refcount = 0 END TYPE array_i1d_type + TYPE array_i1d_obj TYPE(array_i1d_type), POINTER :: low => Null() END TYPE array_i1d_obj @@ -88,6 +95,7 @@ SUBROUTINE array_new_i1d(array, DATA, gift) array%low%data(:) = DATA(:) ENDIF END SUBROUTINE array_new_i1d + SUBROUTINE array_new_i1d_lb(array, DATA, lb) TYPE(array_i1d_obj), INTENT(OUT) :: array INTEGER, DIMENSION(:), INTENT(IN) :: DATA @@ -101,12 +109,13 @@ SUBROUTINE array_new_i1d_lb(array, DATA, lb) ALLOCATE (array%low%data(lb:ub)) array%low%data(:) = DATA(:) END SUBROUTINE array_new_i1d_lb + SUBROUTINE array_hold_i1d(array) TYPE(array_i1d_obj), INTENT(INOUT) :: array - !$OMP ATOMIC array%low%refcount = array%low%refcount + 1 END SUBROUTINE array_hold_i1d + SUBROUTINE array_release_i1d(array) TYPE(array_i1d_obj), INTENT(INOUT) :: array @@ -118,11 +127,13 @@ SUBROUTINE array_release_i1d(array) ENDIF ENDIF END SUBROUTINE array_release_i1d + PURE SUBROUTINE array_nullify_i1d(array) TYPE(array_i1d_obj), INTENT(INOUT) :: array NULLIFY (array%low) END SUBROUTINE array_nullify_i1d + PURE FUNCTION array_exists_i1d(array) RESULT(array_exists) TYPE(array_i1d_obj), INTENT(IN) :: array LOGICAL :: array_exists @@ -130,6 +141,7 @@ PURE FUNCTION array_exists_i1d(array) RESULT(array_exists) array_exists = ASSOCIATED(array%low) IF (array_exists) array_exists = array%low%refcount .GT. 0 END FUNCTION array_exists_i1d + FUNCTION array_data_i1d(array) RESULT(DATA) TYPE(array_i1d_obj), INTENT(IN) :: array INTEGER, DIMENSION(:), POINTER :: DATA @@ -140,6 +152,7 @@ FUNCTION array_data_i1d(array) RESULT(DATA) NULLIFY (DATA) ENDIF END FUNCTION array_data_i1d + PURE FUNCTION array_size_i1d(array) RESULT(the_size) TYPE(array_i1d_obj), INTENT(IN) :: array INTEGER :: the_size @@ -150,26 +163,37 @@ PURE FUNCTION array_size_i1d(array) RESULT(the_size) the_size = 0 ENDIF END FUNCTION array_size_i1d - PURE FUNCTION array_equality_i1(array1, array2) RESULT(are_equal) + + PURE_ARRAY_EQUALITY FUNCTION array_equality_i1(array1, array2) RESULT(are_equal) INTEGER, DIMENSION(:), POINTER :: array1, array2 LOGICAL :: are_equal are_equal = .FALSE. IF (ASSOCIATED(array1) .AND. ASSOCIATED(array2)) THEN +#if TO_VERSION(1, 11) <= TO_VERSION(LIBXSMM_CONFIG_VERSION_MAJOR, LIBXSMM_CONFIG_VERSION_MINOR) + are_equal = .NOT. libxsmm_diff(array1, array2) +#else IF (SIZE(array1) .NE. SIZE(array2)) RETURN are_equal = ALL(array1 .EQ. 
array2) +#endif ENDIF END FUNCTION array_equality_i1 - PURE FUNCTION array_equality_i1d(array1, array2) RESULT(are_equal) + + PURE_ARRAY_EQUALITY FUNCTION array_equality_i1d(array1, array2) RESULT(are_equal) TYPE(array_i1d_obj), INTENT(IN) :: array1, array2 LOGICAL :: are_equal are_equal = .FALSE. IF (ASSOCIATED(array1%low) .AND. ASSOCIATED(array2%low)) THEN +#if TO_VERSION(1, 11) <= TO_VERSION(LIBXSMM_CONFIG_VERSION_MAJOR, LIBXSMM_CONFIG_VERSION_MINOR) + are_equal = .NOT. libxsmm_diff(array1%low%data, array2%low%data) +#else IF (SIZE(array1%low%data) .NE. SIZE(array2%low%data)) RETURN are_equal = ALL(array1%low%data .EQ. array2%low%data) +#endif ENDIF END FUNCTION array_equality_i1d + PURE FUNCTION array_get_i1d(array, index1) RESULT(value) TYPE(array_i1d_obj), INTENT(IN) :: array INTEGER, INTENT(IN) :: index1 @@ -177,6 +201,7 @@ PURE FUNCTION array_get_i1d(array, index1) RESULT(value) value = array%low%data(index1) END FUNCTION array_get_i1d + PURE FUNCTION array_get_i1(array, index1) RESULT(value) INTEGER, DIMENSION(:), INTENT(IN), POINTER :: array INTEGER, INTENT(IN) :: index1 From 0305cc68e77a2da1e03bdb3cc37cd87ae75e0325 Mon Sep 17 00:00:00 2001 From: Patrick Seewald Date: Tue, 24 Mar 2020 20:44:47 +0100 Subject: [PATCH 13/34] Tensors: avoid use of intent(out) allocatables at the API level intent(out) allocatables cause problems with C interoperability. - allocations of arrays with known sizes should be controlled by the caller - helper functions to get array sizes - `dbcsr_t_contract_index` is the only exception, array size is not known prior to calling this function --- src/tensors/dbcsr_array_list_methods.F | 25 ++++- src/tensors/dbcsr_tensor.F | 90 +++++++++-------- src/tensors/dbcsr_tensor_api.F | 15 ++- src/tensors/dbcsr_tensor_block.F | 9 +- src/tensors/dbcsr_tensor_index.F | 41 +++++--- src/tensors/dbcsr_tensor_io.F | 20 ++-- src/tensors/dbcsr_tensor_split.F | 12 ++- src/tensors/dbcsr_tensor_test.F | 60 ++++++------ src/tensors/dbcsr_tensor_types.F | 128 +++++++++++++++++++++---- tests/dbcsr_tensor_unittest.F | 12 +-- 10 files changed, 282 insertions(+), 130 deletions(-) diff --git a/src/tensors/dbcsr_array_list_methods.F b/src/tensors/dbcsr_array_list_methods.F index ac8252dd8b4..c2cdd48f0b8 100644 --- a/src/tensors/dbcsr_array_list_methods.F +++ b/src/tensors/dbcsr_array_list_methods.F @@ -44,6 +44,11 @@ MODULE dbcsr_array_list_methods INTEGER, DIMENSION(:), ALLOCATABLE :: ptr END TYPE + INTERFACE get_ith_array + MODULE PROCEDURE allocate_and_get_ith_array + MODULE PROCEDURE get_ith_array + END INTERFACE + CONTAINS PURE FUNCTION number_of_arrays(list) @@ -55,7 +60,7 @@ PURE FUNCTION number_of_arrays(list) END FUNCTION number_of_arrays - FUNCTION get_array_elements(list, indices) + PURE FUNCTION get_array_elements(list, indices) !! Get an element for each array. TYPE(array_list), INTENT(IN) :: list @@ -167,7 +172,23 @@ SUBROUTINE get_arrays(list, ${varlist("data")}$, i_selected) END SUBROUTINE get_arrays - SUBROUTINE get_ith_array(list, i, array) + SUBROUTINE get_ith_array(list, i, array_size, array) + !! get ith array + TYPE(array_list), INTENT(IN) :: list + INTEGER, INTENT(IN) :: i + INTEGER, INTENT(IN) :: array_size + INTEGER, DIMENSION(array_size), INTENT(OUT) :: array + + ASSOCIATE (ptr=>list%ptr, col_data=>list%col_data) + DBCSR_ASSERT(i <= number_of_arrays(list)) + + array(:) = col_data(ptr(i):ptr(i + 1) - 1) + + END ASSOCIATE + + END SUBROUTINE + + SUBROUTINE allocate_and_get_ith_array(list, i, array) !! 
get ith array TYPE(array_list), INTENT(IN) :: list INTEGER, INTENT(IN) :: i diff --git a/src/tensors/dbcsr_tensor.F b/src/tensors/dbcsr_tensor.F index b5ccf67dc3a..ffb8eb3d363 100644 --- a/src/tensors/dbcsr_tensor.F +++ b/src/tensors/dbcsr_tensor.F @@ -39,14 +39,15 @@ MODULE dbcsr_tensor dbcsr_t_iterator_blocks_left, dbcsr_t_iterator_stop, dbcsr_t_iterator_next_block, & ndims_iterator, dbcsr_t_reserve_blocks, block_nd, destroy_block USE dbcsr_tensor_index, ONLY: & - dbcsr_t_get_mapping_info, nd_to_2d_mapping, dbcsr_t_inverse_order, permute_index, get_nd_indices_tensor + dbcsr_t_get_mapping_info, nd_to_2d_mapping, dbcsr_t_inverse_order, permute_index, get_nd_indices_tensor, & + ndims_mapping_row, ndims_mapping_column USE dbcsr_tensor_types, ONLY: & dbcsr_t_create, dbcsr_t_get_data_type, dbcsr_t_type, ndims_tensor, dims_tensor, & dbcsr_t_distribution_type, dbcsr_t_distribution, dbcsr_t_nd_mp_comm, dbcsr_t_destroy, & dbcsr_t_distribution_destroy, dbcsr_t_distribution_new_expert, dbcsr_t_get_stored_coordinates, & blk_dims_tensor, dbcsr_t_hold, dbcsr_t_pgrid_type, mp_environ_pgrid, dbcsr_t_filter, & dbcsr_t_clear, dbcsr_t_finalize, dbcsr_t_get_num_blocks, dbcsr_t_scale, & - dbcsr_t_get_num_blocks_total, dbcsr_t_get_info + dbcsr_t_get_num_blocks_total, dbcsr_t_get_info, ndims_matrix_row, ndims_matrix_column USE dbcsr_kinds, ONLY: & ${uselist(dtype_float_prec)}$, default_string_length, int_8 USE dbcsr_mpiwrap, ONLY: & @@ -79,7 +80,6 @@ MODULE dbcsr_tensor PUBLIC :: & dbcsr_t_contract, & dbcsr_t_copy, & - dbcsr_t_dims, & dbcsr_t_get_block, & dbcsr_t_get_stored_coordinates, & dbcsr_t_inverse_order, & @@ -88,7 +88,6 @@ MODULE dbcsr_tensor dbcsr_t_iterator_start, & dbcsr_t_iterator_stop, & dbcsr_t_iterator_type, & - dbcsr_t_ndims, & dbcsr_t_put_block, & dbcsr_t_reserve_blocks, & dbcsr_t_copy_matrix_to_tensor, & @@ -98,14 +97,6 @@ MODULE dbcsr_tensor dbcsr_t_batched_contract_init, & dbcsr_t_batched_contract_finalize - INTERFACE dbcsr_t_ndims - MODULE PROCEDURE ndims_tensor - END INTERFACE - - INTERFACE dbcsr_t_dims - MODULE PROCEDURE dims_tensor - END INTERFACE - CONTAINS SUBROUTINE dbcsr_t_copy(tensor_in, tensor_out, order, summation, bounds, move_data, unit_nr) @@ -202,7 +193,12 @@ SUBROUTINE dbcsr_t_copy(tensor_in, tensor_out, order, summation, bounds, move_da in_tmp_3 => in_tmp_2 ENDIF + ALLOCATE(map1_in_1(ndims_matrix_row(in_tmp_3))) + ALLOCATE(map1_in_2(ndims_matrix_column(in_tmp_3))) CALL dbcsr_t_get_mapping_info(in_tmp_3%nd_index, map1_2d=map1_in_1, map2_2d=map1_in_2) + + ALLOCATE(map2_in_1(ndims_matrix_row(out_tmp_1))) + ALLOCATE(map2_in_2(ndims_matrix_column(out_tmp_1))) CALL dbcsr_t_get_mapping_info(out_tmp_1%nd_index, map1_2d=map2_in_1, map2_2d=map2_in_2) IF (.NOT. PRESENT(order)) THEN @@ -350,7 +346,7 @@ SUBROUTINE dbcsr_t_copy_tensor_to_matrix(tensor_in, matrix_out, summation) !! copy tensor to matrix TYPE(dbcsr_t_type), INTENT(INOUT) :: tensor_in - TYPE(dbcsr_type), INTENT(INOUT) :: matrix_out + TYPE(dbcsr_type), INTENT(INOUT) :: matrix_out LOGICAL, INTENT(IN), OPTIONAL :: summation !! 
matrix_out = matrix_out + tensor_in TYPE(dbcsr_t_iterator_type) :: iter @@ -556,8 +552,8 @@ SUBROUTINE dbcsr_t_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, & indchar2_mod, indchar3_mod CHARACTER(LEN=1), DIMENSION(15) :: alph = & ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o'] - INTEGER, DIMENSION(2, dbcsr_t_ndims(tensor_1)) :: bounds_t1 - INTEGER, DIMENSION(2, dbcsr_t_ndims(tensor_2)) :: bounds_t2 + INTEGER, DIMENSION(2, ndims_tensor(tensor_1)) :: bounds_t1 + INTEGER, DIMENSION(2, ndims_tensor(tensor_2)) :: bounds_t2 LOGICAL :: do_crop_1, do_crop_2, do_write_3, nodata_3 TYPE(dbcsr_tas_split_info), POINTER :: split_opt @@ -576,13 +572,13 @@ SUBROUTINE dbcsr_t_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, & assert_stmt = SIZE(map_2) .EQ. SIZE(notcontract_2) DBCSR_ASSERT(assert_stmt) - assert_stmt = SIZE(notcontract_1) + SIZE(contract_1) .EQ. dbcsr_t_ndims(tensor_1) + assert_stmt = SIZE(notcontract_1) + SIZE(contract_1) .EQ. ndims_tensor(tensor_1) DBCSR_ASSERT(assert_stmt) - assert_stmt = SIZE(notcontract_2) + SIZE(contract_2) .EQ. dbcsr_t_ndims(tensor_2) + assert_stmt = SIZE(notcontract_2) + SIZE(contract_2) .EQ. ndims_tensor(tensor_2) DBCSR_ASSERT(assert_stmt) - assert_stmt = SIZE(map_1) + SIZE(map_2) .EQ. dbcsr_t_ndims(tensor_3) + assert_stmt = SIZE(map_1) + SIZE(map_2) .EQ. ndims_tensor(tensor_3) DBCSR_ASSERT(assert_stmt) assert_stmt = dbcsr_t_get_data_type(tensor_1) .EQ. dbcsr_t_get_data_type(tensor_2) @@ -662,9 +658,9 @@ SUBROUTINE dbcsr_t_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, & data_type = dbcsr_t_get_data_type(tensor_crop_1) ! align tensor index with data, tensor data is not modified - ndims1 = dbcsr_t_ndims(tensor_crop_1) - ndims2 = dbcsr_t_ndims(tensor_crop_2) - ndims3 = dbcsr_t_ndims(tensor_3) + ndims1 = ndims_tensor(tensor_crop_1) + ndims2 = ndims_tensor(tensor_crop_2) + ndims3 = ndims_tensor(tensor_3) ALLOCATE (indchar1(ndims1), indchar1_mod(ndims1)) ALLOCATE (indchar2(ndims2), indchar2_mod(ndims2)) ALLOCATE (indchar3(ndims3), indchar3_mod(ndims3)) @@ -825,7 +821,7 @@ SUBROUTINE dbcsr_t_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, & result_index=result_index_2d) nblk = SIZE(result_index_2d,1) - ALLOCATE(result_index(nblk, dbcsr_t_ndims(tensor_contr_3))) + ALLOCATE(result_index(nblk, ndims_tensor(tensor_contr_3))) DO iblk = 1, nblk result_index(iblk,:) = get_nd_indices_tensor(tensor_contr_3%nd_index_blk, result_index_2d(iblk,:)) ENDDO @@ -980,9 +976,9 @@ SUBROUTINE align_tensor(tensor_in, contract_in, notcontract_in, & INTENT(OUT) :: contract_out INTEGER, DIMENSION(SIZE(notcontract_in)), & INTENT(OUT) :: notcontract_out - CHARACTER(LEN=1), DIMENSION(dbcsr_t_ndims(tensor_in)), INTENT(IN) :: indp_in - CHARACTER(LEN=1), DIMENSION(dbcsr_t_ndims(tensor_in)), INTENT(OUT) :: indp_out - INTEGER, DIMENSION(dbcsr_t_ndims(tensor_in)) :: align + CHARACTER(LEN=1), DIMENSION(ndims_tensor(tensor_in)), INTENT(IN) :: indp_in + CHARACTER(LEN=1), DIMENSION(ndims_tensor(tensor_in)), INTENT(OUT) :: indp_out + INTEGER, DIMENSION(ndims_tensor(tensor_in)) :: align CALL dbcsr_t_align_index(tensor_in, tensor_out, order=align) contract_out = align(contract_in) @@ -1093,7 +1089,7 @@ SUBROUTINE reshape_mm_compatible(tensor1, tensor2, tensor1_out, tensor2_out, ind IF (ref_tensor == 1) THEN ! tensor 1 is reference and tensor 2 is reshaped compatible with tensor 1 IF (compat1 == 0 .OR. optimize_dist_prv) THEN ! 
tensor 1 is not contraction compatible --> reshape IF (io_unit > 0) WRITE (unit_nr, '(T2,A,1X,A)') "Redistribution of", TRIM(tensor1%name) - ALLOCATE (dims(dbcsr_t_ndims(tensor1))) + ALLOCATE (dims(ndims_tensor(tensor1))) CALL blk_dims_tensor(tensor1, dims) nblkrows = PRODUCT(INT(dims(ind1_linked), KIND=int_8)) nblkcols = PRODUCT(INT(dims(ind1_free), KIND=int_8)) @@ -1115,6 +1111,7 @@ SUBROUTINE reshape_mm_compatible(tensor1, tensor2, tensor1_out, tensor2_out, ind IF (compat1 == 1) THEN ! linked index is first 2d dimension ! get distribution of linked index, tensor 2 must adopt this distribution ! get grid dimensions of linked index + ALLOCATE(mp_dims(ndims_mapping_row(dist_in%pgrid%nd_index_grid))) CALL dbcsr_t_get_mapping_info(dist_in%pgrid%nd_index_grid, dims1_2d=mp_dims) ALLOCATE (tensor2_out) CALL dbcsr_t_remap(tensor2, ind2_linked, ind2_free, tensor2_out, comm_2d=dist_in%pgrid%mp_comm_2d, & @@ -1122,6 +1119,7 @@ SUBROUTINE reshape_mm_compatible(tensor1, tensor2, tensor1_out, tensor2_out, ind ELSEIF (compat1 == 2) THEN ! linked index is second 2d dimension ! get distribution of linked index, tensor 2 must adopt this distribution ! get grid dimensions of linked index + ALLOCATE(mp_dims(ndims_mapping_column(dist_in%pgrid%nd_index_grid))) CALL dbcsr_t_get_mapping_info(dist_in%pgrid%nd_index_grid, dims2_2d=mp_dims) ALLOCATE (tensor2_out) CALL dbcsr_t_remap(tensor2, ind2_free, ind2_linked, tensor2_out, comm_2d=dist_in%pgrid%mp_comm_2d, & @@ -1137,7 +1135,7 @@ SUBROUTINE reshape_mm_compatible(tensor1, tensor2, tensor1_out, tensor2_out, ind ELSE ! tensor 2 is reference and tensor 1 is reshaped compatible with tensor 2 IF (compat2 == 0 .OR. optimize_dist_prv) THEN ! tensor 2 is not contraction compatible --> reshape IF (io_unit > 0) WRITE (unit_nr, '(T2,A,1X,A)') "Redistribution of", TRIM(tensor2%name) - ALLOCATE (dims(dbcsr_t_ndims(tensor2))) + ALLOCATE (dims(ndims_tensor(tensor2))) CALL blk_dims_tensor(tensor2, dims) nblkrows = PRODUCT(INT(dims(ind2_linked), KIND=int_8)) nblkcols = PRODUCT(INT(dims(ind2_free), KIND=int_8)) @@ -1156,11 +1154,13 @@ SUBROUTINE reshape_mm_compatible(tensor1, tensor2, tensor1_out, tensor2_out, ind dist_in = dbcsr_t_distribution(tensor2_out) dist_list = array_sublist(dist_in%nd_dist, ind2_linked) IF (compat2 == 1) THEN + ALLOCATE(mp_dims(ndims_mapping_row(dist_in%pgrid%nd_index_grid))) CALL dbcsr_t_get_mapping_info(dist_in%pgrid%nd_index_grid, dims1_2d=mp_dims) ALLOCATE (tensor1_out) CALL dbcsr_t_remap(tensor1, ind1_linked, ind1_free, tensor1_out, comm_2d=dist_in%pgrid%mp_comm_2d, & dist1=dist_list, mp_dims_1=mp_dims, nodata=nodata1, move_data=move_data_1) ELSEIF (compat2 == 2) THEN + ALLOCATE(mp_dims(ndims_mapping_column(dist_in%pgrid%nd_index_grid))) CALL dbcsr_t_get_mapping_info(dist_in%pgrid%nd_index_grid, dims2_2d=mp_dims) ALLOCATE (tensor1_out) CALL dbcsr_t_remap(tensor1, ind1_free, ind1_linked, tensor1_out, comm_2d=dist_in%pgrid%mp_comm_2d, & @@ -1318,7 +1318,8 @@ FUNCTION compat_map(nd_index, compat_ind) !! 
Check if 2d index is compatible with tensor index TYPE(nd_to_2d_mapping), INTENT(IN) :: nd_index INTEGER, DIMENSION(:), INTENT(IN) :: compat_ind - INTEGER, DIMENSION(:), ALLOCATABLE :: map1, map2 + INTEGER, DIMENSION(ndims_mapping_row(nd_index)) :: map1 + INTEGER, DIMENSION(ndims_mapping_column(nd_index)) :: map2 INTEGER :: compat_map CALL dbcsr_t_get_mapping_info(nd_index, map1_2d=map1, map2_2d=map2) @@ -1354,9 +1355,10 @@ SUBROUTINE index_linked_sort(ind_ref, ind_dep) FUNCTION opt_pgrid(tensor, tas_split_info) TYPE(dbcsr_t_type), INTENT(IN) :: tensor TYPE(dbcsr_tas_split_info), INTENT(IN) :: tas_split_info - INTEGER, DIMENSION(:), ALLOCATABLE :: map1, map2 + INTEGER, DIMENSION(ndims_matrix_row(tensor)) :: map1 + INTEGER, DIMENSION(ndims_matrix_column(tensor)) :: map2 TYPE(dbcsr_t_pgrid_type) :: opt_pgrid - INTEGER, DIMENSION(dbcsr_t_ndims(tensor)) :: dims + INTEGER, DIMENSION(ndims_tensor(tensor)) :: dims CALL dbcsr_t_get_mapping_info(tensor%pgrid%nd_index_grid, map1_2d=map1, map2_2d=map2) CALL blk_dims_tensor(tensor, dims) @@ -1416,7 +1418,7 @@ SUBROUTINE dbcsr_t_remap(tensor_in, map1_2d, map2_2d, tensor_out, comm_2d, dist1 ${varlist("nd_dist")}$ TYPE(dbcsr_t_distribution_type) :: dist INTEGER :: comm_2d_prv, handle, i - INTEGER, DIMENSION(dbcsr_t_ndims(tensor_in)) :: pdims, myploc + INTEGER, DIMENSION(ndims_tensor(tensor_in)) :: pdims, myploc CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_t_remap', & routineP = moduleN//':'//routineN LOGICAL :: nodata_prv @@ -1503,7 +1505,8 @@ SUBROUTINE dbcsr_t_align_index(tensor_in, tensor_out, order) TYPE(dbcsr_t_type), INTENT(INOUT) :: tensor_in TYPE(dbcsr_t_type), INTENT(OUT) :: tensor_out - INTEGER, DIMENSION(:), ALLOCATABLE :: map1_2d, map2_2d + INTEGER, DIMENSION(ndims_matrix_row(tensor_in)) :: map1_2d + INTEGER, DIMENSION(ndims_matrix_column(tensor_in)) :: map2_2d INTEGER, DIMENSION(ndims_tensor(tensor_in)), & INTENT(OUT), OPTIONAL :: order !! permutation resulting from alignment @@ -1580,7 +1583,7 @@ SUBROUTINE dbcsr_t_get_nonzero_index(tensor, dim, bounds, ind) !! bounds of (full) tensor index LOGICAL, ALLOCATABLE, DIMENSION(:), INTENT(OUT) :: ind !! indices with occupied blocks - INTEGER, DIMENSION(dbcsr_t_ndims(tensor)) :: bdims, blk_index, blk_size, blk_offset + INTEGER, DIMENSION(ndims_tensor(tensor)) :: bdims, blk_index, blk_size, blk_offset INTEGER :: bdim, blk, idim TYPE(dbcsr_t_iterator_type) :: iter @@ -1664,8 +1667,8 @@ FUNCTION dbcsr_t_need_contract(tensor_1, tensor_2, contract_1, notcontract_1, & OPTIONAL :: bounds_3 INTEGER :: i LOGICAL, DIMENSION(:), ALLOCATABLE :: ind1, ind2 - INTEGER, DIMENSION(2, dbcsr_t_ndims(tensor_1)) :: bounds_t1 - INTEGER, DIMENSION(2, dbcsr_t_ndims(tensor_2)) :: bounds_t2 + INTEGER, DIMENSION(2, ndims_tensor(tensor_1)) :: bounds_t1 + INTEGER, DIMENSION(2, ndims_tensor(tensor_2)) :: bounds_t2 LOGICAL :: dbcsr_t_need_contract CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_t_need_contract', & routineP = moduleN//':'//routineN @@ -1714,10 +1717,10 @@ SUBROUTINE dbcsr_t_map_bounds_to_tensors(tensor_1, tensor_2, & TYPE(dbcsr_t_type), INTENT(IN) :: tensor_1, tensor_2 INTEGER, DIMENSION(:), INTENT(IN) :: contract_1, contract_2, & notcontract_1, notcontract_2 - INTEGER, DIMENSION(2, dbcsr_t_ndims(tensor_1)), & + INTEGER, DIMENSION(2, ndims_tensor(tensor_1)), & INTENT(OUT) :: bounds_t1 !! bounds mapped to tensor_1 - INTEGER, DIMENSION(2, dbcsr_t_ndims(tensor_2)), & + INTEGER, DIMENSION(2, ndims_tensor(tensor_2)), & INTENT(OUT) :: bounds_t2 !! 
bounds mapped to tensor_2 INTEGER, DIMENSION(2, SIZE(contract_1)), & @@ -1765,15 +1768,20 @@ SUBROUTINE dbcsr_t_print_contraction_index(tensor_1, indchar1, tensor_2, indchar !! print tensor contraction indices in a human readable way TYPE(dbcsr_t_type), INTENT(IN) :: tensor_1, tensor_2, tensor_3 - CHARACTER(LEN=1), DIMENSION(dbcsr_t_ndims(tensor_1)), INTENT(IN) :: indchar1 + CHARACTER(LEN=1), DIMENSION(ndims_tensor(tensor_1)), INTENT(IN) :: indchar1 !! characters printed for index of tensor 1 - CHARACTER(LEN=1), DIMENSION(dbcsr_t_ndims(tensor_2)), INTENT(IN) :: indchar2 + CHARACTER(LEN=1), DIMENSION(ndims_tensor(tensor_2)), INTENT(IN) :: indchar2 !! characters printed for index of tensor 2 - CHARACTER(LEN=1), DIMENSION(dbcsr_t_ndims(tensor_3)), INTENT(IN) :: indchar3 + CHARACTER(LEN=1), DIMENSION(ndims_tensor(tensor_3)), INTENT(IN) :: indchar3 !! characters printed for index of tensor 3 INTEGER, INTENT(IN) :: unit_nr !! output unit - INTEGER, DIMENSION(:), ALLOCATABLE :: map11, map12, map21, map22, map31, map32 + INTEGER, DIMENSION(ndims_matrix_row(tensor_1)) :: map11 + INTEGER, DIMENSION(ndims_matrix_column(tensor_1)) :: map12 + INTEGER, DIMENSION(ndims_matrix_row(tensor_2)) :: map21 + INTEGER, DIMENSION(ndims_matrix_column(tensor_2)) :: map22 + INTEGER, DIMENSION(ndims_matrix_row(tensor_3)) :: map31 + INTEGER, DIMENSION(ndims_matrix_column(tensor_3)) :: map32 INTEGER :: ichar1, ichar2, ichar3 CALL dbcsr_t_get_mapping_info(tensor_1%nd_index_blk, map1_2d=map11, map2_2d=map12) diff --git a/src/tensors/dbcsr_tensor_api.F b/src/tensors/dbcsr_tensor_api.F index 686704db49c..cfbd6129639 100644 --- a/src/tensors/dbcsr_tensor_api.F +++ b/src/tensors/dbcsr_tensor_api.F @@ -19,7 +19,8 @@ MODULE dbcsr_tensor_api dbcsr_t_contract, dbcsr_t_get_block, dbcsr_t_get_stored_coordinates, dbcsr_t_put_block, & dbcsr_t_reserve_blocks, dbcsr_t_copy_matrix_to_tensor, dbcsr_t_copy, & dbcsr_t_copy_tensor_to_matrix, dbcsr_t_need_contract, dbcsr_t_batched_contract_init, & - dbcsr_t_batched_contract_finalize, dbcsr_t_ndims, dbcsr_t_contract_index + dbcsr_t_batched_contract_finalize, & + dbcsr_t_contract_index USE dbcsr_tensor_block, ONLY: & dbcsr_t_iterator_blocks_left, dbcsr_t_iterator_next_block, dbcsr_t_iterator_start, & dbcsr_t_iterator_stop, dbcsr_t_iterator_type, dbcsr_t_reserved_block_indices @@ -31,7 +32,10 @@ MODULE dbcsr_tensor_api dbcsr_t_mp_environ_pgrid => mp_environ_pgrid, dbcsr_t_blk_sizes, dbcsr_t_get_info, & dbcsr_t_finalize, dbcsr_t_scale, dbcsr_t_get_nze, dbcsr_t_get_nze_total, & dbcsr_t_get_num_blocks, dbcsr_t_get_num_blocks_total, dbcsr_t_clear, & - dbcsr_t_mp_dims_create, dbcsr_t_pgrid_change_dims + dbcsr_t_mp_dims_create, dbcsr_t_pgrid_change_dims, dbcsr_t_ndims => ndims_tensor, & + dbcsr_t_dims => dims_tensor, dbcsr_t_ndims_matrix_row => ndims_matrix_row, & + dbcsr_t_ndims_matrix_column => ndims_matrix_column, dbcsr_t_blk_size, dbcsr_t_nblks_local, & + dbcsr_t_nblks_total USE dbcsr_tensor_test, ONLY: & dbcsr_t_contract_test, dbcsr_t_checksum USE dbcsr_tensor_split, ONLY: & @@ -92,8 +96,15 @@ MODULE dbcsr_tensor_api PUBLIC :: dbcsr_t_batched_contract_init PUBLIC :: dbcsr_t_batched_contract_finalize PUBLIC :: dbcsr_t_ndims + PUBLIC :: dbcsr_t_dims PUBLIC :: dbcsr_t_pgrid_change_dims PUBLIC :: dbcsr_t_reserved_block_indices PUBLIC :: dbcsr_t_contract_index + PUBLIC :: dbcsr_t_ndims_matrix_row + PUBLIC :: dbcsr_t_ndims_matrix_column + PUBLIC :: dbcsr_t_nblks_local + PUBLIC :: dbcsr_t_nblks_total + PUBLIC :: dbcsr_t_blk_size + END MODULE dbcsr_tensor_api diff --git 
a/src/tensors/dbcsr_tensor_block.F b/src/tensors/dbcsr_tensor_block.F index 0645a65cb5b..aeacc35760f 100644 --- a/src/tensors/dbcsr_tensor_block.F +++ b/src/tensors/dbcsr_tensor_block.F @@ -37,7 +37,7 @@ MODULE dbcsr_tensor_block get_arrays USE dbcsr_tensor_types, ONLY: & dbcsr_t_type, ndims_tensor, dbcsr_t_get_data_type, dbcsr_t_blk_sizes, dbcsr_t_get_num_blocks, & - dbcsr_t_finalize + dbcsr_t_finalize, ndims_matrix_row, ndims_matrix_column USE dbcsr_dist_operations, ONLY: & checker_tr USE dbcsr_toollib, ONLY: & @@ -312,7 +312,7 @@ SUBROUTINE dbcsr_t_reserve_blocks_template(tensor_in, tensor_out) TYPE(dbcsr_t_type), INTENT(INOUT) :: tensor_out INTEGER :: handle - INTEGER, DIMENSION(:, :), ALLOCATABLE :: blk_ind + INTEGER, DIMENSION(dbcsr_t_get_num_blocks(tensor_in), ndims_tensor(tensor_in)) :: blk_ind CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_t_reserve_blocks_template', & routineP = moduleN//':'//routineN @@ -410,14 +410,12 @@ SUBROUTINE dbcsr_t_reserved_block_indices(tensor, blk_ind) INTEGER :: blk, iblk, nblk TYPE(dbcsr_t_iterator_type) :: iterator INTEGER, DIMENSION(ndims_tensor(tensor)) :: ind_nd - INTEGER, DIMENSION(:, :), ALLOCATABLE, INTENT(OUT) :: blk_ind + INTEGER, DIMENSION(dbcsr_t_get_num_blocks(tensor), ndims_tensor(tensor)), INTENT(OUT) :: blk_ind DBCSR_ASSERT(tensor%valid) nblk = dbcsr_t_get_num_blocks(tensor) - ALLOCATE (blk_ind(nblk, ndims_tensor(tensor))) - CALL dbcsr_t_iterator_start(iterator, tensor) DO iblk = 1, nblk CALL dbcsr_t_iterator_next_block(iterator, ind_nd, blk) @@ -580,7 +578,6 @@ SUBROUTINE dbcsr_t_put_${ndim}$d_block_${dsuffix}$ (tensor, ind, sizes, block, s !! whether block should be summed to existing block ${dtype}$, INTENT(IN), OPTIONAL :: scale !! scaling factor - INTEGER(KIND=int_8), DIMENSION(2) :: ind_2d INTEGER, DIMENSION(2) :: shape_2d ${dtype}$, POINTER, DIMENSION(:, :) :: block_2d diff --git a/src/tensors/dbcsr_tensor_index.F b/src/tensors/dbcsr_tensor_index.F index 5d7ef18a3dc..f459ff75053 100644 --- a/src/tensors/dbcsr_tensor_index.F +++ b/src/tensors/dbcsr_tensor_index.F @@ -32,6 +32,8 @@ MODULE dbcsr_tensor_index ndims_mapping, & split_tensor_index, & split_pgrid_index, & + ndims_mapping_row, & + ndims_mapping_column, & dbcsr_t_inverse_order, & permute_index @@ -119,6 +121,20 @@ PURE FUNCTION ndims_mapping(map) ndims_mapping = map%ndim_nd END FUNCTION + PURE FUNCTION ndims_mapping_row(map) + !! how many tensor dimensions are mapped to matrix row + TYPE(nd_to_2d_mapping), INTENT(IN) :: map + INTEGER :: ndims_mapping_row + ndims_mapping_row = map%ndim1_2d + END FUNCTION + + PURE FUNCTION ndims_mapping_column(map) + !! how many tensor dimensions are mapped to matrix column + TYPE(nd_to_2d_mapping), INTENT(IN) :: map + INTEGER :: ndims_mapping_column + ndims_mapping_column = map%ndim2_2d + END FUNCTION + SUBROUTINE dbcsr_t_get_mapping_info(map, ndim_nd, ndim1_2d, ndim2_2d, dims_2d_i8, dims_2d, dims_nd, dims1_2d, dims2_2d, & map1_2d, map2_2d, map_nd, base, col_major) !! get mapping info @@ -135,11 +151,17 @@ SUBROUTINE dbcsr_t_get_mapping_info(map, ndim_nd, ndim1_2d, ndim2_2d, dims_2d_i8 INTEGER, DIMENSION(ndims_mapping(map)), & INTENT(OUT), OPTIONAL :: dims_nd !! nd dimensions - INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT), & - OPTIONAL :: dims1_2d, dims2_2d, map1_2d, map2_2d + INTEGER, DIMENSION(ndims_mapping_row(map)), INTENT(OUT), & + OPTIONAL :: dims1_2d !! dimensions that map to first 2d index + INTEGER, DIMENSION(ndims_mapping_column(map)), INTENT(OUT), & + OPTIONAL :: dims2_2d !! 
dimensions that map to second 2d index + INTEGER, DIMENSION(ndims_mapping_row(map)), INTENT(OUT), & + OPTIONAL :: map1_2d !! indices that map to first 2d index + INTEGER, DIMENSION(ndims_mapping_column(map)), INTENT(OUT), & + OPTIONAL :: map2_2d !! indices that map to second 2d index INTEGER, DIMENSION(ndims_mapping(map)), & INTENT(OUT), OPTIONAL :: map_nd @@ -158,16 +180,16 @@ SUBROUTINE dbcsr_t_get_mapping_info(map, ndim_nd, ndim1_2d, ndim2_2d, dims_2d_i8 dims_nd(:) = map%dims_nd(:) ENDIF IF (PRESENT(dims1_2d)) THEN - CALL allocate_any(dims1_2d, source=map%dims1_2d) + dims1_2d(:) = map%dims1_2d ENDIF IF (PRESENT(dims2_2d)) THEN - CALL allocate_any(dims2_2d, source=map%dims2_2d) + dims2_2d(:) = map%dims2_2d ENDIF IF (PRESENT(map1_2d)) THEN - CALL allocate_any(map1_2d, source=map%map1_2d) + map1_2d(:) = map%map1_2d ENDIF IF (PRESENT(map2_2d)) THEN - CALL allocate_any(map2_2d, source=map%map2_2d) + map2_2d(:) = map%map2_2d ENDIF IF (PRESENT(map_nd)) THEN map_nd(:) = map%map_nd(:) @@ -360,17 +382,14 @@ SUBROUTINE permute_index(map_in, map_out, order) INTENT(IN) :: order INTEGER :: ndim_nd - INTEGER, ALLOCATABLE, DIMENSION(:) :: map1_2d, map1_2d_reorder, map2_2d, & - map2_2d_reorder + INTEGER, DIMENSION(ndims_mapping_row(map_in)) :: map1_2d, map1_2d_reorder + INTEGER, DIMENSION(ndims_mapping_column(map_in)) :: map2_2d, map2_2d_reorder INTEGER, DIMENSION(ndims_mapping(map_in)) :: dims_nd, dims_reorder CALL dbcsr_t_get_mapping_info(map_in, ndim_nd, dims_nd=dims_nd, map1_2d=map1_2d, map2_2d=map2_2d) dims_reorder(order) = dims_nd - CALL allocate_any(map1_2d_reorder, shape_spec=SHAPE(map1_2d)) - CALL allocate_any(map2_2d_reorder, shape_spec=SHAPE(map2_2d)) - map1_2d_reorder(:) = order(map1_2d) map2_2d_reorder(:) = order(map2_2d) diff --git a/src/tensors/dbcsr_tensor_io.F b/src/tensors/dbcsr_tensor_io.F index 0205b83fb38..4ea18a19e2f 100644 --- a/src/tensors/dbcsr_tensor_io.F +++ b/src/tensors/dbcsr_tensor_io.F @@ -17,7 +17,7 @@ MODULE dbcsr_tensor_io USE dbcsr_tensor_types, ONLY: & dbcsr_t_get_info, dbcsr_t_type, ndims_tensor, dbcsr_t_get_num_blocks, dbcsr_t_get_num_blocks_total, & blk_dims_tensor, dbcsr_t_get_stored_coordinates, dbcsr_t_get_nze, dbcsr_t_get_nze_total, & - dbcsr_t_pgrid_type + dbcsr_t_pgrid_type, dbcsr_t_nblks_total USE dbcsr_kinds, ONLY: default_string_length, int_8, real_8 USE dbcsr_mpiwrap, ONLY: mp_environ, mp_sum, mp_max USE dbcsr_tensor_block, ONLY: & @@ -49,17 +49,17 @@ SUBROUTINE dbcsr_t_write_tensor_info(tensor, output_unit, full_info) LOGICAL, OPTIONAL, INTENT(IN) :: full_info !! 
Whether to print distribution and block size vectors INTEGER, DIMENSION(ndims_tensor(tensor)) :: nblks_total, nfull_total, pdims, my_ploc, nblks_local, nfull_local - INTEGER, DIMENSION(:), ALLOCATABLE :: ${varlist("blks_local")}$ - INTEGER, DIMENSION(:), ALLOCATABLE :: ${varlist("proc_dist")}$ - INTEGER, DIMENSION(:), ALLOCATABLE :: ${varlist("blk_size")}$ - INTEGER, DIMENSION(:), ALLOCATABLE :: ${varlist("blk_offset")}$ - CHARACTER(len=default_string_length) :: name - INTEGER :: idim - INTEGER :: iblk + +#:for idim in range(1, maxdim+1) + INTEGER, DIMENSION(dbcsr_t_nblks_total(tensor,${idim}$)) :: proc_dist_${idim}$ + INTEGER, DIMENSION(dbcsr_t_nblks_total(tensor,${idim}$)) :: blk_size_${idim}$ +#:endfor + CHARACTER(len=default_string_length) :: name + INTEGER :: idim + INTEGER :: iblk CALL dbcsr_t_get_info(tensor, nblks_total, nfull_total, nblks_local, nfull_local, pdims, my_ploc, & - ${varlist("blks_local")}$, ${varlist("proc_dist")}$, ${varlist("blk_size")}$, & - ${varlist("blk_offset")}$, & + ${varlist("proc_dist")}$, ${varlist("blk_size")}$, & name=name) IF (output_unit > 0) THEN diff --git a/src/tensors/dbcsr_tensor_split.F b/src/tensors/dbcsr_tensor_split.F index 005fc7eb71f..3ba73d8667c 100644 --- a/src/tensors/dbcsr_tensor_split.F +++ b/src/tensors/dbcsr_tensor_split.F @@ -39,7 +39,9 @@ MODULE dbcsr_tensor_split dbcsr_t_finalize, & dbcsr_t_get_num_blocks, & dbcsr_t_blk_offsets, & - dbcsr_t_blk_sizes + dbcsr_t_blk_sizes, & + ndims_matrix_row, & + ndims_matrix_column USE dbcsr_api, ONLY: ${uselist(dtype_float_param)}$ USE dbcsr_kinds, ONLY: ${uselist(dtype_float_prec)}$ @@ -77,8 +79,9 @@ SUBROUTINE dbcsr_t_split_blocks_generic(tensor_in, tensor_out, ${varlist("blk_si INTEGER :: ${varlist("split_blk")}$ INTEGER :: idim, i, isplit_sum, blk, nsplit, handle, splitsum, bcount INTEGER, DIMENSION(:, :), ALLOCATABLE :: blks_to_allocate - INTEGER, DIMENSION(:), ALLOCATABLE :: dist_d, blk_size_d, blk_size_split_d, dist_split_d, & - map1_2d, map2_2d + INTEGER, DIMENSION(:), ALLOCATABLE :: dist_d, blk_size_d, blk_size_split_d, dist_split_d + INTEGER, DIMENSION(ndims_matrix_row(tensor_in)) :: map1_2d + INTEGER, DIMENSION(ndims_matrix_column(tensor_in)) :: map2_2d INTEGER, DIMENSION(ndims_tensor(tensor_in)) :: blk_index, blk_size, blk_offset, & blk_shape INTEGER, DIMENSION(${maxdim}$) :: bi_split, inblock_offset @@ -627,9 +630,10 @@ SUBROUTINE dbcsr_t_crop(tensor_in, tensor_out, bounds, move_data) CALL dbcsr_t_create(tensor_in, tensor_out) ! 
reserve blocks inside bounds + ALLOCATE(blk_ind(dbcsr_t_get_num_blocks(tensor_in), ndims_tensor(tensor_in))) CALL dbcsr_t_reserved_block_indices(tensor_in, blk_ind) nblk = dbcsr_t_get_num_blocks(tensor_in) - ALLOCATE (blk_ind_tmp(nblk, ndims_tensor(tensor_in))) + ALLOCATE(blk_ind_tmp(dbcsr_t_get_num_blocks(tensor_in), ndims_tensor(tensor_in))) blk_ind_tmp(:, :) = 0 iblk = 0 blk_loop: DO iblk_all = 1, nblk diff --git a/src/tensors/dbcsr_tensor_test.F b/src/tensors/dbcsr_tensor_test.F index e782af2cd5a..1e86363ac6b 100644 --- a/src/tensors/dbcsr_tensor_test.F +++ b/src/tensors/dbcsr_tensor_test.F @@ -18,9 +18,9 @@ MODULE dbcsr_tensor_test USE dbcsr_tas_base, ONLY: dbcsr_tas_info USE dbcsr_tensor, ONLY: & dbcsr_t_copy, dbcsr_t_get_block, dbcsr_t_iterator_type, dbcsr_t_iterator_blocks_left, & - dbcsr_t_iterator_next_block, dbcsr_t_iterator_start, dbcsr_t_iterator_stop, dbcsr_t_ndims, & + dbcsr_t_iterator_next_block, dbcsr_t_iterator_start, dbcsr_t_iterator_stop, & dbcsr_t_reserve_blocks, dbcsr_t_get_stored_coordinates, dbcsr_t_put_block, & - dbcsr_t_contract, dbcsr_t_inverse_order, dbcsr_t_dims + dbcsr_t_contract, dbcsr_t_inverse_order USE dbcsr_tensor_block, ONLY: block_nd USE dbcsr_tensor_types, ONLY: dbcsr_t_create, & dbcsr_t_destroy, & @@ -28,6 +28,7 @@ MODULE dbcsr_tensor_test dbcsr_t_distribution_type, & dbcsr_t_distribution_destroy, & dims_tensor, & + ndims_tensor, & dbcsr_t_distribution_new, & dbcsr_t_nd_mp_comm, & dbcsr_t_get_data_type, & @@ -48,8 +49,7 @@ MODULE dbcsr_tensor_test mp_sum, & mp_cart_create USE dbcsr_allocate_wrap, ONLY: allocate_any - USE dbcsr_tensor_index, ONLY: combine_tensor_index, & - dbcsr_t_get_mapping_info + USE dbcsr_tensor_index, ONLY: combine_tensor_index USE dbcsr_tas_test, ONLY: dbcsr_tas_checksum USE dbcsr_data_types, ONLY: dbcsr_scalar_type #include "base/dbcsr_base_uses.f90" @@ -97,7 +97,7 @@ FUNCTION dbcsr_t_equal(tensor1, tensor2) TYPE(dbcsr_t_type) :: tensor2_tmp TYPE(dbcsr_t_iterator_type) :: iter TYPE(block_nd) :: blk_data1, blk_data2 - INTEGER, DIMENSION(dbcsr_t_ndims(tensor1)) :: blk_size, ind_nd + INTEGER, DIMENSION(ndims_tensor(tensor1)) :: blk_size, ind_nd LOGICAL :: found ! 
create a copy of tensor2 that has exact same data format as tensor1 @@ -404,7 +404,7 @@ SUBROUTINE dbcsr_t_setup_test_tensor(tensor, mp_comm, enumerate, ${varlist("blk_ INTEGER :: i, ib, my_nblks_alloc, nblks_alloc, proc INTEGER, ALLOCATABLE, DIMENSION(:) :: ${varlist("my_blk_ind")}$ - INTEGER, DIMENSION(dbcsr_t_ndims(tensor)) :: blk_index, blk_offset, blk_size, & + INTEGER, DIMENSION(ndims_tensor(tensor)) :: blk_index, blk_offset, blk_size, & tensor_dims INTEGER, DIMENSION(:, :), ALLOCATABLE :: ind_nd #:for ndim in ndims @@ -416,11 +416,11 @@ SUBROUTINE dbcsr_t_setup_test_tensor(tensor, mp_comm, enumerate, ${varlist("blk_ nblks_alloc = SIZE(blk_ind_1) CALL mp_environ(numnodes, mynode, mp_comm) - ALLOCATE (ind_nd(nblks_alloc, dbcsr_t_ndims(tensor))) + ALLOCATE (ind_nd(nblks_alloc, ndims_tensor(tensor))) my_nblks_alloc = 0 DO ib = 1, nblks_alloc #:for ndim in ndims - IF (dbcsr_t_ndims(tensor) == ${ndim}$) THEN + IF (ndims_tensor(tensor) == ${ndim}$) THEN ind_nd(ib, :) = [${varlist("blk_ind", nmax=ndim, suffix="(ib)")}$] ENDIF #:endfor @@ -431,7 +431,7 @@ SUBROUTINE dbcsr_t_setup_test_tensor(tensor, mp_comm, enumerate, ${varlist("blk_ ENDDO #:for dim in range(1, maxdim+1) - IF (dbcsr_t_ndims(tensor) >= ${dim}$) THEN + IF (ndims_tensor(tensor) >= ${dim}$) THEN ALLOCATE (my_blk_ind_${dim}$ (my_nblks_alloc)) ENDIF #:endfor @@ -442,7 +442,7 @@ SUBROUTINE dbcsr_t_setup_test_tensor(tensor, mp_comm, enumerate, ${varlist("blk_ IF (proc == mynode) THEN i = i + 1 #:for dim in range(1, maxdim+1) - IF (dbcsr_t_ndims(tensor) >= ${dim}$) THEN + IF (ndims_tensor(tensor) >= ${dim}$) THEN my_blk_ind_${dim}$ (i) = blk_ind_${dim}$ (ib) ENDIF #:endfor @@ -450,7 +450,7 @@ SUBROUTINE dbcsr_t_setup_test_tensor(tensor, mp_comm, enumerate, ${varlist("blk_ ENDDO #:for ndim in ndims - IF (dbcsr_t_ndims(tensor) == ${ndim}$) THEN + IF (ndims_tensor(tensor) == ${ndim}$) THEN CALL dbcsr_t_reserve_blocks(tensor, ${varlist("my_blk_ind", nmax=ndim)}$) ENDIF #:endfor @@ -460,7 +460,7 @@ SUBROUTINE dbcsr_t_setup_test_tensor(tensor, mp_comm, enumerate, ${varlist("blk_ CALL dbcsr_t_iterator_next_block(iterator, blk_index, blk, blk_size=blk_size, blk_offset=blk_offset) #:for ndim in ndims - IF (dbcsr_t_ndims(tensor) == ${ndim}$) THEN + IF (ndims_tensor(tensor) == ${ndim}$) THEN CALL allocate_any(blk_values_${ndim}$, shape_spec=blk_size) CALL dims_tensor(tensor, tensor_dims) IF (enumerate) THEN @@ -522,14 +522,14 @@ SUBROUTINE dist_sparse_tensor_to_repl_dense_${ndim}$d_array_${dsuffix}$ (tensor, ${dtype}$, ALLOCATABLE, DIMENSION(${shape_colon(ndim)}$), & INTENT(OUT) :: array ${dtype}$, ALLOCATABLE, DIMENSION(${shape_colon(ndim)}$) :: block - INTEGER, DIMENSION(dbcsr_t_ndims(tensor)) :: dims_nd, ind_nd, blk_size, blk_offset + INTEGER, DIMENSION(ndims_tensor(tensor)) :: dims_nd, ind_nd, blk_size, blk_offset TYPE(dbcsr_t_iterator_type) :: iterator INTEGER :: blk, idim - INTEGER, DIMENSION(dbcsr_t_ndims(tensor)) :: blk_start, blk_end + INTEGER, DIMENSION(ndims_tensor(tensor)) :: blk_start, blk_end LOGICAL :: found - DBCSR_ASSERT(dbcsr_t_ndims(tensor) .EQ. ${ndim}$) - CALL dbcsr_t_get_mapping_info(tensor%nd_index, dims_nd=dims_nd) + DBCSR_ASSERT(ndims_tensor(tensor) .EQ. 
${ndim}$) + CALL dbcsr_t_get_info(tensor, nfull_total=dims_nd) CALL allocate_any(array, shape_spec=dims_nd) array(${shape_colon(ndim)}$) = 0.0_${dprec}$ @@ -539,7 +539,7 @@ SUBROUTINE dist_sparse_tensor_to_repl_dense_${ndim}$d_array_${dsuffix}$ (tensor, CALL dbcsr_t_get_block(tensor, ind_nd, block, found) DBCSR_ASSERT(found) - DO idim = 1, dbcsr_t_ndims(tensor) + DO idim = 1, ndims_tensor(tensor) blk_start(idim) = blk_offset(idim) blk_end(idim) = blk_offset(idim) + blk_size(idim) - 1 ENDDO @@ -562,19 +562,19 @@ SUBROUTINE repl_dense_${ndim}$d_array_to_dist_sparse_tensor_${dsuffix}$ (tensor, ${dtype}$, ALLOCATABLE, DIMENSION(${shape_colon(ndim)}$), & INTENT(INOUT) :: array ${dtype}$, ALLOCATABLE, DIMENSION(${shape_colon(ndim)}$) :: block - INTEGER, DIMENSION(dbcsr_t_ndims(tensor)) :: dims_nd, ind_nd, blk_size, blk_offset + INTEGER, DIMENSION(ndims_tensor(tensor)) :: dims_nd, ind_nd, blk_size, blk_offset TYPE(dbcsr_t_iterator_type) :: iterator INTEGER :: blk, idim - INTEGER, DIMENSION(dbcsr_t_ndims(tensor)) :: blk_start, blk_end + INTEGER, DIMENSION(ndims_tensor(tensor)) :: blk_start, blk_end - DBCSR_ASSERT(dbcsr_t_ndims(tensor) .EQ. ${ndim}$) - CALL dbcsr_t_get_mapping_info(tensor%nd_index, dims_nd=dims_nd) + DBCSR_ASSERT(ndims_tensor(tensor) .EQ. ${ndim}$) + CALL dbcsr_t_get_info(tensor, nfull_total=dims_nd) CALL dbcsr_t_iterator_start(iterator, tensor) DO WHILE (dbcsr_t_iterator_blocks_left(iterator)) CALL dbcsr_t_iterator_next_block(iterator, ind_nd, blk, blk_size=blk_size, blk_offset=blk_offset) CALL allocate_any(block, shape_spec=blk_size) - DO idim = 1, dbcsr_t_ndims(tensor) + DO idim = 1, ndims_tensor(tensor) blk_start(idim) = blk_offset(idim) blk_end(idim) = blk_offset(idim) + blk_size(idim) - 1 ENDDO @@ -617,8 +617,8 @@ SUBROUTINE dbcsr_t_contract_test(alpha, tensor_1, tensor_2, beta, tensor_3, & INTEGER :: io_unit, mynode, numnodes, mp_comm INTEGER, DIMENSION(:), ALLOCATABLE :: size_1, size_2, size_3, & order_t1, order_t2, order_t3 - INTEGER, DIMENSION(2, dbcsr_t_ndims(tensor_1)) :: bounds_t1 - INTEGER, DIMENSION(2, dbcsr_t_ndims(tensor_2)) :: bounds_t2 + INTEGER, DIMENSION(2, ndims_tensor(tensor_1)) :: bounds_t1 + INTEGER, DIMENSION(2, ndims_tensor(tensor_2)) :: bounds_t2 #:for ndim in ndims REAL(KIND=real_8), ALLOCATABLE, & @@ -673,7 +673,7 @@ SUBROUTINE dbcsr_t_contract_test(alpha, tensor_1, tensor_2, beta, tensor_3, & CALL dbcsr_t_write_blocks(tensor_1, io_unit, unit_nr, write_int) ENDIF - SELECT CASE (dbcsr_t_ndims(tensor_3)) + SELECT CASE (ndims_tensor(tensor_3)) #:for ndim in ndims CASE (${ndim}$) CALL dist_sparse_tensor_to_repl_dense_array(tensor_3, array_3_0_${ndim}$d) @@ -724,7 +724,7 @@ SUBROUTINE dbcsr_t_contract_test(alpha, tensor_1, tensor_2, beta, tensor_3, & ! Convert tensors to simple multidimensional arrays #:for i in range(1,4) - SELECT CASE (dbcsr_t_ndims(tensor_${i}$)) + SELECT CASE (ndims_tensor(tensor_${i}$)) #:for ndim in ndims CASE (${ndim}$) #:if i < 3 @@ -744,7 +744,7 @@ SUBROUTINE dbcsr_t_contract_test(alpha, tensor_1, tensor_2, beta, tensor_3, & ! 
Get array sizes #:for i in range(1,4) - SELECT CASE (dbcsr_t_ndims(tensor_${i}$)) + SELECT CASE (ndims_tensor(tensor_${i}$)) #:for ndim in ndims CASE (${ndim}$) CALL allocate_any(size_${i}$, source=SHAPE(array_${i}$_${ndim}$d)) @@ -754,7 +754,7 @@ SUBROUTINE dbcsr_t_contract_test(alpha, tensor_1, tensor_2, beta, tensor_3, & #:endfor #:for i in range(1,4) - ALLOCATE (order_t${i}$ (dbcsr_t_ndims(tensor_${i}$))) + ALLOCATE (order_t${i}$ (ndims_tensor(tensor_${i}$))) #:endfor ASSOCIATE (map_t1_1=>notcontract_1, map_t1_2=>contract_1, & @@ -764,7 +764,7 @@ SUBROUTINE dbcsr_t_contract_test(alpha, tensor_1, tensor_2, beta, tensor_3, & #:for i in range(1,4) order_t${i}$ (:) = dbcsr_t_inverse_order([map_t${i}$_1, map_t${i}$_2]) - SELECT CASE (dbcsr_t_ndims(tensor_${i}$)) + SELECT CASE (ndims_tensor(tensor_${i}$)) #:for ndim in ndims CASE (${ndim}$) CALL allocate_any(array_${i}$_rs${ndim}$d, source=array_${i}$_${ndim}$d, order=order_t${i}$) @@ -774,7 +774,7 @@ SUBROUTINE dbcsr_t_contract_test(alpha, tensor_1, tensor_2, beta, tensor_3, & END SELECT #:endfor - SELECT CASE (dbcsr_t_ndims(tensor_3)) + SELECT CASE (ndims_tensor(tensor_3)) #:for ndim in ndims CASE (${ndim}$) CALL allocate_any(array_3_0_rs${ndim}$d, source=array_3_0_${ndim}$d, order=order_t3) diff --git a/src/tensors/dbcsr_tensor_types.F b/src/tensors/dbcsr_tensor_types.F index fe2d26b977e..f3ba21f94af 100644 --- a/src/tensors/dbcsr_tensor_types.F +++ b/src/tensors/dbcsr_tensor_types.F @@ -35,7 +35,7 @@ MODULE dbcsr_tensor_types USE dbcsr_tensor_index, ONLY: & get_2d_indices_tensor, get_nd_indices_pgrid, create_nd_to_2d_mapping, destroy_nd_to_2d_mapping, & dbcsr_t_get_mapping_info, nd_to_2d_mapping, split_tensor_index, combine_tensor_index, combine_pgrid_index, & - split_pgrid_index, ndims_mapping + split_pgrid_index, ndims_mapping, ndims_mapping_row, ndims_mapping_column USE dbcsr_tas_split, ONLY: & dbcsr_tas_create_split_rows_or_cols, dbcsr_tas_release_info, dbcsr_tas_info_hold, & dbcsr_tas_create_split, dbcsr_tas_get_split_info @@ -89,7 +89,12 @@ MODULE dbcsr_tensor_types dbcsr_t_type, & dims_tensor, & mp_environ_pgrid, & - ndims_tensor + ndims_tensor,& + ndims_matrix_row,& + ndims_matrix_column,& + dbcsr_t_nblks_local,& + dbcsr_t_nblks_total,& + dbcsr_t_blk_size TYPE dbcsr_t_pgrid_type TYPE(nd_to_2d_mapping) :: nd_index_grid @@ -194,18 +199,24 @@ FUNCTION new_dbcsr_tas_dist_t(nd_dist, map_blks, map_grid, which_dim) INTEGER, DIMENSION(:), ALLOCATABLE :: index_map IF (which_dim == 1) THEN + ALLOCATE (new_dbcsr_tas_dist_t%dims(ndims_mapping_row(map_blks))) + ALLOCATE (index_map(ndims_mapping_row(map_blks))) CALL dbcsr_t_get_mapping_info(map_blks, & dims_2d_i8=matrix_dims, & map1_2d=index_map, & dims1_2d=new_dbcsr_tas_dist_t%dims) + ALLOCATE (new_dbcsr_tas_dist_t%dims_grid(ndims_mapping_row(map_grid))) CALL dbcsr_t_get_mapping_info(map_grid, & dims_2d=grid_dims, & dims1_2d=new_dbcsr_tas_dist_t%dims_grid) ELSEIF (which_dim == 2) THEN + ALLOCATE (new_dbcsr_tas_dist_t%dims(ndims_mapping_column(map_blks))) + ALLOCATE (index_map(ndims_mapping_column(map_blks))) CALL dbcsr_t_get_mapping_info(map_blks, & dims_2d_i8=matrix_dims, & map2_2d=index_map, & dims2_2d=new_dbcsr_tas_dist_t%dims) + ALLOCATE (new_dbcsr_tas_dist_t%dims_grid(ndims_mapping_column(map_grid))) CALL dbcsr_t_get_mapping_info(map_grid, & dims_2d=grid_dims, & dims2_2d=new_dbcsr_tas_dist_t%dims_grid) @@ -308,11 +319,15 @@ FUNCTION new_dbcsr_tas_blk_size_t(blk_size, map_blks, which_dim) TYPE(dbcsr_tas_blk_size_t) :: new_dbcsr_tas_blk_size_t IF (which_dim == 1) THEN + ALLOCATE 
(index_map(ndims_mapping_row(map_blks))) + ALLOCATE (new_dbcsr_tas_blk_size_t%dims(ndims_mapping_row(map_blks))) CALL dbcsr_t_get_mapping_info(map_blks, & dims_2d_i8=matrix_dims, & map1_2d=index_map, & dims1_2d=new_dbcsr_tas_blk_size_t%dims) ELSEIF (which_dim == 2) THEN + ALLOCATE (index_map(ndims_mapping_column(map_blks))) + ALLOCATE (new_dbcsr_tas_blk_size_t%dims(ndims_mapping_column(map_blks))) CALL dbcsr_t_get_mapping_info(map_blks, & dims_2d_i8=matrix_dims, & map2_2d=index_map, & @@ -537,7 +552,8 @@ SUBROUTINE dbcsr_t_distribution_new(dist, pgrid, ${varlist("nd_dist")}$) TYPE(dbcsr_t_distribution_type), INTENT(OUT) :: dist TYPE(dbcsr_t_pgrid_type), INTENT(IN) :: pgrid INTEGER, DIMENSION(:), INTENT(IN), OPTIONAL :: ${varlist("nd_dist")}$ - INTEGER, DIMENSION(:), ALLOCATABLE :: map1_2d, map2_2d + INTEGER, DIMENSION(ndims_mapping_row(pgrid%nd_index_grid)) :: map1_2d + INTEGER, DIMENSION(ndims_mapping_column(pgrid%nd_index_grid)) :: map2_2d INTEGER :: ndims CALL dbcsr_t_get_mapping_info(pgrid%nd_index_grid, map1_2d=map1_2d, map2_2d=map2_2d, ndim_nd=ndims) @@ -570,7 +586,8 @@ SUBROUTINE dbcsr_t_distribution_new_expert(dist, pgrid, map1_2d, map2_2d, ${varl TYPE(dbcsr_tas_dist_t) :: row_dist_obj, col_dist_obj TYPE(dbcsr_t_pgrid_type) :: pgrid_prv LOGICAL :: need_pgrid_remap - INTEGER, DIMENSION(:), ALLOCATABLE :: map1_2d_check, map2_2d_check + INTEGER, DIMENSION(ndims_mapping_row(pgrid%nd_index_grid)) :: map1_2d_check + INTEGER, DIMENSION(ndims_mapping_column(pgrid%nd_index_grid)) :: map2_2d_check CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_t_distribution_new', & routineP = moduleN//':'//routineN @@ -866,6 +883,8 @@ SUBROUTINE dbcsr_t_create_template(tensor_in, tensor, name, dist, map1_2d, map2_ CALL allocate_any(map1_2d_prv, source=map1_2d) CALL allocate_any(map2_2d_prv, source=map2_2d) ELSE + ALLOCATE(map1_2d_prv(ndims_matrix_row(tensor_in))) + ALLOCATE(map2_2d_prv(ndims_matrix_column(tensor_in))) CALL dbcsr_t_get_mapping_info(tensor_in%nd_index_blk, map1_2d=map1_2d_prv, map2_2d=map2_2d_prv) ENDIF IF (PRESENT(name)) THEN @@ -1219,7 +1238,8 @@ SUBROUTINE dbcsr_t_pgrid_remap(pgrid_in, map1_2d, map2_2d, pgrid_out) !! new mapping TYPE(dbcsr_t_pgrid_type), INTENT(OUT) :: pgrid_out INTEGER, DIMENSION(:), ALLOCATABLE :: dims - INTEGER, ALLOCATABLE, DIMENSION(:) :: map1_2d_old, map2_2d_old + INTEGER, DIMENSION(ndims_mapping_row(pgrid_in%nd_index_grid)) :: map1_2d_old + INTEGER, DIMENSION(ndims_mapping_column(pgrid_in%nd_index_grid)) :: map2_2d_old ALLOCATE (dims(SIZE(map1_2d) + SIZE(map2_2d))) CALL dbcsr_t_get_mapping_info(pgrid_in%nd_index_grid, dims_nd=dims, map1_2d=map1_2d_old, map2_2d=map2_2d_old) @@ -1241,7 +1261,8 @@ SUBROUTINE dbcsr_t_pgrid_change_dims(pgrid, pdims) !! new process grid dimensions, should all be set > 0 TYPE(dbcsr_t_pgrid_type) :: pgrid_tmp INTEGER :: nsplit, dimsplit - INTEGER, DIMENSION(:), ALLOCATABLE :: map1_2d, map2_2d + INTEGER, DIMENSION(ndims_mapping_row(pgrid%nd_index_grid)) :: map1_2d + INTEGER, DIMENSION(ndims_mapping_column(pgrid%nd_index_grid)) :: map2_2d TYPe(nd_to_2d_mapping) :: nd_index_grid INTEGER, DIMENSION(2) :: pdims_2d @@ -1339,14 +1360,16 @@ SUBROUTINE dbcsr_t_get_info(tensor, nblks_total, & !! process coordinates in process grid INTEGER, INTENT(OUT), OPTIONAL, DIMENSION(ndims_tensor(tensor)) :: pdims !! process grid dimensions - INTEGER, DIMENSION(:), ALLOCATABLE, INTENT(OUT), OPTIONAL :: ${varlist("blks_local")}$ - !! local blocks along dimension 1 and 2 - INTEGER, DIMENSION(:), ALLOCATABLE, INTENT(OUT), OPTIONAL :: ${varlist("proc_dist")}$ - !! 
distribution vector along dimension 1 and 2 - INTEGER, DIMENSION(:), ALLOCATABLE, INTENT(OUT), OPTIONAL :: ${varlist("blk_size")}$ - !! block sizes along dimension 1 and 2 - INTEGER, DIMENSION(:), ALLOCATABLE, INTENT(OUT), OPTIONAL :: ${varlist("blk_offset")}$ - !! block offsets along dimension 1 and 2 +#:for idim in range(1, maxdim+1) + INTEGER, DIMENSION(dbcsr_t_nblks_local(tensor,${idim}$)), INTENT(OUT), OPTIONAL :: blks_local_${idim}$ + !! local blocks along dimension ${idim}$ + INTEGER, DIMENSION(dbcsr_t_nblks_total(tensor,${idim}$)), INTENT(OUT), OPTIONAL :: proc_dist_${idim}$ + !! distribution along dimension ${idim}$ + INTEGER, DIMENSION(dbcsr_t_nblks_total(tensor,${idim}$)), INTENT(OUT), OPTIONAL :: blk_size_${idim}$ + !! block sizes along dimension ${idim}$ + INTEGER, DIMENSION(dbcsr_t_nblks_total(tensor,${idim}$)), INTENT(OUT), OPTIONAL :: blk_offset_${idim}$ + !! block offsets along dimension ${idim}$ +#:endfor TYPE(dbcsr_t_distribution_type), INTENT(OUT), OPTIONAL :: distribution !! distribution object CHARACTER(len=*), INTENT(OUT), OPTIONAL :: name @@ -1366,10 +1389,18 @@ SUBROUTINE dbcsr_t_get_info(tensor, nblks_total, & #:for idim in range(1, maxdim+1) IF (${idim}$ <= ndims_tensor(tensor)) THEN - IF (PRESENT(blks_local_${idim}$)) CALL get_ith_array(tensor%blks_local, ${idim}$, blks_local_${idim}$) - IF (PRESENT(proc_dist_${idim}$)) CALL get_ith_array(tensor%nd_dist, ${idim}$, proc_dist_${idim}$) - IF (PRESENT(blk_size_${idim}$)) CALL get_ith_array(tensor%blk_sizes, ${idim}$, blk_size_${idim}$) - IF (PRESENT(blk_offset_${idim}$)) CALL get_ith_array(tensor%blk_offsets, ${idim}$, blk_offset_${idim}$) + IF (PRESENT(blks_local_${idim}$)) CALL get_ith_array(tensor%blks_local, ${idim}$, & + dbcsr_t_nblks_local(tensor, ${idim}$), & + blks_local_${idim}$) + IF (PRESENT(proc_dist_${idim}$)) CALL get_ith_array(tensor%nd_dist, ${idim}$, & + dbcsr_t_nblks_total(tensor, ${idim}$), & + proc_dist_${idim}$) + IF (PRESENT(blk_size_${idim}$)) CALL get_ith_array(tensor%blk_sizes, ${idim}$, & + dbcsr_t_nblks_total(tensor, ${idim}$), & + blk_size_${idim}$) + IF (PRESENT(blk_offset_${idim}$)) CALL get_ith_array(tensor%blk_offsets, ${idim}$, & + dbcsr_t_nblks_total(tensor, ${idim}$), & + blk_offset_${idim}$) ENDIF #:endfor @@ -1434,4 +1465,65 @@ FUNCTION dbcsr_t_get_nze_total(tensor) dbcsr_t_get_nze_total = dbcsr_tas_get_nze_total(tensor%matrix_rep) END FUNCTION + PURE FUNCTION dbcsr_t_nblks_local(tensor, idim) + !! local number of blocks along dimension idim + TYPE(dbcsr_t_type), INTENT(IN) :: tensor + INTEGER, INTENT(IN) :: idim + INTEGER :: dbcsr_t_nblks_local + + IF (idim > ndims_tensor(tensor)) THEN + dbcsr_t_nblks_local = 0 + ELSE + dbcsr_t_nblks_local = tensor%nblks_local(idim) + ENDIF + + END FUNCTION + + PURE FUNCTION dbcsr_t_nblks_total(tensor, idim) + !! total numbers of blocks along dimension idim + TYPE(dbcsr_t_type), INTENT(IN) :: tensor + INTEGER, INTENT(IN) :: idim + INTEGER :: dbcsr_t_nblks_total + + IF (idim > ndims_tensor(tensor)) THEN + dbcsr_t_nblks_total = 0 + ELSE + dbcsr_t_nblks_total = tensor%nd_index_blk%dims_nd(idim) + ENDIF + END FUNCTION + + PURE FUNCTION dbcsr_t_blk_size(tensor, ind, idim) + !! 
block size of block with index ind along dimension idim + TYPE(dbcsr_t_type), INTENT(IN) :: tensor + INTEGER, DIMENSION(ndims_tensor(tensor)), & + INTENT(IN) :: ind + INTEGER, INTENT(IN) :: idim + INTEGER, DIMENSION(ndims_tensor(tensor)) :: blk_size + INTEGER :: dbcsr_t_blk_size + + IF (idim > ndims_tensor(tensor)) THEN + dbcsr_t_blk_size = 0 + ELSE + blk_size(:) = get_array_elements(tensor%blk_sizes, ind) + dbcsr_t_blk_size = blk_size(idim) + ENDIF + END FUNCTION + + PURE FUNCTION ndims_matrix_row(tensor) + !! how many tensor dimensions are mapped to matrix row + TYPE(dbcsr_t_type), INTENT(IN) :: tensor + INTEGER(int_8) :: ndims_matrix_row + + ndims_matrix_row = ndims_mapping_row(tensor%nd_index_blk) + + END FUNCTION + + PURE FUNCTION ndims_matrix_column(tensor) + !! how many tensor dimensions are mapped to matrix column + TYPE(dbcsr_t_type), INTENT(IN) :: tensor + INTEGER(int_8) :: ndims_matrix_column + + ndims_matrix_column = ndims_mapping_column(tensor%nd_index_blk) + END FUNCTION + END MODULE diff --git a/tests/dbcsr_tensor_unittest.F b/tests/dbcsr_tensor_unittest.F index 30bc4bdbfa1..14f965df48e 100644 --- a/tests/dbcsr_tensor_unittest.F +++ b/tests/dbcsr_tensor_unittest.F @@ -33,9 +33,9 @@ PROGRAM dbcsr_tensor_unittest dbcsr_t_pgrid_type, & dbcsr_t_pgrid_create, & dbcsr_t_get_info, & - dbcsr_t_pgrid_destroy + dbcsr_t_pgrid_destroy, & + ndims_tensor USE dbcsr_data_methods, ONLY: dbcsr_scalar - USE dbcsr_tensor, ONLY: dbcsr_t_ndims USE dbcsr_kinds, ONLY: real_8 #include "base/dbcsr_base_uses.f90" @@ -410,7 +410,7 @@ PROGRAM dbcsr_tensor_unittest CALL dbcsr_t_setup_test_tensor(tensor_B, mp_comm, .FALSE., blk_ind_3_2, blk_ind_4_2) CALL dbcsr_t_setup_test_tensor(tensor_C, mp_comm, .FALSE., blk_ind_1_3, blk_ind_2_3, blk_ind_4_3) - ALLOCATE(bounds_t(dbcsr_t_ndims(tensor_B))) + ALLOCATE(bounds_t(ndims_tensor(tensor_B))) CALL dbcsr_t_get_info(tensor_B, nfull_total=bounds_t) ALLOCATE(bounds(2,1)) @@ -462,7 +462,7 @@ PROGRAM dbcsr_tensor_unittest CALL dbcsr_t_setup_test_tensor(tensor_B, mp_comm, .FALSE., blk_ind_3_2, blk_ind_4_2) CALL dbcsr_t_setup_test_tensor(tensor_C, mp_comm, .FALSE., blk_ind_1_3, blk_ind_2_3, blk_ind_4_3) - ALLOCATE(bounds_t(dbcsr_t_ndims(tensor_C))) + ALLOCATE(bounds_t(ndims_tensor(tensor_C))) CALL dbcsr_t_get_info(tensor_C, nfull_total=bounds_t) ALLOCATE(bounds(2,2)) @@ -517,7 +517,7 @@ PROGRAM dbcsr_tensor_unittest CALL dbcsr_t_setup_test_tensor(tensor_B, mp_comm, .FALSE., blk_ind_1_4, blk_ind_2_4, blk_ind_4_4, blk_ind_5_4) CALL dbcsr_t_setup_test_tensor(tensor_C, mp_comm, .FALSE., blk_ind_3_5, blk_ind_4_5, blk_ind_5_5) - ALLOCATE(bounds_t(dbcsr_t_ndims(tensor_A))) + ALLOCATE(bounds_t(ndims_tensor(tensor_A))) CALL dbcsr_t_get_info(tensor_A, nfull_total=bounds_t) ALLOCATE(bounds_1(2, 2)) bounds_1(1, 1) = 7 @@ -526,7 +526,7 @@ PROGRAM dbcsr_tensor_unittest bounds_1(2, 2) = bounds_t(1) DEALLOCATE(bounds_t) - ALLOCATE(bounds_t(dbcsr_t_ndims(tensor_B))) + ALLOCATE(bounds_t(ndims_tensor(tensor_B))) CALL dbcsr_t_get_info(tensor_B, nfull_total=bounds_t) ALLOCATE(bounds_2(2, 2)) bounds_2(1, 1) = 1 From fcf367f7a60efc7594edf346286ecb421ed283f0 Mon Sep 17 00:00:00 2001 From: Patrick Seewald Date: Wed, 25 Mar 2020 14:26:06 +0100 Subject: [PATCH 14/34] CI CRAY compiler: srun with option -u (--unbuffered) to make sure that output is printed --- .ci/daint.cscs.ch/cray.build.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/.ci/daint.cscs.ch/cray.build.sh b/.ci/daint.cscs.ch/cray.build.sh index 622509e9bd3..f428822db2e 100755 --- a/.ci/daint.cscs.ch/cray.build.sh +++ 
b/.ci/daint.cscs.ch/cray.build.sh @@ -36,6 +36,7 @@ cmake \ -DBLAS_FOUND=ON -DBLAS_LIBRARIES="-lsci_cray_mpi_mp" \ -DLAPACK_FOUND=ON -DLAPACK_LIBRARIES="-lsci_cray_mpi_mp" \ -DMPIEXEC_EXECUTABLE="$(command -v srun)" \ + -DMPIEXEC_PREFLAGS="-u" \ -DTEST_MPI_RANKS=${SLURM_NTASKS} \ -DTEST_OMP_THREADS=${SLURM_CPUS_PER_TASK} \ "${WORKSPACE}" |& tee -a "${STAGE_NAME}.out" From 196ecd057453b8c476a7c5440986435495780dfd Mon Sep 17 00:00:00 2001 From: Hans Pabst Date: Wed, 25 Mar 2020 12:51:34 +0100 Subject: [PATCH 15/34] Avoid array-temporary. --- src/dist/dbcsr_dist_util.F | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/dist/dbcsr_dist_util.F b/src/dist/dbcsr_dist_util.F index 10c0b632a32..7ba28aa6944 100644 --- a/src/dist/dbcsr_dist_util.F +++ b/src/dist/dbcsr_dist_util.F @@ -754,15 +754,19 @@ END SUBROUTINE dbcsr_verify_matrix PURE SUBROUTINE count_bins(nelements, bins, nbins, bin_counts) INTEGER, INTENT(IN) :: nelements - INTEGER, DIMENSION(1:nelements), INTENT(IN) :: bins + INTEGER, DIMENSION(:), INTENT(IN) :: bins INTEGER, INTENT(IN) :: nbins INTEGER, DIMENSION(1:nbins), INTENT(OUT) :: bin_counts - INTEGER :: el + INTEGER :: bin, i, i0, i1 + ! PURE: DBCSR_ASSERT(nelements .EQ. SIZE(bins)) bin_counts(:) = 0 - DO el = 1, nelements - bin_counts(bins(el)) = bin_counts(bins(el)) + 1 + i0 = LBOUND(bins, 1) + i1 = i0 + nelements - 1 + DO i = i0, i1 + bin = bins(i) + bin_counts(bin) = bin_counts(bin) + 1 ENDDO END SUBROUTINE count_bins From 451794b11e6ac5d41018927789d30fcb78aa0da7 Mon Sep 17 00:00:00 2001 From: Hans Pabst Date: Wed, 11 Mar 2020 18:40:07 +0100 Subject: [PATCH 16/34] Removed OpenMP critical section around mp_allocate since it calls MPI_TYPE_SIZE() and MPI_Mem_alloc() both of which are thread-safe (contribution suggested by Dmitri M.). --- src/data/dbcsr_ptr_util.f90 | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/data/dbcsr_ptr_util.f90 b/src/data/dbcsr_ptr_util.f90 index fbd588910d9..60d1fb8243c 100644 --- a/src/data/dbcsr_ptr_util.f90 +++ b/src/data/dbcsr_ptr_util.f90 @@ -207,9 +207,7 @@ SUBROUTINE mem_alloc_${nametype1}$ (mem, n, mem_type) IF (mem_type%acc_hostalloc .AND. n > 1) THEN CALL acc_hostmem_allocate(mem, n, mem_type%acc_stream) ELSE IF (mem_type%mpi .AND. dbcsr_data_allocation%use_mpi_allocator) THEN -!$OMP critical(allocate) CALL mp_allocate(mem, n) -!$OMP end critical(allocate) ELSE ALLOCATE (mem(n)) ENDIF From 2217af16fbfa79e572a9dc362690b25b27e33c7b Mon Sep 17 00:00:00 2001 From: Hans Pabst Date: Mon, 23 Mar 2020 09:17:17 +0100 Subject: [PATCH 17/34] Fixed a typo. --- src/mm/dbcsr_mm_cannon.F | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mm/dbcsr_mm_cannon.F b/src/mm/dbcsr_mm_cannon.F index 584fab1c4a0..78d470bf90e 100644 --- a/src/mm/dbcsr_mm_cannon.F +++ b/src/mm/dbcsr_mm_cannon.F @@ -1586,7 +1586,7 @@ SUBROUTINE multiply_cannon(left_set, right_set, product_matrix, & CALL dbcsr_print(right_buffer_calc%mats(v_ki_right, 1), nodata=.TRUE.) ENDIF ! - ! form here the code for dbcsr_mm_driver_inner_init was taken + ! from here the code for dbcsr_mm_driver_inner_init was taken ! IF (.FALSE.) WRITE (*, *) routineN//" TICK", metronome ! Since the right matrix is shifted vertically, the From 77efe133aaf07fc6070e33dd1ad8d65b90c78085 Mon Sep 17 00:00:00 2001 From: Hans Pabst Date: Tue, 24 Mar 2020 08:31:43 +0100 Subject: [PATCH 18/34] Adjusted calc_norms implementation (removed ABS, introduced simd-loop). 
--- src/mm/dbcsr_mm_common.f90 | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/src/mm/dbcsr_mm_common.f90 b/src/mm/dbcsr_mm_common.f90 index 979e102efc0..4097cc2dde6 100644 --- a/src/mm/dbcsr_mm_common.f90 +++ b/src/mm/dbcsr_mm_common.f90 @@ -14,33 +14,37 @@ SUBROUTINE calc_norms_${nametype1}$ (norms, nblks, & !! Calculates norms of the entire matrix with minimal overhead. REAL(kind=sp), DIMENSION(:), INTENT(OUT) :: norms INTEGER, INTENT(IN) :: nblks - INTEGER, DIMENSION(3, nblks), INTENT(IN) :: blki + INTEGER, DIMENSION(3, nblks), INTENT(IN) :: blki INTEGER, DIMENSION(:), INTENT(IN) :: rbs, cbs ${type1}$, DIMENSION(:), & - INTENT(IN) :: DATA + INTENT(IN) :: DATA - INTEGER :: blk, bp, bpe, row, col + INTEGER, PARAMETER :: simd = 64 / ${typesize1}$ + INTEGER :: i, n, blk, bp, bpe, row, col REAL(kind=sp) :: val ! --------------------------------------------------------------------------- !$OMP parallel default(none) & -!$OMP private (row, col, blk, bp, bpe, val) & -!$OMP shared (nblks) & +!$OMP private (i, n, row, col, blk, bp, bpe, val) & +!$OMP shared (nblks, simd) & !$OMP shared (rbs, cbs, blki, & !$OMP data, norms) !$OMP do - DO blk = 1, nblks - IF (blki(3, blk) .NE. 0) THEN - row = blki(1, blk) - col = blki(2, blk) + DO i = 1, nblks, simd + n = MIN(i + simd, nblks) + DO blk = i, n bp = blki(3, blk) - bpe = bp + rbs(row)*cbs(col) - 1 - val = SQRT(REAL(SUM(ABS(DATA(bp:bpe))**2), KIND=sp)) - ELSE - val = 0.0_sp - ENDIF - norms(blk) = val + IF (bp .NE. 0) THEN + row = blki(1, blk) + col = blki(2, blk) + bpe = bp + rbs(row) * cbs(col) - 1 + val = SQRT(REAL(SUM(DATA(bp:bpe)**2), KIND=sp)) + ELSE + val = 0.0_sp + ENDIF + norms(blk) = val + ENDDO ENDDO !$OMP end do !$OMP end parallel From 532f328cc9da31b70beaedf4079a130dd30a5387 Mon Sep 17 00:00:00 2001 From: Hans Pabst Date: Tue, 24 Mar 2020 09:21:33 +0100 Subject: [PATCH 19/34] Code cleanup and prettify. --- src/mm/dbcsr_mm_common.f90 | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/mm/dbcsr_mm_common.f90 b/src/mm/dbcsr_mm_common.f90 index 4097cc2dde6..05c403f7fe3 100644 --- a/src/mm/dbcsr_mm_common.f90 +++ b/src/mm/dbcsr_mm_common.f90 @@ -8,7 +8,7 @@ !--------------------------------------------------------------------------------------------------! #:include '../data/dbcsr.fypp' -#:for n, nametype1, base1, prec1, kind1, type1, dkind1 in inst_params_float +#:for n, nametype1, base1, prec1, kind1, type1, typesize1, dkind1 in inst_params_float SUBROUTINE calc_norms_${nametype1}$ (norms, nblks, & blki, rbs, cbs, DATA) !! Calculates norms of the entire matrix with minimal overhead. @@ -19,7 +19,7 @@ SUBROUTINE calc_norms_${nametype1}$ (norms, nblks, & ${type1}$, DIMENSION(:), & INTENT(IN) :: DATA - INTEGER, PARAMETER :: simd = 64 / ${typesize1}$ + INTEGER, PARAMETER :: simd = 64/${typesize1}$ INTEGER :: i, n, blk, bp, bpe, row, col REAL(kind=sp) :: val @@ -27,9 +27,7 @@ SUBROUTINE calc_norms_${nametype1}$ (norms, nblks, & !$OMP parallel default(none) & !$OMP private (i, n, row, col, blk, bp, bpe, val) & -!$OMP shared (nblks, simd) & -!$OMP shared (rbs, cbs, blki, & -!$OMP data, norms) +!$OMP shared (nblks, rbs, cbs, blki, data, norms) !$OMP do DO i = 1, nblks, simd n = MIN(i + simd, nblks) @@ -38,7 +36,7 @@ SUBROUTINE calc_norms_${nametype1}$ (norms, nblks, & IF (bp .NE. 
0) THEN row = blki(1, blk) col = blki(2, blk) - bpe = bp + rbs(row) * cbs(col) - 1 + bpe = bp + rbs(row)*cbs(col) - 1 val = SQRT(REAL(SUM(DATA(bp:bpe)**2), KIND=sp)) ELSE val = 0.0_sp From 608be18d94cd28b07fdb13d0c069e9638bfc9bf0 Mon Sep 17 00:00:00 2001 From: Hans Pabst Date: Tue, 24 Mar 2020 14:24:55 +0100 Subject: [PATCH 20/34] Revised implementation. --- src/mm/dbcsr_mm_common.f90 | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/src/mm/dbcsr_mm_common.f90 b/src/mm/dbcsr_mm_common.f90 index 05c403f7fe3..777edb4ff78 100644 --- a/src/mm/dbcsr_mm_common.f90 +++ b/src/mm/dbcsr_mm_common.f90 @@ -19,30 +19,41 @@ SUBROUTINE calc_norms_${nametype1}$ (norms, nblks, & ${type1}$, DIMENSION(:), & INTENT(IN) :: DATA - INTEGER, PARAMETER :: simd = 64/${typesize1}$ + INTEGER, PARAMETER :: nsimd = (2*64)/${typesize1}$ INTEGER :: i, n, blk, bp, bpe, row, col - REAL(kind=sp) :: val + REAL(kind=sp) :: vals(0:nsimd - 1) ! --------------------------------------------------------------------------- !$OMP parallel default(none) & -!$OMP private (i, n, row, col, blk, bp, bpe, val) & +!$OMP private (i, n, row, col, blk, bp, bpe, vals) & !$OMP shared (nblks, rbs, cbs, blki, data, norms) !$OMP do - DO i = 1, nblks, simd - n = MIN(i + simd, nblks) - DO blk = i, n - bp = blki(3, blk) + DO i = 1, nblks, nsimd + n = MIN(nsimd - 1, nblks - i) + DO blk = 0, n + bp = blki(3, blk + i) IF (bp .NE. 0) THEN - row = blki(1, blk) - col = blki(2, blk) + row = blki(1, blk + i) + col = blki(2, blk + i) bpe = bp + rbs(row)*cbs(col) - 1 - val = SQRT(REAL(SUM(DATA(bp:bpe)**2), KIND=sp)) + vals(blk) = REAL(SUM(DATA(bp:bpe)**2), KIND=sp) ELSE - val = 0.0_sp + vals(blk) = 0.0_sp ENDIF - norms(blk) = val ENDDO + ! SIMD: SQRT is not part of above IF-condition + IF (n .EQ. (nsimd - 1)) THEN +!$OMP simd + DO blk = 0, nsimd - 1 + norms(blk + i) = SQRT(vals(blk)) + ENDDO +!$OMP end simd + ELSE ! remainder + DO blk = 0, n + norms(blk + i) = SQRT(vals(blk)) + ENDDO + ENDIF ENDDO !$OMP end do !$OMP end parallel From 0b4548048dcee47f66bfb5a8d541d941a5c623e7 Mon Sep 17 00:00:00 2001 From: Hans Pabst Date: Wed, 25 Mar 2020 17:29:57 +0100 Subject: [PATCH 21/34] Reworked to use OpenMP tasks. Note: default(none) was removed from the parallel region since it is cumbersome given that variable-kinds are specified on a per-task construct basis (and we do not want to suggest any private copies at the outer scope as we also removed the work-distribution [OMP-DO]). --- src/mm/dbcsr_mm_common.f90 | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/mm/dbcsr_mm_common.f90 b/src/mm/dbcsr_mm_common.f90 index 777edb4ff78..f836e55128a 100644 --- a/src/mm/dbcsr_mm_common.f90 +++ b/src/mm/dbcsr_mm_common.f90 @@ -21,41 +21,44 @@ SUBROUTINE calc_norms_${nametype1}$ (norms, nblks, & INTEGER, PARAMETER :: nsimd = (2*64)/${typesize1}$ INTEGER :: i, n, blk, bp, bpe, row, col - REAL(kind=sp) :: vals(0:nsimd - 1) + REAL(kind=sp) :: vals(nsimd) ! --------------------------------------------------------------------------- -!$OMP parallel default(none) & -!$OMP private (i, n, row, col, blk, bp, bpe, vals) & -!$OMP shared (nblks, rbs, cbs, blki, data, norms) -!$OMP do - DO i = 1, nblks, nsimd - n = MIN(nsimd - 1, nblks - i) - DO blk = 0, n +!$OMP parallel +!$OMP single + DO i = 0, nblks - 1, nsimd + n = MIN(nsimd, nblks - i) + DO blk = 1, n bp = blki(3, blk + i) IF (bp .NE. 
0) THEN +!$OMP task firstprivate(i, blk, row, col, bp, bpe) row = blki(1, blk + i) col = blki(2, blk + i) bpe = bp + rbs(row)*cbs(col) - 1 vals(blk) = REAL(SUM(DATA(bp:bpe)**2), KIND=sp) +!$OMP end task ELSE vals(blk) = 0.0_sp ENDIF ENDDO - ! SIMD: SQRT is not part of above IF-condition - IF (n .EQ. (nsimd - 1)) THEN + ! SIMD: SQRT is intentionally not in above IF-condition +!$OMP taskwait +!$OMP task firstprivate(i, blk, n, vals) + IF (n .EQ. nsimd) THEN !$OMP simd - DO blk = 0, nsimd - 1 + DO blk = 1, nsimd norms(blk + i) = SQRT(vals(blk)) ENDDO !$OMP end simd ELSE ! remainder - DO blk = 0, n + DO blk = 1, n norms(blk + i) = SQRT(vals(blk)) ENDDO ENDIF +!$OMP end task ENDDO -!$OMP end do +!$OMP end single !$OMP end parallel END SUBROUTINE calc_norms_${nametype1}$ #:endfor From 12248d38709a69b74183d21ffb5f907f5f245cbd Mon Sep 17 00:00:00 2001 From: Hans Pabst Date: Wed, 25 Mar 2020 17:43:08 +0100 Subject: [PATCH 22/34] Introduced default(none) for the task construct. --- src/mm/dbcsr_mm_common.f90 | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mm/dbcsr_mm_common.f90 b/src/mm/dbcsr_mm_common.f90 index f836e55128a..c9b2ccd33f7 100644 --- a/src/mm/dbcsr_mm_common.f90 +++ b/src/mm/dbcsr_mm_common.f90 @@ -32,7 +32,8 @@ SUBROUTINE calc_norms_${nametype1}$ (norms, nblks, & DO blk = 1, n bp = blki(3, blk + i) IF (bp .NE. 0) THEN -!$OMP task firstprivate(i, blk, row, col, bp, bpe) +!$OMP task default(none) shared(DATA, vals, blki, rbs, cbs) & +!$OMP firstprivate(i, blk, row, col, bp, bpe) row = blki(1, blk + i) col = blki(2, blk + i) bpe = bp + rbs(row)*cbs(col) - 1 @@ -44,7 +45,8 @@ SUBROUTINE calc_norms_${nametype1}$ (norms, nblks, & ENDDO ! SIMD: SQRT is intentionally not in above IF-condition !$OMP taskwait -!$OMP task firstprivate(i, blk, n, vals) +!$OMP task default(none) shared (norms) & +!$OMP firstprivate(i, blk, n, vals) IF (n .EQ. nsimd) THEN !$OMP simd DO blk = 1, nsimd From 4ab49022479f834a2a2ab42c0ecffb5e91e04c28 Mon Sep 17 00:00:00 2001 From: Hans Pabst Date: Thu, 26 Mar 2020 09:22:54 +0100 Subject: [PATCH 23/34] Revised implementation and specified dynamic schedule. --- src/mm/dbcsr_mm_common.f90 | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/mm/dbcsr_mm_common.f90 b/src/mm/dbcsr_mm_common.f90 index c9b2ccd33f7..414637856d2 100644 --- a/src/mm/dbcsr_mm_common.f90 +++ b/src/mm/dbcsr_mm_common.f90 @@ -19,34 +19,30 @@ SUBROUTINE calc_norms_${nametype1}$ (norms, nblks, & ${type1}$, DIMENSION(:), & INTENT(IN) :: DATA - INTEGER, PARAMETER :: nsimd = (2*64)/${typesize1}$ + INTEGER, PARAMETER :: nsimd = (1*64)/${typesize1}$ INTEGER :: i, n, blk, bp, bpe, row, col REAL(kind=sp) :: vals(nsimd) ! --------------------------------------------------------------------------- -!$OMP parallel -!$OMP single +!$OMP parallel default(none) & +!$OMP shared(DATA, norms, nblks, rbs, cbs, blki) & +!$OMP private(vals, i, n, row, col, blk, bp, bpe) +!$OMP do schedule(dynamic) DO i = 0, nblks - 1, nsimd n = MIN(nsimd, nblks - i) DO blk = 1, n bp = blki(3, blk + i) IF (bp .NE. 0) THEN -!$OMP task default(none) shared(DATA, vals, blki, rbs, cbs) & -!$OMP firstprivate(i, blk, row, col, bp, bpe) row = blki(1, blk + i) col = blki(2, blk + i) bpe = bp + rbs(row)*cbs(col) - 1 vals(blk) = REAL(SUM(DATA(bp:bpe)**2), KIND=sp) -!$OMP end task ELSE vals(blk) = 0.0_sp ENDIF ENDDO ! SIMD: SQRT is intentionally not in above IF-condition -!$OMP taskwait -!$OMP task default(none) shared (norms) & -!$OMP firstprivate(i, blk, n, vals) IF (n .EQ. 
nsimd) THEN !$OMP simd DO blk = 1, nsimd @@ -58,9 +54,8 @@ SUBROUTINE calc_norms_${nametype1}$ (norms, nblks, & norms(blk + i) = SQRT(vals(blk)) ENDDO ENDIF -!$OMP end task ENDDO -!$OMP end single +!$OMP end do !$OMP end parallel END SUBROUTINE calc_norms_${nametype1}$ #:endfor From 5f1f483146463f7d09a8bbf66bb535de235ded16 Mon Sep 17 00:00:00 2001 From: Hans Pabst Date: Thu, 26 Mar 2020 16:24:06 +0100 Subject: [PATCH 24/34] Revised number of elements processed by inner loop. --- src/mm/dbcsr_mm_common.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mm/dbcsr_mm_common.f90 b/src/mm/dbcsr_mm_common.f90 index 414637856d2..b1500290531 100644 --- a/src/mm/dbcsr_mm_common.f90 +++ b/src/mm/dbcsr_mm_common.f90 @@ -19,7 +19,7 @@ SUBROUTINE calc_norms_${nametype1}$ (norms, nblks, & ${type1}$, DIMENSION(:), & INTENT(IN) :: DATA - INTEGER, PARAMETER :: nsimd = (1*64)/${typesize1}$ + INTEGER, PARAMETER :: nsimd = (4*64)/${typesize1}$ INTEGER :: i, n, blk, bp, bpe, row, col REAL(kind=sp) :: vals(nsimd) From 25d1f406b9eadfce3aebbf5c03660ac0197bf22c Mon Sep 17 00:00:00 2001 From: Patrick Seewald Date: Fri, 27 Mar 2020 16:31:57 +0100 Subject: [PATCH 25/34] TAS/Tensor unittests: avoid list directed output This ensures that different compilers write the same output --- src/tas/dbcsr_tas_test.F | 16 ++++++++-------- src/tensors/dbcsr_tensor_test.F | 8 ++++---- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/tas/dbcsr_tas_test.F b/src/tas/dbcsr_tas_test.F index 9047b603dab..3c8395e3984 100644 --- a/src/tas/dbcsr_tas_test.F +++ b/src/tas/dbcsr_tas_test.F @@ -169,7 +169,7 @@ SUBROUTINE dbcsr_tas_benchmark_mm(transa, transb, transc, matrix_a, matrix_b, ma IF (PRESENT(io_unit)) THEN IF (io_unit > 0) THEN - WRITE (io_unit, *) "starting tall-and-skinny benchmark" + WRITE (io_unit, "(A)") "starting tall-and-skinny benchmark" ENDIF ENDIF CALL timeset("benchmark_tas_mm", handle1) @@ -179,7 +179,7 @@ SUBROUTINE dbcsr_tas_benchmark_mm(transa, transb, transc, matrix_a, matrix_b, ma CALL timestop(handle1) IF (PRESENT(io_unit)) THEN IF (io_unit > 0) THEN - WRITE (io_unit, *) "tall-and-skinny benchmark completed" + WRITE (io_unit, "(A)") "tall-and-skinny benchmark completed" ENDIF ENDIF @@ -223,7 +223,7 @@ SUBROUTINE dbcsr_tas_benchmark_mm(transa, transb, transc, matrix_a, matrix_b, ma CALL dbcsr_complete_redistribute(dbcsr_b, dbcsr_b_mm) IF (PRESENT(io_unit)) THEN IF (io_unit > 0) THEN - WRITE (io_unit, *) "starting dbcsr benchmark" + WRITE (io_unit, "(A)") "starting dbcsr benchmark" ENDIF ENDIF CALL timeset("benchmark_dbcsr_mm", handle2) @@ -232,7 +232,7 @@ SUBROUTINE dbcsr_tas_benchmark_mm(transa, transb, transc, matrix_a, matrix_b, ma CALL timestop(handle2) IF (PRESENT(io_unit)) THEN IF (io_unit > 0) THEN - WRITE (io_unit, *) "dbcsr benchmark completed" + WRITE (io_unit, "(A)") "dbcsr benchmark completed" ENDIF ENDIF @@ -348,14 +348,14 @@ SUBROUTINE dbcsr_tas_test_mm(transa, transb, transc, matrix_a, matrix_b, matrix_ IF (ABS(norm) .GT. test_tol) THEN WRITE (io_unit, '(A, A, A, A, A, 1X, A)') TRIM(matrix_a%matrix%name), transa, ' X ', TRIM(matrix_b%matrix%name), & transb, 'failed!' - WRITE (io_unit, *) "checksums", sq_cs, rc_cs - WRITE (io_unit, *) "difference norm", norm + WRITE (io_unit, "(A,1X,E9.2,1X,E9.2)") "checksums", sq_cs, rc_cs + WRITE (io_unit, "(A,1X,E9.2)") "difference norm", norm DBCSR_ABORT("") ELSE WRITE (io_unit, '(A, A, A, A, A, 1X, A)') TRIM(matrix_a%matrix%name), transa, ' X ', TRIM(matrix_b%matrix%name), & transb, 'passed!' 
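         ! A minimal sketch (values below are invented for illustration) of why the explicit
         ! edit descriptors matter: list-directed output such as
         !    WRITE (io_unit, *) "difference norm", norm
         ! leaves field width, spacing and digit count to the compiler, so two builds may print
         ! "difference norm  1.23000002E-04" versus "difference norm 1.230E-04" for the same run.
         ! A fixed format like
         !    WRITE (io_unit, "(A,1X,E9.2)") "difference norm", norm
         ! always prints "difference norm  0.12E-03", keeping the reference outputs comparable
         ! across compilers.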
- WRITE (io_unit, *) "checksums", sq_cs, rc_cs - WRITE (io_unit, *) "difference norm", norm + WRITE (io_unit, "(A,1X,E9.2,1X,E9.2)") "checksums", sq_cs, rc_cs + WRITE (io_unit, "(A,1X,E9.2)") "difference norm", norm ENDIF ENDIF diff --git a/src/tensors/dbcsr_tensor_test.F b/src/tensors/dbcsr_tensor_test.F index 1e86363ac6b..6dcad430a24 100644 --- a/src/tensors/dbcsr_tensor_test.F +++ b/src/tensors/dbcsr_tensor_test.F @@ -662,9 +662,9 @@ SUBROUTINE dbcsr_t_contract_test(alpha, tensor_1, tensor_2, beta, tensor_3, & IF (debug) THEN IF (io_unit > 0) THEN - WRITE (io_unit, *) "checksum ", TRIM(tensor_1%name), cs_1 - WRITE (io_unit, *) "checksum ", TRIM(tensor_2%name), cs_2 - WRITE (io_unit, *) "checksum ", TRIM(tensor_3%name), cs_3 + WRITE (io_unit, "(A, E9.2)") "checksum ", TRIM(tensor_1%name), cs_1 + WRITE (io_unit, "(A, E9.2)") "checksum ", TRIM(tensor_2%name), cs_2 + WRITE (io_unit, "(A, E9.2)") "checksum ", TRIM(tensor_3%name), cs_3 ENDIF ENDIF @@ -692,7 +692,7 @@ SUBROUTINE dbcsr_t_contract_test(alpha, tensor_1, tensor_2, beta, tensor_3, & IF (debug) THEN IF (io_unit > 0) THEN - WRITE (io_unit, *) "checksum ", TRIM(tensor_3%name), cs_3 + WRITE (io_unit, "(A, E9.2)") "checksum ", TRIM(tensor_3%name), cs_3 ENDIF ENDIF From 846e6cfe7c97d2486feac8b694b733baecc4f5ef Mon Sep 17 00:00:00 2001 From: Patrick Seewald Date: Fri, 27 Mar 2020 16:22:11 +0100 Subject: [PATCH 26/34] TAS/Tensor unittests: use larnv instead of fortran random_number for reproducibility this should give reproducible behaviour - with different parallelization - with different compilers --- src/tas/dbcsr_tas_test.F | 22 +++++++++++-- src/tensors/dbcsr_tensor_test.F | 58 +++++++++++++++++++++------------ tests/dbcsr_tensor_unittest.F | 30 ++++++++--------- 3 files changed, 71 insertions(+), 39 deletions(-) diff --git a/src/tas/dbcsr_tas_test.F b/src/tas/dbcsr_tas_test.F index 3c8395e3984..0a85b213432 100644 --- a/src/tas/dbcsr_tas_test.F +++ b/src/tas/dbcsr_tas_test.F @@ -44,6 +44,8 @@ MODULE dbcsr_tas_test USE dbcsr_operations, ONLY: dbcsr_maxabs, & dbcsr_add USE dbcsr_transformations, ONLY: dbcsr_complete_redistribute + USE dbcsr_blas_operations, ONLY: & + dbcsr_lapack_larnv, set_larnv_seed #include "base/dbcsr_base_uses.f90" IMPLICIT NONE @@ -56,6 +58,8 @@ MODULE dbcsr_tas_test dbcsr_tas_setup_test_matrix, & dbcsr_tas_test_mm + INTEGER, SAVE :: randmat_counter = 0 + CONTAINS SUBROUTINE dbcsr_tas_setup_test_matrix(matrix, mp_comm_out, mp_comm, nrows, ncols, rbsizes, cbsizes, & !! Setup tall-and-skinny matrix for testing @@ -75,7 +79,7 @@ SUBROUTINE dbcsr_tas_setup_test_matrix(matrix, mp_comm_out, mp_comm, nrows, ncol INTEGER :: col_size, max_col_size, max_nze, & max_row_size, mynode, node_holds_blk, & numnodes, nze, row_size - INTEGER(KIND=int_8) :: col, col_s, row, row_s + INTEGER(KIND=int_8) :: col, col_s, row, row_s, nrow, ncol INTEGER, DIMENSION(2) :: pcoord, pdims LOGICAL :: reuse_comm_prv, tr REAL(KIND=real_8) :: rn @@ -85,11 +89,17 @@ SUBROUTINE dbcsr_tas_setup_test_matrix(matrix, mp_comm_out, mp_comm, nrows, ncol TYPE(dbcsr_tas_distribution_type) :: dist CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_tas_setup_test_matrix' INTEGER :: handle + INTEGER, DIMENSION(4) :: iseed, jseed ! we don't reserve blocks prior to putting them, so this time is meaningless and should not ! be considered in benchmark! CALL timeset(routineN, handle) + ! Check that the counter was initialised (or has not overflowed) + DBCSR_ASSERT(randmat_counter .NE. 0) + ! the counter goes into the seed. 
Every new call gives a new random matrix + randmat_counter = randmat_counter + 1 + IF (PRESENT(reuse_comm)) THEN reuse_comm_prv = reuse_comm ELSE @@ -119,11 +129,16 @@ SUBROUTINE dbcsr_tas_setup_test_matrix(matrix, mp_comm_out, mp_comm, nrows, ncol max_col_size = MAXVAL(cbsizes) max_nze = max_row_size*max_col_size + nrow = dbcsr_tas_nblkrows_total(matrix) + ncol = dbcsr_tas_nblkcols_total(matrix) + ALLOCATE (values(max_row_size, max_col_size)) + CALL set_larnv_seed(7, 42, 3, 42, randmat_counter, jseed) + DO row = 1, dbcsr_tas_nblkrows_total(matrix) DO col = 1, dbcsr_tas_nblkcols_total(matrix) - CALL RANDOM_NUMBER(rn) + CALL dlarnv(1, jseed, 1, rn) IF (rn .LT. sparsity) THEN tr = .FALSE. row_s = row; col_s = col @@ -133,7 +148,8 @@ SUBROUTINE dbcsr_tas_setup_test_matrix(matrix, mp_comm_out, mp_comm, nrows, ncol row_size = rbsize_obj%data(row_s) col_size = cbsize_obj%data(col_s) nze = row_size*col_size - CALL RANDOM_NUMBER(values(1:row_size, 1:col_size)) + CALL set_larnv_seed(INT(row_s), INT(nrow), INT(col_s), INT(ncol), randmat_counter, iseed) + CALL dlarnv(1, iseed, max_nze, values) CALL dbcsr_tas_put_block(matrix, row_s, col_s, values(1:row_size, 1:col_size)) ENDIF ENDIF diff --git a/src/tensors/dbcsr_tensor_test.F b/src/tensors/dbcsr_tensor_test.F index 6dcad430a24..9f222d42227 100644 --- a/src/tensors/dbcsr_tensor_test.F +++ b/src/tensors/dbcsr_tensor_test.F @@ -49,9 +49,12 @@ MODULE dbcsr_tensor_test mp_sum, & mp_cart_create USE dbcsr_allocate_wrap, ONLY: allocate_any - USE dbcsr_tensor_index, ONLY: combine_tensor_index + USE dbcsr_tensor_index, ONLY: & + combine_tensor_index, get_2d_indices_tensor, dbcsr_t_get_mapping_info USE dbcsr_tas_test, ONLY: dbcsr_tas_checksum USE dbcsr_data_types, ONLY: dbcsr_scalar_type + USE dbcsr_blas_operations, ONLY: & + dbcsr_lapack_larnv, set_larnv_seed #include "base/dbcsr_base_uses.f90" IMPLICIT NONE @@ -86,6 +89,10 @@ MODULE dbcsr_tensor_test #:endfor #:endfor END INTERFACE + + + INTEGER, SAVE :: randmat_counter = 0 + CONTAINS FUNCTION dbcsr_t_equal(tensor1, tensor2) @@ -278,8 +285,8 @@ SUBROUTINE dbcsr_t_test_formats(ndims, mp_comm, unit_nr, verbose, & #:for dim in range(1, maxdim+1) IF (${dim}$ <= ndims) THEN nblks = SIZE(blk_size_${dim}$) - CALL dbcsr_t_random_dist(dist1_${dim}$, nblks, pdims(${dim}$), mp_comm) - CALL dbcsr_t_random_dist(dist2_${dim}$, nblks, pdims(${dim}$), mp_comm) + CALL dbcsr_t_random_dist(dist1_${dim}$, nblks, pdims(${dim}$)) + CALL dbcsr_t_random_dist(dist2_${dim}$, nblks, pdims(${dim}$)) ENDIF #:endfor @@ -370,26 +377,20 @@ SUBROUTINE dbcsr_t_test_formats(ndims, mp_comm, unit_nr, verbose, & CALL dbcsr_t_pgrid_destroy(comm_nd) END SUBROUTINE - SUBROUTINE dbcsr_t_random_dist(dist_array, dist_size, nbins, mp_comm) - !! Create test distribution + SUBROUTINE dbcsr_t_random_dist(dist_array, dist_size, nbins) + INTEGER, DIMENSION(:), INTENT(OUT), ALLOCATABLE :: dist_array + INTEGER, INTENT(IN) :: dist_size, nbins - INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT) :: dist_array - INTEGER, INTENT(IN) :: dist_size, nbins, mp_comm - REAL, DIMENSION(dist_size) :: rn - INTEGER, DIMENSION(dist_size) :: rn_int - INTEGER :: numnodes, mynode + INTEGER :: i - CALL mp_environ(numnodes, mynode, mp_comm) - - IF (mynode .EQ. 
0) THEN - CALL RANDOM_NUMBER(rn) - ENDIF - CALL mp_bcast(rn, 0, mp_comm) + ALLOCATE (dist_array(dist_size)) + !CALL RANDOM_NUMBER (dist_array) + DO i = 1, dist_size + dist_array(i) = MODULO(nbins - i, nbins) + END DO - rn_int = FLOOR(rn*nbins) - CALL allocate_any(dist_array, source=rn_int) + END SUBROUTINE - END SUBROUTINE dbcsr_t_random_dist SUBROUTINE dbcsr_t_setup_test_tensor(tensor, mp_comm, enumerate, ${varlist("blk_ind")}$) !! Allocate and fill test tensor - entries are enumerated by their index s.t. they only depend @@ -402,7 +403,7 @@ SUBROUTINE dbcsr_t_setup_test_tensor(tensor, mp_comm, enumerate, ${varlist("blk_ !! index along respective dimension of non-zero blocks INTEGER :: blk, numnodes, mynode - INTEGER :: i, ib, my_nblks_alloc, nblks_alloc, proc + INTEGER :: i, ib, my_nblks_alloc, nblks_alloc, proc, nze INTEGER, ALLOCATABLE, DIMENSION(:) :: ${varlist("my_blk_ind")}$ INTEGER, DIMENSION(ndims_tensor(tensor)) :: blk_index, blk_offset, blk_size, & tensor_dims @@ -412,10 +413,18 @@ SUBROUTINE dbcsr_t_setup_test_tensor(tensor, mp_comm, enumerate, ${varlist("blk_ DIMENSION(${shape_colon(ndim)}$) :: blk_values_${ndim}$ #:endfor TYPE(dbcsr_t_iterator_type) :: iterator + INTEGER, DIMENSION(4) :: iseed + INTEGER, DIMENSION(2) :: blk_index_2d, nblks_2d nblks_alloc = SIZE(blk_ind_1) CALL mp_environ(numnodes, mynode, mp_comm) + IF(.NOT. enumerate) THEN + DBCSR_ASSERT(randmat_counter .NE. 0) + + randmat_counter = randmat_counter + 1 + ENDIF + ALLOCATE (ind_nd(nblks_alloc, ndims_tensor(tensor))) my_nblks_alloc = 0 DO ib = 1, nblks_alloc @@ -459,6 +468,13 @@ SUBROUTINE dbcsr_t_setup_test_tensor(tensor, mp_comm, enumerate, ${varlist("blk_ DO WHILE (dbcsr_t_iterator_blocks_left(iterator)) CALL dbcsr_t_iterator_next_block(iterator, blk_index, blk, blk_size=blk_size, blk_offset=blk_offset) + IF(.NOT. 
enumerate) THEN + blk_index_2d = INT(get_2d_indices_tensor(tensor%nd_index_blk, blk_index)) + CALL dbcsr_t_get_mapping_info(tensor%nd_index_blk, dims_2d=nblks_2d) + CALL set_larnv_seed(blk_index_2d(1), nblks_2d(1), blk_index_2d(2), nblks_2d(2), randmat_counter, iseed) + nze = PRODUCT(blk_size) + ENDIF + #:for ndim in ndims IF (ndims_tensor(tensor) == ${ndim}$) THEN CALL allocate_any(blk_values_${ndim}$, shape_spec=blk_size) @@ -466,7 +482,7 @@ SUBROUTINE dbcsr_t_setup_test_tensor(tensor, mp_comm, enumerate, ${varlist("blk_ IF (enumerate) THEN CALL enumerate_block_elements(blk_size, blk_offset, tensor_dims, blk_${ndim}$=blk_values_${ndim}$) ELSE - CALL random_number(blk_values_${ndim}$) + CALL dlarnv(1, iseed, nze, blk_values_${ndim}$) ENDIF CALL dbcsr_t_put_block(tensor, blk_index, blk_size, blk_values_${ndim}$) DEALLOCATE (blk_values_${ndim}$) diff --git a/tests/dbcsr_tensor_unittest.F b/tests/dbcsr_tensor_unittest.F index 14f965df48e..d61481f48b5 100644 --- a/tests/dbcsr_tensor_unittest.F +++ b/tests/dbcsr_tensor_unittest.F @@ -275,25 +275,25 @@ PROGRAM dbcsr_tensor_unittest CALL dbcsr_t_pgrid_create(mp_comm, pdims_3d, pgrid_3d) CALL dbcsr_t_pgrid_create(mp_comm, pdims_2d, pgrid_2d) - CALL dbcsr_t_random_dist(dist1_1, nblks_1, pdims_3d(1), mp_comm) - CALL dbcsr_t_random_dist(dist1_2, nblks_2, pdims_3d(2), mp_comm) - CALL dbcsr_t_random_dist(dist1_3, nblks_3, pdims_3d(3), mp_comm) + CALL dbcsr_t_random_dist(dist1_1, nblks_1, pdims_3d(1)) + CALL dbcsr_t_random_dist(dist1_2, nblks_2, pdims_3d(2)) + CALL dbcsr_t_random_dist(dist1_3, nblks_3, pdims_3d(3)) - CALL dbcsr_t_random_dist(dist2_1, nblks_3, pdims_2d(1), mp_comm) - CALL dbcsr_t_random_dist(dist2_2, nblks_4, pdims_2d(2), mp_comm) + CALL dbcsr_t_random_dist(dist2_1, nblks_3, pdims_2d(1)) + CALL dbcsr_t_random_dist(dist2_2, nblks_4, pdims_2d(2)) - CALL dbcsr_t_random_dist(dist3_1, nblks_1, pdims_3d(1), mp_comm) - CALL dbcsr_t_random_dist(dist3_2, nblks_2, pdims_3d(2), mp_comm) - CALL dbcsr_t_random_dist(dist3_3, nblks_4, pdims_3d(3), mp_comm) + CALL dbcsr_t_random_dist(dist3_1, nblks_1, pdims_3d(1)) + CALL dbcsr_t_random_dist(dist3_2, nblks_2, pdims_3d(2)) + CALL dbcsr_t_random_dist(dist3_3, nblks_4, pdims_3d(3)) - CALL dbcsr_t_random_dist(dist4_1, nblks_1, pdims_4d(1), mp_comm) - CALL dbcsr_t_random_dist(dist4_2, nblks_2, pdims_4d(2), mp_comm) - CALL dbcsr_t_random_dist(dist4_3, nblks_4, pdims_4d(3), mp_comm) - CALL dbcsr_t_random_dist(dist4_4, nblks_5, pdims_4d(4), mp_comm) + CALL dbcsr_t_random_dist(dist4_1, nblks_1, pdims_4d(1)) + CALL dbcsr_t_random_dist(dist4_2, nblks_2, pdims_4d(2)) + CALL dbcsr_t_random_dist(dist4_3, nblks_4, pdims_4d(3)) + CALL dbcsr_t_random_dist(dist4_4, nblks_5, pdims_4d(4)) - CALL dbcsr_t_random_dist(dist5_1, nblks_3, pdims_3d(1), mp_comm) - CALL dbcsr_t_random_dist(dist5_2, nblks_4, pdims_3d(2), mp_comm) - CALL dbcsr_t_random_dist(dist5_3, nblks_5, pdims_3d(3), mp_comm) + CALL dbcsr_t_random_dist(dist5_1, nblks_3, pdims_3d(1)) + CALL dbcsr_t_random_dist(dist5_2, nblks_4, pdims_3d(2)) + CALL dbcsr_t_random_dist(dist5_3, nblks_5, pdims_3d(3)) !--------------------------------------------------------------------------------------------------! ! Test 4: Testing tensor contraction (12|3)x(3|4)=(12|4) ! 
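Background on the larnv change above: the Fortran intrinsic RANDOM_NUMBER draws from a generator whose stream depends on the compiler's runtime and on how many values each MPI rank has already drawn, so test matrices (and their checksums) differ between compilers and between parallel layouts. LAPACK's dlarnv(IDIST, ISEED, N, X) instead produces a stream that is completely determined by its four-element ISEED, and the patch derives that seed only from the global block index, the total block dimensions and a per-matrix counter — never from the rank that happens to generate the block. The sketch below illustrates the idea in self-contained form; the seed derivation in demo_seed is a hypothetical stand-in for DBCSR's set_larnv_seed, written only to satisfy LAPACK's requirements that 0 <= ISEED(i) <= 4095 and ISEED(4) be odd.

   PROGRAM larnv_seed_demo
      !! Sketch: reproducible per-block random values via a deterministic LAPACK seed
      IMPLICIT NONE
      INTEGER, PARAMETER :: dp = SELECTED_REAL_KIND(15, 307)
      INTEGER, DIMENSION(4) :: iseed
      REAL(KIND=dp), DIMENSION(6) :: values
      EXTERNAL :: dlarnv

      ! The seed depends only on the block's global index and a per-matrix counter,
      ! so every rank (and every compiler) generates the same values for this block.
      CALL demo_seed(irow=3, nrow=10, icol=7, ncol=20, counter=1, iseed=iseed)
      CALL dlarnv(1, iseed, SIZE(values), values) ! uniform (0,1), stream fixed by ISEED
      WRITE (*, '(6F8.4)') values

   CONTAINS

      SUBROUTINE demo_seed(irow, nrow, icol, ncol, counter, iseed)
         !! Illustrative stand-in for set_larnv_seed: flatten (block index, counter)
         !! into four seed entries within LAPACK's allowed range.
         INTEGER, INTENT(IN) :: irow, nrow, icol, ncol, counter
         INTEGER, DIMENSION(4), INTENT(OUT) :: iseed
         INTEGER :: flat

         flat = (counter*nrow + irow - 1)*ncol + icol - 1
         iseed(1) = MOD(flat, 4096)
         iseed(2) = MOD(flat/7 + 13, 4096)
         iseed(3) = MOD(flat/31 + 101, 4096)
         iseed(4) = 2*MOD(flat, 2048) + 1 ! must be odd
      END SUBROUTINE demo_seed

   END PROGRAM larnv_seed_demo

Reading it this way also explains why randmat_counter is asserted to be nonzero and incremented on every call: each new test matrix gets a fresh, but still deterministic, family of seeds.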
From 224479c43e58305d2da2f41eff727baca5ef72c8 Mon Sep 17 00:00:00 2001 From: Alfio Lazzaro Date: Tue, 31 Mar 2020 11:13:48 +0200 Subject: [PATCH 27/34] Pretty --- src/core/dbcsr_config.F | 2 +- src/mpi/dbcsr_mpiwrap.F | 2 +- src/tensors/dbcsr_tensor_api.F | 1 - src/tensors/dbcsr_tensor_index.F | 8 ++++---- src/tensors/dbcsr_tensor_split.F | 1 - src/tensors/dbcsr_tensor_test.F | 4 +--- 6 files changed, 7 insertions(+), 11 deletions(-) diff --git a/src/core/dbcsr_config.F b/src/core/dbcsr_config.F index 53ed4dd9f5b..dadd180692f 100644 --- a/src/core/dbcsr_config.F +++ b/src/core/dbcsr_config.F @@ -386,7 +386,7 @@ SUBROUTINE dbcsr_print_config(unit_nr) ENDIF WRITE (UNIT=unit_nr, FMT='(1X,A,T80,L1)') & - "DBCSR| Use memory pool for CPU allocation", dbcsr_cfg%use_mempools_cpu + "DBCSR| Use memory pool for CPU allocation", dbcsr_cfg%use_mempools_cpu IF (has_mpi) THEN IF (dbcsr_cfg%num_layers_3D < 2) THEN diff --git a/src/mpi/dbcsr_mpiwrap.F b/src/mpi/dbcsr_mpiwrap.F index f28c361b80e..932045e37ef 100644 --- a/src/mpi/dbcsr_mpiwrap.F +++ b/src/mpi/dbcsr_mpiwrap.F @@ -747,7 +747,7 @@ SUBROUTINE mp_reordering(mp_comm, mp_new_comm, ranks_order) ierr = 0 #if defined(__parallel) - CALL mpi_comm_group(mp_comm, oldgroup, ierr); + CALL mpi_comm_group(mp_comm, oldgroup, ierr); IF (ierr /= 0) CALL mp_stop(ierr, "mpi_comm_group @ mp_reordering") CALL mpi_group_incl(oldgroup, SIZE(ranks_order), ranks_order, newgroup, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_group_incl @ mp_reordering") diff --git a/src/tensors/dbcsr_tensor_api.F b/src/tensors/dbcsr_tensor_api.F index cfbd6129639..72f7a1ab814 100644 --- a/src/tensors/dbcsr_tensor_api.F +++ b/src/tensors/dbcsr_tensor_api.F @@ -106,5 +106,4 @@ MODULE dbcsr_tensor_api PUBLIC :: dbcsr_t_nblks_total PUBLIC :: dbcsr_t_blk_size - END MODULE dbcsr_tensor_api diff --git a/src/tensors/dbcsr_tensor_index.F b/src/tensors/dbcsr_tensor_index.F index f459ff75053..29c156167bb 100644 --- a/src/tensors/dbcsr_tensor_index.F +++ b/src/tensors/dbcsr_tensor_index.F @@ -212,10 +212,10 @@ PURE FUNCTION combine_tensor_index(ind_in, dims) RESULT(ind_out) !! flat index INTEGER :: i_dim - ind_out = ind_in(SIZE(dims)) - DO i_dim = SIZE(dims) - 1, 1, -1 - ind_out = (ind_out - 1)*dims(i_dim) + ind_in(i_dim) - ENDDO + ind_out = ind_in(SIZE(dims)) + DO i_dim = SIZE(dims) - 1, 1, -1 + ind_out = (ind_out - 1)*dims(i_dim) + ind_in(i_dim) + ENDDO END FUNCTION diff --git a/src/tensors/dbcsr_tensor_split.F b/src/tensors/dbcsr_tensor_split.F index 3ba73d8667c..a1368534cc1 100644 --- a/src/tensors/dbcsr_tensor_split.F +++ b/src/tensors/dbcsr_tensor_split.F @@ -589,7 +589,6 @@ SUBROUTINE dbcsr_t_make_compatible_blocks(tensor1, tensor2, tensor1_split, tenso ENDIF #:endfor - DEALLOCATE (blk_size_d_split, blk_size_d_1, blk_size_d_2) ENDDO diff --git a/src/tensors/dbcsr_tensor_test.F b/src/tensors/dbcsr_tensor_test.F index 9f222d42227..5c20e9ec844 100644 --- a/src/tensors/dbcsr_tensor_test.F +++ b/src/tensors/dbcsr_tensor_test.F @@ -90,7 +90,6 @@ MODULE dbcsr_tensor_test #:endfor END INTERFACE - INTEGER, SAVE :: randmat_counter = 0 CONTAINS @@ -391,7 +390,6 @@ SUBROUTINE dbcsr_t_random_dist(dist_array, dist_size, nbins) END SUBROUTINE - SUBROUTINE dbcsr_t_setup_test_tensor(tensor, mp_comm, enumerate, ${varlist("blk_ind")}$) !! Allocate and fill test tensor - entries are enumerated by their index s.t. they only depend !! on global properties of the tensor but not on distribution, matrix representation, etc. 
@@ -406,7 +404,7 @@ SUBROUTINE dbcsr_t_setup_test_tensor(tensor, mp_comm, enumerate, ${varlist("blk_ INTEGER :: i, ib, my_nblks_alloc, nblks_alloc, proc, nze INTEGER, ALLOCATABLE, DIMENSION(:) :: ${varlist("my_blk_ind")}$ INTEGER, DIMENSION(ndims_tensor(tensor)) :: blk_index, blk_offset, blk_size, & - tensor_dims + tensor_dims INTEGER, DIMENSION(:, :), ALLOCATABLE :: ind_nd #:for ndim in ndims REAL(KIND=real_8), ALLOCATABLE, & From ebc406295469f753c33fcd38bdcd1e3d845db53f Mon Sep 17 00:00:00 2001 From: Alfio Lazzaro Date: Tue, 31 Mar 2020 04:54:20 -0500 Subject: [PATCH 28/34] Add automatically CXXFLAGS for the NVCC host compilation, close #327 --- .cp2k/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.cp2k/Makefile b/.cp2k/Makefile index 20d4f164e8b..c66ba0b9a1d 100644 --- a/.cp2k/Makefile +++ b/.cp2k/Makefile @@ -138,6 +138,9 @@ CXXFLAGS += -D__CUDA ifeq (,$(findstring -arch,$(ACCFLAGS))) override ACCFLAGS += -arch sm_$(ARCH_NUMBER) endif +ifeq (,$(findstring -Xcompiler,$(ACCFLAGS))) +override ACCFLAGS += -Xcompiler="$(CXXFLAGS)" +endif # If compiling with hipcc else ifneq (,$(findstring hipcc,$(ACC))) override ACCFLAGS += -D__HIP From f217ddaa9c5b43fd4848c023a519dff357801f6b Mon Sep 17 00:00:00 2001 From: Patrick Seewald Date: Thu, 2 Apr 2020 20:53:41 +0200 Subject: [PATCH 29/34] Tensors: use static array size for `dbcsr_t_contract_index` --- src/tensors/dbcsr_tensor.F | 30 +++++++++++++++++++++++------- src/tensors/dbcsr_tensor_api.F | 3 ++- src/tensors/dbcsr_tensor_index.F | 2 +- src/tensors/dbcsr_tensor_types.F | 30 +++++++++++++++++++++++++++++- 4 files changed, 55 insertions(+), 10 deletions(-) diff --git a/src/tensors/dbcsr_tensor.F b/src/tensors/dbcsr_tensor.F index ffb8eb3d363..2b73f21c782 100644 --- a/src/tensors/dbcsr_tensor.F +++ b/src/tensors/dbcsr_tensor.F @@ -47,7 +47,8 @@ MODULE dbcsr_tensor dbcsr_t_distribution_destroy, dbcsr_t_distribution_new_expert, dbcsr_t_get_stored_coordinates, & blk_dims_tensor, dbcsr_t_hold, dbcsr_t_pgrid_type, mp_environ_pgrid, dbcsr_t_filter, & dbcsr_t_clear, dbcsr_t_finalize, dbcsr_t_get_num_blocks, dbcsr_t_scale, & - dbcsr_t_get_num_blocks_total, dbcsr_t_get_info, ndims_matrix_row, ndims_matrix_column + dbcsr_t_get_num_blocks_total, dbcsr_t_get_info, ndims_matrix_row, ndims_matrix_column, & + dbcsr_t_max_nblks_local USE dbcsr_kinds, ONLY: & ${uselist(dtype_float_prec)}$, default_string_length, int_8 USE dbcsr_mpiwrap, ONLY: & @@ -489,7 +490,8 @@ SUBROUTINE dbcsr_t_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, & map_1, map_2, & bounds_1, bounds_2, bounds_3, & optimize_dist, pgrid_opt_1, pgrid_opt_2, pgrid_opt_3, & - filter_eps, flop, move_data, retain_sparsity, result_index, unit_nr, log_verbose) + filter_eps, flop, move_data, retain_sparsity, & + nblks_local, result_index, unit_nr, log_verbose) !! expert routine for tensor contraction. For internal use only. TYPE(dbcsr_scalar_type), INTENT(IN) :: alpha TYPE(dbcsr_t_type), INTENT(INOUT), TARGET :: tensor_1 @@ -519,7 +521,9 @@ SUBROUTINE dbcsr_t_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, & INTEGER(KIND=int_8), INTENT(OUT), OPTIONAL :: flop LOGICAL, INTENT(IN), OPTIONAL :: move_data LOGICAL, INTENT(IN), OPTIONAL :: retain_sparsity - INTEGER, DIMENSION(:, :), ALLOCATABLE, & + INTEGER, INTENT(OUT), OPTIONAL :: nblks_local + !! number of local blocks on this MPI rank + INTEGER, DIMENSION(dbcsr_t_max_nblks_local(tensor_3), ndims_tensor(tensor_3)), & OPTIONAL, INTENT(OUT) :: result_index !! 
get indices of non-zero tensor blocks for tensor_3 without actually performing contraction !! this is an estimate based on block norm multiplication @@ -821,7 +825,14 @@ SUBROUTINE dbcsr_t_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, & result_index=result_index_2d) nblk = SIZE(result_index_2d,1) - ALLOCATE(result_index(nblk, ndims_tensor(tensor_contr_3))) + IF(PRESENT(nblks_local)) nblks_local = nblk + IF (SIZE(result_index, 1) < nblk) THEN + CALL dbcsr_abort(__LOCATION__, & + "allocated size of `result_index` is too small. This error occurs due to a high load imbalance of distributed tensor data.") + ENDIF + + result_index = 0 + DO iblk = 1, nblk result_index(iblk,:) = get_nd_indices_tensor(tensor_contr_3%nd_index_blk, result_index_2d(iblk,:)) ENDDO @@ -1612,7 +1623,8 @@ SUBROUTINE dbcsr_t_contract_index(alpha, tensor_1, tensor_2, beta, tensor_3, & contract_2, notcontract_2, & map_1, map_2, & bounds_1, bounds_2, bounds_3, & - filter_eps, result_index) + filter_eps, & + nblks_local, result_index) !! get indices of non-zero tensor blocks for contraction result without actually !! performing contraction. !! this is an estimate based on block norm multiplication. @@ -1635,9 +1647,12 @@ SUBROUTINE dbcsr_t_contract_index(alpha, tensor_1, tensor_2, beta, tensor_3, & INTEGER, DIMENSION(2, SIZE(notcontract_2)), & INTENT(IN), OPTIONAL :: bounds_3 REAL(KIND=real_8), INTENT(IN), OPTIONAL :: filter_eps - INTEGER, DIMENSION(:, :), ALLOCATABLE, & + INTEGER, INTENT(OUT) :: nblks_local + !! number of local blocks on this MPI rank + INTEGER, DIMENSION(dbcsr_t_max_nblks_local(tensor_3), ndims_tensor(tensor_3)), & INTENT(OUT) :: result_index - !! indices of non-zero tensor blocks for tensor_3 + !! indices of local non-zero tensor blocks for tensor_3 + !! 
only the elements result_index(:nblks_local, :) are relevant (all others are set to 0) CALL dbcsr_t_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, & contract_1, notcontract_1, & @@ -1647,6 +1662,7 @@ SUBROUTINE dbcsr_t_contract_index(alpha, tensor_1, tensor_2, beta, tensor_3, & bounds_2=bounds_2, & bounds_3=bounds_3, & filter_eps=filter_eps, & + nblks_local=nblks_local, & result_index=result_index) END SUBROUTINE diff --git a/src/tensors/dbcsr_tensor_api.F b/src/tensors/dbcsr_tensor_api.F index 72f7a1ab814..ac7d7238a20 100644 --- a/src/tensors/dbcsr_tensor_api.F +++ b/src/tensors/dbcsr_tensor_api.F @@ -35,7 +35,7 @@ MODULE dbcsr_tensor_api dbcsr_t_mp_dims_create, dbcsr_t_pgrid_change_dims, dbcsr_t_ndims => ndims_tensor, & dbcsr_t_dims => dims_tensor, dbcsr_t_ndims_matrix_row => ndims_matrix_row, & dbcsr_t_ndims_matrix_column => ndims_matrix_column, dbcsr_t_blk_size, dbcsr_t_nblks_local, & - dbcsr_t_nblks_total + dbcsr_t_nblks_total, dbcsr_t_max_nblks_local USE dbcsr_tensor_test, ONLY: & dbcsr_t_contract_test, dbcsr_t_checksum USE dbcsr_tensor_split, ONLY: & @@ -105,5 +105,6 @@ MODULE dbcsr_tensor_api PUBLIC :: dbcsr_t_nblks_local PUBLIC :: dbcsr_t_nblks_total PUBLIC :: dbcsr_t_blk_size + PUBLIC :: dbcsr_t_max_nblks_local END MODULE dbcsr_tensor_api diff --git a/src/tensors/dbcsr_tensor_index.F b/src/tensors/dbcsr_tensor_index.F index 29c156167bb..0ce0fe5e5c6 100644 --- a/src/tensors/dbcsr_tensor_index.F +++ b/src/tensors/dbcsr_tensor_index.F @@ -135,7 +135,7 @@ PURE FUNCTION ndims_mapping_column(map) ndims_mapping_column = map%ndim2_2d END FUNCTION - SUBROUTINE dbcsr_t_get_mapping_info(map, ndim_nd, ndim1_2d, ndim2_2d, dims_2d_i8, dims_2d, dims_nd, dims1_2d, dims2_2d, & + PURE SUBROUTINE dbcsr_t_get_mapping_info(map, ndim_nd, ndim1_2d, ndim2_2d, dims_2d_i8, dims_2d, dims_nd, dims1_2d, dims2_2d, & map1_2d, map2_2d, map_nd, base, col_major) !! get mapping info diff --git a/src/tensors/dbcsr_tensor_types.F b/src/tensors/dbcsr_tensor_types.F index f3ba21f94af..28a735e8131 100644 --- a/src/tensors/dbcsr_tensor_types.F +++ b/src/tensors/dbcsr_tensor_types.F @@ -94,12 +94,14 @@ MODULE dbcsr_tensor_types ndims_matrix_column,& dbcsr_t_nblks_local,& dbcsr_t_nblks_total,& - dbcsr_t_blk_size + dbcsr_t_blk_size,& + dbcsr_t_max_nblks_local TYPE dbcsr_t_pgrid_type TYPE(nd_to_2d_mapping) :: nd_index_grid INTEGER :: mp_comm_2d TYPE(dbcsr_tas_split_info), ALLOCATABLE :: tas_split_info + INTEGER :: nproc END TYPE TYPE dbcsr_t_type @@ -1206,6 +1208,9 @@ SUBROUTINE dbcsr_t_pgrid_create_expert(mp_comm, dims, pgrid, map1_2d, map2_2d, t ALLOCATE (pgrid%tas_split_info, SOURCE=info) ENDIF + ! store number of MPI ranks because we need it for PURE function dbcsr_t_max_nblks_local + pgrid%nproc = nproc + CALL timestop(handle) END SUBROUTINE @@ -1526,4 +1531,27 @@ PURE FUNCTION ndims_matrix_column(tensor) ndims_matrix_column = ndims_mapping_column(tensor%nd_index_blk) END FUNCTION + PURE FUNCTION dbcsr_t_max_nblks_local(tensor) RESULT(blk_count) + !! returns an estimate of maximum number of local blocks in tensor + !! (irrespective of the actual number of currently present blocks) + !! this estimate is based on the following assumption: tensor data is dense and + !! 
load balancing is within a factor of 2 + TYPE(dbcsr_t_type), INTENT(IN) :: tensor + INTEGER :: blk_count, nproc + INTEGER, DIMENSION(ndims_tensor(tensor)) :: bdims + INTEGER(int_8) :: blk_count_total + INTEGER, PARAMETER :: max_load_imbalance = 2 + + CALL dbcsr_t_get_mapping_info(tensor%nd_index_blk, dims_nd=bdims) + + blk_count_total = PRODUCT(INT(bdims, int_8)) + + ! can not call an MPI routine due to PURE + !CALL mp_environ(nproc, myproc, tensor%pgrid%mp_comm_2d) + nproc = tensor%pgrid%nproc + + blk_count = INT(blk_count_total/nproc*max_load_imbalance) + + END FUNCTION + END MODULE From 9ba2e43f2661a9567a2ec460e9768afdff190137 Mon Sep 17 00:00:00 2001 From: Patrick Seewald Date: Fri, 3 Apr 2020 07:45:15 +0200 Subject: [PATCH 30/34] fix leak due to 13db0e2a6a9168481f7f0e985c64585fc25bc267 --- src/tensors/dbcsr_tensor_block.F | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/tensors/dbcsr_tensor_block.F b/src/tensors/dbcsr_tensor_block.F index aeacc35760f..90585bba387 100644 --- a/src/tensors/dbcsr_tensor_block.F +++ b/src/tensors/dbcsr_tensor_block.F @@ -584,13 +584,14 @@ SUBROUTINE dbcsr_t_put_${ndim}$d_block_${dsuffix}$ (tensor, ind, sizes, block, s INTEGER, DIMENSION(${ndim}$) :: shape_nd CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_t_put_${ndim}$d_block_${dsuffix}$', & routineP = moduleN//':'//routineN - LOGICAL :: found + LOGICAL :: found, new_block ${dtype}$, DIMENSION(${arrlist("sizes", nmax=ndim)}$) :: block_check LOGICAL, PARAMETER :: debug = .FALSE. INTEGER :: i NULLIFY (block_2d) + new_block = .FALSE. IF (debug) THEN CALL dbcsr_t_get_block(tensor, ind, sizes, block_check, found=found) @@ -609,6 +610,7 @@ SUBROUTINE dbcsr_t_put_${ndim}$d_block_${dsuffix}$ (tensor, ind, sizes, block, s ELSE ! need reshape due to rank reordering ALLOCATE (block_2d(shape_2d(1), shape_2d(2))) + new_block = .TRUE. 
shape_nd(map_nd) = sizes block_2d(:, :) = RESHAPE(RESHAPE(block, SHAPE=shape_nd, order=map_nd), SHAPE=shape_2d) ENDIF @@ -620,6 +622,8 @@ SUBROUTINE dbcsr_t_put_${ndim}$d_block_${dsuffix}$ (tensor, ind, sizes, block, s CALL dbcsr_tas_put_block(tensor%matrix_rep, ind_2d(1), ind_2d(2), block_2d, summation=summation, & scale=scale) + IF(new_block) DEALLOCATE (block_2d) + END SUBROUTINE #:endfor #:endfor From 6358c6671bb7415f5b063171a94014ab1bdcc67e Mon Sep 17 00:00:00 2001 From: Alfio Lazzaro Date: Fri, 3 Apr 2020 14:31:43 +0200 Subject: [PATCH 31/34] Use short name for Fortran source tests --- tests/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 26c06832874..a2d88f3872f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -109,6 +109,12 @@ foreach (dbcsr_test ${DBCSR_TESTS}) endif () endforeach () +# set the __SHORT_FILE__ per file for dbcsr sources +foreach (tests_src ${DBCSR_PERF_SRCS} ${dbcsr_unittest_common_SRCS} ${dbcsr_unittest1_SRCS} ${dbcsr_unittest2_SRCS} ${dbcsr_unittest3_SRCS} ${dbcsr_tensor_unittest_SRCS} ${dbcsr_tas_unittest_SRCS} ${dbcsr_test_csr_conversions_SRCS}) + # add_fypp_sources returns a path in the current binary dir + get_filename_component(short_file "${tests_src}" NAME) + set_source_files_properties(${tests_src} PROPERTIES COMPILE_DEFINITIONS __SHORT_FILE__="${short_file}") +endforeach () # =================================== GPU BACKEND TESTS (CUDA / HIP) if (USE_CUDA OR USE_HIP) From fcfa8ae3551ae0d3517db59cbc3ebaea0dccc967 Mon Sep 17 00:00:00 2001 From: Alfio Lazzaro Date: Wed, 8 Apr 2020 17:23:03 +0200 Subject: [PATCH 32/34] pretty --- src/tensors/dbcsr_tensor_index.F | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tensors/dbcsr_tensor_index.F b/src/tensors/dbcsr_tensor_index.F index 0ce0fe5e5c6..debe23de00d 100644 --- a/src/tensors/dbcsr_tensor_index.F +++ b/src/tensors/dbcsr_tensor_index.F @@ -136,7 +136,7 @@ PURE FUNCTION ndims_mapping_column(map) END FUNCTION PURE SUBROUTINE dbcsr_t_get_mapping_info(map, ndim_nd, ndim1_2d, ndim2_2d, dims_2d_i8, dims_2d, dims_nd, dims1_2d, dims2_2d, & - map1_2d, map2_2d, map_nd, base, col_major) + map1_2d, map2_2d, map_nd, base, col_major) !! 
get mapping info TYPE(nd_to_2d_mapping), INTENT(IN) :: map From 5135e014948081d1c7bced3f11521b36e40bd09f Mon Sep 17 00:00:00 2001 From: Patrick Seewald Date: Wed, 8 Apr 2020 14:59:56 +0200 Subject: [PATCH 33/34] Fix 77efe133aaf07fc6070e33dd1ad8d65b90c78085 for the complex case --- src/mm/dbcsr_mm_common.f90 | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/mm/dbcsr_mm_common.f90 b/src/mm/dbcsr_mm_common.f90 index b1500290531..fe91522a936 100644 --- a/src/mm/dbcsr_mm_common.f90 +++ b/src/mm/dbcsr_mm_common.f90 @@ -37,7 +37,11 @@ SUBROUTINE calc_norms_${nametype1}$ (norms, nblks, & row = blki(1, blk + i) col = blki(2, blk + i) bpe = bp + rbs(row)*cbs(col) - 1 +#:if nametype1 in ['d', 's'] vals(blk) = REAL(SUM(DATA(bp:bpe)**2), KIND=sp) +#:else + vals(blk) = REAL(SUM(ABS(DATA(bp:bpe))**2), KIND=sp) +#:endif ELSE vals(blk) = 0.0_sp ENDIF From 36e12cbe8979879d7978ed545329fd842943b21b Mon Sep 17 00:00:00 2001 From: Patrick Seewald Date: Wed, 8 Apr 2020 17:49:15 +0200 Subject: [PATCH 34/34] Bump version to X.Y.Z --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index caf71dd4863..4d412d6636f 100644 --- a/VERSION +++ b/VERSION @@ -1,6 +1,6 @@ MAJOR = 2 MINOR = 1 -PATCH = 0-rc10 +PATCH = 0-rc11 # A specific DATE (YYYY-MM-DD) fixes an official release, otherwise # it is considered Development version. DATE =
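A note on the complex-case fix in calc_norms above: for complex block data, SUM(DATA(bp:bpe)**2) sums the squares of the complex entries themselves, which is a complex number (its real part can even be negative), so taking REAL(...) of it does not give the squared block norm. The corrected SUM(ABS(DATA(bp:bpe))**2) sums the squared magnitudes, i.e. the squared Frobenius norm of the block, and agrees with the old expression in the real case. A toy example with assumed values (not taken from the test suite) makes the difference concrete:

   PROGRAM complex_norm_demo
      !! Sketch: why block norms need ABS(..)**2 for complex data
      IMPLICIT NONE
      INTEGER, PARAMETER :: dp = SELECTED_REAL_KIND(15, 307)
      COMPLEX(KIND=dp), DIMENSION(2) :: z

      z(1) = CMPLX(0.0_dp, 1.0_dp, KIND=dp) ! i
      z(2) = CMPLX(1.0_dp, 1.0_dp, KIND=dp) ! 1 + i

      ! Unsafe for complex data: (i)**2 + (1 + i)**2 = -1 + 2i (stays complex)
      WRITE (*, '(A,2F8.3)') 'SUM(z**2)      = ', SUM(z**2)

      ! Squared Frobenius norm: |i|**2 + |1 + i|**2 = 1 + 2 = 3
      WRITE (*, '(A,F8.3)') 'SUM(ABS(z)**2) = ', SUM(ABS(z)**2)
   END PROGRAM complex_norm_demo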