From 10e87c65c7711457bfdebc0eff9349fbc7d24d3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tiziano=20M=C3=BCller?= Date: Wed, 8 Apr 2020 19:22:09 +0200 Subject: [PATCH 01/19] pre-commit: include fprettify --- .pre-commit-config.yaml | 5 ++ src/base/dbcsr_machine_posix.f90 | 2 +- src/block/dbcsr_block_access.f90 | 2 +- src/block/dbcsr_index_operations.F | 2 +- src/core/dbcsr_lib.F | 22 ++++----- src/dbcsr_api.F | 10 ++-- src/dbcsr_api_c.F | 8 ++-- src/mm/dbcsr_acc_operations.F | 32 ++++++------- src/mpi/dbcsr_mp_methods.F | 8 ++-- src/ops/dbcsr_tests.F | 2 +- src/tas/dbcsr_tas_base.F | 4 +- src/tas/dbcsr_tas_mm.F | 66 +++++++++++++------------- src/tas/dbcsr_tas_split.F | 4 +- src/tensors/dbcsr_array_list_methods.F | 6 +-- src/tensors/dbcsr_tensor.F | 54 ++++++++++----------- src/tensors/dbcsr_tensor_block.F | 24 +++++----- src/tensors/dbcsr_tensor_index.F | 14 +++--- src/tensors/dbcsr_tensor_io.F | 4 +- src/tensors/dbcsr_tensor_split.F | 4 +- src/tensors/dbcsr_tensor_test.F | 10 ++-- src/tensors/dbcsr_tensor_types.F | 44 ++++++++--------- 21 files changed, 166 insertions(+), 161 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 926777e517c..46b96a1031d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,3 +27,8 @@ repos: entry: '^\s*!>' language: pygrep types: [text] + - id: prettify + name: Run prettify + entry: ./tools/fprettify/fprettify.py + language: script + files: '^src/.*\.(F|f90)$' diff --git a/src/base/dbcsr_machine_posix.f90 b/src/base/dbcsr_machine_posix.f90 index 79eeedbfeba..48d10fb5e38 100644 --- a/src/base/dbcsr_machine_posix.f90 +++ b/src/base/dbcsr_machine_posix.f90 @@ -46,7 +46,7 @@ SUBROUTINE m_flush(lunit) !! Flush a given unit INTEGER, INTENT(IN) :: lunit - FLUSH(lunit) + FLUSH (lunit) END SUBROUTINE m_flush FUNCTION m_procrun(pid) RESULT(run_on) diff --git a/src/block/dbcsr_block_access.f90 b/src/block/dbcsr_block_access.f90 index 2338e18364c..bc681168c45 100644 --- a/src/block/dbcsr_block_access.f90 +++ b/src/block/dbcsr_block_access.f90 @@ -268,7 +268,7 @@ SUBROUTINE dbcsr_put_block2d_${nametype1}$ (matrix, row, col, block, lb_row_col, CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_put_block2d_${nametype1}$', & routineP = moduleN//':'//routineN - NULLIFY(block_1d) + NULLIFY (block_1d) block_1d(1:SIZE(block)) => block diff --git a/src/block/dbcsr_index_operations.F b/src/block/dbcsr_index_operations.F index db6c7b6e141..df3f96c0306 100644 --- a/src/block/dbcsr_index_operations.F +++ b/src/block/dbcsr_index_operations.F @@ -263,7 +263,7 @@ PURE SUBROUTINE dbcsr_expand_row_index_2d(row_p, row_i, nrows, dst_i) !! Expands a row_p index INTEGER, INTENT(IN) :: nrows, dst_i INTEGER, DIMENSION(1:nrows + 1), INTENT(IN) :: row_p - INTEGER, DIMENSION(:,:), INTENT(OUT) :: row_i + INTEGER, DIMENSION(:, :), INTENT(OUT) :: row_i INTEGER :: row diff --git a/src/core/dbcsr_lib.F b/src/core/dbcsr_lib.F index 2eee4ef21e8..9f9160227ad 100644 --- a/src/core/dbcsr_lib.F +++ b/src/core/dbcsr_lib.F @@ -25,12 +25,12 @@ MODULE dbcsr_lib USE dbcsr_mpiwrap, ONLY: add_mp_perf_env, & describe_mp_perf_env, & has_mp_perf_env, & - mp_environ, mp_cart_rank,& - rm_mp_perf_env,& + mp_environ, mp_cart_rank, & + rm_mp_perf_env, & mp_comm_free USE dbcsr_mm, ONLY: dbcsr_multiply_clear_mempools, & dbcsr_multiply_lib_finalize, & - dbcsr_multiply_lib_init,& + dbcsr_multiply_lib_init, & dbcsr_multiply_print_statistics USE dbcsr_timings, ONLY: add_timer_env, & rm_timer_env, & @@ -44,9 +44,9 @@ MODULE dbcsr_lib dbcsr_logger_type, & dbcsr_rm_default_logger USE dbcsr_base_hooks, ONLY: timeset_hook, & - timestop_hook,& - dbcsr_abort_hook,& - dbcsr_warn_hook,& + timestop_hook, & + dbcsr_abort_hook, & + dbcsr_warn_hook, & dbcsr_abort_interface, dbcsr_warn_interface, & timeset_interface, timestop_interface use dbcsr_types, only: dbcsr_mp_obj @@ -295,12 +295,12 @@ SUBROUTINE dbcsr_finalize_lib() CALL dbcsr_rm_default_logger() CALL rm_mp_perf_env() CALL rm_timer_env() - NULLIFY(logger) + NULLIFY (logger) ENDIF - NULLIFY(timeset_hook) - NULLIFY(timestop_hook) - NULLIFY(dbcsr_abort_hook) - NULLIFY(dbcsr_warn_hook) + NULLIFY (timeset_hook) + NULLIFY (timestop_hook) + NULLIFY (dbcsr_abort_hook) + NULLIFY (dbcsr_warn_hook) CALL dbcsr_mp_release(mp_env) CALL mp_comm_free(default_group) #if defined(__LIBXSMM) diff --git a/src/dbcsr_api.F b/src/dbcsr_api.F index 1c5e69543e5..7bc537d7aca 100644 --- a/src/dbcsr_api.F +++ b/src/dbcsr_api.F @@ -105,8 +105,8 @@ MODULE dbcsr_api dbcsr_trace_prv => dbcsr_trace, & dbcsr_dot_prv => dbcsr_dot, & dbcsr_triu_prv => dbcsr_triu, & - dbcsr_clear_prv => dbcsr_clear,& - dbcsr_add_block_node_prv => dbcsr_add_block_node,& + dbcsr_clear_prv => dbcsr_clear, & + dbcsr_add_block_node_prv => dbcsr_add_block_node, & dbcsr_conform_scalar_prv => dbcsr_conform_scalar USE dbcsr_test_methods, ONLY: dbcsr_reset_randmat_seed USE dbcsr_tests, ONLY: dbcsr_run_tests, & @@ -700,7 +700,7 @@ SUBROUTINE dbcsr_distribution_get(dist, row_dist, col_dist, & LOGICAL, INTENT(OUT), OPTIONAL :: subgroups_defined INTEGER, INTENT(OUT), OPTIONAL :: prow_group, pcol_group - call dbcsr_distribution_get_prv(dist % prv, row_dist, col_dist, & + call dbcsr_distribution_get_prv(dist%prv, row_dist, col_dist, & nrows, ncols, has_threads, & group, mynode, numnodes, nprows, npcols, myprow, mypcol, pgrid, & subgroups_defined, prow_group, pcol_group) @@ -1077,7 +1077,7 @@ SUBROUTINE dbcsr_add_block_node(matrix, block_row, block_col, block) REAL(KIND=dp), DIMENSION(:, :), POINTER :: block !! the block to put - call dbcsr_add_block_node_prv(matrix%prv, block_row, block_col, block) + call dbcsr_add_block_node_prv(matrix%prv, block_row, block_col, block) END SUBROUTINE dbcsr_add_block_node #:include 'data/dbcsr.fypp' @@ -1090,7 +1090,7 @@ FUNCTION make_conformant_scalar_${nametype1}$ (scalar, matrix) RESULT(encapsulat TYPE(dbcsr_type), INTENT(IN) :: matrix TYPE(dbcsr_scalar_type) :: encapsulated - encapsulated = dbcsr_conform_scalar_prv(scalar, matrix % prv) + encapsulated = dbcsr_conform_scalar_prv(scalar, matrix%prv) END FUNCTION make_conformant_scalar_${nametype1}$ SUBROUTINE dbcsr_reserve_block2d_${nametype1}$ (matrix, row, col, block, transposed, existed) diff --git a/src/dbcsr_api_c.F b/src/dbcsr_api_c.F index 52438bb6004..be5328b3567 100644 --- a/src/dbcsr_api_c.F +++ b/src/dbcsr_api_c.F @@ -10,7 +10,7 @@ MODULE dbcsr_api_c USE, INTRINSIC :: ISO_C_BINDING, ONLY: c_loc, c_ptr, c_double, C_NULL_CHAR, & - c_f_pointer, c_int, c_char, c_null_ptr, c_bool + c_f_pointer, c_int, c_char, c_null_ptr, c_bool USE dbcsr_api USE dbcsr_machine, ONLY: default_output_unit @@ -161,7 +161,7 @@ SUBROUTINE c_dbcsr_print(c_matrix) bind(C, name="c_dbcsr_print") CALL dbcsr_print(matrix) ! Fortran and C may use different buffers for I/O, make sure we flush before returning: - flush(default_output_unit) + flush (default_output_unit) END SUBROUTINE SUBROUTINE c_dbcsr_get_stored_coordinates(c_matrix, row, col, processor) bind(C, name="c_dbcsr_get_stored_coordinates") @@ -173,7 +173,7 @@ SUBROUTINE c_dbcsr_get_stored_coordinates(c_matrix, row, col, processor) bind(C, CALL c_f_pointer(c_matrix, matrix) - CALL dbcsr_get_stored_coordinates(matrix, row+1, col+1, processor) + CALL dbcsr_get_stored_coordinates(matrix, row + 1, col + 1, processor) END SUBROUTINE SUBROUTINE c_dbcsr_put_block_d(c_matrix, row, col, block, block_length) bind(C, name="c_dbcsr_put_block_d") @@ -185,7 +185,7 @@ SUBROUTINE c_dbcsr_put_block_d(c_matrix, row, col, block, block_length) bind(C, CALL c_f_pointer(c_matrix, matrix) - CALL dbcsr_put_block(matrix, row+1, col+1, block) + CALL dbcsr_put_block(matrix, row + 1, col + 1, block) END SUBROUTINE SUBROUTINE c_dbcsr_multiply_d(transa, transb, alpha, c_matrix_a, c_matrix_b, beta, c_matrix_c, retain_sparsity) & diff --git a/src/mm/dbcsr_acc_operations.F b/src/mm/dbcsr_acc_operations.F index 30f3cb3d506..69f8b050255 100644 --- a/src/mm/dbcsr_acc_operations.F +++ b/src/mm/dbcsr_acc_operations.F @@ -17,9 +17,9 @@ MODULE dbcsr_acc_operations USE dbcsr_acc_stream, ONLY: acc_stream_cptr, & acc_stream_type, & acc_stream_synchronize - USE dbcsr_config, ONLY: max_kernel_dim, & - cublas_handles, & - hipblas_handles + USE dbcsr_config, ONLY: max_kernel_dim, & + cublas_handles, & + hipblas_handles USE dbcsr_mm_types, ONLY: dbcsr_ps_width USE dbcsr_kinds, ONLY: real_8, dp USE dbcsr_types, ONLY: dbcsr_type_real_8 @@ -159,20 +159,20 @@ SUBROUTINE dbcsr_acc_do_mm_stack(param_stack_host, param_stack_dev, stack_size, #if (__DBCSR_ACC == 2) istat = cublas_dgemm_cu(cublas_handles(ithread + 1)%handle_ptr, & #else - istat = hipblas_dgemm_hip(hipblas_handles(ithread + 1)%handle_ptr, & + istat=hipblas_dgemm_hip(hipblas_handles(ithread + 1)%handle_ptr, & #endif - 'N', transb, & - INT(param_stack_host(1, istack), KIND=C_INT), & - INT(param_stack_host(2, istack), KIND=C_INT), & - INT(param_stack_host(3, istack), KIND=C_INT), & - INT(param_stack_host(4, istack)-1, KIND=C_INT), & - INT(param_stack_host(5, istack)-1, KIND=C_INT), & - INT(param_stack_host(6, istack)-1, KIND=C_INT), & - acc_devmem_cptr(a_data), & - acc_devmem_cptr(b_data), & - acc_devmem_cptr(c_data), & - 1.0_dp, 1.0_dp, & - acc_stream_cptr(stream)) + 'N', transb, & + INT(param_stack_host(1, istack), KIND=C_INT), & + INT(param_stack_host(2, istack), KIND=C_INT), & + INT(param_stack_host(3, istack), KIND=C_INT), & + INT(param_stack_host(4, istack) - 1, KIND=C_INT), & + INT(param_stack_host(5, istack) - 1, KIND=C_INT), & + INT(param_stack_host(6, istack) - 1, KIND=C_INT), & + acc_devmem_cptr(a_data), & + acc_devmem_cptr(b_data), & + acc_devmem_cptr(c_data), & + 1.0_dp, 1.0_dp, & + acc_stream_cptr(stream)) #if (__DBCSR_ACC == 2) IF (istat /= 0) DBCSR_ABORT("failed to run CUBLAS.") #else diff --git a/src/mpi/dbcsr_mp_methods.F b/src/mpi/dbcsr_mp_methods.F index bd248e253bd..a8804129a10 100644 --- a/src/mpi/dbcsr_mp_methods.F +++ b/src/mpi/dbcsr_mp_methods.F @@ -12,10 +12,10 @@ MODULE dbcsr_mp_methods dbcsr_mp_release USE dbcsr_mpiwrap, ONLY: mp_cart_create, & mp_cart_sub, & - mp_comm_free,& - mp_environ,& - mp_cart_rank,& - mp_dims_create,& + mp_comm_free, & + mp_environ, & + mp_cart_rank, & + mp_dims_create, & mp_comm_null USE dbcsr_types, ONLY: dbcsr_mp_obj diff --git a/src/ops/dbcsr_tests.F b/src/ops/dbcsr_tests.F index cea7e890275..c867fd929a7 100644 --- a/src/ops/dbcsr_tests.F +++ b/src/ops/dbcsr_tests.F @@ -36,7 +36,7 @@ MODULE dbcsr_tests dbcsr_mp_new, & dbcsr_mp_npcols, & dbcsr_mp_nprows, & - dbcsr_mp_release,& + dbcsr_mp_release, & dbcsr_mp_make_env USE dbcsr_mpiwrap, ONLY: & mp_comm_free, mp_environ, mp_max, mp_sum, mp_sync diff --git a/src/tas/dbcsr_tas_base.F b/src/tas/dbcsr_tas_base.F index c9cc5177ce8..012c34817ae 100644 --- a/src/tas/dbcsr_tas_base.F +++ b/src/tas/dbcsr_tas_base.F @@ -192,7 +192,7 @@ SUBROUTINE dbcsr_tas_create_new(matrix, name, dist, data_type, & CASE (rowsplit) matrix%nblkrowscols_split = matrix%nblkrows - ASSOCIATE (rows=>dist%local_rowcols) + ASSOCIATE (rows => dist%local_rowcols) nrows = SIZE(rows) ncols = INT(dist%col_dist%nmrowcol) ALLOCATE (row_blk_size_vec(nrows)) @@ -207,7 +207,7 @@ SUBROUTINE dbcsr_tas_create_new(matrix, name, dist, data_type, & CASE (colsplit) matrix%nblkrowscols_split = matrix%nblkcols - ASSOCIATE (cols=>dist%local_rowcols) + ASSOCIATE (cols => dist%local_rowcols) ncols = SIZE(cols) nrows = INT(dist%row_dist%nmrowcol) ALLOCATE (row_blk_size_vec(nrows)) diff --git a/src/tas/dbcsr_tas_mm.F b/src/tas/dbcsr_tas_mm.F index 8a8411429c4..437c10fc639 100644 --- a/src/tas/dbcsr_tas_mm.F +++ b/src/tas/dbcsr_tas_mm.F @@ -92,7 +92,7 @@ RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a, !! memory optimization: move data to matrix_c such that matrix_a is empty on return !! memory optimization: move data to matrix_c such that matrix_b is empty on return !! for internal use only - INTEGER(int_8), DIMENSION(:,:), ALLOCATABLE, INTENT(OUT), OPTIONAL :: result_index + INTEGER(int_8), DIMENSION(:, :), ALLOCATABLE, INTENT(OUT), OPTIONAL :: result_index INTEGER, OPTIONAL, INTENT(IN) :: io_unit !! unit number for logging output LOGICAL, OPTIONAL, INTENT(IN) :: log_verbose @@ -140,8 +140,8 @@ RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a, ENDIF nodata_3 = .TRUE. - IF(PRESENT(retain_sparsity)) THEN - IF(retain_sparsity) nodata_3 = .FALSE. + IF (PRESENT(retain_sparsity)) THEN + IF (retain_sparsity) nodata_3 = .FALSE. ENDIF ! get prestored info for multiplication strategy in case of batched mm @@ -186,7 +186,7 @@ RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a, ENDIF ENDIF - IF(batched_repl > 0) THEN + IF (batched_repl > 0) THEN simple_split_save = simple_split_prv simple_split_prv = .TRUE. ENDIF @@ -259,7 +259,7 @@ RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a, nze_b = dbcsr_tas_get_nze_total(matrix_b) CALL dbcsr_tas_result_index(transa, transb, transc, matrix_a, matrix_b, matrix_c, filter_eps, & blk_ind=result_index, nze=nze_c, retain_sparsity=retain_sparsity) - IF(PRESENT(result_index)) THEN + IF (PRESENT(result_index)) THEN CALL timestop(handle) RETURN ENDIF @@ -277,7 +277,7 @@ RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a, WRITE (io_unit_prv, "(T4,A,T68,I13)") "Est. optimal split factor:", nsplit ENDIF - ELSEIF(batched_repl > 0) THEN + ELSEIF (batched_repl > 0) THEN nsplit = nsplit_batched nsplit_opt = nsplit max_mm_dim = max_mm_dim_batched @@ -416,9 +416,9 @@ RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a, WRITE (io_unit_prv, "(T2, A)") "SPLIT / PARALLELIZATION INFO" ENDIF CALL dbcsr_tas_write_split_info(info, io_unit_prv) - IF(ASSOCIATED(matrix_a_rs)) CALL dbcsr_tas_write_matrix_info(matrix_a_rs, io_unit_prv, full_info=log_verbose) - IF(ASSOCIATED(matrix_b_rs)) CALL dbcsr_tas_write_matrix_info(matrix_b_rs, io_unit_prv, full_info=log_verbose) - IF(ASSOCIATED(matrix_c_rs)) CALL dbcsr_tas_write_matrix_info(matrix_c_rs, io_unit_prv, full_info=log_verbose) + IF (ASSOCIATED(matrix_a_rs)) CALL dbcsr_tas_write_matrix_info(matrix_a_rs, io_unit_prv, full_info=log_verbose) + IF (ASSOCIATED(matrix_b_rs)) CALL dbcsr_tas_write_matrix_info(matrix_b_rs, io_unit_prv, full_info=log_verbose) + IF (ASSOCIATED(matrix_c_rs)) CALL dbcsr_tas_write_matrix_info(matrix_c_rs, io_unit_prv, full_info=log_verbose) IF (io_unit_prv > 0) THEN IF (opt_pgrid) THEN WRITE (io_unit_prv, "(T4, A, 1X, A)") "Change process grid:", "Yes" @@ -608,7 +608,7 @@ RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a, IF (opt_pgrid .AND. matrix_a%do_batched == 0) THEN CALL dbcsr_tas_destroy(matrix_a_rep) - DEALLOCATE(matrix_a_rep) + DEALLOCATE (matrix_a_rep) ENDIF CALL convert_to_new_pgrid(mp_comm_mm, matrix_b_rs%matrix, matrix_b_mm, optimize_pgrid=opt_pgrid, move_data=move_b) @@ -690,10 +690,10 @@ RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a, ENDIF IF (PRESENT(move_data_a)) THEN - IF(move_data_a) CALL dbcsr_tas_clear(matrix_a) + IF (move_data_a) CALL dbcsr_tas_clear(matrix_a) ENDIF IF (PRESENT(move_data_b)) THEN - IF(move_data_b) CALL dbcsr_tas_clear(matrix_b) + IF (move_data_b) CALL dbcsr_tas_clear(matrix_b) ENDIF IF (PRESENT(flop)) THEN @@ -1150,9 +1150,9 @@ FUNCTION dist_compatible(mat_a, mat_b, split_rc_a, split_rc_b, io_unit) info_a = dbcsr_tas_info(mat_a) info_b = dbcsr_tas_info(mat_b) CALL dbcsr_tas_get_split_info(info_a, split_rowcol=split_check) - IF(split_check /= split_rc_a) RETURN + IF (split_check /= split_rc_a) RETURN CALL dbcsr_tas_get_split_info(info_b, split_rowcol=split_check) - IF(split_check /= split_rc_b) RETURN + IF (split_check /= split_rc_b) RETURN ! check if communicators are equivalent (global process grid and subgrids) ! Note: mpi_comm_compare is not sufficient since this does not compare associated Cartesian grids. @@ -1289,7 +1289,7 @@ SUBROUTINE dbcsr_tas_result_index(transa, transb, transc, matrix_a, matrix_b, ma TYPE(dbcsr_tas_type), POINTER :: matrix_a_bnorm, matrix_b_bnorm, matrix_c_bnorm REAL(KIND=real_8), INTENT(IN), OPTIONAL :: filter_eps INTEGER, INTENT(IN), OPTIONAL :: io_unit - INTEGER(int_8), DIMENSION(:,:), ALLOCATABLE, INTENT(OUT), OPTIONAL :: blk_ind + INTEGER(int_8), DIMENSION(:, :), ALLOCATABLE, INTENT(OUT), OPTIONAL :: blk_ind LOGICAL, INTENT(IN), OPTIONAL :: retain_sparsity INTEGER(int_8), INTENT(OUT), OPTIONAL :: nze @@ -1302,14 +1302,14 @@ SUBROUTINE dbcsr_tas_result_index(transa, transb, transc, matrix_a, matrix_b, ma CALL timeset(routineN, handle) - IF(PRESENT(retain_sparsity)) THEN + IF (PRESENT(retain_sparsity)) THEN retain_sparsity_prv = retain_sparsity ELSE retain_sparsity_prv = .FALSE. ENDIF IF (.NOT. retain_sparsity_prv) THEN - ALLOCATE(matrix_a_bnorm, matrix_b_bnorm, matrix_c_bnorm) + ALLOCATE (matrix_a_bnorm, matrix_b_bnorm, matrix_c_bnorm) CALL create_block_norms_matrix(matrix_a, matrix_a_bnorm) CALL create_block_norms_matrix(matrix_b, matrix_b_bnorm) CALL create_block_norms_matrix(matrix_c, matrix_c_bnorm, nodata=.TRUE.) @@ -1321,33 +1321,33 @@ SUBROUTINE dbcsr_tas_result_index(transa, transb, transc, matrix_a, matrix_b, ma CALL dbcsr_tas_destroy(matrix_a_bnorm) CALL dbcsr_tas_destroy(matrix_b_bnorm) - DEALLOCATE(matrix_a_bnorm, matrix_b_bnorm) + DEALLOCATE (matrix_a_bnorm, matrix_b_bnorm) ELSE matrix_c_bnorm => matrix_c ENDIF nblk = dbcsr_tas_get_num_blocks(matrix_c_bnorm) - IF(PRESENT(blk_ind)) ALLOCATE(blk_ind(nblk, 2)) + IF (PRESENT(blk_ind)) ALLOCATE (blk_ind(nblk, 2)) CALL dbcsr_tas_iterator_start(iter, matrix_c_bnorm) - IF(PRESENT(nze)) nze = 0 - DO iblk=1,nblk + IF (PRESENT(nze)) nze = 0 + DO iblk = 1, nblk CALL dbcsr_tas_iterator_next_block(iter, row, col, bn) row_size = matrix_c%row_blk_size%data(row) col_size = matrix_c%col_blk_size%data(col) - IF(PRESENT(nze)) nze = nze + row_size*col_size - IF(PRESENT(blk_ind)) blk_ind(iblk, :) = [row, col] + IF (PRESENT(nze)) nze = nze + row_size*col_size + IF (PRESENT(blk_ind)) blk_ind(iblk, :) = [row, col] ENDDO CALL dbcsr_tas_iterator_stop(iter) - IF(PRESENT(nze)) THEN + IF (PRESENT(nze)) THEN CALL dbcsr_tas_get_split_info(dbcsr_tas_info(matrix_a), mp_comm=mp_comm) CALL mp_sum(nze, mp_comm) ENDIF IF (.NOT. retain_sparsity_prv) THEN CALL dbcsr_tas_destroy(matrix_c_bnorm) - DEALLOCATE(matrix_c_bnorm) + DEALLOCATE (matrix_c_bnorm) ENDIF CALL timestop(handle) @@ -1371,14 +1371,14 @@ FUNCTION split_factor_estimate(max_mm_dim, nze_a, nze_b, nze_c, numnodes) RESULT INTEGER :: nsplit, nsplit_comm, nsplit_memory INTEGER(KIND=int_8) :: max_nze, min_nze - SELECT CASE(max_mm_dim) - CASE(1) + SELECT CASE (max_mm_dim) + CASE (1) min_nze = MAX(nze_b, 1_int_8) max_nze = MAX(MAXVAL([nze_a, nze_c]), 1_int_8) - CASE(2) + CASE (2) min_nze = MAX(nze_c, 1_int_8) max_nze = MAX(MAXVAL([nze_a, nze_b]), 1_int_8) - CASE(3) + CASE (3) min_nze = MAX(nze_a, 1_int_8) max_nze = MAX(MAXVAL([nze_b, nze_c]), 1_int_8) CASE DEFAULT @@ -1386,7 +1386,7 @@ FUNCTION split_factor_estimate(max_mm_dim, nze_a, nze_b, nze_c, numnodes) RESULT END SELECT nsplit_comm = NINT((REAL(nze_a + nze_b, real_8)/(2*min_nze))**(2._real_8/3)*REAL(numnodes, real_8)**(1._real_8/3)) - IF(nsplit_comm == 0) nsplit_comm = 1 + IF (nsplit_comm == 0) nsplit_comm = 1 ! nsplit_memory protects against excess memory usage ! actual split factor may be up to default_nsplit_accept_ratio_1 times larger, so the largest nsplit @@ -1530,7 +1530,7 @@ SUBROUTINE convert_to_new_pgrid(mp_comm_cart, matrix_in, matrix_out, move_data, SUBROUTINE dbcsr_tas_batched_mm_init(matrix) TYPE(dbcsr_tas_type), INTENT(INOUT) :: matrix matrix%do_batched = 1 - ALLOCATE(matrix%mm_storage) + ALLOCATE (matrix%mm_storage) matrix%mm_storage%batched_out = .FALSE. END SUBROUTINE @@ -1538,7 +1538,7 @@ SUBROUTINE dbcsr_tas_batched_mm_finalize(matrix) TYPE(dbcsr_tas_type), INTENT(INOUT) :: matrix IF (matrix%do_batched == 0) RETURN - ASSOCIATE (storage=>matrix%mm_storage) + ASSOCIATE (storage => matrix%mm_storage) IF (storage%batched_out) THEN CALL dbcsr_tas_merge(storage%store_batched%matrix, storage%store_batched_repl, move_data=.TRUE.) CALL dbcsr_scale(matrix%matrix, storage%batched_beta) @@ -1556,7 +1556,7 @@ SUBROUTINE dbcsr_tas_batched_mm_finalize(matrix) storage%batched_out = .FALSE. END ASSOCIATE - DEALLOCATE(matrix%mm_storage) + DEALLOCATE (matrix%mm_storage) matrix%do_batched = 0 END SUBROUTINE diff --git a/src/tas/dbcsr_tas_split.F b/src/tas/dbcsr_tas_split.F index 37491748509..a7b300a19f6 100644 --- a/src/tas/dbcsr_tas_split.F +++ b/src/tas/dbcsr_tas_split.F @@ -554,12 +554,12 @@ SUBROUTINE block_index_local_to_global(info, dist, row_group, column_group, & SELECT CASE (info%split_rowcol) CASE (rowsplit) - ASSOCIATE (rows=>dist%local_rowcols) + ASSOCIATE (rows => dist%local_rowcols) IF (PRESENT(row)) row = rows(row_group) IF (PRESENT(column)) column = column_group END ASSOCIATE CASE (colsplit) - ASSOCIATE (cols=>dist%local_rowcols) + ASSOCIATE (cols => dist%local_rowcols) IF (PRESENT(row)) row = row_group IF (PRESENT(column)) column = cols(column_group) END ASSOCIATE diff --git a/src/tensors/dbcsr_array_list_methods.F b/src/tensors/dbcsr_array_list_methods.F index c2cdd48f0b8..d92e74253ea 100644 --- a/src/tensors/dbcsr_array_list_methods.F +++ b/src/tensors/dbcsr_array_list_methods.F @@ -162,7 +162,7 @@ SUBROUTINE get_arrays(list, ${varlist("data")}$, i_selected) o(1:ndata) = (/(i, i=1, ndata)/) ENDIF - ASSOCIATE (ptr=>list%ptr, col_data=>list%col_data) + ASSOCIATE (ptr => list%ptr, col_data => list%col_data) #:for dim in range(1, maxdim+1) IF (ndata > ${dim-1}$) THEN CALL allocate_any(data_${dim}$, source=col_data(ptr(o(${dim}$)):ptr(o(${dim}$) + 1) - 1)) @@ -179,7 +179,7 @@ SUBROUTINE get_ith_array(list, i, array_size, array) INTEGER, INTENT(IN) :: array_size INTEGER, DIMENSION(array_size), INTENT(OUT) :: array - ASSOCIATE (ptr=>list%ptr, col_data=>list%col_data) + ASSOCIATE (ptr => list%ptr, col_data => list%col_data) DBCSR_ASSERT(i <= number_of_arrays(list)) array(:) = col_data(ptr(i):ptr(i + 1) - 1) @@ -194,7 +194,7 @@ SUBROUTINE allocate_and_get_ith_array(list, i, array) INTEGER, INTENT(IN) :: i INTEGER, DIMENSION(:), ALLOCATABLE, INTENT(OUT) :: array - ASSOCIATE (ptr=>list%ptr, col_data=>list%col_data) + ASSOCIATE (ptr => list%ptr, col_data => list%col_data) DBCSR_ASSERT(i <= number_of_arrays(list)) CALL allocate_any(array, source=col_data(ptr(i):ptr(i + 1) - 1)) diff --git a/src/tensors/dbcsr_tensor.F b/src/tensors/dbcsr_tensor.F index 2b73f21c782..ceee371ac8b 100644 --- a/src/tensors/dbcsr_tensor.F +++ b/src/tensors/dbcsr_tensor.F @@ -159,8 +159,8 @@ SUBROUTINE dbcsr_t_copy(tensor_in, tensor_out, order, summation, bounds, move_da summation_prv = .FALSE. ENDIF - IF(PRESENT(bounds)) THEN - ALLOCATE(in_tmp_1) + IF (PRESENT(bounds)) THEN + ALLOCATE (in_tmp_1) CALL dbcsr_t_crop(tensor_in, in_tmp_1, bounds=bounds, move_data=move_prv) new_in_1 = .TRUE. move_prv = .TRUE. @@ -194,18 +194,18 @@ SUBROUTINE dbcsr_t_copy(tensor_in, tensor_out, order, summation, bounds, move_da in_tmp_3 => in_tmp_2 ENDIF - ALLOCATE(map1_in_1(ndims_matrix_row(in_tmp_3))) - ALLOCATE(map1_in_2(ndims_matrix_column(in_tmp_3))) + ALLOCATE (map1_in_1(ndims_matrix_row(in_tmp_3))) + ALLOCATE (map1_in_2(ndims_matrix_column(in_tmp_3))) CALL dbcsr_t_get_mapping_info(in_tmp_3%nd_index, map1_2d=map1_in_1, map2_2d=map1_in_2) - ALLOCATE(map2_in_1(ndims_matrix_row(out_tmp_1))) - ALLOCATE(map2_in_2(ndims_matrix_column(out_tmp_1))) + ALLOCATE (map2_in_1(ndims_matrix_row(out_tmp_1))) + ALLOCATE (map2_in_2(ndims_matrix_column(out_tmp_1))) CALL dbcsr_t_get_mapping_info(out_tmp_1%nd_index, map1_2d=map2_in_1, map2_2d=map2_in_2) IF (.NOT. PRESENT(order)) THEN IF (array_eq_i(map1_in_1, map2_in_1) .AND. array_eq_i(map1_in_2, map2_in_2)) THEN dist_compatible_tas = check_equal(in_tmp_3%nd_dist, out_tmp_1%nd_dist) - ELSEIF(array_eq_i([map1_in_1, map1_in_2], [map2_in_1, map2_in_2])) THEN + ELSEIF (array_eq_i([map1_in_1, map1_in_2], [map2_in_1, map2_in_2])) THEN dist_compatible_tensor = check_equal(in_tmp_3%nd_dist, out_tmp_1%nd_dist) ENDIF ENDIF @@ -213,7 +213,7 @@ SUBROUTINE dbcsr_t_copy(tensor_in, tensor_out, order, summation, bounds, move_da IF (dist_compatible_tas) THEN CALL dbcsr_tas_copy(out_tmp_1%matrix_rep, in_tmp_3%matrix_rep, summation) IF (move_prv) CALL dbcsr_t_clear(in_tmp_3) - ELSEIF(dist_compatible_tensor) THEN + ELSEIF (dist_compatible_tensor) THEN CALL dbcsr_t_copy_nocomm(in_tmp_3, out_tmp_1, summation) IF (move_prv) CALL dbcsr_t_clear(in_tmp_3) ELSE @@ -276,7 +276,7 @@ SUBROUTINE dbcsr_t_copy_nocomm(tensor_in, tensor_out, summation) CALL dbcsr_t_reserve_blocks(tensor_in, tensor_out) CALL dbcsr_t_iterator_start(iter, tensor_in) - DO WHILE(dbcsr_t_iterator_blocks_left(iter)) + DO WHILE (dbcsr_t_iterator_blocks_left(iter)) CALL dbcsr_t_iterator_next_block(iter, ind_nd, blk) CALL dbcsr_t_get_block(tensor_in, ind_nd, blk_data, found) DBCSR_ASSERT(found) @@ -310,8 +310,8 @@ SUBROUTINE dbcsr_t_copy_matrix_to_tensor(matrix_in, tensor_out, summation) CALL timeset(routineN, handle) DBCSR_ASSERT(tensor_out%valid) - IF(dbcsr_has_symmetry(matrix_in)) THEN - ALLOCATE(matrix_in_desym) + IF (dbcsr_has_symmetry(matrix_in)) THEN + ALLOCATE (matrix_in_desym) CALL dbcsr_desymmetrize(matrix_in, matrix_in_desym) ELSE matrix_in_desym => matrix_in @@ -334,9 +334,9 @@ SUBROUTINE dbcsr_t_copy_matrix_to_tensor(matrix_in, tensor_out, summation) ENDDO CALL dbcsr_iterator_stop(iter) - IF(dbcsr_has_symmetry(matrix_in)) THEN + IF (dbcsr_has_symmetry(matrix_in)) THEN CALL dbcsr_release(matrix_in_desym) - DEALLOCATE(matrix_in_desym) + DEALLOCATE (matrix_in_desym) ENDIF CALL timestop(handle) @@ -371,7 +371,7 @@ SUBROUTINE dbcsr_t_copy_tensor_to_matrix(tensor_in, matrix_out, summation) CALL dbcsr_t_iterator_start(iter, tensor_in) DO WHILE (dbcsr_t_iterator_blocks_left(iter)) CALL dbcsr_t_iterator_next_block(iter, ind_2d, blk) - IF(dbcsr_has_symmetry(matrix_out) .AND. checker_tr(ind_2d(1), ind_2d(2))) CYCLE + IF (dbcsr_has_symmetry(matrix_out) .AND. checker_tr(ind_2d(1), ind_2d(2))) CYCLE CALL dbcsr_t_get_block(tensor_in, ind_2d, block, found) DBCSR_ASSERT(found) @@ -534,7 +534,7 @@ SUBROUTINE dbcsr_t_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, & TYPE(dbcsr_t_type) :: tensor_algn_1, tensor_algn_2, tensor_algn_3 TYPE(dbcsr_t_type), POINTER :: tensor_crop_1, tensor_crop_2 - INTEGER(int_8), DIMENSION(:,:), ALLOCATABLE :: result_index_2d + INTEGER(int_8), DIMENSION(:, :), ALLOCATABLE :: result_index_2d LOGICAL :: assert_stmt INTEGER :: data_type, max_mm_dim, max_tensor, mp_comm, & iblk, nblk @@ -597,8 +597,8 @@ SUBROUTINE dbcsr_t_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, & ENDIF nodata_3 = .TRUE. - IF(PRESENT(retain_sparsity)) THEN - IF(retain_sparsity) nodata_3 = .FALSE. + IF (PRESENT(retain_sparsity)) THEN + IF (retain_sparsity) nodata_3 = .FALSE. ENDIF CALL dbcsr_t_map_bounds_to_tensors(tensor_1, tensor_2, & @@ -809,7 +809,7 @@ SUBROUTINE dbcsr_t_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, & IF (new_2) CALL dbcsr_t_write_tensor_dist(tensor_contr_2, unit_nr) ENDIF - IF(.NOT. PRESENT(result_index)) THEN + IF (.NOT. PRESENT(result_index)) THEN CALL dbcsr_tas_multiply(trans_1, trans_2, trans_3, alpha, & tensor_contr_1%matrix_rep, tensor_contr_2%matrix_rep, & beta, & @@ -824,8 +824,8 @@ SUBROUTINE dbcsr_t_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, & tensor_contr_3%matrix_rep, filter_eps=filter_eps, & result_index=result_index_2d) - nblk = SIZE(result_index_2d,1) - IF(PRESENT(nblks_local)) nblks_local = nblk + nblk = SIZE(result_index_2d, 1) + IF (PRESENT(nblks_local)) nblks_local = nblk IF (SIZE(result_index, 1) < nblk) THEN CALL dbcsr_abort(__LOCATION__, & "allocated size of `result_index` is too small. This error occurs due to a high load imbalance of distributed tensor data.") @@ -834,7 +834,7 @@ SUBROUTINE dbcsr_t_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, & result_index = 0 DO iblk = 1, nblk - result_index(iblk,:) = get_nd_indices_tensor(tensor_contr_3%nd_index_blk, result_index_2d(iblk,:)) + result_index(iblk, :) = get_nd_indices_tensor(tensor_contr_3%nd_index_blk, result_index_2d(iblk, :)) ENDDO IF (new_1) THEN @@ -960,8 +960,8 @@ SUBROUTINE dbcsr_t_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, & DEALLOCATE (tensor_contr_3) ENDIF - IF(PRESENT(move_data)) THEN - IF(move_data) THEN + IF (PRESENT(move_data)) THEN + IF (move_data) THEN CALL dbcsr_t_clear(tensor_1) CALL dbcsr_t_clear(tensor_2) ENDIF @@ -1122,7 +1122,7 @@ SUBROUTINE reshape_mm_compatible(tensor1, tensor2, tensor1_out, tensor2_out, ind IF (compat1 == 1) THEN ! linked index is first 2d dimension ! get distribution of linked index, tensor 2 must adopt this distribution ! get grid dimensions of linked index - ALLOCATE(mp_dims(ndims_mapping_row(dist_in%pgrid%nd_index_grid))) + ALLOCATE (mp_dims(ndims_mapping_row(dist_in%pgrid%nd_index_grid))) CALL dbcsr_t_get_mapping_info(dist_in%pgrid%nd_index_grid, dims1_2d=mp_dims) ALLOCATE (tensor2_out) CALL dbcsr_t_remap(tensor2, ind2_linked, ind2_free, tensor2_out, comm_2d=dist_in%pgrid%mp_comm_2d, & @@ -1130,7 +1130,7 @@ SUBROUTINE reshape_mm_compatible(tensor1, tensor2, tensor1_out, tensor2_out, ind ELSEIF (compat1 == 2) THEN ! linked index is second 2d dimension ! get distribution of linked index, tensor 2 must adopt this distribution ! get grid dimensions of linked index - ALLOCATE(mp_dims(ndims_mapping_column(dist_in%pgrid%nd_index_grid))) + ALLOCATE (mp_dims(ndims_mapping_column(dist_in%pgrid%nd_index_grid))) CALL dbcsr_t_get_mapping_info(dist_in%pgrid%nd_index_grid, dims2_2d=mp_dims) ALLOCATE (tensor2_out) CALL dbcsr_t_remap(tensor2, ind2_free, ind2_linked, tensor2_out, comm_2d=dist_in%pgrid%mp_comm_2d, & @@ -1165,13 +1165,13 @@ SUBROUTINE reshape_mm_compatible(tensor1, tensor2, tensor1_out, tensor2_out, ind dist_in = dbcsr_t_distribution(tensor2_out) dist_list = array_sublist(dist_in%nd_dist, ind2_linked) IF (compat2 == 1) THEN - ALLOCATE(mp_dims(ndims_mapping_row(dist_in%pgrid%nd_index_grid))) + ALLOCATE (mp_dims(ndims_mapping_row(dist_in%pgrid%nd_index_grid))) CALL dbcsr_t_get_mapping_info(dist_in%pgrid%nd_index_grid, dims1_2d=mp_dims) ALLOCATE (tensor1_out) CALL dbcsr_t_remap(tensor1, ind1_linked, ind1_free, tensor1_out, comm_2d=dist_in%pgrid%mp_comm_2d, & dist1=dist_list, mp_dims_1=mp_dims, nodata=nodata1, move_data=move_data_1) ELSEIF (compat2 == 2) THEN - ALLOCATE(mp_dims(ndims_mapping_column(dist_in%pgrid%nd_index_grid))) + ALLOCATE (mp_dims(ndims_mapping_column(dist_in%pgrid%nd_index_grid))) CALL dbcsr_t_get_mapping_info(dist_in%pgrid%nd_index_grid, dims2_2d=mp_dims) ALLOCATE (tensor1_out) CALL dbcsr_t_remap(tensor1, ind1_free, ind1_linked, tensor1_out, comm_2d=dist_in%pgrid%mp_comm_2d, & diff --git a/src/tensors/dbcsr_tensor_block.F b/src/tensors/dbcsr_tensor_block.F index 90585bba387..1935568400c 100644 --- a/src/tensors/dbcsr_tensor_block.F +++ b/src/tensors/dbcsr_tensor_block.F @@ -340,8 +340,8 @@ SUBROUTINE dbcsr_t_reserve_blocks_matrix_to_tensor(matrix_in, tensor_out) CALL timeset(routineN, handle) - IF(dbcsr_has_symmetry(matrix_in)) THEN - ALLOCATE(matrix_in_desym) + IF (dbcsr_has_symmetry(matrix_in)) THEN + ALLOCATE (matrix_in_desym) CALL dbcsr_desymmetrize(matrix_in, matrix_in_desym) ELSE matrix_in_desym => matrix_in @@ -358,9 +358,9 @@ SUBROUTINE dbcsr_t_reserve_blocks_matrix_to_tensor(matrix_in, tensor_out) CALL dbcsr_t_reserve_blocks(tensor_out, blk_ind_1, blk_ind_2) - IF(dbcsr_has_symmetry(matrix_in)) THEN + IF (dbcsr_has_symmetry(matrix_in)) THEN CALL dbcsr_release(matrix_in_desym) - DEALLOCATE(matrix_in_desym) + DEALLOCATE (matrix_in_desym) ENDIF CALL timestop(handle) @@ -386,7 +386,7 @@ SUBROUTINE dbcsr_t_reserve_blocks_tensor_to_matrix(tensor_in, matrix_out) CALL dbcsr_t_iterator_start(iter, tensor_in) iblk = 0 - DO WHILE(dbcsr_t_iterator_blocks_left(iter)) + DO WHILE (dbcsr_t_iterator_blocks_left(iter)) CALL dbcsr_t_iterator_next_block(iter, ind_2d, blk) IF (dbcsr_has_symmetry(matrix_out)) THEN IF (checker_tr(ind_2d(1), ind_2d(2))) CYCLE @@ -432,7 +432,7 @@ SUBROUTINE create_block_data_${dsuffix}$ (block, sizes, array) INTEGER, DIMENSION(:), INTENT(IN) :: sizes ${dtype}$, DIMENSION(PRODUCT(sizes)), INTENT(IN) :: array - ASSOCIATE (blk=>block%${dsuffix}$) + ASSOCIATE (blk => block%${dsuffix}$) block%data_type = ${dparam}$ CALL allocate_any(blk%sizes, source=sizes) CALL allocate_any(blk%blk, source=array) @@ -598,9 +598,9 @@ SUBROUTINE dbcsr_t_put_${ndim}$d_block_${dsuffix}$ (tensor, ind, sizes, block, s DBCSR_ASSERT(found) ENDIF - ASSOCIATE (map_nd=>tensor%nd_index_blk%map_nd, & - map1_2d=>tensor%nd_index_blk%map1_2d, & - map2_2d=>tensor%nd_index_blk%map2_2d) + ASSOCIATE (map_nd => tensor%nd_index_blk%map_nd, & + map1_2d => tensor%nd_index_blk%map1_2d, & + map2_2d => tensor%nd_index_blk%map2_2d) shape_2d = [PRODUCT(sizes(map1_2d)), PRODUCT(sizes(map2_2d))] @@ -622,7 +622,7 @@ SUBROUTINE dbcsr_t_put_${ndim}$d_block_${dsuffix}$ (tensor, ind, sizes, block, s CALL dbcsr_tas_put_block(tensor%matrix_rep, ind_2d(1), ind_2d(2), block_2d, summation=summation, & scale=scale) - IF(new_block) DEALLOCATE (block_2d) + IF (new_block) DEALLOCATE (block_2d) END SUBROUTINE #:endfor @@ -679,8 +679,8 @@ SUBROUTINE dbcsr_t_get_${ndim}$d_block_${dsuffix}$ (tensor, ind, sizes, block, f ind_2d(:) = get_2d_indices_tensor(tensor%nd_index_blk, ind) - ASSOCIATE (map1_2d=>tensor%nd_index_blk%map1_2d, & - map2_2d=>tensor%nd_index_blk%map2_2d) + ASSOCIATE (map1_2d => tensor%nd_index_blk%map1_2d, & + map2_2d => tensor%nd_index_blk%map2_2d) CALL dbcsr_tas_get_block_p(tensor%matrix_rep, ind_2d(1), ind_2d(2), block_2d_ptr, tr, found) DBCSR_ASSERT(.NOT. tr) diff --git a/src/tensors/dbcsr_tensor_index.F b/src/tensors/dbcsr_tensor_index.F index debe23de00d..64786b72fed 100644 --- a/src/tensors/dbcsr_tensor_index.F +++ b/src/tensors/dbcsr_tensor_index.F @@ -250,7 +250,7 @@ PURE FUNCTION split_tensor_index(ind_in, dims) RESULT(ind_out) INTEGER :: i_dim tmp = ind_in - DO i_dim=1,SIZE(dims) + DO i_dim = 1, SIZE(dims) ind_out(i_dim) = INT(MOD(tmp - 1, INT(dims(i_dim), int_8)) + 1) tmp = (tmp - 1)/dims(i_dim) + 1 END DO @@ -289,12 +289,12 @@ PURE FUNCTION get_2d_indices_tensor(map, ind_in) RESULT(ind_out) INTEGER :: i INTEGER, DIMENSION(${maxrank}$) :: ind_tmp - DO i=1, map%ndim1_2d + DO i = 1, map%ndim1_2d ind_tmp(i) = ind_in(map%map1_2d(i)) ENDDO ind_out(1) = combine_tensor_index(ind_tmp(:map%ndim1_2d), map%dims1_2d) - DO i=1, map%ndim2_2d + DO i = 1, map%ndim2_2d ind_tmp(i) = ind_in(map%map2_2d(i)) ENDDO ind_out(2) = combine_tensor_index(ind_tmp(:map%ndim2_2d), map%dims2_2d) @@ -312,12 +312,12 @@ PURE FUNCTION get_2d_indices_pgrid(map, ind_in) RESULT(ind_out) INTEGER :: i INTEGER, DIMENSION(${maxrank}$) :: ind_tmp - DO i=1, map%ndim1_2d + DO i = 1, map%ndim1_2d ind_tmp(i) = ind_in(map%map1_2d(i)) ENDDO ind_out(1) = combine_pgrid_index(ind_tmp(:map%ndim1_2d), map%dims1_2d) - DO i=1, map%ndim2_2d + DO i = 1, map%ndim2_2d ind_tmp(i) = ind_in(map%map2_2d(i)) ENDDO ind_out(2) = combine_pgrid_index(ind_tmp(:map%ndim2_2d), map%dims2_2d) @@ -337,13 +337,13 @@ PURE FUNCTION get_nd_indices_tensor(map, ind_in) RESULT(ind_out) ind_tmp(:map%ndim1_2d) = split_tensor_index(ind_in(1), map%dims1_2d) - DO i=1, map%ndim1_2d + DO i = 1, map%ndim1_2d ind_out(map%map1_2d(i)) = ind_tmp(i) ENDDO ind_tmp(:map%ndim2_2d) = split_tensor_index(ind_in(2), map%dims2_2d) - DO i=1, map%ndim2_2d + DO i = 1, map%ndim2_2d ind_out(map%map2_2d(i)) = ind_tmp(i) ENDDO diff --git a/src/tensors/dbcsr_tensor_io.F b/src/tensors/dbcsr_tensor_io.F index 4ea18a19e2f..f17f8d43cfd 100644 --- a/src/tensors/dbcsr_tensor_io.F +++ b/src/tensors/dbcsr_tensor_io.F @@ -51,8 +51,8 @@ SUBROUTINE dbcsr_t_write_tensor_info(tensor, output_unit, full_info) INTEGER, DIMENSION(ndims_tensor(tensor)) :: nblks_total, nfull_total, pdims, my_ploc, nblks_local, nfull_local #:for idim in range(1, maxdim+1) - INTEGER, DIMENSION(dbcsr_t_nblks_total(tensor,${idim}$)) :: proc_dist_${idim}$ - INTEGER, DIMENSION(dbcsr_t_nblks_total(tensor,${idim}$)) :: blk_size_${idim}$ + INTEGER, DIMENSION(dbcsr_t_nblks_total(tensor, ${idim}$)) :: proc_dist_${idim}$ + INTEGER, DIMENSION(dbcsr_t_nblks_total(tensor, ${idim}$)) :: blk_size_${idim}$ #:endfor CHARACTER(len=default_string_length) :: name INTEGER :: idim diff --git a/src/tensors/dbcsr_tensor_split.F b/src/tensors/dbcsr_tensor_split.F index a1368534cc1..62875293661 100644 --- a/src/tensors/dbcsr_tensor_split.F +++ b/src/tensors/dbcsr_tensor_split.F @@ -629,10 +629,10 @@ SUBROUTINE dbcsr_t_crop(tensor_in, tensor_out, bounds, move_data) CALL dbcsr_t_create(tensor_in, tensor_out) ! reserve blocks inside bounds - ALLOCATE(blk_ind(dbcsr_t_get_num_blocks(tensor_in), ndims_tensor(tensor_in))) + ALLOCATE (blk_ind(dbcsr_t_get_num_blocks(tensor_in), ndims_tensor(tensor_in))) CALL dbcsr_t_reserved_block_indices(tensor_in, blk_ind) nblk = dbcsr_t_get_num_blocks(tensor_in) - ALLOCATE(blk_ind_tmp(dbcsr_t_get_num_blocks(tensor_in), ndims_tensor(tensor_in))) + ALLOCATE (blk_ind_tmp(dbcsr_t_get_num_blocks(tensor_in), ndims_tensor(tensor_in))) blk_ind_tmp(:, :) = 0 iblk = 0 blk_loop: DO iblk_all = 1, nblk diff --git a/src/tensors/dbcsr_tensor_test.F b/src/tensors/dbcsr_tensor_test.F index 5c20e9ec844..06866757440 100644 --- a/src/tensors/dbcsr_tensor_test.F +++ b/src/tensors/dbcsr_tensor_test.F @@ -417,7 +417,7 @@ SUBROUTINE dbcsr_t_setup_test_tensor(tensor, mp_comm, enumerate, ${varlist("blk_ nblks_alloc = SIZE(blk_ind_1) CALL mp_environ(numnodes, mynode, mp_comm) - IF(.NOT. enumerate) THEN + IF (.NOT. enumerate) THEN DBCSR_ASSERT(randmat_counter .NE. 0) randmat_counter = randmat_counter + 1 @@ -466,7 +466,7 @@ SUBROUTINE dbcsr_t_setup_test_tensor(tensor, mp_comm, enumerate, ${varlist("blk_ DO WHILE (dbcsr_t_iterator_blocks_left(iterator)) CALL dbcsr_t_iterator_next_block(iterator, blk_index, blk, blk_size=blk_size, blk_offset=blk_offset) - IF(.NOT. enumerate) THEN + IF (.NOT. enumerate) THEN blk_index_2d = INT(get_2d_indices_tensor(tensor%nd_index_blk, blk_index)) CALL dbcsr_t_get_mapping_info(tensor%nd_index_blk, dims_2d=nblks_2d) CALL set_larnv_seed(blk_index_2d(1), nblks_2d(1), blk_index_2d(2), nblks_2d(2), randmat_counter, iseed) @@ -771,9 +771,9 @@ SUBROUTINE dbcsr_t_contract_test(alpha, tensor_1, tensor_2, beta, tensor_3, & ALLOCATE (order_t${i}$ (ndims_tensor(tensor_${i}$))) #:endfor - ASSOCIATE (map_t1_1=>notcontract_1, map_t1_2=>contract_1, & - map_t2_1=>notcontract_2, map_t2_2=>contract_2, & - map_t3_1=>map_1, map_t3_2=>map_2) + ASSOCIATE (map_t1_1 => notcontract_1, map_t1_2 => contract_1, & + map_t2_1 => notcontract_2, map_t2_2 => contract_2, & + map_t3_1 => map_1, map_t3_2 => map_2) #:for i in range(1,4) order_t${i}$ (:) = dbcsr_t_inverse_order([map_t${i}$_1, map_t${i}$_2]) diff --git a/src/tensors/dbcsr_tensor_types.F b/src/tensors/dbcsr_tensor_types.F index 28a735e8131..f0df10466f7 100644 --- a/src/tensors/dbcsr_tensor_types.F +++ b/src/tensors/dbcsr_tensor_types.F @@ -89,12 +89,12 @@ MODULE dbcsr_tensor_types dbcsr_t_type, & dims_tensor, & mp_environ_pgrid, & - ndims_tensor,& - ndims_matrix_row,& - ndims_matrix_column,& - dbcsr_t_nblks_local,& - dbcsr_t_nblks_total,& - dbcsr_t_blk_size,& + ndims_tensor, & + ndims_matrix_row, & + ndims_matrix_column, & + dbcsr_t_nblks_local, & + dbcsr_t_nblks_total, & + dbcsr_t_blk_size, & dbcsr_t_max_nblks_local TYPE dbcsr_t_pgrid_type @@ -410,7 +410,7 @@ FUNCTION dbcsr_t_nd_mp_comm(comm_2d, map1_2d, map2_2d, dims_nd, dims1_nd, dims2_ dims1_nd_prv(:) = dims1_nd ELSE - IF(PRESENT(tdims)) THEN + IF (PRESENT(tdims)) THEN CALL dbcsr_t_mp_dims_create(dims_2d(1), dims1_nd_prv, tdims(map1_2d)) ELSE CALL mp_dims_create(dims_2d(1), dims1_nd_prv) @@ -420,7 +420,7 @@ FUNCTION dbcsr_t_nd_mp_comm(comm_2d, map1_2d, map2_2d, dims_nd, dims1_nd, dims2_ IF (PRESENT(dims2_nd)) THEN dims2_nd_prv(:) = dims2_nd ELSE - IF(PRESENT(tdims)) THEN + IF (PRESENT(tdims)) THEN CALL dbcsr_t_mp_dims_create(dims_2d(2), dims2_nd_prv, tdims(map2_2d)) ELSE CALL mp_dims_create(dims_2d(2), dims2_nd_prv) @@ -460,7 +460,7 @@ RECURSIVE SUBROUTINE dbcsr_t_mp_dims_create(nodes, dims, tensor_dims, lb_ratio) INTEGER :: pdims_rem, idim, pdim REAL(real_8) :: lb_ratio_prv - IF(PRESENT(lb_ratio)) THEN + IF (PRESENT(lb_ratio)) THEN lb_ratio_prv = lb_ratio ELSE lb_ratio_prv = 0.2_real_8 @@ -469,16 +469,16 @@ RECURSIVE SUBROUTINE dbcsr_t_mp_dims_create(nodes, dims, tensor_dims, lb_ratio) CALL allocate_any(dims_store, source=dims) ! get default process grid dimensions - IF(any(dims == 0)) THEN + IF (any(dims == 0)) THEN CALL mp_dims_create(nodes, dims) ENDIF ! sort dimensions such that problematic grid dimensions (those who should be corrected) come first - ALLOCATE(sort_key(SIZE(tensor_dims))) + ALLOCATE (sort_key(SIZE(tensor_dims))) sort_key(:) = REAL(tensor_dims, real_8)/dims CALL allocate_any(tensor_dims_sorted, source=tensor_dims) - ALLOCATE(sort_indices(SIZE(sort_key))) + ALLOCATE (sort_indices(SIZE(sort_key))) CALL sort(sort_key, SIZE(sort_key), sort_indices) tensor_dims_sorted(:) = tensor_dims_sorted(sort_indices) dims(:) = dims(sort_indices) @@ -489,7 +489,7 @@ RECURSIVE SUBROUTINE dbcsr_t_mp_dims_create(nodes, dims, tensor_dims, lb_ratio) DO idim = 1, SIZE(tensor_dims_sorted) IF (.NOT. accept_pdims_loadbalancing(pdims_rem, dims(idim), tensor_dims_sorted(idim), lb_ratio_prv)) THEN pdim = tensor_dims_sorted(idim) - DO WHILE(.NOT. accept_pdims_loadbalancing(pdims_rem, pdim, tensor_dims_sorted(idim), lb_ratio_prv)) + DO WHILE (.NOT. accept_pdims_loadbalancing(pdims_rem, pdim, tensor_dims_sorted(idim), lb_ratio_prv)) pdim = pdim - 1 ENDDO dims(idim) = pdim @@ -498,7 +498,7 @@ RECURSIVE SUBROUTINE dbcsr_t_mp_dims_create(nodes, dims, tensor_dims, lb_ratio) IF (idim .NE. SIZE(tensor_dims_sorted)) THEN dims(idim + 1:) = 0 CALL mp_dims_create(pdims_rem, dims(idim + 1:)) - ELSEIF(lb_ratio_prv < 1.0_real_8) THEN + ELSEIF (lb_ratio_prv < 1.0_real_8) THEN ! resort to a less strict load imbalance factor dims(:) = dims_store CALL dbcsr_t_mp_dims_create(nodes, dims, tensor_dims, 1.1_real_8) @@ -885,8 +885,8 @@ SUBROUTINE dbcsr_t_create_template(tensor_in, tensor, name, dist, map1_2d, map2_ CALL allocate_any(map1_2d_prv, source=map1_2d) CALL allocate_any(map2_2d_prv, source=map2_2d) ELSE - ALLOCATE(map1_2d_prv(ndims_matrix_row(tensor_in))) - ALLOCATE(map2_2d_prv(ndims_matrix_column(tensor_in))) + ALLOCATE (map1_2d_prv(ndims_matrix_row(tensor_in))) + ALLOCATE (map2_2d_prv(ndims_matrix_column(tensor_in))) CALL dbcsr_t_get_mapping_info(tensor_in%nd_index_blk, map1_2d=map1_2d_prv, map2_2d=map2_2d_prv) ENDIF IF (PRESENT(name)) THEN @@ -1192,7 +1192,7 @@ SUBROUTINE dbcsr_t_pgrid_create_expert(mp_comm, dims, pgrid, map1_2d, map2_2d, t CALL mp_environ(nproc, iproc, mp_comm) IF (ANY(dims == 0)) THEN - IF(.NOT. PRESENT(tensor_dims)) THEN + IF (.NOT. PRESENT(tensor_dims)) THEN CALL mp_dims_create(nproc, dims) ELSE CALL dbcsr_t_mp_dims_create(nproc, dims, tensor_dims) @@ -1276,7 +1276,7 @@ SUBROUTINE dbcsr_t_pgrid_change_dims(pgrid, pdims) CALL dbcsr_t_get_mapping_info(pgrid%nd_index_grid, map1_2d=map1_2d, map2_2d=map2_2d) CALL create_nd_to_2d_mapping(nd_index_grid, pdims, map1_2d, map2_2d, base=0, col_major=.FALSE.) CALL dbcsr_t_get_mapping_info(nd_index_grid, dims_2d=pdims_2d) - IF(MOD(pdims_2d(dimsplit),nsplit) == 0) THEN + IF (MOD(pdims_2d(dimsplit), nsplit) == 0) THEN CALL dbcsr_t_pgrid_create_expert(pgrid%mp_comm_2d, pdims, pgrid_tmp, map1_2d=map1_2d, map2_2d=map2_2d, & nsplit=nsplit, dimsplit=dimsplit) ELSE @@ -1366,13 +1366,13 @@ SUBROUTINE dbcsr_t_get_info(tensor, nblks_total, & INTEGER, INTENT(OUT), OPTIONAL, DIMENSION(ndims_tensor(tensor)) :: pdims !! process grid dimensions #:for idim in range(1, maxdim+1) - INTEGER, DIMENSION(dbcsr_t_nblks_local(tensor,${idim}$)), INTENT(OUT), OPTIONAL :: blks_local_${idim}$ + INTEGER, DIMENSION(dbcsr_t_nblks_local(tensor, ${idim}$)), INTENT(OUT), OPTIONAL :: blks_local_${idim}$ !! local blocks along dimension ${idim}$ - INTEGER, DIMENSION(dbcsr_t_nblks_total(tensor,${idim}$)), INTENT(OUT), OPTIONAL :: proc_dist_${idim}$ + INTEGER, DIMENSION(dbcsr_t_nblks_total(tensor, ${idim}$)), INTENT(OUT), OPTIONAL :: proc_dist_${idim}$ !! distribution along dimension ${idim}$ - INTEGER, DIMENSION(dbcsr_t_nblks_total(tensor,${idim}$)), INTENT(OUT), OPTIONAL :: blk_size_${idim}$ + INTEGER, DIMENSION(dbcsr_t_nblks_total(tensor, ${idim}$)), INTENT(OUT), OPTIONAL :: blk_size_${idim}$ !! block sizes along dimension ${idim}$ - INTEGER, DIMENSION(dbcsr_t_nblks_total(tensor,${idim}$)), INTENT(OUT), OPTIONAL :: blk_offset_${idim}$ + INTEGER, DIMENSION(dbcsr_t_nblks_total(tensor, ${idim}$)), INTENT(OUT), OPTIONAL :: blk_offset_${idim}$ !! block offsets along dimension ${idim}$ #:endfor TYPE(dbcsr_t_distribution_type), INTENT(OUT), OPTIONAL :: distribution From 18fcdb1859b51918a14fae8a06d673799bbb61b9 Mon Sep 17 00:00:00 2001 From: Alfio Lazzaro Date: Wed, 8 Apr 2020 23:28:35 +0200 Subject: [PATCH 02/19] Check MPI sub-communicators when DBCSR finalize --- src/core/dbcsr_lib.F | 17 +++++++++++++++-- src/mpi/dbcsr_mpiwrap.F | 15 +++++++++++++-- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/src/core/dbcsr_lib.F b/src/core/dbcsr_lib.F index 9f9160227ad..3c3000f4ba3 100644 --- a/src/core/dbcsr_lib.F +++ b/src/core/dbcsr_lib.F @@ -27,7 +27,8 @@ MODULE dbcsr_lib has_mp_perf_env, & mp_environ, mp_cart_rank, & rm_mp_perf_env, & - mp_comm_free + mp_comm_free, & + mp_get_comm_count USE dbcsr_mm, ONLY: dbcsr_multiply_clear_mempools, & dbcsr_multiply_lib_finalize, & dbcsr_multiply_lib_init, & @@ -71,6 +72,7 @@ MODULE dbcsr_lib PUBLIC :: dbcsr_print_statistics LOGICAL, PRIVATE, SAVE :: is_initialized = .FALSE. + LOGICAL, PRIVATE, SAVE :: check_comm_count = .FALSE. TYPE(dbcsr_logger_type), POINTER :: logger => Null() TYPE(dbcsr_mp_obj), SAVE :: mp_env @@ -134,10 +136,10 @@ SUBROUTINE dbcsr_init_lib_def(mp_comm, io_unit, accdrv_active_device_id) END SUBROUTINE dbcsr_init_lib_def SUBROUTINE dbcsr_init_lib_hooks(mp_comm, & - !! Initialize the DBCSR library using external loggers and timer callbacks in_timeset_hook, in_timestop_hook, & in_abort_hook, in_warn_hook, io_unit, & accdrv_active_device_id) + !! Initialize the DBCSR library using external loggers and timer callbacks INTEGER, INTENT(IN) :: mp_comm PROCEDURE(timeset_interface), INTENT(IN), POINTER :: in_timeset_hook PROCEDURE(timestop_interface), INTENT(IN), POINTER :: in_timestop_hook @@ -186,6 +188,12 @@ SUBROUTINE dbcsr_init_lib_pre(mp_comm, io_unit, accdrv_active_device_id) ext_io_unit = 0 IF (mynode .EQ. 0) ext_io_unit = default_output_unit ENDIF + + ! if MPI was not initialized in DBCSR, then need to check for the number of communicators + ! when we finalize DBCSR + IF (mp_get_comm_count() .EQ. 0) THEN + check_comm_count = .TRUE. + ENDIF CALL dbcsr_mp_make_env(mp_env, default_group, mp_comm) #if defined(__LIBXSMM) @@ -309,6 +317,11 @@ SUBROUTINE dbcsr_finalize_lib() ! Reset Acc ID CALL reset_accdrv_active_device_id() + ! Check the number of communicators + IF (check_comm_count .AND. mp_get_comm_count() .NE. 0) THEN + DBCSR_ABORT("Number of DBCSR sub-communicators is not zero!") + ENDIF + END SUBROUTINE dbcsr_finalize_lib SUBROUTINE dbcsr_print_statistics(print_timers, callgraph_filename) diff --git a/src/mpi/dbcsr_mpiwrap.F b/src/mpi/dbcsr_mpiwrap.F index 932045e37ef..42134408282 100644 --- a/src/mpi/dbcsr_mpiwrap.F +++ b/src/mpi/dbcsr_mpiwrap.F @@ -118,11 +118,12 @@ MODULE dbcsr_mpiwrap #if defined(__parallel) ! internal reference counter used to debug communicator leaks - INTEGER, PRIVATE, SAVE :: debug_comm_count + INTEGER, PRIVATE, SAVE :: debug_comm_count = 0 #endif ! init and error PUBLIC :: mp_world_init, mp_world_finalize + PUBLIC :: mp_get_comm_count PUBLIC :: mp_abort ! performance gathering @@ -724,6 +725,16 @@ SUBROUTINE mp_world_init(mp_comm) CALL add_mp_perf_env() END SUBROUTINE mp_world_init + FUNCTION mp_get_comm_count() + !! Return the current number of communicators + INTEGER :: mp_get_comm_count + + mp_get_comm_count = 0 +#if defined(__parallel) + mp_get_comm_count = debug_comm_count +#endif + END FUNCTION mp_get_comm_count + SUBROUTINE mp_reordering(mp_comm, mp_new_comm, ranks_order) !! re-create the system default communicator with a different MPI !! rank order @@ -747,7 +758,7 @@ SUBROUTINE mp_reordering(mp_comm, mp_new_comm, ranks_order) ierr = 0 #if defined(__parallel) - CALL mpi_comm_group(mp_comm, oldgroup, ierr); + CALL mpi_comm_group(mp_comm, oldgroup, ierr); IF (ierr /= 0) CALL mp_stop(ierr, "mpi_comm_group @ mp_reordering") CALL mpi_group_incl(oldgroup, SIZE(ranks_order), ranks_order, newgroup, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_group_incl @ mp_reordering") From 21f91d84e3cda3eaf838a3b510077c9e01c8aeb8 Mon Sep 17 00:00:00 2001 From: Alfio Lazzaro Date: Thu, 9 Apr 2020 10:34:39 +0200 Subject: [PATCH 03/19] Apply whitespace pretty --- .cp2k/Makefile | 2 +- src/mpi/dbcsr_mpiwrap.F | 8 ++--- tests/dbcsr_performance_driver.F | 4 +-- tests/dbcsr_performance_multiply.F | 58 +++++++++++++++--------------- tests/dbcsr_tensor_unittest.F | 32 ++++++++--------- 5 files changed, 52 insertions(+), 52 deletions(-) diff --git a/.cp2k/Makefile b/.cp2k/Makefile index c66ba0b9a1d..ec8272d7593 100644 --- a/.cp2k/Makefile +++ b/.cp2k/Makefile @@ -258,7 +258,7 @@ prettyclean: define pretty_func @mkdir -p $(PRETTYOBJDIR) @touch $2 - $(TOOLSDIR)/fprettify/fprettify.py --disable-whitespace $1 + $(TOOLSDIR)/fprettify/fprettify.py $1 endef $(PRETTYOBJDIR)/%.pretty: %.F diff --git a/src/mpi/dbcsr_mpiwrap.F b/src/mpi/dbcsr_mpiwrap.F index 42134408282..723c946c5a1 100644 --- a/src/mpi/dbcsr_mpiwrap.F +++ b/src/mpi/dbcsr_mpiwrap.F @@ -727,11 +727,11 @@ END SUBROUTINE mp_world_init FUNCTION mp_get_comm_count() !! Return the current number of communicators - INTEGER :: mp_get_comm_count + INTEGER :: mp_get_comm_count - mp_get_comm_count = 0 + mp_get_comm_count = 0 #if defined(__parallel) - mp_get_comm_count = debug_comm_count + mp_get_comm_count = debug_comm_count #endif END FUNCTION mp_get_comm_count @@ -758,7 +758,7 @@ SUBROUTINE mp_reordering(mp_comm, mp_new_comm, ranks_order) ierr = 0 #if defined(__parallel) - CALL mpi_comm_group(mp_comm, oldgroup, ierr); + CALL mpi_comm_group(mp_comm, oldgroup, ierr); IF (ierr /= 0) CALL mp_stop(ierr, "mpi_comm_group @ mp_reordering") CALL mpi_group_incl(oldgroup, SIZE(ranks_order), ranks_order, newgroup, ierr) IF (ierr /= 0) CALL mp_stop(ierr, "mpi_group_incl @ mp_reordering") diff --git a/tests/dbcsr_performance_driver.F b/tests/dbcsr_performance_driver.F index e78e0fc0c02..0fd1b56e665 100644 --- a/tests/dbcsr_performance_driver.F +++ b/tests/dbcsr_performance_driver.F @@ -13,7 +13,7 @@ PROGRAM dbcsr_performance_driver USE dbcsr_files, ONLY: open_file USE dbcsr_kinds, ONLY: default_string_length USE dbcsr_lib, ONLY: dbcsr_finalize_lib, & - dbcsr_init_lib,& + dbcsr_init_lib, & dbcsr_print_statistics USE dbcsr_machine, ONLY: default_output_unit, & m_getarg, & @@ -89,7 +89,7 @@ PROGRAM dbcsr_performance_driver ! ! initialize libdbcsr - CALL dbcsr_init_lib(mp_comm,io_unit) + CALL dbcsr_init_lib(mp_comm, io_unit) ! initialize libdbcsr errors CALL timeset(routineN, handle) diff --git a/tests/dbcsr_performance_multiply.F b/tests/dbcsr_performance_multiply.F index b78dbde80bc..f6f663ba9c9 100644 --- a/tests/dbcsr_performance_multiply.F +++ b/tests/dbcsr_performance_multiply.F @@ -87,7 +87,7 @@ SUBROUTINE dbcsr_perf_multiply(group, mp_env, npdims, io_unit, narg, args_shift, ! ! parsing - IF (narg .LT. args_shift+31) THEN + IF (narg .LT. args_shift + 31) THEN WRITE (io_unit, *) "Input file format:" WRITE (io_unit, *) " npcols for MPI grid \\" WRITE (io_unit, *) " use MPI-RMA algorithm \\" @@ -106,40 +106,40 @@ SUBROUTINE dbcsr_perf_multiply(group, mp_env, npdims, io_unit, narg, args_shift, DBCSR_ABORT("narg not correct") ENDIF - matrix_sizes(1) = atoi(args(args_shift+1)) - matrix_sizes(2) = atoi(args(args_shift+2)) - matrix_sizes(3) = atoi(args(args_shift+3)) - sparsities(1) = ator(args(args_shift+4)) - sparsities(2) = ator(args(args_shift+5)) - sparsities(3) = ator(args(args_shift+6)) - trans(1) = args(args_shift+7) - trans(2) = args(args_shift+8) - symmetries(1) = args(args_shift+9) - symmetries(2) = args(args_shift+10) - symmetries(3) = args(args_shift+11) - TYPE = atoi(args(args_shift+12)) - alpha(1) = ator(args(args_shift+13)) - alpha(2) = ator(args(args_shift+14)) - beta(1) = ator(args(args_shift+15)) - beta(2) = ator(args(args_shift+16)) - limits(1) = atoi(args(args_shift+17)) - limits(2) = atoi(args(args_shift+18)) - limits(3) = atoi(args(args_shift+19)) - limits(4) = atoi(args(args_shift+20)) - limits(5) = atoi(args(args_shift+21)) - limits(6) = atoi(args(args_shift+22)) - retain_sparsity = atol(args(args_shift+23)) - nrep = atoi(args(args_shift+24)) - mblk_to_read = atoi(args(args_shift+25)) - nblk_to_read = atoi(args(args_shift+26)) - kblk_to_read = atoi(args(args_shift+27)) + matrix_sizes(1) = atoi(args(args_shift + 1)) + matrix_sizes(2) = atoi(args(args_shift + 2)) + matrix_sizes(3) = atoi(args(args_shift + 3)) + sparsities(1) = ator(args(args_shift + 4)) + sparsities(2) = ator(args(args_shift + 5)) + sparsities(3) = ator(args(args_shift + 6)) + trans(1) = args(args_shift + 7) + trans(2) = args(args_shift + 8) + symmetries(1) = args(args_shift + 9) + symmetries(2) = args(args_shift + 10) + symmetries(3) = args(args_shift + 11) + TYPE = atoi(args(args_shift + 12)) + alpha(1) = ator(args(args_shift + 13)) + alpha(2) = ator(args(args_shift + 14)) + beta(1) = ator(args(args_shift + 15)) + beta(2) = ator(args(args_shift + 16)) + limits(1) = atoi(args(args_shift + 17)) + limits(2) = atoi(args(args_shift + 18)) + limits(3) = atoi(args(args_shift + 19)) + limits(4) = atoi(args(args_shift + 20)) + limits(5) = atoi(args(args_shift + 21)) + limits(6) = atoi(args(args_shift + 22)) + retain_sparsity = atol(args(args_shift + 23)) + nrep = atoi(args(args_shift + 24)) + mblk_to_read = atoi(args(args_shift + 25)) + nblk_to_read = atoi(args(args_shift + 26)) + kblk_to_read = atoi(args(args_shift + 27)) IF (narg < 34 + 2*(mblk_to_read + nblk_to_read + kblk_to_read)) & DBCSR_ABORT("narg not correct") ALLOCATE (bs_m(2*mblk_to_read), bs_n(2*nblk_to_read), bs_k(2*kblk_to_read)) - i = args_shift+27 + i = args_shift + 27 DO iblk = 1, mblk_to_read i = i + 1 bs_m(2*(iblk - 1) + 1) = atoi(args(i)) diff --git a/tests/dbcsr_tensor_unittest.F b/tests/dbcsr_tensor_unittest.F index d61481f48b5..229cf795a19 100644 --- a/tests/dbcsr_tensor_unittest.F +++ b/tests/dbcsr_tensor_unittest.F @@ -36,7 +36,7 @@ PROGRAM dbcsr_tensor_unittest dbcsr_t_pgrid_destroy, & ndims_tensor USE dbcsr_data_methods, ONLY: dbcsr_scalar - USE dbcsr_kinds, ONLY: real_8 + USE dbcsr_kinds, ONLY: real_8 #include "base/dbcsr_base_uses.f90" IMPLICIT NONE @@ -410,10 +410,10 @@ PROGRAM dbcsr_tensor_unittest CALL dbcsr_t_setup_test_tensor(tensor_B, mp_comm, .FALSE., blk_ind_3_2, blk_ind_4_2) CALL dbcsr_t_setup_test_tensor(tensor_C, mp_comm, .FALSE., blk_ind_1_3, blk_ind_2_3, blk_ind_4_3) - ALLOCATE(bounds_t(ndims_tensor(tensor_B))) + ALLOCATE (bounds_t(ndims_tensor(tensor_B))) CALL dbcsr_t_get_info(tensor_B, nfull_total=bounds_t) - ALLOCATE(bounds(2,1)) + ALLOCATE (bounds(2, 1)) bounds(1, 1) = 1 bounds(2, 1) = bounds_t(1) - 21 @@ -462,15 +462,15 @@ PROGRAM dbcsr_tensor_unittest CALL dbcsr_t_setup_test_tensor(tensor_B, mp_comm, .FALSE., blk_ind_3_2, blk_ind_4_2) CALL dbcsr_t_setup_test_tensor(tensor_C, mp_comm, .FALSE., blk_ind_1_3, blk_ind_2_3, blk_ind_4_3) - ALLOCATE(bounds_t(ndims_tensor(tensor_C))) + ALLOCATE (bounds_t(ndims_tensor(tensor_C))) CALL dbcsr_t_get_info(tensor_C, nfull_total=bounds_t) - ALLOCATE(bounds(2,2)) + ALLOCATE (bounds(2, 2)) bounds(1, 1) = 4 bounds(2, 1) = bounds_t(1) bounds(1, 2) = 13 bounds(2, 2) = bounds_t(2) - 10 - DEALLOCATE(bounds_t) + DEALLOCATE (bounds_t) CALL dbcsr_t_contract_test(dbcsr_scalar(0.2_real_8), tensor_C, tensor_B, dbcsr_scalar(0.8_real_8), tensor_A, & [3], [1, 2], & @@ -517,30 +517,30 @@ PROGRAM dbcsr_tensor_unittest CALL dbcsr_t_setup_test_tensor(tensor_B, mp_comm, .FALSE., blk_ind_1_4, blk_ind_2_4, blk_ind_4_4, blk_ind_5_4) CALL dbcsr_t_setup_test_tensor(tensor_C, mp_comm, .FALSE., blk_ind_3_5, blk_ind_4_5, blk_ind_5_5) - ALLOCATE(bounds_t(ndims_tensor(tensor_A))) + ALLOCATE (bounds_t(ndims_tensor(tensor_A))) CALL dbcsr_t_get_info(tensor_A, nfull_total=bounds_t) - ALLOCATE(bounds_1(2, 2)) + ALLOCATE (bounds_1(2, 2)) bounds_1(1, 1) = 7 bounds_1(2, 1) = bounds_t(2) - 17 bounds_1(1, 2) = 8 bounds_1(2, 2) = bounds_t(1) - DEALLOCATE(bounds_t) + DEALLOCATE (bounds_t) - ALLOCATE(bounds_t(ndims_tensor(tensor_B))) + ALLOCATE (bounds_t(ndims_tensor(tensor_B))) CALL dbcsr_t_get_info(tensor_B, nfull_total=bounds_t) - ALLOCATE(bounds_2(2, 2)) + ALLOCATE (bounds_2(2, 2)) bounds_2(1, 1) = 1 bounds_2(2, 1) = bounds_t(3) bounds_2(1, 2) = 1 bounds_2(2, 2) = bounds_t(4) - 18 - DEALLOCATE(bounds_t) + DEALLOCATE (bounds_t) CALL dbcsr_t_contract_test(dbcsr_scalar(0.2_real_8), tensor_A, tensor_B, dbcsr_scalar(0.8_real_8), tensor_C, & [2, 1], [3], & [2, 1], [3, 4], & [1], [2, 3], & default_output_unit, & - bounds_1=bounds_1,& + bounds_1=bounds_1, & bounds_3=bounds_2, & log_verbose=verbose, & write_int=.TRUE.) @@ -783,9 +783,9 @@ PROGRAM dbcsr_tensor_unittest DEALLOCATE (blk_ind_1_3, blk_ind_2_3, blk_ind_4_3) DEALLOCATE (blk_ind_1_4, blk_ind_2_4, blk_ind_4_4, blk_ind_5_4) DEALLOCATE (blk_ind_3_5, blk_ind_4_5, blk_ind_5_5) - DEALLOCATE(size_1, size_2, size_3, size_4, size_5, dist1_1, dist1_2, dist1_3, & - dist2_1, dist2_2, dist3_1, dist3_2, dist3_3, dist4_1, dist4_2, & - dist4_3, dist4_4, dist5_1, dist5_2, dist5_3) + DEALLOCATE (size_1, size_2, size_3, size_4, size_5, dist1_1, dist1_2, dist1_3, & + dist2_1, dist2_2, dist3_1, dist3_2, dist3_3, dist4_1, dist4_2, & + dist4_3, dist4_4, dist5_1, dist5_2, dist5_3) CALL dbcsr_t_pgrid_destroy(pgrid_3d) CALL dbcsr_t_pgrid_destroy(pgrid_2d) CALL dbcsr_t_pgrid_destroy(pgrid_4d) From 0a0502faa4b1d6c369a7ac2b5f51b32021b7c650 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tiziano=20M=C3=BCller?= Date: Thu, 9 Apr 2020 11:05:36 +0200 Subject: [PATCH 04/19] docker-build-env: bump libxsmm to 1.15, cmake to 3.17 --- tools/docker/Dockerfile.build-env-ubuntu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/docker/Dockerfile.build-env-ubuntu b/tools/docker/Dockerfile.build-env-ubuntu index 3af7daa1ba0..3df1f5a5895 100644 --- a/tools/docker/Dockerfile.build-env-ubuntu +++ b/tools/docker/Dockerfile.build-env-ubuntu @@ -31,8 +31,8 @@ ENV LANG en_US.utf8 # Many of our tools rely on a `python` executable but are python-3 compatible RUN ln -s python3 /usr/bin/python -ARG libxsmm_version=1.14 -ARG cmake_version=3.16.20200203-g7c93c0e +ARG libxsmm_version=1.15 +ARG cmake_version=3.17.0 ARG ninja_version=1.10.0 RUN set -ex && \ @@ -42,7 +42,7 @@ RUN set -ex && \ git-archive-all RUN set -ex && \ - curl -LsS https://cmake.org/files/dev/cmake-${cmake_version}-Linux-x86_64.tar.gz | tar --strip-components=1 -xz -C /usr/local + curl -LsS https://github.com/Kitware/CMake/releases/download/v${cmake_version}/cmake-${cmake_version}-Linux-x86_64.tar.gz | tar --strip-components=1 -xz -C /usr/local RUN set -ex && \ curl -LsS https://github.com/ninja-build/ninja/releases/download/v${ninja_version}/ninja-linux.zip | funzip > /usr/local/bin/ninja \ From c981c53344a4f74fbd1a7e680c7ca43c0e4583ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tiziano=20M=C3=BCller?= Date: Thu, 9 Apr 2020 11:14:26 +0200 Subject: [PATCH 05/19] pre-commit: use fprettify from repo instead of submodule --- .pre-commit-config.yaml | 9 +- cmake/compiler-tests/f2008-contiguous.f90 | 2 +- cmake/compiler-tests/f2008-norm2.f90 | 6 +- .../f95-reshape-order-allocatable.f90 | 12 +- examples/dbcsr_example_1.F | 12 +- examples/dbcsr_example_2.F | 16 +- examples/dbcsr_example_3.F | 19 +- tools/build_libsmm/lib_gen.f90 | 96 +-- tools/build_libsmm/multrec_gen.f90 | 734 +++++++++--------- tools/build_libsmm/mults.f90 | 412 +++++----- tools/build_libsmm/small_gen.f90 | 150 ++-- tools/build_libsmm/tiny_gen.f90 | 180 ++--- 12 files changed, 823 insertions(+), 825 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 46b96a1031d..dc696949f04 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,6 +14,10 @@ repos: - id: flake8 - id: check-ast - id: check-yaml +- repo: https://github.com/pseewald/fprettify + rev: v0.3.6 + hooks: + - id: fprettify - repo: local hooks: - id: check-header @@ -27,8 +31,3 @@ repos: entry: '^\s*!>' language: pygrep types: [text] - - id: prettify - name: Run prettify - entry: ./tools/fprettify/fprettify.py - language: script - files: '^src/.*\.(F|f90)$' diff --git a/cmake/compiler-tests/f2008-contiguous.f90 b/cmake/compiler-tests/f2008-contiguous.f90 index 24609622dc1..935c07c34e8 100644 --- a/cmake/compiler-tests/f2008-contiguous.f90 +++ b/cmake/compiler-tests/f2008-contiguous.f90 @@ -15,7 +15,7 @@ program main integer, contiguous, pointer :: ptr(:) ! allocated data is always contiguous - allocate(targ(10)) + allocate (targ(10)) ptr => targ ! IS_CONTIGUOUS was implemented in gcc-9 and is therefore not tested for yet diff --git a/cmake/compiler-tests/f2008-norm2.f90 b/cmake/compiler-tests/f2008-norm2.f90 index 42b8ab5a70a..76fdc41a4a1 100644 --- a/cmake/compiler-tests/f2008-norm2.f90 +++ b/cmake/compiler-tests/f2008-norm2.f90 @@ -8,7 +8,7 @@ !--------------------------------------------------------------------------------------------------! program main - implicit none - real :: x(2) = [ real :: 3, 4 ] - if (abs(norm2(x) - 5.) > 1.0D-5) stop 1 + implicit none + real :: x(2) = [real :: 3, 4] + if (abs(norm2(x) - 5.) > 1.0D-5) stop 1 end program diff --git a/cmake/compiler-tests/f95-reshape-order-allocatable.f90 b/cmake/compiler-tests/f95-reshape-order-allocatable.f90 index 787ec241a36..0558af57270 100644 --- a/cmake/compiler-tests/f95-reshape-order-allocatable.f90 +++ b/cmake/compiler-tests/f95-reshape-order-allocatable.f90 @@ -7,12 +7,12 @@ ! SPDX-License-Identifier: GPL-2.0+ ! !--------------------------------------------------------------------------------------------------! program test_reshape - integer, dimension(4) :: x = [1,2,3,4] - integer, dimension(:), allocatable :: order + integer, dimension(4) :: x = [1, 2, 3, 4] + integer, dimension(:), allocatable :: order - allocate(order(2)) - order(:) = [2,1] + allocate (order(2)) + order(:) = [2, 1] - ! PGI <= 19.10 does not accept allocatables for the order parameter - print *, reshape(x, shape=[2,2], order=order) + ! PGI <= 19.10 does not accept allocatables for the order parameter + print *, reshape(x, shape=[2, 2], order=order) end program diff --git a/examples/dbcsr_example_1.F b/examples/dbcsr_example_1.F index fd8e5b9f180..fe3bd3f7a44 100644 --- a/examples/dbcsr_example_1.F +++ b/examples/dbcsr_example_1.F @@ -12,10 +12,10 @@ PROGRAM dbcsr_example_1 !! This example shows how to create a dbcsr matrix USE mpi - USE dbcsr_api, ONLY: & - dbcsr_create, dbcsr_distribution_new, dbcsr_distribution_release, dbcsr_distribution_type, & - dbcsr_finalize, dbcsr_finalize_lib, dbcsr_init_lib, dbcsr_print, dbcsr_release, & - dbcsr_type, dbcsr_type_no_symmetry, dbcsr_type_real_8 + USE dbcsr_api, ONLY: & + dbcsr_create, dbcsr_distribution_new, dbcsr_distribution_release, dbcsr_distribution_type, & + dbcsr_finalize, dbcsr_finalize_lib, dbcsr_init_lib, dbcsr_print, dbcsr_release, & + dbcsr_type, dbcsr_type_no_symmetry, dbcsr_type_real_8 IMPLICIT NONE @@ -34,7 +34,7 @@ PROGRAM dbcsr_example_1 ! ! initialize mpi -!$ IF ( .FALSE. ) THEN +!$ IF (.FALSE.) THEN CALL mpi_init(ierr) IF (ierr /= 0) STOP "Error in MPI_Init" !$ ELSE @@ -140,7 +140,7 @@ SUBROUTINE random_dist(dist_array, dist_size, nbins) ALLOCATE (dist_array(dist_size)) DO i = 1, dist_size - dist_array(i) = MODULO(nbins-i, nbins) + dist_array(i) = MODULO(nbins - i, nbins) END DO END SUBROUTINE random_dist diff --git a/examples/dbcsr_example_2.F b/examples/dbcsr_example_2.F index d2d08110752..1945f1e3c4b 100644 --- a/examples/dbcsr_example_2.F +++ b/examples/dbcsr_example_2.F @@ -12,11 +12,11 @@ PROGRAM dbcsr_example_2 !! This example shows how to set a dbcsr matrix USE mpi - USE dbcsr_api, ONLY: & - dbcsr_create, dbcsr_distribution_get, dbcsr_distribution_new, dbcsr_distribution_release, & - dbcsr_distribution_type, dbcsr_finalize, dbcsr_finalize_lib, dbcsr_get_stored_coordinates, & - dbcsr_init_lib, dbcsr_nblkcols_total, dbcsr_nblkrows_total, dbcsr_print, dbcsr_put_block, & - dbcsr_release, dbcsr_type, dbcsr_type_no_symmetry, dbcsr_type_real_8 + USE dbcsr_api, ONLY: & + dbcsr_create, dbcsr_distribution_get, dbcsr_distribution_new, dbcsr_distribution_release, & + dbcsr_distribution_type, dbcsr_finalize, dbcsr_finalize_lib, dbcsr_get_stored_coordinates, & + dbcsr_init_lib, dbcsr_nblkcols_total, dbcsr_nblkrows_total, dbcsr_print, dbcsr_put_block, & + dbcsr_release, dbcsr_type, dbcsr_type_no_symmetry, dbcsr_type_real_8 IMPLICIT NONE @@ -37,7 +37,7 @@ PROGRAM dbcsr_example_2 ! ! initialize mpi -!$ IF ( .FALSE. ) THEN +!$ IF (.FALSE.) THEN CALL mpi_init(ierr) IF (ierr /= 0) STOP "Error in MPI_Init" !$ ELSE @@ -118,7 +118,7 @@ PROGRAM dbcsr_example_2 ! ! loop over the blocks, build a tridiagonal matrix DO row = 1, dbcsr_nblkrows_total(matrix_a) - DO col = MAX(row-1, 1), MIN(row+1, dbcsr_nblkcols_total(matrix_a)) + DO col = MAX(row - 1, 1), MIN(row + 1, dbcsr_nblkcols_total(matrix_a)) ! ! get the node id that holds this (row, col) block row_s = row; col_s = col @@ -179,7 +179,7 @@ SUBROUTINE random_dist(dist_array, dist_size, nbins) ALLOCATE (dist_array(dist_size)) DO i = 1, dist_size - dist_array(i) = MODULO(nbins-i, nbins) + dist_array(i) = MODULO(nbins - i, nbins) END DO END SUBROUTINE random_dist diff --git a/examples/dbcsr_example_3.F b/examples/dbcsr_example_3.F index 79086782230..62f886397a9 100644 --- a/examples/dbcsr_example_3.F +++ b/examples/dbcsr_example_3.F @@ -12,11 +12,11 @@ PROGRAM dbcsr_example_3 !! This example shows how to multiply two dbcsr matrices USE mpi - USE dbcsr_api, ONLY: & - dbcsr_create, dbcsr_distribution_get, dbcsr_distribution_new, dbcsr_distribution_release, & - dbcsr_distribution_type, dbcsr_finalize, dbcsr_finalize_lib, dbcsr_get_stored_coordinates, & - dbcsr_init_lib, dbcsr_multiply, dbcsr_nblkcols_total, dbcsr_nblkrows_total, dbcsr_print, & - dbcsr_put_block, dbcsr_release, dbcsr_type, dbcsr_type_no_symmetry, dbcsr_type_real_8 + USE dbcsr_api, ONLY: & + dbcsr_create, dbcsr_distribution_get, dbcsr_distribution_new, dbcsr_distribution_release, & + dbcsr_distribution_type, dbcsr_finalize, dbcsr_finalize_lib, dbcsr_get_stored_coordinates, & + dbcsr_init_lib, dbcsr_multiply, dbcsr_nblkcols_total, dbcsr_nblkrows_total, dbcsr_print, & + dbcsr_put_block, dbcsr_release, dbcsr_type, dbcsr_type_no_symmetry, dbcsr_type_real_8 IMPLICIT NONE @@ -38,7 +38,7 @@ PROGRAM dbcsr_example_3 ! ! initialize mpi -!$ IF ( .FALSE. ) THEN +!$ IF (.FALSE.) THEN CALL mpi_init(ierr) IF (ierr /= 0) STOP "Error in MPI_Init" !$ ELSE @@ -127,7 +127,7 @@ PROGRAM dbcsr_example_3 CALL dbcsr_distribution_get(dist, mynode=mynode) ALLOCATE (values(max_nze)) DO row = 1, dbcsr_nblkrows_total(matrix_a) - DO col = MAX(row-1, 1), MIN(row+1, dbcsr_nblkcols_total(matrix_a)) + DO col = MAX(row - 1, 1), MIN(row + 1, dbcsr_nblkcols_total(matrix_a)) row_s = row; col_s = col CALL dbcsr_get_stored_coordinates(matrix_a, row_s, col_s, node_holds_blk) IF (node_holds_blk .EQ. mynode) THEN @@ -144,7 +144,7 @@ PROGRAM dbcsr_example_3 CALL dbcsr_distribution_get(dist, mynode=mynode) ALLOCATE (values(max_nze)) DO row = 1, dbcsr_nblkrows_total(matrix_b) - DO col = MAX(row-1, 1), MIN(row+1, dbcsr_nblkcols_total(matrix_b)) + DO col = MAX(row - 1, 1), MIN(row + 1, dbcsr_nblkcols_total(matrix_b)) row_s = row; col_s = col CALL dbcsr_get_stored_coordinates(matrix_b, row_s, col_s, node_holds_blk) IF (node_holds_blk .EQ. mynode) THEN @@ -181,7 +181,6 @@ PROGRAM dbcsr_example_3 CALL dbcsr_distribution_release(dist) DEALLOCATE (row_blk_sizes, col_blk_sizes) - ! free comm CALL mpi_comm_free(group, ierr) IF (ierr /= 0) STOP "Error in MPI_Comm_free" @@ -205,7 +204,7 @@ SUBROUTINE random_dist(dist_array, dist_size, nbins) ALLOCATE (dist_array(dist_size)) DO i = 1, dist_size - dist_array(i) = MODULO(nbins-i, nbins) + dist_array(i) = MODULO(nbins - i, nbins) END DO END SUBROUTINE random_dist diff --git a/tools/build_libsmm/lib_gen.f90 b/tools/build_libsmm/lib_gen.f90 index b2936be6c5b..d361dda148d 100644 --- a/tools/build_libsmm/lib_gen.f90 +++ b/tools/build_libsmm/lib_gen.f90 @@ -2,68 +2,68 @@ PROGRAM lib_gen USE mults USE multrec_gen IMPLICIT NONE - - INTEGER :: M,N,K - CHARACTER(LEN=1024) :: arg,filename_small,filename_tiny,line,label + + INTEGER :: M, N, K + CHARACTER(LEN=1024) :: arg, filename_small, filename_tiny, line, label REAL :: tmp - INTEGER, DIMENSION(:,:), ALLOCATABLE :: small_opts + INTEGER, DIMENSION(:, :), ALLOCATABLE :: small_opts REAL, DIMENSION(:), ALLOCATABLE :: small_perf - INTEGER :: opt,iline,nline,transpose_flavor,data_type, SIMD_size + INTEGER :: opt, iline, nline, transpose_flavor, data_type, SIMD_size CHARACTER(LEN=10), PARAMETER :: stack_size_label = "stack_size" - - CALL GET_COMMAND_ARGUMENT(1,arg) - READ(arg,*) M - CALL GET_COMMAND_ARGUMENT(2,arg) - READ(arg,*) N - CALL GET_COMMAND_ARGUMENT(3,arg) - READ(arg,*) K - CALL GET_COMMAND_ARGUMENT(4,arg) - READ(arg,*) transpose_flavor - CALL GET_COMMAND_ARGUMENT(5,arg) - READ(arg,*) data_type - CALL GET_COMMAND_ARGUMENT(6,arg) - READ(arg,*) SIMD_size - CALL GET_COMMAND_ARGUMENT(7,filename_small) - CALL GET_COMMAND_ARGUMENT(8,filename_tiny) + + CALL GET_COMMAND_ARGUMENT(1, arg) + READ (arg, *) M + CALL GET_COMMAND_ARGUMENT(2, arg) + READ (arg, *) N + CALL GET_COMMAND_ARGUMENT(3, arg) + READ (arg, *) K + CALL GET_COMMAND_ARGUMENT(4, arg) + READ (arg, *) transpose_flavor + CALL GET_COMMAND_ARGUMENT(5, arg) + READ (arg, *) data_type + CALL GET_COMMAND_ARGUMENT(6, arg) + READ (arg, *) SIMD_size + CALL GET_COMMAND_ARGUMENT(7, filename_small) + CALL GET_COMMAND_ARGUMENT(8, filename_tiny) ! ! filename is the result of small optimization (cat small_gen_optimal.out ) ! 6 13 6 4 0.756046 6.613 ! - OPEN(UNIT=10,FILE=filename_small) - REWIND(10) - nline=0 + OPEN (UNIT=10, FILE=filename_small) + REWIND (10) + nline = 0 DO - READ(10,*,END=999) line - nline=nline+1 + READ (10, *, END=999) line + nline = nline + 1 ENDDO -999 CONTINUE - ALLOCATE(small_opts(4,nline)) - ALLOCATE(small_perf(nline)) - REWIND(10) - DO iline=1,nline - READ(10,'(A1024)',END=999) line - READ(line,*) small_opts(:,iline),tmp,small_perf(iline) +999 CONTINUE + ALLOCATE (small_opts(4, nline)) + ALLOCATE (small_perf(nline)) + REWIND (10) + DO iline = 1, nline + READ (10, '(A1024)', END=999) line + READ (line, *) small_opts(:, iline), tmp, small_perf(iline) ENDDO - CLOSE(10) + CLOSE (10) - CALL find_small_opt(opt,small_opts,m,n,k) + CALL find_small_opt(opt, small_opts, m, n, k) - label="" - CALL mult_versions(M,N,K,opt,label,transpose_flavor,data_type,SIMD_size,filename_tiny,stack_size_label,.TRUE.) + label = "" + CALL mult_versions(M, N, K, opt, label, transpose_flavor, data_type, SIMD_size, filename_tiny, stack_size_label, .TRUE.) CONTAINS - SUBROUTINE find_small_opt(opt,small_opts,m,n,k) - INTEGER, INTENT(OUT) :: opt - INTEGER, DIMENSION(:,:) :: small_opts - INTEGER :: m,n,k - INTEGER :: i - ! by default we call dgemm (but this should never happen) - opt=3 - DO i=1,SIZE(small_opts,2) - IF (ALL(small_opts(1:3,i)==(/m,n,k/))) opt=small_opts(4,i) - ENDDO - END SUBROUTINE find_small_opt - + SUBROUTINE find_small_opt(opt, small_opts, m, n, k) + INTEGER, INTENT(OUT) :: opt + INTEGER, DIMENSION(:, :) :: small_opts + INTEGER :: m, n, k + INTEGER :: i + ! by default we call dgemm (but this should never happen) + opt = 3 + DO i = 1, SIZE(small_opts, 2) + IF (ALL(small_opts(1:3, i) == (/m, n, k/))) opt = small_opts(4, i) + ENDDO + END SUBROUTINE find_small_opt + END PROGRAM lib_gen diff --git a/tools/build_libsmm/multrec_gen.f90 b/tools/build_libsmm/multrec_gen.f90 index 8883326d2e4..a78405d5a56 100644 --- a/tools/build_libsmm/multrec_gen.f90 +++ b/tools/build_libsmm/multrec_gen.f90 @@ -12,375 +12,375 @@ ! 9) libxsmm ! MODULE multrec_gen - USE mults - IMPLICIT NONE + USE mults + IMPLICIT NONE CONTAINS - SUBROUTINE find_tiny_opts(opts,tiny_opts,m,n,k) - INTEGER, INTENT(OUT) :: opts(4) - INTEGER, DIMENSION(:,:) :: tiny_opts - INTEGER :: m,n,k - INTEGER :: i - opts=(/5,1,1,1/) - DO i=1,SIZE(tiny_opts,2) - IF (ALL(tiny_opts(1:3,i)==(/m,n,k/))) opts=tiny_opts(4:7,i) - ENDDO - END SUBROUTINE find_tiny_opts - - RECURSIVE SUBROUTINE MULTREC(mi,mf,ni,nf,ki,kf,block_size,tiny_opts,transpose_flavor,data_type) - INTEGER :: mi,mf,ni,nf,ki,kf,block_size, tiny_opts(:,:), transpose_flavor,data_type - INTEGER :: M,N,K,opts(4) - INTEGER :: cut,s1 - - M=mf-mi+1 - N=nf-ni+1 - K=kf-ki+1 - - ! small sizes are done directly, otherwise we recurse - IF (M<=block_size .AND. N<=block_size .AND. K<=block_size) THEN - CALL find_tiny_opts(opts,tiny_opts,m,n,k) - CALL smm_inner(mi,mf,ni,nf,ki,kf,opts(1),opts(2),opts(3),opts(4),transpose_flavor,data_type) - ELSE - ! a three case recursion - IF (M>=MAX(N,K)) cut=1 - IF (K>=MAX(N,M)) cut=2 - IF (N>=MAX(M,K)) cut=3 - SELECT CASE(cut) - CASE(1) - s1=((M/2+block_size-1)/block_size)*block_size - CALL MULTREC(mi,mi+s1-1,ni,nf,ki,kf,block_size,tiny_opts,transpose_flavor,data_type) - CALL MULTREC(mi+s1,mf,ni,nf,ki,kf,block_size,tiny_opts,transpose_flavor,data_type) - CASE(2) - s1=((K/2+block_size-1)/block_size)*block_size - CALL MULTREC(mi,mf,ni,nf,ki,ki+s1-1,block_size,tiny_opts,transpose_flavor,data_type) - CALL MULTREC(mi,mf,ni,nf,ki+s1,kf,block_size,tiny_opts,transpose_flavor,data_type) - CASE(3) - s1=((N/2+block_size-1)/block_size)*block_size - CALL MULTREC(mi,mf,ni,ni+s1-1,ki,kf,block_size,tiny_opts,transpose_flavor,data_type) - CALL MULTREC(mi,mf,ni+s1,nf,ki,kf,block_size,tiny_opts,transpose_flavor,data_type) - END SELECT - ENDIF - END SUBROUTINE MULTREC - - FUNCTION trsum(last) - LOGICAL :: last - CHARACTER(LEN=25) :: trsum - IF (last) THEN - trsum="" - ELSE - trsum="+ &" - ENDIF - END FUNCTION trsum - - SUBROUTINE write_subroutine_stack(label,M,N,K,transpose_flavor,data_type,version,stack_size_label,Cbuffer_row,Cbuffer_col) - CHARACTER(LEN=*) :: label - INTEGER :: M,N,K,transpose_flavor,data_type,version - CHARACTER(LEN=*), OPTIONAL :: stack_size_label - INTEGER, OPTIONAL :: Cbuffer_row, Cbuffer_col - - IF (PRESENT(stack_size_label)) THEN - IF (stack_size_label/="") THEN - write(6,'(A)') "#ifdef __INTEL_OFFLOAD" - IF (PRESENT(Cbuffer_row).AND.PRESENT(Cbuffer_col)) THEN - write(6,'(A,I0,A,I0,A,I0,A)') "!dir$ attributes offload:mic :: smm_" & - //trstr(transpose_flavor,data_type)//"_", & - M,"_",N,"_",K,TRIM(label)//"_buffer" - ELSE - write(6,'(A,I0,A,I0,A,I0,A)') "!dir$ attributes offload:mic :: smm_" & - //trstr(transpose_flavor,data_type)//"_", & - M,"_",N,"_",K,TRIM(label) - ENDIF - write(6,'(A)') "#endif" - write(6,'(A,I0,A,I0,A,I0,A)') " SUBROUTINE smm_"//trstr(transpose_flavor,data_type)//"_",& - M,"_",N,"_",K,"_stack"//TRIM(label)//"("//TRIM(trparam(stack_size_label))//")" - CALL write_stack_params(data_type,stack_size_label) - write(6,'(A)') " INTEGER :: sp" - IF (PRESENT(Cbuffer_row).AND.PRESENT(Cbuffer_col)) THEN - write(6,'(A,I0,A,I0,A)') " "//trdat(data_type,.FALSE.)//":: Cbuffer(",Cbuffer_row,",",Cbuffer_col,")" - ENDIF - IF (version.eq.9) THEN - write (6,'(A)') " INTERFACE" - write(6,'(A,I0,A,I0,A,I0,A)') " SUBROUTINE smm_"//trstr(transpose_flavor,data_type)//"_",& - M,"_",N,"_",K,TRIM(label)//"(A,B,C)" - CALL write_matrix_defs(M,N,K,transpose_flavor,data_type,.TRUE.,version.eq.9) - write(6,'(A)') " END SUBROUTINE" - write (6,'(A)') " END INTERFACE" - ENDIF - write(6,'(A)') " DO sp = 1, "//TRIM(stack_size_label) - IF (PRESENT(Cbuffer_row).AND.PRESENT(Cbuffer_col)) THEN - write(6,'(A,I0,A,I0,A,I0,A)') " CALL smm_"//trstr(transpose_flavor,data_type)//"_",& - M,"_",N,"_",K,& + SUBROUTINE find_tiny_opts(opts, tiny_opts, m, n, k) + INTEGER, INTENT(OUT) :: opts(4) + INTEGER, DIMENSION(:, :) :: tiny_opts + INTEGER :: m, n, k + INTEGER :: i + opts = (/5, 1, 1, 1/) + DO i = 1, SIZE(tiny_opts, 2) + IF (ALL(tiny_opts(1:3, i) == (/m, n, k/))) opts = tiny_opts(4:7, i) + ENDDO + END SUBROUTINE find_tiny_opts + + RECURSIVE SUBROUTINE MULTREC(mi, mf, ni, nf, ki, kf, block_size, tiny_opts, transpose_flavor, data_type) + INTEGER :: mi, mf, ni, nf, ki, kf, block_size, tiny_opts(:, :), transpose_flavor, data_type + INTEGER :: M, N, K, opts(4) + INTEGER :: cut, s1 + + M = mf - mi + 1 + N = nf - ni + 1 + K = kf - ki + 1 + + ! small sizes are done directly, otherwise we recurse + IF (M <= block_size .AND. N <= block_size .AND. K <= block_size) THEN + CALL find_tiny_opts(opts, tiny_opts, m, n, k) + CALL smm_inner(mi, mf, ni, nf, ki, kf, opts(1), opts(2), opts(3), opts(4), transpose_flavor, data_type) + ELSE + ! a three case recursion + IF (M >= MAX(N, K)) cut = 1 + IF (K >= MAX(N, M)) cut = 2 + IF (N >= MAX(M, K)) cut = 3 + SELECT CASE (cut) + CASE (1) + s1 = ((M/2 + block_size - 1)/block_size)*block_size + CALL MULTREC(mi, mi + s1 - 1, ni, nf, ki, kf, block_size, tiny_opts, transpose_flavor, data_type) + CALL MULTREC(mi + s1, mf, ni, nf, ki, kf, block_size, tiny_opts, transpose_flavor, data_type) + CASE (2) + s1 = ((K/2 + block_size - 1)/block_size)*block_size + CALL MULTREC(mi, mf, ni, nf, ki, ki + s1 - 1, block_size, tiny_opts, transpose_flavor, data_type) + CALL MULTREC(mi, mf, ni, nf, ki + s1, kf, block_size, tiny_opts, transpose_flavor, data_type) + CASE (3) + s1 = ((N/2 + block_size - 1)/block_size)*block_size + CALL MULTREC(mi, mf, ni, ni + s1 - 1, ki, kf, block_size, tiny_opts, transpose_flavor, data_type) + CALL MULTREC(mi, mf, ni + s1, nf, ki, kf, block_size, tiny_opts, transpose_flavor, data_type) + END SELECT + ENDIF + END SUBROUTINE MULTREC + + FUNCTION trsum(last) + LOGICAL :: last + CHARACTER(LEN=25) :: trsum + IF (last) THEN + trsum = "" + ELSE + trsum = "+ &" + ENDIF + END FUNCTION trsum + + SUBROUTINE write_subroutine_stack(label, M, N, K, transpose_flavor, data_type, version, stack_size_label, Cbuffer_row, Cbuffer_col) + CHARACTER(LEN=*) :: label + INTEGER :: M, N, K, transpose_flavor, data_type, version + CHARACTER(LEN=*), OPTIONAL :: stack_size_label + INTEGER, OPTIONAL :: Cbuffer_row, Cbuffer_col + + IF (PRESENT(stack_size_label)) THEN + IF (stack_size_label /= "") THEN + write (6, '(A)') "#ifdef __INTEL_OFFLOAD" + IF (PRESENT(Cbuffer_row) .AND. PRESENT(Cbuffer_col)) THEN + write (6, '(A,I0,A,I0,A,I0,A)') "!dir$ attributes offload:mic :: smm_" & + //trstr(transpose_flavor, data_type)//"_", & + M, "_", N, "_", K, TRIM(label)//"_buffer" + ELSE + write (6, '(A,I0,A,I0,A,I0,A)') "!dir$ attributes offload:mic :: smm_" & + //trstr(transpose_flavor, data_type)//"_", & + M, "_", N, "_", K, TRIM(label) + ENDIF + write (6, '(A)') "#endif" + write (6, '(A,I0,A,I0,A,I0,A)') " SUBROUTINE smm_"//trstr(transpose_flavor, data_type)//"_", & + M, "_", N, "_", K, "_stack"//TRIM(label)//"("//TRIM(trparam(stack_size_label))//")" + CALL write_stack_params(data_type, stack_size_label) + write (6, '(A)') " INTEGER :: sp" + IF (PRESENT(Cbuffer_row) .AND. PRESENT(Cbuffer_col)) THEN + write (6, '(A,I0,A,I0,A)') " "//trdat(data_type, .FALSE.)//":: Cbuffer(", Cbuffer_row, ",", Cbuffer_col, ")" + ENDIF + IF (version .eq. 9) THEN + write (6, '(A)') " INTERFACE" + write (6, '(A,I0,A,I0,A,I0,A)') " SUBROUTINE smm_"//trstr(transpose_flavor, data_type)//"_", & + M, "_", N, "_", K, TRIM(label)//"(A,B,C)" + CALL write_matrix_defs(M, N, K, transpose_flavor, data_type, .TRUE., version .eq. 9) + write (6, '(A)') " END SUBROUTINE" + write (6, '(A)') " END INTERFACE" + ENDIF + write (6, '(A)') " DO sp = 1, "//TRIM(stack_size_label) + IF (PRESENT(Cbuffer_row) .AND. PRESENT(Cbuffer_col)) THEN + write (6, '(A,I0,A,I0,A,I0,A)') " CALL smm_"//trstr(transpose_flavor, data_type)//"_", & + M, "_", N, "_", K, & TRIM(label)//"_buffer(A(params(p_a_first,sp)),B(params(p_b_first,sp)),C(params(p_c_first,sp)),Cbuffer)" - ELSE - write(6,'(A,I0,A,I0,A,I0,A)') " CALL smm_"//trstr(transpose_flavor,data_type)//"_",& - M,"_",N,"_",K,& + ELSE + write (6, '(A,I0,A,I0,A,I0,A)') " CALL smm_"//trstr(transpose_flavor, data_type)//"_", & + M, "_", N, "_", K, & TRIM(label)//"(A(params(p_a_first,sp)),B(params(p_b_first,sp)),C(params(p_c_first,sp)))" - ENDIF - write(6,'(A)') " ENDDO" - write(6,'(A)') " END SUBROUTINE" - ENDIF - ENDIF - END SUBROUTINE write_subroutine_stack - - SUBROUTINE MULTVECTOR(label,M,N,K,transpose_flavor,data_type,nSIMD,stride,stack_size_label) - INTEGER :: M,N,K,sj,je,ji,sl,le,li - INTEGER :: transpose_flavor,data_type,nSIMD,stride - INTEGER :: multElements,modElements - CHARACTER(LEN=*) :: label - CHARACTER(LEN=*), OPTIONAL :: stack_size_label - LOGICAL :: do_stack - - multElements=(M/nSIMD)*nSIMD - modElements=MOD(M,nSIMD) - - IF (modElements>0) THEN - do_stack=.FALSE. - IF (PRESENT(stack_size_label)) THEN - IF (stack_size_label/="") do_stack=.TRUE. - ENDIF - IF (do_stack) THEN - write(6,'(A,I0,A,I0,A,I0,A)') " PURE SUBROUTINE smm_"//trstr(transpose_flavor,data_type)//"_",& - M,"_",N,"_",K,TRIM(label)//"_buffer(A,B,C,Cbuffer)" - write(6,'(A,I0,A,I0,A)') " "//trdat(data_type,.FALSE.,"INOUT")//" :: Cbuffer(",nSIMD,",",MIN(stride,N),")" - ELSE - write(6,'(A,I0,A,I0,A,I0,A)') " PURE SUBROUTINE smm_"//trstr(transpose_flavor,data_type)//"_",& - M,"_",N,"_",K,TRIM(label)//"(A,B,C)" - write(6,'(A,I0,A,I0,A)') " "//trdat(data_type,.FALSE.)//" :: Cbuffer(",nSIMD,",",MIN(stride,N),")" - ENDIF - ELSE - if (PRESENT(stack_size_label)) THEN - write(6,'(A,I0,A,I0,A,I0,A)') " PURE SUBROUTINE smm_"//trstr(transpose_flavor,data_type)//"_",& - M,"_",N,"_",K,TRIM(label)//"(A,B,C)" - ELSE - RETURN - ENDIF - ENDIF - - CALL write_matrix_defs(M,N,K,transpose_flavor,data_type,.TRUE.,.FALSE.) - - write(6,'(A)') " INTEGER :: i" - - sj=stride ! blocking dimension in N - sl=stride ! blocking dimension in K - - DO je=1,N,sj - - DO le=1,K,sl - - IF (multElements>0) THEN - write(6,'(A,I0,A,I0,A,I0)') " DO i=",1,",",multElements,",",1 - DO ji=je,MIN(je+sj-1,N),1 - write(6,'(A,I0,A,I0,A)') " C(i,",ji,")=C(i,",ji,")+ &" - DO li=le,MIN(le+sl-1,K),1 - write (6,'(A,I0,A,I0,A,I0,A)') " A(i,",& - li,")*B(",li,",",ji,")"//trsum(li==MIN(le+sl-1,K)) - ENDDO - ENDDO - write(6,'(A)') " ENDDO " - ENDIF - - ! consider remaining elements - IF (modElements>0) THEN - write(6,'(A,I0,A,I0,A,I0)') " DO i=",1,",",nSIMD,",",1 - DO ji=je,MIN(je+sj-1,N),1 - IF (le>1) THEN - write(6,'(A,I0,A,I0,A)') " Cbuffer(i,",& - MOD(ji-1,sj)+1,")=Cbuffer(i,",MOD(ji-1,sj)+1,")+ &" - ELSE - write(6,'(A,I0,A,I0,A,I0,A)') " Cbuffer(i,",& - MOD(ji-1,sj)+1,")=C(i+",multElements,",",ji,")+ &" - ENDIF - - DO li=le,MIN(le+sl-1,K),1 - write (6,'(A,I0,A,I0,A,I0,A,I0,A)') " A(i+",& - multElements,",",li,")*B(",li,",",ji,")"//trsum(li==MIN(le+sl-1,K)) - ENDDO - ENDDO - write(6,'(A)') " ENDDO " - - ENDIF - ENDDO - - ! copy the remaining elements - IF (modElements>0) THEN - write(6,'(A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A)') " C(",& - 1+multElements,":",M,",",je,":",MIN(je+sj-1,N),")=Cbuffer(", & - 1,":",modElements,",1:",MIN(je+sj,N+1)-je,")" - - ENDIF - - ENDDO - - write(6,'(A)') " END SUBROUTINE" - - if (PRESENT(stack_size_label)) THEN - - IF (modElements>0.AND.nSIMD>0.AND.stride>0) THEN - CALL write_subroutine_stack(label,M,N,K,transpose_flavor,data_type,8,stack_size_label,nSIMD,MIN(stride,N)) - ELSE - CALL write_subroutine_stack(label,M,N,K,transpose_flavor,data_type,8,stack_size_label) - ENDIF - ENDIF - - END SUBROUTINE MULTVECTOR - - SUBROUTINE mult_versions(M,N,K,version,label,transpose_flavor,data_type,SIMD_size,filename,& - stack_size_label,write_buffer_interface) - INTEGER :: M,N,K,version,transpose_flavor,data_type,SIMD_size - INTEGER, ALLOCATABLE, DIMENSION(:,:) :: tiny_opts - INTEGER :: best_square(4) - REAL, ALLOCATABLE, DIMENSION(:) :: tiny_perf,square_perf - CHARACTER(LEN=1024) :: filename,line - CHARACTER(LEN=*), OPTIONAL :: stack_size_label - LOGICAL, OPTIONAL :: write_buffer_interface - CHARACTER(LEN=*) :: label - INTEGER :: opts(4),blocksize,i,iline,nline,max_dim,isquare - REAL :: tmp - INTEGER :: size_type, nSIMD - INTEGER, PARAMETER :: stride=8 ! used for the unrolling - size_type=0; nSIMD=0 - - ! only in the case of SIMD_size=32(i.e. AVX/AVX2) and SIMD_size=64(i.e. KNC/AVX512) - IF ((SIMD_size==32 .OR. SIMD_size==64) .AND. transpose_flavor==1 .AND. data_type<=2 .AND. & - (LABEL=="" .OR. version==8)) THEN - - SELECT CASE(data_type) - CASE(1) - size_type=8 !double precision bytes - CASE(2) - size_type=4 !single precision bytes - END SELECT - - nSIMD=SIMD_size/size_type - ENDIF - - ! - ! filename is the result of tiny optimization (cat tiny_gen_optimal.out) - ! 1 1 1 5 1 1 1 0.376023 0.532 - ! - OPEN(UNIT=10,FILE=filename) - REWIND(10) - nline=0 - DO - READ(10,*,END=999) line - nline=nline+1 - ENDDO -999 CONTINUE - ALLOCATE(tiny_opts(7,nline)) - ALLOCATE(tiny_perf(nline)) - REWIND(10) - DO iline=1,nline - READ(10,'(A1024)',END=999) line - READ(line,*) tiny_opts(:,iline),tmp,tiny_perf(iline) - ENDDO - CLOSE(10) - - ! find square sizes that give good performance with tiny opts - max_dim=MAXVAL(tiny_opts(1:3,:)) - ALLOCATE(square_perf(max_dim)) - square_perf=-1 - DO iline=1,nline - IF (tiny_opts(1,iline)==tiny_opts(2,iline) .AND. tiny_opts(1,iline)==tiny_opts(3,iline)) THEN - square_perf(tiny_opts(1,iline))=tiny_perf(iline) - ENDIF - ENDDO - best_square=-1 - DO isquare=1,SIZE(best_square) - tmp=-HUGE(tmp) - DO i=1,max_dim - IF (square_perf(i)>tmp .AND. .NOT. ANY(best_square.EQ.i)) THEN - tmp=square_perf(i) - best_square(isquare)=i - ENDIF - ENDDO - ENDDO - IF (ANY(best_square<1)) ERROR STOP "tiny opts file needs sufficiently many square sizes" - - IF ((version.ge.1.and.version.le.7).or.version.eq.9) THEN - IF (version.ne.3.and.version.ne.9) THEN - write(6,'(A,I0,A,I0,A,I0,A)') " PURE SUBROUTINE smm_"//trstr(transpose_flavor,data_type)//"_",& - M,"_",N,"_",K,TRIM(label)//"(A,B,C)" - ELSE - write(6,'(A,I0,A,I0,A,I0,A)') " SUBROUTINE smm_"//trstr(transpose_flavor,data_type)//"_",& - M,"_",N,"_",K,TRIM(label)//"(A,B,C)" - IF (version.eq.9) THEN - write (6,'(A)') " USE, INTRINSIC :: ISO_C_BINDING" - ENDIF - ENDIF - CALL write_matrix_defs(M,N,K,transpose_flavor,data_type,.TRUE.,version.eq.9) - ENDIF - - SELECT CASE(version) - CASE(1) - ! generation of the tiny version - write(6,'(A)') " INTEGER ::i,j,l" - CALL find_tiny_opts(opts,tiny_opts,m,n,k) - CALL smm_inner(1,M,1,N,1,K,opts(1),opts(2),opts(3),opts(4),transpose_flavor,data_type) - CASE(2) - ! generation of the matmul version - SELECT CASE(transpose_flavor) - CASE(1) - write(6,'(A)') " C = C + MATMUL(A,B) ! so easy" - CASE(2) - write(6,'(A)') " C = C + MATMUL(TRANSPOSE(A),B) ! so easy" - CASE(3) - write(6,'(A)') " C = C + MATMUL(A,TRANSPOSE(B)) ! so easy" - CASE(4) - write(6,'(A)') " C = C + MATMUL(TRANSPOSE(A),TRANSPOSE(B)) ! so easy" - END SELECT - CASE(3) - ! generation of the gemm version - WRITE(6,'(A)') " "//trdat(data_type,.FALSE.)//", PARAMETER :: one=1" - write(6,'(A)') "#ifdef __INTEL_OFFLOAD" - write(6,'(A)') "!dir$ attributes offload:mic :: "//trgemm(data_type) - write(6,'(A)') "#endif" - SELECT CASE(transpose_flavor) - CASE(1) - write(6,'(A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A)') & - " CALL "//trgemm(data_type)//"('N','N',",M,",",N,",",K,",one,A,",M,",B,",K,",one,C,",M,")" - CASE(2) - write(6,'(A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A)') & - " CALL "//trgemm(data_type)//"('T','N',",M,",",N,",",K,",one,A,",K,",B,",K,",one,C,",M,")" - CASE(3) - write(6,'(A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A)') & - " CALL "//trgemm(data_type)//"('N','T',",M,",",N,",",K,",one,A,",M,",B,",N,",one,C,",M,")" - CASE(4) - write(6,'(A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A)') & - " CALL "//trgemm(data_type)//"('T','T',",M,",",N,",",K,",one,A,",K,",B,",N,",one,C,",M,")" - END SELECT - CASE(4,5,6,7) - isquare=version-3 - ! generation of the multrec versions - write(6,'(A)') " INTEGER ::i,j,l" - blocksize=best_square(isquare) - CALL MULTREC(1,M,1,N,1,K,blocksize,tiny_opts,transpose_flavor,data_type) - CASE(8) - ! generation of the vector version - IF (nSIMD>0) THEN - CALL MULTVECTOR(label,M,N,K,transpose_flavor,data_type,nSIMD,stride,stack_size_label) - IF (PRESENT(write_buffer_interface)) THEN - IF (write_buffer_interface) THEN - CALL MULTVECTOR(label,M,N,K,transpose_flavor,data_type,nSIMD,stride) - ENDIF - ENDIF - - ENDIF - CASE(9) - write (6,'(A)') " INTERFACE" - write (6,'(A,I0,A,I0,A,I0,A)') " SUBROUTINE libxsmm_",M,"_",N,"_",K,"(A,B,C) BIND(C)" - write (6,'(A)') " USE, INTRINSIC :: ISO_C_BINDING, ONLY : C_PTR" - write (6,'(A)') " TYPE(C_PTR), VALUE :: C, B, A" - write (6,'(A,I0,A,I0,A,I0)') " END SUBROUTINE libxsmm_",M,"_",N,"_",K - write (6,'(A)') " END INTERFACE" - write (6,'(A,I0,A,I0,A,I0,A)') " CALL libxsmm_",M,"_",N,"_",K,"(C_LOC(A),C_LOC(B),C_LOC(C))" - CASE DEFAULT - ERROR STOP "MISSING CASE mult_versions" - END SELECT - - IF ((version.ge.1.and.version.le.7).or.version.eq.9) THEN - write(6,'(A)') " END SUBROUTINE" - CALL write_subroutine_stack(label,M,N,K,transpose_flavor,data_type,version,stack_size_label) - ENDIF - - END SUBROUTINE mult_versions + ENDIF + write (6, '(A)') " ENDDO" + write (6, '(A)') " END SUBROUTINE" + ENDIF + ENDIF + END SUBROUTINE write_subroutine_stack + + SUBROUTINE MULTVECTOR(label, M, N, K, transpose_flavor, data_type, nSIMD, stride, stack_size_label) + INTEGER :: M, N, K, sj, je, ji, sl, le, li + INTEGER :: transpose_flavor, data_type, nSIMD, stride + INTEGER :: multElements, modElements + CHARACTER(LEN=*) :: label + CHARACTER(LEN=*), OPTIONAL :: stack_size_label + LOGICAL :: do_stack + + multElements = (M/nSIMD)*nSIMD + modElements = MOD(M, nSIMD) + + IF (modElements > 0) THEN + do_stack = .FALSE. + IF (PRESENT(stack_size_label)) THEN + IF (stack_size_label /= "") do_stack = .TRUE. + ENDIF + IF (do_stack) THEN + write (6, '(A,I0,A,I0,A,I0,A)') " PURE SUBROUTINE smm_"//trstr(transpose_flavor, data_type)//"_", & + M, "_", N, "_", K, TRIM(label)//"_buffer(A,B,C,Cbuffer)" + write (6, '(A,I0,A,I0,A)') " "//trdat(data_type, .FALSE., "INOUT")//" :: Cbuffer(", nSIMD, ",", MIN(stride, N), ")" + ELSE + write (6, '(A,I0,A,I0,A,I0,A)') " PURE SUBROUTINE smm_"//trstr(transpose_flavor, data_type)//"_", & + M, "_", N, "_", K, TRIM(label)//"(A,B,C)" + write (6, '(A,I0,A,I0,A)') " "//trdat(data_type, .FALSE.)//" :: Cbuffer(", nSIMD, ",", MIN(stride, N), ")" + ENDIF + ELSE + if (PRESENT(stack_size_label)) THEN + write (6, '(A,I0,A,I0,A,I0,A)') " PURE SUBROUTINE smm_"//trstr(transpose_flavor, data_type)//"_", & + M, "_", N, "_", K, TRIM(label)//"(A,B,C)" + ELSE + RETURN + ENDIF + ENDIF + + CALL write_matrix_defs(M, N, K, transpose_flavor, data_type, .TRUE., .FALSE.) + + write (6, '(A)') " INTEGER :: i" + + sj = stride ! blocking dimension in N + sl = stride ! blocking dimension in K + + DO je = 1, N, sj + + DO le = 1, K, sl + + IF (multElements > 0) THEN + write (6, '(A,I0,A,I0,A,I0)') " DO i=", 1, ",", multElements, ",", 1 + DO ji = je, MIN(je + sj - 1, N), 1 + write (6, '(A,I0,A,I0,A)') " C(i,", ji, ")=C(i,", ji, ")+ &" + DO li = le, MIN(le + sl - 1, K), 1 + write (6, '(A,I0,A,I0,A,I0,A)') " A(i,", & + li, ")*B(", li, ",", ji, ")"//trsum(li == MIN(le + sl - 1, K)) + ENDDO + ENDDO + write (6, '(A)') " ENDDO " + ENDIF + + ! consider remaining elements + IF (modElements > 0) THEN + write (6, '(A,I0,A,I0,A,I0)') " DO i=", 1, ",", nSIMD, ",", 1 + DO ji = je, MIN(je + sj - 1, N), 1 + IF (le > 1) THEN + write (6, '(A,I0,A,I0,A)') " Cbuffer(i,", & + MOD(ji - 1, sj) + 1, ")=Cbuffer(i,", MOD(ji - 1, sj) + 1, ")+ &" + ELSE + write (6, '(A,I0,A,I0,A,I0,A)') " Cbuffer(i,", & + MOD(ji - 1, sj) + 1, ")=C(i+", multElements, ",", ji, ")+ &" + ENDIF + + DO li = le, MIN(le + sl - 1, K), 1 + write (6, '(A,I0,A,I0,A,I0,A,I0,A)') " A(i+", & + multElements, ",", li, ")*B(", li, ",", ji, ")"//trsum(li == MIN(le + sl - 1, K)) + ENDDO + ENDDO + write (6, '(A)') " ENDDO " + + ENDIF + ENDDO + + ! copy the remaining elements + IF (modElements > 0) THEN + write (6, '(A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A)') " C(", & + 1 + multElements, ":", M, ",", je, ":", MIN(je + sj - 1, N), ")=Cbuffer(", & + 1, ":", modElements, ",1:", MIN(je + sj, N + 1) - je, ")" + + ENDIF + + ENDDO + + write (6, '(A)') " END SUBROUTINE" + + if (PRESENT(stack_size_label)) THEN + + IF (modElements > 0 .AND. nSIMD > 0 .AND. stride > 0) THEN + CALL write_subroutine_stack(label, M, N, K, transpose_flavor, data_type, 8, stack_size_label, nSIMD, MIN(stride, N)) + ELSE + CALL write_subroutine_stack(label, M, N, K, transpose_flavor, data_type, 8, stack_size_label) + ENDIF + ENDIF + + END SUBROUTINE MULTVECTOR + + SUBROUTINE mult_versions(M, N, K, version, label, transpose_flavor, data_type, SIMD_size, filename, & + stack_size_label, write_buffer_interface) + INTEGER :: M, N, K, version, transpose_flavor, data_type, SIMD_size + INTEGER, ALLOCATABLE, DIMENSION(:, :) :: tiny_opts + INTEGER :: best_square(4) + REAL, ALLOCATABLE, DIMENSION(:) :: tiny_perf, square_perf + CHARACTER(LEN=1024) :: filename, line + CHARACTER(LEN=*), OPTIONAL :: stack_size_label + LOGICAL, OPTIONAL :: write_buffer_interface + CHARACTER(LEN=*) :: label + INTEGER :: opts(4), blocksize, i, iline, nline, max_dim, isquare + REAL :: tmp + INTEGER :: size_type, nSIMD + INTEGER, PARAMETER :: stride = 8 ! used for the unrolling + size_type = 0; nSIMD = 0 + + ! only in the case of SIMD_size=32(i.e. AVX/AVX2) and SIMD_size=64(i.e. KNC/AVX512) + IF ((SIMD_size == 32 .OR. SIMD_size == 64) .AND. transpose_flavor == 1 .AND. data_type <= 2 .AND. & + (LABEL == "" .OR. version == 8)) THEN + + SELECT CASE (data_type) + CASE (1) + size_type = 8 !double precision bytes + CASE (2) + size_type = 4 !single precision bytes + END SELECT + + nSIMD = SIMD_size/size_type + ENDIF + + ! + ! filename is the result of tiny optimization (cat tiny_gen_optimal.out) + ! 1 1 1 5 1 1 1 0.376023 0.532 + ! + OPEN (UNIT=10, FILE=filename) + REWIND (10) + nline = 0 + DO + READ (10, *, END=999) line + nline = nline + 1 + ENDDO +999 CONTINUE + ALLOCATE (tiny_opts(7, nline)) + ALLOCATE (tiny_perf(nline)) + REWIND (10) + DO iline = 1, nline + READ (10, '(A1024)', END=999) line + READ (line, *) tiny_opts(:, iline), tmp, tiny_perf(iline) + ENDDO + CLOSE (10) + + ! find square sizes that give good performance with tiny opts + max_dim = MAXVAL(tiny_opts(1:3, :)) + ALLOCATE (square_perf(max_dim)) + square_perf = -1 + DO iline = 1, nline + IF (tiny_opts(1, iline) == tiny_opts(2, iline) .AND. tiny_opts(1, iline) == tiny_opts(3, iline)) THEN + square_perf(tiny_opts(1, iline)) = tiny_perf(iline) + ENDIF + ENDDO + best_square = -1 + DO isquare = 1, SIZE(best_square) + tmp = -HUGE(tmp) + DO i = 1, max_dim + IF (square_perf(i) > tmp .AND. .NOT. ANY(best_square .EQ. i)) THEN + tmp = square_perf(i) + best_square(isquare) = i + ENDIF + ENDDO + ENDDO + IF (ANY(best_square < 1)) ERROR STOP "tiny opts file needs sufficiently many square sizes" + + IF ((version .ge. 1 .and. version .le. 7) .or. version .eq. 9) THEN + IF (version .ne. 3 .and. version .ne. 9) THEN + write (6, '(A,I0,A,I0,A,I0,A)') " PURE SUBROUTINE smm_"//trstr(transpose_flavor, data_type)//"_", & + M, "_", N, "_", K, TRIM(label)//"(A,B,C)" + ELSE + write (6, '(A,I0,A,I0,A,I0,A)') " SUBROUTINE smm_"//trstr(transpose_flavor, data_type)//"_", & + M, "_", N, "_", K, TRIM(label)//"(A,B,C)" + IF (version .eq. 9) THEN + write (6, '(A)') " USE, INTRINSIC :: ISO_C_BINDING" + ENDIF + ENDIF + CALL write_matrix_defs(M, N, K, transpose_flavor, data_type, .TRUE., version .eq. 9) + ENDIF + + SELECT CASE (version) + CASE (1) + ! generation of the tiny version + write (6, '(A)') " INTEGER ::i,j,l" + CALL find_tiny_opts(opts, tiny_opts, m, n, k) + CALL smm_inner(1, M, 1, N, 1, K, opts(1), opts(2), opts(3), opts(4), transpose_flavor, data_type) + CASE (2) + ! generation of the matmul version + SELECT CASE (transpose_flavor) + CASE (1) + write (6, '(A)') " C = C + MATMUL(A,B) ! so easy" + CASE (2) + write (6, '(A)') " C = C + MATMUL(TRANSPOSE(A),B) ! so easy" + CASE (3) + write (6, '(A)') " C = C + MATMUL(A,TRANSPOSE(B)) ! so easy" + CASE (4) + write (6, '(A)') " C = C + MATMUL(TRANSPOSE(A),TRANSPOSE(B)) ! so easy" + END SELECT + CASE (3) + ! generation of the gemm version + WRITE (6, '(A)') " "//trdat(data_type, .FALSE.)//", PARAMETER :: one=1" + write (6, '(A)') "#ifdef __INTEL_OFFLOAD" + write (6, '(A)') "!dir$ attributes offload:mic :: "//trgemm(data_type) + write (6, '(A)') "#endif" + SELECT CASE (transpose_flavor) + CASE (1) + write (6, '(A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A)') & + " CALL "//trgemm(data_type)//"('N','N',", M, ",", N, ",", K, ",one,A,", M, ",B,", K, ",one,C,", M, ")" + CASE (2) + write (6, '(A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A)') & + " CALL "//trgemm(data_type)//"('T','N',", M, ",", N, ",", K, ",one,A,", K, ",B,", K, ",one,C,", M, ")" + CASE (3) + write (6, '(A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A)') & + " CALL "//trgemm(data_type)//"('N','T',", M, ",", N, ",", K, ",one,A,", M, ",B,", N, ",one,C,", M, ")" + CASE (4) + write (6, '(A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A)') & + " CALL "//trgemm(data_type)//"('T','T',", M, ",", N, ",", K, ",one,A,", K, ",B,", N, ",one,C,", M, ")" + END SELECT + CASE (4, 5, 6, 7) + isquare = version - 3 + ! generation of the multrec versions + write (6, '(A)') " INTEGER ::i,j,l" + blocksize = best_square(isquare) + CALL MULTREC(1, M, 1, N, 1, K, blocksize, tiny_opts, transpose_flavor, data_type) + CASE (8) + ! generation of the vector version + IF (nSIMD > 0) THEN + CALL MULTVECTOR(label, M, N, K, transpose_flavor, data_type, nSIMD, stride, stack_size_label) + IF (PRESENT(write_buffer_interface)) THEN + IF (write_buffer_interface) THEN + CALL MULTVECTOR(label, M, N, K, transpose_flavor, data_type, nSIMD, stride) + ENDIF + ENDIF + + ENDIF + CASE (9) + write (6, '(A)') " INTERFACE" + write (6, '(A,I0,A,I0,A,I0,A)') " SUBROUTINE libxsmm_", M, "_", N, "_", K, "(A,B,C) BIND(C)" + write (6, '(A)') " USE, INTRINSIC :: ISO_C_BINDING, ONLY : C_PTR" + write (6, '(A)') " TYPE(C_PTR), VALUE :: C, B, A" + write (6, '(A,I0,A,I0,A,I0)') " END SUBROUTINE libxsmm_", M, "_", N, "_", K + write (6, '(A)') " END INTERFACE" + write (6, '(A,I0,A,I0,A,I0,A)') " CALL libxsmm_", M, "_", N, "_", K, "(C_LOC(A),C_LOC(B),C_LOC(C))" + CASE DEFAULT + ERROR STOP "MISSING CASE mult_versions" + END SELECT + + IF ((version .ge. 1 .and. version .le. 7) .or. version .eq. 9) THEN + write (6, '(A)') " END SUBROUTINE" + CALL write_subroutine_stack(label, M, N, K, transpose_flavor, data_type, version, stack_size_label) + ENDIF + + END SUBROUTINE mult_versions END MODULE multrec_gen diff --git a/tools/build_libsmm/mults.f90 b/tools/build_libsmm/mults.f90 index d22c8c950a9..04462eb0d72 100644 --- a/tools/build_libsmm/mults.f90 +++ b/tools/build_libsmm/mults.f90 @@ -1,228 +1,228 @@ MODULE mults - IMPLICIT NONE + IMPLICIT NONE CONTAINS - FUNCTION trdat(data_type,write_target,in_intent_label) - INTEGER :: data_type - LOGICAL :: write_target - CHARACTER(LEN=*), OPTIONAL :: in_intent_label - CHARACTER(LEN=50) :: options - CHARACTER(LEN=50) :: trdat + FUNCTION trdat(data_type, write_target, in_intent_label) + INTEGER :: data_type + LOGICAL :: write_target + CHARACTER(LEN=*), OPTIONAL :: in_intent_label + CHARACTER(LEN=50) :: options + CHARACTER(LEN=50) :: trdat - options="" - IF (PRESENT(in_intent_label)) THEN - IF (in_intent_label/="") THEN - options=", INTENT("//TRIM(in_intent_label)//")" - ENDIF - ENDIF - IF (write_target) THEN - options=TRIM(options)//", TARGET" - ENDIF - SELECT CASE(data_type) - CASE(1) - trdat="REAL(KIND=KIND(0.0D0))"//TRIM(options) - CASE(2) - trdat="REAL(KIND=KIND(0.0))"//TRIM(options) - CASE(3) - trdat="COMPLEX(KIND=KIND(0.0D0))"//TRIM(options) - CASE(4) - trdat="COMPLEX(KIND=KIND(0.0))"//TRIM(options) - END SELECT - END FUNCTION + options = "" + IF (PRESENT(in_intent_label)) THEN + IF (in_intent_label /= "") THEN + options = ", INTENT("//TRIM(in_intent_label)//")" + ENDIF + ENDIF + IF (write_target) THEN + options = TRIM(options)//", TARGET" + ENDIF + SELECT CASE (data_type) + CASE (1) + trdat = "REAL(KIND=KIND(0.0D0))"//TRIM(options) + CASE (2) + trdat = "REAL(KIND=KIND(0.0))"//TRIM(options) + CASE (3) + trdat = "COMPLEX(KIND=KIND(0.0D0))"//TRIM(options) + CASE (4) + trdat = "COMPLEX(KIND=KIND(0.0))"//TRIM(options) + END SELECT + END FUNCTION - FUNCTION trgemm(data_type) - INTEGER :: data_type - CHARACTER(LEN=5) :: trgemm - SELECT CASE(data_type) - CASE(1) - trgemm="DGEMM" - CASE(2) - trgemm="SGEMM" - CASE(3) - trgemm="ZGEMM" - CASE(4) - trgemm="CGEMM" - END SELECT - END FUNCTION + FUNCTION trgemm(data_type) + INTEGER :: data_type + CHARACTER(LEN=5) :: trgemm + SELECT CASE (data_type) + CASE (1) + trgemm = "DGEMM" + CASE (2) + trgemm = "SGEMM" + CASE (3) + trgemm = "ZGEMM" + CASE (4) + trgemm = "CGEMM" + END SELECT + END FUNCTION - FUNCTION trstr(transpose_flavor,data_type) - INTEGER :: transpose_flavor, data_type - CHARACTER(LEN=3) :: trstr - CHARACTER(LEN=1) :: dstr - SELECT CASE(data_type) - CASE(1) - dstr="d" - CASE(2) - dstr="s" - CASE(3) - dstr="z" - CASE(4) - dstr="c" - END SELECT - SELECT CASE(transpose_flavor) - CASE(1) - trstr=dstr//"nn" - CASE(2) - trstr=dstr//"tn" - CASE(3) - trstr=dstr//"nt" - CASE(4) - trstr=dstr//"tt" - END SELECT - END FUNCTION trstr + FUNCTION trstr(transpose_flavor, data_type) + INTEGER :: transpose_flavor, data_type + CHARACTER(LEN=3) :: trstr + CHARACTER(LEN=1) :: dstr + SELECT CASE (data_type) + CASE (1) + dstr = "d" + CASE (2) + dstr = "s" + CASE (3) + dstr = "z" + CASE (4) + dstr = "c" + END SELECT + SELECT CASE (transpose_flavor) + CASE (1) + trstr = dstr//"nn" + CASE (2) + trstr = dstr//"tn" + CASE (3) + trstr = dstr//"nt" + CASE (4) + trstr = dstr//"tt" + END SELECT + END FUNCTION trstr - FUNCTION trparam(stack_size_label) - CHARACTER(LEN=*), OPTIONAL :: stack_size_label - CHARACTER(LEN=128) :: trparam - if (PRESENT(stack_size_label)) THEN - trparam = "A,B,C,"//TRIM(stack_size_label)//",dbcsr_ps_width,params,p_a_first,p_b_first,p_c_first" - ELSE - trparam = "A,B,C" - ENDIF - END FUNCTION trparam + FUNCTION trparam(stack_size_label) + CHARACTER(LEN=*), OPTIONAL :: stack_size_label + CHARACTER(LEN=128) :: trparam + if (PRESENT(stack_size_label)) THEN + trparam = "A,B,C,"//TRIM(stack_size_label)//",dbcsr_ps_width,params,p_a_first,p_b_first,p_c_first" + ELSE + trparam = "A,B,C" + ENDIF + END FUNCTION trparam - SUBROUTINE write_stack_params(data_type,stack_size_label) - INTEGER :: data_type - CHARACTER(LEN=*), OPTIONAL :: stack_size_label - CALL write_matrix_defs(data_type=data_type,write_intent=.TRUE.,write_target=.FALSE.) - IF (PRESENT(stack_size_label)) THEN - write(6,'(A)') " INTEGER, INTENT(IN) :: "//TRIM(stack_size_label)//", dbcsr_ps_width" - write(6,'(A)') " INTEGER, INTENT(IN) :: params(dbcsr_ps_width, "//TRIM(stack_size_label)//")" - write(6,'(A)') " INTEGER, INTENT(IN) :: p_a_first, p_b_first, p_c_first" - ENDIF - END SUBROUTINE write_stack_params + SUBROUTINE write_stack_params(data_type, stack_size_label) + INTEGER :: data_type + CHARACTER(LEN=*), OPTIONAL :: stack_size_label + CALL write_matrix_defs(data_type=data_type, write_intent=.TRUE., write_target=.FALSE.) + IF (PRESENT(stack_size_label)) THEN + write (6, '(A)') " INTEGER, INTENT(IN) :: "//TRIM(stack_size_label)//", dbcsr_ps_width" + write (6, '(A)') " INTEGER, INTENT(IN) :: params(dbcsr_ps_width, "//TRIM(stack_size_label)//")" + write (6, '(A)') " INTEGER, INTENT(IN) :: p_a_first, p_b_first, p_c_first" + ENDIF + END SUBROUTINE write_stack_params - SUBROUTINE write_matrix_defs(M,N,K,transpose_flavor,data_type,write_intent,& - write_target,stack_size_label,padding) - INTEGER, OPTIONAL :: M,N,K,transpose_flavor - INTEGER :: data_type - LOGICAL :: write_intent, write_target - CHARACTER(LEN=*), OPTIONAL :: stack_size_label - LOGICAL, OPTIONAL :: padding - CHARACTER(LEN=50) :: intent_label - LOGICAL :: do_padding + SUBROUTINE write_matrix_defs(M, N, K, transpose_flavor, data_type, write_intent, & + write_target, stack_size_label, padding) + INTEGER, OPTIONAL :: M, N, K, transpose_flavor + INTEGER :: data_type + LOGICAL :: write_intent, write_target + CHARACTER(LEN=*), OPTIONAL :: stack_size_label + LOGICAL, OPTIONAL :: padding + CHARACTER(LEN=50) :: intent_label + LOGICAL :: do_padding - IF (PRESENT(M).AND.PRESENT(N).AND.PRESENT(K).AND.PRESENT(transpose_flavor)) THEN - IF (PRESENT(stack_size_label)) THEN - ! +8 ... the buffered routines need to be able to read past the last 'used' elements of the C array. - ! the array therefore needs to be padded appropriately. - write(6,'(A)') " "//trdat(data_type,write_target)// & - " :: C(M*N*"//TRIM(stack_size_label)// & - "+8), B(K*N*"//TRIM(stack_size_label)// & - "), A(M*K*"//TRIM(stack_size_label)//")" - ELSE - IF (write_intent) THEN - write(6,'(A,I0,A,I0,A)') & - " "//trdat(data_type,write_target,"INOUT")//" :: C(",M,",",N,")" - intent_label="IN" + IF (PRESENT(M) .AND. PRESENT(N) .AND. PRESENT(K) .AND. PRESENT(transpose_flavor)) THEN + IF (PRESENT(stack_size_label)) THEN + ! +8 ... the buffered routines need to be able to read past the last 'used' elements of the C array. + ! the array therefore needs to be padded appropriately. + write (6, '(A)') " "//trdat(data_type, write_target)// & + " :: C(M*N*"//TRIM(stack_size_label)// & + "+8), B(K*N*"//TRIM(stack_size_label)// & + "), A(M*K*"//TRIM(stack_size_label)//")" ELSE - do_padding=.FALSE. - IF (PRESENT(padding)) THEN - IF (padding) do_padding=.TRUE. - ENDIF - IF (do_padding) THEN - write(6,'(A)') & - " "//trdat(data_type,write_target)//" :: C(M*N+8)" + IF (write_intent) THEN + write (6, '(A,I0,A,I0,A)') & + " "//trdat(data_type, write_target, "INOUT")//" :: C(", M, ",", N, ")" + intent_label = "IN" ELSE - write(6,'(A,I0,A,I0,A)') & - " "//trdat(data_type,write_target)//" :: C(",M,",",N,")" + do_padding = .FALSE. + IF (PRESENT(padding)) THEN + IF (padding) do_padding = .TRUE. + ENDIF + IF (do_padding) THEN + write (6, '(A)') & + " "//trdat(data_type, write_target)//" :: C(M*N+8)" + ELSE + write (6, '(A,I0,A,I0,A)') & + " "//trdat(data_type, write_target)//" :: C(", M, ",", N, ")" + ENDIF + intent_label = "" ENDIF - intent_label="" + SELECT CASE (transpose_flavor) + CASE (1) + write (6, '(A,I0,A,I0,A,I0,A,I0,A)') & + " "//trdat(data_type, write_target, intent_label)//" :: B(", K, ",", N, "), A(", M, ",", K, ")" + CASE (2) + write (6, '(A,I0,A,I0,A,I0,A,I0,A)') & + " "//trdat(data_type, write_target, intent_label)//" :: B(", K, ",", N, "), A(", K, ",", M, ")" + CASE (3) + write (6, '(A,I0,A,I0,A,I0,A,I0,A)') & + " "//trdat(data_type, write_target, intent_label)//" :: B(", N, ",", K, "), A(", M, ",", K, ")" + CASE (4) + write (6, '(A,I0,A,I0,A,I0,A,I0,A)') & + " "//trdat(data_type, write_target, intent_label)//" :: B(", N, ",", K, "), A(", K, ",", M, ")" + END SELECT ENDIF - SELECT CASE(transpose_flavor) - CASE(1) - write(6,'(A,I0,A,I0,A,I0,A,I0,A)') & - " "//trdat(data_type,write_target,intent_label)//" :: B(",K,",",N,"), A(",M,",",K,")" - CASE(2) - write(6,'(A,I0,A,I0,A,I0,A,I0,A)') & - " "//trdat(data_type,write_target,intent_label)//" :: B(",K,",",N,"), A(",K,",",M,")" - CASE(3) - write(6,'(A,I0,A,I0,A,I0,A,I0,A)') & - " "//trdat(data_type,write_target,intent_label)//" :: B(",N,",",K,"), A(",M,",",K,")" - CASE(4) - write(6,'(A,I0,A,I0,A,I0,A,I0,A)') & - " "//trdat(data_type,write_target,intent_label)//" :: B(",N,",",K,"), A(",K,",",M,")" - END SELECT - ENDIF - ELSE - IF (write_intent) THEN - write(6,'(A)') " "//trdat(data_type,write_target,"INOUT")//" :: C(*)" - write(6,'(A)') " "//trdat(data_type,write_target,"IN")//" :: B(*), A(*)" ELSE - write(6,'(A)') " "//trdat(data_type,write_target)//" :: C(*)" - write(6,'(A)') " "//trdat(data_type,write_target)//" :: B(*), A(*)" + IF (write_intent) THEN + write (6, '(A)') " "//trdat(data_type, write_target, "INOUT")//" :: C(*)" + write (6, '(A)') " "//trdat(data_type, write_target, "IN")//" :: B(*), A(*)" + ELSE + write (6, '(A)') " "//trdat(data_type, write_target)//" :: C(*)" + write (6, '(A)') " "//trdat(data_type, write_target)//" :: B(*), A(*)" + ENDIF ENDIF - ENDIF - END SUBROUTINE write_matrix_defs + END SUBROUTINE write_matrix_defs - SUBROUTINE smm_inner(mi,mf,ni,nf,ki,kf,iloop,mu,nu,ku,transpose_flavor,data_type) - INTEGER :: mi,mf,ni,nf,ki,kf,iloop,mu,nu,ku,transpose_flavor,data_type - INTEGER :: im,in,ik,ido - INTEGER :: loop_order(3,6),have_loops + SUBROUTINE smm_inner(mi, mf, ni, nf, ki, kf, iloop, mu, nu, ku, transpose_flavor, data_type) + INTEGER :: mi, mf, ni, nf, ki, kf, iloop, mu, nu, ku, transpose_flavor, data_type + INTEGER :: im, in, ik, ido + INTEGER :: loop_order(3, 6), have_loops - loop_order(:,1)=(/1,2,3/) - loop_order(:,2)=(/2,1,3/) - loop_order(:,3)=(/2,3,1/) - loop_order(:,4)=(/1,3,2/) - loop_order(:,5)=(/3,1,2/) - loop_order(:,6)=(/3,2,1/) - have_loops=0 - CALL out_loop(mi,mf,ni,nf,ki,kf,mu,nu,ku,loop_order(1,iloop),have_loops) - CALL out_loop(mi,mf,ni,nf,ki,kf,mu,nu,ku,loop_order(2,iloop),have_loops) - CALL out_loop(mi,mf,ni,nf,ki,kf,mu,nu,ku,loop_order(3,iloop),have_loops) - ! what is the fastest order for these loops ? Does it matter ? - DO im=0,mu-1 - DO in=0,nu-1 - DO ik=0,ku-1 - SELECT CASE(transpose_flavor) - CASE(1) - write(6,'(A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A)') & - " C(i+",im,",j+",in,")=C(i+",im,",j+",in,")+A(i+",im,",l+",ik,")*B(l+",ik,",j+",in,")" - CASE(2) - write(6,'(A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A)') & - " C(i+",im,",j+",in,")=C(i+",im,",j+",in,")+A(l+",ik,",i+",im,")*B(l+",ik,",j+",in,")" - CASE(3) - write(6,'(A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A)') & - " C(i+",im,",j+",in,")=C(i+",im,",j+",in,")+A(i+",im,",l+",ik,")*B(j+",in,",l+",ik,")" - CASE(4) - write(6,'(A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A)') & - " C(i+",im,",j+",in,")=C(i+",im,",j+",in,")+A(l+",ik,",i+",im,")*B(j+",in,",l+",ik,")" - END SELECT - ENDDO - ENDDO - ENDDO - DO ido=1,have_loops - write(6,'(A)') " ENDDO " - ENDDO - END SUBROUTINE smm_inner + loop_order(:, 1) = (/1, 2, 3/) + loop_order(:, 2) = (/2, 1, 3/) + loop_order(:, 3) = (/2, 3, 1/) + loop_order(:, 4) = (/1, 3, 2/) + loop_order(:, 5) = (/3, 1, 2/) + loop_order(:, 6) = (/3, 2, 1/) + have_loops = 0 + CALL out_loop(mi, mf, ni, nf, ki, kf, mu, nu, ku, loop_order(1, iloop), have_loops) + CALL out_loop(mi, mf, ni, nf, ki, kf, mu, nu, ku, loop_order(2, iloop), have_loops) + CALL out_loop(mi, mf, ni, nf, ki, kf, mu, nu, ku, loop_order(3, iloop), have_loops) + ! what is the fastest order for these loops ? Does it matter ? + DO im = 0, mu - 1 + DO in = 0, nu - 1 + DO ik = 0, ku - 1 + SELECT CASE (transpose_flavor) + CASE (1) + write (6, '(A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A)') & + " C(i+", im, ",j+", in, ")=C(i+", im, ",j+", in, ")+A(i+", im, ",l+", ik, ")*B(l+", ik, ",j+", in, ")" + CASE (2) + write (6, '(A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A)') & + " C(i+", im, ",j+", in, ")=C(i+", im, ",j+", in, ")+A(l+", ik, ",i+", im, ")*B(l+", ik, ",j+", in, ")" + CASE (3) + write (6, '(A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A)') & + " C(i+", im, ",j+", in, ")=C(i+", im, ",j+", in, ")+A(i+", im, ",l+", ik, ")*B(j+", in, ",l+", ik, ")" + CASE (4) + write (6, '(A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A)') & + " C(i+", im, ",j+", in, ")=C(i+", im, ",j+", in, ")+A(l+", ik, ",i+", im, ")*B(j+", in, ",l+", ik, ")" + END SELECT + ENDDO + ENDDO + ENDDO + DO ido = 1, have_loops + write (6, '(A)') " ENDDO " + ENDDO + END SUBROUTINE smm_inner - SUBROUTINE out_loop(mi,mf,ni,nf,ki,kf,mu,nu,ku,ichoice,have_loops) - INTEGER :: mi,mf,ni,nf,ki,kf,ichoice,mu,nu,ku,have_loops - IF (ichoice==1) THEN - IF (nf-ni+1>nu) THEN - write(6,'(A,I0,A,I0,A,I0)') " DO j=",ni,",",nf,",",nu - have_loops=have_loops+1 - ELSE - write(6,'(A,I0)') " j=",ni - ENDIF - ENDIF - IF (ichoice==2) THEN - IF (mf-mi+1>mu) THEN - write(6,'(A,I0,A,I0,A,I0)') " DO i=",mi,",",mf,",",mu - have_loops=have_loops+1 - ELSE - write(6,'(A,I0)') " i=",mi - ENDIF - ENDIF - IF (ichoice==3) THEN - IF (kf-ki+1>ku) THEN - write(6,'(A,I0,A,I0,A,I0)') " DO l=",ki,",",kf,",",ku - have_loops=have_loops+1 - ELSE - write(6,'(A,I0)') " l=",ki - ENDIF - ENDIF - END SUBROUTINE + SUBROUTINE out_loop(mi, mf, ni, nf, ki, kf, mu, nu, ku, ichoice, have_loops) + INTEGER :: mi, mf, ni, nf, ki, kf, ichoice, mu, nu, ku, have_loops + IF (ichoice == 1) THEN + IF (nf - ni + 1 > nu) THEN + write (6, '(A,I0,A,I0,A,I0)') " DO j=", ni, ",", nf, ",", nu + have_loops = have_loops + 1 + ELSE + write (6, '(A,I0)') " j=", ni + ENDIF + ENDIF + IF (ichoice == 2) THEN + IF (mf - mi + 1 > mu) THEN + write (6, '(A,I0,A,I0,A,I0)') " DO i=", mi, ",", mf, ",", mu + have_loops = have_loops + 1 + ELSE + write (6, '(A,I0)') " i=", mi + ENDIF + ENDIF + IF (ichoice == 3) THEN + IF (kf - ki + 1 > ku) THEN + write (6, '(A,I0,A,I0,A,I0)') " DO l=", ki, ",", kf, ",", ku + have_loops = have_loops + 1 + ELSE + write (6, '(A,I0)') " l=", ki + ENDIF + ENDIF + END SUBROUTINE END MODULE mults diff --git a/tools/build_libsmm/small_gen.f90 b/tools/build_libsmm/small_gen.f90 index ea9791e556f..cb91278ec75 100644 --- a/tools/build_libsmm/small_gen.f90 +++ b/tools/build_libsmm/small_gen.f90 @@ -16,106 +16,106 @@ PROGRAM small_gen USE multrec_gen IMPLICIT NONE - CHARACTER(LEN=1024) :: arg,filename,label - INTEGER :: M,N,K,transpose_flavor,data_type,SIMD_size - INTEGER :: ibest_square=3, best_square=4 + CHARACTER(LEN=1024) :: arg, filename, label + INTEGER :: M, N, K, transpose_flavor, data_type, SIMD_size + INTEGER :: ibest_square = 3, best_square = 4 INTEGER :: isquare LOGICAL :: do_libxsmm - CALL GET_COMMAND_ARGUMENT(1,arg) - READ(arg,*) M - CALL GET_COMMAND_ARGUMENT(2,arg) - READ(arg,*) N - CALL GET_COMMAND_ARGUMENT(3,arg) - READ(arg,*) K - CALL GET_COMMAND_ARGUMENT(4,arg) - READ(arg,*) transpose_flavor - CALL GET_COMMAND_ARGUMENT(5,arg) - READ(arg,*) data_type - CALL GET_COMMAND_ARGUMENT(6,arg) - READ(arg,*) SIMD_size - CALL GET_COMMAND_ARGUMENT(7,filename) + CALL GET_COMMAND_ARGUMENT(1, arg) + READ (arg, *) M + CALL GET_COMMAND_ARGUMENT(2, arg) + READ (arg, *) N + CALL GET_COMMAND_ARGUMENT(3, arg) + READ (arg, *) K + CALL GET_COMMAND_ARGUMENT(4, arg) + READ (arg, *) transpose_flavor + CALL GET_COMMAND_ARGUMENT(5, arg) + READ (arg, *) data_type + CALL GET_COMMAND_ARGUMENT(6, arg) + READ (arg, *) SIMD_size + CALL GET_COMMAND_ARGUMENT(7, filename) - IF (COMMAND_ARGUMENT_COUNT().gt.7) THEN + IF (COMMAND_ARGUMENT_COUNT() .gt. 7) THEN do_libxsmm = .TRUE. ELSE do_libxsmm = .FALSE. END IF ! generation of the tiny version - write(label,'(A,I0)') "_",1 - CALL mult_versions(M,N,K,1,label,transpose_flavor,data_type,SIMD_size,filename) + write (label, '(A,I0)') "_", 1 + CALL mult_versions(M, N, K, 1, label, transpose_flavor, data_type, SIMD_size, filename) ! generation of the matmul version - write(label,'(A,I0)') "_",2 - CALL mult_versions(M,N,K,2,label,transpose_flavor,data_type,SIMD_size,filename) + write (label, '(A,I0)') "_", 2 + CALL mult_versions(M, N, K, 2, label, transpose_flavor, data_type, SIMD_size, filename) ! generation of the dgemm version - write(label,'(A,I0)') "_",3 - CALL mult_versions(M,N,K,3,label,transpose_flavor,data_type,SIMD_size,filename) + write (label, '(A,I0)') "_", 3 + CALL mult_versions(M, N, K, 3, label, transpose_flavor, data_type, SIMD_size, filename) ! generation of the multrec versions (4) - DO isquare=ibest_square+1,ibest_square+best_square - write(label,'(A,I0)') "_",isquare - CALL mult_versions(M,N,K,isquare,label,transpose_flavor,data_type,SIMD_size,filename) + DO isquare = ibest_square + 1, ibest_square + best_square + write (label, '(A,I0)') "_", isquare + CALL mult_versions(M, N, K, isquare, label, transpose_flavor, data_type, SIMD_size, filename) ENDDO - - ! generation of the vector version, + + ! generation of the vector version, ! only in the case of SIMD_size=32(i.e. AVX/AVX2) and SIMD_size=64(i.e. KNC/AVX512) - ibest_square=ibest_square+1 - IF ((SIMD_size==32 .OR. SIMD_size==64) .AND. transpose_flavor==1 .AND. data_type<=2) THEN - write(label,'(A,I0)') "_",ibest_square+best_square - CALL mult_versions(M,N,K,ibest_square+best_square,label,& - transpose_flavor,data_type,SIMD_size,filename,stack_size_label="") + ibest_square = ibest_square + 1 + IF ((SIMD_size == 32 .OR. SIMD_size == 64) .AND. transpose_flavor == 1 .AND. data_type <= 2) THEN + write (label, '(A,I0)') "_", ibest_square + best_square + CALL mult_versions(M, N, K, ibest_square + best_square, label, & + transpose_flavor, data_type, SIMD_size, filename, stack_size_label="") ENDIF ! generation of the libxsmm version interface - ibest_square=ibest_square+1 - IF (do_libxsmm .AND. transpose_flavor==1 .AND. data_type<=2) THEN - write(label,'(A,I0)') "_",ibest_square+best_square - CALL mult_versions(M,N,K,ibest_square+best_square,label,& - transpose_flavor,data_type,SIMD_size,filename,stack_size_label="") + ibest_square = ibest_square + 1 + IF (do_libxsmm .AND. transpose_flavor == 1 .AND. data_type <= 2) THEN + write (label, '(A,I0)') "_", ibest_square + best_square + CALL mult_versions(M, N, K, ibest_square + best_square, label, & + transpose_flavor, data_type, SIMD_size, filename, stack_size_label="") END IF - write(6,'(A)') "#ifdef __INTEL_OFFLOAD" - write(6,'(A)') "!dir$ attributes offload:mic :: run_kernels" - write(6,'(A)') "#endif" - write(6,'(A,I0,A,I0,A,I0,A)') "SUBROUTINE small_find_",M,"_",N,"_",K,"(unit)" - write(6,'(A)') " IMPLICIT NONE" - write(6,'(A)') " INTEGER :: unit ! Output unit" - write(6,'(A,I0,A,I0,A,I0,A,I0)') " INTEGER, PARAMETER :: M=",M,",N=",N,",K=",K - write(6,'(A)') " CHARACTER(len=64) :: filename" - CALL write_matrix_defs(M,N,K,transpose_flavor,data_type,.FALSE.,.FALSE.,padding=.TRUE.) - write(6,'(A)') " INTERFACE" - write(6,'(A)') " SUBROUTINE X("//TRIM(trparam())//")" - CALL write_matrix_defs(data_type=data_type,write_intent=.TRUE.,write_target=.FALSE.) - write(6,'(A)') " END SUBROUTINE" - write(6,'(A)') " END INTERFACE" - DO isquare=1,ibest_square+best_square - IF (isquare==8 .AND. ((SIMD_size/=32 .AND. SIMD_size/=64) .OR. transpose_flavor/=1 .OR. data_type>2)) CYCLE - IF (isquare==9 .AND. ((.NOT.do_libxsmm) .OR. transpose_flavor/=1 .OR. data_type>2)) CYCLE - write(6,'(A,I0,A,I0,A,I0,A,I0)') "PROCEDURE(X) :: smm_"//trstr(transpose_flavor,data_type)//"_",& - M,"_",N,"_",K,"_",isquare + write (6, '(A)') "#ifdef __INTEL_OFFLOAD" + write (6, '(A)') "!dir$ attributes offload:mic :: run_kernels" + write (6, '(A)') "#endif" + write (6, '(A,I0,A,I0,A,I0,A)') "SUBROUTINE small_find_", M, "_", N, "_", K, "(unit)" + write (6, '(A)') " IMPLICIT NONE" + write (6, '(A)') " INTEGER :: unit ! Output unit" + write (6, '(A,I0,A,I0,A,I0,A,I0)') " INTEGER, PARAMETER :: M=", M, ",N=", N, ",K=", K + write (6, '(A)') " CHARACTER(len=64) :: filename" + CALL write_matrix_defs(M, N, K, transpose_flavor, data_type, .FALSE., .FALSE., padding=.TRUE.) + write (6, '(A)') " INTERFACE" + write (6, '(A)') " SUBROUTINE X("//TRIM(trparam())//")" + CALL write_matrix_defs(data_type=data_type, write_intent=.TRUE., write_target=.FALSE.) + write (6, '(A)') " END SUBROUTINE" + write (6, '(A)') " END INTERFACE" + DO isquare = 1, ibest_square + best_square + IF (isquare == 8 .AND. ((SIMD_size /= 32 .AND. SIMD_size /= 64) .OR. transpose_flavor /= 1 .OR. data_type > 2)) CYCLE + IF (isquare == 9 .AND. ((.NOT. do_libxsmm) .OR. transpose_flavor /= 1 .OR. data_type > 2)) CYCLE + write (6, '(A,I0,A,I0,A,I0,A,I0)') "PROCEDURE(X) :: smm_"//trstr(transpose_flavor, data_type)//"_", & + M, "_", N, "_", K, "_", isquare ENDDO - write(6,'(A,I0,A,I0)') " INTEGER, PARAMETER :: Nmin=5,Nk=1,Nloop=",ibest_square+best_square - write(6,'(A)') " TYPE t_kernels" - write(6,'(A)') " PROCEDURE(X), POINTER, NOPASS :: ptr" - write(6,'(A)') " END TYPE t_kernels" - write(6,'(A)') " TYPE(t_kernels) :: kernels(Nk,Nloop)" - write(6,'(A)') " INTEGER :: mnk(3,Nk) ! mu, nu, ku" - DO isquare=1,ibest_square+best_square - IF ((isquare==8 .AND. ((SIMD_size/=32 .AND. SIMD_size/=64) .OR. transpose_flavor/=1 .OR. data_type>2)) .OR. & - (isquare==9 .AND. ((.NOT.do_libxsmm) .OR. transpose_flavor/=1 .OR. data_type>2))) THEN - write(6,'(A,I0,A)') " kernels(Nk,",isquare,")%ptr => Null()" + write (6, '(A,I0,A,I0)') " INTEGER, PARAMETER :: Nmin=5,Nk=1,Nloop=", ibest_square + best_square + write (6, '(A)') " TYPE t_kernels" + write (6, '(A)') " PROCEDURE(X), POINTER, NOPASS :: ptr" + write (6, '(A)') " END TYPE t_kernels" + write (6, '(A)') " TYPE(t_kernels) :: kernels(Nk,Nloop)" + write (6, '(A)') " INTEGER :: mnk(3,Nk) ! mu, nu, ku" + DO isquare = 1, ibest_square + best_square + IF ((isquare == 8 .AND. ((SIMD_size /= 32 .AND. SIMD_size /= 64) .OR. transpose_flavor /= 1 .OR. data_type > 2)) .OR. & + (isquare == 9 .AND. ((.NOT. do_libxsmm) .OR. transpose_flavor /= 1 .OR. data_type > 2))) THEN + write (6, '(A,I0,A)') " kernels(Nk,", isquare, ")%ptr => Null()" ELSE - write(6,'(A,I0,A,I0,A,I0,A,I0,A,I0)') " kernels(Nk,",isquare,")%ptr => smm_"//trstr(transpose_flavor,data_type)//"_",& - M,"_",N,"_",K,"_",isquare + write (6, '(A,I0,A,I0,A,I0,A,I0,A,I0)') " kernels(Nk,", isquare, ")%ptr => smm_"//trstr(transpose_flavor, data_type)//"_", & + M, "_", N, "_", K, "_", isquare ENDIF ENDDO - write(6,'(A,I0,A,I0,A,I0,A)') " filename='small_find_",M,"_",N,"_",K,".out'" - write(6,'(A)') " C = 0 ; A = 0 ; B = 0" - write(6,'(A)') " mnk=0" - write(6,'(A)') " CALL run_kernels(filename,unit,M,N,K,A,B,C,Nmin,Nk,Nloop,kernels,mnk)" - write(6,'(A,I0,A,I0,A,I0)') "END SUBROUTINE small_find_",M,"_",N,"_",K - + write (6, '(A,I0,A,I0,A,I0,A)') " filename='small_find_", M, "_", N, "_", K, ".out'" + write (6, '(A)') " C = 0 ; A = 0 ; B = 0" + write (6, '(A)') " mnk=0" + write (6, '(A)') " CALL run_kernels(filename,unit,M,N,K,A,B,C,Nmin,Nk,Nloop,kernels,mnk)" + write (6, '(A,I0,A,I0,A,I0)') "END SUBROUTINE small_find_", M, "_", N, "_", K + END PROGRAM small_gen diff --git a/tools/build_libsmm/tiny_gen.f90 b/tools/build_libsmm/tiny_gen.f90 index 73cce79a17c..973bfb043a8 100644 --- a/tools/build_libsmm/tiny_gen.f90 +++ b/tools/build_libsmm/tiny_gen.f90 @@ -1,104 +1,104 @@ PROGRAM tiny_gen - USE mults - IMPLICIT NONE + USE mults + IMPLICIT NONE - INTEGER :: M,N,K,transpose_flavor,data_type - INTEGER, PARAMETER :: Nloop=6 - CHARACTER(LEN=80) :: arg - INTEGER :: Nk,mi,mf,ni,nf,ki,kf,iloop,ku,nu,mu + INTEGER :: M, N, K, transpose_flavor, data_type + INTEGER, PARAMETER :: Nloop = 6 + CHARACTER(LEN=80) :: arg + INTEGER :: Nk, mi, mf, ni, nf, ki, kf, iloop, ku, nu, mu - CALL GET_COMMAND_ARGUMENT(1,arg) - READ(arg,*) M - CALL GET_COMMAND_ARGUMENT(2,arg) - READ(arg,*) N - CALL GET_COMMAND_ARGUMENT(3,arg) - READ(arg,*) K - CALL GET_COMMAND_ARGUMENT(4,arg) - READ(arg,*) transpose_flavor - CALL GET_COMMAND_ARGUMENT(5,arg) - READ(arg,*) data_type - - CALL loop_variants(1) - write(6,'(A)') "#ifdef __INTEL_OFFLOAD" - write(6,'(A)') "!dir$ attributes offload:mic :: run_kernels" - write(6,'(A)') "#endif" - write(6,'(A,I0,A,I0,A,I0,A)') "SUBROUTINE tiny_find_",M,"_",N,"_",K,"(unit)" - write(6,'(A)') " IMPLICIT NONE" - write(6,'(A)') " INTEGER :: unit ! Output unit" - write(6,'(A,I0,A,I0,A,I0)') " INTEGER, PARAMETER :: M=",M,",N=",N,",K=",K - write(6,'(A)') " CHARACTER(len=64) :: filename" - CALL write_matrix_defs(M,N,K,transpose_flavor,data_type,.FALSE.,.FALSE.) - write(6,'(A)') " INTERFACE" - write(6,'(A)') " SUBROUTINE X(A,B,C)" - CALL write_matrix_defs(data_type=data_type,write_intent=.FALSE.,write_target=.FALSE.) - write(6,'(A)') " END SUBROUTINE" - write(6,'(A)') " END INTERFACE" - CALL loop_variants(2) - write(6,'(A,I0,A,I0)') " INTEGER, PARAMETER :: Nmin=2, Nk=",Nk,", Nloop=",Nloop - write(6,'(A)') " TYPE t_kernels" - write(6,'(A)') " PROCEDURE(X), POINTER, NOPASS :: ptr" - write(6,'(A)') " END TYPE t_kernels" - write(6,'(A)') " TYPE(t_kernels) :: kernels(Nk,Nloop)" - write(6,'(A)') " INTEGER :: mnk(3,Nk) ! mu, nu, ku" - CALL loop_variants(3) - write(6,'(A,I0,A,I0,A,I0,A)') " filename='tiny_find_",M,"_",N,"_",K,".out'" - write(6,'(A)') " C = 0 ; A = 0 ; B = 0" - write(6,'(A)') " CALL run_kernels(filename,unit,M,N,K,A,B,C,Nmin,Nk,Nloop,kernels,mnk)" - write(6,'(A,I0,A,I0,A,I0)') "END SUBROUTINE tiny_find_",M,"_",N,"_",K + CALL GET_COMMAND_ARGUMENT(1, arg) + READ (arg, *) M + CALL GET_COMMAND_ARGUMENT(2, arg) + READ (arg, *) N + CALL GET_COMMAND_ARGUMENT(3, arg) + READ (arg, *) K + CALL GET_COMMAND_ARGUMENT(4, arg) + READ (arg, *) transpose_flavor + CALL GET_COMMAND_ARGUMENT(5, arg) + READ (arg, *) data_type + + CALL loop_variants(1) + write (6, '(A)') "#ifdef __INTEL_OFFLOAD" + write (6, '(A)') "!dir$ attributes offload:mic :: run_kernels" + write (6, '(A)') "#endif" + write (6, '(A,I0,A,I0,A,I0,A)') "SUBROUTINE tiny_find_", M, "_", N, "_", K, "(unit)" + write (6, '(A)') " IMPLICIT NONE" + write (6, '(A)') " INTEGER :: unit ! Output unit" + write (6, '(A,I0,A,I0,A,I0)') " INTEGER, PARAMETER :: M=", M, ",N=", N, ",K=", K + write (6, '(A)') " CHARACTER(len=64) :: filename" + CALL write_matrix_defs(M, N, K, transpose_flavor, data_type, .FALSE., .FALSE.) + write (6, '(A)') " INTERFACE" + write (6, '(A)') " SUBROUTINE X(A,B,C)" + CALL write_matrix_defs(data_type=data_type, write_intent=.FALSE., write_target=.FALSE.) + write (6, '(A)') " END SUBROUTINE" + write (6, '(A)') " END INTERFACE" + CALL loop_variants(2) + write (6, '(A,I0,A,I0)') " INTEGER, PARAMETER :: Nmin=2, Nk=", Nk, ", Nloop=", Nloop + write (6, '(A)') " TYPE t_kernels" + write (6, '(A)') " PROCEDURE(X), POINTER, NOPASS :: ptr" + write (6, '(A)') " END TYPE t_kernels" + write (6, '(A)') " TYPE(t_kernels) :: kernels(Nk,Nloop)" + write (6, '(A)') " INTEGER :: mnk(3,Nk) ! mu, nu, ku" + CALL loop_variants(3) + write (6, '(A,I0,A,I0,A,I0,A)') " filename='tiny_find_", M, "_", N, "_", K, ".out'" + write (6, '(A)') " C = 0 ; A = 0 ; B = 0" + write (6, '(A)') " CALL run_kernels(filename,unit,M,N,K,A,B,C,Nmin,Nk,Nloop,kernels,mnk)" + write (6, '(A,I0,A,I0,A,I0)') "END SUBROUTINE tiny_find_", M, "_", N, "_", K CONTAINS - SUBROUTINE loop_variants(itype) + SUBROUTINE loop_variants(itype) + + INTEGER :: itype - INTEGER :: itype + mi = 1; mf = M; ni = 1; nf = N; ki = 1; kf = K - mi=1 ; mf=M ; ni=1 ; nf=N ; ki=1 ; kf=K + DO iloop = 1, Nloop - DO iloop=1,Nloop + IF (itype .ne. 1) Nk = 0 - IF (itype.ne.1) Nk=0 + DO mu = 1, M + DO nu = 1, N + DO ku = 1, K + ! unroll only if no cleanup is needed + IF (MOD(M, mu) .NE. 0) CYCLE + IF (MOD(N, nu) .NE. 0) CYCLE + IF (MOD(K, ku) .NE. 0) CYCLE + ! do not unroll with more than 32 mults in the inner loop + ! (some more unrolling can be faster, but we reduce size and speedup lib generation) + IF (mu*nu*ku > 32) CYCLE - DO mu=1,M - DO nu=1,N - DO ku=1,K - ! unroll only if no cleanup is needed - IF (MOD(M,mu).NE.0) CYCLE - IF (MOD(N,nu).NE.0) CYCLE - IF (MOD(K,ku).NE.0) CYCLE - ! do not unroll with more than 32 mults in the inner loop - ! (some more unrolling can be faster, but we reduce size and speedup lib generation) - IF (mu*nu*ku>32) CYCLE + SELECT CASE (itype) + CASE (1) + write (6, '(A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A)') & + " SUBROUTINE smm_"//trstr(transpose_flavor, data_type)//"_", M, "_", N, "_", K, "_", & + iloop, "_", mu, "_", nu, "_", ku, "(A,B,C)" + CALL write_matrix_defs(M, N, K, transpose_flavor, data_type, .FALSE., .FALSE.) + write (6, '(A)') " INTEGER ::i,j,l" + CALL smm_inner(mi, mf, ni, nf, ki, kf, iloop, mu, nu, ku, transpose_flavor, data_type) + write (6, '(A)') " END SUBROUTINE" + CASE (2) + Nk = Nk + 1 + write (6, '(A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0)') & + " PROCEDURE(X) :: smm_"//trstr(transpose_flavor, data_type)//"_", M, "_", N, "_", K, "_", iloop, & + "_", mu, "_", nu, "_", ku + CASE (3) + Nk = Nk + 1 + write (6, '(A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0)') " kernels(", Nk, ",", iloop, & + ")%ptr => smm_"//trstr(transpose_flavor, data_type)//"_", M, "_", N, "_", K, "_", & + iloop, "_", mu, "_", nu, "_", ku + IF (iloop .eq. 1) THEN + write (6, '(A,I0,A,I0,A,I0,A,I0,A,I0,A,I0)') " mnk(1,", Nk, ")=", mu, & + " ; mnk(2,", Nk, ")=", nu, & + " ; mnk(3,", Nk, ")=", ku + END IF + END SELECT + ENDDO + ENDDO + ENDDO + ENDDO - SELECT CASE(itype) - CASE(1) - write(6,'(A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A)') & - " SUBROUTINE smm_"//trstr(transpose_flavor,data_type)//"_",M,"_",N,"_",K,"_", & - iloop,"_",mu,"_",nu,"_",ku,"(A,B,C)" - CALL write_matrix_defs(M,N,K,transpose_flavor,data_type,.FALSE.,.FALSE.) - write(6,'(A)') " INTEGER ::i,j,l" - CALL smm_inner(mi,mf,ni,nf,ki,kf,iloop,mu,nu,ku,transpose_flavor,data_type) - write(6,'(A)') " END SUBROUTINE" - CASE(2) - Nk=Nk+1 - write(6,'(A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0)') & - " PROCEDURE(X) :: smm_"//trstr(transpose_flavor,data_type)//"_",M,"_",N,"_",K,"_",iloop, & - "_",mu,"_",nu,"_",ku - CASE(3) - Nk=Nk+1 - write(6,'(A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,A,I0)') " kernels(",Nk,",",iloop, & - ")%ptr => smm_"//trstr(transpose_flavor,data_type)//"_",M,"_",N,"_",K,"_", & - iloop,"_",mu,"_",nu,"_",ku - IF (iloop.eq.1) THEN - write(6,'(A,I0,A,I0,A,I0,A,I0,A,I0,A,I0)') " mnk(1,",Nk,")=",mu, & - " ; mnk(2,",Nk,")=",nu, & - " ; mnk(3,",Nk,")=",ku - END IF - END SELECT - ENDDO - ENDDO - ENDDO - ENDDO - - END SUBROUTINE loop_variants + END SUBROUTINE loop_variants END PROGRAM tiny_gen From 7535c16f0e6b5804bd99b6d0d82df6c2b8a2e9c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tiziano=20M=C3=BCller?= Date: Thu, 9 Apr 2020 11:15:14 +0200 Subject: [PATCH 06/19] pre-commit: use flake8 from its repo (via pre-commit-hooks is deprecated) --- .pre-commit-config.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index dc696949f04..6aec0c8d817 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,10 +8,13 @@ repos: rev: stable hooks: - id: black +- repo: https://gitlab.com/pycqa/flake8 + rev: 3.7.9 + hooks: + - id: flake8 - repo: https://github.com/pre-commit/pre-commit-hooks rev: v2.5.0 hooks: - - id: flake8 - id: check-ast - id: check-yaml - repo: https://github.com/pseewald/fprettify From a74d0d48c4c807443012b9114b7d890630d55f96 Mon Sep 17 00:00:00 2001 From: Alfio Lazzaro Date: Thu, 9 Apr 2020 13:34:00 +0200 Subject: [PATCH 07/19] Remove fprettify submodule --- .gitmodules | 3 --- tools/fprettify | 1 - 2 files changed, 4 deletions(-) delete mode 160000 tools/fprettify diff --git a/.gitmodules b/.gitmodules index f1850931a4f..b469e77b804 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ [submodule "tools/build_utils/fypp"] path = tools/build_utils/fypp url = https://github.com/aradi/fypp.git -[submodule "tools/fprettify"] - path = tools/fprettify - url = https://github.com/pseewald/fprettify diff --git a/tools/fprettify b/tools/fprettify deleted file mode 160000 index c27fff438bb..00000000000 --- a/tools/fprettify +++ /dev/null @@ -1 +0,0 @@ -Subproject commit c27fff438bb0124c854962b9fa4eb4b3b1b40030 From 26d58eefa51c6612f617e2fe901b360503fd4a4b Mon Sep 17 00:00:00 2001 From: Alfio Lazzaro Date: Thu, 9 Apr 2020 14:52:32 +0200 Subject: [PATCH 08/19] Remove prettifier mentions --- .cp2k/Makefile | 44 ----------------------------------------- .pre-commit-config.yaml | 2 +- CONTRIBUTING.md | 3 ++- 3 files changed, 3 insertions(+), 46 deletions(-) diff --git a/.cp2k/Makefile b/.cp2k/Makefile index ec8272d7593..414821731dc 100644 --- a/.cp2k/Makefile +++ b/.cp2k/Makefile @@ -53,7 +53,6 @@ DBCSRCP2K := $(DBCSRHOME)/.cp2k MAKEFILE := $(DBCSRCP2K)/Makefile LIBDIR := $(DBCSRHOME)/lib OBJDIR := $(DBCSRHOME)/obj -PRETTYOBJDIR := $(OBJDIR)/prettified TOOLSDIR := $(DBCSRHOME)/tools FYPPEXE := $(TOOLSDIR)/build_utils/fypp/bin/fypp SRCDIR := $(DBCSRHOME)/src @@ -163,7 +162,6 @@ endif # Declare PHONY targets ===================================================== .PHONY : dirs makedep \ default_target $(LIBRARY) \ - pretty prettyclean \ clean realclean \ version @@ -192,13 +190,6 @@ ifneq ($(ACC),) endif endif -# OBJECTS used for pretty -ALL_OBJECTS := $(addsuffix .o, $(basename $(notdir $(OBJ_SRC_FILES)))) -ALL_OBJECTS += $(addsuffix .o, $(basename $(notdir $(shell cd $(TESTSDIR); find . -name "*.F")))) -ALL_OBJECTS += $(addsuffix .o, $(basename $(notdir $(shell cd $(TESTSDIR); find . -name "*.c")))) -ALL_OBJECTS += $(addsuffix .o, $(basename $(notdir $(shell cd $(TESTSDIR); find . -name "*.cpp")))) -ALL_OBJECTS += $(addsuffix .o, $(basename $(notdir $(shell cd $(TESTSDIR); find . -name "*.cu")))) - # Included files used by Fypp preprocessor and standard includes INCLUDED_SRC_FILES := $(filter-out base_uses.f90, $(notdir $(shell find $(SRCDIR) -name "*.f90"))) INCLUDED_SRC_FILES += $(notdir $(shell find $(TESTSDIR) -name "*.f90")) @@ -247,41 +238,6 @@ realclean: clean rm -rf `find $(DBCSRHOME) -name "*.pyc"` rm -rf `find $(DBCSRHOME) -name "*.callgraph"` -# Prettyfier stuff ========================================================== -vpath %.pretty $(PRETTYOBJDIR) - -pretty: $(addprefix $(PRETTYOBJDIR)/, $(ALL_OBJECTS:.o=.pretty)) $(addprefix $(PRETTYOBJDIR)/, $(INCLUDED_SRC_FILES:.f90=.pretty_included)) - -prettyclean: - -rm -rf $(PRETTYOBJDIR) - -define pretty_func - @mkdir -p $(PRETTYOBJDIR) - @touch $2 - $(TOOLSDIR)/fprettify/fprettify.py $1 -endef - -$(PRETTYOBJDIR)/%.pretty: %.F - $(call pretty_func, $<, $@) - -$(PRETTYOBJDIR)/%.pretty_included: %.f90 - $(call pretty_func, $<, $@) - -$(PRETTYOBJDIR)/%.pretty: %.c -# TODO: call indent here? - @mkdir -p $(PRETTYOBJDIR) - @touch $@ - -$(PRETTYOBJDIR)/%.pretty: %.cpp -# TODO: call indent here? - @mkdir -p $(PRETTYOBJDIR) - @touch $@ - -$(PRETTYOBJDIR)/%.pretty: %.cu -# TODO: call indent here? - @mkdir -p $(PRETTYOBJDIR) - @touch $@ - # Libsmm_acc stuff ========================================================== $(LIBSMM_ACC_ABS_DIR)/parameters.h: $(LIBSMM_ACC_ABS_DIR)/generate_parameters.py $(wildcard $(LIBSMM_ACC_ABS_DIR)/parameters_*.txt) cd $(LIBSMM_ACC_ABS_DIR); ./generate_parameters.py --gpu_version=$(GPUVER) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6aec0c8d817..70997136e2e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ default_language_version: python: python3 -exclude: '^tools/(prettify/fprettify|build_utils/fypp)' +exclude: '^tools/(build_utils/fypp)' fail_fast: false repos: - repo: https://github.com/ambv/black diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 50a288ba3ac..d735451ae45 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -11,7 +11,7 @@ DBCSR developers can find additional information on the [Development](https://gi ## Fortran Code conventions -The code can be formatted with the prettify tool by running `make -j pretty`. +The code is automatically formatted (via pre-commit hooks) by the [prettify tool](https://github.com/pseewald/fprettify/). Please make sure that you follow the following code conventions (based on [CP2K conventions](https://www.cp2k.org/dev:codingconventions)): 1. Every `USE` statement should have an `ONLY:` clause, which lists the imported symbols. @@ -26,5 +26,6 @@ Please make sure that you follow the following code conventions (based on [CP2K 10. Each preprocessor flag should start with two underscores and be documented in the [documentation](./docs/guide/3-developer-guide/3-programming/1-overview/index.md#list-of-macros-used-in-the-code). 11. All routines in the API must start with the `dbcsr_` namespace. For submodules API (e.g. [DBCSR Tensors](./src/tensors)), each function has to start with the `dbcsr__` namespace. 12. If you are including files (i.e. macro `#include`), note that the base directory is `src`, please use relative path to it (e.g. `#include "base/dbcsr_base_uses.f90"` instead of `#include "../base/dbcsr_base_uses.f90"`). +13. All Fortran keywords (`FUNCTION`, `SUBROUTINE`, data types...) must be in capital letters. **Most important, please avoid committing dead code and useless comments!** From e9b0f3ff511806452990f36ba8c1b86ca95f6d5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tiziano=20M=C3=BCller?= Date: Thu, 9 Apr 2020 16:01:10 +0200 Subject: [PATCH 09/19] cmake: factor out API VERSION, fix libdbcsr SONAME this essentially fixes #283 since you can build a shared library using cmake -DBUILD_SHARED_LIBS=ON The SONAME of the library will be libdbcsr.so.2.1, meaning dependencies will have to be rebuilt after every minor version bump. --- CMakeLists.txt | 1 + src/CMakeLists.txt | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 16689eb87d3..60bc4124015 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,6 +50,7 @@ endif () project(dbcsr DESCRIPTION "DBCSR: Distributed Block Compressed Sparse Row matrix library (https://dbcsr.cp2k.org)") set(dbcsr_VERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}${VERSION_GIT}) +set(dbcsr_APIVERSION ${VERSION_MAJOR}.${VERSION_MINOR}) # ================================================================================================= # OPTIONS diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c2965597214..bcee0839c6b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -140,7 +140,7 @@ add_library(dbcsr ${DBCSR_SRCS}) set_target_properties(dbcsr PROPERTIES VERSION ${dbcsr_VERSION} - SOVERSION ${VERSION_MAJOR}) + SOVERSION ${dbcsr_APIVERSION}) if (TARGET PkgConfig::deps) target_link_libraries(dbcsr PRIVATE PkgConfig::deps) @@ -340,7 +340,7 @@ if (WITH_C_API) set_target_properties(dbcsr_c PROPERTIES VERSION ${dbcsr_VERSION} - SOVERSION ${VERSION_MAJOR}.${VERSION_MINOR}) + SOVERSION ${dbcsr_APIVERSION}) target_link_libraries(dbcsr_c PRIVATE dbcsr) target_link_libraries(dbcsr_c PUBLIC MPI::MPI_C) # the C API always needs MPI From 3fd51f63b4499e55c968ebcc24aab036f208db98 Mon Sep 17 00:00:00 2001 From: Alfio Lazzaro Date: Fri, 10 Apr 2020 10:34:00 +0200 Subject: [PATCH 10/19] Check for optional parameter for distribution in API --- src/dbcsr_api.F | 47 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/src/dbcsr_api.F b/src/dbcsr_api.F index 7bc537d7aca..81b4a80beae 100644 --- a/src/dbcsr_api.F +++ b/src/dbcsr_api.F @@ -660,6 +660,8 @@ SUBROUTINE dbcsr_get_info(matrix, nblkrows_total, nblkcols_total, & CHARACTER, INTENT(OUT), OPTIONAL :: matrix_type INTEGER, INTENT(OUT), OPTIONAL :: data_type, group + TYPE(dbcsr_dist_prv_obj) :: dist + CALL dbcsr_get_info_prv(matrix=matrix%prv, & nblkrows_total=nblkrows_total, & nblkcols_total=nblkcols_total, & @@ -679,11 +681,16 @@ SUBROUTINE dbcsr_get_info(matrix, nblkrows_total, nblkcols_total, & col_blk_size=col_blk_size, & row_blk_offset=row_blk_offset, & col_blk_offset=col_blk_offset, & - distribution=distribution%prv, & + distribution=dist, & name=name, & matrix_type=matrix_type, & data_type=data_type, & group=group) + + IF (PRESENT(distribution)) THEN + distribution%prv = dist + ENDIF + END SUBROUTINE dbcsr_get_info SUBROUTINE dbcsr_distribution_get(dist, row_dist, col_dist, & @@ -833,8 +840,13 @@ SUBROUTINE dbcsr_distribution_new(dist, template, group, pgrid, row_dist, col_di INTEGER, DIMENSION(:), INTENT(IN), POINTER :: row_dist, col_dist LOGICAL, INTENT(IN), OPTIONAL :: reuse_arrays - call dbcsr_distribution_new_prv(dist%prv, template%prv, group, pgrid, row_dist, col_dist, & - reuse_arrays) + IF (PRESENT(template)) THEN + call dbcsr_distribution_new_prv(dist%prv, template%prv, group, pgrid, row_dist, col_dist, & + reuse_arrays) + ELSE + call dbcsr_distribution_new_prv(dist%prv, group=group, pgrid=pgrid, row_dist=row_dist, col_dist=col_dist, & + reuse_arrays=reuse_arrays) + ENDIF END SUBROUTINE dbcsr_distribution_new SUBROUTINE dbcsr_print_statistics(print_timers, callgraph_filename) @@ -896,11 +908,19 @@ SUBROUTINE dbcsr_create_template(matrix, name, template, & LOGICAL, INTENT(IN), OPTIONAL :: reuse_arrays, mutable_work CHARACTER, INTENT(IN), OPTIONAL :: replication_type - call dbcsr_create_prv(matrix%prv, template%prv, name, & - dist%prv, matrix_type, & - row_blk_size, col_blk_size, nze, data_type, & - reuse_arrays=reuse_arrays, mutable_work=mutable_work, & - replication_type=replication_type) + IF (PRESENT(dist)) THEN + CALL dbcsr_create_prv(matrix%prv, template%prv, name, & + dist%prv, matrix_type, & + row_blk_size, col_blk_size, nze, data_type, & + reuse_arrays=reuse_arrays, mutable_work=mutable_work, & + replication_type=replication_type) + ELSE + CALL dbcsr_create_prv(matrix%prv, template%prv, name, & + matrix_type=matrix_type, & + row_blk_size=row_blk_size, col_blk_size=col_blk_size, nze=nze, data_type=data_type, & + reuse_arrays=reuse_arrays, mutable_work=mutable_work, & + replication_type=replication_type) + ENDIF END SUBROUTINE dbcsr_create_template SUBROUTINE dbcsr_filter(matrix, eps, method, use_absolute, filter_diag) @@ -973,9 +993,14 @@ SUBROUTINE dbcsr_transposed(transposed, normal, shallow_data_copy, & TYPE(dbcsr_distribution_type), INTENT(IN), & OPTIONAL :: use_distribution - CALL dbcsr_transposed_prv(transposed%prv, normal%prv, shallow_data_copy, & - transpose_data, transpose_distribution, & - use_distribution%prv) + IF (PRESENT(use_distribution)) THEN + CALL dbcsr_transposed_prv(transposed%prv, normal%prv, shallow_data_copy, & + transpose_data, transpose_distribution, & + use_distribution%prv) + ELSE + CALL dbcsr_transposed_prv(transposed%prv, normal%prv, shallow_data_copy, & + transpose_data, transpose_distribution) + ENDIF END SUBROUTINE dbcsr_transposed SUBROUTINE dbcsr_function_of_elements(matrix_a, func, a0, a1, a2) From b44e9b868f700be889d673e84e31efaeff087a36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tiziano=20M=C3=BCller?= Date: Fri, 10 Apr 2020 21:00:19 +0200 Subject: [PATCH 11/19] docker: use Ubuntu 20.04 for OpenBLAS with OpenMP --- tools/docker/Dockerfile.build-env-ubuntu | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/tools/docker/Dockerfile.build-env-ubuntu b/tools/docker/Dockerfile.build-env-ubuntu index 3df1f5a5895..c929d67809b 100644 --- a/tools/docker/Dockerfile.build-env-ubuntu +++ b/tools/docker/Dockerfile.build-env-ubuntu @@ -1,20 +1,26 @@ -FROM ubuntu:19.10 +FROM ubuntu:20.04 # we need at least Ubuntu 19.10 for: # git > 2.18 # lcov > 1.13 # openmpi > 2.1 +# we need at least Ubuntu 20.04 for: +# ninja >= 1.10 +# an OpenBLAS built with OpenMP-support + +ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update \ && apt-get install -y \ locales \ gfortran \ g++ \ - libopenmpi-dev \ openmpi-bin \ - libopenblas-dev \ - libopenmpi-dev openmpi-bin \ + libopenmpi-dev \ mpich \ + libmpich-dev \ + libopenblas-openmp-dev \ + ninja-build \ lcov \ pkg-config \ git \ @@ -33,7 +39,6 @@ RUN ln -s python3 /usr/bin/python ARG libxsmm_version=1.15 ARG cmake_version=3.17.0 -ARG ninja_version=1.10.0 RUN set -ex && \ pip3 install \ @@ -44,10 +49,6 @@ RUN set -ex && \ RUN set -ex && \ curl -LsS https://github.com/Kitware/CMake/releases/download/v${cmake_version}/cmake-${cmake_version}-Linux-x86_64.tar.gz | tar --strip-components=1 -xz -C /usr/local -RUN set -ex && \ - curl -LsS https://github.com/ninja-build/ninja/releases/download/v${ninja_version}/ninja-linux.zip | funzip > /usr/local/bin/ninja \ - && chmod a+x /usr/local/bin/ninja - RUN set -ex && \ curl -LsS https://github.com/hfp/libxsmm/archive/${libxsmm_version}.tar.gz | tar -xz -C /opt \ && ln -s libxsmm-${libxsmm_version} /opt/libxsmm \ From bc5b4c64372e633c4aad95eb2b719aa43ba69e15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tiziano=20M=C3=BCller?= Date: Thu, 9 Apr 2020 18:26:00 +0200 Subject: [PATCH 12/19] cmake: drop FindPkgConfig imported from v3.12 This reverts commit 001d1135096c5467bd697a3b3f66135d9440bffe. We now require CMake 3.12 anyway, so this is already there. --- cmake/FindPackageHandleStandardArgs.cmake | 386 ----------- cmake/FindPackageMessage.cmake | 48 -- cmake/FindPkgConfig.cmake | 737 ---------------------- 3 files changed, 1171 deletions(-) delete mode 100644 cmake/FindPackageHandleStandardArgs.cmake delete mode 100644 cmake/FindPackageMessage.cmake delete mode 100644 cmake/FindPkgConfig.cmake diff --git a/cmake/FindPackageHandleStandardArgs.cmake b/cmake/FindPackageHandleStandardArgs.cmake deleted file mode 100644 index 1722d6aafbf..00000000000 --- a/cmake/FindPackageHandleStandardArgs.cmake +++ /dev/null @@ -1,386 +0,0 @@ -# Distributed under the OSI-approved BSD 3-Clause License. See accompanying -# file Copyright.txt or https://cmake.org/licensing for details. - -#[=======================================================================[.rst: -FindPackageHandleStandardArgs ------------------------------ - -This module provides a function intended to be used in :ref:`Find Modules` -implementing :command:`find_package()` calls. It handles the -``REQUIRED``, ``QUIET`` and version-related arguments of ``find_package``. -It also sets the ``_FOUND`` variable. The package is -considered found if all variables listed contain valid results, e.g. -valid filepaths. - -.. command:: find_package_handle_standard_args - - There are two signatures:: - - find_package_handle_standard_args( - (DEFAULT_MSG|) - ... - ) - - find_package_handle_standard_args( - [FOUND_VAR ] - [REQUIRED_VARS ...] - [VERSION_VAR ] - [HANDLE_COMPONENTS] - [CONFIG_MODE] - [FAIL_MESSAGE ] - ) - - The ``_FOUND`` variable will be set to ``TRUE`` if all - the variables ``...`` are valid and any optional - constraints are satisfied, and ``FALSE`` otherwise. A success or - failure message may be displayed based on the results and on - whether the ``REQUIRED`` and/or ``QUIET`` option was given to - the :command:`find_package` call. - - The options are: - - ``(DEFAULT_MSG|)`` - In the simple signature this specifies the failure message. - Use ``DEFAULT_MSG`` to ask for a default message to be computed - (recommended). Not valid in the full signature. - - ``FOUND_VAR `` - Obsolete. Specifies either ``_FOUND`` or - ``_FOUND`` as the result variable. This exists only - for compatibility with older versions of CMake and is now ignored. - Result variables of both names are always set for compatibility. - - ``REQUIRED_VARS ...`` - Specify the variables which are required for this package. - These may be named in the generated failure message asking the - user to set the missing variable values. Therefore these should - typically be cache entries such as ``FOO_LIBRARY`` and not output - variables like ``FOO_LIBRARIES``. - - ``VERSION_VAR `` - Specify the name of a variable that holds the version of the package - that has been found. This version will be checked against the - (potentially) specified required version given to the - :command:`find_package` call, including its ``EXACT`` option. - The default messages include information about the required - version and the version which has been actually found, both - if the version is ok or not. - - ``HANDLE_COMPONENTS`` - Enable handling of package components. In this case, the command - will report which components have been found and which are missing, - and the ``_FOUND`` variable will be set to ``FALSE`` - if any of the required components (i.e. not the ones listed after - the ``OPTIONAL_COMPONENTS`` option of :command:`find_package`) are - missing. - - ``CONFIG_MODE`` - Specify that the calling find module is a wrapper around a - call to ``find_package( NO_MODULE)``. This implies - a ``VERSION_VAR`` value of ``_VERSION``. The command - will automatically check whether the package configuration file - was found. - - ``FAIL_MESSAGE `` - Specify a custom failure message instead of using the default - generated message. Not recommended. - -Example for the simple signature: - -.. code-block:: cmake - - find_package_handle_standard_args(LibXml2 DEFAULT_MSG - LIBXML2_LIBRARY LIBXML2_INCLUDE_DIR) - -The ``LibXml2`` package is considered to be found if both -``LIBXML2_LIBRARY`` and ``LIBXML2_INCLUDE_DIR`` are valid. -Then also ``LibXml2_FOUND`` is set to ``TRUE``. If it is not found -and ``REQUIRED`` was used, it fails with a -:command:`message(FATAL_ERROR)`, independent whether ``QUIET`` was -used or not. If it is found, success will be reported, including -the content of the first ````. On repeated CMake runs, -the same message will not be printed again. - -Example for the full signature: - -.. code-block:: cmake - - find_package_handle_standard_args(LibArchive - REQUIRED_VARS LibArchive_LIBRARY LibArchive_INCLUDE_DIR - VERSION_VAR LibArchive_VERSION) - -In this case, the ``LibArchive`` package is considered to be found if -both ``LibArchive_LIBRARY`` and ``LibArchive_INCLUDE_DIR`` are valid. -Also the version of ``LibArchive`` will be checked by using the version -contained in ``LibArchive_VERSION``. Since no ``FAIL_MESSAGE`` is given, -the default messages will be printed. - -Another example for the full signature: - -.. code-block:: cmake - - find_package(Automoc4 QUIET NO_MODULE HINTS /opt/automoc4) - find_package_handle_standard_args(Automoc4 CONFIG_MODE) - -In this case, a ``FindAutmoc4.cmake`` module wraps a call to -``find_package(Automoc4 NO_MODULE)`` and adds an additional search -directory for ``automoc4``. Then the call to -``find_package_handle_standard_args`` produces a proper success/failure -message. -#]=======================================================================] - -include(${CMAKE_CURRENT_LIST_DIR}/FindPackageMessage.cmake) - -# internal helper macro -macro(_FPHSA_FAILURE_MESSAGE _msg) - if (${_NAME}_FIND_REQUIRED) - message(FATAL_ERROR "${_msg}") - else () - if (NOT ${_NAME}_FIND_QUIETLY) - message(STATUS "${_msg}") - endif () - endif () -endmacro() - - -# internal helper macro to generate the failure message when used in CONFIG_MODE: -macro(_FPHSA_HANDLE_FAILURE_CONFIG_MODE) - # _CONFIG is set, but FOUND is false, this means that some other of the REQUIRED_VARS was not found: - if(${_NAME}_CONFIG) - _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: missing:${MISSING_VARS} (found ${${_NAME}_CONFIG} ${VERSION_MSG})") - else() - # If _CONSIDERED_CONFIGS is set, the config-file has been found, but no suitable version. - # List them all in the error message: - if(${_NAME}_CONSIDERED_CONFIGS) - set(configsText "") - list(LENGTH ${_NAME}_CONSIDERED_CONFIGS configsCount) - math(EXPR configsCount "${configsCount} - 1") - foreach(currentConfigIndex RANGE ${configsCount}) - list(GET ${_NAME}_CONSIDERED_CONFIGS ${currentConfigIndex} filename) - list(GET ${_NAME}_CONSIDERED_VERSIONS ${currentConfigIndex} version) - string(APPEND configsText " ${filename} (version ${version})\n") - endforeach() - if (${_NAME}_NOT_FOUND_MESSAGE) - string(APPEND configsText " Reason given by package: ${${_NAME}_NOT_FOUND_MESSAGE}\n") - endif() - _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} ${VERSION_MSG}, checked the following files:\n${configsText}") - - else() - # Simple case: No Config-file was found at all: - _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: found neither ${_NAME}Config.cmake nor ${_NAME_LOWER}-config.cmake ${VERSION_MSG}") - endif() - endif() -endmacro() - - -function(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FIRST_ARG) - -# Set up the arguments for `cmake_parse_arguments`. - set(options CONFIG_MODE HANDLE_COMPONENTS) - set(oneValueArgs FAIL_MESSAGE VERSION_VAR FOUND_VAR) - set(multiValueArgs REQUIRED_VARS) - -# Check whether we are in 'simple' or 'extended' mode: - set(_KEYWORDS_FOR_EXTENDED_MODE ${options} ${oneValueArgs} ${multiValueArgs} ) - list(FIND _KEYWORDS_FOR_EXTENDED_MODE "${_FIRST_ARG}" INDEX) - - if(${INDEX} EQUAL -1) - set(FPHSA_FAIL_MESSAGE ${_FIRST_ARG}) - set(FPHSA_REQUIRED_VARS ${ARGN}) - set(FPHSA_VERSION_VAR) - else() - cmake_parse_arguments(FPHSA "${options}" "${oneValueArgs}" "${multiValueArgs}" ${_FIRST_ARG} ${ARGN}) - - if(FPHSA_UNPARSED_ARGUMENTS) - message(FATAL_ERROR "Unknown keywords given to FIND_PACKAGE_HANDLE_STANDARD_ARGS(): \"${FPHSA_UNPARSED_ARGUMENTS}\"") - endif() - - if(NOT FPHSA_FAIL_MESSAGE) - set(FPHSA_FAIL_MESSAGE "DEFAULT_MSG") - endif() - - # In config-mode, we rely on the variable _CONFIG, which is set by find_package() - # when it successfully found the config-file, including version checking: - if(FPHSA_CONFIG_MODE) - list(INSERT FPHSA_REQUIRED_VARS 0 ${_NAME}_CONFIG) - list(REMOVE_DUPLICATES FPHSA_REQUIRED_VARS) - set(FPHSA_VERSION_VAR ${_NAME}_VERSION) - endif() - - if(NOT FPHSA_REQUIRED_VARS) - message(FATAL_ERROR "No REQUIRED_VARS specified for FIND_PACKAGE_HANDLE_STANDARD_ARGS()") - endif() - endif() - -# now that we collected all arguments, process them - - if("x${FPHSA_FAIL_MESSAGE}" STREQUAL "xDEFAULT_MSG") - set(FPHSA_FAIL_MESSAGE "Could NOT find ${_NAME}") - endif() - - list(GET FPHSA_REQUIRED_VARS 0 _FIRST_REQUIRED_VAR) - - string(TOUPPER ${_NAME} _NAME_UPPER) - string(TOLOWER ${_NAME} _NAME_LOWER) - - if(FPHSA_FOUND_VAR) - if(FPHSA_FOUND_VAR MATCHES "^${_NAME}_FOUND$" OR FPHSA_FOUND_VAR MATCHES "^${_NAME_UPPER}_FOUND$") - set(_FOUND_VAR ${FPHSA_FOUND_VAR}) - else() - message(FATAL_ERROR "The argument for FOUND_VAR is \"${FPHSA_FOUND_VAR}\", but only \"${_NAME}_FOUND\" and \"${_NAME_UPPER}_FOUND\" are valid names.") - endif() - else() - set(_FOUND_VAR ${_NAME_UPPER}_FOUND) - endif() - - # collect all variables which were not found, so they can be printed, so the - # user knows better what went wrong (#6375) - set(MISSING_VARS "") - set(DETAILS "") - # check if all passed variables are valid - set(FPHSA_FOUND_${_NAME} TRUE) - foreach(_CURRENT_VAR ${FPHSA_REQUIRED_VARS}) - if(NOT ${_CURRENT_VAR}) - set(FPHSA_FOUND_${_NAME} FALSE) - string(APPEND MISSING_VARS " ${_CURRENT_VAR}") - else() - string(APPEND DETAILS "[${${_CURRENT_VAR}}]") - endif() - endforeach() - if(FPHSA_FOUND_${_NAME}) - set(${_NAME}_FOUND TRUE) - set(${_NAME_UPPER}_FOUND TRUE) - else() - set(${_NAME}_FOUND FALSE) - set(${_NAME_UPPER}_FOUND FALSE) - endif() - - # component handling - unset(FOUND_COMPONENTS_MSG) - unset(MISSING_COMPONENTS_MSG) - - if(FPHSA_HANDLE_COMPONENTS) - foreach(comp ${${_NAME}_FIND_COMPONENTS}) - if(${_NAME}_${comp}_FOUND) - - if(NOT DEFINED FOUND_COMPONENTS_MSG) - set(FOUND_COMPONENTS_MSG "found components: ") - endif() - string(APPEND FOUND_COMPONENTS_MSG " ${comp}") - - else() - - if(NOT DEFINED MISSING_COMPONENTS_MSG) - set(MISSING_COMPONENTS_MSG "missing components: ") - endif() - string(APPEND MISSING_COMPONENTS_MSG " ${comp}") - - if(${_NAME}_FIND_REQUIRED_${comp}) - set(${_NAME}_FOUND FALSE) - string(APPEND MISSING_VARS " ${comp}") - endif() - - endif() - endforeach() - set(COMPONENT_MSG "${FOUND_COMPONENTS_MSG} ${MISSING_COMPONENTS_MSG}") - string(APPEND DETAILS "[c${COMPONENT_MSG}]") - endif() - - # version handling: - set(VERSION_MSG "") - set(VERSION_OK TRUE) - - # check with DEFINED here as the requested or found version may be "0" - if (DEFINED ${_NAME}_FIND_VERSION) - if(DEFINED ${FPHSA_VERSION_VAR}) - set(_FOUND_VERSION ${${FPHSA_VERSION_VAR}}) - - if(${_NAME}_FIND_VERSION_EXACT) # exact version required - # count the dots in the version string - string(REGEX REPLACE "[^.]" "" _VERSION_DOTS "${_FOUND_VERSION}") - # add one dot because there is one dot more than there are components - string(LENGTH "${_VERSION_DOTS}." _VERSION_DOTS) - if (_VERSION_DOTS GREATER ${_NAME}_FIND_VERSION_COUNT) - # Because of the C++ implementation of find_package() ${_NAME}_FIND_VERSION_COUNT - # is at most 4 here. Therefore a simple lookup table is used. - if (${_NAME}_FIND_VERSION_COUNT EQUAL 1) - set(_VERSION_REGEX "[^.]*") - elseif (${_NAME}_FIND_VERSION_COUNT EQUAL 2) - set(_VERSION_REGEX "[^.]*\\.[^.]*") - elseif (${_NAME}_FIND_VERSION_COUNT EQUAL 3) - set(_VERSION_REGEX "[^.]*\\.[^.]*\\.[^.]*") - else () - set(_VERSION_REGEX "[^.]*\\.[^.]*\\.[^.]*\\.[^.]*") - endif () - string(REGEX REPLACE "^(${_VERSION_REGEX})\\..*" "\\1" _VERSION_HEAD "${_FOUND_VERSION}") - unset(_VERSION_REGEX) - if (NOT ${_NAME}_FIND_VERSION VERSION_EQUAL _VERSION_HEAD) - set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is exact version \"${${_NAME}_FIND_VERSION}\"") - set(VERSION_OK FALSE) - else () - set(VERSION_MSG "(found suitable exact version \"${_FOUND_VERSION}\")") - endif () - unset(_VERSION_HEAD) - else () - if (NOT ${_NAME}_FIND_VERSION VERSION_EQUAL _FOUND_VERSION) - set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is exact version \"${${_NAME}_FIND_VERSION}\"") - set(VERSION_OK FALSE) - else () - set(VERSION_MSG "(found suitable exact version \"${_FOUND_VERSION}\")") - endif () - endif () - unset(_VERSION_DOTS) - - else() # minimum version specified: - if (${_NAME}_FIND_VERSION VERSION_GREATER _FOUND_VERSION) - set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is at least \"${${_NAME}_FIND_VERSION}\"") - set(VERSION_OK FALSE) - else () - set(VERSION_MSG "(found suitable version \"${_FOUND_VERSION}\", minimum required is \"${${_NAME}_FIND_VERSION}\")") - endif () - endif() - - else() - - # if the package was not found, but a version was given, add that to the output: - if(${_NAME}_FIND_VERSION_EXACT) - set(VERSION_MSG "(Required is exact version \"${${_NAME}_FIND_VERSION}\")") - else() - set(VERSION_MSG "(Required is at least version \"${${_NAME}_FIND_VERSION}\")") - endif() - - endif() - else () - # Check with DEFINED as the found version may be 0. - if(DEFINED ${FPHSA_VERSION_VAR}) - set(VERSION_MSG "(found version \"${${FPHSA_VERSION_VAR}}\")") - endif() - endif () - - if(VERSION_OK) - string(APPEND DETAILS "[v${${FPHSA_VERSION_VAR}}(${${_NAME}_FIND_VERSION})]") - else() - set(${_NAME}_FOUND FALSE) - endif() - - - # print the result: - if (${_NAME}_FOUND) - FIND_PACKAGE_MESSAGE(${_NAME} "Found ${_NAME}: ${${_FIRST_REQUIRED_VAR}} ${VERSION_MSG} ${COMPONENT_MSG}" "${DETAILS}") - else () - - if(FPHSA_CONFIG_MODE) - _FPHSA_HANDLE_FAILURE_CONFIG_MODE() - else() - if(NOT VERSION_OK) - _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: ${VERSION_MSG} (found ${${_FIRST_REQUIRED_VAR}})") - else() - _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} (missing:${MISSING_VARS}) ${VERSION_MSG}") - endif() - endif() - - endif () - - set(${_NAME}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE) - set(${_NAME_UPPER}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE) -endfunction() diff --git a/cmake/FindPackageMessage.cmake b/cmake/FindPackageMessage.cmake deleted file mode 100644 index 1cdfde81605..00000000000 --- a/cmake/FindPackageMessage.cmake +++ /dev/null @@ -1,48 +0,0 @@ -# Distributed under the OSI-approved BSD 3-Clause License. See accompanying -# file Copyright.txt or https://cmake.org/licensing for details. - -#[=======================================================================[.rst: -FindPackageMessage ------------------- - - - -FIND_PACKAGE_MESSAGE( "message for user" "find result details") - -This macro is intended to be used in FindXXX.cmake modules files. It -will print a message once for each unique find result. This is useful -for telling the user where a package was found. The first argument -specifies the name (XXX) of the package. The second argument -specifies the message to display. The third argument lists details -about the find result so that if they change the message will be -displayed again. The macro also obeys the QUIET argument to the -find_package command. - -Example: - -:: - - if(X11_FOUND) - FIND_PACKAGE_MESSAGE(X11 "Found X11: ${X11_X11_LIB}" - "[${X11_X11_LIB}][${X11_INCLUDE_DIR}]") - else() - ... - endif() -#]=======================================================================] - -function(FIND_PACKAGE_MESSAGE pkg msg details) - # Avoid printing a message repeatedly for the same find result. - if(NOT ${pkg}_FIND_QUIETLY) - string(REPLACE "\n" "" details "${details}") - set(DETAILS_VAR FIND_PACKAGE_MESSAGE_DETAILS_${pkg}) - if(NOT "${details}" STREQUAL "${${DETAILS_VAR}}") - # The message has not yet been printed. - message(STATUS "${msg}") - - # Save the find details in the cache to avoid printing the same - # message again. - set("${DETAILS_VAR}" "${details}" - CACHE INTERNAL "Details about finding ${pkg}") - endif() - endif() -endfunction() diff --git a/cmake/FindPkgConfig.cmake b/cmake/FindPkgConfig.cmake deleted file mode 100644 index a45aef24962..00000000000 --- a/cmake/FindPkgConfig.cmake +++ /dev/null @@ -1,737 +0,0 @@ -# Distributed under the OSI-approved BSD 3-Clause License. See accompanying -# file Copyright.txt or https://cmake.org/licensing for details. - -#[========================================[.rst: -FindPkgConfig -------------- - -A ``pkg-config`` module for CMake. - -Finds the ``pkg-config`` executable and adds the :command:`pkg_get_variable`, -:command:`pkg_check_modules` and :command:`pkg_search_module` commands. The -following variables will also be set: - -``PKG_CONFIG_FOUND`` - if pkg-config executable was found -``PKG_CONFIG_EXECUTABLE`` - pathname of the pkg-config program -``PKG_CONFIG_VERSION_STRING`` - version of pkg-config (since CMake 2.8.8) - -#]========================================] - -### Common stuff #### -set(PKG_CONFIG_VERSION 1) - -# find pkg-config, use PKG_CONFIG if set -if((NOT PKG_CONFIG_EXECUTABLE) AND (NOT "$ENV{PKG_CONFIG}" STREQUAL "")) - set(PKG_CONFIG_EXECUTABLE "$ENV{PKG_CONFIG}" CACHE FILEPATH "pkg-config executable") -endif() -find_program(PKG_CONFIG_EXECUTABLE NAMES pkg-config DOC "pkg-config executable") -mark_as_advanced(PKG_CONFIG_EXECUTABLE) - -if (PKG_CONFIG_EXECUTABLE) - execute_process(COMMAND ${PKG_CONFIG_EXECUTABLE} --version - OUTPUT_VARIABLE PKG_CONFIG_VERSION_STRING - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE) -endif () - -include(${CMAKE_CURRENT_LIST_DIR}/FindPackageHandleStandardArgs.cmake) -find_package_handle_standard_args(PkgConfig - REQUIRED_VARS PKG_CONFIG_EXECUTABLE - VERSION_VAR PKG_CONFIG_VERSION_STRING) - -# This is needed because the module name is "PkgConfig" but the name of -# this variable has always been PKG_CONFIG_FOUND so this isn't automatically -# handled by FPHSA. -set(PKG_CONFIG_FOUND "${PKGCONFIG_FOUND}") - -# Unsets the given variables -macro(_pkgconfig_unset var) - set(${var} "" CACHE INTERNAL "") -endmacro() - -macro(_pkgconfig_set var value) - set(${var} ${value} CACHE INTERNAL "") -endmacro() - -# Invokes pkgconfig, cleans up the result and sets variables -macro(_pkgconfig_invoke _pkglist _prefix _varname _regexp) - set(_pkgconfig_invoke_result) - - execute_process( - COMMAND ${PKG_CONFIG_EXECUTABLE} ${ARGN} ${_pkglist} - OUTPUT_VARIABLE _pkgconfig_invoke_result - RESULT_VARIABLE _pkgconfig_failed - OUTPUT_STRIP_TRAILING_WHITESPACE) - - if (_pkgconfig_failed) - set(_pkgconfig_${_varname} "") - _pkgconfig_unset(${_prefix}_${_varname}) - else() - string(REGEX REPLACE "[\r\n]" " " _pkgconfig_invoke_result "${_pkgconfig_invoke_result}") - - if (NOT ${_regexp} STREQUAL "") - string(REGEX REPLACE "${_regexp}" " " _pkgconfig_invoke_result "${_pkgconfig_invoke_result}") - endif() - - separate_arguments(_pkgconfig_invoke_result) - - #message(STATUS " ${_varname} ... ${_pkgconfig_invoke_result}") - set(_pkgconfig_${_varname} ${_pkgconfig_invoke_result}) - _pkgconfig_set(${_prefix}_${_varname} "${_pkgconfig_invoke_result}") - endif() -endmacro() - -#[========================================[.rst: -.. command:: pkg_get_variable - - Retrieves the value of a pkg-config variable ``varName`` and stores it in the - result variable ``resultVar`` in the calling scope. - - .. code-block:: cmake - - pkg_get_variable( ) - - If ``pkg-config`` returns multiple values for the specified variable, - ``resultVar`` will contain a :ref:`;-list `. - - For example: - - .. code-block:: cmake - - pkg_get_variable(GI_GIRDIR gobject-introspection-1.0 girdir) -#]========================================] -function (pkg_get_variable result pkg variable) - _pkgconfig_invoke("${pkg}" "prefix" "result" "" "--variable=${variable}") - set("${result}" - "${prefix_result}" - PARENT_SCOPE) -endfunction () - -# Invokes pkgconfig two times; once without '--static' and once with -# '--static' -macro(_pkgconfig_invoke_dyn _pkglist _prefix _varname cleanup_regexp) - _pkgconfig_invoke("${_pkglist}" ${_prefix} ${_varname} "${cleanup_regexp}" ${ARGN}) - _pkgconfig_invoke("${_pkglist}" ${_prefix} STATIC_${_varname} "${cleanup_regexp}" --static ${ARGN}) -endmacro() - -# Splits given arguments into options and a package list -macro(_pkgconfig_parse_options _result _is_req _is_silent _no_cmake_path _no_cmake_environment_path _imp_target _imp_target_global) - set(${_is_req} 0) - set(${_is_silent} 0) - set(${_no_cmake_path} 0) - set(${_no_cmake_environment_path} 0) - set(${_imp_target} 0) - set(${_imp_target_global} 0) - if(DEFINED PKG_CONFIG_USE_CMAKE_PREFIX_PATH) - if(NOT PKG_CONFIG_USE_CMAKE_PREFIX_PATH) - set(${_no_cmake_path} 1) - set(${_no_cmake_environment_path} 1) - endif() - elseif(CMAKE_MINIMUM_REQUIRED_VERSION VERSION_LESS 3.1) - set(${_no_cmake_path} 1) - set(${_no_cmake_environment_path} 1) - endif() - - foreach(_pkg ${ARGN}) - if (_pkg STREQUAL "REQUIRED") - set(${_is_req} 1) - endif () - if (_pkg STREQUAL "QUIET") - set(${_is_silent} 1) - endif () - if (_pkg STREQUAL "NO_CMAKE_PATH") - set(${_no_cmake_path} 1) - endif() - if (_pkg STREQUAL "NO_CMAKE_ENVIRONMENT_PATH") - set(${_no_cmake_environment_path} 1) - endif() - if (_pkg STREQUAL "IMPORTED_TARGET") - set(${_imp_target} 1) - endif() - if (_pkg STREQUAL "GLOBAL") - set(${_imp_target_global} 1) - endif() - endforeach() - - if (${_imp_target_global} AND NOT ${_imp_target}) - message(SEND_ERROR "the argument GLOBAL may only be used together with IMPORTED_TARGET") - endif() - - set(${_result} ${ARGN}) - list(REMOVE_ITEM ${_result} "REQUIRED") - list(REMOVE_ITEM ${_result} "QUIET") - list(REMOVE_ITEM ${_result} "NO_CMAKE_PATH") - list(REMOVE_ITEM ${_result} "NO_CMAKE_ENVIRONMENT_PATH") - list(REMOVE_ITEM ${_result} "IMPORTED_TARGET") - list(REMOVE_ITEM ${_result} "GLOBAL") -endmacro() - -# Add the content of a variable or an environment variable to a list of -# paths -# Usage: -# - _pkgconfig_add_extra_path(_extra_paths VAR) -# - _pkgconfig_add_extra_path(_extra_paths ENV VAR) -function(_pkgconfig_add_extra_path _extra_paths_var _var) - set(_is_env 0) - if(ARGC GREATER 2 AND _var STREQUAL "ENV") - set(_var ${ARGV2}) - set(_is_env 1) - endif() - if(NOT _is_env) - if(NOT "${${_var}}" STREQUAL "") - list(APPEND ${_extra_paths_var} ${${_var}}) - endif() - else() - if(NOT "$ENV{${_var}}" STREQUAL "") - file(TO_CMAKE_PATH "$ENV{${_var}}" _path) - list(APPEND ${_extra_paths_var} ${_path}) - unset(_path) - endif() - endif() - set(${_extra_paths_var} ${${_extra_paths_var}} PARENT_SCOPE) -endfunction() - -# scan the LDFLAGS returned by pkg-config for library directories and -# libraries, figure out the absolute paths of that libraries in the -# given directories -function(_pkg_find_libs _prefix _no_cmake_path _no_cmake_environment_path) - unset(_libs) - unset(_find_opts) - - # set the options that are used as long as the .pc file does not provide a library - # path to look into - if(_no_cmake_path) - list(APPEND _find_opts "NO_CMAKE_PATH") - endif() - if(_no_cmake_environment_path) - list(APPEND _find_opts "NO_CMAKE_ENVIRONMENT_PATH") - endif() - - unset(_search_paths) - foreach (flag IN LISTS ${_prefix}_LDFLAGS) - if (flag MATCHES "^-L(.*)") - list(APPEND _search_paths ${CMAKE_MATCH_1}) - continue() - endif() - if (flag MATCHES "^-l(.*)") - set(_pkg_search "${CMAKE_MATCH_1}") - else() - continue() - endif() - - if(_search_paths) - # Firstly search in -L paths - find_library(pkgcfg_lib_${_prefix}_${_pkg_search} - NAMES ${_pkg_search} - HINTS ${_search_paths} NO_DEFAULT_PATH) - endif() - find_library(pkgcfg_lib_${_prefix}_${_pkg_search} - NAMES ${_pkg_search} - ${_find_opts}) - list(APPEND _libs "${pkgcfg_lib_${_prefix}_${_pkg_search}}") - endforeach() - - set(${_prefix}_LINK_LIBRARIES "${_libs}" PARENT_SCOPE) -endfunction() - -# create an imported target from all the information returned by pkg-config -function(_pkg_create_imp_target _prefix _imp_target_global) - # only create the target if it is linkable, i.e. no executables - if (NOT TARGET PkgConfig::${_prefix} - AND ( ${_prefix}_INCLUDE_DIRS OR ${_prefix}_LINK_LIBRARIES OR ${_prefix}_CFLAGS_OTHER )) - if(${_imp_target_global}) - set(_global_opt "GLOBAL") - else() - unset(_global_opt) - endif() - add_library(PkgConfig::${_prefix} INTERFACE IMPORTED ${_global_opt}) - - if(${_prefix}_INCLUDE_DIRS) - set_property(TARGET PkgConfig::${_prefix} PROPERTY - INTERFACE_INCLUDE_DIRECTORIES "${${_prefix}_INCLUDE_DIRS}") - endif() - if(${_prefix}_LINK_LIBRARIES) - set_property(TARGET PkgConfig::${_prefix} PROPERTY - INTERFACE_LINK_LIBRARIES "${${_prefix}_LINK_LIBRARIES}") - endif() - if(${_prefix}_CFLAGS_OTHER) - set_property(TARGET PkgConfig::${_prefix} PROPERTY - INTERFACE_COMPILE_OPTIONS "${${_prefix}_CFLAGS_OTHER}") - endif() - endif() -endfunction() - -# recalculate the dynamic output -# this is a macro and not a function so the result of _pkg_find_libs is automatically propagated -macro(_pkg_recalculate _prefix _no_cmake_path _no_cmake_environment_path _imp_target _imp_target_global) - _pkg_find_libs(${_prefix} ${_no_cmake_path} ${_no_cmake_environment_path}) - if(${_imp_target}) - _pkg_create_imp_target(${_prefix} ${_imp_target_global}) - endif() -endmacro() - -### -macro(_pkg_check_modules_internal _is_required _is_silent _no_cmake_path _no_cmake_environment_path _imp_target _imp_target_global _prefix) - _pkgconfig_unset(${_prefix}_FOUND) - _pkgconfig_unset(${_prefix}_VERSION) - _pkgconfig_unset(${_prefix}_PREFIX) - _pkgconfig_unset(${_prefix}_INCLUDEDIR) - _pkgconfig_unset(${_prefix}_LIBDIR) - _pkgconfig_unset(${_prefix}_LIBS) - _pkgconfig_unset(${_prefix}_LIBS_L) - _pkgconfig_unset(${_prefix}_LIBS_PATHS) - _pkgconfig_unset(${_prefix}_LIBS_OTHER) - _pkgconfig_unset(${_prefix}_CFLAGS) - _pkgconfig_unset(${_prefix}_CFLAGS_I) - _pkgconfig_unset(${_prefix}_CFLAGS_OTHER) - _pkgconfig_unset(${_prefix}_STATIC_LIBDIR) - _pkgconfig_unset(${_prefix}_STATIC_LIBS) - _pkgconfig_unset(${_prefix}_STATIC_LIBS_L) - _pkgconfig_unset(${_prefix}_STATIC_LIBS_PATHS) - _pkgconfig_unset(${_prefix}_STATIC_LIBS_OTHER) - _pkgconfig_unset(${_prefix}_STATIC_CFLAGS) - _pkgconfig_unset(${_prefix}_STATIC_CFLAGS_I) - _pkgconfig_unset(${_prefix}_STATIC_CFLAGS_OTHER) - - # create a better addressable variable of the modules and calculate its size - set(_pkg_check_modules_list ${ARGN}) - list(LENGTH _pkg_check_modules_list _pkg_check_modules_cnt) - - if(PKG_CONFIG_EXECUTABLE) - # give out status message telling checked module - if (NOT ${_is_silent}) - if (_pkg_check_modules_cnt EQUAL 1) - message(STATUS "Checking for module '${_pkg_check_modules_list}'") - else() - message(STATUS "Checking for modules '${_pkg_check_modules_list}'") - endif() - endif() - - set(_pkg_check_modules_packages) - set(_pkg_check_modules_failed) - - set(_extra_paths) - - if(NOT _no_cmake_path) - _pkgconfig_add_extra_path(_extra_paths CMAKE_PREFIX_PATH) - _pkgconfig_add_extra_path(_extra_paths CMAKE_FRAMEWORK_PATH) - _pkgconfig_add_extra_path(_extra_paths CMAKE_APPBUNDLE_PATH) - endif() - - if(NOT _no_cmake_environment_path) - _pkgconfig_add_extra_path(_extra_paths ENV CMAKE_PREFIX_PATH) - _pkgconfig_add_extra_path(_extra_paths ENV CMAKE_FRAMEWORK_PATH) - _pkgconfig_add_extra_path(_extra_paths ENV CMAKE_APPBUNDLE_PATH) - endif() - - if(NOT "${_extra_paths}" STREQUAL "") - # Save the PKG_CONFIG_PATH environment variable, and add paths - # from the CMAKE_PREFIX_PATH variables - set(_pkgconfig_path_old "$ENV{PKG_CONFIG_PATH}") - set(_pkgconfig_path "${_pkgconfig_path_old}") - if(NOT "${_pkgconfig_path}" STREQUAL "") - file(TO_CMAKE_PATH "${_pkgconfig_path}" _pkgconfig_path) - endif() - - # Create a list of the possible pkgconfig subfolder (depending on - # the system - set(_lib_dirs) - if(NOT DEFINED CMAKE_SYSTEM_NAME - OR (CMAKE_SYSTEM_NAME MATCHES "^(Linux|kFreeBSD|GNU)$" - AND NOT CMAKE_CROSSCOMPILING)) - if(EXISTS "/etc/debian_version") # is this a debian system ? - if(CMAKE_LIBRARY_ARCHITECTURE) - list(APPEND _lib_dirs "lib/${CMAKE_LIBRARY_ARCHITECTURE}/pkgconfig") - endif() - else() - # not debian, check the FIND_LIBRARY_USE_LIB32_PATHS and FIND_LIBRARY_USE_LIB64_PATHS properties - get_property(uselib32 GLOBAL PROPERTY FIND_LIBRARY_USE_LIB32_PATHS) - if(uselib32 AND CMAKE_SIZEOF_VOID_P EQUAL 4) - list(APPEND _lib_dirs "lib32/pkgconfig") - endif() - get_property(uselib64 GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS) - if(uselib64 AND CMAKE_SIZEOF_VOID_P EQUAL 8) - list(APPEND _lib_dirs "lib64/pkgconfig") - endif() - get_property(uselibx32 GLOBAL PROPERTY FIND_LIBRARY_USE_LIBX32_PATHS) - if(uselibx32 AND CMAKE_INTERNAL_PLATFORM_ABI STREQUAL "ELF X32") - list(APPEND _lib_dirs "libx32/pkgconfig") - endif() - endif() - endif() - if(CMAKE_SYSTEM_NAME STREQUAL "FreeBSD" AND NOT CMAKE_CROSSCOMPILING) - list(APPEND _lib_dirs "libdata/pkgconfig") - endif() - list(APPEND _lib_dirs "lib/pkgconfig") - list(APPEND _lib_dirs "share/pkgconfig") - - # Check if directories exist and eventually append them to the - # pkgconfig path list - foreach(_prefix_dir ${_extra_paths}) - foreach(_lib_dir ${_lib_dirs}) - if(EXISTS "${_prefix_dir}/${_lib_dir}") - list(APPEND _pkgconfig_path "${_prefix_dir}/${_lib_dir}") - list(REMOVE_DUPLICATES _pkgconfig_path) - endif() - endforeach() - endforeach() - - # Prepare and set the environment variable - if(NOT "${_pkgconfig_path}" STREQUAL "") - # remove empty values from the list - list(REMOVE_ITEM _pkgconfig_path "") - file(TO_NATIVE_PATH "${_pkgconfig_path}" _pkgconfig_path) - if(UNIX) - string(REPLACE ";" ":" _pkgconfig_path "${_pkgconfig_path}") - string(REPLACE "\\ " " " _pkgconfig_path "${_pkgconfig_path}") - endif() - set(ENV{PKG_CONFIG_PATH} "${_pkgconfig_path}") - endif() - - # Unset variables - unset(_lib_dirs) - unset(_pkgconfig_path) - endif() - - # iterate through module list and check whether they exist and match the required version - foreach (_pkg_check_modules_pkg ${_pkg_check_modules_list}) - set(_pkg_check_modules_exist_query) - - # check whether version is given - if (_pkg_check_modules_pkg MATCHES "(.*[^><])(=|[><]=?)(.*)") - set(_pkg_check_modules_pkg_name "${CMAKE_MATCH_1}") - set(_pkg_check_modules_pkg_op "${CMAKE_MATCH_2}") - set(_pkg_check_modules_pkg_ver "${CMAKE_MATCH_3}") - else() - set(_pkg_check_modules_pkg_name "${_pkg_check_modules_pkg}") - set(_pkg_check_modules_pkg_op) - set(_pkg_check_modules_pkg_ver) - endif() - - _pkgconfig_unset(${_prefix}_${_pkg_check_modules_pkg_name}_VERSION) - _pkgconfig_unset(${_prefix}_${_pkg_check_modules_pkg_name}_PREFIX) - _pkgconfig_unset(${_prefix}_${_pkg_check_modules_pkg_name}_INCLUDEDIR) - _pkgconfig_unset(${_prefix}_${_pkg_check_modules_pkg_name}_LIBDIR) - - list(APPEND _pkg_check_modules_packages "${_pkg_check_modules_pkg_name}") - - # create the final query which is of the format: - # * > - # * >= - # * = - # * <= - # * < - # * --exists - list(APPEND _pkg_check_modules_exist_query --print-errors --short-errors) - if (_pkg_check_modules_pkg_op) - list(APPEND _pkg_check_modules_exist_query "${_pkg_check_modules_pkg_name} ${_pkg_check_modules_pkg_op} ${_pkg_check_modules_pkg_ver}") - else() - list(APPEND _pkg_check_modules_exist_query --exists) - list(APPEND _pkg_check_modules_exist_query "${_pkg_check_modules_pkg_name}") - endif() - - # execute the query - execute_process( - COMMAND ${PKG_CONFIG_EXECUTABLE} ${_pkg_check_modules_exist_query} - RESULT_VARIABLE _pkgconfig_retval - ERROR_VARIABLE _pkgconfig_error - ERROR_STRIP_TRAILING_WHITESPACE) - - # evaluate result and tell failures - if (_pkgconfig_retval) - if(NOT ${_is_silent}) - message(STATUS " ${_pkgconfig_error}") - endif() - - set(_pkg_check_modules_failed 1) - endif() - endforeach() - - if(_pkg_check_modules_failed) - # fail when requested - if (${_is_required}) - message(FATAL_ERROR "A required package was not found") - endif () - else() - # when we are here, we checked whether requested modules - # exist. Now, go through them and set variables - - _pkgconfig_set(${_prefix}_FOUND 1) - list(LENGTH _pkg_check_modules_packages pkg_count) - - # iterate through all modules again and set individual variables - foreach (_pkg_check_modules_pkg ${_pkg_check_modules_packages}) - # handle case when there is only one package required - if (pkg_count EQUAL 1) - set(_pkg_check_prefix "${_prefix}") - else() - set(_pkg_check_prefix "${_prefix}_${_pkg_check_modules_pkg}") - endif() - - _pkgconfig_invoke(${_pkg_check_modules_pkg} "${_pkg_check_prefix}" VERSION "" --modversion ) - pkg_get_variable("${_pkg_check_prefix}_PREFIX" ${_pkg_check_modules_pkg} "prefix") - pkg_get_variable("${_pkg_check_prefix}_INCLUDEDIR" ${_pkg_check_modules_pkg} "includedir") - pkg_get_variable("${_pkg_check_prefix}_LIBDIR" ${_pkg_check_modules_pkg} "libdir") - foreach (variable IN ITEMS PREFIX INCLUDEDIR LIBDIR) - _pkgconfig_set("${_pkg_check_prefix}_${variable}" "${${_pkg_check_prefix}_${variable}}") - endforeach () - - if (NOT ${_is_silent}) - message(STATUS " Found ${_pkg_check_modules_pkg}, version ${_pkgconfig_VERSION}") - endif () - endforeach() - - # set variables which are combined for multiple modules - _pkgconfig_invoke_dyn("${_pkg_check_modules_packages}" "${_prefix}" LIBRARIES "(^| )-l" --libs-only-l ) - _pkgconfig_invoke_dyn("${_pkg_check_modules_packages}" "${_prefix}" LIBRARY_DIRS "(^| )-L" --libs-only-L ) - _pkgconfig_invoke_dyn("${_pkg_check_modules_packages}" "${_prefix}" LDFLAGS "" --libs ) - _pkgconfig_invoke_dyn("${_pkg_check_modules_packages}" "${_prefix}" LDFLAGS_OTHER "" --libs-only-other ) - - _pkgconfig_invoke_dyn("${_pkg_check_modules_packages}" "${_prefix}" INCLUDE_DIRS "(^| )-I" --cflags-only-I ) - _pkgconfig_invoke_dyn("${_pkg_check_modules_packages}" "${_prefix}" CFLAGS "" --cflags ) - _pkgconfig_invoke_dyn("${_pkg_check_modules_packages}" "${_prefix}" CFLAGS_OTHER "" --cflags-only-other ) - - _pkg_recalculate("${_prefix}" ${_no_cmake_path} ${_no_cmake_environment_path} ${_imp_target} ${_imp_target_global}) - endif() - - if(NOT "${_extra_paths}" STREQUAL "") - # Restore the environment variable - set(ENV{PKG_CONFIG_PATH} "${_pkgconfig_path_old}") - endif() - - unset(_extra_paths) - unset(_pkgconfig_path_old) - else() - if (${_is_required}) - message(SEND_ERROR "pkg-config tool not found") - endif () - endif() -endmacro() - - -#[========================================[.rst: -.. command:: pkg_check_modules - - Checks for all the given modules, setting a variety of result variables in - the calling scope. - - .. code-block:: cmake - - pkg_check_modules( - [REQUIRED] [QUIET] - [NO_CMAKE_PATH] - [NO_CMAKE_ENVIRONMENT_PATH] - [IMPORTED_TARGET [GLOBAL]] - [...]) - - When the ``REQUIRED`` argument is given, the command will fail with an error - if module(s) could not be found. - - When the ``QUIET`` argument is given, no status messages will be printed. - - By default, if :variable:`CMAKE_MINIMUM_REQUIRED_VERSION` is 3.1 or - later, or if :variable:`PKG_CONFIG_USE_CMAKE_PREFIX_PATH` is set to a - boolean ``True`` value, then the :variable:`CMAKE_PREFIX_PATH`, - :variable:`CMAKE_FRAMEWORK_PATH`, and :variable:`CMAKE_APPBUNDLE_PATH` cache - and environment variables will be added to the ``pkg-config`` search path. - The ``NO_CMAKE_PATH`` and ``NO_CMAKE_ENVIRONMENT_PATH`` arguments - disable this behavior for the cache variables and environment variables - respectively. - - The ``IMPORTED_TARGET`` argument will create an imported target named - ``PkgConfig::`` that can be passed directly as an argument to - :command:`target_link_libraries`. The ``GLOBAL`` argument will make the - imported target available in global scope. - - Each ```` can be either a bare module name or it can be a - module name with a version constraint (operators ``=``, ``<``, ``>``, - ``<=`` and ``>=`` are supported). The following are examples for a module - named ``foo`` with various constraints: - - - ``foo`` matches any version. - - ``foo<2`` only matches versions before 2. - - ``foo>=3.1`` matches any version from 3.1 or later. - - ``foo=1.2.3`` requires that foo must be exactly version 1.2.3. - - The following variables may be set upon return. Two sets of values exist: - One for the common case (`` = ``) and another for the - information ``pkg-config`` provides when called with the ``--static`` - option (`` = _STATIC``). - - ``_FOUND`` - set to 1 if module(s) exist - ``_LIBRARIES`` - only the libraries (without the '-l') - ``_LINK_LIBRARIES`` - the libraries and their absolute paths - ``_LIBRARY_DIRS`` - the paths of the libraries (without the '-L') - ``_LDFLAGS`` - all required linker flags - ``_LDFLAGS_OTHER`` - all other linker flags - ``_INCLUDE_DIRS`` - the '-I' preprocessor flags (without the '-I') - ``_CFLAGS`` - all required cflags - ``_CFLAGS_OTHER`` - the other compiler flags - - All but ``_FOUND`` may be a :ref:`;-list ` if the - associated variable returned from ``pkg-config`` has multiple values. - - There are some special variables whose prefix depends on the number of - ```` given. When there is only one ````, - ```` will simply be ````, but if two or more ```` - items are given, ```` will be ``_``. - - ``_VERSION`` - version of the module - ``_PREFIX`` - prefix directory of the module - ``_INCLUDEDIR`` - include directory of the module - ``_LIBDIR`` - lib directory of the module - - Examples: - - .. code-block:: cmake - - pkg_check_modules (GLIB2 glib-2.0) - - Looks for any version of glib2. If found, the output variable - ``GLIB2_VERSION`` will hold the actual version found. - - .. code-block:: cmake - - pkg_check_modules (GLIB2 glib-2.0>=2.10) - - Looks for at least version 2.10 of glib2. If found, the output variable - ``GLIB2_VERSION`` will hold the actual version found. - - .. code-block:: cmake - - pkg_check_modules (FOO glib-2.0>=2.10 gtk+-2.0) - - Looks for both glib2-2.0 (at least version 2.10) and any version of - gtk2+-2.0. Only if both are found will ``FOO`` be considered found. - The ``FOO_glib-2.0_VERSION`` and ``FOO_gtk+-2.0_VERSION`` variables will be - set to their respective found module versions. - - .. code-block:: cmake - - pkg_check_modules (XRENDER REQUIRED xrender) - - Requires any version of ``xrender``. Example output variables set by a - successful call:: - - XRENDER_LIBRARIES=Xrender;X11 - XRENDER_STATIC_LIBRARIES=Xrender;X11;pthread;Xau;Xdmcp -#]========================================] -macro(pkg_check_modules _prefix _module0) - _pkgconfig_parse_options(_pkg_modules _pkg_is_required _pkg_is_silent _no_cmake_path _no_cmake_environment_path _imp_target _imp_target_global "${_module0}" ${ARGN}) - # check cached value - if (NOT DEFINED __pkg_config_checked_${_prefix} OR __pkg_config_checked_${_prefix} LESS ${PKG_CONFIG_VERSION} OR NOT ${_prefix}_FOUND OR - (NOT "${ARGN}" STREQUAL "" AND NOT "${__pkg_config_arguments_${_prefix}}" STREQUAL "${_module0};${ARGN}") OR - ( "${ARGN}" STREQUAL "" AND NOT "${__pkg_config_arguments_${_prefix}}" STREQUAL "${_module0}")) - _pkg_check_modules_internal("${_pkg_is_required}" "${_pkg_is_silent}" ${_no_cmake_path} ${_no_cmake_environment_path} ${_imp_target} ${_imp_target_global} "${_prefix}" ${_pkg_modules}) - - _pkgconfig_set(__pkg_config_checked_${_prefix} ${PKG_CONFIG_VERSION}) - if (${_prefix}_FOUND) - _pkgconfig_set(__pkg_config_arguments_${_prefix} "${_module0};${ARGN}") - endif() - else() - if (${_prefix}_FOUND) - _pkg_recalculate("${_prefix}" ${_no_cmake_path} ${_no_cmake_environment_path} ${_imp_target} ${_imp_target_global}) - endif() - endif() -endmacro() - - -#[========================================[.rst: -.. command:: pkg_search_module - - The behavior of this command is the same as :command:`pkg_check_modules`, - except that rather than checking for all the specified modules, it searches - for just the first successful match. - - .. code-block:: cmake - - pkg_search_module( - [REQUIRED] [QUIET] - [NO_CMAKE_PATH] - [NO_CMAKE_ENVIRONMENT_PATH] - [IMPORTED_TARGET [GLOBAL]] - [...]) - - Example: - - .. code-block:: cmake - - pkg_search_module (BAR libxml-2.0 libxml2 libxml>=2) -#]========================================] -macro(pkg_search_module _prefix _module0) - _pkgconfig_parse_options(_pkg_modules_alt _pkg_is_required _pkg_is_silent _no_cmake_path _no_cmake_environment_path _imp_target _imp_target_global "${_module0}" ${ARGN}) - # check cached value - if (NOT DEFINED __pkg_config_checked_${_prefix} OR __pkg_config_checked_${_prefix} LESS ${PKG_CONFIG_VERSION} OR NOT ${_prefix}_FOUND) - set(_pkg_modules_found 0) - - if (NOT ${_pkg_is_silent}) - message(STATUS "Checking for one of the modules '${_pkg_modules_alt}'") - endif () - - # iterate through all modules and stop at the first working one. - foreach(_pkg_alt ${_pkg_modules_alt}) - if(NOT _pkg_modules_found) - _pkg_check_modules_internal(0 1 ${_no_cmake_path} ${_no_cmake_environment_path} ${_imp_target} ${_imp_target_global} "${_prefix}" "${_pkg_alt}") - endif() - - if (${_prefix}_FOUND) - set(_pkg_modules_found 1) - endif() - endforeach() - - if (NOT ${_prefix}_FOUND) - if(${_pkg_is_required}) - message(SEND_ERROR "None of the required '${_pkg_modules_alt}' found") - endif() - endif() - - _pkgconfig_set(__pkg_config_checked_${_prefix} ${PKG_CONFIG_VERSION}) - elseif (${_prefix}_FOUND) - _pkg_recalculate("${_prefix}" ${_no_cmake_path} ${_no_cmake_environment_path} ${_imp_target} ${_imp_target_global}) - endif() -endmacro() - - -#[========================================[.rst: -Variables Affecting Behavior -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. variable:: PKG_CONFIG_EXECUTABLE - - This can be set to the path of the pkg-config executable. If not provided, - it will be set by the module as a result of calling :command:`find_program` - internally. The ``PKG_CONFIG`` environment variable can be used as a hint. - -.. variable:: PKG_CONFIG_USE_CMAKE_PREFIX_PATH - - Specifies whether :command:`pkg_check_modules` and - :command:`pkg_search_module` should add the paths in the - :variable:`CMAKE_PREFIX_PATH`, :variable:`CMAKE_FRAMEWORK_PATH` and - :variable:`CMAKE_APPBUNDLE_PATH` cache and environment variables to the - ``pkg-config`` search path. - - If this variable is not set, this behavior is enabled by default if - :variable:`CMAKE_MINIMUM_REQUIRED_VERSION` is 3.1 or later, disabled - otherwise. -#]========================================] - - -### Local Variables: -### mode: cmake -### End: From 2b26e4e5ee6f48a7fef07d6cf66c17ee50fca91f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tiziano=20M=C3=BCller?= Date: Sat, 11 Apr 2020 09:15:58 +0200 Subject: [PATCH 13/19] cmake: import FindBLAS, FindLAPACK, CheckFortranSourceRuns FindBLAS and FindLAPACK originate from CMake upstream at 14ffa6e, CheckFortranSourceRuns from 1bc5214. The latter is not present in CMake 3.12 (our officially supported one). --- cmake/CheckFortranSourceRuns.cmake | 173 ++++++ cmake/FindBLAS.cmake | 936 +++++++++++++++++++++++++++++ cmake/FindLAPACK.cmake | 530 ++++++++++++++++ 3 files changed, 1639 insertions(+) create mode 100644 cmake/CheckFortranSourceRuns.cmake create mode 100644 cmake/FindBLAS.cmake create mode 100644 cmake/FindLAPACK.cmake diff --git a/cmake/CheckFortranSourceRuns.cmake b/cmake/CheckFortranSourceRuns.cmake new file mode 100644 index 00000000000..f858b84a816 --- /dev/null +++ b/cmake/CheckFortranSourceRuns.cmake @@ -0,0 +1,173 @@ +# Distributed under the OSI-approved BSD 3-Clause License. See accompanying +# file Copyright.txt or https://cmake.org/licensing for details. + +#[=======================================================================[.rst: +CheckFortranSourceRuns +---------------------- + +Check if given Fortran source compiles and links into an executable and can +subsequently be run. + +.. command:: check_fortran_source_runs + + .. code-block:: cmake + + check_fortran_source_runs( + [SRC_EXT ]) + + Check that the source supplied in ```` can be compiled as a Fortran source + file, linked as an executable and then run. The ```` must be a Fortran program + containing at least an ``end`` statement--for example: + + .. code-block:: cmake + + check_fortran_source_runs("real :: x[*]; call co_sum(x); end" F2018coarrayOK) + + This command can help avoid costly build processes when a compiler lacks support + for a necessary feature, or a particular vendor library is not compatible with + the Fortran compiler version being used. Some of these failures only occur at runtime + instead of linktime, and a trivial runtime example can catch the issue before the + main build process. + + If the ```` could be built and run + successfully, the internal cache variable specified by ```` will + be set to 1, otherwise it will be set to an value that evaluates to boolean + false (e.g. an empty string or an error message). + + By default, the test source file will be given a ``.F90`` file extension. The + ``SRC_EXT`` option can be used to override this with ``.`` instead. + + The underlying check is performed by the :command:`try_run` command. The + compile and link commands can be influenced by setting any of the following + variables prior to calling ``check_fortran_source_runs()``: + + ``CMAKE_REQUIRED_FLAGS`` + Additional flags to pass to the compiler. Note that the contents of + :variable:`CMAKE_Fortran_FLAGS _FLAGS>` and its associated + configuration-specific variable are automatically added to the compiler + command before the contents of ``CMAKE_REQUIRED_FLAGS``. + + ``CMAKE_REQUIRED_DEFINITIONS`` + A :ref:`;-list ` of compiler definitions of the form + ``-DFOO`` or ``-DFOO=bar``. A definition for the name specified by + ```` will also be added automatically. + + ``CMAKE_REQUIRED_INCLUDES`` + A :ref:`;-list ` of header search paths to pass to + the compiler. These will be the only header search paths used by + ``try_run()``, i.e. the contents of the :prop_dir:`INCLUDE_DIRECTORIES` + directory property will be ignored. + + ``CMAKE_REQUIRED_LINK_OPTIONS`` + A :ref:`;-list ` of options to add to the link + command (see :command:`try_run` for further details). + + ``CMAKE_REQUIRED_LIBRARIES`` + A :ref:`;-list ` of libraries to add to the link + command. These can be the name of system libraries or they can be + :ref:`Imported Targets ` (see :command:`try_run` for + further details). + + ``CMAKE_REQUIRED_QUIET`` + If this variable evaluates to a boolean true value, all status messages + associated with the check will be suppressed. + + The check is only performed once, with the result cached in the variable + named by ````. Every subsequent CMake run will re-use this cached + value rather than performing the check again, even if the ```` changes. + In order to force the check to be re-evaluated, the variable named by + ```` must be manually removed from the cache. + +#]=======================================================================] + +include_guard(GLOBAL) + +macro(CHECK_Fortran_SOURCE_RUNS SOURCE VAR) + if(NOT DEFINED "${VAR}") + set(_SRC_EXT) + set(_key) + foreach(arg ${ARGN}) + if("${arg}" MATCHES "^(SRC_EXT)$") + set(_key "${arg}") + elseif(_key) + list(APPEND _${_key} "${arg}") + else() + message(FATAL_ERROR "Unknown argument:\n ${arg}\n") + endif() + endforeach() + if(NOT _SRC_EXT) + set(_SRC_EXT F90) + endif() + if(CMAKE_REQUIRED_LINK_OPTIONS) + set(CHECK_Fortran_SOURCE_COMPILES_ADD_LINK_OPTIONS + LINK_OPTIONS ${CMAKE_REQUIRED_LINK_OPTIONS}) + else() + set(CHECK_Fortran_SOURCE_COMPILES_ADD_LINK_OPTIONS) + endif() + if(CMAKE_REQUIRED_LIBRARIES) + set(CHECK_Fortran_SOURCE_COMPILES_ADD_LIBRARIES + LINK_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES}) + else() + set(CHECK_Fortran_SOURCE_COMPILES_ADD_LIBRARIES) + endif() + if(CMAKE_REQUIRED_INCLUDES) + set(CHECK_Fortran_SOURCE_COMPILES_ADD_INCLUDES + "-DINCLUDE_DIRECTORIES:STRING=${CMAKE_REQUIRED_INCLUDES}") + else() + set(CHECK_Fortran_SOURCE_COMPILES_ADD_INCLUDES) + endif() + file(WRITE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/src.${_SRC_EXT}" + "${SOURCE}\n") + + if(NOT CMAKE_REQUIRED_QUIET) + message(CHECK_START "Performing Test ${VAR}") + endif() + try_run(${VAR}_EXITCODE ${VAR}_COMPILED + ${CMAKE_BINARY_DIR} + ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/src.${_SRC_EXT} + COMPILE_DEFINITIONS -D${VAR} ${CMAKE_REQUIRED_DEFINITIONS} + ${CHECK_Fortran_SOURCE_COMPILES_ADD_LINK_OPTIONS} + ${CHECK_Fortran_SOURCE_COMPILES_ADD_LIBRARIES} + CMAKE_FLAGS -DCOMPILE_DEFINITIONS:STRING=${CMAKE_REQUIRED_FLAGS} + -DCMAKE_SKIP_RPATH:BOOL=${CMAKE_SKIP_RPATH} + "${CHECK_Fortran_SOURCE_COMPILES_ADD_INCLUDES}" + COMPILE_OUTPUT_VARIABLE OUTPUT + RUN_OUTPUT_VARIABLE RUN_OUTPUT) + + # if it did not compile make the return value fail code of 1 + if(NOT ${VAR}_COMPILED) + set(${VAR}_EXITCODE 1) + endif() + # if the return value was 0 then it worked + if("${${VAR}_EXITCODE}" EQUAL 0) + set(${VAR} 1 CACHE INTERNAL "Test ${VAR}") + if(NOT CMAKE_REQUIRED_QUIET) + message(CHECK_PASS "Success") + endif() + file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log + "Performing Fortran SOURCE FILE Test ${VAR} succeeded with the following output:\n" + "${OUTPUT}\n" + "...and run output:\n" + "${RUN_OUTPUT}\n" + "Return value: ${${VAR}}\n" + "Source file was:\n${SOURCE}\n") + else() + if(CMAKE_CROSSCOMPILING AND "${${VAR}_EXITCODE}" MATCHES "FAILED_TO_RUN") + set(${VAR} "${${VAR}_EXITCODE}") + else() + set(${VAR} "" CACHE INTERNAL "Test ${VAR}") + endif() + + if(NOT CMAKE_REQUIRED_QUIET) + message(CHECK_FAIL "Failed") + endif() + file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log + "Performing Fortran SOURCE FILE Test ${VAR} failed with the following output:\n" + "${OUTPUT}\n" + "...and run output:\n" + "${RUN_OUTPUT}\n" + "Return value: ${${VAR}_EXITCODE}\n" + "Source file was:\n${SOURCE}\n") + endif() + endif() +endmacro() diff --git a/cmake/FindBLAS.cmake b/cmake/FindBLAS.cmake new file mode 100644 index 00000000000..caed7eff4cb --- /dev/null +++ b/cmake/FindBLAS.cmake @@ -0,0 +1,936 @@ +# Distributed under the OSI-approved BSD 3-Clause License. See accompanying +# file Copyright.txt or https://cmake.org/licensing for details. + +#[=======================================================================[.rst: +FindBLAS +-------- + +Find Basic Linear Algebra Subprograms (BLAS) library + +This module finds an installed Fortran library that implements the +BLAS linear-algebra interface (see http://www.netlib.org/blas/). + +The approach follows that taken for the ``autoconf`` macro file, +``acx_blas.m4`` (distributed at +http://ac-archive.sourceforge.net/ac-archive/acx_blas.html). + +Input Variables +^^^^^^^^^^^^^^^ + +The following variables may be set to influence this module's behavior: + +``BLA_STATIC`` + if ``ON`` use static linkage + +``BLA_VENDOR`` + If set, checks only the specified vendor, if not set checks all the + possibilities. List of vendors valid in this module: + + * ``Goto`` + * ``OpenBLAS`` + * ``FLAME`` + * ``ATLAS PhiPACK`` + * ``CXML`` + * ``DXML`` + * ``SunPerf`` + * ``SCSL`` + * ``SGIMATH`` + * ``IBMESSL`` + * ``Intel10_32`` (intel mkl v10 32 bit) + * ``Intel10_64lp`` (intel mkl v10+ 64 bit, threaded code, lp64 model) + * ``Intel10_64lp_seq`` (intel mkl v10+ 64 bit, sequential code, lp64 model) + * ``Intel10_64ilp`` (intel mkl v10+ 64 bit, threaded code, ilp64 model) + * ``Intel10_64ilp_seq`` (intel mkl v10+ 64 bit, sequential code, ilp64 model) + * ``Intel10_64_dyn`` (intel mkl v10+ 64 bit, single dynamic library) + * ``Intel`` (obsolete versions of mkl 32 and 64 bit) + * ``ACML`` + * ``ACML_MP`` + * ``ACML_GPU`` + * ``Apple`` + * ``NAS`` + * ``Arm`` + * ``Arm_mp`` + * ``Arm_ilp64`` + * ``Arm_ilp64_mp`` + * ``Generic`` + +``BLA_F95`` + if ``ON`` tries to find the BLAS95 interfaces + +``BLA_PREFER_PKGCONFIG`` + if set ``pkg-config`` will be used to search for a BLAS library first + and if one is found that is preferred + +Result Variables +^^^^^^^^^^^^^^^^ + +This module defines the following variables: + +``BLAS_FOUND`` + library implementing the BLAS interface is found +``BLAS_LINKER_FLAGS`` + uncached list of required linker flags (excluding ``-l`` and ``-L``). +``BLAS_LIBRARIES`` + uncached list of libraries (using full path name) to link against + to use BLAS (may be empty if compiler implicitly links BLAS) +``BLAS95_LIBRARIES`` + uncached list of libraries (using full path name) to link against + to use BLAS95 interface +``BLAS95_FOUND`` + library implementing the BLAS95 interface is found + +.. note:: + + C, CXX or Fortran must be enabled to detect a BLAS library. + C or CXX must be enabled to use Intel Math Kernel Library (MKL). + + For example, to use Intel MKL libraries and/or Intel compiler: + + .. code-block:: cmake + + set(BLA_VENDOR Intel10_64lp) + find_package(BLAS) + +Hints +^^^^^ + +Set the ``MKLROOT`` environment variable to a directory that contains an MKL +installation, or add the directory to the dynamic library loader environment +variable for your platform (``LIB``, ``DYLD_LIBRARY_PATH`` or +``LD_LIBRARY_PATH``). + +#]=======================================================================] + +# Check the language being used +if(NOT (CMAKE_C_COMPILER_LOADED OR CMAKE_CXX_COMPILER_LOADED OR CMAKE_Fortran_COMPILER_LOADED)) + if(BLAS_FIND_REQUIRED) + message(FATAL_ERROR "FindBLAS requires Fortran, C, or C++ to be enabled.") + else() + message(STATUS "Looking for BLAS... - NOT found (Unsupported languages)") + return() + endif() +endif() + +if(CMAKE_Fortran_COMPILER_LOADED) + include(${CMAKE_CURRENT_LIST_DIR}/CheckFortranFunctionExists.cmake) +else() + include(${CMAKE_CURRENT_LIST_DIR}/CheckFunctionExists.cmake) +endif() +include(${CMAKE_CURRENT_LIST_DIR}/CMakePushCheckState.cmake) +include(${CMAKE_CURRENT_LIST_DIR}/FindPackageHandleStandardArgs.cmake) +cmake_push_check_state() +set(CMAKE_REQUIRED_QUIET ${BLAS_FIND_QUIETLY}) + +if(BLA_PREFER_PKGCONFIG) + find_package(PkgConfig) + pkg_check_modules(PKGC_BLAS blas) + if(PKGC_BLAS_FOUND) + set(BLAS_FOUND ${PKGC_BLAS_FOUND}) + set(BLAS_LIBRARIES "${PKGC_BLAS_LINK_LIBRARIES}") + return() + endif() +endif() + +set(_blas_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) +if(BLA_STATIC) + if(WIN32) + set(CMAKE_FIND_LIBRARY_SUFFIXES .lib ${CMAKE_FIND_LIBRARY_SUFFIXES}) + else() + set(CMAKE_FIND_LIBRARY_SUFFIXES .a ${CMAKE_FIND_LIBRARY_SUFFIXES}) + endif() +else() + if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + # for ubuntu's libblas3gf and liblapack3gf packages + set(CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES} .so.3gf) + endif() +endif() + +# TODO: move this stuff to a separate module + +macro(CHECK_BLAS_LIBRARIES LIBRARIES _prefix _name _flags _list _threadlibs _addlibdir _subdirs) + # This macro checks for the existence of the combination of fortran libraries + # given by _list. If the combination is found, this macro checks (using the + # Check_Fortran_Function_Exists macro) whether can link against that library + # combination using the name of a routine given by _name using the linker + # flags given by _flags. If the combination of libraries is found and passes + # the link test, LIBRARIES is set to the list of complete library paths that + # have been found. Otherwise, LIBRARIES is set to FALSE. + + # N.B. _prefix is the prefix applied to the names of all cached variables that + # are generated internally and marked advanced by this macro. + # _addlibdir is a list of additional search paths. _subdirs is a list of path + # suffixes to be used by find_library(). + + set(_libraries_work TRUE) + set(${LIBRARIES}) + set(_combined_name) + + set(_extaddlibdir "${_addlibdir}") + if(WIN32) + list(APPEND _extaddlibdir ENV LIB) + elseif(APPLE) + list(APPEND _extaddlibdir ENV DYLD_LIBRARY_PATH) + else() + list(APPEND _extaddlibdir ENV LD_LIBRARY_PATH) + endif() + list(APPEND _extaddlibdir "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + + foreach(_library ${_list}) + if(_library MATCHES "^-Wl,--(start|end)-group$") + # Respect linker flags like --start/end-group (required by MKL) + set(${LIBRARIES} ${${LIBRARIES}} "${_library}") + else() + set(_combined_name ${_combined_name}_${_library}) + if(NOT "${_threadlibs}" STREQUAL "") + set(_combined_name ${_combined_name}_threadlibs) + endif() + if(_libraries_work) + find_library(${_prefix}_${_library}_LIBRARY + NAMES ${_library} + PATHS ${_extaddlibdir} + PATH_SUFFIXES ${_subdirs} + ) + #message("DEBUG: find_library(${_library}) got ${${_prefix}_${_library}_LIBRARY}") + mark_as_advanced(${_prefix}_${_library}_LIBRARY) + set(${LIBRARIES} ${${LIBRARIES}} ${${_prefix}_${_library}_LIBRARY}) + set(_libraries_work ${${_prefix}_${_library}_LIBRARY}) + endif() + endif() + endforeach() + + if(_libraries_work) + # Test this combination of libraries. + set(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}} ${_threadlibs}) + #message("DEBUG: CMAKE_REQUIRED_LIBRARIES = ${CMAKE_REQUIRED_LIBRARIES}") + if(CMAKE_Fortran_COMPILER_LOADED) + check_fortran_function_exists("${_name}" ${_prefix}${_combined_name}_WORKS) + else() + check_function_exists("${_name}_" ${_prefix}${_combined_name}_WORKS) + endif() + set(CMAKE_REQUIRED_LIBRARIES) + set(_libraries_work ${${_prefix}${_combined_name}_WORKS}) + endif() + + if(_libraries_work) + if("${_list}" STREQUAL "") + set(${LIBRARIES} "${LIBRARIES}-PLACEHOLDER-FOR-EMPTY-LIBRARIES") + else() + set(${LIBRARIES} ${${LIBRARIES}} ${_threadlibs}) + endif() + else() + set(${LIBRARIES} FALSE) + endif() + #message("DEBUG: ${LIBRARIES} = ${${LIBRARIES}}") +endmacro() + +set(BLAS_LINKER_FLAGS) +set(BLAS_LIBRARIES) +set(BLAS95_LIBRARIES) +if(NOT $ENV{BLA_VENDOR} STREQUAL "") + set(BLA_VENDOR $ENV{BLA_VENDOR}) +else() + if(NOT BLA_VENDOR) + set(BLA_VENDOR "All") + endif() +endif() + +# Implicitly linked BLAS libraries? +if(BLA_VENDOR STREQUAL "All") + if(NOT BLAS_LIBRARIES) + check_blas_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "" + "" + "" + "" + ) + endif() +endif() + +# BLAS in the Intel MKL 10+ library? +if(BLA_VENDOR MATCHES "Intel" OR BLA_VENDOR STREQUAL "All") + if(NOT BLAS_LIBRARIES) + if(CMAKE_C_COMPILER_LOADED OR CMAKE_CXX_COMPILER_LOADED) + # System-specific settings + if(WIN32) + if(BLA_STATIC) + set(BLAS_mkl_DLL_SUFFIX "") + else() + set(BLAS_mkl_DLL_SUFFIX "_dll") + endif() + else() + if(BLA_STATIC) + set(BLAS_mkl_START_GROUP "-Wl,--start-group") + set(BLAS_mkl_END_GROUP "-Wl,--end-group") + else() + set(BLAS_mkl_START_GROUP "") + set(BLAS_mkl_END_GROUP "") + endif() + # Switch to GNU Fortran support layer if needed (but not on Apple, where MKL does not provide it) + if(CMAKE_Fortran_COMPILER_LOADED AND CMAKE_Fortran_COMPILER_ID STREQUAL "GNU" AND NOT APPLE) + set(BLAS_mkl_INTFACE "gf") + set(BLAS_mkl_THREADING "gnu") + set(BLAS_mkl_OMP "gomp") + else() + set(BLAS_mkl_INTFACE "intel") + set(BLAS_mkl_THREADING "intel") + set(BLAS_mkl_OMP "iomp5") + endif() + set(BLAS_mkl_LM "-lm") + set(BLAS_mkl_LDL "-ldl") + endif() + + if(BLAS_FIND_QUIETLY OR NOT BLAS_FIND_REQUIRED) + find_package(Threads) + else() + find_package(Threads REQUIRED) + endif() + + if(BLA_VENDOR MATCHES "_64ilp") + set(BLAS_mkl_ILP_MODE "ilp64") + else() + set(BLAS_mkl_ILP_MODE "lp64") + endif() + + set(BLAS_SEARCH_LIBS "") + + if(BLA_F95) + set(BLAS_mkl_SEARCH_SYMBOL "sgemm_f95") + set(_LIBRARIES BLAS95_LIBRARIES) + if(WIN32) + # Find the main file (32-bit or 64-bit) + set(BLAS_SEARCH_LIBS_WIN_MAIN "") + if(BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN + "mkl_blas95${BLAS_mkl_DLL_SUFFIX} mkl_intel_c${BLAS_mkl_DLL_SUFFIX}") + endif() + + if(BLA_VENDOR MATCHES "^Intel10_64i?lp" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN + "mkl_blas95_${BLAS_mkl_ILP_MODE}${BLAS_mkl_DLL_SUFFIX} mkl_intel_${BLAS_mkl_ILP_MODE}${BLAS_mkl_DLL_SUFFIX}") + endif() + + # Add threading/sequential libs + set(BLAS_SEARCH_LIBS_WIN_THREAD "") + if(BLA_VENDOR MATCHES "^Intel10_64i?lp$" OR BLA_VENDOR STREQUAL "All") + # old version + list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD + "libguide40 mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}") + # mkl >= 10.3 + list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD + "libiomp5md mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}") + endif() + if(BLA_VENDOR MATCHES "^Intel10_64i?lp_seq$" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD + "mkl_sequential${BLAS_mkl_DLL_SUFFIX}") + endif() + + # Cartesian product of the above + foreach(MAIN ${BLAS_SEARCH_LIBS_WIN_MAIN}) + foreach(THREAD ${BLAS_SEARCH_LIBS_WIN_THREAD}) + list(APPEND BLAS_SEARCH_LIBS + "${MAIN} ${THREAD} mkl_core${BLAS_mkl_DLL_SUFFIX}") + endforeach() + endforeach() + else() + if(BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All") + # old version + list(APPEND BLAS_SEARCH_LIBS + "mkl_blas95 mkl_${BLAS_mkl_INTFACE} mkl_${BLAS_mkl_THREADING}_thread mkl_core guide") + + # mkl >= 10.3 + list(APPEND BLAS_SEARCH_LIBS + "${BLAS_mkl_START_GROUP} mkl_blas95 mkl_${BLAS_mkl_INTFACE} mkl_${BLAS_mkl_THREADING}_thread mkl_core ${BLAS_mkl_END_GROUP} ${BLAS_mkl_OMP}") + endif() + if(BLA_VENDOR MATCHES "^Intel10_64i?lp$" OR BLA_VENDOR STREQUAL "All") + # old version + list(APPEND BLAS_SEARCH_LIBS + "mkl_blas95 mkl_${BLAS_mkl_INTFACE}_${BLAS_mkl_ILP_MODE} mkl_${BLAS_mkl_THREADING}_thread mkl_core guide") + + # mkl >= 10.3 + list(APPEND BLAS_SEARCH_LIBS + "${BLAS_mkl_START_GROUP} mkl_blas95_${BLAS_mkl_ILP_MODE} mkl_${BLAS_mkl_INTFACE}_${BLAS_mkl_ILP_MODE} mkl_${BLAS_mkl_THREADING}_thread mkl_core ${BLAS_mkl_END_GROUP} ${BLAS_mkl_OMP}") + endif() + if(BLA_VENDOR MATCHES "^Intel10_64i?lp_seq$" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS + "${BLAS_mkl_START_GROUP} mkl_blas95_${BLAS_mkl_ILP_MODE} mkl_${BLAS_mkl_INTFACE}_${BLAS_mkl_ILP_MODE} mkl_sequential mkl_core ${BLAS_mkl_END_GROUP}") + endif() + endif() + else() + set(BLAS_mkl_SEARCH_SYMBOL sgemm) + set(_LIBRARIES BLAS_LIBRARIES) + if(WIN32) + # Find the main file (32-bit or 64-bit) + set(BLAS_SEARCH_LIBS_WIN_MAIN "") + if(BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN + "mkl_intel_c${BLAS_mkl_DLL_SUFFIX}") + endif() + if(BLA_VENDOR MATCHES "^Intel10_64i?lp" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN + "mkl_intel_${BLAS_mkl_ILP_MODE}${BLAS_mkl_DLL_SUFFIX}") + endif() + + # Add threading/sequential libs + set(BLAS_SEARCH_LIBS_WIN_THREAD "") + if(BLA_VENDOR MATCHES "^Intel10_64i?lp$" OR BLA_VENDOR STREQUAL "All") + # old version + list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD + "libguide40 mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}") + # mkl >= 10.3 + list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD + "libiomp5md mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}") + endif() + if(BLA_VENDOR MATCHES "^Intel10_64i?lp_seq$" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD + "mkl_sequential${BLAS_mkl_DLL_SUFFIX}") + endif() + + # Cartesian product of the above + foreach(MAIN ${BLAS_SEARCH_LIBS_WIN_MAIN}) + foreach(THREAD ${BLAS_SEARCH_LIBS_WIN_THREAD}) + list(APPEND BLAS_SEARCH_LIBS + "${MAIN} ${THREAD} mkl_core${BLAS_mkl_DLL_SUFFIX}") + endforeach() + endforeach() + else() + if(BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All") + # old version + list(APPEND BLAS_SEARCH_LIBS + "mkl_${BLAS_mkl_INTFACE} mkl_${BLAS_mkl_THREADING}_thread mkl_core guide") + + # mkl >= 10.3 + list(APPEND BLAS_SEARCH_LIBS + "${BLAS_mkl_START_GROUP} mkl_${BLAS_mkl_INTFACE} mkl_${BLAS_mkl_THREADING}_thread mkl_core ${BLAS_mkl_END_GROUP} ${BLAS_mkl_OMP}") + endif() + if(BLA_VENDOR MATCHES "^Intel10_64i?lp$" OR BLA_VENDOR STREQUAL "All") + # old version + list(APPEND BLAS_SEARCH_LIBS + "mkl_${BLAS_mkl_INTFACE}_${BLAS_mkl_ILP_MODE} mkl_${BLAS_mkl_THREADING}_thread mkl_core guide") + + # mkl >= 10.3 + list(APPEND BLAS_SEARCH_LIBS + "${BLAS_mkl_START_GROUP} mkl_${BLAS_mkl_INTFACE}_${BLAS_mkl_ILP_MODE} mkl_${BLAS_mkl_THREADING}_thread mkl_core ${BLAS_mkl_END_GROUP} ${BLAS_mkl_OMP}") + endif() + if(BLA_VENDOR MATCHES "^Intel10_64i?lp_seq$" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS + "${BLAS_mkl_START_GROUP} mkl_${BLAS_mkl_INTFACE}_${BLAS_mkl_ILP_MODE} mkl_sequential mkl_core ${BLAS_mkl_END_GROUP}") + endif() + + #older vesions of intel mkl libs + if(BLA_VENDOR STREQUAL "Intel" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS + "mkl") + list(APPEND BLAS_SEARCH_LIBS + "mkl_ia32") + list(APPEND BLAS_SEARCH_LIBS + "mkl_em64t") + endif() + endif() + endif() + + if(BLA_VENDOR MATCHES "^Intel10_64_dyn$" OR BLA_VENDOR STREQUAL "All") + # mkl >= 10.3 with single dynamic library + list(APPEND BLAS_SEARCH_LIBS + "mkl_rt") + endif() + + # MKL uses a multitude of partially platform-specific subdirectories: + if(BLA_VENDOR STREQUAL "Intel10_32") + set(BLAS_mkl_ARCH_NAME "ia32") + else() + set(BLAS_mkl_ARCH_NAME "intel64") + endif() + if(WIN32) + set(BLAS_mkl_OS_NAME "win") + elseif(APPLE) + set(BLAS_mkl_OS_NAME "mac") + else() + set(BLAS_mkl_OS_NAME "lin") + endif() + if(DEFINED ENV{MKLROOT}) + set(BLAS_mkl_MKLROOT "$ENV{MKLROOT}") + # If MKLROOT points to the subdirectory 'mkl', use the parent directory instead + # so we can better detect other relevant libraries in 'compiler' or 'tbb': + get_filename_component(BLAS_mkl_MKLROOT_LAST_DIR "${BLAS_mkl_MKLROOT}" NAME) + if(BLAS_mkl_MKLROOT_LAST_DIR STREQUAL "mkl") + get_filename_component(BLAS_mkl_MKLROOT "${BLAS_mkl_MKLROOT}" DIRECTORY) + endif() + endif() + set(BLAS_mkl_LIB_PATH_SUFFIXES + "compiler/lib" "compiler/lib/${BLAS_mkl_ARCH_NAME}_${BLAS_mkl_OS_NAME}" + "mkl/lib" "mkl/lib/${BLAS_mkl_ARCH_NAME}_${BLAS_mkl_OS_NAME}" + "lib/${BLAS_mkl_ARCH_NAME}_${BLAS_mkl_OS_NAME}") + + foreach(IT ${BLAS_SEARCH_LIBS}) + string(REPLACE " " ";" SEARCH_LIBS ${IT}) + if(NOT ${_LIBRARIES}) + check_blas_libraries( + ${_LIBRARIES} + BLAS + ${BLAS_mkl_SEARCH_SYMBOL} + "" + "${SEARCH_LIBS}" + "${CMAKE_THREAD_LIBS_INIT};${BLAS_mkl_LM};${BLAS_mkl_LDL}" + "${BLAS_mkl_MKLROOT}" + "${BLAS_mkl_LIB_PATH_SUFFIXES}" + ) + endif() + endforeach() + + unset(BLAS_mkl_ILP_MODE) + unset(BLAS_mkl_INTFACE) + unset(BLAS_mkl_THREADING) + unset(BLAS_mkl_OMP) + unset(BLAS_mkl_DLL_SUFFIX) + unset(BLAS_mkl_LM) + unset(BLAS_mkl_LDL) + unset(BLAS_mkl_MKLROOT) + unset(BLAS_mkl_MKLROOT_LAST_DIR) + unset(BLAS_mkl_ARCH_NAME) + unset(BLAS_mkl_OS_NAME) + unset(BLAS_mkl_LIB_PATH_SUFFIXES) + endif() + endif() +endif() + +if(BLA_F95) + find_package_handle_standard_args(BLAS REQUIRED_VARS BLAS95_LIBRARIES) + set(BLAS95_FOUND ${BLAS_FOUND}) + if(BLAS_FOUND) + set(BLAS_LIBRARIES "${BLAS95_LIBRARIES}") + endif() +endif() + +# gotoblas? (http://www.tacc.utexas.edu/tacc-projects/gotoblas2) +if(BLA_VENDOR STREQUAL "Goto" OR BLA_VENDOR STREQUAL "All") + if(NOT BLAS_LIBRARIES) + check_blas_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "goto2" + "" + "" + "" + ) + endif() +endif() + +# OpenBLAS? (http://www.openblas.net) +if(BLA_VENDOR STREQUAL "OpenBLAS" OR BLA_VENDOR STREQUAL "All") + if(NOT BLAS_LIBRARIES) + check_blas_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "openblas" + "" + "" + "" + ) + endif() + if(NOT BLAS_LIBRARIES AND (CMAKE_C_COMPILER_LOADED OR CMAKE_CXX_COMPILER_LOADED)) + if(BLAS_FIND_QUIETLY OR NOT BLAS_FIND_REQUIRED) + find_package(Threads) + else() + find_package(Threads REQUIRED) + endif() + check_blas_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "openblas" + "${CMAKE_THREAD_LIBS_INIT}" + "" + "" + ) + endif() +endif() + +# ArmPL blas library? (https://developer.arm.com/tools-and-software/server-and-hpc/compile/arm-compiler-for-linux/arm-performance-libraries) +if(BLA_VENDOR MATCHES "Arm" OR BLA_VENDOR STREQUAL "All") + + # Check for 64bit Integer support + if(BLA_VENDOR MATCHES "_ilp64") + set(BLAS_armpl_LIB "armpl_ilp64") + else() + set(BLAS_armpl_LIB "armpl_lp64") + endif() + + # Check for OpenMP support, VIA BLA_VENDOR of Arm_mp or Arm_ipl64_mp + if(BLA_VENDOR MATCHES "_mp") + set(BLAS_armpl_LIB "${BLAS_armpl_LIB}_mp") + endif() + + if(NOT BLAS_LIBRARIES) + check_blas_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "${BLAS_armpl_LIB}" + "" + "" + "" + ) + endif() + +endif() + +# FLAME's blis library? (https://github.com/flame/blis) +if(BLA_VENDOR STREQUAL "FLAME" OR BLA_VENDOR STREQUAL "All") + if(NOT BLAS_LIBRARIES) + check_blas_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "blis" + "" + "" + "" + ) + endif() +endif() + +# BLAS in the ATLAS library? (http://math-atlas.sourceforge.net/) +if(BLA_VENDOR STREQUAL "ATLAS" OR BLA_VENDOR STREQUAL "All") + if(NOT BLAS_LIBRARIES) + check_blas_libraries( + BLAS_LIBRARIES + BLAS + dgemm + "" + "blas;f77blas;atlas" + "" + "" + "" + ) + endif() +endif() + +# BLAS in PhiPACK libraries? (requires generic BLAS lib, too) +if(BLA_VENDOR STREQUAL "PhiPACK" OR BLA_VENDOR STREQUAL "All") + if(NOT BLAS_LIBRARIES) + check_blas_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "sgemm;dgemm;blas" + "" + "" + "" + ) + endif() +endif() + +# BLAS in Alpha CXML library? +if(BLA_VENDOR STREQUAL "CXML" OR BLA_VENDOR STREQUAL "All") + if(NOT BLAS_LIBRARIES) + check_blas_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "cxml" + "" + "" + "" + ) + endif() +endif() + +# BLAS in Alpha DXML library? (now called CXML, see above) +if(BLA_VENDOR STREQUAL "DXML" OR BLA_VENDOR STREQUAL "All") + if(NOT BLAS_LIBRARIES) + check_blas_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "dxml" + "" + "" + "" + ) + endif() +endif() + +# BLAS in Sun Performance library? +if(BLA_VENDOR STREQUAL "SunPerf" OR BLA_VENDOR STREQUAL "All") + if(NOT BLAS_LIBRARIES) + check_blas_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "-xlic_lib=sunperf" + "sunperf;sunmath" + "" + "" + "" + ) + if(BLAS_LIBRARIES) + set(BLAS_LINKER_FLAGS "-xlic_lib=sunperf") + endif() + endif() +endif() + +# BLAS in SCSL library? (SGI/Cray Scientific Library) +if(BLA_VENDOR STREQUAL "SCSL" OR BLA_VENDOR STREQUAL "All") + if(NOT BLAS_LIBRARIES) + check_blas_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "scsl" + "" + "" + "" + ) + endif() +endif() + +# BLAS in SGIMATH library? +if(BLA_VENDOR STREQUAL "SGIMATH" OR BLA_VENDOR STREQUAL "All") + if(NOT BLAS_LIBRARIES) + check_blas_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "complib.sgimath" + "" + "" + "" + ) + endif() +endif() + +# BLAS in IBM ESSL library? (requires generic BLAS lib, too) +if(BLA_VENDOR STREQUAL "IBMESSL" OR BLA_VENDOR STREQUAL "All") + if(NOT BLAS_LIBRARIES) + check_blas_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "essl;blas" + "" + "" + "" + ) + endif() +endif() + +# BLAS in acml library? +if(BLA_VENDOR MATCHES "ACML" OR BLA_VENDOR STREQUAL "All") + if(((BLA_VENDOR STREQUAL "ACML") AND (NOT BLAS_ACML_LIB_DIRS)) OR + ((BLA_VENDOR STREQUAL "ACML_MP") AND (NOT BLAS_ACML_MP_LIB_DIRS)) OR + ((BLA_VENDOR STREQUAL "ACML_GPU") AND (NOT BLAS_ACML_GPU_LIB_DIRS)) + ) + # try to find acml in "standard" paths + if(WIN32) + file(GLOB _ACML_ROOT "C:/AMD/acml*/ACML-EULA.txt") + else() + file(GLOB _ACML_ROOT "/opt/acml*/ACML-EULA.txt") + endif() + if(WIN32) + file(GLOB _ACML_GPU_ROOT "C:/AMD/acml*/GPGPUexamples") + else() + file(GLOB _ACML_GPU_ROOT "/opt/acml*/GPGPUexamples") + endif() + list(GET _ACML_ROOT 0 _ACML_ROOT) + list(GET _ACML_GPU_ROOT 0 _ACML_GPU_ROOT) + if(_ACML_ROOT) + get_filename_component(_ACML_ROOT ${_ACML_ROOT} PATH) + if(SIZEOF_INTEGER EQUAL 8) + set(_ACML_PATH_SUFFIX "_int64") + else() + set(_ACML_PATH_SUFFIX "") + endif() + if(CMAKE_Fortran_COMPILER_ID STREQUAL "Intel") + set(_ACML_COMPILER32 "ifort32") + set(_ACML_COMPILER64 "ifort64") + elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "SunPro") + set(_ACML_COMPILER32 "sun32") + set(_ACML_COMPILER64 "sun64") + elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "PGI") + set(_ACML_COMPILER32 "pgi32") + if(WIN32) + set(_ACML_COMPILER64 "win64") + else() + set(_ACML_COMPILER64 "pgi64") + endif() + elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Open64") + # 32 bit builds not supported on Open64 but for code simplicity + # We'll just use the same directory twice + set(_ACML_COMPILER32 "open64_64") + set(_ACML_COMPILER64 "open64_64") + elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "NAG") + set(_ACML_COMPILER32 "nag32") + set(_ACML_COMPILER64 "nag64") + else() + set(_ACML_COMPILER32 "gfortran32") + set(_ACML_COMPILER64 "gfortran64") + endif() + + if(BLA_VENDOR STREQUAL "ACML_MP") + set(_ACML_MP_LIB_DIRS + "${_ACML_ROOT}/${_ACML_COMPILER32}_mp${_ACML_PATH_SUFFIX}/lib" + "${_ACML_ROOT}/${_ACML_COMPILER64}_mp${_ACML_PATH_SUFFIX}/lib") + else() + set(_ACML_LIB_DIRS + "${_ACML_ROOT}/${_ACML_COMPILER32}${_ACML_PATH_SUFFIX}/lib" + "${_ACML_ROOT}/${_ACML_COMPILER64}${_ACML_PATH_SUFFIX}/lib") + endif() + endif() +elseif(BLAS_${BLA_VENDOR}_LIB_DIRS) + set(_${BLA_VENDOR}_LIB_DIRS ${BLAS_${BLA_VENDOR}_LIB_DIRS}) +endif() + +if(BLA_VENDOR STREQUAL "ACML_MP") + foreach(BLAS_ACML_MP_LIB_DIRS ${_ACML_MP_LIB_DIRS}) + check_blas_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" "acml_mp;acml_mv" "" ${BLAS_ACML_MP_LIB_DIRS} "" + ) + if(BLAS_LIBRARIES) + break() + endif() + endforeach() +elseif(BLA_VENDOR STREQUAL "ACML_GPU") + foreach(BLAS_ACML_GPU_LIB_DIRS ${_ACML_GPU_LIB_DIRS}) + check_blas_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" "acml;acml_mv;CALBLAS" "" ${BLAS_ACML_GPU_LIB_DIRS} "" + ) + if(BLAS_LIBRARIES) + break() + endif() + endforeach() +else() + foreach(BLAS_ACML_LIB_DIRS ${_ACML_LIB_DIRS}) + check_blas_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" "acml;acml_mv" "" ${BLAS_ACML_LIB_DIRS} "" + ) + if(BLAS_LIBRARIES) + break() + endif() + endforeach() +endif() + +# Either acml or acml_mp should be in LD_LIBRARY_PATH but not both +if(NOT BLAS_LIBRARIES) + check_blas_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "acml;acml_mv" + "" + "" + "" + ) +endif() +if(NOT BLAS_LIBRARIES) + check_blas_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "acml_mp;acml_mv" + "" + "" + "" + ) +endif() +if(NOT BLAS_LIBRARIES) + check_blas_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "acml;acml_mv;CALBLAS" + "" + "" + "" + ) +endif() +endif() # ACML + +# Apple BLAS library? +if(BLA_VENDOR STREQUAL "Apple" OR BLA_VENDOR STREQUAL "All") + if(NOT BLAS_LIBRARIES) + check_blas_libraries( + BLAS_LIBRARIES + BLAS + dgemm + "" + "Accelerate" + "" + "" + "" + ) + endif() +endif() + +# Apple NAS (vecLib) library? +if(BLA_VENDOR STREQUAL "NAS" OR BLA_VENDOR STREQUAL "All") + if(NOT BLAS_LIBRARIES) + check_blas_libraries( + BLAS_LIBRARIES + BLAS + dgemm + "" + "vecLib" + "" + "" + "" + ) + endif() +endif() + +# Generic BLAS library? +if(BLA_VENDOR STREQUAL "Generic" OR BLA_VENDOR STREQUAL "All") + if(NOT BLAS_LIBRARIES) + check_blas_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "blas" + "" + "" + "" + ) + endif() +endif() + +if(NOT BLA_F95) + find_package_handle_standard_args(BLAS REQUIRED_VARS BLAS_LIBRARIES) +endif() + +# On compilers that implicitly link BLAS (such as ftn, cc, and CC on Cray HPC machines) +# we used a placeholder for empty BLAS_LIBRARIES to get through our logic above. +if(BLAS_LIBRARIES STREQUAL "BLAS_LIBRARIES-PLACEHOLDER-FOR-EMPTY-LIBRARIES") + set(BLAS_LIBRARIES "") +endif() + +cmake_pop_check_state() +set(CMAKE_FIND_LIBRARY_SUFFIXES ${_blas_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES}) diff --git a/cmake/FindLAPACK.cmake b/cmake/FindLAPACK.cmake new file mode 100644 index 00000000000..e5bd58d443d --- /dev/null +++ b/cmake/FindLAPACK.cmake @@ -0,0 +1,530 @@ +# Distributed under the OSI-approved BSD 3-Clause License. See accompanying +# file Copyright.txt or https://cmake.org/licensing for details. + +#[=======================================================================[.rst: +FindLAPACK +---------- + +Find Linear Algebra PACKage (LAPACK) library + +This module finds an installed Fortran library that implements the +LAPACK linear-algebra interface (see http://www.netlib.org/lapack/). + +The approach follows that taken for the ``autoconf`` macro file, +``acx_lapack.m4`` (distributed at +http://ac-archive.sourceforge.net/ac-archive/acx_lapack.html). + +Input Variables +^^^^^^^^^^^^^^^ + +The following variables may be set to influence this module's behavior: + +``BLA_STATIC`` + if ``ON`` use static linkage + +``BLA_VENDOR`` + If set, checks only the specified vendor, if not set checks all the + possibilities. List of vendors valid in this module: + + * ``OpenBLAS`` + * ``FLAME`` + * ``Intel10_32`` (intel mkl v10 32 bit) + * ``Intel10_64lp`` (intel mkl v10+ 64 bit, threaded code, lp64 model) + * ``Intel10_64lp_seq`` (intel mkl v10+ 64 bit, sequential code, lp64 model) + * ``Intel10_64ilp`` (intel mkl v10+ 64 bit, threaded code, ilp64 model) + * ``Intel10_64ilp_seq`` (intel mkl v10+ 64 bit, sequential code, ilp64 model) + * ``Intel10_64_dyn`` (intel mkl v10+ 64 bit, single dynamic library) + * ``Intel`` (obsolete versions of mkl 32 and 64 bit) + * ``ACML`` + * ``Apple`` + * ``NAS`` + * ``Arm`` + * ``Arm_mp`` + * ``Arm_ilp64`` + * ``Arm_ilp64_mp`` + * ``Generic`` + +``BLA_F95`` + if ``ON`` tries to find the BLAS95/LAPACK95 interfaces + +Result Variables +^^^^^^^^^^^^^^^^ + +This module defines the following variables: + +``LAPACK_FOUND`` + library implementing the LAPACK interface is found +``LAPACK_LINKER_FLAGS`` + uncached list of required linker flags (excluding ``-l`` and ``-L``). +``LAPACK_LIBRARIES`` + uncached list of libraries (using full path name) to link against + to use LAPACK +``LAPACK95_LIBRARIES`` + uncached list of libraries (using full path name) to link against + to use LAPACK95 +``LAPACK95_FOUND`` + library implementing the LAPACK95 interface is found + +.. note:: + + C, CXX or Fortran must be enabled to detect a BLAS/LAPACK library. + C or CXX must be enabled to use Intel Math Kernel Library (MKL). + + For example, to use Intel MKL libraries and/or Intel compiler: + + .. code-block:: cmake + + set(BLA_VENDOR Intel10_64lp) + find_package(LAPACK) +#]=======================================================================] + +# Check the language being used +if(NOT (CMAKE_C_COMPILER_LOADED OR CMAKE_CXX_COMPILER_LOADED OR CMAKE_Fortran_COMPILER_LOADED)) + if(LAPACK_FIND_REQUIRED) + message(FATAL_ERROR "FindLAPACK requires Fortran, C, or C++ to be enabled.") + else() + message(STATUS "Looking for LAPACK... - NOT found (Unsupported languages)") + return() + endif() +endif() + +if(CMAKE_Fortran_COMPILER_LOADED) + include(${CMAKE_CURRENT_LIST_DIR}/CheckFortranFunctionExists.cmake) +else() + include(${CMAKE_CURRENT_LIST_DIR}/CheckFunctionExists.cmake) +endif() +include(${CMAKE_CURRENT_LIST_DIR}/CMakePushCheckState.cmake) + +cmake_push_check_state() +set(CMAKE_REQUIRED_QUIET ${LAPACK_FIND_QUIETLY}) + +set(LAPACK_FOUND FALSE) +set(LAPACK95_FOUND FALSE) + +set(_lapack_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) +if(BLA_STATIC) + if(WIN32) + set(CMAKE_FIND_LIBRARY_SUFFIXES .lib ${CMAKE_FIND_LIBRARY_SUFFIXES}) + else() + set(CMAKE_FIND_LIBRARY_SUFFIXES .a ${CMAKE_FIND_LIBRARY_SUFFIXES}) + endif() +else() + if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + # for ubuntu's libblas3gf and liblapack3gf packages + set(CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES} .so.3gf) + endif() +endif() + +# TODO: move this stuff to a separate module + +macro(CHECK_LAPACK_LIBRARIES LIBRARIES _prefix _name _flags _list _threadlibs _addlibdir _subdirs _blas) + # This macro checks for the existence of the combination of fortran libraries + # given by _list. If the combination is found, this macro checks (using the + # Check_Fortran_Function_Exists macro) whether can link against that library + # combination using the name of a routine given by _name using the linker + # flags given by _flags. If the combination of libraries is found and passes + # the link test, LIBRARIES is set to the list of complete library paths that + # have been found. Otherwise, LIBRARIES is set to FALSE. + + # N.B. _prefix is the prefix applied to the names of all cached variables that + # are generated internally and marked advanced by this macro. + # _addlibdir is a list of additional search paths. _subdirs is a list of path + # suffixes to be used by find_library(). + + set(_libraries_work TRUE) + set(${LIBRARIES}) + set(_combined_name) + + set(_extaddlibdir "${_addlibdir}") + if(WIN32) + list(APPEND _extaddlibdir ENV LIB) + elseif(APPLE) + list(APPEND _extaddlibdir ENV DYLD_LIBRARY_PATH) + else() + list(APPEND _extaddlibdir ENV LD_LIBRARY_PATH) + endif() + list(APPEND _extaddlibdir "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + + foreach(_library ${_list}) + if(_library MATCHES "^-Wl,--(start|end)-group$") + # Respect linker flags like --start/end-group (required by MKL) + set(${LIBRARIES} ${${LIBRARIES}} "${_library}") + else() + set(_combined_name ${_combined_name}_${_library}) + if(_libraries_work) + find_library(${_prefix}_${_library}_LIBRARY + NAMES ${_library} + PATHS ${_extaddlibdir} + PATH_SUFFIXES ${_subdirs} + ) + #message("DEBUG: find_library(${_library}) got ${${_prefix}_${_library}_LIBRARY}") + mark_as_advanced(${_prefix}_${_library}_LIBRARY) + set(${LIBRARIES} ${${LIBRARIES}} ${${_prefix}_${_library}_LIBRARY}) + set(_libraries_work ${${_prefix}_${_library}_LIBRARY}) + endif() + endif() + endforeach() + + if(_libraries_work) + # Test this combination of libraries. + set(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}} ${_blas} ${_threadlibs}) + #message("DEBUG: CMAKE_REQUIRED_LIBRARIES = ${CMAKE_REQUIRED_LIBRARIES}") + if(CMAKE_Fortran_COMPILER_LOADED) + check_fortran_function_exists("${_name}" ${_prefix}${_combined_name}_WORKS) + else() + check_function_exists("${_name}_" ${_prefix}${_combined_name}_WORKS) + endif() + set(CMAKE_REQUIRED_LIBRARIES) + set(_libraries_work ${${_prefix}${_combined_name}_WORKS}) + endif() + + if(_libraries_work) + if("${_list}${_blas}" STREQUAL "") + set(${LIBRARIES} "${LIBRARIES}-PLACEHOLDER-FOR-EMPTY-LIBRARIES") + else() + set(${LIBRARIES} ${${LIBRARIES}} ${_blas} ${_threadlibs}) + endif() + else() + set(${LIBRARIES} FALSE) + endif() + #message("DEBUG: ${LIBRARIES} = ${${LIBRARIES}}") +endmacro() + +set(LAPACK_LINKER_FLAGS) +set(LAPACK_LIBRARIES) +set(LAPACK95_LIBRARIES) + +if(LAPACK_FIND_QUIETLY OR NOT LAPACK_FIND_REQUIRED) + find_package(BLAS) +else() + find_package(BLAS REQUIRED) +endif() + +if(BLAS_FOUND) + set(LAPACK_LINKER_FLAGS ${BLAS_LINKER_FLAGS}) + if(NOT $ENV{BLA_VENDOR} STREQUAL "") + set(BLA_VENDOR $ENV{BLA_VENDOR}) + else() + if(NOT BLA_VENDOR) + set(BLA_VENDOR "All") + endif() + endif() + + # LAPACK in the Intel MKL 10+ library? + if(BLA_VENDOR MATCHES "Intel" OR BLA_VENDOR STREQUAL "All") + if(NOT LAPACK_LIBRARIES) + if(CMAKE_C_COMPILER_LOADED OR CMAKE_CXX_COMPILER_LOADED) + # System-specific settings + if(NOT WIN32) + set(LAPACK_mkl_LM "-lm") + set(LAPACK_mkl_LDL "-ldl") + endif() + + if(LAPACK_FIND_QUIETLY OR NOT LAPACK_FIND_REQUIRED) + find_package(Threads) + else() + find_package(Threads REQUIRED) + endif() + + if(BLA_VENDOR MATCHES "_64ilp") + set(LAPACK_mkl_ILP_MODE "ilp64") + else() + set(LAPACK_mkl_ILP_MODE "lp64") + endif() + + set(LAPACK_SEARCH_LIBS "") + + if(BLA_F95) + set(LAPACK_mkl_SEARCH_SYMBOL "cheev_f95") + set(_LIBRARIES LAPACK95_LIBRARIES) + set(_BLAS_LIBRARIES ${BLAS95_LIBRARIES}) + + # old + list(APPEND LAPACK_SEARCH_LIBS + "mkl_lapack95") + # new >= 10.3 + list(APPEND LAPACK_SEARCH_LIBS + "mkl_intel_c") + list(APPEND LAPACK_SEARCH_LIBS + "mkl_lapack95_${LAPACK_mkl_ILP_MODE}") + else() + set(LAPACK_mkl_SEARCH_SYMBOL "cheev") + set(_LIBRARIES LAPACK_LIBRARIES) + set(_BLAS_LIBRARIES ${BLAS_LIBRARIES}) + + # old and new >= 10.3 + list(APPEND LAPACK_SEARCH_LIBS + "mkl_lapack") + endif() + + # MKL uses a multitude of partially platform-specific subdirectories: + if(BLA_VENDOR STREQUAL "Intel10_32") + set(LAPACK_mkl_ARCH_NAME "ia32") + else() + set(LAPACK_mkl_ARCH_NAME "intel64") + endif() + if(WIN32) + set(LAPACK_mkl_OS_NAME "win") + elseif(APPLE) + set(LAPACK_mkl_OS_NAME "mac") + else() + set(LAPACK_mkl_OS_NAME "lin") + endif() + if(DEFINED ENV{MKLROOT}) + set(LAPACK_mkl_MKLROOT "$ENV{MKLROOT}") + # If MKLROOT points to the subdirectory 'mkl', use the parent directory instead + # so we can better detect other relevant libraries in 'compiler' or 'tbb': + get_filename_component(LAPACK_mkl_MKLROOT_LAST_DIR "${LAPACK_mkl_MKLROOT}" NAME) + if(LAPACK_mkl_MKLROOT_LAST_DIR STREQUAL "mkl") + get_filename_component(LAPACK_mkl_MKLROOT "${LAPACK_mkl_MKLROOT}" DIRECTORY) + endif() + endif() + set(LAPACK_mkl_LIB_PATH_SUFFIXES + "compiler/lib" "compiler/lib/${LAPACK_mkl_ARCH_NAME}_${LAPACK_mkl_OS_NAME}" + "mkl/lib" "mkl/lib/${LAPACK_mkl_ARCH_NAME}_${LAPACK_mkl_OS_NAME}" + "lib/${LAPACK_mkl_ARCH_NAME}_${LAPACK_mkl_OS_NAME}") + + # First try empty lapack libs + if(NOT ${_LIBRARIES}) + check_lapack_libraries( + ${_LIBRARIES} + LAPACK + ${LAPACK_mkl_SEARCH_SYMBOL} + "" + "" + "${CMAKE_THREAD_LIBS_INIT};${LAPACK_mkl_LM};${LAPACK_mkl_LDL}" + "${LAPACK_mkl_MKLROOT}" + "${LAPACK_mkl_LIB_PATH_SUFFIXES}" + "${_BLAS_LIBRARIES}" + ) + endif() + + # Then try the search libs + foreach(IT ${LAPACK_SEARCH_LIBS}) + string(REPLACE " " ";" SEARCH_LIBS ${IT}) + if(NOT ${_LIBRARIES}) + check_lapack_libraries( + ${_LIBRARIES} + LAPACK + ${LAPACK_mkl_SEARCH_SYMBOL} + "" + "${SEARCH_LIBS}" + "${CMAKE_THREAD_LIBS_INIT};${LAPACK_mkl_LM};${LAPACK_mkl_LDL}" + "${LAPACK_mkl_MKLROOT}" + "${LAPACK_mkl_LIB_PATH_SUFFIXES}" + "${_BLAS_LIBRARIES}" + ) + endif() + endforeach() + + unset(LAPACK_mkl_ILP_MODE) + unset(LAPACK_mkl_SEARCH_SYMBOL) + unset(LAPACK_mkl_LM) + unset(LAPACK_mkl_LDL) + unset(LAPACK_mkl_MKLROOT) + unset(LAPACK_mkl_ARCH_NAME) + unset(LAPACK_mkl_OS_NAME) + unset(LAPACK_mkl_LIB_PATH_SUFFIXES) + endif() + endif() + endif() + + # gotoblas? (http://www.tacc.utexas.edu/tacc-projects/gotoblas2) + if(BLA_VENDOR STREQUAL "Goto" OR BLA_VENDOR STREQUAL "All") + if(NOT LAPACK_LIBRARIES) + check_lapack_libraries( + LAPACK_LIBRARIES + LAPACK + cheev + "" + "goto2" + "" + "" + "" + "${BLAS_LIBRARIES}" + ) + endif() + endif() + + # OpenBLAS? (http://www.openblas.net) + if(BLA_VENDOR STREQUAL "OpenBLAS" OR BLA_VENDOR STREQUAL "All") + if(NOT LAPACK_LIBRARIES) + check_lapack_libraries( + LAPACK_LIBRARIES + LAPACK + cheev + "" + "openblas" + "" + "" + "" + "${BLAS_LIBRARIES}" + ) + endif() + endif() + + # ArmPL? (https://developer.arm.com/tools-and-software/server-and-hpc/compile/arm-compiler-for-linux/arm-performance-libraries) + if(BLA_VENDOR MATCHES "Arm" OR BLA_VENDOR STREQUAL "All") + + # Check for 64bit Integer support + if(BLA_VENDOR MATCHES "_ilp64") + set(LAPACK_armpl_LIB "armpl_ilp64") + else() + set(LAPACK_armpl_LIB "armpl_lp64") + endif() + + # Check for OpenMP support, VIA BLA_VENDOR of Arm_mp or Arm_ipl64_mp + if(BLA_VENDOR MATCHES "_mp") + set(LAPACK_armpl_LIB "${LAPACK_armpl_LIB}_mp") + endif() + + if(NOT LAPACK_LIBRARIES) + check_lapack_libraries( + LAPACK_LIBRARIES + LAPACK + cheev + "" + "${LAPACK_armpl_LIB}" + "" + "" + "" + "${BLAS_LIBRARIES}" + ) + endif() + endif() + + # FLAME's blis library? (https://github.com/flame/blis) + if(BLA_VENDOR STREQUAL "FLAME" OR BLA_VENDOR STREQUAL "All") + if(NOT LAPACK_LIBRARIES) + check_lapack_libraries( + LAPACK_LIBRARIES + LAPACK + cheev + "" + "flame" + "" + "" + "" + "${BLAS_LIBRARIES}" + ) + endif() + endif() + + # BLAS in acml library? + if(BLA_VENDOR MATCHES "ACML" OR BLA_VENDOR STREQUAL "All") + if(BLAS_LIBRARIES MATCHES ".+acml.+") + set(LAPACK_LIBRARIES ${BLAS_LIBRARIES}) + endif() + endif() + + # Apple LAPACK library? + if(BLA_VENDOR STREQUAL "Apple" OR BLA_VENDOR STREQUAL "All") + if(NOT LAPACK_LIBRARIES) + check_lapack_libraries( + LAPACK_LIBRARIES + LAPACK + cheev + "" + "Accelerate" + "" + "" + "" + "${BLAS_LIBRARIES}" + ) + endif() + endif() + + # Apple NAS (vecLib) library? + if(BLA_VENDOR STREQUAL "NAS" OR BLA_VENDOR STREQUAL "All") + if(NOT LAPACK_LIBRARIES) + check_lapack_libraries( + LAPACK_LIBRARIES + LAPACK + cheev + "" + "vecLib" + "" + "" + "" + "${BLAS_LIBRARIES}" + ) + endif() + endif() + + # Generic LAPACK library? + if(BLA_VENDOR STREQUAL "Generic" OR + BLA_VENDOR STREQUAL "ATLAS" OR + BLA_VENDOR STREQUAL "All") + if(NOT LAPACK_LIBRARIES) + check_lapack_libraries( + LAPACK_LIBRARIES + LAPACK + cheev + "" + "lapack" + "" + "" + "" + "${BLAS_LIBRARIES}" + ) + endif() + endif() +else() + message(STATUS "LAPACK requires BLAS") +endif() + +if(BLA_F95) + if(LAPACK95_LIBRARIES) + set(LAPACK95_FOUND TRUE) + else() + set(LAPACK95_FOUND FALSE) + endif() + if(NOT LAPACK_FIND_QUIETLY) + if(LAPACK95_FOUND) + message(STATUS "A library with LAPACK95 API found.") + else() + if(LAPACK_FIND_REQUIRED) + message(FATAL_ERROR + "A required library with LAPACK95 API not found. Please specify library location." + ) + else() + message(STATUS + "A library with LAPACK95 API not found. Please specify library location." + ) + endif() + endif() + endif() + set(LAPACK_FOUND "${LAPACK95_FOUND}") + set(LAPACK_LIBRARIES "${LAPACK95_LIBRARIES}") +else() + if(LAPACK_LIBRARIES) + set(LAPACK_FOUND TRUE) + else() + set(LAPACK_FOUND FALSE) + endif() + + if(NOT LAPACK_FIND_QUIETLY) + if(LAPACK_FOUND) + message(STATUS "A library with LAPACK API found.") + else() + if(LAPACK_FIND_REQUIRED) + message(FATAL_ERROR + "A required library with LAPACK API not found. Please specify library location." + ) + else() + message(STATUS + "A library with LAPACK API not found. Please specify library location." + ) + endif() + endif() + endif() +endif() + +# On compilers that implicitly link LAPACK (such as ftn, cc, and CC on Cray HPC machines) +# we used a placeholder for empty LAPACK_LIBRARIES to get through our logic above. +if(LAPACK_LIBRARIES STREQUAL "LAPACK_LIBRARIES-PLACEHOLDER-FOR-EMPTY-LIBRARIES") + set(LAPACK_LIBRARIES "") +endif() + +cmake_pop_check_state() +set(CMAKE_FIND_LIBRARY_SUFFIXES ${_lapack_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES}) From f9dca0730aa891273d06aa131321797ab2708724 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tiziano=20M=C3=BCller?= Date: Fri, 10 Apr 2020 18:21:57 +0200 Subject: [PATCH 14/19] cmake: patch FindBLAS to check OpenBLAS for OMP Uses the openblas_get_parallel() function to check whether OpenBLAS is built with OMP. If not, looks for other OpenBLAS library candidates libopenblas_omp.so/.a and libopenblas_openmp.so/.a. FindLAPACK.cmake is needed because it calls FindBLAS, otherwise it will use the CMake provided FindBLAS.cmake, not our patched one. fixes #295 --- cmake/FindBLAS.cmake | 103 +++++++++++++++++++++++++++++++++++------ cmake/FindLAPACK.cmake | 6 +-- 2 files changed, 93 insertions(+), 16 deletions(-) diff --git a/cmake/FindBLAS.cmake b/cmake/FindBLAS.cmake index caed7eff4cb..aa46ae8ec21 100644 --- a/cmake/FindBLAS.cmake +++ b/cmake/FindBLAS.cmake @@ -112,24 +112,26 @@ if(NOT (CMAKE_C_COMPILER_LOADED OR CMAKE_CXX_COMPILER_LOADED OR CMAKE_Fortran_CO endif() if(CMAKE_Fortran_COMPILER_LOADED) - include(${CMAKE_CURRENT_LIST_DIR}/CheckFortranFunctionExists.cmake) + include(CheckFortranFunctionExists) + include(CheckFortranSourceRuns) else() - include(${CMAKE_CURRENT_LIST_DIR}/CheckFunctionExists.cmake) + include(CheckFunctionExists) endif() -include(${CMAKE_CURRENT_LIST_DIR}/CMakePushCheckState.cmake) -include(${CMAKE_CURRENT_LIST_DIR}/FindPackageHandleStandardArgs.cmake) +include(CMakePushCheckState) +include(FindPackageHandleStandardArgs) cmake_push_check_state() set(CMAKE_REQUIRED_QUIET ${BLAS_FIND_QUIETLY}) -if(BLA_PREFER_PKGCONFIG) - find_package(PkgConfig) - pkg_check_modules(PKGC_BLAS blas) - if(PKGC_BLAS_FOUND) - set(BLAS_FOUND ${PKGC_BLAS_FOUND}) - set(BLAS_LIBRARIES "${PKGC_BLAS_LINK_LIBRARIES}") - return() - endif() -endif() +# Disable since we otherwise skip the additional search for OpenMP-enabled OpenBLAS +# if(BLA_PREFER_PKGCONFIG) +# find_package(PkgConfig) +# pkg_check_modules(PKGC_BLAS blas) +# if(PKGC_BLAS_FOUND) +# set(BLAS_FOUND ${PKGC_BLAS_FOUND}) +# set(BLAS_LIBRARIES "${PKGC_BLAS_LINK_LIBRARIES}") +# return() +# endif() +# endif() set(_blas_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) if(BLA_STATIC) @@ -223,6 +225,19 @@ macro(CHECK_BLAS_LIBRARIES LIBRARIES _prefix _name _flags _list _threadlibs _add #message("DEBUG: ${LIBRARIES} = ${${LIBRARIES}}") endmacro() +# DBCSR extension: +macro(CHECK_OPENBLAS_OMP_SUPPORT) + set(_openblascheck_ORIG_CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES}) + set(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES}) + check_fortran_source_runs("integer, external :: openblas_get_parallel; if (openblas_get_parallel() /= 2) error stop ; end" _openblascheck_has_OMP) + set(CMAKE_REQUIRED_LIBRARIES ${_openblascheck_ORIG_CMAKE_REQUIRED_LIBRARIES}) + + if (NOT _openblascheck_has_OMP) + message(VERBOSE "The OpenBLAS library '${BLAS_LIBRARIES}' is not built with OpenMP, ignoring due to USE_OPENMP=1.") + set(BLAS_LIBRARIES) + endif () +endmacro() + set(BLAS_LINKER_FLAGS) set(BLAS_LIBRARIES) set(BLAS95_LIBRARIES) @@ -534,7 +549,38 @@ if(BLA_VENDOR STREQUAL "OpenBLAS" OR BLA_VENDOR STREQUAL "All") "" "" ) + + if (BLAS_LIBRARIES AND USE_OPENMP) + check_openblas_omp_support() + endif () endif() + + if (NOT BLAS_LIBRARIES) + check_blas_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "openblas_omp" + "" + "" + "" + ) + endif () + + if (NOT BLAS_LIBRARIES) + check_blas_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "openblas_openmp" + "" + "" + "" + ) + endif () + if(NOT BLAS_LIBRARIES AND (CMAKE_C_COMPILER_LOADED OR CMAKE_CXX_COMPILER_LOADED)) if(BLAS_FIND_QUIETLY OR NOT BLAS_FIND_REQUIRED) find_package(Threads) @@ -551,6 +597,37 @@ if(BLA_VENDOR STREQUAL "OpenBLAS" OR BLA_VENDOR STREQUAL "All") "" "" ) + + if (BLAS_LIBRARIES AND USE_OPENMP) + check_openblas_omp_support() + endif () + + if (NOT BLAS_LIBRARIES) + check_blas_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "openblas_omp" + "${CMAKE_THREAD_LIBS_INIT}" + "" + "" + ) + endif () + + if (NOT BLAS_LIBRARIES) + check_blas_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "openblas_openmp" + "${CMAKE_THREAD_LIBS_INIT}" + "" + "" + ) + endif () + endif() endif() diff --git a/cmake/FindLAPACK.cmake b/cmake/FindLAPACK.cmake index e5bd58d443d..ba07ec00487 100644 --- a/cmake/FindLAPACK.cmake +++ b/cmake/FindLAPACK.cmake @@ -89,11 +89,11 @@ if(NOT (CMAKE_C_COMPILER_LOADED OR CMAKE_CXX_COMPILER_LOADED OR CMAKE_Fortran_CO endif() if(CMAKE_Fortran_COMPILER_LOADED) - include(${CMAKE_CURRENT_LIST_DIR}/CheckFortranFunctionExists.cmake) + include(CheckFortranFunctionExists) else() - include(${CMAKE_CURRENT_LIST_DIR}/CheckFunctionExists.cmake) + include(CheckFunctionExists) endif() -include(${CMAKE_CURRENT_LIST_DIR}/CMakePushCheckState.cmake) +include(CMakePushCheckState) cmake_push_check_state() set(CMAKE_REQUIRED_QUIET ${LAPACK_FIND_QUIETLY}) From 9041c952677d5cec4f888fc62738e09de812c0b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tiziano=20M=C3=BCller?= Date: Fri, 10 Apr 2020 18:27:29 +0200 Subject: [PATCH 15/19] cmake: move OMP check before BLAS, avoid double BLAS search --- CMakeLists.txt | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 60bc4124015..b47ae5e6a19 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -100,10 +100,14 @@ set(CMAKE_CXX_STANDARD 11) # ================================================================================================= # PACKAGE DISCOVERY (compiler configuration can impact package discovery) -# =================================== BLAS, LAPACK, PkgConfig -find_package(BLAS REQUIRED) -find_package(LAPACK REQUIRED) # needed for some of the integrated test routines +# =================================== OpenMP and OpenMP/offload backend +if (USE_OPENMP) + find_package(OpenMP REQUIRED) +endif () + +# =================================== BLAS & LAPACK, PkgConfig find_package(PkgConfig) +find_package(LAPACK REQUIRED) # needed for some of the integrated test routines, also calls find_package(BLAS) # =================================== Python # this module looks preferably for version 3 of Python. If not found, version 2 is searched @@ -137,11 +141,6 @@ Intel MPI compiler wrappers. Check the INSTALL.md for more information.") endif () endif () -# =================================== OpenMP and OpenMP/offload backend -if (USE_OPENMP) - find_package(OpenMP REQUIRED) -endif () - # =================================== SMM (Small Matrix-Matrix multiplication) if (USE_SMM MATCHES "blas") message("-- Using BLAS for Small Matrix Multiplication") From 1dc9166913824cb164695d94e90eda1334355534 Mon Sep 17 00:00:00 2001 From: Patrick Seewald Date: Thu, 9 Apr 2020 20:42:42 +0200 Subject: [PATCH 16/19] Tensors: avoid unneeded communications if logging is disabled --- src/tas/dbcsr_tas_io.F | 135 +++++++++------- src/tas/dbcsr_tas_mm.F | 214 ++++++++++++------------- src/tas/dbcsr_tas_test.F | 4 +- src/tensors/dbcsr_tensor.F | 288 +++++++++++++++++----------------- src/tensors/dbcsr_tensor_io.F | 83 ++++++---- 5 files changed, 373 insertions(+), 351 deletions(-) diff --git a/src/tas/dbcsr_tas_io.F b/src/tas/dbcsr_tas_io.F index 07d67456f77..bf406600b1b 100644 --- a/src/tas/dbcsr_tas_io.F +++ b/src/tas/dbcsr_tas_io.F @@ -18,7 +18,7 @@ MODULE dbcsr_tas_io int_8, real_8, default_string_length USE dbcsr_tas_base, ONLY: & dbcsr_tas_get_info, dbcsr_tas_get_num_blocks, dbcsr_tas_get_num_blocks_total, dbcsr_tas_get_nze_total, & - dbcsr_tas_get_nze, dbcsr_tas_nblkrows_total, dbcsr_tas_nblkcols_total + dbcsr_tas_get_nze, dbcsr_tas_nblkrows_total, dbcsr_tas_nblkcols_total, dbcsr_tas_info USE dbcsr_tas_split, ONLY: & dbcsr_tas_get_split_info, rowsplit, colsplit USE dbcsr_mpiwrap, ONLY: & @@ -34,73 +34,77 @@ MODULE dbcsr_tas_io PUBLIC :: & dbcsr_tas_write_dist, & dbcsr_tas_write_matrix_info, & - dbcsr_tas_write_split_info + dbcsr_tas_write_split_info, & + prep_output_unit CONTAINS - SUBROUTINE dbcsr_tas_write_matrix_info(matrix, output_unit, full_info) + SUBROUTINE dbcsr_tas_write_matrix_info(matrix, unit_nr, full_info) !! Write basic infos of tall-and-skinny matrix: block dimensions, full dimensions, process grid dimensions TYPE(dbcsr_tas_type), INTENT(IN) :: matrix - INTEGER, INTENT(IN) :: output_unit + INTEGER, INTENT(IN) :: unit_nr LOGICAL, OPTIONAL, INTENT(IN) :: full_info !! Whether to print distribution and block size vectors INTEGER(KIND=int_8) :: nblkrows_total, nblkcols_total, nfullrows_total, & nfullcols_total - INTEGER :: nprow, npcol + INTEGER :: nprow, npcol, unit_nr_prv CLASS(dbcsr_tas_distribution), ALLOCATABLE :: proc_row_dist, proc_col_dist CLASS(dbcsr_tas_rowcol_data), ALLOCATABLE :: row_blk_size, col_blk_size INTEGER(KIND=int_8) :: iblk CHARACTER(default_string_length) :: name + unit_nr_prv = prep_output_unit(unit_nr) + IF (unit_nr_prv == 0) RETURN + CALL dbcsr_tas_get_info(matrix, nblkrows_total=nblkrows_total, nblkcols_total=nblkcols_total, & nfullrows_total=nfullrows_total, nfullcols_total=nfullcols_total, & nprow=nprow, npcol=npcol, proc_row_dist=proc_row_dist, proc_col_dist=proc_col_dist, & row_blk_size=row_blk_size, col_blk_size=col_blk_size, name=name) - IF (output_unit > 0) THEN - WRITE (output_unit, "(T2,A)") & + IF (unit_nr_prv > 0) THEN + WRITE (unit_nr_prv, "(T2,A)") & "GLOBAL INFO OF "//TRIM(name) - WRITE (output_unit, "(T4,A,1X)", advance="no") "block dimensions:" - WRITE (output_unit, "(I12,I12)", advance="no") nblkrows_total, nblkcols_total - WRITE (output_unit, "(/T4,A,1X)", advance="no") "full dimensions:" - WRITE (output_unit, "(I14,I14)", advance="no") nfullrows_total, nfullcols_total - WRITE (output_unit, "(/T4,A,1X)", advance="no") "process grid dimensions:" - WRITE (output_unit, "(I10,I10)", advance="no") nprow, npcol + WRITE (unit_nr_prv, "(T4,A,1X)", advance="no") "block dimensions:" + WRITE (unit_nr_prv, "(I12,I12)", advance="no") nblkrows_total, nblkcols_total + WRITE (unit_nr_prv, "(/T4,A,1X)", advance="no") "full dimensions:" + WRITE (unit_nr_prv, "(I14,I14)", advance="no") nfullrows_total, nfullcols_total + WRITE (unit_nr_prv, "(/T4,A,1X)", advance="no") "process grid dimensions:" + WRITE (unit_nr_prv, "(I10,I10)", advance="no") nprow, npcol IF (PRESENT(full_info)) THEN IF (full_info) THEN - WRITE (output_unit, '(/T4,A)', advance='no') "Block sizes:" - WRITE (output_unit, '(/T8,A)', advance='no') 'Row:' + WRITE (unit_nr_prv, '(/T4,A)', advance='no') "Block sizes:" + WRITE (unit_nr_prv, '(/T8,A)', advance='no') 'Row:' DO iblk = 1, row_blk_size%nmrowcol - WRITE (output_unit, '(I4,1X)', advance='no') row_blk_size%data(iblk) + WRITE (unit_nr_prv, '(I4,1X)', advance='no') row_blk_size%data(iblk) ENDDO - WRITE (output_unit, '(/T8,A)', advance='no') 'Column:' + WRITE (unit_nr_prv, '(/T8,A)', advance='no') 'Column:' DO iblk = 1, col_blk_size%nmrowcol - WRITE (output_unit, '(I4,1X)', advance='no') col_blk_size%data(iblk) + WRITE (unit_nr_prv, '(I4,1X)', advance='no') col_blk_size%data(iblk) ENDDO - WRITE (output_unit, '(/T4,A)', advance='no') "Block distribution:" - WRITE (output_unit, '(/T8,A)', advance='no') 'Row:' + WRITE (unit_nr_prv, '(/T4,A)', advance='no') "Block distribution:" + WRITE (unit_nr_prv, '(/T8,A)', advance='no') 'Row:' DO iblk = 1, proc_row_dist%nmrowcol - WRITE (output_unit, '(I4,1X)', advance='no') proc_row_dist%dist(iblk) + WRITE (unit_nr_prv, '(I4,1X)', advance='no') proc_row_dist%dist(iblk) ENDDO - WRITE (output_unit, '(/T8,A)', advance='no') 'Column:' + WRITE (unit_nr_prv, '(/T8,A)', advance='no') 'Column:' DO iblk = 1, proc_col_dist%nmrowcol - WRITE (output_unit, '(I4,1X)', advance='no') proc_col_dist%dist(iblk) + WRITE (unit_nr_prv, '(I4,1X)', advance='no') proc_col_dist%dist(iblk) ENDDO ENDIF ENDIF - WRITE (output_unit, *) + WRITE (unit_nr_prv, *) ENDIF END SUBROUTINE - SUBROUTINE dbcsr_tas_write_dist(matrix, output_unit, full_info) + SUBROUTINE dbcsr_tas_write_dist(matrix, unit_nr, full_info) !! Write info on tall-and-skinny matrix distribution & load balance TYPE(dbcsr_tas_type), INTENT(IN) :: matrix - INTEGER, INTENT(IN) :: output_unit + INTEGER, INTENT(IN) :: unit_nr LOGICAL, INTENT(IN), OPTIONAL :: full_info !! Whether to print subgroup DBCSR distribution @@ -114,10 +118,13 @@ SUBROUTINE dbcsr_tas_write_dist(matrix, output_unit, full_info) nblock_s, nelement_s, nblock_s_max REAL(KIND=real_8) :: occupation INTEGER, DIMENSION(:), POINTER :: rowdist => NULL(), coldist => NULL() - INTEGER :: split_rowcol, icol, irow + INTEGER :: split_rowcol, icol, irow, unit_nr_prv + + unit_nr_prv = prep_output_unit(unit_nr) + IF (unit_nr_prv == 0) RETURN - CALL dbcsr_tas_get_info(matrix, name=name) CALL dbcsr_tas_get_split_info(matrix%dist%info, mp_comm, ngroup, igroup, mp_comm_group, split_rowcol) + CALL dbcsr_tas_get_info(matrix, name=name) CALL mp_environ(nproc, iproc, mp_comm) nblock = dbcsr_tas_get_num_blocks(matrix) @@ -147,49 +154,52 @@ SUBROUTINE dbcsr_tas_write_dist(matrix, output_unit, full_info) rowdist => dbcsr_distribution_row_dist(matrix%matrix%dist) coldist => dbcsr_distribution_col_dist(matrix%matrix%dist) - IF (output_unit > 0) THEN - WRITE (output_unit, "(T2,A)") & + IF (unit_nr_prv > 0) THEN + WRITE (unit_nr_prv, "(T2,A)") & "DISTRIBUTION OF "//TRIM(name) - WRITE (output_unit, "(T15,A,T68,I13)") "Number of non-zero blocks:", nblock_p_sum - WRITE (output_unit, "(T15,A,T75,F6.2)") "Percentage of non-zero blocks:", occupation - WRITE (output_unit, "(T15,A,T68,I13)") "Average number of blocks per group:", (nblock_p_sum + ngroup - 1)/ngroup - WRITE (output_unit, "(T15,A,T68,I13)") "Maximum number of blocks per group:", nblock_s_max - WRITE (output_unit, "(T15,A,T68,I13)") "Average number of matrix elements per group:", (nelement_p_sum + ngroup - 1)/ngroup - WRITE (output_unit, "(T15,A,T68,I13)") "Maximum number of matrix elements per group:", nelement_s_max - WRITE (output_unit, "(T15,A,T68,I13)") "Average number of blocks per CPU:", (nblock_p_sum + nproc - 1)/nproc - WRITE (output_unit, "(T15,A,T68,I13)") "Maximum number of blocks per CPU:", nblock_p_max - WRITE (output_unit, "(T15,A,T68,I13)") "Average number of matrix elements per CPU:", (nelement_p_sum + nproc - 1)/nproc - WRITE (output_unit, "(T15,A,T68,I13)") "Maximum number of matrix elements per CPU:", nelement_p_max + WRITE (unit_nr_prv, "(T15,A,T68,I13)") "Number of non-zero blocks:", nblock_p_sum + WRITE (unit_nr_prv, "(T15,A,T75,F6.2)") "Percentage of non-zero blocks:", occupation + WRITE (unit_nr_prv, "(T15,A,T68,I13)") "Average number of blocks per group:", (nblock_p_sum + ngroup - 1)/ngroup + WRITE (unit_nr_prv, "(T15,A,T68,I13)") "Maximum number of blocks per group:", nblock_s_max + WRITE (unit_nr_prv, "(T15,A,T68,I13)") "Average number of matrix elements per group:", (nelement_p_sum + ngroup - 1)/ngroup + WRITE (unit_nr_prv, "(T15,A,T68,I13)") "Maximum number of matrix elements per group:", nelement_s_max + WRITE (unit_nr_prv, "(T15,A,T68,I13)") "Average number of blocks per CPU:", (nblock_p_sum + nproc - 1)/nproc + WRITE (unit_nr_prv, "(T15,A,T68,I13)") "Maximum number of blocks per CPU:", nblock_p_max + WRITE (unit_nr_prv, "(T15,A,T68,I13)") "Average number of matrix elements per CPU:", (nelement_p_sum + nproc - 1)/nproc + WRITE (unit_nr_prv, "(T15,A,T68,I13)") "Maximum number of matrix elements per CPU:", nelement_p_max IF (PRESENT(full_info)) THEN IF (full_info) THEN - WRITE (output_unit, "(T15,A)") "Row distribution on subgroup:" - WRITE (output_unit, '(T15)', advance='no') + WRITE (unit_nr_prv, "(T15,A)") "Row distribution on subgroup:" + WRITE (unit_nr_prv, '(T15)', advance='no') DO irow = 1, SIZE(rowdist) - WRITE (output_unit, '(I3, 1X)', advance='no') rowdist(irow) + WRITE (unit_nr_prv, '(I3, 1X)', advance='no') rowdist(irow) ENDDO - WRITE (output_unit, "(/T15,A)") "Column distribution on subgroup:" - WRITE (output_unit, '(T15)', advance='no') + WRITE (unit_nr_prv, "(/T15,A)") "Column distribution on subgroup:" + WRITE (unit_nr_prv, '(T15)', advance='no') DO icol = 1, SIZE(coldist) - WRITE (output_unit, '(I3, 1X)', advance='no') coldist(icol) + WRITE (unit_nr_prv, '(I3, 1X)', advance='no') coldist(icol) ENDDO - WRITE (output_unit, *) + WRITE (unit_nr_prv, *) ENDIF ENDIF ENDIF END SUBROUTINE - SUBROUTINE dbcsr_tas_write_split_info(info, io_unit, name) + SUBROUTINE dbcsr_tas_write_split_info(info, unit_nr, name) !! Print info on how matrix is split TYPE(dbcsr_tas_split_info), INTENT(IN) :: info - INTEGER, INTENT(IN) :: io_unit + INTEGER, INTENT(IN) :: unit_nr CHARACTER(len=*), INTENT(IN), OPTIONAL :: name INTEGER :: groupsize, igroup, mp_comm, & mp_comm_group, mynode, nsplit, & - numnodes, split_rowcol + numnodes, split_rowcol, unit_nr_prv INTEGER, DIMENSION(2) :: coord, dims, groupcoord, groupdims, & pgrid_offset CHARACTER(len=:), ALLOCATABLE :: name_prv + unit_nr_prv = prep_output_unit(unit_nr) + IF (unit_nr_prv == 0) RETURN + IF (PRESENT(name)) THEN ALLOCATE (name_prv, SOURCE=TRIM(name)) ELSE @@ -197,27 +207,40 @@ SUBROUTINE dbcsr_tas_write_split_info(info, io_unit, name) ENDIF CALL dbcsr_tas_get_split_info(info, mp_comm, nsplit, igroup, mp_comm_group, split_rowcol, pgrid_offset) + CALL mp_environ(numnodes, mynode, mp_comm) CALL mp_environ(numnodes, dims, coord, mp_comm) CALL mp_environ(groupsize, groupdims, groupcoord, mp_comm_group) - IF (io_unit > 0) THEN + IF (unit_nr_prv > 0) THEN SELECT CASE (split_rowcol) CASE (rowsplit) - WRITE (io_unit, "(T4,A,I4,1X,A,I4)") name_prv//"splitting rows by factor", nsplit + WRITE (unit_nr_prv, "(T4,A,I4,1X,A,I4)") name_prv//"splitting rows by factor", nsplit CASE (colsplit) - WRITE (io_unit, "(T4,A,I4,1X,A,I4)") name_prv//"splitting columns by factor", nsplit + WRITE (unit_nr_prv, "(T4,A,I4,1X,A,I4)") name_prv//"splitting columns by factor", nsplit END SELECT - WRITE (io_unit, "(T4,A,I4,A1,I4)") name_prv//"global grid sizes:", dims(1), "x", dims(2) + WRITE (unit_nr_prv, "(T4,A,I4,A1,I4)") name_prv//"global grid sizes:", dims(1), "x", dims(2) ENDIF - IF (io_unit > 0) THEN - WRITE (io_unit, "(T4,A,I4,A1,I4)") & + IF (unit_nr_prv > 0) THEN + WRITE (unit_nr_prv, "(T4,A,I4,A1,I4)") & name_prv//"grid sizes on subgroups:", & groupdims(1), "x", groupdims(2) ENDIF END SUBROUTINE + FUNCTION prep_output_unit(unit_nr) RESULT(unit_nr_out) + INTEGER, INTENT(IN), OPTIONAL :: unit_nr + INTEGER :: unit_nr_out + + IF (PRESENT(unit_nr)) THEN + unit_nr_out = unit_nr + ELSE + unit_nr_out = 0 + ENDIF + + END FUNCTION + END MODULE diff --git a/src/tas/dbcsr_tas_mm.F b/src/tas/dbcsr_tas_mm.F index 437c10fc639..3edbcbf05d7 100644 --- a/src/tas/dbcsr_tas_mm.F +++ b/src/tas/dbcsr_tas_mm.F @@ -51,7 +51,7 @@ MODULE dbcsr_tas_mm USE dbcsr_operations, ONLY: & dbcsr_scale, dbcsr_get_info, dbcsr_copy, dbcsr_clear, dbcsr_add USE dbcsr_tas_io, ONLY: & - dbcsr_tas_write_dist, dbcsr_tas_write_matrix_info, dbcsr_tas_write_split_info + dbcsr_tas_write_dist, dbcsr_tas_write_matrix_info, dbcsr_tas_write_split_info, prep_output_unit USE dbcsr_work_operations, ONLY: dbcsr_create, dbcsr_finalize USE dbcsr_transformations, ONLY: dbcsr_redistribute USE dbcsr_dist_methods, ONLY: dbcsr_distribution_new @@ -70,7 +70,7 @@ MODULE dbcsr_tas_mm RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a, matrix_b, beta, matrix_c, & optimize_dist, split_opt, filter_eps, flop, move_data_a, & - move_data_b, retain_sparsity, simple_split, result_index, io_unit, log_verbose) + move_data_b, retain_sparsity, simple_split, result_index, unit_nr, log_verbose) !! tall-and-skinny matrix-matrix multiplication. Undocumented dummy arguments are identical to !! arguments of dbcsr_multiply (see dbcsr_mm, dbcsr_multiply_generic). @@ -93,7 +93,7 @@ RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a, !! memory optimization: move data to matrix_c such that matrix_b is empty on return !! for internal use only INTEGER(int_8), DIMENSION(:, :), ALLOCATABLE, INTENT(OUT), OPTIONAL :: result_index - INTEGER, OPTIONAL, INTENT(IN) :: io_unit + INTEGER, OPTIONAL, INTENT(IN) :: unit_nr !! unit number for logging output LOGICAL, OPTIONAL, INTENT(IN) :: log_verbose !! only for testing: verbose output @@ -106,7 +106,7 @@ RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a, INTEGER, DIMENSION(2) :: pdims, pcoord, pcoord_sub, pdims_sub INTEGER(KIND=int_8), DIMENSION(3) :: dims INTEGER :: max_mm_dim, data_type, mp_comm, comm_tmp, & - handle, handle2, io_unit_prv, nsplit, nsplit_opt, numproc, numproc_sub, iproc, & + handle, handle2, unit_nr_prv, nsplit, nsplit_opt, numproc, numproc_sub, iproc, & mp_comm_group, mp_comm_mm, split_rc, split_a, split_b, split_c, & mp_comm_opt, batched_repl, max_mm_dim_batched, nsplit_batched CHARACTER(LEN=1) :: tr_case, transa_prv, transb_prv, transc_prv @@ -124,11 +124,7 @@ RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a, NULLIFY (matrix_b_rs, matrix_a_rs, matrix_c_rs, matrix_a_mm, matrix_b_mm, matrix_c_mm) - IF (PRESENT(io_unit)) THEN - io_unit_prv = io_unit - ELSE - io_unit_prv = 0 - ENDIF + unit_nr_prv = prep_output_unit(unit_nr) IF (PRESENT(simple_split)) THEN simple_split_prv = simple_split @@ -209,23 +205,20 @@ RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a, dims_b = [dbcsr_tas_nblkrows_total(matrix_b), dbcsr_tas_nblkcols_total(matrix_b)] dims_c = [dbcsr_tas_nblkrows_total(matrix_c), dbcsr_tas_nblkcols_total(matrix_c)] - IF (PRESENT(io_unit)) THEN - IF (io_unit_prv .GT. 0) THEN - WRITE (io_unit_prv, '(A)') repeat("-", 80) - WRITE (io_unit_prv, '(A,1X,A,1X,A,1X,A,1X,A,1X,A)') "DBCSR TAS MATRIX MULTIPLICATION:", & - TRIM(matrix_a%matrix%name), 'x', TRIM(matrix_b%matrix%name), '=', TRIM(matrix_c%matrix%name) - WRITE (io_unit_prv, '(A)') repeat("-", 80) - ENDIF - IF (do_batched) THEN - IF (io_unit_prv > 0) THEN - WRITE (io_unit_prv, "(T2,A)") & - "BATCHED PROCESSING OF MATMUL" - IF (batched_repl > 0) THEN - WRITE (io_unit_prv, "(T4,A,T80,I1)") "reusing replicated matrix:", batched_repl - ENDIF + IF (unit_nr_prv .GT. 0) THEN + WRITE (unit_nr_prv, '(A)') repeat("-", 80) + WRITE (unit_nr_prv, '(A,1X,A,1X,A,1X,A,1X,A,1X,A)') "DBCSR TAS MATRIX MULTIPLICATION:", & + TRIM(matrix_a%matrix%name), 'x', TRIM(matrix_b%matrix%name), '=', TRIM(matrix_c%matrix%name) + WRITE (unit_nr_prv, '(A)') repeat("-", 80) + ENDIF + IF (do_batched) THEN + IF (unit_nr_prv > 0) THEN + WRITE (unit_nr_prv, "(T2,A)") & + "BATCHED PROCESSING OF MATMUL" + IF (batched_repl > 0) THEN + WRITE (unit_nr_prv, "(T4,A,T80,I1)") "reusing replicated matrix:", batched_repl ENDIF ENDIF - ENDIF IF (transa_prv .EQ. dbcsr_transpose) THEN @@ -246,8 +239,8 @@ RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a, tr_case = '' - IF (io_unit_prv > 0) THEN - WRITE (io_unit_prv, "(T2,A, 1X, I12, 1X, I12, 1X, I12)") "mm dims:", dims(1), dims(2), dims(3) + IF (unit_nr_prv > 0) THEN + WRITE (unit_nr_prv, "(T2,A, 1X, I12, 1X, I12, 1X, I12)") "mm dims:", dims(1), dims(2), dims(3) ENDIF CALL dbcsr_tas_get_split_info(dbcsr_tas_info(matrix_a), mp_comm=mp_comm) @@ -268,13 +261,13 @@ RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a, nsplit = split_factor_estimate(max_mm_dim, nze_a, nze_b, nze_c, numproc) nsplit_opt = nsplit - IF (io_unit_prv > 0) THEN - WRITE (io_unit_prv, "(T2,A)") & + IF (unit_nr_prv > 0) THEN + WRITE (unit_nr_prv, "(T2,A)") & "MM PARAMETERS" - WRITE (io_unit_prv, "(T4,A,T68,I13)") "Est. number of matrix elements per CPU of result matrix:", & + WRITE (unit_nr_prv, "(T4,A,T68,I13)") "Est. number of matrix elements per CPU of result matrix:", & (nze_c + numproc - 1)/numproc - WRITE (io_unit_prv, "(T4,A,T68,I13)") "Est. optimal split factor:", nsplit + WRITE (unit_nr_prv, "(T4,A,T68,I13)") "Est. optimal split factor:", nsplit ENDIF ELSEIF (batched_repl > 0) THEN @@ -282,10 +275,10 @@ RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a, nsplit_opt = nsplit max_mm_dim = max_mm_dim_batched simple_split_prv = simple_split_save - IF (io_unit_prv > 0) THEN - WRITE (io_unit_prv, "(T2,A)") & + IF (unit_nr_prv > 0) THEN + WRITE (unit_nr_prv, "(T2,A)") & "MM PARAMETERS" - WRITE (io_unit_prv, "(T4,A,T68,I13)") "Est. optimal split factor:", nsplit + WRITE (unit_nr_prv, "(T4,A,T68,I13)") "Est. optimal split factor:", nsplit ENDIF ELSE @@ -305,7 +298,7 @@ RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a, opt_nsplit=batched_repl == 0, & split_rc_1=split_a, split_rc_2=split_c, & nodata2=nodata_3, comm_new=comm_tmp, & - move_data_1=move_a, unit_nr=io_unit) + move_data_1=move_a, unit_nr=unit_nr_prv) info = dbcsr_tas_info(matrix_a_rs) CALL dbcsr_tas_get_split_info(info, split_rowcol=split_rc, mp_comm=mp_comm) @@ -319,11 +312,11 @@ RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a, tr_case = transa_prv - IF (io_unit_prv > 0) THEN + IF (unit_nr_prv > 0) THEN IF (tr_case == 'N') THEN - WRITE (io_unit_prv, "(T2,A, 1X, A)") "mm case:", "| x + = |" + WRITE (unit_nr_prv, "(T2,A, 1X, A)") "mm case:", "| x + = |" ELSE - WRITE (io_unit_prv, "(T2,A, 1X, A)") "mm case:", "--T x + = --T" + WRITE (unit_nr_prv, "(T2,A, 1X, A)") "mm case:", "--T x + = --T" ENDIF ENDIF @@ -336,7 +329,7 @@ RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a, opt_nsplit=batched_repl == 0, & split_rc_1=split_a, split_rc_2=split_b, & comm_new=comm_tmp, & - move_data_1=move_a, move_data_2=move_b, unit_nr=io_unit) + move_data_1=move_a, move_data_2=move_b, unit_nr=unit_nr_prv) info = dbcsr_tas_info(matrix_a_rs) CALL dbcsr_tas_get_split_info(info, split_rowcol=split_rc, mp_comm=mp_comm) @@ -354,11 +347,11 @@ RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a, new_c = matrix_c%do_batched == 0 tr_case = transa_prv - IF (io_unit_prv > 0) THEN + IF (unit_nr_prv > 0) THEN IF (tr_case == 'N') THEN - WRITE (io_unit_prv, "(T2,A, 1X, A)") "mm case:", "-- x --T = +" + WRITE (unit_nr_prv, "(T2,A, 1X, A)") "mm case:", "-- x --T = +" ELSE - WRITE (io_unit_prv, "(T2,A, 1X, A)") "mm case:", "|T x | = +" + WRITE (unit_nr_prv, "(T2,A, 1X, A)") "mm case:", "|T x | = +" ENDIF ENDIF @@ -371,7 +364,7 @@ RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a, opt_nsplit=batched_repl == 0, & split_rc_1=split_b, split_rc_2=split_c, & nodata2=nodata_3, comm_new=comm_tmp, & - move_data_1=move_b, unit_nr=io_unit) + move_data_1=move_b, unit_nr=unit_nr_prv) info = dbcsr_tas_info(matrix_b_rs) CALL dbcsr_tas_get_split_info(info, split_rowcol=split_rc, mp_comm=mp_comm) @@ -384,11 +377,11 @@ RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a, tr_case = transb_prv - IF (io_unit_prv > 0) THEN + IF (unit_nr_prv > 0) THEN IF (tr_case == 'N') THEN - WRITE (io_unit_prv, "(T2,A, 1X, A)") "mm case:", "+ x -- = --" + WRITE (unit_nr_prv, "(T2,A, 1X, A)") "mm case:", "+ x -- = --" ELSE - WRITE (io_unit_prv, "(T2,A, 1X, A)") "mm case:", "+ x |T = |T" + WRITE (unit_nr_prv, "(T2,A, 1X, A)") "mm case:", "+ x |T = |T" ENDIF ENDIF @@ -411,19 +404,19 @@ RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a, filter_eps_prv = 0.0_real_8 ENDIF - IF (PRESENT(io_unit)) THEN - IF (io_unit_prv > 0) THEN - WRITE (io_unit_prv, "(T2, A)") "SPLIT / PARALLELIZATION INFO" + IF (unit_nr_prv /= 0) THEN + IF (unit_nr_prv > 0) THEN + WRITE (unit_nr_prv, "(T2, A)") "SPLIT / PARALLELIZATION INFO" ENDIF - CALL dbcsr_tas_write_split_info(info, io_unit_prv) - IF (ASSOCIATED(matrix_a_rs)) CALL dbcsr_tas_write_matrix_info(matrix_a_rs, io_unit_prv, full_info=log_verbose) - IF (ASSOCIATED(matrix_b_rs)) CALL dbcsr_tas_write_matrix_info(matrix_b_rs, io_unit_prv, full_info=log_verbose) - IF (ASSOCIATED(matrix_c_rs)) CALL dbcsr_tas_write_matrix_info(matrix_c_rs, io_unit_prv, full_info=log_verbose) - IF (io_unit_prv > 0) THEN + CALL dbcsr_tas_write_split_info(info, unit_nr_prv) + IF (ASSOCIATED(matrix_a_rs)) CALL dbcsr_tas_write_matrix_info(matrix_a_rs, unit_nr_prv, full_info=log_verbose) + IF (ASSOCIATED(matrix_b_rs)) CALL dbcsr_tas_write_matrix_info(matrix_b_rs, unit_nr_prv, full_info=log_verbose) + IF (ASSOCIATED(matrix_c_rs)) CALL dbcsr_tas_write_matrix_info(matrix_c_rs, unit_nr_prv, full_info=log_verbose) + IF (unit_nr_prv > 0) THEN IF (opt_pgrid) THEN - WRITE (io_unit_prv, "(T4, A, 1X, A)") "Change process grid:", "Yes" + WRITE (unit_nr_prv, "(T4, A, 1X, A)") "Change process grid:", "Yes" ELSE - WRITE (io_unit_prv, "(T4, A, 1X, A)") "Change process grid:", "No" + WRITE (unit_nr_prv, "(T4, A, 1X, A)") "Change process grid:", "No" ENDIF ENDIF ENDIF @@ -451,9 +444,9 @@ RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a, CALL dbcsr_tas_destroy(matrix_b_rs) DEALLOCATE (matrix_b_rs) ENDIF - IF (PRESENT(io_unit)) THEN - CALL dbcsr_tas_write_dist(matrix_a_rs, io_unit) - CALL dbcsr_tas_write_dist(matrix_b_rep, io_unit, full_info=log_verbose) + IF (unit_nr_prv /= 0) THEN + CALL dbcsr_tas_write_dist(matrix_a_rs, unit_nr_prv) + CALL dbcsr_tas_write_dist(matrix_b_rep, unit_nr_prv, full_info=log_verbose) ENDIF CALL convert_to_new_pgrid(mp_comm_mm, matrix_a_rs%matrix, matrix_a_mm, optimize_pgrid=opt_pgrid, move_data=move_a) @@ -512,8 +505,8 @@ RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a, IF (PRESENT(filter_eps)) CALL dbcsr_tas_filter(matrix_c_rs, filter_eps) - IF (PRESENT(io_unit)) THEN - CALL dbcsr_tas_write_dist(matrix_c_rs, io_unit) + IF (unit_nr_prv /= 0) THEN + CALL dbcsr_tas_write_dist(matrix_c_rs, unit_nr_prv) ENDIF CASE (2) @@ -528,9 +521,9 @@ RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a, matrix_c_rep => matrix_c%mm_storage%store_batched_repl ENDIF - IF (PRESENT(io_unit)) THEN - CALL dbcsr_tas_write_dist(matrix_a_rs, io_unit) - CALL dbcsr_tas_write_dist(matrix_b_rs, io_unit) + IF (unit_nr_prv /= 0) THEN + CALL dbcsr_tas_write_dist(matrix_a_rs, unit_nr_prv) + CALL dbcsr_tas_write_dist(matrix_b_rs, unit_nr_prv) ENDIF CALL convert_to_new_pgrid(mp_comm_mm, matrix_a_rs%matrix, matrix_a_mm, optimize_pgrid=opt_pgrid, move_data=move_a) @@ -567,8 +560,8 @@ RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a, IF (new_b) DEALLOCATE (matrix_b_rs) ENDIF - IF (PRESENT(io_unit)) THEN - CALL dbcsr_tas_write_dist(matrix_c_rep, io_unit, full_info=log_verbose) + IF (unit_nr_prv /= 0) THEN + CALL dbcsr_tas_write_dist(matrix_c_rep, unit_nr_prv, full_info=log_verbose) ENDIF IF (matrix_c%do_batched == 0) THEN @@ -598,9 +591,9 @@ RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a, CALL dbcsr_tas_destroy(matrix_a_rs) DEALLOCATE (matrix_a_rs) ENDIF - IF (PRESENT(io_unit)) THEN - CALL dbcsr_tas_write_dist(matrix_a_rep, io_unit, full_info=log_verbose) - CALL dbcsr_tas_write_dist(matrix_b_rs, io_unit) + IF (unit_nr_prv /= 0) THEN + CALL dbcsr_tas_write_dist(matrix_a_rep, unit_nr_prv, full_info=log_verbose) + CALL dbcsr_tas_write_dist(matrix_b_rs, unit_nr_prv) ENDIF CALL convert_to_new_pgrid(mp_comm_mm, matrix_a_rep%matrix, matrix_a_mm, optimize_pgrid=opt_pgrid, & @@ -658,8 +651,8 @@ RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a, IF (PRESENT(filter_eps)) CALL dbcsr_tas_filter(matrix_c_rs, filter_eps) - IF (PRESENT(io_unit)) THEN - CALL dbcsr_tas_write_dist(matrix_c_rs, io_unit) + IF (unit_nr_prv /= 0) THEN + CALL dbcsr_tas_write_dist(matrix_c_rs, unit_nr_prv) ENDIF END SELECT @@ -704,10 +697,10 @@ RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a, IF (PRESENT(optimize_dist)) THEN IF (optimize_dist) CALL mp_comm_free(comm_tmp) ENDIF - IF (io_unit_prv > 0) THEN - WRITE (io_unit_prv, '(A)') repeat("-", 80) - WRITE (io_unit_prv, '(A,1X,A,1X,A,1X,A,1X,A,1X,A)') "TAS MATRIX MULTIPLICATION DONE" - WRITE (io_unit_prv, '(A)') repeat("-", 80) + IF (unit_nr_prv > 0) THEN + WRITE (unit_nr_prv, '(A)') repeat("-", 80) + WRITE (unit_nr_prv, '(A,1X,A,1X,A,1X,A,1X,A,1X,A)') "TAS MATRIX MULTIPLICATION DONE" + WRITE (unit_nr_prv, '(A)') repeat("-", 80) ENDIF CALL timestop(handle) @@ -834,7 +827,7 @@ SUBROUTINE reshape_mm_compatible(matrix1_in, matrix2_in, matrix1_out, matrix2_ou INTEGER(KIND=int_8) :: d1, d2 CHARACTER(LEN=*), PARAMETER :: routineN = 'reshape_mm_compatible', & routineP = moduleN//':'//routineN - INTEGER :: handle, mp_comm, numnodes, io_unit, & + INTEGER :: handle, mp_comm, numnodes, unit_nr_prv, & nsplit_prv, ref, split_rc_ref INTEGER, DIMENSION(2) :: pcoord, pdims LOGICAL :: optimize_dist_prv, trans1_newdist, trans2_newdist @@ -859,12 +852,9 @@ SUBROUTINE reshape_mm_compatible(matrix1_in, matrix2_in, matrix1_out, matrix2_ou nodata2_prv = .FALSE. ENDIF + unit_nr_prv = prep_output_unit(unit_nr) + NULLIFY (matrix1_out, matrix2_out) - IF (PRESENT(unit_nr)) THEN - io_unit = unit_nr - ELSE - io_unit = 0 - ENDIF IF (PRESENT(optimize_dist)) THEN optimize_dist_prv = optimize_dist @@ -906,25 +896,25 @@ SUBROUTINE reshape_mm_compatible(matrix1_in, matrix2_in, matrix1_out, matrix2_ou move_data=move_data_1, nodata=nodata1, opt_nsplit=opt_nsplit) CALL change_split(matrix2_in, matrix2_out, nsplit_prv, split_rc_2, new2, & move_data=move_data_2, nodata=nodata2, opt_nsplit=opt_nsplit) - IF (io_unit > 0) THEN - WRITE (io_unit, "(T2,A,1X,A,1X,A,1X,A)") "No redistribution of", TRIM(matrix1_in%matrix%name), & + IF (unit_nr_prv > 0) THEN + WRITE (unit_nr_prv, "(T2,A,1X,A,1X,A,1X,A)") "No redistribution of", TRIM(matrix1_in%matrix%name), & "and", TRIM(matrix2_in%matrix%name) IF (new1) THEN - WRITE (io_unit, "(T2,A,1X,A,1X,A)") "Change split factor of", TRIM(matrix1_in%matrix%name), ": Yes" + WRITE (unit_nr_prv, "(T2,A,1X,A,1X,A)") "Change split factor of", TRIM(matrix1_in%matrix%name), ": Yes" ELSE - WRITE (io_unit, "(T2,A,1X,A,1X,A)") "Change split factor of", TRIM(matrix1_in%matrix%name), ": No" + WRITE (unit_nr_prv, "(T2,A,1X,A,1X,A)") "Change split factor of", TRIM(matrix1_in%matrix%name), ": No" ENDIF IF (new2) THEN - WRITE (io_unit, "(T2,A,1X,A,1X,A)") "Change split factor of", TRIM(matrix2_in%matrix%name), ": Yes" + WRITE (unit_nr_prv, "(T2,A,1X,A,1X,A)") "Change split factor of", TRIM(matrix2_in%matrix%name), ": Yes" ELSE - WRITE (io_unit, "(T2,A,1X,A,1X,A)") "Change split factor of", TRIM(matrix2_in%matrix%name), ": No" + WRITE (unit_nr_prv, "(T2,A,1X,A,1X,A)") "Change split factor of", TRIM(matrix2_in%matrix%name), ": No" ENDIF ENDIF ELSE IF (optimize_dist_prv) THEN - IF (io_unit > 0) THEN - WRITE (io_unit, "(T2,A,1X,A,1X,A,1X,A)") "Optimizing distribution of", TRIM(matrix1_in%matrix%name), & + IF (unit_nr_prv > 0) THEN + WRITE (unit_nr_prv, "(T2,A,1X,A,1X,A,1X,A)") "Optimizing distribution of", TRIM(matrix1_in%matrix%name), & "and", TRIM(matrix2_in%matrix%name) ENDIF @@ -1002,8 +992,8 @@ SUBROUTINE reshape_mm_compatible(matrix1_in, matrix2_in, matrix1_out, matrix2_ou ELSE SELECT CASE (ref) CASE (1) - IF (io_unit > 0) THEN - WRITE (io_unit, "(T2,A,1X,A)") "Redistribution of", TRIM(matrix2_in%matrix%name) + IF (unit_nr_prv > 0) THEN + WRITE (unit_nr_prv, "(T2,A,1X,A)") "Redistribution of", TRIM(matrix2_in%matrix%name) ENDIF CALL change_split(matrix1_in, matrix1_out, nsplit_prv, split_rc_1, new1, & @@ -1014,8 +1004,8 @@ SUBROUTINE reshape_mm_compatible(matrix1_in, matrix2_in, matrix1_out, matrix2_ou nodata=nodata2, move_data=move_data_2) new2 = .TRUE. CASE (2) - IF (io_unit > 0) THEN - WRITE (io_unit, "(T2,A,1X,A)") "Redistribution of", TRIM(matrix1_in%matrix%name) + IF (unit_nr_prv > 0) THEN + WRITE (unit_nr_prv, "(T2,A,1X,A)") "Redistribution of", TRIM(matrix1_in%matrix%name) ENDIF CALL change_split(matrix2_in, matrix2_out, nsplit_prv, split_rc_2, new2, & @@ -1127,23 +1117,19 @@ SUBROUTINE change_split(matrix_in, matrix_out, nsplit, split_rowcol, is_new, opt CALL timestop(handle) END SUBROUTINE - FUNCTION dist_compatible(mat_a, mat_b, split_rc_a, split_rc_b, io_unit) + FUNCTION dist_compatible(mat_a, mat_b, split_rc_a, split_rc_b, unit_nr) !! Check whether matrices have same distribution and same split. TYPE(dbcsr_tas_type), INTENT(IN) :: mat_a, mat_b INTEGER, INTENT(IN) :: split_rc_a, split_rc_b - INTEGER, INTENT(IN), OPTIONAL :: io_unit + INTEGER, INTENT(IN), OPTIONAL :: unit_nr LOGICAL :: dist_compatible INTEGER :: res, same_local_rowcols, split_check TYPE(dbcsr_tas_split_info) :: info_a, info_b - INTEGER :: io_unit_prv, numproc + INTEGER :: unit_nr_prv, numproc INTEGER, DIMENSION(2) :: pdims_a, pdims_b, pcoord_a, pcoord_b - IF (PRESENT(io_unit)) THEN - io_unit_prv = io_unit - ELSE - io_unit_prv = 0 - ENDIF + unit_nr_prv = prep_output_unit(unit_nr) dist_compatible = .FALSE. @@ -1168,34 +1154,34 @@ FUNCTION dist_compatible(mat_a, mat_b, split_rc_a, split_rc_b, io_unit) RETURN ENDIF - IF (io_unit_prv > 0) THEN - WRITE (io_unit_prv, *) "mp comm compatible" + IF (unit_nr_prv > 0) THEN + WRITE (unit_nr_prv, *) "mp comm compatible" ENDIF IF (mat_a%dist%info%split_rowcol == mat_b%dist%info%split_rowcol) THEN - IF (io_unit_prv > 0) THEN - WRITE (io_unit_prv, *) "split compatible" + IF (unit_nr_prv > 0) THEN + WRITE (unit_nr_prv, *) "split compatible" ENDIF same_local_rowcols = MERGE(1, 0, array_eq(mat_a%dist%local_rowcols, mat_b%dist%local_rowcols)) CALL mp_sum(same_local_rowcols, info_a%mp_comm) IF (same_local_rowcols == numproc) THEN - IF (io_unit_prv > 0) THEN - WRITE (io_unit_prv, *) "local rowcols compatible" + IF (unit_nr_prv > 0) THEN + WRITE (unit_nr_prv, *) "local rowcols compatible" ENDIF dist_compatible = .TRUE. ELSE - IF (io_unit_prv > 0) THEN - WRITE (io_unit_prv, *) "local rowcols A", mat_a%dist%local_rowcols - WRITE (io_unit_prv, *) "local rowcols B", mat_b%dist%local_rowcols + IF (unit_nr_prv > 0) THEN + WRITE (unit_nr_prv, *) "local rowcols A", mat_a%dist%local_rowcols + WRITE (unit_nr_prv, *) "local rowcols B", mat_b%dist%local_rowcols ENDIF ENDIF ENDIF - IF (io_unit_prv > 0) THEN - WRITE (io_unit_prv, *) "is compatible?", dist_compatible + IF (unit_nr_prv > 0) THEN + WRITE (unit_nr_prv, *) "is compatible?", dist_compatible ENDIF END FUNCTION @@ -1281,14 +1267,14 @@ SUBROUTINE reshape_mm_template(template, matrix_in, matrix_out, trans, split_rc, END SUBROUTINE SUBROUTINE dbcsr_tas_result_index(transa, transb, transc, matrix_a, matrix_b, matrix_c, filter_eps, & - io_unit, blk_ind, nze, retain_sparsity) + unit_nr, blk_ind, nze, retain_sparsity) !! Estimate sparsity pattern of C resulting from A x B = C by multiplying the block norms of A and B !! Same dummy arguments as dbcsr_tas_multiply CHARACTER(LEN=1), INTENT(IN) :: transa, transb, transc TYPE(dbcsr_tas_type), INTENT(INOUT), TARGET :: matrix_a, matrix_b, matrix_c TYPE(dbcsr_tas_type), POINTER :: matrix_a_bnorm, matrix_b_bnorm, matrix_c_bnorm REAL(KIND=real_8), INTENT(IN), OPTIONAL :: filter_eps - INTEGER, INTENT(IN), OPTIONAL :: io_unit + INTEGER, INTENT(IN), OPTIONAL :: unit_nr INTEGER(int_8), DIMENSION(:, :), ALLOCATABLE, INTENT(OUT), OPTIONAL :: blk_ind LOGICAL, INTENT(IN), OPTIONAL :: retain_sparsity INTEGER(int_8), INTENT(OUT), OPTIONAL :: nze @@ -1317,7 +1303,7 @@ SUBROUTINE dbcsr_tas_result_index(transa, transb, transc, matrix_a, matrix_b, ma CALL dbcsr_tas_multiply(transa, transb, transc, dbcsr_scalar(1.0_real_8), matrix_a_bnorm, & matrix_b_bnorm, dbcsr_scalar(0.0_real_8), matrix_c_bnorm, & filter_eps=filter_eps, move_data_a=.TRUE., move_data_b=.TRUE., & - simple_split=.TRUE., io_unit=io_unit) + simple_split=.TRUE., unit_nr=unit_nr) CALL dbcsr_tas_destroy(matrix_a_bnorm) CALL dbcsr_tas_destroy(matrix_b_bnorm) diff --git a/src/tas/dbcsr_tas_test.F b/src/tas/dbcsr_tas_test.F index 0a85b213432..f907971d030 100644 --- a/src/tas/dbcsr_tas_test.F +++ b/src/tas/dbcsr_tas_test.F @@ -191,7 +191,7 @@ SUBROUTINE dbcsr_tas_benchmark_mm(transa, transb, transc, matrix_a, matrix_b, ma CALL timeset("benchmark_tas_mm", handle1) CALL dbcsr_tas_multiply(transa, transb, transc, dbcsr_scalar(1.0_real_8), matrix_a, matrix_b, & dbcsr_scalar(0.0_real_8), matrix_c, & - filter_eps=filter_eps, io_unit=io_unit) + filter_eps=filter_eps, unit_nr=io_unit) CALL timestop(handle1) IF (PRESENT(io_unit)) THEN IF (io_unit > 0) THEN @@ -296,7 +296,7 @@ SUBROUTINE dbcsr_tas_test_mm(transa, transb, transc, matrix_a, matrix_b, matrix_ CALL dbcsr_tas_multiply(transa, transb, transc, dbcsr_scalar(1.0_real_8), matrix_a, matrix_b, & dbcsr_scalar(0.0_real_8), matrix_c, & - filter_eps=filter_eps, io_unit=io_unit, log_verbose=log_verbose, optimize_dist=.TRUE.) + filter_eps=filter_eps, unit_nr=io_unit, log_verbose=log_verbose, optimize_dist=.TRUE.) CALL dbcsr_tas_convert_to_dbcsr(matrix_a, dbcsr_a) CALL dbcsr_tas_convert_to_dbcsr(matrix_b, dbcsr_b) diff --git a/src/tensors/dbcsr_tensor.F b/src/tensors/dbcsr_tensor.F index ceee371ac8b..a409b052f94 100644 --- a/src/tensors/dbcsr_tensor.F +++ b/src/tensors/dbcsr_tensor.F @@ -66,7 +66,7 @@ MODULE dbcsr_tensor USE dbcsr_tensor_split, ONLY: & dbcsr_t_split_copyback, dbcsr_t_make_compatible_blocks, dbcsr_t_crop USE dbcsr_tensor_io, ONLY: & - dbcsr_t_write_tensor_info, dbcsr_t_write_tensor_dist + dbcsr_t_write_tensor_info, dbcsr_t_write_tensor_dist, prep_output_unit USE dbcsr_dist_operations, ONLY: & checker_tr USE dbcsr_toollib, ONLY: & @@ -124,7 +124,7 @@ SUBROUTINE dbcsr_t_copy(tensor_in, tensor_out, order, summation, bounds, move_da TYPE(dbcsr_t_type), POINTER :: in_tmp_1 => NULL(), in_tmp_2 => NULL(), & in_tmp_3 => NULL(), out_tmp_1 => NULL() - INTEGER :: handle + INTEGER :: handle, unit_nr_prv INTEGER, DIMENSION(:), ALLOCATABLE :: map1_in_1, map1_in_2, map2_in_1, map2_in_2 CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_t_copy', & @@ -139,6 +139,8 @@ SUBROUTINE dbcsr_t_copy(tensor_in, tensor_out, order, summation, bounds, move_da DBCSR_ASSERT(tensor_out%valid) + unit_nr_prv = prep_output_unit(unit_nr) + IF (PRESENT(move_data)) THEN move_prv = move_data ELSE @@ -236,7 +238,7 @@ SUBROUTINE dbcsr_t_copy(tensor_in, tensor_out, order, summation, bounds, move_da ENDIF IF (new_out_1) THEN - IF (PRESENT(unit_nr)) THEN + IF (unit_nr_prv /= 0) THEN CALL dbcsr_t_write_tensor_dist(out_tmp_1, unit_nr) ENDIF CALL dbcsr_t_split_copyback(out_tmp_1, tensor_out, summation) @@ -537,7 +539,7 @@ SUBROUTINE dbcsr_t_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, & INTEGER(int_8), DIMENSION(:, :), ALLOCATABLE :: result_index_2d LOGICAL :: assert_stmt INTEGER :: data_type, max_mm_dim, max_tensor, mp_comm, & - iblk, nblk + iblk, nblk, unit_nr_prv INTEGER, DIMENSION(SIZE(contract_1)) :: contract_1_mod INTEGER, DIMENSION(SIZE(notcontract_1)) :: notcontract_1_mod INTEGER, DIMENSION(SIZE(contract_2)) :: contract_2_mod @@ -547,7 +549,7 @@ SUBROUTINE dbcsr_t_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, & CHARACTER(LEN=1) :: trans_1, trans_2, trans_3 LOGICAL :: new_1, new_2, new_3, move_data_1, move_data_2 INTEGER :: ndims1, ndims2, ndims3 - INTEGER :: occ_1, occ_2 + INTEGER :: occ_1, occ_2, write_log INTEGER, DIMENSION(:), ALLOCATABLE :: dims1, dims2, dims3 CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_t_contract_expert', & @@ -588,6 +590,8 @@ SUBROUTINE dbcsr_t_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, & assert_stmt = dbcsr_t_get_data_type(tensor_1) .EQ. dbcsr_t_get_data_type(tensor_2) DBCSR_ASSERT(assert_stmt) + unit_nr_prv = prep_output_unit(unit_nr) + IF (PRESENT(move_data)) THEN move_data_1 = move_data move_data_2 = move_data @@ -646,17 +650,17 @@ SUBROUTINE dbcsr_t_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, & RETURN ENDIF - IF (PRESENT(unit_nr)) THEN - IF (unit_nr > 0) THEN - WRITE (unit_nr, '(A)') repeat("-", 80) - WRITE (unit_nr, '(A,1X,A,1X,A,1X,A,1X,A,1X,A)') "DBCSR TENSOR CONTRACTION:", & + IF (unit_nr_prv /= 0) THEN + IF (unit_nr_prv > 0) THEN + WRITE (unit_nr_prv, '(A)') repeat("-", 80) + WRITE (unit_nr_prv, '(A,1X,A,1X,A,1X,A,1X,A,1X,A)') "DBCSR TENSOR CONTRACTION:", & TRIM(tensor_crop_1%name), 'x', TRIM(tensor_crop_2%name), '=', TRIM(tensor_3%name) - WRITE (unit_nr, '(A)') repeat("-", 80) + WRITE (unit_nr_prv, '(A)') repeat("-", 80) ENDIF - CALL dbcsr_t_write_tensor_info(tensor_crop_1, unit_nr, full_info=log_verbose) - CALL dbcsr_t_write_tensor_dist(tensor_crop_1, unit_nr) - CALL dbcsr_t_write_tensor_info(tensor_crop_2, unit_nr, full_info=log_verbose) - CALL dbcsr_t_write_tensor_dist(tensor_crop_2, unit_nr) + CALL dbcsr_t_write_tensor_info(tensor_crop_1, unit_nr_prv, full_info=log_verbose) + CALL dbcsr_t_write_tensor_dist(tensor_crop_1, unit_nr_prv) + CALL dbcsr_t_write_tensor_info(tensor_crop_2, unit_nr_prv, full_info=log_verbose) + CALL dbcsr_t_write_tensor_dist(tensor_crop_2, unit_nr_prv) ENDIF data_type = dbcsr_t_get_data_type(tensor_crop_1) @@ -677,13 +681,11 @@ SUBROUTINE dbcsr_t_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, & indchar3(map_1) = indchar1(notcontract_1) indchar3(map_2) = indchar2(notcontract_2) - IF (PRESENT(unit_nr)) CALL dbcsr_t_print_contraction_index(tensor_crop_1, indchar1, & + IF (unit_nr_prv /= 0) CALL dbcsr_t_print_contraction_index(tensor_crop_1, indchar1, & tensor_crop_2, indchar2, & - tensor_3, indchar3, unit_nr) - IF (PRESENT(unit_nr)) THEN - IF (unit_nr > 0) THEN - WRITE (unit_nr, '(T2,A)') "aligning tensor index with data" - ENDIF + tensor_3, indchar3, unit_nr_prv) + IF (unit_nr_prv > 0) THEN + WRITE (unit_nr_prv, '(T2,A)') "aligning tensor index with data" ENDIF CALL align_tensor(tensor_crop_1, contract_1, notcontract_1, & @@ -695,9 +697,9 @@ SUBROUTINE dbcsr_t_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, & CALL align_tensor(tensor_3, map_1, map_2, & tensor_algn_3, map_1_mod, map_2_mod, indchar3, indchar3_mod) - IF (PRESENT(unit_nr)) CALL dbcsr_t_print_contraction_index(tensor_algn_1, indchar1_mod, & + IF (unit_nr_prv /= 0) CALL dbcsr_t_print_contraction_index(tensor_algn_1, indchar1_mod, & tensor_algn_2, indchar2_mod, & - tensor_algn_3, indchar3_mod, unit_nr) + tensor_algn_3, indchar3_mod, unit_nr_prv) ALLOCATE (dims1(ndims1)) ALLOCATE (dims2(ndims2)) @@ -715,11 +717,9 @@ SUBROUTINE dbcsr_t_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, & max_tensor = MAXLOC([PRODUCT(INT(dims1, int_8)), PRODUCT(INT(dims2, int_8)), PRODUCT(INT(dims3, int_8))], DIM=1) SELECT CASE (max_mm_dim) CASE (1) - IF (PRESENT(unit_nr)) THEN - IF (unit_nr > 0) THEN - WRITE (unit_nr, '(T2,A)') "large tensors: 1, 3; small tensor: 2" - WRITE (unit_nr, '(T2,A)') "sorting contraction indices" - ENDIF + IF (unit_nr_prv > 0) THEN + WRITE (unit_nr_prv, '(T2,A)') "large tensors: 1, 3; small tensor: 2" + WRITE (unit_nr_prv, '(T2,A)') "sorting contraction indices" ENDIF CALL index_linked_sort(contract_1_mod, contract_2_mod) CALL index_linked_sort(map_2_mod, notcontract_2_mod) @@ -735,17 +735,15 @@ SUBROUTINE dbcsr_t_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, & CALL reshape_mm_compatible(tensor_algn_1, tensor_algn_3, tensor_contr_1, tensor_contr_3, & contract_1_mod, notcontract_1_mod, map_2_mod, map_1_mod, & trans_1, trans_3, new_1, new_3, nodata2=nodata_3, optimize_dist=optimize_dist, & - move_data_1=move_data_1, unit_nr=unit_nr) + move_data_1=move_data_1, unit_nr=unit_nr_prv) CALL reshape_mm_small(tensor_algn_2, contract_2_mod, notcontract_2_mod, tensor_contr_2, trans_2, & - new_2, move_data=move_data_2, unit_nr=unit_nr) + new_2, move_data=move_data_2, unit_nr=unit_nr_prv) CASE (2) - IF (PRESENT(unit_nr)) THEN - IF (unit_nr > 0) THEN - WRITE (unit_nr, '(T2,A)') "large tensors: 1, 2; small tensor: 3" - WRITE (unit_nr, '(T2,A)') "sorting contraction indices" - ENDIF + IF (unit_nr_prv > 0) THEN + WRITE (unit_nr_prv, '(T2,A)') "large tensors: 1, 2; small tensor: 3" + WRITE (unit_nr_prv, '(T2,A)') "sorting contraction indices" ENDIF CALL index_linked_sort(notcontract_1_mod, map_1_mod) @@ -762,18 +760,16 @@ SUBROUTINE dbcsr_t_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, & CALL reshape_mm_compatible(tensor_algn_1, tensor_algn_2, tensor_contr_1, tensor_contr_2, & notcontract_1_mod, contract_1_mod, notcontract_2_mod, contract_2_mod, & trans_1, trans_2, new_1, new_2, optimize_dist=optimize_dist, & - move_data_1=move_data_1, move_data_2=move_data_2, unit_nr=unit_nr) + move_data_1=move_data_1, move_data_2=move_data_2, unit_nr=unit_nr_prv) CALL invert_transpose_flag(trans_1) CALL reshape_mm_small(tensor_algn_3, map_1_mod, map_2_mod, tensor_contr_3, trans_3, & - new_3, nodata=nodata_3, unit_nr=unit_nr) + new_3, nodata=nodata_3, unit_nr=unit_nr_prv) CASE (3) - IF (PRESENT(unit_nr)) THEN - IF (unit_nr > 0) THEN - WRITE (unit_nr, '(T2,A)') "large tensors: 2, 3; small tensor: 1" - WRITE (unit_nr, '(T2,A)') "sorting contraction indices" - ENDIF + IF (unit_nr_prv > 0) THEN + WRITE (unit_nr_prv, '(T2,A)') "large tensors: 2, 3; small tensor: 1" + WRITE (unit_nr_prv, '(T2,A)') "sorting contraction indices" ENDIF CALL index_linked_sort(map_1_mod, notcontract_1_mod) CALL index_linked_sort(contract_2_mod, contract_1_mod) @@ -789,24 +785,24 @@ SUBROUTINE dbcsr_t_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, & CALL reshape_mm_compatible(tensor_algn_2, tensor_algn_3, tensor_contr_2, tensor_contr_3, & contract_2_mod, notcontract_2_mod, map_1_mod, map_2_mod, & trans_2, trans_3, new_2, new_3, nodata2=nodata_3, optimize_dist=optimize_dist, & - move_data_1=move_data_2, unit_nr=unit_nr) + move_data_1=move_data_2, unit_nr=unit_nr_prv) CALL invert_transpose_flag(trans_2) CALL invert_transpose_flag(trans_3) CALL reshape_mm_small(tensor_algn_1, notcontract_1_mod, contract_1_mod, tensor_contr_1, & - trans_1, new_1, move_data=move_data_1, unit_nr=unit_nr) + trans_1, new_1, move_data=move_data_1, unit_nr=unit_nr_prv) END SELECT - IF (PRESENT(unit_nr)) CALL dbcsr_t_print_contraction_index(tensor_contr_1, indchar1_mod, & + IF (unit_nr_prv /= 0) CALL dbcsr_t_print_contraction_index(tensor_contr_1, indchar1_mod, & tensor_contr_2, indchar2_mod, & - tensor_contr_3, indchar3_mod, unit_nr) - IF (PRESENT(unit_nr)) THEN - IF (new_1) CALL dbcsr_t_write_tensor_info(tensor_contr_1, unit_nr, full_info=log_verbose) - IF (new_1) CALL dbcsr_t_write_tensor_dist(tensor_contr_1, unit_nr) - IF (new_2) CALL dbcsr_t_write_tensor_info(tensor_contr_2, unit_nr, full_info=log_verbose) - IF (new_2) CALL dbcsr_t_write_tensor_dist(tensor_contr_2, unit_nr) + tensor_contr_3, indchar3_mod, unit_nr_prv) + IF (unit_nr_prv /= 0) THEN + IF (new_1) CALL dbcsr_t_write_tensor_info(tensor_contr_1, unit_nr_prv, full_info=log_verbose) + IF (new_1) CALL dbcsr_t_write_tensor_dist(tensor_contr_1, unit_nr_prv) + IF (new_2) CALL dbcsr_t_write_tensor_info(tensor_contr_2, unit_nr_prv, full_info=log_verbose) + IF (new_2) CALL dbcsr_t_write_tensor_dist(tensor_contr_2, unit_nr_prv) ENDIF IF (.NOT. PRESENT(result_index)) THEN @@ -814,7 +810,7 @@ SUBROUTINE dbcsr_t_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, & tensor_contr_1%matrix_rep, tensor_contr_2%matrix_rep, & beta, & tensor_contr_3%matrix_rep, filter_eps=filter_eps, flop=flop, & - io_unit=unit_nr, log_verbose=log_verbose, & + unit_nr=unit_nr_prv, log_verbose=log_verbose, & split_opt=split_opt, & move_data_a=move_data_1, move_data_b=move_data_2, retain_sparsity=retain_sparsity) ELSE @@ -896,14 +892,14 @@ SUBROUTINE dbcsr_t_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, & DEALLOCATE (split_opt) ENDIF - IF (PRESENT(unit_nr)) THEN + IF (unit_nr_prv /= 0) THEN do_write_3 = .TRUE. IF (tensor_contr_3%matrix_rep%do_batched > 0) THEN IF (tensor_contr_3%matrix_rep%mm_storage%batched_out) do_write_3 = .FALSE. ENDIF IF (do_write_3) THEN - CALL dbcsr_t_write_tensor_info(tensor_contr_3, unit_nr, full_info=log_verbose) - CALL dbcsr_t_write_tensor_dist(tensor_contr_3, unit_nr) + CALL dbcsr_t_write_tensor_info(tensor_contr_3, unit_nr_prv, full_info=log_verbose) + CALL dbcsr_t_write_tensor_dist(tensor_contr_3, unit_nr_prv) ENDIF ENDIF @@ -916,9 +912,9 @@ SUBROUTINE dbcsr_t_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, & ! pointer to data of tensor_3 ENDIF - IF (PRESENT(unit_nr)) THEN - IF (new_3 .AND. do_write_3) CALL dbcsr_t_write_tensor_info(tensor_3, unit_nr, full_info=log_verbose) - IF (new_3 .AND. do_write_3) CALL dbcsr_t_write_tensor_dist(tensor_3, unit_nr) + IF (unit_nr_prv /= 0) THEN + IF (new_3 .AND. do_write_3) CALL dbcsr_t_write_tensor_info(tensor_3, unit_nr_prv, full_info=log_verbose) + IF (new_3 .AND. do_write_3) CALL dbcsr_t_write_tensor_dist(tensor_3, unit_nr_prv) ENDIF CALL dbcsr_t_destroy(tensor_algn_1) @@ -967,12 +963,10 @@ SUBROUTINE dbcsr_t_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, & ENDIF ENDIF - IF (PRESENT(unit_nr)) THEN - IF (unit_nr > 0) THEN - WRITE (unit_nr, '(A)') repeat("-", 80) - WRITE (unit_nr, '(A)') "TENSOR CONTRACTION DONE" - WRITE (unit_nr, '(A)') repeat("-", 80) - ENDIF + IF (unit_nr_prv > 0) THEN + WRITE (unit_nr_prv, '(A)') repeat("-", 80) + WRITE (unit_nr_prv, '(A)') "TENSOR CONTRACTION DONE" + WRITE (unit_nr_prv, '(A)') repeat("-", 80) ENDIF END SUBROUTINE @@ -1034,7 +1028,7 @@ SUBROUTINE reshape_mm_compatible(tensor1, tensor2, tensor1_out, tensor2_out, ind INTEGER, INTENT(IN), OPTIONAL :: unit_nr !! output unit INTEGER :: ref_tensor, compat1, compat1_old, compat2, compat2_old, & - comm_2d, io_unit + comm_2d, unit_nr_prv TYPE(array_list) :: dist_list INTEGER, DIMENSION(:), ALLOCATABLE :: mp_dims, dims TYPE(dbcsr_t_distribution_type) :: dist_in @@ -1042,11 +1036,8 @@ SUBROUTINE reshape_mm_compatible(tensor1, tensor2, tensor1_out, tensor2_out, ind LOGICAL :: optimize_dist_prv NULLIFY (tensor1_out, tensor2_out) - IF (PRESENT(unit_nr)) THEN - io_unit = unit_nr - ELSE - io_unit = 0 - ENDIF + + unit_nr_prv = prep_output_unit(unit_nr) IF (SIZE(ind1_free) .GE. SIZE(ind2_free)) THEN ref_tensor = 1 @@ -1065,24 +1056,24 @@ SUBROUTINE reshape_mm_compatible(tensor1, tensor2, tensor1_out, tensor2_out, ind compat1_old = compat1 compat2_old = compat2 - IF (io_unit > 0) THEN - WRITE (io_unit, '(T2,A,1X,A,A,1X)', advance='no') "compatibility of", TRIM(tensor1%name), ":" + IF (unit_nr_prv > 0) THEN + WRITE (unit_nr_prv, '(T2,A,1X,A,A,1X)', advance='no') "compatibility of", TRIM(tensor1%name), ":" SELECT CASE (compat1) CASE (0) - WRITE (io_unit, '(A)') "Not compatible" + WRITE (unit_nr_prv, '(A)') "Not compatible" CASE (1) - WRITE (io_unit, '(A)') "Normal" + WRITE (unit_nr_prv, '(A)') "Normal" CASE (2) - WRITE (io_unit, '(A)') "Transposed" + WRITE (unit_nr_prv, '(A)') "Transposed" END SELECT - WRITE (io_unit, '(T2,A,1X,A,A,1X)', advance='no') "compatibility of", TRIM(tensor2%name), ":" + WRITE (unit_nr_prv, '(T2,A,1X,A,A,1X)', advance='no') "compatibility of", TRIM(tensor2%name), ":" SELECT CASE (compat2) CASE (0) - WRITE (io_unit, '(A)') "Not compatible" + WRITE (unit_nr_prv, '(A)') "Not compatible" CASE (1) - WRITE (io_unit, '(A)') "Normal" + WRITE (unit_nr_prv, '(A)') "Normal" CASE (2) - WRITE (io_unit, '(A)') "Transposed" + WRITE (unit_nr_prv, '(A)') "Transposed" END SELECT ENDIF @@ -1099,7 +1090,7 @@ SUBROUTINE reshape_mm_compatible(tensor1, tensor2, tensor1_out, tensor2_out, ind IF (ref_tensor == 1) THEN ! tensor 1 is reference and tensor 2 is reshaped compatible with tensor 1 IF (compat1 == 0 .OR. optimize_dist_prv) THEN ! tensor 1 is not contraction compatible --> reshape - IF (io_unit > 0) WRITE (unit_nr, '(T2,A,1X,A)') "Redistribution of", TRIM(tensor1%name) + IF (unit_nr_prv > 0) WRITE (unit_nr_prv, '(T2,A,1X,A)') "Redistribution of", TRIM(tensor1%name) ALLOCATE (dims(ndims_tensor(tensor1))) CALL blk_dims_tensor(tensor1, dims) nblkrows = PRODUCT(INT(dims(ind1_linked), KIND=int_8)) @@ -1111,11 +1102,11 @@ SUBROUTINE reshape_mm_compatible(tensor1, tensor2, tensor1_out, tensor2_out, ind CALL mp_comm_free(comm_2d) compat1 = 1 ELSE - IF (io_unit > 0) WRITE (unit_nr, '(T2,A,1X,A)') "No redistribution of", TRIM(tensor1%name) + IF (unit_nr_prv > 0) WRITE (unit_nr_prv, '(T2,A,1X,A)') "No redistribution of", TRIM(tensor1%name) tensor1_out => tensor1 ENDIF IF (compat2 == 0 .OR. optimize_dist_prv) THEN ! tensor 2 is not contraction compatible --> reshape - IF (io_unit > 0) WRITE (unit_nr, '(T2,A,1X,A,1X,A,1X,A)') "Redistribution of", & + IF (unit_nr_prv > 0) WRITE (unit_nr_prv, '(T2,A,1X,A,1X,A,1X,A)') "Redistribution of", & TRIM(tensor2%name), "compatible with", TRIM(tensor1%name) dist_in = dbcsr_t_distribution(tensor1_out) dist_list = array_sublist(dist_in%nd_dist, ind1_linked) @@ -1140,12 +1131,12 @@ SUBROUTINE reshape_mm_compatible(tensor1, tensor2, tensor1_out, tensor2_out, ind ENDIF compat2 = compat1 ELSE - IF (io_unit > 0) WRITE (unit_nr, '(T2,A,1X,A)') "No redistribution of", TRIM(tensor2%name) + IF (unit_nr_prv > 0) WRITE (unit_nr_prv, '(T2,A,1X,A)') "No redistribution of", TRIM(tensor2%name) tensor2_out => tensor2 ENDIF ELSE ! tensor 2 is reference and tensor 1 is reshaped compatible with tensor 2 IF (compat2 == 0 .OR. optimize_dist_prv) THEN ! tensor 2 is not contraction compatible --> reshape - IF (io_unit > 0) WRITE (unit_nr, '(T2,A,1X,A)') "Redistribution of", TRIM(tensor2%name) + IF (unit_nr_prv > 0) WRITE (unit_nr_prv, '(T2,A,1X,A)') "Redistribution of", TRIM(tensor2%name) ALLOCATE (dims(ndims_tensor(tensor2))) CALL blk_dims_tensor(tensor2, dims) nblkrows = PRODUCT(INT(dims(ind2_linked), KIND=int_8)) @@ -1156,11 +1147,11 @@ SUBROUTINE reshape_mm_compatible(tensor1, tensor2, tensor1_out, tensor2_out, ind CALL mp_comm_free(comm_2d) compat2 = 1 ELSE - IF (io_unit > 0) WRITE (unit_nr, '(T2,A,1X,A)') "No redistribution of", TRIM(tensor2%name) + IF (unit_nr_prv > 0) WRITE (unit_nr_prv, '(T2,A,1X,A)') "No redistribution of", TRIM(tensor2%name) tensor2_out => tensor2 ENDIF IF (compat1 == 0 .OR. optimize_dist_prv) THEN ! tensor 1 is not contraction compatible --> reshape - IF (io_unit > 0) WRITE (unit_nr, '(T2,A,1X,A,1X,A,1X,A)') "Redistribution of", TRIM(tensor1%name), & + IF (unit_nr_prv > 0) WRITE (unit_nr_prv, '(T2,A,1X,A,1X,A,1X,A)') "Redistribution of", TRIM(tensor1%name), & "compatible with", TRIM(tensor2%name) dist_in = dbcsr_t_distribution(tensor2_out) dist_list = array_sublist(dist_in%nd_dist, ind2_linked) @@ -1181,7 +1172,7 @@ SUBROUTINE reshape_mm_compatible(tensor1, tensor2, tensor1_out, tensor2_out, ind ENDIF compat1 = compat2 ELSE - IF (io_unit > 0) WRITE (unit_nr, '(T2,A,1X,A)') "No redistribution of", TRIM(tensor1%name) + IF (unit_nr_prv > 0) WRITE (unit_nr_prv, '(T2,A,1X,A)') "No redistribution of", TRIM(tensor1%name) tensor1_out => tensor1 ENDIF ENDIF @@ -1204,27 +1195,27 @@ SUBROUTINE reshape_mm_compatible(tensor1, tensor2, tensor1_out, tensor2_out, ind DBCSR_ABORT("should not happen") END SELECT - IF (io_unit > 0) THEN + IF (unit_nr_prv > 0) THEN IF (compat1 .NE. compat1_old) THEN - WRITE (io_unit, '(T2,A,1X,A,A,1X)', advance='no') "compatibility of", TRIM(tensor1_out%name), ":" + WRITE (unit_nr_prv, '(T2,A,1X,A,A,1X)', advance='no') "compatibility of", TRIM(tensor1_out%name), ":" SELECT CASE (compat1) CASE (0) - WRITE (io_unit, '(A)') "Not compatible" + WRITE (unit_nr_prv, '(A)') "Not compatible" CASE (1) - WRITE (io_unit, '(A)') "Normal" + WRITE (unit_nr_prv, '(A)') "Normal" CASE (2) - WRITE (io_unit, '(A)') "Transposed" + WRITE (unit_nr_prv, '(A)') "Transposed" END SELECT ENDIF IF (compat2 .NE. compat2_old) THEN - WRITE (io_unit, '(T2,A,1X,A,A,1X)', advance='no') "compatibility of", TRIM(tensor2_out%name), ":" + WRITE (unit_nr_prv, '(T2,A,1X,A,A,1X)', advance='no') "compatibility of", TRIM(tensor2_out%name), ":" SELECT CASE (compat2) CASE (0) - WRITE (io_unit, '(A)') "Not compatible" + WRITE (unit_nr_prv, '(A)') "Not compatible" CASE (1) - WRITE (io_unit, '(A)') "Normal" + WRITE (unit_nr_prv, '(A)') "Normal" CASE (2) - WRITE (io_unit, '(A)') "Transposed" + WRITE (unit_nr_prv, '(A)') "Transposed" END SELECT ENDIF ENDIF @@ -1254,7 +1245,7 @@ SUBROUTINE reshape_mm_small(tensor_in, ind1, ind2, tensor_out, trans, new, nodat !! memory optimization: transfer data s.t. tensor_in may be empty on return INTEGER, INTENT(IN), OPTIONAL :: unit_nr !! output unit - INTEGER :: compat1, compat2, compat1_old, compat2_old, io_unit + INTEGER :: compat1, compat2, compat1_old, compat2_old, unit_nr_prv LOGICAL :: nodata_prv NULLIFY (tensor_out) @@ -1264,29 +1255,25 @@ SUBROUTINE reshape_mm_small(tensor_in, ind1, ind2, tensor_out, trans, new, nodat nodata_prv = .FALSE. ENDIF - IF (PRESENT(unit_nr)) THEN - io_unit = unit_nr - ELSE - io_unit = 0 - ENDIF + unit_nr_prv = prep_output_unit(unit_nr) new = .FALSE. compat1 = compat_map(tensor_in%nd_index, ind1) compat2 = compat_map(tensor_in%nd_index, ind2) compat1_old = compat1; compat2_old = compat2 - IF (io_unit > 0) THEN - WRITE (io_unit, '(T2,A,1X,A,A,1X)', advance='no') "compatibility of", TRIM(tensor_in%name), ":" + IF (unit_nr_prv > 0) THEN + WRITE (unit_nr_prv, '(T2,A,1X,A,A,1X)', advance='no') "compatibility of", TRIM(tensor_in%name), ":" IF (compat1 == 1 .AND. compat2 == 2) THEN - WRITE (io_unit, '(A)') "Normal" + WRITE (unit_nr_prv, '(A)') "Normal" ELSEIF (compat1 == 2 .AND. compat2 == 1) THEN - WRITE (io_unit, '(A)') "Transposed" + WRITE (unit_nr_prv, '(A)') "Transposed" ELSE - WRITE (io_unit, '(A)') "Not compatible" + WRITE (unit_nr_prv, '(A)') "Not compatible" ENDIF ENDIF IF (compat1 == 0 .or. compat2 == 0) THEN ! index mapping not compatible with contract index - IF (io_unit > 0) WRITE (unit_nr, '(T2,A,1X,A)') "Redistribution of", TRIM(tensor_in%name) + IF (unit_nr_prv > 0) WRITE (unit_nr_prv, '(T2,A,1X,A)') "Redistribution of", TRIM(tensor_in%name) ALLOCATE (tensor_out) CALL dbcsr_t_remap(tensor_in, ind1, ind2, tensor_out, nodata=nodata, move_data=move_data) @@ -1298,7 +1285,7 @@ SUBROUTINE reshape_mm_small(tensor_in, ind1, ind2, tensor_out, trans, new, nodat compat2 = 2 new = .TRUE. ELSE - IF (io_unit > 0) WRITE (unit_nr, '(T2,A,1X,A)') "No redistribution of", TRIM(tensor_in%name) + IF (unit_nr_prv > 0) WRITE (unit_nr_prv, '(T2,A,1X,A)') "No redistribution of", TRIM(tensor_in%name) tensor_out => tensor_in ENDIF @@ -1310,15 +1297,15 @@ SUBROUTINE reshape_mm_small(tensor_in, ind1, ind2, tensor_out, trans, new, nodat DBCSR_ABORT("this should not happen") ENDIF - IF (io_unit > 0) THEN + IF (unit_nr_prv > 0) THEN IF (compat1_old .NE. compat1 .OR. compat2_old .NE. compat2) THEN - WRITE (io_unit, '(T2,A,1X,A,A,1X)', advance='no') "compatibility of", TRIM(tensor_out%name), ":" + WRITE (unit_nr_prv, '(T2,A,1X,A,A,1X)', advance='no') "compatibility of", TRIM(tensor_out%name), ":" IF (compat1 == 1 .AND. compat2 == 2) THEN - WRITE (io_unit, '(A)') "Normal" + WRITE (unit_nr_prv, '(A)') "Normal" ELSEIF (compat1 == 2 .AND. compat2 == 1) THEN - WRITE (io_unit, '(A)') "Transposed" + WRITE (unit_nr_prv, '(A)') "Transposed" ELSE - WRITE (io_unit, '(A)') "Not compatible" + WRITE (unit_nr_prv, '(A)') "Not compatible" ENDIF ENDIF ENDIF @@ -1798,53 +1785,57 @@ SUBROUTINE dbcsr_t_print_contraction_index(tensor_1, indchar1, tensor_2, indchar INTEGER, DIMENSION(ndims_matrix_column(tensor_2)) :: map22 INTEGER, DIMENSION(ndims_matrix_row(tensor_3)) :: map31 INTEGER, DIMENSION(ndims_matrix_column(tensor_3)) :: map32 - INTEGER :: ichar1, ichar2, ichar3 + INTEGER :: ichar1, ichar2, ichar3, unit_nr_prv - CALL dbcsr_t_get_mapping_info(tensor_1%nd_index_blk, map1_2d=map11, map2_2d=map12) - CALL dbcsr_t_get_mapping_info(tensor_2%nd_index_blk, map1_2d=map21, map2_2d=map22) - CALL dbcsr_t_get_mapping_info(tensor_3%nd_index_blk, map1_2d=map31, map2_2d=map32) + unit_nr_prv = prep_output_unit(unit_nr) - IF (unit_nr .GT. 0) THEN - WRITE (unit_nr, '(T2,A)') "INDEX INFO" - WRITE (unit_nr, '(T15,A)', advance='no') "tensor index: (" + IF (unit_nr_prv /= 0) THEN + CALL dbcsr_t_get_mapping_info(tensor_1%nd_index_blk, map1_2d=map11, map2_2d=map12) + CALL dbcsr_t_get_mapping_info(tensor_2%nd_index_blk, map1_2d=map21, map2_2d=map22) + CALL dbcsr_t_get_mapping_info(tensor_3%nd_index_blk, map1_2d=map31, map2_2d=map32) + ENDIF + + IF (unit_nr_prv > 0) THEN + WRITE (unit_nr_prv, '(T2,A)') "INDEX INFO" + WRITE (unit_nr_prv, '(T15,A)', advance='no') "tensor index: (" DO ichar1 = 1, SIZE(indchar1) - WRITE (unit_nr, '(A1)', advance='no') indchar1(ichar1) + WRITE (unit_nr_prv, '(A1)', advance='no') indchar1(ichar1) ENDDO - WRITE (unit_nr, '(A)', advance='no') ") x (" + WRITE (unit_nr_prv, '(A)', advance='no') ") x (" DO ichar2 = 1, SIZE(indchar2) - WRITE (unit_nr, '(A1)', advance='no') indchar2(ichar2) + WRITE (unit_nr_prv, '(A1)', advance='no') indchar2(ichar2) ENDDO - WRITE (unit_nr, '(A)', advance='no') ") = (" + WRITE (unit_nr_prv, '(A)', advance='no') ") = (" DO ichar3 = 1, SIZE(indchar3) - WRITE (unit_nr, '(A1)', advance='no') indchar3(ichar3) + WRITE (unit_nr_prv, '(A1)', advance='no') indchar3(ichar3) ENDDO - WRITE (unit_nr, '(A)') ")" + WRITE (unit_nr_prv, '(A)') ")" - WRITE (unit_nr, '(T15,A)', advance='no') "matrix index: (" + WRITE (unit_nr_prv, '(T15,A)', advance='no') "matrix index: (" DO ichar1 = 1, SIZE(map11) - WRITE (unit_nr, '(A1)', advance='no') indchar1(map11(ichar1)) + WRITE (unit_nr_prv, '(A1)', advance='no') indchar1(map11(ichar1)) ENDDO - WRITE (unit_nr, '(A1)', advance='no') "|" + WRITE (unit_nr_prv, '(A1)', advance='no') "|" DO ichar1 = 1, SIZE(map12) - WRITE (unit_nr, '(A1)', advance='no') indchar1(map12(ichar1)) + WRITE (unit_nr_prv, '(A1)', advance='no') indchar1(map12(ichar1)) ENDDO - WRITE (unit_nr, '(A)', advance='no') ") x (" + WRITE (unit_nr_prv, '(A)', advance='no') ") x (" DO ichar2 = 1, SIZE(map21) - WRITE (unit_nr, '(A1)', advance='no') indchar2(map21(ichar2)) + WRITE (unit_nr_prv, '(A1)', advance='no') indchar2(map21(ichar2)) ENDDO - WRITE (unit_nr, '(A1)', advance='no') "|" + WRITE (unit_nr_prv, '(A1)', advance='no') "|" DO ichar2 = 1, SIZE(map22) - WRITE (unit_nr, '(A1)', advance='no') indchar2(map22(ichar2)) + WRITE (unit_nr_prv, '(A1)', advance='no') indchar2(map22(ichar2)) ENDDO - WRITE (unit_nr, '(A)', advance='no') ") = (" + WRITE (unit_nr_prv, '(A)', advance='no') ") = (" DO ichar3 = 1, SIZE(map31) - WRITE (unit_nr, '(A1)', advance='no') indchar3(map31(ichar3)) + WRITE (unit_nr_prv, '(A1)', advance='no') indchar3(map31(ichar3)) ENDDO - WRITE (unit_nr, '(A1)', advance='no') "|" + WRITE (unit_nr_prv, '(A1)', advance='no') "|" DO ichar3 = 1, SIZE(map32) - WRITE (unit_nr, '(A1)', advance='no') indchar3(map32(ichar3)) + WRITE (unit_nr_prv, '(A1)', advance='no') indchar3(map32(ichar3)) ENDDO - WRITE (unit_nr, '(A)') ")" + WRITE (unit_nr_prv, '(A)') ")" ENDIF END SUBROUTINE @@ -1868,23 +1859,24 @@ SUBROUTINE dbcsr_t_batched_contract_finalize(tensor, unit_nr) TYPE(dbcsr_t_type), INTENT(INOUT) :: tensor INTEGER, INTENT(IN), OPTIONAL :: unit_nr LOGICAL :: do_write + INTEGER :: unit_nr_prv + + unit_nr_prv = prep_output_unit(unit_nr) do_write = .FALSE. - IF (PRESENT(unit_nr)) THEN - IF (tensor%matrix_rep%do_batched > 0) THEN - IF (tensor%matrix_rep%mm_storage%batched_out) do_write = .TRUE. - ENDIF + IF (tensor%matrix_rep%do_batched > 0) THEN + IF (tensor%matrix_rep%mm_storage%batched_out) do_write = .TRUE. ENDIF CALL dbcsr_tas_batched_mm_finalize(tensor%matrix_rep) - IF (do_write .AND. PRESENT(unit_nr)) THEN - IF (unit_nr > 0) THEN - WRITE (unit_nr, "(T2,A)") & + IF (do_write .AND. unit_nr_prv /= 0) THEN + IF (unit_nr_prv > 0) THEN + WRITE (unit_nr_prv, "(T2,A)") & "FINALIZING BATCHED PROCESSING OF MATMUL" ENDIF - CALL dbcsr_t_write_tensor_info(tensor, unit_nr) - CALL dbcsr_t_write_tensor_dist(tensor, unit_nr) + CALL dbcsr_t_write_tensor_info(tensor, unit_nr_prv) + CALL dbcsr_t_write_tensor_dist(tensor, unit_nr_prv) ENDIF END SUBROUTINE diff --git a/src/tensors/dbcsr_tensor_io.F b/src/tensors/dbcsr_tensor_io.F index f17f8d43cfd..66614c9d16e 100644 --- a/src/tensors/dbcsr_tensor_io.F +++ b/src/tensors/dbcsr_tensor_io.F @@ -37,15 +37,16 @@ MODULE dbcsr_tensor_io dbcsr_t_write_blocks, & dbcsr_t_write_block, & dbcsr_t_write_block_indices, & - dbcsr_t_write_split_info + dbcsr_t_write_split_info, & + prep_output_unit CONTAINS - SUBROUTINE dbcsr_t_write_tensor_info(tensor, output_unit, full_info) + SUBROUTINE dbcsr_t_write_tensor_info(tensor, unit_nr, full_info) !! Write tensor global info: block dimensions, full dimensions and process grid dimensions TYPE(dbcsr_t_type), INTENT(IN) :: tensor - INTEGER, INTENT(IN) :: output_unit + INTEGER, INTENT(IN) :: unit_nr LOGICAL, OPTIONAL, INTENT(IN) :: full_info !! Whether to print distribution and block size vectors INTEGER, DIMENSION(ndims_tensor(tensor)) :: nblks_total, nfull_total, pdims, my_ploc, nblks_local, nfull_local @@ -53,72 +54,80 @@ SUBROUTINE dbcsr_t_write_tensor_info(tensor, output_unit, full_info) #:for idim in range(1, maxdim+1) INTEGER, DIMENSION(dbcsr_t_nblks_total(tensor, ${idim}$)) :: proc_dist_${idim}$ INTEGER, DIMENSION(dbcsr_t_nblks_total(tensor, ${idim}$)) :: blk_size_${idim}$ + INTEGER, DIMENSION(dbcsr_t_nblks_total(tensor, ${idim}$)) :: blks_local_${idim}$ #:endfor CHARACTER(len=default_string_length) :: name INTEGER :: idim INTEGER :: iblk + INTEGER :: unit_nr_prv + + unit_nr_prv = prep_output_unit(unit_nr) + IF (unit_nr_prv == 0) RETURN CALL dbcsr_t_get_info(tensor, nblks_total, nfull_total, nblks_local, nfull_local, pdims, my_ploc, & - ${varlist("proc_dist")}$, ${varlist("blk_size")}$, & + ${varlist("blks_local")}$, ${varlist("proc_dist")}$, ${varlist("blk_size")}$, & name=name) - IF (output_unit > 0) THEN - WRITE (output_unit, "(T2,A)") & + IF (unit_nr_prv > 0) THEN + WRITE (unit_nr_prv, "(T2,A)") & "GLOBAL INFO OF "//TRIM(name) - WRITE (output_unit, "(T4,A,1X)", advance="no") "block dimensions:" + WRITE (unit_nr_prv, "(T4,A,1X)", advance="no") "block dimensions:" DO idim = 1, ndims_tensor(tensor) - WRITE (output_unit, "(I6)", advance="no") nblks_total(idim) + WRITE (unit_nr_prv, "(I6)", advance="no") nblks_total(idim) ENDDO - WRITE (output_unit, "(/T4,A,1X)", advance="no") "full dimensions:" + WRITE (unit_nr_prv, "(/T4,A,1X)", advance="no") "full dimensions:" DO idim = 1, ndims_tensor(tensor) - WRITE (output_unit, "(I8)", advance="no") nfull_total(idim) + WRITE (unit_nr_prv, "(I8)", advance="no") nfull_total(idim) ENDDO - WRITE (output_unit, "(/T4,A,1X)", advance="no") "process grid dimensions:" + WRITE (unit_nr_prv, "(/T4,A,1X)", advance="no") "process grid dimensions:" DO idim = 1, ndims_tensor(tensor) - WRITE (output_unit, "(I6)", advance="no") pdims(idim) + WRITE (unit_nr_prv, "(I6)", advance="no") pdims(idim) ENDDO - WRITE (output_unit, *) + WRITE (unit_nr_prv, *) IF (PRESENT(full_info)) THEN IF (full_info) THEN - WRITE (output_unit, '(T4,A)', advance='no') "Block sizes:" + WRITE (unit_nr_prv, '(T4,A)', advance='no') "Block sizes:" #:for dim in range(1, maxdim+1) IF (ndims_tensor(tensor) >= ${dim}$) THEN - WRITE (output_unit, '(/T8,A,1X,I1,A,1X)', advance='no') 'Dim', ${dim}$, ':' + WRITE (unit_nr_prv, '(/T8,A,1X,I1,A,1X)', advance='no') 'Dim', ${dim}$, ':' DO iblk = 1, SIZE(blk_size_${dim}$) - WRITE (output_unit, '(I2,1X)', advance='no') blk_size_${dim}$ (iblk) + WRITE (unit_nr_prv, '(I2,1X)', advance='no') blk_size_${dim}$ (iblk) ENDDO ENDIF #:endfor - WRITE (output_unit, '(/T4,A)', advance='no') "Block distribution:" + WRITE (unit_nr_prv, '(/T4,A)', advance='no') "Block distribution:" #:for dim in range(1, maxdim+1) IF (ndims_tensor(tensor) >= ${dim}$) THEN - WRITE (output_unit, '(/T8,A,1X,I1,A,1X)', advance='no') 'Dim', ${dim}$, ':' + WRITE (unit_nr_prv, '(/T8,A,1X,I1,A,1X)', advance='no') 'Dim', ${dim}$, ':' DO iblk = 1, SIZE(proc_dist_${dim}$) - WRITE (output_unit, '(I3,1X)', advance='no') proc_dist_${dim}$ (iblk) + WRITE (unit_nr_prv, '(I3,1X)', advance='no') proc_dist_${dim}$ (iblk) ENDDO ENDIF #:endfor ENDIF - WRITE (output_unit, *) + WRITE (unit_nr_prv, *) ENDIF ENDIF END SUBROUTINE - SUBROUTINE dbcsr_t_write_tensor_dist(tensor, output_unit) + SUBROUTINE dbcsr_t_write_tensor_dist(tensor, unit_nr) !! Write info on tensor distribution & load balance TYPE(dbcsr_t_type), INTENT(IN) :: tensor - INTEGER, INTENT(IN) :: output_unit + INTEGER, INTENT(IN) :: unit_nr INTEGER :: nproc, myproc, nblock_max, nelement_max INTEGER(KIND=int_8) :: nblock_sum, nelement_sum, nblock_tot - INTEGER :: nblock, nelement + INTEGER :: nblock, nelement, unit_nr_prv INTEGER :: mp_comm INTEGER, DIMENSION(2) :: tmp INTEGER, DIMENSION(ndims_tensor(tensor)) :: bdims REAL(KIND=real_8) :: occupation mp_comm = tensor%pgrid%mp_comm_2d + unit_nr_prv = prep_output_unit(unit_nr) + IF (unit_nr_prv == 0) RETURN + CALL mp_environ(nproc, myproc, mp_comm) nblock = dbcsr_t_get_num_blocks(tensor) @@ -137,15 +146,15 @@ SUBROUTINE dbcsr_t_write_tensor_dist(tensor, output_unit) occupation = -1.0_real_8 IF (nblock_tot .NE. 0) occupation = 100.0_real_8*REAL(nblock_sum, real_8)/REAL(nblock_tot, real_8) - IF (output_unit > 0) THEN - WRITE (output_unit, "(T2,A)") & + IF (unit_nr_prv > 0) THEN + WRITE (unit_nr_prv, "(T2,A)") & "DISTRIBUTION OF "//TRIM(tensor%name) - WRITE (output_unit, "(T15,A,T68,I13)") "Number of non-zero blocks:", nblock_sum - WRITE (output_unit, "(T15,A,T75,F6.2)") "Percentage of non-zero blocks:", occupation - WRITE (output_unit, "(T15,A,T68,I13)") "Average number of blocks per CPU:", (nblock_sum + nproc - 1)/nproc - WRITE (output_unit, "(T15,A,T68,I13)") "Maximum number of blocks per CPU:", nblock_max - WRITE (output_unit, "(T15,A,T68,I13)") "Average number of matrix elements per CPU:", (nelement_sum + nproc - 1)/nproc - WRITE (output_unit, "(T15,A,T68,I13)") "Maximum number of matrix elements per CPU:", nelement_max + WRITE (unit_nr_prv, "(T15,A,T68,I13)") "Number of non-zero blocks:", nblock_sum + WRITE (unit_nr_prv, "(T15,A,T75,F6.2)") "Percentage of non-zero blocks:", occupation + WRITE (unit_nr_prv, "(T15,A,T68,I13)") "Average number of blocks per CPU:", (nblock_sum + nproc - 1)/nproc + WRITE (unit_nr_prv, "(T15,A,T68,I13)") "Maximum number of blocks per CPU:", nblock_max + WRITE (unit_nr_prv, "(T15,A,T68,I13)") "Average number of matrix elements per CPU:", (nelement_sum + nproc - 1)/nproc + WRITE (unit_nr_prv, "(T15,A,T68,I13)") "Maximum number of matrix elements per CPU:", nelement_max ENDIF END SUBROUTINE @@ -284,4 +293,16 @@ SUBROUTINE dbcsr_t_write_split_info(pgrid, unit_nr) CALL dbcsr_tas_write_split_info(pgrid%tas_split_info, unit_nr) ENDIF END SUBROUTINE + + FUNCTION prep_output_unit(unit_nr) RESULT(unit_nr_out) + INTEGER, INTENT(IN), OPTIONAL :: unit_nr + INTEGER :: unit_nr_out + + IF (PRESENT(unit_nr)) THEN + unit_nr_out = unit_nr + ELSE + unit_nr_out = 0 + ENDIF + + END FUNCTION END MODULE From 699c43ffd36516507b650a2878b64b319d571ca0 Mon Sep 17 00:00:00 2001 From: Patrick Seewald Date: Fri, 10 Apr 2020 10:44:48 +0200 Subject: [PATCH 17/19] Tensors: improve load balancing - improved algorithm for load-balanced default distribution - add default distribution to tensor API --- src/tas/dbcsr_tas_global.F | 89 +++++++++++++++++++++------- src/tas/dbcsr_tas_mm.F | 6 +- src/tas/dbcsr_tas_test.F | 49 +++++++--------- src/tensors/dbcsr_tensor.F | 41 +++---------- src/tensors/dbcsr_tensor_api.F | 3 +- src/tensors/dbcsr_tensor_test.F | 43 ++++---------- src/tensors/dbcsr_tensor_types.F | 26 +++++++-- tests/dbcsr_tas_unittest.F | 58 +++++++++---------- tests/dbcsr_tensor_unittest.F | 99 +++++++++++++++++--------------- 9 files changed, 214 insertions(+), 200 deletions(-) diff --git a/src/tas/dbcsr_tas_global.F b/src/tas/dbcsr_tas_global.F index 705a543ad1f..ebd31e8a881 100644 --- a/src/tas/dbcsr_tas_global.F +++ b/src/tas/dbcsr_tas_global.F @@ -14,7 +14,8 @@ MODULE dbcsr_tas_global !! given row or column. Hence global array data such as distribution and block sizes are specified as !! function objects, leaving up to the caller how to efficiently store global data. - USE dbcsr_kinds, ONLY: int_8 + USE dbcsr_kinds, ONLY: int_8, dp + USE dbcsr_toollib, ONLY: sort #include "base/dbcsr_base_uses.f90" IMPLICIT NONE @@ -32,7 +33,7 @@ MODULE dbcsr_tas_global dbcsr_tas_dist_repl, & dbcsr_tas_distribution, & dbcsr_tas_rowcol_data, & - cyclic_weighted_dist + dbcsr_tas_default_distvec ! abstract type for distribution vectors along one dimension TYPE, ABSTRACT :: dbcsr_tas_distribution @@ -341,32 +342,76 @@ FUNCTION dbcsr_tas_dist_arb_default(nprowcol, nmrowcol, block_sizes) bsize_vec(ind) = block_sizes%data(ind) ENDDO - CALL cyclic_weighted_dist(INT(nmrowcol), nprowcol, bsize_vec, dist_vec) + CALL dbcsr_tas_default_distvec(INT(nmrowcol), nprowcol, bsize_vec, dist_vec) dbcsr_tas_dist_arb_default = dbcsr_tas_dist_arb(dist_vec, nprowcol, nmrowcol) END FUNCTION - SUBROUTINE cyclic_weighted_dist(nel, nbin, weights, dist) - INTEGER, INTENT(IN) :: nel - INTEGER, INTENT(IN) :: nbin - INTEGER, DIMENSION(nel), INTENT(IN) :: weights - INTEGER, DIMENSION(nel), INTENT(OUT) :: dist - INTEGER, DIMENSION(nbin) :: occup - INTEGER :: iel, ibin - INTEGER :: niter + SUBROUTINE dbcsr_tas_default_distvec(nblk, nproc, blk_size, dist) + !! get a load-balanced and randomized distribution along one tensor dimension + INTEGER, INTENT(IN) :: nblk + !! number of blocks (along one tensor dimension) + INTEGER, INTENT(IN) :: nproc + !! number of processes (along one process grid dimension) + INTEGER, DIMENSION(nblk), INTENT(IN) :: blk_size + !! block sizes + INTEGER, DIMENSION(nblk), INTENT(OUT) :: dist + !! distribution + + CALL distribute_lpt_random(nblk, nproc, blk_size, dist) + + END SUBROUTINE + + SUBROUTINE distribute_lpt_random(nel, nbin, weights, dist) + !! distribute `nel` elements with weights `weights` over `nbin` bins. + !! load balanced distribution is obtained by using LPT algorithm together with randomization over equivalent bins + !! (i.e. randomization over all bins with the smallest accumulated weight) + INTEGER, INTENT(IN) :: nel, nbin + INTEGER, DIMENSION(nel), INTENT(IN) :: weights + INTEGER, DIMENSION(nel), INTENT(OUT) :: dist + + INTEGER :: i, i_select, ibin, iel, min_occup, & + n_avail + INTEGER, ALLOCATABLE, DIMENSION(:) :: bins_avail + INTEGER, DIMENSION(4) :: iseed + INTEGER, DIMENSION(nel) :: sort_index, weights_s + INTEGER, DIMENSION(nbin) :: occup + LOGICAL, DIMENSION(nbin) :: bin_mask + REAL(dp) :: rand + INTEGER, PARAMETER :: n_idle = 1000 + + ! initialize seed based on input arguments such that random numbers are deterministic across all processes + iseed(1) = nel; iseed(2) = nbin; iseed(3) = MAXVAL(weights); iseed(4) = MINVAL(weights) + + iseed(4) = iseed(4)*2 + 1 ! odd + + iseed(:) = MODULO(iseed(:), 2**12) + + DO i = 1, n_idle + CALL dlarnv(1, iseed, 1, rand) + ENDDO occup(:) = 0 - ibin = 0 - DO iel = 1, nel - niter = 0 - ibin = MOD(ibin + 1, nbin) - DO WHILE (occup(ibin + 1) + weights(iel) .GE. MAXVAL(occup)) - IF (MINLOC(occup, DIM=1) == ibin + 1) EXIT - ibin = MOD(ibin + 1, nbin) - niter = niter + 1 - ENDDO - dist(iel) = ibin - occup(ibin + 1) = occup(ibin + 1) + weights(iel) + weights_s = weights + CALL sort(weights_s, nel, sort_index) + + occup(:) = 0 + DO iel = nel, 1, -1 + min_occup = MINVAL(occup, 1) + + ! available bins with min. occupancy + bin_mask = occup == min_occup + n_avail = COUNT(bin_mask) + ALLOCATE (bins_avail(n_avail)) + bins_avail(:) = PACK((/(i, i=1, nbin)/), MASK=bin_mask) + + CALL dlarnv(1, iseed, 1, rand) + i_select = FLOOR(rand*n_avail) + 1 + ibin = bins_avail(i_select) + DEALLOCATE (bins_avail) + + dist(sort_index(iel)) = ibin - 1 + occup(ibin) = occup(ibin) + weights_s(iel) ENDDO END SUBROUTINE diff --git a/src/tas/dbcsr_tas_mm.F b/src/tas/dbcsr_tas_mm.F index 3edbcbf05d7..4f3d3c68cd3 100644 --- a/src/tas/dbcsr_tas_mm.F +++ b/src/tas/dbcsr_tas_mm.F @@ -33,7 +33,7 @@ MODULE dbcsr_tas_mm dbcsr_tas_distribution_type, dbcsr_tas_split_info, dbcsr_tas_type, dbcsr_tas_iterator USE dbcsr_tas_global, ONLY: & dbcsr_tas_dist_cyclic, dbcsr_tas_dist_arb, dbcsr_tas_distribution, dbcsr_tas_dist_arb_default, & - dbcsr_tas_rowcol_data, dbcsr_tas_blk_size_one, cyclic_weighted_dist + dbcsr_tas_rowcol_data, dbcsr_tas_blk_size_one, dbcsr_tas_default_distvec USE dbcsr_tas_reshape_ops, ONLY: & dbcsr_tas_merge, dbcsr_tas_replicate, dbcsr_tas_reshape USE dbcsr_tas_split, ONLY: & @@ -1493,8 +1493,8 @@ SUBROUTINE convert_to_new_pgrid(mp_comm_cart, matrix_in, matrix_out, move_data, CALL mp_environ(nproc, pdims, pcoord, mp_comm_cart) ALLOCATE (row_dist(nbrows), col_dist(nbcols)) - CALL cyclic_weighted_dist(nbrows, pdims(1), rbsize, row_dist) - CALL cyclic_weighted_dist(nbcols, pdims(2), rcsize, col_dist) + CALL dbcsr_tas_default_distvec(nbrows, pdims(1), rbsize, row_dist) + CALL dbcsr_tas_default_distvec(nbcols, pdims(2), rcsize, col_dist) mp_obj = dbcsr_mp_environ(mp_comm_cart) CALL dbcsr_distribution_new(dist, mp_obj, row_dist, col_dist, reuse_arrays=.TRUE.) diff --git a/src/tas/dbcsr_tas_test.F b/src/tas/dbcsr_tas_test.F index f907971d030..c1d52fa6cca 100644 --- a/src/tas/dbcsr_tas_test.F +++ b/src/tas/dbcsr_tas_test.F @@ -23,7 +23,8 @@ MODULE dbcsr_tas_test USE dbcsr_tas_types, ONLY: dbcsr_tas_distribution_type, & dbcsr_tas_type USE dbcsr_tas_global, ONLY: dbcsr_tas_blk_size_arb, & - dbcsr_tas_dist_cyclic + dbcsr_tas_dist_cyclic, & + dbcsr_tas_default_distvec USE dbcsr_tas_mm, ONLY: dbcsr_tas_multiply USE dbcsr_tas_split, ONLY: dbcsr_tas_mp_comm, & dbcsr_tas_get_split_info @@ -208,12 +209,15 @@ SUBROUTINE dbcsr_tas_benchmark_mm(transa, transb, transc, matrix_a, matrix_b, ma npdims(:) = 0 CALL mp_cart_create(mp_comm, 2, npdims, myploc, comm_dbcsr) - CALL random_dist(rd_a, dbcsr_nblkrows_total(dbcsr_a), npdims(1)) - CALL random_dist(cd_a, dbcsr_nblkcols_total(dbcsr_a), npdims(2)) - CALL random_dist(rd_b, dbcsr_nblkrows_total(dbcsr_b), npdims(1)) - CALL random_dist(cd_b, dbcsr_nblkcols_total(dbcsr_b), npdims(2)) - CALL random_dist(rd_c, dbcsr_nblkrows_total(dbcsr_c), npdims(1)) - CALL random_dist(cd_c, dbcsr_nblkcols_total(dbcsr_c), npdims(2)) + ALLOCATE (rd_a(dbcsr_nblkrows_total(dbcsr_a))); ALLOCATE (cd_a(dbcsr_nblkcols_total(dbcsr_a))) + ALLOCATE (rd_b(dbcsr_nblkrows_total(dbcsr_b))); ALLOCATE (cd_b(dbcsr_nblkcols_total(dbcsr_b))) + ALLOCATE (rd_c(dbcsr_nblkrows_total(dbcsr_c))); ALLOCATE (cd_c(dbcsr_nblkcols_total(dbcsr_c))) + CALL dbcsr_tas_default_distvec(INT(dbcsr_nblkrows_total(dbcsr_a)), npdims(1), dbcsr_row_block_sizes(dbcsr_a), rd_a) + CALL dbcsr_tas_default_distvec(INT(dbcsr_nblkcols_total(dbcsr_a)), npdims(2), dbcsr_col_block_sizes(dbcsr_a), cd_a) + CALL dbcsr_tas_default_distvec(INT(dbcsr_nblkrows_total(dbcsr_b)), npdims(1), dbcsr_row_block_sizes(dbcsr_b), rd_b) + CALL dbcsr_tas_default_distvec(INT(dbcsr_nblkcols_total(dbcsr_b)), npdims(2), dbcsr_col_block_sizes(dbcsr_b), cd_b) + CALL dbcsr_tas_default_distvec(INT(dbcsr_nblkrows_total(dbcsr_c)), npdims(1), dbcsr_row_block_sizes(dbcsr_c), rd_c) + CALL dbcsr_tas_default_distvec(INT(dbcsr_nblkcols_total(dbcsr_c)), npdims(2), dbcsr_col_block_sizes(dbcsr_c), cd_c) mp_environ_tmp = dbcsr_mp_environ(comm_dbcsr) CALL dbcsr_distribution_new(dist_a, mp_environ_tmp, rd_a, cd_a, reuse_arrays=.TRUE.) @@ -291,7 +295,7 @@ SUBROUTINE dbcsr_tas_test_mm(transa, transb, transc, matrix_a, matrix_b, matrix_ CALL dbcsr_tas_get_split_info(dbcsr_tas_info(matrix_a), mp_comm=mp_comm) CALL mp_environ(numnodes, mynode, mp_comm) - io_unit = 0 + io_unit = -1 IF (mynode .EQ. 0) io_unit = unit_nr CALL dbcsr_tas_multiply(transa, transb, transc, dbcsr_scalar(1.0_real_8), matrix_a, matrix_b, & @@ -305,12 +309,15 @@ SUBROUTINE dbcsr_tas_test_mm(transa, transb, transc, matrix_a, matrix_b, matrix_ npdims(:) = 0 CALL mp_cart_create(mp_comm, 2, npdims, myploc, comm_dbcsr) - CALL random_dist(rd_a, dbcsr_nblkrows_total(dbcsr_a), npdims(1)) - CALL random_dist(cd_a, dbcsr_nblkcols_total(dbcsr_a), npdims(2)) - CALL random_dist(rd_b, dbcsr_nblkrows_total(dbcsr_b), npdims(1)) - CALL random_dist(cd_b, dbcsr_nblkcols_total(dbcsr_b), npdims(2)) - CALL random_dist(rd_c, dbcsr_nblkrows_total(dbcsr_c), npdims(1)) - CALL random_dist(cd_c, dbcsr_nblkcols_total(dbcsr_c), npdims(2)) + ALLOCATE (rd_a(dbcsr_nblkrows_total(dbcsr_a))); ALLOCATE (cd_a(dbcsr_nblkcols_total(dbcsr_a))) + ALLOCATE (rd_b(dbcsr_nblkrows_total(dbcsr_b))); ALLOCATE (cd_b(dbcsr_nblkcols_total(dbcsr_b))) + ALLOCATE (rd_c(dbcsr_nblkrows_total(dbcsr_c))); ALLOCATE (cd_c(dbcsr_nblkcols_total(dbcsr_c))) + CALL dbcsr_tas_default_distvec(INT(dbcsr_nblkrows_total(dbcsr_a)), npdims(1), dbcsr_row_block_sizes(dbcsr_a), rd_a) + CALL dbcsr_tas_default_distvec(INT(dbcsr_nblkcols_total(dbcsr_a)), npdims(2), dbcsr_col_block_sizes(dbcsr_a), cd_a) + CALL dbcsr_tas_default_distvec(INT(dbcsr_nblkrows_total(dbcsr_b)), npdims(1), dbcsr_row_block_sizes(dbcsr_b), rd_b) + CALL dbcsr_tas_default_distvec(INT(dbcsr_nblkcols_total(dbcsr_b)), npdims(2), dbcsr_col_block_sizes(dbcsr_b), cd_b) + CALL dbcsr_tas_default_distvec(INT(dbcsr_nblkrows_total(dbcsr_c)), npdims(1), dbcsr_row_block_sizes(dbcsr_c), rd_c) + CALL dbcsr_tas_default_distvec(INT(dbcsr_nblkcols_total(dbcsr_c)), npdims(2), dbcsr_col_block_sizes(dbcsr_c), cd_c) mp_environ_tmp = dbcsr_mp_environ(comm_dbcsr) CALL dbcsr_distribution_new(dist_a, mp_environ_tmp, rd_a, cd_a, reuse_arrays=.TRUE.) @@ -417,18 +424,4 @@ SUBROUTINE dbcsr_tas_random_bsizes(sizes, repeat, block_sizes) ENDDO END SUBROUTINE - SUBROUTINE random_dist(dist_array, dist_size, nbins) - !! Create random distribution - INTEGER, DIMENSION(:), INTENT(out), POINTER :: dist_array - INTEGER, INTENT(in) :: dist_size, nbins - - INTEGER :: i - - ALLOCATE (dist_array(dist_size)) - DO i = 1, dist_size - dist_array(i) = MODULO(nbins - i, nbins) - END DO - - END SUBROUTINE random_dist - END MODULE diff --git a/src/tensors/dbcsr_tensor.F b/src/tensors/dbcsr_tensor.F index a409b052f94..13cfc3d8336 100644 --- a/src/tensors/dbcsr_tensor.F +++ b/src/tensors/dbcsr_tensor.F @@ -48,9 +48,9 @@ MODULE dbcsr_tensor blk_dims_tensor, dbcsr_t_hold, dbcsr_t_pgrid_type, mp_environ_pgrid, dbcsr_t_filter, & dbcsr_t_clear, dbcsr_t_finalize, dbcsr_t_get_num_blocks, dbcsr_t_scale, & dbcsr_t_get_num_blocks_total, dbcsr_t_get_info, ndims_matrix_row, ndims_matrix_column, & - dbcsr_t_max_nblks_local + dbcsr_t_max_nblks_local, dbcsr_t_default_distvec USE dbcsr_kinds, ONLY: & - ${uselist(dtype_float_prec)}$, default_string_length, int_8 + ${uselist(dtype_float_prec)}$, default_string_length, int_8, dp USE dbcsr_mpiwrap, ONLY: & mp_environ, mp_max, mp_sum USE dbcsr_toollib, ONLY: & @@ -456,6 +456,8 @@ SUBROUTINE dbcsr_t_contract(alpha, tensor_1, tensor_2, beta, tensor_3, & !! enforce the sparsity pattern of the existing tensor_3; default is no INTEGER, OPTIONAL, INTENT(IN) :: unit_nr !! output unit for logging + !! set it to -1 on ranks that should not write (and any valid unit number on ranks that should write output) + !! if 0 on ALL ranks, no output is written LOGICAL, INTENT(IN), OPTIONAL :: log_verbose !! verbose logging (for testing only) @@ -549,7 +551,7 @@ SUBROUTINE dbcsr_t_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, & CHARACTER(LEN=1) :: trans_1, trans_2, trans_3 LOGICAL :: new_1, new_2, new_3, move_data_1, move_data_2 INTEGER :: ndims1, ndims2, ndims3 - INTEGER :: occ_1, occ_2, write_log + INTEGER :: occ_1, occ_2 INTEGER, DIMENSION(:), ALLOCATABLE :: dims1, dims2, dims3 CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_t_contract_expert', & @@ -1366,36 +1368,6 @@ FUNCTION opt_pgrid(tensor, tas_split_info) CALL dbcsr_tas_info_hold(opt_pgrid%tas_split_info) END FUNCTION - SUBROUTINE new_default_dist(nel, nbin, weights, dist) - !! Default distribution that is more or less cyclic (round robin) and load balanced with different - !! weights for each element. - !! This is used for creating adhoc distributions whenever tensors are mapped to new grids. - - INTEGER, INTENT(IN) :: nel - INTEGER, INTENT(IN) :: nbin - INTEGER, DIMENSION(nel), INTENT(IN) :: weights - INTEGER, DIMENSION(:), ALLOCATABLE, INTENT(OUT) :: dist - INTEGER, DIMENSION(nbin) :: occup - INTEGER :: iel, ibin - INTEGER :: niter - - ALLOCATE (dist(nel)) - occup(:) = 0 - ibin = 0 - DO iel = 1, nel - niter = 0 - ibin = MOD(ibin + 1, nbin) - DO WHILE (occup(ibin + 1) + weights(iel) .GE. MAXVAL(occup)) - IF (MINLOC(occup, DIM=1) == ibin + 1) EXIT - ibin = MOD(ibin + 1, nbin) - niter = niter + 1 - ENDDO - dist(iel) = ibin - occup(ibin + 1) = occup(ibin + 1) + weights(iel) - ENDDO - - END SUBROUTINE - SUBROUTINE dbcsr_t_remap(tensor_in, map1_2d, map2_2d, tensor_out, comm_2d, dist1, dist2, & mp_dims_1, mp_dims_2, name, nodata, move_data) !! Copy tensor to tensor with modified index mapping @@ -1470,7 +1442,8 @@ SUBROUTINE dbcsr_t_remap(tensor_in, map1_2d, map2_2d, tensor_out, comm_2d, dist1 ENDIF IF (.NOT. ALLOCATED(nd_dist_${idim}$)) THEN - CALL new_default_dist(SIZE(blk_sizes_${idim}$), pdims(${idim}$), blk_sizes_${idim}$, nd_dist_${idim}$) + ALLOCATE (nd_dist_${idim}$ (SIZE(blk_sizes_${idim}$))) + CALL dbcsr_t_default_distvec(SIZE(blk_sizes_${idim}$), pdims(${idim}$), blk_sizes_${idim}$, nd_dist_${idim}$) ENDIF #:endfor CALL dbcsr_t_distribution_new_expert(dist, comm_nd, map1_2d, map2_2d, & diff --git a/src/tensors/dbcsr_tensor_api.F b/src/tensors/dbcsr_tensor_api.F index ac7d7238a20..215dc539fd1 100644 --- a/src/tensors/dbcsr_tensor_api.F +++ b/src/tensors/dbcsr_tensor_api.F @@ -35,7 +35,7 @@ MODULE dbcsr_tensor_api dbcsr_t_mp_dims_create, dbcsr_t_pgrid_change_dims, dbcsr_t_ndims => ndims_tensor, & dbcsr_t_dims => dims_tensor, dbcsr_t_ndims_matrix_row => ndims_matrix_row, & dbcsr_t_ndims_matrix_column => ndims_matrix_column, dbcsr_t_blk_size, dbcsr_t_nblks_local, & - dbcsr_t_nblks_total, dbcsr_t_max_nblks_local + dbcsr_t_nblks_total, dbcsr_t_max_nblks_local, dbcsr_t_default_distvec USE dbcsr_tensor_test, ONLY: & dbcsr_t_contract_test, dbcsr_t_checksum USE dbcsr_tensor_split, ONLY: & @@ -106,5 +106,6 @@ MODULE dbcsr_tensor_api PUBLIC :: dbcsr_t_nblks_total PUBLIC :: dbcsr_t_blk_size PUBLIC :: dbcsr_t_max_nblks_local + PUBLIC :: dbcsr_t_default_distvec END MODULE dbcsr_tensor_api diff --git a/src/tensors/dbcsr_tensor_test.F b/src/tensors/dbcsr_tensor_test.F index 06866757440..677772a1497 100644 --- a/src/tensors/dbcsr_tensor_test.F +++ b/src/tensors/dbcsr_tensor_test.F @@ -22,21 +22,11 @@ MODULE dbcsr_tensor_test dbcsr_t_reserve_blocks, dbcsr_t_get_stored_coordinates, dbcsr_t_put_block, & dbcsr_t_contract, dbcsr_t_inverse_order USE dbcsr_tensor_block, ONLY: block_nd - USE dbcsr_tensor_types, ONLY: dbcsr_t_create, & - dbcsr_t_destroy, & - dbcsr_t_type, & - dbcsr_t_distribution_type, & - dbcsr_t_distribution_destroy, & - dims_tensor, & - ndims_tensor, & - dbcsr_t_distribution_new, & - dbcsr_t_nd_mp_comm, & - dbcsr_t_get_data_type, & - mp_environ_pgrid, & - dbcsr_t_pgrid_type, & - dbcsr_t_pgrid_create, & - dbcsr_t_pgrid_destroy, & - dbcsr_t_get_info + USE dbcsr_tensor_types, ONLY: & + dbcsr_t_create, dbcsr_t_destroy, dbcsr_t_type, dbcsr_t_distribution_type, dbcsr_t_distribution_destroy, & + dims_tensor, ndims_tensor, dbcsr_t_distribution_new, dbcsr_t_nd_mp_comm, dbcsr_t_get_data_type, & + mp_environ_pgrid, dbcsr_t_pgrid_type, dbcsr_t_pgrid_create, dbcsr_t_pgrid_destroy, dbcsr_t_get_info, & + dbcsr_t_default_distvec USE dbcsr_tensor_io, ONLY: & dbcsr_t_write_blocks, dbcsr_t_write_block_indices USE dbcsr_kinds, ONLY: ${uselist(dtype_float_prec)}$, & @@ -63,7 +53,6 @@ MODULE dbcsr_tensor_test PUBLIC :: & dbcsr_t_setup_test_tensor, & - dbcsr_t_random_dist, & dbcsr_t_contract_test, & dbcsr_t_test_formats, & dbcsr_t_checksum @@ -284,8 +273,10 @@ SUBROUTINE dbcsr_t_test_formats(ndims, mp_comm, unit_nr, verbose, & #:for dim in range(1, maxdim+1) IF (${dim}$ <= ndims) THEN nblks = SIZE(blk_size_${dim}$) - CALL dbcsr_t_random_dist(dist1_${dim}$, nblks, pdims(${dim}$)) - CALL dbcsr_t_random_dist(dist2_${dim}$, nblks, pdims(${dim}$)) + ALLOCATE (dist1_${dim}$ (nblks)) + ALLOCATE (dist2_${dim}$ (nblks)) + CALL dbcsr_t_default_distvec(nblks, pdims(${dim}$), blk_size_${dim}$, dist1_${dim}$) + CALL dbcsr_t_default_distvec(nblks, pdims(${dim}$), blk_size_${dim}$, dist2_${dim}$) ENDIF #:endfor @@ -376,20 +367,6 @@ SUBROUTINE dbcsr_t_test_formats(ndims, mp_comm, unit_nr, verbose, & CALL dbcsr_t_pgrid_destroy(comm_nd) END SUBROUTINE - SUBROUTINE dbcsr_t_random_dist(dist_array, dist_size, nbins) - INTEGER, DIMENSION(:), INTENT(OUT), ALLOCATABLE :: dist_array - INTEGER, INTENT(IN) :: dist_size, nbins - - INTEGER :: i - - ALLOCATE (dist_array(dist_size)) - !CALL RANDOM_NUMBER (dist_array) - DO i = 1, dist_size - dist_array(i) = MODULO(nbins - i, nbins) - END DO - - END SUBROUTINE - SUBROUTINE dbcsr_t_setup_test_tensor(tensor, mp_comm, enumerate, ${varlist("blk_ind")}$) !! Allocate and fill test tensor - entries are enumerated by their index s.t. they only depend !! on global properties of the tensor but not on distribution, matrix representation, etc. @@ -659,7 +636,7 @@ SUBROUTINE dbcsr_t_contract_test(alpha, tensor_1, tensor_2, beta, tensor_3, & mp_comm = tensor_1%pgrid%mp_comm_2d CALL mp_environ(numnodes, mynode, mp_comm) - io_unit = 0 + io_unit = -1 IF (mynode .EQ. 0) io_unit = unit_nr cs_1 = dbcsr_t_checksum(tensor_1) diff --git a/src/tensors/dbcsr_tensor_types.F b/src/tensors/dbcsr_tensor_types.F index f0df10466f7..8c355d54361 100644 --- a/src/tensors/dbcsr_tensor_types.F +++ b/src/tensors/dbcsr_tensor_types.F @@ -39,10 +39,10 @@ MODULE dbcsr_tensor_types USE dbcsr_tas_split, ONLY: & dbcsr_tas_create_split_rows_or_cols, dbcsr_tas_release_info, dbcsr_tas_info_hold, & dbcsr_tas_create_split, dbcsr_tas_get_split_info - USE dbcsr_kinds, ONLY: default_string_length, int_8 + USE dbcsr_kinds, ONLY: default_string_length, int_8, dp USE dbcsr_mpiwrap, ONLY: & mp_cart_create, mp_cart_rank, mp_environ, mp_dims_create, mp_comm_free, mp_comm_dup, mp_sum, mp_max - USE dbcsr_tas_global, ONLY: dbcsr_tas_distribution, dbcsr_tas_rowcol_data + USE dbcsr_tas_global, ONLY: dbcsr_tas_distribution, dbcsr_tas_rowcol_data, dbcsr_tas_default_distvec USE dbcsr_allocate_wrap, ONLY: allocate_any USE dbcsr_data_types, ONLY: dbcsr_scalar_type USE dbcsr_operations, ONLY: dbcsr_scale @@ -95,7 +95,8 @@ MODULE dbcsr_tensor_types dbcsr_t_nblks_local, & dbcsr_t_nblks_total, & dbcsr_t_blk_size, & - dbcsr_t_max_nblks_local + dbcsr_t_max_nblks_local, & + dbcsr_t_default_distvec TYPE dbcsr_t_pgrid_type TYPE(nd_to_2d_mapping) :: nd_index_grid @@ -551,9 +552,12 @@ SUBROUTINE dbcsr_t_nd_mp_free(mp_comm) END SUBROUTINE dbcsr_t_nd_mp_free SUBROUTINE dbcsr_t_distribution_new(dist, pgrid, ${varlist("nd_dist")}$) + !! Create a tensor distribution. TYPE(dbcsr_t_distribution_type), INTENT(OUT) :: dist TYPE(dbcsr_t_pgrid_type), INTENT(IN) :: pgrid + !! process grid INTEGER, DIMENSION(:), INTENT(IN), OPTIONAL :: ${varlist("nd_dist")}$ + !! distribution vectors for all tensor dimensions INTEGER, DIMENSION(ndims_mapping_row(pgrid%nd_index_grid)) :: map1_2d INTEGER, DIMENSION(ndims_mapping_column(pgrid%nd_index_grid)) :: map2_2d INTEGER :: ndims @@ -574,7 +578,6 @@ SUBROUTINE dbcsr_t_distribution_new_expert(dist, pgrid, map1_2d, map2_2d, ${varl INTEGER, DIMENSION(:), INTENT(IN) :: map2_2d !! which nd-indices map to second matrix index and in which order INTEGER, DIMENSION(:), INTENT(IN), OPTIONAL :: ${varlist("nd_dist")}$ - !! distribution vector for first and second dimension LOGICAL, INTENT(IN), OPTIONAL :: own_comm !! whether distribution should own communicator INTEGER :: ndims, comm_2d @@ -1554,4 +1557,19 @@ PURE FUNCTION dbcsr_t_max_nblks_local(tensor) RESULT(blk_count) END FUNCTION + SUBROUTINE dbcsr_t_default_distvec(nblk, nproc, blk_size, dist) + !! get a load-balanced and randomized distribution along one tensor dimension + INTEGER, INTENT(IN) :: nblk + !! number of blocks (along one tensor dimension) + INTEGER, INTENT(IN) :: nproc + !! number of processes (along one process grid dimension) + INTEGER, DIMENSION(nblk), INTENT(IN) :: blk_size + !! block sizes + INTEGER, DIMENSION(nblk), INTENT(OUT) :: dist + !! distribution + + CALL dbcsr_tas_default_distvec(nblk, nproc, blk_size, dist) + + END SUBROUTINE + END MODULE diff --git a/tests/dbcsr_tas_unittest.F b/tests/dbcsr_tas_unittest.F index 077bdb698df..6eecacaa323 100644 --- a/tests/dbcsr_tas_unittest.F +++ b/tests/dbcsr_tas_unittest.F @@ -48,7 +48,7 @@ PROGRAM dbcsr_tas_unittest CALL mp_environ(numnodes, mynode, mp_comm) - io_unit = 0 + io_unit = -1 IF (mynode .EQ. 0) io_unit = default_output_unit CALL dbcsr_init_lib(mp_comm, io_unit) @@ -93,34 +93,34 @@ PROGRAM dbcsr_tas_unittest TRIM(Ct%matrix%name), dbcsr_tas_nblkrows_total(Ct), 'X', dbcsr_tas_nblkcols_total(Ct) CALL dbcsr_tas_write_split_info(dbcsr_tas_info(Ct), io_unit, name="Ct") - CALL dbcsr_tas_test_mm('N', 'N', 'N', B, A, Ct_out, unit_nr=default_output_unit, filter_eps=filter_eps) - CALL dbcsr_tas_test_mm('T', 'N', 'N', Bt, A, Ct_out, unit_nr=default_output_unit, filter_eps=filter_eps) - CALL dbcsr_tas_test_mm('N', 'T', 'N', B, At, Ct_out, unit_nr=default_output_unit, filter_eps=filter_eps) - CALL dbcsr_tas_test_mm('T', 'T', 'N', Bt, At, Ct_out, unit_nr=default_output_unit, filter_eps=filter_eps) - CALL dbcsr_tas_test_mm('N', 'N', 'T', B, A, C_out, unit_nr=default_output_unit, filter_eps=filter_eps) - CALL dbcsr_tas_test_mm('T', 'N', 'T', Bt, A, C_out, unit_nr=default_output_unit, filter_eps=filter_eps) - CALL dbcsr_tas_test_mm('N', 'T', 'T', B, At, C_out, unit_nr=default_output_unit, filter_eps=filter_eps) - CALL dbcsr_tas_test_mm('T', 'T', 'T', Bt, At, C_out, unit_nr=default_output_unit, filter_eps=filter_eps) - - CALL dbcsr_tas_test_mm('N', 'N', 'N', A, C, Bt_out, unit_nr=default_output_unit, filter_eps=filter_eps) - CALL dbcsr_tas_test_mm('T', 'N', 'N', At, C, Bt_out, unit_nr=default_output_unit, filter_eps=filter_eps) - CALL dbcsr_tas_test_mm('N', 'T', 'N', A, Ct, Bt_out, unit_nr=default_output_unit, filter_eps=filter_eps) - CALL dbcsr_tas_test_mm('T', 'T', 'N', At, Ct, Bt_out, unit_nr=default_output_unit, filter_eps=filter_eps) - - CALL dbcsr_tas_test_mm('N', 'N', 'T', A, C, B_out, unit_nr=default_output_unit, filter_eps=filter_eps) - CALL dbcsr_tas_test_mm('T', 'N', 'T', At, C, B_out, unit_nr=default_output_unit, filter_eps=filter_eps) - CALL dbcsr_tas_test_mm('N', 'T', 'T', A, Ct, B_out, unit_nr=default_output_unit, filter_eps=filter_eps) - CALL dbcsr_tas_test_mm('T', 'T', 'T', At, Ct, B_out, unit_nr=default_output_unit, filter_eps=filter_eps) - - CALL dbcsr_tas_test_mm('N', 'N', 'N', C, B, At_out, unit_nr=default_output_unit, filter_eps=filter_eps) - CALL dbcsr_tas_test_mm('T', 'N', 'N', Ct, B, At_out, unit_nr=default_output_unit, filter_eps=filter_eps) - CALL dbcsr_tas_test_mm('N', 'T', 'N', C, Bt, At_out, unit_nr=default_output_unit, filter_eps=filter_eps) - CALL dbcsr_tas_test_mm('T', 'T', 'N', Ct, Bt, At_out, unit_nr=default_output_unit, filter_eps=filter_eps) - - CALL dbcsr_tas_test_mm('N', 'N', 'T', C, B, A_out, unit_nr=default_output_unit, filter_eps=filter_eps) - CALL dbcsr_tas_test_mm('T', 'N', 'T', Ct, B, A_out, unit_nr=default_output_unit, filter_eps=filter_eps) - CALL dbcsr_tas_test_mm('N', 'T', 'T', C, Bt, A_out, unit_nr=default_output_unit, filter_eps=filter_eps) - CALL dbcsr_tas_test_mm('T', 'T', 'T', Ct, Bt, A_out, unit_nr=default_output_unit, filter_eps=filter_eps) + CALL dbcsr_tas_test_mm('N', 'N', 'N', B, A, Ct_out, unit_nr=io_unit, filter_eps=filter_eps) + CALL dbcsr_tas_test_mm('T', 'N', 'N', Bt, A, Ct_out, unit_nr=io_unit, filter_eps=filter_eps) + CALL dbcsr_tas_test_mm('N', 'T', 'N', B, At, Ct_out, unit_nr=io_unit, filter_eps=filter_eps) + CALL dbcsr_tas_test_mm('T', 'T', 'N', Bt, At, Ct_out, unit_nr=io_unit, filter_eps=filter_eps) + CALL dbcsr_tas_test_mm('N', 'N', 'T', B, A, C_out, unit_nr=io_unit, filter_eps=filter_eps) + CALL dbcsr_tas_test_mm('T', 'N', 'T', Bt, A, C_out, unit_nr=io_unit, filter_eps=filter_eps) + CALL dbcsr_tas_test_mm('N', 'T', 'T', B, At, C_out, unit_nr=io_unit, filter_eps=filter_eps) + CALL dbcsr_tas_test_mm('T', 'T', 'T', Bt, At, C_out, unit_nr=io_unit, filter_eps=filter_eps) + + CALL dbcsr_tas_test_mm('N', 'N', 'N', A, C, Bt_out, unit_nr=io_unit, filter_eps=filter_eps) + CALL dbcsr_tas_test_mm('T', 'N', 'N', At, C, Bt_out, unit_nr=io_unit, filter_eps=filter_eps) + CALL dbcsr_tas_test_mm('N', 'T', 'N', A, Ct, Bt_out, unit_nr=io_unit, filter_eps=filter_eps) + CALL dbcsr_tas_test_mm('T', 'T', 'N', At, Ct, Bt_out, unit_nr=io_unit, filter_eps=filter_eps) + + CALL dbcsr_tas_test_mm('N', 'N', 'T', A, C, B_out, unit_nr=io_unit, filter_eps=filter_eps) + CALL dbcsr_tas_test_mm('T', 'N', 'T', At, C, B_out, unit_nr=io_unit, filter_eps=filter_eps) + CALL dbcsr_tas_test_mm('N', 'T', 'T', A, Ct, B_out, unit_nr=io_unit, filter_eps=filter_eps) + CALL dbcsr_tas_test_mm('T', 'T', 'T', At, Ct, B_out, unit_nr=io_unit, filter_eps=filter_eps) + + CALL dbcsr_tas_test_mm('N', 'N', 'N', C, B, At_out, unit_nr=io_unit, filter_eps=filter_eps) + CALL dbcsr_tas_test_mm('T', 'N', 'N', Ct, B, At_out, unit_nr=io_unit, filter_eps=filter_eps) + CALL dbcsr_tas_test_mm('N', 'T', 'N', C, Bt, At_out, unit_nr=io_unit, filter_eps=filter_eps) + CALL dbcsr_tas_test_mm('T', 'T', 'N', Ct, Bt, At_out, unit_nr=io_unit, filter_eps=filter_eps) + + CALL dbcsr_tas_test_mm('N', 'N', 'T', C, B, A_out, unit_nr=io_unit, filter_eps=filter_eps) + CALL dbcsr_tas_test_mm('T', 'N', 'T', Ct, B, A_out, unit_nr=io_unit, filter_eps=filter_eps) + CALL dbcsr_tas_test_mm('N', 'T', 'T', C, Bt, A_out, unit_nr=io_unit, filter_eps=filter_eps) + CALL dbcsr_tas_test_mm('T', 'T', 'T', Ct, Bt, A_out, unit_nr=io_unit, filter_eps=filter_eps) CALL dbcsr_tas_destroy(A) CALL dbcsr_tas_destroy(At) diff --git a/tests/dbcsr_tensor_unittest.F b/tests/dbcsr_tensor_unittest.F index 229cf795a19..37599c1b2da 100644 --- a/tests/dbcsr_tensor_unittest.F +++ b/tests/dbcsr_tensor_unittest.F @@ -20,21 +20,12 @@ PROGRAM dbcsr_tensor_unittest mp_world_finalize, & mp_world_init USE dbcsr_tensor_test, ONLY: dbcsr_t_contract_test, & - dbcsr_t_random_dist, & dbcsr_t_setup_test_tensor, & dbcsr_t_test_formats - USE dbcsr_tensor_types, ONLY: dbcsr_t_create, & - dbcsr_t_destroy, & - dbcsr_t_distribution_destroy, & - dbcsr_t_distribution_new, & - dbcsr_t_distribution_type, & - dbcsr_t_nd_mp_comm, & - dbcsr_t_type, & - dbcsr_t_pgrid_type, & - dbcsr_t_pgrid_create, & - dbcsr_t_get_info, & - dbcsr_t_pgrid_destroy, & - ndims_tensor + USE dbcsr_tensor_types, ONLY: & + dbcsr_t_create, dbcsr_t_destroy, dbcsr_t_distribution_destroy, dbcsr_t_distribution_new, & + dbcsr_t_distribution_type, dbcsr_t_nd_mp_comm, dbcsr_t_type, dbcsr_t_pgrid_type, & + dbcsr_t_pgrid_create, dbcsr_t_get_info, dbcsr_t_pgrid_destroy, ndims_tensor, dbcsr_t_default_distvec USE dbcsr_data_methods, ONLY: dbcsr_scalar USE dbcsr_kinds, ONLY: real_8 #include "base/dbcsr_base_uses.f90" @@ -70,7 +61,7 @@ PROGRAM dbcsr_tensor_unittest CALL mp_environ(numnodes, mynode, mp_comm) ! set standard output parameters - io_unit = 0 + io_unit = -1 IF (mynode .EQ. 0) io_unit = default_output_unit ! initialize libdbcsr @@ -103,7 +94,7 @@ PROGRAM dbcsr_tensor_unittest blk_ind_2(:) = [1, 3, 11, 15, 4, 17, 21, 6, 9, 13, 19, 7] !& ! Test tensor formats - CALL dbcsr_t_test_formats(ndims, mp_comm, default_output_unit, verbose, & + CALL dbcsr_t_test_formats(ndims, mp_comm, io_unit, verbose, & blk_size_1=size_1, blk_size_2=size_2, & blk_ind_1=blk_ind_1, blk_ind_2=blk_ind_2) @@ -137,7 +128,7 @@ PROGRAM dbcsr_tensor_unittest blk_ind_3(:) = [1, 3, 3, 2, 3, 2] !& ! Test tensor formats - CALL dbcsr_t_test_formats(ndims, mp_comm, default_output_unit, verbose, & + CALL dbcsr_t_test_formats(ndims, mp_comm, io_unit, verbose, & blk_size_1=size_1, blk_size_2=size_2, blk_size_3=size_3, & blk_ind_1=blk_ind_1, blk_ind_2=blk_ind_2, blk_ind_3=blk_ind_3) @@ -174,7 +165,7 @@ PROGRAM dbcsr_tensor_unittest blk_ind_4(:) = [3, 2, 3, 1, 1, 2, 1, 3, 2, 2, 3, 1, 3, 2, 1, 1, 3, 2, 2] !& ! Test tensor formats - CALL dbcsr_t_test_formats(ndims, mp_comm, default_output_unit, verbose, & + CALL dbcsr_t_test_formats(ndims, mp_comm, io_unit, verbose, & blk_size_1=size_1, blk_size_2=size_2, blk_size_3=size_3, blk_size_4=size_4, & blk_ind_1=blk_ind_1, blk_ind_2=blk_ind_2, blk_ind_3=blk_ind_3, blk_ind_4=blk_ind_4) @@ -275,25 +266,40 @@ PROGRAM dbcsr_tensor_unittest CALL dbcsr_t_pgrid_create(mp_comm, pdims_3d, pgrid_3d) CALL dbcsr_t_pgrid_create(mp_comm, pdims_2d, pgrid_2d) - CALL dbcsr_t_random_dist(dist1_1, nblks_1, pdims_3d(1)) - CALL dbcsr_t_random_dist(dist1_2, nblks_2, pdims_3d(2)) - CALL dbcsr_t_random_dist(dist1_3, nblks_3, pdims_3d(3)) - - CALL dbcsr_t_random_dist(dist2_1, nblks_3, pdims_2d(1)) - CALL dbcsr_t_random_dist(dist2_2, nblks_4, pdims_2d(2)) - - CALL dbcsr_t_random_dist(dist3_1, nblks_1, pdims_3d(1)) - CALL dbcsr_t_random_dist(dist3_2, nblks_2, pdims_3d(2)) - CALL dbcsr_t_random_dist(dist3_3, nblks_4, pdims_3d(3)) - - CALL dbcsr_t_random_dist(dist4_1, nblks_1, pdims_4d(1)) - CALL dbcsr_t_random_dist(dist4_2, nblks_2, pdims_4d(2)) - CALL dbcsr_t_random_dist(dist4_3, nblks_4, pdims_4d(3)) - CALL dbcsr_t_random_dist(dist4_4, nblks_5, pdims_4d(4)) - - CALL dbcsr_t_random_dist(dist5_1, nblks_3, pdims_3d(1)) - CALL dbcsr_t_random_dist(dist5_2, nblks_4, pdims_3d(2)) - CALL dbcsr_t_random_dist(dist5_3, nblks_5, pdims_3d(3)) + ALLOCATE (dist1_1(nblks_1)) + CALL dbcsr_t_default_distvec(nblks_1, pdims_3d(1), size_1, dist1_1) + ALLOCATE (dist1_2(nblks_2)) + CALL dbcsr_t_default_distvec(nblks_2, pdims_3d(2), size_2, dist1_2) + ALLOCATE (dist1_3(nblks_3)) + CALL dbcsr_t_default_distvec(nblks_3, pdims_3d(3), size_3, dist1_3) + + ALLOCATE (dist2_1(nblks_3)) + CALL dbcsr_t_default_distvec(nblks_3, pdims_2d(1), size_3, dist2_1) + ALLOCATE (dist2_2(nblks_4)) + CALL dbcsr_t_default_distvec(nblks_4, pdims_2d(2), size_4, dist2_2) + + ALLOCATE (dist3_1(nblks_1)) + CALL dbcsr_t_default_distvec(nblks_1, pdims_3d(1), size_1, dist3_1) + ALLOCATE (dist3_2(nblks_2)) + CALL dbcsr_t_default_distvec(nblks_2, pdims_3d(2), size_2, dist3_2) + ALLOCATE (dist3_3(nblks_4)) + CALL dbcsr_t_default_distvec(nblks_4, pdims_3d(3), size_4, dist3_3) + + ALLOCATE (dist4_1(nblks_1)) + CALL dbcsr_t_default_distvec(nblks_1, pdims_4d(1), size_1, dist4_1) + ALLOCATE (dist4_2(nblks_2)) + CALL dbcsr_t_default_distvec(nblks_2, pdims_4d(2), size_2, dist4_2) + ALLOCATE (dist4_3(nblks_4)) + CALL dbcsr_t_default_distvec(nblks_4, pdims_4d(3), size_4, dist4_3) + ALLOCATE (dist4_4(nblks_5)) + CALL dbcsr_t_default_distvec(nblks_5, pdims_4d(4), size_5, dist4_4) + + ALLOCATE (dist5_1(nblks_3)) + CALL dbcsr_t_default_distvec(nblks_3, pdims_3d(1), size_3, dist5_1) + ALLOCATE (dist5_2(nblks_4)) + CALL dbcsr_t_default_distvec(nblks_4, pdims_3d(2), size_4, dist5_2) + ALLOCATE (dist5_3(nblks_5)) + CALL dbcsr_t_default_distvec(nblks_5, pdims_3d(3), size_5, dist5_3) !--------------------------------------------------------------------------------------------------! ! Test 4: Testing tensor contraction (12|3)x(3|4)=(12|4) ! @@ -320,13 +326,14 @@ PROGRAM dbcsr_tensor_unittest CALL dbcsr_t_setup_test_tensor(tensor_A, mp_comm, .FALSE., blk_ind_1_1, blk_ind_2_1, blk_ind_3_1) CALL dbcsr_t_setup_test_tensor(tensor_B, mp_comm, .FALSE., blk_ind_3_2, blk_ind_4_2) + CALL dbcsr_t_setup_test_tensor(tensor_C, mp_comm, .FALSE., blk_ind_1_3, blk_ind_2_3, blk_ind_4_3) CALL dbcsr_t_contract_test(dbcsr_scalar(0.9_real_8), tensor_A, tensor_B, dbcsr_scalar(0.1_real_8), tensor_C, & [3], [2, 1], & [1], [2], & [2, 1], [3], & - default_output_unit, & + io_unit, & log_verbose=verbose, & write_int=.TRUE.) @@ -370,7 +377,7 @@ PROGRAM dbcsr_tensor_unittest [3], [1, 2], & [1], [2], & [1, 2], [3], & - default_output_unit, & + io_unit, & log_verbose=verbose, & write_int=.TRUE.) @@ -421,7 +428,7 @@ PROGRAM dbcsr_tensor_unittest [1], [2], & [3], [1, 2], & [3], [1, 2], & - default_output_unit, & + io_unit, & bounds_1=bounds, & log_verbose=verbose, & write_int=.TRUE.) @@ -476,7 +483,7 @@ PROGRAM dbcsr_tensor_unittest [3], [1, 2], & [2], [1], & [1, 2], [3], & - default_output_unit, & + io_unit, & bounds_2=bounds, & log_verbose=verbose, & write_int=.TRUE.) @@ -539,7 +546,7 @@ PROGRAM dbcsr_tensor_unittest [2, 1], [3], & [2, 1], [3, 4], & [1], [2, 3], & - default_output_unit, & + io_unit, & bounds_1=bounds_1, & bounds_3=bounds_2, & log_verbose=verbose, & @@ -585,7 +592,7 @@ PROGRAM dbcsr_tensor_unittest [2, 1], [3], & [2, 1], [3, 4], & [1], [2, 3], & - default_output_unit, & + io_unit, & log_verbose=verbose, & write_int=.TRUE.) @@ -629,7 +636,7 @@ PROGRAM dbcsr_tensor_unittest [1, 2], [3], & [1, 2], [3, 4], & [1], [2, 3], & - default_output_unit, & + io_unit, & log_verbose=verbose, & write_int=.TRUE.) @@ -673,7 +680,7 @@ PROGRAM dbcsr_tensor_unittest [2, 1], [4, 3], & [2, 1], [3], & [3, 2], [1], & - default_output_unit, & + io_unit, & log_verbose=verbose, & write_int=.TRUE.) @@ -717,7 +724,7 @@ PROGRAM dbcsr_tensor_unittest [2, 1], [3, 4], & [2, 1], [3], & [2, 3], [1], & - default_output_unit, & + io_unit, & log_verbose=verbose, & write_int=.TRUE.) @@ -761,7 +768,7 @@ PROGRAM dbcsr_tensor_unittest [1], [2, 3], & [3], [1, 2], & [3, 4], [1, 2], & - default_output_unit, & + io_unit, & log_verbose=verbose, & write_int=.TRUE.) From c1e22b38da670cf3d18dce198de8bd9d4b83c373 Mon Sep 17 00:00:00 2001 From: Patrick Seewald Date: Wed, 15 Apr 2020 16:03:03 +0200 Subject: [PATCH 18/19] CI: change default timeout to 900s --- .ci/daint.cscs.ch/cray.test.sh | 2 +- .ci/daint.cscs.ch/gnu.test.sh | 2 +- .ci/daint.cscs.ch/intel.test.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.ci/daint.cscs.ch/cray.test.sh b/.ci/daint.cscs.ch/cray.test.sh index eedda379d67..e064e707888 100755 --- a/.ci/daint.cscs.ch/cray.test.sh +++ b/.ci/daint.cscs.ch/cray.test.sh @@ -31,4 +31,4 @@ export OMP_PROC_BIND=TRUE # set thread affinity # document the current environment env |& tee -a "${STAGE_NAME}.out" -env CTEST_OUTPUT_ON_FAILURE=1 make test |& tee -a "${STAGE_NAME}.out" +env CTEST_OUTPUT_ON_FAILURE=1 make test ARGS="--timeout 900" |& tee -a "${STAGE_NAME}.out" diff --git a/.ci/daint.cscs.ch/gnu.test.sh b/.ci/daint.cscs.ch/gnu.test.sh index 498ab70eb7d..c0469575f48 100755 --- a/.ci/daint.cscs.ch/gnu.test.sh +++ b/.ci/daint.cscs.ch/gnu.test.sh @@ -32,4 +32,4 @@ export OMP_PROC_BIND=TRUE # set thread affinity # document the current environment env |& tee -a "${STAGE_NAME}.out" -env CTEST_OUTPUT_ON_FAILURE=1 make test |& tee -a "${STAGE_NAME}.out" +env CTEST_OUTPUT_ON_FAILURE=1 make test ARGS="--timeout 900" |& tee -a "${STAGE_NAME}.out" diff --git a/.ci/daint.cscs.ch/intel.test.sh b/.ci/daint.cscs.ch/intel.test.sh index b3c6ee7a8fe..03b94246565 100755 --- a/.ci/daint.cscs.ch/intel.test.sh +++ b/.ci/daint.cscs.ch/intel.test.sh @@ -32,4 +32,4 @@ export OMP_PROC_BIND=TRUE # set thread affinity # document the current environment env |& tee -a "${STAGE_NAME}.out" -env CTEST_OUTPUT_ON_FAILURE=1 make test |& tee -a "${STAGE_NAME}.out" +env CTEST_OUTPUT_ON_FAILURE=1 make test ARGS="--timeout 900" |& tee -a "${STAGE_NAME}.out" From 9b3a2d66203097cc3f4cf3069b476261a754bab5 Mon Sep 17 00:00:00 2001 From: Patrick Seewald Date: Wed, 15 Apr 2020 17:00:32 +0200 Subject: [PATCH 19/19] Bump version to 2.1.0-rc12 --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 4d412d6636f..6a7db1fcdb9 100644 --- a/VERSION +++ b/VERSION @@ -1,6 +1,6 @@ MAJOR = 2 MINOR = 1 -PATCH = 0-rc11 +PATCH = 0-rc12 # A specific DATE (YYYY-MM-DD) fixes an official release, otherwise # it is considered Development version. DATE =