Skip to content

Commit

Permalink
Merge branch 'release-2.1.0-rc4'
Browse files Browse the repository at this point in the history
  • Loading branch information
pseewald committed Feb 22, 2020
2 parents af275be + 994352d commit d871ed2
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 33 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MAJOR = 2
MINOR = 1
PATCH = 0-rc3
PATCH = 0-rc4
# A specific DATE (YYYY-MM-DD) fixes an official release, otherwise
# it is considered Development version.
DATE =
24 changes: 19 additions & 5 deletions src/tas/dbcsr_tas_mm.F
Original file line number Diff line number Diff line change
Expand Up @@ -252,9 +252,9 @@ RECURSIVE SUBROUTINE dbcsr_tas_multiply(transa, transb, transc, alpha, matrix_a,
nze_b = dbcsr_tas_get_nze_total(matrix_b)
nze_c = result_sparsity_estimate(transa, transb, transc, matrix_a, matrix_b, matrix_c, filter_eps)

nsplit = split_factor_estimate(nze_a, nze_b, nze_c, numproc)
max_mm_dim = MAXLOC(dims, 1)
nsplit = split_factor_estimate(max_mm_dim, nze_a, nze_b, nze_c, numproc)
nsplit_opt = nsplit
max_mm_dim = MINLOC([nze_b, nze_c, nze_a], 1)

IF (io_unit_prv > 0) THEN
WRITE (io_unit_prv, "(T2,A)") &
Expand Down Expand Up @@ -1307,13 +1307,14 @@ FUNCTION result_sparsity_estimate(transa, transb, transc, matrix_a, matrix_b, ma
END FUNCTION
FUNCTION split_factor_estimate(nze_a, nze_b, nze_c, numnodes) RESULT(nsplit)
FUNCTION split_factor_estimate(max_mm_dim, nze_a, nze_b, nze_c, numnodes) RESULT(nsplit)
!! Estimate optimal split factor for AxB=C from occupancies (number of non-zero elements)
!! This estimate is based on the minimization of communication volume whereby
!! the communication of CARMA n-split step and CANNON-multiplication of submatrices are
!! considered.
!! \result estimated split factor
INTEGER, INTENT(IN) :: max_mm_dim
INTEGER(KIND=int_8), INTENT(IN) :: nze_a, nze_b, nze_c
!! number of non-zeroes in A
!! number of non-zeroes in B
Expand All @@ -1323,9 +1324,22 @@ FUNCTION split_factor_estimate(nze_a, nze_b, nze_c, numnodes) RESULT(nsplit)
INTEGER :: nsplit, nsplit_comm, nsplit_memory
INTEGER(KIND=int_8) :: max_nze, min_nze
min_nze = MAX(MINVAL([nze_a, nze_b, nze_c]), 1_int_8)
max_nze = MAX(MAXVAL([nze_a, nze_b, nze_c]), 1_int_8)
SELECT CASE(max_mm_dim)
CASE(1)
min_nze = MAX(nze_b, 1_int_8)
max_nze = MAX(MAXVAL([nze_a, nze_c]), 1_int_8)
CASE(2)
min_nze = MAX(nze_c, 1_int_8)
max_nze = MAX(MAXVAL([nze_a, nze_b]), 1_int_8)
CASE(3)
min_nze = MAX(nze_a, 1_int_8)
max_nze = MAX(MAXVAL([nze_b, nze_c]), 1_int_8)
CASE DEFAULT
DBCSR_ABORT("")
END SELECT
nsplit_comm = NINT((REAL(nze_a + nze_b, real_8)/(2*min_nze))**(2._real_8/3)*REAL(numnodes, real_8)**(1._real_8/3))
IF(nsplit_comm == 0) nsplit_comm = 1
! nsplit_memory protects against excess memory usage
! actual split factor may be up to default_nsplit_accept_ratio_1 times larger, so the largest nsplit
Expand Down
6 changes: 3 additions & 3 deletions src/tas/dbcsr_tas_types.F
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,9 @@ MODULE dbcsr_tas_types

! storage and flags for batched matrix multiplication
INTEGER :: do_batched = 0 ! whether we are doing batched MM:
! - 0 no batched MM
! - 1 batched MM but mm_storage not yet initialized
! - 2 batched MM and mm_storage initialized
! - 0 no batched MM
! - 1 batched MM but mm_storage not yet initialized
! - 2 batched MM and mm_storage initialized
TYPE(dbcsr_tas_mm_storage), ALLOCATABLE :: mm_storage ! storage for batched processing of matrix matrix multiplication.

END TYPE
Expand Down
67 changes: 44 additions & 23 deletions src/tensors/dbcsr_tensor.F
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ MODULE dbcsr_tensor

CONTAINS

SUBROUTINE dbcsr_t_copy(tensor_in, tensor_out, order, summation, move_data, unit_nr)
SUBROUTINE dbcsr_t_copy(tensor_in, tensor_out, order, summation, bounds, move_data, unit_nr)
!! Copy tensor data.
!! Redistributes tensor data according to distributions of target and source tensor.
!! Permutes tensor index according to `order` argument (if present).
Expand All @@ -124,33 +124,39 @@ SUBROUTINE dbcsr_t_copy(tensor_in, tensor_out, order, summation, move_data, unit
INTENT(IN), OPTIONAL :: order
!! Permutation of target tensor index. Exact same convention as order argument of RESHAPE intrinsic
LOGICAL, INTENT(IN), OPTIONAL :: summation, move_data
INTEGER, DIMENSION(2, ndims_tensor(tensor_in)), &
INTENT(IN), OPTIONAL :: bounds
!! crop tensor data: start and end index for each tensor dimension
INTEGER, INTENT(IN), OPTIONAL :: unit_nr

TYPE(dbcsr_t_type), POINTER :: in_tmp_1 => NULL(), in_tmp_2 => NULL(), out_tmp_1 => NULL()
TYPE(dbcsr_t_type), POINTER :: in_tmp_1 => NULL(), in_tmp_2 => NULL(), &
in_tmp_3 => NULL(), out_tmp_1 => NULL()
INTEGER :: handle
INTEGER, DIMENSION(:), ALLOCATABLE :: map1_in_1, map1_in_2, map2_in_1, map2_in_2

CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_t_copy', &
routineP = moduleN//':'//routineN
LOGICAL :: dist_compatible_tas, dist_compatible_tensor, summation_prv, new_in_1, &
new_in_2, new_out_1, block_compatible, &
LOGICAL :: dist_compatible_tas, dist_compatible_tensor, &
summation_prv, new_in_1, new_in_2, &
new_in_3, new_out_1, block_compatible, &
move_prv

CALL timeset(routineN, handle)

DBCSR_ASSERT(tensor_out%valid)

IF (PRESENT(move_data)) THEN
move_prv = move_data
ELSE
move_prv = .FALSE.
ENDIF

DBCSR_ASSERT(tensor_out%valid)

dist_compatible_tas = .FALSE.
dist_compatible_tensor = .FALSE.
block_compatible = .FALSE.
new_in_1 = .FALSE.
new_in_2 = .FALSE.
new_in_3 = .FALSE.
new_out_1 = .FALSE.

IF (PRESENT(summation)) THEN
Expand All @@ -159,44 +165,54 @@ SUBROUTINE dbcsr_t_copy(tensor_in, tensor_out, order, summation, move_data, unit
summation_prv = .FALSE.
ENDIF

IF (PRESENT(order)) THEN
ALLOCATE (in_tmp_1)
CALL dbcsr_t_permute_index(tensor_in, in_tmp_1, order)
IF(PRESENT(bounds)) THEN
ALLOCATE(in_tmp_1)
CALL dbcsr_t_crop(tensor_in, in_tmp_1, bounds=bounds, move_data=move_prv)
new_in_1 = .TRUE.
move_prv = .TRUE.
ELSE
in_tmp_1 => tensor_in
ENDIF

block_compatible = check_equal(in_tmp_1%blk_sizes, tensor_out%blk_sizes)
IF (.NOT. block_compatible) THEN
ALLOCATE (in_tmp_2, out_tmp_1)
new_in_2 = .TRUE.; new_out_1 = .TRUE.
CALL dbcsr_t_make_compatible_blocks(in_tmp_1, tensor_out, in_tmp_2, out_tmp_1, &
nodata2=.NOT. summation_prv, move_data=move_data)
IF (PRESENT(order)) THEN
ALLOCATE (in_tmp_2)
CALL dbcsr_t_permute_index(in_tmp_1, in_tmp_2, order)
new_in_2 = .TRUE.
ELSE
in_tmp_2 => in_tmp_1
ENDIF

block_compatible = check_equal(in_tmp_2%blk_sizes, tensor_out%blk_sizes)
IF (.NOT. block_compatible) THEN
ALLOCATE (in_tmp_3, out_tmp_1)
CALL dbcsr_t_make_compatible_blocks(in_tmp_2, tensor_out, in_tmp_3, out_tmp_1, &
nodata2=.NOT. summation_prv, move_data=move_prv)
new_in_3 = .TRUE.; new_out_1 = .TRUE.
move_prv = .TRUE.
ELSE
in_tmp_3 => in_tmp_2
out_tmp_1 => tensor_out
ENDIF

CALL dbcsr_t_get_mapping_info(in_tmp_2%nd_index, map1_2d=map1_in_1, map2_2d=map1_in_2)
CALL dbcsr_t_get_mapping_info(in_tmp_3%nd_index, map1_2d=map1_in_1, map2_2d=map1_in_2)
CALL dbcsr_t_get_mapping_info(out_tmp_1%nd_index, map1_2d=map2_in_1, map2_2d=map2_in_2)

IF (.NOT. PRESENT(order)) THEN
IF (array_eq_i(map1_in_1, map2_in_1) .AND. array_eq_i(map1_in_2, map2_in_2)) THEN
dist_compatible_tas = check_equal(in_tmp_2%nd_dist, out_tmp_1%nd_dist)
dist_compatible_tas = check_equal(in_tmp_3%nd_dist, out_tmp_1%nd_dist)
ELSEIF(array_eq_i([map1_in_1, map1_in_2], [map2_in_1, map2_in_2])) THEN
dist_compatible_tensor = check_equal(in_tmp_2%nd_dist, out_tmp_1%nd_dist)
dist_compatible_tensor = check_equal(in_tmp_3%nd_dist, out_tmp_1%nd_dist)
ENDIF
ENDIF

IF (dist_compatible_tas) THEN
CALL dbcsr_tas_copy(out_tmp_1%matrix_rep, in_tmp_2%matrix_rep, summation)
IF (move_prv) CALL dbcsr_t_clear(in_tmp_2)
CALL dbcsr_tas_copy(out_tmp_1%matrix_rep, in_tmp_3%matrix_rep, summation)
IF (move_prv) CALL dbcsr_t_clear(in_tmp_3)
ELSEIF(dist_compatible_tensor) THEN
CALL dbcsr_t_copy_nocomm(in_tmp_2, out_tmp_1, summation)
IF (move_prv) CALL dbcsr_t_clear(in_tmp_2)
CALL dbcsr_t_copy_nocomm(in_tmp_3, out_tmp_1, summation)
IF (move_prv) CALL dbcsr_t_clear(in_tmp_3)
ELSE
CALL dbcsr_t_reshape(in_tmp_2, out_tmp_1, summation, move_data=move_data)
CALL dbcsr_t_reshape(in_tmp_3, out_tmp_1, summation, move_data=move_prv)
ENDIF

IF (new_in_1) THEN
Expand All @@ -209,6 +225,11 @@ SUBROUTINE dbcsr_t_copy(tensor_in, tensor_out, order, summation, move_data, unit
DEALLOCATE (in_tmp_2)
ENDIF

IF (new_in_3) THEN
CALL dbcsr_t_destroy(in_tmp_3)
DEALLOCATE (in_tmp_3)
ENDIF

IF (new_out_1) THEN
IF (PRESENT(unit_nr)) THEN
CALL dbcsr_t_write_tensor_dist(out_tmp_1, unit_nr)
Expand Down
3 changes: 2 additions & 1 deletion src/tensors/dbcsr_tensor_api.F
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ MODULE dbcsr_tensor_api
dbcsr_t_contract, dbcsr_t_get_block, dbcsr_t_get_stored_coordinates, dbcsr_t_put_block, &
dbcsr_t_reserve_blocks, dbcsr_t_copy_matrix_to_tensor, dbcsr_t_copy, &
dbcsr_t_copy_tensor_to_matrix, dbcsr_t_need_contract, dbcsr_t_batched_contract_init, &
dbcsr_t_batched_contract_finalize
dbcsr_t_batched_contract_finalize, dbcsr_t_ndims
USE dbcsr_tensor_block, ONLY: &
dbcsr_t_iterator_blocks_left, dbcsr_t_iterator_next_block, dbcsr_t_iterator_start, &
dbcsr_t_iterator_stop, dbcsr_t_iterator_type
Expand Down Expand Up @@ -91,5 +91,6 @@ MODULE dbcsr_tensor_api
PUBLIC :: dbcsr_t_mp_dims_create
PUBLIC :: dbcsr_t_batched_contract_init
PUBLIC :: dbcsr_t_batched_contract_finalize
PUBLIC :: dbcsr_t_ndims

END MODULE dbcsr_tensor_api

0 comments on commit d871ed2

Please sign in to comment.