5
5
# it does double the number of operations compared to accumulate,
6
6
# though for cheap operations like + this does not have much impact (20%)
7
7
function _accumulate_pairwise! (op:: Op , c:: AbstractVector{T} , v:: AbstractVector , s, i1, n):: T where {T,Op}
8
- @inbounds if n < 128
9
- s_ = v[i1]
10
- c[i1] = op (s, s_)
8
+ if n < 128
9
+ @inbounds s_ = v[i1]
10
+ ci1 = op (s, s_)
11
+ @inbounds c[i1] = ci1
11
12
for i = i1+ 1 : i1+ n- 1
12
- s_ = op (s_, v[i])
13
- c[i] = op (s, s_)
13
+ s_ = op (s_, @inbounds (v[i]))
14
+ ci = op (s, s_)
15
+ @inbounds c[i] = ci
14
16
end
15
17
else
16
18
n2 = n >> 1
@@ -26,7 +28,8 @@ function accumulate_pairwise!(op::Op, result::AbstractVector, v::AbstractVector)
26
28
n = length (li)
27
29
n == 0 && return result
28
30
i1 = first (li)
29
- @inbounds result[i1] = v1 = reduce_first (op,v[i1])
31
+ v1 = reduce_first (op, @inbounds (v[i1]))
32
+ @inbounds result[i1] = v1
30
33
n == 1 && return result
31
34
_accumulate_pairwise! (op, result, v, v1, i1+ 1 , n- 1 )
32
35
return result
@@ -378,16 +381,16 @@ function _accumulate!(op, B, A, dims::Integer, init::Union{Nothing, Some})
378
381
# We can accumulate to a temporary variable, which allows
379
382
# register usage and will be slightly faster
380
383
ind1 = inds_t[1 ]
381
- @inbounds for I in CartesianIndices (tail (inds_t))
384
+ for I in CartesianIndices (tail (inds_t))
382
385
if init === nothing
383
- tmp = reduce_first (op, A[first (ind1), I])
386
+ tmp = reduce_first (op, @inbounds ( A[first (ind1), I]) )
384
387
else
385
- tmp = op (something (init), A[first (ind1), I])
388
+ tmp = op (something (init), @inbounds ( A[first (ind1), I]) )
386
389
end
387
- B[first (ind1), I] = tmp
390
+ @inbounds B[first (ind1), I] = tmp
388
391
for i_1 = first (ind1)+ 1 : last (ind1)
389
- tmp = op (tmp, A[i_1, I])
390
- B[i_1, I] = tmp
392
+ tmp = op (tmp, @inbounds ( A[i_1, I]) )
393
+ @inbounds B[i_1, I] = tmp
391
394
end
392
395
end
393
396
else
@@ -401,25 +404,31 @@ end
401
404
# Accumulate `A` into `B` with `op` along the middle index range `ind`, with no
# initial value (`init === nothing`).
#
# `R1` and `R2` are iterated to produce the leading (`I`) and trailing (`J`)
# parts of every index `[I, i, J]`.  For each `(I, J)` pair:
#   * the entry at `first(ind)` is seeded with `reduce_first(op, A[I, ii, J])`;
#   * every later entry is `op(B[I, i-1, J], A[I, i, J])`.
#
# `@inbounds` is applied only to the individual array reads and writes; the calls
# to `op` and `reduce_first` are made outside of it, so a user-supplied `op` is
# never evaluated under `@inbounds`.
#
# Returns `B`.
@noinline function _accumulaten!(op, B, A, R1, ind, R2, init::Nothing)
    # Copy the initial element in each 1d vector along dimension `dim`
    ii = first(ind)
    for J in R2, I in R1
        tmp = reduce_first(op, @inbounds(A[I, ii, J]))
        @inbounds B[I, ii, J] = tmp
    end
    # Accumulate
    for J in R2, i in first(ind)+1:last(ind), I in R1
        # Read both operands first, then call `op` outside of `@inbounds`.
        @inbounds Bv, Av = B[I, i-1, J], A[I, i, J]
        tmp = op(Bv, Av)
        @inbounds B[I, i, J] = tmp
    end
    return B
end
413
419
414
420
# Accumulate `A` into `B` with `op` along the middle index range `ind`, starting
# from the wrapped initial value `something(init)`.
#
# `R1` and `R2` are iterated to produce the leading (`I`) and trailing (`J`)
# parts of every index `[I, i, J]`.  For each `(I, J)` pair:
#   * the entry at `first(ind)` is `op(something(init), A[I, ii, J])`;
#   * every later entry is `op(B[I, i-1, J], A[I, i, J])`.
#
# `@inbounds` is applied only to the individual array reads and writes; the calls
# to `op` are made outside of it, so a user-supplied `op` is never evaluated
# under `@inbounds`.
#
# Returns `B`.
@noinline function _accumulaten!(op, B, A, R1, ind, R2, init::Some)
    # Copy the initial element in each 1d vector along dimension `dim`
    ii = first(ind)
    for J in R2, I in R1
        tmp = op(something(init), @inbounds(A[I, ii, J]))
        @inbounds B[I, ii, J] = tmp
    end
    # Accumulate
    for J in R2, i in first(ind)+1:last(ind), I in R1
        # Read both operands first, then call `op` outside of `@inbounds`.
        @inbounds Bv, Av = B[I, i-1, J], A[I, i, J]
        tmp = op(Bv, Av)
        @inbounds B[I, i, J] = tmp
    end
    return B
end
@@ -433,10 +442,10 @@ function _accumulate1!(op, B, v1, A::AbstractVector, dim::Integer)
433
442
cur_val = v1
434
443
B[i1] = cur_val
435
444
next = iterate (inds, state)
436
- @inbounds while next != = nothing
445
+ while next != = nothing
437
446
(i, state) = next
438
- cur_val = op (cur_val, A[i])
439
- B[i] = cur_val
447
+ cur_val = op (cur_val, @inbounds ( A[i]) )
448
+ @inbounds B[i] = cur_val
440
449
next = iterate (inds, state)
441
450
end
442
451
return B
0 commit comments