5
5
# it does double the number of operations compared to accumulate,
6
6
# though for cheap operations like + this does not have much impact (20%)
7
7
function _accumulate_pairwise! (op:: Op , c:: AbstractVector{T} , v:: AbstractVector , s, i1, n):: T where {T,Op}
8
- @inbounds if n < 128
9
- s_ = v[i1]
10
- c[i1] = op (s, s_)
8
+ if n < 128
9
+ @inbounds s_ = v[i1]
10
+ ci1 = op (s, s_)
11
+ @inbounds c[i1] = ci1
11
12
for i = i1+ 1 : i1+ n- 1
12
- s_ = op (s_, v[i])
13
- c[i] = op (s, s_)
13
+ s_ = op (s_, @inbounds (v[i]))
14
+ ci = op (s, s_)
15
+ @inbounds c[i] = ci
14
16
end
15
17
else
16
18
n2 = n >> 1
@@ -26,7 +28,8 @@ function accumulate_pairwise!(op::Op, result::AbstractVector, v::AbstractVector)
26
28
n = length (li)
27
29
n == 0 && return result
28
30
i1 = first (li)
29
- @inbounds result[i1] = v1 = reduce_first (op,v[i1])
31
+ v1 = reduce_first (op, @inbounds (v[i1]))
32
+ @inbounds result[i1] = v1
30
33
n == 1 && return result
31
34
_accumulate_pairwise! (op, result, v, v1, i1+ 1 , n- 1 )
32
35
return result
@@ -379,16 +382,16 @@ function _accumulate!(op, B, A, dims::Integer, init::Union{Nothing, Some})
379
382
# We can accumulate to a temporary variable, which allows
380
383
# register usage and will be slightly faster
381
384
ind1 = inds_t[1 ]
382
- @inbounds for I in CartesianIndices (tail (inds_t))
385
+ for I in CartesianIndices (tail (inds_t))
383
386
if init === nothing
384
- tmp = reduce_first (op, A[first (ind1), I])
387
+ tmp = reduce_first (op, @inbounds ( A[first (ind1), I]) )
385
388
else
386
- tmp = op (something (init), A[first (ind1), I])
389
+ tmp = op (something (init), @inbounds ( A[first (ind1), I]) )
387
390
end
388
- B[first (ind1), I] = tmp
391
+ @inbounds B[first (ind1), I] = tmp
389
392
for i_1 = first (ind1)+ 1 : last (ind1)
390
- tmp = op (tmp, A[i_1, I])
391
- B[i_1, I] = tmp
393
+ tmp = op (tmp, @inbounds ( A[i_1, I]) )
394
+ @inbounds B[i_1, I] = tmp
392
395
end
393
396
end
394
397
else
@@ -402,25 +405,31 @@ end
402
405
# Accumulate `op` along one dimension of `A` into `B` when no initial value is supplied.
#
# Arguments:
# - `op`:   binary operation combining the running value with the next input element.
# - `B`:    output array, written in place and returned.
# - `A`:    input array, only read.
# - `R1`:   iterable yielding the leading part of each index.
# - `ind`:  index range of the dimension being accumulated.
# - `R2`:   iterable yielding the trailing part of each index.
# - `init`: `nothing`; the first slice is seeded with `reduce_first(op, a)`.
#
# NOTE(review): `R1`/`R2` are presumably `CartesianIndices` over the dimensions before and
# after the accumulated one -- confirm against the caller.
#
# `@inbounds` is attached only to the array reads and writes, so the calls to `op` and
# `reduce_first` are not wrapped by it.
# NOTE(review): indexing is unchecked here, so the caller is assumed to have validated
# that `A` and `B` cover `R1`, `ind` and `R2` -- confirm.
@noinline function _accumulaten!(op, B, A, R1, ind, R2, init::Nothing)
    # Copy the initial element in each 1d vector along dimension `dim`
    ii = first(ind)
    for J in R2, I in R1
        tmp = reduce_first(op, @inbounds(A[I, ii, J]))
        @inbounds B[I, ii, J] = tmp
    end
    # Accumulate: each output is `op` of the previous output and the current input.
    for J in R2, i in first(ind)+1:last(ind), I in R1
        @inbounds Bv, Av = B[I, i-1, J], A[I, i, J]
        tmp = op(Bv, Av)
        @inbounds B[I, i, J] = tmp
    end
    return B
end
414
420
415
421
# Accumulate `op` along one dimension of `A` into `B`, starting from a wrapped initial value.
#
# Arguments:
# - `op`:   binary operation combining the running value with the next input element.
# - `B`:    output array, written in place and returned.
# - `A`:    input array, only read.
# - `R1`:   iterable yielding the leading part of each index.
# - `ind`:  index range of the dimension being accumulated.
# - `R2`:   iterable yielding the trailing part of each index.
# - `init`: `Some(x)`; the first slice is seeded with `op(x, a)` where `x = something(init)`.
#
# NOTE(review): `R1`/`R2` are presumably `CartesianIndices` over the dimensions before and
# after the accumulated one -- confirm against the caller.
#
# `@inbounds` is attached only to the array reads and writes, so the calls to `op` are not
# wrapped by it.
# NOTE(review): indexing is unchecked here, so the caller is assumed to have validated
# that `A` and `B` cover `R1`, `ind` and `R2` -- confirm.
@noinline function _accumulaten!(op, B, A, R1, ind, R2, init::Some)
    # Copy the initial element in each 1d vector along dimension `dim`
    ii = first(ind)
    for J in R2, I in R1
        tmp = op(something(init), @inbounds(A[I, ii, J]))
        @inbounds B[I, ii, J] = tmp
    end
    # Accumulate: each output is `op` of the previous output and the current input.
    for J in R2, i in first(ind)+1:last(ind), I in R1
        @inbounds Bv, Av = B[I, i-1, J], A[I, i, J]
        tmp = op(Bv, Av)
        @inbounds B[I, i, J] = tmp
    end
    return B
end
@@ -434,10 +443,10 @@ function _accumulate1!(op, B, v1, A::AbstractVector, dim::Integer)
434
443
cur_val = v1
435
444
B[i1] = cur_val
436
445
next = iterate (inds, state)
437
- @inbounds while next != = nothing
446
+ while next != = nothing
438
447
(i, state) = next
439
- cur_val = op (cur_val, A[i])
440
- B[i] = cur_val
448
+ cur_val = op (cur_val, @inbounds ( A[i]) )
449
+ @inbounds B[i] = cur_val
441
450
next = iterate (inds, state)
442
451
end
443
452
return B
0 commit comments