We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 78fdc68, commit 1d7c2c1 (Copy full SHA for 1d7c2c1)
Project.toml
@@ -1,7 +1,7 @@
1
name = "LoopVectorization"
2
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
3
authors = ["Chris Elrod <[email protected]>"]
4
-version = "0.8.21"
+version = "0.8.22"
5
6
[deps]
7
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
src/LoopVectorization.jl
@@ -16,7 +16,7 @@ using SIMDPirates: VECTOR_SYMBOLS, evadd, evsub, evmul, evfdiv, vrange,
16
reduced_add, reduced_prod, reduce_to_add, reduced_max, reduced_min, vsum, vprod, vmaximum, vminimum,
17
sizeequivalentfloat, sizeequivalentint, vadd!, vsub!, vmul!, vfdiv!, vfmadd!, vfnmadd!, vfmsub!, vfnmsub!,
18
vfmadd231, vfmsub231, vfnmadd231, vfnmsub231, sizeequivalentfloat, sizeequivalentint, #prefetch,
19
- vmullog2, vmullog10, vdivlog2, vdivlog10, vmullog2add!, vmullog10add!, vdivlog2add!, vdivlog10add!, vfmaddaddone
+ vmullog2, vmullog10, vdivlog2, vdivlog10, vmullog2add!, vmullog10add!, vdivlog2add!, vdivlog10add!, vfmaddaddone, vadd1
20
using SLEEFPirates: pow
21
using Base.Broadcast: Broadcasted, DefaultArrayStyle
22
using LinearAlgebra: Adjoint, Transpose
src/add_loads.jl
@@ -1,4 +1,5 @@
-function maybeaddref!(ls::LoopSet, op, ref)
+function maybeaddref!(ls::LoopSet, op)
+ ref = op.ref
id = findfirst(r -> r == ref, ls.refs_aliasing_syms)
# try to CSE
if isnothing(id)
@@ -12,8 +13,7 @@ end
12
13
14
function add_load!(ls::LoopSet, op::Operation, actualarray::Bool = true, broadcast::Bool = false)
15
@assert isload(op)
- ref = op.ref
- if (id = maybeaddref!(ls, op, ref)) > 0 # try to CSE
+ if (id = maybeaddref!(ls, op)) > 0 # try to CSE
opp = ls.opdict[ls.syms_aliasing_refs[id]] # throw an error if not found.
return isstore(opp) ? getop(ls, first(parents(opp))) : opp
end
src/costs.jl
@@ -130,6 +130,7 @@ const COST = Dict{Symbol,InstructionCost}(
130
:(*) => InstructionCost(4,0.5),
131
:(/) => InstructionCost(13,4.0,-2.0),
132
:vadd => InstructionCost(4,0.5),
133
+ :vadd1 => InstructionCost(4,0.5),
134
:add_fast => InstructionCost(4,0.5),
135
:vsub => InstructionCost(4,0.5),
136
:sub_fast => InstructionCost(4,0.5),
0 commit comments