From e390c17d533d426a25425d637a5b6729d7d2e6d4 Mon Sep 17 00:00:00 2001 From: Chris Elrod Date: Mon, 30 Mar 2020 01:59:31 -0400 Subject: [PATCH] Need to use opoffsets for symlicms. --- Manifest.toml | 8 ++++---- Project.toml | 4 ++-- src/determinestrategy.jl | 22 +++++++++++++++++++--- src/graphs.jl | 2 +- src/lower_compute.jl | 3 ++- src/reconstruct_loopset.jl | 3 ++- 6 files changed, 30 insertions(+), 12 deletions(-) diff --git a/Manifest.toml b/Manifest.toml index 7557d0168..a5b654f08 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -32,9 +32,9 @@ deps = ["Base64"] uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" [[OffsetArrays]] -git-tree-sha1 = "6a35d9446b40ae5004cd7bd0f1ae3505528c7fd6" +git-tree-sha1 = "930db8ef90483570107f2396b1ffc6680f08e8b7" uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" -version = "1.0.3" +version = "1.0.4" [[Random]] deps = ["Serialization"] @@ -42,9 +42,9 @@ uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" [[SIMDPirates]] deps = ["VectorizationBase"] -git-tree-sha1 = "8f89aa38f5e4e89f2a474ffdc850fc21d6ab9ed4" +git-tree-sha1 = "53c43af0172c24b0783bd93650bd8b78afb3e57b" uuid = "21efa798-c60a-11e8-04d3-e1a92915a26a" -version = "0.7.4" +version = "0.7.5" [[SLEEFPirates]] deps = ["Libdl", "SIMDPirates", "VectorizationBase"] diff --git a/Project.toml b/Project.toml index b26f9a2c4..9d8769fbc 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "LoopVectorization" uuid = "bdcacae8-1622-11e9-2a5c-532679323890" authors = ["Chris Elrod "] -version = "0.6.24" +version = "0.6.25" [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" @@ -13,7 +13,7 @@ VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f" [compat] OffsetArrays = "1" -SIMDPirates = "0.7.4" +SIMDPirates = "0.7.5" SLEEFPirates = "0.4" UnPack = "0" VectorizationBase = "0.9.5" diff --git a/src/determinestrategy.jl b/src/determinestrategy.jl index 6db326092..e12a53eaf 100644 --- a/src/determinestrategy.jl +++ b/src/determinestrategy.jl @@ -245,6 +245,19 @@ function tile_cost(X, U, T, UL, TL) # X[1]*Tfactor*Ufactor + X[4] + X[2] * Tfactor + X[3] * Ufactor X[1] + X[4] + X[2] * Tfactor + X[3] * Ufactor end +# function itertilesize(X, UL, TL) +# cb = Inf +# Ub = 1; Tb = 1 +# for U ∈ 1:4, T ∈ 1:4 +# c = tile_cost(X, U, T, UL, TL) +# @show U, T, c +# if cb > c +# cb = c +# Ub = U; Tb = T +# end +# end +# Ub, Tb, cb +# end function solve_tilesize(X, R, UL, TL) # @inbounds any(iszero, (R[1],R[2],R[3])) && return -1,-1,Inf #solve_smalltilesize(X, R, Umax, Tmax) first(iszero(R)) && return -1,-1,Inf #solve_smalltilesize(X, R, Umax, Tmax) @@ -253,14 +266,17 @@ function solve_tilesize(X, R, UL, TL) # first solving for U via quadratic formula # X is vector of costs, and R is of register pressures RR = REGISTER_COUNT - R[3] - R[4] # RR ≡ RemainingRegisters + R[1] + R[2] > 0.5RR && return 1,1, tile_cost(X, 1, 1, UL, TL) a = (R[1])^2*X[2] - (R[2])^2*R[1]*X[3]/RR b = 2*R[1]*R[2]*X[3] c = -RR*R[1]*X[3] - Ufloat = (sqrt(b^2 - 4a*c) - b) / (2a) - Tfloat = (RR - Ufloat*R[2])/(Ufloat*R[1]) - # @show Ufloat, Tfloat + discriminant = b^2 - 4a*c + discriminant < 0 && return -1,-1,Inf + Ufloat = (sqrt(discriminant) - b) / (2a) + Tfloat = (RR - max(1.0,Ufloat)*R[2])/(max(1.0,Ufloat)*R[1]) if !(isfinite(Tfloat) && isfinite(Ufloat)) return 4, 4, tile_cost(X, 4, 4, UL, TL) + # return itertilesize(X, UL, TL) end Ulow = max(1, floor(Int, Ufloat)) # must be at least 1 Tlow = max(1, floor(Int, Tfloat)) # must be at least 1 diff --git a/src/graphs.jl b/src/graphs.jl index 8146b991d..bf6a8588b 100644 --- a/src/graphs.jl +++ b/src/graphs.jl @@ -469,7 +469,7 @@ function add_operation!( elseif RHS.head === :if add_if!(ls, LHS, RHS, elementbytes, position) else - throw("Expression not recognized:\n$x") + throw("Expression not recognized:\n$RHS") end end add_operation!(ls::LoopSet, RHS::Expr, elementbytes::Int, position::Int) = add_operation!(ls, gensym(:LHS), RHS, elementbytes, position) diff --git a/src/lower_compute.jl b/src/lower_compute.jl index 796db5877..19c158c2c 100644 --- a/src/lower_compute.jl +++ b/src/lower_compute.jl @@ -28,7 +28,7 @@ function lower_compute!( end parentsunrolled = isunrolled_sym.(parents_op, unrolled, tiled) if instr.instr === :identity && name(first(parents_op)) === var && isone(length(parents_op)) - if (opunrolled == first(parentsunrolled)) && ((!isnothing(suffix)) == first(parentstiled)) + if (opunrolled == first(parentsunrolled)) && ((!isnothing(suffix)) == parentstiled[1]) return end end @@ -58,6 +58,7 @@ function lower_compute!( for u ∈ 0:U-1 push!(q.args, Expr(:(=), Symbol(newparentname, u), Symbol(parentname, u))) end + @show parentop reduce_expr!(q, newparentname, Instruction(reduction_to_single_vector(instruction(newparentop))), U) push!(q.args, Expr(:(=), newparentname, Symbol(newparentname, 0))) end diff --git a/src/reconstruct_loopset.jl b/src/reconstruct_loopset.jl index 7a3d6cade..bab2500a7 100644 --- a/src/reconstruct_loopset.jl +++ b/src/reconstruct_loopset.jl @@ -169,7 +169,7 @@ function process_metadata!(ls::LoopSet, AM, num_arrays::Int) for (i,si) ∈ enumerate(AM[3].parameters) sii = si::Int s = gensym(:symlicm) - push!(ls.preamble_symsym, (si, s)) + push!(ls.preamble_symsym, (opoffsets[sii] + 1, s)) pushpreamble!(ls, Expr(:(=), s, Expr(:macrocall, Symbol("@inbounds"), LineNumberNode(@__LINE__,Symbol(@__FILE__)), Expr(:ref, :vargs, num_arrays + i)))) end expandbyoffset!(ls.preamble_symint, AM[4].parameters, opoffsets) @@ -343,6 +343,7 @@ end # elbytes(::VectorizationBase.AbstractPointer{T}) where {T} = sizeof(T)::Int typeeltype(::Type{P}) where {T,P<:VectorizationBase.AbstractPointer{T}} = T typeeltype(::Type{<:AbstractRange{T}}) where {T} = T +# typeeltype(::Any) = Int8 function add_array_symbols!(ls::LoopSet, arraysymbolinds::Vector{Symbol}, offset::Int) for (i,as) ∈ enumerate(arraysymbolinds)