From 5795539d8f7d0959fe1ee9fb2c2512d7e5fd4661 Mon Sep 17 00:00:00 2001 From: Chris Elrod Date: Sun, 19 Apr 2020 11:14:41 -0400 Subject: [PATCH] Update SIMDPirates version and correctly calculate loopdependencies of variables marked as possible reductions that are not. --- Project.toml | 2 +- benchmark/driver.jl | 2 +- docs/src/assets/bench_AmulB_v1.svg | 2 +- docs/src/assets/bench_AmulBt_v1.svg | 2 +- docs/src/assets/bench_Amulvb_v1.svg | 2 +- docs/src/assets/bench_AplusAt_v1.svg | 2 +- docs/src/assets/bench_AtmulB_v1.svg | 2 +- docs/src/assets/bench_AtmulBt_v1.svg | 2 +- docs/src/assets/bench_Atmulvb_v1.svg | 2 +- docs/src/assets/bench_aplusBc_v1.svg | 2 +- docs/src/assets/bench_dot3_v1.svg | 2 +- docs/src/assets/bench_dot_v1.svg | 2 +- docs/src/assets/bench_exp_v1.svg | 2 +- docs/src/assets/bench_filter2d_3x3_v1.svg | 2 +- docs/src/assets/bench_filter2d_dynamic_v1.svg | 2 +- .../src/assets/bench_filter2d_unrolled_v1.svg | 2 +- docs/src/assets/bench_logdettriangle_v1.svg | 2 +- docs/src/assets/bench_random_access_v1.svg | 2 +- docs/src/assets/bench_selfdot_v1.svg | 2 +- docs/src/assets/bench_sse_v1.svg | 2 +- src/add_compute.jl | 1 + test/miscellaneous.jl | 19 +++++++++++++++++++ 22 files changed, 40 insertions(+), 20 deletions(-) diff --git a/Project.toml b/Project.toml index ed66efecc..e32f73f70 100644 --- a/Project.toml +++ b/Project.toml @@ -15,7 +15,7 @@ VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f" [compat] DocStringExtensions = "0.8" OffsetArrays = "1" -SIMDPirates = "0.7.13" +SIMDPirates = "0.7.14" SLEEFPirates = "0.4.4" UnPack = "0" VectorizationBase = "0.10.4" diff --git a/benchmark/driver.jl b/benchmark/driver.jl index 9d536f7c5..c30f9c380 100644 --- a/benchmark/driver.jl +++ b/benchmark/driver.jl @@ -45,7 +45,7 @@ logdettriangle_bench = benchmark_logdettriangle(sizes) v = 1 filetype = "svg" -const PICTURES = joinpath(pkgdir("LoopVectorization"), "docs", "src", "assets") +const PICTURES = joinpath(pkgdir(LoopVectorization), "docs", "src", "assets") save(joinpath(PICTURES, "bench_filter2d_dynamic_v$v.$filetype"), plot(filter2d_dynamic_bench)); save(joinpath(PICTURES, "bench_filter2d_3x3_v$v.$filetype"), plot(filter2d_3x3_bench)); save(joinpath(PICTURES, "bench_filter2d_unrolled_v$v.$filetype"), plot(filter2d_unrolled_bench)); diff --git a/docs/src/assets/bench_AmulB_v1.svg b/docs/src/assets/bench_AmulB_v1.svg index 948d413de..e12871dec 100644 --- a/docs/src/assets/bench_AmulB_v1.svg +++ b/docs/src/assets/bench_AmulB_v1.svg @@ -1,3 +1,3 @@ -0102030405060708090100110120130140150160170180190200210220230240250260Size0102030405060708090100110120130GFLOPSClangGFort-intrinsicGFortranJuliaLoopVectorizationMKLOpenBLASg++ & Eigen-3iccicpc & Eigen-3ifortifort-intrinsicMethod +0102030405060708090100110120130140150160170180190200210220230240250260Size0102030405060708090100110120130GFLOPSClangGFort-intrinsicGFortranJuliaLoopVectorizationMKLOpenBLASg++ & Eigen-3iccicpc & Eigen-3ifortifort-intrinsicMethod diff --git a/docs/src/assets/bench_AmulBt_v1.svg b/docs/src/assets/bench_AmulBt_v1.svg index 9c9a52cab..05b04560c 100644 --- a/docs/src/assets/bench_AmulBt_v1.svg +++ b/docs/src/assets/bench_AmulBt_v1.svg @@ -1,3 +1,3 @@ -0102030405060708090100110120130140150160170180190200210220230240250260Size0102030405060708090100110120130GFLOPSClangGFort-intrinsicGFortranJuliaLoopVectorizationMKLOpenBLASg++ & Eigen-3iccicpc & Eigen-3ifortifort-intrinsicMethod +0102030405060708090100110120130140150160170180190200210220230240250260Size0102030405060708090100110120130GFLOPSClangGFort-intrinsicGFortranJuliaLoopVectorizationMKLOpenBLASg++ & Eigen-3iccicpc & Eigen-3ifortifort-intrinsicMethod diff --git a/docs/src/assets/bench_Amulvb_v1.svg b/docs/src/assets/bench_Amulvb_v1.svg index 7be924d6c..907ed55fc 100644 --- a/docs/src/assets/bench_Amulvb_v1.svg +++ b/docs/src/assets/bench_Amulvb_v1.svg @@ -1,3 +1,3 @@ -0102030405060708090100110120130140150160170180190200210220230240250260Size05101520253035404550556065707580GFLOPSClangGFort-intrinsicGFortranJuliaLoopVectorizationMKLOpenBLASg++ & Eigen-3iccicpc & Eigen-3ifortifort-intrinsicMethod +0102030405060708090100110120130140150160170180190200210220230240250260Size05101520253035404550556065707580GFLOPSClangGFort-intrinsicGFortranJuliaLoopVectorizationMKLOpenBLASg++ & Eigen-3iccicpc & Eigen-3ifortifort-intrinsicMethod diff --git a/docs/src/assets/bench_AplusAt_v1.svg b/docs/src/assets/bench_AplusAt_v1.svg index b065eca61..97226ef88 100644 --- a/docs/src/assets/bench_AplusAt_v1.svg +++ b/docs/src/assets/bench_AplusAt_v1.svg @@ -1,3 +1,3 @@ -0102030405060708090100110120130140150160170180190200210220230240250260Size0.00.51.01.52.02.53.03.54.04.55.0GFLOPSClangGFortranGFortran-builtinJuliaLoopVectorizationg++ & Eigen-3iccicpc & Eigen-3ifortifort-builtinMethod +0102030405060708090100110120130140150160170180190200210220230240250260Size0.00.51.01.52.02.53.03.54.04.55.0GFLOPSClangGFortranGFortran-builtinJuliaLoopVectorizationg++ & Eigen-3iccicpc & Eigen-3ifortifort-builtinMethod diff --git a/docs/src/assets/bench_AtmulB_v1.svg b/docs/src/assets/bench_AtmulB_v1.svg index 4e753e043..74ba02d81 100644 --- a/docs/src/assets/bench_AtmulB_v1.svg +++ b/docs/src/assets/bench_AtmulB_v1.svg @@ -1,3 +1,3 @@ -0102030405060708090100110120130140150160170180190200210220230240250260Size0102030405060708090100110120GFLOPSClangGFort-intrinsicGFortranJuliaLoopVectorizationMKLOpenBLASg++ & Eigen-3iccicpc & Eigen-3ifortifort-intrinsicMethod +0102030405060708090100110120130140150160170180190200210220230240250260Size0102030405060708090100110120GFLOPSClangGFort-intrinsicGFortranJuliaLoopVectorizationMKLOpenBLASg++ & Eigen-3iccicpc & Eigen-3ifortifort-intrinsicMethod diff --git a/docs/src/assets/bench_AtmulBt_v1.svg b/docs/src/assets/bench_AtmulBt_v1.svg index 7ea478924..dcf929dc7 100644 --- a/docs/src/assets/bench_AtmulBt_v1.svg +++ b/docs/src/assets/bench_AtmulBt_v1.svg @@ -1,3 +1,3 @@ -0102030405060708090100110120130140150160170180190200210220230240250260Size0102030405060708090100110120GFLOPSClangGFort-intrinsicGFortranJuliaLoopVectorizationMKLOpenBLASg++ & Eigen-3iccicpc & Eigen-3ifortifort-intrinsicMethod +0102030405060708090100110120130140150160170180190200210220230240250260Size0102030405060708090100110120GFLOPSClangGFort-intrinsicGFortranJuliaLoopVectorizationMKLOpenBLASg++ & Eigen-3iccicpc & Eigen-3ifortifort-intrinsicMethod diff --git a/docs/src/assets/bench_Atmulvb_v1.svg b/docs/src/assets/bench_Atmulvb_v1.svg index 1945d87c7..b7951e15d 100644 --- a/docs/src/assets/bench_Atmulvb_v1.svg +++ b/docs/src/assets/bench_Atmulvb_v1.svg @@ -1,3 +1,3 @@ -0102030405060708090100110120130140150160170180190200210220230240250260Size02468101214161820222426283032343638404244GFLOPSClangGFort-intrinsicGFortranJuliaLoopVectorizationMKLOpenBLASg++ & Eigen-3iccicpc & Eigen-3ifortifort-intrinsicMethod +0102030405060708090100110120130140150160170180190200210220230240250260Size02468101214161820222426283032343638404244GFLOPSClangGFort-intrinsicGFortranJuliaLoopVectorizationMKLOpenBLASg++ & Eigen-3iccicpc & Eigen-3ifortifort-intrinsicMethod diff --git a/docs/src/assets/bench_aplusBc_v1.svg b/docs/src/assets/bench_aplusBc_v1.svg index 47fe7b3d1..07f8b0778 100644 --- a/docs/src/assets/bench_aplusBc_v1.svg +++ b/docs/src/assets/bench_aplusBc_v1.svg @@ -1,3 +1,3 @@ -0102030405060708090100110120130140150160170180190200210220230240250260Size024681012141618202224262830GFLOPSClangGFortranJuliaLoopVectorizationg++ & Eigen-3iccicpc & Eigen-3ifortMethod +0102030405060708090100110120130140150160170180190200210220230240250260Size024681012141618202224262830GFLOPSClangGFortranJuliaLoopVectorizationg++ & Eigen-3iccicpc & Eigen-3ifortMethod diff --git a/docs/src/assets/bench_dot3_v1.svg b/docs/src/assets/bench_dot3_v1.svg index 5ebaddd87..5483617af 100644 --- a/docs/src/assets/bench_dot3_v1.svg +++ b/docs/src/assets/bench_dot3_v1.svg @@ -1,3 +1,3 @@ -0102030405060708090100110120130140150160170180190200210220230240250260Size05101520253035404550556065707580GFLOPSClangGFortranJuliaLinearAlgebraLoopVectorizationg++ & Eigen-3iccicpc & Eigen-3ifortMethod +0102030405060708090100110120130140150160170180190200210220230240250260Size051015202530354045505560657075808590GFLOPSClangGFortranJuliaLinearAlgebraLoopVectorizationg++ & Eigen-3iccicpc & Eigen-3ifortMethod diff --git a/docs/src/assets/bench_dot_v1.svg b/docs/src/assets/bench_dot_v1.svg index 91b0385da..8f7aa896a 100644 --- a/docs/src/assets/bench_dot_v1.svg +++ b/docs/src/assets/bench_dot_v1.svg @@ -1,3 +1,3 @@ -0102030405060708090100110120130140150160170180190200210220230240250260Size02468101214161820222426283032343638404244GFLOPSClangGFortranJuliaLoopVectorizationOpenBLASg++ & Eigen-3iccicpc & Eigen-3ifortMethod +0102030405060708090100110120130140150160170180190200210220230240250260Size02468101214161820222426283032343638404244GFLOPSClangGFortranJuliaLoopVectorizationOpenBLASg++ & Eigen-3iccicpc & Eigen-3ifortMethod diff --git a/docs/src/assets/bench_exp_v1.svg b/docs/src/assets/bench_exp_v1.svg index c59c0d26a..bbaf972d4 100644 --- a/docs/src/assets/bench_exp_v1.svg +++ b/docs/src/assets/bench_exp_v1.svg @@ -1,3 +1,3 @@ -0102030405060708090100110120130140150160170180190200210220230240250260Size0.00.20.40.60.81.01.21.41.61.82.02.22.42.62.8GFLOPSClangGFortranJuliaLoopVectorizationiccifortMethod +0102030405060708090100110120130140150160170180190200210220230240250260Size0.00.20.40.60.81.01.21.41.61.82.02.22.42.6GFLOPSClangGFortranJuliaLoopVectorizationiccifortMethod diff --git a/docs/src/assets/bench_filter2d_3x3_v1.svg b/docs/src/assets/bench_filter2d_3x3_v1.svg index 5472fbcbf..68a325219 100644 --- a/docs/src/assets/bench_filter2d_3x3_v1.svg +++ b/docs/src/assets/bench_filter2d_3x3_v1.svg @@ -1,3 +1,3 @@ -0102030405060708090100110120130140150160170180190200210220230240250260Size05101520253035404550556065707580GFLOPSClangGFortranJuliaLoopVectorizationiccifortMethod +0102030405060708090100110120130140150160170180190200210220230240250260Size05101520253035404550556065707580GFLOPSClangGFortranJuliaLoopVectorizationiccifortMethod diff --git a/docs/src/assets/bench_filter2d_dynamic_v1.svg b/docs/src/assets/bench_filter2d_dynamic_v1.svg index 26a4929bc..24818e8a5 100644 --- a/docs/src/assets/bench_filter2d_dynamic_v1.svg +++ b/docs/src/assets/bench_filter2d_dynamic_v1.svg @@ -1,3 +1,3 @@ -0102030405060708090100110120130140150160170180190200210220230240250260Size051015202530354045505560GFLOPSClangGFortranJuliaLoopVectorizationiccifortMethod +0102030405060708090100110120130140150160170180190200210220230240250260Size051015202530354045505560GFLOPSClangGFortranJuliaLoopVectorizationiccifortMethod diff --git a/docs/src/assets/bench_filter2d_unrolled_v1.svg b/docs/src/assets/bench_filter2d_unrolled_v1.svg index bb73a1291..c723fe24a 100644 --- a/docs/src/assets/bench_filter2d_unrolled_v1.svg +++ b/docs/src/assets/bench_filter2d_unrolled_v1.svg @@ -1,3 +1,3 @@ -0102030405060708090100110120130140150160170180190200210220230240250260Size05101520253035404550556065707580GFLOPSClangGFortranJuliaLoopVectorizationiccifortMethod +0102030405060708090100110120130140150160170180190200210220230240250260Size051015202530354045505560657075808590GFLOPSClangGFortranJuliaLoopVectorizationiccifortMethod diff --git a/docs/src/assets/bench_logdettriangle_v1.svg b/docs/src/assets/bench_logdettriangle_v1.svg index efd3c5d85..1939f46f1 100644 --- a/docs/src/assets/bench_logdettriangle_v1.svg +++ b/docs/src/assets/bench_logdettriangle_v1.svg @@ -1,3 +1,3 @@ -0102030405060708090100110120130140150160170180190200210220230240250260Size0.00.10.20.30.40.50.60.70.80.91.01.11.21.3GFLOPSClangGFortranJuliaJulia-builtinLoopVectorizationiccifortMethod +0102030405060708090100110120130140150160170180190200210220230240250260Size0.00.10.20.30.40.50.60.70.80.91.01.11.21.3GFLOPSClangGFortranJuliaJulia-builtinLoopVectorizationiccifortMethod diff --git a/docs/src/assets/bench_random_access_v1.svg b/docs/src/assets/bench_random_access_v1.svg index 73296bbfe..f409f3542 100644 --- a/docs/src/assets/bench_random_access_v1.svg +++ b/docs/src/assets/bench_random_access_v1.svg @@ -1,3 +1,3 @@ -0102030405060708090100110120130140150160170180190200210220230240250260Size0.00.20.40.60.81.01.21.41.61.82.02.22.42.62.83.03.23.43.63.84.0GFLOPSClangGFortranJuliaLoopVectorizationiccifortMethod +0102030405060708090100110120130140150160170180190200210220230240250260Size0.00.20.40.60.81.01.21.41.61.82.02.22.42.62.83.03.23.43.63.84.0GFLOPSClangGFortranJuliaLoopVectorizationiccifortMethod diff --git a/docs/src/assets/bench_selfdot_v1.svg b/docs/src/assets/bench_selfdot_v1.svg index 260d0250a..d91f24228 100644 --- a/docs/src/assets/bench_selfdot_v1.svg +++ b/docs/src/assets/bench_selfdot_v1.svg @@ -1,3 +1,3 @@ -0102030405060708090100110120130140150160170180190200210220230240250260Size051015202530354045505560GFLOPSClangGFortranJuliaLoopVectorizationOpenBLASg++ & Eigen-3iccicpc & Eigen-3ifortMethod +0102030405060708090100110120130140150160170180190200210220230240250260Size051015202530354045505560GFLOPSClangGFortranJuliaLoopVectorizationOpenBLASg++ & Eigen-3iccicpc & Eigen-3ifortMethod diff --git a/docs/src/assets/bench_sse_v1.svg b/docs/src/assets/bench_sse_v1.svg index a3b507c89..ae426581c 100644 --- a/docs/src/assets/bench_sse_v1.svg +++ b/docs/src/assets/bench_sse_v1.svg @@ -1,3 +1,3 @@ -0102030405060708090100110120130140150160170180190200210220230240250260Size05101520253035404550556065707580GFLOPSClangGFortranJuliaLoopVectorizationOpenBLASg++ & Eigen-3iccicpc & Eigen-3ifortMethod +0102030405060708090100110120130140150160170180190200210220230240250260Size051015202530354045505560657075808590GFLOPSClangGFortranJuliaLoopVectorizationOpenBLASg++ & Eigen-3iccicpc & Eigen-3ifortMethod diff --git a/src/add_compute.jl b/src/add_compute.jl index 958cec440..6815add63 100644 --- a/src/add_compute.jl +++ b/src/add_compute.jl @@ -240,6 +240,7 @@ function add_compute!( # parent = getop(ls, var, elementbytes) if length(reduceddeps) == 0 insert!(vparents, reduction_ind, parent) + mergesetv!(deps, loopdependencies(parent)) op = Operation(length(operations(ls)), var, elementbytes, instruction(ls,instr), compute, deps, reduceddeps, vparents) pushop!(ls, op, var) else diff --git a/test/miscellaneous.jl b/test/miscellaneous.jl index 05e86ee2b..8c132abc0 100644 --- a/test/miscellaneous.jl +++ b/test/miscellaneous.jl @@ -153,6 +153,20 @@ using Test end end + function setcolumstovectorplus100!(Z::AbstractArray{T}, A) where {T} + for i = axes(A,1), j = axes(Z,2) + acc = zero(T) + acc = acc + A[i] + 100 + Z[i, j] = acc + end + end + function setcolumstovectorplus100avx!(Z::AbstractArray{T}, A) where {T} + @avx for i = axes(A,1), j = axes(Z,2) + acc = zero(T) + acc = acc + A[i] + 100 + Z[i, j] = acc + end + end function mvp(P, basis, coeffs::Vector{T}) where {T} len_c = length(coeffs) @@ -557,6 +571,11 @@ using Test @test dot3v2avx(x, A, y) ≈ d3 @test dot3_avx(x, A, y) ≈ d3 + A2 = similar(A); + setcolumstovectorplus100!(A, x) + setcolumstovectorplus100avx!(A2, x) + @test A == A2 + maxdeg = 20; nbasis = 1_000; dim = 15; r = T == Float32 ? (Int32(1):Int32(maxdeg+1)) : (1:maxdeg+1) basis = rand(r, (dim, nbasis));