Skip to content

Commit ac31711

Browse files
authored
Purge num_threads (#454)
1 parent 35f8310 commit ac31711

File tree

8 files changed

+117
-105
lines changed

8 files changed

+117
-105
lines changed

Project.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.12.143"
4+
version = "0.12.144"
55

66
[deps]
77
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
@@ -34,7 +34,7 @@ ArrayInterface = "6"
3434
ArrayInterfaceCore = "0.1.5"
3535
ArrayInterfaceOffsetArrays = "0.1.2"
3636
ArrayInterfaceStaticArrays = "0.1.2"
37-
CPUSummary = "0.1.3 - 0.1.8, 0.1.11"
37+
CPUSummary = "0.1.3 - 0.1.8, 0.1.11, 0.2.1"
3838
ChainRulesCore = "1"
3939
CloseOpenIntervals = "0.1.10"
4040
DocStringExtensions = "0.8, 0.9"
@@ -43,7 +43,7 @@ HostCPUFeatures = "0.1.10"
4343
IfElse = "0.1"
4444
LayoutPointers = "0.1.11"
4545
OffsetArrays = "1.4.1"
46-
PolyesterWeave = "0.1.10"
46+
PolyesterWeave = "0.1.10, 0.2"
4747
SIMDDualNumbers = "0.1"
4848
SIMDTypes = "0.1"
4949
SLEEFPirates = "0.6.23"

src/LoopVectorization.jl

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,13 @@ using VectorizationBase:
105105
maybestaticsize#,zero_mask
106106

107107
using HostCPUFeatures:
108-
pick_vector_width, register_size, register_count, has_opmask_registers, unwrap, get_cpu_name
109-
using CPUSummary: num_threads, num_cores, cache_linesize, cache_size
108+
pick_vector_width,
109+
register_size,
110+
register_count,
111+
has_opmask_registers,
112+
unwrap,
113+
get_cpu_name
114+
using CPUSummary: num_cores, cache_linesize, cache_size
110115

111116

112117
using IfElse: ifelse

src/broadcast.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -534,7 +534,7 @@ end
534534
::Val{UNROLL},
535535
::Val{dontbc},
536536
) where {T<:NativeTypes,N,BC<:Union{Broadcasted,Product},Mod,UNROLL,dontbc}
537-
2 + 1
537+
# 2 + 1
538538
# we have an N dimensional loop.
539539
# need to construct the LoopSet
540540
ls = LoopSet(Mod)

src/codegen/lower_threads.jl

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -64,14 +64,6 @@ struct StaticType{T} end
6464
)
6565
end
6666

67-
# function approx_cbrt(x)
68-
# s = significand(x)
69-
# e = exponent(x)
70-
71-
# # 40 + 0.00020833333333333335*(x-64000) -2.1701388888888896e-9*(x-64000)^2*0.5 + 5.6514033564814844e-14 * (x-64000)^3/6
72-
# end
73-
lv_max_num_threads() = ifelse(gt(num_threads(), num_cores()), num_cores(), num_threads())
74-
7567
@generated function calc_factors(::StaticInt{nc}) where {nc}
7668
t = Expr(:tuple)
7769
for i nc:-1:1
@@ -148,10 +140,10 @@ end
148140

149141
# if a threaded loop is vectorized, call
150142
@inline function choose_num_blocks(M, ::StaticInt{U}, nt) where {U}
151-
_choose_num_blocks(M % UInt, StaticInt{U}(), nt, lv_max_num_threads())
143+
_choose_num_blocks(M % UInt, StaticInt{U}(), nt, num_cores())
152144
end
153145
# otherwise, call
154-
@inline choose_num_blocks(nt, ::StaticInt{NC} = lv_max_num_threads()) where {NC} =
146+
@inline choose_num_blocks(nt, ::StaticInt{NC} = num_cores()) where {NC} =
155147
@inbounds choose_num_block_table(StaticInt{NC}())[nt]
156148

157149
scale_cost(c) = @fastmath c * (Sys.ARCH === :x86_64 ? 0.0225 : 0.005625)
@@ -168,12 +160,15 @@ end
168160
NT::UInt,
169161
x::Base.BitInteger,
170162
) where {T<:Union{Float32,Float64}}
171-
min(
172-
Base.fptoui(
173-
UInt,
174-
Base.ceil_llvm(Base.mul_float_fast(C, Base.sqrt_llvm_fast(Base.uitofp(T, x)))),
163+
max(
164+
min(
165+
Base.fptoui(
166+
UInt,
167+
Base.ceil_llvm(Base.mul_float_fast(C, Base.sqrt_llvm_fast(Base.uitofp(T, x)))),
168+
),
169+
NT,
175170
),
176-
NT,
171+
one(UInt),
177172
)
178173
end
179174
function push_loop_length_expr!(q::Expr, ls::LoopSet)
@@ -431,9 +426,12 @@ function thread_one_loops_expr(
431426
if all(isstaticloop, ls.loops)
432427
_num_threads = _choose_num_threads(c, ntmax, Int64(looplen))::UInt
433428
_num_threads > 1 || return avx_body(ls, UNROLL)
434-
choose_nthread = Expr(:(=), Symbol("#nthreads#"), _num_threads)
429+
ntcallexpr = Expr(:call, %, Expr(:call, Threads.nthreads), UInt)
430+
choose_nthread =
431+
Expr(:(=), Symbol("#nthreads#"), Expr(:call, min, ntcallexpr, _num_threads))
435432
else
436-
choose_nthread = :(_choose_num_threads($(Float32(c)), $ntmax))
433+
choose_nthread =
434+
:(_choose_num_threads($(Float32(c)), min(Threads.nthreads() % UInt, $ntmax)))
437435
push_loop_length_expr!(choose_nthread, ls)
438436
choose_nthread = Expr(:(=), Symbol("#nthreads#"), choose_nthread)
439437
end
@@ -623,9 +621,12 @@ function thread_two_loops_expr(
623621
if all(isstaticloop, ls.loops)
624622
_num_threads = _choose_num_threads(c, ntmax, Int64(looplen))::UInt
625623
_num_threads > 1 || return avx_body(ls, UNROLL)
626-
choose_nthread = Expr(:(=), Symbol("#nthreads#"), _num_threads)
624+
ntcallexpr = Expr(:call, %, Expr(:call, Threads.nthreads), UInt)
625+
choose_nthread =
626+
Expr(:(=), Symbol("#nthreads#"), Expr(:call, min, ntcallexpr, _num_threads))
627627
else
628-
choose_nthread = :(_choose_num_threads($(Float32(c)), $ntmax))
628+
choose_nthread =
629+
:(_choose_num_threads($(Float32(c)), min(Threads.nthreads() % UInt, $ntmax)))
629630
push_loop_length_expr!(choose_nthread, ls)
630631
choose_nthread = Expr(:(=), Symbol("#nthreads#"), choose_nthread)
631632
end

src/condense_loopset.jl

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -374,8 +374,7 @@ val(x) = Expr(:call, Expr(:curly, :Val, x))
374374
ri = argmin(R)
375375
quote
376376
$(Expr(:meta, :inline))
377-
p, li =
378-
VectorizationBase.tdot(x, (vsub_nsw(getfield(i, 1), one($I)),), strides(x))
377+
p, li = VectorizationBase.tdot(x, (vsub_nsw(getfield(i, 1), one($I)),), strides(x))
379378
ptr = gep(p, li)
380379
si = ArrayInterface.StrideIndex{1,$(R[ri],),$(C === 1 ? 1 : 0)}(
381380
(getfield(strides(x), $ri),),
@@ -572,7 +571,7 @@ end
572571
StaticInt{W}(),
573572
register_size(),
574573
available_registers(),
575-
lv_max_num_threads(),
574+
num_cores(), #FIXME
576575
cache_linesize(),
577576
)
578577
end
@@ -814,11 +813,12 @@ function generate_call_types(
814813
add_external_functions!(extra_args, ls) # extract_external_functions!
815814
add_outerreduct_types!(extra_args, ls) # extract_outerreduct_types!
816815
argcestimate = length(extra_args.args) - 1
817-
for ref = ls.refs_aliasing_syms
816+
for ref in ls.refs_aliasing_syms
818817
argcestimate += length(ref.loopedindex)
819818
end
820819
manyarg = !debug && (argcestimate > 16)
821-
func = debug ? lv(:_turbo_loopset_debug) : (manyarg ? lv(:_turbo_manyarg!) : lv(:_turbo_!))
820+
func =
821+
debug ? lv(:_turbo_loopset_debug) : (manyarg ? lv(:_turbo_manyarg!) : lv(:_turbo_!))
822822
q = Expr(
823823
:call,
824824
func,
@@ -835,18 +835,12 @@ function generate_call_types(
835835
vargsym = gensym(:vargsym)
836836
push!(
837837
q.args,
838-
Expr(:call, GlobalRef(Base, :Val), Expr(:call, GlobalRef(Base, :typeof), vargsym))
838+
Expr(:call, GlobalRef(Base, :Val), Expr(:call, GlobalRef(Base, :typeof), vargsym)),
839839
)
840840
if manyarg
841-
push!(
842-
q.args,
843-
Expr(:call, lv(:flatten_to_tuple), vargsym),
844-
)
841+
push!(q.args, Expr(:call, lv(:flatten_to_tuple), vargsym))
845842
else
846-
push!(
847-
q.args,
848-
Expr(:(...), Expr(:call, lv(:flatten_to_tuple), vargsym)),
849-
)
843+
push!(q.args, Expr(:(...), Expr(:call, lv(:flatten_to_tuple), vargsym)))
850844
end
851845
Expr(:block, Expr(:(=), vargsym, Expr(:tuple, lbarg, extra_args)))
852846
end
@@ -943,6 +937,10 @@ for f ∈ (convert, reinterpret, trunc, unsafe_trunc, round, ceil, floor)
943937
@eval can_turbo(::typeof($f), ::Val{2}) = true
944938
end
945939

940+
# @inline function _can_turbo(f::F, t::Vararg{Any,K}) where {F,K}
941+
# Base.promote_op(f, t...) !== Union{}
942+
# end
943+
946944
"""
947945
check_turbo_safe(ls::LoopSet)
948946

test/.JuliaFormatter.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
indent = 2
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
indent = 2

0 commit comments

Comments
 (0)