diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 359f191..d7bd544 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -10,7 +10,7 @@ jobs: fail-fast: false matrix: version: - - '1.5' + - '1.10' - '1' - 'nightly' os: diff --git a/Project.toml b/Project.toml index 0fc7612..6e18a13 100644 --- a/Project.toml +++ b/Project.toml @@ -7,13 +7,14 @@ version = "0.1.17" BitTwiddlingConvenienceFunctions = "62783981-4cbd-42fc-bca8-16325de8dc4b" IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" +Preferences = "21216c6a-2e73-6563-6e65-726566657250" Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3" [compat] BitTwiddlingConvenienceFunctions = "0.1" IfElse = "0.1" Static = "0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1" -julia = "1.5" +julia = "1.10" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/docs/src/index.md b/docs/src/index.md index feba82f..1dc53af 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -12,3 +12,9 @@ Documentation for [HostCPUFeatures](https://github.com/JuliaSIMD/HostCPUFeatures ```@autodocs Modules = [HostCPUFeatures] ``` + +## Supported Preferences + + - `cpu_target`: if provided, use this string as your CPU target for feature detection instead of `JULIA_CPU_TARGET` + - `freeze_cpu_target`: if `true`, "freeze" the features detected based on your precompile-time CPU target and do not perform runtime feature detection; this is enabled automatically when the CPU target does not include `native` + - `allow_runtime_invalidation`: if `false` (the default), emit a warning (instead of invalidating) when runtime feature detection finds CPU features that don't match those recorded at precompile-time diff --git a/src/HostCPUFeatures.jl b/src/HostCPUFeatures.jl index 3e8b5ba..7560a17 100644 --- a/src/HostCPUFeatures.jl +++ b/src/HostCPUFeatures.jl @@ -7,12 +7,32 @@ end using Libdl, Static using Static: Zero, One, lt, gt using IfElse: ifelse +using Preferences using BitTwiddlingConvenienceFunctions: prevpow2, nextpow2, intlog2 export has_feature, fma_fast, pick_vector_width, 
pick_vector_width_shift, register_count, register_size, simd_integer_register_size +_cpu_target = if @has_preference("cpu_target") + @load_preference("cpu_target") +else + Base.unsafe_string(Base.JLOptions().cpu_target) +end + +const build_cpu_target = if occursin("native", _cpu_target) + "native" # 'native' takes priority if provided +else + split(_cpu_target, ";")[1] +end + +# If true, this will opt-in to "freeze" an under-approximation of the CPU features at precompile- +# time based on the CPU target. +# +# This is only done by default if "native" was excluded from the CPU target (or via a preference). +const freeze_cpu_target = + @load_preference("freeze_cpu_target", false) || build_cpu_target != "native" + function get_cpu_name()::String if isdefined(Sys, :CPU_NAME) Sys.CPU_NAME @@ -37,19 +57,43 @@ unwrap(::StaticSymbol{S}) where {S} = S @noinline function redefine() @debug "Defining CPU name." - define_cpu_name() + redefine_cpu_name() reset_features!() reset_extra_features!() end const BASELINE_CPU_NAME = get_cpu_name() +const allow_eval = @load_preference("allow_runtime_invalidation", false) + +function make_generic(target) + target == "native" && return false + if Sys.ARCH === :x86_64 || Sys.ARCH === :i686 + make_generic_x86(target) + return true + else + return false + end +end + +make_generic(build_cpu_target) + function __init__() ccall(:jl_generating_output, Cint, ()) == 1 && return - if Sys.ARCH === :x86_64 || Sys.ARCH === :i686 - target = Base.unsafe_string(Base.JLOptions().cpu_target) - occursin("native", target) || return make_generic(target) + freeze_cpu_target && return # CPU info fixed at precompile-time + + runtime_target = Base.unsafe_string(Base.JLOptions().cpu_target) + if !occursin("native", runtime_target) + # The CPU target included "native" at pre-compile time, but at runtime it did not! 
+ # + # Fixing this discrepancy will invalidate the whole world (so it should probably + # throw an error), but we do it anyway for backwards-compatibility. + if make_generic(runtime_target) + return nothing + end + end + if BASELINE_CPU_NAME != Sys.CPU_NAME::String + redefine() end - BASELINE_CPU_NAME == Sys.CPU_NAME::String || redefine() return nothing end diff --git a/src/cpu_info.jl b/src/cpu_info.jl index 52b49fe..cac0552 100644 --- a/src/cpu_info.jl +++ b/src/cpu_info.jl @@ -39,17 +39,22 @@ function set_features!() end Libc.free(features_cstring) end -set_features!() - +if build_cpu_target == "native" + set_features!() +end function reset_features!() features, features_cstring = feature_string() for ext ∈ features feature, has = process_feature(ext) if _has_feature(feature) ≠ has - @debug "Defining $(has ? "presence" : "absense") of feature $feature." - set_feature(feature, has) + if allow_eval + @debug "Defining $(has ? "presence" : "absense") of feature $feature." + set_feature(feature, has) + else + @warn "Runtime invalidation was disabled, but the CPU info is out-of-date.\nWill continue with incorrect CPU feature flag: $ext." + end end end Libc.free(features_cstring) @@ -58,8 +63,16 @@ end register_size(::Type{T}) where {T} = register_size() register_size(::Type{T}) where {T<:Union{Signed,Unsigned}} = simd_integer_register_size() -function define_cpu_name() +function redefine_cpu_name() cpu = QuoteNode(Symbol(get_cpu_name())) - @eval cpu_name() = Val{$cpu}() + if allow_eval + @eval cpu_name() = Val{$cpu}() + else + @warn "Runtime invalidation was disabled, but the CPU info is out-of-date.\nWill continue with incorrect CPU name (from build time)." + end +end + +let _cpu = QuoteNode(Symbol(build_cpu_target == "native" ? 
+ get_cpu_name() : build_cpu_target)) + @eval cpu_name() = Val{$_cpu}() end -define_cpu_name() diff --git a/src/cpu_info_aarch64.jl b/src/cpu_info_aarch64.jl index 605d54d..e2a2057 100644 --- a/src/cpu_info_aarch64.jl +++ b/src/cpu_info_aarch64.jl @@ -28,7 +28,7 @@ function _set_sve_vector_width!(bytes = _dynamic_register_size()) end -if _has_aarch64_sve()# && !(Bool(has_feature(Val(:aarch64_sve)))) +if build_cpu_target == "native" && _has_aarch64_sve()# && !(Bool(has_feature(Val(:aarch64_sve)))) has_feature(::Val{:aarch64_sve_cpuid}) = True() _set_sve_vector_width!() else @@ -39,10 +39,20 @@ end function reset_extra_features!() drs = _dynamic_register_size() - register_size() ≠ drs && _set_sve_vector_width!(drs) + if register_size() ≠ drs + if allow_eval + _set_sve_vector_width!(drs) + else + @warn "Runtime invalidation was disabled, but the CPU info is out-of-date.\nWill continue with incorrect CPU register size." + end + end hassve = _has_aarch64_sve() if hassve ≠ has_feature(Val(:aarch64_sve_cpuid)) - @eval has_feature(::Val{:aarch64_sve_cpuid}) = $(Expr(:call, hassve ? :True : :False)) + if allow_eval + @eval has_feature(::Val{:aarch64_sve_cpuid}) = $(Expr(:call, hassve ? :True : :False)) + else + @warn "Runtime invalidation was disabled, but the CPU info is out-of-date.\nWill continue with incorrect CPU feature flag: :aarch64_sve_cpuid." + end end end diff --git a/src/cpu_info_x86.jl b/src/cpu_info_x86.jl index b0348c0..0cf1f50 100644 --- a/src/cpu_info_x86.jl +++ b/src/cpu_info_x86.jl @@ -32,18 +32,26 @@ fast_int64_to_double() = has_feature(Val(:x86_64_avx512dq)) fast_half() = False() -@noinline function setfeaturefalse(s) +@inline function setfeaturefalse(s) if has_feature(Val(s)) === True() - @eval has_feature(::Val{$(QuoteNode(s))}) = False() + if allow_eval + @eval has_feature(::Val{$(QuoteNode(s))}) = False() + else + @warn "Runtime invalidation was disabled, but the CPU info is out-of-date.\nWill continue with incorrect CPU feature flag: $s." 
+ end end end -@noinline function setfeaturetrue(s) +@inline function setfeaturetrue(s) if has_feature(Val(s)) === False() - @eval has_feature(::Val{$(QuoteNode(s))}) = True() + if allow_eval + @eval has_feature(::Val{$(QuoteNode(s))}) = True() + else + @warn "Runtime invalidation was disabled, but the CPU info is out-of-date.\nWill continue with incorrect CPU feature flag: $s." + end end end -function make_generic(target) +function make_generic_x86(target) if occursin("tigerlake", target) || occursin("znver4", target) || occursin("sapphirerapids", target) # most feature-complete architectures we use setfeaturetrue(:x86_64_avx512ifma)