diff --git a/gen/rocrand/generator.jl b/gen/rocrand/generator.jl new file mode 100644 index 000000000..29182660d --- /dev/null +++ b/gen/rocrand/generator.jl @@ -0,0 +1,76 @@ +using Clang.Generators +using JuliaFormatter + +include_dir = normpath(joinpath(ENV["ROCM_PATH"], "include")) +rocrand_dir = joinpath(include_dir, "rocrand") +options = load_options("rocrand/rocrand-generator.toml") + +args = get_default_args() +push!(args, "-I$include_dir") + +rocrand_h = read(joinpath(rocrand_dir, "rocrand.h"), String) +open("./rocrand.h", "w") do io + println(io, """ + #include + + typedef void* hipStream_t; + typedef struct { unsigned int x, y, z, w; } uint4; + """) + print(io, rocrand_h) +end +headers = [ + "./rocrand.h" +] + +ctx = create_context(headers, args, options) + +# build without printing so we can do custom rewriting +build!(ctx, BUILDSTAGE_NO_PRINTING) + +# custom rewriter +function rewrite!(e::Expr) + if e.head === :const + @assert Meta.isexpr(e.args[1], :(=)) + rhs = e.args[1].args[2] + if Meta.isexpr(rhs, :call) + if rhs.args[1] == :(*) && rhs.args[3] == :f + e.args[1].args[2] = :(Float32($(rhs.args[2]))) + elseif rhs.args[1] == :(Cuint) + e.args[1].args[2] = :($(rhs.args[2]) % Cuint) + end + end + return e + end + (e.head === :function && Meta.isexpr(e.args[1], :call)) || return e + f = e.args[1].args[1] + if !(f isa Symbol) + @assert f in (:(Base.getproperty), :(Base.setproperty!), :(Base.propertynames)) + return e + end + stmts = e.args[2].args + map!(stmts, stmts) do ex + Meta.isexpr(ex, :macrocall) || return ex + ex.args[1] === Symbol("@ccall") || return ex + # TODO: should this be `@gcsafe_ccall`? + # ex.args[1] = Symbol("@gcsafe_ccall") + Expr(:macrocall, Symbol("@check"), nothing, ex) + end + pushfirst!(stmts, :(AMDGPU.prepare_state())) + return e +end + +function rewrite!(dag::ExprDAG) + for node in get_nodes(dag) + for expr in get_exprs(node) + rewrite!(expr) + end + end +end + +rewrite!(ctx.dag) + +# print +build!(ctx, BUILDSTAGE_PRINTING_ONLY) + +path = options["general"]["output_file_path"] +format_file(path, YASStyle()) diff --git a/gen/rocrand/rocrand-generator.toml b/gen/rocrand/rocrand-generator.toml new file mode 100644 index 000000000..a1d65fd3a --- /dev/null +++ b/gen/rocrand/rocrand-generator.toml @@ -0,0 +1,15 @@ +[general] +library_name = "librocrand" +output_file_path = "../src/rand/librocrand.jl" +export_symbol_prefixes = [] +print_using_CEnum = false +output_ignorelist = [ + "(__)?hip.*", + "(__)?HIP.*", + "rocrand_status", + "half", + "SKEIN_KS_PARITY64", +] + +[codegen] +use_ccall_macro = true diff --git a/src/rand/error.jl b/src/rand/error.jl index ba9a9197f..9d41e253a 100644 --- a/src/rand/error.jl +++ b/src/rand/error.jl @@ -1,6 +1,20 @@ export ROCRANDError import .AMDGPU: @check, check +using CEnum: @cenum + +@cenum rocrand_status::UInt32 begin + ROCRAND_STATUS_SUCCESS = 0 + ROCRAND_STATUS_VERSION_MISMATCH = 100 + ROCRAND_STATUS_NOT_CREATED = 101 + ROCRAND_STATUS_ALLOCATION_FAILED = 102 + ROCRAND_STATUS_TYPE_ERROR = 103 + ROCRAND_STATUS_OUT_OF_RANGE = 104 + ROCRAND_STATUS_LENGTH_NOT_MULTIPLE = 105 + ROCRAND_STATUS_DOUBLE_PRECISION_REQUIRED = 106 + ROCRAND_STATUS_LAUNCH_FAILURE = 107 + ROCRAND_STATUS_INTERNAL_ERROR = 108 +end struct ROCRANDError <: Exception code::rocrand_status diff --git a/src/rand/librocrand.jl b/src/rand/librocrand.jl index c18fb17d5..6b275a6ec 100644 --- a/src/rand/librocrand.jl +++ b/src/rand/librocrand.jl @@ -1,119 +1,277 @@ +struct uint4 + x::Cuint + y::Cuint + z::Cuint + w::Cuint +end + +struct rocrand_discrete_distribution_st + size::Cuint + offset::Cuint + alias::Ptr{Cuint} + probability::Ptr{Cdouble} + cdf::Ptr{Cdouble} +end + +const rocrand_discrete_distribution = Ptr{rocrand_discrete_distribution_st} + +mutable struct rocrand_generator_base_type end + +const rocrand_generator = Ptr{rocrand_generator_base_type} + +@cenum rocrand_rng_type::UInt32 begin + ROCRAND_RNG_PSEUDO_DEFAULT = 400 + ROCRAND_RNG_PSEUDO_XORWOW = 401 + ROCRAND_RNG_PSEUDO_MRG32K3A = 402 + ROCRAND_RNG_PSEUDO_MTGP32 = 403 + ROCRAND_RNG_PSEUDO_PHILOX4_32_10 = 404 + ROCRAND_RNG_PSEUDO_MRG31K3P = 405 + ROCRAND_RNG_PSEUDO_LFSR113 = 406 + ROCRAND_RNG_PSEUDO_MT19937 = 407 + ROCRAND_RNG_PSEUDO_THREEFRY2_32_20 = 408 + ROCRAND_RNG_PSEUDO_THREEFRY2_64_20 = 409 + ROCRAND_RNG_PSEUDO_THREEFRY4_32_20 = 410 + ROCRAND_RNG_PSEUDO_THREEFRY4_64_20 = 411 + ROCRAND_RNG_QUASI_DEFAULT = 500 + ROCRAND_RNG_QUASI_SOBOL32 = 501 + ROCRAND_RNG_QUASI_SCRAMBLED_SOBOL32 = 502 + ROCRAND_RNG_QUASI_SOBOL64 = 504 + ROCRAND_RNG_QUASI_SCRAMBLED_SOBOL64 = 505 +end + +@cenum rocrand_ordering::UInt32 begin + ROCRAND_ORDERING_PSEUDO_BEST = 100 + ROCRAND_ORDERING_PSEUDO_DEFAULT = 101 + ROCRAND_ORDERING_PSEUDO_SEEDED = 102 + ROCRAND_ORDERING_PSEUDO_LEGACY = 103 + ROCRAND_ORDERING_PSEUDO_DYNAMIC = 104 + ROCRAND_ORDERING_QUASI_DEFAULT = 201 +end + +@cenum rocrand_direction_vector_set::UInt32 begin + ROCRAND_DIRECTION_VECTORS_32_JOEKUO6 = 101 + ROCRAND_SCRAMBLED_DIRECTION_VECTORS_32_JOEKUO6 = 102 + ROCRAND_DIRECTION_VECTORS_64_JOEKUO6 = 103 + ROCRAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6 = 104 +end + function rocrand_create_generator(generator, rng_type) AMDGPU.prepare_state() - @check ccall((:rocrand_create_generator, librocrand), rocrand_status, (Ptr{rocrand_generator}, rocrand_rng_type), generator, rng_type) + @check @ccall(librocrand.rocrand_create_generator(generator::Ptr{rocrand_generator}, + rng_type::rocrand_rng_type)::rocrand_status) +end + +function rocrand_create_generator_host(generator, rng_type) + AMDGPU.prepare_state() + @check @ccall(librocrand.rocrand_create_generator_host(generator::Ptr{rocrand_generator}, + rng_type::rocrand_rng_type)::rocrand_status) +end + +function rocrand_create_generator_host_blocking(generator, rng_type) + AMDGPU.prepare_state() + @check @ccall(librocrand.rocrand_create_generator_host_blocking(generator::Ptr{rocrand_generator}, + rng_type::rocrand_rng_type)::rocrand_status) end function rocrand_destroy_generator(generator) AMDGPU.prepare_state() - @check ccall((:rocrand_destroy_generator, librocrand), rocrand_status, (rocrand_generator,), generator) + @check @ccall(librocrand.rocrand_destroy_generator(generator::rocrand_generator)::rocrand_status) end function rocrand_generate(generator, output_data, n) AMDGPU.prepare_state() - @check ccall((:rocrand_generate, librocrand), rocrand_status, (rocrand_generator, Ptr{UInt32}, Cint), generator, output_data, n) + @check @ccall(librocrand.rocrand_generate(generator::rocrand_generator, + output_data::Ptr{Cuint}, + n::Csize_t)::rocrand_status) +end + +function rocrand_generate_long_long(generator, output_data, n) + AMDGPU.prepare_state() + @check @ccall(librocrand.rocrand_generate_long_long(generator::rocrand_generator, + output_data::Ptr{Culonglong}, + n::Csize_t)::rocrand_status) end function rocrand_generate_char(generator, output_data, n) AMDGPU.prepare_state() - @check ccall((:rocrand_generate_char, librocrand), rocrand_status, (rocrand_generator, Ptr{Cuchar}, Cint), generator, output_data, n) + @check @ccall(librocrand.rocrand_generate_char(generator::rocrand_generator, + output_data::Ptr{Cuchar}, + n::Csize_t)::rocrand_status) end function rocrand_generate_short(generator, output_data, n) AMDGPU.prepare_state() - @check ccall((:rocrand_generate_short, librocrand), rocrand_status, (rocrand_generator, Ptr{UInt16}, Cint), generator, output_data, n) + @check @ccall(librocrand.rocrand_generate_short(generator::rocrand_generator, + output_data::Ptr{Cushort}, + n::Csize_t)::rocrand_status) end function rocrand_generate_uniform(generator, output_data, n) AMDGPU.prepare_state() - @check ccall((:rocrand_generate_uniform, librocrand), rocrand_status, (rocrand_generator, Ptr{Cfloat}, Cint), generator, output_data, n) + @check @ccall(librocrand.rocrand_generate_uniform(generator::rocrand_generator, + output_data::Ptr{Cfloat}, + n::Csize_t)::rocrand_status) end function rocrand_generate_uniform_double(generator, output_data, n) AMDGPU.prepare_state() - @check ccall((:rocrand_generate_uniform_double, librocrand), rocrand_status, (rocrand_generator, Ptr{Cdouble}, Cint), generator, output_data, n) + @check @ccall(librocrand.rocrand_generate_uniform_double(generator::rocrand_generator, + output_data::Ptr{Cdouble}, + n::Csize_t)::rocrand_status) end function rocrand_generate_uniform_half(generator, output_data, n) AMDGPU.prepare_state() - @check ccall((:rocrand_generate_uniform_half, librocrand), rocrand_status, (rocrand_generator, Ptr{half}, Cint), generator, output_data, n) + @check @ccall(librocrand.rocrand_generate_uniform_half(generator::rocrand_generator, + output_data::Ptr{half}, + n::Csize_t)::rocrand_status) end function rocrand_generate_normal(generator, output_data, n, mean, stddev) AMDGPU.prepare_state() - @check ccall((:rocrand_generate_normal, librocrand), rocrand_status, (rocrand_generator, Ptr{Cfloat}, Cint, Cfloat, Cfloat), generator, output_data, n, mean, stddev) + @check @ccall(librocrand.rocrand_generate_normal(generator::rocrand_generator, + output_data::Ptr{Cfloat}, n::Csize_t, + mean::Cfloat, + stddev::Cfloat)::rocrand_status) end function rocrand_generate_normal_double(generator, output_data, n, mean, stddev) AMDGPU.prepare_state() - @check ccall((:rocrand_generate_normal_double, librocrand), rocrand_status, (rocrand_generator, Ptr{Cdouble}, Cint, Cdouble, Cdouble), generator, output_data, n, mean, stddev) + @check @ccall(librocrand.rocrand_generate_normal_double(generator::rocrand_generator, + output_data::Ptr{Cdouble}, + n::Csize_t, mean::Cdouble, + stddev::Cdouble)::rocrand_status) end function rocrand_generate_normal_half(generator, output_data, n, mean, stddev) AMDGPU.prepare_state() - @check ccall((:rocrand_generate_normal_half, librocrand), rocrand_status, (rocrand_generator, Ptr{half}, Cint, half, half), generator, output_data, n, mean, stddev) + @check @ccall(librocrand.rocrand_generate_normal_half(generator::rocrand_generator, + output_data::Ptr{half}, + n::Csize_t, mean::half, + stddev::half)::rocrand_status) end function rocrand_generate_log_normal(generator, output_data, n, mean, stddev) AMDGPU.prepare_state() - @check ccall((:rocrand_generate_log_normal, librocrand), rocrand_status, (rocrand_generator, Ptr{Cfloat}, Cint, Cfloat, Cfloat), generator, output_data, n, mean, stddev) + @check @ccall(librocrand.rocrand_generate_log_normal(generator::rocrand_generator, + output_data::Ptr{Cfloat}, + n::Csize_t, mean::Cfloat, + stddev::Cfloat)::rocrand_status) end function rocrand_generate_log_normal_double(generator, output_data, n, mean, stddev) AMDGPU.prepare_state() - @check ccall((:rocrand_generate_log_normal_double, librocrand), rocrand_status, (rocrand_generator, Ptr{Cdouble}, Cint, Cdouble, Cdouble), generator, output_data, n, mean, stddev) + @check @ccall(librocrand.rocrand_generate_log_normal_double(generator::rocrand_generator, + output_data::Ptr{Cdouble}, + n::Csize_t, mean::Cdouble, + stddev::Cdouble)::rocrand_status) end function rocrand_generate_log_normal_half(generator, output_data, n, mean, stddev) AMDGPU.prepare_state() - @check ccall((:rocrand_generate_log_normal_half, librocrand), rocrand_status, (rocrand_generator, Ptr{half}, Cint, half, half), generator, output_data, n, mean, stddev) + @check @ccall(librocrand.rocrand_generate_log_normal_half(generator::rocrand_generator, + output_data::Ptr{half}, + n::Csize_t, mean::half, + stddev::half)::rocrand_status) end function rocrand_generate_poisson(generator, output_data, n, lambda) AMDGPU.prepare_state() - @check ccall((:rocrand_generate_poisson, librocrand), rocrand_status, (rocrand_generator, Ptr{UInt32}, Cint, Cdouble), generator, output_data, n, lambda) + @check @ccall(librocrand.rocrand_generate_poisson(generator::rocrand_generator, + output_data::Ptr{Cuint}, n::Csize_t, + lambda::Cdouble)::rocrand_status) end function rocrand_initialize_generator(generator) AMDGPU.prepare_state() - @check ccall((:rocrand_initialize_generator, librocrand), rocrand_status, (rocrand_generator,), generator) + @check @ccall(librocrand.rocrand_initialize_generator(generator::rocrand_generator)::rocrand_status) end function rocrand_set_stream(generator, stream) AMDGPU.prepare_state() - @check ccall((:rocrand_set_stream, librocrand), rocrand_status, (rocrand_generator, hipStream_t), generator, stream) + @check @ccall(librocrand.rocrand_set_stream(generator::rocrand_generator, + stream::hipStream_t)::rocrand_status) end function rocrand_set_seed(generator, seed) AMDGPU.prepare_state() - @check ccall((:rocrand_set_seed, librocrand), rocrand_status, (rocrand_generator, Culonglong), generator, seed) + @check @ccall(librocrand.rocrand_set_seed(generator::rocrand_generator, + seed::Culonglong)::rocrand_status) +end + +function rocrand_set_seed_uint4(generator, seed) + AMDGPU.prepare_state() + @check @ccall(librocrand.rocrand_set_seed_uint4(generator::rocrand_generator, + seed::uint4)::rocrand_status) end function rocrand_set_offset(generator, offset) AMDGPU.prepare_state() - @check ccall((:rocrand_set_offset, librocrand), rocrand_status, (rocrand_generator, Culonglong), generator, offset) + @check @ccall(librocrand.rocrand_set_offset(generator::rocrand_generator, + offset::Culonglong)::rocrand_status) +end + +function rocrand_set_ordering(generator, order) + AMDGPU.prepare_state() + @check @ccall(librocrand.rocrand_set_ordering(generator::rocrand_generator, + order::rocrand_ordering)::rocrand_status) end function rocrand_set_quasi_random_generator_dimensions(generator, dimensions) AMDGPU.prepare_state() - @check ccall((:rocrand_set_quasi_random_generator_dimensions, librocrand), rocrand_status, (rocrand_generator, UInt32), generator, dimensions) + @check @ccall(librocrand.rocrand_set_quasi_random_generator_dimensions(generator::rocrand_generator, + dimensions::Cuint)::rocrand_status) end function rocrand_get_version(version) AMDGPU.prepare_state() - @check ccall((:rocrand_get_version, librocrand), rocrand_status, (Ptr{Cint},), version) + @check @ccall(librocrand.rocrand_get_version(version::Ptr{Cint})::rocrand_status) end function rocrand_create_poisson_distribution(lambda, discrete_distribution) AMDGPU.prepare_state() - @check ccall((:rocrand_create_poisson_distribution, librocrand), rocrand_status, (Cdouble, Ptr{rocrand_discrete_distribution}), lambda, discrete_distribution) + @check @ccall(librocrand.rocrand_create_poisson_distribution(lambda::Cdouble, + discrete_distribution::Ptr{rocrand_discrete_distribution})::rocrand_status) end -function rocrand_create_discrete_distribution(probabilities, size, offset, discrete_distribution) AMDGPU.prepare_state() +function rocrand_create_discrete_distribution(probabilities, size, offset, + discrete_distribution) AMDGPU.prepare_state() - @check ccall((:rocrand_create_discrete_distribution, librocrand), rocrand_status, (Ptr{Cdouble}, UInt32, UInt32, Ptr{rocrand_discrete_distribution}), probabilities, size, offset, discrete_distribution) + @check @ccall(librocrand.rocrand_create_discrete_distribution(probabilities::Ptr{Cdouble}, + size::Cuint, + offset::Cuint, + discrete_distribution::Ptr{rocrand_discrete_distribution})::rocrand_status) end function rocrand_destroy_discrete_distribution(discrete_distribution) AMDGPU.prepare_state() - @check ccall((:rocrand_destroy_discrete_distribution, librocrand), rocrand_status, (rocrand_discrete_distribution,), discrete_distribution) + @check @ccall(librocrand.rocrand_destroy_discrete_distribution(discrete_distribution::rocrand_discrete_distribution)::rocrand_status) end + +function rocrand_get_direction_vectors32(vectors, set) + AMDGPU.prepare_state() + @check @ccall(librocrand.rocrand_get_direction_vectors32(vectors::Ptr{Ptr{Cuint}}, + set::rocrand_direction_vector_set)::rocrand_status) +end + +function rocrand_get_direction_vectors64(vectors, set) + AMDGPU.prepare_state() + @check @ccall(librocrand.rocrand_get_direction_vectors64(vectors::Ptr{Ptr{Culonglong}}, + set::rocrand_direction_vector_set)::rocrand_status) +end + +function rocrand_get_scramble_constants32(constants) + AMDGPU.prepare_state() + @check @ccall(librocrand.rocrand_get_scramble_constants32(constants::Ptr{Ptr{Cuint}})::rocrand_status) +end + +function rocrand_get_scramble_constants64(constants) + AMDGPU.prepare_state() + @check @ccall(librocrand.rocrand_get_scramble_constants64(constants::Ptr{Ptr{Culonglong}})::rocrand_status) +end + +const ROCRAND_VERSION = 400200 + +# Skipping MacroDefinition: ROCRANDAPI __attribute__ ( ( visibility ( "default" ) ) ) + +const ROCRAND_DEFAULT_MAX_BLOCK_SIZE = 256 diff --git a/src/rand/librocrand_common.jl b/src/rand/librocrand_common.jl deleted file mode 100644 index 6eccc45ac..000000000 --- a/src/rand/librocrand_common.jl +++ /dev/null @@ -1,43 +0,0 @@ -# Automatically generated using Clang.jl - - -const ROCRAND_VERSION = 201005 - -struct rocrand_discrete_distribution_st - size::UInt32 - offset::UInt32 - alias::Ptr{UInt32} - probability::Ptr{Cdouble} - cdf::Ptr{Cdouble} -end - -const rocrand_discrete_distribution = Ptr{rocrand_discrete_distribution_st} -const ROCRAND_DEFAULT_MAX_BLOCK_SIZE = 256 -const ROCRAND_DEFAULT_MIN_WARPS_PER_EU = 1 -const rocrand_generator_base_type = Cvoid -const rocrand_generator = Ptr{rocrand_generator_base_type} -const half = Float16 - -@cenum rocrand_status::UInt32 begin - ROCRAND_STATUS_SUCCESS = 0 - ROCRAND_STATUS_VERSION_MISMATCH = 100 - ROCRAND_STATUS_NOT_CREATED = 101 - ROCRAND_STATUS_ALLOCATION_FAILED = 102 - ROCRAND_STATUS_TYPE_ERROR = 103 - ROCRAND_STATUS_OUT_OF_RANGE = 104 - ROCRAND_STATUS_LENGTH_NOT_MULTIPLE = 105 - ROCRAND_STATUS_DOUBLE_PRECISION_REQUIRED = 106 - ROCRAND_STATUS_LAUNCH_FAILURE = 107 - ROCRAND_STATUS_INTERNAL_ERROR = 108 -end - -@cenum rocrand_rng_type::UInt32 begin - ROCRAND_RNG_PSEUDO_DEFAULT = 400 - ROCRAND_RNG_PSEUDO_XORWOW = 401 - ROCRAND_RNG_PSEUDO_MRG32K3A = 402 - ROCRAND_RNG_PSEUDO_MTGP32 = 403 - ROCRAND_RNG_PSEUDO_PHILOX4_32_10 = 404 - ROCRAND_RNG_QUASI_DEFAULT = 500 - ROCRAND_RNG_QUASI_SOBOL32 = 501 -end - diff --git a/src/rand/rocRAND.jl b/src/rand/rocRAND.jl index f2cfca695..4a665d836 100644 --- a/src/rand/rocRAND.jl +++ b/src/rand/rocRAND.jl @@ -12,7 +12,7 @@ using Random export rand_logn!, rand_poisson!, rand_logn, rand_poisson -include("librocrand_common.jl") +const half = Float16 include("error.jl") include("librocrand.jl")