diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs index 481f75f337d63..c6aae89f1e51e 100644 --- a/compiler/rustc_codegen_llvm/src/intrinsic.rs +++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs @@ -560,6 +560,12 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { return Ok(()); } + sym::amdgpu_dispatch_ptr => { + let val = self.call_intrinsic("llvm.amdgcn.dispatch.ptr", &[], &[]); + // Relying on `LLVMBuildPointerCast` to produce an addrspacecast + self.pointercast(val, self.type_ptr()) + } + _ if name.as_str().starts_with("simd_") => { // Unpack non-power-of-2 #[repr(packed, simd)] arguments. // This gives them the expected layout of a regular #[repr(simd)] vector. diff --git a/compiler/rustc_codegen_ssa/src/mir/intrinsic.rs b/compiler/rustc_codegen_ssa/src/mir/intrinsic.rs index f4fae40d8828f..f5ee9406f4bf1 100644 --- a/compiler/rustc_codegen_ssa/src/mir/intrinsic.rs +++ b/compiler/rustc_codegen_ssa/src/mir/intrinsic.rs @@ -112,6 +112,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { | sym::unreachable | sym::cold_path | sym::breakpoint + | sym::amdgpu_dispatch_ptr | sym::assert_zero_valid | sym::assert_mem_uninitialized_valid | sym::assert_inhabited diff --git a/compiler/rustc_hir_analysis/src/check/intrinsic.rs b/compiler/rustc_hir_analysis/src/check/intrinsic.rs index c84c1a8ca16d8..d3d167f6e2544 100644 --- a/compiler/rustc_hir_analysis/src/check/intrinsic.rs +++ b/compiler/rustc_hir_analysis/src/check/intrinsic.rs @@ -70,6 +70,7 @@ fn intrinsic_operation_unsafety(tcx: TyCtxt<'_>, intrinsic_id: LocalDefId) -> hi | sym::add_with_overflow | sym::aggregate_raw_ptr | sym::align_of + | sym::amdgpu_dispatch_ptr | sym::assert_inhabited | sym::assert_mem_uninitialized_valid | sym::assert_zero_valid @@ -286,6 +287,7 @@ pub(crate) fn check_intrinsic_type( let (n_tps, n_cts, inputs, output) = match intrinsic_name { sym::autodiff => (4, 0, vec![param(0), param(1), param(2)], param(3)), sym::abort => (0, 0, vec![], tcx.types.never), + sym::amdgpu_dispatch_ptr => (0, 0, vec![], Ty::new_imm_ptr(tcx, tcx.types.unit)), sym::unreachable => (0, 0, vec![], tcx.types.never), sym::breakpoint => (0, 0, vec![], tcx.types.unit), sym::size_of | sym::align_of | sym::variant_count => (1, 0, vec![], tcx.types.usize), diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 4080c1cd59ec7..98db0df593bec 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -458,6 +458,7 @@ symbols! { alu32, always, amdgpu, + amdgpu_dispatch_ptr, analysis, and, and_then, diff --git a/library/core/src/intrinsics/gpu.rs b/library/core/src/intrinsics/gpu.rs new file mode 100644 index 0000000000000..9e7624841d0c6 --- /dev/null +++ b/library/core/src/intrinsics/gpu.rs @@ -0,0 +1,23 @@ +//! Intrinsics for GPU targets. +//! +//! Intrinsics in this module are intended for use on GPU targets. +//! They can be target specific but in general GPU targets are similar. + +#![unstable(feature = "gpu_intrinsics", issue = "none")] + +/// Returns a pointer to the HSA kernel dispatch packet. +/// +/// A `gpu-kernel` on amdgpu is always launched through a kernel dispatch packet. +/// The dispatch packet contains the workgroup size, launch size and other data. +/// The content is defined by the [HSA Platform System Architecture Specification], +/// which is implemented e.g. in AMD's [hsa.h]. +/// The intrinsic returns a unit pointer so that rustc does not need to know the packet struct. +/// The pointer is valid for the whole lifetime of the program. +/// +/// [HSA Platform System Architecture Specification]: https://hsafoundation.com/wp-content/uploads/2021/02/HSA-SysArch-1.2.pdf +/// [hsa.h]: https://github.com/ROCm/rocm-systems/blob/rocm-7.1.0/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa.h#L2959 +#[rustc_nounwind] +#[rustc_intrinsic] +#[cfg(target_arch = "amdgpu")] +#[must_use = "returns a pointer that does nothing unless used"] +pub fn amdgpu_dispatch_ptr() -> *const (); diff --git a/library/core/src/intrinsics/mod.rs b/library/core/src/intrinsics/mod.rs index 20f34036b25c9..8d112b4c5d187 100644 --- a/library/core/src/intrinsics/mod.rs +++ b/library/core/src/intrinsics/mod.rs @@ -60,6 +60,7 @@ use crate::{mem, ptr}; mod bounds; pub mod fallback; +pub mod gpu; pub mod mir; pub mod simd; diff --git a/library/coretests/tests/floats/f128.rs b/library/coretests/tests/floats/f128.rs deleted file mode 100644 index 8e4f0c9899e1c..0000000000000 --- a/library/coretests/tests/floats/f128.rs +++ /dev/null @@ -1,50 +0,0 @@ -// FIXME(f16_f128): only tested on platforms that have symbols and aren't buggy -#![cfg(target_has_reliable_f128)] - -use super::assert_biteq; - -// Note these tolerances make sense around zero, but not for more extreme exponents. - -/// Default tolerances. Works for values that should be near precise but not exact. Roughly -/// the precision carried by `100 * 100`. -#[allow(unused)] -const TOL: f128 = 1e-12; - -/// For operations that are near exact, usually not involving math of different -/// signs. -#[allow(unused)] -const TOL_PRECISE: f128 = 1e-28; - -// FIXME(f16_f128,miri): many of these have to be disabled since miri does not yet support -// the intrinsics. - -#[test] -fn test_from() { - assert_biteq!(f128::from(false), 0.0); - assert_biteq!(f128::from(true), 1.0); - assert_biteq!(f128::from(u8::MIN), 0.0); - assert_biteq!(f128::from(42_u8), 42.0); - assert_biteq!(f128::from(u8::MAX), 255.0); - assert_biteq!(f128::from(i8::MIN), -128.0); - assert_biteq!(f128::from(42_i8), 42.0); - assert_biteq!(f128::from(i8::MAX), 127.0); - assert_biteq!(f128::from(u16::MIN), 0.0); - assert_biteq!(f128::from(42_u16), 42.0); - assert_biteq!(f128::from(u16::MAX), 65535.0); - assert_biteq!(f128::from(i16::MIN), -32768.0); - assert_biteq!(f128::from(42_i16), 42.0); - assert_biteq!(f128::from(i16::MAX), 32767.0); - assert_biteq!(f128::from(u32::MIN), 0.0); - assert_biteq!(f128::from(42_u32), 42.0); - assert_biteq!(f128::from(u32::MAX), 4294967295.0); - assert_biteq!(f128::from(i32::MIN), -2147483648.0); - assert_biteq!(f128::from(42_i32), 42.0); - assert_biteq!(f128::from(i32::MAX), 2147483647.0); - // FIXME(f16_f128): Uncomment these tests once the From<{u64,i64}> impls are added. - // assert_eq!(f128::from(u64::MIN), 0.0); - // assert_eq!(f128::from(42_u64), 42.0); - // assert_eq!(f128::from(u64::MAX), 18446744073709551615.0); - // assert_eq!(f128::from(i64::MIN), -9223372036854775808.0); - // assert_eq!(f128::from(42_i64), 42.0); - // assert_eq!(f128::from(i64::MAX), 9223372036854775807.0); -} diff --git a/library/coretests/tests/floats/f16.rs b/library/coretests/tests/floats/f16.rs deleted file mode 100644 index 3cff4259de54f..0000000000000 --- a/library/coretests/tests/floats/f16.rs +++ /dev/null @@ -1,35 +0,0 @@ -// FIXME(f16_f128): only tested on platforms that have symbols and aren't buggy -#![cfg(target_has_reliable_f16)] - -use super::assert_biteq; - -/// Tolerance for results on the order of 10.0e-2 -#[allow(unused)] -const TOL_N2: f16 = 0.0001; - -/// Tolerance for results on the order of 10.0e+0 -#[allow(unused)] -const TOL_0: f16 = 0.01; - -/// Tolerance for results on the order of 10.0e+2 -#[allow(unused)] -const TOL_P2: f16 = 0.5; - -/// Tolerance for results on the order of 10.0e+4 -#[allow(unused)] -const TOL_P4: f16 = 10.0; - -// FIXME(f16_f128,miri): many of these have to be disabled since miri does not yet support -// the intrinsics. - -#[test] -fn test_from() { - assert_biteq!(f16::from(false), 0.0); - assert_biteq!(f16::from(true), 1.0); - assert_biteq!(f16::from(u8::MIN), 0.0); - assert_biteq!(f16::from(42_u8), 42.0); - assert_biteq!(f16::from(u8::MAX), 255.0); - assert_biteq!(f16::from(i8::MIN), -128.0); - assert_biteq!(f16::from(42_i8), 42.0); - assert_biteq!(f16::from(i8::MAX), 127.0); -} diff --git a/library/coretests/tests/floats/mod.rs b/library/coretests/tests/floats/mod.rs index 63d5b8fb2c6e9..87e21b21f310d 100644 --- a/library/coretests/tests/floats/mod.rs +++ b/library/coretests/tests/floats/mod.rs @@ -375,9 +375,6 @@ macro_rules! float_test { }; } -mod f128; -mod f16; - float_test! { name: num, attrs: { @@ -1582,3 +1579,78 @@ float_test! { assert_biteq!((flt(-3.2)).mul_add(2.4, neg_inf), neg_inf); } } + +float_test! { + name: from, + attrs: { + f16: #[cfg(any(miri, target_has_reliable_f16))], + f128: #[cfg(any(miri, target_has_reliable_f128))], + }, + test { + assert_biteq!(Float::from(false), Float::ZERO); + assert_biteq!(Float::from(true), Float::ONE); + + assert_biteq!(Float::from(u8::MIN), Float::ZERO); + assert_biteq!(Float::from(42_u8), 42.0); + assert_biteq!(Float::from(u8::MAX), 255.0); + + assert_biteq!(Float::from(i8::MIN), -128.0); + assert_biteq!(Float::from(42_i8), 42.0); + assert_biteq!(Float::from(i8::MAX), 127.0); + } +} + +float_test! { + name: from_u16_i16, + attrs: { + f16: #[cfg(false)], + const f16: #[cfg(false)], + f128: #[cfg(any(miri, target_has_reliable_f128))], + }, + test { + assert_biteq!(Float::from(u16::MIN), Float::ZERO); + assert_biteq!(Float::from(42_u16), 42.0); + assert_biteq!(Float::from(u16::MAX), 65535.0); + assert_biteq!(Float::from(i16::MIN), -32768.0); + assert_biteq!(Float::from(42_i16), 42.0); + assert_biteq!(Float::from(i16::MAX), 32767.0); + } +} + +float_test! { + name: from_u32_i32, + attrs: { + f16: #[cfg(false)], + const f16: #[cfg(false)], + f32: #[cfg(false)], + const f32: #[cfg(false)], + f128: #[cfg(any(miri, target_has_reliable_f128))], + }, + test { + assert_biteq!(Float::from(u32::MIN), Float::ZERO); + assert_biteq!(Float::from(42_u32), 42.0); + assert_biteq!(Float::from(u32::MAX), 4294967295.0); + assert_biteq!(Float::from(i32::MIN), -2147483648.0); + assert_biteq!(Float::from(42_i32), 42.0); + assert_biteq!(Float::from(i32::MAX), 2147483647.0); + } +} + +// FIXME(f16_f128): Uncomment and adapt these tests once the From<{u64,i64}> impls are added. +// float_test! { +// name: from_u64_i64, +// attrs: { +// f16: #[cfg(false)], +// f32: #[cfg(false)], +// f64: #[cfg(false)], +// f128: #[cfg(any(miri, target_has_reliable_f128))], +// }, +// test { +// assert_biteq!(Float::from(u64::MIN), Float::ZERO); +// assert_biteq!(Float::from(42_u64), 42.0); +// assert_biteq!(Float::from(u64::MAX), 18446744073709551615.0); +// assert_biteq!(Float::from(i64::MIN), -9223372036854775808.0); +// assert_biteq!(Float::from(42_i64), 42.0); +// assert_biteq!(Float::from(i64::MAX), 9223372036854775807.0); +// } +// } diff --git a/tests/codegen-llvm/amdgpu-dispatch-ptr.rs b/tests/codegen-llvm/amdgpu-dispatch-ptr.rs new file mode 100644 index 0000000000000..00bde96c3d596 --- /dev/null +++ b/tests/codegen-llvm/amdgpu-dispatch-ptr.rs @@ -0,0 +1,27 @@ +// Tests the amdgpu_dispatch_ptr intrinsic. + +//@ compile-flags: --crate-type=rlib --target amdgcn-amd-amdhsa -Ctarget-cpu=gfx900 +//@ needs-llvm-components: amdgpu +//@ add-minicore +#![feature(intrinsics, no_core, rustc_attrs)] +#![no_core] + +extern crate minicore; + +pub struct DispatchPacket { + pub header: u16, + pub setup: u16, + pub workgroup_size_x: u16, // and more +} + +#[rustc_intrinsic] +#[rustc_nounwind] +fn amdgpu_dispatch_ptr() -> *const (); + +// CHECK-LABEL: @get_dispatch_data +// CHECK: %[[ORIG_PTR:[^ ]+]] = {{(tail )?}}call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// CHECK-NEXT: %[[PTR:[^ ]+]] = addrspacecast ptr addrspace(4) %[[ORIG_PTR]] to ptr +#[unsafe(no_mangle)] +pub fn get_dispatch_data() -> &'static DispatchPacket { + unsafe { &*(amdgpu_dispatch_ptr() as *const _) } +}