-
Notifications
You must be signed in to change notification settings - Fork 13.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Offload] Rework offloading entry type to be more generic #124018
Conversation
@llvm/pr-subscribers-mlir-llvm @llvm/pr-subscribers-clang Author: Joseph Huber (jhuber6) ChangesSummary: The first 64-bytes are reserved as zero so the OpenMP runtime can detect Patch is 198.62 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/124018.diff 37 Files Affected:
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index 23a40b8f7c32a3..e8d2dca855dd9b 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -1199,11 +1199,14 @@ void CGNVCUDARuntime::transformManagedVars() {
void CGNVCUDARuntime::createOffloadingEntries() {
SmallVector<char, 32> Out;
StringRef Section = (SectionPrefix + "_offloading_entries").toStringRef(Out);
+ llvm::object::OffloadKind Kind = CGM.getLangOpts().HIP
+ ? llvm::object::OffloadKind::OFK_HIP
+ : llvm::object::OffloadKind::OFK_Cuda;
llvm::Module &M = CGM.getModule();
for (KernelInfo &I : EmittedKernels)
llvm::offloading::emitOffloadingEntry(
- M, KernelHandles[I.Kernel->getName()],
+ M, Kind, KernelHandles[I.Kernel->getName()],
getDeviceSideName(cast<NamedDecl>(I.D)), /*Flags=*/0, /*Data=*/0,
llvm::offloading::OffloadGlobalEntry, Section);
@@ -1221,42 +1224,30 @@ void CGNVCUDARuntime::createOffloadingEntries() {
? static_cast<int32_t>(llvm::offloading::OffloadGlobalNormalized)
: 0);
if (I.Flags.getKind() == DeviceVarFlags::Variable) {
- // TODO: Update the offloading entries struct to avoid this indirection.
if (I.Flags.isManaged()) {
assert(I.Var->getName().ends_with(".managed") &&
"HIP managed variables not transformed");
- // Create a struct to contain the two variables.
auto *ManagedVar = M.getNamedGlobal(
I.Var->getName().drop_back(StringRef(".managed").size()));
- llvm::Constant *StructData[] = {ManagedVar, I.Var};
- llvm::Constant *Initializer = llvm::ConstantStruct::get(
- llvm::offloading::getManagedTy(M), StructData);
- auto *Struct = new llvm::GlobalVariable(
- M, llvm::offloading::getManagedTy(M),
- /*IsConstant=*/true, llvm::GlobalValue::PrivateLinkage, Initializer,
- I.Var->getName(), /*InsertBefore=*/nullptr,
- llvm::GlobalVariable::NotThreadLocal,
- M.getDataLayout().getDefaultGlobalsAddressSpace());
-
llvm::offloading::emitOffloadingEntry(
- M, Struct, getDeviceSideName(I.D), VarSize,
+ M, Kind, I.Var, getDeviceSideName(I.D), VarSize,
llvm::offloading::OffloadGlobalManagedEntry | Flags,
- /*Data=*/static_cast<uint32_t>(I.Var->getAlignment()), Section);
+ /*Data=*/I.Var->getAlignment(), Section, ManagedVar);
} else {
llvm::offloading::emitOffloadingEntry(
- M, I.Var, getDeviceSideName(I.D), VarSize,
+ M, Kind, I.Var, getDeviceSideName(I.D), VarSize,
llvm::offloading::OffloadGlobalEntry | Flags,
/*Data=*/0, Section);
}
} else if (I.Flags.getKind() == DeviceVarFlags::Surface) {
llvm::offloading::emitOffloadingEntry(
- M, I.Var, getDeviceSideName(I.D), VarSize,
+ M, Kind, I.Var, getDeviceSideName(I.D), VarSize,
llvm::offloading::OffloadGlobalSurfaceEntry | Flags,
I.Flags.getSurfTexType(), Section);
} else if (I.Flags.getKind() == DeviceVarFlags::Texture) {
llvm::offloading::emitOffloadingEntry(
- M, I.Var, getDeviceSideName(I.D), VarSize,
+ M, Kind, I.Var, getDeviceSideName(I.D), VarSize,
llvm::offloading::OffloadGlobalTextureEntry | Flags,
I.Flags.getSurfTexType(), Section);
}
diff --git a/clang/test/CodeGenCUDA/offloading-entries.cu b/clang/test/CodeGenCUDA/offloading-entries.cu
index d46a25969e3ecd..4f65e7335d85b4 100644
--- a/clang/test/CodeGenCUDA/offloading-entries.cu
+++ b/clang/test/CodeGenCUDA/offloading-entries.cu
@@ -19,63 +19,61 @@
//.
// CUDA: @managed = global i32 undef, align 4
// CUDA: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
-// CUDA: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
+// CUDA: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @_Z18__device_stub__foov, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
-// CUDA: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
+// CUDA: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @_Z21__device_stub__kernelv, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section "cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1
-// CUDA: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
+// CUDA: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @var, ptr @.offloading.entry_name.2, i64 4, i64 0, ptr null }, section "cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1
-// CUDA: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { ptr @managed, ptr @.offloading.entry_name.3, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
+// CUDA: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @managed, ptr @.offloading.entry_name.3, i64 4, i64 0, ptr null }, section "cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
-// CUDA: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.4, i64 4, i32 2, i32 1 }, section "cuda_offloading_entries", align 1
+// CUDA: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
-// CUDA: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.5, i64 4, i32 3, i32 1 }, section "cuda_offloading_entries", align 1
+// CUDA: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 3, ptr @tex, ptr @.offloading.entry_name.5, i64 4, i64 1, ptr null }, section "cuda_offloading_entries", align 1
//.
// HIP: @managed.managed = global i32 0, align 4
// HIP: @managed = externally_initialized global ptr null
// HIP: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
-// HIP: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "hip_offloading_entries", align 1
+// HIP: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 0, ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "hip_offloading_entries", align 1
// HIP: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
-// HIP: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "hip_offloading_entries", align 1
+// HIP: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 0, ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section "hip_offloading_entries", align 1
// HIP: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1
-// HIP: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "hip_offloading_entries", align 1
-// HIP: @managed.managed.3 = private constant %struct.__managed_var { ptr @managed, ptr @managed.managed }
-// HIP: @.offloading.entry_name.4 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1
-// HIP: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { ptr @managed.managed.3, ptr @.offloading.entry_name.4, i64 4, i32 1, i32 4 }, section "hip_offloading_entries", align 1
-// HIP: @.offloading.entry_name.5 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
-// HIP: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.5, i64 4, i32 2, i32 1 }, section "hip_offloading_entries", align 1
-// HIP: @.offloading.entry_name.6 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
-// HIP: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.6, i64 4, i32 3, i32 1 }, section "hip_offloading_entries", align 1
+// HIP: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 0, ptr @var, ptr @.offloading.entry_name.2, i64 4, i64 0, ptr null }, section "hip_offloading_entries", align 1
+// HIP: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1
+// HIP: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 1, ptr @managed.managed, ptr @.offloading.entry_name.3, i64 4, i64 4, ptr @managed }, section "hip_offloading_entries", align 1
+// HIP: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
+// HIP: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "hip_offloading_entries", align 1
+// HIP: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
+// HIP: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 3, ptr @tex, ptr @.offloading.entry_name.5, i64 4, i64 1, ptr null }, section "hip_offloading_entries", align 1
//.
// CUDA-COFF: @managed = dso_local global i32 undef, align 4
// CUDA-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
-// CUDA-COFF: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1
+// CUDA-COFF: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @_Z18__device_stub__foov, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "cuda_offloading_entries$OE", align 1
// CUDA-COFF: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
-// CUDA-COFF: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1
+// CUDA-COFF: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @_Z21__device_stub__kernelv, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section "cuda_offloading_entries$OE", align 1
// CUDA-COFF: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1
-// CUDA-COFF: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1
+// CUDA-COFF: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @var, ptr @.offloading.entry_name.2, i64 4, i64 0, ptr null }, section "cuda_offloading_entries$OE", align 1
// CUDA-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1
-// CUDA-COFF: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { ptr @managed, ptr @.offloading.entry_name.3, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1
+// CUDA-COFF: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @managed, ptr @.offloading.entry_name.3, i64 4, i64 0, ptr null }, section "cuda_offloading_entries$OE", align 1
// CUDA-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
-// CUDA-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.4, i64 4, i32 2, i32 1 }, section "cuda_offloading_entries$OE", align 1
+// CUDA-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "cuda_offloading_entries$OE", align 1
// CUDA-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
-// CUDA-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.5, i64 4, i32 3, i32 1 }, section "cuda_offloading_entries$OE", align 1
+// CUDA-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 3, ptr @tex, ptr @.offloading.entry_name.5, i64 4, i64 1, ptr null }, section "cuda_offloading_entries$OE", align 1
//.
// HIP-COFF: @managed.managed = dso_local global i32 0, align 4
// HIP-COFF: @managed = dso_local externally_initialized global ptr null
// HIP-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
-// HIP-COFF: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1
+// HIP-COFF: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 0, ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "hip_offloading_entries$OE", align 1
// HIP-COFF: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
-// HIP-COFF: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1
+// HIP-COFF: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 0, ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section "hip_offloading_entries$OE", align 1
// HIP-COFF: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1
-// HIP-COFF: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1
-// HIP-COFF: @managed.managed.3 = private constant %struct.__managed_var { ptr @managed, ptr @managed.managed }
-// HIP-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1
-// HIP-COFF: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { ptr @managed.managed.3, ptr @.offloading.entry_name.4, i64 4, i32 1, i32 4 }, section "hip_offloading_entries$OE", align 1
-// HIP-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
-// HIP-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.5, i64 4, i32 2, i32 1 }, section "hip_offloading_entries$OE", align 1
-// HIP-COFF: @.offloading.entry_name.6 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
-// HIP-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.6, i64 4, i32 3, i32 1 }, section "hip_offloading_entries$OE", align 1
+// HIP-COFF: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 0, ptr @var, ptr @.offloading.entry_name.2, i64 4, i64 0, ptr null }, section "hip_offloading_entries$OE", align 1
+// HIP-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1
+// HIP-COFF: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 1, ptr @managed.managed, ptr @.offloading.entry_name.3, i64 4, i64 4, ptr @managed }, section "hip_offloading_entries$OE", align 1
+// HIP-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
+// HIP-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "hip_offloading_entries$OE", align 1
+// HIP-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
+// HIP-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 3, ptr @tex, ptr @.offloading.entry_name.5, i64 4, i64 1, ptr null }, section "hip_offloading_entries$OE", align 1
//.
// CUDA-LABEL: @_Z18__device_stub__foov(
// CUDA-NEXT: entry:
diff --git a/clang/test/Driver/linker-wrapper-image.c b/clang/test/Driver/linker-wrapper-image.c
index 7f96f629e91272..197ba4c31f6b41 100644
--- a/clang/test/Driver/linker-wrapper-image.c
+++ b/clang/test/Driver/linker-wrapper-image.c
@@ -73,45 +73,54 @@
// CUDA-NEXT: %1 = call i32 @atexit(ptr @.cuda.fatbin_unreg)
// CUDA-NEXT: ret void
// CUDA-NEXT: }
-
+//
// CUDA: define internal void @.cuda.fatbin_unreg() section ".text.startup" {
// CUDA-NEXT: entry:
// CUDA-NEXT: %0 = load ptr, ptr @.cuda.binary_handle, align 8
// CUDA-NEXT: call void @__cudaUnregisterFatBinary(ptr %0)
// CUDA-NEXT: ret void
// CUDA-NEXT: }
-
+//
// CUDA: define internal void @.cuda.globals_reg(ptr %0) section ".text.startup" {
// CUDA-NEXT: entry:
// CUDA-NEXT: %1 = icmp ne ptr @__start_cuda_offloading_entries, @__stop_cuda_offloading_entries
// CUDA-NEXT: br i1 %1, label %while.entry, label %while.end
-
+//
// CUDA: while.entry:
-// CUDA-NEXT: %entry1 = phi ptr [ @__start_cuda_offloading_entries, %entry ], [ %13, %if.end ]
-// CUDA-NEXT: %2 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 0
+// CUDA-NEXT: %entry1 = phi ptr [ @__start_cuda_offloading_entries, %entry ], [ %16, %if.end ]
+// CUDA-NEXT: %2 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 4
// CUDA-NEXT: %addr = load ptr, ptr %2, align 8
-// CUDA-NEXT: %3 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 1
-// CUDA-NEXT: %name = load ptr, ptr %3, align 8
-// CUDA-NEXT: %4 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 2
-// CUDA-NEXT: %size = load i64, ptr %...
[truncated]
|
@llvm/pr-subscribers-offload Author: Joseph Huber (jhuber6) ChangesSummary: The first 64-bytes are reserved as zero so the OpenMP runtime can detect Patch is 198.62 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/124018.diff 37 Files Affected:
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index 23a40b8f7c32a3..e8d2dca855dd9b 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -1199,11 +1199,14 @@ void CGNVCUDARuntime::transformManagedVars() {
void CGNVCUDARuntime::createOffloadingEntries() {
SmallVector<char, 32> Out;
StringRef Section = (SectionPrefix + "_offloading_entries").toStringRef(Out);
+ llvm::object::OffloadKind Kind = CGM.getLangOpts().HIP
+ ? llvm::object::OffloadKind::OFK_HIP
+ : llvm::object::OffloadKind::OFK_Cuda;
llvm::Module &M = CGM.getModule();
for (KernelInfo &I : EmittedKernels)
llvm::offloading::emitOffloadingEntry(
- M, KernelHandles[I.Kernel->getName()],
+ M, Kind, KernelHandles[I.Kernel->getName()],
getDeviceSideName(cast<NamedDecl>(I.D)), /*Flags=*/0, /*Data=*/0,
llvm::offloading::OffloadGlobalEntry, Section);
@@ -1221,42 +1224,30 @@ void CGNVCUDARuntime::createOffloadingEntries() {
? static_cast<int32_t>(llvm::offloading::OffloadGlobalNormalized)
: 0);
if (I.Flags.getKind() == DeviceVarFlags::Variable) {
- // TODO: Update the offloading entries struct to avoid this indirection.
if (I.Flags.isManaged()) {
assert(I.Var->getName().ends_with(".managed") &&
"HIP managed variables not transformed");
- // Create a struct to contain the two variables.
auto *ManagedVar = M.getNamedGlobal(
I.Var->getName().drop_back(StringRef(".managed").size()));
- llvm::Constant *StructData[] = {ManagedVar, I.Var};
- llvm::Constant *Initializer = llvm::ConstantStruct::get(
- llvm::offloading::getManagedTy(M), StructData);
- auto *Struct = new llvm::GlobalVariable(
- M, llvm::offloading::getManagedTy(M),
- /*IsConstant=*/true, llvm::GlobalValue::PrivateLinkage, Initializer,
- I.Var->getName(), /*InsertBefore=*/nullptr,
- llvm::GlobalVariable::NotThreadLocal,
- M.getDataLayout().getDefaultGlobalsAddressSpace());
-
llvm::offloading::emitOffloadingEntry(
- M, Struct, getDeviceSideName(I.D), VarSize,
+ M, Kind, I.Var, getDeviceSideName(I.D), VarSize,
llvm::offloading::OffloadGlobalManagedEntry | Flags,
- /*Data=*/static_cast<uint32_t>(I.Var->getAlignment()), Section);
+ /*Data=*/I.Var->getAlignment(), Section, ManagedVar);
} else {
llvm::offloading::emitOffloadingEntry(
- M, I.Var, getDeviceSideName(I.D), VarSize,
+ M, Kind, I.Var, getDeviceSideName(I.D), VarSize,
llvm::offloading::OffloadGlobalEntry | Flags,
/*Data=*/0, Section);
}
} else if (I.Flags.getKind() == DeviceVarFlags::Surface) {
llvm::offloading::emitOffloadingEntry(
- M, I.Var, getDeviceSideName(I.D), VarSize,
+ M, Kind, I.Var, getDeviceSideName(I.D), VarSize,
llvm::offloading::OffloadGlobalSurfaceEntry | Flags,
I.Flags.getSurfTexType(), Section);
} else if (I.Flags.getKind() == DeviceVarFlags::Texture) {
llvm::offloading::emitOffloadingEntry(
- M, I.Var, getDeviceSideName(I.D), VarSize,
+ M, Kind, I.Var, getDeviceSideName(I.D), VarSize,
llvm::offloading::OffloadGlobalTextureEntry | Flags,
I.Flags.getSurfTexType(), Section);
}
diff --git a/clang/test/CodeGenCUDA/offloading-entries.cu b/clang/test/CodeGenCUDA/offloading-entries.cu
index d46a25969e3ecd..4f65e7335d85b4 100644
--- a/clang/test/CodeGenCUDA/offloading-entries.cu
+++ b/clang/test/CodeGenCUDA/offloading-entries.cu
@@ -19,63 +19,61 @@
//.
// CUDA: @managed = global i32 undef, align 4
// CUDA: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
-// CUDA: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
+// CUDA: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @_Z18__device_stub__foov, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
-// CUDA: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
+// CUDA: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @_Z21__device_stub__kernelv, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section "cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1
-// CUDA: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
+// CUDA: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @var, ptr @.offloading.entry_name.2, i64 4, i64 0, ptr null }, section "cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1
-// CUDA: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { ptr @managed, ptr @.offloading.entry_name.3, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
+// CUDA: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @managed, ptr @.offloading.entry_name.3, i64 4, i64 0, ptr null }, section "cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
-// CUDA: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.4, i64 4, i32 2, i32 1 }, section "cuda_offloading_entries", align 1
+// CUDA: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
-// CUDA: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.5, i64 4, i32 3, i32 1 }, section "cuda_offloading_entries", align 1
+// CUDA: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 3, ptr @tex, ptr @.offloading.entry_name.5, i64 4, i64 1, ptr null }, section "cuda_offloading_entries", align 1
//.
// HIP: @managed.managed = global i32 0, align 4
// HIP: @managed = externally_initialized global ptr null
// HIP: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
-// HIP: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "hip_offloading_entries", align 1
+// HIP: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 0, ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "hip_offloading_entries", align 1
// HIP: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
-// HIP: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "hip_offloading_entries", align 1
+// HIP: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 0, ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section "hip_offloading_entries", align 1
// HIP: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1
-// HIP: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "hip_offloading_entries", align 1
-// HIP: @managed.managed.3 = private constant %struct.__managed_var { ptr @managed, ptr @managed.managed }
-// HIP: @.offloading.entry_name.4 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1
-// HIP: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { ptr @managed.managed.3, ptr @.offloading.entry_name.4, i64 4, i32 1, i32 4 }, section "hip_offloading_entries", align 1
-// HIP: @.offloading.entry_name.5 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
-// HIP: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.5, i64 4, i32 2, i32 1 }, section "hip_offloading_entries", align 1
-// HIP: @.offloading.entry_name.6 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
-// HIP: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.6, i64 4, i32 3, i32 1 }, section "hip_offloading_entries", align 1
+// HIP: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 0, ptr @var, ptr @.offloading.entry_name.2, i64 4, i64 0, ptr null }, section "hip_offloading_entries", align 1
+// HIP: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1
+// HIP: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 1, ptr @managed.managed, ptr @.offloading.entry_name.3, i64 4, i64 4, ptr @managed }, section "hip_offloading_entries", align 1
+// HIP: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
+// HIP: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "hip_offloading_entries", align 1
+// HIP: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
+// HIP: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 3, ptr @tex, ptr @.offloading.entry_name.5, i64 4, i64 1, ptr null }, section "hip_offloading_entries", align 1
//.
// CUDA-COFF: @managed = dso_local global i32 undef, align 4
// CUDA-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
-// CUDA-COFF: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1
+// CUDA-COFF: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @_Z18__device_stub__foov, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "cuda_offloading_entries$OE", align 1
// CUDA-COFF: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
-// CUDA-COFF: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1
+// CUDA-COFF: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @_Z21__device_stub__kernelv, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section "cuda_offloading_entries$OE", align 1
// CUDA-COFF: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1
-// CUDA-COFF: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1
+// CUDA-COFF: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @var, ptr @.offloading.entry_name.2, i64 4, i64 0, ptr null }, section "cuda_offloading_entries$OE", align 1
// CUDA-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1
-// CUDA-COFF: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { ptr @managed, ptr @.offloading.entry_name.3, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1
+// CUDA-COFF: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @managed, ptr @.offloading.entry_name.3, i64 4, i64 0, ptr null }, section "cuda_offloading_entries$OE", align 1
// CUDA-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
-// CUDA-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.4, i64 4, i32 2, i32 1 }, section "cuda_offloading_entries$OE", align 1
+// CUDA-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "cuda_offloading_entries$OE", align 1
// CUDA-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
-// CUDA-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.5, i64 4, i32 3, i32 1 }, section "cuda_offloading_entries$OE", align 1
+// CUDA-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 3, ptr @tex, ptr @.offloading.entry_name.5, i64 4, i64 1, ptr null }, section "cuda_offloading_entries$OE", align 1
//.
// HIP-COFF: @managed.managed = dso_local global i32 0, align 4
// HIP-COFF: @managed = dso_local externally_initialized global ptr null
// HIP-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
-// HIP-COFF: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1
+// HIP-COFF: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 0, ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "hip_offloading_entries$OE", align 1
// HIP-COFF: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
-// HIP-COFF: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1
+// HIP-COFF: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 0, ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section "hip_offloading_entries$OE", align 1
// HIP-COFF: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1
-// HIP-COFF: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1
-// HIP-COFF: @managed.managed.3 = private constant %struct.__managed_var { ptr @managed, ptr @managed.managed }
-// HIP-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1
-// HIP-COFF: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { ptr @managed.managed.3, ptr @.offloading.entry_name.4, i64 4, i32 1, i32 4 }, section "hip_offloading_entries$OE", align 1
-// HIP-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
-// HIP-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.5, i64 4, i32 2, i32 1 }, section "hip_offloading_entries$OE", align 1
-// HIP-COFF: @.offloading.entry_name.6 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
-// HIP-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.6, i64 4, i32 3, i32 1 }, section "hip_offloading_entries$OE", align 1
+// HIP-COFF: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 0, ptr @var, ptr @.offloading.entry_name.2, i64 4, i64 0, ptr null }, section "hip_offloading_entries$OE", align 1
+// HIP-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1
+// HIP-COFF: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 1, ptr @managed.managed, ptr @.offloading.entry_name.3, i64 4, i64 4, ptr @managed }, section "hip_offloading_entries$OE", align 1
+// HIP-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
+// HIP-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "hip_offloading_entries$OE", align 1
+// HIP-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
+// HIP-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 3, ptr @tex, ptr @.offloading.entry_name.5, i64 4, i64 1, ptr null }, section "hip_offloading_entries$OE", align 1
//.
// CUDA-LABEL: @_Z18__device_stub__foov(
// CUDA-NEXT: entry:
diff --git a/clang/test/Driver/linker-wrapper-image.c b/clang/test/Driver/linker-wrapper-image.c
index 7f96f629e91272..197ba4c31f6b41 100644
--- a/clang/test/Driver/linker-wrapper-image.c
+++ b/clang/test/Driver/linker-wrapper-image.c
@@ -73,45 +73,54 @@
// CUDA-NEXT: %1 = call i32 @atexit(ptr @.cuda.fatbin_unreg)
// CUDA-NEXT: ret void
// CUDA-NEXT: }
-
+//
// CUDA: define internal void @.cuda.fatbin_unreg() section ".text.startup" {
// CUDA-NEXT: entry:
// CUDA-NEXT: %0 = load ptr, ptr @.cuda.binary_handle, align 8
// CUDA-NEXT: call void @__cudaUnregisterFatBinary(ptr %0)
// CUDA-NEXT: ret void
// CUDA-NEXT: }
-
+//
// CUDA: define internal void @.cuda.globals_reg(ptr %0) section ".text.startup" {
// CUDA-NEXT: entry:
// CUDA-NEXT: %1 = icmp ne ptr @__start_cuda_offloading_entries, @__stop_cuda_offloading_entries
// CUDA-NEXT: br i1 %1, label %while.entry, label %while.end
-
+//
// CUDA: while.entry:
-// CUDA-NEXT: %entry1 = phi ptr [ @__start_cuda_offloading_entries, %entry ], [ %13, %if.end ]
-// CUDA-NEXT: %2 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 0
+// CUDA-NEXT: %entry1 = phi ptr [ @__start_cuda_offloading_entries, %entry ], [ %16, %if.end ]
+// CUDA-NEXT: %2 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 4
// CUDA-NEXT: %addr = load ptr, ptr %2, align 8
-// CUDA-NEXT: %3 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 1
-// CUDA-NEXT: %name = load ptr, ptr %3, align 8
-// CUDA-NEXT: %4 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 2
-// CUDA-NEXT: %size = load i64, ptr %...
[truncated]
|
@llvm/pr-subscribers-clang-codegen Author: Joseph Huber (jhuber6) ChangesSummary: The first 64-bytes are reserved as zero so the OpenMP runtime can detect Patch is 198.62 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/124018.diff 37 Files Affected:
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index 23a40b8f7c32a3..e8d2dca855dd9b 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -1199,11 +1199,14 @@ void CGNVCUDARuntime::transformManagedVars() {
void CGNVCUDARuntime::createOffloadingEntries() {
SmallVector<char, 32> Out;
StringRef Section = (SectionPrefix + "_offloading_entries").toStringRef(Out);
+ llvm::object::OffloadKind Kind = CGM.getLangOpts().HIP
+ ? llvm::object::OffloadKind::OFK_HIP
+ : llvm::object::OffloadKind::OFK_Cuda;
llvm::Module &M = CGM.getModule();
for (KernelInfo &I : EmittedKernels)
llvm::offloading::emitOffloadingEntry(
- M, KernelHandles[I.Kernel->getName()],
+ M, Kind, KernelHandles[I.Kernel->getName()],
getDeviceSideName(cast<NamedDecl>(I.D)), /*Flags=*/0, /*Data=*/0,
llvm::offloading::OffloadGlobalEntry, Section);
@@ -1221,42 +1224,30 @@ void CGNVCUDARuntime::createOffloadingEntries() {
? static_cast<int32_t>(llvm::offloading::OffloadGlobalNormalized)
: 0);
if (I.Flags.getKind() == DeviceVarFlags::Variable) {
- // TODO: Update the offloading entries struct to avoid this indirection.
if (I.Flags.isManaged()) {
assert(I.Var->getName().ends_with(".managed") &&
"HIP managed variables not transformed");
- // Create a struct to contain the two variables.
auto *ManagedVar = M.getNamedGlobal(
I.Var->getName().drop_back(StringRef(".managed").size()));
- llvm::Constant *StructData[] = {ManagedVar, I.Var};
- llvm::Constant *Initializer = llvm::ConstantStruct::get(
- llvm::offloading::getManagedTy(M), StructData);
- auto *Struct = new llvm::GlobalVariable(
- M, llvm::offloading::getManagedTy(M),
- /*IsConstant=*/true, llvm::GlobalValue::PrivateLinkage, Initializer,
- I.Var->getName(), /*InsertBefore=*/nullptr,
- llvm::GlobalVariable::NotThreadLocal,
- M.getDataLayout().getDefaultGlobalsAddressSpace());
-
llvm::offloading::emitOffloadingEntry(
- M, Struct, getDeviceSideName(I.D), VarSize,
+ M, Kind, I.Var, getDeviceSideName(I.D), VarSize,
llvm::offloading::OffloadGlobalManagedEntry | Flags,
- /*Data=*/static_cast<uint32_t>(I.Var->getAlignment()), Section);
+ /*Data=*/I.Var->getAlignment(), Section, ManagedVar);
} else {
llvm::offloading::emitOffloadingEntry(
- M, I.Var, getDeviceSideName(I.D), VarSize,
+ M, Kind, I.Var, getDeviceSideName(I.D), VarSize,
llvm::offloading::OffloadGlobalEntry | Flags,
/*Data=*/0, Section);
}
} else if (I.Flags.getKind() == DeviceVarFlags::Surface) {
llvm::offloading::emitOffloadingEntry(
- M, I.Var, getDeviceSideName(I.D), VarSize,
+ M, Kind, I.Var, getDeviceSideName(I.D), VarSize,
llvm::offloading::OffloadGlobalSurfaceEntry | Flags,
I.Flags.getSurfTexType(), Section);
} else if (I.Flags.getKind() == DeviceVarFlags::Texture) {
llvm::offloading::emitOffloadingEntry(
- M, I.Var, getDeviceSideName(I.D), VarSize,
+ M, Kind, I.Var, getDeviceSideName(I.D), VarSize,
llvm::offloading::OffloadGlobalTextureEntry | Flags,
I.Flags.getSurfTexType(), Section);
}
diff --git a/clang/test/CodeGenCUDA/offloading-entries.cu b/clang/test/CodeGenCUDA/offloading-entries.cu
index d46a25969e3ecd..4f65e7335d85b4 100644
--- a/clang/test/CodeGenCUDA/offloading-entries.cu
+++ b/clang/test/CodeGenCUDA/offloading-entries.cu
@@ -19,63 +19,61 @@
//.
// CUDA: @managed = global i32 undef, align 4
// CUDA: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
-// CUDA: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
+// CUDA: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @_Z18__device_stub__foov, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
-// CUDA: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
+// CUDA: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @_Z21__device_stub__kernelv, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section "cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1
-// CUDA: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
+// CUDA: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @var, ptr @.offloading.entry_name.2, i64 4, i64 0, ptr null }, section "cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1
-// CUDA: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { ptr @managed, ptr @.offloading.entry_name.3, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
+// CUDA: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @managed, ptr @.offloading.entry_name.3, i64 4, i64 0, ptr null }, section "cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
-// CUDA: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.4, i64 4, i32 2, i32 1 }, section "cuda_offloading_entries", align 1
+// CUDA: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
-// CUDA: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.5, i64 4, i32 3, i32 1 }, section "cuda_offloading_entries", align 1
+// CUDA: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 3, ptr @tex, ptr @.offloading.entry_name.5, i64 4, i64 1, ptr null }, section "cuda_offloading_entries", align 1
//.
// HIP: @managed.managed = global i32 0, align 4
// HIP: @managed = externally_initialized global ptr null
// HIP: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
-// HIP: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "hip_offloading_entries", align 1
+// HIP: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 0, ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "hip_offloading_entries", align 1
// HIP: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
-// HIP: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "hip_offloading_entries", align 1
+// HIP: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 0, ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section "hip_offloading_entries", align 1
// HIP: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1
-// HIP: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "hip_offloading_entries", align 1
-// HIP: @managed.managed.3 = private constant %struct.__managed_var { ptr @managed, ptr @managed.managed }
-// HIP: @.offloading.entry_name.4 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1
-// HIP: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { ptr @managed.managed.3, ptr @.offloading.entry_name.4, i64 4, i32 1, i32 4 }, section "hip_offloading_entries", align 1
-// HIP: @.offloading.entry_name.5 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
-// HIP: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.5, i64 4, i32 2, i32 1 }, section "hip_offloading_entries", align 1
-// HIP: @.offloading.entry_name.6 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
-// HIP: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.6, i64 4, i32 3, i32 1 }, section "hip_offloading_entries", align 1
+// HIP: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 0, ptr @var, ptr @.offloading.entry_name.2, i64 4, i64 0, ptr null }, section "hip_offloading_entries", align 1
+// HIP: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1
+// HIP: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 1, ptr @managed.managed, ptr @.offloading.entry_name.3, i64 4, i64 4, ptr @managed }, section "hip_offloading_entries", align 1
+// HIP: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
+// HIP: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "hip_offloading_entries", align 1
+// HIP: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
+// HIP: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 3, ptr @tex, ptr @.offloading.entry_name.5, i64 4, i64 1, ptr null }, section "hip_offloading_entries", align 1
//.
// CUDA-COFF: @managed = dso_local global i32 undef, align 4
// CUDA-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
-// CUDA-COFF: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1
+// CUDA-COFF: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @_Z18__device_stub__foov, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "cuda_offloading_entries$OE", align 1
// CUDA-COFF: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
-// CUDA-COFF: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1
+// CUDA-COFF: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @_Z21__device_stub__kernelv, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section "cuda_offloading_entries$OE", align 1
// CUDA-COFF: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1
-// CUDA-COFF: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1
+// CUDA-COFF: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @var, ptr @.offloading.entry_name.2, i64 4, i64 0, ptr null }, section "cuda_offloading_entries$OE", align 1
// CUDA-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1
-// CUDA-COFF: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { ptr @managed, ptr @.offloading.entry_name.3, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1
+// CUDA-COFF: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @managed, ptr @.offloading.entry_name.3, i64 4, i64 0, ptr null }, section "cuda_offloading_entries$OE", align 1
// CUDA-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
-// CUDA-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.4, i64 4, i32 2, i32 1 }, section "cuda_offloading_entries$OE", align 1
+// CUDA-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "cuda_offloading_entries$OE", align 1
// CUDA-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
-// CUDA-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.5, i64 4, i32 3, i32 1 }, section "cuda_offloading_entries$OE", align 1
+// CUDA-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 3, ptr @tex, ptr @.offloading.entry_name.5, i64 4, i64 1, ptr null }, section "cuda_offloading_entries$OE", align 1
//.
// HIP-COFF: @managed.managed = dso_local global i32 0, align 4
// HIP-COFF: @managed = dso_local externally_initialized global ptr null
// HIP-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
-// HIP-COFF: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1
+// HIP-COFF: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 0, ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "hip_offloading_entries$OE", align 1
// HIP-COFF: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
-// HIP-COFF: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1
+// HIP-COFF: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 0, ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section "hip_offloading_entries$OE", align 1
// HIP-COFF: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1
-// HIP-COFF: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1
-// HIP-COFF: @managed.managed.3 = private constant %struct.__managed_var { ptr @managed, ptr @managed.managed }
-// HIP-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1
-// HIP-COFF: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { ptr @managed.managed.3, ptr @.offloading.entry_name.4, i64 4, i32 1, i32 4 }, section "hip_offloading_entries$OE", align 1
-// HIP-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
-// HIP-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.5, i64 4, i32 2, i32 1 }, section "hip_offloading_entries$OE", align 1
-// HIP-COFF: @.offloading.entry_name.6 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
-// HIP-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.6, i64 4, i32 3, i32 1 }, section "hip_offloading_entries$OE", align 1
+// HIP-COFF: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 0, ptr @var, ptr @.offloading.entry_name.2, i64 4, i64 0, ptr null }, section "hip_offloading_entries$OE", align 1
+// HIP-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1
+// HIP-COFF: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 1, ptr @managed.managed, ptr @.offloading.entry_name.3, i64 4, i64 4, ptr @managed }, section "hip_offloading_entries$OE", align 1
+// HIP-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
+// HIP-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "hip_offloading_entries$OE", align 1
+// HIP-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
+// HIP-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 3, ptr @tex, ptr @.offloading.entry_name.5, i64 4, i64 1, ptr null }, section "hip_offloading_entries$OE", align 1
//.
// CUDA-LABEL: @_Z18__device_stub__foov(
// CUDA-NEXT: entry:
diff --git a/clang/test/Driver/linker-wrapper-image.c b/clang/test/Driver/linker-wrapper-image.c
index 7f96f629e91272..197ba4c31f6b41 100644
--- a/clang/test/Driver/linker-wrapper-image.c
+++ b/clang/test/Driver/linker-wrapper-image.c
@@ -73,45 +73,54 @@
// CUDA-NEXT: %1 = call i32 @atexit(ptr @.cuda.fatbin_unreg)
// CUDA-NEXT: ret void
// CUDA-NEXT: }
-
+//
// CUDA: define internal void @.cuda.fatbin_unreg() section ".text.startup" {
// CUDA-NEXT: entry:
// CUDA-NEXT: %0 = load ptr, ptr @.cuda.binary_handle, align 8
// CUDA-NEXT: call void @__cudaUnregisterFatBinary(ptr %0)
// CUDA-NEXT: ret void
// CUDA-NEXT: }
-
+//
// CUDA: define internal void @.cuda.globals_reg(ptr %0) section ".text.startup" {
// CUDA-NEXT: entry:
// CUDA-NEXT: %1 = icmp ne ptr @__start_cuda_offloading_entries, @__stop_cuda_offloading_entries
// CUDA-NEXT: br i1 %1, label %while.entry, label %while.end
-
+//
// CUDA: while.entry:
-// CUDA-NEXT: %entry1 = phi ptr [ @__start_cuda_offloading_entries, %entry ], [ %13, %if.end ]
-// CUDA-NEXT: %2 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 0
+// CUDA-NEXT: %entry1 = phi ptr [ @__start_cuda_offloading_entries, %entry ], [ %16, %if.end ]
+// CUDA-NEXT: %2 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 4
// CUDA-NEXT: %addr = load ptr, ptr %2, align 8
-// CUDA-NEXT: %3 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 1
-// CUDA-NEXT: %name = load ptr, ptr %3, align 8
-// CUDA-NEXT: %4 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 2
-// CUDA-NEXT: %size = load i64, ptr %...
[truncated]
|
4327291
to
27ddf80
Compare
27ddf80
to
d05511c
Compare
d05511c
to
df4e530
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks like some changes worth in the release note?
b6d76db
to
c66cee0
Compare
Done. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The OpenMP part looks good to me but I wonder how it's gonna affect HIP and CUDA (in terms of their runtime).
@@ -93,7 +93,8 @@ int main(int argc, char **argv) { | |||
void *BAllocStart = reinterpret_cast<void *>( | |||
JsonKernelInfo->getAsObject()->getInteger("BumpAllocVAStart").value()); | |||
|
|||
llvm::offloading::EntryTy KernelEntry = {nullptr, nullptr, 0, 0, 0}; | |||
llvm::offloading::EntryTy KernelEntry = { | |||
static_cast<uint64_t>(-1), 0, 0, 0, nullptr, nullptr, 0, 0, nullptr}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
how about ~0U
?
It doesn't affect HIP or CUDA runtime at all since the registration code is generated outside of the runtime. ABI doesn't even matter there since it's baked-in to the executable unlike OpenMP which goes through a .so. |
✅ With the latest revision this PR passed the C/C++ code formatter. |
Summary: The previous offloading entry type did not fit the current use-cases very well. This widens it and adds a version to prevent further annoyances. It also includes the kind to better sort who's using it. The first 64-bytes are reserved as zero so the OpenMP runtime can detect the old format for binary compatibilitry.
563fd14
to
5870548
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
@@ -25,3 +25,5 @@ Device Runtime | |||
- Changed the OpenMP DeviceRTL to use 'generic' IR. The | |||
``LIBOMPTARGET_DEVICE_ARCHITECTURES`` CMake argument is now unused and will | |||
always build support for AMDGPU and NVPTX targets. | |||
- Updated the offloading entry format but retained backwards compatibility with |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I actually meant LLVM release note. :-) I'm not sure if that's worth it. Your call.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think it's really relevant to LLVM. It's statically used everywhere else so it's just baked in. For libomptarget it's an actual ABI thing.
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/73/builds/12582 Here is the relevant piece of the build log for the reference
|
llvm/llvm-project#124018 has switched to new offload entry, we need to sync with all runtimes before switching, use legacy offload entry for now.
llvm/llvm-project#124018 change the type of offload struct. This PR makes SYCL RT compatible with the new format, while keeping backwards compatibility with the older version (till next ABI-break window). --------- Co-authored-by: Sergey Semenov <[email protected]>
…#17109) llvm/llvm-project#124018 change the type of offload struct. This PR makes SYCL RT compatible with the new format, while keeping backwards compatibility with the older version (till next ABI-break window). --------- Co-authored-by: Sergey Semenov <[email protected]>
Summary:
The previous offloading entry type did not fit the current use-cases
very well. This widens it and adds a version to prevent further
annoyances. It also includes the kind to better sort who's using it.
The first 64-bytes are reserved as zero so the OpenMP runtime can detect
the old format for binary compatibilitry.