Skip to content

Commit 70a16b9

Browse files
authored
[HIP] Support managed variables using the new driver (#123437)
Summary: Previously, managed variables didn't work in rdc mode using the new driver because we just didn't register them. This was previously ignored because we didn't have enough space in the current struct format. This patch amends that by just emitting a struct pair for the two variables and using the single pointer. In the future, a more extensible entry format would be nice, but that can be done later.
1 parent a7a8694 commit 70a16b9

File tree

6 files changed

+112
-58
lines changed

6 files changed

+112
-58
lines changed

clang/lib/CodeGen/CGCUDANV.cpp

+28-6
Original file line numberDiff line numberDiff line change
@@ -1221,12 +1221,34 @@ void CGNVCUDARuntime::createOffloadingEntries() {
12211221
? static_cast<int32_t>(llvm::offloading::OffloadGlobalNormalized)
12221222
: 0);
12231223
if (I.Flags.getKind() == DeviceVarFlags::Variable) {
1224-
llvm::offloading::emitOffloadingEntry(
1225-
M, I.Var, getDeviceSideName(I.D), VarSize,
1226-
(I.Flags.isManaged() ? llvm::offloading::OffloadGlobalManagedEntry
1227-
: llvm::offloading::OffloadGlobalEntry) |
1228-
Flags,
1229-
/*Data=*/0, Section);
1224+
// TODO: Update the offloading entries struct to avoid this indirection.
1225+
if (I.Flags.isManaged()) {
1226+
assert(I.Var->getName().ends_with(".managed") &&
1227+
"HIP managed variables not transformed");
1228+
1229+
// Create a struct to contain the two variables.
1230+
auto *ManagedVar = M.getNamedGlobal(
1231+
I.Var->getName().drop_back(StringRef(".managed").size()));
1232+
llvm::Constant *StructData[] = {ManagedVar, I.Var};
1233+
llvm::Constant *Initializer = llvm::ConstantStruct::get(
1234+
llvm::offloading::getManagedTy(M), StructData);
1235+
auto *Struct = new llvm::GlobalVariable(
1236+
M, llvm::offloading::getManagedTy(M),
1237+
/*IsConstant=*/true, llvm::GlobalValue::PrivateLinkage, Initializer,
1238+
I.Var->getName(), /*InsertBefore=*/nullptr,
1239+
llvm::GlobalVariable::NotThreadLocal,
1240+
M.getDataLayout().getDefaultGlobalsAddressSpace());
1241+
1242+
llvm::offloading::emitOffloadingEntry(
1243+
M, Struct, getDeviceSideName(I.D), VarSize,
1244+
llvm::offloading::OffloadGlobalManagedEntry | Flags,
1245+
/*Data=*/static_cast<uint32_t>(I.Var->getAlignment()), Section);
1246+
} else {
1247+
llvm::offloading::emitOffloadingEntry(
1248+
M, I.Var, getDeviceSideName(I.D), VarSize,
1249+
llvm::offloading::OffloadGlobalEntry | Flags,
1250+
/*Data=*/0, Section);
1251+
}
12301252
} else if (I.Flags.getKind() == DeviceVarFlags::Surface) {
12311253
llvm::offloading::emitOffloadingEntry(
12321254
M, I.Var, getDeviceSideName(I.D), VarSize,

clang/test/CodeGenCUDA/offloading-entries.cu

+36-42
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --global-value-regex ".offloading.entry.*"
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --global-value-regex ".offloading.entry.*" "managed.*"
22
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fgpu-rdc \
33
// RUN: --offload-new-driver -emit-llvm -o - -x cuda %s | FileCheck \
44
// RUN: --check-prefix=CUDA %s
@@ -14,50 +14,68 @@
1414

1515
#include "Inputs/cuda.h"
1616

17+
#define __managed__ __attribute__((managed))
18+
1719
//.
20+
// CUDA: @managed = global i32 undef, align 4
1821
// CUDA: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
1922
// CUDA: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
2023
// CUDA: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
2124
// CUDA: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
2225
// CUDA: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1
2326
// CUDA: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
24-
// CUDA: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
25-
// CUDA: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "cuda_offloading_entries", align 1
26-
// CUDA: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
27-
// CUDA: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "cuda_offloading_entries", align 1
27+
// CUDA: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1
28+
// CUDA: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { ptr @managed, ptr @.offloading.entry_name.3, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
29+
// CUDA: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
30+
// CUDA: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.4, i64 4, i32 2, i32 1 }, section "cuda_offloading_entries", align 1
31+
// CUDA: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
32+
// CUDA: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.5, i64 4, i32 3, i32 1 }, section "cuda_offloading_entries", align 1
2833
//.
34+
// HIP: @managed.managed = global i32 0, align 4
35+
// HIP: @managed = externally_initialized global ptr null
2936
// HIP: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
3037
// HIP: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "hip_offloading_entries", align 1
3138
// HIP: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
3239
// HIP: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "hip_offloading_entries", align 1
3340
// HIP: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1
3441
// HIP: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "hip_offloading_entries", align 1
35-
// HIP: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
36-
// HIP: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "hip_offloading_entries", align 1
37-
// HIP: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
38-
// HIP: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "hip_offloading_entries", align 1
42+
// HIP: @managed.managed.3 = private constant %struct.__managed_var { ptr @managed, ptr @managed.managed }
43+
// HIP: @.offloading.entry_name.4 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1
44+
// HIP: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { ptr @managed.managed.3, ptr @.offloading.entry_name.4, i64 4, i32 1, i32 4 }, section "hip_offloading_entries", align 1
45+
// HIP: @.offloading.entry_name.5 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
46+
// HIP: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.5, i64 4, i32 2, i32 1 }, section "hip_offloading_entries", align 1
47+
// HIP: @.offloading.entry_name.6 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
48+
// HIP: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.6, i64 4, i32 3, i32 1 }, section "hip_offloading_entries", align 1
3949
//.
50+
// CUDA-COFF: @managed = dso_local global i32 undef, align 4
4051
// CUDA-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
4152
// CUDA-COFF: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1
4253
// CUDA-COFF: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
4354
// CUDA-COFF: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1
4455
// CUDA-COFF: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1
4556
// CUDA-COFF: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1
46-
// CUDA-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
47-
// CUDA-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "cuda_offloading_entries$OE", align 1
48-
// CUDA-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
49-
// CUDA-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "cuda_offloading_entries$OE", align 1
57+
// CUDA-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1
58+
// CUDA-COFF: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { ptr @managed, ptr @.offloading.entry_name.3, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1
59+
// CUDA-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
60+
// CUDA-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.4, i64 4, i32 2, i32 1 }, section "cuda_offloading_entries$OE", align 1
61+
// CUDA-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
62+
// CUDA-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.5, i64 4, i32 3, i32 1 }, section "cuda_offloading_entries$OE", align 1
5063
//.
64+
// HIP-COFF: @managed.managed = dso_local global i32 0, align 4
65+
// HIP-COFF: @managed = dso_local externally_initialized global ptr null
5166
// HIP-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
5267
// HIP-COFF: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1
5368
// HIP-COFF: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
5469
// HIP-COFF: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1
5570
// HIP-COFF: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1
5671
// HIP-COFF: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1
57-
// HIP-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
58-
// HIP-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "hip_offloading_entries$OE", align 1
59-
// HIP-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
60-
// HIP-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "hip_offloading_entries$OE", align 1
72+
// HIP-COFF: @managed.managed.3 = private constant %struct.__managed_var { ptr @managed, ptr @managed.managed }
73+
// HIP-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1
74+
// HIP-COFF: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { ptr @managed.managed.3, ptr @.offloading.entry_name.4, i64 4, i32 1, i32 4 }, section "hip_offloading_entries$OE", align 1
75+
// HIP-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
76+
// HIP-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.5, i64 4, i32 2, i32 1 }, section "hip_offloading_entries$OE", align 1
77+
// HIP-COFF: @.offloading.entry_name.6 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
78+
// HIP-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.6, i64 4, i32 3, i32 1 }, section "hip_offloading_entries$OE", align 1
6179
//.
6280
// CUDA-LABEL: @_Z18__device_stub__foov(
6381
// CUDA-NEXT: entry:
@@ -91,6 +109,7 @@ __global__ void foo() {}
91109
__device__ int var = 1;
92110
const __device__ int constant = 1;
93111
extern __device__ int external;
112+
__device__ __managed__ int managed = 0;
94113

95114
// CUDA-LABEL: @_Z21__device_stub__kernelv(
96115
// CUDA-NEXT: entry:
@@ -137,28 +156,3 @@ template <typename T, int dim = 1, int mode = 0>
137156
struct __attribute__((device_builtin_texture_type)) texture : public textureReference {};
138157

139158
texture<void> tex;
140-
//.
141-
// CUDA: [[META0:![0-9]+]] = !{ptr @.offloading.entry_name}
142-
// CUDA: [[META1:![0-9]+]] = !{ptr @.offloading.entry_name.1}
143-
// CUDA: [[META2:![0-9]+]] = !{ptr @.offloading.entry_name.2}
144-
// CUDA: [[META3:![0-9]+]] = !{ptr @.offloading.entry_name.3}
145-
// CUDA: [[META4:![0-9]+]] = !{ptr @.offloading.entry_name.4}
146-
//.
147-
// HIP: [[META0:![0-9]+]] = !{ptr @.offloading.entry_name}
148-
// HIP: [[META1:![0-9]+]] = !{ptr @.offloading.entry_name.1}
149-
// HIP: [[META2:![0-9]+]] = !{ptr @.offloading.entry_name.2}
150-
// HIP: [[META3:![0-9]+]] = !{ptr @.offloading.entry_name.3}
151-
// HIP: [[META4:![0-9]+]] = !{ptr @.offloading.entry_name.4}
152-
//.
153-
// CUDA-COFF: [[META0:![0-9]+]] = !{ptr @.offloading.entry_name}
154-
// CUDA-COFF: [[META1:![0-9]+]] = !{ptr @.offloading.entry_name.1}
155-
// CUDA-COFF: [[META2:![0-9]+]] = !{ptr @.offloading.entry_name.2}
156-
// CUDA-COFF: [[META3:![0-9]+]] = !{ptr @.offloading.entry_name.3}
157-
// CUDA-COFF: [[META4:![0-9]+]] = !{ptr @.offloading.entry_name.4}
158-
//.
159-
// HIP-COFF: [[META0:![0-9]+]] = !{ptr @.offloading.entry_name}
160-
// HIP-COFF: [[META1:![0-9]+]] = !{ptr @.offloading.entry_name.1}
161-
// HIP-COFF: [[META2:![0-9]+]] = !{ptr @.offloading.entry_name.2}
162-
// HIP-COFF: [[META3:![0-9]+]] = !{ptr @.offloading.entry_name.3}
163-
// HIP-COFF: [[META4:![0-9]+]] = !{ptr @.offloading.entry_name.4}
164-
//.

clang/test/Driver/linker-wrapper-image.c

+18-10
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@
8787
// CUDA-NEXT: br i1 %1, label %while.entry, label %while.end
8888

8989
// CUDA: while.entry:
90-
// CUDA-NEXT: %entry1 = phi ptr [ @__start_cuda_offloading_entries, %entry ], [ %12, %if.end ]
90+
// CUDA-NEXT: %entry1 = phi ptr [ @__start_cuda_offloading_entries, %entry ], [ %13, %if.end ]
9191
// CUDA-NEXT: %2 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 0
9292
// CUDA-NEXT: %addr = load ptr, ptr %2, align 8
9393
// CUDA-NEXT: %3 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 1
@@ -125,7 +125,11 @@
125125
// CUDA-NEXT: br label %if.end
126126

127127
// CUDA: sw.managed:
128-
// CUDA-NEXT: br label %if.end
128+
// CUDA-NEXT: %managed.addr = load ptr, ptr %addr, align 8
129+
// CUDA-NEXT: %12 = getelementptr inbounds ptr, ptr %addr, i64 1
130+
// CUDA-NEXT: %managed.addr2 = load ptr, ptr %12, align 8
131+
// CUDA-NEXT: call void @__cudaRegisterManagedVar(ptr %0, ptr %managed.addr, ptr %managed.addr2, ptr %name, i64 %size, i32 %textype)
132+
// CUDA-NEXT: br label %if.end
129133

130134
// CUDA: sw.surface:
131135
// CUDA-NEXT: br label %if.end
@@ -134,9 +138,9 @@
134138
// CUDA-NEXT: br label %if.end
135139

136140
// CUDA: if.end:
137-
// CUDA-NEXT: %12 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 1
138-
// CUDA-NEXT: %13 = icmp eq ptr %12, @__stop_cuda_offloading_entries
139-
// CUDA-NEXT: br i1 %13, label %while.end, label %while.entry
141+
// CUDA-NEXT: %13 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 1
142+
// CUDA-NEXT: %14 = icmp eq ptr %13, @__stop_cuda_offloading_entries
143+
// CUDA-NEXT: br i1 %14, label %while.end, label %while.entry
140144

141145
// CUDA: while.end:
142146
// CUDA-NEXT: ret void
@@ -187,7 +191,7 @@
187191
// HIP-NEXT: br i1 %1, label %while.entry, label %while.end
188192

189193
// HIP: while.entry:
190-
// HIP-NEXT: %entry1 = phi ptr [ @__start_hip_offloading_entries, %entry ], [ %12, %if.end ]
194+
// HIP-NEXT: %entry1 = phi ptr [ @__start_hip_offloading_entries, %entry ], [ %13, %if.end ]
191195
// HIP-NEXT: %2 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 0
192196
// HIP-NEXT: %addr = load ptr, ptr %2, align 8
193197
// HIP-NEXT: %3 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 1
@@ -225,7 +229,11 @@
225229
// HIP-NEXT: br label %if.end
226230

227231
// HIP: sw.managed:
228-
// HIP-NEXT: br label %if.end
232+
// HIP-NEXT: %managed.addr = load ptr, ptr %addr, align 8
233+
// HIP-NEXT: %12 = getelementptr inbounds ptr, ptr %addr, i64 1
234+
// HIP-NEXT: %managed.addr2 = load ptr, ptr %12, align 8
235+
// HIP-NEXT: call void @__hipRegisterManagedVar(ptr %0, ptr %managed.addr, ptr %managed.addr2, ptr %name, i64 %size, i32 %textype)
236+
// HIP-NEXT: br label %if.end
229237

230238
// HIP: sw.surface:
231239
// HIP-NEXT: call void @__hipRegisterSurface(ptr %0, ptr %addr, ptr %name, ptr %name, i32 %textype, i32 %extern)
@@ -236,9 +244,9 @@
236244
// HIP-NEXT: br label %if.end
237245

238246
// HIP: if.end:
239-
// HIP-NEXT: %12 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 1
240-
// HIP-NEXT: %13 = icmp eq ptr %12, @__stop_hip_offloading_entries
241-
// HIP-NEXT: br i1 %13, label %while.end, label %while.entry
247+
// HIP-NEXT: %13 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 1
248+
// HIP-NEXT: %14 = icmp eq ptr %13, @__stop_hip_offloading_entries
249+
// HIP-NEXT: br i1 %14, label %while.end, label %while.entry
242250

243251
// HIP: while.end:
244252
// HIP-NEXT: ret void

llvm/include/llvm/Frontend/Offloading/Utility.h

+4
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,10 @@ enum OffloadEntryKindFlag : uint32_t {
5555
/// globals that will be registered with the offloading runtime.
5656
StructType *getEntryTy(Module &M);
5757

58+
/// Returns the struct type we store the two pointers for CUDA / HIP managed
59+
/// variables in. Necessary until we widen the offload entry struct.
60+
StructType *getManagedTy(Module &M);
61+
5862
/// Create an offloading section struct used to register this global at
5963
/// runtime.
6064
///

0 commit comments

Comments
 (0)