Skip to content

Commit 8e09ffa

Browse files
[SYCL] Make SYCL RT compatible with the new offload entry type (#17109)
llvm/llvm-project#124018 change the type of offload struct. This PR makes SYCL RT compatible with the new format, while keeping backwards compatibility with the older version (till next ABI-break window). --------- Co-authored-by: Sergey Semenov <[email protected]>
1 parent cd6a94c commit 8e09ffa

File tree

7 files changed

+98
-41
lines changed

7 files changed

+98
-41
lines changed

clang/test/Driver/sycl-linker-wrapper-image.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ int main() {
3737

3838
// CHECK-DAG: %_pi_device_binary_property_struct = type { ptr, ptr, i32, i64 }
3939
// CHECK-DAG: %_pi_device_binary_property_set_struct = type { ptr, ptr, ptr }
40-
// CHECK-DAG: %struct.__tgt_offload_entry = type { ptr, ptr, i64, i32, i32 }
40+
// CHECK-DAG: %struct.__tgt_offload_entry = type { i64, i16, i16, i32, ptr, ptr, i64, i64, ptr }
4141
// CHECK-DAG: %__sycl.tgt_device_image = type { i16, i8, i8, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr }
4242
// CHECK-DAG: %__sycl.tgt_bin_desc = type { i16, i16, ptr, ptr, ptr }
4343

@@ -55,7 +55,7 @@ int main() {
5555
// CHECK-DAG: @__sycl_offload_prop_sets_arr.5 = internal constant [3 x %_pi_device_binary_property_set_struct] [%_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName, ptr @__sycl_offload_prop_sets_arr, ptr getelementptr ([1 x %_pi_device_binary_property_struct], ptr @__sycl_offload_prop_sets_arr, i64 0, i64 1) }, %_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName.3, ptr @__sycl_offload_prop_sets_arr.2, ptr getelementptr ([1 x %_pi_device_binary_property_struct], ptr @__sycl_offload_prop_sets_arr.2, i64 0, i64 1) }, %_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName.4, ptr null, ptr null }]
5656
// CHECK-DAG: @.sycl_offloading.0.data = internal unnamed_addr constant [912 x i8]
5757
// CHECK-DAG: @__sycl_offload_entry_name = internal unnamed_addr constant [25 x i8] c"_ZTSZ4mainE11fake_kernel\00"
58-
// CHECK-DAG: @__sycl_offload_entries_arr = internal constant [1 x %struct.__tgt_offload_entry] [%struct.__tgt_offload_entry { ptr null, ptr @__sycl_offload_entry_name, i64 0, i32 0, i32 0 }]
58+
// CHECK-DAG: @__sycl_offload_entries_arr = internal constant [1 x %struct.__tgt_offload_entry] [%struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 0, ptr null, ptr @__sycl_offload_entry_name, i64 0, i64 0, ptr null }]
5959
// CHECK-DAG: @.sycl_offloading.0.info = internal local_unnamed_addr constant [2 x i64] [i64 ptrtoint (ptr @.sycl_offloading.0.data to i64), i64 912], section ".tgtimg", align 16
6060
// CHECK-DAG: @llvm.used = appending global [1 x ptr] [ptr @.sycl_offloading.0.info], section "llvm.metadata"
6161
// CHECK-DAG: @.sycl_offloading.device_images = internal unnamed_addr constant [1 x %__sycl.tgt_device_image] [%__sycl.tgt_device_image { i16 2, i8 4, i8 0, ptr @.sycl_offloading.target.0, ptr @.sycl_offloading.opts.compile.0, ptr @.sycl_offloading.opts.link.0, ptr null, ptr null, ptr @.sycl_offloading.0.data, ptr getelementptr ([912 x i8], ptr @.sycl_offloading.0.data, i64 0, i64 912), ptr @__sycl_offload_entries_arr, ptr getelementptr ([1 x %struct.__tgt_offload_entry], ptr @__sycl_offload_entries_arr, i64 0, i64 1), ptr @__sycl_offload_prop_sets_arr.5, ptr getelementptr ([3 x %_pi_device_binary_property_set_struct], ptr @__sycl_offload_prop_sets_arr.5, i64 0, i64 3) }]

llvm/lib/Frontend/Offloading/SYCLOffloadWrapper.cpp

+16-18
Original file line numberDiff line numberDiff line change
@@ -62,18 +62,6 @@ int8_t binaryImageFormatToInt8(SYCLBinaryImageFormat Format) {
6262
}
6363
}
6464

65-
StructType* getLegacyOffloadEntryTy(Module &M) {
66-
LLVMContext &C = M.getContext();
67-
StructType *EntryTy =
68-
StructType::getTypeByName(C, "struct.__tgt_offload_entry");
69-
if (!EntryTy)
70-
EntryTy = StructType::create(
71-
"struct.__tgt_offload_entry", PointerType::getUnqual(C),
72-
PointerType::getUnqual(C), M.getDataLayout().getIntPtrType(C),
73-
Type::getInt32Ty(C), Type::getInt32Ty(C));
74-
return EntryTy;
75-
}
76-
7765
/// Wrapper helper class that creates all LLVM IRs wrapping given images.
7866
/// Note: All created structures, "_pi_device_*", "__sycl_*" and "__tgt*" names
7967
/// in this implementation are aligned with "sycl/include/sycl/detail/pi.h".
@@ -95,7 +83,7 @@ struct Wrapper {
9583

9684
SyclPropTy = getSyclPropTy();
9785
SyclPropSetTy = getSyclPropSetTy();
98-
EntryTy = getLegacyOffloadEntryTy(M);
86+
EntryTy = offloading::getEntryTy(M);
9987
SyclDeviceImageTy = getSyclDeviceImageTy();
10088
SyclBinDescTy = getSyclBinDescTy();
10189
}
@@ -399,16 +387,26 @@ struct Wrapper {
399387
return std::pair<Constant *, Constant *>(NullPtr, NullPtr);
400388
}
401389

402-
auto *Zero = ConstantInt::get(getSizeTTy(), 0);
390+
auto *I64Zero = ConstantInt::get(Type::getInt64Ty(C), 0);
403391
auto *I32Zero = ConstantInt::get(Type::getInt32Ty(C), 0);
404392
auto *NullPtr = Constant::getNullValue(PointerType::getUnqual(C));
405393

406394
SmallVector<Constant *> EntriesInits;
407395
std::unique_ptr<MemoryBuffer> MB = MemoryBuffer::getMemBuffer(Entries);
408-
for (line_iterator LI(*MB); !LI.is_at_eof(); ++LI)
409-
EntriesInits.push_back(ConstantStruct::get(
410-
EntryTy, NullPtr, addStringToModule(*LI, "__sycl_offload_entry_name"),
411-
Zero, I32Zero, I32Zero));
396+
for (line_iterator LI(*MB); !LI.is_at_eof(); ++LI) {
397+
Constant *EntryData[] = {
398+
ConstantExpr::getNullValue(Type::getInt64Ty(C)),
399+
ConstantInt::get(Type::getInt16Ty(C), 1),
400+
ConstantInt::get(Type::getInt16Ty(C), object::OffloadKind::OFK_SYCL),
401+
I32Zero,
402+
NullPtr,
403+
addStringToModule(*LI, "__sycl_offload_entry_name"),
404+
I64Zero,
405+
I64Zero,
406+
NullPtr};
407+
408+
EntriesInits.push_back(ConstantStruct::get(EntryTy, EntryData));
409+
}
412410

413411
auto *Arr = ConstantArray::get(ArrayType::get(EntryTy, EntriesInits.size()),
414412
EntriesInits);

sycl/source/detail/compiler.hpp

+57
Original file line numberDiff line numberDiff line change
@@ -78,13 +78,70 @@
7878

7979
#define __SYCL_PROGRAM_METADATA_TAG_NEED_FINALIZATION "Requires finalization"
8080

81+
// New entry type after
82+
// https://github.com/llvm/llvm-project/pull/124018
83+
// This is a replica of the EntryTy data structure in
84+
// llvm/include/llvm/Frontend/Offloading/Utility.h.
85+
struct _sycl_offload_entry_struct_new {
86+
/// Reserved bytes used to detect an older version of the struct, always zero.
87+
uint64_t Reserved;
88+
/// The current version of the struct for runtime forward compatibility.
89+
uint16_t Version;
90+
/// The expected consumer of this entry, e.g. CUDA or OpenMP.
91+
uint16_t Kind;
92+
/// Flags associated with the global.
93+
uint32_t Flags;
94+
/// The address of the global to be registered by the runtime.
95+
void *Address;
96+
/// The name of the symbol in the device image.
97+
char *SymbolName;
98+
/// The number of bytes the symbol takes.
99+
uint64_t Size;
100+
/// Extra generic data used to register this entry.
101+
uint64_t Data;
102+
/// An extra pointer, usually null.
103+
void *AuxAddr;
104+
};
105+
using sycl_offload_entry_new = _sycl_offload_entry_struct_new *;
106+
81107
// Entry type, matches OpenMP for compatibility
82108
struct _sycl_offload_entry_struct {
83109
void *addr;
84110
char *name;
85111
size_t size;
86112
int32_t flags;
87113
int32_t reserved;
114+
115+
inline bool IsNewOffloadEntryType() {
116+
// Assume this is the new version of the struct.
117+
auto newStruct = reinterpret_cast<sycl_offload_entry_new>(this);
118+
119+
// Check if first 64 bits is equal to 0, next 16 bits is equal to 1, next 16
120+
// bits is equal to 4 (OK_SYCL), and check if Flags are zero. If all these
121+
// conditions are met, then this is a newer version of the struct.
122+
// We can not just rely on checking the first 64 bits, because even for the
123+
// older version of the struct, the first 64 bits (void* addr) are zero.
124+
return newStruct->Reserved == 0 && newStruct->Version == 1 &&
125+
newStruct->Kind == 4 && newStruct->Flags == 0;
126+
}
127+
128+
// Name is the only field that's used in SYCL.
129+
inline char *GetName() {
130+
if (IsNewOffloadEntryType())
131+
return reinterpret_cast<sycl_offload_entry_new>(this)->SymbolName;
132+
133+
return name;
134+
}
135+
136+
// Increment the pointer to the next entry. A mix of old and new offload entry
137+
// types is not supported.
138+
inline _sycl_offload_entry_struct *Increment() {
139+
if (IsNewOffloadEntryType())
140+
return reinterpret_cast<_sycl_offload_entry_struct *>(
141+
reinterpret_cast<sycl_offload_entry_new>(this) + 1);
142+
143+
return this + 1;
144+
}
88145
};
89146
using sycl_offload_entry = _sycl_offload_entry_struct *;
90147

sycl/source/detail/device_binary_image.cpp

+3-2
Original file line numberDiff line numberDiff line change
@@ -120,9 +120,10 @@ void RTDeviceBinaryImage::print() const {
120120
std::cerr << " Link options : "
121121
<< (Bin->LinkOptions ? Bin->LinkOptions : "NULL") << "\n";
122122
std::cerr << " Entries : ";
123+
123124
for (sycl_offload_entry EntriesIt = Bin->EntriesBegin;
124-
EntriesIt != Bin->EntriesEnd; ++EntriesIt)
125-
std::cerr << EntriesIt->name << " ";
125+
EntriesIt != Bin->EntriesEnd; EntriesIt = EntriesIt->Increment())
126+
std::cerr << EntriesIt->GetName() << " ";
126127
std::cerr << "\n";
127128
std::cerr << " Properties [" << Bin->PropertySetsBegin << "-"
128129
<< Bin->PropertySetsEnd << "]:\n";

sycl/source/detail/persistent_device_code_cache.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,8 @@ getSortedImages(const std::vector<const RTDeviceBinaryImage *> &Imgs) {
103103
[](const RTDeviceBinaryImage *A, const RTDeviceBinaryImage *B) {
104104
// All entry names are unique among these images, so comparing the
105105
// first ones is enough.
106-
return std::strcmp(A->getRawData().EntriesBegin->name,
107-
B->getRawData().EntriesBegin->name) < 0;
106+
return std::strcmp(A->getRawData().EntriesBegin->GetName(),
107+
B->getRawData().EntriesBegin->GetName()) < 0;
108108
});
109109
return SortedImgs;
110110
}

sycl/source/detail/program_manager/program_manager.cpp

+16-15
Original file line numberDiff line numberDiff line change
@@ -1867,33 +1867,34 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) {
18671867
m_BinImg2KernelIDs[Img.get()].reset(new std::vector<kernel_id>);
18681868

18691869
for (sycl_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE;
1870-
++EntriesIt) {
1870+
EntriesIt = EntriesIt->Increment()) {
1871+
1872+
auto name = EntriesIt->GetName();
18711873

18721874
// Skip creating unique kernel ID if it is a service kernel.
18731875
// SYCL service kernels are identified by having
18741876
// __sycl_service_kernel__ in the mangled name, primarily as part of
18751877
// the namespace of the name type.
1876-
if (std::strstr(EntriesIt->name, "__sycl_service_kernel__")) {
1877-
m_ServiceKernels.insert(std::make_pair(EntriesIt->name, Img.get()));
1878+
if (std::strstr(name, "__sycl_service_kernel__")) {
1879+
m_ServiceKernels.insert(std::make_pair(name, Img.get()));
18781880
continue;
18791881
}
18801882

18811883
// Skip creating unique kernel ID if it is an exported device
18821884
// function. Exported device functions appear in the offload entries
18831885
// among kernels, but are identifiable by being listed in properties.
1884-
if (m_ExportedSymbolImages.find(EntriesIt->name) !=
1885-
m_ExportedSymbolImages.end())
1886+
if (m_ExportedSymbolImages.find(name) != m_ExportedSymbolImages.end())
18861887
continue;
18871888

18881889
// ... and create a unique kernel ID for the entry
1889-
auto It = m_KernelName2KernelIDs.find(EntriesIt->name);
1890+
auto It = m_KernelName2KernelIDs.find(name);
18901891
if (It == m_KernelName2KernelIDs.end()) {
18911892
std::shared_ptr<detail::kernel_id_impl> KernelIDImpl =
1892-
std::make_shared<detail::kernel_id_impl>(EntriesIt->name);
1893+
std::make_shared<detail::kernel_id_impl>(name);
18931894
sycl::kernel_id KernelID =
18941895
detail::createSyclObjFromImpl<sycl::kernel_id>(KernelIDImpl);
18951896

1896-
It = m_KernelName2KernelIDs.emplace_hint(It, EntriesIt->name, KernelID);
1897+
It = m_KernelName2KernelIDs.emplace_hint(It, name, KernelID);
18971898
}
18981899
m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get()));
18991900
m_BinImg2KernelIDs[Img.get()]->push_back(It->second);
@@ -2020,25 +2021,25 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) {
20202021

20212022
// Unmap the unique kernel IDs for the offload entries
20222023
for (sycl_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE;
2023-
++EntriesIt) {
2024+
EntriesIt = EntriesIt->Increment()) {
20242025

20252026
// Drop entry for service kernel
2026-
if (std::strstr(EntriesIt->name, "__sycl_service_kernel__")) {
2027-
m_ServiceKernels.erase(EntriesIt->name);
2027+
if (std::strstr(EntriesIt->GetName(), "__sycl_service_kernel__")) {
2028+
m_ServiceKernels.erase(EntriesIt->GetName());
20282029
continue;
20292030
}
20302031

20312032
// Exported device functions won't have a kernel ID
2032-
if (m_ExportedSymbolImages.find(EntriesIt->name) !=
2033+
if (m_ExportedSymbolImages.find(EntriesIt->GetName()) !=
20332034
m_ExportedSymbolImages.end()) {
20342035
continue;
20352036
}
20362037

20372038
// remove everything associated with this KernelName
2038-
m_KernelUsesAssert.erase(EntriesIt->name);
2039-
m_KernelImplicitLocalArgPos.erase(EntriesIt->name);
2039+
m_KernelUsesAssert.erase(EntriesIt->GetName());
2040+
m_KernelImplicitLocalArgPos.erase(EntriesIt->GetName());
20402041

2041-
if (auto It = m_KernelName2KernelIDs.find(EntriesIt->name);
2042+
if (auto It = m_KernelName2KernelIDs.find(EntriesIt->GetName());
20422043
It != m_KernelName2KernelIDs.end()) {
20432044
m_KernelName2KernelIDs.erase(It);
20442045
m_KernelIDs2BinImage.erase(It->second);

sycl/unittests/kernel-and-program/PersistentDeviceCodeCache.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -336,8 +336,8 @@ TEST_P(PersistentDeviceCodeCache, MultipleImages) {
336336
std::sort(Imgs.begin(), Imgs.end(),
337337
[](const detail::RTDeviceBinaryImage *A,
338338
const detail::RTDeviceBinaryImage *B) {
339-
return std::strcmp(A->getRawData().EntriesBegin->name,
340-
B->getRawData().EntriesBegin->name) < 0;
339+
return std::strcmp(A->getRawData().EntriesBegin->GetName(),
340+
B->getRawData().EntriesBegin->GetName()) < 0;
341341
});
342342
std::string ItemDir = detail::PersistentDeviceCodeCache::getCacheItemPath(
343343
Dev, Imgs, {}, BuildOptions);

0 commit comments

Comments
 (0)