-
Notifications
You must be signed in to change notification settings - Fork 14.5k
[Offload] Have doJITPostProcessing
accept multiple binaries
#148608
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
The device handler for JIT processing images now accepts a list of buffers rather than just one. Devices are expected to either link them all into a single binary, or return an error code if they don't support linking. Currently, only amdgpu supports multiple binaries.
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-offload Author: Ross Brunton (RossBrunton) ChangesThe device handler for JIT processing images now accepts a list of Currently, only amdgpu supports multiple binaries. Full diff: https://github.com/llvm/llvm-project/pull/148608.diff 5 Files Affected:
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 12c7cc62905c9..87ec1e7d01ead 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2066,31 +2066,35 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
uint64_t getStreamBusyWaitMicroseconds() const { return OMPX_StreamBusyWait; }
- Expected<std::unique_ptr<MemoryBuffer>>
- doJITPostProcessing(std::unique_ptr<MemoryBuffer> MB) const override {
+ Expected<std::unique_ptr<MemoryBuffer>> doJITPostProcessing(
+ std::vector<std::unique_ptr<MemoryBuffer>> &&MB) const override {
// TODO: We should try to avoid materialization but there seems to be no
// good linker interface w/o file i/o.
- SmallString<128> LinkerInputFilePath;
- std::error_code EC = sys::fs::createTemporaryFile("amdgpu-pre-link-jit",
- "o", LinkerInputFilePath);
- if (EC)
- return Plugin::error(ErrorCode::HOST_IO,
- "failed to create temporary file for linker");
-
- // Write the file's contents to the output file.
- Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr =
- FileOutputBuffer::create(LinkerInputFilePath, MB->getBuffer().size());
- if (!OutputOrErr)
- return OutputOrErr.takeError();
- std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
- llvm::copy(MB->getBuffer(), Output->getBufferStart());
- if (Error E = Output->commit())
- return std::move(E);
+ llvm::SmallVector<SmallString<128>> InputFilenames;
+ for (auto &B : MB) {
+ SmallString<128> LinkerInputFilePath;
+ auto &Dest = InputFilenames.emplace_back();
+ std::error_code EC =
+ sys::fs::createTemporaryFile("amdgpu-pre-link-jit", "o", Dest);
+ if (EC)
+ return Plugin::error(ErrorCode::HOST_IO,
+ "failed to create temporary file for linker");
+
+ // Write the file's contents to the output file.
+ Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr =
+ FileOutputBuffer::create(Dest, B->getBuffer().size());
+ if (!OutputOrErr)
+ return OutputOrErr.takeError();
+ std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
+ llvm::copy(B->getBuffer(), Output->getBufferStart());
+ if (Error E = Output->commit())
+ return std::move(E);
+ }
SmallString<128> LinkerOutputFilePath;
- EC = sys::fs::createTemporaryFile("amdgpu-pre-link-jit", "so",
- LinkerOutputFilePath);
+ std::error_code EC = sys::fs::createTemporaryFile(
+ "amdgpu-pre-link-jit", "so", LinkerOutputFilePath);
if (EC)
return Plugin::error(ErrorCode::HOST_IO,
"failed to create temporary file for linker");
@@ -2105,15 +2109,12 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
"Using `%s` to link JITed amdgcn output.", LLDPath.c_str());
std::string MCPU = "-plugin-opt=mcpu=" + getComputeUnitKind();
- StringRef Args[] = {LLDPath,
- "-flavor",
- "gnu",
- "--no-undefined",
- "-shared",
- MCPU,
- "-o",
- LinkerOutputFilePath.data(),
- LinkerInputFilePath.data()};
+ std::vector<StringRef> Args = {
+ LLDPath, "-flavor", "gnu", "--no-undefined",
+ "-shared", MCPU, "-o", LinkerOutputFilePath.data()};
+ for (auto &N : InputFilenames) {
+ Args.push_back(N);
+ }
std::string Error;
int RC = sys::ExecuteAndWait(LLDPath, Args, std::nullopt, {}, 0, 0, &Error);
@@ -2131,9 +2132,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
if (sys::fs::remove(LinkerOutputFilePath))
return Plugin::error(ErrorCode::HOST_IO,
"failed to remove temporary output file for lld");
- if (sys::fs::remove(LinkerInputFilePath))
- return Plugin::error(ErrorCode::HOST_IO,
- "failed to remove temporary input file for lld");
+ for (auto &N : InputFilenames) {
+ if (sys::fs::remove(N))
+ return Plugin::error(ErrorCode::HOST_IO,
+ "failed to remove temporary input file for lld");
+ }
return std::move(*BufferOrErr);
}
diff --git a/offload/plugins-nextgen/common/include/JIT.h b/offload/plugins-nextgen/common/include/JIT.h
index 8c530436a754b..fe4af5fd651a1 100644
--- a/offload/plugins-nextgen/common/include/JIT.h
+++ b/offload/plugins-nextgen/common/include/JIT.h
@@ -44,7 +44,7 @@ struct JITEngine {
/// called.
using PostProcessingFn =
std::function<Expected<std::unique_ptr<MemoryBuffer>>(
- std::unique_ptr<MemoryBuffer>)>;
+ std::vector<std::unique_ptr<MemoryBuffer>> &&)>;
JITEngine(Triple::ArchType TA);
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index 162b149ab483e..4cb7435bcd95f 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -935,8 +935,12 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
/// Post processing after jit backend. The ownership of \p MB will be taken.
virtual Expected<std::unique_ptr<MemoryBuffer>>
- doJITPostProcessing(std::unique_ptr<MemoryBuffer> MB) const {
- return std::move(MB);
+ doJITPostProcessing(std::vector<std::unique_ptr<MemoryBuffer>> &&MB) const {
+ if (MB.size() > 1)
+ return make_error<error::OffloadError>(
+ error::ErrorCode::UNSUPPORTED,
+ "Plugin does not support linking multiple binaries");
+ return std::move(MB[0]);
}
/// The minimum number of threads we use for a low-trip count combined loop.
diff --git a/offload/plugins-nextgen/common/src/JIT.cpp b/offload/plugins-nextgen/common/src/JIT.cpp
index c82a06e36d8f9..aed508e0d93ac 100644
--- a/offload/plugins-nextgen/common/src/JIT.cpp
+++ b/offload/plugins-nextgen/common/src/JIT.cpp
@@ -292,7 +292,9 @@ JITEngine::compile(const __tgt_device_image &Image,
if (!ObjMBOrErr)
return ObjMBOrErr.takeError();
- auto ImageMBOrErr = PostProcessing(std::move(*ObjMBOrErr));
+ std::vector<std::unique_ptr<MemoryBuffer>> Buffers;
+ Buffers.push_back(std::move(*ObjMBOrErr));
+ auto ImageMBOrErr = PostProcessing(std::move(Buffers));
if (!ImageMBOrErr)
return ImageMBOrErr.takeError();
@@ -314,7 +316,8 @@ JITEngine::process(const __tgt_device_image &Image,
target::plugin::GenericDeviceTy &Device) {
const std::string &ComputeUnitKind = Device.getComputeUnitKind();
- PostProcessingFn PostProcessing = [&Device](std::unique_ptr<MemoryBuffer> MB)
+ PostProcessingFn PostProcessing =
+ [&Device](std::vector<std::unique_ptr<MemoryBuffer>> &&MB)
-> Expected<std::unique_ptr<MemoryBuffer>> {
return Device.doJITPostProcessing(std::move(MB));
};
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index 15193de6ae430..e5f3c6214ae75 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -420,8 +420,14 @@ struct CUDADeviceTy : public GenericDeviceTy {
return Plugin::success();
}
- Expected<std::unique_ptr<MemoryBuffer>>
- doJITPostProcessing(std::unique_ptr<MemoryBuffer> MB) const override {
+ Expected<std::unique_ptr<MemoryBuffer>> doJITPostProcessing(
+ std::vector<std::unique_ptr<MemoryBuffer>> &&MB) const override {
+ // TODO: This should be possible, just needs to be implemented
+ if (MB.size() > 1)
+ return make_error<error::OffloadError>(
+ error::ErrorCode::UNIMPLEMENTED,
+ "CUDA plugin does not support linking multiple binaries");
+
// TODO: We should be able to use the 'nvidia-ptxjitcompiler' interface to
// avoid the call to 'ptxas'.
SmallString<128> PTXInputFilePath;
@@ -433,11 +439,11 @@ struct CUDADeviceTy : public GenericDeviceTy {
// Write the file's contents to the output file.
Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr =
- FileOutputBuffer::create(PTXInputFilePath, MB->getBuffer().size());
+ FileOutputBuffer::create(PTXInputFilePath, MB[0]->getBuffer().size());
if (!OutputOrErr)
return OutputOrErr.takeError();
std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
- llvm::copy(MB->getBuffer(), Output->getBufferStart());
+ llvm::copy(MB[0]->getBuffer(), Output->getBufferStart());
if (Error E = Output->commit())
return std::move(E);
|
736f4d4
to
fa31376
Compare
if (sys::fs::remove(N)) | ||
return Plugin::error(ErrorCode::HOST_IO, | ||
"failed to remove temporary input file for lld"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I thought there was some temporary file helper in Support that managed the create and remove of the files for you
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
TempFile
exists, but it operates on file descriptors rather than names. I don't think it's possible for a helper to exist that deletes the file in a destructor, since it could fail and it needs to send the error somewhere.
The device handler for JIT processing images now accepts a list of
buffers rather than just one. Devices are expected to either link them
all into a single binary, or return an error code if they don't support
linking.
Currently, only amdgpu supports multiple binaries.