Skip to content

Commit c4af915

Browse files
committed
[Offload] Have doJITPostProcessing accept multiple binaries
The device hook for JIT post-processing of images (`doJITPostProcessing`) now accepts a list of buffers rather than a single buffer. Devices are expected either to link all of the buffers into a single binary or to return an error if they do not support linking. Currently, only the amdgpu plugin supports multiple binaries.
1 parent a716cc0 commit c4af915

File tree

5 files changed

+57
-41
lines changed

5 files changed

+57
-41
lines changed

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 35 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -2066,31 +2066,35 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
20662066

20672067
uint64_t getStreamBusyWaitMicroseconds() const { return OMPX_StreamBusyWait; }
20682068

2069-
Expected<std::unique_ptr<MemoryBuffer>>
2070-
doJITPostProcessing(std::unique_ptr<MemoryBuffer> MB) const override {
2069+
Expected<std::unique_ptr<MemoryBuffer>> doJITPostProcessing(
2070+
std::vector<std::unique_ptr<MemoryBuffer>> &&MB) const override {
20712071

20722072
// TODO: We should try to avoid materialization but there seems to be no
20732073
// good linker interface w/o file i/o.
2074-
SmallString<128> LinkerInputFilePath;
2075-
std::error_code EC = sys::fs::createTemporaryFile("amdgpu-pre-link-jit",
2076-
"o", LinkerInputFilePath);
2077-
if (EC)
2078-
return Plugin::error(ErrorCode::HOST_IO,
2079-
"failed to create temporary file for linker");
2080-
2081-
// Write the file's contents to the output file.
2082-
Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr =
2083-
FileOutputBuffer::create(LinkerInputFilePath, MB->getBuffer().size());
2084-
if (!OutputOrErr)
2085-
return OutputOrErr.takeError();
2086-
std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
2087-
llvm::copy(MB->getBuffer(), Output->getBufferStart());
2088-
if (Error E = Output->commit())
2089-
return std::move(E);
2074+
llvm::SmallVector<SmallString<128>> InputFilenames;
2075+
for (auto &B : MB) {
2076+
SmallString<128> LinkerInputFilePath;
2077+
auto &Dest = InputFilenames.emplace_back();
2078+
std::error_code EC =
2079+
sys::fs::createTemporaryFile("amdgpu-pre-link-jit", "o", Dest);
2080+
if (EC)
2081+
return Plugin::error(ErrorCode::HOST_IO,
2082+
"failed to create temporary file for linker");
2083+
2084+
// Write the file's contents to the output file.
2085+
Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr =
2086+
FileOutputBuffer::create(Dest, B->getBuffer().size());
2087+
if (!OutputOrErr)
2088+
return OutputOrErr.takeError();
2089+
std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
2090+
llvm::copy(B->getBuffer(), Output->getBufferStart());
2091+
if (Error E = Output->commit())
2092+
return std::move(E);
2093+
}
20902094

20912095
SmallString<128> LinkerOutputFilePath;
2092-
EC = sys::fs::createTemporaryFile("amdgpu-pre-link-jit", "so",
2093-
LinkerOutputFilePath);
2096+
std::error_code EC = sys::fs::createTemporaryFile(
2097+
"amdgpu-pre-link-jit", "so", LinkerOutputFilePath);
20942098
if (EC)
20952099
return Plugin::error(ErrorCode::HOST_IO,
20962100
"failed to create temporary file for linker");
@@ -2105,15 +2109,12 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
21052109
"Using `%s` to link JITed amdgcn output.", LLDPath.c_str());
21062110

21072111
std::string MCPU = "-plugin-opt=mcpu=" + getComputeUnitKind();
2108-
StringRef Args[] = {LLDPath,
2109-
"-flavor",
2110-
"gnu",
2111-
"--no-undefined",
2112-
"-shared",
2113-
MCPU,
2114-
"-o",
2115-
LinkerOutputFilePath.data(),
2116-
LinkerInputFilePath.data()};
2112+
std::vector<StringRef> Args = {
2113+
LLDPath, "-flavor", "gnu", "--no-undefined",
2114+
"-shared", MCPU, "-o", LinkerOutputFilePath.data()};
2115+
for (auto &N : InputFilenames) {
2116+
Args.push_back(N);
2117+
}
21172118

21182119
std::string Error;
21192120
int RC = sys::ExecuteAndWait(LLDPath, Args, std::nullopt, {}, 0, 0, &Error);
@@ -2131,9 +2132,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
21312132
if (sys::fs::remove(LinkerOutputFilePath))
21322133
return Plugin::error(ErrorCode::HOST_IO,
21332134
"failed to remove temporary output file for lld");
2134-
if (sys::fs::remove(LinkerInputFilePath))
2135-
return Plugin::error(ErrorCode::HOST_IO,
2136-
"failed to remove temporary input file for lld");
2135+
for (auto &N : InputFilenames) {
2136+
if (sys::fs::remove(N))
2137+
return Plugin::error(ErrorCode::HOST_IO,
2138+
"failed to remove temporary input file for lld");
2139+
}
21372140

21382141
return std::move(*BufferOrErr);
21392142
}

offload/plugins-nextgen/common/include/JIT.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ struct JITEngine {
4444
/// called.
4545
using PostProcessingFn =
4646
std::function<Expected<std::unique_ptr<MemoryBuffer>>(
47-
std::unique_ptr<MemoryBuffer>)>;
47+
std::vector<std::unique_ptr<MemoryBuffer>> &&)>;
4848

4949
JITEngine(Triple::ArchType TA);
5050

offload/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -935,8 +935,12 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
935935

936936
/// Post-processing after the JIT backend. Ownership of the buffers in \p MB is taken.
937937
virtual Expected<std::unique_ptr<MemoryBuffer>>
938-
doJITPostProcessing(std::unique_ptr<MemoryBuffer> MB) const {
939-
return std::move(MB);
938+
doJITPostProcessing(std::vector<std::unique_ptr<MemoryBuffer>> &&MB) const {
939+
if (MB.size() > 1)
940+
return make_error<error::OffloadError>(
941+
error::ErrorCode::UNSUPPORTED,
942+
"Plugin does not support linking multiple binaries");
943+
return std::move(MB[0]);
940944
}
941945

942946
/// The minimum number of threads we use for a low-trip count combined loop.

offload/plugins-nextgen/common/src/JIT.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,9 @@ JITEngine::compile(const __tgt_device_image &Image,
292292
if (!ObjMBOrErr)
293293
return ObjMBOrErr.takeError();
294294

295-
auto ImageMBOrErr = PostProcessing(std::move(*ObjMBOrErr));
295+
std::vector<std::unique_ptr<MemoryBuffer>> Buffers;
296+
Buffers.push_back(std::move(*ObjMBOrErr));
297+
auto ImageMBOrErr = PostProcessing(std::move(Buffers));
296298
if (!ImageMBOrErr)
297299
return ImageMBOrErr.takeError();
298300

@@ -314,7 +316,8 @@ JITEngine::process(const __tgt_device_image &Image,
314316
target::plugin::GenericDeviceTy &Device) {
315317
const std::string &ComputeUnitKind = Device.getComputeUnitKind();
316318

317-
PostProcessingFn PostProcessing = [&Device](std::unique_ptr<MemoryBuffer> MB)
319+
PostProcessingFn PostProcessing =
320+
[&Device](std::vector<std::unique_ptr<MemoryBuffer>> &&MB)
318321
-> Expected<std::unique_ptr<MemoryBuffer>> {
319322
return Device.doJITPostProcessing(std::move(MB));
320323
};

offload/plugins-nextgen/cuda/src/rtl.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -420,8 +420,14 @@ struct CUDADeviceTy : public GenericDeviceTy {
420420
return Plugin::success();
421421
}
422422

423-
Expected<std::unique_ptr<MemoryBuffer>>
424-
doJITPostProcessing(std::unique_ptr<MemoryBuffer> MB) const override {
423+
Expected<std::unique_ptr<MemoryBuffer>> doJITPostProcessing(
424+
std::vector<std::unique_ptr<MemoryBuffer>> &&MB) const override {
425+
// TODO: This should be possible, just needs to be implemented
426+
if (MB.size() > 1)
427+
return make_error<error::OffloadError>(
428+
error::ErrorCode::UNIMPLEMENTED,
429+
"CUDA plugin does not support linking multiple binaries");
430+
425431
// TODO: We should be able to use the 'nvidia-ptxjitcompiler' interface to
426432
// avoid the call to 'ptxas'.
427433
SmallString<128> PTXInputFilePath;
@@ -433,11 +439,11 @@ struct CUDADeviceTy : public GenericDeviceTy {
433439

434440
// Write the file's contents to the output file.
435441
Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr =
436-
FileOutputBuffer::create(PTXInputFilePath, MB->getBuffer().size());
442+
FileOutputBuffer::create(PTXInputFilePath, MB[0]->getBuffer().size());
437443
if (!OutputOrErr)
438444
return OutputOrErr.takeError();
439445
std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
440-
llvm::copy(MB->getBuffer(), Output->getBufferStart());
446+
llvm::copy(MB[0]->getBuffer(), Output->getBufferStart());
441447
if (Error E = Output->commit())
442448
return std::move(E);
443449

0 commit comments

Comments
 (0)