Skip to content

[Offload] Have doJITPostProcessing accept multiple binaries #148608

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 35 additions & 32 deletions offload/plugins-nextgen/amdgpu/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2066,31 +2066,35 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {

uint64_t getStreamBusyWaitMicroseconds() const { return OMPX_StreamBusyWait; }

Expected<std::unique_ptr<MemoryBuffer>>
doJITPostProcessing(std::unique_ptr<MemoryBuffer> MB) const override {
Expected<std::unique_ptr<MemoryBuffer>> doJITPostProcessing(
llvm::SmallVector<std::unique_ptr<MemoryBuffer>> &&MB) const override {

// TODO: We should try to avoid materialization but there seems to be no
// good linker interface w/o file i/o.
SmallString<128> LinkerInputFilePath;
std::error_code EC = sys::fs::createTemporaryFile("amdgpu-pre-link-jit",
"o", LinkerInputFilePath);
if (EC)
return Plugin::error(ErrorCode::HOST_IO,
"failed to create temporary file for linker");

// Write the file's contents to the output file.
Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr =
FileOutputBuffer::create(LinkerInputFilePath, MB->getBuffer().size());
if (!OutputOrErr)
return OutputOrErr.takeError();
std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
llvm::copy(MB->getBuffer(), Output->getBufferStart());
if (Error E = Output->commit())
return std::move(E);
llvm::SmallVector<SmallString<128>> InputFilenames;
for (auto &B : MB) {
SmallString<128> LinkerInputFilePath;
auto &Dest = InputFilenames.emplace_back();
std::error_code EC =
sys::fs::createTemporaryFile("amdgpu-pre-link-jit", "o", Dest);
if (EC)
return Plugin::error(ErrorCode::HOST_IO,
"failed to create temporary file for linker");

// Write the file's contents to the output file.
Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr =
FileOutputBuffer::create(Dest, B->getBuffer().size());
if (!OutputOrErr)
return OutputOrErr.takeError();
std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
llvm::copy(B->getBuffer(), Output->getBufferStart());
if (Error E = Output->commit())
return std::move(E);
}

SmallString<128> LinkerOutputFilePath;
EC = sys::fs::createTemporaryFile("amdgpu-pre-link-jit", "so",
LinkerOutputFilePath);
std::error_code EC = sys::fs::createTemporaryFile(
"amdgpu-pre-link-jit", "so", LinkerOutputFilePath);
if (EC)
return Plugin::error(ErrorCode::HOST_IO,
"failed to create temporary file for linker");
Expand All @@ -2105,15 +2109,12 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
"Using `%s` to link JITed amdgcn output.", LLDPath.c_str());

std::string MCPU = "-plugin-opt=mcpu=" + getComputeUnitKind();
StringRef Args[] = {LLDPath,
"-flavor",
"gnu",
"--no-undefined",
"-shared",
MCPU,
"-o",
LinkerOutputFilePath.data(),
LinkerInputFilePath.data()};
std::vector<StringRef> Args = {
LLDPath, "-flavor", "gnu", "--no-undefined",
"-shared", MCPU, "-o", LinkerOutputFilePath.data()};
for (auto &N : InputFilenames) {
Args.push_back(N);
}

std::string Error;
int RC = sys::ExecuteAndWait(LLDPath, Args, std::nullopt, {}, 0, 0, &Error);
Expand All @@ -2131,9 +2132,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
if (sys::fs::remove(LinkerOutputFilePath))
return Plugin::error(ErrorCode::HOST_IO,
"failed to remove temporary output file for lld");
if (sys::fs::remove(LinkerInputFilePath))
return Plugin::error(ErrorCode::HOST_IO,
"failed to remove temporary input file for lld");
for (auto &N : InputFilenames) {
if (sys::fs::remove(N))
return Plugin::error(ErrorCode::HOST_IO,
"failed to remove temporary input file for lld");
Comment on lines +2136 to +2138
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought there was some temporary file helper in Support that managed the create and remove of the files for you

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TempFile exists, but it operates on file descriptors rather than names. I don't think it's possible for a helper to exist that deletes the file in a destructor, since it could fail and it needs to send the error somewhere.

}

return std::move(*BufferOrErr);
}
Expand Down
2 changes: 1 addition & 1 deletion offload/plugins-nextgen/common/include/JIT.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ struct JITEngine {
/// called.
using PostProcessingFn =
std::function<Expected<std::unique_ptr<MemoryBuffer>>(
std::unique_ptr<MemoryBuffer>)>;
llvm::SmallVector<std::unique_ptr<MemoryBuffer>> &&)>;

JITEngine(Triple::ArchType TA);

Expand Down
10 changes: 7 additions & 3 deletions offload/plugins-nextgen/common/include/PluginInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -934,9 +934,13 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
virtual std::string getComputeUnitKind() const { return "unknown"; }

/// Post processing after jit backend. The ownership of \p MB will be taken.
virtual Expected<std::unique_ptr<MemoryBuffer>>
doJITPostProcessing(std::unique_ptr<MemoryBuffer> MB) const {
return std::move(MB);
virtual Expected<std::unique_ptr<MemoryBuffer>> doJITPostProcessing(
llvm::SmallVector<std::unique_ptr<MemoryBuffer>> &&MB) const {
if (MB.size() > 1)
return make_error<error::OffloadError>(
error::ErrorCode::UNSUPPORTED,
"Plugin does not support linking multiple binaries");
return std::move(MB[0]);
}

/// The minimum number of threads we use for a low-trip count combined loop.
Expand Down
7 changes: 5 additions & 2 deletions offload/plugins-nextgen/common/src/JIT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,9 @@ JITEngine::compile(const __tgt_device_image &Image,
if (!ObjMBOrErr)
return ObjMBOrErr.takeError();

auto ImageMBOrErr = PostProcessing(std::move(*ObjMBOrErr));
llvm::SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
Buffers.push_back(std::move(*ObjMBOrErr));
auto ImageMBOrErr = PostProcessing(std::move(Buffers));
if (!ImageMBOrErr)
return ImageMBOrErr.takeError();

Expand All @@ -314,7 +316,8 @@ JITEngine::process(const __tgt_device_image &Image,
target::plugin::GenericDeviceTy &Device) {
const std::string &ComputeUnitKind = Device.getComputeUnitKind();

PostProcessingFn PostProcessing = [&Device](std::unique_ptr<MemoryBuffer> MB)
PostProcessingFn PostProcessing =
[&Device](llvm::SmallVector<std::unique_ptr<MemoryBuffer>> &&MB)
-> Expected<std::unique_ptr<MemoryBuffer>> {
return Device.doJITPostProcessing(std::move(MB));
};
Expand Down
14 changes: 10 additions & 4 deletions offload/plugins-nextgen/cuda/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -420,8 +420,14 @@ struct CUDADeviceTy : public GenericDeviceTy {
return Plugin::success();
}

Expected<std::unique_ptr<MemoryBuffer>>
doJITPostProcessing(std::unique_ptr<MemoryBuffer> MB) const override {
Expected<std::unique_ptr<MemoryBuffer>> doJITPostProcessing(
llvm::SmallVector<std::unique_ptr<MemoryBuffer>> &&MB) const override {
// TODO: This should be possible, just needs to be implemented
if (MB.size() > 1)
return make_error<error::OffloadError>(
error::ErrorCode::UNIMPLEMENTED,
"CUDA plugin does not support linking multiple binaries");

// TODO: We should be able to use the 'nvidia-ptxjitcompiler' interface to
// avoid the call to 'ptxas'.
SmallString<128> PTXInputFilePath;
Expand All @@ -433,11 +439,11 @@ struct CUDADeviceTy : public GenericDeviceTy {

// Write the file's contents to the output file.
Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr =
FileOutputBuffer::create(PTXInputFilePath, MB->getBuffer().size());
FileOutputBuffer::create(PTXInputFilePath, MB[0]->getBuffer().size());
if (!OutputOrErr)
return OutputOrErr.takeError();
std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
llvm::copy(MB->getBuffer(), Output->getBufferStart());
llvm::copy(MB[0]->getBuffer(), Output->getBufferStart());
if (Error E = Output->commit())
return std::move(E);

Expand Down
Loading