Skip to content

Commit bebbd85

Browse files
authored
Merge branch 'release/rocm-rel-7.0' into amd/dev/brcahoon/swdev-541399-7.0
2 parents 1645aa3 + 22d571d commit bebbd85

20 files changed

+463
-229
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ BUILTIN(__builtin_amdgcn_raw_buffer_load_b96, "V3UiQbiiIi", "n")
164164
BUILTIN(__builtin_amdgcn_raw_buffer_load_b128, "V4UiQbiiIi", "n")
165165

166166
TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_load_lds, "vQbv*3IUiiiIiIi", "t", "vmem-to-lds-load-insts")
167+
TARGET_BUILTIN(__builtin_amdgcn_struct_ptr_buffer_load_lds, "vQbv*3IUiiiiIiIi", "t", "vmem-to-lds-load-insts")
167168

168169
//===----------------------------------------------------------------------===//
169170
// Ballot builtins.

clang/lib/Sema/SemaAMDGPU.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
3737

3838
switch (BuiltinID) {
3939
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_load_lds:
40+
case AMDGPU::BI__builtin_amdgcn_struct_ptr_buffer_load_lds:
4041
case AMDGPU::BI__builtin_amdgcn_global_load_lds: {
4142
constexpr const int SizeIdx = 2;
4243
llvm::APSInt Size;
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2+
// REQUIRES: amdgpu-registered-target
3+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -emit-llvm -o - %s | FileCheck %s
4+
5+
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_lds(
6+
// CHECK-NEXT: entry:
7+
// CHECK-NEXT: tail call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) [[RSRC:%.*]], ptr addrspace(3) [[LDS:%.*]], i32 1, i32 [[OFFSET:%.*]], i32 [[SOFFSET:%.*]], i32 2, i32 3)
8+
// CHECK-NEXT: ret void
9+
//
10+
void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void * lds, int offset, int soffset) {
11+
__builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 1, offset, soffset, 2, 3);
12+
}
13+
14+
// CHECK-LABEL: @test_amdgcn_struct_ptr_buffer_load_lds(
15+
// CHECK-NEXT: entry:
16+
// CHECK-NEXT: tail call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) [[RSRC:%.*]], ptr addrspace(3) [[LDS:%.*]], i32 4, i32 [[VINDEX:%.*]], i32 [[VOFFSET:%.*]], i32 [[SOFFSET:%.*]], i32 2, i32 3)
17+
// CHECK-NEXT: ret void
18+
//
19+
void test_amdgcn_struct_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void * lds, int size, int vindex, int voffset, int soffset) {
20+
__builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 4, vindex, voffset, soffset, 2, 3);
21+
}

clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,10 @@ void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local vo
88
__builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 4, offset, soffset, 0, x); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_load_lds' must be a constant integer}}
99
__builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 3, offset, soffset, 0, 0); //expected-error{{invalid size value}} gfx950-note{{size must be 1, 2, 4, 12 or 16}} gfx90a-note{{size must be 1, 2, or 4}}
1010
}
11+
12+
void test_amdgcn_struct_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void * lds, int size, int vindex, int voffset, int soffset, int x) {
13+
__builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, x, vindex, voffset, soffset, 0, 0); //expected-error{{argument to '__builtin_amdgcn_struct_ptr_buffer_load_lds' must be a constant integer}}
14+
__builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 4, vindex, voffset, soffset, x, 0); //expected-error{{argument to '__builtin_amdgcn_struct_ptr_buffer_load_lds' must be a constant integer}}
15+
__builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 4, vindex, voffset, soffset, 0, x); //expected-error{{argument to '__builtin_amdgcn_struct_ptr_buffer_load_lds' must be a constant integer}}
16+
__builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 3, vindex, voffset, soffset, 0, 0); //expected-error{{invalid size value}} gfx950-note{{size must be 1, 2, 4, 12 or 16}} gfx90a-note{{size must be 1, 2, or 4}}
17+
}
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -S -verify -o - %s
22
// REQUIRES: amdgpu-registered-target
33

4-
void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void* lds, int offset, int soffset, int x) {
4+
void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void* lds, int vindex, int offset, int soffset) {
55
__builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 4, offset, soffset, 0, 0); //expected-error{{needs target feature vmem-to-lds-load-insts}}
6+
__builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 4, vindex, offset, soffset, 0, 0); //expected-error{{needs target feature vmem-to-lds-load-insts}}
67
}

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1911,7 +1911,9 @@ class AMDGPUStructBufferLoadLDS : Intrinsic <
19111911
ImmArg<ArgIndex<7>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>;
19121912
def int_amdgcn_struct_buffer_load_lds : AMDGPUStructBufferLoadLDS;
19131913

1914-
class AMDGPUStructPtrBufferLoadLDS : Intrinsic <
1914+
class AMDGPUStructPtrBufferLoadLDS :
1915+
ClangBuiltin<"__builtin_amdgcn_struct_ptr_buffer_load_lds">,
1916+
Intrinsic <
19151917
[],
19161918
[AMDGPUBufferRsrcTy, // rsrc(SGPR)
19171919
LLVMQualPointerType<3>, // LDS base offset

llvm/include/llvm/Object/OffloadBundle.h

Lines changed: 54 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -31,42 +31,56 @@ namespace llvm {
3131

3232
namespace object {
3333

34+
// CompressedOffloadBundle represents the format for the compressed offload
35+
// bundles.
36+
//
37+
// The format is as follows:
38+
// - Magic Number (4 bytes) - A constant "CCOB".
39+
// - Version (2 bytes)
40+
// - Compression Method (2 bytes) - Uses the values from
41+
// llvm::compression::Format.
42+
// - Total file size (absent in V1, 4 bytes in V2, 8 bytes in V3).
43+
// - Uncompressed Size (4 bytes in V1/V2, 8 bytes in V3).
44+
// - Truncated MD5 Hash (8 bytes).
45+
// - Compressed Data (variable length).
3446
class CompressedOffloadBundle {
3547
private:
36-
static inline const size_t MagicSize = 4;
37-
static inline const size_t VersionFieldSize = sizeof(uint16_t);
38-
static inline const size_t MethodFieldSize = sizeof(uint16_t);
39-
static inline const size_t FileSizeFieldSize = sizeof(uint32_t);
40-
static inline const size_t UncompressedSizeFieldSize = sizeof(uint32_t);
41-
static inline const size_t HashFieldSize = sizeof(uint64_t);
42-
static inline const size_t V1HeaderSize =
43-
MagicSize + VersionFieldSize + MethodFieldSize +
44-
UncompressedSizeFieldSize + HashFieldSize;
45-
static inline const size_t V2HeaderSize =
46-
MagicSize + VersionFieldSize + FileSizeFieldSize + MethodFieldSize +
47-
UncompressedSizeFieldSize + HashFieldSize;
4848
static inline const llvm::StringRef MagicNumber = "CCOB";
49-
static inline const uint16_t Version = 2;
5049

5150
public:
51+
struct CompressedBundleHeader {
52+
unsigned Version;
53+
llvm::compression::Format CompressionFormat;
54+
std::optional<size_t> FileSize;
55+
size_t UncompressedFileSize;
56+
uint64_t Hash;
57+
58+
static llvm::Expected<CompressedBundleHeader> tryParse(llvm::StringRef);
59+
};
60+
61+
static inline const uint16_t DefaultVersion = 2;
62+
5263
static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
5364
compress(llvm::compression::Params P, const llvm::MemoryBuffer &Input,
54-
bool Verbose = false);
65+
uint16_t Version, bool Verbose = false);
5566
static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
56-
decompress(llvm::MemoryBufferRef &Input, bool Verbose = false);
67+
decompress(const llvm::MemoryBuffer &Input, bool Verbose = false);
5768
};
5869

5970
/// Bundle entry in binary clang-offload-bundler format.
6071
struct OffloadBundleEntry {
6172
uint64_t Offset = 0u;
6273
uint64_t Size = 0u;
6374
uint64_t IDLength = 0u;
64-
StringRef ID;
65-
OffloadBundleEntry(uint64_t O, uint64_t S, uint64_t I, StringRef T)
66-
: Offset(O), Size(S), IDLength(I), ID(T) {}
75+
std::string ID;
76+
OffloadBundleEntry(uint64_t O, uint64_t S, uint64_t I, std::string T)
77+
: Offset(O), Size(S), IDLength(I) {
78+
ID.reserve(T.size());
79+
ID = T;
80+
}
6781
void dumpInfo(raw_ostream &OS) {
6882
OS << "Offset = " << Offset << ", Size = " << Size
69-
<< ", ID Length = " << IDLength << ", ID = " << ID;
83+
<< ", ID Length = " << IDLength << ", ID = " << ID << "\n";
7084
}
7185
void dumpURI(raw_ostream &OS, StringRef FilePath) {
7286
OS << ID.data() << "\tfile://" << FilePath << "#offset=" << Offset
@@ -81,16 +95,21 @@ class OffloadBundleFatBin {
8195
StringRef FileName;
8296
uint64_t NumberOfEntries;
8397
SmallVector<OffloadBundleEntry> Entries;
98+
bool Decompressed;
8499

85100
public:
101+
std::unique_ptr<MemoryBuffer> DecompressedBuffer;
102+
86103
SmallVector<OffloadBundleEntry> getEntries() { return Entries; }
87104
uint64_t getSize() const { return Size; }
88105
StringRef getFileName() const { return FileName; }
89106
uint64_t getNumEntries() const { return NumberOfEntries; }
107+
bool isDecompressed() const { return Decompressed; }
90108

91-
static Expected<std::unique_ptr<OffloadBundleFatBin>>
92-
create(MemoryBufferRef, uint64_t SectionOffset, StringRef FileName);
93-
Error extractBundle(const ObjectFile &Source);
109+
LLVM_ABI static Expected<std::unique_ptr<OffloadBundleFatBin>>
110+
create(MemoryBufferRef, uint64_t SectionOffset, StringRef FileName,
111+
bool Decompress = false);
112+
LLVM_ABI Error extractBundle(const ObjectFile &Source);
94113

95114
Error dumpEntryToCodeObject();
96115

@@ -105,9 +124,15 @@ class OffloadBundleFatBin {
105124
Entry.dumpURI(outs(), FileName);
106125
}
107126

108-
OffloadBundleFatBin(MemoryBufferRef Source, StringRef File)
109-
: FileName(File), NumberOfEntries(0),
110-
Entries(SmallVector<OffloadBundleEntry>()) {}
127+
OffloadBundleFatBin(MemoryBufferRef Source, StringRef File,
128+
bool Decompress = false)
129+
: FileName(File), Decompressed(Decompress), NumberOfEntries(0),
130+
Entries(SmallVector<OffloadBundleEntry>()) {
131+
if (Decompress) {
132+
DecompressedBuffer =
133+
MemoryBuffer::getMemBufferCopy(Source.getBuffer(), File);
134+
}
135+
}
111136
};
112137

113138
enum UriTypeT { FILE_URI, MEMORY_URI };
@@ -190,6 +215,10 @@ Error extractOffloadBundleFatBinary(
190215
Error extractCodeObject(const ObjectFile &Source, int64_t Offset, int64_t Size,
191216
StringRef OutputFileName);
192217

218+
/// Extract code object memory from the given \p Buffer at \p Offset
219+
/// and of \p Size, and copy into \p OutputFileName.
220+
LLVM_ABI Error extractCodeObject(MemoryBufferRef Buffer, int64_t Offset,
221+
int64_t Size, StringRef OutputFileName);
193222
/// Extracts an Offload Bundle Entry given by URI
194223
Error extractOffloadBundleByURI(StringRef URIstr);
195224

0 commit comments

Comments
 (0)