Skip to content

Commit 6ffb215

Browse files
authored
Merge branch 'release/rocm-rel-7.0' into amd/dev/dsalinas/swdev-522811-rocm-rel-7.0
2 parents 92e3054 + 82aed4e commit 6ffb215

17 files changed

+159 / -38 lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -164,6 +164,7 @@ BUILTIN(__builtin_amdgcn_raw_buffer_load_b96, "V3UiQbiiIi", "n")
164164
BUILTIN(__builtin_amdgcn_raw_buffer_load_b128, "V4UiQbiiIi", "n")
165165

166166
TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_load_lds, "vQbv*3IUiiiIiIi", "t", "vmem-to-lds-load-insts")
167+
TARGET_BUILTIN(__builtin_amdgcn_struct_ptr_buffer_load_lds, "vQbv*3IUiiiiIiIi", "t", "vmem-to-lds-load-insts")
167168

168169
//===----------------------------------------------------------------------===//
169170
// Ballot builtins.

clang/lib/Sema/SemaAMDGPU.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
3737

3838
switch (BuiltinID) {
3939
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_load_lds:
40+
case AMDGPU::BI__builtin_amdgcn_struct_ptr_buffer_load_lds:
4041
case AMDGPU::BI__builtin_amdgcn_global_load_lds: {
4142
constexpr const int SizeIdx = 2;
4243
llvm::APSInt Size;
Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,21 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2+
// REQUIRES: amdgpu-registered-target
3+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -emit-llvm -o - %s | FileCheck %s
4+
5+
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_lds(
6+
// CHECK-NEXT: entry:
7+
// CHECK-NEXT: tail call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) [[RSRC:%.*]], ptr addrspace(3) [[LDS:%.*]], i32 1, i32 [[OFFSET:%.*]], i32 [[SOFFSET:%.*]], i32 2, i32 3)
8+
// CHECK-NEXT: ret void
9+
//
10+
void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void * lds, int offset, int soffset) {
11+
__builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 1, offset, soffset, 2, 3);
12+
}
13+
14+
// CHECK-LABEL: @test_amdgcn_struct_ptr_buffer_load_lds(
15+
// CHECK-NEXT: entry:
16+
// CHECK-NEXT: tail call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) [[RSRC:%.*]], ptr addrspace(3) [[LDS:%.*]], i32 4, i32 [[VINDEX:%.*]], i32 [[VOFFSET:%.*]], i32 [[SOFFSET:%.*]], i32 2, i32 3)
17+
// CHECK-NEXT: ret void
18+
//
19+
void test_amdgcn_struct_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void * lds, int size, int vindex, int voffset, int soffset) {
20+
__builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 4, vindex, voffset, soffset, 2, 3);
21+
}

clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,3 +8,10 @@ void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local vo
88
__builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 4, offset, soffset, 0, x); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_load_lds' must be a constant integer}}
99
__builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 3, offset, soffset, 0, 0); //expected-error{{invalid size value}} gfx950-note{{size must be 1, 2, 4, 12 or 16}} gfx90a-note{{size must be 1, 2, or 4}}
1010
}
11+
12+
void test_amdgcn_struct_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void * lds, int size, int vindex, int voffset, int soffset, int x) {
13+
__builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, x, vindex, voffset, soffset, 0, 0); //expected-error{{argument to '__builtin_amdgcn_struct_ptr_buffer_load_lds' must be a constant integer}}
14+
__builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 4, vindex, voffset, soffset, x, 0); //expected-error{{argument to '__builtin_amdgcn_struct_ptr_buffer_load_lds' must be a constant integer}}
15+
__builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 4, vindex, voffset, soffset, 0, x); //expected-error{{argument to '__builtin_amdgcn_struct_ptr_buffer_load_lds' must be a constant integer}}
16+
__builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 3, vindex, voffset, soffset, 0, 0); //expected-error{{invalid size value}} gfx950-note{{size must be 1, 2, 4, 12 or 16}} gfx90a-note{{size must be 1, 2, or 4}}
17+
}
Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -S -verify -o - %s
22
// REQUIRES: amdgpu-registered-target
33

4-
void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void* lds, int offset, int soffset, int x) {
4+
void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void* lds, int vindex, int offset, int soffset) {
55
__builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 4, offset, soffset, 0, 0); //expected-error{{needs target feature vmem-to-lds-load-insts}}
6+
__builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 4, vindex, offset, soffset, 0, 0); //expected-error{{needs target feature vmem-to-lds-load-insts}}
67
}

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1911,7 +1911,9 @@ class AMDGPUStructBufferLoadLDS : Intrinsic <
19111911
ImmArg<ArgIndex<7>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>;
19121912
def int_amdgcn_struct_buffer_load_lds : AMDGPUStructBufferLoadLDS;
19131913

1914-
class AMDGPUStructPtrBufferLoadLDS : Intrinsic <
1914+
class AMDGPUStructPtrBufferLoadLDS :
1915+
ClangBuiltin<"__builtin_amdgcn_struct_ptr_buffer_load_lds">,
1916+
Intrinsic <
19151917
[],
19161918
[AMDGPUBufferRsrcTy, // rsrc(SGPR)
19171919
LLVMQualPointerType<3>, // LDS base offset

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1336,8 +1336,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
13361336
&AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
13371337
&AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
13381338
&AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
1339-
&AAUnderlyingObjects::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
1340-
&AAInstanceInfo::ID});
1339+
&AAUnderlyingObjects::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID});
13411340

13421341
AttributorConfig AC(CGUpdater);
13431342
AC.IsClosedWorldModule = Options.IsClosedWorld;

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8672,6 +8672,11 @@ SDValue SITargetLowering::lowerWorkitemID(SelectionDAG &DAG, SDValue Op,
86728672
if (MaxID == 0)
86738673
return DAG.getConstant(0, SL, MVT::i32);
86748674

8675+
// It's undefined behavior if a function marked with the amdgpu-no-*
8676+
// attributes uses the corresponding intrinsic.
8677+
if (!Arg)
8678+
return DAG.getUNDEF(Op->getValueType(0));
8679+
86758680
SDValue Val = loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
86768681
SDLoc(DAG.getEntryNode()), Arg);
86778682

llvm/lib/Transforms/Scalar/StructurizeCFG.cpp

Lines changed: 17 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -614,25 +614,28 @@ void StructurizeCFG::insertConditions(bool Loops) {
614614
BasicBlock *SuccTrue = Term->getSuccessor(0);
615615
BasicBlock *SuccFalse = Term->getSuccessor(1);
616616

617-
BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
617+
PhiInserter.Initialize(Boolean, "");
618+
PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default);
618619

619-
if (Preds.size() == 1 && Preds.begin()->first == Parent) {
620-
auto &PI = Preds.begin()->second;
621-
Term->setCondition(PI.Pred);
622-
CondBranchWeights::setMetadata(*Term, PI.Weights);
623-
} else {
624-
PhiInserter.Initialize(Boolean, "");
625-
PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default);
620+
BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
626621

627-
NearestCommonDominator Dominator(DT);
628-
Dominator.addBlock(Parent);
622+
NearestCommonDominator Dominator(DT);
623+
Dominator.addBlock(Parent);
629624

630-
for (auto [BB, PI] : Preds) {
631-
assert(BB != Parent);
632-
PhiInserter.AddAvailableValue(BB, PI.Pred);
633-
Dominator.addAndRememberBlock(BB);
625+
PredInfo ParentInfo{nullptr, std::nullopt};
626+
for (auto [BB, PI] : Preds) {
627+
if (BB == Parent) {
628+
ParentInfo = PI;
629+
break;
634630
}
631+
PhiInserter.AddAvailableValue(BB, PI.Pred);
632+
Dominator.addAndRememberBlock(BB);
633+
}
635634

635+
if (ParentInfo.Pred) {
636+
Term->setCondition(ParentInfo.Pred);
637+
CondBranchWeights::setMetadata(*Term, ParentInfo.Weights);
638+
} else {
636639
if (!Dominator.resultIsRememberedBlock())
637640
PhiInserter.AddAvailableValue(Dominator.result(), Default);
638641

llvm/test/CodeGen/AMDGPU/aa-as-infer.ll

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -259,8 +259,7 @@ define void @kernel_argument_promotion_pattern_intra_procedure(ptr %p, i32 %val)
259259
define internal void @use_argument_after_promotion(ptr %p, i32 %val) {
260260
; CHECK-LABEL: define internal void @use_argument_after_promotion(
261261
; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
262-
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
263-
; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[TMP1]], align 4
262+
; CHECK-NEXT: store i32 [[VAL]], ptr [[P]], align 4
264263
; CHECK-NEXT: ret void
265264
;
266265
store i32 %val, ptr %p

0 commit comments

Comments
 (0)