Skip to content

Commit 3780a28

Browse files
authored
Merge branch 'release/rocm-rel-7.0' into amd/dev/rlieberm/CP-529245
2 parents cf41ebd + 7bf6313 commit 3780a28

File tree

10 files changed

+172
-104
lines changed

10 files changed

+172
-104
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8672,6 +8672,11 @@ SDValue SITargetLowering::lowerWorkitemID(SelectionDAG &DAG, SDValue Op,
86728672
if (MaxID == 0)
86738673
return DAG.getConstant(0, SL, MVT::i32);
86748674

8675+
// It's undefined behavior if a function marked with the amdgpu-no-*
8676+
// attributes uses the corresponding intrinsic.
8677+
if (!Arg)
8678+
return DAG.getUNDEF(Op->getValueType(0));
8679+
86758680
SDValue Val = loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
86768681
SDLoc(DAG.getEntryNode()), Arg);
86778682

llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,15 @@ bool PointerReplacer::collectUsers() {
278278
Worklist.emplace_back(I);
279279
};
280280

281+
auto TryPushInstOperand = [&](Instruction *InstOp) {
282+
if (!UsersToReplace.contains(InstOp)) {
283+
if (!ValuesToRevisit.insert(InstOp))
284+
return false;
285+
Worklist.emplace_back(InstOp);
286+
}
287+
return true;
288+
};
289+
281290
PushUsersToWorklist(&Root);
282291
while (!Worklist.empty()) {
283292
Instruction *Inst = Worklist.pop_back_val();
@@ -310,21 +319,26 @@ bool PointerReplacer::collectUsers() {
310319
// incoming values.
311320
Worklist.emplace_back(PHI);
312321
for (unsigned Idx = 0; Idx < PHI->getNumIncomingValues(); ++Idx) {
313-
auto *IncomingValue = cast<Instruction>(PHI->getIncomingValue(Idx));
314-
if (UsersToReplace.contains(IncomingValue))
315-
continue;
316-
if (!ValuesToRevisit.insert(IncomingValue))
322+
if (!TryPushInstOperand(cast<Instruction>(PHI->getIncomingValue(Idx))))
317323
return false;
318-
Worklist.emplace_back(IncomingValue);
319324
}
320325
} else if (auto *SI = dyn_cast<SelectInst>(Inst)) {
321326
auto *TrueInst = dyn_cast<Instruction>(SI->getTrueValue());
322327
auto *FalseInst = dyn_cast<Instruction>(SI->getFalseValue());
323328
if (!TrueInst || !FalseInst)
324329
return false;
325330

326-
UsersToReplace.insert(SI);
327-
PushUsersToWorklist(SI);
331+
if (isAvailable(TrueInst) && isAvailable(FalseInst)) {
332+
UsersToReplace.insert(SI);
333+
PushUsersToWorklist(SI);
334+
continue;
335+
}
336+
337+
// Push select back onto the stack, followed by the unavailable true/false
338+
// value(s).
339+
Worklist.emplace_back(SI);
340+
if (!TryPushInstOperand(TrueInst) || !TryPushInstOperand(FalseInst))
341+
return false;
328342
} else if (auto *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
329343
UsersToReplace.insert(GEP);
330344
PushUsersToWorklist(GEP);

llvm/lib/Transforms/Scalar/ConstraintElimination.cpp

Lines changed: 7 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -313,8 +313,7 @@ class ConstraintInfo {
313313
/// New variables that need to be added to the system are collected in
314314
/// \p NewVariables.
315315
ConstraintTy getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
316-
SmallVectorImpl<Value *> &NewVariables,
317-
bool ForceSignedSystem = false) const;
316+
SmallVectorImpl<Value *> &NewVariables) const;
318317

319318
/// Turns a comparison of the form \p Op0 \p Pred \p Op1 into a vector of
320319
/// constraints using getConstraint. Returns an empty constraint if the result
@@ -331,14 +330,6 @@ class ConstraintInfo {
331330
void transferToOtherSystem(CmpInst::Predicate Pred, Value *A, Value *B,
332331
unsigned NumIn, unsigned NumOut,
333332
SmallVectorImpl<StackEntry> &DFSInStack);
334-
335-
private:
336-
/// Adds facts into constraint system. \p ForceSignedSystem can be set when
337-
/// the \p Pred is eq/ne, and signed constraint system is used when it's
338-
/// specified.
339-
void addFactImpl(CmpInst::Predicate Pred, Value *A, Value *B, unsigned NumIn,
340-
unsigned NumOut, SmallVectorImpl<StackEntry> &DFSInStack,
341-
bool ForceSignedSystem);
342333
};
343334

344335
/// Represents a (Coefficient * Variable) entry after IR decomposition.
@@ -645,12 +636,8 @@ static Decomposition decompose(Value *V,
645636

646637
ConstraintTy
647638
ConstraintInfo::getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
648-
SmallVectorImpl<Value *> &NewVariables,
649-
bool ForceSignedSystem) const {
639+
SmallVectorImpl<Value *> &NewVariables) const {
650640
assert(NewVariables.empty() && "NewVariables must be empty when passed in");
651-
assert((!ForceSignedSystem || CmpInst::isEquality(Pred)) &&
652-
"signed system can only be forced on eq/ne");
653-
654641
bool IsEq = false;
655642
bool IsNe = false;
656643

@@ -665,15 +652,15 @@ ConstraintInfo::getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
665652
break;
666653
}
667654
case CmpInst::ICMP_EQ:
668-
if (!ForceSignedSystem && match(Op1, m_Zero())) {
655+
if (match(Op1, m_Zero())) {
669656
Pred = CmpInst::ICMP_ULE;
670657
} else {
671658
IsEq = true;
672659
Pred = CmpInst::ICMP_ULE;
673660
}
674661
break;
675662
case CmpInst::ICMP_NE:
676-
if (!ForceSignedSystem && match(Op1, m_Zero())) {
663+
if (match(Op1, m_Zero())) {
677664
Pred = CmpInst::getSwappedPredicate(CmpInst::ICMP_UGT);
678665
std::swap(Op0, Op1);
679666
} else {
@@ -690,7 +677,7 @@ ConstraintInfo::getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
690677
return {};
691678

692679
SmallVector<ConditionTy, 4> Preconditions;
693-
bool IsSigned = ForceSignedSystem || CmpInst::isSigned(Pred);
680+
bool IsSigned = CmpInst::isSigned(Pred);
694681
auto &Value2Index = getValue2Index(IsSigned);
695682
auto ADec = decompose(Op0->stripPointerCastsSameRepresentation(),
696683
Preconditions, IsSigned, DL);
@@ -750,7 +737,7 @@ ConstraintInfo::getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
750737
int64_t OffsetSum;
751738
if (AddOverflow(Offset1, Offset2, OffsetSum))
752739
return {};
753-
if (Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_ULT)
740+
if (Pred == (IsSigned ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT))
754741
if (AddOverflow(OffsetSum, int64_t(-1), OffsetSum))
755742
return {};
756743
R[0] = OffsetSum;
@@ -1593,20 +1580,10 @@ static bool checkOrAndOpImpliedByOther(
15931580
void ConstraintInfo::addFact(CmpInst::Predicate Pred, Value *A, Value *B,
15941581
unsigned NumIn, unsigned NumOut,
15951582
SmallVectorImpl<StackEntry> &DFSInStack) {
1596-
addFactImpl(Pred, A, B, NumIn, NumOut, DFSInStack, false);
1597-
// If the Pred is eq/ne, also add the fact to signed system.
1598-
if (CmpInst::isEquality(Pred))
1599-
addFactImpl(Pred, A, B, NumIn, NumOut, DFSInStack, true);
1600-
}
1601-
1602-
void ConstraintInfo::addFactImpl(CmpInst::Predicate Pred, Value *A, Value *B,
1603-
unsigned NumIn, unsigned NumOut,
1604-
SmallVectorImpl<StackEntry> &DFSInStack,
1605-
bool ForceSignedSystem) {
16061583
// If the constraint has a pre-condition, skip the constraint if it does not
16071584
// hold.
16081585
SmallVector<Value *> NewVariables;
1609-
auto R = getConstraint(Pred, A, B, NewVariables, ForceSignedSystem);
1586+
auto R = getConstraint(Pred, A, B, NewVariables);
16101587

16111588
// TODO: Support non-equality for facts as well.
16121589
if (!R.isValid(*this) || R.isNe())

llvm/lib/Transforms/Scalar/StructurizeCFG.cpp

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -614,25 +614,28 @@ void StructurizeCFG::insertConditions(bool Loops) {
614614
BasicBlock *SuccTrue = Term->getSuccessor(0);
615615
BasicBlock *SuccFalse = Term->getSuccessor(1);
616616

617-
BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
617+
PhiInserter.Initialize(Boolean, "");
618+
PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default);
618619

619-
if (Preds.size() == 1 && Preds.begin()->first == Parent) {
620-
auto &PI = Preds.begin()->second;
621-
Term->setCondition(PI.Pred);
622-
CondBranchWeights::setMetadata(*Term, PI.Weights);
623-
} else {
624-
PhiInserter.Initialize(Boolean, "");
625-
PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default);
620+
BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
626621

627-
NearestCommonDominator Dominator(DT);
628-
Dominator.addBlock(Parent);
622+
NearestCommonDominator Dominator(DT);
623+
Dominator.addBlock(Parent);
629624

630-
for (auto [BB, PI] : Preds) {
631-
assert(BB != Parent);
632-
PhiInserter.AddAvailableValue(BB, PI.Pred);
633-
Dominator.addAndRememberBlock(BB);
625+
PredInfo ParentInfo{nullptr, std::nullopt};
626+
for (auto [BB, PI] : Preds) {
627+
if (BB == Parent) {
628+
ParentInfo = PI;
629+
break;
634630
}
631+
PhiInserter.AddAvailableValue(BB, PI.Pred);
632+
Dominator.addAndRememberBlock(BB);
633+
}
635634

635+
if (ParentInfo.Pred) {
636+
Term->setCondition(ParentInfo.Pred);
637+
CondBranchWeights::setMetadata(*Term, ParentInfo.Weights);
638+
} else {
636639
if (!Dominator.resultIsRememberedBlock())
637640
PhiInserter.AddAvailableValue(Dominator.result(), Default);
638641

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -O0 -stop-after=amdgpu-isel -o - %s | FileCheck --check-prefix=SelDAG %s
3+
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=legalizer -o - %s | FileCheck --check-prefix=GlobalISel %s
4+
5+
declare i32 @llvm.amdgcn.workitem.id.x()
6+
declare i32 @llvm.amdgcn.workitem.id.y()
7+
declare i32 @llvm.amdgcn.workitem.id.z()
8+
9+
define amdgpu_ps void @undefined_workitems(ptr %p, ptr %q, ptr %r) {
10+
; SelDAG-LABEL: name: undefined_workitems
11+
; SelDAG: bb.0 (%ir-block.0):
12+
; SelDAG-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
13+
; SelDAG-NEXT: {{ $}}
14+
; SelDAG-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5
15+
; SelDAG-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4
16+
; SelDAG-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
17+
; SelDAG-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
18+
; SelDAG-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1
19+
; SelDAG-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
20+
; SelDAG-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
21+
; SelDAG-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
22+
; SelDAG-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
23+
; SelDAG-NEXT: [[COPY6:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
24+
; SelDAG-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
25+
; SelDAG-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE2]]
26+
; SelDAG-NEXT: S_ENDPGM 0
27+
;
28+
; GlobalISel-LABEL: name: undefined_workitems
29+
; GlobalISel: bb.1 (%ir-block.0):
30+
; GlobalISel-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
31+
; GlobalISel-NEXT: {{ $}}
32+
; GlobalISel-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
33+
; GlobalISel-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
34+
; GlobalISel-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
35+
; GlobalISel-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
36+
; GlobalISel-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
37+
; GlobalISel-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
38+
; GlobalISel-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
39+
; GlobalISel-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
40+
; GlobalISel-NEXT: [[MV2:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
41+
; GlobalISel-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
42+
; GlobalISel-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
43+
; GlobalISel-NEXT: G_STORE [[COPY6]](s32), [[MV]](p0) :: (store (s32) into %ir.p)
44+
; GlobalISel-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
45+
; GlobalISel-NEXT: G_STORE [[COPY7]](s32), [[MV1]](p0) :: (store (s32) into %ir.q)
46+
; GlobalISel-NEXT: G_STORE [[DEF]](s32), [[MV2]](p0) :: (store (s32) into %ir.r)
47+
; GlobalISel-NEXT: S_ENDPGM 0
48+
%id.x = call i32 @llvm.amdgcn.workitem.id.x()
49+
store i32 %id.x, ptr %p
50+
%id.y = call i32 @llvm.amdgcn.workitem.id.y()
51+
store i32 %id.y, ptr %q
52+
%id.z = call i32 @llvm.amdgcn.workitem.id.z()
53+
store i32 %id.z, ptr %r
54+
ret void
55+
}

llvm/test/Transforms/ConstraintElimination/eq.ll

Lines changed: 0 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -424,53 +424,3 @@ bc_equal:
424424
not_eq:
425425
ret i1 false
426426
}
427-
428-
define i1 @test_eq_for_signed_cmp(i32 noundef %v0, i32 noundef %v1, i32 noundef %v2) {
429-
; CHECK-LABEL: @test_eq_for_signed_cmp(
430-
; CHECK-NEXT: entry:
431-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V2:%.*]], [[V0:%.*]]
432-
; CHECK-NEXT: [[CMP1:%.*]] = icmp sge i32 [[V0]], [[V1:%.*]]
433-
; CHECK-NEXT: [[AND0:%.*]] = and i1 [[CMP1]], [[CMP]]
434-
; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[V1]], [[V2]]
435-
; CHECK-NEXT: [[AND1:%.*]] = and i1 false, [[AND0]]
436-
; CHECK-NEXT: ret i1 [[AND1]]
437-
;
438-
entry:
439-
%cmp = icmp eq i32 %v2, %v0
440-
%cmp1 = icmp sge i32 %v0, %v1
441-
%and0 = and i1 %cmp1, %cmp
442-
%cmp4 = icmp sgt i32 %v1, %v2
443-
%and1 = and i1 %cmp4, %and0
444-
ret i1 %and1
445-
}
446-
447-
define i1 @test_eq_for_signed_cmp_with_decompsition(i32 noundef %v0, i32 noundef %v1, i32 noundef %v2, i32 noundef %addend0, i32 noundef %addend1) {
448-
; CHECK-LABEL: @test_eq_for_signed_cmp_with_decompsition(
449-
; CHECK-NEXT: entry:
450-
; CHECK-NEXT: [[V0ADD:%.*]] = add nsw i32 [[V0:%.*]], [[ADDEND0:%.*]]
451-
; CHECK-NEXT: [[V1ADD:%.*]] = add nsw i32 [[V1:%.*]], [[ADDEND1:%.*]]
452-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V2:%.*]], [[V0ADD]]
453-
; CHECK-NEXT: [[CMP1:%.*]] = icmp sge i32 [[V0ADD]], [[V1ADD]]
454-
; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 [[ADDEND0]], 0
455-
; CHECK-NEXT: [[CMP3:%.*]] = icmp slt i32 [[ADDEND0]], [[ADDEND1]]
456-
; CHECK-NEXT: [[AND0:%.*]] = and i1 [[CMP1]], [[CMP]]
457-
; CHECK-NEXT: [[AND1:%.*]] = and i1 [[AND0]], [[CMP2]]
458-
; CHECK-NEXT: [[AND2:%.*]] = and i1 [[AND1]], [[CMP3]]
459-
; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[V1]], [[V2]]
460-
; CHECK-NEXT: [[AND3:%.*]] = and i1 false, [[AND2]]
461-
; CHECK-NEXT: ret i1 [[AND3]]
462-
;
463-
entry:
464-
%v0add = add nsw i32 %v0, %addend0
465-
%v1add = add nsw i32 %v1, %addend1
466-
%cmp = icmp eq i32 %v2, %v0add
467-
%cmp1 = icmp sge i32 %v0add, %v1add
468-
%cmp2 = icmp sge i32 %addend0, 0
469-
%cmp3 = icmp slt i32 %addend0, %addend1
470-
%and0 = and i1 %cmp1, %cmp
471-
%and1 = and i1 %and0, %cmp2
472-
%and2 = and i1 %and1, %cmp3
473-
%cmp4 = icmp sgt i32 %v1, %v2
474-
%and3 = and i1 %cmp4, %and2
475-
ret i1 %and3
476-
}

llvm/test/Transforms/ConstraintElimination/ne.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,8 @@ define i1 @test_ne_eq_0(i8 %a, i8 %b) {
7171
; CHECK-NEXT: [[RES_13:%.*]] = xor i1 [[RES_12]], false
7272
; CHECK-NEXT: [[RES_14:%.*]] = xor i1 [[RES_13]], false
7373
; CHECK-NEXT: [[RES_15:%.*]] = xor i1 [[RES_14]], false
74-
; CHECK-NEXT: [[RES_16:%.*]] = xor i1 [[RES_15]], false
74+
; CHECK-NEXT: [[C_12:%.*]] = icmp sgt i8 [[A]], 0
75+
; CHECK-NEXT: [[RES_16:%.*]] = xor i1 [[RES_15]], [[C_12]]
7576
; CHECK-NEXT: ret i1 [[RES_16]]
7677
;
7778
entry:
@@ -208,7 +209,8 @@ define i1 @test_ne_eq_1(i8 %a, i8 %b) {
208209
; CHECK-NEXT: [[RES_13:%.*]] = xor i1 [[RES_12]], true
209210
; CHECK-NEXT: [[RES_14:%.*]] = xor i1 [[RES_13]], true
210211
; CHECK-NEXT: [[RES_15:%.*]] = xor i1 [[RES_14]], false
211-
; CHECK-NEXT: [[RES_16:%.*]] = xor i1 [[RES_15]], true
212+
; CHECK-NEXT: [[C_12:%.*]] = icmp sgt i8 [[A]], 0
213+
; CHECK-NEXT: [[RES_16:%.*]] = xor i1 [[RES_15]], [[C_12]]
212214
; CHECK-NEXT: ret i1 [[RES_16]]
213215
;
214216
entry:

llvm/test/Transforms/ConstraintElimination/pr105785.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ define void @pr105785(ptr %p) {
1515
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[FOR_IND2]], 3
1616
; CHECK-NEXT: br i1 [[CMP2]], label %[[FOR_BODY3]], label %[[FOR_COND]]
1717
; CHECK: [[FOR_BODY3]]:
18-
; CHECK-NEXT: store i32 -1, ptr [[P]], align 4
18+
; CHECK-NEXT: [[SCMP:%.*]] = call i32 @llvm.scmp.i32.i32(i32 [[FOR_IND]], i32 1)
19+
; CHECK-NEXT: store i32 [[SCMP]], ptr [[P]], align 4
1920
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[FOR_IND2]], 1
2021
; CHECK-NEXT: br label %[[FOR_COND1]]
2122
; CHECK: [[FOR_END6]]:

llvm/test/Transforms/InstCombine/AMDGPU/ptr-replace-alloca.ll

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,4 +76,37 @@ sink:
7676
ret <2 x i64> %val.sink
7777
}
7878

79+
; Previously crashed in InstCombine's PointerReplacer because an invalid select was generated with addrspace(4) and addrspace(5)
80+
; operands.
81+
define amdgpu_kernel void @select_addr4_addr5(ptr addrspace(4) byref([12 x i8]) align 16 %arg) {
82+
; CHECK-LABEL: define amdgpu_kernel void @select_addr4_addr5(
83+
; CHECK-SAME: ptr addrspace(4) byref([12 x i8]) align 16 [[ARG:%.*]]) {
84+
; CHECK-NEXT: [[BB:.*:]]
85+
; CHECK-NEXT: ret void
86+
;
87+
bb:
88+
%alloca = alloca i32, i32 0, align 8, addrspace(5)
89+
%alloca1 = alloca [12 x i8], align 16, addrspace(5)
90+
call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) %alloca1, ptr addrspace(4) %arg, i64 0, i1 false)
91+
%select = select i1 false, ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca
92+
call void @llvm.memcpy.p0.p5.i64(ptr null, ptr addrspace(5) %select, i64 0, i1 false)
93+
ret void
94+
}
95+
96+
; Same as above but with swapped operands on the select.
97+
define amdgpu_kernel void @select_addr4_addr5_swapped(ptr addrspace(4) byref([12 x i8]) align 16 %arg) {
98+
; CHECK-LABEL: define amdgpu_kernel void @select_addr4_addr5_swapped(
99+
; CHECK-SAME: ptr addrspace(4) byref([12 x i8]) align 16 [[ARG:%.*]]) {
100+
; CHECK-NEXT: [[BB:.*:]]
101+
; CHECK-NEXT: ret void
102+
;
103+
bb:
104+
%alloca = alloca i32, i32 0, align 8, addrspace(5)
105+
%alloca1 = alloca [12 x i8], align 16, addrspace(5)
106+
call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) %alloca1, ptr addrspace(4) %arg, i64 0, i1 false)
107+
%select = select i1 false, ptr addrspace(5) %alloca, ptr addrspace(5) %alloca1
108+
call void @llvm.memcpy.p0.p5.i64(ptr null, ptr addrspace(5) %select, i64 0, i1 false)
109+
ret void
110+
}
111+
79112
declare void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noalias writeonly captures(none), ptr addrspace(4) noalias readonly captures(none), i64, i1 immarg) #0

0 commit comments

Comments
 (0)