Skip to content

Commit 90de4a4

Browse files
authored
[LoopFusion] Fix sink instructions (#147501)
If the second loop's preheader contains instructions that can be sunk, any PHI nodes among them must also be updated to receive their values from the fused loop's latch block. Fixes #128600
1 parent 495774d commit 90de4a4

File tree

2 files changed

+90
-0
lines changed

2 files changed

+90
-0
lines changed

llvm/lib/Transforms/Scalar/LoopFuse.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1176,6 +1176,28 @@ struct LoopFuser {
11761176
return true;
11771177
}
11781178

1179+
/// This function fixes PHI nodes after fusion in \p SafeToSink.
1180+
/// \p SafeToSink instructions are the instructions that are to be moved past
1181+
/// the fused loop. Thus, the PHI nodes in \p SafeToSink should be updated to
1182+
/// receive values from the fused loop if they are currently taking values
1183+
/// from the first loop (i.e. FC0)'s latch.
1184+
void fixPHINodes(ArrayRef<Instruction *> SafeToSink,
1185+
const FusionCandidate &FC0,
1186+
const FusionCandidate &FC1) const {
1187+
for (Instruction *Inst : SafeToSink) {
1188+
// No update needed for non-PHI nodes.
1189+
PHINode *Phi = dyn_cast<PHINode>(Inst);
1190+
if (!Phi)
1191+
continue;
1192+
for (unsigned I = 0; I < Phi->getNumIncomingValues(); I++) {
1193+
if (Phi->getIncomingBlock(I) != FC0.Latch)
1194+
continue;
1195+
assert(FC1.Latch && "FC1 latch is not set");
1196+
Phi->setIncomingBlock(I, FC1.Latch);
1197+
}
1198+
}
1199+
}
1200+
11791201
/// Collect instructions in the \p FC1 Preheader that can be hoisted
11801202
/// to the \p FC0 Preheader or sunk into the \p FC1 Body
11811203
bool collectMovablePreheaderInsts(
@@ -1481,6 +1503,9 @@ struct LoopFuser {
14811503
assert(I->getParent() == FC1.Preheader);
14821504
I->moveBefore(*FC1.ExitBlock, FC1.ExitBlock->getFirstInsertionPt());
14831505
}
1506+
// PHI nodes in SinkInsts need to be updated to receive values from the
1507+
// fused loop.
1508+
fixPHINodes(SinkInsts, FC0, FC1);
14841509
}
14851510

14861511
/// Determine if two fusion candidates have identical guards
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -passes=loop-fusion -S < %s 2>&1 | FileCheck %s
3+
; Regression test for sinking a PHI node during loop fusion.
; %sum1.0.lcssa lives in for.end, the block between the two loops, and takes
; %add from for.inc. The autogenerated assertions below expect it to appear in
; the final exit block with its incoming block rewritten to FOR_INC6.
define dso_local i32 @check_sunk_phi_nodes() {
4+
; CHECK-LABEL: define dso_local i32 @check_sunk_phi_nodes() {
5+
; CHECK-NEXT: [[ENTRY:.*]]:
6+
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
7+
; CHECK: [[FOR_BODY]]:
8+
; CHECK-NEXT: [[SUM1_02:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[ADD:%.*]], %[[FOR_INC6:.*]] ]
9+
; CHECK-NEXT: [[I_01:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_INC6]] ]
10+
; CHECK-NEXT: [[I1_04:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC7:%.*]], %[[FOR_INC6]] ]
11+
; CHECK-NEXT: [[SUM2_03:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[ADD5:%.*]], %[[FOR_INC6]] ]
12+
; CHECK-NEXT: [[ADD]] = add nsw i32 [[SUM1_02]], [[I_01]]
13+
; CHECK-NEXT: br label %[[FOR_INC:.*]]
14+
; CHECK: [[FOR_INC]]:
15+
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I1_04]], [[I1_04]]
16+
; CHECK-NEXT: [[ADD5]] = add nsw i32 [[SUM2_03]], [[MUL]]
17+
; CHECK-NEXT: br label %[[FOR_INC6]]
18+
; CHECK: [[FOR_INC6]]:
19+
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_01]], 1
20+
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 10
21+
; CHECK-NEXT: [[INC7]] = add nsw i32 [[I1_04]], 1
22+
; CHECK-NEXT: [[CMP3:%.*]] = icmp slt i32 [[INC7]], 10
23+
; CHECK-NEXT: br i1 [[CMP3]], label %[[FOR_BODY]], label %[[FOR_END8:.*]]
24+
; CHECK: [[FOR_END8]]:
25+
; CHECK-NEXT: [[SUM2_0_LCSSA:%.*]] = phi i32 [ [[ADD5]], %[[FOR_INC6]] ]
26+
; CHECK-NEXT: [[SUM1_0_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[FOR_INC6]] ]
27+
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SUM1_0_LCSSA]], [[SUM2_0_LCSSA]]
28+
; CHECK-NEXT: ret i32 [[TMP0]]
29+
;
30+
entry:
31+
br label %for.body
32+
33+
; First loop: for.body / for.inc. for.inc holds the back edge, so it is the
; latch of this loop.
for.body: ; preds = %entry, %for.inc
34+
%sum1.02 = phi i32 [ 0, %entry ], [ %add, %for.inc ]
35+
%i.01 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
36+
%add = add nsw i32 %sum1.02, %i.01
37+
br label %for.inc
38+
39+
for.inc: ; preds = %for.body
40+
%inc = add nsw i32 %i.01, 1
41+
%cmp = icmp slt i32 %inc, 10
42+
br i1 %cmp, label %for.body, label %for.end
43+
44+
; Block between the two loops. Its single-entry PHI is fed from the first
; loop's latch (for.inc); this is the PHI the test is about.
for.end: ; preds = %for.inc
45+
%sum1.0.lcssa = phi i32 [ %add, %for.inc ]
46+
br label %for.body4
47+
48+
; Second loop: for.body4 / for.inc6. for.inc6 holds the back edge, so it is
; the latch of this loop.
for.body4: ; preds = %for.end, %for.inc6
49+
%i1.04 = phi i32 [ 0, %for.end ], [ %inc7, %for.inc6 ]
50+
%sum2.03 = phi i32 [ 0, %for.end ], [ %add5, %for.inc6 ]
51+
%mul = mul nsw i32 %i1.04, %i1.04
52+
%add5 = add nsw i32 %sum2.03, %mul
53+
br label %for.inc6
54+
55+
for.inc6: ; preds = %for.body4
56+
%inc7 = add nsw i32 %i1.04, 1
57+
%cmp3 = icmp slt i32 %inc7, 10
58+
br i1 %cmp3, label %for.body4, label %for.end8
59+
60+
; Exit of the second loop: adds the two reduction results and returns the sum.
for.end8: ; preds = %for.inc6
61+
%sum2.0.lcssa = phi i32 [ %add5, %for.inc6 ]
62+
%0 = add i32 %sum1.0.lcssa, %sum2.0.lcssa
63+
ret i32 %0
64+
}
65+

0 commit comments

Comments
 (0)