Skip to content

Commit 38318dd

Browse files
authored
[RISCV][LoopVectorize] Use DataWithEVL as the preferred tail folding style (#148686)
In preparation to eventually make EVL tail folding the default, this patch sets DataWithEVL as the preferred tail folding style for RISC-V, but doesn't enable tail folding by default. And although tail folding isn't enabled by default, the loop vectorizer will actually tail fold loops with a small trip count, so this will cause some EVL vectorized loops to be generated in the default configuration. The EVL tail folding work is still not complete, e.g. we still need to handle interleave groups etc., see #123069, but a lot of these missing features also apply to the data (masked) tail folding strategy, which is the default anyway. The actual overall performance picture is much better, on TSVC EVL tail folding is faster than data on every benchmark on the spacemit-x60[^1]: https://lnt.lukelau.me/db_default/v4/nts/755?compare_to=756 And on SPEC CPU 2017 we see a geomean improvement[^2]: https://lnt.lukelau.me/db_default/v4/nts/751?compare_to=753 This is likely due to masked instructions generally being less performant on the spacemit-x60, up to twice as slow: https://camel-cdr.github.io/rvv-bench-results/bpi_f3/index.html [^1]: These benchmarks don't exactly give the same performance numbers as this patch, but it's a good indicator that EVL tail folding is generally faster than masked tail folding. [^2]: The large code size increase in 505.mcf_r is due to a function being inlined now
1 parent 7cfd32a commit 38318dd

File tree

6 files changed

+227
-453
lines changed

6 files changed

+227
-453
lines changed

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,8 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
116116
}
117117
TailFoldingStyle
118118
getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const override {
119-
return ST->hasVInstructions() ? TailFoldingStyle::Data
120-
: TailFoldingStyle::DataWithoutLaneMask;
119+
return ST->hasVInstructions() ? TailFoldingStyle::DataWithEVL
120+
: TailFoldingStyle::None;
121121
}
122122
std::optional<unsigned> getMaxVScale() const override;
123123
std::optional<unsigned> getVScaleForTuning() const override;

llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -133,33 +133,40 @@ define void @trip8_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
133133
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
134134
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
135135
; CHECK: vector.body:
136-
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 8)
137-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8:%.*]], i32 0
138-
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP9]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
136+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
137+
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
138+
; CHECK-NEXT: [[AVL:%.*]] = sub i64 8, [[EVL_BASED_IV]]
139+
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
140+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[EVL_BASED_IV]]
141+
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
142+
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr align 1 [[TMP9]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
139143
; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 4 x i8> [[WIDE_MASKED_LOAD]], splat (i8 1)
140-
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11:%.*]], i32 0
141-
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP12]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
144+
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[EVL_BASED_IV]]
145+
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
146+
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr align 1 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
142147
; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i8> [[TMP10]], [[WIDE_MASKED_LOAD1]]
143-
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
144-
; CHECK-NEXT: call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP13]], ptr [[TMP14]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
145-
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
148+
; CHECK-NEXT: call void @llvm.vp.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP13]], ptr align 1 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
149+
; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
150+
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[EVL_BASED_IV]]
151+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
152+
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
146153
; CHECK: middle.block:
147154
; CHECK-NEXT: br label [[FOR_END:%.*]]
148155
; CHECK: scalar.ph:
149156
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
150157
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
151158
; CHECK: for.body:
152159
; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
153-
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[I_08]]
160+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[I_08]]
154161
; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
155162
; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP15]], 1
156-
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 [[I_08]]
163+
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]]
157164
; CHECK-NEXT: [[TMP16:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
158165
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP16]]
159166
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
160167
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
161168
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 8
162-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
169+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
163170
; CHECK: for.end:
164171
; CHECK-NEXT: ret void
165172
;
@@ -215,7 +222,7 @@ define void @trip16_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
215222
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
216223
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
217224
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 16
218-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
225+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
219226
; CHECK: for.end:
220227
; CHECK-NEXT: ret void
221228
;
@@ -272,7 +279,7 @@ define void @trip32_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
272279
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
273280
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
274281
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 32
275-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
282+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
276283
; CHECK: for.end:
277284
; CHECK-NEXT: ret void
278285
;
@@ -315,7 +322,7 @@ define void @trip24_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
315322
; CHECK-NEXT: store <8 x i8> [[TMP6]], ptr [[TMP5]], align 1
316323
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
317324
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24
318-
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
325+
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
319326
; CHECK: middle.block:
320327
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
321328
; CHECK: scalar.ph:
@@ -332,7 +339,7 @@ define void @trip24_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
332339
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
333340
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
334341
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 24
335-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
342+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
336343
; CHECK: for.end:
337344
; CHECK-NEXT: ret void
338345
;

llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll

Lines changed: 45 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -7,29 +7,49 @@ define void @test(ptr %p, i64 %a, i8 %b) {
77
; CHECK-NEXT: entry:
88
; CHECK-NEXT: br i1 false, label [[SCALAR_PH1:%.*]], label [[VECTOR_PH:%.*]]
99
; CHECK: vector.ph:
10-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B]], i64 0
11-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
12-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i64> poison, i64 [[A]], i64 0
13-
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT1]], <16 x i64> poison, <16 x i32> zeroinitializer
14-
; CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i64> [[BROADCAST_SPLAT2]], splat (i64 48)
15-
; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i64> [[TMP0]], splat (i64 52)
16-
; CHECK-NEXT: [[TMP2:%.*]] = trunc <16 x i64> [[TMP1]] to <16 x i32>
17-
; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[BROADCAST_SPLAT]] to <16 x i32>
10+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
11+
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i32 [[TMP0]], 2
12+
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], 1
13+
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 9, [[TMP2]]
14+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]]
15+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
16+
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
17+
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP3]], 2
18+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i8> poison, i8 [[B]], i64 0
19+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
20+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[A]], i64 0
21+
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
22+
; CHECK-NEXT: [[TMP5:%.*]] = shl <vscale x 2 x i64> [[BROADCAST_SPLAT2]], splat (i64 48)
23+
; CHECK-NEXT: [[TMP6:%.*]] = ashr <vscale x 2 x i64> [[TMP5]], splat (i64 52)
24+
; CHECK-NEXT: [[TMP7:%.*]] = trunc <vscale x 2 x i64> [[TMP6]] to <vscale x 2 x i32>
25+
; CHECK-NEXT: [[TMP8:%.*]] = zext <vscale x 2 x i8> [[BROADCAST_SPLAT]] to <vscale x 2 x i32>
26+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[P]], i64 0
27+
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
28+
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
29+
; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 2 x i32> [[TMP9]], splat (i32 1)
30+
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> zeroinitializer, [[TMP10]]
1831
; CHECK-NEXT: br label [[FOR_COND:%.*]]
1932
; CHECK: vector.body:
2033
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_COND]] ]
21-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR_COND]] ]
22-
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[INDEX]], i32 9)
23-
; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <16 x i32> [[VEC_IND]], splat (i32 2)
24-
; CHECK-NEXT: [[TMP5:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i1> [[TMP4]], <16 x i1> zeroinitializer
25-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP2]], <16 x i32> [[TMP3]]
26-
; CHECK-NEXT: [[TMP6:%.*]] = shl <16 x i32> [[PREDPHI]], splat (i32 8)
27-
; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i32> [[TMP6]] to <16 x i8>
28-
; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i8> [[TMP8]], i32 15
29-
; CHECK-NEXT: store i8 [[TMP40]], ptr [[P]], align 1
30-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
31-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i32> [[VEC_IND]], splat (i32 16)
32-
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]]
34+
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[FOR_COND]] ]
35+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR_COND]] ]
36+
; CHECK-NEXT: [[AVL:%.*]] = sub i32 9, [[EVL_BASED_IV]]
37+
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 2, i1 true)
38+
; CHECK-NEXT: [[TMP12:%.*]] = mul i32 1, [[TMP11]]
39+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP12]], i64 0
40+
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT5]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
41+
; CHECK-NEXT: [[TMP13:%.*]] = icmp ule <vscale x 2 x i32> [[VEC_IND]], splat (i32 8)
42+
; CHECK-NEXT: [[TMP14:%.*]] = icmp sge <vscale x 2 x i32> [[VEC_IND]], splat (i32 2)
43+
; CHECK-NEXT: [[TMP15:%.*]] = select <vscale x 2 x i1> [[TMP13]], <vscale x 2 x i1> [[TMP14]], <vscale x 2 x i1> zeroinitializer
44+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP15]], <vscale x 2 x i32> [[TMP7]], <vscale x 2 x i32> [[TMP8]]
45+
; CHECK-NEXT: [[TMP16:%.*]] = shl <vscale x 2 x i32> [[PREDPHI]], splat (i32 8)
46+
; CHECK-NEXT: [[TMP17:%.*]] = trunc <vscale x 2 x i32> [[TMP16]] to <vscale x 2 x i8>
47+
; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> [[TMP17]], <vscale x 2 x ptr> align 1 [[BROADCAST_SPLAT4]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]])
48+
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP11]], [[EVL_BASED_IV]]
49+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]]
50+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT6]]
51+
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
52+
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]]
3353
; CHECK: middle.block:
3454
; CHECK-NEXT: br label [[EXIT1:%.*]]
3555
; CHECK: scalar.ph:
@@ -52,7 +72,7 @@ define void @test(ptr %p, i64 %a, i8 %b) {
5272
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SHL_I32]] to i8
5373
; CHECK-NEXT: store i8 [[TRUNC]], ptr [[P]], align 1
5474
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 8
55-
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND1]], label [[EXIT1]], !llvm.loop [[LOOP3:![0-9]+]]
75+
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND1]], label [[EXIT1]], !llvm.loop [[LOOP4:![0-9]+]]
5676
; CHECK: exit:
5777
; CHECK-NEXT: ret void
5878
;
@@ -84,8 +104,9 @@ exit: ; preds = %for.body
84104
ret void
85105
}
86106
;.
87-
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
107+
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
88108
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
89-
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
90-
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
109+
; CHECK: [[META2]] = !{!"llvm.loop.isvectorized.tailfoldingstyle", !"evl"}
110+
; CHECK: [[META3]] = !{!"llvm.loop.unroll.runtime.disable"}
111+
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META3]], [[META1]]}
91112
;.

0 commit comments

Comments
 (0)