Skip to content

Commit 6d3a3e3

Browse files
committed
[VPlan] Check underlying instr for Replicates
1 parent 58d0931 commit 6d3a3e3

File tree

6 files changed

+43
-24
lines changed

6 files changed

+43
-24
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1812,10 +1812,13 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
18121812
static unsigned getHashValue(const VPSingleDefRecipe *Def) {
18131813
const VPlan *Plan = Def->getParent()->getPlan();
18141814
VPTypeAnalysis TypeInfo(*Plan);
1815-
return hash_combine(Def->getVPDefID(), getOpcodeOrIntrinsicID(Def),
1816-
TypeInfo.inferScalarType(Def),
1817-
vputils::isSingleScalar(Def),
1818-
hash_combine_range(Def->operands()));
1815+
hash_code Result = hash_combine(
1816+
Def->getVPDefID(), getOpcodeOrIntrinsicID(Def),
1817+
TypeInfo.inferScalarType(Def), vputils::isSingleScalar(Def),
1818+
hash_combine_range(Def->operands()));
1819+
return isa<VPReplicateRecipe>(Def)
1820+
? hash_combine(Result, Def->getUnderlyingInstr())
1821+
: Result;
18191822
}
18201823

18211824
/// Check equality of underlying data of \p L and \p R.
@@ -1831,7 +1834,9 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
18311834
equal(L->operands(), R->operands());
18321835
assert((!Result || getHashValue(L) == getHashValue(R)) &&
18331836
"Divergent hashes of equal values");
1834-
return Result;
1837+
return Result && isa<VPReplicateRecipe>(L)
1838+
? L->getUnderlyingInstr() == R->getUnderlyingInstr()
1839+
: Result;
18351840
}
18361841
};
18371842
} // end anonymous namespace

llvm/test/Transforms/LoopVectorize/AArch64/drop-poison-generating-flags.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,12 +54,14 @@ define void @check_widen_intrinsic_with_nnan(ptr noalias %dst.0, ptr noalias %ds
5454
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]]
5555
; CHECK: [[PRED_LOAD_CONTINUE6]]:
5656
; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x double> [ [[TMP20]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], %[[PRED_LOAD_IF5]] ]
57-
; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> zeroinitializer, ptr [[TMP7]], i32 8, <4 x i1> [[TMP4]])
57+
; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[INDEX]], -1
58+
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[DST_0]], i64 [[TMP25]]
59+
; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> zeroinitializer, ptr [[TMP26]], i32 8, <4 x i1> [[TMP4]])
5860
; CHECK-NEXT: [[TMP28:%.*]] = fcmp oeq <4 x double> [[TMP24]], zeroinitializer
5961
; CHECK-NEXT: [[TMP29:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP28]], <4 x i1> zeroinitializer
6062
; CHECK-NEXT: [[TMP30:%.*]] = or <4 x i1> [[TMP5]], [[TMP29]]
6163
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP29]], i32 0
62-
; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP31]], i64 [[TMP6]], i64 [[TMP6]]
64+
; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP31]], i64 [[TMP25]], i64 [[TMP6]]
6365
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[DST_1]], i64 [[PREDPHI]]
6466
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> splat (i32 10), ptr [[TMP32]], i32 4, <4 x i1> [[TMP30]])
6567
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4

llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,12 @@ define void @test1(ptr %dst, {i64, i64} %sv) {
3030
; FORCED-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
3131
; FORCED: [[VECTOR_PH]]:
3232
; FORCED-NEXT: [[TMP0:%.*]] = extractvalue { i64, i64 } [[SV]], 0
33-
; FORCED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
33+
; FORCED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
34+
; FORCED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
35+
; FORCED-NEXT: [[TMP4:%.*]] = extractvalue { i64, i64 } [[SV]], 1
36+
; FORCED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i64 0
3437
; FORCED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
35-
; FORCED-NEXT: [[TMP1:%.*]] = add <2 x i64> [[BROADCAST_SPLAT2]], [[BROADCAST_SPLAT2]]
38+
; FORCED-NEXT: [[TMP1:%.*]] = add <2 x i64> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]]
3639
; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
3740
; FORCED: [[VECTOR_BODY]]:
3841
; FORCED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -83,13 +86,16 @@ define void @test_getVectorCallCost(ptr %dst, {float, float} %sv) {
8386
; FORCED-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
8487
; FORCED: [[VECTOR_PH]]:
8588
; FORCED-NEXT: [[TMP0:%.*]] = extractvalue { float, float } [[SV]], 0
86-
; FORCED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0
89+
; FORCED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0
90+
; FORCED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
91+
; FORCED-NEXT: [[TMP4:%.*]] = extractvalue { float, float } [[SV]], 1
92+
; FORCED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i64 0
8793
; FORCED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer
8894
; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
8995
; FORCED: [[VECTOR_BODY]]:
9096
; FORCED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
9197
; FORCED-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[DST]], i32 [[INDEX]]
92-
; FORCED-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[BROADCAST_SPLAT2]], <2 x float> [[BROADCAST_SPLAT2]])
98+
; FORCED-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[BROADCAST_SPLAT]], <2 x float> [[BROADCAST_SPLAT2]])
9399
; FORCED-NEXT: store <2 x float> [[TMP2]], ptr [[TMP1]], align 4
94100
; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
95101
; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000

llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,12 @@ define void @widen_extractvalue(ptr %dst, {i64, i64} %sv) #0 {
1717
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 1000, [[TMP3]]
1818
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 1000, [[N_MOD_VF]]
1919
; CHECK-NEXT: [[EXTRACT0:%.*]] = extractvalue { i64, i64 } [[SV]], 0
20-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT0]], i64 0
20+
; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT0]], i64 0
21+
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
22+
; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { i64, i64 } [[SV]], 1
23+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP10]], i64 0
2124
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
22-
; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[BROADCAST_SPLAT2]], [[BROADCAST_SPLAT2]]
25+
; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT2]], [[BROADCAST_SPLAT2]]
2326
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
2427
; CHECK: [[VECTOR_BODY]]:
2528
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]

llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-bswap.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,12 @@ define dso_local void @test(ptr %Arr, i32 signext %Len) {
1616
; CHECK: vector.body:
1717
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1818
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[INDEX]] to i64
19-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i64 [[TMP1]]
19+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[ARR:%.*]], i64 [[TMP1]]
2020
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
2121
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[WIDE_LOAD]])
22-
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP2]], align 4
22+
; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[INDEX]] to i64
23+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP5]]
24+
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP6]], align 4
2325
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
2426
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
2527
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]

llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -111,17 +111,18 @@ define void @ld_div2_step1_start0_ind2(ptr noalias %A, ptr noalias %B) {
111111
; VF2: vector.body:
112112
; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
113113
; VF2-NEXT: [[TMP0:%.*]] = udiv i64 [[INDEX]], 2
114-
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], [[TMP0]]
115-
; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
116-
; VF2-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8
117-
; VF2-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP3]], 42
118-
; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i64 0
114+
; VF2-NEXT: [[TMP1:%.*]] = udiv i64 [[INDEX]], 2
115+
; VF2-NEXT: [[TMP2:%.*]] = add i64 [[TMP0]], [[TMP1]]
116+
; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
117+
; VF2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
118+
; VF2-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], 42
119+
; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i64 0
119120
; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
120-
; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
121-
; VF2-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8
121+
; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
122+
; VF2-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8
122123
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
123-
; VF2-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
124-
; VF2-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
124+
; VF2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
125+
; VF2-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
125126
; VF2: middle.block:
126127
; VF2-NEXT: br label [[EXIT:%.*]]
127128
; VF2: scalar.ph:

0 commit comments

Comments
 (0)