Skip to content

Commit a303578

Browse files
committed
[VPlan] Address review
1 parent aa6367a commit a303578

File tree

5 files changed

+35
-30
lines changed

5 files changed

+35
-30
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -792,8 +792,8 @@ class VPIRFlags {
792792
}
793793

794794
CmpInst::Predicate getPredicate() const {
795-
if (OpType != OperationType::Cmp)
796-
return CmpInst::BAD_ICMP_PREDICATE;
795+
assert(OpType == OperationType::Cmp &&
796+
"recipe doesn't have a compare predicate");
797797
return CmpPredicate;
798798
}
799799

@@ -805,6 +805,9 @@ class VPIRFlags {
805805

806806
GEPNoWrapFlags getGEPNoWrapFlags() const { return GEPFlags; }
807807

808+
/// Returns true if the recipe has a compare predicate.
809+
bool hasPredicate() const { return OpType == OperationType::Cmp; }
810+
808811
/// Returns true if the recipe has fast-math flags.
809812
bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }
810813

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1796,6 +1796,13 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
17961796
/// account for the data embedded in them while checking for equality or
17971797
/// hashing.
17981798
static bool canHandle(const VPSingleDefRecipe *Def) {
1799+
// (Insert|Extract)Value cannot be handled because the index of the
1800+
// insert/extract is not a proper operand in LLVM IR, and hence also not in
1801+
// VPlan.
1802+
if (auto C = getOpcodeOrIntrinsicID(Def))
1803+
if (!C->first && (C->second == Instruction::InsertValue ||
1804+
C->second == Instruction::ExtractValue))
1805+
return false;
17991806
return isa<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
18001807
VPWidenSelectRecipe, VPHistogramRecipe, VPReplicateRecipe,
18011808
VPWidenIntrinsicRecipe>(Def) &&
@@ -1810,10 +1817,9 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
18101817
Def->getVPDefID(), getOpcodeOrIntrinsicID(Def),
18111818
TypeInfo.inferScalarType(Def), vputils::isSingleScalar(Def),
18121819
hash_combine_range(Def->operands()));
1813-
if (isa<VPReplicateRecipe>(Def))
1814-
return hash_combine(Result, Def->getUnderlyingInstr());
18151820
if (auto *RFlags = dyn_cast<VPRecipeWithIRFlags>(Def))
1816-
return hash_combine(Result, RFlags->getPredicate());
1821+
if (RFlags->hasPredicate())
1822+
return hash_combine(Result, RFlags->getPredicate());
18171823
return Result;
18181824
}
18191825

@@ -1828,11 +1834,10 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
18281834
TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R) &&
18291835
vputils::isSingleScalar(L) == vputils::isSingleScalar(R) &&
18301836
equal(L->operands(), R->operands());
1831-
if (Result && isa<VPReplicateRecipe>(L))
1832-
Result = L->getUnderlyingInstr() == R->getUnderlyingInstr();
1833-
if (Result && isa<VPRecipeWithIRFlags>(L))
1834-
Result = cast<VPRecipeWithIRFlags>(L)->getPredicate() ==
1835-
cast<VPRecipeWithIRFlags>(R)->getPredicate();
1837+
if (auto *LFlags = dyn_cast<VPRecipeWithIRFlags>(L))
1838+
if (Result && LFlags->hasPredicate())
1839+
Result = LFlags->getPredicate() ==
1840+
cast<VPRecipeWithIRFlags>(R)->getPredicate();
18361841
assert((!Result || getHashValue(L) == getHashValue(R)) &&
18371842
"Divergent hashes of equal values");
18381843
return Result;
@@ -1850,6 +1855,7 @@ void VPlanTransforms::cse(VPlan &Plan) {
18501855
vp_depth_first_shallow(Plan.getEntry()));
18511856
auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
18521857
vp_depth_first_shallow(LoopRegion->getEntry()));
1858+
VPDominatorTree VPDT(Plan);
18531859

18541860
// There is existing logic to sink instructions into replicate regions, and
18551861
// we'd be undoing that work if we went through replicate regions. Hence,
@@ -1862,6 +1868,9 @@ void VPlanTransforms::cse(VPlan &Plan) {
18621868
if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
18631869
continue;
18641870
if (VPSingleDefRecipe *V = CSEMap.lookup(Def)) {
1871+
// V must dominate Def for a valid replacement.
1872+
if (!VPDT.dominates(V->getParent(), VPBB))
1873+
continue;
18651874
// Drop poison-generating flags when reusing a value.
18661875
if (auto *RFlags = dyn_cast<VPRecipeWithIRFlags>(V))
18671876
RFlags->dropPoisonGeneratingFlags();

llvm/test/Transforms/LoopVectorize/AArch64/drop-poison-generating-flags.ll

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -54,15 +54,11 @@ define void @check_widen_intrinsic_with_nnan(ptr noalias %dst.0, ptr noalias %ds
5454
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]]
5555
; CHECK: [[PRED_LOAD_CONTINUE6]]:
5656
; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x double> [ [[TMP20]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], %[[PRED_LOAD_IF5]] ]
57-
; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[INDEX]], -1
58-
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[DST_0]], i64 [[TMP25]]
59-
; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> zeroinitializer, ptr [[TMP26]], i32 8, <4 x i1> [[TMP4]])
57+
; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> zeroinitializer, ptr [[TMP7]], i32 8, <4 x i1> [[TMP4]])
6058
; CHECK-NEXT: [[TMP28:%.*]] = fcmp oeq <4 x double> [[TMP24]], zeroinitializer
6159
; CHECK-NEXT: [[TMP29:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP28]], <4 x i1> zeroinitializer
6260
; CHECK-NEXT: [[TMP30:%.*]] = or <4 x i1> [[TMP5]], [[TMP29]]
63-
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP29]], i32 0
64-
; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP31]], i64 [[TMP25]], i64 [[TMP6]]
65-
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[DST_1]], i64 [[PREDPHI]]
61+
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[DST_1]], i64 [[TMP6]]
6662
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> splat (i32 10), ptr [[TMP32]], i32 4, <4 x i1> [[TMP30]])
6763
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
6864
; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000

llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-bswap.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,10 @@ define dso_local void @test(ptr %Arr, i32 signext %Len) {
1616
; CHECK: vector.body:
1717
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1818
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[INDEX]] to i64
19-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[ARR:%.*]], i64 [[TMP1]]
19+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i64 [[TMP1]]
2020
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
2121
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[WIDE_LOAD]])
22-
; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[INDEX]] to i64
23-
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP5]]
24-
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP6]], align 4
22+
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP2]], align 4
2523
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
2624
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
2725
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]

llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -111,18 +111,17 @@ define void @ld_div2_step1_start0_ind2(ptr noalias %A, ptr noalias %B) {
111111
; VF2: vector.body:
112112
; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
113113
; VF2-NEXT: [[TMP0:%.*]] = udiv i64 [[INDEX]], 2
114-
; VF2-NEXT: [[TMP1:%.*]] = udiv i64 [[INDEX]], 2
115-
; VF2-NEXT: [[TMP2:%.*]] = add i64 [[TMP0]], [[TMP1]]
116-
; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
117-
; VF2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
118-
; VF2-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], 42
119-
; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i64 0
114+
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], [[TMP0]]
115+
; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
116+
; VF2-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8
117+
; VF2-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP3]], 42
118+
; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i64 0
120119
; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
121-
; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
122-
; VF2-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8
120+
; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
121+
; VF2-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8
123122
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
124-
; VF2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
125-
; VF2-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
123+
; VF2-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
124+
; VF2-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
126125
; VF2: middle.block:
127126
; VF2-NEXT: br label [[EXIT:%.*]]
128127
; VF2: scalar.ph:

0 commit comments

Comments
 (0)