Skip to content

Commit cc22a76

Browse files
committed
[VPlan] Introduce CSE pass
Requires #151487 to completely subsume the non-VPlan-based limited CSE. Inspired by #146856, although the test from that PR remains unchanged; that is still being investigated.
1 parent a95d0cd commit cc22a76

40 files changed

+392
-319
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7308,6 +7308,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
73087308
VPlanTransforms::narrowInterleaveGroups(
73097309
BestVPlan, BestVF,
73107310
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
7311+
VPlanTransforms::cse(BestVPlan, *Legal->getWidestInductionType());
73117312
VPlanTransforms::removeDeadRecipes(BestVPlan);
73127313

73137314
VPlanTransforms::convertToConcreteRecipes(BestVPlan,

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,33 @@ void VPDef::dump() const {
122122
}
123123
#endif
124124

125+
bool VPValue::isIdenticalTo(const VPValue *Other) const {
126+
if (getVPValueID() != Other->getVPValueID() ||
127+
hasDefiningRecipe() != Other->hasDefiningRecipe() ||
128+
getUnderlyingValue() != Other->getUnderlyingValue())
129+
return false;
130+
if (hasDefiningRecipe()) {
131+
const VPRecipeBase *DefL = getDefiningRecipe();
132+
const VPRecipeBase *DefR = Other->getDefiningRecipe();
133+
if (vputils::getOpcode(*DefL) != vputils::getOpcode(*DefR) ||
134+
DefL->getNumOperands() != DefR->getNumOperands())
135+
return false;
136+
return equal(DefL->operands(), DefR->operands());
137+
}
138+
return true;
139+
}
140+
141+
hash_code llvm::hash_value(const VPValue &V) {
142+
if (V.hasDefiningRecipe()) {
143+
const VPRecipeBase *Def = V.getDefiningRecipe();
144+
return hash_combine(vputils::getOpcode(*Def),
145+
hash_combine_range(Def->operands()));
146+
}
147+
if (Value *U = V.getUnderlyingValue())
148+
return hash_combine(V.getVPValueID(), U);
149+
return hash_value(V.getVPValueID());
150+
}
151+
125152
VPRecipeBase *VPValue::getDefiningRecipe() {
126153
return cast_or_null<VPRecipeBase>(Def);
127154
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1755,6 +1755,54 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
17551755
}
17561756
}
17571757

1758+
/// Hash the underlying data of a VPSingleDefRecipe pointer, instead of hashing
1759+
/// the pointer itself.
1760+
namespace {
1761+
struct CSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
1762+
static unsigned getHashValue(const VPSingleDefRecipe *R) {
1763+
return hash_value(*R);
1764+
}
1765+
1766+
static bool isEqual(const VPSingleDefRecipe *LHS,
1767+
const VPSingleDefRecipe *RHS) {
1768+
if (LHS == getEmptyKey() || RHS == getEmptyKey() ||
1769+
LHS == getTombstoneKey() || RHS == getTombstoneKey())
1770+
return LHS == RHS;
1771+
return LHS->isIdenticalTo(RHS);
1772+
}
1773+
};
1774+
} // end anonymous namespace
1775+
1776+
/// Perform a common-subexpression-elimination of VPSingleDefRecipes on the \p
1777+
/// Plan.
1778+
void VPlanTransforms::cse(VPlan &Plan, Type &CanonicalIVTy) {
1779+
DenseMap<VPSingleDefRecipe *, VPSingleDefRecipe *, CSEDenseMapInfo> CSEMap;
1780+
VPTypeAnalysis TypeInfo(&CanonicalIVTy);
1781+
// There is existing logic to sink instructions into replicate regions, and
1782+
// we'd be undoing that work if we went through replicate regions. Hence,
1783+
// don't CSE in replicate regions.
1784+
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
1785+
vp_depth_first_shallow(Plan.getEntry()))) {
1786+
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
1787+
auto *Def = dyn_cast<VPSingleDefRecipe>(&R);
1788+
if (!Def)
1789+
continue;
1790+
// Check if we can replace this instruction with any of the
1791+
// visited instructions.
1792+
if (VPSingleDefRecipe *V = CSEMap.lookup(Def)) {
1793+
// Due to other transforms like truncateToMinimalBitwidths, there is no
1794+
// embedded type information that we can reliably look at.
1795+
if (TypeInfo.inferScalarType(Def) != TypeInfo.inferScalarType(V))
1796+
continue;
1797+
Def->replaceAllUsesWith(V);
1798+
Def->eraseFromParent();
1799+
} else {
1800+
CSEMap.insert_or_assign(Def, Def);
1801+
}
1802+
}
1803+
}
1804+
}
1805+
17581806
/// Move loop-invariant recipes out of the vector loop region in \p Plan.
17591807
static void licm(VPlan &Plan) {
17601808
VPBasicBlock *Preheader = Plan.getVectorPreheader();

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,10 @@ struct VPlanTransforms {
240240
/// removing dead edges to their successors.
241241
static void removeBranchOnConst(VPlan &Plan);
242242

243+
/// Perform common-subexpression-elimination, which is best done late, right
244+
/// before the \p Plan is executed.
245+
static void cse(VPlan &Plan, Type &CanonicalIVType);
246+
243247
/// If there's a single exit block, optimize its phi recipes that use exiting
244248
/// IV values by feeding them precomputed end values instead, possibly taken
245249
/// one step backwards.

llvm/lib/Transforms/Vectorize/VPlanUtils.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#define LLVM_TRANSFORMS_VECTORIZE_VPLANUTILS_H
1111

1212
#include "VPlan.h"
13+
#include "llvm/ADT/TypeSwitch.h"
1314

1415
namespace llvm {
1516
class ScalarEvolution;
@@ -37,6 +38,14 @@ VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
3738
/// SCEV expression could be constructed.
3839
const SCEV *getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE);
3940

41+
/// Get any instruction opcode data embedded in recipe \p R.
42+
inline std::optional<unsigned> getOpcode(const VPRecipeBase &R) {
43+
return TypeSwitch<const VPRecipeBase *, std::optional<unsigned>>(&R)
44+
.Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe, VPReplicateRecipe,
45+
VPWidenSelectRecipe>([](auto *I) { return I->getOpcode(); })
46+
.Default([](auto *) { return std::nullopt; });
47+
}
48+
4049
/// Returns true if \p VPV is a single scalar, either because it produces the
4150
/// same value for all lanes or only has its first lane used.
4251
inline bool isSingleScalar(const VPValue *VPV) {

llvm/lib/Transforms/Vectorize/VPlanValue.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,8 +185,15 @@ class LLVM_ABI_FOR_TEST VPValue {
185185
assert(!UnderlyingVal && "Underlying Value is already set.");
186186
UnderlyingVal = Val;
187187
}
188+
189+
// Equality of data (ID, underlying value, defining opcode and operands), not of pointers.
190+
bool isIdenticalTo(const VPValue *Other) const;
188191
};
189192

193+
// Hash method so VPValue can be de-duplicated in certain
194+
// contexts.
195+
hash_code hash_value(const VPValue &Arg);
196+
190197
typedef DenseMap<Value *, VPValue *> Value2VPValueTy;
191198
typedef DenseMap<VPValue *, Value *> VPValue2ValueTy;
192199

llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -330,11 +330,10 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) {
330330
; CHECK-NEXT: [[CMP_N11:%.*]] = icmp eq i64 [[N]], [[IND_END]]
331331
; CHECK-NEXT: br i1 [[CMP_N11]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
332332
; CHECK: vec.epilog.scalar.ph:
333-
; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
334-
; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
333+
; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
335334
; CHECK-NEXT: br label [[LOOP:%.*]]
336335
; CHECK: loop:
337-
; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL5]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ]
336+
; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ]
338337
; CHECK-NEXT: [[IV_2:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_1_NEXT]], [[LOOP]] ]
339338
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_1]]
340339
; CHECK-NEXT: [[ADD:%.*]] = add i64 [[IV_2]], 10

llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,11 @@ define double @test_reduction_costs() {
2323
; CHECK: [[SCALAR_PH]]:
2424
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
2525
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ]
26-
; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ]
2726
; CHECK-NEXT: br label %[[LOOP_1:.*]]
2827
; CHECK: [[LOOP_1]]:
2928
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_1]] ]
3029
; CHECK-NEXT: [[R_1:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[R_1_NEXT:%.*]], %[[LOOP_1]] ]
31-
; CHECK-NEXT: [[R_2:%.*]] = phi double [ [[BC_MERGE_RDX2]], %[[SCALAR_PH]] ], [ [[R_2_NEXT:%.*]], %[[LOOP_1]] ]
30+
; CHECK-NEXT: [[R_2:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[R_2_NEXT:%.*]], %[[LOOP_1]] ]
3231
; CHECK-NEXT: [[R_1_NEXT]] = fadd double [[R_1]], 3.000000e+00
3332
; CHECK-NEXT: [[R_2_NEXT]] = fadd double [[R_2]], 9.000000e+00
3433
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,6 @@ define i64 @test_ptr_ivs_and_widened_ivs(ptr %src, i32 %N) {
294294
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
295295
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 4
296296
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP3]]
297-
; CHECK-NEXT: [[IND_END1:%.*]] = trunc i64 [[N_VEC]] to i32
298297
; CHECK-NEXT: [[IND_END3:%.*]] = trunc i64 [[N_VEC]] to i32
299298
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
300299
; CHECK: vector.body:
@@ -321,7 +320,7 @@ define i64 @test_ptr_ivs_and_widened_ivs(ptr %src, i32 %N) {
321320
; CHECK: scalar.ph:
322321
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
323322
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[SRC]], [[ENTRY]] ]
324-
; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
323+
; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
325324
; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
326325
; CHECK-NEXT: br label [[LOOP:%.*]]
327326
; CHECK: loop:

llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,8 @@ define void @licm_replicate_call(double %x, ptr %dst) {
99
; CHECK-NEXT: [[ENTRY:.*]]:
1010
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1111
; CHECK: [[VECTOR_PH]]:
12-
; CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.pow.f64(double [[X]], double 3.000000e+00)
1312
; CHECK-NEXT: [[TMP1:%.*]] = tail call double @llvm.pow.f64(double [[X]], double 3.000000e+00)
14-
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[TMP0]], i32 0
13+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 0
1514
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP1]], i32 1
1615
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
1716
; CHECK: [[VECTOR_BODY]]:

0 commit comments

Comments
 (0)