Skip to content

Commit 67ce2d1

Browse files
committed
[VPlan] Introduce CSE pass
1 parent f03345a commit 67ce2d1

File tree

56 files changed

+351
-404
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+351
-404
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7344,6 +7344,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
73447344
VPlanTransforms::narrowInterleaveGroups(
73457345
BestVPlan, BestVF,
73467346
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
7347+
VPlanTransforms::cse(BestVPlan, *Legal->getWidestInductionType());
73477348
VPlanTransforms::removeDeadRecipes(BestVPlan);
73487349

73497350
VPlanTransforms::convertToConcreteRecipes(BestVPlan,

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -897,6 +897,11 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
897897
return R && classof(R);
898898
}
899899

900+
static inline bool classof(const VPSingleDefRecipe *U) {
901+
auto *R = dyn_cast<VPRecipeBase>(U);
902+
return R && classof(R);
903+
}
904+
900905
void execute(VPTransformState &State) override = 0;
901906
};
902907

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1752,6 +1752,76 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
17521752
}
17531753
}
17541754

1755+
/// Hash the underlying data of a VPSingleDefRecipe pointer, instead of hashing
1756+
/// the pointer itself.
1757+
namespace {
1758+
struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
1759+
static bool isSentinel(const VPSingleDefRecipe *Def) {
1760+
return Def == getEmptyKey() || Def == getTombstoneKey();
1761+
}
1762+
1763+
static bool canHandle(const VPSingleDefRecipe *Def) {
1764+
return isa<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
1765+
VPWidenSelectRecipe, VPHistogramRecipe, VPPartialReductionRecipe,
1766+
VPReplicateRecipe, VPWidenIntrinsicRecipe>(Def);
1767+
}
1768+
1769+
static unsigned getHashValue(const VPSingleDefRecipe *Def) {
1770+
return hash_combine(Def->getVPDefID(), vputils::getOpcode(*Def),
1771+
vputils::isSingleScalar(Def),
1772+
hash_combine_range(Def->operands()));
1773+
}
1774+
1775+
static bool isEqual(const VPSingleDefRecipe *L, const VPSingleDefRecipe *R) {
1776+
if (isSentinel(L) || isSentinel(R))
1777+
return L == R;
1778+
bool Result = L->getVPDefID() == R->getVPDefID() &&
1779+
vputils::getOpcode(*L) == vputils::getOpcode(*R) &&
1780+
vputils::isSingleScalar(L) == vputils::isSingleScalar(R) &&
1781+
equal(L->operands(), R->operands());
1782+
assert(!Result || getHashValue(L) == getHashValue(R));
1783+
return Result;
1784+
}
1785+
};
1786+
} // end anonymous namespace
1787+
1788+
/// Perform a common-subexpression-elimination of VPSingleDefRecipes on the \p
1789+
/// Plan.
1790+
void VPlanTransforms::cse(VPlan &Plan, Type &CanonicalIVTy) {
1791+
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
1792+
if (!LoopRegion)
1793+
return;
1794+
auto VPBBsOutsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
1795+
vp_depth_first_shallow(Plan.getEntry()));
1796+
auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
1797+
vp_depth_first_shallow(LoopRegion->getEntry()));
1798+
1799+
// There is existing logic to sink instructions into replicate regions, and
1800+
// we'd be undoing that work if we went through replicate regions. Hence,
1801+
// don't CSE in replicate regions.
1802+
DenseMap<VPSingleDefRecipe *, VPSingleDefRecipe *, VPCSEDenseMapInfo> CSEMap;
1803+
VPTypeAnalysis TypeInfo(&CanonicalIVTy);
1804+
for (VPBasicBlock *VPBB :
1805+
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) {
1806+
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
1807+
auto *Def = dyn_cast<VPSingleDefRecipe>(&R);
1808+
if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
1809+
continue;
1810+
if (VPSingleDefRecipe *V = CSEMap.lookup(Def)) {
1811+
if (TypeInfo.inferScalarType(Def) != TypeInfo.inferScalarType(V))
1812+
continue;
1813+
// Drop poison-generating flags when reusing a value.
1814+
if (auto *RFlags = dyn_cast<VPRecipeWithIRFlags>(V))
1815+
RFlags->dropPoisonGeneratingFlags();
1816+
Def->replaceAllUsesWith(V);
1817+
Def->eraseFromParent();
1818+
continue;
1819+
}
1820+
CSEMap[Def] = Def;
1821+
}
1822+
}
1823+
}
1824+
17551825
/// Move loop-invariant recipes out of the vector loop region in \p Plan.
17561826
static void licm(VPlan &Plan) {
17571827
VPBasicBlock *Preheader = Plan.getVectorPreheader();

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,10 @@ struct VPlanTransforms {
240240
/// removing dead edges to their successors.
241241
static void removeBranchOnConst(VPlan &Plan);
242242

243+
/// Perform common-subexpression-elimination on \p Plan. This is best done
244+
/// late, on the plan that is about to be executed.
245+
static void cse(VPlan &Plan, Type &CanonicalIVType);
246+
243247
/// If there's a single exit block, optimize its phi recipes that use exiting
244248
/// IV values by feeding them precomputed end values instead, possibly taken
245249
/// one step backwards.

llvm/lib/Transforms/Vectorize/VPlanUtils.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#define LLVM_TRANSFORMS_VECTORIZE_VPLANUTILS_H
1111

1212
#include "VPlan.h"
13+
#include "llvm/ADT/TypeSwitch.h"
1314

1415
namespace llvm {
1516
class ScalarEvolution;
@@ -37,6 +38,22 @@ VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
3738
/// SCEV expression could be constructed.
3839
const SCEV *getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE);
3940

41+
/// Get any instruction opcode data embedded in recipe \p R. Returns an optional
42+
/// pair, where the first element indicates whether it is an intrinsic ID.
43+
inline std::optional<std::pair<bool, unsigned>>
44+
getOpcode(const VPRecipeBase &R) {
45+
return TypeSwitch<const VPRecipeBase *,
46+
std::optional<std::pair<bool, unsigned>>>(&R)
47+
.Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
48+
VPWidenSelectRecipe, VPHistogramRecipe, VPPartialReductionRecipe,
49+
VPReplicateRecipe>(
50+
[](auto *I) { return std::make_pair(false, I->getOpcode()); })
51+
.Case<VPWidenIntrinsicRecipe>([](auto *I) {
52+
return std::make_pair(true, I->getVectorIntrinsicID());
53+
})
54+
.Default([](auto *) { return std::nullopt; });
55+
}
56+
4057
/// Returns true if \p VPV is a single scalar, either because it produces the
4158
/// same value for all lanes or only has its first lane used.
4259
inline bool isSingleScalar(const VPValue *VPV) {

0 commit comments

Comments
 (0)