Skip to content

Commit ceeafe4

Browse files
committed
[VPlan] Introduce CSE pass
1 parent 9d15189 commit ceeafe4

File tree

56 files changed

+862
-598
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+862
-598
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7309,6 +7309,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
73097309
VPlanTransforms::narrowInterleaveGroups(
73107310
BestVPlan, BestVF,
73117311
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
7312+
VPlanTransforms::cse(BestVPlan, *Legal->getWidestInductionType());
73127313
VPlanTransforms::removeDeadRecipes(BestVPlan);
73137314

73147315
VPlanTransforms::convertToConcreteRecipes(BestVPlan,

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -897,6 +897,11 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
897897
return R && classof(R);
898898
}
899899

900+
static inline bool classof(const VPSingleDefRecipe *U) {
901+
auto *R = dyn_cast<VPRecipeBase>(U);
902+
return R && classof(R);
903+
}
904+
900905
void execute(VPTransformState &State) override = 0;
901906
};
902907

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1753,6 +1753,76 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
17531753
}
17541754
}
17551755

1756+
/// Hash the underlying data of a VPSingleDefRecipe pointer, instead of hashing
1757+
/// the pointer itself.
1758+
namespace {
1759+
struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
1760+
static bool isSentinel(const VPSingleDefRecipe *Def) {
1761+
return Def == getEmptyKey() || Def == getTombstoneKey();
1762+
}
1763+
1764+
static bool canHandle(const VPSingleDefRecipe *Def) {
1765+
return isa<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
1766+
VPWidenSelectRecipe, VPHistogramRecipe, VPPartialReductionRecipe,
1767+
VPReplicateRecipe, VPWidenIntrinsicRecipe>(Def);
1768+
}
1769+
1770+
static unsigned getHashValue(const VPSingleDefRecipe *Def) {
1771+
return hash_combine(Def->getVPDefID(), vputils::getOpcode(*Def),
1772+
vputils::isSingleScalar(Def),
1773+
hash_combine_range(Def->operands()));
1774+
}
1775+
1776+
static bool isEqual(const VPSingleDefRecipe *L, const VPSingleDefRecipe *R) {
1777+
if (isSentinel(L) || isSentinel(R))
1778+
return L == R;
1779+
bool Result = L->getVPDefID() == R->getVPDefID() &&
1780+
vputils::getOpcode(*L) == vputils::getOpcode(*R) &&
1781+
vputils::isSingleScalar(L) == vputils::isSingleScalar(R) &&
1782+
equal(L->operands(), R->operands());
1783+
assert(!Result || getHashValue(L) == getHashValue(R));
1784+
return Result;
1785+
}
1786+
};
1787+
} // end anonymous namespace
1788+
1789+
/// Perform a common-subexpression-elimination of VPSingleDefRecipes on the \p
1790+
/// Plan.
1791+
void VPlanTransforms::cse(VPlan &Plan, Type &CanonicalIVTy) {
1792+
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
1793+
if (!LoopRegion)
1794+
return;
1795+
auto VPBBsOutsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
1796+
vp_depth_first_shallow(Plan.getEntry()));
1797+
auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
1798+
vp_depth_first_shallow(LoopRegion->getEntry()));
1799+
1800+
// There is existing logic to sink instructions into replicate regions, and
1801+
// we'd be undoing that work if we went through replicate regions. Hence,
1802+
// don't CSE in replicate regions.
1803+
DenseMap<VPSingleDefRecipe *, VPSingleDefRecipe *, VPCSEDenseMapInfo> CSEMap;
1804+
VPTypeAnalysis TypeInfo(&CanonicalIVTy);
1805+
for (VPBasicBlock *VPBB :
1806+
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) {
1807+
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
1808+
auto *Def = dyn_cast<VPSingleDefRecipe>(&R);
1809+
if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
1810+
continue;
1811+
if (VPSingleDefRecipe *V = CSEMap.lookup(Def)) {
1812+
if (TypeInfo.inferScalarType(Def) != TypeInfo.inferScalarType(V))
1813+
continue;
1814+
// Drop poison-generating flags when reusing a value.
1815+
if (auto *RFlags = dyn_cast<VPRecipeWithIRFlags>(V))
1816+
RFlags->dropPoisonGeneratingFlags();
1817+
Def->replaceAllUsesWith(V);
1818+
Def->eraseFromParent();
1819+
continue;
1820+
}
1821+
CSEMap[Def] = Def;
1822+
}
1823+
}
1824+
}
1825+
17561826
/// Move loop-invariant recipes out of the vector loop region in \p Plan.
17571827
static void licm(VPlan &Plan) {
17581828
VPBasicBlock *Preheader = Plan.getVectorPreheader();

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,10 @@ struct VPlanTransforms {
240240
/// removing dead edges to their successors.
241241
static void removeBranchOnConst(VPlan &Plan);
242242

243+
/// Perform common-subexpression-elimination on \p Plan, which is best done
244+
/// late, right before the \p Plan is executed.
245+
static void cse(VPlan &Plan, Type &CanonicalIVType);
246+
243247
/// If there's a single exit block, optimize its phi recipes that use exiting
244248
/// IV values by feeding them precomputed end values instead, possibly taken
245249
/// one step backwards.

llvm/lib/Transforms/Vectorize/VPlanUtils.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#define LLVM_TRANSFORMS_VECTORIZE_VPLANUTILS_H
1111

1212
#include "VPlan.h"
13+
#include "llvm/ADT/TypeSwitch.h"
1314

1415
namespace llvm {
1516
class ScalarEvolution;
@@ -37,6 +38,22 @@ VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
3738
/// SCEV expression could be constructed.
3839
const SCEV *getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE);
3940

41+
/// Get any instruction opcode data embedded in recipe \p R. Returns an optional
42+
/// pair, where the first element indicates whether it is an intrinsic ID.
43+
inline std::optional<std::pair<bool, unsigned>>
44+
getOpcode(const VPRecipeBase &R) {
45+
return TypeSwitch<const VPRecipeBase *,
46+
std::optional<std::pair<bool, unsigned>>>(&R)
47+
.Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
48+
VPWidenSelectRecipe, VPHistogramRecipe, VPPartialReductionRecipe,
49+
VPReplicateRecipe>(
50+
[](auto *I) { return std::make_pair(false, I->getOpcode()); })
51+
.Case<VPWidenIntrinsicRecipe>([](auto *I) {
52+
return std::make_pair(true, I->getVectorIntrinsicID());
53+
})
54+
.Default([](auto *) { return std::nullopt; });
55+
}
56+
4057
/// Returns true if \p VPV is a single scalar, either because it produces the
4158
/// same value for all lanes or only has its first lane used.
4259
inline bool isSingleScalar(const VPValue *VPV) {

0 commit comments

Comments
 (0)