@@ -947,9 +947,8 @@ class LoopVectorizationCostModel {
947
947
/// user options, for the given register kind.
948
948
bool useMaxBandwidth (TargetTransformInfo::RegisterKind RegKind);
949
949
950
- /// \return True if maximizing vector bandwidth is enabled by the target or
951
- /// user options, for the given vector factor.
952
- bool useMaxBandwidth (ElementCount VF);
950
+ /// \return True if register pressure should be calculated for the given VF.
951
+ bool shouldCalculateRegPressureForVF (ElementCount VF);
953
952
954
953
/// \return The size (in bits) of the smallest and widest types in the code
955
954
/// that needs to be vectorized. We ignore values that remain scalar such as
@@ -1736,6 +1735,9 @@ class LoopVectorizationCostModel {
1736
1735
/// Whether this loop should be optimized for size based on function attribute
1737
1736
/// or profile information.
1738
1737
bool OptForSize;
1738
+
1739
+ /// The highest VF possible for this loop, without using MaxBandwidth.
1740
+ FixedScalableVFPair MaxPermissibleVFWithoutMaxBW;
1739
1741
};
1740
1742
} // end namespace llvm
1741
1743
@@ -3832,10 +3834,16 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
3832
3834
return FixedScalableVFPair::getNone ();
3833
3835
}
3834
3836
3835
- bool LoopVectorizationCostModel::useMaxBandwidth (ElementCount VF) {
3836
- return useMaxBandwidth (VF.isScalable ()
3837
- ? TargetTransformInfo::RGK_ScalableVector
3838
- : TargetTransformInfo::RGK_FixedWidthVector);
3837
// Decides whether register pressure should be calculated for the candidate
// vectorization factor \p VF.
//
// Returns false when useMaxBandwidth() is false for the register kind that
// matches VF (scalable vs. fixed-width). Otherwise returns the result of
// ElementCount::isKnownGT(VF, Limit), where Limit is the ScalableVF or FixedVF
// member of MaxPermissibleVFWithoutMaxBW, chosen by VF.isScalable().
+ bool LoopVectorizationCostModel::shouldCalculateRegPressureForVF (
3838
+ ElementCount VF) {
3839
// Query useMaxBandwidth() with the register kind that corresponds to VF.
+ if (!useMaxBandwidth (VF.isScalable ()
3840
+ ? TargetTransformInfo::RGK_ScalableVector
3841
+ : TargetTransformInfo::RGK_FixedWidthVector))
3842
+ return false ;
3843
+ // Only calculate register pressure for VFs enabled by MaxBandwidth, i.e. VFs known to be greater than the recorded limit without MaxBandwidth.
3844
+ return ElementCount::isKnownGT (
3845
// Compare against the limit of the same kind (scalable or fixed) as VF.
+ VF, VF.isScalable () ? MaxPermissibleVFWithoutMaxBW.ScalableVF
3846
+ : MaxPermissibleVFWithoutMaxBW.FixedVF );
3839
3847
}
3840
3848
3841
3849
bool LoopVectorizationCostModel::useMaxBandwidth (
@@ -3911,6 +3919,12 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
3911
3919
ComputeScalableMaxVF ? TargetTransformInfo::RGK_ScalableVector
3912
3920
: TargetTransformInfo::RGK_FixedWidthVector;
3913
3921
ElementCount MaxVF = MaxVectorElementCount;
3922
+
3923
+ if (MaxVF.isScalable ())
3924
+ MaxPermissibleVFWithoutMaxBW.ScalableVF = MaxVF;
3925
+ else
3926
+ MaxPermissibleVFWithoutMaxBW.FixedVF = MaxVF;
3927
+
3914
3928
if (useMaxBandwidth (RegKind)) {
3915
3929
auto MaxVectorElementCountMaxBW = ElementCount::get (
3916
3930
llvm::bit_floor (WidestRegister.getKnownMinValue () / SmallestType),
@@ -4264,9 +4278,10 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
4264
4278
if (VF.isScalar ())
4265
4279
continue ;
4266
4280
4267
- /// Don't consider the VF if it exceeds the number of registers for the
4268
- /// target.
4269
- if (CM.useMaxBandwidth (VF) && RUs[I].exceedsMaxNumRegs (TTI))
4281
+ /// If the VF was proposed due to MaxBandwidth, don't consider the VF if
4282
+ /// it exceeds the number of registers for the target.
4283
+ if (CM.shouldCalculateRegPressureForVF (VF) &&
4284
+ RUs[I].exceedsMaxNumRegs (TTI, ForceTargetNumVectorRegs))
4270
4285
continue ;
4271
4286
4272
4287
InstructionCost C = CM.expectedCost (VF);
@@ -7044,7 +7059,8 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
7044
7059
InstructionCost Cost = cost (*P, VF);
7045
7060
VectorizationFactor CurrentFactor (VF, Cost, ScalarCost);
7046
7061
7047
- if (CM.useMaxBandwidth (VF) && RUs[I].exceedsMaxNumRegs (TTI)) {
7062
+ if (CM.shouldCalculateRegPressureForVF (VF) &&
7063
+ RUs[I].exceedsMaxNumRegs (TTI, ForceTargetNumVectorRegs)) {
7048
7064
LLVM_DEBUG (dbgs () << " LV(REG): Not considering vector loop of width "
7049
7065
<< VF << " because it uses too many registers\n " );
7050
7066
continue ;
0 commit comments