Skip to content

Commit bbccf20

Browse files
committed
Only return a cost estimate for scalable types in AArch64 TTI
1 parent aa3295d commit bbccf20

File tree

2 files changed

+29
-14
lines changed

2 files changed

+29
-14
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -554,7 +554,17 @@ static bool isUnpackedVectorVT(EVT VecVT) {
554554
VecVT.getSizeInBits().getKnownMinValue() < AArch64::SVEBitsPerBlock;
555555
}
556556

557-
static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) {
557+
static InstructionCost getHistogramCost(const AArch64Subtarget *ST,
558+
const IntrinsicCostAttributes &ICA) {
559+
// We need to know at least the number of elements in the vector of buckets
560+
// and the size of each element to update.
561+
if (ICA.getArgTypes().size() < 2)
562+
return InstructionCost::getInvalid();
563+
564+
// Only interested in costing for the hardware instruction from SVE2.
565+
if (!ST->hasSVE2())
566+
return InstructionCost::getInvalid();
567+
558568
Type *BucketPtrsTy = ICA.getArgTypes()[0]; // Type of vector of pointers
559569
Type *EltTy = ICA.getArgTypes()[1]; // Type of bucket elements
560570
unsigned TotalHistCnts = 1;
@@ -579,9 +589,11 @@ static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) {
579589

580590
unsigned NaturalVectorWidth = AArch64::SVEBitsPerBlock / LegalEltSize;
581591
TotalHistCnts = EC / NaturalVectorWidth;
592+
593+
return InstructionCost(BaseHistCntCost * TotalHistCnts);
582594
}
583595

584-
return InstructionCost(BaseHistCntCost * TotalHistCnts);
596+
return InstructionCost::getInvalid();
585597
}
586598

587599
InstructionCost
@@ -597,10 +609,13 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
597609
return InstructionCost::getInvalid();
598610

599611
switch (ICA.getID()) {
600-
case Intrinsic::experimental_vector_histogram_add:
601-
if (!ST->hasSVE2())
602-
return InstructionCost::getInvalid();
603-
return getHistogramCost(ICA);
612+
case Intrinsic::experimental_vector_histogram_add: {
613+
InstructionCost HistCost = getHistogramCost(ST, ICA);
614+
// If the cost isn't valid, we may still be able to scalarize
615+
if (HistCost.isValid())
616+
return HistCost;
617+
break;
618+
}
604619
case Intrinsic::umin:
605620
case Intrinsic::umax:
606621
case Intrinsic::smin:

llvm/test/Analysis/CostModel/AArch64/histograms.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@ define void @histograms() {
88
; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
99
; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
1010
; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
11-
; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
12-
; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
13-
; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
14-
; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
11+
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
12+
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
13+
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
14+
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
1515
; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
1616
; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
1717
; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
@@ -43,10 +43,10 @@ define void @histograms() {
4343
; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
4444
; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
4545
; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
46-
; CHECK-SVE2-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
47-
; CHECK-SVE2-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
48-
; CHECK-SVE2-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
49-
; CHECK-SVE2-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
46+
; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
47+
; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
48+
; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
49+
; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
5050
; CHECK-SVE2-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
5151
; CHECK-SVE2-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
5252
; CHECK-SVE2-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)

0 commit comments

Comments
 (0)