Skip to content

Commit 10f7824

Browse files
authored
[AArch64] Enable other cost kinds for getCmpSelInstrCost. (#144375)
This removes the `CostKind == TCK_RecipThroughput` limitation from getCmpSelInstrCost, allowing it to return more accurate costs for the CodeSize, Lat and SizeLat cost kinds. Previously those cost kinds fell back to the base implementation, so larger vectors in particular were given a cost of 1 under CodeSize rather than their legalization cost.
1 parent 36cbd43 commit 10f7824

File tree

4 files changed

+77
-81
lines changed

4 files changed

+77
-81
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4264,14 +4264,9 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(
42644264
unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
42654265
TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info,
42664266
TTI::OperandValueInfo Op2Info, const Instruction *I) const {
4267-
// TODO: Handle other cost kinds.
4268-
if (CostKind != TTI::TCK_RecipThroughput)
4269-
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
4270-
Op1Info, Op2Info, I);
4271-
42724267
int ISD = TLI->InstructionOpcodeToISD(Opcode);
42734268
// We don't lower some vector selects well that are wider than the register
4274-
// width.
4269+
// width. TODO: Improve this with different cost kinds.
42754270
if (isa<FixedVectorType>(ValTy) && ISD == ISD::SELECT) {
42764271
// We would need this many instructions to hide the scalarization happening.
42774272
const int AmortizationCost = 20;
@@ -4355,9 +4350,10 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(
43554350

43564351
// Treat the icmp in icmp(and, 0) or icmp(and, -1/1) when it can be folded to
43574352
// icmp(and, 0) as free, as we can make use of ands, but only if the
4358-
// comparison is not unsigned.
4359-
if (ValTy->isIntegerTy() && ISD == ISD::SETCC && I &&
4360-
!CmpInst::isUnsigned(VecPred) &&
4353+
// comparison is not unsigned. FIXME: Enable for non-throughput cost kinds
4354+
// providing it will not cause performance regressions.
4355+
if (CostKind == TTI::TCK_RecipThroughput && ValTy->isIntegerTy() &&
4356+
ISD == ISD::SETCC && I && !CmpInst::isUnsigned(VecPred) &&
43614357
TLI->isTypeLegal(TLI->getValueType(DL, ValTy)) &&
43624358
match(I->getOperand(0), m_And(m_Value(), m_Value()))) {
43634359
if (match(I->getOperand(1), m_Zero()))

llvm/test/Analysis/CostModel/AArch64/cmp.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,10 @@ define void @cmps() {
1717
; CHECK-NEXT: Cost Model: Found costs of 1 for: %cf32 = fcmp ogt float undef, undef
1818
; CHECK-NEXT: Cost Model: Found costs of 1 for: %cf64 = fcmp ogt double undef, undef
1919
; CHECK-NEXT: Cost Model: Found costs of 1 for: %cbf64 = fcmp ogt bfloat undef, undef
20-
; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cfv816 = fcmp olt <8 x half> undef, undef
20+
; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cfv816 = fcmp olt <8 x half> undef, undef
2121
; CHECK-NEXT: Cost Model: Found costs of 1 for: %cfv432 = fcmp oge <4 x float> undef, undef
2222
; CHECK-NEXT: Cost Model: Found costs of 1 for: %cfv264 = fcmp oge <2 x double> undef, undef
23-
; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cbfv816 = fcmp olt <8 x bfloat> undef, undef
23+
; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cbfv816 = fcmp olt <8 x bfloat> undef, undef
2424
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
2525
;
2626
%c8 = icmp slt i8 undef, undef

llvm/test/Analysis/CostModel/AArch64/select.ll

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,17 @@ define void @select() {
1111
; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4 = select i1 undef, i64 undef, i64 undef
1212
; CHECK-NEXT: Cost Model: Found costs of 1 for: %v5 = select i1 undef, float undef, float undef
1313
; CHECK-NEXT: Cost Model: Found costs of 1 for: %v6 = select i1 undef, double undef, double undef
14-
; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
15-
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
16-
; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
17-
; CHECK-NEXT: Cost Model: Found costs of RThru:80 CodeSize:1 Lat:1 SizeLat:1 for: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
18-
; CHECK-NEXT: Cost Model: Found costs of RThru:160 CodeSize:1 Lat:1 SizeLat:1 for: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
19-
; CHECK-NEXT: Cost Model: Found costs of RThru:320 CodeSize:1 Lat:1 SizeLat:1 for: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
20-
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2float = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
21-
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4float = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
22-
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2double = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
23-
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4half = select <4 x i1> undef, <4 x half> undef, <4 x half> undef
24-
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8half = select <8 x i1> undef, <8 x half> undef, <8 x half> undef
14+
; CHECK-NEXT: Cost Model: Found costs of 16 for: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
15+
; CHECK-NEXT: Cost Model: Found costs of 8 for: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
16+
; CHECK-NEXT: Cost Model: Found costs of 16 for: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
17+
; CHECK-NEXT: Cost Model: Found costs of 80 for: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
18+
; CHECK-NEXT: Cost Model: Found costs of 160 for: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
19+
; CHECK-NEXT: Cost Model: Found costs of 320 for: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
20+
; CHECK-NEXT: Cost Model: Found costs of 2 for: %v2float = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
21+
; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4float = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
22+
; CHECK-NEXT: Cost Model: Found costs of 2 for: %v2double = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
23+
; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4half = select <4 x i1> undef, <4 x half> undef, <4 x half> undef
24+
; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8half = select <8 x i1> undef, <8 x half> undef, <8 x half> undef
2525
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
2626
;
2727
%v1 = select i1 undef, i8 undef, i8 undef

0 commit comments

Comments
 (0)