[AArch64] Enable other cost kinds for getCmpSelInstrCost. (#144375)

davemgreen · web-flow · commit 10f782456eab · 2025-07-10T07:12:21.000+01:00
This removes the CostKind == TCK_RecipThroughput limitation from
getCmpSelInstrCost, allowing it to return more accurate costs for the CodeSize,
Latency (Lat) and SizeAndLatency (SizeLat) cost kinds. Before this change,
especially for larger vectors under CodeSize, the returned cost was 1 rather
than the legalization cost.
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -4264,14 +4264,9 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(
     unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
     TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info,
     TTI::OperandValueInfo Op2Info, const Instruction *I) const {
-  // TODO: Handle other cost kinds.
-  if (CostKind != TTI::TCK_RecipThroughput)
-    return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
-                                     Op1Info, Op2Info, I);
-
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
   // We don't lower some vector selects well that are wider than the register
-  // width.
+  // width. TODO: Improve this with different cost kinds.
   if (isa<FixedVectorType>(ValTy) && ISD == ISD::SELECT) {
     // We would need this many instructions to hide the scalarization happening.
     const int AmortizationCost = 20;
@@ -4355,9 +4350,10 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(
 
   // Treat the icmp in icmp(and, 0) or icmp(and, -1/1) when it can be folded to
   // icmp(and, 0) as free, as we can make use of ands, but only if the
-  // comparison is not unsigned.
-  if (ValTy->isIntegerTy() && ISD == ISD::SETCC && I &&
-      !CmpInst::isUnsigned(VecPred) &&
+  // comparison is not unsigned. FIXME: Enable for non-throughput cost kinds
+  // providing it will not cause performance regressions.
+  if (CostKind == TTI::TCK_RecipThroughput && ValTy->isIntegerTy() &&
+      ISD == ISD::SETCC && I && !CmpInst::isUnsigned(VecPred) &&
       TLI->isTypeLegal(TLI->getValueType(DL, ValTy)) &&
       match(I->getOperand(0), m_And(m_Value(), m_Value()))) {
     if (match(I->getOperand(1), m_Zero()))
diff --git a/llvm/test/Analysis/CostModel/AArch64/cmp.ll b/llvm/test/Analysis/CostModel/AArch64/cmp.ll
@@ -17,10 +17,10 @@ define void @cmps() {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %cf32 = fcmp ogt float undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %cf64 = fcmp ogt double undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %cbf64 = fcmp ogt bfloat undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cfv816 = fcmp olt <8 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cfv816 = fcmp olt <8 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %cfv432 = fcmp oge <4 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %cfv264 = fcmp oge <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cbfv816 = fcmp olt <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cbfv816 = fcmp olt <8 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %c8 = icmp slt i8 undef, undef
diff --git a/llvm/test/Analysis/CostModel/AArch64/select.ll b/llvm/test/Analysis/CostModel/AArch64/select.ll
@@ -11,17 +11,17 @@ define void @select() {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4 = select i1 undef, i64 undef, i64 undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v5 = select i1 undef, float undef, float undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v6 = select i1 undef, double undef, double undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:80 CodeSize:1 Lat:1 SizeLat:1 for: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:160 CodeSize:1 Lat:1 SizeLat:1 for: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:320 CodeSize:1 Lat:1 SizeLat:1 for: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2float = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4float = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2double = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4half = select <4 x i1> undef, <4 x half> undef, <4 x half> undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8half = select <8 x i1> undef, <8 x half> undef, <8 x half> undef
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
+; CHECK-NEXT:  Cost Model: Found costs of 80 for: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
+; CHECK-NEXT:  Cost Model: Found costs of 160 for: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
+; CHECK-NEXT:  Cost Model: Found costs of 320 for: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v2float = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4float = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v2double = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4half = select <4 x i1> undef, <4 x half> undef, <4 x half> undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8half = select <8 x i1> undef, <8 x half> undef, <8 x half> undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v1 = select i1 undef, i8 undef, i8 undef
diff --git a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll