@@ -2105,6 +2105,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
2105
2105
}
2106
2106
case Intrinsic::get_active_lane_mask:
2107
2107
case Intrinsic::experimental_vector_match:
2108
+ case Intrinsic::experimental_vector_histogram_add:
2108
2109
return thisT ()->getTypeBasedIntrinsicInstrCost (ICA, CostKind);
2109
2110
case Intrinsic::modf:
2110
2111
case Intrinsic::sincos:
@@ -2457,6 +2458,51 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
2457
2458
return thisT ()->getShuffleCost (TTI::SK_Reverse, cast<VectorType>(RetTy),
2458
2459
cast<VectorType>(ICA.getArgTypes ()[0 ]), {},
2459
2460
CostKind, 0 , cast<VectorType>(RetTy));
2461
+ case Intrinsic::experimental_vector_histogram_add:
2462
+ case Intrinsic::experimental_vector_histogram_uadd_sat:
2463
+ case Intrinsic::experimental_vector_histogram_umax:
2464
+ case Intrinsic::experimental_vector_histogram_umin: {
2465
+ FixedVectorType *PtrsTy = dyn_cast<FixedVectorType>(ICA.getArgTypes ()[0 ]);
2466
+ Type *EltTy = ICA.getArgTypes ()[1 ];
2467
+
2468
+ // Targets with scalable vectors must handle this on their own.
2469
+ if (!PtrsTy)
2470
+ return InstructionCost::getInvalid ();
2471
+
2472
+ Align Alignment = thisT ()->DL .getABITypeAlign (EltTy);
2473
+ InstructionCost Cost = 0 ;
2474
+ Cost += thisT ()->getVectorInstrCost (Instruction::ExtractElement, PtrsTy,
2475
+ CostKind, 1 , nullptr , nullptr );
2476
+ Cost += thisT ()->getMemoryOpCost (Instruction::Load, EltTy, Alignment, 0 ,
2477
+ CostKind);
2478
+ switch (IID) {
2479
+ default :
2480
+ llvm_unreachable (" Unhandled histogram update operation." );
2481
+ case Intrinsic::experimental_vector_histogram_add:
2482
+ Cost +=
2483
+ thisT ()->getArithmeticInstrCost (Instruction::Add, EltTy, CostKind);
2484
+ break ;
2485
+ case Intrinsic::experimental_vector_histogram_uadd_sat: {
2486
+ IntrinsicCostAttributes UAddSat (Intrinsic::uadd_sat, EltTy, {EltTy});
2487
+ Cost += thisT ()->getIntrinsicInstrCost (UAddSat, CostKind);
2488
+ break ;
2489
+ }
2490
+ case Intrinsic::experimental_vector_histogram_umax: {
2491
+ IntrinsicCostAttributes UMax (Intrinsic::umax, EltTy, {EltTy});
2492
+ Cost += thisT ()->getIntrinsicInstrCost (UMax, CostKind);
2493
+ break ;
2494
+ }
2495
+ case Intrinsic::experimental_vector_histogram_umin: {
2496
+ IntrinsicCostAttributes UMin (Intrinsic::umin, EltTy, {EltTy});
2497
+ Cost += thisT ()->getIntrinsicInstrCost (UMin, CostKind);
2498
+ break ;
2499
+ }
2500
+ }
2501
+ Cost += thisT ()->getMemoryOpCost (Instruction::Store, EltTy, Alignment, 0 ,
2502
+ CostKind);
2503
+ Cost *= PtrsTy->getNumElements ();
2504
+ return Cost;
2505
+ }
2460
2506
case Intrinsic::get_active_lane_mask: {
2461
2507
Type *ArgTy = ICA.getArgTypes ()[0 ];
2462
2508
EVT ResVT = getTLI ()->getValueType (DL, RetTy, true );
0 commit comments