Skip to content

[CostModel] Provide a default model for histogram intrinsics #149348

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -878,9 +878,6 @@ class TargetTransformInfoImplBase {
switch (ICA.getID()) {
default:
break;
case Intrinsic::experimental_vector_histogram_add:
// For now, we want explicit support from the target for histograms.
return InstructionCost::getInvalid();
case Intrinsic::allow_runtime_check:
case Intrinsic::allow_ubsan_check:
case Intrinsic::annotation:
Expand Down
49 changes: 49 additions & 0 deletions llvm/include/llvm/CodeGen/BasicTTIImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -2110,6 +2110,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
}
case Intrinsic::get_active_lane_mask:
case Intrinsic::experimental_vector_match:
case Intrinsic::experimental_vector_histogram_add:
case Intrinsic::experimental_vector_histogram_uadd_sat:
case Intrinsic::experimental_vector_histogram_umax:
case Intrinsic::experimental_vector_histogram_umin:
return thisT()->getTypeBasedIntrinsicInstrCost(ICA, CostKind);
case Intrinsic::modf:
case Intrinsic::sincos:
Expand Down Expand Up @@ -2458,6 +2462,51 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
thisT()->getArithmeticInstrCost(BinaryOperator::And, RetTy, CostKind);
return Cost;
}
case Intrinsic::experimental_vector_histogram_add:
case Intrinsic::experimental_vector_histogram_uadd_sat:
case Intrinsic::experimental_vector_histogram_umax:
case Intrinsic::experimental_vector_histogram_umin: {
FixedVectorType *PtrsTy = dyn_cast<FixedVectorType>(ICA.getArgTypes()[0]);
Type *EltTy = ICA.getArgTypes()[1];

// Targets with scalable vectors must handle this on their own.
if (!PtrsTy)
return InstructionCost::getInvalid();

Align Alignment = thisT()->DL.getABITypeAlign(EltTy);
InstructionCost Cost = 0;
Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, PtrsTy,
CostKind, 1, nullptr, nullptr);
Cost += thisT()->getMemoryOpCost(Instruction::Load, EltTy, Alignment, 0,
CostKind);
switch (IID) {
default:
llvm_unreachable("Unhandled histogram update operation.");
case Intrinsic::experimental_vector_histogram_add:
Cost +=
thisT()->getArithmeticInstrCost(Instruction::Add, EltTy, CostKind);
break;
case Intrinsic::experimental_vector_histogram_uadd_sat: {
IntrinsicCostAttributes UAddSat(Intrinsic::uadd_sat, EltTy, {EltTy});
Cost += thisT()->getIntrinsicInstrCost(UAddSat, CostKind);
break;
}
case Intrinsic::experimental_vector_histogram_umax: {
IntrinsicCostAttributes UMax(Intrinsic::umax, EltTy, {EltTy});
Cost += thisT()->getIntrinsicInstrCost(UMax, CostKind);
break;
}
case Intrinsic::experimental_vector_histogram_umin: {
IntrinsicCostAttributes UMin(Intrinsic::umin, EltTy, {EltTy});
Cost += thisT()->getIntrinsicInstrCost(UMin, CostKind);
break;
}
}
Cost += thisT()->getMemoryOpCost(Instruction::Store, EltTy, Alignment, 0,
CostKind);
Cost *= PtrsTy->getNumElements();
return Cost;
}
case Intrinsic::get_active_lane_mask: {
Type *ArgTy = ICA.getArgTypes()[0];
EVT ResVT = getTLI()->getValueType(DL, RetTy, true);
Expand Down
27 changes: 21 additions & 6 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -547,7 +547,17 @@ static bool isUnpackedVectorVT(EVT VecVT) {
VecVT.getSizeInBits().getKnownMinValue() < AArch64::SVEBitsPerBlock;
}

static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) {
static InstructionCost getHistogramCost(const AArch64Subtarget *ST,
const IntrinsicCostAttributes &ICA) {
// We need to know at least the number of elements in the vector of buckets
// and the size of each element to update.
if (ICA.getArgTypes().size() < 2)
return InstructionCost::getInvalid();

// Only interested in costing for the hardware instruction from SVE2.
if (!ST->hasSVE2())
return InstructionCost::getInvalid();

Type *BucketPtrsTy = ICA.getArgTypes()[0]; // Type of vector of pointers
Type *EltTy = ICA.getArgTypes()[1]; // Type of bucket elements
unsigned TotalHistCnts = 1;
Expand All @@ -572,9 +582,11 @@ static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) {

unsigned NaturalVectorWidth = AArch64::SVEBitsPerBlock / LegalEltSize;
TotalHistCnts = EC / NaturalVectorWidth;

return InstructionCost(BaseHistCntCost * TotalHistCnts);
}

return InstructionCost(BaseHistCntCost * TotalHistCnts);
return InstructionCost::getInvalid();
}

InstructionCost
Expand All @@ -590,10 +602,13 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return InstructionCost::getInvalid();

switch (ICA.getID()) {
case Intrinsic::experimental_vector_histogram_add:
if (!ST->hasSVE2())
return InstructionCost::getInvalid();
return getHistogramCost(ICA);
case Intrinsic::experimental_vector_histogram_add: {
InstructionCost HistCost = getHistogramCost(ST, ICA);
// If the cost isn't valid, we may still be able to scalarize.
if (HistCost.isValid())
return HistCost;
break;
}
case Intrinsic::umin:
case Intrinsic::umax:
case Intrinsic::smin:
Expand Down
Loading