diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index ddc8a5eaffa94..a56afe05f66f6 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -878,9 +878,6 @@ class TargetTransformInfoImplBase { switch (ICA.getID()) { default: break; - case Intrinsic::experimental_vector_histogram_add: - // For now, we want explicit support from the target for histograms. - return InstructionCost::getInvalid(); case Intrinsic::allow_runtime_check: case Intrinsic::allow_ubsan_check: case Intrinsic::annotation: diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 1d7c41452f7d5..a43a304e50187 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2110,6 +2110,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { } case Intrinsic::get_active_lane_mask: case Intrinsic::experimental_vector_match: + case Intrinsic::experimental_vector_histogram_add: + case Intrinsic::experimental_vector_histogram_uadd_sat: + case Intrinsic::experimental_vector_histogram_umax: + case Intrinsic::experimental_vector_histogram_umin: return thisT()->getTypeBasedIntrinsicInstrCost(ICA, CostKind); case Intrinsic::modf: case Intrinsic::sincos: @@ -2458,6 +2462,51 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { thisT()->getArithmeticInstrCost(BinaryOperator::And, RetTy, CostKind); return Cost; } + case Intrinsic::experimental_vector_histogram_add: + case Intrinsic::experimental_vector_histogram_uadd_sat: + case Intrinsic::experimental_vector_histogram_umax: + case Intrinsic::experimental_vector_histogram_umin: { + FixedVectorType *PtrsTy = dyn_cast(ICA.getArgTypes()[0]); + Type *EltTy = ICA.getArgTypes()[1]; + + // Targets with scalable vectors must handle this on their own. + if (!PtrsTy) + return InstructionCost::getInvalid(); + + Align Alignment = thisT()->DL.getABITypeAlign(EltTy); + InstructionCost Cost = 0; + Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, PtrsTy, + CostKind, 1, nullptr, nullptr); + Cost += thisT()->getMemoryOpCost(Instruction::Load, EltTy, Alignment, 0, + CostKind); + switch (IID) { + default: + llvm_unreachable("Unhandled histogram update operation."); + case Intrinsic::experimental_vector_histogram_add: + Cost += + thisT()->getArithmeticInstrCost(Instruction::Add, EltTy, CostKind); + break; + case Intrinsic::experimental_vector_histogram_uadd_sat: { + IntrinsicCostAttributes UAddSat(Intrinsic::uadd_sat, EltTy, {EltTy}); + Cost += thisT()->getIntrinsicInstrCost(UAddSat, CostKind); + break; + } + case Intrinsic::experimental_vector_histogram_umax: { + IntrinsicCostAttributes UMax(Intrinsic::umax, EltTy, {EltTy}); + Cost += thisT()->getIntrinsicInstrCost(UMax, CostKind); + break; + } + case Intrinsic::experimental_vector_histogram_umin: { + IntrinsicCostAttributes UMin(Intrinsic::umin, EltTy, {EltTy}); + Cost += thisT()->getIntrinsicInstrCost(UMin, CostKind); + break; + } + } + Cost += thisT()->getMemoryOpCost(Instruction::Store, EltTy, Alignment, 0, + CostKind); + Cost *= PtrsTy->getNumElements(); + return Cost; + } case Intrinsic::get_active_lane_mask: { Type *ArgTy = ICA.getArgTypes()[0]; EVT ResVT = getTLI()->getValueType(DL, RetTy, true); diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 90d3d92d6bbf5..5b29da919c6da 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -547,7 +547,17 @@ static bool isUnpackedVectorVT(EVT VecVT) { VecVT.getSizeInBits().getKnownMinValue() < AArch64::SVEBitsPerBlock; } -static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) { +static InstructionCost getHistogramCost(const AArch64Subtarget *ST, + const IntrinsicCostAttributes &ICA) { + // We need to know at least the number of elements in the vector of buckets + // and the size of each element to update. + if (ICA.getArgTypes().size() < 2) + return InstructionCost::getInvalid(); + + // Only interested in costing for the hardware instruction from SVE2. + if (!ST->hasSVE2()) + return InstructionCost::getInvalid(); + Type *BucketPtrsTy = ICA.getArgTypes()[0]; // Type of vector of pointers Type *EltTy = ICA.getArgTypes()[1]; // Type of bucket elements unsigned TotalHistCnts = 1; @@ -572,9 +582,11 @@ static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) { unsigned NaturalVectorWidth = AArch64::SVEBitsPerBlock / LegalEltSize; TotalHistCnts = EC / NaturalVectorWidth; + + return InstructionCost(BaseHistCntCost * TotalHistCnts); } - return InstructionCost(BaseHistCntCost * TotalHistCnts); + return InstructionCost::getInvalid(); } InstructionCost @@ -590,10 +602,13 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, return InstructionCost::getInvalid(); switch (ICA.getID()) { - case Intrinsic::experimental_vector_histogram_add: - if (!ST->hasSVE2()) - return InstructionCost::getInvalid(); - return getHistogramCost(ICA); + case Intrinsic::experimental_vector_histogram_add: { + InstructionCost HistCost = getHistogramCost(ST, ICA); + // If the cost isn't valid, we may still be able to scalarize + if (HistCost.isValid()) + return HistCost; + break; + } case Intrinsic::umin: case Intrinsic::umax: case Intrinsic::smin: diff --git a/llvm/test/Analysis/CostModel/AArch64/histograms.ll b/llvm/test/Analysis/CostModel/AArch64/histograms.ll new file mode 100644 index 0000000000000..c0489587551b0 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AArch64/histograms.ll @@ -0,0 +1,178 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes="print" -cost-kind=throughput 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CHECK-NEON +; RUN: opt < %s -passes="print" -cost-kind=throughput 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve | FileCheck %s --check-prefix=CHECK-SVE +; RUN: opt < %s -passes="print" -cost-kind=throughput 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve2 | FileCheck %s --check-prefix=CHECK-SVE2 + +define void @histograms() { +; CHECK-NEON-LABEL: 'histograms' +; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv2p0.i64( poison, i64 1, poison) +; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32( poison, i32 1, poison) +; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16( poison, i16 1, poison) +; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8( poison, i8 1, poison) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison) +; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv2p0.i64( poison, i64 1, poison) +; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv4p0.i32( poison, i32 1, poison) +; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv8p0.i16( poison, i16 1, poison) +; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv16p0.i8( poison, i8 1, poison) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison) +; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv2p0.i64( poison, i64 1, poison) +; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv4p0.i32( poison, i32 1, poison) +; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv8p0.i16( poison, i16 1, poison) +; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv16p0.i8( poison, i8 1, poison) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.umax.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.umax.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.experimental.vector.histogram.umax.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.experimental.vector.histogram.umax.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison) +; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv2p0.i64( poison, i64 1, poison) +; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv4p0.i32( poison, i32 1, poison) +; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv8p0.i16( poison, i16 1, poison) +; CHECK-NEON-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv16p0.i8( poison, i8 1, poison) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.umin.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.umin.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.experimental.vector.histogram.umin.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.experimental.vector.histogram.umin.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-SVE-LABEL: 'histograms' +; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv2p0.i64( poison, i64 1, poison) +; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32( poison, i32 1, poison) +; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16( poison, i16 1, poison) +; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8( poison, i8 1, poison) +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison) +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison) +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison) +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison) +; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv2p0.i64( poison, i64 1, poison) +; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv4p0.i32( poison, i32 1, poison) +; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv8p0.i16( poison, i16 1, poison) +; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv16p0.i8( poison, i8 1, poison) +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison) +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison) +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison) +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison) +; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv2p0.i64( poison, i64 1, poison) +; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv4p0.i32( poison, i32 1, poison) +; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv8p0.i16( poison, i16 1, poison) +; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv16p0.i8( poison, i8 1, poison) +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.umax.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison) +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.umax.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison) +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.experimental.vector.histogram.umax.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison) +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.experimental.vector.histogram.umax.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison) +; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv2p0.i64( poison, i64 1, poison) +; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv4p0.i32( poison, i32 1, poison) +; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv8p0.i16( poison, i16 1, poison) +; CHECK-SVE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv16p0.i8( poison, i8 1, poison) +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.umin.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison) +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.umin.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison) +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.experimental.vector.histogram.umin.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison) +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.experimental.vector.histogram.umin.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison) +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-SVE2-LABEL: 'histograms' +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vector.histogram.add.nxv2p0.i64( poison, i64 1, poison) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32( poison, i32 1, poison) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16( poison, i16 1, poison) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8( poison, i8 1, poison) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison) +; CHECK-SVE2-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv2p0.i64( poison, i64 1, poison) +; CHECK-SVE2-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv4p0.i32( poison, i32 1, poison) +; CHECK-SVE2-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv8p0.i16( poison, i16 1, poison) +; CHECK-SVE2-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv16p0.i8( poison, i8 1, poison) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison) +; CHECK-SVE2-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv2p0.i64( poison, i64 1, poison) +; CHECK-SVE2-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv4p0.i32( poison, i32 1, poison) +; CHECK-SVE2-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv8p0.i16( poison, i16 1, poison) +; CHECK-SVE2-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv16p0.i8( poison, i8 1, poison) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.umax.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.umax.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.experimental.vector.histogram.umax.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.experimental.vector.histogram.umax.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison) +; CHECK-SVE2-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv2p0.i64( poison, i64 1, poison) +; CHECK-SVE2-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv4p0.i32( poison, i32 1, poison) +; CHECK-SVE2-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv8p0.i16( poison, i16 1, poison) +; CHECK-SVE2-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv16p0.i8( poison, i8 1, poison) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.umin.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.umin.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.experimental.vector.histogram.umin.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.experimental.vector.histogram.umin.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + call void @llvm.experimental.vector.histogram.add.nxv2p0.i64( poison, i64 1, poison) + call void @llvm.experimental.vector.histogram.add.nxv4p0.i32( poison, i32 1, poison) + call void @llvm.experimental.vector.histogram.add.nxv8p0.i16( poison, i16 1, poison) + call void @llvm.experimental.vector.histogram.add.nxv16p0.i8( poison, i8 1, poison) + call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison) + call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison) + call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison) + call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison) + call void @llvm.experimental.vector.histogram.uadd.sat.nxv2p0.i64( poison, i64 1, poison) + call void @llvm.experimental.vector.histogram.uadd.sat.nxv4p0.i32( poison, i32 1, poison) + call void @llvm.experimental.vector.histogram.uadd.sat.nxv8p0.i16( poison, i16 1, poison) + call void @llvm.experimental.vector.histogram.uadd.sat.nxv16p0.i8( poison, i8 1, poison) + call void @llvm.experimental.vector.histogram.uadd.sat.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison) + call void @llvm.experimental.vector.histogram.uadd.sat.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison) + call void @llvm.experimental.vector.histogram.uadd.sat.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison) + call void @llvm.experimental.vector.histogram.uadd.sat.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison) + call void @llvm.experimental.vector.histogram.umax.nxv2p0.i64( poison, i64 1, poison) + call void @llvm.experimental.vector.histogram.umax.nxv4p0.i32( poison, i32 1, poison) + call void @llvm.experimental.vector.histogram.umax.nxv8p0.i16( poison, i16 1, poison) + call void @llvm.experimental.vector.histogram.umax.nxv16p0.i8( poison, i8 1, poison) + call void @llvm.experimental.vector.histogram.umax.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison) + call void @llvm.experimental.vector.histogram.umax.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison) + call void @llvm.experimental.vector.histogram.umax.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison) + call void @llvm.experimental.vector.histogram.umax.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison) + call void @llvm.experimental.vector.histogram.umin.nxv2p0.i64( poison, i64 1, poison) + call void @llvm.experimental.vector.histogram.umin.nxv4p0.i32( poison, i32 1, poison) + call void @llvm.experimental.vector.histogram.umin.nxv8p0.i16( poison, i16 1, poison) + call void @llvm.experimental.vector.histogram.umin.nxv16p0.i8( poison, i8 1, poison) + call void @llvm.experimental.vector.histogram.umin.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison) + call void @llvm.experimental.vector.histogram.umin.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison) + call void @llvm.experimental.vector.histogram.umin.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison) + call void @llvm.experimental.vector.histogram.umin.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison) + ret void +} + +declare void @llvm.experimental.vector.histogram.add.nxv2p0.i64(, i64, ) +declare void @llvm.experimental.vector.histogram.add.nxv4p0.i32(, i32, ) +declare void @llvm.experimental.vector.histogram.add.nxv8p0.i16(, i16, ) +declare void @llvm.experimental.vector.histogram.add.nxv16p0.i8(, i8, ) +declare void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr>, i64, <2 x i1>) +declare void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr>, i32, <4 x i1>) +declare void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr>, i16, <8 x i1>) +declare void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr>, i8, <16 x i1>) +declare void @llvm.experimental.vector.histogram.uadd.sat.nxv2p0.i64(, i64, ) +declare void @llvm.experimental.vector.histogram.uadd.sat.nxv4p0.i32(, i32, ) +declare void @llvm.experimental.vector.histogram.uadd.sat.nxv8p0.i16(, i16, ) +declare void @llvm.experimental.vector.histogram.uadd.sat.nxv16p0.i8(, i8, ) +declare void @llvm.experimental.vector.histogram.uadd.sat.v2p0.i64(<2 x ptr>, i64, <2 x i1>) +declare void @llvm.experimental.vector.histogram.uadd.sat.v4p0.i32(<4 x ptr>, i32, <4 x i1>) +declare void @llvm.experimental.vector.histogram.uadd.sat.v8p0.i16(<8 x ptr>, i16, <8 x i1>) +declare void @llvm.experimental.vector.histogram.uadd.sat.v16p0.i8(<16 x ptr>, i8, <16 x i1>) +declare void @llvm.experimental.vector.histogram.umax.nxv2p0.i64(, i64, ) +declare void @llvm.experimental.vector.histogram.umax.nxv4p0.i32(, i32, ) +declare void @llvm.experimental.vector.histogram.umax.nxv8p0.i16(, i16, ) +declare void @llvm.experimental.vector.histogram.umax.nxv16p0.i8(, i8, ) +declare void @llvm.experimental.vector.histogram.umax.v2p0.i64(<2 x ptr>, i64, <2 x i1>) +declare void @llvm.experimental.vector.histogram.umax.v4p0.i32(<4 x ptr>, i32, <4 x i1>) +declare void @llvm.experimental.vector.histogram.umax.v8p0.i16(<8 x ptr>, i16, <8 x i1>) +declare void @llvm.experimental.vector.histogram.umax.v16p0.i8(<16 x ptr>, i8, <16 x i1>) +declare void @llvm.experimental.vector.histogram.umin.nxv2p0.i64(, i64, ) +declare void @llvm.experimental.vector.histogram.umin.nxv4p0.i32(, i32, ) +declare void @llvm.experimental.vector.histogram.umin.nxv8p0.i16(, i16, ) +declare void @llvm.experimental.vector.histogram.umin.nxv16p0.i8(, i8, ) +declare void @llvm.experimental.vector.histogram.umin.v2p0.i64(<2 x ptr>, i64, <2 x i1>) +declare void @llvm.experimental.vector.histogram.umin.v4p0.i32(<4 x ptr>, i32, <4 x i1>) +declare void @llvm.experimental.vector.histogram.umin.v8p0.i16(<8 x ptr>, i16, <8 x i1>) +declare void @llvm.experimental.vector.histogram.umin.v16p0.i8(<16 x ptr>, i8, <16 x i1>) diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll index ee485e2f861b4..abd6e50f771b3 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll @@ -1277,15 +1277,15 @@ define void @histogram_nxv16i8( %buckets, define void @histogram_v2i64(<2 x ptr> %buckets, <2 x i1> %mask) { ; CHECK-VSCALE-1-LABEL: 'histogram_v2i64' -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 1, <2 x i1> %mask) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:10 CodeSize:8 Lat:10 SizeLat:10 for: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 1, <2 x i1> %mask) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-VSCALE-2-LABEL: 'histogram_v2i64' -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 1, <2 x i1> %mask) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:8 Lat:10 SizeLat:10 for: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 1, <2 x i1> %mask) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; TYPE_BASED_ONLY-LABEL: 'histogram_v2i64' -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 1, <2 x i1> %mask) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:10 CodeSize:8 Lat:10 SizeLat:10 for: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 1, <2 x i1> %mask) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 1, <2 x i1> %mask) @@ -1294,15 +1294,15 @@ define void @histogram_v2i64(<2 x ptr> %buckets, <2 x i1> %mask) { define void @histogram_v4i32(<4 x ptr> %buckets, <4 x i1> %mask) { ; CHECK-VSCALE-1-LABEL: 'histogram_v4i32' -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:20 CodeSize:16 Lat:20 SizeLat:20 for: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-VSCALE-2-LABEL: 'histogram_v4i32' -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:16 Lat:20 SizeLat:20 for: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; TYPE_BASED_ONLY-LABEL: 'histogram_v4i32' -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:20 CodeSize:16 Lat:20 SizeLat:20 for: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask) @@ -1311,15 +1311,15 @@ define void @histogram_v4i32(<4 x ptr> %buckets, <4 x i1> %mask) { define void @histogram_v8i16(<8 x ptr> %buckets, <8 x i1> %mask) { ; CHECK-VSCALE-1-LABEL: 'histogram_v8i16' -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> %buckets, i16 1, <8 x i1> %mask) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:40 CodeSize:32 Lat:40 SizeLat:40 for: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> %buckets, i16 1, <8 x i1> %mask) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-VSCALE-2-LABEL: 'histogram_v8i16' -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> %buckets, i16 1, <8 x i1> %mask) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:40 CodeSize:32 Lat:40 SizeLat:40 for: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> %buckets, i16 1, <8 x i1> %mask) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; TYPE_BASED_ONLY-LABEL: 'histogram_v8i16' -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> %buckets, i16 1, <8 x i1> %mask) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:40 CodeSize:32 Lat:40 SizeLat:40 for: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> %buckets, i16 1, <8 x i1> %mask) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> %buckets, i16 1, <8 x i1> %mask) @@ -1328,15 +1328,15 @@ define void @histogram_v8i16(<8 x ptr> %buckets, <8 x i1> %mask) { define void @histogram_v16i8(<16 x ptr> %buckets, <16 x i1> %mask) { ; CHECK-VSCALE-1-LABEL: 'histogram_v16i8' -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> %buckets, i8 1, <16 x i1> %mask) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:80 CodeSize:64 Lat:80 SizeLat:80 for: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> %buckets, i8 1, <16 x i1> %mask) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-VSCALE-2-LABEL: 'histogram_v16i8' -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> %buckets, i8 1, <16 x i1> %mask) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:80 CodeSize:64 Lat:80 SizeLat:80 for: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> %buckets, i8 1, <16 x i1> %mask) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; TYPE_BASED_ONLY-LABEL: 'histogram_v16i8' -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> %buckets, i8 1, <16 x i1> %mask) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:80 CodeSize:64 Lat:80 SizeLat:80 for: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> %buckets, i8 1, <16 x i1> %mask) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; call void @llvm.experimental.vector.histogram.add.v16p0.i64(<16 x ptr> %buckets, i8 1, <16 x i1> %mask) diff --git a/llvm/test/Transforms/LoopVectorize/histograms.ll b/llvm/test/Transforms/LoopVectorize/histograms.ll index 1adc0bf0c9ec0..f0ceae7d5816b 100644 --- a/llvm/test/Transforms/LoopVectorize/histograms.ll +++ b/llvm/test/Transforms/LoopVectorize/histograms.ll @@ -1,25 +1,48 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt < %s -passes=loop-vectorize,instcombine -enable-histogram-loop-vectorization -force-vector-width=2 -S | FileCheck %s -;; Currently we don't expect this to vectorize, since the generic cost model returns -;; invalid for the histogram intrinsic. define void @simple_histogram(ptr noalias %buckets, ptr readonly %indices, i64 %N) { ; CHECK-LABEL: define void @simple_histogram( ; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -2 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i32> [[WIDE_LOAD]] to <2 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP1]], i64 1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x ptr> [[TMP6]], ptr [[TMP5]], i64 1 +; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.v2p0.i32(<2 x ptr> [[TMP7]], i32 1, <2 x i1> splat (i1 true)) +; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY1:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[INDICES]], i64 [[IV]] +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV1]] ; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP12]] to i64 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[IDXPROM1]] ; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1 ; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]] +; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY1]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: for.exit: ; CHECK-NEXT: ret void ;