[IA] Support vp.store in lowerInterleavedStore #149605
```diff
@@ -266,22 +266,28 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
 ///
 /// Note that the new shufflevectors will be removed and we'll only generate one
 /// vsseg3 instruction in CodeGen.
-bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
+bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store,
+                                                Value *LaneMask,
                                                 ShuffleVectorInst *SVI,
                                                 unsigned Factor) const {
-  IRBuilder<> Builder(SI);
-  const DataLayout &DL = SI->getDataLayout();
+  IRBuilder<> Builder(Store);
+  const DataLayout &DL = Store->getDataLayout();
   auto Mask = SVI->getShuffleMask();
   auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
   // Given SVI : <n*factor x ty>, then VTy : <n x ty>
   auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
                                    ShuffleVTy->getNumElements() / Factor);
-  if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
-                                    SI->getPointerAddressSpace(), DL))
+  auto *XLenTy = Type::getIntNTy(Store->getContext(), Subtarget.getXLen());
```
Reviewer (on the `XLenTy` line above): We can use …

Author: I have this same pattern in a few cases in this file, I'll do this as a post-commit and get them all.
```diff
+
+  Value *Ptr, *VL;
+  Align Alignment;
+  if (!getMemOperands(Factor, VTy, XLenTy, Store, Ptr, LaneMask, VL, Alignment))
     return false;
 
-  auto *PtrTy = SI->getPointerOperandType();
-  auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
+  Type *PtrTy = Ptr->getType();
+  unsigned AS = PtrTy->getPointerAddressSpace();
+  if (!isLegalInterleavedAccessType(VTy, Factor, Alignment, AS, DL))
+    return false;
 
   unsigned Index;
   // If the segment store only has one active lane (i.e. the interleave is
```
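With the new signature, the same entry point covers both a plain `store` of an interleaving `shufflevector` and its predicated `vp.store` form; `getMemOperands` extracts the pointer, lane mask, VL, and alignment from whichever instruction it receives. A minimal sketch of the two input shapes, with placeholder names and Factor = 2:

```llvm
; Plain interleaved store: %a/%b interleaved as <a0,b0,a1,b1,...>.
%i = shufflevector <4 x i32> %a, <4 x i32> %b,
       <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
store <8 x i32> %i, ptr %p, align 4

; vp.store of the same interleave, now routed through the same lowering.
call void @llvm.vp.store.v8i32.p0(<8 x i32> %i, ptr %p,
                                  <8 x i1> splat (i1 true), i32 8)
```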
```diff
@@ -292,27 +298,27 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
     unsigned ScalarSizeInBytes =
         DL.getTypeStoreSize(ShuffleVTy->getElementType());
     Value *Data = SVI->getOperand(0);
-    auto *DataVTy = cast<FixedVectorType>(Data->getType());
+    Data = Builder.CreateExtractVector(VTy, Data, uint64_t(0));
     Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
     Value *Offset = ConstantInt::get(XLenTy, Index * ScalarSizeInBytes);
-    Value *BasePtr = Builder.CreatePtrAdd(SI->getPointerOperand(), Offset);
-    Value *Mask = Builder.getAllOnesMask(DataVTy->getElementCount());
-    Value *VL = Builder.CreateElementCount(Builder.getInt32Ty(),
-                                           VTy->getElementCount());
+    Value *BasePtr = Builder.CreatePtrAdd(Ptr, Offset);
+    // Note: Same VL as above, but i32 not xlen due to signature of
+    // vp.strided.store
+    VL = Builder.CreateElementCount(Builder.getInt32Ty(),
+                                    VTy->getElementCount());
 
-    CallInst *CI = Builder.CreateIntrinsic(
-        Intrinsic::experimental_vp_strided_store,
-        {Data->getType(), BasePtr->getType(), Stride->getType()},
-        {Data, BasePtr, Stride, Mask, VL});
-    Align Alignment =
-        commonAlignment(SI->getAlign(), Index * ScalarSizeInBytes);
-    CI->addParamAttr(
-        1, Attribute::getWithAlignment(CI->getContext(), Alignment));
+    CallInst *CI =
+        Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_store,
+                                {VTy, BasePtr->getType(), Stride->getType()},
+                                {Data, BasePtr, Stride, LaneMask, VL});
+    Alignment = commonAlignment(Alignment, Index * ScalarSizeInBytes);
+    CI->addParamAttr(1,
+                     Attribute::getWithAlignment(CI->getContext(), Alignment));
     return true;
   }
 
   Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
-      SI->getModule(), FixedVssegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy});
+      Store->getModule(), FixedVssegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy});
 
   SmallVector<Value *, 10> Ops;
   SmallVector<int, 16> NewShuffleMask;
```
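The branch above is the degenerate case where only one interleaved lane carries data: instead of a segment store with mostly-dead fields, it emits a strided store of the live lane, with stride `Factor * ScalarSizeInBytes` and a base offset of `Index * ScalarSizeInBytes`. A hypothetical before/after for Factor = 2, Index = 1 over i32 (stride 8, offset 4):

```llvm
; Only the odd lanes of the interleave are defined.
%s = shufflevector <4 x i32> %v, <4 x i32> poison,
       <8 x i32> <i32 poison, i32 0, i32 poison, i32 1,
                  i32 poison, i32 2, i32 poison, i32 3>
store <8 x i32> %s, ptr %p, align 4

; Roughly becomes a strided store of the live lane:
%base = getelementptr i8, ptr %p, i64 4
call void @llvm.experimental.vp.strided.store.v4i32.p0.i64(
       <4 x i32> %v, ptr align 4 %base, i64 8,
       <4 x i1> splat (i1 true), i32 4)
```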
```diff
@@ -328,13 +334,7 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
 
     NewShuffleMask.clear();
   }
-  // This VL should be OK (should be executable in one vsseg instruction,
-  // potentially under larger LMULs) because we checked that the fixed vector
-  // type fits in isLegalInterleavedAccessType
-  Value *VL = Builder.CreateElementCount(XLenTy, VTy->getElementCount());
-  Value *StoreMask = Builder.getAllOnesMask(VTy->getElementCount());
-  Ops.append({SI->getPointerOperand(), StoreMask, VL});
+  Ops.append({Ptr, LaneMask, VL});
   Builder.CreateCall(VssegNFunc, Ops);
 
   return true;
```
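In the general case the shuffles are split into one operand per field and a single segmented store is emitted; the only change is that the pointer, mask, and VL now come from `getMemOperands` rather than being recomputed from the `StoreInst`. Schematically, for Factor = 2 (the intrinsic name and mangling below are illustrative stand-ins for `FixedVssegIntrIds[Factor - 2]`, not copied from the patch):

```llvm
call void @llvm.riscv.seg2.store.mask.v4i32.p0.i64(
       <4 x i32> %field0, <4 x i32> %field1,
       ptr %p, <4 x i1> %lanemask, i64 4)
```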
```diff
@@ -457,91 +457,3 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
   Builder.CreateCall(VssegNFunc, Operands);
   return true;
 }
-
-/// Lower an interleaved vp.store into a vssegN intrinsic.
-///
-/// E.g. Lower an interleaved vp.store (Factor = 2):
-///
-///   %is = tail call <vscale x 64 x i8>
-///             @llvm.vector.interleave2.nxv64i8(
-///                               <vscale x 32 x i8> %load0,
-///                               <vscale x 32 x i8> %load1)
-///   %wide.rvl = shl nuw nsw i32 %rvl, 1
-///   tail call void @llvm.vp.store.nxv64i8.p0(
-///                               <vscale x 64 x i8> %is, ptr %ptr,
-///                               %mask,
-///                               i32 %wide.rvl)
-///
-/// Into:
-///   call void @llvm.riscv.vsseg2.mask.nxv32i8.i64(
-///                               <vscale x 32 x i8> %load1,
-///                               <vscale x 32 x i8> %load2, ptr %ptr,
-///                               %mask,
-///                               i64 %rvl)
-bool RISCVTargetLowering::lowerInterleavedVPStore(
-    VPIntrinsic *Store, Value *Mask,
-    ArrayRef<Value *> InterleaveOperands) const {
-  assert(Mask && "Expect a valid mask");
-  assert(Store->getIntrinsicID() == Intrinsic::vp_store &&
-         "Unexpected intrinsic");
-
-  const unsigned Factor = InterleaveOperands.size();
-
-  auto *VTy = dyn_cast<VectorType>(InterleaveOperands[0]->getType());
-  if (!VTy)
-    return false;
-
-  const DataLayout &DL = Store->getDataLayout();
-  Align Alignment = Store->getParamAlign(1).value_or(
-      DL.getABITypeAlign(VTy->getElementType()));
-  if (!isLegalInterleavedAccessType(
-          VTy, Factor, Alignment,
-          Store->getArgOperand(1)->getType()->getPointerAddressSpace(), DL))
-    return false;
-
-  IRBuilder<> Builder(Store);
-  Value *WideEVL = Store->getArgOperand(3);
-  // Conservatively check if EVL is a multiple of factor, otherwise some
-  // (trailing) elements might be lost after the transformation.
-  if (!isMultipleOfN(WideEVL, Store->getDataLayout(), Factor))
-    return false;
-
-  auto *PtrTy = Store->getArgOperand(1)->getType();
-  auto *XLenTy = Type::getIntNTy(Store->getContext(), Subtarget.getXLen());
-  auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
-  Value *EVL =
-      Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
-
-  if (isa<FixedVectorType>(VTy)) {
-    SmallVector<Value *, 8> Operands(InterleaveOperands);
-    Operands.append({Store->getArgOperand(1), Mask, EVL});
-    Builder.CreateIntrinsic(FixedVssegIntrIds[Factor - 2],
-                            {VTy, PtrTy, XLenTy}, Operands);
-    return true;
-  }
-
-  unsigned SEW = DL.getTypeSizeInBits(VTy->getElementType());
-  unsigned NumElts = VTy->getElementCount().getKnownMinValue();
-  Type *VecTupTy = TargetExtType::get(
-      Store->getContext(), "riscv.vector.tuple",
-      ScalableVectorType::get(Type::getInt8Ty(Store->getContext()),
-                              NumElts * SEW / 8),
-      Factor);
-
-  Function *VecInsertFunc = Intrinsic::getOrInsertDeclaration(
-      Store->getModule(), Intrinsic::riscv_tuple_insert, {VecTupTy, VTy});
-  Value *StoredVal = PoisonValue::get(VecTupTy);
-  for (unsigned i = 0; i < Factor; ++i)
-    StoredVal = Builder.CreateCall(
-        VecInsertFunc, {StoredVal, InterleaveOperands[i], Builder.getInt32(i)});
-
-  Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
-      Store->getModule(), ScalableVssegIntrIds[Factor - 2],
-      {VecTupTy, PtrTy, Mask->getType(), EVL->getType()});
-
-  Value *Operands[] = {StoredVal, Store->getArgOperand(1), Mask, EVL,
-                       ConstantInt::get(XLenTy, Log2_64(SEW))};
-
-  Builder.CreateCall(VssegNFunc, Operands);
-  return true;
-}
```
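The EVL handling this deleted function did inline, and which the unified path still has to guarantee, comes down to a small piece of arithmetic; a sketch for Factor = 2, mirroring the deleted doc comment:

```llvm
; The wide EVL fed to vp.store must be provably a multiple of the factor,
; which here follows from the shl by 1.
%wide.rvl = shl nuw nsw i32 %rvl, 1
; The lowering then recovers the per-segment EVL and widens it to XLEN:
%evl = udiv exact i32 %wide.rvl, 2        ; equals %rvl
%evl.xlen = zext i32 %evl to i64
```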