@@ -266,22 +266,28 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
 ///
 /// Note that the new shufflevectors will be removed and we'll only generate one
 /// vsseg3 instruction in CodeGen.
-bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
+bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store,
+                                                Value *LaneMask,
                                                 ShuffleVectorInst *SVI,
                                                 unsigned Factor) const {
-  IRBuilder<> Builder(SI);
-  const DataLayout &DL = SI->getDataLayout();
+  IRBuilder<> Builder(Store);
+  const DataLayout &DL = Store->getDataLayout();
   auto Mask = SVI->getShuffleMask();
   auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
   // Given SVI : <n*factor x ty>, then VTy : <n x ty>
   auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
                                    ShuffleVTy->getNumElements() / Factor);
-  if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
-                                    SI->getPointerAddressSpace(), DL))
+  auto *XLenTy = Type::getIntNTy(Store->getContext(), Subtarget.getXLen());
+
+  Value *Ptr, *VL;
+  Align Alignment;
+  if (!getMemOperands(Factor, VTy, XLenTy, Store, Ptr, LaneMask, VL, Alignment))
     return false;
 
-  auto *PtrTy = SI->getPointerOperandType();
-  auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
+  Type *PtrTy = Ptr->getType();
+  unsigned AS = PtrTy->getPointerAddressSpace();
+  if (!isLegalInterleavedAccessType(VTy, Factor, Alignment, AS, DL))
+    return false;
 
   unsigned Index;
   // If the segment store only has one active lane (i.e. the interleave is
@@ -292,27 +298,27 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
     unsigned ScalarSizeInBytes =
         DL.getTypeStoreSize(ShuffleVTy->getElementType());
     Value *Data = SVI->getOperand(0);
-    auto *DataVTy = cast<FixedVectorType>(Data->getType());
+    Data = Builder.CreateExtractVector(VTy, Data, uint64_t(0));
     Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
     Value *Offset = ConstantInt::get(XLenTy, Index * ScalarSizeInBytes);
-    Value *BasePtr = Builder.CreatePtrAdd(SI->getPointerOperand(), Offset);
-    Value *Mask = Builder.getAllOnesMask(DataVTy->getElementCount());
-    Value *VL = Builder.CreateElementCount(Builder.getInt32Ty(),
-                                           VTy->getElementCount());
-
-    CallInst *CI = Builder.CreateIntrinsic(
-        Intrinsic::experimental_vp_strided_store,
-        {Data->getType(), BasePtr->getType(), Stride->getType()},
-        {Data, BasePtr, Stride, Mask, VL});
-    Align Alignment = commonAlignment(SI->getAlign(), Index * ScalarSizeInBytes);
-    CI->addParamAttr(
-        1, Attribute::getWithAlignment(CI->getContext(), Alignment));
+    Value *BasePtr = Builder.CreatePtrAdd(Ptr, Offset);
+    // Note: Same VL as above, but i32 not xlen due to signature of
+    // vp.strided.store
+    VL = Builder.CreateElementCount(Builder.getInt32Ty(),
+                                    VTy->getElementCount());
 
+    CallInst *CI =
+        Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_store,
+                                {VTy, BasePtr->getType(), Stride->getType()},
+                                {Data, BasePtr, Stride, LaneMask, VL});
+    Alignment = commonAlignment(Alignment, Index * ScalarSizeInBytes);
+    CI->addParamAttr(1,
+                     Attribute::getWithAlignment(CI->getContext(), Alignment));
     return true;
   }
 
   Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
-      SI->getModule(), FixedVssegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy});
+      Store->getModule(), FixedVssegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy});
 
   SmallVector<Value *, 10> Ops;
   SmallVector<int, 16> NewShuffleMask;
@@ -328,13 +334,7 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
 
     NewShuffleMask.clear();
   }
-  // This VL should be OK (should be executable in one vsseg instruction,
-  // potentially under larger LMULs) because we checked that the fixed vector
-  // type fits in isLegalInterleavedAccessType
-  Value *VL = Builder.CreateElementCount(XLenTy, VTy->getElementCount());
-  Value *StoreMask = Builder.getAllOnesMask(VTy->getElementCount());
-  Ops.append({SI->getPointerOperand(), StoreMask, VL});
-
+  Ops.append({Ptr, LaneMask, VL});
   Builder.CreateCall(VssegNFunc, Ops);
 
   return true;
@@ -457,91 +457,3 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
   Builder.CreateCall(VssegNFunc, Operands);
   return true;
 }
-
-/// Lower an interleaved vp.store into a vssegN intrinsic.
-///
-/// E.g. Lower an interleaved vp.store (Factor = 2):
-///
-///   %is = tail call <vscale x 64 x i8>
-///           @llvm.vector.interleave2.nxv64i8(
-///                               <vscale x 32 x i8> %load0,
-///                               <vscale x 32 x i8> %load1
-///   %wide.rvl = shl nuw nsw i32 %rvl, 1
-///   tail call void @llvm.vp.store.nxv64i8.p0(
-///        <vscale x 64 x i8> %is, ptr %ptr,
-///        %mask,
-///        i32 %wide.rvl)
-///
-/// Into:
-///   call void @llvm.riscv.vsseg2.mask.nxv32i8.i64(
-///                               <vscale x 32 x i8> %load1,
-///                               <vscale x 32 x i8> %load2, ptr %ptr,
-///                               %mask,
-///                               i64 %rvl)
-bool RISCVTargetLowering::lowerInterleavedVPStore(
-    VPIntrinsic *Store, Value *Mask,
-    ArrayRef<Value *> InterleaveOperands) const {
-  assert(Mask && "Expect a valid mask");
-  assert(Store->getIntrinsicID() == Intrinsic::vp_store &&
-         "Unexpected intrinsic");
-
-  const unsigned Factor = InterleaveOperands.size();
-
-  auto *VTy = dyn_cast<VectorType>(InterleaveOperands[0]->getType());
-  if (!VTy)
-    return false;
-
-  const DataLayout &DL = Store->getDataLayout();
-  Align Alignment = Store->getParamAlign(1).value_or(
-      DL.getABITypeAlign(VTy->getElementType()));
-  if (!isLegalInterleavedAccessType(
-          VTy, Factor, Alignment,
-          Store->getArgOperand(1)->getType()->getPointerAddressSpace(), DL))
-    return false;
-
-  IRBuilder<> Builder(Store);
-  Value *WideEVL = Store->getArgOperand(3);
-  // Conservatively check if EVL is a multiple of factor, otherwise some
-  // (trailing) elements might be lost after the transformation.
-  if (!isMultipleOfN(WideEVL, Store->getDataLayout(), Factor))
-    return false;
-
-  auto *PtrTy = Store->getArgOperand(1)->getType();
-  auto *XLenTy = Type::getIntNTy(Store->getContext(), Subtarget.getXLen());
-  auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
-  Value *EVL =
-      Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
-
-  if (isa<FixedVectorType>(VTy)) {
-    SmallVector<Value *, 8> Operands(InterleaveOperands);
-    Operands.append({Store->getArgOperand(1), Mask, EVL});
-    Builder.CreateIntrinsic(FixedVssegIntrIds[Factor - 2],
-                            {VTy, PtrTy, XLenTy}, Operands);
-    return true;
-  }
-
-  unsigned SEW = DL.getTypeSizeInBits(VTy->getElementType());
-  unsigned NumElts = VTy->getElementCount().getKnownMinValue();
-  Type *VecTupTy = TargetExtType::get(
-      Store->getContext(), "riscv.vector.tuple",
-      ScalableVectorType::get(Type::getInt8Ty(Store->getContext()),
-                              NumElts * SEW / 8),
-      Factor);
-
-  Function *VecInsertFunc = Intrinsic::getOrInsertDeclaration(
-      Store->getModule(), Intrinsic::riscv_tuple_insert, {VecTupTy, VTy});
-  Value *StoredVal = PoisonValue::get(VecTupTy);
-  for (unsigned i = 0; i < Factor; ++i)
-    StoredVal = Builder.CreateCall(
-        VecInsertFunc, {StoredVal, InterleaveOperands[i], Builder.getInt32(i)});
-
-  Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
-      Store->getModule(), ScalableVssegIntrIds[Factor - 2],
-      {VecTupTy, PtrTy, Mask->getType(), EVL->getType()});
-
-  Value *Operands[] = {StoredVal, Store->getArgOperand(1), Mask, EVL,
-                       ConstantInt::get(XLenTy, Log2_64(SEW))};
-
-  Builder.CreateCall(VssegNFunc, Operands);
-  return true;
-}
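For orientation, here is a rough sketch (not taken from this patch) of the single-active-lane fast path kept above: a factor-2 interleaved store whose shuffle only populates lane 0 can be rewritten as one llvm.experimental.vp.strided.store. The value names, element types, and factor below are invented for illustration, and the exact mangled intrinsic name depends on the data, pointer, and stride types used.

    ; Before: spread %v across every other element of an 8-element store.
    %wide = shufflevector <4 x i32> %v, <4 x i32> poison,
            <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef>
    store <8 x i32> %wide, ptr %p, align 4

    ; After (roughly): store the 4 payload elements with an 8-byte stride
    ; (factor * element size), base offset 0 for lane index 0, lane mask and VL = 4.
    call void @llvm.experimental.vp.strided.store.v4i32.p0.i64(
        <4 x i32> %v, ptr %p, i64 8,
        <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 4)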