diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index d2b2edf2ebc80..525ef32525b36 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -601,31 +601,47 @@ static Value *getMask(Value *WideMask, unsigned Factor,
 bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
     IntrinsicInst *DI, SmallSetVector<Instruction *, 32> &DeadInsts) {
   Value *LoadedVal = DI->getOperand(0);
-  if (!LoadedVal->hasOneUse() || !isa<LoadInst, VPIntrinsic>(LoadedVal))
+  if (!LoadedVal->hasOneUse())
+    return false;
+
+  auto *LI = dyn_cast<LoadInst>(LoadedVal);
+  auto *II = dyn_cast<IntrinsicInst>(LoadedVal);
+  if (!LI && !II)
     return false;
 
   const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
   assert(Factor && "unexpected deinterleave intrinsic");
 
   Value *Mask = nullptr;
-  if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadedVal)) {
-    if (VPLoad->getIntrinsicID() != Intrinsic::vp_load)
+  if (LI) {
+    if (!LI->isSimple())
       return false;
+
+    LLVM_DEBUG(dbgs() << "IA: Found a load with deinterleave intrinsic " << *DI
+                      << " and factor = " << Factor << "\n");
+  } else {
+    assert(II);
+
     // Check mask operand. Handle both all-true/false and interleaved mask.
-    Value *WideMask = VPLoad->getOperand(1);
-    Mask = getMask(WideMask, Factor, getDeinterleavedVectorType(DI));
-    if (!Mask)
+    Value *WideMask;
+    switch (II->getIntrinsicID()) {
+    default:
       return false;
+    case Intrinsic::vp_load:
+      WideMask = II->getOperand(1);
+      break;
+    case Intrinsic::masked_load:
+      WideMask = II->getOperand(2);
+      break;
+    }
 
-    LLVM_DEBUG(dbgs() << "IA: Found a vp.load with deinterleave intrinsic "
-                      << *DI << " and factor = " << Factor << "\n");
-  } else {
-    auto *LI = cast<LoadInst>(LoadedVal);
-    if (!LI->isSimple())
+    Mask = getMask(WideMask, Factor, getDeinterleavedVectorType(DI));
+    if (!Mask)
       return false;
 
-    LLVM_DEBUG(dbgs() << "IA: Found a load with deinterleave intrinsic " << *DI
-                      << " and factor = " << Factor << "\n");
+    LLVM_DEBUG(dbgs() << "IA: Found a vp.load or masked.load with deinterleave"
+                      << " intrinsic " << *DI << " and factor = "
+                      << Factor << "\n");
   }
 
   // Try and match this with target specific intrinsics.
diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
index dd68a5556cdb5..6de870c9c9735 100644
--- a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
@@ -131,24 +131,40 @@ static bool getMemOperands(unsigned Factor, VectorType *VTy, Type *XLenTy,
              : Constant::getAllOnesValue(XLenTy);
     return true;
   }
-  auto *VPLdSt = cast<VPIntrinsic>(I);
-  assert((VPLdSt->getIntrinsicID() == Intrinsic::vp_load ||
-          VPLdSt->getIntrinsicID() == Intrinsic::vp_store) &&
-         "Unexpected intrinsic");
-  Ptr = VPLdSt->getMemoryPointerParam();
-  Alignment = VPLdSt->getPointerAlignment().value_or(
-      DL.getABITypeAlign(VTy->getElementType()));
+  if (auto *VPLdSt = dyn_cast<VPIntrinsic>(I)) {
+    assert((VPLdSt->getIntrinsicID() == Intrinsic::vp_load ||
+            VPLdSt->getIntrinsicID() == Intrinsic::vp_store) &&
+           "Unexpected intrinsic");
+    Ptr = VPLdSt->getMemoryPointerParam();
+    Alignment = VPLdSt->getPointerAlignment().value_or(
+        DL.getABITypeAlign(VTy->getElementType()));
+
+    assert(Mask && "vp.load and vp.store needs a mask!");
+
+    Value *WideEVL = VPLdSt->getVectorLengthParam();
+    // Conservatively check if EVL is a multiple of factor, otherwise some
+    // (trailing) elements might be lost after the transformation.
+    if (!isMultipleOfN(WideEVL, I->getDataLayout(), Factor))
+      return false;
 
-  assert(Mask && "vp.load and vp.store needs a mask!");
+    auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
+    VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
+    return true;
+  }
+  auto *II = cast<IntrinsicInst>(I);
+  assert(II->getIntrinsicID() == Intrinsic::masked_load &&
+         "Unexpected intrinsic");
+  Ptr = II->getOperand(0);
+  Alignment = cast<ConstantInt>(II->getArgOperand(1))->getAlignValue();
 
-  Value *WideEVL = VPLdSt->getVectorLengthParam();
-  // Conservatively check if EVL is a multiple of factor, otherwise some
-  // (trailing) elements might be lost after the transformation.
-  if (!isMultipleOfN(WideEVL, I->getDataLayout(), Factor))
+  if (!isa<UndefValue>(II->getOperand(3)))
     return false;
 
-  auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
-  VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
+  assert(Mask && "masked.load needs a mask!");
+
+  VL = isa<FixedVectorType>(VTy)
+           ? Builder.CreateElementCount(XLenTy, VTy->getElementCount())
+           : Constant::getAllOnesValue(XLenTy);
   return true;
 }
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
index 578b67e284c5c..96a7b1422005f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
@@ -542,10 +542,8 @@
 define {<vscale x 16 x i8>, <vscale x 16 x i8>} @masked_load_factor2(ptr %p) {
 ; CHECK-LABEL: masked_load_factor2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl4r.v v12, (a0)
-; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
-; CHECK-NEXT:    vnsrl.wi v10, v12, 8
+; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, ma
+; CHECK-NEXT:    vlseg2e8.v v8, (a0)
 ; CHECK-NEXT:    ret
   %vec = call <vscale x 32 x i8> @llvm.masked.load(ptr %p, i32 4, <vscale x 32 x i1> splat (i1 true), <vscale x 32 x i8> poison)
   %deinterleaved.results = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> %vec)
@@ -555,23 +553,8 @@ define {<vscale x 16 x i8>, <vscale x 16 x i8>} @masked_load_factor2(ptr %p) {
 define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_loat_factor4(ptr %p) {
 ; CHECK-LABEL: masked_loat_factor4:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 2
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; CHECK-NEXT:    vl4r.v v8, (a0)
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs4r.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vlseg4e8.v v8, (a0)
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 2
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NEXT:    ret
   %vec = call <vscale x 32 x i8> @llvm.masked.load(ptr %p, i32 4, <vscale x 32 x i1> splat (i1 true), <vscale x 32 x i8> poison)
   %deinterleaved.results = call {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @llvm.vector.deinterleave4.nxv32i8(<vscale x 32 x i8> %vec)
@@ -581,56 +564,8 @@ define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x
 define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_loat_factor4_mask(ptr %p, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: masked_loat_factor4_mask:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    add a3, a1, a2
-; CHECK-NEXT:    vmv.v.v v9, v8
-; CHECK-NEXT:    srli a4, a2, 2
-; CHECK-NEXT:    vmv.v.v v10, v8
-; CHECK-NEXT:    srli a5, a2, 3
-; CHECK-NEXT:    vmv.v.v v11, v8
-; CHECK-NEXT:    vsseg4e8.v v8, (a1)
-; CHECK-NEXT:    vl1r.v v8, (a1)
-; CHECK-NEXT:    add a1, a4, a5
-; CHECK-NEXT:    vl1r.v v9, (a3)
-; CHECK-NEXT:    add a3, a3, a2
-; CHECK-NEXT:    add a2, a3, a2
-; CHECK-NEXT:    vl1r.v v10, (a3)
-; CHECK-NEXT:    vl1r.v v11, (a2)
-; CHECK-NEXT:    vmsne.vi v9, v9, 0
-; CHECK-NEXT:    vmsne.vi v0, v8, 0
-; CHECK-NEXT:    vmsne.vi v8, v10, 0
-; CHECK-NEXT:    vmsne.vi v10, v11, 0
-; CHECK-NEXT:    vsetvli zero, a4, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vx v0, v9, a5
-; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vx v0, v8, a4
-; CHECK-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vx v0, v10, a1
-; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a0), v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 2
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs4r.v v8, (a0)
-; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vlseg4e8.v v8, (a0)
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    vlseg4e8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
   %interleaved.mask = tail call <vscale x 32 x i1> @llvm.vector.interleave4.nxv32i1(<vscale x 8 x i1> %mask, <vscale x 8 x i1> %mask, <vscale x 8 x i1> %mask, <vscale x 8 x i1> %mask)
   %vec = call <vscale x 32 x i8> @llvm.masked.load(ptr %p, i32 4, <vscale x 32 x i1> %interleaved.mask, <vscale x 32 x i8> poison)
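Note: the IR shape this patch teaches the pass to recognize is a masked.load whose sole use is a vector.deinterleaveN intrinsic, with an all-true or factor-interleaved mask and an undef/poison passthru. A minimal sketch, lifted from the masked_load_factor2 test above:

  %vec = call <vscale x 32 x i8> @llvm.masked.load(ptr %p, i32 4, <vscale x 32 x i1> splat (i1 true), <vscale x 32 x i8> poison)
  %deinterleaved.results = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> %vec)

As the updated CHECK lines show, RISC-V now selects a single vlseg2e8.v segment load for this pair instead of a whole-register vl4r.v followed by vnsrl.wi deinterleaving shifts (and, in the masked case, instead of spilling through the stack to build the mask).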