[RISCV][IA] Support masked.load for deinterleaveN matching #149556


Open · wants to merge 1 commit into main
Conversation

@preames (Collaborator) commented Jul 18, 2025

This builds on the whole series of recent API reworks to implement support for deinterleaveN of masked.load. The goal is to be able to enable masked interleave groups in the vectorizer once all the codegen and costing pieces are in place.

I considered including the shuffle path support in this review as well (since the RISCV target-specific handling should be common), but decided to separate it into its own review just to focus attention on one thing at a time.
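
For concreteness, the pattern this patch teaches the pass to match is a masked.load whose sole use is a vector.deinterleaveN intrinsic; here is the factor-4 instance from the tests updated below:

  %vec = call <vscale x 32 x i8> @llvm.masked.load(ptr %p, i32 4, <vscale x 32 x i1> splat (i1 true), <vscale x 32 x i8> poison)
  %deinterleaved.results = call {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @llvm.vector.deinterleave4.nxv32i8(<vscale x 32 x i8> %vec)

With this change, the pair lowers to a single vlseg4e8.v segment load instead of a wide load plus shuffle expansion.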

@llvmbot (Member) commented Jul 18, 2025

@llvm/pr-subscribers-backend-risc-v

Author: Philip Reames (preames)

Changes



Full diff: https://github.com/llvm/llvm-project/pull/149556.diff

3 Files Affected:

  • (modified) llvm/lib/CodeGen/InterleavedAccessPass.cpp (+29-13)
  • (modified) llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp (+30-14)
  • (modified) llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll (+3-68)
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index d2b2edf2ebc80..525ef32525b36 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -601,31 +601,47 @@ static Value *getMask(Value *WideMask, unsigned Factor,
 bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
     IntrinsicInst *DI, SmallSetVector<Instruction *, 32> &DeadInsts) {
   Value *LoadedVal = DI->getOperand(0);
-  if (!LoadedVal->hasOneUse() || !isa<LoadInst, VPIntrinsic>(LoadedVal))
+  if (!LoadedVal->hasOneUse())
+    return false;
+
+  auto *LI = dyn_cast<LoadInst>(LoadedVal);
+  auto *II = dyn_cast<IntrinsicInst>(LoadedVal);
+  if (!LI && !II)
     return false;
 
   const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
   assert(Factor && "unexpected deinterleave intrinsic");
 
   Value *Mask = nullptr;
-  if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadedVal)) {
-    if (VPLoad->getIntrinsicID() != Intrinsic::vp_load)
+  if (LI) {
+    if (!LI->isSimple())
       return false;
+
+    LLVM_DEBUG(dbgs() << "IA: Found a load with deinterleave intrinsic " << *DI
+                      << " and factor = " << Factor << "\n");
+  } else {
+    assert(II);
+
     // Check mask operand. Handle both all-true/false and interleaved mask.
-    Value *WideMask = VPLoad->getOperand(1);
-    Mask = getMask(WideMask, Factor, getDeinterleavedVectorType(DI));
-    if (!Mask)
+    Value *WideMask;
+    switch (II->getIntrinsicID()) {
+    default:
       return false;
+    case Intrinsic::vp_load:
+      WideMask = II->getOperand(1);
+      break;
+    case  Intrinsic::masked_load:
+      WideMask = II->getOperand(2);
+      break;
+    }
 
-    LLVM_DEBUG(dbgs() << "IA: Found a vp.load with deinterleave intrinsic "
-                      << *DI << " and factor = " << Factor << "\n");
-  } else {
-    auto *LI = cast<LoadInst>(LoadedVal);
-    if (!LI->isSimple())
+    Mask = getMask(WideMask, Factor, getDeinterleavedVectorType(DI));
+    if (!Mask)
       return false;
 
-    LLVM_DEBUG(dbgs() << "IA: Found a load with deinterleave intrinsic " << *DI
-                      << " and factor = " << Factor << "\n");
+    LLVM_DEBUG(dbgs() << "IA: Found a vp.load or masked.load with deinterleave"
+                      << " intrinsic " << *DI << " and factor = "
+                      << Factor << "\n");
   }
 
   // Try and match this with target specific intrinsics.
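
For reference, the two intrinsics carry their mask at different operand positions, which is what the switch in the hunk above encodes. Schematically (concrete type suffixes omitted):

  declare <N x T> @llvm.vp.load(ptr %ptr, <N x i1> %mask, i32 %evl)                                  ; mask = operand 1
  declare <N x T> @llvm.masked.load(ptr %ptr, i32 immarg %align, <N x i1> %mask, <N x T> %passthru)  ; mask = operand 2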
diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
index dd68a5556cdb5..6de870c9c9735 100644
--- a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
@@ -131,24 +131,40 @@ static bool getMemOperands(unsigned Factor, VectorType *VTy, Type *XLenTy,
                                    : Constant::getAllOnesValue(XLenTy);
     return true;
   }
-  auto *VPLdSt = cast<VPIntrinsic>(I);
-  assert((VPLdSt->getIntrinsicID() == Intrinsic::vp_load ||
-          VPLdSt->getIntrinsicID() == Intrinsic::vp_store) &&
-         "Unexpected intrinsic");
-  Ptr = VPLdSt->getMemoryPointerParam();
-  Alignment = VPLdSt->getPointerAlignment().value_or(
-      DL.getABITypeAlign(VTy->getElementType()));
+  if (auto *VPLdSt = dyn_cast<VPIntrinsic>(I)) {
+    assert((VPLdSt->getIntrinsicID() == Intrinsic::vp_load ||
+            VPLdSt->getIntrinsicID() == Intrinsic::vp_store) &&
+           "Unexpected intrinsic");
+    Ptr = VPLdSt->getMemoryPointerParam();
+    Alignment = VPLdSt->getPointerAlignment().value_or(
+        DL.getABITypeAlign(VTy->getElementType()));
+
+    assert(Mask && "vp.load and vp.store needs a mask!");
+
+    Value *WideEVL = VPLdSt->getVectorLengthParam();
+    // Conservatively check if EVL is a multiple of factor, otherwise some
+    // (trailing) elements might be lost after the transformation.
+    if (!isMultipleOfN(WideEVL, I->getDataLayout(), Factor))
+      return false;
 
-  assert(Mask && "vp.load and vp.store needs a mask!");
+    auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
+    VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
+    return true;
+  }
+  auto *II = cast<IntrinsicInst>(I);
+  assert(II->getIntrinsicID() == Intrinsic::masked_load &&
+         "Unexpected intrinsic");
+  Ptr = II->getOperand(0);
+  Alignment = cast<ConstantInt>(II->getArgOperand(1))->getAlignValue();
 
-  Value *WideEVL = VPLdSt->getVectorLengthParam();
-  // Conservatively check if EVL is a multiple of factor, otherwise some
-  // (trailing) elements might be lost after the transformation.
-  if (!isMultipleOfN(WideEVL, I->getDataLayout(), Factor))
+  if (!isa<UndefValue>(II->getOperand(3)))
     return false;
 
-  auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
-  VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
+  assert(Mask && "masked.load needs a mask!");
+
+  VL = isa<FixedVectorType>(VTy)
+           ? Builder.CreateElementCount(XLenTy, VTy->getElementCount())
+           : Constant::getAllOnesValue(XLenTy);
   return true;
 }
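
To make the VL handling concrete: on the vp.load path the segment load's VL is the wide EVL divided exactly by the factor (hence the multiple-of-factor check), while masked.load has no EVL, so the VL simply covers the whole vector type, mirroring the plain-load path. A minimal sketch of the vp.load case, assuming RV64 (i64 XLenTy), factor 4, and a hypothetical %n:

  %evl  = mul i32 %n, 4              ; EVL provably a multiple of the factor
  ; getMemOperands then emits, for the segment load:
  %q    = udiv exact i32 %evl, 4     ; Builder.CreateExactUDiv(WideEVL, FactorC)
  %vl   = zext i32 %q to i64         ; Builder.CreateZExt(..., XLenTy)

For masked.load, VL is the element count for fixed vectors, or an all-ones value (VLMAX) for scalable vectors.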
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
index 578b67e284c5c..96a7b1422005f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
@@ -542,10 +542,8 @@ define { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x
 define {<vscale x 16 x i8>, <vscale x 16 x i8>} @masked_load_factor2(ptr %p) {
 ; CHECK-LABEL: masked_load_factor2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl4r.v v12, (a0)
-; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
-; CHECK-NEXT:    vnsrl.wi v10, v12, 8
+; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, ma
+; CHECK-NEXT:    vlseg2e8.v v8, (a0)
 ; CHECK-NEXT:    ret
   %vec = call <vscale x 32 x i8> @llvm.masked.load(ptr %p, i32 4, <vscale x 32 x i1> splat (i1 true), <vscale x 32 x i8> poison)
   %deinterleaved.results = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> %vec)
@@ -555,23 +553,8 @@ define {<vscale x 16 x i8>, <vscale x 16 x i8>} @masked_load_factor2(ptr %p) {
 define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_loat_factor4(ptr %p) {
 ; CHECK-LABEL: masked_loat_factor4:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 2
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; CHECK-NEXT:    vl4r.v v8, (a0)
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs4r.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vlseg4e8.v v8, (a0)
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 2
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NEXT:    ret
   %vec = call <vscale x 32 x i8> @llvm.masked.load(ptr %p, i32 4, <vscale x 32 x i1> splat (i1 true), <vscale x 32 x i8> poison)
   %deinterleaved.results = call {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @llvm.vector.deinterleave4.nxv32i8(<vscale x 32 x i8> %vec)
@@ -581,56 +564,8 @@ define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i
 define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_loat_factor4_mask(ptr %p, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: masked_loat_factor4_mask:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    add a3, a1, a2
-; CHECK-NEXT:    vmv.v.v v9, v8
-; CHECK-NEXT:    srli a4, a2, 2
-; CHECK-NEXT:    vmv.v.v v10, v8
-; CHECK-NEXT:    srli a5, a2, 3
-; CHECK-NEXT:    vmv.v.v v11, v8
-; CHECK-NEXT:    vsseg4e8.v v8, (a1)
-; CHECK-NEXT:    vl1r.v v8, (a1)
-; CHECK-NEXT:    add a1, a4, a5
-; CHECK-NEXT:    vl1r.v v9, (a3)
-; CHECK-NEXT:    add a3, a3, a2
-; CHECK-NEXT:    add a2, a3, a2
-; CHECK-NEXT:    vl1r.v v10, (a3)
-; CHECK-NEXT:    vl1r.v v11, (a2)
-; CHECK-NEXT:    vmsne.vi v9, v9, 0
-; CHECK-NEXT:    vmsne.vi v0, v8, 0
-; CHECK-NEXT:    vmsne.vi v8, v10, 0
-; CHECK-NEXT:    vmsne.vi v10, v11, 0
-; CHECK-NEXT:    vsetvli zero, a4, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vx v0, v9, a5
-; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vx v0, v8, a4
-; CHECK-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vx v0, v10, a1
-; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a0), v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 2
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs4r.v v8, (a0)
-; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vlseg4e8.v v8, (a0)
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    vlseg4e8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
   %interleaved.mask = tail call <vscale x 32 x i1> @llvm.vector.interleave4.nxv32i1(<vscale x 8 x i1> %mask, <vscale x 8 x i1> %mask, <vscale x 8 x i1> %mask, <vscale x 8 x i1> %mask)
   %vec = call <vscale x 32 x i8> @llvm.masked.load(ptr %p, i32 4, <vscale x 32 x i1> %interleaved.mask, <vscale x 32 x i8> poison)


⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:
git-clang-format --diff HEAD~1 HEAD --extensions cpp -- llvm/lib/CodeGen/InterleavedAccessPass.cpp llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
View the diff from clang-format here.
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 525ef3252..1a4e21edf 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -630,7 +630,7 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
     case Intrinsic::vp_load:
       WideMask = II->getOperand(1);
       break;
-    case  Intrinsic::masked_load:
+    case Intrinsic::masked_load:
       WideMask = II->getOperand(2);
       break;
     }
@@ -640,8 +640,8 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
       return false;
 
     LLVM_DEBUG(dbgs() << "IA: Found a vp.load or masked.load with deinterleave"
-                      << " intrinsic " << *DI << " and factor = "
-                      << Factor << "\n");
+                      << " intrinsic " << *DI << " and factor = " << Factor
+                      << "\n");
   }
 
   // Try and match this with target specific intrinsics.
