Skip to content

[RISCV] Add intrinsics for strided segment stores with fixed vectors #152038

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged

Conversation

mshockwave
Copy link
Member

@mshockwave mshockwave commented Aug 4, 2025

These are the strided versions of riscv.segN.store.mask intrinsics.


This will be used in the strided segmented store version of #151612

@llvmbot
Copy link
Member

llvmbot commented Aug 4, 2025

@llvm/pr-subscribers-llvm-ir

Author: Min-Yih Hsu (mshockwave)

Changes

These are the strided versions of riscv.segN.store.mask intrinsics.


Full diff: https://github.com/llvm/llvm-project/pull/152038.diff

3 Files Affected:

  • (modified) llvm/include/llvm/IR/IntrinsicsRISCV.td (+11)
  • (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+109-50)
  • (added) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ssegN-store.ll (+72)
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 99f975faeb85e..c56e390d578a4 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1736,6 +1736,17 @@ let TargetPrefix = "riscv" in {
                                               [llvm_anyptr_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                                                llvm_anyint_ty]),
                                   [NoCapture<ArgIndex<nf>>, IntrWriteMem]>;
+
+    // Input: (<stored values>..., pointer, offset, mask, vl)
+    def int_riscv_sseg # nf # _store_mask
+          : DefaultAttrsIntrinsic<[],
+                                  !listconcat([llvm_anyvector_ty],
+                                              !listsplat(LLVMMatchType<0>,
+                                                          !add(nf, -1)),
+                                              [llvm_anyptr_ty, llvm_anyint_ty,
+                                               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                                               llvm_anyint_ty]),
+                                  [NoCapture<ArgIndex<nf>>, IntrWriteMem]>;
   }
 
 } // TargetPrefix = "riscv"
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e09e6fb5b26ee..671a107cc8f6b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1839,6 +1839,17 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
                                /*IsStore*/ true,
                                /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
+  case Intrinsic::riscv_sseg2_store_mask:
+  case Intrinsic::riscv_sseg3_store_mask:
+  case Intrinsic::riscv_sseg4_store_mask:
+  case Intrinsic::riscv_sseg5_store_mask:
+  case Intrinsic::riscv_sseg6_store_mask:
+  case Intrinsic::riscv_sseg7_store_mask:
+  case Intrinsic::riscv_sseg8_store_mask:
+    // Operands are (vec, ..., vec, ptr, offset, mask, vl)
+    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
+                               /*IsStore*/ true,
+                               /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
   case Intrinsic::riscv_vlm:
     return SetRVVLoadStoreInfo(/*PtrOp*/ 0,
                                /*IsStore*/ false,
@@ -11077,69 +11088,117 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
   return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
 }
 
-SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
-                                                 SelectionDAG &DAG) const {
-  unsigned IntNo = Op.getConstantOperandVal(1);
+static SDValue
+lowerFixedVectorSegStoreIntrinsics(unsigned IntNo, SDValue Op,
+                                   const RISCVSubtarget &Subtarget,
+                                   SelectionDAG &DAG) {
+  bool IsStrided;
   switch (IntNo) {
-  default:
-    break;
   case Intrinsic::riscv_seg2_store_mask:
   case Intrinsic::riscv_seg3_store_mask:
   case Intrinsic::riscv_seg4_store_mask:
   case Intrinsic::riscv_seg5_store_mask:
   case Intrinsic::riscv_seg6_store_mask:
   case Intrinsic::riscv_seg7_store_mask:
-  case Intrinsic::riscv_seg8_store_mask: {
-    SDLoc DL(Op);
-    static const Intrinsic::ID VssegInts[] = {
-        Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
-        Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
-        Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
-        Intrinsic::riscv_vsseg8_mask};
+  case Intrinsic::riscv_seg8_store_mask:
+    IsStrided = false;
+    break;
+  case Intrinsic::riscv_sseg2_store_mask:
+  case Intrinsic::riscv_sseg3_store_mask:
+  case Intrinsic::riscv_sseg4_store_mask:
+  case Intrinsic::riscv_sseg5_store_mask:
+  case Intrinsic::riscv_sseg6_store_mask:
+  case Intrinsic::riscv_sseg7_store_mask:
+  case Intrinsic::riscv_sseg8_store_mask:
+    IsStrided = true;
+    break;
+  default:
+    llvm_unreachable("unexpected intrinsic ID");
+  }
 
-    // Operands: (chain, int_id, vec*, ptr, mask, vl)
-    unsigned NF = Op->getNumOperands() - 5;
-    assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
-    MVT XLenVT = Subtarget.getXLenVT();
-    MVT VT = Op->getOperand(2).getSimpleValueType();
-    MVT ContainerVT = getContainerForFixedLengthVector(VT);
-    unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
-                  ContainerVT.getScalarSizeInBits();
-    EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
+  SDLoc DL(Op);
+  static const Intrinsic::ID VssegInts[] = {
+      Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
+      Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
+      Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
+      Intrinsic::riscv_vsseg8_mask};
+  static const Intrinsic::ID VsssegInts[] = {
+      Intrinsic::riscv_vssseg2_mask, Intrinsic::riscv_vssseg3_mask,
+      Intrinsic::riscv_vssseg4_mask, Intrinsic::riscv_vssseg5_mask,
+      Intrinsic::riscv_vssseg6_mask, Intrinsic::riscv_vssseg7_mask,
+      Intrinsic::riscv_vssseg8_mask};
+
+  // Operands: (chain, int_id, vec*, ptr, mask, vl) or
+  // (chain, int_id, vec*, ptr, stride, mask, vl)
+  unsigned NF = Op->getNumOperands() - (IsStrided ? 6 : 5);
+  assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
+  MVT XLenVT = Subtarget.getXLenVT();
+  MVT VT = Op->getOperand(2).getSimpleValueType();
+  MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
+  unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
+                ContainerVT.getScalarSizeInBits();
+  EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
+
+  SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
+  SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
+  MVT MaskVT = Mask.getSimpleValueType();
+  MVT MaskContainerVT =
+      ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
+  Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
 
-    SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
-    SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
-    MVT MaskVT = Mask.getSimpleValueType();
-    MVT MaskContainerVT =
-        ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
-    Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
+  SDValue IntID = DAG.getTargetConstant(
+      IsStrided ? VsssegInts[NF - 2] : VssegInts[NF - 2], DL, XLenVT);
+  SDValue Ptr = Op->getOperand(NF + 2);
 
-    SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
-    SDValue Ptr = Op->getOperand(NF + 2);
+  auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
 
-    auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
+  SDValue StoredVal = DAG.getUNDEF(VecTupTy);
+  for (unsigned i = 0; i < NF; i++)
+    StoredVal = DAG.getNode(
+        RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
+        convertToScalableVector(ContainerVT, FixedIntrinsic->getOperand(2 + i),
+                                DAG, Subtarget),
+        DAG.getTargetConstant(i, DL, MVT::i32));
 
-    SDValue StoredVal = DAG.getUNDEF(VecTupTy);
-    for (unsigned i = 0; i < NF; i++)
-      StoredVal = DAG.getNode(
-          RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
-          convertToScalableVector(
-              ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget),
-          DAG.getTargetConstant(i, DL, MVT::i32));
+  SmallVector<SDValue, 10> Ops = {
+      FixedIntrinsic->getChain(),
+      IntID,
+      StoredVal,
+      Ptr,
+      Mask,
+      VL,
+      DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
+  if (IsStrided)
+    Ops.insert(std::next(Ops.begin(), 4),
+               Op.getOperand(Op.getNumOperands() - 3));
+
+  return DAG.getMemIntrinsicNode(
+      ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
+      FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
+}
+
+SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
+                                                 SelectionDAG &DAG) const {
+  unsigned IntNo = Op.getConstantOperandVal(1);
+  switch (IntNo) {
+  default:
+    break;
+  case Intrinsic::riscv_seg2_store_mask:
+  case Intrinsic::riscv_seg3_store_mask:
+  case Intrinsic::riscv_seg4_store_mask:
+  case Intrinsic::riscv_seg5_store_mask:
+  case Intrinsic::riscv_seg6_store_mask:
+  case Intrinsic::riscv_seg7_store_mask:
+  case Intrinsic::riscv_seg8_store_mask:
+  case Intrinsic::riscv_sseg2_store_mask:
+  case Intrinsic::riscv_sseg3_store_mask:
+  case Intrinsic::riscv_sseg4_store_mask:
+  case Intrinsic::riscv_sseg5_store_mask:
+  case Intrinsic::riscv_sseg6_store_mask:
+  case Intrinsic::riscv_sseg7_store_mask:
+  case Intrinsic::riscv_sseg8_store_mask:
+    return lowerFixedVectorSegStoreIntrinsics(IntNo, Op, Subtarget, DAG);
 
-    SDValue Ops[] = {
-        FixedIntrinsic->getChain(),
-        IntID,
-        StoredVal,
-        Ptr,
-        Mask,
-        VL,
-        DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
-
-    return DAG.getMemIntrinsicNode(
-        ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
-        FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
-  }
   case Intrinsic::riscv_sf_vc_xv_se:
     return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
   case Intrinsic::riscv_sf_vc_iv_se:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ssegN-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ssegN-store.ll
new file mode 100644
index 0000000000000..abf289419ec16
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ssegN-store.ll
@@ -0,0 +1,72 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+
+define void @store_factor2(<8 x i8> %v0, <8 x i8> %v1, ptr %ptr, i64 %stride) {
+; CHECK-LABEL: store_factor2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vssseg2e8.v v8, (a0), a1
+; CHECK-NEXT:    ret
+    call void @llvm.riscv.sseg2.store.mask.v8i8.i64.i64(<8 x i8> %v0, <8 x i8> %v1, ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8)
+    ret void
+}
+
+define void @store_factor3(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, ptr %ptr, i64 %stride) {
+; CHECK-LABEL: store_factor3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vssseg3e8.v v8, (a0), a1
+; CHECK-NEXT:    ret
+    call void @llvm.riscv.sseg3.store.mask.v8i8.i64.i64(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8)
+    ret void
+}
+
+define void @store_factor4(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, ptr %ptr, i64 %stride) {
+; CHECK-LABEL: store_factor4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vssseg4e8.v v8, (a0), a1
+; CHECK-NEXT:    ret
+    call void @llvm.riscv.sseg4.store.mask.v8i8.i64.i64(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8)
+    ret void
+}
+
+define void @store_factor5(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, ptr %ptr, i64 %stride) {
+; CHECK-LABEL: store_factor5:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vssseg5e8.v v8, (a0), a1
+; CHECK-NEXT:    ret
+    call void @llvm.riscv.sseg5.store.mask.v8i8.i64.i64(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8)
+    ret void
+}
+
+define void @store_factor6(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, ptr %ptr, i64 %stride) {
+; CHECK-LABEL: store_factor6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vssseg6e8.v v8, (a0), a1
+; CHECK-NEXT:    ret
+    call void @llvm.riscv.sseg6.store.mask.v8i8.i64.i64(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8)
+    ret void
+}
+
+define void @store_factor7(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, <8 x i8> %v6, ptr %ptr, i64 %stride) {
+; CHECK-LABEL: store_factor7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vssseg7e8.v v8, (a0), a1
+; CHECK-NEXT:    ret
+    call void @llvm.riscv.sseg7.store.mask.v8i8.i64.i64(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, <8 x i8> %v6, ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8)
+    ret void
+}
+
+define void @store_factor8(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, <8 x i8> %v6, <8 x i8> %v7, ptr %ptr, i64 %stride) {
+; CHECK-LABEL: store_factor8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vssseg8e8.v v8, (a0), a1
+; CHECK-NEXT:    ret
+    call void @llvm.riscv.sseg8.store.mask.v8i8.i64.i64(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, <8 x i8> %v6, <8 x i8> %v7, ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8)
+    ret void
+}

@llvmbot
Copy link
Member

llvmbot commented Aug 4, 2025

@llvm/pr-subscribers-backend-risc-v

Author: Min-Yih Hsu (mshockwave)

Changes

These are the strided versions of riscv.segN.store.mask intrinsics.


Full diff: https://github.com/llvm/llvm-project/pull/152038.diff

3 Files Affected:

  • (modified) llvm/include/llvm/IR/IntrinsicsRISCV.td (+11)
  • (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+109-50)
  • (added) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ssegN-store.ll (+72)
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 99f975faeb85e..c56e390d578a4 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1736,6 +1736,17 @@ let TargetPrefix = "riscv" in {
                                               [llvm_anyptr_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                                                llvm_anyint_ty]),
                                   [NoCapture<ArgIndex<nf>>, IntrWriteMem]>;
+
+    // Input: (<stored values>..., pointer, offset, mask, vl)
+    def int_riscv_sseg # nf # _store_mask
+          : DefaultAttrsIntrinsic<[],
+                                  !listconcat([llvm_anyvector_ty],
+                                              !listsplat(LLVMMatchType<0>,
+                                                          !add(nf, -1)),
+                                              [llvm_anyptr_ty, llvm_anyint_ty,
+                                               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                                               llvm_anyint_ty]),
+                                  [NoCapture<ArgIndex<nf>>, IntrWriteMem]>;
   }
 
 } // TargetPrefix = "riscv"
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e09e6fb5b26ee..671a107cc8f6b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1839,6 +1839,17 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
                                /*IsStore*/ true,
                                /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
+  case Intrinsic::riscv_sseg2_store_mask:
+  case Intrinsic::riscv_sseg3_store_mask:
+  case Intrinsic::riscv_sseg4_store_mask:
+  case Intrinsic::riscv_sseg5_store_mask:
+  case Intrinsic::riscv_sseg6_store_mask:
+  case Intrinsic::riscv_sseg7_store_mask:
+  case Intrinsic::riscv_sseg8_store_mask:
+    // Operands are (vec, ..., vec, ptr, offset, mask, vl)
+    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
+                               /*IsStore*/ true,
+                               /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
   case Intrinsic::riscv_vlm:
     return SetRVVLoadStoreInfo(/*PtrOp*/ 0,
                                /*IsStore*/ false,
@@ -11077,69 +11088,117 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
   return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
 }
 
-SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
-                                                 SelectionDAG &DAG) const {
-  unsigned IntNo = Op.getConstantOperandVal(1);
+static SDValue
+lowerFixedVectorSegStoreIntrinsics(unsigned IntNo, SDValue Op,
+                                   const RISCVSubtarget &Subtarget,
+                                   SelectionDAG &DAG) {
+  bool IsStrided;
   switch (IntNo) {
-  default:
-    break;
   case Intrinsic::riscv_seg2_store_mask:
   case Intrinsic::riscv_seg3_store_mask:
   case Intrinsic::riscv_seg4_store_mask:
   case Intrinsic::riscv_seg5_store_mask:
   case Intrinsic::riscv_seg6_store_mask:
   case Intrinsic::riscv_seg7_store_mask:
-  case Intrinsic::riscv_seg8_store_mask: {
-    SDLoc DL(Op);
-    static const Intrinsic::ID VssegInts[] = {
-        Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
-        Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
-        Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
-        Intrinsic::riscv_vsseg8_mask};
+  case Intrinsic::riscv_seg8_store_mask:
+    IsStrided = false;
+    break;
+  case Intrinsic::riscv_sseg2_store_mask:
+  case Intrinsic::riscv_sseg3_store_mask:
+  case Intrinsic::riscv_sseg4_store_mask:
+  case Intrinsic::riscv_sseg5_store_mask:
+  case Intrinsic::riscv_sseg6_store_mask:
+  case Intrinsic::riscv_sseg7_store_mask:
+  case Intrinsic::riscv_sseg8_store_mask:
+    IsStrided = true;
+    break;
+  default:
+    llvm_unreachable("unexpected intrinsic ID");
+  }
 
-    // Operands: (chain, int_id, vec*, ptr, mask, vl)
-    unsigned NF = Op->getNumOperands() - 5;
-    assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
-    MVT XLenVT = Subtarget.getXLenVT();
-    MVT VT = Op->getOperand(2).getSimpleValueType();
-    MVT ContainerVT = getContainerForFixedLengthVector(VT);
-    unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
-                  ContainerVT.getScalarSizeInBits();
-    EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
+  SDLoc DL(Op);
+  static const Intrinsic::ID VssegInts[] = {
+      Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
+      Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
+      Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
+      Intrinsic::riscv_vsseg8_mask};
+  static const Intrinsic::ID VsssegInts[] = {
+      Intrinsic::riscv_vssseg2_mask, Intrinsic::riscv_vssseg3_mask,
+      Intrinsic::riscv_vssseg4_mask, Intrinsic::riscv_vssseg5_mask,
+      Intrinsic::riscv_vssseg6_mask, Intrinsic::riscv_vssseg7_mask,
+      Intrinsic::riscv_vssseg8_mask};
+
+  // Operands: (chain, int_id, vec*, ptr, mask, vl) or
+  // (chain, int_id, vec*, ptr, stride, mask, vl)
+  unsigned NF = Op->getNumOperands() - (IsStrided ? 6 : 5);
+  assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
+  MVT XLenVT = Subtarget.getXLenVT();
+  MVT VT = Op->getOperand(2).getSimpleValueType();
+  MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
+  unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
+                ContainerVT.getScalarSizeInBits();
+  EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
+
+  SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
+  SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
+  MVT MaskVT = Mask.getSimpleValueType();
+  MVT MaskContainerVT =
+      ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
+  Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
 
-    SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
-    SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
-    MVT MaskVT = Mask.getSimpleValueType();
-    MVT MaskContainerVT =
-        ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
-    Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
+  SDValue IntID = DAG.getTargetConstant(
+      IsStrided ? VsssegInts[NF - 2] : VssegInts[NF - 2], DL, XLenVT);
+  SDValue Ptr = Op->getOperand(NF + 2);
 
-    SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
-    SDValue Ptr = Op->getOperand(NF + 2);
+  auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
 
-    auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
+  SDValue StoredVal = DAG.getUNDEF(VecTupTy);
+  for (unsigned i = 0; i < NF; i++)
+    StoredVal = DAG.getNode(
+        RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
+        convertToScalableVector(ContainerVT, FixedIntrinsic->getOperand(2 + i),
+                                DAG, Subtarget),
+        DAG.getTargetConstant(i, DL, MVT::i32));
 
-    SDValue StoredVal = DAG.getUNDEF(VecTupTy);
-    for (unsigned i = 0; i < NF; i++)
-      StoredVal = DAG.getNode(
-          RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
-          convertToScalableVector(
-              ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget),
-          DAG.getTargetConstant(i, DL, MVT::i32));
+  SmallVector<SDValue, 10> Ops = {
+      FixedIntrinsic->getChain(),
+      IntID,
+      StoredVal,
+      Ptr,
+      Mask,
+      VL,
+      DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
+  if (IsStrided)
+    Ops.insert(std::next(Ops.begin(), 4),
+               Op.getOperand(Op.getNumOperands() - 3));
+
+  return DAG.getMemIntrinsicNode(
+      ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
+      FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
+}
+
+SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
+                                                 SelectionDAG &DAG) const {
+  unsigned IntNo = Op.getConstantOperandVal(1);
+  switch (IntNo) {
+  default:
+    break;
+  case Intrinsic::riscv_seg2_store_mask:
+  case Intrinsic::riscv_seg3_store_mask:
+  case Intrinsic::riscv_seg4_store_mask:
+  case Intrinsic::riscv_seg5_store_mask:
+  case Intrinsic::riscv_seg6_store_mask:
+  case Intrinsic::riscv_seg7_store_mask:
+  case Intrinsic::riscv_seg8_store_mask:
+  case Intrinsic::riscv_sseg2_store_mask:
+  case Intrinsic::riscv_sseg3_store_mask:
+  case Intrinsic::riscv_sseg4_store_mask:
+  case Intrinsic::riscv_sseg5_store_mask:
+  case Intrinsic::riscv_sseg6_store_mask:
+  case Intrinsic::riscv_sseg7_store_mask:
+  case Intrinsic::riscv_sseg8_store_mask:
+    return lowerFixedVectorSegStoreIntrinsics(IntNo, Op, Subtarget, DAG);
 
-    SDValue Ops[] = {
-        FixedIntrinsic->getChain(),
-        IntID,
-        StoredVal,
-        Ptr,
-        Mask,
-        VL,
-        DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
-
-    return DAG.getMemIntrinsicNode(
-        ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
-        FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
-  }
   case Intrinsic::riscv_sf_vc_xv_se:
     return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
   case Intrinsic::riscv_sf_vc_iv_se:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ssegN-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ssegN-store.ll
new file mode 100644
index 0000000000000..abf289419ec16
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ssegN-store.ll
@@ -0,0 +1,72 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+
+define void @store_factor2(<8 x i8> %v0, <8 x i8> %v1, ptr %ptr, i64 %stride) {
+; CHECK-LABEL: store_factor2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vssseg2e8.v v8, (a0), a1
+; CHECK-NEXT:    ret
+    call void @llvm.riscv.sseg2.store.mask.v8i8.i64.i64(<8 x i8> %v0, <8 x i8> %v1, ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8)
+    ret void
+}
+
+define void @store_factor3(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, ptr %ptr, i64 %stride) {
+; CHECK-LABEL: store_factor3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vssseg3e8.v v8, (a0), a1
+; CHECK-NEXT:    ret
+    call void @llvm.riscv.sseg3.store.mask.v8i8.i64.i64(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8)
+    ret void
+}
+
+define void @store_factor4(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, ptr %ptr, i64 %stride) {
+; CHECK-LABEL: store_factor4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vssseg4e8.v v8, (a0), a1
+; CHECK-NEXT:    ret
+    call void @llvm.riscv.sseg4.store.mask.v8i8.i64.i64(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8)
+    ret void
+}
+
+define void @store_factor5(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, ptr %ptr, i64 %stride) {
+; CHECK-LABEL: store_factor5:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vssseg5e8.v v8, (a0), a1
+; CHECK-NEXT:    ret
+    call void @llvm.riscv.sseg5.store.mask.v8i8.i64.i64(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8)
+    ret void
+}
+
+define void @store_factor6(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, ptr %ptr, i64 %stride) {
+; CHECK-LABEL: store_factor6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vssseg6e8.v v8, (a0), a1
+; CHECK-NEXT:    ret
+    call void @llvm.riscv.sseg6.store.mask.v8i8.i64.i64(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8)
+    ret void
+}
+
+define void @store_factor7(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, <8 x i8> %v6, ptr %ptr, i64 %stride) {
+; CHECK-LABEL: store_factor7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vssseg7e8.v v8, (a0), a1
+; CHECK-NEXT:    ret
+    call void @llvm.riscv.sseg7.store.mask.v8i8.i64.i64(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, <8 x i8> %v6, ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8)
+    ret void
+}
+
+define void @store_factor8(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, <8 x i8> %v6, <8 x i8> %v7, ptr %ptr, i64 %stride) {
+; CHECK-LABEL: store_factor8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vssseg8e8.v v8, (a0), a1
+; CHECK-NEXT:    ret
+    call void @llvm.riscv.sseg8.store.mask.v8i8.i64.i64(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, <8 x i8> %v6, <8 x i8> %v7, ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8)
+    ret void
+}

Copy link
Collaborator

@preames preames left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM w/minor comment

@@ -1736,6 +1736,17 @@ let TargetPrefix = "riscv" in {
[llvm_anyptr_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_anyint_ty]),
[NoCapture<ArgIndex<nf>>, IntrWriteMem]>;

// Input: (<stored values>..., pointer, offset, mask, vl)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't catch this in the previous review, but "offset" should probably be "stride" for clarity.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh yeah I agree, it's fixed now. I'm going to fix the load one in another NFC commit.

@mshockwave mshockwave merged commit c065ed3 into llvm:main Aug 8, 2025
7 of 9 checks passed
@mshockwave mshockwave deleted the patch/rvv/strided-segmented-store-intrinsics branch August 8, 2025 21:08
@llvm-ci
Copy link
Collaborator

llvm-ci commented Aug 8, 2025

LLVM Buildbot has detected a new failure on builder arc-builder running on arc-worker while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/3/builds/20297

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: CodeGen/X86/sse2-intrinsics-fast-isel.ll' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
/buildbot/worker/arc-folder/build/bin/llc < /buildbot/worker/arc-folder/llvm-project/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2 | /buildbot/worker/arc-folder/build/bin/FileCheck /buildbot/worker/arc-folder/llvm-project/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll --check-prefixes=CHECK,X86,SSE,X86-SSE # RUN: at line 2
+ /buildbot/worker/arc-folder/build/bin/FileCheck /buildbot/worker/arc-folder/llvm-project/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll --check-prefixes=CHECK,X86,SSE,X86-SSE
+ /buildbot/worker/arc-folder/build/bin/llc -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2
LLVM ERROR: Cannot select: intrinsic %llvm.x86.sse2.clflush
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: /buildbot/worker/arc-folder/build/bin/llc -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2
1.	Running pass 'Function Pass Manager' on module '<stdin>'.
2.	Running pass 'X86 DAG->DAG Instruction Selection' on function '@test_mm_clflush'
 #0 0x0000000002322c98 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/buildbot/worker/arc-folder/build/bin/llc+0x2322c98)
 #1 0x000000000231fba5 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #2 0x00007fbce2009630 __restore_rt sigaction.c:0:0
 #3 0x00007fbce0d593d7 raise (/usr/lib64/libc.so.6+0x363d7)
 #4 0x00007fbce0d5aac8 abort (/usr/lib64/libc.so.6+0x37ac8)
 #5 0x000000000071fa73 llvm::json::operator==(llvm::json::Value const&, llvm::json::Value const&) (.cold) JSON.cpp:0:0
 #6 0x00000000020b0569 llvm::SelectionDAGISel::CannotYetSelect(llvm::SDNode*) (/buildbot/worker/arc-folder/build/bin/llc+0x20b0569)
 #7 0x00000000020b508a llvm::SelectionDAGISel::SelectCodeCommon(llvm::SDNode*, unsigned char const*, unsigned int) (/buildbot/worker/arc-folder/build/bin/llc+0x20b508a)
 #8 0x0000000000964297 (anonymous namespace)::X86DAGToDAGISel::Select(llvm::SDNode*) X86ISelDAGToDAG.cpp:0:0
 #9 0x00000000020abddf llvm::SelectionDAGISel::DoInstructionSelection() (/buildbot/worker/arc-folder/build/bin/llc+0x20abddf)
#10 0x00000000020bbaf0 llvm::SelectionDAGISel::CodeGenAndEmitDAG() (/buildbot/worker/arc-folder/build/bin/llc+0x20bbaf0)
#11 0x00000000020bf2ad llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) (/buildbot/worker/arc-folder/build/bin/llc+0x20bf2ad)
#12 0x00000000020c0425 llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) (/buildbot/worker/arc-folder/build/bin/llc+0x20c0425)
#13 0x00000000020ab5ef llvm::SelectionDAGISelLegacy::runOnMachineFunction(llvm::MachineFunction&) (/buildbot/worker/arc-folder/build/bin/llc+0x20ab5ef)
#14 0x000000000120e877 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (.part.0) MachineFunctionPass.cpp:0:0
#15 0x000000000185fd62 llvm::FPPassManager::runOnFunction(llvm::Function&) (/buildbot/worker/arc-folder/build/bin/llc+0x185fd62)
#16 0x0000000001860101 llvm::FPPassManager::runOnModule(llvm::Module&) (/buildbot/worker/arc-folder/build/bin/llc+0x1860101)
#17 0x0000000001860d17 llvm::legacy::PassManagerImpl::run(llvm::Module&) (/buildbot/worker/arc-folder/build/bin/llc+0x1860d17)
#18 0x0000000000801a42 compileModule(char**, llvm::LLVMContext&) llc.cpp:0:0
#19 0x0000000000728116 main (/buildbot/worker/arc-folder/build/bin/llc+0x728116)
#20 0x00007fbce0d45555 __libc_start_main (/usr/lib64/libc.so.6+0x22555)
#21 0x00000000007f7c66 _start (/buildbot/worker/arc-folder/build/bin/llc+0x7f7c66)
/buildbot/worker/arc-folder/llvm-project/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll:399:14: error: SSE-LABEL: expected string not found in input
; SSE-LABEL: test_mm_bsrli_si128:
             ^
<stdin>:170:21: note: scanning from here
test_mm_bslli_si128: # @test_mm_bslli_si128
                    ^
<stdin>:178:9: note: possible intended match here
 .globl test_mm_bsrli_si128 # 
        ^

Input file: <stdin>
Check file: /buildbot/worker/arc-folder/llvm-project/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll

-dump-input=help explains the following input dump.
...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants