Skip to content

Commit 7851b8a

Browse files
authored
[RISCV] Combine vslide{up,down} x, poison -> x (llvm#169013)
The motivation for this is that it would be useful to express a vslideup/vslidedown in a target-independent way, e.g. from the loop vectorizer. We can do this today with @llvm.vector.splice by setting one operand to poison:

- A slide down can be achieved with @llvm.vector.splice(%x, poison, slideamt).
- A slide up can be done with @llvm.vector.splice(poison, %x, -slideamt).

For example:

    splice(<a,b,c,d>, poison, 3)  = <d,poison,poison,poison>
    splice(poison, <a,b,c,d>, -3) = <poison,poison,poison,a>

These splices get lowered to a vslideup + vslidedown pair with one of the vs2 operands being poison. We can optimize that away so that we are left with just a single vslideup/vslidedown.
1 parent ee4f647 commit 7851b8a

File tree

3 files changed

+156
-0
lines changed

3 files changed

+156
-0
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21802,6 +21802,11 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
2180221802
return N->getOperand(0);
2180321803
break;
2180421804
}
21805+
case RISCVISD::VSLIDEDOWN_VL:
21806+
case RISCVISD::VSLIDEUP_VL:
21807+
if (N->getOperand(1)->isUndef())
21808+
return N->getOperand(0);
21809+
break;
2180521810
case RISCVISD::VSLIDE1UP_VL:
2180621811
case RISCVISD::VFSLIDE1UP_VL: {
2180721812
using namespace SDPatternMatch;
Lines changed: 91 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,91 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -mtriple riscv32 -mattr=+v < %s | FileCheck %s
3+
; RUN: llc -mtriple riscv64 -mattr=+v < %s | FileCheck %s
4+
; RUN: llc -mtriple riscv32 -mattr=+v,+vl-dependent-latency < %s | FileCheck %s
5+
; RUN: llc -mtriple riscv64 -mattr=+v,+vl-dependent-latency < %s | FileCheck %s
6+
7+
define <4 x i32> @splice_v4i32_slidedown(<4 x i32> %a, <4 x i32> %b) {
8+
; CHECK-LABEL: splice_v4i32_slidedown:
9+
; CHECK: # %bb.0:
10+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
11+
; CHECK-NEXT: vrgather.vi v9, v8, 3
12+
; CHECK-NEXT: vmv.v.v v8, v9
13+
; CHECK-NEXT: ret
14+
%res = call <4 x i32> @llvm.vector.splice(<4 x i32> %a, <4 x i32> poison, i32 3)
15+
ret <4 x i32> %res
16+
}
17+
18+
define <4 x i32> @splice_4i32_slideup(<4 x i32> %a) {
19+
; CHECK-LABEL: splice_4i32_slideup:
20+
; CHECK: # %bb.0:
21+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
22+
; CHECK-NEXT: vrgather.vi v9, v8, 0
23+
; CHECK-NEXT: vmv.v.v v8, v9
24+
; CHECK-NEXT: ret
25+
%res = call <4 x i32> @llvm.vector.splice(<4 x i32> poison, <4 x i32> %a, i32 -3)
26+
ret <4 x i32> %res
27+
}
28+
29+
define <8 x i32> @splice_v8i32_slidedown(<8 x i32> %a, <8 x i32> %b) {
30+
; CHECK-LABEL: splice_v8i32_slidedown:
31+
; CHECK: # %bb.0:
32+
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
33+
; CHECK-NEXT: vslidedown.vi v8, v8, 3
34+
; CHECK-NEXT: ret
35+
%res = call <8 x i32> @llvm.vector.splice(<8 x i32> %a, <8 x i32> poison, i32 3)
36+
ret <8 x i32> %res
37+
}
38+
39+
define <8 x i32> @splice_v8i32_slideup(<8 x i32> %a) {
40+
; CHECK-LABEL: splice_v8i32_slideup:
41+
; CHECK: # %bb.0:
42+
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
43+
; CHECK-NEXT: vslideup.vi v10, v8, 3
44+
; CHECK-NEXT: vmv.v.v v8, v10
45+
; CHECK-NEXT: ret
46+
%res = call <8 x i32> @llvm.vector.splice(<8 x i32> poison, <8 x i32> %a, i32 -3)
47+
ret <8 x i32> %res
48+
}
49+
50+
define <4 x i32> @splice_v4i32_slidedown_undef(<4 x i32> %a, <4 x i32> %b) {
51+
; CHECK-LABEL: splice_v4i32_slidedown_undef:
52+
; CHECK: # %bb.0:
53+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
54+
; CHECK-NEXT: vrgather.vi v9, v8, 3
55+
; CHECK-NEXT: vmv.v.v v8, v9
56+
; CHECK-NEXT: ret
57+
%res = call <4 x i32> @llvm.vector.splice(<4 x i32> %a, <4 x i32> undef, i32 3)
58+
ret <4 x i32> %res
59+
}
60+
61+
define <4 x i32> @splice_4i32_slideup_undef(<4 x i32> %a) {
62+
; CHECK-LABEL: splice_4i32_slideup_undef:
63+
; CHECK: # %bb.0:
64+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
65+
; CHECK-NEXT: vrgather.vi v9, v8, 0
66+
; CHECK-NEXT: vmv.v.v v8, v9
67+
; CHECK-NEXT: ret
68+
%res = call <4 x i32> @llvm.vector.splice(<4 x i32> undef, <4 x i32> %a, i32 -3)
69+
ret <4 x i32> %res
70+
}
71+
72+
define <8 x i32> @splice_v8i32_slidedown_undef(<8 x i32> %a, <8 x i32> %b) {
73+
; CHECK-LABEL: splice_v8i32_slidedown_undef:
74+
; CHECK: # %bb.0:
75+
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
76+
; CHECK-NEXT: vslidedown.vi v8, v8, 3
77+
; CHECK-NEXT: ret
78+
%res = call <8 x i32> @llvm.vector.splice(<8 x i32> %a, <8 x i32> undef, i32 3)
79+
ret <8 x i32> %res
80+
}
81+
82+
define <8 x i32> @splice_v8i32_slideup_undef(<8 x i32> %a) {
83+
; CHECK-LABEL: splice_v8i32_slideup_undef:
84+
; CHECK: # %bb.0:
85+
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
86+
; CHECK-NEXT: vslideup.vi v10, v8, 3
87+
; CHECK-NEXT: vmv.v.v v8, v10
88+
; CHECK-NEXT: ret
89+
%res = call <8 x i32> @llvm.vector.splice(<8 x i32> undef, <8 x i32> %a, i32 -3)
90+
ret <8 x i32> %res
91+
}

llvm/test/CodeGen/RISCV/rvv/vector-splice.ll

Lines changed: 60 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4247,4 +4247,64 @@ define <vscale x 8 x double> @splice_nxv8f64_offset_max(<vscale x 8 x double> %a
42474247
ret <vscale x 8 x double> %res
42484248
}
42494249

4250+
define <vscale x 2 x i32> @splice_nxv2i32_slidedown(<vscale x 2 x i32> %a) #0 {
4251+
; NOVLDEP-LABEL: splice_nxv2i32_slidedown:
4252+
; NOVLDEP: # %bb.0:
4253+
; NOVLDEP-NEXT: vsetvli a0, zero, e32, m1, ta, ma
4254+
; NOVLDEP-NEXT: vslidedown.vi v8, v8, 3
4255+
; NOVLDEP-NEXT: ret
4256+
;
4257+
; VLDEP-LABEL: splice_nxv2i32_slidedown:
4258+
; VLDEP: # %bb.0:
4259+
; VLDEP-NEXT: csrr a0, vlenb
4260+
; VLDEP-NEXT: srli a0, a0, 2
4261+
; VLDEP-NEXT: addi a0, a0, -3
4262+
; VLDEP-NEXT: vsetvli zero, a0, e32, m1, ta, ma
4263+
; VLDEP-NEXT: vslidedown.vi v8, v8, 3
4264+
; VLDEP-NEXT: ret
4265+
%res = call <vscale x 2 x i32> @llvm.vector.splice(<vscale x 2 x i32> %a, <vscale x 2 x i32> poison, i32 3)
4266+
ret <vscale x 2 x i32> %res
4267+
}
4268+
4269+
define <vscale x 2 x i32> @splice_nxv2i32_slideup(<vscale x 2 x i32> %a) #0 {
4270+
; CHECK-LABEL: splice_nxv2i32_slideup:
4271+
; CHECK: # %bb.0:
4272+
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
4273+
; CHECK-NEXT: vslideup.vi v9, v8, 3
4274+
; CHECK-NEXT: vmv.v.v v8, v9
4275+
; CHECK-NEXT: ret
4276+
%res = call <vscale x 2 x i32> @llvm.vector.splice(<vscale x 2 x i32> poison, <vscale x 2 x i32> %a, i32 -3)
4277+
ret <vscale x 2 x i32> %res
4278+
}
4279+
4280+
define <vscale x 2 x i32> @splice_nxv2i32_slidedown_undef(<vscale x 2 x i32> %a) #0 {
4281+
; NOVLDEP-LABEL: splice_nxv2i32_slidedown_undef:
4282+
; NOVLDEP: # %bb.0:
4283+
; NOVLDEP-NEXT: vsetvli a0, zero, e32, m1, ta, ma
4284+
; NOVLDEP-NEXT: vslidedown.vi v8, v8, 3
4285+
; NOVLDEP-NEXT: ret
4286+
;
4287+
; VLDEP-LABEL: splice_nxv2i32_slidedown_undef:
4288+
; VLDEP: # %bb.0:
4289+
; VLDEP-NEXT: csrr a0, vlenb
4290+
; VLDEP-NEXT: srli a0, a0, 2
4291+
; VLDEP-NEXT: addi a0, a0, -3
4292+
; VLDEP-NEXT: vsetvli zero, a0, e32, m1, ta, ma
4293+
; VLDEP-NEXT: vslidedown.vi v8, v8, 3
4294+
; VLDEP-NEXT: ret
4295+
%res = call <vscale x 2 x i32> @llvm.vector.splice(<vscale x 2 x i32> %a, <vscale x 2 x i32> undef, i32 3)
4296+
ret <vscale x 2 x i32> %res
4297+
}
4298+
4299+
define <vscale x 2 x i32> @splice_nxv2i32_slideup_undef(<vscale x 2 x i32> %a) #0 {
4300+
; CHECK-LABEL: splice_nxv2i32_slideup_undef:
4301+
; CHECK: # %bb.0:
4302+
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
4303+
; CHECK-NEXT: vslideup.vi v9, v8, 3
4304+
; CHECK-NEXT: vmv.v.v v8, v9
4305+
; CHECK-NEXT: ret
4306+
%res = call <vscale x 2 x i32> @llvm.vector.splice(<vscale x 2 x i32> undef, <vscale x 2 x i32> %a, i32 -3)
4307+
ret <vscale x 2 x i32> %res
4308+
}
4309+
42504310
attributes #0 = { vscale_range(2,0) }

0 commit comments

Comments
 (0)