Skip to content

Commit 7851b8a

Browse files
authored
[RISCV] Combine vslide{up,down} x, poison -> x (llvm#169013)
The motivation for this is that it would be useful to express a vslideup/vslidedown in a target-independent way, e.g. from the loop vectorizer. We can do this today with @llvm.vector.splice by setting one operand to poison:

- A slide down can be achieved with @llvm.vector.splice(%x, poison, slideamt).
- A slide up can be done with @llvm.vector.splice(poison, %x, -slideamt).

For example:

    splice(<a,b,c,d>, poison, 3)  = <d,poison,poison,poison>
    splice(poison, <a,b,c,d>, -3) = <poison,poison,poison,a>

These splices get lowered to a vslideup + vslidedown pair with one of the vs2 operands being poison. We can optimize that away so that we are left with just a single vslideup/vslidedown.
1 parent ee4f647 commit 7851b8a

File tree

3 files changed

+156
-0
lines changed

3 files changed

+156
-0
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21802,6 +21802,11 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
2180221802
return N->getOperand(0);
2180321803
break;
2180421804
}
21805+
case RISCVISD::VSLIDEDOWN_VL:
21806+
case RISCVISD::VSLIDEUP_VL:
21807+
if (N->getOperand(1)->isUndef())
21808+
return N->getOperand(0);
21809+
break;
2180521810
case RISCVISD::VSLIDE1UP_VL:
2180621811
case RISCVISD::VFSLIDE1UP_VL: {
2180721812
using namespace SDPatternMatch;
Lines changed: 91 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,91 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -mtriple riscv32 -mattr=+v < %s | FileCheck %s
3+
; RUN: llc -mtriple riscv64 -mattr=+v < %s | FileCheck %s
4+
; RUN: llc -mtriple riscv32 -mattr=+v,+vl-dependent-latency < %s | FileCheck %s
5+
; RUN: llc -mtriple riscv64 -mattr=+v,+vl-dependent-latency < %s | FileCheck %s
6+
7+
define <4 x i32> @splice_v4i32_slidedown(<4 x i32> %a, <4 x i32> %b) {
8+
; CHECK-LABEL: splice_v4i32_slidedown:
9+
; CHECK: # %bb.0:
10+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
11+
; CHECK-NEXT: vrgather.vi v9, v8, 3
12+
; CHECK-NEXT: vmv.v.v v8, v9
13+
; CHECK-NEXT: ret
14+
%res = call <4 x i32> @llvm.vector.splice(<4 x i32> %a, <4 x i32> poison, i32 3)
15+
ret <4 x i32> %res
16+
}
17+
18+
define <4 x i32> @splice_4i32_slideup(<4 x i32> %a) {
19+
; CHECK-LABEL: splice_4i32_slideup:
20+
; CHECK: # %bb.0:
21+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
22+
; CHECK-NEXT: vrgather.vi v9, v8, 0
23+
; CHECK-NEXT: vmv.v.v v8, v9
24+
; CHECK-NEXT: ret
25+
%res = call <4 x i32> @llvm.vector.splice(<4 x i32> poison, <4 x i32> %a, i32 -3)
26+
ret <4 x i32> %res
27+
}
28+
29+
define <8 x i32> @splice_v8i32_slidedown(<8 x i32> %a, <8 x i32> %b) {
30+
; CHECK-LABEL: splice_v8i32_slidedown:
31+
; CHECK: # %bb.0:
32+
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
33+
; CHECK-NEXT: vslidedown.vi v8, v8, 3
34+
; CHECK-NEXT: ret
35+
%res = call <8 x i32> @llvm.vector.splice(<8 x i32> %a, <8 x i32> poison, i32 3)
36+
ret <8 x i32> %res
37+
}
38+
39+
define <8 x i32> @splice_v8i32_slideup(<8 x i32> %a) {
40+
; CHECK-LABEL: splice_v8i32_slideup:
41+
; CHECK: # %bb.0:
42+
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
43+
; CHECK-NEXT: vslideup.vi v10, v8, 3
44+
; CHECK-NEXT: vmv.v.v v8, v10
45+
; CHECK-NEXT: ret
46+
%res = call <8 x i32> @llvm.vector.splice(<8 x i32> poison, <8 x i32> %a, i32 -3)
47+
ret <8 x i32> %res
48+
}
49+
50+
define <4 x i32> @splice_v4i32_slidedown_undef(<4 x i32> %a, <4 x i32> %b) {
51+
; CHECK-LABEL: splice_v4i32_slidedown_undef:
52+
; CHECK: # %bb.0:
53+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
54+
; CHECK-NEXT: vrgather.vi v9, v8, 3
55+
; CHECK-NEXT: vmv.v.v v8, v9
56+
; CHECK-NEXT: ret
57+
%res = call <4 x i32> @llvm.vector.splice(<4 x i32> %a, <4 x i32> undef, i32 3)
58+
ret <4 x i32> %res
59+
}
60+
61+
define <4 x i32> @splice_4i32_slideup_undef(<4 x i32> %a) {
62+
; CHECK-LABEL: splice_4i32_slideup_undef:
63+
; CHECK: # %bb.0:
64+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
65+
; CHECK-NEXT: vrgather.vi v9, v8, 0
66+
; CHECK-NEXT: vmv.v.v v8, v9
67+
; CHECK-NEXT: ret
68+
%res = call <4 x i32> @llvm.vector.splice(<4 x i32> undef, <4 x i32> %a, i32 -3)
69+
ret <4 x i32> %res
70+
}
71+
72+
define <8 x i32> @splice_v8i32_slidedown_undef(<8 x i32> %a, <8 x i32> %b) {
73+
; CHECK-LABEL: splice_v8i32_slidedown_undef:
74+
; CHECK: # %bb.0:
75+
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
76+
; CHECK-NEXT: vslidedown.vi v8, v8, 3
77+
; CHECK-NEXT: ret
78+
%res = call <8 x i32> @llvm.vector.splice(<8 x i32> %a, <8 x i32> undef, i32 3)
79+
ret <8 x i32> %res
80+
}
81+
82+
define <8 x i32> @splice_v8i32_slideup_undef(<8 x i32> %a) {
83+
; CHECK-LABEL: splice_v8i32_slideup_undef:
84+
; CHECK: # %bb.0:
85+
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
86+
; CHECK-NEXT: vslideup.vi v10, v8, 3
87+
; CHECK-NEXT: vmv.v.v v8, v10
88+
; CHECK-NEXT: ret
89+
%res = call <8 x i32> @llvm.vector.splice(<8 x i32> undef, <8 x i32> %a, i32 -3)
90+
ret <8 x i32> %res
91+
}

llvm/test/CodeGen/RISCV/rvv/vector-splice.ll

Lines changed: 60 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4247,4 +4247,64 @@ define <vscale x 8 x double> @splice_nxv8f64_offset_max(<vscale x 8 x double> %a
42474247
ret <vscale x 8 x double> %res
42484248
}
42494249

4250+
define <vscale x 2 x i32> @splice_nxv2i32_slidedown(<vscale x 2 x i32> %a) #0 {
4251+
; NOVLDEP-LABEL: splice_nxv2i32_slidedown:
4252+
; NOVLDEP: # %bb.0:
4253+
; NOVLDEP-NEXT: vsetvli a0, zero, e32, m1, ta, ma
4254+
; NOVLDEP-NEXT: vslidedown.vi v8, v8, 3
4255+
; NOVLDEP-NEXT: ret
4256+
;
4257+
; VLDEP-LABEL: splice_nxv2i32_slidedown:
4258+
; VLDEP: # %bb.0:
4259+
; VLDEP-NEXT: csrr a0, vlenb
4260+
; VLDEP-NEXT: srli a0, a0, 2
4261+
; VLDEP-NEXT: addi a0, a0, -3
4262+
; VLDEP-NEXT: vsetvli zero, a0, e32, m1, ta, ma
4263+
; VLDEP-NEXT: vslidedown.vi v8, v8, 3
4264+
; VLDEP-NEXT: ret
4265+
%res = call <vscale x 2 x i32> @llvm.vector.splice(<vscale x 2 x i32> %a, <vscale x 2 x i32> poison, i32 3)
4266+
ret <vscale x 2 x i32> %res
4267+
}
4268+
4269+
define <vscale x 2 x i32> @splice_nxv2i32_slideup(<vscale x 2 x i32> %a) #0 {
4270+
; CHECK-LABEL: splice_nxv2i32_slideup:
4271+
; CHECK: # %bb.0:
4272+
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
4273+
; CHECK-NEXT: vslideup.vi v9, v8, 3
4274+
; CHECK-NEXT: vmv.v.v v8, v9
4275+
; CHECK-NEXT: ret
4276+
%res = call <vscale x 2 x i32> @llvm.vector.splice(<vscale x 2 x i32> poison, <vscale x 2 x i32> %a, i32 -3)
4277+
ret <vscale x 2 x i32> %res
4278+
}
4279+
4280+
define <vscale x 2 x i32> @splice_nxv2i32_slidedown_undef(<vscale x 2 x i32> %a) #0 {
4281+
; NOVLDEP-LABEL: splice_nxv2i32_slidedown_undef:
4282+
; NOVLDEP: # %bb.0:
4283+
; NOVLDEP-NEXT: vsetvli a0, zero, e32, m1, ta, ma
4284+
; NOVLDEP-NEXT: vslidedown.vi v8, v8, 3
4285+
; NOVLDEP-NEXT: ret
4286+
;
4287+
; VLDEP-LABEL: splice_nxv2i32_slidedown_undef:
4288+
; VLDEP: # %bb.0:
4289+
; VLDEP-NEXT: csrr a0, vlenb
4290+
; VLDEP-NEXT: srli a0, a0, 2
4291+
; VLDEP-NEXT: addi a0, a0, -3
4292+
; VLDEP-NEXT: vsetvli zero, a0, e32, m1, ta, ma
4293+
; VLDEP-NEXT: vslidedown.vi v8, v8, 3
4294+
; VLDEP-NEXT: ret
4295+
%res = call <vscale x 2 x i32> @llvm.vector.splice(<vscale x 2 x i32> %a, <vscale x 2 x i32> undef, i32 3)
4296+
ret <vscale x 2 x i32> %res
4297+
}
4298+
4299+
define <vscale x 2 x i32> @splice_nxv2i32_slideup_undef(<vscale x 2 x i32> %a) #0 {
4300+
; CHECK-LABEL: splice_nxv2i32_slideup_undef:
4301+
; CHECK: # %bb.0:
4302+
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
4303+
; CHECK-NEXT: vslideup.vi v9, v8, 3
4304+
; CHECK-NEXT: vmv.v.v v8, v9
4305+
; CHECK-NEXT: ret
4306+
%res = call <vscale x 2 x i32> @llvm.vector.splice(<vscale x 2 x i32> undef, <vscale x 2 x i32> %a, i32 -3)
4307+
ret <vscale x 2 x i32> %res
4308+
}
4309+
42504310
attributes #0 = { vscale_range(2,0) }

0 commit comments

Comments
 (0)