
[RISCV][VLOPT] Added support for vmv.s.x and vfmv.s.f #149562

Open · wants to merge 6 commits into main
5 changes: 5 additions & 0 deletions llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -1059,6 +1059,11 @@ static bool isSupportedInstr(const MachineInstr &MI) {
case RISCV::VMSOF_M:
case RISCV::VIOTA_M:
case RISCV::VID_V:
+// Vector Permutation Instructions
+// Integer Scalar Move Instructions
+// Floating-Point Scalar Move Instructions
+case RISCV::VMV_S_X:
+case RISCV::VFMV_S_F:
// Vector Slide Instructions
case RISCV::VSLIDEUP_VX:
case RISCV::VSLIDEUP_VI:
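For context, a minimal before/after sketch of what listing these opcodes enables (illustrative only; the element width and register choices are assumptions, not taken from this PR). Since vmv.s.x and vfmv.s.f write only element 0 of the destination, running them at VL = 1 is equivalent to running them at any larger VL whenever their users demand only element 0, which is what lets the VL optimizer shrink the preceding vsetvli:

```asm
# Before VLOpt: the scalar move executes under a VLMAX configuration.
vsetvli a0, zero, e32, m1, ta, ma    # VL = VLMAX
vmv.s.x v9, a1                       # v9[0] = a1; tail elements are agnostic

# After VLOpt: VL reduced to 1; v9[0] gets the same value either way.
vsetivli zero, 1, e32, m1, ta, ma    # VL = 1
vmv.s.x v9, a1
```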
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/RISCV/double_reduct.ll
@@ -106,7 +106,7 @@ define i16 @add_ext_i16(<16 x i8> %a, <16 x i8> %b) {
define i16 @add_ext_v32i16(<32 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: add_ext_v32i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e16, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v11, zero
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vwredsumu.vs v10, v10, v11
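The hunk above is representative of most of the test churn in this PR: a reduction reads only element 0 of its scalar operand (vs1), so the vmv.s.x that seeds the accumulator needs only VL >= 1. A hand-annotated sketch of the resulting sequence (the comments are mine, not part of the test):

```asm
vsetivli zero, 1, e16, m1, ta, ma    # VL = 1 suffices to define v11[0]
vmv.s.x v11, zero                    # v11[0] = 0, the additive identity
vsetivli zero, 16, e8, m1, ta, ma    # the reduction still covers 16 elements
vwredsumu.vs v10, v10, v11           # v10[0] = v11[0] + sum(zext(v10[0..15]))
```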
3 changes: 2 additions & 1 deletion llvm/test/CodeGen/RISCV/machinelicm-constant-phys-reg.ll
@@ -7,13 +7,14 @@ define i32 @test(ptr %a, i64 %n) {
; CHECK-LABEL: test:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v8, zero
; CHECK-NEXT: .LBB0_1: # %loop
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v9, (a0)
; CHECK-NEXT: mv a2, a3
; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma
Member Author: An extra vsetvli is being created here.

Collaborator: Looking at the code for this example, this looks to be an in-loop reduction, which should be super rare in practice. For vadd, we really should be doing an out-of-loop reduction instead.

One thing that's worth investigating (but not blocking this review) is why, for this case, the extra vsetvli isn't being PREd into the loop header. If it were, even in this case the extra toggle would be loop-invariant (and I think our backwards walk would kill the toggle anyway).

Member Author: IIUC, doPRE doesn't handle the loop we see in this program, so it bails.

I'm writing a patch to change that, and I'm removing the WIP from this PR, since changing doPRE is out of scope for this PR.

; CHECK-NEXT: vredsum.vs v9, v9, v8
; CHECK-NEXT: vmv.x.s a3, v9
; CHECK-NEXT: addw a3, a3, a3
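To make the thread above concrete, here is a simplified sketch of the loop shape being discussed (condensed from the test above; the branch and label details are assumptions):

```asm
.LBB0_1:                                # %loop
  vl1re32.v  v9, (a0)                   # whole-register load; ignores VL
  addi       a1, a1, -1
  vsetvli    a3, zero, e32, m1, ta, ma  # the extra in-loop VL toggle
  vredsum.vs v9, v9, v8                 # in-loop reduction
  vmv.x.s    a3, v9
  bnez       a1, .LBB0_1                # assumed loop back-edge
# If PRE hoisted the vsetvli into the loop preheader, the toggle would be
# loop-invariant and would execute once instead of on every iteration.
```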
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -1055,10 +1055,9 @@ define <512 x i8> @buildvec_not_vid_v512i8_indices_overflow_2() vscale_range(16,
define <8 x i32> @prefix_overwrite(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: prefix_overwrite:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e32, m1, tu, ma
+; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; CHECK-NEXT: vmv.s.x v10, a1
; CHECK-NEXT: vmv.s.x v8, a0
-; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 1
; CHECK-NEXT: vmv.s.x v10, a2
; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
@@ -1174,7 +1174,7 @@ define double @vreduce_ord_fwadd_v32f64(ptr %x, double %s) {
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v24, fa0
; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT: vslidedown.vi v16, v8, 16
15 changes: 5 additions & 10 deletions llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll
@@ -110,9 +110,8 @@ define <vscale x 4 x bfloat> @insertelt_nxv4bf16_idx(<vscale x 4 x bfloat> %v, b
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, a0, 1
; CHECK-NEXT: fmv.x.h a2, fa0
-; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a2
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v9, a2
; CHECK-NEXT: vslideup.vx v8, v9, a0
; CHECK-NEXT: ret
%r = insertelement <vscale x 4 x bfloat> %v, bfloat %elt, i32 %idx
@@ -388,19 +387,17 @@ define <vscale x 4 x half> @insertelt_nxv4f16_idx(<vscale x 4 x half> %v, half %
; ZVFH-LABEL: insertelt_nxv4f16_idx:
; ZVFH: # %bb.0:
; ZVFH-NEXT: addi a1, a0, 1
-; ZVFH-NEXT: vsetvli a2, zero, e16, m1, ta, ma
-; ZVFH-NEXT: vfmv.s.f v9, fa0
; ZVFH-NEXT: vsetvli zero, a1, e16, m1, tu, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
; ZVFH-NEXT: vslideup.vx v8, v9, a0
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: insertelt_nxv4f16_idx:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: addi a1, a0, 1
; ZVFHMIN-NEXT: fmv.x.h a2, fa0
-; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.s.x v9, a2
; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m1, tu, ma
+; ZVFHMIN-NEXT: vmv.s.x v9, a2
; ZVFHMIN-NEXT: vslideup.vx v8, v9, a0
; ZVFHMIN-NEXT: ret
%r = insertelement <vscale x 4 x half> %v, half %elt, i32 %idx
@@ -643,9 +640,8 @@ define <vscale x 2 x float> @insertelt_nxv2f32_idx(<vscale x 2 x float> %v, floa
; CHECK-LABEL: insertelt_nxv2f32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, a0, 1
-; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vslideup.vx v8, v9, a0
; CHECK-NEXT: ret
%r = insertelement <vscale x 2 x float> %v, float %elt, i32 %idx
@@ -779,9 +775,8 @@ define <vscale x 1 x double> @insertelt_nxv1f64_idx(<vscale x 1 x double> %v, do
; CHECK-LABEL: insertelt_nxv1f64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, a0, 1
-; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli zero, a1, e64, m1, tu, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vslideup.vx v8, v9, a0
; CHECK-NEXT: ret
%r = insertelement <vscale x 1 x double> %v, double %elt, i32 %idx
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll
@@ -131,8 +131,8 @@ define <vscale x 8 x i1> @insertelt_idx_nxv8i1(<vscale x 8 x i1> %x, i1 %elt, i6
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: addi a2, a1, 1
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a2, e8, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vslideup.vx v8, v9, a1
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vand.vi v8, v8, 1
9 changes: 3 additions & 6 deletions llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll
@@ -129,9 +129,8 @@ define <vscale x 8 x i8> @insertelt_nxv8i8_idx(<vscale x 8 x i8> %v, i8 signext
; CHECK-LABEL: insertelt_nxv8i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a2, a1, 1
-; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a2, e8, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vslideup.vx v8, v9, a1
; CHECK-NEXT: ret
%r = insertelement <vscale x 8 x i8> %v, i8 %elt, i32 %idx
@@ -333,9 +332,8 @@ define <vscale x 4 x i16> @insertelt_nxv4i16_idx(<vscale x 4 x i16> %v, i16 sign
; CHECK-LABEL: insertelt_nxv4i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a2, a1, 1
-; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a2, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vslideup.vx v8, v9, a1
; CHECK-NEXT: ret
%r = insertelement <vscale x 4 x i16> %v, i16 %elt, i32 %idx
@@ -503,9 +501,8 @@ define <vscale x 2 x i32> @insertelt_nxv2i32_idx(<vscale x 2 x i32> %v, i32 %elt
; CHECK-LABEL: insertelt_nxv2i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a2, a1, 1
-; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vslideup.vx v8, v9, a1
; CHECK-NEXT: ret
%r = insertelement <vscale x 2 x i32> %v, i32 %elt, i32 %idx
9 changes: 3 additions & 6 deletions llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll
@@ -129,9 +129,8 @@ define <vscale x 8 x i8> @insertelt_nxv8i8_idx(<vscale x 8 x i8> %v, i8 signext
; CHECK-LABEL: insertelt_nxv8i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a2, a1, 1
-; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a2, e8, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v9, a0
Contributor: That's interesting that we now perform the vmv.s.x with a tu policy. I thought that we didn't go from ta -> tu generally? Is the passthru in this vmv.s.x not poison? Either way, I'm going out on a limb here and assuming this is profitable, given that we're avoiding a VL toggle.

; CHECK-NEXT: vslideup.vx v8, v9, a1
; CHECK-NEXT: ret
%r = insertelement <vscale x 8 x i8> %v, i8 %elt, i32 %idx
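On the ta -> tu question above, a minimal illustration of the policy difference (assumed RVV semantics, not code from this PR): with tu (tail undisturbed) the destination's tail elements keep their old values, while with ta (tail agnostic) they may be clobbered. If the scalar move's passthru is undef, "preserving" an undefined tail is as valid as clobbering it, so folding the move under the slide's tu configuration stays correct:

```asm
vsetvli     zero, a2, e8, m1, tu, ma   # VL = a1 + 1, tail undisturbed
vmv.s.x     v9, a0                     # v9[0] = a0; with an undef passthru,
                                       # tu vs. ta makes no visible difference
vslideup.vx v8, v9, a1                 # with VL = a1 + 1, reads only v9[0]
```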
@@ -333,9 +332,8 @@ define <vscale x 4 x i16> @insertelt_nxv4i16_idx(<vscale x 4 x i16> %v, i16 sign
; CHECK-LABEL: insertelt_nxv4i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a2, a1, 1
-; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a2, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vslideup.vx v8, v9, a1
; CHECK-NEXT: ret
%r = insertelement <vscale x 4 x i16> %v, i16 %elt, i32 %idx
@@ -503,9 +501,8 @@ define <vscale x 2 x i32> @insertelt_nxv2i32_idx(<vscale x 2 x i32> %v, i32 sign
; CHECK-LABEL: insertelt_nxv2i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a2, a1, 1
-; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vslideup.vx v8, v9, a1
; CHECK-NEXT: ret
%r = insertelement <vscale x 2 x i32> %v, i32 %elt, i32 %idx
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
@@ -111,7 +111,7 @@ define float @vreduce_ord_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
define float @vreduce_fwadd_nxv1f32(<vscale x 1 x half> %v, float %s) {
; CHECK-LABEL: vreduce_fwadd_nxv1f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v9
@@ -126,7 +126,7 @@ define float @vreduce_fwadd_nxv1f32(<vscale x 1 x half> %v, float %s) {
define float @vreduce_ord_fwadd_nxv1f32(<vscale x 1 x half> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv1f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v9
@@ -167,9 +167,9 @@ define float @vreduce_ord_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
define float @vreduce_fwadd_nxv2f32(<vscale x 2 x half> %v, float %s) {
; CHECK-LABEL: vreduce_fwadd_nxv2f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -182,9 +182,9 @@ define float @vreduce_fwadd_nxv2f32(<vscale x 2 x half> %v, float %s) {
define float @vreduce_ord_fwadd_nxv2f32(<vscale x 2 x half> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv2f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -223,7 +223,7 @@ define float @vreduce_ord_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
define float @vreduce_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
; CHECK-LABEL: vreduce_fwadd_nxv4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v9
@@ -238,7 +238,7 @@ define float @vreduce_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
define float @vreduce_ord_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v9
@@ -279,9 +279,9 @@ define double @vreduce_ord_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
define double @vreduce_fwadd_nxv1f64(<vscale x 1 x float> %v, double %s) {
; CHECK-LABEL: vreduce_fwadd_nxv1f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -294,9 +294,9 @@ define double @vreduce_fwadd_nxv1f64(<vscale x 1 x float> %v, double %s) {
define double @vreduce_ord_fwadd_nxv1f64(<vscale x 1 x float> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv1f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -335,7 +335,7 @@ define double @vreduce_ord_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
define double @vreduce_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
; CHECK-LABEL: vreduce_fwadd_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v9
@@ -350,7 +350,7 @@ define double @vreduce_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
define double @vreduce_ord_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v9
@@ -391,7 +391,7 @@ define double @vreduce_ord_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
define double @vreduce_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
; CHECK-LABEL: vreduce_fwadd_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v10
@@ -406,7 +406,7 @@ define double @vreduce_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
define double @vreduce_ord_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v10