From 2b54de9e42c185aa3eb51ad547f62223015b0556 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Fri, 18 Jul 2025 15:52:19 -0300 Subject: [PATCH 1/6] Added support for vmv and vfmv Signed-off-by: Mikhail R. Gadelha --- llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 5 ++ llvm/test/CodeGen/RISCV/double_reduct.ll | 2 +- .../RISCV/machinelicm-constant-phys-reg.ll | 3 +- .../RISCV/rvv/fixed-vectors-int-buildvec.ll | 3 +- .../RISCV/rvv/fixed-vectors-reduction-fp.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll | 15 ++---- llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll | 2 +- .../CodeGen/RISCV/rvv/insertelt-int-rv32.ll | 9 ++-- .../CodeGen/RISCV/rvv/insertelt-int-rv64.ll | 9 ++-- .../RISCV/rvv/vreductions-fp-sdnode.ll | 32 ++++++------- .../test/CodeGen/RISCV/rvv/vreductions-int.ll | 48 +++++++++---------- 11 files changed, 62 insertions(+), 68 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index e656e8bb99d86..c4a1a2fa05d7d 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -1026,6 +1026,11 @@ static bool isSupportedInstr(const MachineInstr &MI) { case RISCV::VSSRA_VV: case RISCV::VSSRA_VX: case RISCV::VSSRA_VI: + // Vector Permutation Instructions + // Integer Scalar Move Instructions + // Floating-Point Scalar Move Instructions + case RISCV::VMV_S_X: + case RISCV::VFMV_S_F: // Vector Narrowing Fixed-Point Clip Instructions case RISCV::VNCLIPU_WV: case RISCV::VNCLIPU_WX: diff --git a/llvm/test/CodeGen/RISCV/double_reduct.ll b/llvm/test/CodeGen/RISCV/double_reduct.ll index cecdd77a079e4..4407e4de9143b 100644 --- a/llvm/test/CodeGen/RISCV/double_reduct.ll +++ b/llvm/test/CodeGen/RISCV/double_reduct.ll @@ -106,7 +106,7 @@ define i16 @add_ext_i16(<16 x i8> %a, <16 x i8> %b) { define i16 @add_ext_v32i16(<32 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: add_ext_v32i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v11, zero ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vwredsumu.vs v10, v10, v11 diff --git a/llvm/test/CodeGen/RISCV/machinelicm-constant-phys-reg.ll b/llvm/test/CodeGen/RISCV/machinelicm-constant-phys-reg.ll index 83e9bf661ab1c..ca0239b8319ad 100644 --- a/llvm/test/CodeGen/RISCV/machinelicm-constant-phys-reg.ll +++ b/llvm/test/CodeGen/RISCV/machinelicm-constant-phys-reg.ll @@ -7,13 +7,14 @@ define i32 @test(ptr %a, i64 %n) { ; CHECK-LABEL: test: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v8, zero ; CHECK-NEXT: .LBB0_1: # %loop ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl1re32.v v9, (a0) ; CHECK-NEXT: mv a2, a3 ; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma ; CHECK-NEXT: vredsum.vs v9, v9, v8 ; CHECK-NEXT: vmv.x.s a3, v9 ; CHECK-NEXT: addw a3, a3, a3 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll index d9bb007a10f71..87e4fe26c43db 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -1055,10 +1055,9 @@ define <512 x i8> @buildvec_not_vid_v512i8_indices_overflow_2() vscale_range(16, define <8 x i32> @prefix_overwrite(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-LABEL: prefix_overwrite: ; CHECK: # 
%bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m1, tu, ma +; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v10, a1 ; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 1 ; CHECK-NEXT: vmv.s.x v10, a2 ; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll index a426f8c619e99..68f2e44143981 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll @@ -1174,7 +1174,7 @@ define double @vreduce_ord_fwadd_v32f64(ptr %x, double %s) { ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v24, fa0 ; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; CHECK-NEXT: vslidedown.vi v16, v8, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll index 607e0085c3f46..acc99cf4a5258 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll @@ -110,9 +110,8 @@ define @insertelt_nxv4bf16_idx( %v, b ; CHECK: # %bb.0: ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: fmv.x.h a2, fa0 -; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a2 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma +; CHECK-NEXT: vmv.s.x v9, a2 ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret %r = insertelement %v, bfloat %elt, i32 %idx @@ -388,9 +387,8 @@ define @insertelt_nxv4f16_idx( %v, half % ; ZVFH-LABEL: insertelt_nxv4f16_idx: ; ZVFH: # %bb.0: ; ZVFH-NEXT: addi a1, a0, 1 -; ZVFH-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; ZVFH-NEXT: vfmv.s.f v9, fa0 ; ZVFH-NEXT: vsetvli zero, a1, e16, m1, tu, ma +; ZVFH-NEXT: vfmv.s.f v9, fa0 ; ZVFH-NEXT: vslideup.vx v8, v9, a0 ; ZVFH-NEXT: ret ; @@ -398,9 +396,8 @@ define @insertelt_nxv4f16_idx( %v, half % ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: addi a1, a0, 1 ; ZVFHMIN-NEXT: fmv.x.h a2, fa0 -; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vmv.s.x v9, a2 ; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m1, tu, ma +; ZVFHMIN-NEXT: vmv.s.x v9, a2 ; ZVFHMIN-NEXT: vslideup.vx v8, v9, a0 ; ZVFHMIN-NEXT: ret %r = insertelement %v, half %elt, i32 %idx @@ -643,9 +640,8 @@ define @insertelt_nxv2f32_idx( %v, floa ; CHECK-LABEL: insertelt_nxv2f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: addi a1, a0, 1 -; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 %idx @@ -779,9 +775,8 @@ define @insertelt_nxv1f64_idx( %v, do ; CHECK-LABEL: insertelt_nxv1f64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: addi a1, a0, 1 -; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, tu, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret %r = insertelement %v, double %elt, i32 %idx diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll index 7f57f4fd22ff5..81c9e3401319d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll @@ -131,8 +131,8 @@ define @insertelt_idx_nxv8i1( %x, i1 
%elt, i6 ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: addi a2, a1, 1 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetvli zero, a2, e8, m1, tu, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll index c3cc90c6a8de3..8e1fc63c304be 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll @@ -129,9 +129,8 @@ define @insertelt_nxv8i8_idx( %v, i8 signext ; CHECK-LABEL: insertelt_nxv8i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: addi a2, a1, 1 -; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetvli zero, a2, e8, m1, tu, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx @@ -333,9 +332,8 @@ define @insertelt_nxv4i16_idx( %v, i16 sign ; CHECK-LABEL: insertelt_nxv4i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: addi a2, a1, 1 -; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetvli zero, a2, e16, m1, tu, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 %idx @@ -503,9 +501,8 @@ define @insertelt_nxv2i32_idx( %v, i32 %elt ; CHECK-LABEL: insertelt_nxv2i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: addi a2, a1, 1 -; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 %idx diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll index 0e43cbf0f4518..dedbd5928f78f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll @@ -129,9 +129,8 @@ define @insertelt_nxv8i8_idx( %v, i8 signext ; CHECK-LABEL: insertelt_nxv8i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: addi a2, a1, 1 -; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetvli zero, a2, e8, m1, tu, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx @@ -333,9 +332,8 @@ define @insertelt_nxv4i16_idx( %v, i16 sign ; CHECK-LABEL: insertelt_nxv4i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: addi a2, a1, 1 -; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetvli zero, a2, e16, m1, tu, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 %idx @@ -503,9 +501,8 @@ define @insertelt_nxv2i32_idx( %v, i32 sign ; CHECK-LABEL: insertelt_nxv2i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: addi a2, a1, 1 -; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 %idx diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll index 78aae96242fd3..49c42e5b8f7e1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll @@ -111,7 +111,7 @@ define float @vreduce_ord_fadd_nxv1f32( %v, float %s) { define float @vreduce_fwadd_nxv1f32( %v, float %s) { ; CHECK-LABEL: vreduce_fwadd_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v9 @@ -126,7 +126,7 @@ define float @vreduce_fwadd_nxv1f32( %v, float %s) { define float @vreduce_ord_fwadd_nxv1f32( %v, float %s) { ; CHECK-LABEL: vreduce_ord_fwadd_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v9 @@ -167,9 +167,9 @@ define float @vreduce_ord_fadd_nxv2f32( %v, float %s) { define float @vreduce_fwadd_nxv2f32( %v, float %s) { ; CHECK-LABEL: vreduce_fwadd_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -182,9 +182,9 @@ define float @vreduce_fwadd_nxv2f32( %v, float %s) { define float @vreduce_ord_fwadd_nxv2f32( %v, float %s) { ; CHECK-LABEL: vreduce_ord_fwadd_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -223,7 +223,7 @@ define float @vreduce_ord_fadd_nxv4f32( %v, float %s) { define float @vreduce_fwadd_nxv4f32( %v, float %s) { ; CHECK-LABEL: vreduce_fwadd_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v9 @@ -238,7 +238,7 @@ define float @vreduce_fwadd_nxv4f32( %v, float %s) { define float @vreduce_ord_fwadd_nxv4f32( %v, float %s) { ; CHECK-LABEL: vreduce_ord_fwadd_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v9 @@ -279,9 +279,9 @@ define double @vreduce_ord_fadd_nxv1f64( %v, double %s) { define double @vreduce_fwadd_nxv1f64( %v, double %s) { ; CHECK-LABEL: vreduce_fwadd_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -294,9 +294,9 @@ define double @vreduce_fwadd_nxv1f64( %v, double %s) { define double @vreduce_ord_fwadd_nxv1f64( %v, double %s) { ; CHECK-LABEL: vreduce_ord_fwadd_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vsetivli 
zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -335,7 +335,7 @@ define double @vreduce_ord_fadd_nxv2f64( %v, double %s) { define double @vreduce_fwadd_nxv2f64( %v, double %s) { ; CHECK-LABEL: vreduce_fwadd_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v9 @@ -350,7 +350,7 @@ define double @vreduce_fwadd_nxv2f64( %v, double %s) { define double @vreduce_ord_fwadd_nxv2f64( %v, double %s) { ; CHECK-LABEL: vreduce_ord_fwadd_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v9 @@ -391,7 +391,7 @@ define double @vreduce_ord_fadd_nxv4f64( %v, double %s) { define double @vreduce_fwadd_nxv4f64( %v, double %s) { ; CHECK-LABEL: vreduce_fwadd_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v10 @@ -406,7 +406,7 @@ define double @vreduce_fwadd_nxv4f64( %v, double %s) { define double @vreduce_ord_fwadd_nxv4f64( %v, double %s) { ; CHECK-LABEL: vreduce_ord_fwadd_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v10 diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll index fac5e31ecf94e..863876776fabc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll @@ -339,7 +339,7 @@ define signext i16 @vreduce_add_nxv1i16( %v) { define signext i16 @vwreduce_add_nxv1i8( %v) { ; CHECK-LABEL: vwreduce_add_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 @@ -354,7 +354,7 @@ define signext i16 @vwreduce_add_nxv1i8( %v) { define signext i16 @vwreduce_uadd_nxv1i8( %v) { ; CHECK-LABEL: vwreduce_uadd_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 @@ -475,7 +475,7 @@ define signext i16 @vreduce_add_nxv2i16( %v) { define signext i16 @vwreduce_add_nxv2i8( %v) { ; CHECK-LABEL: vwreduce_add_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 @@ -490,7 +490,7 @@ define signext i16 @vwreduce_add_nxv2i8( %v) { define signext i16 @vwreduce_uadd_nxv2i8( %v) { ; CHECK-LABEL: vwreduce_uadd_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: 
vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 @@ -611,9 +611,9 @@ define signext i16 @vreduce_add_nxv4i16( %v) { define signext i16 @vwreduce_add_nxv4i8( %v) { ; CHECK-LABEL: vwreduce_add_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -626,9 +626,9 @@ define signext i16 @vwreduce_add_nxv4i8( %v) { define signext i16 @vwreduce_uadd_nxv4i8( %v) { ; CHECK-LABEL: vwreduce_uadd_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -747,7 +747,7 @@ define signext i32 @vreduce_add_nxv1i32( %v) { define signext i32 @vwreduce_add_nxv1i16( %v) { ; CHECK-LABEL: vwreduce_add_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 @@ -762,7 +762,7 @@ define signext i32 @vwreduce_add_nxv1i16( %v) { define signext i32 @vwreduce_uadd_nxv1i16( %v) { ; CHECK-LABEL: vwreduce_uadd_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v9 @@ -883,9 +883,9 @@ define signext i32 @vreduce_add_nxv2i32( %v) { define signext i32 @vwreduce_add_nxv2i16( %v) { ; CHECK-LABEL: vwreduce_add_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -898,9 +898,9 @@ define signext i32 @vwreduce_add_nxv2i16( %v) { define signext i32 @vwreduce_uadd_nxv2i16( %v) { ; CHECK-LABEL: vwreduce_uadd_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -1019,7 +1019,7 @@ define signext i32 @vreduce_add_nxv4i32( %v) { define signext i32 @vwreduce_add_nxv4i16( %v) { ; CHECK-LABEL: vwreduce_add_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 @@ -1034,7 +1034,7 @@ define signext i32 @vwreduce_add_nxv4i16( %v) { define signext i32 @vwreduce_uadd_nxv4i16( %v) { ; CHECK-LABEL: vwreduce_uadd_nxv4i16: ; CHECK: # %bb.0: -; 
CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v9 @@ -1180,9 +1180,9 @@ define i64 @vwreduce_add_nxv1i32( %v) { ; ; RV64-LABEL: vwreduce_add_nxv1i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v9, zero -; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; RV64-NEXT: vwredsum.vs v8, v8, v9 ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV64-NEXT: vmv.x.s a0, v8 @@ -1208,9 +1208,9 @@ define i64 @vwreduce_uadd_nxv1i32( %v) { ; ; RV64-LABEL: vwreduce_uadd_nxv1i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v9, zero -; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; RV64-NEXT: vwredsumu.vs v8, v8, v9 ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV64-NEXT: vmv.x.s a0, v8 @@ -1432,7 +1432,7 @@ define i64 @vwreduce_add_nxv2i32( %v) { ; ; RV64-LABEL: vwreduce_add_nxv2i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v9, zero ; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; RV64-NEXT: vwredsum.vs v8, v8, v9 @@ -1460,7 +1460,7 @@ define i64 @vwreduce_uadd_nxv2i32( %v) { ; ; RV64-LABEL: vwreduce_uadd_nxv2i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v9, zero ; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; RV64-NEXT: vwredsumu.vs v8, v8, v9 @@ -1684,7 +1684,7 @@ define i64 @vwreduce_add_nxv4i32( %v) { ; ; RV64-LABEL: vwreduce_add_nxv4i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, zero ; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; RV64-NEXT: vwredsum.vs v8, v8, v10 @@ -1712,7 +1712,7 @@ define i64 @vwreduce_uadd_nxv4i32( %v) { ; ; RV64-LABEL: vwreduce_uadd_nxv4i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, zero ; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; RV64-NEXT: vwredsumu.vs v8, v8, v10 From a703c3be7524aac57e746f4dec48923efbe7871d Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Thu, 24 Jul 2025 16:19:12 -0300 Subject: [PATCH 2/6] Check for loops Signed-off-by: Mikhail R. 
Gadelha
---
 llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp  | 63 +++++++++++++++++--
 llvm/test/CodeGen/AArch64/misched-cutoff.mir  |  2 +-
 .../RISCV/machinelicm-constant-phys-reg.ll    |  3 +-
 .../CodeGen/RISCV/rvv/sink-splat-operands.ll  |  4 +-
 4 files changed, 63 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 90e1c47a71c89..185cc61e7c41e 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -943,8 +943,10 @@ RISCVInsertVSETVLI::getInfoForVSETVLI(const MachineInstr &MI) const {
     NewInfo.setAVLImm(MI.getOperand(1).getImm());
   } else {
     assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
-           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
-    if (MI.getOpcode() == RISCV::PseudoVSETVLIX0)
+           MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
+           MI.getOpcode() == RISCV::PseudoVSETVLIX0X0);
+    if (MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
+        MI.getOpcode() == RISCV::PseudoVSETVLIX0X0)
       NewInfo.setAVLVLMAX();
     else if (MI.getOperand(1).isUndef())
       // Otherwise use an AVL of 1 to avoid depending on previous vl.
@@ -1511,12 +1513,21 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
 /// this is geared to catch the common case of a fixed length vsetvl in a single
 /// block loop when it could execute once in the preheader instead.
 void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
+  // Only handle one predecessor, or two if the block is a loop.
+  if (MBB.pred_empty() || MBB.pred_size() > 2)
+    return;
+
   if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
     return;
 
+  bool isLoop = false;
+
   MachineBasicBlock *UnavailablePred = nullptr;
   VSETVLIInfo AvailableInfo;
+  MachineBasicBlock *PreviousPred = nullptr;
   for (MachineBasicBlock *P : MBB.predecessors()) {
+    isLoop |= (P == &MBB);
+
     const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
     if (PredInfo.isUnknown()) {
       if (UnavailablePred)
@@ -1525,8 +1536,24 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
     } else if (!AvailableInfo.isValid()) {
       AvailableInfo = PredInfo;
     } else if (AvailableInfo != PredInfo) {
-      return;
+      if (!isLoop)
+        return;
+
+      DemandedFields PREDemands;
+      PREDemands.demandVTYPE();
+
+      if (!PredInfo.isCompatible(PREDemands, AvailableInfo, LIS))
+        return;
+
+      // States are VTYPE-compatible; prefer the more general state:
+      // choose VLMAX over an immediate AVL when both are tail-agnostic.
+      if (PredInfo.hasAVLVLMAX() && AvailableInfo.hasAVLImm()) {
+        AvailableInfo = PredInfo;
+        UnavailablePred = PreviousPred;
+      }
     }
+
+    PreviousPred = P;
   }
 
   // Unreachable, single pred, or full redundancy. Note that FRE is handled by
@@ -1543,7 +1570,7 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
     return;
 
   // Critical edge - TODO: consider splitting?
-  if (UnavailablePred->succ_size() != 1)
+  if (UnavailablePred->succ_size() != 1 && !isLoop)
     return;
 
   // If the AVL value is a register (other than our VLMAX sentinel),
@@ -1571,21 +1598,49 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
   VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
   VSETVLIInfo CurInfo = AvailableInfo;
   int TransitionsRemoved = 0;
+
+  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
+                    << UnavailablePred->getName() << "\n"
+                    << "  Old state: " << OldInfo << "\n"
+                    << "  New state: " << CurInfo << "\n");
+
   for (const MachineInstr &MI : MBB) {
+    if (RISCVII::hasSEWOp(MI.getDesc().TSFlags))
+      if (!hasUndefinedPassthru(MI))
+        return; // Unsafe to change VL/VTYPE for this loop.
+
     const VSETVLIInfo LastInfo = CurInfo;
     const VSETVLIInfo LastOldInfo = OldInfo;
     transferBefore(CurInfo, MI);
     transferBefore(OldInfo, MI);
+
+    LLVM_DEBUG(dbgs() << "PRE VSETVLI 1 from " << MBB.getName() << " to "
+                      << UnavailablePred->getName() << "\n"
+                      << "  Old state: " << OldInfo << "\n"
+                      << "  New state: " << CurInfo << "\n");
+
     if (CurInfo == LastInfo)
       TransitionsRemoved++;
     if (LastOldInfo == OldInfo)
       TransitionsRemoved--;
     transferAfter(CurInfo, MI);
     transferAfter(OldInfo, MI);
+
+    LLVM_DEBUG(dbgs() << "PRE VSETVLI 2 from " << MBB.getName() << " to "
+                      << UnavailablePred->getName() << "\n"
+                      << "  Old state: " << OldInfo << "\n"
+                      << "  New state: " << CurInfo << "\n\n");
+
     if (CurInfo == OldInfo)
       // Convergence. All transitions after this must match by construction.
      break;
   }
+
+  LLVM_DEBUG(dbgs() << "PRE VSETVLI 3 from " << MBB.getName() << " to "
+                    << UnavailablePred->getName() << "\n"
+                    << "  Old state: " << OldInfo << "\n"
+                    << "  New state: " << CurInfo << "\n");
+
   if (CurInfo != OldInfo || TransitionsRemoved <= 0)
     // Issues 1 and 2 above
     return;

diff --git a/llvm/test/CodeGen/AArch64/misched-cutoff.mir b/llvm/test/CodeGen/AArch64/misched-cutoff.mir
index a61eb64318a39..94c94f51d3f4d 100644
--- a/llvm/test/CodeGen/AArch64/misched-cutoff.mir
+++ b/llvm/test/CodeGen/AArch64/misched-cutoff.mir
@@ -42,8 +42,8 @@ body: |
     ; CHECK-CUTOFF: liveins: $w1, $x0
    ; CHECK-CUTOFF-NEXT: {{ $}}
    ; CHECK-CUTOFF-NEXT: $w8 = LDRWui $x0, 1, implicit-def $x8 :: (load (s32) from %ir.0)
-    ; CHECK-CUTOFF-NEXT: STRWui $w1, $x0, 2 :: (store (s32) into %ir.arrayidx1)
     ; CHECK-CUTOFF-NEXT: $w9 = LDRWui $x0, 0, implicit-def $x9 :: (load (s32) from %ir.arrayidx19, align 8)
+    ; CHECK-CUTOFF-NEXT: STRWui $w1, $x0, 2 :: (store (s32) into %ir.arrayidx1)
     ; CHECK-CUTOFF-NEXT: $x0 = ADDXrr killed $x9, killed $x8
     ; CHECK-CUTOFF-NEXT: RET_ReallyLR implicit $x0
     $w8 = LDRWui $x0, 1, implicit-def $x8 :: (load (s32) from %ir.0)

diff --git a/llvm/test/CodeGen/RISCV/machinelicm-constant-phys-reg.ll b/llvm/test/CodeGen/RISCV/machinelicm-constant-phys-reg.ll
index ca0239b8319ad..83e9bf661ab1c 100644
--- a/llvm/test/CodeGen/RISCV/machinelicm-constant-phys-reg.ll
+++ b/llvm/test/CodeGen/RISCV/machinelicm-constant-phys-reg.ll
@@ -7,14 +7,13 @@ define i32 @test(ptr %a, i64 %n) {
 ; CHECK-LABEL: test:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    li a3, 0
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vmv.s.x v8, zero
 ; CHECK-NEXT:  .LBB0_1: # %loop
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vl1re32.v v9, (a0)
 ; CHECK-NEXT:    mv a2, a3
 ; CHECK-NEXT:    addi a1, a1, -1
-; CHECK-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vredsum.vs v9, v9, v8
 ; CHECK-NEXT:    vmv.x.s a3, v9
 ; CHECK-NEXT:    addw a3, a3, a3

diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
index c216fb65a6a5b..f057396b7f0f6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
@@ -2087,13 +2087,13 @@ define void @sink_splat_fma_scalable(ptr noalias nocapture %a, ptr noalias nocap
 ; CHECK-NEXT:    mv a6, a0
 ; CHECK-NEXT:    mv a7, a1
 ; CHECK-NEXT:    mv t0, a3
-; CHECK-NEXT:    vsetvli t1, zero, e32, m1, ta, ma
 ; CHECK-NEXT:  .LBB34_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vl1re32.v v8, (a6)
 ; CHECK-NEXT:    vl1re32.v v9, (a7)
 ; CHECK-NEXT:    sub t0, t0, a4
 ; CHECK-NEXT:    add a7, a7, a2
+; CHECK-NEXT:    vsetvli t1, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vfmacc.vf v9, fa0, v8
 ; CHECK-NEXT:    vs1r.v v9, (a6)
 ; CHECK-NEXT:    add a6, a6, a2
@@ -2187,13 +2187,13 @@ define void @sink_splat_fma_commute_scalable(ptr noalias nocapture %a, ptr noali
 ; CHECK-NEXT:    mv a6, a0
 ; CHECK-NEXT:    mv a7, a1
 ; CHECK-NEXT:    mv t0, a3
-; CHECK-NEXT:    vsetvli t1, zero, e32, m1, ta, ma
 ; CHECK-NEXT:  .LBB35_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vl1re32.v v8, (a6)
 ; CHECK-NEXT:    vl1re32.v v9, (a7)
 ; CHECK-NEXT:    sub t0, t0, a4
 ; CHECK-NEXT:    add a7, a7, a2
+; CHECK-NEXT:    vsetvli t1, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vfmacc.vf v9, fa0, v8
 ; CHECK-NEXT:    vs1r.v v9, (a6)
 ; CHECK-NEXT:    add a6, a6, a2

From 54b51074a1e37d0f012c7598a70bab7a4280ebe3 Mon Sep 17 00:00:00 2001
From: "Mikhail R. Gadelha"
Date: Sun, 27 Jul 2025 16:06:35 -0300
Subject: [PATCH 3/6] Slightly improve on the code

Signed-off-by: Mikhail R. Gadelha
---
 llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 25 +++++++++++---------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 185cc61e7c41e..ec2a1a286156d 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1513,21 +1513,17 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
 /// this is geared to catch the common case of a fixed length vsetvl in a single
 /// block loop when it could execute once in the preheader instead.
 void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
-  // Only handle one predecessor, or two if the block is a loop.
-  if (MBB.pred_empty() || MBB.pred_size() > 2)
+  // We need a predecessor to move the VSETVLI to.
+  if (MBB.pred_empty())
     return;
 
   if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
     return;
 
-  bool isLoop = false;
-
   MachineBasicBlock *UnavailablePred = nullptr;
   VSETVLIInfo AvailableInfo;
   MachineBasicBlock *PreviousPred = nullptr;
   for (MachineBasicBlock *P : MBB.predecessors()) {
-    isLoop |= (P == &MBB);
-
     const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
     if (PredInfo.isUnknown()) {
       if (UnavailablePred)
@@ -1536,9 +1532,6 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
     } else if (!AvailableInfo.isValid()) {
       AvailableInfo = PredInfo;
     } else if (AvailableInfo != PredInfo) {
-      if (!isLoop)
-        return;
-
       DemandedFields PREDemands;
       PREDemands.demandVTYPE();
 
@@ -1553,7 +1546,17 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
       }
     }
 
-    PreviousPred = P;
+    // Filter out self-loop edges.
+    SmallVector<MachineBasicBlock *> Preds(MBB.predecessors().begin(),
+                                           MBB.predecessors().end());
+    Preds.erase(std::remove_if(Preds.begin(), Preds.end(),
+                               [&](MachineBasicBlock *P) { return P == &MBB; }),
+                Preds.end());
+
+    // Only a single non-loop predecessor is handled for now.
+    if (Preds.size() != 1)
+      return;
+    PreviousPred = *Preds.begin();
   }
 
   // Unreachable, single pred, or full redundancy. Note that FRE is handled by
@@ -1570,7 +1573,7 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
     return;
 
   // Critical edge - TODO: consider splitting?
-  if (UnavailablePred->succ_size() != 1 && !isLoop)
+  if (UnavailablePred->succ_size() != 1)
     return;
 
   // If the AVL value is a register (other than our VLMAX sentinel),

From 01b806b3187507b2503161c7b8efb3703977a51e Mon Sep 17 00:00:00 2001
From: "Mikhail R. Gadelha"
Date: Sun, 27 Jul 2025 17:59:51 -0300
Subject: [PATCH 4/6] Revert "Slightly improve on the code"

This reverts commit 54b51074a1e37d0f012c7598a70bab7a4280ebe3.
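For readers comparing the two versions: the change being reverted here replaced
the isLoop flag with an explicit filtering of self-loop predecessor edges. A
condensed sketch of that filtering idea, written with llvm::erase_if rather
than the erase/remove_if pair (illustrative only; the names mirror the
reverted hunk, and this is not the code being reinstated):

    // Keep only the predecessors that are not MBB itself; a single-block
    // loop contributes exactly one such self edge (its back edge).
    SmallVector<MachineBasicBlock *> Preds(MBB.predecessors().begin(),
                                           MBB.predecessors().end());
    llvm::erase_if(Preds, [&](MachineBasicBlock *P) { return P == &MBB; });
    if (Preds.size() != 1)
      return; // Only a unique non-loop predecessor can host the hoisted vsetvli.
    MachineBasicBlock *PreviousPred = Preds.front();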
---
 llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 25 +++++++++-----------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index ec2a1a286156d..185cc61e7c41e 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1513,17 +1513,21 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
 /// this is geared to catch the common case of a fixed length vsetvl in a single
 /// block loop when it could execute once in the preheader instead.
 void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
-  // We need a predecessor to move the VSETVLI to.
-  if (MBB.pred_empty())
+  // Only handle one predecessor, or two if the block is a loop.
+  if (MBB.pred_empty() || MBB.pred_size() > 2)
     return;
 
   if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
     return;
 
+  bool isLoop = false;
+
   MachineBasicBlock *UnavailablePred = nullptr;
   VSETVLIInfo AvailableInfo;
   MachineBasicBlock *PreviousPred = nullptr;
   for (MachineBasicBlock *P : MBB.predecessors()) {
+    isLoop |= (P == &MBB);
+
     const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
     if (PredInfo.isUnknown()) {
       if (UnavailablePred)
@@ -1532,6 +1536,9 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
     } else if (!AvailableInfo.isValid()) {
       AvailableInfo = PredInfo;
     } else if (AvailableInfo != PredInfo) {
+      if (!isLoop)
+        return;
+
       DemandedFields PREDemands;
       PREDemands.demandVTYPE();
 
@@ -1546,17 +1553,7 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
       }
     }
 
-    // Filter out self-loop edges.
-    SmallVector<MachineBasicBlock *> Preds(MBB.predecessors().begin(),
-                                           MBB.predecessors().end());
-    Preds.erase(std::remove_if(Preds.begin(), Preds.end(),
-                               [&](MachineBasicBlock *P) { return P == &MBB; }),
-                Preds.end());
-
-    // Only a single non-loop predecessor is handled for now.
-    if (Preds.size() != 1)
-      return;
-    PreviousPred = *Preds.begin();
+    PreviousPred = P;
   }
 
   // Unreachable, single pred, or full redundancy. Note that FRE is handled by
@@ -1573,7 +1570,7 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
     return;
 
   // Critical edge - TODO: consider splitting?
-  if (UnavailablePred->succ_size() != 1)
+  if (UnavailablePred->succ_size() != 1 && !isLoop)
     return;
 
   // If the AVL value is a register (other than our VLMAX sentinel),

From f6add81a5f63d525dfaeade7df0f815b17dbbec2 Mon Sep 17 00:00:00 2001
From: "Mikhail R. Gadelha"
Date: Sun, 27 Jul 2025 18:01:34 -0300
Subject: [PATCH 5/6] Revert "Check for loops"

This reverts commit a703c3be7524aac57e746f4dec48923efbe7871d.
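The change being reverted keyed the whole transformation on recognizing a
single-block loop: such a block is reached by exactly two edges, the preheader
edge and its own back edge. A minimal detection sketch under that assumption
(hypothetical helper for exposition, not part of any patch in this series):

    // Return the unique non-back-edge predecessor of a single-block loop
    // header, or nullptr if MBB does not have that shape. MBB appearing in
    // its own predecessor list is exactly the self back edge tested here.
    static MachineBasicBlock *getSingleBlockLoopPreheader(MachineBasicBlock &MBB) {
      if (MBB.pred_size() != 2 || !MBB.isSuccessor(&MBB))
        return nullptr;
      for (MachineBasicBlock *P : MBB.predecessors())
        if (P != &MBB)
          return P; // The preheader side of the two incoming edges.
      return nullptr;
    }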
---
 llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp  | 63 ++-----------------
 llvm/test/CodeGen/AArch64/misched-cutoff.mir  |  2 +-
 .../RISCV/machinelicm-constant-phys-reg.ll    |  3 +-
 .../CodeGen/RISCV/rvv/sink-splat-operands.ll  |  4 +-
 4 files changed, 9 insertions(+), 63 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 185cc61e7c41e..90e1c47a71c89 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -943,10 +943,8 @@ RISCVInsertVSETVLI::getInfoForVSETVLI(const MachineInstr &MI) const {
     NewInfo.setAVLImm(MI.getOperand(1).getImm());
   } else {
     assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
-           MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
-           MI.getOpcode() == RISCV::PseudoVSETVLIX0X0);
-    if (MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
-        MI.getOpcode() == RISCV::PseudoVSETVLIX0X0)
+           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
+    if (MI.getOpcode() == RISCV::PseudoVSETVLIX0)
       NewInfo.setAVLVLMAX();
     else if (MI.getOperand(1).isUndef())
       // Otherwise use an AVL of 1 to avoid depending on previous vl.
@@ -1513,21 +1511,12 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
 /// this is geared to catch the common case of a fixed length vsetvl in a single
 /// block loop when it could execute once in the preheader instead.
 void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
-  // Only handle one predecessor, or two if the block is a loop.
-  if (MBB.pred_empty() || MBB.pred_size() > 2)
-    return;
-
   if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
     return;
 
-  bool isLoop = false;
-
   MachineBasicBlock *UnavailablePred = nullptr;
   VSETVLIInfo AvailableInfo;
-  MachineBasicBlock *PreviousPred = nullptr;
   for (MachineBasicBlock *P : MBB.predecessors()) {
-    isLoop |= (P == &MBB);
-
     const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
     if (PredInfo.isUnknown()) {
       if (UnavailablePred)
@@ -1536,24 +1525,8 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
     } else if (!AvailableInfo.isValid()) {
       AvailableInfo = PredInfo;
     } else if (AvailableInfo != PredInfo) {
-      if (!isLoop)
-        return;
-
-      DemandedFields PREDemands;
-      PREDemands.demandVTYPE();
-
-      if (!PredInfo.isCompatible(PREDemands, AvailableInfo, LIS))
-        return;
-
-      // States are VTYPE-compatible; prefer the more general state:
-      // choose VLMAX over an immediate AVL when both are tail-agnostic.
-      if (PredInfo.hasAVLVLMAX() && AvailableInfo.hasAVLImm()) {
-        AvailableInfo = PredInfo;
-        UnavailablePred = PreviousPred;
-      }
+      return;
     }
-
-    PreviousPred = P;
   }
 
   // Unreachable, single pred, or full redundancy. Note that FRE is handled by
@@ -1570,7 +1543,7 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
     return;
 
   // Critical edge - TODO: consider splitting?
-  if (UnavailablePred->succ_size() != 1 && !isLoop)
+  if (UnavailablePred->succ_size() != 1)
     return;
 
   // If the AVL value is a register (other than our VLMAX sentinel),
@@ -1598,49 +1571,21 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
   VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
   VSETVLIInfo CurInfo = AvailableInfo;
   int TransitionsRemoved = 0;
-
-  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
-                    << UnavailablePred->getName() << "\n"
-                    << "  Old state: " << OldInfo << "\n"
-                    << "  New state: " << CurInfo << "\n");
-
   for (const MachineInstr &MI : MBB) {
-    if (RISCVII::hasSEWOp(MI.getDesc().TSFlags))
-      if (!hasUndefinedPassthru(MI))
-        return; // Unsafe to change VL/VTYPE for this loop.
-
     const VSETVLIInfo LastInfo = CurInfo;
     const VSETVLIInfo LastOldInfo = OldInfo;
     transferBefore(CurInfo, MI);
     transferBefore(OldInfo, MI);
-
-    LLVM_DEBUG(dbgs() << "PRE VSETVLI 1 from " << MBB.getName() << " to "
-                      << UnavailablePred->getName() << "\n"
-                      << "  Old state: " << OldInfo << "\n"
-                      << "  New state: " << CurInfo << "\n");
-
     if (CurInfo == LastInfo)
       TransitionsRemoved++;
     if (LastOldInfo == OldInfo)
       TransitionsRemoved--;
     transferAfter(CurInfo, MI);
     transferAfter(OldInfo, MI);
-
-    LLVM_DEBUG(dbgs() << "PRE VSETVLI 2 from " << MBB.getName() << " to "
-                      << UnavailablePred->getName() << "\n"
-                      << "  Old state: " << OldInfo << "\n"
-                      << "  New state: " << CurInfo << "\n\n");
-
     if (CurInfo == OldInfo)
       // Convergence. All transitions after this must match by construction.
       break;
   }
-
-  LLVM_DEBUG(dbgs() << "PRE VSETVLI 3 from " << MBB.getName() << " to "
-                    << UnavailablePred->getName() << "\n"
-                    << "  Old state: " << OldInfo << "\n"
-                    << "  New state: " << CurInfo << "\n");
-
   if (CurInfo != OldInfo || TransitionsRemoved <= 0)
     // Issues 1 and 2 above
     return;

diff --git a/llvm/test/CodeGen/AArch64/misched-cutoff.mir b/llvm/test/CodeGen/AArch64/misched-cutoff.mir
index 94c94f51d3f4d..a61eb64318a39 100644
--- a/llvm/test/CodeGen/AArch64/misched-cutoff.mir
+++ b/llvm/test/CodeGen/AArch64/misched-cutoff.mir
@@ -42,8 +42,8 @@ body: |
     ; CHECK-CUTOFF: liveins: $w1, $x0
     ; CHECK-CUTOFF-NEXT: {{ $}}
     ; CHECK-CUTOFF-NEXT: $w8 = LDRWui $x0, 1, implicit-def $x8 :: (load (s32) from %ir.0)
-    ; CHECK-CUTOFF-NEXT: $w9 = LDRWui $x0, 0, implicit-def $x9 :: (load (s32) from %ir.arrayidx19, align 8)
     ; CHECK-CUTOFF-NEXT: STRWui $w1, $x0, 2 :: (store (s32) into %ir.arrayidx1)
+    ; CHECK-CUTOFF-NEXT: $w9 = LDRWui $x0, 0, implicit-def $x9 :: (load (s32) from %ir.arrayidx19, align 8)
     ; CHECK-CUTOFF-NEXT: $x0 = ADDXrr killed $x9, killed $x8
     ; CHECK-CUTOFF-NEXT: RET_ReallyLR implicit $x0
     $w8 = LDRWui $x0, 1, implicit-def $x8 :: (load (s32) from %ir.0)

diff --git a/llvm/test/CodeGen/RISCV/machinelicm-constant-phys-reg.ll b/llvm/test/CodeGen/RISCV/machinelicm-constant-phys-reg.ll
index 83e9bf661ab1c..ca0239b8319ad 100644
--- a/llvm/test/CodeGen/RISCV/machinelicm-constant-phys-reg.ll
+++ b/llvm/test/CodeGen/RISCV/machinelicm-constant-phys-reg.ll
@@ -7,13 +7,14 @@ define i32 @test(ptr %a, i64 %n) {
 ; CHECK-LABEL: test:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    li a3, 0
-; CHECK-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vmv.s.x v8, zero
 ; CHECK-NEXT:  .LBB0_1: # %loop
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vl1re32.v v9, (a0)
 ; CHECK-NEXT:    mv a2, a3
 ; CHECK-NEXT:    addi a1, a1, -1
+; CHECK-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vredsum.vs v9, v9, v8
 ; CHECK-NEXT:    vmv.x.s a3, v9
 ; CHECK-NEXT:    addw a3, a3, a3

diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
index f057396b7f0f6..c216fb65a6a5b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
@@ -2087,13 +2087,13 @@ define void @sink_splat_fma_scalable(ptr noalias nocapture %a, ptr noalias nocap
 ; CHECK-NEXT:    mv a6, a0
 ; CHECK-NEXT:    mv a7, a1
 ; CHECK-NEXT:    mv t0, a3
+; CHECK-NEXT:    vsetvli t1, zero, e32, m1, ta, ma
 ; CHECK-NEXT:  .LBB34_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vl1re32.v v8, (a6)
 ; CHECK-NEXT:    vl1re32.v v9, (a7)
 ; CHECK-NEXT:    sub t0, t0, a4
 ; CHECK-NEXT:    add a7, a7, a2
-; CHECK-NEXT:    vsetvli t1, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vfmacc.vf v9, fa0, v8
 ; CHECK-NEXT:    vs1r.v v9, (a6)
 ; CHECK-NEXT:    add a6, a6, a2
@@ -2187,13 +2187,13 @@ define void @sink_splat_fma_commute_scalable(ptr noalias nocapture %a, ptr noali
 ; CHECK-NEXT:    mv a6, a0
 ; CHECK-NEXT:    mv a7, a1
 ; CHECK-NEXT:    mv t0, a3
+; CHECK-NEXT:    vsetvli t1, zero, e32, m1, ta, ma
 ; CHECK-NEXT:  .LBB35_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vl1re32.v v8, (a6)
 ; CHECK-NEXT:    vl1re32.v v9, (a7)
 ; CHECK-NEXT:    sub t0, t0, a4
 ; CHECK-NEXT:    add a7, a7, a2
-; CHECK-NEXT:    vsetvli t1, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vfmacc.vf v9, fa0, v8
 ; CHECK-NEXT:    vs1r.v v9, (a6)
 ; CHECK-NEXT:    add a6, a6, a2

From f3c04324588da87edd9f1c69d8d5c18faf35cd07 Mon Sep 17 00:00:00 2001
From: "Mikhail R. Gadelha"
Date: Wed, 30 Jul 2025 14:16:31 -0300
Subject: [PATCH 6/6] Move enum values

Signed-off-by: Mikhail R. Gadelha
---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index c4a1a2fa05d7d..377fed8afb57f 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -1026,11 +1026,6 @@ static bool isSupportedInstr(const MachineInstr &MI) {
   case RISCV::VSSRA_VV:
   case RISCV::VSSRA_VX:
   case RISCV::VSSRA_VI:
-  // Vector Permutation Instructions
-  // Integer Scalar Move Instructions
-  // Floating-Point Scalar Move Instructions
-  case RISCV::VMV_S_X:
-  case RISCV::VFMV_S_F:
   // Vector Narrowing Fixed-Point Clip Instructions
   case RISCV::VNCLIPU_WV:
   case RISCV::VNCLIPU_WX:
@@ -1064,6 +1059,11 @@ static bool isSupportedInstr(const MachineInstr &MI) {
   case RISCV::VMSOF_M:
   case RISCV::VIOTA_M:
   case RISCV::VID_V:
+  // Vector Permutation Instructions
+  // Integer Scalar Move Instructions
+  // Floating-Point Scalar Move Instructions
+  case RISCV::VMV_S_X:
+  case RISCV::VFMV_S_F:
   // Vector Slide Instructions
   case RISCV::VSLIDEUP_VX:
   case RISCV::VSLIDEUP_VI:
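To close the loop on patch 1: the reason vmv.s.x and vfmv.s.f are safe entries
in isSupportedInstr is that both write only element 0 of the destination, so
any VL of at least one produces the same result and the VL optimizer may shrink
the producer's VL -- which is exactly why the tests above flip to
"vsetivli zero, 1, ...". A reduced illustration of that property (hypothetical
helper for exposition, not code from the patch):

    // Both scalar moves define only vd[0]; the rest of the destination is
    // passthru/tail. The result is therefore independent of VL for VL >= 1,
    // and the VL optimizer may legally reduce the demanded VL to 1.
    static bool writesOnlyElementZero(unsigned Opcode) {
      switch (Opcode) {
      case RISCV::VMV_S_X:  // vd[0] = x[rs1]
      case RISCV::VFMV_S_F: // vd[0] = f[rs1]
        return true;
      default:
        return false;
      }
    }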