diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index bf2e04caa0a61..b5ffe407b951a 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -2934,6 +2934,25 @@ performVectorExtendToFPCombine(SDNode *N,
   return DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, Conv);
 }
 
+static SDValue
+performVectorNonNegToFPCombine(SDNode *N,
+                               TargetLowering::DAGCombinerInfo &DCI) {
+  auto &DAG = DCI.DAG;
+
+  SDNodeFlags Flags = N->getFlags();
+  SDValue Op0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+
+  // Optimize uitofp to sitofp when the sign bit is known to be zero.
+  // Depending on the target (runtime) backend, this might be performance
+  // neutral (e.g. AArch64) or a significant improvement (e.g. x86_64).
+  if (VT.isVector() && (Flags.hasNonNeg() || DAG.SignBitIsZero(Op0))) {
+    return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0);
+  }
+
+  return SDValue();
+}
+
 static SDValue
 performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
   auto &DAG = DCI.DAG;
@@ -3515,6 +3534,9 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::ZERO_EXTEND:
     return performVectorExtendCombine(N, DCI);
   case ISD::UINT_TO_FP:
+    if (auto ExtCombine = performVectorExtendToFPCombine(N, DCI))
+      return ExtCombine;
+    return performVectorNonNegToFPCombine(N, DCI);
   case ISD::SINT_TO_FP:
     return performVectorExtendToFPCombine(N, DCI);
   case ISD::FP_TO_SINT_SAT:
diff --git a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll
index 8459ec8101ff2..b355a0d60317b 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll
@@ -441,3 +441,31 @@ define <2 x double> @promote_mixed_v2f64(<4 x float> %x, <4 x float> %y) {
   %a = fpext <2 x float> %v to <2 x double>
   ret <2 x double> %a
 }
+
+define <4 x float> @convert_u_v4f32_maybeneg(<4 x i32> %x) {
+; CHECK-LABEL: convert_u_v4f32_maybeneg:
+; CHECK:         .functype convert_u_v4f32_maybeneg (v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i32.const 1
+; CHECK-NEXT:    i32x4.shr_s
+; CHECK-NEXT:    f32x4.convert_i32x4_u
+; CHECK-NEXT:    # fallthrough-return
+  %a = ashr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
+  %b = uitofp <4 x i32> %a to <4 x float>
+  ret <4 x float> %b
+}
+
+define <4 x float> @convert_u_v4f32_nonneg(<4 x i32> %x) {
+; CHECK-LABEL: convert_u_v4f32_nonneg:
+; CHECK:         .functype convert_u_v4f32_nonneg (v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i32.const 1
+; CHECK-NEXT:    i32x4.shr_u
+; CHECK-NEXT:    f32x4.convert_i32x4_s
+; CHECK-NEXT:    # fallthrough-return
+  %a = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
+  %b = uitofp <4 x i32> %a to <4 x float>
+  ret <4 x float> %b
+}
diff --git a/llvm/test/CodeGen/WebAssembly/simd-extending-convert.ll b/llvm/test/CodeGen/WebAssembly/simd-extending-convert.ll
index c93b8aa7fb42e..eb39f90e68701 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-extending-convert.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-extending-convert.ll
@@ -12,7 +12,7 @@ define <4 x float> @extend_to_float_low_i16x8_u(<8 x i16> %x) {
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    i32x4.extend_low_i16x8_u
-; CHECK-NEXT:    f32x4.convert_i32x4_u
+; CHECK-NEXT:    f32x4.convert_i32x4_s
 ; CHECK-NEXT:    # fallthrough-return
   %low = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %extended = uitofp <4 x i16> %low to <4 x float>
@@ -25,7 +25,7 @@ define <4 x float> @extend_to_float_high_i16x8_u(<8 x i16> %x) {
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    i32x4.extend_high_i16x8_u
-; CHECK-NEXT:    f32x4.convert_i32x4_u
+; CHECK-NEXT:    f32x4.convert_i32x4_s
 ; CHECK-NEXT:    # fallthrough-return
   %high = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %extended = uitofp <4 x i16> %high to <4 x float>
@@ -39,7 +39,7 @@ define <4 x float> @extend_to_float_low_i8x16_u(<8 x i8> %x) {
 ; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    i16x8.extend_low_i8x16_u
 ; CHECK-NEXT:    i32x4.extend_low_i16x8_u
-; CHECK-NEXT:    f32x4.convert_i32x4_u
+; CHECK-NEXT:    f32x4.convert_i32x4_s
 ; CHECK-NEXT:    # fallthrough-return
   %low = shufflevector <8 x i8> %x, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %extended = uitofp <4 x i8> %low to <4 x float>
@@ -55,7 +55,7 @@ define <4 x float> @extend_to_float_high_i8x16_u(<8 x i8> %x) {
 ; CHECK-NEXT:    i8x16.shuffle 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 ; CHECK-NEXT:    i16x8.extend_low_i8x16_u
 ; CHECK-NEXT:    i32x4.extend_low_i16x8_u
-; CHECK-NEXT:    f32x4.convert_i32x4_u
+; CHECK-NEXT:    f32x4.convert_i32x4_s
 ; CHECK-NEXT:    # fallthrough-return
   %high = shufflevector <8 x i8> %x, <8 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %extended = uitofp <4 x i8> %high to <4 x float>
@@ -136,7 +136,7 @@ define <2 x double> @extend_to_double_low_i16x4_u(<4 x i16> %x) {
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    i32x4.extend_low_i16x8_u
-; CHECK-NEXT:    f64x2.convert_low_i32x4_u
+; CHECK-NEXT:    f64x2.convert_low_i32x4_s
 ; CHECK-NEXT:    # fallthrough-return
   %low = shufflevector <4 x i16> %x, <4 x i16> undef, <2 x i32> <i32 0, i32 1>
   %extended = uitofp <2 x i16> %low to <2 x double>
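Note on the combine's two trigger conditions: `Flags.hasNonNeg()` is set when the IR-level `nneg` flag is attached to the `uitofp` (per the LangRef, a negative input then yields poison), while `DAG.SignBitIsZero(Op0)` covers cases like the `lshr` test above, where the DAG analysis can prove the sign bit clear. A minimal IR sketch of the flag path, with a hypothetical function name not taken from the patch:

    define <4 x float> @convert_u_v4f32_nneg_flag(<4 x i32> %x) {
      ; nneg asserts %x is non-negative, so the combine may lower this as
      ; f32x4.convert_i32x4_s without re-proving that the sign bit is zero.
      %a = uitofp nneg <4 x i32> %x to <4 x float>
      ret <4 x float> %a
    }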