diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index bf2e04caa0a61..7ed87e110d57f 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -46,6 +46,10 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( : TargetLowering(TM), Subtarget(&STI) { auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32; + // Set the load count for memcmp expand optimization + MaxLoadsPerMemcmp = 8; + MaxLoadsPerMemcmpOptSize = 4; + // Booleans always contain 0 or 1. setBooleanContents(ZeroOrOneBooleanContent); // Except in SIMD vectors diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index 4f159996e4c6c..52e706514226b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -141,6 +141,21 @@ InstructionCost WebAssemblyTTIImpl::getCastInstrCost( return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); } +WebAssemblyTTIImpl::TTI::MemCmpExpansionOptions +WebAssemblyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { + TTI::MemCmpExpansionOptions Options; + + Options.AllowOverlappingLoads = true; + + // TODO: Teach WebAssembly backend about load v128. + + Options.LoadSizes.append({8, 4, 2, 1}); + Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize); + Options.NumLoadsPerBlock = Options.MaxNumLoads; + + return Options; +} + InstructionCost WebAssemblyTTIImpl::getMemoryOpCost( unsigned Opcode, Type *Ty, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo, diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h index d83b8d1f45dbd..c915eeb07d4fd 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -73,6 +73,10 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase { getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I = nullptr) const override; + + TTI::MemCmpExpansionOptions + enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override; + InstructionCost getMemoryOpCost( unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, diff --git a/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll b/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll new file mode 100644 index 0000000000000..8030438645f82 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll @@ -0,0 +1,151 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s + +target triple = "wasm32-unknown-unknown" + +declare i32 @memcmp(ptr, ptr, i32) + +define i1 @memcmp_expand_3(ptr %a, ptr %b) { +; CHECK-LABEL: memcmp_expand_3: +; CHECK: .functype memcmp_expand_3 (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.load16_u $push7=, 0($0):p2align=0 +; CHECK-NEXT: i32.load16_u $push6=, 0($1):p2align=0 +; CHECK-NEXT: i32.xor $push8=, $pop7, $pop6 +; CHECK-NEXT: i32.const $push0=, 2 +; CHECK-NEXT: i32.add $push3=, $0, $pop0 +; CHECK-NEXT: i32.load8_u $push4=, 0($pop3) +; CHECK-NEXT: i32.const $push13=, 2 +; CHECK-NEXT: i32.add $push1=, $1, $pop13 +; CHECK-NEXT: i32.load8_u $push2=, 0($pop1) +; CHECK-NEXT: i32.xor $push5=, $pop4, $pop2 +; CHECK-NEXT: i32.or $push9=, $pop8, $pop5 +; CHECK-NEXT: i32.const $push10=, 65535 +; CHECK-NEXT: i32.and $push11=, $pop9, $pop10 +; CHECK-NEXT: i32.eqz $push12=, $pop11 +; CHECK-NEXT: return $pop12 + %cmp_3 = call i32 @memcmp(ptr %a, ptr %b, i32 3) + %res = icmp eq i32 %cmp_3, 0 + ret i1 %res +} + +define i1 @memcmp_expand_5(ptr %a, ptr %b) { +; CHECK-LABEL: memcmp_expand_5: +; CHECK: .functype memcmp_expand_5 (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.load $push7=, 0($0):p2align=0 +; CHECK-NEXT: i32.load $push6=, 0($1):p2align=0 +; CHECK-NEXT: i32.xor $push8=, $pop7, $pop6 +; CHECK-NEXT: i32.const $push0=, 4 +; CHECK-NEXT: i32.add $push3=, $0, $pop0 +; CHECK-NEXT: i32.load8_u $push4=, 0($pop3) +; CHECK-NEXT: i32.const $push11=, 4 +; CHECK-NEXT: i32.add $push1=, $1, $pop11 +; CHECK-NEXT: i32.load8_u $push2=, 0($pop1) +; CHECK-NEXT: i32.xor $push5=, $pop4, $pop2 +; CHECK-NEXT: i32.or $push9=, $pop8, $pop5 +; CHECK-NEXT: i32.eqz $push10=, $pop9 +; CHECK-NEXT: return $pop10 + %cmp_5 = call i32 @memcmp(ptr %a, ptr %b, i32 5) + %res = icmp eq i32 %cmp_5, 0 + ret i1 %res +} + +define i1 @memcmp_expand_7(ptr %a, ptr %b) { +; CHECK-LABEL: memcmp_expand_7: +; CHECK: .functype memcmp_expand_7 (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.load $push7=, 0($0):p2align=0 +; CHECK-NEXT: i32.load $push6=, 0($1):p2align=0 +; CHECK-NEXT: i32.xor $push8=, $pop7, $pop6 +; CHECK-NEXT: i32.const $push0=, 3 +; CHECK-NEXT: i32.add $push3=, $0, $pop0 +; CHECK-NEXT: i32.load $push4=, 0($pop3):p2align=0 +; CHECK-NEXT: i32.const $push11=, 3 +; CHECK-NEXT: i32.add $push1=, $1, $pop11 +; CHECK-NEXT: i32.load $push2=, 0($pop1):p2align=0 +; CHECK-NEXT: i32.xor $push5=, $pop4, $pop2 +; CHECK-NEXT: i32.or $push9=, $pop8, $pop5 +; CHECK-NEXT: i32.eqz $push10=, $pop9 +; CHECK-NEXT: return $pop10 + %cmp_7 = call i32 @memcmp(ptr %a, ptr %b, i32 7) + %res = icmp eq i32 %cmp_7, 0 + ret i1 %res +} + +; INFO: Negative test +; Should not expand even with simd128 +define i1 @memcmp_expand_129(ptr %a, ptr %b) { +; CHECK-LABEL: memcmp_expand_129: +; CHECK: .functype memcmp_expand_129 (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const $push0=, 129 +; CHECK-NEXT: call $push1=, memcmp, $0, $1, $pop0 +; CHECK-NEXT: i32.eqz $push2=, $pop1 +; CHECK-NEXT: return $pop2 + %cmp_129 = call i32 @memcmp(ptr %a, ptr %b, i32 129) + %res = icmp eq i32 %cmp_129, 0 + ret i1 %res +} + +define i1 @memcmp_expand_2(ptr %a, ptr %b) { +; CHECK-LABEL: memcmp_expand_2: +; CHECK: .functype memcmp_expand_2 (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.load16_u $push1=, 0($0):p2align=0 +; CHECK-NEXT: i32.load16_u $push0=, 0($1):p2align=0 +; CHECK-NEXT: i32.eq $push2=, $pop1, $pop0 +; CHECK-NEXT: return $pop2 + %cmp_2 = call i32 @memcmp(ptr %a, ptr %b, i32 2) + %res = icmp eq i32 %cmp_2, 0 + ret i1 %res +} + +define i1 @memcmp_expand_2_align(ptr align(2) %a, ptr align(2) %b) { +; CHECK-LABEL: memcmp_expand_2_align: +; CHECK: .functype memcmp_expand_2_align (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.load16_u $push1=, 0($0) +; CHECK-NEXT: i32.load16_u $push0=, 0($1) +; CHECK-NEXT: i32.eq $push2=, $pop1, $pop0 +; CHECK-NEXT: return $pop2 + %cmp_2 = call i32 @memcmp(ptr %a, ptr %b, i32 2) + %res = icmp eq i32 %cmp_2, 0 + ret i1 %res +} + +define i1 @memcmp_expand_8(ptr %a, ptr %b) { +; CHECK-LABEL: memcmp_expand_8: +; CHECK: .functype memcmp_expand_8 (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i64.load $push1=, 0($0):p2align=0 +; CHECK-NEXT: i64.load $push0=, 0($1):p2align=0 +; CHECK-NEXT: i64.eq $push2=, $pop1, $pop0 +; CHECK-NEXT: return $pop2 + %cmp_8 = call i32 @memcmp(ptr %a, ptr %b, i32 8) + %res = icmp eq i32 %cmp_8, 0 + ret i1 %res +} + +; TODO: Should be using a single load i64x2 or equivalent in bitsizes +define i1 @memcmp_expand_16(ptr %a, ptr %b) { +; CHECK-LABEL: memcmp_expand_16: +; CHECK: .functype memcmp_expand_16 (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i64.load $push7=, 0($0):p2align=0 +; CHECK-NEXT: i64.load $push6=, 0($1):p2align=0 +; CHECK-NEXT: i64.xor $push8=, $pop7, $pop6 +; CHECK-NEXT: i32.const $push0=, 8 +; CHECK-NEXT: i32.add $push3=, $0, $pop0 +; CHECK-NEXT: i64.load $push4=, 0($pop3):p2align=0 +; CHECK-NEXT: i32.const $push11=, 8 +; CHECK-NEXT: i32.add $push1=, $1, $pop11 +; CHECK-NEXT: i64.load $push2=, 0($pop1):p2align=0 +; CHECK-NEXT: i64.xor $push5=, $pop4, $pop2 +; CHECK-NEXT: i64.or $push9=, $pop8, $pop5 +; CHECK-NEXT: i64.eqz $push10=, $pop9 +; CHECK-NEXT: return $pop10 + %cmp_16 = call i32 @memcmp(ptr %a, ptr %b, i32 16) + %res = icmp eq i32 %cmp_16, 0 + ret i1 %res +}