Skip to content

Commit 343f747

Browse files
authored
[WebAssembly] Add support for memcmp expansion (#148298)
Fixes #61400. Added a test case in llvm/test/CodeGen/WebAssembly/memcmp-expand.ll.
1 parent b42f96b commit 343f747

File tree

4 files changed

+174
-0
lines changed

4 files changed

+174
-0
lines changed

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
4646
: TargetLowering(TM), Subtarget(&STI) {
4747
auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
4848

49+
// Set the maximum number of loads used when expanding memcmp inline (a smaller limit applies when optimizing for size).
50+
MaxLoadsPerMemcmp = 8;
51+
MaxLoadsPerMemcmpOptSize = 4;
52+
4953
// Booleans always contain 0 or 1.
5054
setBooleanContents(ZeroOrOneBooleanContent);
5155
// Except in SIMD vectors

llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,21 @@ InstructionCost WebAssemblyTTIImpl::getCastInstrCost(
141141
return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
142142
}
143143

144+
/// Build the options that control inline expansion of memcmp calls on
/// WebAssembly.
///
/// \param OptSize forwarded to TLI->getMaxExpandSizeMemcmp() to pick the load
///        budget.
/// \param IsZeroCmp not consulted here; the same options are produced either
///        way.
/// \returns the populated MemCmpExpansionOptions.
WebAssemblyTTIImpl::TTI::MemCmpExpansionOptions
145+
WebAssemblyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
146+
TTI::MemCmpExpansionOptions Options;
147+
// Allow a trailing load to overlap the previous one instead of falling back
// to narrower loads (e.g. 7 bytes as two 4-byte loads at offsets 0 and 3).
148+
Options.AllowOverlappingLoads = true;
149+
150+
// TODO: Teach WebAssembly backend about load v128.
151+
// Only scalar load widths (in bytes, widest first) are offered for now.
// The load budget comes from the target lowering for the given OptSize, and
// a single block is allowed to hold that many loads.
// NOTE(review): getMaxExpandSizeMemcmp() presumably yields MaxLoadsPerMemcmp /
// MaxLoadsPerMemcmpOptSize as set by the target lowering - confirm.
152+
Options.LoadSizes.append({8, 4, 2, 1});
153+
Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
154+
Options.NumLoadsPerBlock = Options.MaxNumLoads;
155+
156+
return Options;
157+
}
158+
144159
InstructionCost WebAssemblyTTIImpl::getMemoryOpCost(
145160
unsigned Opcode, Type *Ty, Align Alignment, unsigned AddressSpace,
146161
TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo,

llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,10 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase<WebAssemblyTTIImpl> {
7373
getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
7474
TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
7575
const Instruction *I = nullptr) const override;
76+
77+
/// Returns the options that control inline expansion of memcmp calls for
/// this target; \p OptSize selects the size-optimized load budget.
TTI::MemCmpExpansionOptions
78+
enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override;
79+
7680
InstructionCost getMemoryOpCost(
7781
unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
7882
TTI::TargetCostKind CostKind,
llvm/test/CodeGen/WebAssembly/memcmp-expand.ll

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s
3+
4+
target triple = "wasm32-unknown-unknown"
5+
6+
declare i32 @memcmp(ptr, ptr, i32)
7+
8+
; Equality-only memcmp of 3 bytes: the expected output has no memcmp call, just
; a 16-bit load at offset 0 and an 8-bit load at offset 2 from each pointer.
define i1 @memcmp_expand_3(ptr %a, ptr %b) {
9+
; CHECK-LABEL: memcmp_expand_3:
10+
; CHECK: .functype memcmp_expand_3 (i32, i32) -> (i32)
11+
; CHECK-NEXT: # %bb.0:
12+
; CHECK-NEXT: i32.load16_u $push7=, 0($0):p2align=0
13+
; CHECK-NEXT: i32.load16_u $push6=, 0($1):p2align=0
14+
; CHECK-NEXT: i32.xor $push8=, $pop7, $pop6
15+
; CHECK-NEXT: i32.const $push0=, 2
16+
; CHECK-NEXT: i32.add $push3=, $0, $pop0
17+
; CHECK-NEXT: i32.load8_u $push4=, 0($pop3)
18+
; CHECK-NEXT: i32.const $push13=, 2
19+
; CHECK-NEXT: i32.add $push1=, $1, $pop13
20+
; CHECK-NEXT: i32.load8_u $push2=, 0($pop1)
21+
; CHECK-NEXT: i32.xor $push5=, $pop4, $pop2
22+
; CHECK-NEXT: i32.or $push9=, $pop8, $pop5
23+
; CHECK-NEXT: i32.const $push10=, 65535
24+
; CHECK-NEXT: i32.and $push11=, $pop9, $pop10
25+
; CHECK-NEXT: i32.eqz $push12=, $pop11
26+
; CHECK-NEXT: return $pop12
27+
%cmp_3 = call i32 @memcmp(ptr %a, ptr %b, i32 3)
28+
%res = icmp eq i32 %cmp_3, 0
29+
ret i1 %res
30+
}
31+
32+
; Equality-only memcmp of 5 bytes: the expected output uses a 32-bit load at
; offset 0 and an 8-bit load at offset 4 from each pointer.
define i1 @memcmp_expand_5(ptr %a, ptr %b) {
33+
; CHECK-LABEL: memcmp_expand_5:
34+
; CHECK: .functype memcmp_expand_5 (i32, i32) -> (i32)
35+
; CHECK-NEXT: # %bb.0:
36+
; CHECK-NEXT: i32.load $push7=, 0($0):p2align=0
37+
; CHECK-NEXT: i32.load $push6=, 0($1):p2align=0
38+
; CHECK-NEXT: i32.xor $push8=, $pop7, $pop6
39+
; CHECK-NEXT: i32.const $push0=, 4
40+
; CHECK-NEXT: i32.add $push3=, $0, $pop0
41+
; CHECK-NEXT: i32.load8_u $push4=, 0($pop3)
42+
; CHECK-NEXT: i32.const $push11=, 4
43+
; CHECK-NEXT: i32.add $push1=, $1, $pop11
44+
; CHECK-NEXT: i32.load8_u $push2=, 0($pop1)
45+
; CHECK-NEXT: i32.xor $push5=, $pop4, $pop2
46+
; CHECK-NEXT: i32.or $push9=, $pop8, $pop5
47+
; CHECK-NEXT: i32.eqz $push10=, $pop9
48+
; CHECK-NEXT: return $pop10
49+
%cmp_5 = call i32 @memcmp(ptr %a, ptr %b, i32 5)
50+
%res = icmp eq i32 %cmp_5, 0
51+
ret i1 %res
52+
}
53+
54+
; Equality-only memcmp of 7 bytes: the expected output uses two overlapping
; 32-bit loads per pointer, at offsets 0 and 3.
define i1 @memcmp_expand_7(ptr %a, ptr %b) {
55+
; CHECK-LABEL: memcmp_expand_7:
56+
; CHECK: .functype memcmp_expand_7 (i32, i32) -> (i32)
57+
; CHECK-NEXT: # %bb.0:
58+
; CHECK-NEXT: i32.load $push7=, 0($0):p2align=0
59+
; CHECK-NEXT: i32.load $push6=, 0($1):p2align=0
60+
; CHECK-NEXT: i32.xor $push8=, $pop7, $pop6
61+
; CHECK-NEXT: i32.const $push0=, 3
62+
; CHECK-NEXT: i32.add $push3=, $0, $pop0
63+
; CHECK-NEXT: i32.load $push4=, 0($pop3):p2align=0
64+
; CHECK-NEXT: i32.const $push11=, 3
65+
; CHECK-NEXT: i32.add $push1=, $1, $pop11
66+
; CHECK-NEXT: i32.load $push2=, 0($pop1):p2align=0
67+
; CHECK-NEXT: i32.xor $push5=, $pop4, $pop2
68+
; CHECK-NEXT: i32.or $push9=, $pop8, $pop5
69+
; CHECK-NEXT: i32.eqz $push10=, $pop9
70+
; CHECK-NEXT: return $pop10
71+
%cmp_7 = call i32 @memcmp(ptr %a, ptr %b, i32 7)
72+
%res = icmp eq i32 %cmp_7, 0
73+
ret i1 %res
74+
}
75+
76+
; INFO: Negative test
77+
; Should not expand, even with simd128: 129 bytes would need more than 8 loads even at 16 bytes per load.
78+
; memcmp of 129 bytes: the expected output keeps the call to memcmp and only
; tests its result against zero.
define i1 @memcmp_expand_129(ptr %a, ptr %b) {
79+
; CHECK-LABEL: memcmp_expand_129:
80+
; CHECK: .functype memcmp_expand_129 (i32, i32) -> (i32)
81+
; CHECK-NEXT: # %bb.0:
82+
; CHECK-NEXT: i32.const $push0=, 129
83+
; CHECK-NEXT: call $push1=, memcmp, $0, $1, $pop0
84+
; CHECK-NEXT: i32.eqz $push2=, $pop1
85+
; CHECK-NEXT: return $pop2
86+
%cmp_129 = call i32 @memcmp(ptr %a, ptr %b, i32 129)
87+
%res = icmp eq i32 %cmp_129, 0
88+
ret i1 %res
89+
}
90+
91+
; Equality-only memcmp of 2 bytes with no alignment stated on the pointers: the
; expected output is one 16-bit load per pointer, marked p2align=0, and i32.eq.
define i1 @memcmp_expand_2(ptr %a, ptr %b) {
92+
; CHECK-LABEL: memcmp_expand_2:
93+
; CHECK: .functype memcmp_expand_2 (i32, i32) -> (i32)
94+
; CHECK-NEXT: # %bb.0:
95+
; CHECK-NEXT: i32.load16_u $push1=, 0($0):p2align=0
96+
; CHECK-NEXT: i32.load16_u $push0=, 0($1):p2align=0
97+
; CHECK-NEXT: i32.eq $push2=, $pop1, $pop0
98+
; CHECK-NEXT: return $pop2
99+
%cmp_2 = call i32 @memcmp(ptr %a, ptr %b, i32 2)
100+
%res = icmp eq i32 %cmp_2, 0
101+
ret i1 %res
102+
}
103+
104+
; Same 2-byte comparison with both pointers declared align(2): the expected
; 16-bit loads carry no p2align annotation.
define i1 @memcmp_expand_2_align(ptr align(2) %a, ptr align(2) %b) {
105+
; CHECK-LABEL: memcmp_expand_2_align:
106+
; CHECK: .functype memcmp_expand_2_align (i32, i32) -> (i32)
107+
; CHECK-NEXT: # %bb.0:
108+
; CHECK-NEXT: i32.load16_u $push1=, 0($0)
109+
; CHECK-NEXT: i32.load16_u $push0=, 0($1)
110+
; CHECK-NEXT: i32.eq $push2=, $pop1, $pop0
111+
; CHECK-NEXT: return $pop2
112+
%cmp_2 = call i32 @memcmp(ptr %a, ptr %b, i32 2)
113+
%res = icmp eq i32 %cmp_2, 0
114+
ret i1 %res
115+
}
116+
117+
; Equality-only memcmp of 8 bytes: the expected output is one i64.load per
; pointer, compared with i64.eq.
define i1 @memcmp_expand_8(ptr %a, ptr %b) {
118+
; CHECK-LABEL: memcmp_expand_8:
119+
; CHECK: .functype memcmp_expand_8 (i32, i32) -> (i32)
120+
; CHECK-NEXT: # %bb.0:
121+
; CHECK-NEXT: i64.load $push1=, 0($0):p2align=0
122+
; CHECK-NEXT: i64.load $push0=, 0($1):p2align=0
123+
; CHECK-NEXT: i64.eq $push2=, $pop1, $pop0
124+
; CHECK-NEXT: return $pop2
125+
%cmp_8 = call i32 @memcmp(ptr %a, ptr %b, i32 8)
126+
%res = icmp eq i32 %cmp_8, 0
127+
ret i1 %res
128+
}
129+
130+
; TODO: Should use a single 128-bit load (i64x2 or an equivalent-width type) instead of two i64 loads.
131+
; Equality-only memcmp of 16 bytes: the expected output uses two i64 loads per
; pointer (offsets 0 and 8), combined with xor/or and tested with i64.eqz.
define i1 @memcmp_expand_16(ptr %a, ptr %b) {
132+
; CHECK-LABEL: memcmp_expand_16:
133+
; CHECK: .functype memcmp_expand_16 (i32, i32) -> (i32)
134+
; CHECK-NEXT: # %bb.0:
135+
; CHECK-NEXT: i64.load $push7=, 0($0):p2align=0
136+
; CHECK-NEXT: i64.load $push6=, 0($1):p2align=0
137+
; CHECK-NEXT: i64.xor $push8=, $pop7, $pop6
138+
; CHECK-NEXT: i32.const $push0=, 8
139+
; CHECK-NEXT: i32.add $push3=, $0, $pop0
140+
; CHECK-NEXT: i64.load $push4=, 0($pop3):p2align=0
141+
; CHECK-NEXT: i32.const $push11=, 8
142+
; CHECK-NEXT: i32.add $push1=, $1, $pop11
143+
; CHECK-NEXT: i64.load $push2=, 0($pop1):p2align=0
144+
; CHECK-NEXT: i64.xor $push5=, $pop4, $pop2
145+
; CHECK-NEXT: i64.or $push9=, $pop8, $pop5
146+
; CHECK-NEXT: i64.eqz $push10=, $pop9
147+
; CHECK-NEXT: return $pop10
148+
%cmp_16 = call i32 @memcmp(ptr %a, ptr %b, i32 16)
149+
%res = icmp eq i32 %cmp_16, 0
150+
ret i1 %res
151+
}

0 commit comments

Comments
 (0)