Skip to content

Commit e96ab26

Browse files
committed
Resolve review comments:
- Enable the optimization for AArch64 only.
- Optimize only when the value ranges of both the LHS and the RHS are within the legal type.
- Use a single Builder.

Change-Id: I11d674440364594e4bca839495036975cd403aa5
1 parent cc25135 commit e96ab26

20 files changed

+3026
-12840
lines changed

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 66 additions & 440 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AArch64/i128-math.ll

Lines changed: 83 additions & 366 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AArch64/i128_with_overflow.ll

Lines changed: 38 additions & 133 deletions
Original file line numberDiff line numberDiff line change
@@ -224,10 +224,9 @@ cleanup:
224224
define i128 @test_umul_i128(i128 noundef %x, i128 noundef %y) {
225225
; CHECK-LABEL: test_umul_i128:
226226
; CHECK: // %bb.0: // %overflow.entry
227-
; CHECK-NEXT: cbz x1, .LBB4_3
228-
; CHECK-NEXT: // %bb.1: // %overflow.lhs
229-
; CHECK-NEXT: cbz x3, .LBB4_5
230-
; CHECK-NEXT: // %bb.2: // %overflow
227+
; CHECK-NEXT: orr x8, x1, x3
228+
; CHECK-NEXT: cbz x8, .LBB4_2
229+
; CHECK-NEXT: // %bb.1: // %overflow
231230
; CHECK-NEXT: mul x9, x3, x0
232231
; CHECK-NEXT: cmp x1, #0
233232
; CHECK-NEXT: ccmp x3, #0, #4, ne
@@ -241,31 +240,16 @@ define i128 @test_umul_i128(i128 noundef %x, i128 noundef %y) {
241240
; CHECK-NEXT: cset w8, ne
242241
; CHECK-NEXT: adds x1, x11, x9
243242
; CHECK-NEXT: csinc w8, w8, wzr, lo
244-
; CHECK-NEXT: tbnz w8, #0, .LBB4_7
245-
; CHECK-NEXT: b .LBB4_8
246-
; CHECK-NEXT: .LBB4_3: // %overflow.no.lhs
243+
; CHECK-NEXT: tbnz w8, #0, .LBB4_3
244+
; CHECK-NEXT: b .LBB4_4
245+
; CHECK-NEXT: .LBB4_2: // %overflow.no
247246
; CHECK-NEXT: umulh x8, x0, x2
248-
; CHECK-NEXT: cbz x3, .LBB4_9
249-
; CHECK-NEXT: // %bb.4: // %overflow.no.lhs.only
250-
; CHECK-NEXT: madd x8, x1, x2, x8
251-
; CHECK-NEXT: umulh x9, x0, x3
252-
; CHECK-NEXT: mul x10, x0, x3
253-
; CHECK-NEXT: mul x11, x1, x3
247+
; CHECK-NEXT: madd x8, x0, x3, x8
254248
; CHECK-NEXT: mul x0, x0, x2
255-
; CHECK-NEXT: b .LBB4_6
256-
; CHECK-NEXT: .LBB4_5: // %overflow.no.rhs.only
257-
; CHECK-NEXT: umulh x8, x2, x0
258-
; CHECK-NEXT: umulh x9, x2, x1
259-
; CHECK-NEXT: madd x8, x3, x0, x8
260-
; CHECK-NEXT: mul x10, x2, x1
261-
; CHECK-NEXT: mul x11, x3, x1
262-
; CHECK-NEXT: mul x0, x2, x0
263-
; CHECK-NEXT: .LBB4_6: // %overflow.res
264-
; CHECK-NEXT: adds x1, x8, x10
265-
; CHECK-NEXT: adcs xzr, x9, x11
266-
; CHECK-NEXT: cset w8, ne
267-
; CHECK-NEXT: tbz w8, #0, .LBB4_8
268-
; CHECK-NEXT: .LBB4_7: // %if.then
249+
; CHECK-NEXT: madd x1, x1, x2, x8
250+
; CHECK-NEXT: mov w8, wzr
251+
; CHECK-NEXT: tbz w8, #0, .LBB4_4
252+
; CHECK-NEXT: .LBB4_3: // %if.then
269253
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
270254
; CHECK-NEXT: .cfi_def_cfa_offset 16
271255
; CHECK-NEXT: .cfi_offset w30, -16
@@ -274,15 +258,8 @@ define i128 @test_umul_i128(i128 noundef %x, i128 noundef %y) {
274258
; CHECK-NEXT: sxtw x0, w0
275259
; CHECK-NEXT: asr x1, x0, #63
276260
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
277-
; CHECK-NEXT: .LBB4_8: // %cleanup
261+
; CHECK-NEXT: .LBB4_4: // %cleanup
278262
; CHECK-NEXT: ret
279-
; CHECK-NEXT: .LBB4_9: // %overflow.no
280-
; CHECK-NEXT: madd x8, x0, x3, x8
281-
; CHECK-NEXT: mul x0, x0, x2
282-
; CHECK-NEXT: madd x1, x1, x2, x8
283-
; CHECK-NEXT: mov w8, wzr
284-
; CHECK-NEXT: tbnz w8, #0, .LBB4_7
285-
; CHECK-NEXT: b .LBB4_8
286263
entry:
287264
%0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y)
288265
%1 = extractvalue { i128, i1 } %0, 1
@@ -305,13 +282,21 @@ cleanup:
305282
define i128 @test_smul_i128(i128 noundef %x, i128 noundef %y) {
306283
; CHECK-LABEL: test_smul_i128:
307284
; CHECK: // %bb.0: // %overflow.entry
308-
; CHECK-NEXT: asr x8, x2, #63
309285
; CHECK-NEXT: cmp x1, x0, asr #63
310-
; CHECK-NEXT: b.eq .LBB5_3
311-
; CHECK-NEXT: // %bb.1: // %overflow.lhs
286+
; CHECK-NEXT: b.ne .LBB5_3
287+
; CHECK-NEXT: // %bb.1: // %overflow.entry
288+
; CHECK-NEXT: asr x8, x2, #63
312289
; CHECK-NEXT: cmp x3, x8
313-
; CHECK-NEXT: b.eq .LBB5_5
314-
; CHECK-NEXT: // %bb.2: // %overflow
290+
; CHECK-NEXT: b.ne .LBB5_3
291+
; CHECK-NEXT: // %bb.2: // %overflow.no
292+
; CHECK-NEXT: umulh x8, x0, x2
293+
; CHECK-NEXT: madd x8, x0, x3, x8
294+
; CHECK-NEXT: mul x0, x0, x2
295+
; CHECK-NEXT: madd x1, x1, x2, x8
296+
; CHECK-NEXT: mov w8, wzr
297+
; CHECK-NEXT: tbnz w8, #0, .LBB5_4
298+
; CHECK-NEXT: b .LBB5_5
299+
; CHECK-NEXT: .LBB5_3: // %overflow
315300
; CHECK-NEXT: asr x9, x1, #63
316301
; CHECK-NEXT: umulh x10, x0, x2
317302
; CHECK-NEXT: asr x13, x3, #63
@@ -322,97 +307,25 @@ define i128 @test_smul_i128(i128 noundef %x, i128 noundef %y) {
322307
; CHECK-NEXT: mul x14, x0, x3
323308
; CHECK-NEXT: umulh x12, x0, x3
324309
; CHECK-NEXT: adc x8, x8, x9
310+
; CHECK-NEXT: mov x9, x1
325311
; CHECK-NEXT: mul x13, x0, x13
326312
; CHECK-NEXT: asr x11, x8, #63
327-
; CHECK-NEXT: adds x9, x14, x10
328313
; CHECK-NEXT: mul x15, x1, x3
329-
; CHECK-NEXT: smulh x10, x1, x3
330-
; CHECK-NEXT: mov x1, x9
331-
; CHECK-NEXT: adc x9, x12, x13
332-
; CHECK-NEXT: asr x12, x9, #63
314+
; CHECK-NEXT: adds x1, x14, x10
315+
; CHECK-NEXT: smulh x9, x9, x3
316+
; CHECK-NEXT: adc x10, x12, x13
317+
; CHECK-NEXT: asr x12, x10, #63
318+
; CHECK-NEXT: adds x8, x8, x10
319+
; CHECK-NEXT: asr x10, x1, #63
333320
; CHECK-NEXT: mul x0, x0, x2
334-
; CHECK-NEXT: adds x8, x8, x9
335-
; CHECK-NEXT: asr x9, x1, #63
336321
; CHECK-NEXT: adc x11, x11, x12
337322
; CHECK-NEXT: adds x8, x15, x8
338-
; CHECK-NEXT: adc x10, x10, x11
339-
; CHECK-NEXT: cmp x8, x9
340-
; CHECK-NEXT: ccmp x10, x9, #0, eq
341-
; CHECK-NEXT: b .LBB5_7
342-
; CHECK-NEXT: .LBB5_3: // %overflow.no.lhs
343-
; CHECK-NEXT: cmp x3, x8
344-
; CHECK-NEXT: b.eq .LBB5_10
345-
; CHECK-NEXT: // %bb.4: // %overflow.no.lhs.only
346-
; CHECK-NEXT: asr x8, x1, #63
347-
; CHECK-NEXT: asr x10, x3, #63
348-
; CHECK-NEXT: eor x9, x0, x8
349-
; CHECK-NEXT: eor x11, x1, x8
350-
; CHECK-NEXT: eor x12, x2, x10
351-
; CHECK-NEXT: subs x9, x9, x8
352-
; CHECK-NEXT: sbc x8, x11, x8
353-
; CHECK-NEXT: cmp x1, #0
354-
; CHECK-NEXT: eor x11, x3, x10
355-
; CHECK-NEXT: csel x8, x8, x1, lt
356-
; CHECK-NEXT: csel x9, x9, x0, lt
357-
; CHECK-NEXT: cset w13, lt
358-
; CHECK-NEXT: subs x12, x12, x10
359-
; CHECK-NEXT: sbc x10, x11, x10
360-
; CHECK-NEXT: cmp x3, #0
361-
; CHECK-NEXT: csel x11, x12, x2, lt
362-
; CHECK-NEXT: csel x10, x10, x3, lt
363-
; CHECK-NEXT: umulh x12, x9, x11
364-
; CHECK-NEXT: mul x15, x8, x10
365-
; CHECK-NEXT: madd x8, x8, x11, x12
366-
; CHECK-NEXT: cset w12, lt
367-
; CHECK-NEXT: mul x14, x9, x11
368-
; CHECK-NEXT: mul x11, x9, x10
369-
; CHECK-NEXT: umulh x9, x9, x10
370-
; CHECK-NEXT: eor w10, w12, w13
371-
; CHECK-NEXT: b .LBB5_6
372-
; CHECK-NEXT: .LBB5_5: // %overflow.no.rhs.only
373-
; CHECK-NEXT: asr x8, x3, #63
374-
; CHECK-NEXT: asr x10, x1, #63
375-
; CHECK-NEXT: eor x9, x2, x8
376-
; CHECK-NEXT: eor x11, x3, x8
377-
; CHECK-NEXT: eor x12, x0, x10
378-
; CHECK-NEXT: subs x9, x9, x8
379-
; CHECK-NEXT: sbc x8, x11, x8
380-
; CHECK-NEXT: cmp x3, #0
381-
; CHECK-NEXT: eor x11, x1, x10
382-
; CHECK-NEXT: csel x8, x8, x3, lt
383-
; CHECK-NEXT: csel x9, x9, x2, lt
384-
; CHECK-NEXT: cset w13, lt
385-
; CHECK-NEXT: subs x12, x12, x10
386-
; CHECK-NEXT: sbc x10, x11, x10
387-
; CHECK-NEXT: cmp x1, #0
388-
; CHECK-NEXT: csel x11, x12, x0, lt
389-
; CHECK-NEXT: csel x10, x10, x1, lt
390-
; CHECK-NEXT: umulh x12, x9, x11
391-
; CHECK-NEXT: mul x14, x9, x11
392-
; CHECK-NEXT: mul x15, x8, x10
393-
; CHECK-NEXT: madd x8, x8, x11, x12
394-
; CHECK-NEXT: cset w12, lt
395-
; CHECK-NEXT: mul x11, x9, x10
396-
; CHECK-NEXT: umulh x9, x9, x10
397-
; CHECK-NEXT: eor w10, w13, w12
398-
; CHECK-NEXT: .LBB5_6: // %overflow.res
399-
; CHECK-NEXT: sbfx x12, x10, #0, #1
400-
; CHECK-NEXT: adds x8, x8, x11
401-
; CHECK-NEXT: adc x9, x9, x15
402-
; CHECK-NEXT: eor x13, x14, x12
403-
; CHECK-NEXT: eor x8, x8, x12
404-
; CHECK-NEXT: add x0, x13, x10
405-
; CHECK-NEXT: cmp x0, x10
406-
; CHECK-NEXT: cset w10, lo
407-
; CHECK-NEXT: cinc x1, x8, lo
408-
; CHECK-NEXT: eor x8, x9, x12
409-
; CHECK-NEXT: cmp x1, x10
410-
; CHECK-NEXT: cinc x8, x8, lo
411-
; CHECK-NEXT: cmp x8, #0
412-
; CHECK-NEXT: .LBB5_7: // %overflow.res
323+
; CHECK-NEXT: adc x9, x9, x11
324+
; CHECK-NEXT: cmp x8, x10
325+
; CHECK-NEXT: ccmp x9, x10, #0, eq
413326
; CHECK-NEXT: cset w8, ne
414-
; CHECK-NEXT: tbz w8, #0, .LBB5_9
415-
; CHECK-NEXT: .LBB5_8: // %if.then
327+
; CHECK-NEXT: tbz w8, #0, .LBB5_5
328+
; CHECK-NEXT: .LBB5_4: // %if.then
416329
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
417330
; CHECK-NEXT: .cfi_def_cfa_offset 16
418331
; CHECK-NEXT: .cfi_offset w30, -16
@@ -421,16 +334,8 @@ define i128 @test_smul_i128(i128 noundef %x, i128 noundef %y) {
421334
; CHECK-NEXT: sxtw x0, w0
422335
; CHECK-NEXT: asr x1, x0, #63
423336
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
424-
; CHECK-NEXT: .LBB5_9: // %cleanup
337+
; CHECK-NEXT: .LBB5_5: // %cleanup
425338
; CHECK-NEXT: ret
426-
; CHECK-NEXT: .LBB5_10: // %overflow.no
427-
; CHECK-NEXT: umulh x8, x0, x2
428-
; CHECK-NEXT: madd x8, x0, x3, x8
429-
; CHECK-NEXT: mul x0, x0, x2
430-
; CHECK-NEXT: madd x1, x1, x2, x8
431-
; CHECK-NEXT: mov w8, wzr
432-
; CHECK-NEXT: tbnz w8, #0, .LBB5_8
433-
; CHECK-NEXT: b .LBB5_9
434339
entry:
435340
%0 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
436341
%1 = extractvalue { i128, i1 } %0, 1

0 commit comments

Comments
 (0)