@@ -224,10 +224,9 @@ cleanup:
224
224
define i128 @test_umul_i128 (i128 noundef %x , i128 noundef %y ) {
225
225
; CHECK-LABEL: test_umul_i128:
226
226
; CHECK: // %bb.0: // %overflow.entry
227
- ; CHECK-NEXT: cbz x1, .LBB4_3
228
- ; CHECK-NEXT: // %bb.1: // %overflow.lhs
229
- ; CHECK-NEXT: cbz x3, .LBB4_5
230
- ; CHECK-NEXT: // %bb.2: // %overflow
227
+ ; CHECK-NEXT: orr x8, x1, x3
228
+ ; CHECK-NEXT: cbz x8, .LBB4_2
229
+ ; CHECK-NEXT: // %bb.1: // %overflow
231
230
; CHECK-NEXT: mul x9, x3, x0
232
231
; CHECK-NEXT: cmp x1, #0
233
232
; CHECK-NEXT: ccmp x3, #0, #4, ne
@@ -241,31 +240,16 @@ define i128 @test_umul_i128(i128 noundef %x, i128 noundef %y) {
241
240
; CHECK-NEXT: cset w8, ne
242
241
; CHECK-NEXT: adds x1, x11, x9
243
242
; CHECK-NEXT: csinc w8, w8, wzr, lo
244
- ; CHECK-NEXT: tbnz w8, #0, .LBB4_7
245
- ; CHECK-NEXT: b .LBB4_8
246
- ; CHECK-NEXT: .LBB4_3 : // %overflow.no.lhs
243
+ ; CHECK-NEXT: tbnz w8, #0, .LBB4_3
244
+ ; CHECK-NEXT: b .LBB4_4
245
+ ; CHECK-NEXT: .LBB4_2 : // %overflow.no
247
246
; CHECK-NEXT: umulh x8, x0, x2
248
- ; CHECK-NEXT: cbz x3, .LBB4_9
249
- ; CHECK-NEXT: // %bb.4: // %overflow.no.lhs.only
250
- ; CHECK-NEXT: madd x8, x1, x2, x8
251
- ; CHECK-NEXT: umulh x9, x0, x3
252
- ; CHECK-NEXT: mul x10, x0, x3
253
- ; CHECK-NEXT: mul x11, x1, x3
247
+ ; CHECK-NEXT: madd x8, x0, x3, x8
254
248
; CHECK-NEXT: mul x0, x0, x2
255
- ; CHECK-NEXT: b .LBB4_6
256
- ; CHECK-NEXT: .LBB4_5: // %overflow.no.rhs.only
257
- ; CHECK-NEXT: umulh x8, x2, x0
258
- ; CHECK-NEXT: umulh x9, x2, x1
259
- ; CHECK-NEXT: madd x8, x3, x0, x8
260
- ; CHECK-NEXT: mul x10, x2, x1
261
- ; CHECK-NEXT: mul x11, x3, x1
262
- ; CHECK-NEXT: mul x0, x2, x0
263
- ; CHECK-NEXT: .LBB4_6: // %overflow.res
264
- ; CHECK-NEXT: adds x1, x8, x10
265
- ; CHECK-NEXT: adcs xzr, x9, x11
266
- ; CHECK-NEXT: cset w8, ne
267
- ; CHECK-NEXT: tbz w8, #0, .LBB4_8
268
- ; CHECK-NEXT: .LBB4_7: // %if.then
249
+ ; CHECK-NEXT: madd x1, x1, x2, x8
250
+ ; CHECK-NEXT: mov w8, wzr
251
+ ; CHECK-NEXT: tbz w8, #0, .LBB4_4
252
+ ; CHECK-NEXT: .LBB4_3: // %if.then
269
253
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
270
254
; CHECK-NEXT: .cfi_def_cfa_offset 16
271
255
; CHECK-NEXT: .cfi_offset w30, -16
@@ -274,15 +258,8 @@ define i128 @test_umul_i128(i128 noundef %x, i128 noundef %y) {
274
258
; CHECK-NEXT: sxtw x0, w0
275
259
; CHECK-NEXT: asr x1, x0, #63
276
260
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
277
- ; CHECK-NEXT: .LBB4_8 : // %cleanup
261
+ ; CHECK-NEXT: .LBB4_4 : // %cleanup
278
262
; CHECK-NEXT: ret
279
- ; CHECK-NEXT: .LBB4_9: // %overflow.no
280
- ; CHECK-NEXT: madd x8, x0, x3, x8
281
- ; CHECK-NEXT: mul x0, x0, x2
282
- ; CHECK-NEXT: madd x1, x1, x2, x8
283
- ; CHECK-NEXT: mov w8, wzr
284
- ; CHECK-NEXT: tbnz w8, #0, .LBB4_7
285
- ; CHECK-NEXT: b .LBB4_8
286
263
entry:
287
264
%0 = tail call { i128 , i1 } @llvm.umul.with.overflow.i128 (i128 %x , i128 %y )
288
265
%1 = extractvalue { i128 , i1 } %0 , 1
@@ -305,13 +282,21 @@ cleanup:
305
282
define i128 @test_smul_i128 (i128 noundef %x , i128 noundef %y ) {
306
283
; CHECK-LABEL: test_smul_i128:
307
284
; CHECK: // %bb.0: // %overflow.entry
308
- ; CHECK-NEXT: asr x8, x2, #63
309
285
; CHECK-NEXT: cmp x1, x0, asr #63
310
- ; CHECK-NEXT: b.eq .LBB5_3
311
- ; CHECK-NEXT: // %bb.1: // %overflow.lhs
286
+ ; CHECK-NEXT: b.ne .LBB5_3
287
+ ; CHECK-NEXT: // %bb.1: // %overflow.entry
288
+ ; CHECK-NEXT: asr x8, x2, #63
312
289
; CHECK-NEXT: cmp x3, x8
313
- ; CHECK-NEXT: b.eq .LBB5_5
314
- ; CHECK-NEXT: // %bb.2: // %overflow
290
+ ; CHECK-NEXT: b.ne .LBB5_3
291
+ ; CHECK-NEXT: // %bb.2: // %overflow.no
292
+ ; CHECK-NEXT: umulh x8, x0, x2
293
+ ; CHECK-NEXT: madd x8, x0, x3, x8
294
+ ; CHECK-NEXT: mul x0, x0, x2
295
+ ; CHECK-NEXT: madd x1, x1, x2, x8
296
+ ; CHECK-NEXT: mov w8, wzr
297
+ ; CHECK-NEXT: tbnz w8, #0, .LBB5_4
298
+ ; CHECK-NEXT: b .LBB5_5
299
+ ; CHECK-NEXT: .LBB5_3: // %overflow
315
300
; CHECK-NEXT: asr x9, x1, #63
316
301
; CHECK-NEXT: umulh x10, x0, x2
317
302
; CHECK-NEXT: asr x13, x3, #63
@@ -322,97 +307,25 @@ define i128 @test_smul_i128(i128 noundef %x, i128 noundef %y) {
322
307
; CHECK-NEXT: mul x14, x0, x3
323
308
; CHECK-NEXT: umulh x12, x0, x3
324
309
; CHECK-NEXT: adc x8, x8, x9
310
+ ; CHECK-NEXT: mov x9, x1
325
311
; CHECK-NEXT: mul x13, x0, x13
326
312
; CHECK-NEXT: asr x11, x8, #63
327
- ; CHECK-NEXT: adds x9, x14, x10
328
313
; CHECK-NEXT: mul x15, x1, x3
329
- ; CHECK-NEXT: smulh x10, x1, x3
330
- ; CHECK-NEXT: mov x1, x9
331
- ; CHECK-NEXT: adc x9, x12, x13
332
- ; CHECK-NEXT: asr x12, x9, #63
314
+ ; CHECK-NEXT: adds x1, x14, x10
315
+ ; CHECK-NEXT: smulh x9, x9, x3
316
+ ; CHECK-NEXT: adc x10, x12, x13
317
+ ; CHECK-NEXT: asr x12, x10, #63
318
+ ; CHECK-NEXT: adds x8, x8, x10
319
+ ; CHECK-NEXT: asr x10, x1, #63
333
320
; CHECK-NEXT: mul x0, x0, x2
334
- ; CHECK-NEXT: adds x8, x8, x9
335
- ; CHECK-NEXT: asr x9, x1, #63
336
321
; CHECK-NEXT: adc x11, x11, x12
337
322
; CHECK-NEXT: adds x8, x15, x8
338
- ; CHECK-NEXT: adc x10, x10, x11
339
- ; CHECK-NEXT: cmp x8, x9
340
- ; CHECK-NEXT: ccmp x10, x9, #0, eq
341
- ; CHECK-NEXT: b .LBB5_7
342
- ; CHECK-NEXT: .LBB5_3: // %overflow.no.lhs
343
- ; CHECK-NEXT: cmp x3, x8
344
- ; CHECK-NEXT: b.eq .LBB5_10
345
- ; CHECK-NEXT: // %bb.4: // %overflow.no.lhs.only
346
- ; CHECK-NEXT: asr x8, x1, #63
347
- ; CHECK-NEXT: asr x10, x3, #63
348
- ; CHECK-NEXT: eor x9, x0, x8
349
- ; CHECK-NEXT: eor x11, x1, x8
350
- ; CHECK-NEXT: eor x12, x2, x10
351
- ; CHECK-NEXT: subs x9, x9, x8
352
- ; CHECK-NEXT: sbc x8, x11, x8
353
- ; CHECK-NEXT: cmp x1, #0
354
- ; CHECK-NEXT: eor x11, x3, x10
355
- ; CHECK-NEXT: csel x8, x8, x1, lt
356
- ; CHECK-NEXT: csel x9, x9, x0, lt
357
- ; CHECK-NEXT: cset w13, lt
358
- ; CHECK-NEXT: subs x12, x12, x10
359
- ; CHECK-NEXT: sbc x10, x11, x10
360
- ; CHECK-NEXT: cmp x3, #0
361
- ; CHECK-NEXT: csel x11, x12, x2, lt
362
- ; CHECK-NEXT: csel x10, x10, x3, lt
363
- ; CHECK-NEXT: umulh x12, x9, x11
364
- ; CHECK-NEXT: mul x15, x8, x10
365
- ; CHECK-NEXT: madd x8, x8, x11, x12
366
- ; CHECK-NEXT: cset w12, lt
367
- ; CHECK-NEXT: mul x14, x9, x11
368
- ; CHECK-NEXT: mul x11, x9, x10
369
- ; CHECK-NEXT: umulh x9, x9, x10
370
- ; CHECK-NEXT: eor w10, w12, w13
371
- ; CHECK-NEXT: b .LBB5_6
372
- ; CHECK-NEXT: .LBB5_5: // %overflow.no.rhs.only
373
- ; CHECK-NEXT: asr x8, x3, #63
374
- ; CHECK-NEXT: asr x10, x1, #63
375
- ; CHECK-NEXT: eor x9, x2, x8
376
- ; CHECK-NEXT: eor x11, x3, x8
377
- ; CHECK-NEXT: eor x12, x0, x10
378
- ; CHECK-NEXT: subs x9, x9, x8
379
- ; CHECK-NEXT: sbc x8, x11, x8
380
- ; CHECK-NEXT: cmp x3, #0
381
- ; CHECK-NEXT: eor x11, x1, x10
382
- ; CHECK-NEXT: csel x8, x8, x3, lt
383
- ; CHECK-NEXT: csel x9, x9, x2, lt
384
- ; CHECK-NEXT: cset w13, lt
385
- ; CHECK-NEXT: subs x12, x12, x10
386
- ; CHECK-NEXT: sbc x10, x11, x10
387
- ; CHECK-NEXT: cmp x1, #0
388
- ; CHECK-NEXT: csel x11, x12, x0, lt
389
- ; CHECK-NEXT: csel x10, x10, x1, lt
390
- ; CHECK-NEXT: umulh x12, x9, x11
391
- ; CHECK-NEXT: mul x14, x9, x11
392
- ; CHECK-NEXT: mul x15, x8, x10
393
- ; CHECK-NEXT: madd x8, x8, x11, x12
394
- ; CHECK-NEXT: cset w12, lt
395
- ; CHECK-NEXT: mul x11, x9, x10
396
- ; CHECK-NEXT: umulh x9, x9, x10
397
- ; CHECK-NEXT: eor w10, w13, w12
398
- ; CHECK-NEXT: .LBB5_6: // %overflow.res
399
- ; CHECK-NEXT: sbfx x12, x10, #0, #1
400
- ; CHECK-NEXT: adds x8, x8, x11
401
- ; CHECK-NEXT: adc x9, x9, x15
402
- ; CHECK-NEXT: eor x13, x14, x12
403
- ; CHECK-NEXT: eor x8, x8, x12
404
- ; CHECK-NEXT: add x0, x13, x10
405
- ; CHECK-NEXT: cmp x0, x10
406
- ; CHECK-NEXT: cset w10, lo
407
- ; CHECK-NEXT: cinc x1, x8, lo
408
- ; CHECK-NEXT: eor x8, x9, x12
409
- ; CHECK-NEXT: cmp x1, x10
410
- ; CHECK-NEXT: cinc x8, x8, lo
411
- ; CHECK-NEXT: cmp x8, #0
412
- ; CHECK-NEXT: .LBB5_7: // %overflow.res
323
+ ; CHECK-NEXT: adc x9, x9, x11
324
+ ; CHECK-NEXT: cmp x8, x10
325
+ ; CHECK-NEXT: ccmp x9, x10, #0, eq
413
326
; CHECK-NEXT: cset w8, ne
414
- ; CHECK-NEXT: tbz w8, #0, .LBB5_9
415
- ; CHECK-NEXT: .LBB5_8 : // %if.then
327
+ ; CHECK-NEXT: tbz w8, #0, .LBB5_5
328
+ ; CHECK-NEXT: .LBB5_4 : // %if.then
416
329
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
417
330
; CHECK-NEXT: .cfi_def_cfa_offset 16
418
331
; CHECK-NEXT: .cfi_offset w30, -16
@@ -421,16 +334,8 @@ define i128 @test_smul_i128(i128 noundef %x, i128 noundef %y) {
421
334
; CHECK-NEXT: sxtw x0, w0
422
335
; CHECK-NEXT: asr x1, x0, #63
423
336
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
424
- ; CHECK-NEXT: .LBB5_9 : // %cleanup
337
+ ; CHECK-NEXT: .LBB5_5 : // %cleanup
425
338
; CHECK-NEXT: ret
426
- ; CHECK-NEXT: .LBB5_10: // %overflow.no
427
- ; CHECK-NEXT: umulh x8, x0, x2
428
- ; CHECK-NEXT: madd x8, x0, x3, x8
429
- ; CHECK-NEXT: mul x0, x0, x2
430
- ; CHECK-NEXT: madd x1, x1, x2, x8
431
- ; CHECK-NEXT: mov w8, wzr
432
- ; CHECK-NEXT: tbnz w8, #0, .LBB5_8
433
- ; CHECK-NEXT: b .LBB5_9
434
339
entry:
435
340
%0 = tail call { i128 , i1 } @llvm.smul.with.overflow.i128 (i128 %x , i128 %y )
436
341
%1 = extractvalue { i128 , i1 } %0 , 1
0 commit comments