@@ -223,22 +223,49 @@ cleanup:

define i128 @test_umul_i128(i128 noundef %x, i128 noundef %y) {
; CHECK-LABEL: test_umul_i128:
- ; CHECK: // %bb.0: // %entry
+ ; CHECK: // %bb.0: // %overflow.entry
+ ; CHECK-NEXT: cbz x1, .LBB4_3
+ ; CHECK-NEXT: // %bb.1: // %overflow.lhs
+ ; CHECK-NEXT: cbz x3, .LBB4_5
+ ; CHECK-NEXT: // %bb.2: // %overflow
; CHECK-NEXT: mul x9, x3, x0
; CHECK-NEXT: cmp x1, #0
; CHECK-NEXT: ccmp x3, #0, #4, ne
- ; CHECK-NEXT: umulh x8, x1, x2
- ; CHECK-NEXT: umulh x10, x3, x0
+ ; CHECK-NEXT: umulh x10, x1, x2
+ ; CHECK-NEXT: umulh x8, x3, x0
; CHECK-NEXT: madd x9, x1, x2, x9
- ; CHECK-NEXT: ccmp xzr, x8, #0, eq
- ; CHECK-NEXT: umulh x11, x0, x2
; CHECK-NEXT: ccmp xzr, x10, #0, eq
+ ; CHECK-NEXT: umulh x11, x0, x2
+ ; CHECK-NEXT: ccmp xzr, x8, #0, eq
+ ; CHECK-NEXT: mul x0, x0, x2
; CHECK-NEXT: cset w8, ne
; CHECK-NEXT: adds x1, x11, x9
; CHECK-NEXT: csinc w8, w8, wzr, lo
- ; CHECK-NEXT: cmp w8, #1
- ; CHECK-NEXT: b.ne .LBB4_2
- ; CHECK-NEXT: // %bb.1: // %if.then
+ ; CHECK-NEXT: tbnz w8, #0, .LBB4_7
+ ; CHECK-NEXT: b .LBB4_8
+ ; CHECK-NEXT: .LBB4_3: // %overflow.no.lhs
+ ; CHECK-NEXT: umulh x8, x0, x2
+ ; CHECK-NEXT: cbz x3, .LBB4_9
+ ; CHECK-NEXT: // %bb.4: // %overflow.no.lhs.only
+ ; CHECK-NEXT: madd x8, x1, x2, x8
+ ; CHECK-NEXT: umulh x9, x0, x3
+ ; CHECK-NEXT: mul x10, x0, x3
+ ; CHECK-NEXT: mul x11, x1, x3
+ ; CHECK-NEXT: mul x0, x0, x2
+ ; CHECK-NEXT: b .LBB4_6
+ ; CHECK-NEXT: .LBB4_5: // %overflow.no.rhs.only
+ ; CHECK-NEXT: umulh x8, x2, x0
+ ; CHECK-NEXT: umulh x9, x2, x1
+ ; CHECK-NEXT: madd x8, x3, x0, x8
+ ; CHECK-NEXT: mul x10, x2, x1
+ ; CHECK-NEXT: mul x11, x3, x1
+ ; CHECK-NEXT: mul x0, x2, x0
+ ; CHECK-NEXT: .LBB4_6: // %overflow.res
+ ; CHECK-NEXT: adds x1, x8, x10
+ ; CHECK-NEXT: adcs xzr, x9, x11
+ ; CHECK-NEXT: cset w8, ne
+ ; CHECK-NEXT: tbz w8, #0, .LBB4_8
+ ; CHECK-NEXT: .LBB4_7: // %if.then
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
@@ -247,10 +274,15 @@ define i128 @test_umul_i128(i128 noundef %x, i128 noundef %y) {
; CHECK-NEXT: sxtw x0, w0
; CHECK-NEXT: asr x1, x0, #63
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+ ; CHECK-NEXT: .LBB4_8: // %cleanup
; CHECK-NEXT: ret
- ; CHECK-NEXT: .LBB4_2: // %if.end
+ ; CHECK-NEXT: .LBB4_9: // %overflow.no
+ ; CHECK-NEXT: madd x8, x0, x3, x8
; CHECK-NEXT: mul x0, x0, x2
- ; CHECK-NEXT: ret
+ ; CHECK-NEXT: madd x1, x1, x2, x8
+ ; CHECK-NEXT: mov w8, wzr
+ ; CHECK-NEXT: tbnz w8, #0, .LBB4_7
+ ; CHECK-NEXT: b .LBB4_8
entry:
  %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y)
  %1 = extractvalue { i128, i1 } %0, 1
@@ -272,35 +304,115 @@ cleanup:

define i128 @test_smul_i128(i128 noundef %x, i128 noundef %y) {
; CHECK-LABEL: test_smul_i128:
- ; CHECK: // %bb.0: // %entry
+ ; CHECK: // %bb.0: // %overflow.entry
+ ; CHECK-NEXT: asr x8, x2, #63
+ ; CHECK-NEXT: cmp x1, x0, asr #63
+ ; CHECK-NEXT: b.eq .LBB5_3
+ ; CHECK-NEXT: // %bb.1: // %overflow.lhs
+ ; CHECK-NEXT: cmp x3, x8
+ ; CHECK-NEXT: b.eq .LBB5_5
+ ; CHECK-NEXT: // %bb.2: // %overflow
+ ; CHECK-NEXT: asr x9, x1, #63
+ ; CHECK-NEXT: umulh x10, x0, x2
+ ; CHECK-NEXT: asr x13, x3, #63
+ ; CHECK-NEXT: mul x11, x1, x2
+ ; CHECK-NEXT: umulh x8, x1, x2
+ ; CHECK-NEXT: mul x9, x9, x2
+ ; CHECK-NEXT: adds x10, x11, x10
+ ; CHECK-NEXT: mul x14, x0, x3
+ ; CHECK-NEXT: umulh x12, x0, x3
+ ; CHECK-NEXT: adc x8, x8, x9
+ ; CHECK-NEXT: mul x13, x0, x13
+ ; CHECK-NEXT: asr x11, x8, #63
+ ; CHECK-NEXT: adds x9, x14, x10
+ ; CHECK-NEXT: mul x15, x1, x3
+ ; CHECK-NEXT: smulh x10, x1, x3
+ ; CHECK-NEXT: mov x1, x9
+ ; CHECK-NEXT: adc x9, x12, x13
+ ; CHECK-NEXT: asr x12, x9, #63
+ ; CHECK-NEXT: mul x0, x0, x2
+ ; CHECK-NEXT: adds x8, x8, x9
+ ; CHECK-NEXT: asr x9, x1, #63
+ ; CHECK-NEXT: adc x11, x11, x12
+ ; CHECK-NEXT: adds x8, x15, x8
+ ; CHECK-NEXT: adc x10, x10, x11
+ ; CHECK-NEXT: cmp x8, x9
+ ; CHECK-NEXT: ccmp x10, x9, #0, eq
+ ; CHECK-NEXT: b .LBB5_7
+ ; CHECK-NEXT: .LBB5_3: // %overflow.no.lhs
+ ; CHECK-NEXT: cmp x3, x8
+ ; CHECK-NEXT: b.eq .LBB5_10
+ ; CHECK-NEXT: // %bb.4: // %overflow.no.lhs.only
+ ; CHECK-NEXT: asr x8, x1, #63
+ ; CHECK-NEXT: asr x10, x3, #63
+ ; CHECK-NEXT: eor x9, x0, x8
+ ; CHECK-NEXT: eor x11, x1, x8
+ ; CHECK-NEXT: eor x12, x2, x10
+ ; CHECK-NEXT: subs x9, x9, x8
+ ; CHECK-NEXT: sbc x8, x11, x8
+ ; CHECK-NEXT: cmp x1, #0
+ ; CHECK-NEXT: eor x11, x3, x10
+ ; CHECK-NEXT: csel x8, x8, x1, lt
+ ; CHECK-NEXT: csel x9, x9, x0, lt
+ ; CHECK-NEXT: cset w13, lt
+ ; CHECK-NEXT: subs x12, x12, x10
+ ; CHECK-NEXT: sbc x10, x11, x10
+ ; CHECK-NEXT: cmp x3, #0
+ ; CHECK-NEXT: csel x11, x12, x2, lt
+ ; CHECK-NEXT: csel x10, x10, x3, lt
+ ; CHECK-NEXT: umulh x12, x9, x11
+ ; CHECK-NEXT: mul x15, x8, x10
+ ; CHECK-NEXT: madd x8, x8, x11, x12
+ ; CHECK-NEXT: cset w12, lt
+ ; CHECK-NEXT: mul x14, x9, x11
+ ; CHECK-NEXT: mul x11, x9, x10
+ ; CHECK-NEXT: umulh x9, x9, x10
+ ; CHECK-NEXT: eor w10, w12, w13
+ ; CHECK-NEXT: b .LBB5_6
+ ; CHECK-NEXT: .LBB5_5: // %overflow.no.rhs.only
+ ; CHECK-NEXT: asr x8, x3, #63
; CHECK-NEXT: asr x10, x1, #63
- ; CHECK-NEXT: umulh x11, x0, x2
- ; CHECK-NEXT: asr x14, x3, #63
- ; CHECK-NEXT: mov x8, x1
- ; CHECK-NEXT: mul x12, x1, x2
- ; CHECK-NEXT: umulh x9, x1, x2
- ; CHECK-NEXT: mul x10, x10, x2
- ; CHECK-NEXT: adds x11, x12, x11
- ; CHECK-NEXT: mul x15, x0, x3
- ; CHECK-NEXT: umulh x13, x0, x3
- ; CHECK-NEXT: adc x9, x9, x10
- ; CHECK-NEXT: mul x14, x0, x14
- ; CHECK-NEXT: mul x16, x1, x3
- ; CHECK-NEXT: adds x1, x15, x11
- ; CHECK-NEXT: asr x11, x9, #63
- ; CHECK-NEXT: smulh x8, x8, x3
- ; CHECK-NEXT: adc x10, x13, x14
- ; CHECK-NEXT: asr x12, x10, #63
- ; CHECK-NEXT: adds x9, x9, x10
- ; CHECK-NEXT: adc x10, x11, x12
- ; CHECK-NEXT: adds x9, x16, x9
- ; CHECK-NEXT: asr x11, x1, #63
- ; CHECK-NEXT: adc x8, x8, x10
- ; CHECK-NEXT: eor x8, x8, x11
- ; CHECK-NEXT: eor x9, x9, x11
- ; CHECK-NEXT: orr x8, x9, x8
- ; CHECK-NEXT: cbz x8, .LBB5_2
- ; CHECK-NEXT: // %bb.1: // %if.then
+ ; CHECK-NEXT: eor x9, x2, x8
+ ; CHECK-NEXT: eor x11, x3, x8
+ ; CHECK-NEXT: eor x12, x0, x10
+ ; CHECK-NEXT: subs x9, x9, x8
+ ; CHECK-NEXT: sbc x8, x11, x8
+ ; CHECK-NEXT: cmp x3, #0
+ ; CHECK-NEXT: eor x11, x1, x10
+ ; CHECK-NEXT: csel x8, x8, x3, lt
+ ; CHECK-NEXT: csel x9, x9, x2, lt
+ ; CHECK-NEXT: cset w13, lt
+ ; CHECK-NEXT: subs x12, x12, x10
+ ; CHECK-NEXT: sbc x10, x11, x10
+ ; CHECK-NEXT: cmp x1, #0
+ ; CHECK-NEXT: csel x11, x12, x0, lt
+ ; CHECK-NEXT: csel x10, x10, x1, lt
+ ; CHECK-NEXT: umulh x12, x9, x11
+ ; CHECK-NEXT: mul x14, x9, x11
+ ; CHECK-NEXT: mul x15, x8, x10
+ ; CHECK-NEXT: madd x8, x8, x11, x12
+ ; CHECK-NEXT: cset w12, lt
+ ; CHECK-NEXT: mul x11, x9, x10
+ ; CHECK-NEXT: umulh x9, x9, x10
+ ; CHECK-NEXT: eor w10, w13, w12
+ ; CHECK-NEXT: .LBB5_6: // %overflow.res
+ ; CHECK-NEXT: sbfx x12, x10, #0, #1
+ ; CHECK-NEXT: adds x8, x8, x11
+ ; CHECK-NEXT: adc x9, x9, x15
+ ; CHECK-NEXT: eor x13, x14, x12
+ ; CHECK-NEXT: eor x8, x8, x12
+ ; CHECK-NEXT: add x0, x13, x10
+ ; CHECK-NEXT: cmp x0, x10
+ ; CHECK-NEXT: cset w10, lo
+ ; CHECK-NEXT: cinc x1, x8, lo
+ ; CHECK-NEXT: eor x8, x9, x12
+ ; CHECK-NEXT: cmp x1, x10
+ ; CHECK-NEXT: cinc x8, x8, lo
+ ; CHECK-NEXT: cmp x8, #0
+ ; CHECK-NEXT: .LBB5_7: // %overflow.res
+ ; CHECK-NEXT: cset w8, ne
+ ; CHECK-NEXT: tbz w8, #0, .LBB5_9
+ ; CHECK-NEXT: .LBB5_8: // %if.then
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
@@ -309,10 +421,16 @@ define i128 @test_smul_i128(i128 noundef %x, i128 noundef %y) {
; CHECK-NEXT: sxtw x0, w0
; CHECK-NEXT: asr x1, x0, #63
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+ ; CHECK-NEXT: .LBB5_9: // %cleanup
; CHECK-NEXT: ret
- ; CHECK-NEXT: .LBB5_2: // %if.end
+ ; CHECK-NEXT: .LBB5_10: // %overflow.no
+ ; CHECK-NEXT: umulh x8, x0, x2
+ ; CHECK-NEXT: madd x8, x0, x3, x8
; CHECK-NEXT: mul x0, x0, x2
- ; CHECK-NEXT: ret
+ ; CHECK-NEXT: madd x1, x1, x2, x8
+ ; CHECK-NEXT: mov w8, wzr
+ ; CHECK-NEXT: tbnz w8, #0, .LBB5_8
+ ; CHECK-NEXT: b .LBB5_9
entry:
  %0 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
  %1 = extractvalue { i128, i1 } %0, 1
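Note (not part of the patch): both tests exercise the same checked-multiply idiom whose AArch64 lowering the updated CHECK lines verify — call the multiply-with-overflow intrinsic, extract the overflow bit, and branch to a slow path when it is set. A minimal standalone IR sketch of that pattern, with illustrative function and label names, is:

declare { i128, i1 } @llvm.umul.with.overflow.i128(i128, i128)

define i128 @umul_checked(i128 noundef %x, i128 noundef %y) {
entry:
  ; multiply and receive the {low 128 bits, overflow flag} pair
  %res = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y)
  %ov = extractvalue { i128, i1 } %res, 1
  br i1 %ov, label %if.then, label %if.end

if.then:                                          ; overflow: take the slow path
  ret i128 -1

if.end:                                           ; no overflow: return the product
  %val = extractvalue { i128, i1 } %res, 0
  ret i128 %val
}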