Skip to content

Commit c4748ea

Browse files
committed
[DAGCombiner] Fold setcc of trunc, generalizing some NVPTX isel logic
1 parent 324773e commit c4748ea

File tree

5 files changed

+401
-103
lines changed

5 files changed

+401
-103
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "llvm/CodeGen/Analysis.h"
1818
#include "llvm/CodeGen/CallingConvLower.h"
1919
#include "llvm/CodeGen/CodeGenCommonISel.h"
20+
#include "llvm/CodeGen/ISDOpcodes.h"
2021
#include "llvm/CodeGen/MachineFrameInfo.h"
2122
#include "llvm/CodeGen/MachineFunction.h"
2223
#include "llvm/CodeGen/MachineJumpTableInfo.h"
@@ -5125,6 +5126,20 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
51255126
Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
51265127
}
51275128

5129+
// fold (setcc (trunc x) c) -> (setcc x c)
5130+
if (N0.getOpcode() == ISD::TRUNCATE &&
5131+
((N0->getFlags().hasNoUnsignedWrap() && !ISD::isSignedIntSetCC(Cond)) ||
5132+
(N0->getFlags().hasNoSignedWrap() &&
5133+
!ISD::isUnsignedIntSetCC(Cond))) &&
5134+
isTypeDesirableForOp(ISD::SETCC, N0.getOperand(0).getValueType())) {
5135+
EVT NewVT = N0.getOperand(0).getValueType();
5136+
SDValue NewConst = DAG.getConstant(ISD::isSignedIntSetCC(Cond)
5137+
? C1.sext(NewVT.getSizeInBits())
5138+
: C1.zext(NewVT.getSizeInBits()),
5139+
dl, NewVT);
5140+
return DAG.getSetCC(dl, VT, N0.getOperand(0), NewConst, Cond);
5141+
}
5142+
51285143
if (SDValue V =
51295144
optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
51305145
return V;
@@ -5646,6 +5661,17 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
56465661
return N0;
56475662
}
56485663

5664+
// Fold (setcc (trunc x) (trunc y)) -> (setcc x y)
5665+
if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE &&
5666+
N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
5667+
((!ISD::isSignedIntSetCC(Cond) && N0->getFlags().hasNoUnsignedWrap() &&
5668+
N1->getFlags().hasNoUnsignedWrap()) ||
5669+
(!ISD::isUnsignedIntSetCC(Cond) && N0->getFlags().hasNoSignedWrap() &&
5670+
N1->getFlags().hasNoSignedWrap())) &&
5671+
isTypeDesirableForOp(ISD::SETCC, N0.getOperand(0).getValueType())) {
5672+
return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5673+
}
5674+
56495675
// Could not fold it.
56505676
return SDValue();
56515677
}

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1714,40 +1714,16 @@ def cond_signed : PatLeaf<(cond), [{
17141714
return isSignedIntSetCC(N->get());
17151715
}]>;
17161716

1717-
def cond_not_signed : PatLeaf<(cond), [{
1718-
return !isSignedIntSetCC(N->get());
1719-
}]>;
1720-
17211717
// comparisons of i8 extracted with PRMT as i32
17221718
// It's faster to do comparison directly on i32 extracted by PRMT,
17231719
// instead of the long conversion and sign extending.
1724-
def: Pat<(setcc (i16 (sext_inreg (i16 (trunc (prmt i32:$a, 0, byte_extract_prmt:$sel_a, PrmtNONE))), i8)),
1725-
(i16 (sext_inreg (i16 (trunc (prmt i32:$b, 0, byte_extract_prmt:$sel_b, PrmtNONE))), i8)),
1726-
cond_signed:$cc),
1727-
(SETP_i32rr (PRMT_B32rii i32:$a, 0, byte_extract_prmt:$sel_a, PrmtNONE),
1728-
(PRMT_B32rii i32:$b, 0, byte_extract_prmt:$sel_b, PrmtNONE),
1729-
(cond2cc $cc))>;
1730-
17311720
def: Pat<(setcc (i16 (sext_inreg (trunc (prmt i32:$a, 0, byte_extract_prmt:$sel_a, PrmtNONE)), i8)),
17321721
(i16 (sext_inreg (trunc (prmt i32:$b, 0, byte_extract_prmt:$sel_b, PrmtNONE)), i8)),
17331722
cond_signed:$cc),
1734-
(SETP_i32rr (PRMT_B32rii i32:$a, 0, byte_extract_prmt:$sel_a, PrmtNONE),
1735-
(PRMT_B32rii i32:$b, 0, byte_extract_prmt:$sel_b, PrmtNONE),
1723+
(SETP_i32rr (PRMT_B32rii i32:$a, 0, (to_sign_extend_selector $sel_a), PrmtNONE),
1724+
(PRMT_B32rii i32:$b, 0, (to_sign_extend_selector $sel_b), PrmtNONE),
17361725
(cond2cc $cc))>;
17371726

1738-
def: Pat<(setcc (i16 (trunc (prmt i32:$a, 0, byte_extract_prmt:$sel_a, PrmtNONE))),
1739-
(i16 (trunc (prmt i32:$b, 0, byte_extract_prmt:$sel_b, PrmtNONE))),
1740-
cond_signed:$cc),
1741-
(SETP_i32rr (PRMT_B32rii i32:$a, 0, byte_extract_prmt:$sel_a, PrmtNONE),
1742-
(PRMT_B32rii i32:$b, 0, byte_extract_prmt:$sel_b, PrmtNONE),
1743-
(cond2cc $cc))>;
1744-
1745-
def: Pat<(setcc (i16 (trunc (prmt i32:$a, 0, byte_extract_prmt:$sel_a, PrmtNONE))),
1746-
(i16 (trunc (prmt i32:$b, 0, byte_extract_prmt:$sel_b, PrmtNONE))),
1747-
cond_not_signed:$cc),
1748-
(SETP_i32rr (PRMT_B32rii i32:$a, 0, byte_extract_prmt:$sel_a, PrmtNONE),
1749-
(PRMT_B32rii i32:$b, 0, byte_extract_prmt:$sel_b, PrmtNONE),
1750-
(cond2cc $cc))>;
17511727

17521728
def SDTDeclareArrayParam :
17531729
SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;

llvm/test/CodeGen/NVPTX/i8x4-instructions.ll

Lines changed: 100 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -343,61 +343,77 @@ define <4 x i8> @test_smax(<4 x i8> %a, <4 x i8> %b) #0 {
343343
; O0-LABEL: test_smax(
344344
; O0: {
345345
; O0-NEXT: .reg .pred %p<5>;
346-
; O0-NEXT: .reg .b32 %r<18>;
346+
; O0-NEXT: .reg .b32 %r<26>;
347347
; O0-EMPTY:
348348
; O0-NEXT: // %bb.0:
349349
; O0-NEXT: ld.param.b32 %r2, [test_smax_param_1];
350350
; O0-NEXT: ld.param.b32 %r1, [test_smax_param_0];
351-
; O0-NEXT: prmt.b32 %r3, %r2, 0, 0x7770U;
352-
; O0-NEXT: prmt.b32 %r4, %r1, 0, 0x7770U;
351+
; O0-NEXT: prmt.b32 %r3, %r2, 0, 0x8880U;
352+
; O0-NEXT: prmt.b32 %r4, %r1, 0, 0x8880U;
353353
; O0-NEXT: setp.gt.s32 %p1, %r4, %r3;
354-
; O0-NEXT: prmt.b32 %r5, %r2, 0, 0x7771U;
355-
; O0-NEXT: prmt.b32 %r6, %r1, 0, 0x7771U;
354+
; O0-NEXT: prmt.b32 %r5, %r2, 0, 0x9991U;
355+
; O0-NEXT: prmt.b32 %r6, %r1, 0, 0x9991U;
356356
; O0-NEXT: setp.gt.s32 %p2, %r6, %r5;
357-
; O0-NEXT: prmt.b32 %r7, %r2, 0, 0x7772U;
358-
; O0-NEXT: prmt.b32 %r8, %r1, 0, 0x7772U;
357+
; O0-NEXT: prmt.b32 %r7, %r2, 0, 0xaaa2U;
358+
; O0-NEXT: prmt.b32 %r8, %r1, 0, 0xaaa2U;
359359
; O0-NEXT: setp.gt.s32 %p3, %r8, %r7;
360-
; O0-NEXT: prmt.b32 %r9, %r2, 0, 0x7773U;
361-
; O0-NEXT: prmt.b32 %r10, %r1, 0, 0x7773U;
360+
; O0-NEXT: prmt.b32 %r9, %r2, 0, 0xbbb3U;
361+
; O0-NEXT: prmt.b32 %r10, %r1, 0, 0xbbb3U;
362362
; O0-NEXT: setp.gt.s32 %p4, %r10, %r9;
363-
; O0-NEXT: selp.b32 %r11, %r10, %r9, %p4;
364-
; O0-NEXT: selp.b32 %r12, %r8, %r7, %p3;
365-
; O0-NEXT: prmt.b32 %r13, %r12, %r11, 0x3340U;
366-
; O0-NEXT: selp.b32 %r14, %r6, %r5, %p2;
367-
; O0-NEXT: selp.b32 %r15, %r4, %r3, %p1;
368-
; O0-NEXT: prmt.b32 %r16, %r15, %r14, 0x3340U;
369-
; O0-NEXT: prmt.b32 %r17, %r16, %r13, 0x5410U;
370-
; O0-NEXT: st.param.b32 [func_retval0], %r17;
363+
; O0-NEXT: prmt.b32 %r11, %r2, 0, 0x7770U;
364+
; O0-NEXT: prmt.b32 %r12, %r2, 0, 0x7771U;
365+
; O0-NEXT: prmt.b32 %r13, %r2, 0, 0x7772U;
366+
; O0-NEXT: prmt.b32 %r14, %r2, 0, 0x7773U;
367+
; O0-NEXT: prmt.b32 %r15, %r1, 0, 0x7773U;
368+
; O0-NEXT: selp.b32 %r16, %r15, %r14, %p4;
369+
; O0-NEXT: prmt.b32 %r17, %r1, 0, 0x7772U;
370+
; O0-NEXT: selp.b32 %r18, %r17, %r13, %p3;
371+
; O0-NEXT: prmt.b32 %r19, %r18, %r16, 0x3340U;
372+
; O0-NEXT: prmt.b32 %r20, %r1, 0, 0x7771U;
373+
; O0-NEXT: selp.b32 %r21, %r20, %r12, %p2;
374+
; O0-NEXT: prmt.b32 %r22, %r1, 0, 0x7770U;
375+
; O0-NEXT: selp.b32 %r23, %r22, %r11, %p1;
376+
; O0-NEXT: prmt.b32 %r24, %r23, %r21, 0x3340U;
377+
; O0-NEXT: prmt.b32 %r25, %r24, %r19, 0x5410U;
378+
; O0-NEXT: st.param.b32 [func_retval0], %r25;
371379
; O0-NEXT: ret;
372380
;
373381
; O3-LABEL: test_smax(
374382
; O3: {
375383
; O3-NEXT: .reg .pred %p<5>;
376-
; O3-NEXT: .reg .b32 %r<18>;
384+
; O3-NEXT: .reg .b32 %r<26>;
377385
; O3-EMPTY:
378386
; O3-NEXT: // %bb.0:
379387
; O3-NEXT: ld.param.b32 %r1, [test_smax_param_0];
380388
; O3-NEXT: ld.param.b32 %r2, [test_smax_param_1];
381-
; O3-NEXT: prmt.b32 %r3, %r2, 0, 0x7770U;
382-
; O3-NEXT: prmt.b32 %r4, %r1, 0, 0x7770U;
389+
; O3-NEXT: prmt.b32 %r3, %r2, 0, 0x8880U;
390+
; O3-NEXT: prmt.b32 %r4, %r1, 0, 0x8880U;
383391
; O3-NEXT: setp.gt.s32 %p1, %r4, %r3;
384-
; O3-NEXT: prmt.b32 %r5, %r2, 0, 0x7771U;
385-
; O3-NEXT: prmt.b32 %r6, %r1, 0, 0x7771U;
392+
; O3-NEXT: prmt.b32 %r5, %r2, 0, 0x9991U;
393+
; O3-NEXT: prmt.b32 %r6, %r1, 0, 0x9991U;
386394
; O3-NEXT: setp.gt.s32 %p2, %r6, %r5;
387-
; O3-NEXT: prmt.b32 %r7, %r2, 0, 0x7772U;
388-
; O3-NEXT: prmt.b32 %r8, %r1, 0, 0x7772U;
395+
; O3-NEXT: prmt.b32 %r7, %r2, 0, 0xaaa2U;
396+
; O3-NEXT: prmt.b32 %r8, %r1, 0, 0xaaa2U;
389397
; O3-NEXT: setp.gt.s32 %p3, %r8, %r7;
390-
; O3-NEXT: prmt.b32 %r9, %r2, 0, 0x7773U;
391-
; O3-NEXT: prmt.b32 %r10, %r1, 0, 0x7773U;
398+
; O3-NEXT: prmt.b32 %r9, %r2, 0, 0xbbb3U;
399+
; O3-NEXT: prmt.b32 %r10, %r1, 0, 0xbbb3U;
392400
; O3-NEXT: setp.gt.s32 %p4, %r10, %r9;
393-
; O3-NEXT: selp.b32 %r11, %r10, %r9, %p4;
394-
; O3-NEXT: selp.b32 %r12, %r8, %r7, %p3;
395-
; O3-NEXT: prmt.b32 %r13, %r12, %r11, 0x3340U;
396-
; O3-NEXT: selp.b32 %r14, %r6, %r5, %p2;
397-
; O3-NEXT: selp.b32 %r15, %r4, %r3, %p1;
398-
; O3-NEXT: prmt.b32 %r16, %r15, %r14, 0x3340U;
399-
; O3-NEXT: prmt.b32 %r17, %r16, %r13, 0x5410U;
400-
; O3-NEXT: st.param.b32 [func_retval0], %r17;
401+
; O3-NEXT: prmt.b32 %r11, %r2, 0, 0x7770U;
402+
; O3-NEXT: prmt.b32 %r12, %r2, 0, 0x7771U;
403+
; O3-NEXT: prmt.b32 %r13, %r2, 0, 0x7772U;
404+
; O3-NEXT: prmt.b32 %r14, %r2, 0, 0x7773U;
405+
; O3-NEXT: prmt.b32 %r15, %r1, 0, 0x7773U;
406+
; O3-NEXT: selp.b32 %r16, %r15, %r14, %p4;
407+
; O3-NEXT: prmt.b32 %r17, %r1, 0, 0x7772U;
408+
; O3-NEXT: selp.b32 %r18, %r17, %r13, %p3;
409+
; O3-NEXT: prmt.b32 %r19, %r18, %r16, 0x3340U;
410+
; O3-NEXT: prmt.b32 %r20, %r1, 0, 0x7771U;
411+
; O3-NEXT: selp.b32 %r21, %r20, %r12, %p2;
412+
; O3-NEXT: prmt.b32 %r22, %r1, 0, 0x7770U;
413+
; O3-NEXT: selp.b32 %r23, %r22, %r11, %p1;
414+
; O3-NEXT: prmt.b32 %r24, %r23, %r21, 0x3340U;
415+
; O3-NEXT: prmt.b32 %r25, %r24, %r19, 0x5410U;
416+
; O3-NEXT: st.param.b32 [func_retval0], %r25;
401417
; O3-NEXT: ret;
402418
%cmp = icmp sgt <4 x i8> %a, %b
403419
%r = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b
@@ -473,61 +489,77 @@ define <4 x i8> @test_smin(<4 x i8> %a, <4 x i8> %b) #0 {
473489
; O0-LABEL: test_smin(
474490
; O0: {
475491
; O0-NEXT: .reg .pred %p<5>;
476-
; O0-NEXT: .reg .b32 %r<18>;
492+
; O0-NEXT: .reg .b32 %r<26>;
477493
; O0-EMPTY:
478494
; O0-NEXT: // %bb.0:
479495
; O0-NEXT: ld.param.b32 %r2, [test_smin_param_1];
480496
; O0-NEXT: ld.param.b32 %r1, [test_smin_param_0];
481-
; O0-NEXT: prmt.b32 %r3, %r2, 0, 0x7770U;
482-
; O0-NEXT: prmt.b32 %r4, %r1, 0, 0x7770U;
497+
; O0-NEXT: prmt.b32 %r3, %r2, 0, 0x8880U;
498+
; O0-NEXT: prmt.b32 %r4, %r1, 0, 0x8880U;
483499
; O0-NEXT: setp.le.s32 %p1, %r4, %r3;
484-
; O0-NEXT: prmt.b32 %r5, %r2, 0, 0x7771U;
485-
; O0-NEXT: prmt.b32 %r6, %r1, 0, 0x7771U;
500+
; O0-NEXT: prmt.b32 %r5, %r2, 0, 0x9991U;
501+
; O0-NEXT: prmt.b32 %r6, %r1, 0, 0x9991U;
486502
; O0-NEXT: setp.le.s32 %p2, %r6, %r5;
487-
; O0-NEXT: prmt.b32 %r7, %r2, 0, 0x7772U;
488-
; O0-NEXT: prmt.b32 %r8, %r1, 0, 0x7772U;
503+
; O0-NEXT: prmt.b32 %r7, %r2, 0, 0xaaa2U;
504+
; O0-NEXT: prmt.b32 %r8, %r1, 0, 0xaaa2U;
489505
; O0-NEXT: setp.le.s32 %p3, %r8, %r7;
490-
; O0-NEXT: prmt.b32 %r9, %r2, 0, 0x7773U;
491-
; O0-NEXT: prmt.b32 %r10, %r1, 0, 0x7773U;
506+
; O0-NEXT: prmt.b32 %r9, %r2, 0, 0xbbb3U;
507+
; O0-NEXT: prmt.b32 %r10, %r1, 0, 0xbbb3U;
492508
; O0-NEXT: setp.le.s32 %p4, %r10, %r9;
493-
; O0-NEXT: selp.b32 %r11, %r10, %r9, %p4;
494-
; O0-NEXT: selp.b32 %r12, %r8, %r7, %p3;
495-
; O0-NEXT: prmt.b32 %r13, %r12, %r11, 0x3340U;
496-
; O0-NEXT: selp.b32 %r14, %r6, %r5, %p2;
497-
; O0-NEXT: selp.b32 %r15, %r4, %r3, %p1;
498-
; O0-NEXT: prmt.b32 %r16, %r15, %r14, 0x3340U;
499-
; O0-NEXT: prmt.b32 %r17, %r16, %r13, 0x5410U;
500-
; O0-NEXT: st.param.b32 [func_retval0], %r17;
509+
; O0-NEXT: prmt.b32 %r11, %r2, 0, 0x7770U;
510+
; O0-NEXT: prmt.b32 %r12, %r2, 0, 0x7771U;
511+
; O0-NEXT: prmt.b32 %r13, %r2, 0, 0x7772U;
512+
; O0-NEXT: prmt.b32 %r14, %r2, 0, 0x7773U;
513+
; O0-NEXT: prmt.b32 %r15, %r1, 0, 0x7773U;
514+
; O0-NEXT: selp.b32 %r16, %r15, %r14, %p4;
515+
; O0-NEXT: prmt.b32 %r17, %r1, 0, 0x7772U;
516+
; O0-NEXT: selp.b32 %r18, %r17, %r13, %p3;
517+
; O0-NEXT: prmt.b32 %r19, %r18, %r16, 0x3340U;
518+
; O0-NEXT: prmt.b32 %r20, %r1, 0, 0x7771U;
519+
; O0-NEXT: selp.b32 %r21, %r20, %r12, %p2;
520+
; O0-NEXT: prmt.b32 %r22, %r1, 0, 0x7770U;
521+
; O0-NEXT: selp.b32 %r23, %r22, %r11, %p1;
522+
; O0-NEXT: prmt.b32 %r24, %r23, %r21, 0x3340U;
523+
; O0-NEXT: prmt.b32 %r25, %r24, %r19, 0x5410U;
524+
; O0-NEXT: st.param.b32 [func_retval0], %r25;
501525
; O0-NEXT: ret;
502526
;
503527
; O3-LABEL: test_smin(
504528
; O3: {
505529
; O3-NEXT: .reg .pred %p<5>;
506-
; O3-NEXT: .reg .b32 %r<18>;
530+
; O3-NEXT: .reg .b32 %r<26>;
507531
; O3-EMPTY:
508532
; O3-NEXT: // %bb.0:
509533
; O3-NEXT: ld.param.b32 %r1, [test_smin_param_0];
510534
; O3-NEXT: ld.param.b32 %r2, [test_smin_param_1];
511-
; O3-NEXT: prmt.b32 %r3, %r2, 0, 0x7770U;
512-
; O3-NEXT: prmt.b32 %r4, %r1, 0, 0x7770U;
535+
; O3-NEXT: prmt.b32 %r3, %r2, 0, 0x8880U;
536+
; O3-NEXT: prmt.b32 %r4, %r1, 0, 0x8880U;
513537
; O3-NEXT: setp.le.s32 %p1, %r4, %r3;
514-
; O3-NEXT: prmt.b32 %r5, %r2, 0, 0x7771U;
515-
; O3-NEXT: prmt.b32 %r6, %r1, 0, 0x7771U;
538+
; O3-NEXT: prmt.b32 %r5, %r2, 0, 0x9991U;
539+
; O3-NEXT: prmt.b32 %r6, %r1, 0, 0x9991U;
516540
; O3-NEXT: setp.le.s32 %p2, %r6, %r5;
517-
; O3-NEXT: prmt.b32 %r7, %r2, 0, 0x7772U;
518-
; O3-NEXT: prmt.b32 %r8, %r1, 0, 0x7772U;
541+
; O3-NEXT: prmt.b32 %r7, %r2, 0, 0xaaa2U;
542+
; O3-NEXT: prmt.b32 %r8, %r1, 0, 0xaaa2U;
519543
; O3-NEXT: setp.le.s32 %p3, %r8, %r7;
520-
; O3-NEXT: prmt.b32 %r9, %r2, 0, 0x7773U;
521-
; O3-NEXT: prmt.b32 %r10, %r1, 0, 0x7773U;
544+
; O3-NEXT: prmt.b32 %r9, %r2, 0, 0xbbb3U;
545+
; O3-NEXT: prmt.b32 %r10, %r1, 0, 0xbbb3U;
522546
; O3-NEXT: setp.le.s32 %p4, %r10, %r9;
523-
; O3-NEXT: selp.b32 %r11, %r10, %r9, %p4;
524-
; O3-NEXT: selp.b32 %r12, %r8, %r7, %p3;
525-
; O3-NEXT: prmt.b32 %r13, %r12, %r11, 0x3340U;
526-
; O3-NEXT: selp.b32 %r14, %r6, %r5, %p2;
527-
; O3-NEXT: selp.b32 %r15, %r4, %r3, %p1;
528-
; O3-NEXT: prmt.b32 %r16, %r15, %r14, 0x3340U;
529-
; O3-NEXT: prmt.b32 %r17, %r16, %r13, 0x5410U;
530-
; O3-NEXT: st.param.b32 [func_retval0], %r17;
547+
; O3-NEXT: prmt.b32 %r11, %r2, 0, 0x7770U;
548+
; O3-NEXT: prmt.b32 %r12, %r2, 0, 0x7771U;
549+
; O3-NEXT: prmt.b32 %r13, %r2, 0, 0x7772U;
550+
; O3-NEXT: prmt.b32 %r14, %r2, 0, 0x7773U;
551+
; O3-NEXT: prmt.b32 %r15, %r1, 0, 0x7773U;
552+
; O3-NEXT: selp.b32 %r16, %r15, %r14, %p4;
553+
; O3-NEXT: prmt.b32 %r17, %r1, 0, 0x7772U;
554+
; O3-NEXT: selp.b32 %r18, %r17, %r13, %p3;
555+
; O3-NEXT: prmt.b32 %r19, %r18, %r16, 0x3340U;
556+
; O3-NEXT: prmt.b32 %r20, %r1, 0, 0x7771U;
557+
; O3-NEXT: selp.b32 %r21, %r20, %r12, %p2;
558+
; O3-NEXT: prmt.b32 %r22, %r1, 0, 0x7770U;
559+
; O3-NEXT: selp.b32 %r23, %r22, %r11, %p1;
560+
; O3-NEXT: prmt.b32 %r24, %r23, %r21, 0x3340U;
561+
; O3-NEXT: prmt.b32 %r25, %r24, %r19, 0x5410U;
562+
; O3-NEXT: st.param.b32 [func_retval0], %r25;
531563
; O3-NEXT: ret;
532564
%cmp = icmp sle <4 x i8> %a, %b
533565
%r = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b

llvm/test/CodeGen/NVPTX/sext-setcc.ll

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,25 +29,20 @@ define <4 x i8> @sext_setcc_v4i1_to_v4i8(ptr %p) {
2929
; CHECK-LABEL: sext_setcc_v4i1_to_v4i8(
3030
; CHECK: {
3131
; CHECK-NEXT: .reg .pred %p<5>;
32-
; CHECK-NEXT: .reg .b16 %rs<5>;
3332
; CHECK-NEXT: .reg .b32 %r<13>;
3433
; CHECK-NEXT: .reg .b64 %rd<2>;
3534
; CHECK-EMPTY:
3635
; CHECK-NEXT: // %bb.0: // %entry
3736
; CHECK-NEXT: ld.param.b64 %rd1, [sext_setcc_v4i1_to_v4i8_param_0];
3837
; CHECK-NEXT: ld.b32 %r1, [%rd1];
3938
; CHECK-NEXT: prmt.b32 %r2, %r1, 0, 0x7770U;
40-
; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
41-
; CHECK-NEXT: setp.eq.b16 %p1, %rs1, 0;
39+
; CHECK-NEXT: setp.eq.b32 %p1, %r2, 0;
4240
; CHECK-NEXT: prmt.b32 %r3, %r1, 0, 0x7771U;
43-
; CHECK-NEXT: cvt.u16.u32 %rs2, %r3;
44-
; CHECK-NEXT: setp.eq.b16 %p2, %rs2, 0;
41+
; CHECK-NEXT: setp.eq.b32 %p2, %r3, 0;
4542
; CHECK-NEXT: prmt.b32 %r4, %r1, 0, 0x7772U;
46-
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
47-
; CHECK-NEXT: setp.eq.b16 %p3, %rs3, 0;
43+
; CHECK-NEXT: setp.eq.b32 %p3, %r4, 0;
4844
; CHECK-NEXT: prmt.b32 %r5, %r1, 0, 0x7773U;
49-
; CHECK-NEXT: cvt.u16.u32 %rs4, %r5;
50-
; CHECK-NEXT: setp.eq.b16 %p4, %rs4, 0;
45+
; CHECK-NEXT: setp.eq.b32 %p4, %r5, 0;
5146
; CHECK-NEXT: selp.b32 %r6, -1, 0, %p4;
5247
; CHECK-NEXT: selp.b32 %r7, -1, 0, %p3;
5348
; CHECK-NEXT: prmt.b32 %r8, %r7, %r6, 0x3340U;

0 commit comments

Comments
 (0)