diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp index 3090ad313b90d..c3bf3fc7500e4 100644 --- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp +++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp @@ -1163,14 +1163,13 @@ bool X86InstructionSelector::selectUAddSub(MachineInstr &I, I.getOpcode() == TargetOpcode::G_USUBO) && "unexpected instruction"); - const Register DstReg = I.getOperand(0).getReg(); - const Register CarryOutReg = I.getOperand(1).getReg(); - const Register Op0Reg = I.getOperand(2).getReg(); - const Register Op1Reg = I.getOperand(3).getReg(); - bool IsSub = I.getOpcode() == TargetOpcode::G_USUBE || - I.getOpcode() == TargetOpcode::G_USUBO; - bool HasCarryIn = I.getOpcode() == TargetOpcode::G_UADDE || - I.getOpcode() == TargetOpcode::G_USUBE; + auto &CarryMI = cast<GAddSubCarryOut>(I); + + const Register DstReg = CarryMI.getDstReg(); + const Register CarryOutReg = CarryMI.getCarryOutReg(); + const Register Op0Reg = CarryMI.getLHSReg(); + const Register Op1Reg = CarryMI.getRHSReg(); + bool IsSub = CarryMI.isSub(); const LLT DstTy = MRI.getType(DstReg); assert(DstTy.isScalar() && "selectUAddSub only supported for scalar types"); @@ -1206,14 +1205,15 @@ bool X86InstructionSelector::selectUAddSub(MachineInstr &I, llvm_unreachable("selectUAddSub unsupported type."); } - const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); - const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB); + const RegisterBank &CarryRB = *RBI.getRegBank(CarryOutReg, MRI, TRI); + const TargetRegisterClass *CarryRC = + getRegClass(MRI.getType(CarryOutReg), CarryRB); unsigned Opcode = IsSub ? OpSUB : OpADD; // G_UADDE/G_USUBE - find CarryIn def instruction. 
- if (HasCarryIn) { - Register CarryInReg = I.getOperand(4).getReg(); + if (auto CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) { + Register CarryInReg = CarryInMI->getCarryInReg(); MachineInstr *Def = MRI.getVRegDef(CarryInReg); while (Def->getOpcode() == TargetOpcode::G_TRUNC) { CarryInReg = Def->getOperand(1).getReg(); @@ -1226,11 +1226,12 @@ bool X86InstructionSelector::selectUAddSub(MachineInstr &I, Def->getOpcode() == TargetOpcode::G_USUBE || Def->getOpcode() == TargetOpcode::G_USUBO) { // carry set by prev ADD/SUB. - BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), - X86::EFLAGS) - .addReg(CarryInReg); - if (!RBI.constrainGenericRegister(CarryInReg, *DstRC, MRI)) + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::CMP8ri)) + .addReg(CarryInReg) + .addImm(1); + + if (!RBI.constrainGenericRegister(CarryInReg, *CarryRC, MRI)) return false; Opcode = IsSub ? OpSBB : OpADC; @@ -1249,11 +1250,11 @@ bool X86InstructionSelector::selectUAddSub(MachineInstr &I, .addReg(Op0Reg) .addReg(Op1Reg); - BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), CarryOutReg) - .addReg(X86::EFLAGS); + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SETCCr), CarryOutReg) + .addImm(X86::COND_B); if (!constrainSelectedInstRegOperands(Inst, TII, TRI, RBI) || - !RBI.constrainGenericRegister(CarryOutReg, *DstRC, MRI)) + !RBI.constrainGenericRegister(CarryOutReg, *CarryRC, MRI)) return false; I.eraseFromParent(); diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp index 7fe58539cd4ec..f47e097514464 100644 --- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp +++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp @@ -167,11 +167,11 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, .scalarize(0); getActionDefinitionsBuilder({G_UADDE, G_UADDO, G_USUBE, G_USUBO}) - .legalFor({{s8, s1}, {s16, s1}, {s32, s1}}) - .legalFor(Is64Bit, {{s64, s1}}) + .legalFor({{s8, s8}, {s16, s8}, {s32, s8}}) + .legalFor(Is64Bit, 
{{s64, s8}}) .widenScalarToNextPow2(0, /*Min=*/32) .clampScalar(0, s8, sMaxScalar) - .clampScalar(1, s1, s1) + .clampScalar(1, s8, s8) .scalarize(0); // integer multiply diff --git a/llvm/test/CodeGen/X86/GlobalISel/add-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/add-scalar.ll index 7bde1b7a7a8be..7cdfd519f04dc 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/add-scalar.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/add-scalar.ll @@ -7,12 +7,15 @@ define i128 @test_add_i128(i128 %arg1, i128 %arg2) nounwind { ; X64: # %bb.0: ; X64-NEXT: movq %rdx, %rax ; X64-NEXT: addq %rdi, %rax +; X64-NEXT: setb %dl +; X64-NEXT: cmpb $1, %dl ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: movq %rcx, %rdx ; X64-NEXT: retq ; ; X86-LABEL: test_add_i128: ; X86: # %bb.0: +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -21,8 +24,14 @@ define i128 @test_add_i128(i128 %arg1, i128 %arg2) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: setb %bl +; X86-NEXT: cmpb $1, %bl ; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx +; X86-NEXT: setb %bl +; X86-NEXT: cmpb $1, %bl ; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi +; X86-NEXT: setb %bl +; X86-NEXT: cmpb $1, %bl ; X86-NEXT: adcl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl %ecx, (%eax) ; X86-NEXT: movl %edx, 4(%eax) @@ -30,6 +39,7 @@ define i128 @test_add_i128(i128 %arg1, i128 %arg2) nounwind { ; X86-NEXT: movl %edi, 12(%eax) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx ; X86-NEXT: retl %ret = add i128 %arg1, %arg2 ret i128 %ret @@ -46,6 +56,8 @@ define i64 @test_add_i64(i64 %arg1, i64 %arg2) { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: addl {{[0-9]+}}(%esp), %eax +; X86-NEXT: setb %cl +; X86-NEXT: cmpb $1, %cl ; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx ; X86-NEXT: retl %ret = add i64 %arg1, %arg2 diff --git 
a/llvm/test/CodeGen/X86/GlobalISel/legalize-add.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-add.mir index ec9db781b1bc2..dae2ad6f3bb45 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-add.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-add.mir @@ -157,8 +157,8 @@ body: | ; X86: [[COPY:%[0-9]+]]:_(s64) = COPY $rdx ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[UV]], [[UV2]] + ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; X86-NEXT: $rax = COPY [[MV]](s64) ; X86-NEXT: RET 0 @@ -192,8 +192,8 @@ body: | ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[UV]], [[UV2]] + ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; X86-NEXT: $rax = COPY [[MV]](s64) ; X86-NEXT: RET 0 @@ -219,8 +219,8 @@ body: | ; X64-NEXT: [[DEF1:%[0-9]+]]:_(s128) = IMPLICIT_DEF ; X64-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](s128) ; 
X64-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF1]](s128) - ; X64-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; X64-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; X64-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[UV]], [[UV2]] + ; X64-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] ; X64-NEXT: $rax = COPY [[UADDO]](s64) ; X64-NEXT: $rdx = COPY [[UADDE]](s64) ; X64-NEXT: RET 0 @@ -230,10 +230,10 @@ body: | ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s128) = IMPLICIT_DEF ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s128) ; X86-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s128) - ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV4]] - ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV5]], [[UADDO1]] - ; X86-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV2]], [[UV6]], [[UADDE1]] - ; X86-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV3]], [[UV7]], [[UADDE3]] + ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[UV]], [[UV4]] + ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[UV1]], [[UV5]], [[UADDO1]] + ; X86-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s8) = G_UADDE [[UV2]], [[UV6]], [[UADDE1]] + ; X86-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s8) = G_UADDE [[UV3]], [[UV7]], [[UADDE3]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; X86-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE2]](s32), [[UADDE4]](s32) ; X86-NEXT: $rax = COPY [[MV]](s64) diff --git 
a/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir index 19fe5b84c73ce..470a30fd36b62 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir @@ -25,6 +25,7 @@ body: | ; X64-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[CTLZ]], [[C1]] ; X64-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C]] ; X64-NEXT: RET 0, implicit [[AND1]](s64) + ; ; X86-LABEL: name: test_ctlz35 ; X86: [[COPY:%[0-9]+]]:_(s64) = COPY $rdx ; X86-NEXT: [[TRUNC:%[0-9]+]]:_(s35) = G_TRUNC [[COPY]](s64) @@ -46,12 +47,15 @@ body: | ; X86-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C]](s32) ; X86-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) ; X86-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV1]](s64) - ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] - ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] + ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s8) = G_USUBO [[UV6]], [[UV8]] + ; X86-NEXT: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[USUBO1]](s8) + ; X86-NEXT: [[ZEXT2:%[0-9]+]]:_(s8) = G_ZEXT [[TRUNC1]](s1) + ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s8) = G_USUBE [[UV7]], [[UV9]], [[ZEXT2]] + ; X86-NEXT: [[TRUNC2:%[0-9]+]]:_(s1) = G_TRUNC [[USUBE1]](s8) ; X86-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; X86-NEXT: [[TRUNC1:%[0-9]+]]:_(s35) = G_TRUNC [[MV2]](s64) - ; X86-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC1]](s35) - ; X86-NEXT: RET 0, implicit [[ZEXT2]](s64) + ; X86-NEXT: [[TRUNC3:%[0-9]+]]:_(s35) = G_TRUNC [[MV2]](s64) + ; X86-NEXT: [[ZEXT3:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC3]](s35) + ; X86-NEXT: RET 0, implicit [[ZEXT3]](s64) %0(s64) = COPY $rdx %1:_(s35) = G_TRUNC %0(s64) %2:_(s35) = G_CTLZ %1 @@ -97,6 
+101,7 @@ body: | ; X64-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ [[DEF]](s64) ; X64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[CTLZ]](s64) ; X64-NEXT: RET 0, implicit [[COPY]](s64) + ; ; X86-LABEL: name: test_ctlz64 ; X86: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-sub.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-sub.mir index ee2b9eefcb01a..ac3bf331373c9 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-sub.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-sub.mir @@ -157,8 +157,8 @@ body: | ; X86: [[COPY:%[0-9]+]]:_(s64) = COPY $rdx ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s8) = G_USUBO [[UV]], [[UV2]] + ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s8) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) ; X86-NEXT: $rax = COPY [[MV]](s64) ; X86-NEXT: RET 0 @@ -192,8 +192,8 @@ body: | ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s8) = G_USUBO [[UV]], [[UV2]] + ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), 
[[USUBE1:%[0-9]+]]:_(s8) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) ; X86-NEXT: $rax = COPY [[MV]](s64) ; X86-NEXT: RET 0 @@ -219,8 +219,8 @@ body: | ; X64-NEXT: [[DEF1:%[0-9]+]]:_(s128) = IMPLICIT_DEF ; X64-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](s128) ; X64-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF1]](s128) - ; X64-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; X64-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; X64-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s8) = G_USUBO [[UV]], [[UV2]] + ; X64-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s8) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] ; X64-NEXT: $rax = COPY [[USUBO]](s64) ; X64-NEXT: $rdx = COPY [[USUBE]](s64) ; X64-NEXT: RET 0 @@ -230,10 +230,10 @@ body: | ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s128) = IMPLICIT_DEF ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s128) ; X86-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s128) - ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV4]] - ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV5]], [[USUBO1]] - ; X86-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV2]], [[UV6]], [[USUBE1]] - ; X86-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV7]], [[USUBE3]] + ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s8) = G_USUBO [[UV]], [[UV4]] + ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s8) = G_USUBE [[UV1]], [[UV5]], [[USUBO1]] + ; X86-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s8) = G_USUBE [[UV2]], 
[[UV6]], [[USUBE1]] + ; X86-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s8) = G_USUBE [[UV3]], [[UV7]], [[USUBE3]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) ; X86-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBE2]](s32), [[USUBE4]](s32) ; X86-NEXT: $rax = COPY [[MV]](s64) diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros-undef.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros-undef.mir index 9807d13e3235a..57e729fb03ab6 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros-undef.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros-undef.mir @@ -32,8 +32,8 @@ body: | ; X86-NEXT: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(eq), [[OR]](s32), [[C]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR1]](s32) ; X86-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[CTTZ_ZERO_UNDEF]], [[C2]] - ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDO1]] + ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[CTTZ_ZERO_UNDEF]], [[C2]] + ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[C]], [[C]], [[UADDO1]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) ; X86-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) ; X86-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -97,8 +97,8 @@ body: | ; X86-NEXT: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(eq), [[UV]](s32), [[C]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV1]](s32) ; X86-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[CTTZ_ZERO_UNDEF]], [[C1]] - ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDO1]] + ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), 
[[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[CTTZ_ZERO_UNDEF]], [[C1]] + ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[C]], [[C]], [[UADDO1]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV]](s32) ; X86-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) ; X86-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros.mir index e2d10423dbec5..f5d847776ec06 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros.mir @@ -32,8 +32,8 @@ body: | ; X86-NEXT: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(eq), [[OR]](s32), [[C]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR1]](s32) ; X86-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[CTTZ_ZERO_UNDEF]], [[C2]] - ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDO1]] + ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[CTTZ_ZERO_UNDEF]], [[C2]] + ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[C]], [[C]], [[UADDO1]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) ; X86-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) ; X86-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -99,8 +99,8 @@ body: | ; X86-NEXT: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(eq), [[UV]](s32), [[C]] ; X86-NEXT: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[UV1]](s32) ; X86-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[CTTZ]], [[C1]] - ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDO1]] + ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[CTTZ]], 
[[C1]] + ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[C]], [[C]], [[UADDO1]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV]](s32) ; X86-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) ; X86-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 diff --git a/llvm/test/CodeGen/X86/GlobalISel/pr49087.ll b/llvm/test/CodeGen/X86/GlobalISel/pr49087.ll new file mode 100644 index 0000000000000..41d890bcc3c0b --- /dev/null +++ b/llvm/test/CodeGen/X86/GlobalISel/pr49087.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o - -global-isel -global-isel-abort=1 < %s 2>&1 | FileCheck %s + +define i32 @test_01(ptr %p, i64 %len, i32 %x) { +; CHECK-LABEL: test_01: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_1: # %loop +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: subq %rax, %rsi +; CHECK-NEXT: setb %cl +; CHECK-NEXT: testb $1, %cl +; CHECK-NEXT: jne .LBB0_4 +; CHECK-NEXT: # %bb.2: # %backedge +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: imulq $4, %rsi, %rcx +; CHECK-NEXT: addq %rdi, %rcx +; CHECK-NEXT: cmpl %edx, (%rcx) +; CHECK-NEXT: sete %cl +; CHECK-NEXT: testb $1, %cl +; CHECK-NEXT: je .LBB0_1 +; CHECK-NEXT: # %bb.3: # %failure +; CHECK-NEXT: .LBB0_4: # %exit +; CHECK-NEXT: movl $-1, %eax +; CHECK-NEXT: retq + +entry: + %scevgep = getelementptr i32, ptr %p, i64 -1 + br label %loop + +loop: ; preds = %backedge, %entry + %iv = phi i64 [ %iv.next, %backedge ], [ %len, %entry ] + %iv.next = add i64 %iv, -1 + %cond_1 = icmp eq i64 %iv, 0 + br i1 %cond_1, label %exit, label %backedge + +backedge: ; preds = %loop + %scevgep1 = getelementptr i32, ptr %scevgep, i64 %iv + %loaded = load atomic i32, ptr %scevgep1 unordered, align 4 + %cond_2 = icmp eq i32 %loaded, %x + br i1 %cond_2, label %failure, label %loop + +exit: ; 
preds = %loop + ret i32 -1 + +failure: + unreachable +} + diff --git a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir index 8eac3eaf36145..76680ac750625 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir @@ -29,8 +29,8 @@ body: | bb.0 (%ir-block.0): %0(s32) = IMPLICIT_DEF %1(s32) = IMPLICIT_DEF - %2(s1) = IMPLICIT_DEF - %3(s32), %4(s1) = G_UADDE %0, %1, %2 + %2(s8) = IMPLICIT_DEF + %3(s32), %4(s8) = G_UADDE %0, %1, %2 RET 0 ... diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-add-x32.mir b/llvm/test/CodeGen/X86/GlobalISel/select-add-x32.mir index 773813f19cddb..b85180fb467eb 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/select-add-x32.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/select-add-x32.mir @@ -27,25 +27,24 @@ body: | bb.0 (%ir-block.0): ; X32-LABEL: name: test_add_i64 ; X32: [[DEF:%[0-9]+]]:gr32 = IMPLICIT_DEF - ; X32: [[DEF1:%[0-9]+]]:gr32 = IMPLICIT_DEF - ; X32: [[DEF2:%[0-9]+]]:gr32 = IMPLICIT_DEF - ; X32: [[DEF3:%[0-9]+]]:gr32 = IMPLICIT_DEF - ; X32: [[ADD32rr:%[0-9]+]]:gr32 = ADD32rr [[DEF]], [[DEF2]], implicit-def $eflags - ; X32: [[COPY:%[0-9]+]]:gr32 = COPY $eflags - ; X32: $eflags = COPY [[COPY]] - ; X32: [[ADC32rr:%[0-9]+]]:gr32 = ADC32rr [[DEF1]], [[DEF3]], implicit-def $eflags, implicit $eflags - ; X32: [[COPY1:%[0-9]+]]:gr32 = COPY $eflags - ; X32: $eax = COPY [[ADD32rr]] - ; X32: $edx = COPY [[ADC32rr]] - ; X32: RET 0, implicit $eax, implicit $edx + ; X32-NEXT: [[DEF1:%[0-9]+]]:gr32 = IMPLICIT_DEF + ; X32-NEXT: [[DEF2:%[0-9]+]]:gr32 = IMPLICIT_DEF + ; X32-NEXT: [[DEF3:%[0-9]+]]:gr32 = IMPLICIT_DEF + ; X32-NEXT: [[ADD32rr:%[0-9]+]]:gr32 = ADD32rr [[DEF]], [[DEF2]], implicit-def $eflags + ; X32-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 2, implicit $eflags + ; X32-NEXT: CMP8ri [[SETCCr]], 1, implicit-def $eflags + ; X32-NEXT: [[ADC32rr:%[0-9]+]]:gr32 = ADC32rr [[DEF1]], [[DEF3]], implicit-def $eflags, 
implicit $eflags + ; X32-NEXT: [[SETCCr1:%[0-9]+]]:gr8 = SETCCr 2, implicit $eflags + ; X32-NEXT: $eax = COPY [[ADD32rr]] + ; X32-NEXT: $edx = COPY [[ADC32rr]] + ; X32-NEXT: RET 0, implicit $eax, implicit $edx %0(s32) = IMPLICIT_DEF %1(s32) = IMPLICIT_DEF %2(s32) = IMPLICIT_DEF %3(s32) = IMPLICIT_DEF %9(s8) = G_CONSTANT i8 0 - %4(s1) = G_TRUNC %9(s8) - %5(s32), %6(s1) = G_UADDE %0, %2, %4 - %7(s32), %8(s1) = G_UADDE %1, %3, %6 + %5(s32), %6(s8) = G_UADDE %0, %2, %9 + %7(s32), %8(s8) = G_UADDE %1, %3, %6 $eax = COPY %5(s32) $edx = COPY %7(s32) RET 0, implicit $eax, implicit $edx diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-get-carry-bit.ll b/llvm/test/CodeGen/X86/GlobalISel/select-get-carry-bit.ll new file mode 100644 index 0000000000000..0cf1372fed497 --- /dev/null +++ b/llvm/test/CodeGen/X86/GlobalISel/select-get-carry-bit.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel=1 -global-isel-abort=1 | FileCheck %s + +; Issue #120029 +define i16 @use_carry_bit(i16 %2) { +; CHECK-LABEL: use_carry_bit: +; CHECK: # %bb.0: +; CHECK-NEXT: movw $1, %ax +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: addw %di, %ax +; CHECK-NEXT: setb %cl +; CHECK-NEXT: andl $1, %ecx +; CHECK-NEXT: cmovnew %di, %ax +; CHECK-NEXT: retq + %uadd = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %2, i16 1) + %res = extractvalue { i16, i1 } %uadd, 0 + %carry = extractvalue { i16, i1 } %uadd, 1 + %ret = select i1 %carry, i16 %2, i16 %res + ret i16 %ret +} + diff --git a/llvm/test/CodeGen/X86/GlobalISel/sub-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/sub-scalar.ll index 7a035f5e4ad4d..be75d7c3810e2 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/sub-scalar.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/sub-scalar.ll @@ -7,12 +7,15 @@ define i128 @test_sub_i128(i128 %arg1, i128 %arg2) nounwind { ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: subq %rdx, %rax +; 
X64-NEXT: setb %dl +; X64-NEXT: cmpb $1, %dl ; X64-NEXT: sbbq %rcx, %rsi ; X64-NEXT: movq %rsi, %rdx ; X64-NEXT: retq ; ; X86-LABEL: test_sub_i128: ; X86: # %bb.0: +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -21,8 +24,14 @@ define i128 @test_sub_i128(i128 %arg1, i128 %arg2) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: setb %bl +; X86-NEXT: cmpb $1, %bl ; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: setb %bl +; X86-NEXT: cmpb $1, %bl ; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi +; X86-NEXT: setb %bl +; X86-NEXT: cmpb $1, %bl ; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl %ecx, (%eax) ; X86-NEXT: movl %edx, 4(%eax) @@ -30,6 +39,7 @@ define i128 @test_sub_i128(i128 %arg1, i128 %arg2) nounwind { ; X86-NEXT: movl %edi, 12(%eax) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx ; X86-NEXT: retl %ret = sub i128 %arg1, %arg2 ret i128 %ret @@ -47,6 +57,8 @@ define i64 @test_sub_i64(i64 %arg1, i64 %arg2) { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: subl {{[0-9]+}}(%esp), %eax +; X86-NEXT: setb %cl +; X86-NEXT: cmpb $1, %cl ; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx ; X86-NEXT: retl %ret = sub i64 %arg1, %arg2 diff --git a/llvm/test/CodeGen/X86/pr49087.ll b/llvm/test/CodeGen/X86/pr49087.ll deleted file mode 100644 index 1a29222466385..0000000000000 --- a/llvm/test/CodeGen/X86/pr49087.ll +++ /dev/null @@ -1,30 +0,0 @@ -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o - -global-isel < %s 2>&1 | FileCheck %s -; REQUIRES: asserts -; XFAIL: * - -define i32 @test_01(ptr %p, i64 %len, i32 %x) { -; CHECK-LABEL: test_01 - -entry: - %scevgep = getelementptr i32, ptr %p, i64 -1 - br label %loop - -loop: ; preds = %backedge, %entry - %iv = phi i64 [ %iv.next, %backedge ], [ %len, %entry ] - %iv.next = add i64 %iv, -1 - %cond_1 = icmp eq i64 %iv, 0 - 
br i1 %cond_1, label %exit, label %backedge - -backedge: ; preds = %loop - %scevgep1 = getelementptr i32, ptr %scevgep, i64 %iv - %loaded = load atomic i32, ptr %scevgep1 unordered, align 4 - %cond_2 = icmp eq i32 %loaded, %x - br i1 %cond_2, label %failure, label %loop - -exit: ; preds = %loop - ret i32 -1 - -failure: - unreachable -} -