@@ -802,6 +802,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
802
802
setOperationAction(ISD::BSWAP, VT, Expand);
803
803
}
804
804
805
+ if (!Subtarget->isThumb1Only() && !Subtarget->hasV8_1MMainlineOps())
806
+ setOperationAction(ISD::SCMP, MVT::i32, Custom);
807
+
808
+ if (!Subtarget->hasV8_1MMainlineOps())
809
+ setOperationAction(ISD::UCMP, MVT::i32, Custom);
810
+
805
811
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
806
812
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
807
813
@@ -1628,6 +1634,10 @@ bool ARMTargetLowering::useSoftFloat() const {
1628
1634
return Subtarget->useSoftFloat();
1629
1635
}
1630
1636
1637
+ bool ARMTargetLowering::shouldExpandCmpUsingSelects(EVT VT) const {
1638
+ return !Subtarget->isThumb1Only() && VT.getSizeInBits() <= 32;
1639
+ }
1640
+
1631
1641
// FIXME: It might make sense to define the representative register class as the
1632
1642
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1633
1643
// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
@@ -10614,6 +10624,134 @@ SDValue ARMTargetLowering::LowerFP_TO_BF16(SDValue Op,
10614
10624
return DAG.getBitcast(MVT::i32, Res);
10615
10625
}
10616
10626
10627
+ SDValue ARMTargetLowering::LowerCMP(SDValue Op, SelectionDAG &DAG) const {
10628
+ SDLoc dl(Op);
10629
+ SDValue LHS = Op.getOperand(0);
10630
+ SDValue RHS = Op.getOperand(1);
10631
+
10632
+ // Determine if this is signed or unsigned comparison
10633
+ bool IsSigned = (Op.getOpcode() == ISD::SCMP);
10634
+
10635
+ // Special case for Thumb1 UCMP only
10636
+ if (!IsSigned && Subtarget->isThumb1Only()) {
10637
+ // For Thumb unsigned comparison, use this sequence:
10638
+ // subs r2, r0, r1 ; r2 = LHS - RHS, sets flags
10639
+ // sbc r2, r2 ; r2 = r2 - r2 - !carry
10640
+ // cmp r1, r0 ; compare RHS with LHS
10641
+ // sbc r1, r1 ; r1 = r1 - r1 - !carry
10642
+ // subs r0, r2, r1 ; r0 = r2 - r1 (final result)
10643
+
10644
+ // First subtraction: LHS - RHS
10645
+ SDValue Sub1WithFlags = DAG.getNode(
10646
+ ARMISD::SUBC, dl, DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS);
10647
+ SDValue Sub1Result = Sub1WithFlags.getValue(0);
10648
+ SDValue Flags1 = Sub1WithFlags.getValue(1);
10649
+
10650
+ // SUBE: Sub1Result - Sub1Result - !carry
10651
+ // This gives 0 if LHS >= RHS (unsigned), -1 if LHS < RHS (unsigned)
10652
+ SDValue Sbc1 =
10653
+ DAG.getNode(ARMISD::SUBE, dl, DAG.getVTList(MVT::i32, FlagsVT),
10654
+ Sub1Result, Sub1Result, Flags1);
10655
+ SDValue Sbc1Result = Sbc1.getValue(0);
10656
+
10657
+ // Second comparison: RHS vs LHS (reverse comparison)
10658
+ SDValue CmpFlags = DAG.getNode(ARMISD::CMP, dl, FlagsVT, RHS, LHS);
10659
+
10660
+ // SUBE: RHS - RHS - !carry
10661
+ // This gives 0 if RHS <= LHS (unsigned), -1 if RHS > LHS (unsigned)
10662
+ SDValue Sbc2 = DAG.getNode(
10663
+ ARMISD::SUBE, dl, DAG.getVTList(MVT::i32, FlagsVT), RHS, RHS, CmpFlags);
10664
+ SDValue Sbc2Result = Sbc2.getValue(0);
10665
+
10666
+ // Final subtraction: Sbc1Result - Sbc2Result (no flags needed)
10667
+ SDValue Result =
10668
+ DAG.getNode(ISD::SUB, dl, MVT::i32, Sbc1Result, Sbc2Result);
10669
+ if (Op.getValueType() != MVT::i32)
10670
+ Result = DAG.getSExtOrTrunc(Result, dl, Op.getValueType());
10671
+
10672
+ return Result;
10673
+ }
10674
+
10675
+ // For the ARM assembly pattern:
10676
+ // subs r0, r0, r1 ; subtract RHS from LHS and set flags
10677
+ // movgt r0, #1 ; if LHS > RHS, set result to 1 (GT for signed, HI for
10678
+ // unsigned) mvnlt r0, #0 ; if LHS < RHS, set result to -1 (LT for
10679
+ // signed, LO for unsigned)
10680
+ // ; if LHS == RHS, result remains 0 from the subs
10681
+
10682
+ // Optimization: if RHS is a subtraction against 0, use ADDC instead of SUBC
10683
+ SDValue AddOperand;
10684
+ unsigned Opcode = ARMISD::SUBC;
10685
+
10686
+ // Check if RHS is a subtraction against 0: (0 - X)
10687
+ if (RHS.getOpcode() == ISD::SUB) {
10688
+ SDValue SubLHS = RHS.getOperand(0);
10689
+ SDValue SubRHS = RHS.getOperand(1);
10690
+
10691
+ // Check if it's 0 - X
10692
+ if (isNullConstant(SubLHS)) {
10693
+ bool CanUseAdd = false;
10694
+ if (IsSigned) {
10695
+ // For SCMP: only if X is known to never be INT_MIN (to avoid overflow)
10696
+ if (RHS->getFlags().hasNoSignedWrap() || !DAG.computeKnownBits(SubRHS)
10697
+ .getSignedMinValue()
10698
+ .isMinSignedValue()) {
10699
+ CanUseAdd = true;
10700
+ }
10701
+ } else {
10702
+ // For UCMP: only if X is known to never be zero
10703
+ if (DAG.isKnownNeverZero(SubRHS)) {
10704
+ CanUseAdd = true;
10705
+ }
10706
+ }
10707
+
10708
+ if (CanUseAdd) {
10709
+ Opcode = ARMISD::ADDC;
10710
+ AddOperand = SubRHS; // Replace RHS with X, so we do LHS + X instead of
10711
+ // LHS - (0 - X)
10712
+ }
10713
+ }
10714
+ }
10715
+
10716
+ // Generate the operation with flags
10717
+ SDValue OpWithFlags;
10718
+ if (Opcode == ARMISD::ADDC) {
10719
+ // Use ADDC: LHS + AddOperand (where RHS was 0 - AddOperand)
10720
+ OpWithFlags = DAG.getNode(
10721
+ ARMISD::ADDC, dl, DAG.getVTList(MVT::i32, FlagsVT), LHS, AddOperand);
10722
+ } else {
10723
+ // Use ARMISD::SUBC to generate SUBS instruction (subtract with flags)
10724
+ OpWithFlags = DAG.getNode(ARMISD::SUBC, dl,
10725
+ DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS);
10726
+ }
10727
+
10728
+ SDValue OpResult = OpWithFlags.getValue(0); // The operation result
10729
+ SDValue Flags = OpWithFlags.getValue(1); // The flags
10730
+
10731
+ // Constants for conditional moves
10732
+ SDValue One = DAG.getConstant(1, dl, MVT::i32);
10733
+ SDValue MinusOne = DAG.getAllOnesConstant(dl, MVT::i32);
10734
+
10735
+ // Select condition codes based on signed vs unsigned
10736
+ ARMCC::CondCodes GTCond = IsSigned ? ARMCC::GT : ARMCC::HI;
10737
+ ARMCC::CondCodes LTCond = IsSigned ? ARMCC::LT : ARMCC::LO;
10738
+
10739
+ // First conditional move: if greater than, set to 1
10740
+ SDValue GTCondValue = DAG.getConstant(GTCond, dl, MVT::i32);
10741
+ SDValue Result1 = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, OpResult, One,
10742
+ GTCondValue, Flags);
10743
+
10744
+ // Second conditional move: if less than, set to -1
10745
+ SDValue LTCondValue = DAG.getConstant(LTCond, dl, MVT::i32);
10746
+ SDValue Result2 = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, Result1, MinusOne,
10747
+ LTCondValue, Flags);
10748
+
10749
+ if (Op.getValueType() != MVT::i32)
10750
+ Result2 = DAG.getSExtOrTrunc(Result2, dl, Op.getValueType());
10751
+
10752
+ return Result2;
10753
+ }
10754
+
10617
10755
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
10618
10756
LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
10619
10757
switch (Op.getOpcode()) {
@@ -10742,6 +10880,9 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
10742
10880
case ISD::FP_TO_BF16:
10743
10881
return LowerFP_TO_BF16(Op, DAG);
10744
10882
case ARMISD::WIN__DBZCHK: return SDValue();
10883
+ case ISD::UCMP:
10884
+ case ISD::SCMP:
10885
+ return LowerCMP(Op, DAG);
10745
10886
}
10746
10887
}
10747
10888
0 commit comments