-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[ARM] Move BFI creation to ISELDAGToDAG #152200
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Draft
AZero13
wants to merge
2
commits into
llvm:main
Choose a base branch
from
AZero13:idag
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Draft
+199
−183
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This prevents a bunch of extraneous bfi nodes that do more harm than good, as can be seen in select-imm.ll
@llvm/pr-subscribers-backend-arm Author: AZero13 (AZero13) ChangesThis prevents a bunch of extraneous bfi nodes that do more harm than good, as can be seen in select-imm.ll Full diff: https://github.com/llvm/llvm-project/pull/152200.diff 3 Files Affected:
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 9ad46df159c20..68285f83c58c0 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -303,6 +303,7 @@ class ARMDAGToDAGISel : public SelectionDAGISel {
/// Try to select SBFX/UBFX instructions for ARM.
bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
+ bool tryV6T2BitfieldInsertOp(SDNode *N);
bool tryInsertVectorElt(SDNode *N);
@@ -3323,6 +3324,165 @@ bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) {
N, N, LHS.getOpcode() == ISD::UINT_TO_FP, true);
}
+bool ARMDAGToDAGISel::tryV6T2BitfieldInsertOp(SDNode *N) {
+ if (!Subtarget->hasV6T2Ops())
+ return false;
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32)
+ return false;
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDLoc DL(N);
+
+ unsigned MaskImm;
+ // Must be a single use AND with an immediate operand.
+ if (!N0.hasOneUse() ||
+ !isOpcWithIntImmediate(N0.getNode(), ISD::AND, MaskImm))
+ return false;
+
+ // If the mask is 0xffff, we can do better
+ // via a movt instruction, so don't use BFI in that case.
+ if (MaskImm == 0xffff)
+ return false;
+
+ SDValue N00 = N0.getOperand(0);
+ unsigned Mask = MaskImm;
+
+ // Case (1): or (and A, mask), val => ARMbfi A, val, mask
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N1C) {
+ unsigned Val = N1C->getZExtValue();
+ if ((Val & ~Mask) != Val)
+ return false;
+
+ if (ARM::isBitFieldInvertedMask(Mask)) {
+ Val >>= llvm::countr_zero(~Mask);
+
+ // Materialize the constant to be inserted
+ unsigned MovOpc = Subtarget->isThumb() ? ARM::t2MOVi32imm : ARM::MOVi32imm;
+ SDNode *ValNode = CurDAG->getMachineNode(
+ MovOpc, DL, VT, CurDAG->getTargetConstant(Val, DL, MVT::i32));
+
+ SDValue Ops[] = {N00, SDValue(ValNode, 0),
+ CurDAG->getTargetConstant(Mask, DL, MVT::i32),
+ getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32)};
+ unsigned Opc = Subtarget->isThumb() ? ARM::t2BFI : ARM::BFI;
+ CurDAG->SelectNodeTo(N, Opc, VT, Ops);
+ return true;
+ }
+ } else if (N1.getOpcode() == ISD::AND) {
+ // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N11C)
+ return false;
+ unsigned Mask2 = N11C->getZExtValue();
+
+ // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern
+ // as is to match.
+ if (ARM::isBitFieldInvertedMask(Mask) && (Mask == ~Mask2)) {
+ // The pack halfword instruction works better for masks that fit it,
+ // so use that when it's available.
+ if (Subtarget->hasDSP() && (Mask == 0xffff || Mask == 0xffff0000))
+ return false;
+ // 2a
+ unsigned amt = llvm::countr_zero(Mask2);
+
+ // Create machine shift instruction
+ SDNode *Shift;
+ if (Subtarget->isThumb()) {
+ // For Thumb2, use t2LSRri: Rm, imm, pred, pred_reg, cc_out
+ SDValue ShiftOps[] = {
+ N1.getOperand(0), CurDAG->getTargetConstant(amt, DL, MVT::i32),
+ getAL(CurDAG, DL), // predicate
+ CurDAG->getRegister(0, MVT::i32), // pred_reg
+ CurDAG->getRegister(0, MVT::i32) // cc_out
+ };
+ Shift = CurDAG->getMachineNode(ARM::t2LSRri, DL, VT, ShiftOps);
+ } else {
+ // For ARM mode, use MOVsi: Rm, so_reg_imm, pred, pred_reg, cc_out
+ SDValue ShiftOps[] = {
+ N1.getOperand(0),
+ CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsr, amt), DL,
+ MVT::i32),
+ getAL(CurDAG, DL), // predicate
+ CurDAG->getRegister(0, MVT::i32), // pred_reg
+ CurDAG->getRegister(0, MVT::i32) // cc_out
+ };
+ Shift = CurDAG->getMachineNode(ARM::MOVsi, DL, VT, ShiftOps);
+ }
+
+ SDValue Ops[] = {N00, SDValue(Shift, 0),
+ CurDAG->getTargetConstant(Mask, DL, MVT::i32),
+ getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32)};
+ unsigned Opc = Subtarget->isThumb() ? ARM::t2BFI : ARM::BFI;
+ CurDAG->SelectNodeTo(N, Opc, VT, Ops);
+ return true;
+ } else if (ARM::isBitFieldInvertedMask(~Mask) && (~Mask == Mask2)) {
+ // The pack halfword instruction works better for masks that fit it,
+ // so use that when it's available.
+ if (Subtarget->hasDSP() && (Mask2 == 0xffff || Mask2 == 0xffff0000))
+ return false;
+ // 2b
+ unsigned lsb = llvm::countr_zero(Mask);
+
+ // Create machine shift instruction
+ SDNode *Shift;
+ if (Subtarget->isThumb()) {
+ // For Thumb2, use t2LSRri: Rm, imm, pred, pred_reg, cc_out
+ SDValue ShiftOps[] = {
+ N00, CurDAG->getTargetConstant(lsb, DL, MVT::i32),
+ getAL(CurDAG, DL), // predicate
+ CurDAG->getRegister(0, MVT::i32), // pred_reg
+ CurDAG->getRegister(0, MVT::i32) // cc_out
+ };
+ Shift = CurDAG->getMachineNode(ARM::t2LSRri, DL, VT, ShiftOps);
+ } else {
+ // For ARM mode, use MOVsi: Rm, so_reg_imm, pred, pred_reg, cc_out
+ SDValue ShiftOps[] = {
+ N00,
+ CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsr, lsb), DL,
+ MVT::i32),
+ getAL(CurDAG, DL), // predicate
+ CurDAG->getRegister(0, MVT::i32), // pred_reg
+ CurDAG->getRegister(0, MVT::i32) // cc_out
+ };
+ Shift = CurDAG->getMachineNode(ARM::MOVsi, DL, VT, ShiftOps);
+ }
+
+ SDValue Ops[] = {N1.getOperand(0), SDValue(Shift, 0),
+ CurDAG->getTargetConstant(Mask2, DL, MVT::i32),
+ getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32)};
+ unsigned Opc = Subtarget->isThumb() ? ARM::t2BFI : ARM::BFI;
+ CurDAG->SelectNodeTo(N, Opc, VT, Ops);
+ return true;
+ }
+ }
+
+ // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
+ // where lsb(mask) == #shamt and masked bits of B are known zero.
+ if (CurDAG->MaskedValueIsZero(N1, APInt(32, Mask)) &&
+ N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
+ ARM::isBitFieldInvertedMask(~Mask)) {
+ SDValue ShAmt = N00.getOperand(1);
+ unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
+ unsigned LSB = llvm::countr_zero(Mask);
+ if (ShAmtC != LSB)
+ return false;
+
+ SDValue Ops[] = {N1, N00.getOperand(0),
+ CurDAG->getTargetConstant(~Mask, DL, MVT::i32),
+ getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32)};
+
+ unsigned Opc = Subtarget->isThumb() ? ARM::t2BFI : ARM::BFI;
+ CurDAG->SelectNodeTo(N, Opc, VT, Ops);
+ return true;
+ }
+
+ return false;
+}
+
bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
if (!Subtarget->hasV6T2Ops())
return false;
@@ -3833,6 +3993,11 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
}
}
break;
+ case ISD::OR: {
+ if (tryV6T2BitfieldInsertOp(N))
+ return;
+ break;
+ }
case ISD::AND: {
// Check for unsigned bitfield extract
if (tryV6T2BitfieldExtractOp(N, false))
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 7f8b4460bb814..5058a35930be3 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -14579,132 +14579,6 @@ static SDValue PerformORCombineToSMULWBT(SDNode *OR,
return SDValue(OR, 0);
}
-static SDValue PerformORCombineToBFI(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- const ARMSubtarget *Subtarget) {
- // BFI is only available on V6T2+
- if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
- return SDValue();
-
- EVT VT = N->getValueType(0);
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- SelectionDAG &DAG = DCI.DAG;
- SDLoc DL(N);
- // 1) or (and A, mask), val => ARMbfi A, val, mask
- // iff (val & mask) == val
- //
- // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
- // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
- // && mask == ~mask2
- // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
- // && ~mask == mask2
- // (i.e., copy a bitfield value into another bitfield of the same width)
-
- if (VT != MVT::i32)
- return SDValue();
-
- SDValue N00 = N0.getOperand(0);
-
- // The value and the mask need to be constants so we can verify this is
- // actually a bitfield set. If the mask is 0xffff, we can do better
- // via a movt instruction, so don't use BFI in that case.
- SDValue MaskOp = N0.getOperand(1);
- ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
- if (!MaskC)
- return SDValue();
- unsigned Mask = MaskC->getZExtValue();
- if (Mask == 0xffff)
- return SDValue();
- SDValue Res;
- // Case (1): or (and A, mask), val => ARMbfi A, val, mask
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- if (N1C) {
- unsigned Val = N1C->getZExtValue();
- if ((Val & ~Mask) != Val)
- return SDValue();
-
- if (ARM::isBitFieldInvertedMask(Mask)) {
- Val >>= llvm::countr_zero(~Mask);
-
- Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
- DAG.getConstant(Val, DL, MVT::i32),
- DAG.getConstant(Mask, DL, MVT::i32));
-
- DCI.CombineTo(N, Res, false);
- // Return value from the original node to inform the combiner than N is
- // now dead.
- return SDValue(N, 0);
- }
- } else if (N1.getOpcode() == ISD::AND) {
- // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
- ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
- if (!N11C)
- return SDValue();
- unsigned Mask2 = N11C->getZExtValue();
-
- // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern
- // as is to match.
- if (ARM::isBitFieldInvertedMask(Mask) &&
- (Mask == ~Mask2)) {
- // The pack halfword instruction works better for masks that fit it,
- // so use that when it's available.
- if (Subtarget->hasDSP() &&
- (Mask == 0xffff || Mask == 0xffff0000))
- return SDValue();
- // 2a
- unsigned amt = llvm::countr_zero(Mask2);
- Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
- DAG.getConstant(amt, DL, MVT::i32));
- Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
- DAG.getConstant(Mask, DL, MVT::i32));
- DCI.CombineTo(N, Res, false);
- // Return value from the original node to inform the combiner than N is
- // now dead.
- return SDValue(N, 0);
- } else if (ARM::isBitFieldInvertedMask(~Mask) &&
- (~Mask == Mask2)) {
- // The pack halfword instruction works better for masks that fit it,
- // so use that when it's available.
- if (Subtarget->hasDSP() &&
- (Mask2 == 0xffff || Mask2 == 0xffff0000))
- return SDValue();
- // 2b
- unsigned lsb = llvm::countr_zero(Mask);
- Res = DAG.getNode(ISD::SRL, DL, VT, N00,
- DAG.getConstant(lsb, DL, MVT::i32));
- Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
- DAG.getConstant(Mask2, DL, MVT::i32));
- DCI.CombineTo(N, Res, false);
- // Return value from the original node to inform the combiner than N is
- // now dead.
- return SDValue(N, 0);
- }
- }
-
- if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
- N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
- ARM::isBitFieldInvertedMask(~Mask)) {
- // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
- // where lsb(mask) == #shamt and masked bits of B are known zero.
- SDValue ShAmt = N00.getOperand(1);
- unsigned ShAmtC = ShAmt->getAsZExtVal();
- unsigned LSB = llvm::countr_zero(Mask);
- if (ShAmtC != LSB)
- return SDValue();
-
- Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
- DAG.getConstant(~Mask, DL, MVT::i32));
-
- DCI.CombineTo(N, Res, false);
- // Return value from the original node to inform the combiner than N is
- // now dead.
- return SDValue(N, 0);
- }
-
- return SDValue();
-}
-
static bool isValidMVECond(unsigned CC, bool IsFloat) {
switch (CC) {
case ARMCC::EQ:
@@ -14849,13 +14723,6 @@ static SDValue PerformORCombine(SDNode *N,
}
}
- // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
- // reasonable.
- if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
- if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget))
- return Res;
- }
-
if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
return Result;
diff --git a/llvm/test/CodeGen/ARM/select-imm.ll b/llvm/test/CodeGen/ARM/select-imm.ll
index 186276b50ceeb..b2e400f4ce923 100644
--- a/llvm/test/CodeGen/ARM/select-imm.ll
+++ b/llvm/test/CodeGen/ARM/select-imm.ll
@@ -696,27 +696,14 @@ define i1 @t11() {
; ARMT2: @ %bb.0: @ %entry
; ARMT2-NEXT: .pad #4
; ARMT2-NEXT: sub sp, sp, #4
-; ARMT2-NEXT: ldr r1, [sp]
-; ARMT2-NEXT: mov r0, #33
-; ARMT2-NEXT: movw r2, #39322
-; ARMT2-NEXT: movt r2, #6553
-; ARMT2-NEXT: bfi r1, r0, #0, #12
-; ARMT2-NEXT: mov r0, #10
-; ARMT2-NEXT: bfi r1, r0, #12, #13
-; ARMT2-NEXT: mov r0, r1
-; ARMT2-NEXT: bfc r0, #12, #20
-; ARMT2-NEXT: umull r2, r3, r0, r2
-; ARMT2-NEXT: add r2, r3, r3, lsl #2
-; ARMT2-NEXT: sub r0, r0, r2, lsl #1
-; ARMT2-NEXT: movw r2, #40960
-; ARMT2-NEXT: movt r2, #65024
-; ARMT2-NEXT: and r1, r1, r2
-; ARMT2-NEXT: orr r0, r1, r0
+; ARMT2-NEXT: ldr r0, [sp]
+; ARMT2-NEXT: movw r1, #40960
+; ARMT2-NEXT: movt r1, #65024
+; ARMT2-NEXT: orr r0, r0, #40960
+; ARMT2-NEXT: and r0, r0, r1
+; ARMT2-NEXT: orr r0, r0, #3
; ARMT2-NEXT: str r0, [sp]
-; ARMT2-NEXT: and r0, r0, #15
-; ARMT2-NEXT: sub r0, r0, #3
-; ARMT2-NEXT: clz r0, r0
-; ARMT2-NEXT: lsr r0, r0, #5
+; ARMT2-NEXT: mov r0, #1
; ARMT2-NEXT: add sp, sp, #4
; ARMT2-NEXT: bx lr
;
@@ -758,27 +745,14 @@ define i1 @t11() {
; THUMB2: @ %bb.0: @ %entry
; THUMB2-NEXT: .pad #4
; THUMB2-NEXT: sub sp, #4
-; THUMB2-NEXT: ldr r1, [sp]
-; THUMB2-NEXT: movs r0, #33
-; THUMB2-NEXT: movw r2, #39322
-; THUMB2-NEXT: bfi r1, r0, #0, #12
-; THUMB2-NEXT: movs r0, #10
-; THUMB2-NEXT: bfi r1, r0, #12, #13
-; THUMB2-NEXT: mov r0, r1
-; THUMB2-NEXT: movt r2, #6553
-; THUMB2-NEXT: bfc r0, #12, #20
-; THUMB2-NEXT: umull r2, r3, r0, r2
-; THUMB2-NEXT: add.w r2, r3, r3, lsl #2
-; THUMB2-NEXT: sub.w r0, r0, r2, lsl #1
-; THUMB2-NEXT: movw r2, #40960
-; THUMB2-NEXT: movt r2, #65024
-; THUMB2-NEXT: ands r1, r2
-; THUMB2-NEXT: orrs r0, r1
+; THUMB2-NEXT: ldr r0, [sp]
+; THUMB2-NEXT: movw r1, #40960
+; THUMB2-NEXT: movt r1, #65024
+; THUMB2-NEXT: orr r0, r0, #40960
+; THUMB2-NEXT: ands r0, r1
+; THUMB2-NEXT: adds r0, #3
; THUMB2-NEXT: str r0, [sp]
-; THUMB2-NEXT: and r0, r0, #15
-; THUMB2-NEXT: subs r0, #3
-; THUMB2-NEXT: clz r0, r0
-; THUMB2-NEXT: lsrs r0, r0, #5
+; THUMB2-NEXT: movs r0, #1
; THUMB2-NEXT: add sp, #4
; THUMB2-NEXT: bx lr
;
|
You can test this locally with the following command:git-clang-format --diff HEAD~1 HEAD --extensions cpp -- llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp llvm/lib/Target/ARM/ARMISelLowering.cpp View the diff from clang-format here.diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 68285f83c..66d9d93e0 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -3361,7 +3361,8 @@ bool ARMDAGToDAGISel::tryV6T2BitfieldInsertOp(SDNode *N) {
Val >>= llvm::countr_zero(~Mask);
// Materialize the constant to be inserted
- unsigned MovOpc = Subtarget->isThumb() ? ARM::t2MOVi32imm : ARM::MOVi32imm;
+ unsigned MovOpc =
+ Subtarget->isThumb() ? ARM::t2MOVi32imm : ARM::MOVi32imm;
SDNode *ValNode = CurDAG->getMachineNode(
MovOpc, DL, VT, CurDAG->getTargetConstant(Val, DL, MVT::i32));
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
This prevents a bunch of extraneous bfi nodes that do more harm than good, as can be seen in select-imm.ll