Skip to content

[ARM] Move BFI creation to ISELDAGToDAG #152200

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
165 changes: 165 additions & 0 deletions llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ class ARMDAGToDAGISel : public SelectionDAGISel {

/// Try to select SBFX/UBFX instructions for ARM.
bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
bool tryV6T2BitfieldInsertOp(SDNode *N);

bool tryInsertVectorElt(SDNode *N);

Expand Down Expand Up @@ -3323,6 +3324,165 @@ bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) {
N, N, LHS.getOpcode() == ISD::UINT_TO_FP, true);
}

/// Try to select an ARM/Thumb2 BFI (bitfield insert) for an ISD::OR node.
/// Three shapes are handled:
///  (1) or (and A, mask), val             => BFI A, val, mask
///      iff (val & mask) == val
///  (2) or (and A, mask), (and B, mask2)  => BFI A, (lsr B, amt), mask
///      (copy a bitfield of one value into the complementary field of the
///       other; 2a/2b below cover the two mask orientations)
///  (3) or (and (shl A, #shamt), mask), B => BFI B, A, ~mask
///      where lsb(mask) == #shamt and the masked bits of B are known zero.
/// Returns true and replaces N when a BFI was selected.
bool ARMDAGToDAGISel::tryV6T2BitfieldInsertOp(SDNode *N) {
  // BFI/t2BFI are only available from ARMv6T2 onwards.
  if (!Subtarget->hasV6T2Ops())
    return false;

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32)
    return false;

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDLoc DL(N);

  unsigned MaskImm;
  // Must be a single use AND with an immediate operand.
  if (!N0.hasOneUse() ||
      !isOpcWithIntImmediate(N0.getNode(), ISD::AND, MaskImm))
    return false;

  // If the mask is 0xffff, we can do better
  // via a movt instruction, so don't use BFI in that case.
  if (MaskImm == 0xffff)
    return false;

  SDValue N00 = N0.getOperand(0);
  unsigned Mask = MaskImm;

  // Build a logical-shift-right of Src by Amt as a machine node: t2LSRri for
  // Thumb2 (Rm, imm, pred, pred_reg, cc_out), MOVsi with an LSR
  // shifted-operand for ARM mode (Rm, so_reg_imm, pred, pred_reg, cc_out).
  auto EmitLSR = [&](SDValue Src, unsigned Amt) -> SDValue {
    if (Subtarget->isThumb()) {
      SDValue ShiftOps[] = {
          Src, CurDAG->getTargetConstant(Amt, DL, MVT::i32),
          getAL(CurDAG, DL),                // predicate
          CurDAG->getRegister(0, MVT::i32), // pred_reg
          CurDAG->getRegister(0, MVT::i32)  // cc_out
      };
      return SDValue(CurDAG->getMachineNode(ARM::t2LSRri, DL, VT, ShiftOps),
                     0);
    }
    SDValue ShiftOps[] = {
        Src,
        CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsr, Amt), DL,
                                  MVT::i32),
        getAL(CurDAG, DL),                // predicate
        CurDAG->getRegister(0, MVT::i32), // pred_reg
        CurDAG->getRegister(0, MVT::i32)  // cc_out
    };
    return SDValue(CurDAG->getMachineNode(ARM::MOVsi, DL, VT, ShiftOps), 0);
  };

  // Replace N with a (t2)BFI inserting Ins into Base under BFIMask.
  auto SelectBFI = [&](SDValue Base, SDValue Ins, unsigned BFIMask) {
    SDValue Ops[] = {Base, Ins,
                     CurDAG->getTargetConstant(BFIMask, DL, MVT::i32),
                     getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32)};
    unsigned Opc = Subtarget->isThumb() ? ARM::t2BFI : ARM::BFI;
    CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  };

  // Case (1): or (and A, mask), val => ARMbfi A, val, mask
  if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) {
    unsigned Val = N1C->getZExtValue();
    // The constant's set bits must lie entirely outside the preserved bits.
    if ((Val & ~Mask) != Val)
      return false;

    if (ARM::isBitFieldInvertedMask(Mask)) {
      // Shift the value down to bit 0 of the inserted (~Mask) field.
      Val >>= llvm::countr_zero(~Mask);

      // Materialize the constant to be inserted.
      unsigned MovOpc =
          Subtarget->isThumb() ? ARM::t2MOVi32imm : ARM::MOVi32imm;
      SDNode *ValNode = CurDAG->getMachineNode(
          MovOpc, DL, VT, CurDAG->getTargetConstant(Val, DL, MVT::i32));
      SelectBFI(N00, SDValue(ValNode, 0), Mask);
      return true;
    }
  } else if (N1.getOpcode() == ISD::AND) {
    // Case (2): or (and A, mask), (and B, mask2)
    //             => ARMbfi A, (lsr B, amt), mask
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C)
      return false;
    unsigned Mask2 = N11C->getZExtValue();

    // Mask and ~Mask2 (or the reverse) must be equivalent for the BFI
    // pattern to match.
    if (ARM::isBitFieldInvertedMask(Mask) && (Mask == ~Mask2)) {
      // The pack halfword instruction works better for masks that fit it,
      // so use that when it's available.
      if (Subtarget->hasDSP() && (Mask == 0xffff || Mask == 0xffff0000))
        return false;
      // 2a: shift B's field down into place and insert it into A.
      unsigned amt = llvm::countr_zero(Mask2);
      SelectBFI(N00, EmitLSR(N1.getOperand(0), amt), Mask);
      return true;
    }
    if (ARM::isBitFieldInvertedMask(~Mask) && (~Mask == Mask2)) {
      // The pack halfword instruction works better for masks that fit it,
      // so use that when it's available.
      if (Subtarget->hasDSP() && (Mask2 == 0xffff || Mask2 == 0xffff0000))
        return false;
      // 2b: shift A's field down into place and insert it into B.
      unsigned lsb = llvm::countr_zero(Mask);
      SelectBFI(N1.getOperand(0), EmitLSR(N00, lsb), Mask2);
      return true;
    }
  }

  // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
  // where lsb(mask) == #shamt and masked bits of B are known zero.
  if (CurDAG->MaskedValueIsZero(N1, APInt(32, Mask)) &&
      N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
      ARM::isBitFieldInvertedMask(~Mask)) {
    unsigned ShAmtC = N00.getConstantOperandVal(1);
    unsigned LSB = llvm::countr_zero(Mask);
    if (ShAmtC != LSB)
      return false;

    SelectBFI(N1, N00.getOperand(0), ~Mask);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
if (!Subtarget->hasV6T2Ops())
return false;
Expand Down Expand Up @@ -3833,6 +3993,11 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
}
}
break;
case ISD::OR: {
if (tryV6T2BitfieldInsertOp(N))
return;
break;
}
case ISD::AND: {
// Check for unsigned bitfield extract
if (tryV6T2BitfieldExtractOp(N, false))
Expand Down
133 changes: 0 additions & 133 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14579,132 +14579,6 @@ static SDValue PerformORCombineToSMULWBT(SDNode *OR,
return SDValue(OR, 0);
}

/// DAG combine that folds (or (and A, mask), B) shapes into ARMISD::BFI
/// nodes. The three supported patterns are documented in the body below.
/// Returns SDValue(N, 0) after CombineTo so the combiner knows N is now
/// dead, or an empty SDValue when no fold applies. The caller has already
/// verified that N0 is a single-use ISD::AND, so N0.getOperand(0/1) below
/// are safe.
static SDValue PerformORCombineToBFI(SDNode *N,
                                     TargetLowering::DAGCombinerInfo &DCI,
                                     const ARMSubtarget *Subtarget) {
  // BFI is only available on V6T2+
  if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
    return SDValue();

  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);
  // 1) or (and A, mask), val => ARMbfi A, val, mask
  //    iff (val & mask) == val
  //
  // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
  //  2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
  //          && mask == ~mask2
  //  2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
  //          && ~mask == mask2
  //  (i.e., copy a bitfield value into another bitfield of the same width)

  if (VT != MVT::i32)
    return SDValue();

  SDValue N00 = N0.getOperand(0);

  // The value and the mask need to be constants so we can verify this is
  // actually a bitfield set. If the mask is 0xffff, we can do better
  // via a movt instruction, so don't use BFI in that case.
  SDValue MaskOp = N0.getOperand(1);
  ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
  if (!MaskC)
    return SDValue();
  unsigned Mask = MaskC->getZExtValue();
  if (Mask == 0xffff)
    return SDValue();
  SDValue Res;
  // Case (1): or (and A, mask), val => ARMbfi A, val, mask
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N1C) {
    unsigned Val = N1C->getZExtValue();
    // The constant's set bits must lie entirely outside Mask (the bits of A
    // that are preserved); otherwise this is not a pure bitfield set.
    if ((Val & ~Mask) != Val)
      return SDValue();

    if (ARM::isBitFieldInvertedMask(Mask)) {
      // Shift the value down to bit 0 of the inserted (~Mask) field.
      Val >>= llvm::countr_zero(~Mask);

      Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
                        DAG.getConstant(Val, DL, MVT::i32),
                        DAG.getConstant(Mask, DL, MVT::i32));

      DCI.CombineTo(N, Res, false);
      // Return value from the original node to inform the combiner that N is
      // now dead.
      return SDValue(N, 0);
    }
  } else if (N1.getOpcode() == ISD::AND) {
    // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C)
      return SDValue();
    unsigned Mask2 = N11C->getZExtValue();

    // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern
    // as is to match.
    if (ARM::isBitFieldInvertedMask(Mask) &&
        (Mask == ~Mask2)) {
      // The pack halfword instruction works better for masks that fit it,
      // so use that when it's available.
      if (Subtarget->hasDSP() &&
          (Mask == 0xffff || Mask == 0xffff0000))
        return SDValue();
      // 2a: shift B's field down into place and insert it into A.
      unsigned amt = llvm::countr_zero(Mask2);
      Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
                        DAG.getConstant(amt, DL, MVT::i32));
      Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
                        DAG.getConstant(Mask, DL, MVT::i32));
      DCI.CombineTo(N, Res, false);
      // Return value from the original node to inform the combiner that N is
      // now dead.
      return SDValue(N, 0);
    } else if (ARM::isBitFieldInvertedMask(~Mask) &&
               (~Mask == Mask2)) {
      // The pack halfword instruction works better for masks that fit it,
      // so use that when it's available.
      if (Subtarget->hasDSP() &&
          (Mask2 == 0xffff || Mask2 == 0xffff0000))
        return SDValue();
      // 2b: shift A's field down into place and insert it into B.
      unsigned lsb = llvm::countr_zero(Mask);
      Res = DAG.getNode(ISD::SRL, DL, VT, N00,
                        DAG.getConstant(lsb, DL, MVT::i32));
      Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
                        DAG.getConstant(Mask2, DL, MVT::i32));
      DCI.CombineTo(N, Res, false);
      // Return value from the original node to inform the combiner that N is
      // now dead.
      return SDValue(N, 0);
    }
  }

  if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
      N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
      ARM::isBitFieldInvertedMask(~Mask)) {
    // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
    // where lsb(mask) == #shamt and masked bits of B are known zero.
    SDValue ShAmt = N00.getOperand(1);
    unsigned ShAmtC = ShAmt->getAsZExtVal();
    unsigned LSB = llvm::countr_zero(Mask);
    if (ShAmtC != LSB)
      return SDValue();

    Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
                      DAG.getConstant(~Mask, DL, MVT::i32));

    DCI.CombineTo(N, Res, false);
    // Return value from the original node to inform the combiner that N is
    // now dead.
    return SDValue(N, 0);
  }

  return SDValue();
}

static bool isValidMVECond(unsigned CC, bool IsFloat) {
switch (CC) {
case ARMCC::EQ:
Expand Down Expand Up @@ -14849,13 +14723,6 @@ static SDValue PerformORCombine(SDNode *N,
}
}

// Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
// reasonable.
if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget))
return Res;
}

if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
return Result;

Expand Down
Loading
Loading