Skip to content

Commit c9684e5

Browse files
authored
[RISCV] Implement EmitTargetCodeForMemset for Xqcilsm (#151555)
This patch adds support for converting memset calls to one or more `QC_SETWMI` instructions when beneficial. We only handle aligned memset calls for now.

We limit a `QC_SETWMI` to 16 words or less to improve interruptibility. So for `1-16` words we use a single `QC_SETWMI`:

```
QC_SETWMI reg1, N, 0(reg2)
```

For `17-32` words we use two `QC_SETWMI`s, with the first as 16 words and the second for the remainder:

```
QC_SETWMI reg1, 16, 0(reg2)
QC_SETWMI reg1, N, 64(reg2)
```

For `33-48` words, we would like to use `(16, 16, n)`, but that means the last `QC_SETWMI` needs an offset of `128`, which the instruction doesn't support. So in this case we use a length of `15` for the second instruction and we do the rest with the third instruction. This means the maximum number of words handled is `47` (for now):

```
QC_SETWMI R2, R0, 16, 0
QC_SETWMI R2, R0, 15, 64
QC_SETWMI R2, R0, N, 124
```

For `48` words or more, call the target-independent memset.
1 parent 5499901 commit c9684e5

File tree

4 files changed

+1016
-0
lines changed

4 files changed

+1016
-0
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,14 @@
1414
// Operand and SDNode transformation definitions.
1515
//===----------------------------------------------------------------------===//
1616

17+
// Type profile used by the qc_setwmi node: no results and four operands.
// The constraints require operands 0 and 1 to have the same type, operands 1
// and 3 to have the same type, operand 2 to be of pointer type, and operand 3
// to be XLenVT.
// NOTE(review): in the QC_SETWMI selection pattern the base pointer (rs1) is
// operand 1 and operand 2 is the word-count immediate -- confirm that
// SDTCisPtrTy<2> (rather than <1>) is what is intended here.
def SDT_SetMultiple : SDTypeProfile<0, 4, [SDTCisSameAs<0, 1>,
18+
SDTCisSameAs<1, 3>,
19+
SDTCisPtrTy<2>,
20+
SDTCisVT<3, XLenVT>]>;
21+
22+
// SelectionDAG node named "QC_SETWMI", typed by SDT_SetMultiple. It is
// declared with a chain, as possibly storing to memory, and as carrying a
// memory operand.
def qc_setwmi : RVSDNode<"QC_SETWMI", SDT_SetMultiple,
23+
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
24+
1725
def uimm5nonzero : RISCVOp<XLenVT>,
1826
ImmLeaf<XLenVT, [{return (Imm != 0) && isUInt<5>(Imm);}]> {
1927
let ParserMatchClass = UImmAsmOperand<5, "NonZero">;
@@ -27,6 +35,8 @@ def uimm5nonzero : RISCVOp<XLenVT>,
2735
}];
2836
}
2937

38+
// TImmLeaf with the same predicate as uimm5nonzero: accepts a non-zero
// immediate that fits in 5 unsigned bits (1..31).
def tuimm5nonzero : TImmLeaf<XLenVT, [{return (Imm != 0) && isUInt<5>(Imm);}]>;
39+
3040
def uimm5gt3 : RISCVOp<XLenVT>, ImmLeaf<XLenVT,
3141
[{return (Imm > 3) && isUInt<5>(Imm);}]> {
3242
let ParserMatchClass = UImmAsmOperand<5, "GT3">;
@@ -92,6 +102,8 @@ def uimm5slist : RISCVOp<XLenVT>, ImmLeaf<XLenVT,
92102
}];
93103
}
94104

105+
// TImmLeaf accepting an immediate for which isShiftedUInt<5, 2> holds, i.e. a
// 5-bit unsigned value shifted left by two: a 7-bit unsigned immediate whose
// two low bits are zero (multiples of 4 from 0 to 124).
def tuimm7_lsb00 : TImmLeaf<XLenVT,[{return isShiftedUInt<5, 2>(Imm);}]>;
106+
95107
// Presumably a 10-bit unsigned immediate operand, going by the template
// argument; RISCVUImmLeafOp is defined outside this view -- confirm there.
def uimm10 : RISCVUImmLeafOp<10>;
96108

97109
// Presumably an 11-bit unsigned immediate operand, going by the template
// argument; RISCVUImmLeafOp is defined outside this view -- confirm there.
def uimm11 : RISCVUImmLeafOp<11>;
@@ -1566,6 +1578,11 @@ def : QCISELECTIICCPat <SETEQ, QC_SELECTIIEQ>;
15661578
def : QCISELECTIICCPat <SETNE, QC_SELECTIINE>;
15671579
} // Predicates = [HasVendorXqcics, IsRV32]
15681580

1581+
// Select a qc_setwmi node to the QC_SETWMI instruction, forwarding all four
// operands unchanged. The word count must satisfy tuimm5nonzero and the
// offset must satisfy tuimm7_lsb00. The pattern is only active when both the
// HasVendorXqcilsm and IsRV32 predicates hold.
let Predicates = [HasVendorXqcilsm, IsRV32] in {
1582+
def : Pat<(qc_setwmi GPR:$rs3, GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb00:$uimm7),
1583+
(QC_SETWMI GPR:$rs3, GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb00:$uimm7)>;
1584+
} // Predicates = [HasVendorXqcilsm, IsRV32]
1585+
15691586
//===----------------------------------------------------------------------===//
15701587
// Compress Instruction tablegen backend.
15711588
//===----------------------------------------------------------------------===//

llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
//===----------------------------------------------------------------------===//
88

99
#include "RISCVSelectionDAGInfo.h"
10+
#include "RISCVSubtarget.h"
11+
#include "llvm/CodeGen/SelectionDAG.h"
1012

1113
#define GET_SDNODE_DESC
1214
#include "RISCVGenSDNodeInfo.inc"
@@ -62,3 +64,94 @@ void RISCVSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
6264
}
6365
#endif
6466
}
67+
68+
/// Lower a memset to one, two or three QC_SETWMI nodes when profitable.
///
/// The lowering is only attempted when the subtarget has Xqcilsm, the size is
/// a compile-time constant that is a multiple of 4 bytes, the destination is
/// at least 4-byte aligned, and the size is between 1 and 47 words inclusive.
/// In every other case an empty SDValue is returned so that the caller falls
/// back to the target-independent memset lowering.
///
/// \param DAG          DAG in which the new nodes are created.
/// \param dl           Debug location attached to the new nodes.
/// \param Chain        Incoming chain; every QC_SETWMI node hangs off it.
/// \param Dst          Destination pointer.
/// \param Src          Value to store. An i8 value that is not a known zero
///                     constant is replicated into all four bytes of a word.
/// \param Size         Number of bytes to set.
/// \param Alignment    Known alignment of \p Dst.
/// \param isVolatile   Whether the stores must be marked volatile.
/// \param AlwaysInline Not consulted by this implementation.
/// \param DstPtrInfo   Pointer info used to build the memory operands.
/// \returns The chain of the single QC_SETWMI node, a TokenFactor joining
///          several of them, or an empty SDValue if nothing was emitted.
SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
    MachinePointerInfo DstPtrInfo) const {
  const auto &Subtarget = DAG.getSubtarget<RISCVSubtarget>();
  // We currently do this only for Xqcilsm.
  if (!Subtarget.hasVendorXqcilsm())
    return SDValue();

  // Do this only if we know the size at compile time.
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  if (!ConstantSize)
    return SDValue();

  uint64_t NumberOfBytesToWrite = ConstantSize->getZExtValue();

  // Do this only if it is word aligned and we write a multiple of 4 bytes.
  if (Alignment < Align(4) || (NumberOfBytesToWrite & 3) != 0)
    return SDValue();

  // We limit a QC_SETWMI to 16 words or less to improve interruptibility.
  // So for 1-16 words we use a single QC_SETWMI:
  //
  //   QC_SETWMI reg1, N, 0(reg2)
  //
  // For 17-32 words we use two QC_SETWMI's with the first as 16 words and the
  // second for the remainder:
  //
  //   QC_SETWMI reg1, 16, 0(reg2)
  //   QC_SETWMI reg1, N, 64(reg2)
  //
  // For 33-48 words, we would like to use (16, 16, n), but that means the last
  // QC_SETWMI needs an offset of 128 which the instruction doesn't support.
  // So in this case we use a length of 15 for the second instruction and we do
  // the rest with the third instruction.
  // This means the maximum inlined number of words is 47 (for now):
  //
  //   QC_SETWMI R2, R0, 16, 0
  //   QC_SETWMI R2, R0, 15, 64
  //   QC_SETWMI R2, R0, N, 124
  //
  // For 48 words or more, call the target independent memset.
  //
  // The range check is done on the 64-bit word count, before it is narrowed
  // and before any node is created, so that the fallback path leaves no
  // orphan nodes behind. A zero word count is rejected as well because the
  // instruction's length immediate must be non-zero.
  constexpr uint64_t MaxInlinedWords = 47;
  const uint64_t WordCount = NumberOfBytesToWrite / 4;
  if (WordCount == 0 || WordCount > MaxInlinedWords)
    return SDValue();
  const unsigned NumberOfWords = static_cast<unsigned>(WordCount);

  MachineFunction &MF = DAG.getMachineFunction();
  auto Volatile =
      isVolatile ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;

  SDValue SrcValueReplicated = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);

  // If i8 type and not a known zero constant.
  if ((Src.getValueType() == MVT::i8) && !isNullConstant(Src))
    // Replicate byte to word by multiplication with 0x01010101.
    SrcValueReplicated =
        DAG.getNode(ISD::MUL, dl, MVT::i32, SrcValueReplicated,
                    DAG.getConstant(0x01010101ul, dl, MVT::i32));

  // Helper for constructing the QC_SETWMI instruction. Each node uses the
  // incoming chain directly, so the nodes are independent of one another.
  auto getSetwmiNode = [&](uint8_t SizeWords, uint8_t OffsetSetwmi) -> SDValue {
    SDValue Ops[] = {Chain, SrcValueReplicated, Dst,
                     DAG.getTargetConstant(SizeWords, dl, MVT::i32),
                     DAG.getTargetConstant(OffsetSetwmi, dl, MVT::i32)};
    MachineMemOperand *BaseMemOperand = MF.getMachineMemOperand(
        DstPtrInfo.getWithOffset(OffsetSetwmi),
        MachineMemOperand::MOStore | Volatile, SizeWords * 4, Align(4));
    return DAG.getMemIntrinsicNode(RISCVISD::QC_SETWMI, dl,
                                   DAG.getVTList(MVT::Other), Ops, MVT::i32,
                                   BaseMemOperand);
  };

  if (NumberOfWords <= 16) {
    // 1 - 16 words
    return getSetwmiNode(NumberOfWords, 0);
  }

  SmallVector<SDValue, 8> OutChains;
  if (NumberOfWords <= 32) {
    // 17 - 32 words
    OutChains.push_back(getSetwmiNode(NumberOfWords - 16, 64));
    OutChains.push_back(getSetwmiNode(16, 0));
  } else {
    // 33 - 47 words
    OutChains.push_back(getSetwmiNode(NumberOfWords - 31, 124));
    OutChains.push_back(getSetwmiNode(15, 64));
    OutChains.push_back(getSetwmiNode(16, 0));
  }

  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}

llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ class RISCVSelectionDAGInfo : public SelectionDAGGenTargetInfo {
3434
void verifyTargetNode(const SelectionDAG &DAG,
3535
const SDNode *N) const override;
3636

37+
/// Target-specific memset lowering hook (overrides the base-class virtual).
/// Per the definition in RISCVSelectionDAGInfo.cpp, it emits QC_SETWMI nodes
/// for suitable constant-size, word-aligned memsets when the subtarget has
/// Xqcilsm, and returns an empty SDValue otherwise.
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl,
38+
SDValue Chain, SDValue Dst, SDValue Src,
39+
SDValue Size, Align Alignment,
40+
bool isVolatile, bool AlwaysInline,
41+
MachinePointerInfo DstPtrInfo) const override;
42+
3743
bool hasPassthruOp(unsigned Opcode) const {
3844
return GenNodeInfo.getDesc(Opcode).TSFlags & RISCVISD::HasPassthruOpMask;
3945
}

0 commit comments

Comments
 (0)