Skip to content

Commit 6d6da93

Browse files
MaciejKalinski authored and gfxbot committed
1. The URBRead and URBReadOutput emitters were similar to each other, so the common part has been extracted.
2. Pattern matching has been added for URB read offsets of the form `add offset, var, imm`. Here we can get rid of the add, putting `imm` in the message descriptor and `var` in the message payload.
Change-Id: Ie6e4bade206b53d193074abcb2dac41f8bc3d647
1 parent 437b3f9 commit 6d6da93

File tree

4 files changed

+130
-81
lines changed

4 files changed

+130
-81
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 51 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -5859,108 +5859,82 @@ void EmitPass::emitDualBlendRT(llvm::RTDualBlendSourceIntrinsic* inst, bool from
58595859
}
58605860
}
58615861

5862-
void EmitPass::emitURBRead(llvm::GenIntrinsicInst* inst)
5862+
// Common emitter for URBRead and URBReadOutput, used also in associated pattern match pass.
5863+
// The offsets are calculated in the caller.
5864+
void EmitPass::emitURBReadCommon(llvm::GenIntrinsicInst* inst, const QuadEltUnit globalOffset, llvm::Value* const perSlotOffset)
58635865
{
5864-
CVariable* pPerSlotOffset = nullptr;
5865-
CVariable* pVertexIndex = GetSymbol(inst->getOperand(0));
5866-
5867-
QuadEltUnit globalOffset(0);
5868-
if (ConstantInt* offset = dyn_cast<ConstantInt>(inst->getOperand(1)))
5869-
{
5870-
globalOffset = QuadEltUnit(int_cast<unsigned>(offset->getZExtValue()));
5871-
}
5872-
else
5873-
{
5874-
pPerSlotOffset = m_currShader->GetSymbol(inst->getOperand(1));
5875-
}
5876-
const EltUnit payloadSize((pPerSlotOffset != nullptr) ? 2 : 1);
5877-
CVariable* pPayload =
5878-
m_currShader->GetNewVariable(payloadSize.Count() * numLanes(m_SimdMode), ISA_TYPE_UD, EALIGN_GRF);
5879-
58805866
TODO("Have VISA define the URBRead interface instead of using a raw send");
5881-
Unit<Element> messageLength = payloadSize;
5882-
Unit<Element> responseLength(m_destination->GetNumberElement() / numLanes(m_SimdMode));
58835867

5884-
m_encoder->Copy(pPayload, m_currShader->GetURBInputHandle(pVertexIndex));
5885-
m_encoder->Push();
5868+
const EltUnit payloadSize(perSlotOffset ? 2 : 1);
5869+
CVariable* const payload = m_currShader->GetNewVariable(
5870+
payloadSize.Count() * numLanes(m_SimdMode), ISA_TYPE_UD, EALIGN_GRF);
5871+
const Unit<Element> messageLength = payloadSize;
5872+
const Unit<Element> responseLength(m_destination->GetNumberElement() / numLanes(m_SimdMode));
58865873

5887-
if (pPerSlotOffset != nullptr)
5874+
// Get the register with URBHandles and update certain per-opcode data.
5875+
switch (inst->getIntrinsicID())
58885876
{
5889-
m_encoder->SetDstSubVar(1);
5890-
m_encoder->Copy(pPayload, pPerSlotOffset);
5891-
m_encoder->Push();
5892-
}
5893-
5894-
uint desc = UrbMessage(
5895-
messageLength.Count(),
5896-
responseLength.Count(),
5897-
false,
5898-
pPerSlotOffset != nullptr,
5899-
false,
5900-
globalOffset.Count(),
5901-
EU_GEN8_URB_OPCODE_SIMD8_READ);
5902-
5903-
uint exDesc = EU_MESSAGE_TARGET_URB;
5904-
CVariable* pMessDesc = m_currShader->ImmToVariable(desc, ISA_TYPE_UD);
5905-
5906-
m_encoder->Send(m_destination, pPayload, exDesc, pMessDesc);
5907-
m_encoder->Push();
5908-
5909-
// mark input to be pulled
5910-
m_currShader->isInputsPulled = true;
5911-
}
5912-
5913-
void EmitPass::emitURBReadOutput(llvm::GenIntrinsicInst* inst)
5914-
{
5915-
CVariable* pPerSlotOffset = nullptr;
5916-
QuadEltUnit globalOffset(0);
5917-
if (ConstantInt* offset = dyn_cast<ConstantInt>(inst->getOperand(0)))
5877+
case GenISAIntrinsic::GenISA_URBRead:
59185878
{
5919-
globalOffset = QuadEltUnit(int_cast<unsigned>(offset->getZExtValue()));
5879+
CVariable* const pVertexIndex = GetSymbol(inst->getOperand(0));
5880+
m_encoder->Copy(payload, m_currShader->GetURBInputHandle(pVertexIndex));
5881+
m_encoder->Push();
5882+
// Mark input to be pulled.
5883+
m_currShader->isInputsPulled = true;
5884+
break;
59205885
}
5921-
else
5922-
{
5923-
pPerSlotOffset = m_currShader->GetSymbol(inst->getOperand(0));
5886+
case GenISAIntrinsic::GenISA_URBReadOutput:
5887+
m_encoder->Copy(payload, m_currShader->GetURBOutputHandle());
5888+
m_encoder->Push();
5889+
break;
5890+
default:
5891+
assert(0);
59245892
}
59255893

5926-
const bool hasPerSlotOffsets = pPerSlotOffset != nullptr;
5927-
// Payload size is just URB handles (1 GRF) or URB handles and per-slot offsets (2 GRFs).
5928-
const Unit<Element> payloadSize(hasPerSlotOffsets ? 2 : 1);
5929-
5930-
CVariable* pPayload =
5931-
m_currShader->GetNewVariable(payloadSize.Count() * numLanes(m_SimdMode), ISA_TYPE_UD, EALIGN_GRF);
5932-
5933-
// get the register with URBHandles
5934-
m_encoder->Copy(pPayload, m_currShader->GetURBOutputHandle());
5935-
m_encoder->Push();
5936-
5937-
// If we have runtime value in per-slot offsets, we need to copy per-slot offsets to payload
5938-
if (hasPerSlotOffsets)
5894+
if (perSlotOffset)
59395895
{
59405896
m_encoder->SetDstSubVar(1);
5941-
m_encoder->Copy(pPayload, pPerSlotOffset);
5897+
m_encoder->Copy(payload, m_currShader->GetSymbol(perSlotOffset));
59425898
m_encoder->Push();
59435899
}
59445900

59455901
constexpr bool eot = false;
5946-
const Unit<Element> messageLength = payloadSize;
5947-
const Unit<Element> responseLength(m_destination->GetNumberElement() / numLanes(m_SimdMode));
5902+
constexpr bool channelMaskPresent = false;
59485903
const uint desc = UrbMessage(
59495904
messageLength.Count(),
59505905
responseLength.Count(),
59515906
eot,
5952-
hasPerSlotOffsets,
5953-
false,
5907+
perSlotOffset != nullptr,
5908+
channelMaskPresent,
59545909
globalOffset.Count(),
59555910
EU_GEN8_URB_OPCODE_SIMD8_READ);
59565911

5957-
const uint exDesc = EU_MESSAGE_TARGET_URB | (eot ? 1 << 5 : 0);
5958-
CVariable* pMessDesc = m_currShader->ImmToVariable(desc, ISA_TYPE_UD);
5912+
constexpr uint exDesc = EU_MESSAGE_TARGET_URB;
5913+
CVariable* const pMessDesc = m_currShader->ImmToVariable(desc, ISA_TYPE_UD);
59595914

5960-
m_encoder->Send(m_destination, pPayload, exDesc, pMessDesc);
5915+
m_encoder->Send(m_destination, payload, exDesc, pMessDesc);
59615916
m_encoder->Push();
59625917
}
59635918

5919+
// Emitter for URBRead and URBReadOutput.
5920+
void EmitPass::emitURBRead(llvm::GenIntrinsicInst* inst)
5921+
{
5922+
llvm::Value* offset = nullptr;
5923+
switch (inst->getIntrinsicID())
5924+
{
5925+
case GenISAIntrinsic::GenISA_URBRead:
5926+
offset = inst->getOperand(1);
5927+
break;
5928+
case GenISAIntrinsic::GenISA_URBReadOutput:
5929+
offset = inst->getOperand(0);
5930+
break;
5931+
default:
5932+
assert(0);
5933+
}
5934+
assert(!isa<ConstantInt>(offset) && "Constant offsets are expected to be handled elsewhere.");
5935+
emitURBReadCommon(inst, QuadEltUnit(0), offset);
5936+
}
5937+
59645938
void EmitPass::emitURBWrite(llvm::GenIntrinsicInst* inst)
59655939
{
59665940
// input: GenISA_URBWrite(%offset, %mask, %data0, ..., %data7)
@@ -7228,10 +7202,8 @@ void EmitPass::EmitGenIntrinsicMessage(llvm::GenIntrinsicInst* inst)
72287202
emitURBWrite(inst);
72297203
break;
72307204
case GenISAIntrinsic::GenISA_URBRead:
7231-
emitURBRead(inst);
7232-
break;
72337205
case GenISAIntrinsic::GenISA_URBReadOutput:
7234-
emitURBReadOutput(inst);
7206+
emitURBRead(inst);
72357207
break;
72367208
case GenISAIntrinsic::GenISA_cycleCounter:
72377209
emitcycleCounter(inst);

IGC/Compiler/CISACodeGen/EmitVISAPass.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,8 +215,8 @@ class EmitPass : public llvm::FunctionPass
215215
void emitMediaBlockIO(const llvm::GenIntrinsicInst* inst, bool isRead);
216216
void emitMediaBlockRectangleRead(llvm::Instruction* inst);
217217
void emitURBWrite(llvm::GenIntrinsicInst* inst);
218+
void emitURBReadCommon(llvm::GenIntrinsicInst* inst, const QuadEltUnit globalOffset, llvm::Value* const perSlotOffset);
218219
void emitURBRead(llvm::GenIntrinsicInst* inst);
219-
void emitURBReadOutput(llvm::GenIntrinsicInst* inst);
220220
void emitSampleInstruction(llvm::SampleIntrinsic* inst);
221221
void emitLdInstruction(llvm::Instruction* inst);
222222
void emitInfoInstruction(llvm::InfoIntrinsic* inst);

IGC/Compiler/CISACodeGen/PatternMatchPass.cpp

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1112,6 +1112,10 @@ void CodeGenPatternMatch::visitCallInst(CallInst &I)
11121112
match = MatchBlockReadWritePointer(*CI) ||
11131113
MatchSingleInstruction(*CI);
11141114
break;
1115+
case GenISAIntrinsic::GenISA_URBRead:
1116+
case GenISAIntrinsic::GenISA_URBReadOutput:
1117+
match = MatchURBRead(*CI);
1118+
break;
11151119
default:
11161120
match = MatchSingleInstruction(I);
11171121
// no pattern for the rest of the intrinsics
@@ -1793,6 +1797,78 @@ bool CodeGenPatternMatch::MatchBlockReadWritePointer(llvm::GenIntrinsicInst& I)
17931797
return false;
17941798
}
17951799

1800+
// 1. Detect and handle immediate URB read offsets - these can be put in message descriptor.
1801+
// 2. Detect offsets of the form "add dst, var, imm" - here we can remove the add, putting imm in message descriptor,
1802+
// and var in message payload.
1803+
bool CodeGenPatternMatch::MatchURBRead(llvm::GenIntrinsicInst& I)
1804+
{
1805+
struct URBReadPattern : public Pattern
1806+
{
1807+
explicit URBReadPattern(GenIntrinsicInst* I, QuadEltUnit globalOffset, llvm::Value* const perSlotOffset) :
1808+
m_inst(I), m_globalOffset(globalOffset), m_perSlotOffset(perSlotOffset)
1809+
{}
1810+
1811+
virtual void Emit(EmitPass* pass, const DstModifier& modifier)
1812+
{
1813+
assert(m_inst->getIntrinsicID() == GenISAIntrinsic::GenISA_URBRead ||
1814+
m_inst->getIntrinsicID() == GenISAIntrinsic::GenISA_URBReadOutput);
1815+
pass->emitURBReadCommon(m_inst, m_globalOffset, m_perSlotOffset);
1816+
}
1817+
1818+
private:
1819+
GenIntrinsicInst* const m_inst;
1820+
const QuadEltUnit m_globalOffset;
1821+
llvm::Value* const m_perSlotOffset;
1822+
};
1823+
1824+
if (I.getIntrinsicID() != GenISAIntrinsic::GenISA_URBRead &&
1825+
I.getIntrinsicID() != GenISAIntrinsic::GenISA_URBReadOutput)
1826+
{
1827+
return false;
1828+
}
1829+
1830+
const bool hasVertexIndexAsArg0 = I.getIntrinsicID() == GenISAIntrinsic::GenISA_URBRead;
1831+
llvm::Value* const offset = I.getOperand(hasVertexIndexAsArg0 ? 1 : 0);
1832+
if (const ConstantInt* const constOffset = dyn_cast<ConstantInt>(offset))
1833+
{
1834+
const QuadEltUnit globalOffset = QuadEltUnit(int_cast<unsigned>(constOffset->getZExtValue()));
1835+
if (hasVertexIndexAsArg0)
1836+
{
1837+
MarkAsSource(I.getOperand(0));
1838+
}
1839+
URBReadPattern* pattern = new (m_allocator) URBReadPattern(&I, globalOffset, nullptr);
1840+
AddPattern(pattern);
1841+
return true;
1842+
}
1843+
else if (llvm::Instruction* const inst = llvm::dyn_cast<llvm::Instruction>(offset))
1844+
{
1845+
if (inst->getOpcode() == llvm::Instruction::Add)
1846+
{
1847+
const bool isConstant0 = llvm::isa<llvm::ConstantInt>(inst->getOperand(0));
1848+
const bool isConstant1 = llvm::isa<llvm::ConstantInt>(inst->getOperand(1));
1849+
if (isConstant0 || isConstant1)
1850+
{
1851+
assert(!(isConstant0 && isConstant1) &&
1852+
"Both operands are immediate - constants should be folded elsewhere.");
1853+
1854+
if (hasVertexIndexAsArg0)
1855+
{
1856+
MarkAsSource(I.getOperand(0));
1857+
}
1858+
const QuadEltUnit globalOffset = QuadEltUnit(int_cast<unsigned>(cast<ConstantInt>(
1859+
isConstant0 ? inst->getOperand(0) : inst->getOperand(1))->getZExtValue()));
1860+
llvm::Value* const perSlotOffset = isConstant0 ? inst->getOperand(1) : inst->getOperand(0);
1861+
MarkAsSource(perSlotOffset);
1862+
URBReadPattern* pattern = new (m_allocator) URBReadPattern(&I, globalOffset, perSlotOffset);
1863+
AddPattern(pattern);
1864+
return true;
1865+
}
1866+
}
1867+
}
1868+
1869+
return false;
1870+
}
1871+
17961872
bool CodeGenPatternMatch::MatchLoadStorePointer(llvm::Instruction& I, llvm::Value& ptrVal)
17971873
{
17981874
struct LoadStorePointerPattern : public Pattern
@@ -2981,7 +3057,7 @@ bool CodeGenPatternMatch::MatchRotate(llvm::Instruction& I)
29813057
// 2) both operands are instructions.
29823058
Instruction *LHS = dyn_cast<Instruction>(OrInst->getOperand(0));
29833059
Instruction *RHS = dyn_cast<Instruction>(OrInst->getOperand(1));
2984-
if (!LHS || !RHS ||
3060+
if (!LHS || !RHS ||
29853061
(typeWidth != 16 && typeWidth != 32))
29863062
{
29873063
{

IGC/Compiler/CISACodeGen/PatternMatchPass.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ class CodeGenPatternMatch : public llvm::FunctionPass, public llvm::InstVisitor<
209209
bool MatchRsqrt(llvm::BinaryOperator& I);
210210
bool MatchLoadStorePointer(llvm::Instruction& I, llvm::Value& ptrVal);
211211
bool MatchBlockReadWritePointer(llvm::GenIntrinsicInst& I);
212+
bool MatchURBRead(llvm::GenIntrinsicInst& I);
212213
bool MatchGradient(llvm::GenIntrinsicInst& I);
213214
bool MatchSampleDerivative(llvm::GenIntrinsicInst & I);
214215
bool MatchDbgInstruction(llvm::DbgInfoIntrinsic& I);

0 commit comments

Comments
 (0)