Skip to content

Commit f4df3bb

Browse files
jgu222 authored and igcbot committed
Use the right execSize to copy opnd to msg payload
Make sure the msg header uses execSize=8 for copying. For example,
    send (4|M0) nullptr addr data ...
will be changed to
    mov (8|M0) msgPayload(0,0)<1;1,0> header
    mov (4|M0) msgPayload(1,0)<1;1,0> addr
    mov (4|M0) msgPayload(2,0)<1;1,0> data
    send (4|M0) nullptr msgPayload ...
Note this is for old non-split send.
1 parent fcd7905 commit f4df3bb

File tree

3 files changed

+57
-21
lines changed

3 files changed

+57
-21
lines changed

visa/BuildIR.h

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1854,10 +1854,29 @@ class IR_Builder
18541854

18551855
G4_Declare* getImmDcl(G4_Imm* val, int numElt);
18561856

1857+
//
1858+
// 'copyExecSize' and preparePayload's batchExSize together provide
1859+
// the execSize of copying instruction.
1860+
// 'copyExecSize' of PayloadSource is used for header only for now.
1861+
// If 'copyExecSize' is present, use it; otherwise, use batchExSize
1862+
// of preparePayload for copying.
1863+
//
1864+
// For example,
1865+
// send(4|M0) nullptr addr:a32 data:ud ...
1866+
// will be changed to
1867+
// mov(8|M0) msgPayload(0,0) <1;1,0> header
1868+
// mov(4|M0) msgPayload(1,0) <1;1,0> addr
1869+
// mov(4|M0) msgPayload(2,0) <1;1,0> data
1870+
// send(4|M0) nullptr msgPayload ...
1871+
// where 'copyExecSize' will be 8 and batchExSize = 4.
1872+
//
18571873
struct PayloadSource {
18581874
G4_SrcRegRegion *opnd;
1859-
uint32_t numElts; // 'opnd's size in msg payload
1875+
uint32_t numElts; // 'opnd's size in msg payload
18601876
G4_InstOpts instOpt;
1877+
G4_ExecSize copyExecSize; // used for copy if given.
1878+
1879+
PayloadSource() : copyExecSize(g4::SIMD_UNDEFINED) {}
18611880
};
18621881

18631882
/// preparePayload - This method prepares payload from the specified header
@@ -1869,7 +1888,10 @@ class IR_Builder
18691888
/// 2-element array must be cleared before calling
18701889
/// preparePayload().
18711890
/// \param batchExSize When it's required to copy sources, batchExSize
1872-
/// specifies the SIMD width of copy.
1891+
/// specifies the SIMD width of copy except when
1892+
/// 'copyExecSize' of PayloadSource is defined. And
1893+
/// in the case 'copyExecSize' is defined, it's used as
1894+
/// execsize for copy.
18731895
/// \param splitSendEnabled Whether feature split-send is available. When
18741896
/// feature split-send is available, this function
18751897
/// will check whether two consecutive regions

visa/VisaToG4/TranslateMisc.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,9 @@ void IR_Builder::preparePayload(
304304
unsigned regOff = 0;
305305
for (i = splitPos; i != len; ++i)
306306
{
307-
Copy_Source_To_Payload(this, batchExSize, msg, regOff, srcs[i].opnd,
307+
G4_ExecSize cpExSize = srcs[i].copyExecSize != g4::SIMD_UNDEFINED
308+
? srcs[i].copyExecSize : batchExSize;
309+
Copy_Source_To_Payload(this, cpExSize, msg, regOff, srcs[i].opnd,
308310
srcs[i].numElts, srcs[i].instOpt);
309311
}
310312

visa/VisaToG4/TranslateSendLdStLegacy.cpp

Lines changed: 30 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ int IR_Builder::translateVISAQWScatterInst(
9292
unsigned int instOpt = Get_Gen4_Emask(eMask, instExSize);
9393
bool useSplitSend = useSends();
9494

95-
PayloadSource sources[2]; // Maximal 2 sources, optional header + offsets
95+
PayloadSource sources[2]; // Maximal 2 sources, offsets + src
9696
unsigned len = 0;
9797

9898
sources[len].opnd = addresses;
@@ -109,7 +109,11 @@ int IR_Builder::translateVISAQWScatterInst(
109109

110110
G4_SrcRegRegion *msgs[2] {0, 0};
111111
unsigned sizes[2] {0, 0};
112-
preparePayload(msgs, sizes, exSize, useSplitSend, sources, len);
112+
// For send that has smaller execsize than exSize, like
113+
// "send (4) ..."
114+
// Make sure to use send's execsize (4) as batchsize, not 8/16/32.
115+
// Thus, batchsize is min(exSize, instExSize).
116+
preparePayload(msgs, sizes, std::min(exSize, instExSize), useSplitSend, sources, len);
113117

114118
uint32_t desc = buildDescForScatter(DC_QWORD_SCATTERED_WRITE, numBlocks,
115119
execSize == EXEC_SIZE_8 ? MDC_SM2_SIMD8 : MDC_SM2_SIMD16);
@@ -1484,8 +1488,9 @@ int IR_Builder::translateVISADwordAtomicInst(
14841488
G4_SrcRegRegion *header
14851489
= createSrcRegRegion(dcl, getRegionStride1());
14861490
sources[len].opnd = header;
1487-
sources[len].numElts = g4::SIMD8;
1491+
sources[len].numElts = numEltPerGRF<Type_UD>();
14881492
sources[len].instOpt = InstOpt_WriteEnable;
1493+
sources[len].copyExecSize = g4::SIMD8; // header has 8 DWs
14891494
++len;
14901495
}
14911496

@@ -1510,7 +1515,7 @@ int IR_Builder::translateVISADwordAtomicInst(
15101515

15111516
G4_SrcRegRegion *msgs[2] = {0, 0};
15121517
unsigned sizes[2] = {0, 0};
1513-
preparePayload(msgs, sizes, exSize, useSplitSend, sources, len);
1518+
preparePayload(msgs, sizes, std::min(exSize, instExSize), useSplitSend, sources, len);
15141519

15151520
SFID sfid = SFID::DP_DC1;
15161521
unsigned MD = 0;
@@ -1669,8 +1674,9 @@ int IR_Builder::translateVISAGather4TypedInst(
16691674
G4_SrcRegRegion *header
16701675
= createSrcRegRegion(dcl, getRegionStride1());
16711676
sources[len].opnd = header;
1672-
sources[len].numElts = g4::SIMD8;
1677+
sources[len].numElts = numEltPerGRF<Type_UD>();
16731678
sources[len].instOpt = InstOpt_WriteEnable;
1679+
sources[len].copyExecSize = g4::SIMD8;
16741680
++len;
16751681
}
16761682

@@ -1755,8 +1761,9 @@ int IR_Builder::translateVISAScatter4TypedInst(
17551761
G4_SrcRegRegion *header
17561762
= createSrcRegRegion(dcl, getRegionStride1());
17571763
sources[len].opnd = header;
1758-
sources[len].numElts = g4::SIMD8;
1764+
sources[len].numElts = numEltPerGRF<Type_UD>();
17591765
sources[len].instOpt = InstOpt_WriteEnable;
1766+
sources[len].copyExecSize = g4::SIMD8;
17601767
++len;
17611768
}
17621769

@@ -1877,7 +1884,7 @@ int IR_Builder::translateVISATypedAtomicInst(
18771884

18781885
G4_SrcRegRegion *msgs[2] = {0, 0};
18791886
unsigned sizes[2] = {0, 0};
1880-
preparePayload(msgs, sizes, exSize, useSplitSend, sources, len);
1887+
preparePayload(msgs, sizes, std::min(exSize, instExSize), useSplitSend, sources, len);
18811888

18821889
unsigned dstLength = dst->isNullReg() ? 0 : 1;
18831890

@@ -2081,8 +2088,9 @@ int IR_Builder::translateGather4Inst(
20812088
G4_SrcRegRegion *header
20822089
= createSrcRegRegion(dcl, getRegionStride1());
20832090
sources[len].opnd = header;
2084-
sources[len].numElts = g4::SIMD8;
2091+
sources[len].numElts = numEltPerGRF<Type_UD>();
20852092
sources[len].instOpt = InstOpt_WriteEnable;
2093+
sources[len].copyExecSize = g4::SIMD8;
20862094
++len;
20872095
}
20882096

@@ -2093,7 +2101,7 @@ int IR_Builder::translateGather4Inst(
20932101

20942102
G4_SrcRegRegion *msgs[2] = {0, 0};
20952103
unsigned sizes[2] = {0, 0};
2096-
preparePayload(msgs, sizes, exSize, useSplitSend, sources, len);
2104+
preparePayload(msgs, sizes, std::min(exSize, instExSize), useSplitSend, sources, len);
20972105

20982106
SFID sfid = SFID::DP_DC1;
20992107

@@ -2183,8 +2191,9 @@ int IR_Builder::translateScatter4Inst(
21832191
G4_SrcRegRegion *header
21842192
= createSrcRegRegion(dcl, getRegionStride1());
21852193
sources[len].opnd = header;
2186-
sources[len].numElts = g4::SIMD8;
2194+
sources[len].numElts = numEltPerGRF<Type_UD>();
21872195
sources[len].instOpt = InstOpt_WriteEnable;
2196+
sources[len].copyExecSize = g4::SIMD8;
21882197
++len;
21892198
}
21902199

@@ -2199,7 +2208,7 @@ int IR_Builder::translateScatter4Inst(
21992208

22002209
G4_SrcRegRegion *msgs[2] = {0, 0};
22012210
unsigned sizes[2] = {0, 0};
2202-
preparePayload(msgs, sizes, exSize, useSplitSend, sources, len);
2211+
preparePayload(msgs, sizes, std::min(exSize, instExSize), useSplitSend, sources, len);
22032212

22042213
SFID sfid = SFID::DP_DC1;
22052214

@@ -2373,8 +2382,9 @@ int IR_Builder::translateByteGatherInst(
23732382
G4_SrcRegRegion *header
23742383
= createSrcRegRegion(dcl, getRegionStride1());
23752384
sources[len].opnd = header;
2376-
sources[len].numElts = g4::SIMD8;
2385+
sources[len].numElts = numEltPerGRF<Type_UD>();
23772386
sources[len].instOpt = InstOpt_WriteEnable;
2387+
sources[len].copyExecSize = g4::SIMD8;
23782388
++len;
23792389
}
23802390

@@ -2385,7 +2395,7 @@ int IR_Builder::translateByteGatherInst(
23852395

23862396
G4_SrcRegRegion *msgs[2] = {0, 0};
23872397
unsigned sizes[2] = {0, 0};
2388-
preparePayload(msgs, sizes, exSize, useSplitSend, sources, len);
2398+
preparePayload(msgs, sizes, std::min(exSize, instExSize), useSplitSend, sources, len);
23892399

23902400
SFID sfid = SFID::DP_DC0;
23912401

@@ -2483,8 +2493,9 @@ int IR_Builder::translateByteScatterInst(
24832493
G4_SrcRegRegion *header
24842494
= createSrcRegRegion(dcl, getRegionStride1());
24852495
sources[len].opnd = header;
2486-
sources[len].numElts = g4::SIMD8;
2496+
sources[len].numElts = numEltPerGRF<Type_UD>();
24872497
sources[len].instOpt = InstOpt_WriteEnable;
2498+
sources[len].copyExecSize = g4::SIMD8;
24882499
++len;
24892500
}
24902501

@@ -2499,7 +2510,7 @@ int IR_Builder::translateByteScatterInst(
24992510

25002511
G4_SrcRegRegion *msgs[2] = {0, 0};
25012512
unsigned sizes[2] = {0, 0};
2502-
preparePayload(msgs, sizes, exSize, useSplitSend, sources, len);
2513+
preparePayload(msgs, sizes, std::min(exSize, instExSize), useSplitSend, sources, len);
25032514

25042515
SFID sfid = SFID::DP_DC0;
25052516

@@ -2652,8 +2663,9 @@ int IR_Builder::translateVISASVMBlockWriteInst(
26522663
unsigned len = 0;
26532664

26542665
sources[len].opnd = createSrcRegRegion(dcl, getRegionStride1());
2655-
sources[len].numElts = g4::SIMD8;
2666+
sources[len].numElts = numEltPerGRF<Type_UD>();
26562667
sources[len].instOpt = InstOpt_WriteEnable;
2668+
sources[len].copyExecSize = g4::SIMD8; // block msg header has 8 DWs
26572669
++len;
26582670

26592671
if (src->getElemSize() < TypeSize(Type_UD))
@@ -2824,7 +2836,7 @@ int IR_Builder::translateVISASVMScatterWriteInst(
28242836

28252837
bool useSplitSend = useSends();
28262838

2827-
PayloadSource sources[2]; // Maximal 2 sources, optional header + offsets
2839+
PayloadSource sources[2]; // Maximal 2 sources, offsets + src
28282840
unsigned len = 0;
28292841

28302842
sources[len].opnd = addresses;
@@ -2860,7 +2872,7 @@ int IR_Builder::translateVISASVMScatterWriteInst(
28602872
(TypeSize(srcType) != 4))
28612873
src->setType(*this, Type_UD);
28622874

2863-
preparePayload(msgs, sizes, exSize, useSplitSend, sources, len);
2875+
preparePayload(msgs, sizes, std::min(exSize, instExSize), useSplitSend, sources, len);
28642876

28652877
// set the type back in case we changed it for preparePayload
28662878
src->setType(*this, srcType);

0 commit comments

Comments
 (0)