Skip to content

Commit 93091d0

Browse files
pszymich authored and igcbot committed
Adding Raytracing code in OpenCLKernelCodeGen
.
1 parent 9eff454 commit 93091d0

File tree

4 files changed

+76
-8
lines changed

4 files changed

+76
-8
lines changed

IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.cpp

Lines changed: 60 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1742,6 +1742,10 @@ namespace IGC
17421742
// Local IDs are non-uniform and may have two instances in SIMD32 mode
17431743
int numAllocInstances = arg.getArgType() == KernelArg::ArgType::IMPLICIT_LOCAL_IDS ? m_numberInstance : 1;
17441744

1745+
if (arg.getArgType() == KernelArg::ArgType::RT_STACK_ID) {
1746+
numAllocInstances = m_numberInstance;
1747+
}
1748+
17451749
auto allocSize = arg.getAllocateSize();
17461750

17471751
if (!IsUnusedArg && !isRuntimeValue)
@@ -1751,6 +1755,22 @@ namespace IGC
17511755
// Align on the desired alignment for this argument
17521756
auto alignment = arg.getAlignment();
17531757

1758+
// FIXME: move alignment checks to implicit arg creation
1759+
if ((arg.getArgType() == KernelArg::ArgType::IMPLICIT_LOCAL_IDS ||
1760+
arg.getArgType() == KernelArg::ArgType::RT_STACK_ID) &&
1761+
m_Platform->getGRFSize() == 64)
1762+
{
1763+
alignment = 64;
1764+
// generate a single SIMD32 variable in this case
1765+
if (m_dispatchSize == SIMDMode::SIMD16 && m_Platform->getGRFSize() == 64)
1766+
{
1767+
allocSize = 64;
1768+
}
1769+
else
1770+
{
1771+
allocSize = PVCLSCEnabled() ? 64 : 32;
1772+
}
1773+
}
17541774
offset = iSTD::Align(offset, alignment);
17551775

17561776
// Arguments larger than a GRF must be at least GRF-aligned.
@@ -1788,6 +1808,7 @@ namespace IGC
17881808

17891809
if (useInlineData && !inlineDataProcessed &&
17901810
arg.getArgType() != KernelArg::ArgType::IMPLICIT_LOCAL_IDS &&
1811+
arg.getArgType() != KernelArg::ArgType::RT_STACK_ID &&
17911812
arg.getArgType() != KernelArg::ArgType::IMPLICIT_R0)
17921813
{
17931814
// Calc if we can fit this arg in inlinedata:
@@ -1809,6 +1830,7 @@ namespace IGC
18091830

18101831
// numAllocInstances can be greater than 1, only when:
18111832
// argtype == IMPLICIT_LOCAL_IDS
1833+
// or argtype == RT_STACK_ID,
18121834
// so there is no need to handle it here
18131835

18141836
// current arg is first to be loaded (it does not come in inlinedata)
@@ -2049,6 +2071,7 @@ namespace IGC
20492071

20502072
m_kernelInfo.m_executionEnivronment.CompiledSubGroupsNumber = funcMD.CompiledSubGroupsNumber;
20512073

2074+
m_kernelInfo.m_executionEnivronment.HasRTCalls = funcMD.hasSyncRTCalls;
20522075
}
20532076

20542077
m_kernelInfo.m_executionEnivronment.HasGlobalAtomics = GetHasGlobalAtomics();
@@ -2491,6 +2514,19 @@ namespace IGC
24912514
m_Context->SetSIMDInfo(SIMD_RETRY, simdMode, ShaderDispatchMode::NOT_APPLICABLE);
24922515
}
24932516

2517+
// Currently the FunctionMetaData is looked up solely to read hasSyncRTCalls.
2518+
// If we ever need some non-raytracing-related field from the FunctionMetaData,
2519+
// then we can move the lookup out of the #if and just leave the bool hasSyncRTCalls inside.
2520+
auto& FuncMap = m_Context->getModuleMetaData()->FuncMD;
2521+
// we want to check the setting for the associated kernel
2522+
auto FuncIter = FuncMap.find(entry);
2523+
if (FuncIter == FuncMap.end()) { // wasn't able to find the meta data for the passed in llvm::Function!
2524+
// All of the kernels should have an entry in the map.
2525+
IGC_ASSERT(0);
2526+
return false;
2527+
}
2528+
const FunctionMetaData& funcMD = FuncIter->second;
2529+
bool hasSyncRTCalls = funcMD.hasSyncRTCalls; // if the function/kernel has sync raytracing calls
24942530

24952531
//If forced SIMD Mode (by driver or regkey), then:
24962532
// 1. Compile only that SIMD mode and nothing else
@@ -2504,15 +2540,16 @@ namespace IGC
25042540
// These statements are basically equivalent to (simdMode == forcedSIMDSize)
25052541
(simdMode == SIMDMode::SIMD8 && m_Context->getModuleMetaData()->csInfo.forcedSIMDSize == 8) ||
25062542
(simdMode == SIMDMode::SIMD16 && m_Context->getModuleMetaData()->csInfo.forcedSIMDSize == 16) ||
2507-
(simdMode == SIMDMode::SIMD32 && m_Context->getModuleMetaData()->csInfo.forcedSIMDSize == 32)
2543+
// SIMD32 may only be compiled when the kernel has no raytracing calls; raytracing doesn't support SIMD32
2544+
(simdMode == SIMDMode::SIMD32 && m_Context->getModuleMetaData()->csInfo.forcedSIMDSize == 32 && !hasSyncRTCalls)
25082545
);
25092546
}
25102547

2511-
SIMDStatus simdStatus = checkSIMDCompileConds(simdMode, EP, F);
2548+
SIMDStatus simdStatus = checkSIMDCompileConds(simdMode, EP, F, hasSyncRTCalls);
25122549

25132550
if (m_Context->platform.getMinDispatchMode() == SIMDMode::SIMD16)
25142551
{
2515-
simdStatus = checkSIMDCompileCondsPVC(simdMode, EP, F);
2552+
simdStatus = checkSIMDCompileCondsPVC(simdMode, EP, F, hasSyncRTCalls);
25162553
}
25172554

25182555
// Func and Perf checks pass, compile this SIMD
@@ -2535,7 +2572,7 @@ namespace IGC
25352572
return simdStatus == SIMDStatus::SIMD_PASS;
25362573
}
25372574

2538-
SIMDStatus COpenCLKernel::checkSIMDCompileCondsPVC(SIMDMode simdMode, EmitPass& EP, llvm::Function& F)
2575+
SIMDStatus COpenCLKernel::checkSIMDCompileCondsPVC(SIMDMode simdMode, EmitPass& EP, llvm::Function& F, bool hasSyncRTCalls)
25392576
{
25402577
if (simdMode == SIMDMode::SIMD8)
25412578
{
@@ -2590,6 +2627,12 @@ namespace IGC
25902627
pCtx->getModuleMetaData()->csInfo.forcedSIMDSize = (unsigned char)numLanes(SIMDMode::SIMD16);
25912628
}
25922629

2630+
if (simdMode == SIMDMode::SIMD32 && hasSyncRTCalls) {
2631+
return SIMDStatus::SIMD_FUNC_FAIL;
2632+
}
2633+
else if (simdMode == SIMDMode::SIMD16 && hasSyncRTCalls) {
2634+
return SIMDStatus::SIMD_PASS;
2635+
}
25932636

25942637
if (simd_size)
25952638
{
@@ -2657,7 +2700,7 @@ namespace IGC
26572700
return m_annotatedNumThreads;
26582701
}
26592702

2660-
SIMDStatus COpenCLKernel::checkSIMDCompileConds(SIMDMode simdMode, EmitPass& EP, llvm::Function& F)
2703+
SIMDStatus COpenCLKernel::checkSIMDCompileConds(SIMDMode simdMode, EmitPass& EP, llvm::Function& F, bool hasSyncRTCalls)
26612704
{
26622705
CShader* simd8Program = m_parent->GetShader(SIMDMode::SIMD8);
26632706
CShader* simd16Program = m_parent->GetShader(SIMDMode::SIMD16);
@@ -2773,7 +2816,12 @@ namespace IGC
27732816
return SIMDStatus::SIMD_FUNC_FAIL;
27742817
}
27752818
else {
2776-
EP.m_canAbortOnSpill = false;
2819+
if (hasSyncRTCalls) {
2820+
return SIMDStatus::SIMD_FUNC_FAIL; // SIMD32 unsupported with raytracing calls
2821+
}
2822+
else { // simdMode == SIMDMode::SIMD32 && !hasSyncRTCalls
2823+
EP.m_canAbortOnSpill = false;
2824+
}
27772825
}
27782826
break;
27792827
default:
@@ -2802,6 +2850,12 @@ namespace IGC
28022850
return SIMDStatus::SIMD_PASS;
28032851
}
28042852

2853+
if (hasSyncRTCalls) {
2854+
// If we get all the way here, accept only the platform's preferred SIMD size for ray tracing.
2855+
SIMDMode mode = SIMDMode::UNKNOWN;
2856+
mode = m_Context->platform.getPreferredRayTracingSIMDSize();
2857+
return (mode == simdMode) ? SIMDStatus::SIMD_PASS : SIMDStatus::SIMD_FUNC_FAIL;
2858+
}
28052859

28062860
if (groupSize != 0 && groupSize <= 16)
28072861
{

IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,10 @@ namespace IGC
3131

3232
bool hasReadWriteImage(llvm::Function& F) override;
3333
bool CompileSIMDSize(SIMDMode simdMode, EmitPass& EP, llvm::Function& F) override;
34-
SIMDStatus checkSIMDCompileConds(SIMDMode simdMode, EmitPass& EP, llvm::Function& F);
3534

36-
SIMDStatus checkSIMDCompileCondsPVC(SIMDMode simdMode, EmitPass& EP, llvm::Function& F);
35+
SIMDStatus checkSIMDCompileConds(SIMDMode simdMode, EmitPass& EP, llvm::Function& F, bool hasSyncRTCalls);
36+
SIMDStatus checkSIMDCompileCondsPVC(SIMDMode simdMode, EmitPass& EP, llvm::Function& F, bool hasSyncRTCalls);
37+
3738
unsigned getAnnotatedNumThreads() override;
3839
void FillKernel();
3940

IGC/Compiler/Optimizer/OpenCLPasses/KernelArgs.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,10 @@ KernelArg::ArgType KernelArg::calcArgType(const ImplicitArg& arg) const
345345
return KernelArg::ArgType::IMPLICIT_LOCAL_MEMORY_STATELESS_WINDOW_SIZE;
346346
case ImplicitArg::PRIVATE_MEMORY_STATELESS_SIZE:
347347
return KernelArg::ArgType::IMPLICIT_PRIVATE_MEMORY_STATELESS_SIZE;
348+
case ImplicitArg::RT_STACK_ID:
349+
return KernelArg::ArgType::RT_STACK_ID;
350+
case ImplicitArg::RT_GLOBAL_BUFFER_POINTER:
351+
return KernelArg::ArgType::IMPLICIT_RT_GLOBAL_BUFFER;
348352
case ImplicitArg::BINDLESS_OFFSET:
349353
return KernelArg::ArgType::IMPLICIT_BINDLESS_OFFSET;
350354

@@ -678,6 +682,7 @@ std::map<KernelArg::ArgType, iOpenCL::DATA_PARAMETER_TOKEN> initArgTypeTokenMap(
678682
std::map<KernelArg::ArgType, iOpenCL::DATA_PARAMETER_TOKEN> map
679683
{
680684
{ KernelArg::ArgType::IMPLICIT_LOCAL_IDS, iOpenCL::DATA_PARAMETER_LOCAL_ID },
685+
{ KernelArg::ArgType::RT_STACK_ID, iOpenCL::DATA_PARAMETER_RT_STACK_ID },
681686
{ KernelArg::ArgType::IMPLICIT_WORK_DIM, iOpenCL::DATA_PARAMETER_WORK_DIMENSIONS },
682687
{ KernelArg::ArgType::IMPLICIT_NUM_GROUPS, iOpenCL::DATA_PARAMETER_NUM_WORK_GROUPS },
683688
{ KernelArg::ArgType::IMPLICIT_GLOBAL_SIZE, iOpenCL::DATA_PARAMETER_GLOBAL_WORK_SIZE },
@@ -783,6 +788,7 @@ KernelArgsOrder::KernelArgsOrder(InputType layout)
783788
KernelArg::ArgType::IMPLICIT_PRIVATE_BASE,
784789
KernelArg::ArgType::IMPLICIT_PRINTF_BUFFER,
785790
KernelArg::ArgType::IMPLICIT_SYNC_BUFFER,
791+
KernelArg::ArgType::IMPLICIT_RT_GLOBAL_BUFFER,
786792
KernelArg::ArgType::IMPLICIT_BUFFER_OFFSET,
787793
KernelArg::ArgType::IMPLICIT_WORK_DIM,
788794
KernelArg::ArgType::IMPLICIT_NUM_GROUPS,
@@ -829,6 +835,7 @@ KernelArgsOrder::KernelArgsOrder(InputType layout)
829835
KernelArg::ArgType::IMPLICIT_PRIVATE_MEMORY_STATELESS_SIZE,
830836

831837
KernelArg::ArgType::R1,
838+
KernelArg::ArgType::RT_STACK_ID,
832839
KernelArg::ArgType::IMPLICIT_LOCAL_IDS,
833840

834841
KernelArg::ArgType::IMPLICIT_ARG_BUFFER,
@@ -886,6 +893,7 @@ KernelArgsOrder::KernelArgsOrder(InputType layout)
886893
KernelArg::ArgType::IMPLICIT_R0,
887894

888895
KernelArg::ArgType::R1,
896+
KernelArg::ArgType::RT_STACK_ID,
889897
KernelArg::ArgType::IMPLICIT_LOCAL_IDS,
890898

891899
KernelArg::ArgType::RUNTIME_VALUE,
@@ -902,6 +910,7 @@ KernelArgsOrder::KernelArgsOrder(InputType layout)
902910
KernelArg::ArgType::IMPLICIT_PRIVATE_BASE,
903911
KernelArg::ArgType::IMPLICIT_PRINTF_BUFFER,
904912
KernelArg::ArgType::IMPLICIT_SYNC_BUFFER,
913+
KernelArg::ArgType::IMPLICIT_RT_GLOBAL_BUFFER,
905914
KernelArg::ArgType::IMPLICIT_BUFFER_OFFSET,
906915
KernelArg::ArgType::IMPLICIT_WORK_DIM,
907916
KernelArg::ArgType::IMPLICIT_NUM_GROUPS,

IGC/Compiler/Optimizer/OpenCLPasses/KernelArgs.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ namespace IGC
5959

6060
IMPLICIT_PRINTF_BUFFER,
6161
IMPLICIT_SYNC_BUFFER,
62+
IMPLICIT_RT_GLOBAL_BUFFER,
6263

6364
IMPLICIT_BUFFER_OFFSET,
6465

@@ -107,6 +108,9 @@ namespace IGC
107108

108109
IMPLICIT_LOCAL_IDS,
109110

111+
// Raytracing args
112+
RT_STACK_ID,
113+
110114
// STAGE_IN_GRID runtime values
111115
IMPLICIT_STAGE_IN_GRID_ORIGIN,
112116
IMPLICIT_STAGE_IN_GRID_SIZE,

0 commit comments

Comments
 (0)