@@ -1742,6 +1742,10 @@ namespace IGC
1742
1742
// Local IDs are non-uniform and may have two instances in SIMD32 mode
1743
1743
int numAllocInstances = arg.getArgType () == KernelArg::ArgType::IMPLICIT_LOCAL_IDS ? m_numberInstance : 1 ;
1744
1744
1745
+ if (arg.getArgType () == KernelArg::ArgType::RT_STACK_ID) {
1746
+ numAllocInstances = m_numberInstance;
1747
+ }
1748
+
1745
1749
auto allocSize = arg.getAllocateSize ();
1746
1750
1747
1751
if (!IsUnusedArg && !isRuntimeValue)
@@ -1751,6 +1755,22 @@ namespace IGC
1751
1755
// Align on the desired alignment for this argument
1752
1756
auto alignment = arg.getAlignment ();
1753
1757
1758
+ // FIXME: move alignment checks to implicit arg creation
1759
+ if ((arg.getArgType () == KernelArg::ArgType::IMPLICIT_LOCAL_IDS ||
1760
+ arg.getArgType () == KernelArg::ArgType::RT_STACK_ID) &&
1761
+ m_Platform->getGRFSize () == 64 )
1762
+ {
1763
+ alignment = 64 ;
1764
+ // generate a single SIMD32 variable in this case
1765
+ if (m_dispatchSize == SIMDMode::SIMD16 && m_Platform->getGRFSize () == 64 )
1766
+ {
1767
+ allocSize = 64 ;
1768
+ }
1769
+ else
1770
+ {
1771
+ allocSize = PVCLSCEnabled () ? 64 : 32 ;
1772
+ }
1773
+ }
1754
1774
offset = iSTD::Align (offset, alignment);
1755
1775
1756
1776
// Arguments larger than a GRF must be at least GRF-aligned.
@@ -1788,6 +1808,7 @@ namespace IGC
1788
1808
1789
1809
if (useInlineData && !inlineDataProcessed &&
1790
1810
arg.getArgType () != KernelArg::ArgType::IMPLICIT_LOCAL_IDS &&
1811
+ arg.getArgType () != KernelArg::ArgType::RT_STACK_ID &&
1791
1812
arg.getArgType () != KernelArg::ArgType::IMPLICIT_R0)
1792
1813
{
1793
1814
// Calc if we can fit this arg in inlinedata:
@@ -1809,6 +1830,7 @@ namespace IGC
1809
1830
1810
1831
// numAllocInstances can be greater than 1, only when:
1811
1832
// argtype == IMPLICIT_LOCAL_IDS
1833
+ // or argtype == RT_STACK_ID,
1812
1834
// so there is no need to handle it here
1813
1835
1814
1836
// current arg is first to be loaded (it does not come in inlinedata)
@@ -2049,6 +2071,7 @@ namespace IGC
2049
2071
2050
2072
m_kernelInfo.m_executionEnivronment .CompiledSubGroupsNumber = funcMD.CompiledSubGroupsNumber ;
2051
2073
2074
+ m_kernelInfo.m_executionEnivronment .HasRTCalls = funcMD.hasSyncRTCalls ;
2052
2075
}
2053
2076
2054
2077
m_kernelInfo.m_executionEnivronment .HasGlobalAtomics = GetHasGlobalAtomics ();
@@ -2491,6 +2514,19 @@ namespace IGC
2491
2514
m_Context->SetSIMDInfo (SIMD_RETRY, simdMode, ShaderDispatchMode::NOT_APPLICABLE);
2492
2515
}
2493
2516
2517
+ // Currently the FunctionMetaData is looked up solely to read hasSyncRTCalls.
2518
+ // If we ever need some non-raytracing related field out of the FunctionMetaData,
2519
+ // then we can move the lookup out of the #if and just leave the bool hasSyncRTCalls inside.
2520
+ auto & FuncMap = m_Context->getModuleMetaData ()->FuncMD ;
2521
+ // we want to check the setting for the associated kernel
2522
+ auto FuncIter = FuncMap.find (entry);
2523
+ if (FuncIter == FuncMap.end ()) { // wasn't able to find the meta data for the passed in llvm::Function!
2524
+ // All of the kernels should have an entry in the map.
2525
+ IGC_ASSERT (0 );
2526
+ return false ;
2527
+ }
2528
+ const FunctionMetaData& funcMD = FuncIter->second ;
2529
+ bool hasSyncRTCalls = funcMD.hasSyncRTCalls ; // if the function/kernel has sync raytracing calls
2494
2530
2495
2531
// If forced SIMD Mode (by driver or regkey), then:
2496
2532
// 1. Compile only that SIMD mode and nothing else
@@ -2504,15 +2540,16 @@ namespace IGC
2504
2540
// These statements are basically equivalent to (simdMode == forcedSIMDSize)
2505
2541
(simdMode == SIMDMode::SIMD8 && m_Context->getModuleMetaData ()->csInfo .forcedSIMDSize == 8 ) ||
2506
2542
(simdMode == SIMDMode::SIMD16 && m_Context->getModuleMetaData ()->csInfo .forcedSIMDSize == 16 ) ||
2507
- (simdMode == SIMDMode::SIMD32 && m_Context->getModuleMetaData ()->csInfo .forcedSIMDSize == 32 )
2543
+ // SIMD32 may only be compiled when the kernel has no sync raytracing calls; raytracing doesn't support SIMD32
2544
+ (simdMode == SIMDMode::SIMD32 && m_Context->getModuleMetaData ()->csInfo .forcedSIMDSize == 32 && !hasSyncRTCalls)
2508
2545
);
2509
2546
}
2510
2547
2511
- SIMDStatus simdStatus = checkSIMDCompileConds (simdMode, EP, F);
2548
+ SIMDStatus simdStatus = checkSIMDCompileConds (simdMode, EP, F, hasSyncRTCalls );
2512
2549
2513
2550
if (m_Context->platform .getMinDispatchMode () == SIMDMode::SIMD16)
2514
2551
{
2515
- simdStatus = checkSIMDCompileCondsPVC (simdMode, EP, F);
2552
+ simdStatus = checkSIMDCompileCondsPVC (simdMode, EP, F, hasSyncRTCalls );
2516
2553
}
2517
2554
2518
2555
// Func and Perf checks pass, compile this SIMD
@@ -2535,7 +2572,7 @@ namespace IGC
2535
2572
return simdStatus == SIMDStatus::SIMD_PASS;
2536
2573
}
2537
2574
2538
- SIMDStatus COpenCLKernel::checkSIMDCompileCondsPVC (SIMDMode simdMode, EmitPass& EP, llvm::Function& F)
2575
+ SIMDStatus COpenCLKernel::checkSIMDCompileCondsPVC (SIMDMode simdMode, EmitPass& EP, llvm::Function& F, bool hasSyncRTCalls )
2539
2576
{
2540
2577
if (simdMode == SIMDMode::SIMD8)
2541
2578
{
@@ -2590,6 +2627,12 @@ namespace IGC
2590
2627
pCtx->getModuleMetaData ()->csInfo .forcedSIMDSize = (unsigned char )numLanes (SIMDMode::SIMD16);
2591
2628
}
2592
2629
2630
+ if (simdMode == SIMDMode::SIMD32 && hasSyncRTCalls) {
2631
+ return SIMDStatus::SIMD_FUNC_FAIL;
2632
+ }
2633
+ else if (simdMode == SIMDMode::SIMD16 && hasSyncRTCalls) {
2634
+ return SIMDStatus::SIMD_PASS;
2635
+ }
2593
2636
2594
2637
if (simd_size)
2595
2638
{
@@ -2657,7 +2700,7 @@ namespace IGC
2657
2700
return m_annotatedNumThreads;
2658
2701
}
2659
2702
2660
- SIMDStatus COpenCLKernel::checkSIMDCompileConds (SIMDMode simdMode, EmitPass& EP, llvm::Function& F)
2703
+ SIMDStatus COpenCLKernel::checkSIMDCompileConds (SIMDMode simdMode, EmitPass& EP, llvm::Function& F, bool hasSyncRTCalls )
2661
2704
{
2662
2705
CShader* simd8Program = m_parent->GetShader (SIMDMode::SIMD8);
2663
2706
CShader* simd16Program = m_parent->GetShader (SIMDMode::SIMD16);
@@ -2773,7 +2816,12 @@ namespace IGC
2773
2816
return SIMDStatus::SIMD_FUNC_FAIL;
2774
2817
}
2775
2818
else {
2776
- EP.m_canAbortOnSpill = false ;
2819
+ if (hasSyncRTCalls) {
2820
+ return SIMDStatus::SIMD_FUNC_FAIL; // SIMD32 unsupported with raytracing calls
2821
+ }
2822
+ else { // simdMode == SIMDMode::SIMD32 && !hasSyncRTCalls
2823
+ EP.m_canAbortOnSpill = false ;
2824
+ }
2777
2825
}
2778
2826
break ;
2779
2827
default :
@@ -2802,6 +2850,12 @@ namespace IGC
2802
2850
return SIMDStatus::SIMD_PASS;
2803
2851
}
2804
2852
2853
+ if (hasSyncRTCalls) {
2854
+ // If we get all the way to here, only the preferred SIMD size for Ray Tracing is allowed to pass.
2855
+ SIMDMode mode = SIMDMode::UNKNOWN;
2856
+ mode = m_Context->platform .getPreferredRayTracingSIMDSize ();
2857
+ return (mode == simdMode) ? SIMDStatus::SIMD_PASS : SIMDStatus::SIMD_FUNC_FAIL;
2858
+ }
2805
2859
2806
2860
if (groupSize != 0 && groupSize <= 16 )
2807
2861
{
0 commit comments