@@ -330,19 +330,65 @@ bool isDG1() const
330
330
return m_platformInfo.eProductFamily == IGFX_DG1;
331
331
}
332
332
333
- bool supportsSIMD16TypedRW () const
333
+ bool simplePushIsFasterThanGather () const // platform query decided solely by the render core family
334
334
{
335
- return false ;
335
+ return m_platformInfo. eRenderCoreFamily >= IGFX_GEN12_CORE ; // true when the render core family is IGFX_GEN12_CORE or greater
336
336
}
337
337
338
338
bool singleThreadBasedInstScheduling () const // platform query decided solely by the render core family
339
339
{
340
- return true ;
340
+ return m_platformInfo. eRenderCoreFamily < IGFX_GEN12_CORE ; // true only below IGFX_GEN12_CORE (the removed line returned true unconditionally)
341
341
}
342
342
343
+ // True for every platform that needs int64 emulation: platforms that do
344
+ // not support 64-bit operations (see hasNoInt64Inst()), and also BXT
345
+ // (BROXTON), whose 64-bit instructions have much lower throughput than on
346
+ // SKL. Emulating them improves performance on some benchmarks and has no
347
+ // impact on overall performance. GEMINILAKE is checked alongside BROXTON.
348
+ bool need64BitEmulation () const {
349
+ return m_platformInfo.eProductFamily == IGFX_GEMINILAKE ||
350
+ m_platformInfo.eProductFamily == IGFX_BROXTON ||
351
+ hasNoInt64Inst ();
352
+ }
353
+
354
+ bool HDCCoalesceSLMAtomicINCWithNoReturn () const // platform query decided solely by the render core family
355
+ {
356
+ return m_platformInfo.eRenderCoreFamily >= IGFX_GEN12_CORE; // true when the render core family is IGFX_GEN12_CORE or greater
357
+ }
343
358
bool HDCCoalesceAtomicCounterAccess () const // platform query combining the render core family with a debug/registry flag
344
359
{
345
- return IGC_IS_FLAG_DISABLED (ForceSWCoalescingOfAtomicCounter);
360
+ return (m_platformInfo.eRenderCoreFamily < IGFX_GEN12_CORE) && IGC_IS_FLAG_DISABLED (ForceSWCoalescingOfAtomicCounter); // true only below IGFX_GEN12_CORE and only while ForceSWCoalescingOfAtomicCounter is disabled
361
+ }
362
+
363
+ bool supportsMCSNonCompressedFix () const { return m_platformInfo.eRenderCoreFamily >= IGFX_GEN12_CORE; } // true when the render core family is IGFX_GEN12_CORE or greater
364
+ bool hasHWDp4AddSupport () const { return m_platformInfo.eRenderCoreFamily >= IGFX_GEN12_CORE; } // true when the render core family is IGFX_GEN12_CORE or greater
365
+
366
+ bool useOnlyEightPatchDispatchHS () const // platform query decided solely by the render core family
367
+ {
368
+ return (m_platformInfo.eRenderCoreFamily >= IGFX_GEN12_CORE); // true when the render core family is IGFX_GEN12_CORE or greater
369
+ }
370
+
371
+ bool supportsPrimitiveReplication () const // platform query on render core family and, for Gen11, the product family
372
+ {
373
+ return ((m_platformInfo.eRenderCoreFamily >= IGFX_GEN12_CORE) || // any core family at or above IGFX_GEN12_CORE qualifies
374
+ (m_platformInfo.eRenderCoreFamily == IGFX_GEN11_CORE && m_platformInfo.eProductFamily == IGFX_ICELAKE)); // on IGFX_GEN11_CORE only the IGFX_ICELAKE product qualifies
375
+ }
376
+
377
+ // If true, the screen-space coordinates of the upper-left vertex of the
378
+ // triangle being rasterized are delivered together with the source depth or W deltas.
379
+ bool hasStartCoordinatesDeliveredWithDeltas () const
380
+ {
381
+ return m_platformInfo.eRenderCoreFamily >= IGFX_GEN12_CORE; // true when the render core family is IGFX_GEN12_CORE or greater
382
+ }
383
+
384
+ bool hasEarlyGRFRead () const // platform query tied to one product family at one hardware revision
385
+ {
386
+ return m_platformInfo.eProductFamily == IGFX_TIGERLAKE_LP && m_platformInfo.usRevId == REVISION_A0; // true only for IGFX_TIGERLAKE_LP with revision id REVISION_A0
387
+ }
388
+
389
+ bool supportsSIMD16TypedRW () const // platform query with a fixed answer
390
+ {
391
+ return false ; // unconditionally false; no platform field is consulted
346
392
}
347
393
348
394
// all the platforms which do NOT support 64-bit int operations
@@ -365,18 +411,6 @@ bool hasNoFP64Inst() const {
365
411
m_platformInfo.eProductFamily == IGFX_DG1;
366
412
}
367
413
368
- // all the platforms which do not support 64 bit operations and
369
- // needs int64 emulation support. Except also for BXT where
370
- // 64-bit inst has much lower throughput compared to SKL.
371
- // Emulating it improves performance on some benchmarks and
372
- // won't have impact on the overall performance.
373
- bool need64BitEmulation () const {
374
- return (m_platformInfo.eProductFamily == IGFX_GEMINILAKE ||
375
- m_platformInfo.eProductFamily == IGFX_BROXTON ||
376
- hasNoInt64Inst ());
377
- }
378
-
379
-
380
414
// all the platforms which have correctly rounded macros (INVM, RSQRTM, MADM)
381
415
bool hasCorrectlyRoundedMacros () const {
382
416
return m_platformInfo.eProductFamily != IGFX_ICELAKE_LP &&
@@ -386,10 +420,6 @@ bool hasCorrectlyRoundedMacros() const {
386
420
m_platformInfo.eProductFamily != IGFX_DG1;
387
421
}
388
422
389
- bool hasHWDp4AddSupport () const {
390
- return m_platformInfo.eProductFamily == IGFX_TIGERLAKE_LP;
391
- }
392
- bool useOnlyEightPatchDispatchHS () const { return false ; }
393
423
bool hasFusedEU () const { return m_platformInfo.eRenderCoreFamily >= IGFX_GEN12_CORE; } // true when the render core family is IGFX_GEN12_CORE or greater
394
424
bool supports256GRFPerThread () const { return false ; } // unconditionally false; no platform field is consulted
395
425
bool supportMixMode () const {
@@ -416,13 +446,6 @@ uint32_t getGRFSize() const
416
446
return 32 ;
417
447
}
418
448
419
- // If true then screen space coordinates for upper-left vertex of a triangle
420
- // being rasterized are delivered together with source depth or W deltas.
421
- bool hasStartCoordinatesDeliveredWithDeltas () const
422
- {
423
- return false ;
424
- }
425
-
426
449
uint32_t maxPerThreadScratchSpace () const
427
450
{
428
451
return 0x200000 ;
@@ -445,7 +468,15 @@ bool canFuseTypedWrite() const
445
468
446
469
unsigned int getMaxNumberHWThreadForEachWG () const // upper bound on HW threads for one work-group (WG), derived from getMaxNumberThreadPerSubslice()
447
470
{
448
- return getMaxNumberThreadPerSubslice ();
471
+ if (m_platformInfo.eRenderCoreFamily < IGFX_GEN12_CORE)
472
+ {
473
+ // each WG is dispatched into one subslice for GEN11 and before
474
+ return getMaxNumberThreadPerSubslice ();
475
+ }
476
+ else
477
+ {
478
+ return getMaxNumberThreadPerSubslice () * 2 ; // IGFX_GEN12_CORE or greater: twice the per-subslice count; NOTE(review): presumably a WG can span two subslices here -- confirm
479
+ }
449
480
}
450
481
451
482
// max block size for legacy OWord block messages
0 commit comments