Skip to content

Commit 5259fd6

Browse files
aratajewpszymich
authored and committed
Fix explicit generic casts handling
Explicit generic casts allow the user to check whether a particular generic pointer points to the global, local, or private address space. They are represented in OpenCL C as the `to_global`, `to_local`, and `to_private` functions. Specification: https://man.opencl.org/addressSpaceQualifierFuncs.html

For example, `to_private` returns a pointer that points to a region in the private address space if `to_private` can cast `ptr` to the private address space. Otherwise it returns NULL.

These builtins are handled incorrectly if IGC decides to allocate private memory in a global buffer (private memory is allocated in a global buffer to minimize the negative performance implications of dynamic generic address space resolution), since in that case there is no way to distinguish between a private and a global pointer.

This change introduces the following logic:
1. Detect usage of explicit generic casts in a kernel.
2. If the casts are present:
   a) Force tagging of private pointers, to distinguish them from global pointers.
   b) Clear the tag for addrspacecasts from generic to global. Since private accesses may be statically resolved to global accesses when casting back to a named address space, a generic pointer cast to the global address space may actually be a private pointer for which a tag has been set in step a).
1 parent 3130fd8 commit 5259fd6

File tree

6 files changed

+45
-5
lines changed

6 files changed

+45
-5
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8649,10 +8649,13 @@ void EmitPass::emitAddrSpaceCast(llvm::AddrSpaceCastInst* addrSpaceCast)
86498649

86508650
CVariable* srcV = GetSymbol(addrSpaceCast->getOperand(0));
86518651

8652-
if (!m_canGenericPointToPrivate && !m_canGenericPointToLocal)
8652+
bool skipTagging =
8653+
!m_canGenericPointToLocal &&
8654+
!m_canGenericPointToPrivate &&
8655+
!m_pCtx->mustDistinguishBetweenPrivateAndGlobalPtr();
8656+
8657+
if (skipTagging)
86538658
{
8654-
// If forcing global memory allocation and there are no generic pointers to local AS,
8655-
// there is no need to tag generic pointers.
86568659
m_encoder->Cast(m_destination, srcV);
86578660
m_encoder->Push();
86588661
return;
@@ -8682,7 +8685,7 @@ void EmitPass::emitAddrSpaceCast(llvm::AddrSpaceCastInst* addrSpaceCast)
86828685
return;
86838686
}
86848687

8685-
if (sourceAddrSpace == ADDRESS_SPACE_PRIVATE && !m_pCtx->allocatePrivateAsGlobalBuffer())
8688+
if (sourceAddrSpace == ADDRESS_SPACE_PRIVATE && (!m_pCtx->allocatePrivateAsGlobalBuffer() || m_pCtx->mustDistinguishBetweenPrivateAndGlobalPtr()))
86868689
{
86878690
emitAddrSpaceToGenericCast(addrSpaceCast, srcV, 1);
86888691
}
@@ -8697,7 +8700,9 @@ void EmitPass::emitAddrSpaceCast(llvm::AddrSpaceCastInst* addrSpaceCast)
86978700
}
86988701
}
86998702
else if (sourceAddrSpace == ADDRESS_SPACE_GENERIC &&
8700-
(destAddrSpace == ADDRESS_SPACE_PRIVATE || destAddrSpace == ADDRESS_SPACE_LOCAL))
8703+
(destAddrSpace == ADDRESS_SPACE_PRIVATE ||
8704+
destAddrSpace == ADDRESS_SPACE_LOCAL ||
8705+
(destAddrSpace == ADDRESS_SPACE_GLOBAL && m_pCtx->mustDistinguishBetweenPrivateAndGlobalPtr())))
87018706
{
87028707
// Address space cast is in the form of generic -> {private, local, global}
87038708
// Tag is removed according to the address space of the destination

IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,16 @@ namespace IGC
111111
return m_InternalOptions.NoLocalToGeneric;
112112
}
113113

114+
bool OpenCLProgramContext::mustDistinguishBetweenPrivateAndGlobalPtr() const
115+
{
116+
return m_mustDistinguishBetweenPrivateAndGlobalPtr;
117+
}
118+
119+
void OpenCLProgramContext::setDistinguishBetweenPrivateAndGlobalPtr(bool distinguish)
120+
{
121+
m_mustDistinguishBetweenPrivateAndGlobalPtr = distinguish;
122+
}
123+
114124
bool OpenCLProgramContext::enableTakeGlobalAddress() const
115125
{
116126
return m_Options.EnableTakeGlobalAddress || getModuleMetaData()->capabilities.globalVariableDecorationsINTEL;

IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.hpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,16 @@ namespace IGC
238238
private:
239239
bool m_enableZEBinary;
240240

241+
// To minimize negative performance implications caused by a dynamic generic address
242+
// space resolution, private memory can be allocated in the same address space as
243+
// global memory. It gives a possibility to treat private memory operations as global
244+
// memory operations, so there is no necessity to distinguish between them.
245+
// However, when a module uses `to_global` or `to_private` OpenCL builtins, differentiating
246+
// between private and global pointers is necessary to preserve conformance.
247+
// The flag below is set to true when IGC detects that any of these builtins is called in
248+
// a module and could not be resolved statically at compile time.
249+
bool m_mustDistinguishBetweenPrivateAndGlobalPtr = false;
250+
241251
public:
242252
// Additional text visaasm to link.
243253
std::vector<const char*> m_VISAAsmToLink;
@@ -308,6 +318,8 @@ namespace IGC
308318
bool forceGlobalMemoryAllocation() const override;
309319
bool allocatePrivateAsGlobalBuffer() const override;
310320
bool noLocalToGenericOptionEnabled() const override;
321+
bool mustDistinguishBetweenPrivateAndGlobalPtr() const override;
322+
void setDistinguishBetweenPrivateAndGlobalPtr(bool);
311323
bool enableTakeGlobalAddress() const override;
312324
int16_t getVectorCoalescingControl() const override;
313325
uint32_t getPrivateMemoryMinimalSizePerThread() const override;

IGC/Compiler/CodeGenContext.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -661,6 +661,11 @@ namespace IGC
661661
return false;
662662
}
663663

664+
bool CodeGenContext::mustDistinguishBetweenPrivateAndGlobalPtr() const
665+
{
666+
return false;
667+
}
668+
664669
bool CodeGenContext::enableTakeGlobalAddress() const
665670
{
666671
return false;

IGC/Compiler/CodeGenPublic.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1027,6 +1027,7 @@ namespace IGC
10271027
virtual bool forceGlobalMemoryAllocation() const;
10281028
virtual bool allocatePrivateAsGlobalBuffer() const;
10291029
virtual bool noLocalToGenericOptionEnabled() const;
1030+
virtual bool mustDistinguishBetweenPrivateAndGlobalPtr() const;
10301031
virtual bool enableTakeGlobalAddress() const;
10311032
virtual int16_t getVectorCoalescingControl() const;
10321033
virtual uint32_t getPrivateMemoryMinimalSizePerThread() const;

IGC/Compiler/Optimizer/OpenCLPasses/GenericAddressResolution/GenericAddressDynamicResolution.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ SPDX-License-Identifier: MIT
1111
#include "AdaptorCommon/ImplicitArgs.hpp"
1212
#include "Compiler/CodeGenContextWrapper.hpp"
1313
#include "Compiler/CodeGenPublicEnums.h"
14+
#include "Compiler/CISACodeGen/OpenCLKernelCodeGen.hpp"
1415
#include "Compiler/IGCPassSupport.h"
1516
#include "Compiler/MetaDataUtilsWrapper.h"
1617
#include "common/LLVMWarningsPush.hpp"
@@ -460,6 +461,12 @@ bool GenericAddressDynamicResolution::visitIntrinsicCall(CallInst& I)
460461
Value* newPtrNull = nullptr;
461462
Value* cmpTag = nullptr;
462463

464+
if (targetAS == ADDRESS_SPACE_GLOBAL || targetAS == ADDRESS_SPACE_PRIVATE)
465+
{
466+
auto ClContext = static_cast<OpenCLProgramContext*>(m_ctx);
467+
ClContext->setDistinguishBetweenPrivateAndGlobalPtr(true);
468+
}
469+
463470
// Tag was already obtained from GAS pointer, now we check its address space (AS)
464471
// and the target AS for this intrinsic call
465472
if (targetAS == ADDRESS_SPACE_PRIVATE)

0 commit comments

Comments
 (0)