Skip to content

Commit 915e732

Browse files
sergey-kozub and copybara-github
authored and committed
PR #28782: [XLA:GPU] Annotate cuBLAS/cuDNN outputs to avoid initcheck failures
Imported from GitHub PR openxla/xla#28782 Upgrades NVTX to v3.2.1 and marks the outputs of cuBLAS/cuDNN as initialized (as compute-sanitizer may emit false positives for kernels using TMA). Copybara import of the project: -- 55977057d4c3bc3008649cdedc7ddb7923780958 by Sergey Kozub <[email protected]>: [XLA:GPU] Annotate cuBLAS/cuDNN outputs to avoid initcheck failures Merging this change closes #28782 FUTURE_COPYBARA_INTEGRATE_REVIEW=openxla/xla#28782 from openxla:skozub/nvtx_init_annotation 55977057d4c3bc3008649cdedc7ddb7923780958 PiperOrigin-RevId: 788806680
1 parent 4ed7120 commit 915e732

File tree

3 files changed

+25
-0
lines changed

3 files changed

+25
-0
lines changed

tsl/profiler/lib/nvtx_utils.cc

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ limitations under the License.
2828
#include "nvtx3/nvToolsExt.h"
2929
#include "nvtx3/nvToolsExtCuda.h"
3030
#include "nvtx3/nvToolsExtCudaRt.h"
31+
#include "nvtx3/nvToolsExtMemCudaRt.h"
3132
#include "nvtx3/nvToolsExtPayload.h"
3233
#include "third_party/gpus/cuda/include/cuda.h"
3334

@@ -119,4 +120,21 @@ StringHandle RegisterString(ProfilerDomainHandle domain,
119120
buffer.append(suffix);
120121
return impl(buffer.c_str());
121122
}
123+
124+
void MarkMemoryInitialized(void const* address, size_t size,
125+
StreamHandle stream) {
126+
auto domain = DefaultProfilerDomain();
127+
nvtxMemVirtualRangeDesc_t range_desc{size, address};
128+
nvtxMemMarkInitializedBatch_t regions_desc{
129+
NVTX_EXT_COMPATID_MEM,
130+
sizeof(nvtxMemMarkInitializedBatch_t),
131+
NVTX_MEM_TYPE_VIRTUAL_ADDRESS,
132+
/*regionDescCount=*/1,
133+
sizeof(nvtxMemVirtualRangeDesc_t),
134+
&range_desc};
135+
nvtxMemCudaMarkInitialized(reinterpret_cast<nvtxDomainHandle_t>(domain),
136+
reinterpret_cast<cudaStream_t>(stream),
137+
/*isPerThreadStream=*/false, &regions_desc);
138+
}
139+
122140
} // namespace tsl::profiler

tsl/profiler/lib/nvtx_utils.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,5 +78,10 @@ void RangePush(ProfilerDomainHandle domain, StringHandle title,
7878
// Register the schema of a custom payload type, for use with the more powerful
7979
// version of RangePush
8080
uint64_t RegisterSchema(ProfilerDomainHandle domain, const void* schemaAttr);
81+
82+
// Mark a memory region as initialized.
// `address` and `size` describe the region; `stream` is the stream handle
// handed to the underlying implementation.
83+
// This mitigates false positives from the compute sanitizer (initcheck).
// NOTE(review): `size` is presumably in bytes -- confirm against callers.
84+
void MarkMemoryInitialized(void const* address, size_t size,
85+
StreamHandle stream);
8186
} // namespace tsl::profiler
8287
#endif // TENSORFLOW_TSL_PROFILER_LIB_NVTX_UTILS_H_

tsl/profiler/lib/nvtx_utils_stub.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,4 +31,6 @@ uint64_t RegisterSchema(ProfilerDomainHandle, const void*) { return 0; }
3131
StringHandle RegisterString(ProfilerDomainHandle, const std::string&) {
3232
return {};
3333
}
34+
void MarkMemoryInitialized(void const* address, size_t size,
35+
StreamHandle stream) {}
3436
} // namespace tsl::profiler

0 commit comments

Comments
 (0)