
Commit c7aa136

HIP: Ignore unsupported unroll transformation in fattn-vec (#14931)
LLVM with the amdgcn target does not support unrolling loops with conditional break statements when those statements cannot be resolved at compile time. As in other places in GGML, let's simply ignore this warning.
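
To illustrate the situation the commit message describes, here is a minimal sketch that is not taken from this commit: a #pragma unroll loop whose early exit depends on a runtime value, which the amdgcn backend cannot fully unroll and therefore reports via -Wpass-failed, wrapped in the same clang diagnostic push/pop scope the patch uses. The kernel name example_kernel and its parameters are hypothetical.

// Hypothetical example, not part of this commit: the runtime-dependent
// break prevents the requested unrolling on amdgcn and triggers -Wpass-failed.
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wpass-failed"
#endif // __clang__
template<int N>
static __global__ void example_kernel(const float * src, float * dst, int limit) {
#pragma unroll
    for (int i = 0; i < N; ++i) {
        if (i >= limit) { // 'limit' is only known at run time, so the
            break;        // unroller cannot resolve this break.
        }
        dst[i] = 2.0f*src[i];
    }
}
#ifdef __clang__
#pragma clang diagnostic pop
#endif // __clang__

Because the suppression is wrapped in push/pop, it only covers the flash-attention vector kernels; -Wpass-failed stays active for the rest of the translation unit.
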
1 parent: 1a67fcc · commit: c7aa136

File tree

ggml/src/ggml-cuda/fattn-vec-f16.cuh
ggml/src/ggml-cuda/fattn-vec-f32.cuh

2 files changed: +18, -0 lines changed

ggml/src/ggml-cuda/fattn-vec-f16.cuh

Lines changed: 9 additions & 0 deletions
@@ -1,6 +1,12 @@
 #include "common.cuh"
 #include "fattn-common.cuh"
 
+// Currently llvm with the amdgcn target does not support unrolling loops
+// that contain a break that cannot be resolved at compile time.
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wpass-failed"
+#endif // __clang__
 template<int D, int ncols, ggml_type type_K, ggml_type type_V, bool use_logit_softcap> // D == head size
 #ifndef GGML_USE_HIP
 __launch_bounds__(D, 1)
@@ -341,6 +347,9 @@ static __global__ void flash_attn_vec_ext_f16(
     NO_DEVICE_CODE;
 #endif // defined(FLASH_ATTN_AVAILABLE) && defined(FP16_AVAILABLE)
 }
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif // __clang__
 
 template <int D, int cols_per_block, ggml_type type_K, ggml_type type_V, bool use_logit_softcap>
 void ggml_cuda_flash_attn_ext_vec_f16_case_impl(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {

ggml/src/ggml-cuda/fattn-vec-f32.cuh

Lines changed: 9 additions & 0 deletions
@@ -1,6 +1,12 @@
 #include "common.cuh"
 #include "fattn-common.cuh"
 
+// Currently llvm with the amdgcn target does not support unrolling loops
+// that contain a break that cannot be resolved at compile time.
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wpass-failed"
+#endif // __clang__
 template<int D, int ncols, ggml_type type_K, ggml_type type_V, bool use_logit_softcap> // D == head size
 #ifndef GGML_USE_HIP
 __launch_bounds__(D, 1)
@@ -336,6 +342,9 @@ static __global__ void flash_attn_vec_ext_f32(
     NO_DEVICE_CODE;
 #endif // FLASH_ATTN_AVAILABLE
 }
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif // __clang__
 
 template <int D, int cols_per_block, ggml_type type_K, ggml_type type_V, bool use_logit_softcap>
 void ggml_cuda_flash_attn_ext_vec_f32_case_impl(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
