Fix icpx failure on test_unit_flash_attention_prefill

leonling-lly · leonling-lly · commit 919df17d99d1 · 2025-09-09T18:06:57.000-07:00
diff --git a/test/unit/flash_attention/flash_attention_prefill/flash_prefill_testbed_3x.hpp b/test/unit/flash_attention/flash_attention_prefill/flash_prefill_testbed_3x.hpp
@@ -125,6 +125,17 @@ struct Shape_h192 {
     using GmemTiledCopyV = cute::XE_2D_U16x16x32_LD_V;
     using GmemTiledCopyO = cute::XE_2D_U16x8x16_ST_N;
   };
+  
+  template <class, class> class convert_fp8_to_fp16_name;  // Declaration-only tag type; used solely as the kernel-name template argument of parallel_for below.
+  // Copies `size` elements from d_src to d_dst, converting each one with static_cast<DstT>, and waits for the submitted work before returning.
+  template <typename SrcT, typename DstT>  // SrcT: element type read from d_src; DstT: element type written to d_dst.
+  void convert_fp8_to_fp16(const SrcT* d_src, DstT* d_dst, size_t size) {  // NOTE(review): the d_ prefix suggests both pointers must be device-accessible — confirm with callers.
+    cutlasscompat::get_default_queue().parallel_for<convert_fp8_to_fp16_name<SrcT, DstT>>(size, [=](auto indx) {  // Pointers are captured by value; the name tag is parameterized on <SrcT, DstT>, so each type pair gets its own tag type.
+      d_dst[indx] = static_cast<DstT>(d_src[indx]);  // Each invocation converts exactly the element at its own index.
+    }).wait();  // Waits on the object returned by parallel_for (presumably a SYCL event — confirm), so the call is synchronous for the caller.
+  }
+
+
 /////////////////////////////////////////////////////////////////////
 
 template<typename ElementInputType, typename ElementAccumulatorType, typename ElementOutputType,  
@@ -225,15 +236,6 @@ struct TestbedImpl {
   //
   // Methods
   //
-  template <class, class> class convert_fp8_to_fp16_name;
-
-  template <typename SrcT, typename DstT>
-  void convert_fp8_to_fp16(const SrcT* d_src, DstT* d_dst, size_t size) {
-    cutlasscompat::get_default_queue().parallel_for<convert_fp8_to_fp16_name<SrcT, DstT>>(size, [=](auto indx) {
-      d_dst[indx] = static_cast<DstT>(d_src[indx]);
-    }).wait();
-  }
-
   template <typename T>
   static constexpr bool is_fp8_v = cute::is_any_of_v<T, cute::float_e5m2_t, cute::float_e4m3_t>;