Fix test_unit_cute_intel_xe

leonling-lly · leonling-lly · commit 0e231c32a3cf · 2025-09-09T18:06:28.000-07:00
Fixup
diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
@@ -95,8 +95,8 @@ function(cutlass_test_unit_add_executable NAME)
       PUBLIC
       GTest::gtest 
       # TODO: This change works for resolving 'cutlasscompat.hpp' not found issue, fix this if it blocks merging
-      cutlass_test_unit_infra
-      cutlass_test_unit_infra_lib
+      # cutlass_test_unit_infra
+      # cutlass_test_unit_infra_lib
     )
   else()
     target_link_libraries(
diff --git a/test/unit/cute/core/CMakeLists.txt b/test/unit/cute/core/CMakeLists.txt
@@ -28,7 +28,7 @@
 
 cutlass_test_unit_add_executable(
   cutlass_test_unit_cute_core
-  WITHOUT_CUDA
+  # WITHOUT_CUDA
   array_subbyte.cpp
   bitfield.cpp
   coalesce.cpp
diff --git a/test/unit/cute/intel_xe/copy_block.cpp b/test/unit/cute/intel_xe/copy_block.cpp
@@ -265,7 +265,7 @@ struct copy_op<uint16_t, load, XE_2D_U16x2x16_ST_N, M, N, false> {
   }
 };
 
-template <class load, class store, int32_t M_, int32_t N_>
+template <class load, class store, uint32_t M_, uint32_t N_>
 struct copy_op<uint32_t, load, store, M_, N_, true> {
   void operator()() {
     //
diff --git a/test/unit/flash_attention/flash_attention_prefill/flash_prefill_testbed_3x.hpp b/test/unit/flash_attention/flash_attention_prefill/flash_prefill_testbed_3x.hpp
@@ -225,11 +225,11 @@ struct TestbedImpl {
   //
   // Methods
   //
-  template <class, class, class> class convert_fp8_to_fp16_name;
+  template <class, class> class convert_fp8_to_fp16_name;
 
-  template <typename SrcT, typename DstT, typename Runner>
+  template <typename SrcT, typename DstT>
   void convert_fp8_to_fp16(const SrcT* d_src, DstT* d_dst, size_t size) {
-    cutlasscompat::get_default_queue().parallel_for<convert_fp8_to_fp16_name<SrcT, DstT, Runner>>(size, [=](auto indx) {
+    cutlasscompat::get_default_queue().parallel_for<convert_fp8_to_fp16_name<SrcT, DstT>>(size, [=](auto indx) {
       d_dst[indx] = static_cast<DstT>(d_src[indx]);
     }).wait();
   }
@@ -241,7 +241,7 @@ struct TestbedImpl {
     using outType = cute::conditional_t<is_fp8_v<Tin>, half_t, Tin>;
     if constexpr(is_fp8_v<Tin>) {
       cutlass::DeviceAllocation<outType> out(in.size());
-      convert_fp8_to_fp16<Tin, outType, TestbedImpl>(in.get(), out.get(), in.size());
+      convert_fp8_to_fp16<Tin, outType>(in.get(), out.get(), in.size());
       return out;
     } else { 
       return in;
@@ -625,7 +625,7 @@ struct TestbedImpl {
 
 #if !defined(SYCL_EXT_ONEAPI_WORK_GROUP_SCRATCH_MEMORY)
     using namespace cutlasscompat::experimental;
-    auto event = launch<cutlass::device_kernel<FlashAttention>, FlashAttention>(
+    auto event = launch<cutlass::device_kernel<FlashAttention>>(
         launch_policy{sycl_grid, sycl_block, local_mem_size{static_cast<std::size_t>(smem_size)},
                       kernel_properties{sycl_exp::sub_group_size<FlashAttention::DispatchPolicy::SubgroupSize>}},
         params);
@@ -680,8 +680,9 @@ template <
   typename FlashAttention
 >
 struct Testbed3x {
-  using TestBedImpl = typename detail::TestbedImpl<FlashAttention>;
-  TestBedImpl impl_;
+  // using TestBedImp = typename detail::TestbedImpl<FlashAttention>;
+  // TestBedImp impl_;
+  detail::TestbedImpl<FlashAttention> impl_;
 
   //
   // Methods

Original file line number	Diff line number	Diff line change
`@@ -265,7 +265,7 @@ struct copy_op<uint16_t, load, XE_2D_U16x2x16_ST_N, M, N, false> {`
`265`	`265`	`}`
`266`	`266`	`};`
`267`	`267`
`268`		`-template <class load, class store, int32_t M_, int32_t N_>`
	`268`	`+template <class load, class store, uint32_t M_, uint32_t N_>`
`269`	`269`	`struct copy_op<uint32_t, load, store, M_, N_, true> {`
`270`	`270`	`void operator()() {`
`271`	`271`	`//`