@@ -964,7 +964,7 @@ std::vector<paddle::Tensor> EPMoeExpertDispatchFP8(
964964 } else {
965965 token_rows = input_dims[0 ];
966966 }
967- const int num_rows = token_rows;
967+
968968 const int hidden_size = input.dims ()[input_dims.size () - 1 ];
969969 const int num_experts_per_rank = num_experts_per_rank_tensor.dims ()[0 ];
970970
@@ -988,9 +988,9 @@ std::vector<paddle::Tensor> EPMoeExpertDispatchFP8(
988988 auto dst_weights = GetEmptyTensor (
989989 {token_nums_feed_to_ffn}, paddle::DataType::FLOAT32, place);
990990 auto dst_indices = GetEmptyTensor (
991- {num_rows , num_experts_per_rank}, paddle::DataType::INT32, place);
991+ {token_rows , num_experts_per_rank}, paddle::DataType::INT32, place);
992992 auto permute_indices_per_token = paddle::full (
993- {num_experts_per_rank, num_rows }, -1 , paddle::DataType::INT32, place);
993+ {num_experts_per_rank, token_rows }, -1 , paddle::DataType::INT32, place);
994994 auto cumsum_idx_gpu =
995995 paddle::full ({num_experts_per_rank}, 0 , paddle::DataType::INT32, place);
996996
@@ -1001,7 +1001,7 @@ std::vector<paddle::Tensor> EPMoeExpertDispatchFP8(
10011001 num_experts_per_rank_tensor,
10021002 num_experts_per_rank_padded_tensor,
10031003 moe_topk,
1004- num_rows ,
1004+ token_rows ,
10051005 -1 ,
10061006 -1 ,
10071007 hidden_size,
0 commit comments