Skip to content

Commit 95243f0

Browse files
[Others] add PADDLE_ENFORCE (#5288)
1 parent 1539fd6 commit 95243f0

File tree

2 files changed

+9
-4
lines changed

2 files changed

+9
-4
lines changed

custom_ops/gpu_ops/moe/ep_moe_expert_dispatch.cu

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -964,7 +964,7 @@ std::vector<paddle::Tensor> EPMoeExpertDispatchFP8(
964964
} else {
965965
token_rows = input_dims[0];
966966
}
967-
const int num_rows = token_rows;
967+
968968
const int hidden_size = input.dims()[input_dims.size() - 1];
969969
const int num_experts_per_rank = num_experts_per_rank_tensor.dims()[0];
970970

@@ -988,9 +988,9 @@ std::vector<paddle::Tensor> EPMoeExpertDispatchFP8(
988988
auto dst_weights = GetEmptyTensor(
989989
{token_nums_feed_to_ffn}, paddle::DataType::FLOAT32, place);
990990
auto dst_indices = GetEmptyTensor(
991-
{num_rows, num_experts_per_rank}, paddle::DataType::INT32, place);
991+
{token_rows, num_experts_per_rank}, paddle::DataType::INT32, place);
992992
auto permute_indices_per_token = paddle::full(
993-
{num_experts_per_rank, num_rows}, -1, paddle::DataType::INT32, place);
993+
{num_experts_per_rank, token_rows}, -1, paddle::DataType::INT32, place);
994994
auto cumsum_idx_gpu =
995995
paddle::full({num_experts_per_rank}, 0, paddle::DataType::INT32, place);
996996

@@ -1001,7 +1001,7 @@ std::vector<paddle::Tensor> EPMoeExpertDispatchFP8(
10011001
num_experts_per_rank_tensor,
10021002
num_experts_per_rank_padded_tensor,
10031003
moe_topk,
1004-
num_rows,
1004+
token_rows,
10051005
-1,
10061006
-1,
10071007
hidden_size,

custom_ops/gpu_ops/per_token_quant_fp8.cu

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,11 @@ std::vector<paddle::Tensor> PerTokenQuantPadding(paddle::Tensor &input,
232232
auto input_dim = input.dims();
233233
const int token_num = input_dim[0];
234234
const int hidden_size = input_dim[1];
235+
236+
PADDLE_ENFORCE(block_size == 128, "now only support block_size = 128");
237+
PADDLE_ENFORCE(hidden_size % 128 == 0,
238+
"hidden_size must be divisible by 128");
239+
235240
const int hidden_size_scale = hidden_size / block_size;
236241
auto quanted_x = GetEmptyTensor(
237242
{token_num, hidden_size}, paddle::DataType::FLOAT8_E4M3FN, input.place());

0 commit comments

Comments (0)