Skip to content

Commit 1a2f24d

Browse files
author
K11OntheBoat
committed
clean some log info
1 parent 9ac2929 commit 1a2f24d

File tree

7 files changed

+1
-81
lines changed

7 files changed

+1
-81
lines changed

examples/splitwise/stop.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
pkill -9 -f python
22
pkill -9 -f fastdeploy
33
pkill -9 -f gunicorn
4+
# Kill redis-server as well if needed.
45
#pkill -9 -f redis-server
56

67
sleep 1

fastdeploy/cache_manager/cache_messager.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,6 @@ def __init__(
204204

205205
elif protocol == "rdma":
206206
logger.info(f"splitwise_role rdma: {self.splitwise_role}, rank: {self.rank}, gpu_id: {gpu_id}")
207-
logger.info(f"====RyanDebug, the cache_v_ptr_list is:{cache_v_ptr_list}")
208207
self.messager[protocol] = RDMACommManager(
209208
splitwise_role,
210209
rank,
@@ -215,7 +214,6 @@ def __init__(
215214
block_bytes,
216215
rdma_port,
217216
)
218-
logger.info("===RyanDebug, #218 Finish RDMACommManager create!!!!!!!")
219217

220218
self.gpu_id = gpu_id
221219
self.cache_info = dict()
@@ -796,7 +794,6 @@ def main():
796794
num_extra_layers = speculative_config.num_extra_cache_layer
797795
key_cache_shape_list = [int(i) for i in args.key_cache_shape.split(",")]
798796
value_cache_shape_list = []
799-
print("===RyanDebug #786 of cache_messager,the args.value_cache_shape is:", args.value_cache_shape)
800797
if args.value_cache_shape:
801798
value_cache_shape_list = [int(i) for i in args.value_cache_shape.split(",")]
802799
total_gpu_blocks = key_cache_shape_list[0]

fastdeploy/cache_manager/transfer_factory/kvcache_transfer/src/kvcache_rdma.cpp

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -66,17 +66,13 @@ RDMACommunicator::RDMACommunicator(std::string& role,
6666

6767
// Step 1: Initialize KV cache config
6868
KVCacheConfig::getInstance().displayConfiguration();
69-
printf(
70-
"====RyanDebugRDMA, Finish #69 KVCacheConfig::getInstance(). ===== "
71-
"\n");
7269

7370
// Step 2: Initialize KV cache structure
7471
// Validate and set number of layers
7572
layer_number = static_cast<int>(local_cache_key_ptr_layer_head_.size());
7673
if (layer_number <= 0) {
7774
throw std::runtime_error("Invalid layer number");
7875
}
79-
printf("====RyanDebugRDMA, Finish #77 layer. ===== \n");
8076

8177
if (local_cache_value_ptr_layer_head_.empty()) {
8278
has_value_cache_ = false;
@@ -90,20 +86,17 @@ RDMACommunicator::RDMACommunicator(std::string& role,
9086
}
9187
}
9288

93-
printf("====RyanDebugRDMA, Finish #91 layer. ===== \n");
9489
// Step 2: Setup cache vectors and pointers
9590
resize_vectors();
9691
assign_pointers();
9792

98-
printf("====RyanDebugRDMA, Finish #97 layer. ===== \n");
9993
// Step 3:Initialize the event channel
10094
rdma_event_channel_epoll_fd = epoll_create1(EPOLL_CLOEXEC);
10195
if (rdma_event_channel_epoll_fd < 0) {
10296
throw std::runtime_error("Failed to create epoll fd: " +
10397
std::string(strerror(errno)));
10498
}
10599

106-
printf("====RyanDebugRDMA, Finish #105 layer. ===== \n");
107100
// Start the server thread (if in decode role)
108101
if (splitwise_role == "decode") {
109102
std::thread server_thread([this]() {
@@ -115,7 +108,6 @@ RDMACommunicator::RDMACommunicator(std::string& role,
115108
});
116109
server_thread.detach();
117110
}
118-
printf("====RyanDebugRDMA, Finish #117 layer. ===== \n");
119111
RDMACommunicator_status = 1;
120112
INFO("RDMA communicator initialized successfully");
121113
} catch (const std::exception& e) {
@@ -871,10 +863,6 @@ bool RDMACommunicator::server_mr_register_per_layer(RdmaContext* ctx) {
871863
}
872864
}
873865

874-
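// [Fix] Always assign to ctx->conn, whether or not a Value Cache exists.
875-
// Without a Value Cache, write_cache_value_server_mr_list
876-
// is empty, so assigning it is also empty and therefore safe. Without the assignment, the vectors in ctx->conn
877-
// may be left in an undefined, dirty state.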
878866
ctx->conn.write_cache_key_server_mr_list = write_cache_key_server_mr_list;
879867
ctx->conn.write_cache_value_server_mr_list = write_cache_value_server_mr_list;
880868

fastdeploy/cache_manager/transfer_factory/rdma_cache_transfer.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,6 @@ def __init__(
4646
"The installation of the RDMA library failed."
4747
"Confirm whether your network card supports RDMA transmission."
4848
)
49-
logger.info(f" # 499999999 init rdma messager {gpu_id} {rdma_port}")
50-
logger.info(f" # == RyanDebug, Decode, the cache_v_ptr_list is: {cache_v_ptr_list}")
5149
self.messager = rdma_comm.RDMACommunicator(
5250
splitwise_role,
5351
gpu_id,

fastdeploy/model_executor/layers/attention/mla_attention_backend.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -205,8 +205,6 @@ def init_attention_metadata(self, forward_meta: ForwardMeta):
205205
self.group_size,
206206
self.block_size,
207207
)
208-
print("===RyanDebug, after ini attn meta, the max_len_tensor_cpu[1] is:", forward_meta.max_len_tensor_cpu[1])
209-
print("===RyanDebug, after ini attn meta, the max_len_tensor_cpu[2] is:", forward_meta.max_len_tensor_cpu[2])
210208
# MLA
211209
metadata.max_enc_len_this_time = forward_meta.max_len_tensor_cpu[1]
212210
metadata.max_dec_len_this_time = forward_meta.max_len_tensor_cpu[2]
@@ -428,10 +426,6 @@ def forward_mixed(
428426
"none",
429427
self.max_seq_len,
430428
)
431-
print(
432-
"====After write cache, the metadata.kv_signal_data_list[layer.layer_id] is:",
433-
metadata.kv_signal_data_list[layer.layer_id],
434-
)
435429
# FA
436430
fmha_out = self.flash_attn_func(
437431
q,

fastdeploy/model_executor/models/deepseek_v3.py

Lines changed: 0 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -340,17 +340,7 @@ def forward(
340340
fmha_out = None
341341

342342
# NOTE: (changwenbin) qkv_a_proj horizontal fusion
343-
paddle.device.synchronize()
344-
print("==RyanDebug, the hidden_states is:", hidden_states) # 这是一个输入,我们假设它没问题,但也可以加上检查
345-
print("==RyanDebug, hidden_states contains NaN:", paddle.any(paddle.isnan(hidden_states)).item())
346-
347343
qkv_a_out = self.qkv_a_proj_with_mqa(hidden_states)
348-
paddle.device.synchronize()
349-
350-
# --- NaN Check Start ---
351-
print("===RyanDebug, the qkv_a_out is:", qkv_a_out)
352-
print(" >>> RyanDebug, qkv_a_out contains NaN:", paddle.any(paddle.isnan(qkv_a_out)).item())
353-
# --- NaN Check End ---
354344

355345
query, compressed_kv, key_pe = qkv_a_out.split(
356346
[self.q_lora_rank, self.kv_lora_rank, self.qk_rope_head_dim], axis=-1
@@ -363,13 +353,10 @@ def forward(
363353

364354
key_pe.reshape_([-1, 1, self.qk_rope_head_dim])
365355
query_pe, key_pe = self.rotary_emb(position_ids, query_pe, key_pe)
366-
paddle.device.synchronize()
367356

368357
compressed_kv = self.kv_a_layernorm(compressed_kv)[0]
369358

370-
print("===RyanDebug, in #370, forward_meta.max_len_tensor_cpu[1] is:", forward_meta.max_len_tensor_cpu[1])
371359
if forward_meta.max_len_tensor_cpu[1]: # max_enc_len_this_time
372-
print("===RyanDebug, in #372, forward_meta.max_len_tensor_cpu[1] is:", forward_meta.max_len_tensor_cpu[1])
373360
key_value = self.kv_b_proj(compressed_kv)
374361
key_value.reshape_(
375362
[
@@ -402,12 +389,8 @@ def forward(
402389
fmha_out_prefill = fmha_out_prefill * mask_encoder_batch.cast(fmha_out_prefill.dtype)
403390

404391
fmha_out = fmha_out_prefill
405-
print("====RYanDebug, #404, fmha_out after MLA is: ", fmha_out)
406392

407393
if forward_meta.max_len_tensor_cpu[2]: # max_dec_len_this_time
408-
print("===RyanDebug, D in dsv3 !!!!=====")
409-
paddle.device.synchronize()
410-
411394
q_nope_out = self.kv_b_proj_bmm(query_nope.transpose([1, 0, 2]), proj_type="k").transpose([1, 0, 2])
412395

413396
q_input = paddle.concat([q_nope_out, query_pe], axis=-1)
@@ -418,18 +401,6 @@ def forward(
418401
]
419402
)
420403

421-
print("===RyanDebug, the q_input # 435 is:", q_input)
422-
print(" >>> RyanDebug, q_input # 435 contains NaN:", paddle.any(paddle.isnan(q_input)).item())
423-
424-
print("===RyanDebug, the compressed_kv # 435 is:", compressed_kv)
425-
print(
426-
" >>> RyanDebug, compressed_kv # 435 contains NaN:", paddle.any(paddle.isnan(compressed_kv)).item()
427-
)
428-
429-
print("===RyanDebug, the key_pe # 435 is:", q_input)
430-
print(" >>> RyanDebug, key_pe # 435 contains NaN:", paddle.any(paddle.isnan(key_pe)).item())
431-
432-
paddle.device.synchronize()
433404
fmha_out_decode = self.mla_attn(
434405
q=q_input,
435406
k=None,
@@ -439,39 +410,23 @@ def forward(
439410
k_pe=key_pe,
440411
forward_meta=forward_meta,
441412
)
442-
paddle.device.synchronize()
443-
# --- NaN Check Start ---
444-
print("===RyanDebug, the fmha_out_decode # 448 is:", fmha_out_decode)
445-
print(
446-
" >>> RyanDebug, fmha_out_decode # 448 contains NaN:",
447-
paddle.any(paddle.isnan(fmha_out_decode)).item(),
448-
)
449413

450414
fmha_out_decode = fmha_out_decode.reshape([-1, self.num_attention_heads_tp, self.kv_lora_rank]).transpose(
451415
[1, 0, 2]
452416
)
453417

454-
paddle.device.synchronize()
455-
456418
fmha_out_decode = (
457419
self.kv_b_proj_bmm(fmha_out_decode, proj_type="v")
458420
.transpose([1, 0, 2])
459421
.reshape([-1, self.num_attention_heads_tp * self.v_head_dim])
460422
)
461423

462-
# --- NaN Check Start ---
463-
print("===RyanDebug, the fmha_out_decode is:", fmha_out_decode)
464-
print(" >>> RyanDebug, fmha_out_decode contains NaN:", paddle.any(paddle.isnan(fmha_out_decode)).item())
465-
# --- NaN Check End ---
466-
467-
paddle.device.synchronize()
468424
if fmha_out is None:
469425
fmha_out = fmha_out_decode
470426
else:
471427
fmha_out = fmha_out + fmha_out_decode
472428

473429
output = self.o_proj(fmha_out)
474-
paddle.device.synchronize()
475430
return output
476431

477432
def load_state_dict(self, state_dict):
@@ -559,19 +514,11 @@ def forward(
559514
hidden_states, residual_input=residual, forward_meta=forward_meta
560515
)
561516

562-
print("===RyanDebug, the hidden_states before self_attn is :", hidden_states)
563517
hidden_states = self.self_attn(forward_meta, hidden_states, position_ids, mask_encoder_batch)
564518

565-
print("==RyanDebug, #563 hidden_states contains NaN:", paddle.any(paddle.isnan(hidden_states)).item())
566-
567519
hidden_states, residual = self.post_attention_layernorm(hidden_states, residual)
568-
print("==RyanDebug, #566 hidden_states contains NaN:", paddle.any(paddle.isnan(hidden_states)).item())
569520
hidden_states = self.mlp(hidden_states)
570521

571-
print("===RyanDebug, the hidden_states after mlp is :", hidden_states)
572-
print(
573-
"==RyanDebug, #570 hidden_states after mlp contains NaN:", paddle.any(paddle.isnan(hidden_states)).item()
574-
)
575522
return hidden_states, residual
576523

577524

@@ -731,7 +678,6 @@ def load_weights(self, weights_iterator) -> None:
731678
process_weights_after_loading_fn = process_weights_after_loading(dict(self.named_sublayers()), self.fd_config)
732679
for loaded_weight_name, loaded_weight in weights_iterator:
733680
loaded_weight_name = loaded_weight_name.replace("deepseek_v3", "model")
734-
print(f"loaded_weight_name:{loaded_weight_name}")
735681
for param_name, weight_name, shard_id in stacked_params_mapping:
736682
if weight_name not in loaded_weight_name:
737683
continue

fastdeploy/worker/gpu_model_runner.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1833,10 +1833,6 @@ def _dummy_run(
18331833
self.forward_meta,
18341834
)
18351835
else:
1836-
print(
1837-
"===RyanDebug #1813 of model runner, the self.share_inputs[ids_remove_padding] is:",
1838-
self.share_inputs["ids_remove_padding"],
1839-
)
18401836
model_output = self.model(
18411837
ids_remove_padding=self.share_inputs["ids_remove_padding"],
18421838
forward_meta=self.forward_meta,

0 commit comments

Comments
 (0)