Commit 7c1a09e

Author: K11OntheBoat
Commit message: clean some log info
1 parent 4a11a68 · commit 7c1a09e

9 files changed: +3, -90 lines


examples/splitwise/stop.sh

Lines changed: 1 addition & 0 deletions
@@ -1,6 +1,7 @@
 pkill -9 -f python
 pkill -9 -f fastdeploy
 pkill -9 -f gunicorn
+# Kill redis-server if you need.
 #pkill -9 -f redis-server

 sleep 1

fastdeploy/cache_manager/cache_messager.py

Lines changed: 0 additions & 3 deletions
@@ -204,7 +204,6 @@ def __init__(

             elif protocol == "rdma":
                 logger.info(f"splitwise_role rdma: {self.splitwise_role}, rank: {self.rank}, gpu_id: {gpu_id}")
-                logger.info(f"====RyanDebug, the cache_v_ptr_list is:{cache_v_ptr_list}")
                 self.messager[protocol] = RDMACommManager(
                     splitwise_role,
                     rank,
@@ -217,7 +216,6 @@ def __init__(
                     nranks,
                     rank,
                 )
-                logger.info("===RyanDebug, #218 Finish RDMACommManager create!!!!!!!")

         self.gpu_id = gpu_id
         self.cache_info = dict()
@@ -825,7 +823,6 @@ def main():
     num_extra_layers = speculative_config.num_extra_cache_layer
     key_cache_shape_list = [int(i) for i in args.key_cache_shape.split(",")]
     value_cache_shape_list = []
-    print("===RyanDebug #786 of cache_messager,the args.value_cache_shape is:", args.value_cache_shape)
     if args.value_cache_shape:
         value_cache_shape_list = [int(i) for i in args.value_cache_shape.split(",")]
     total_gpu_blocks = key_cache_shape_list[0]
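
For context on the last hunk: main() parses the key/value cache shapes from comma-separated CLI strings, and the value shape may be empty because MLA-style models carry no value cache. A minimal, self-contained sketch of that parsing logic (the helper name parse_cache_shapes is illustrative, not part of cache_messager.py):

    def parse_cache_shapes(key_cache_shape: str, value_cache_shape: str = ""):
        """Parse comma-separated cache shapes, tolerating an empty value shape (MLA)."""
        key_cache_shape_list = [int(i) for i in key_cache_shape.split(",")]
        value_cache_shape_list = []
        if value_cache_shape:
            value_cache_shape_list = [int(i) for i in value_cache_shape.split(",")]
        total_gpu_blocks = key_cache_shape_list[0]
        return key_cache_shape_list, value_cache_shape_list, total_gpu_blocks

    # Example: a key cache of shape [1024, 8, 64, 128] and no value cache.
    print(parse_cache_shapes("1024,8,64,128"))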

fastdeploy/cache_manager/transfer_factory/kvcache_transfer/include/kvcache_rdma.h

Lines changed: 1 addition & 1 deletion
@@ -149,7 +149,7 @@ class RDMACommunicator {
   struct ibv_pd* g_pd = NULL;  // fd
   int RDMACommunicator_status;  // Communicator status flag
   bool start_client_listener = false;  // Client listener flag
-  bool has_value_cache_;  // MLA doest not have value cache.
+  bool has_value_cache_;  // MLA does not have value cache.
 };

 #endif  // KVCACHE_RDMA_H

fastdeploy/cache_manager/transfer_factory/kvcache_transfer/src/kvcache_rdma.cpp

Lines changed: 1 addition & 13 deletions
@@ -70,17 +70,13 @@ RDMACommunicator::RDMACommunicator(std::string& role,

     // Step 1: Initialize KV cache config
     KVCacheConfig::getInstance().displayConfiguration();
-    printf(
-        "====RyanDebugRDMA, Finish #69 KVCacheConfig::getInstance(). ===== "
-        "\n");

     // Step 2: Initialize KV cache structure
     // Validate and set number of layers
     layer_number = static_cast<int>(local_cache_key_ptr_layer_head_.size());
     if (layer_number <= 0) {
       throw std::runtime_error("Invalid layer number");
     }
-    printf("====RyanDebugRDMA, Finish #77 layer. ===== \n");

     if (local_cache_value_ptr_layer_head_.empty()) {
       has_value_cache_ = false;
@@ -94,20 +90,17 @@ RDMACommunicator::RDMACommunicator(std::string& role,
       }
     }

-    printf("====RyanDebugRDMA, Finish #91 layer. ===== \n");
     // Step 2: Setup cache vectors and pointers
     resize_vectors();
     assign_pointers();

-    printf("====RyanDebugRDMA, Finish #97 layer. ===== \n");
     // Step 3:Initialize the event channel
     rdma_event_channel_epoll_fd = epoll_create1(EPOLL_CLOEXEC);
     if (rdma_event_channel_epoll_fd < 0) {
       throw std::runtime_error("Failed to create epoll fd: " +
                                std::string(strerror(errno)));
     }

-    printf("====RyanDebugRDMA, Finish #105 layer. ===== \n");
     // Start the server thread (if in decode role)
     if (splitwise_role == "decode") {
       std::thread server_thread([this]() {
@@ -119,7 +112,6 @@ RDMACommunicator::RDMACommunicator(std::string& role,
       });
       server_thread.detach();
     }
-    printf("====RyanDebugRDMA, Finish #117 layer. ===== \n");
     RDMACommunicator_status = 1;
     INFO("RDMA communicator initialized successfully");
   } catch (const std::exception& e) {
@@ -884,10 +876,6 @@ bool RDMACommunicator::server_mr_register_per_layer(RdmaContext* ctx) {
     }
   }

-  // [Fix]: Always assign to ctx->conn, whether or not there is a Value Cache.
-  // If there is no Value Cache, write_cache_value_server_mr_list is empty and
-  // assigning an empty list is safe. If it is not assigned, the vector inside
-  // ctx->conn may be left in an undefined, dirty state.
   ctx->conn.write_cache_key_server_mr_list = write_cache_key_server_mr_list;
   ctx->conn.write_cache_value_server_mr_list = write_cache_value_server_mr_list;

@@ -972,7 +960,7 @@ int RDMACommunicator::write_cache(const std::string& ip,
     cache_key_remote_addr[block_index] = (uint64_t(
         char_ptr + remote_block_ids[block_index] * total_block_size_byte +
         offset_in_block));
-
+
     if (has_value_cache_) {
       char_ptr = static_cast<char*>(
           ctx->conn.write_cache_value_remote_ptr_list[layer_idx]);
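
The deleted comment above documented why server_mr_register_per_layer always copies both MR lists into ctx->conn: when the model has no value cache (MLA), the value list is simply empty, and copying an empty list is safe, whereas skipping the assignment could leave stale state behind. A rough Python sketch of that idea (the Connection class and register_layer helper are hypothetical, not the actual C++ API):

    from dataclasses import dataclass, field

    @dataclass
    class Connection:
        """Hypothetical stand-in for the per-connection state kept in ctx->conn."""
        write_cache_key_server_mr_list: list = field(default_factory=list)
        write_cache_value_server_mr_list: list = field(default_factory=list)

    def register_layer(conn: Connection, key_mrs: list, value_mrs: list) -> None:
        # Always assign both lists; value_mrs is empty when the model has no
        # value cache, and assigning an empty list avoids leaving the previous
        # (possibly stale) contents in place.
        conn.write_cache_key_server_mr_list = key_mrs
        conn.write_cache_value_server_mr_list = value_mrs

    conn = Connection()
    register_layer(conn, key_mrs=["mr_key_0", "mr_key_1"], value_mrs=[])
    print(conn)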

fastdeploy/cache_manager/transfer_factory/rdma_cache_transfer.py

Lines changed: 0 additions & 6 deletions
@@ -40,16 +40,10 @@ def __init__(
         try:
             import rdma_comm
         except:
-            logger.error(
-                "The installation of the RDMA library failed."
-                "Confirm whether your network card supports RDMA transmission."
-            )
             raise RuntimeError(
                 "The installation of the RDMA library failed."
                 "Confirm whether your network card supports RDMA transmission."
             )
-        logger.info(f" # 499999999 init rdma messager {gpu_id} {rdma_port}")
-        logger.info(f" # == RyanDebug, Decode, the cache_v_ptr_list is: {cache_v_ptr_list}")
         self.messager = rdma_comm.RDMACommunicator(
             splitwise_role,
             gpu_id,
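
The code kept by this hunk follows a common optional-dependency pattern: try to import the native rdma_comm extension and fail fast with an actionable error when it is unavailable. A minimal standalone sketch of that pattern (the error text mirrors the diff; narrowing the bare except to ImportError is this sketch's choice, not the repository's):

    def load_rdma_comm():
        """Import the RDMA extension or raise a clear runtime error."""
        try:
            import rdma_comm  # native extension, present only when RDMA support is built
        except ImportError as exc:
            raise RuntimeError(
                "The installation of the RDMA library failed. "
                "Confirm whether your network card supports RDMA transmission."
            ) from exc
        return rdma_comm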

fastdeploy/config.py

Lines changed: 0 additions & 3 deletions
@@ -307,9 +307,6 @@ def override_name_from_config(self):
         if hasattr(self, "n_routed_experts") and getattr(self, "moe_num_experts") is None:
             self.moe_num_experts = self.n_routed_experts

-        if hasattr(self, "n_routed_experts") and getattr(self, "moe_num_experts") is None:
-            self.moe_num_experts = self.n_routed_experts
-
     def read_from_env(self):
         """
         Read configuration information from environment variables and update the object's attributes.
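
The deleted lines were an exact duplicate of the override directly above them. The surviving pattern maps a checkpoint-specific field name (n_routed_experts) onto the canonical attribute (moe_num_experts) only when the latter is still unset; a minimal sketch of that behaviour outside its real class (the ModelConfig name here is illustrative):

    class ModelConfig:
        def __init__(self, n_routed_experts=None, moe_num_experts=None):
            self.n_routed_experts = n_routed_experts
            self.moe_num_experts = moe_num_experts

        def override_name_from_config(self):
            # Fall back to the checkpoint's field only if the canonical one is unset.
            if hasattr(self, "n_routed_experts") and getattr(self, "moe_num_experts") is None:
                self.moe_num_experts = self.n_routed_experts

    cfg = ModelConfig(n_routed_experts=64)
    cfg.override_name_from_config()
    print(cfg.moe_num_experts)  # 64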

fastdeploy/model_executor/layers/attention/mla_attention_backend.py

Lines changed: 0 additions & 6 deletions
@@ -205,8 +205,6 @@ def init_attention_metadata(self, forward_meta: ForwardMeta):
             self.group_size,
             self.block_size,
         )
-        print("===RyanDebug, after ini attn meta, the max_len_tensor_cpu[1] is:", forward_meta.max_len_tensor_cpu[1])
-        print("===RyanDebug, after ini attn meta, the max_len_tensor_cpu[2] is:", forward_meta.max_len_tensor_cpu[2])
         # MLA
         metadata.max_enc_len_this_time = forward_meta.max_len_tensor_cpu[1]
         metadata.max_dec_len_this_time = forward_meta.max_len_tensor_cpu[2]
@@ -428,10 +426,6 @@ def forward_mixed(
             "none",
             self.max_seq_len,
         )
-        print(
-            "====After write cache, the metadata.kv_signal_data_list[layer.layer_id] is:",
-            metadata.kv_signal_data_list[layer.layer_id],
-        )
         # FA
         fmha_out = self.flash_attn_func(
             q,
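
For context on the two deleted probes: slot 1 of max_len_tensor_cpu is the maximum prefill (encoder) length for this step and slot 2 the maximum decode length, and downstream code (including the DeepSeek-V3 forward below) runs the prefill or decode attention path only when the corresponding slot is non-zero. A toy illustration of that gating, with everything except the two indices made up:

    def run_attention(max_len_tensor_cpu):
        """Run the prefill/decode attention paths only when this step has such tokens."""
        max_enc_len_this_time = max_len_tensor_cpu[1]
        max_dec_len_this_time = max_len_tensor_cpu[2]
        paths = []
        if max_enc_len_this_time:
            paths.append("prefill attention")
        if max_dec_len_this_time:
            paths.append("decode attention")
        return paths

    print(run_attention([0, 128, 0]))  # prefill-only step
    print(run_attention([0, 0, 1]))    # decode-only step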

fastdeploy/model_executor/models/deepseek_v3.py

Lines changed: 0 additions & 54 deletions
@@ -340,17 +340,7 @@ def forward(
         fmha_out = None

         # NOTE: (changwenbin) qkv_a_proj horizontal fusion
-        paddle.device.synchronize()
-        print("==RyanDebug, the hidden_states is:", hidden_states)  # This is an input; we assume it is fine, but a check could be added as well
-        print("==RyanDebug, hidden_states contains NaN:", paddle.any(paddle.isnan(hidden_states)).item())
-
         qkv_a_out = self.qkv_a_proj_with_mqa(hidden_states)
-        paddle.device.synchronize()
-
-        # --- NaN Check Start ---
-        print("===RyanDebug, the qkv_a_out is:", qkv_a_out)
-        print(" >>> RyanDebug, qkv_a_out contains NaN:", paddle.any(paddle.isnan(qkv_a_out)).item())
-        # --- NaN Check End ---

         query, compressed_kv, key_pe = qkv_a_out.split(
             [self.q_lora_rank, self.kv_lora_rank, self.qk_rope_head_dim], axis=-1
@@ -363,13 +353,10 @@

         key_pe.reshape_([-1, 1, self.qk_rope_head_dim])
         query_pe, key_pe = self.rotary_emb(position_ids, query_pe, key_pe)
-        paddle.device.synchronize()

         compressed_kv = self.kv_a_layernorm(compressed_kv)[0]

-        print("===RyanDebug, in #370, forward_meta.max_len_tensor_cpu[1] is:", forward_meta.max_len_tensor_cpu[1])
         if forward_meta.max_len_tensor_cpu[1]:  # max_enc_len_this_time
-            print("===RyanDebug, in #372, forward_meta.max_len_tensor_cpu[1] is:", forward_meta.max_len_tensor_cpu[1])
             key_value = self.kv_b_proj(compressed_kv)
             key_value.reshape_(
                 [
@@ -402,12 +389,8 @@
             fmha_out_prefill = fmha_out_prefill * mask_encoder_batch.cast(fmha_out_prefill.dtype)

             fmha_out = fmha_out_prefill
-            print("====RYanDebug, #404, fmha_out after MLA is: ", fmha_out)

         if forward_meta.max_len_tensor_cpu[2]:  # max_dec_len_this_time
-            print("===RyanDebug, D in dsv3 !!!!=====")
-            paddle.device.synchronize()
-
             q_nope_out = self.kv_b_proj_bmm(query_nope.transpose([1, 0, 2]), proj_type="k").transpose([1, 0, 2])

             q_input = paddle.concat([q_nope_out, query_pe], axis=-1)
@@ -418,18 +401,6 @@
                 ]
             )

-            print("===RyanDebug, the q_input # 435 is:", q_input)
-            print(" >>> RyanDebug, q_input # 435 contains NaN:", paddle.any(paddle.isnan(q_input)).item())
-
-            print("===RyanDebug, the compressed_kv # 435 is:", compressed_kv)
-            print(
-                " >>> RyanDebug, compressed_kv # 435 contains NaN:", paddle.any(paddle.isnan(compressed_kv)).item()
-            )
-
-            print("===RyanDebug, the key_pe # 435 is:", q_input)
-            print(" >>> RyanDebug, key_pe # 435 contains NaN:", paddle.any(paddle.isnan(key_pe)).item())
-
-            paddle.device.synchronize()
             fmha_out_decode = self.mla_attn(
                 q=q_input,
                 k=None,
@@ -439,39 +410,23 @@
                 k_pe=key_pe,
                 forward_meta=forward_meta,
             )
-            paddle.device.synchronize()
-            # --- NaN Check Start ---
-            print("===RyanDebug, the fmha_out_decode # 448 is:", fmha_out_decode)
-            print(
-                " >>> RyanDebug, fmha_out_decode # 448 contains NaN:",
-                paddle.any(paddle.isnan(fmha_out_decode)).item(),
-            )

             fmha_out_decode = fmha_out_decode.reshape([-1, self.num_attention_heads_tp, self.kv_lora_rank]).transpose(
                 [1, 0, 2]
             )

-            paddle.device.synchronize()
-
             fmha_out_decode = (
                 self.kv_b_proj_bmm(fmha_out_decode, proj_type="v")
                 .transpose([1, 0, 2])
                 .reshape([-1, self.num_attention_heads_tp * self.v_head_dim])
             )

-            # --- NaN Check Start ---
-            print("===RyanDebug, the fmha_out_decode is:", fmha_out_decode)
-            print(" >>> RyanDebug, fmha_out_decode contains NaN:", paddle.any(paddle.isnan(fmha_out_decode)).item())
-            # --- NaN Check End ---
-
-            paddle.device.synchronize()
             if fmha_out is None:
                 fmha_out = fmha_out_decode
             else:
                 fmha_out = fmha_out + fmha_out_decode

         output = self.o_proj(fmha_out)
-        paddle.device.synchronize()
         return output

     def load_state_dict(self, state_dict):
@@ -559,19 +514,11 @@ def forward(
             hidden_states, residual_input=residual, forward_meta=forward_meta
         )

-        print("===RyanDebug, the hidden_states before self_attn is :", hidden_states)
         hidden_states = self.self_attn(forward_meta, hidden_states, position_ids, mask_encoder_batch)

-        print("==RyanDebug, #563 hidden_states contains NaN:", paddle.any(paddle.isnan(hidden_states)).item())
-
         hidden_states, residual = self.post_attention_layernorm(hidden_states, residual)
-        print("==RyanDebug, #566 hidden_states contains NaN:", paddle.any(paddle.isnan(hidden_states)).item())
         hidden_states = self.mlp(hidden_states)

-        print("===RyanDebug, the hidden_states after mlp is :", hidden_states)
-        print(
-            "==RyanDebug, #570 hidden_states after mlp contains NaN:", paddle.any(paddle.isnan(hidden_states)).item()
-        )
         return hidden_states, residual


@@ -731,7 +678,6 @@ def load_weights(self, weights_iterator) -> None:
         process_weights_after_loading_fn = process_weights_after_loading(dict(self.named_sublayers()), self.fd_config)
         for loaded_weight_name, loaded_weight in weights_iterator:
             loaded_weight_name = loaded_weight_name.replace("deepseek_v3", "model")
-            print(f"loaded_weight_name:{loaded_weight_name}")
             for param_name, weight_name, shard_id in stacked_params_mapping:
                 if weight_name not in loaded_weight_name:
                     continue
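
Every line removed from deepseek_v3.py is an ad-hoc NaN probe or device synchronize around the MLA attention path. If similar checks are needed again, one way to keep them out of the hot path is an opt-in helper gated by an environment variable; the sketch below is only a suggestion and is not part of this commit (the FD_DEBUG_NAN_CHECK flag and check_nan helper are hypothetical names):

    import os

    import paddle

    _NAN_CHECK = os.getenv("FD_DEBUG_NAN_CHECK", "0") == "1"  # hypothetical opt-in flag

    def check_nan(name: str, tensor: paddle.Tensor) -> None:
        """Report NaNs in a tensor; no-op unless the flag is enabled."""
        if not _NAN_CHECK:
            return
        if paddle.any(paddle.isnan(tensor)).item():
            print(f"[nan-check] {name} contains NaN, shape={tuple(tensor.shape)}")

    # Usage inside a forward pass, instead of scattered prints:
    # check_nan("qkv_a_out", qkv_a_out)
    # check_nan("fmha_out_decode", fmha_out_decode)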

fastdeploy/worker/gpu_model_runner.py

Lines changed: 0 additions & 4 deletions
@@ -1889,10 +1889,6 @@ def _dummy_run(
                     self.forward_meta,
                 )
             else:
-                print(
-                    "===RyanDebug #1813 of model runner, the self.share_inputs[ids_remove_padding] is:",
-                    self.share_inputs["ids_remove_padding"],
-                )
                 model_output = self.model(
                     self.forward_meta.ids_remove_padding,
                     self.forward_meta,
