File tree Expand file tree Collapse file tree 1 file changed +2
-7
lines changed Expand file tree Collapse file tree 1 file changed +2
-7
lines changed Original file line number Diff line number Diff line change @@ -20,11 +20,10 @@ def _update_after_schedule(
20
20
super ()._update_after_schedule (scheduler_output )
21
21
for req_id in scheduler_output .num_scheduled_tokens :
22
22
request = self .requests [req_id ]
23
- if (request .num_computed_tokens == request .num_tokens +
23
+ if (request .num_computed_tokens == request .num_tokens_with_spec +
24
24
request .num_output_placeholders ):
25
25
# The request will generate a new token in this scheduling step.
26
- # TODO(woosuk): Support speculative decoding.
27
- request .num_output_placeholders += 1
26
+ request .num_output_placeholders = 1 + len (request .spec_token_ids )
28
27
29
28
def _update_request_with_output (
30
29
self ,
@@ -35,10 +34,6 @@ def _update_request_with_output(
35
34
new_token_ids , stopped = super ()._update_request_with_output (
36
35
request , new_token_ids )
37
36
38
- # Update the number of output placeholders.
39
- request .num_output_placeholders -= len (new_token_ids )
40
- assert request .num_output_placeholders >= 0
41
-
42
37
# Cache the new tokens. Preempted requests should be skipped.
43
38
if status_before_update == RequestStatus .RUNNING :
44
39
self .kv_cache_manager .cache_blocks (
You can’t perform that action at this time.
0 commit comments