Commit 99d2730

Fix BadRequestError due to invalid max_tokens
This patch ensures that when max_tokens is not defined it is left as None instead of 0. This prevents failures with providers that have no protection against max_tokens being set to 0.
Issue: #3666
1 parent 4dfbe46 commit 99d2730
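
Why the old default broke requests: some providers reject a payload whose max_tokens is 0, so an unset value should be omitted from the outgoing request rather than forwarded as a zero. A minimal sketch of that idea follows; the function and parameter names are illustrative and not taken from the repository.

# Illustrative sketch only: translating an optional max_tokens into a provider
# payload. With the old default of 0 the key was always sent, and providers
# without a guard for max_tokens == 0 answered with a BadRequestError; with
# None the key is simply left out and the provider applies its own default.
def build_provider_payload(model: str, prompt: str, max_tokens: int | None) -> dict:
    payload: dict = {"model": model, "prompt": prompt}
    if max_tokens is not None:
        payload["max_tokens"] = max_tokens  # only forward a real limit
    return payload

print(build_provider_payload("example-model", "Hello", None))
# -> {'model': 'example-model', 'prompt': 'Hello'}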

File tree

8 files changed: 3 additions, 7 deletions

docs/static/deprecated-llama-stack-spec.html

Lines changed: 0 additions & 1 deletion
@@ -4218,7 +4218,6 @@
           },
           "max_tokens": {
             "type": "integer",
-            "default": 0,
             "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
           },
           "repetition_penalty": {

docs/static/deprecated-llama-stack-spec.yaml

Lines changed: 0 additions & 1 deletion
@@ -3068,7 +3068,6 @@ components:
         description: The sampling strategy.
       max_tokens:
         type: integer
-        default: 0
         description: >-
           The maximum number of tokens that can be generated in the completion.
           The token count of your prompt plus max_tokens cannot exceed the model's

docs/static/experimental-llama-stack-spec.html

Lines changed: 0 additions & 1 deletion
@@ -2713,7 +2713,6 @@
           },
           "max_tokens": {
             "type": "integer",
-            "default": 0,
             "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
          },
           "repetition_penalty": {

docs/static/experimental-llama-stack-spec.yaml

Lines changed: 0 additions & 1 deletion
@@ -1927,7 +1927,6 @@ components:
         description: The sampling strategy.
       max_tokens:
         type: integer
-        default: 0
         description: >-
           The maximum number of tokens that can be generated in the completion.
           The token count of your prompt plus max_tokens cannot exceed the model's

docs/static/stainless-llama-stack-spec.html

Lines changed: 0 additions & 1 deletion
@@ -14753,7 +14753,6 @@
           },
           "max_tokens": {
             "type": "integer",
-            "default": 0,
             "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
           },
           "repetition_penalty": {

docs/static/stainless-llama-stack-spec.yaml

Lines changed: 0 additions & 1 deletion
@@ -10909,7 +10909,6 @@ components:
         description: The sampling strategy.
       max_tokens:
         type: integer
-        default: 0
         description: >-
           The maximum number of tokens that can be generated in the completion.
           The token count of your prompt plus max_tokens cannot exceed the model's

llama_stack/apis/inference/inference.py

Lines changed: 1 addition & 1 deletion
@@ -96,7 +96,7 @@ class SamplingParams(BaseModel):

     strategy: SamplingStrategy = Field(default_factory=GreedySamplingStrategy)

-    max_tokens: int | None = 0
+    max_tokens: int | None = None
     repetition_penalty: float | None = 1.0
     stop: list[str] | None = None
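
A small check of the behavioural difference, assuming the SamplingParams model shown in the diff above and that the module is importable from its file path; the assertions restate the field defaults listed there.

from llama_stack.apis.inference.inference import SamplingParams

params = SamplingParams()
assert params.max_tokens is None         # was 0 before this commit
assert params.repetition_penalty == 1.0  # other defaults are unchanged
assert params.stop is None

bounded = SamplingParams(max_tokens=512)  # callers can still set an explicit cap
assert bounded.max_tokens == 512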

tests/integration/eval/test_eval.py

Lines changed: 2 additions & 0 deletions
@@ -55,6 +55,7 @@ def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id):
                 "model": text_model_id,
                 "sampling_params": {
                     "temperature": 0.0,
+                    "max_tokens": 512,
                 },
             },
         },
@@ -88,6 +89,7 @@ def test_evaluate_benchmark(llama_stack_client, text_model_id, scoring_fn_id):
                 "model": text_model_id,
                 "sampling_params": {
                     "temperature": 0.0,
+                    "max_tokens": 128,
                 },
             },
         },
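
With the default no longer 0, the eval tests above pin an explicit cap so generation stays bounded regardless of the provider's own default. An illustrative fragment of that sampling_params shape follows; the model id is a placeholder and the surrounding benchmark-config keys are omitted.

# Illustrative fragment mirroring the test change above.
text_model_id = "example/text-model"  # placeholder; supplied by a fixture in the real tests

eval_candidate_fragment = {
    "model": text_model_id,
    "sampling_params": {
        "temperature": 0.0,
        "max_tokens": 512,  # explicit cap now that the default is None rather than 0
    },
}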
