diff --git a/lib/ruby_llm/provider.rb b/lib/ruby_llm/provider.rb
index e8402fa2..22ac8e4b 100644
--- a/lib/ruby_llm/provider.rb
+++ b/lib/ruby_llm/provider.rb
@@ -14,7 +14,6 @@ def complete(messages, tools:, temperature:, model:, connection:, params: {}, sc
       normalized_temperature = maybe_normalize_temperature(temperature, model)
 
       payload = Utils.deep_merge(
-        params,
         render_payload(
           messages,
           tools: tools,
@@ -22,9 +21,12 @@ def complete(messages, tools:, temperature:, model:, connection:, params: {}, sc
           model: model,
           stream: block_given?,
           schema: schema
-        )
+        ),
+        params
       )
 
+      payload.compact!
+
       if block_given?
         stream_response connection, payload, &
       else
diff --git a/lib/ruby_llm/utils.rb b/lib/ruby_llm/utils.rb
index df281c92..b8ac18e0 100644
--- a/lib/ruby_llm/utils.rb
+++ b/lib/ruby_llm/utils.rb
@@ -24,12 +24,12 @@ def to_safe_array(item)
       end
     end
 
-    def deep_merge(params, payload)
-      params.merge(payload) do |_key, params_value, payload_value|
-        if params_value.is_a?(Hash) && payload_value.is_a?(Hash)
-          deep_merge(params_value, payload_value)
+    def deep_merge(hash1, hash2)
+      hash1.merge(hash2) do |_key, value1, value2|
+        if value1.is_a?(Hash) && value2.is_a?(Hash)
+          deep_merge(value1, value2)
         else
-          payload_value
+          value2
         end
       end
     end
diff --git a/spec/fixtures/vcr_cassettes/chat_with_params_anthropic_claude-3-5-haiku-20241022_can_override_max_tokens_param_with_a_custom_value.yml b/spec/fixtures/vcr_cassettes/chat_with_params_anthropic_claude-3-5-haiku-20241022_can_override_max_tokens_param_with_a_custom_value.yml
new file mode 100644
index 00000000..b40cdf91
--- /dev/null
+++ b/spec/fixtures/vcr_cassettes/chat_with_params_anthropic_claude-3-5-haiku-20241022_can_override_max_tokens_param_with_a_custom_value.yml
@@ -0,0 +1,81 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.anthropic.com/v1/messages
+    body:
+      encoding: UTF-8
+      string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"Always
+        answer with \"Once upon a time\""}]}],"temperature":0.7,"stream":false,"max_tokens":2}'
+    headers:
+      User-Agent:
+      - Faraday v2.13.4
+      X-Api-Key:
+      - ""
+      Anthropic-Version:
+      - '2023-06-01'
+      Content-Type:
+      - application/json
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Mon, 28 Jul 2025 16:36:30 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Anthropic-Ratelimit-Input-Tokens-Limit:
+      - '100000'
+      Anthropic-Ratelimit-Input-Tokens-Remaining:
+      - '100000'
+      Anthropic-Ratelimit-Input-Tokens-Reset:
+      - '2025-07-28T16:36:30Z'
+      Anthropic-Ratelimit-Output-Tokens-Limit:
+      - '20000'
+      Anthropic-Ratelimit-Output-Tokens-Remaining:
+      - '20000'
+      Anthropic-Ratelimit-Output-Tokens-Reset:
+      - '2025-07-28T16:36:30Z'
+      Anthropic-Ratelimit-Requests-Limit:
+      - '1000'
+      Anthropic-Ratelimit-Requests-Remaining:
+      - '999'
+      Anthropic-Ratelimit-Requests-Reset:
+      - '2025-07-28T16:36:30Z'
+      Anthropic-Ratelimit-Tokens-Limit:
+      - '120000'
+      Anthropic-Ratelimit-Tokens-Remaining:
+      - '120000'
+      Anthropic-Ratelimit-Tokens-Reset:
+      - '2025-07-28T16:36:30Z'
+      Request-Id:
+      - ""
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Anthropic-Organization-Id:
+      - ""
+      Via:
+      - 1.1 google
+      Cf-Cache-Status:
+      - DYNAMIC
+      X-Robots-Tag:
+      - none
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - ""
+    body:
+      encoding: ASCII-8BIT
'{"id":"msg_01RsFPoB8wheUAgoR2SQ6HkY","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"Once + upon"}],"stop_reason":"max_tokens","stop_sequence":null,"usage":{"input_tokens":16,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":2,"service_tier":"standard"}}' + recorded_at: Mon, 28 Jul 2025 16:36:30 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/ruby_llm/chat_request_options_spec.rb b/spec/ruby_llm/chat_request_options_spec.rb index 07a3f87b..efa9e8f0 100644 --- a/spec/ruby_llm/chat_request_options_spec.rb +++ b/spec/ruby_llm/chat_request_options_spec.rb @@ -78,6 +78,21 @@ json_response = JSON.parse('{' + response.content) # rubocop:disable Style/StringConcatenation expect(json_response).to eq({ 'result' => 8 }) end + + it "#{provider}/#{model} can override max_tokens param with a custom value" do # rubocop:disable RSpec/ExampleLength + chat = RubyLLM + .chat(model: model, provider: provider) + .with_params(max_tokens: 2) + + chat.add_message( + role: :user, + content: 'Always answer with "Once upon a time"' + ) + + response = chat.complete + + expect(response.content).to eq('Once upon') # Only 2 tokens + end end # Providers [:openrouter, :bedrock] supports a {top_k: ...} param to remove low-probability next tokens.