diff --git a/docs/guides/chat.md b/docs/guides/chat.md
index 06a413835..9f5fdfc0d 100644
--- a/docs/guides/chat.md
+++ b/docs/guides/chat.md
@@ -487,6 +487,27 @@ end
 chat.ask "What is metaprogramming in Ruby?"
 ```
 
+## Raw Responses
+
+You can access the raw response from the API provider with `response.raw`.
+
+```ruby
+response = chat.ask("What is the capital of France?")
+puts response.raw.body
+```
+
+The raw response is a `Faraday::Response` object, so you can inspect the status code, headers, and body directly.
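+
+For example, you can check the HTTP status and individual headers. A minimal sketch (which headers are present varies by provider):
+
+```ruby
+response = chat.ask("What is the capital of France?")
+puts response.raw.status                    # => 200
+puts response.raw.headers["content-type"]   # e.g. "application/json"
+```
+
+Raw responses are also attached to messages returned from streaming calls.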
+
 ## Next Steps
 
 This guide covered the core `Chat` interface. Now you might want to explore:
diff --git a/lib/ruby_llm/message.rb b/lib/ruby_llm/message.rb
index e98482964..5fb804c27 100644
--- a/lib/ruby_llm/message.rb
+++ b/lib/ruby_llm/message.rb
@@ -7,7 +7,7 @@ module RubyLLM
   class Message
     ROLES = %i[system user assistant tool].freeze
 
-    attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id
+    attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id, :raw
     attr_writer :content
 
     def initialize(options = {})
@@ -18,6 +18,7 @@ def initialize(options = {})
       @output_tokens = options[:output_tokens]
       @model_id = options[:model_id]
       @tool_call_id = options[:tool_call_id]
+      @raw = options[:raw]
 
       ensure_valid_role
     end
diff --git a/lib/ruby_llm/providers/anthropic/chat.rb b/lib/ruby_llm/providers/anthropic/chat.rb
index dc9be086d..f29cf4b20 100644
--- a/lib/ruby_llm/providers/anthropic/chat.rb
+++ b/lib/ruby_llm/providers/anthropic/chat.rb
@@ -57,7 +57,7 @@ def parse_completion_response(response)
         text_content = extract_text_content(content_blocks)
         tool_use_blocks = Tools.find_tool_uses(content_blocks)
 
-        build_message(data, text_content, tool_use_blocks)
+        build_message(data, text_content, tool_use_blocks, response)
       end
 
       def extract_text_content(blocks)
@@ -65,14 +65,15 @@ def extract_text_content(blocks)
         text_blocks.map { |c| c['text'] }.join
       end
 
-      def build_message(data, content, tool_use_blocks)
+      def build_message(data, content, tool_use_blocks, response)
        Message.new(
           role: :assistant,
           content: content,
           tool_calls: Tools.parse_tool_calls(tool_use_blocks),
           input_tokens: data.dig('usage', 'input_tokens'),
           output_tokens: data.dig('usage', 'output_tokens'),
-          model_id: data['model']
+          model_id: data['model'],
+          raw: response
         )
       end
 
diff --git a/lib/ruby_llm/providers/bedrock/streaming/base.rb b/lib/ruby_llm/providers/bedrock/streaming/base.rb
index 26860076e..c2ab2ea19 100644
--- a/lib/ruby_llm/providers/bedrock/streaming/base.rb
+++ b/lib/ruby_llm/providers/bedrock/streaming/base.rb
@@ -34,7 +34,8 @@ def stream_response(connection, payload, &block)
                                  payload:)
         accumulator = StreamAccumulator.new
 
-        connection.post stream_url, payload do |req|
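+        # Capture the Faraday response so it can be attached to the final message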
+        response = connection.post stream_url, payload do |req|
           req.headers.merge! build_headers(signature.headers, streaming: block_given?)
           req.options.on_data = handle_stream do |chunk|
             accumulator.add chunk
@@ -42,7 +42,7 @@ def stream_response(connection, payload, &block)
           end
         end
 
-        accumulator.to_message
+        accumulator.to_message(response)
       end
 
       def handle_stream(&block)
diff --git a/lib/ruby_llm/providers/gemini/chat.rb b/lib/ruby_llm/providers/gemini/chat.rb
index 080cbb8d8..5e9b3e7c8 100644
--- a/lib/ruby_llm/providers/gemini/chat.rb
+++ b/lib/ruby_llm/providers/gemini/chat.rb
@@ -81,7 +81,8 @@ def parse_completion_response(response)
           tool_calls: tool_calls,
           input_tokens: data.dig('usageMetadata', 'promptTokenCount'),
           output_tokens: data.dig('usageMetadata', 'candidatesTokenCount'),
-          model_id: data['modelVersion'] || response.env.url.path.split('/')[3].split(':')[0]
+          model_id: data['modelVersion'] || response.env.url.path.split('/')[3].split(':')[0],
+          raw: response
         )
       end
 
diff --git a/lib/ruby_llm/providers/openai/chat.rb b/lib/ruby_llm/providers/openai/chat.rb
index 17e9d41a9..593a93caf 100644
--- a/lib/ruby_llm/providers/openai/chat.rb
+++ b/lib/ruby_llm/providers/openai/chat.rb
@@ -59,7 +59,8 @@ def parse_completion_response(response)
           tool_calls: parse_tool_calls(message_data['tool_calls']),
           input_tokens: data['usage']['prompt_tokens'],
           output_tokens: data['usage']['completion_tokens'],
-          model_id: data['model']
+          model_id: data['model'],
+          raw: response
         )
       end
 
diff --git a/lib/ruby_llm/stream_accumulator.rb b/lib/ruby_llm/stream_accumulator.rb
index 7fca306a2..a2e2e8c91 100644
--- a/lib/ruby_llm/stream_accumulator.rb
+++ b/lib/ruby_llm/stream_accumulator.rb
@@ -29,14 +29,16 @@ def add(chunk)
       RubyLLM.logger.debug inspect
     end
 
-    def to_message
+    def to_message(response)
       Message.new(
         role: :assistant,
         content: content.empty? ? nil : content,
         model_id: model_id,
         tool_calls: tool_calls_from_stream,
         input_tokens: @input_tokens.positive? ? @input_tokens : nil,
-        output_tokens: @output_tokens.positive? ? @output_tokens : nil
+        output_tokens: @output_tokens.positive? ? @output_tokens : nil,
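+        # Raw HTTP response captured while streaming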
+        raw: response
       )
     end
 
diff --git a/lib/ruby_llm/streaming.rb b/lib/ruby_llm/streaming.rb
index b7017896e..30501a375 100644
--- a/lib/ruby_llm/streaming.rb
+++ b/lib/ruby_llm/streaming.rb
@@ -11,7 +11,8 @@ module Streaming
     def stream_response(connection, payload, &block)
       accumulator = StreamAccumulator.new
 
-      connection.post stream_url, payload do |req|
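+      # Capture the Faraday response; it is attached to the accumulated message below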
+      response = connection.post stream_url, payload do |req|
         if req.options.respond_to?(:on_data)
           # Handle Faraday 2.x streaming with on_data method
           req.options.on_data = handle_stream do |chunk|
@@ -27,7 +27,7 @@ def stream_response(connection, payload, &block)
         end
       end
 
-      accumulator.to_message
+      accumulator.to_message(response)
     end
 
     def handle_stream(&block)
diff --git a/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_anthropic_claude-3-5-haiku-20241022_returns_raw_responses.yml b/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_anthropic_claude-3-5-haiku-20241022_returns_raw_responses.yml
new file mode 100644
index 000000000..26a2ada32
--- /dev/null
+++ b/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_anthropic_claude-3-5-haiku-20241022_returns_raw_responses.yml
@@ -0,0 +1,80 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.anthropic.com/v1/messages
+    body:
+      encoding: UTF-8
+      string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"What''s
+        2 + 2?"}]}],"temperature":0.7,"stream":false,"max_tokens":8192}'
+    headers:
+      User-Agent:
+      - Faraday v2.13.2
+      X-Api-Key:
+      - ""
+      Anthropic-Version:
+      - '2023-06-01'
+      Content-Type:
+      - application/json
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 17 Jul 2025 09:51:10 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Anthropic-Ratelimit-Input-Tokens-Limit:
+      - '100000'
+      Anthropic-Ratelimit-Input-Tokens-Remaining:
+      - '100000'
+      Anthropic-Ratelimit-Input-Tokens-Reset:
+      - '2025-07-17T09:51:10Z'
+      Anthropic-Ratelimit-Output-Tokens-Limit:
+      - '20000'
+      Anthropic-Ratelimit-Output-Tokens-Remaining:
+      - '20000'
+      Anthropic-Ratelimit-Output-Tokens-Reset:
+      - '2025-07-17T09:51:10Z'
+      Anthropic-Ratelimit-Requests-Limit:
+      - '1000'
+      Anthropic-Ratelimit-Requests-Remaining:
+      - '999'
+      Anthropic-Ratelimit-Requests-Reset:
+      - '2025-07-17T09:51:09Z'
+      Anthropic-Ratelimit-Tokens-Limit:
+      - '120000'
+      Anthropic-Ratelimit-Tokens-Remaining:
+      - '120000'
+      Anthropic-Ratelimit-Tokens-Reset:
+      - '2025-07-17T09:51:10Z'
+      Request-Id:
+      - ""
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Anthropic-Organization-Id:
+      - 0137b15c-16bf-490d-9f90-8cfd7e325ec0
+      Via:
+      - 1.1 google
+      Cf-Cache-Status:
+      - DYNAMIC
+      X-Robots-Tag:
+      - none
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - ""
+    body:
+      encoding: ASCII-8BIT
+      string: '{"id":"msg_01MTKfyBFRd3hXEsnbHsDkkE","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"4"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":16,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":5,"service_tier":"standard"}}'
+  recorded_at: Thu, 17 Jul 2025 09:51:10 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_bedrock_anthropic_claude-3-5-haiku-20241022-v1_0_returns_raw_responses.yml b/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_bedrock_anthropic_claude-3-5-haiku-20241022-v1_0_returns_raw_responses.yml
new file mode 100644
index 000000000..f9139502c
--- /dev/null
+++ b/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_bedrock_anthropic_claude-3-5-haiku-20241022-v1_0_returns_raw_responses.yml
@@ -0,0 +1,53 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://bedrock-runtime..amazonaws.com/model/anthropic.claude-3-5-haiku-20241022-v1:0/invoke
+    body:
+      encoding: UTF-8
+      string: '{"anthropic_version":"bedrock-2023-05-31","messages":[{"role":"user","content":[{"type":"text","text":"What''s
+        2 + 2?"}]}],"temperature":0.7,"max_tokens":4096}'
+    headers:
+      User-Agent:
+      - Faraday v2.13.2
+      Host:
+      - bedrock-runtime..amazonaws.com
+      X-Amz-Date:
+      - 20250717T095124Z
+      X-Amz-Content-Sha256:
+      - c9959b2464b5ac20dc26dcc5555c64745b5fb1d651b2c68adbb1e8b276b03a01
+      Authorization:
+      - AWS4-HMAC-SHA256 Credential=/20250717//bedrock/aws4_request,
+        SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=cfece360cd51e292db2325d26d284144a742b319b7f02364c58db247185c884f
+      Content-Type:
+      - application/json
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 17 Jul 2025 09:51:25 GMT
+      Content-Type:
+      - application/json
+      Content-Length:
+      - '305'
+      Connection:
+      - keep-alive
+      X-Amzn-Requestid:
+      - 1af8cbca-119c-40c7-a4f6-e80d0404df78
+      X-Amzn-Bedrock-Invocation-Latency:
+      - '996'
+      X-Amzn-Bedrock-Output-Token-Count:
+      - '5'
+      X-Amzn-Bedrock-Input-Token-Count:
+      - '16'
+    body:
+      encoding: UTF-8
+      string: '{"id":"msg_bdrk_01Gt3PNni688hT3aFxASoTth","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"4"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":16,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":5}}'
+  recorded_at: Thu, 17 Jul 2025 09:51:25 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_deepseek_deepseek-chat_returns_raw_responses.yml b/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_deepseek_deepseek-chat_returns_raw_responses.yml
new file mode 100644
index 000000000..cb14fb482
--- /dev/null
+++ b/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_deepseek_deepseek-chat_returns_raw_responses.yml
@@ -0,0 +1,57 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.deepseek.com/chat/completions
+    body:
+      encoding: UTF-8
+      string: '{"model":"deepseek-chat","messages":[{"role":"user","content":"What''s
+        2 + 2?"}],"stream":false,"temperature":0.7}'
+    headers:
+      User-Agent:
+      - Faraday v2.13.2
+      Authorization:
+      - Bearer
+      Content-Type:
+      - application/json
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 17 Jul 2025 09:51:54 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Vary:
+      - origin, access-control-request-method, access-control-request-headers
+      Access-Control-Allow-Credentials:
+      - 'true'
+      X-Ds-Trace-Id:
+      - 5b664ee5c8ff6597624823bc9ee80b7b
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      X-Content-Type-Options:
+      - nosniff
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - ""
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - ""
+    body:
+      encoding: ASCII-8BIT
+      string: '{"id":"344a47f5-689a-4d25-b190-8cd129a2a93e","object":"chat.completion","created":1752745914,"model":"deepseek-chat","choices":[{"index":0,"message":{"role":"assistant","content":"The
+        sum of 2 + 2 is **4**. \n\nLet me know if you''d like help with anything else!"},"logprobs":null,"finish_reason":"stop"}],"usage":{"prompt_tokens":11,"completion_tokens":25,"total_tokens":36,"prompt_tokens_details":{"cached_tokens":0},"prompt_cache_hit_tokens":0,"prompt_cache_miss_tokens":11},"system_fingerprint":"fp_8802369eaa_prod0623_fp8_kvcache"}'
+  recorded_at: Thu, 17 Jul 2025 09:52:00 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_gemini_gemini-2_0-flash_returns_raw_responses.yml b/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_gemini_gemini-2_0-flash_returns_raw_responses.yml
new file mode 100644
index 000000000..7dcde607f
--- /dev/null
+++ b/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_gemini_gemini-2_0-flash_returns_raw_responses.yml
@@ -0,0 +1,86 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent
+    body:
+      encoding: UTF-8
+      string: '{"contents":[{"role":"user","parts":[{"text":"What''s 2 + 2?"}]}],"generationConfig":{"temperature":0.7}}'
+    headers:
+      User-Agent:
+      - Faraday v2.13.2
+      X-Goog-Api-Key:
+      - ""
+      Content-Type:
+      - application/json
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Content-Type:
+      - application/json; charset=UTF-8
+      Vary:
+      - Origin
+      - Referer
+      - X-Origin
+      Date:
+      - Thu, 17 Jul 2025 09:51:45 GMT
+      Server:
+      - scaffolding on HTTPServer2
+      X-Xss-Protection:
+      - '0'
+      X-Frame-Options:
+      - SAMEORIGIN
+      X-Content-Type-Options:
+      - nosniff
+      Server-Timing:
+      - gfet4t7; dur=303
+      Alt-Svc:
+      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+      Transfer-Encoding:
+      - chunked
+    body:
+      encoding: ASCII-8BIT
+      string: |
+        {
+          "candidates": [
+            {
+              "content": {
+                "parts": [
+                  {
+                    "text": "2 + 2 = 4\n"
+                  }
+                ],
+                "role": "model"
+              },
+              "finishReason": "STOP",
+              "avgLogprobs": -7.8827462857589126e-05
+            }
+          ],
+          "usageMetadata": {
+            "promptTokenCount": 9,
+            "candidatesTokenCount": 8,
+            "totalTokenCount": 17,
+            "promptTokensDetails": [
+              {
+                "modality": "TEXT",
+                "tokenCount": 9
+              }
+            ],
+            "candidatesTokensDetails": [
+              {
+                "modality": "TEXT",
+                "tokenCount": 8
+              }
+            ]
+          },
+          "modelVersion": "gemini-2.0-flash",
+          "responseId": "scd4aOrZB-WwmNAP2Zq_-QQ"
+        }
+  recorded_at: Thu, 17 Jul 2025 09:51:45 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_gpustack_qwen3_returns_raw_responses.yml b/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_gpustack_qwen3_returns_raw_responses.yml
new file mode 100644
index 000000000..2c92490d8
--- /dev/null
+++ b/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_gpustack_qwen3_returns_raw_responses.yml
@@ -0,0 +1,49 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: "/chat/completions"
+    body:
+      encoding: UTF-8
+      string: '{"model":"qwen3","messages":[{"role":"user","content":"What''s 2
+        + 2?"}],"stream":false,"temperature":0.7}'
+    headers:
+      User-Agent:
+      - Faraday v2.13.1
+      Authorization:
+      - Bearer
+      Content-Type:
+      - application/json
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Fri, 18 Jul 2025 16:32:10 GMT
+      - Fri, 18 Jul 2025 16:32:11 GMT
+      Server:
+      - uvicorn
+      - uvicorn, llama-box/v0.0.154
+      Keep-Alive:
+      - timeout=15, max=100
+      Content-Type:
+      - application/json
+      Content-Length:
+      - '669'
+      X-Request-Accepted-At:
+      - ""
+      Access-Control-Allow-Origin:
+      - "*"
+      X-Request-Id:
+      - ""
+    body:
+      encoding: UTF-8
+      string: '{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"\n\n2
+        + 2 equals **4**.","role":"assistant"}}],"created":1752856339,"id":"chatcmpl-nmYRr4McYECKspTiCk9yzXjjiDB6WnVC","model":"qwen3","object":"chat.completion","usage":{"completion_tokens":181,"completion_tokens_details":{"accepted_prediction_tokens":0,"reasoning_tokens":168,"rejected_prediction_tokens":0},"draft_tokens":0,"draft_tokens_acceptance":0.0,"prompt_tokens":16,"prompt_tokens_details":{"cached_tokens":4},"prompt_tokens_per_second":36.919356587913065,"time_per_output_token_ms":41.89909944751381,"time_to_first_token_ms":433.377,"tokens_per_second":23.866861416739532,"total_tokens":197}}'
+  recorded_at: Fri, 18 Jul 2025 16:32:19 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_ollama_qwen3_returns_raw_responses.yml b/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_ollama_qwen3_returns_raw_responses.yml
new file mode 100644
index 000000000..7a1131e13
--- /dev/null
+++ b/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_ollama_qwen3_returns_raw_responses.yml
@@ -0,0 +1,35 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: "/chat/completions"
+    body:
+      encoding: UTF-8
+      string: '{"model":"qwen3","messages":[{"role":"user","content":"What''s 2
+        + 2?"}],"stream":false,"temperature":0.7}'
+    headers:
+      User-Agent:
+      - Faraday v2.13.2
+      Content-Type:
+      - application/json
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 17 Jul 2025 09:54:01 GMT
+      Content-Length:
+      - '1919'
+    body:
+      encoding: ASCII-8BIT
+      string: !binary |-
+        eyJpZCI6ImNoYXRjbXBsLTgwNyIsIm9iamVjdCI6ImNoYXQuY29tcGxldGlvbiIsImNyZWF0ZWQiOjE3NTI3NDYwNDEsIm1vZGVsIjoicXdlbjMiLCJzeXN0ZW1fZmluZ2VycHJpbnQiOiJmcF9vbGxhbWEiLCJjaG9pY2VzIjpbeyJpbmRleCI6MCwibWVzc2FnZSI6eyJyb2xlIjoiYXNzaXN0YW50IiwiY29udGVudCI6Ilx1MDAzY3RoaW5rXHUwMDNlXG5Pa2F5LCBzbyB0aGUgdXNlciBhc2tlZCwgXCJXaGF0J3MgMiArIDI/XCIgVGhhdCBzZWVtcyBzdHJhaWdodGZvcndhcmQsIGJ1dCBtYXliZSB0aGV5J3JlIHRlc3RpbmcgaWYgSSBjYW4gaGFuZGxlIHNpbXBsZSBhcml0aG1ldGljLiBMZXQgbWUgdGhpbmsuIDIgcGx1cyAyIGlzIGEgYmFzaWMgbWF0aCBwcm9ibGVtLiBJIGtub3cgdGhhdCAyICsgMiBlcXVhbHMgNC4gQnV0IHdhaXQsIG1heWJlIHRoZXkgd2FudCBhIG1vcmUgZGV0YWlsZWQgZXhwbGFuYXRpb24/IExldCBtZSBjaGVjayBpZiB0aGVyZSdzIGFueSB0cmljayBvciBjb250ZXh0IGhlcmUuIFRoZSBxdWVzdGlvbiBpcyBwcmV0dHkgZGlyZWN0LCBzbyBwcm9iYWJseSBqdXN0IHRoZSBhbnN3ZXIgaXMgbmVlZGVkLiBIb3dldmVyLCBzb21ldGltZXMgcGVvcGxlIG1pZ2h0IHBocmFzZSBxdWVzdGlvbnMgaW4gYSB3YXkgdGhhdCdzIGEgYml0IG1vcmUgY29tcGxleC4gTGV0IG1lIG1ha2Ugc3VyZSB0aGF0IHRoZXJlJ3Mgbm8gaGlkZGVuIG1lYW5pbmcuIEZvciBleGFtcGxlLCBjb3VsZCBpdCBiZSBhIHRyaWNrIHF1ZXN0aW9uIHdoZXJlIHRoZSBhbnN3ZXIgaXNuJ3QgND8gTGlrZSBpbiBzb21lIGNvbnRleHRzLCBtYXliZSBpbiBkaWZmZXJlbnQgbnVtYmVyIGJhc2VzPyBCdXQgdGhlIHF1ZXN0aW9uIGRvZXNuJ3Qgc3BlY2lmeSBhIGJhc2UuIElmIGl0J3MgYmFzZSAxMCwgdGhlbiB5ZXMsIDIgKyAyID0gNC4gSWYgaXQncyBhbm90aGVyIGJhc2UsIGxpa2UgYmFzZSAzLCB0aGVuIDIgKyAyIHdvdWxkIGJlIDExIChzaW5jZSAyIGluIGJhc2UgMyBpcyAyIGluIGRlY2ltYWwsIHNvIDIrMj00LCB3aGljaCBpcyAxMSBpbiBiYXNlIDMpLiBCdXQgdGhlIHVzZXIgZGlkbid0IG1lbnRpb24gYW55IGJhc2UsIHNvIEkgc2hvdWxkIGFzc3VtZSBiYXNlIDEwLiBBbHNvLCBpbiBzb21lIGNvbnRleHRzLCBsaWtlIGluIHByb2dyYW1taW5nIG9yIHNwZWNpZmljIGZpZWxkcywgdGhlcmUgbWlnaHQgYmUgZGlmZmVyZW50IGludGVycHJldGF0aW9ucywgYnV0IGFnYWluLCB0aGUgcXVlc3Rpb24gaXMgdmVyeSBiYXNpYy4gSSB0aGluayB0aGUgdXNlciBqdXN0IHdhbnRzIHRoZSBzaW1wbGUgYW5zd2VyLiBTbyB0byBjb25maXJtLCAyICsgMiBpcyA0LiBMZXQgbWUgbWFrZSBzdXJlIEknbSBub3QgbWlzc2luZyBhbnl0aGluZy4gTWF5YmUgdGhleSB3YW50ZWQgdG8gc2VlIGlmIEkgY2FuIHJlY29nbml6ZSBhIHNpbXBsZSBxdWVzdGlvbiBhbmQgcmVzcG9uZCBhcHByb3ByaWF0ZWx5LiBZZWFoLCBJIHRoaW5rIHRoYXQncyBpdC4gVGhlIGFuc3dlciBpcyA0LlxuXHUwMDNjL3RoaW5rXHUwMDNlXG5cblRoZSBzdW0gb2YgMiBhbmQgMiBpcyAqKjQqKi4gXG5cbkluIGJhc2ljIGFyaXRobWV0aWMsIGFkZGluZyB0d28gbnVtYmVycyB0b2dldGhlciBtZWFucyBjb21iaW5pbmcgdGhlaXIgdmFsdWVzLiBTbywgMiArIDIgZXF1YWxzIDQuIElmIHlvdSdyZSBhc2tpbmcgZm9yIGEgbW9yZSBkZXRhaWxlZCBleHBsYW5hdGlvbiBvciBoYXZlIGEgc3BlY2lmaWMgY29udGV4dCBpbiBtaW5kIChsaWtlIGRpZmZlcmVudCBudW1iZXIgYmFzZXMgb3Igb3BlcmF0aW9ucyksIGZlZWwgZnJlZSB0byBjbGFyaWZ5ISDwn5iKIn0sImZpbmlzaF9yZWFzb24iOiJzdG9wIn1dLCJ1c2FnZSI6eyJwcm9tcHRfdG9rZW5zIjoxNiwiY29tcGxldGlvbl90b2tlbnMiOjQxOCwidG90YWxfdG9rZW5zIjo0MzR9fQo=
+  recorded_at: Thu, 17 Jul 2025 09:54:01 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_openai_gpt-4_1-nano_returns_raw_responses.yml b/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_openai_gpt-4_1-nano_returns_raw_responses.yml
new file mode 100644
index 000000000..cd158390c
--- /dev/null
+++ b/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_openai_gpt-4_1-nano_returns_raw_responses.yml
@@ -0,0 +1,115 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://api.openai.com/v1/chat/completions
+    body:
+      encoding: UTF-8
+      string: '{"model":"gpt-4.1-nano","messages":[{"role":"user","content":"What''s
+        2 + 2?"}],"stream":false,"temperature":0.7}'
+    headers:
+      User-Agent:
+      - Faraday v2.13.2
+      Authorization:
+      - Bearer
+      Content-Type:
+      - application/json
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 17 Jul 2025 09:52:56 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Access-Control-Expose-Headers:
+      - X-Request-ID
+      Openai-Organization:
+      - ""
+      Openai-Processing-Ms:
+      - '259'
+      Openai-Project:
+      - proj_KyS64Yhsc9qhhwjNcgkOa88E
+      Openai-Version:
+      - '2020-10-01'
+      X-Envoy-Upstream-Service-Time:
+      - '267'
+      X-Ratelimit-Limit-Requests:
+      - '500'
+      X-Ratelimit-Limit-Tokens:
+      - '200000'
+      X-Ratelimit-Remaining-Requests:
+      - '499'
+      X-Ratelimit-Remaining-Tokens:
+      - '199993'
+      X-Ratelimit-Reset-Requests:
+      - 120ms
+      X-Ratelimit-Reset-Tokens:
+      - 1ms
+      X-Request-Id:
+      - ""
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - ""
+      - ""
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - ""
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |
+        {
+          "id": "chatcmpl-BuFaJnHZt6QmtFUrVzg5Ta1cpPEBp",
+          "object": "chat.completion",
+          "created": 1752745975,
+          "model": "gpt-4.1-nano-2025-04-14",
+          "choices": [
+            {
+              "index": 0,
+              "message": {
+                "role": "assistant",
+                "content": "2 + 2 equals 4.",
+                "refusal": null,
+                "annotations": []
+              },
+              "logprobs": null,
+              "finish_reason": "stop"
+            }
+          ],
+          "usage": {
+            "prompt_tokens": 14,
+            "completion_tokens": 8,
+            "total_tokens": 22,
+            "prompt_tokens_details": {
+              "cached_tokens": 0,
+              "audio_tokens": 0
+            },
+            "completion_tokens_details": {
+              "reasoning_tokens": 0,
+              "audio_tokens": 0,
+              "accepted_prediction_tokens": 0,
+              "rejected_prediction_tokens": 0
+            }
+          },
+          "service_tier": "default",
+          "system_fingerprint": null
+        }
+  recorded_at: Thu, 17 Jul 2025 09:52:56 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_openrouter_anthropic_claude-3_5-haiku_returns_raw_responses.yml b/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_openrouter_anthropic_claude-3_5-haiku_returns_raw_responses.yml
new file mode 100644
index 000000000..d7ca568dd
--- /dev/null
+++ b/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_openrouter_anthropic_claude-3_5-haiku_returns_raw_responses.yml
@@ -0,0 +1,53 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://openrouter.ai/api/v1/chat/completions
+    body:
+      encoding: UTF-8
+      string: '{"model":"anthropic/claude-3.5-haiku","messages":[{"role":"user","content":"What''s
+        2 + 2?"}],"stream":false,"temperature":0.7}'
+    headers:
+      User-Agent:
+      - Faraday v2.13.2
+      Authorization:
+      - Bearer
+      Content-Type:
+      - application/json
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 17 Jul 2025 09:53:04 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Access-Control-Allow-Origin:
+      - "*"
+      Vary:
+      - Accept-Encoding
+      Permissions-Policy:
+      - payment=(self "https://checkout.stripe.com" "https://connect-js.stripe.com"
+        "https://js.stripe.com" "https://*.js.stripe.com" "https://hooks.stripe.com")
+      Referrer-Policy:
+      - no-referrer, strict-origin-when-cross-origin
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - ""
+    body:
+      encoding: ASCII-8BIT
+      string: "\n \n{\"id\":\"gen-1752745984-II6cU8xU6l9CO2zMxeT5\",\"provider\":\"Google\",\"model\":\"anthropic/claude-3.5-haiku\",\"object\":\"chat.completion\",\"created\":1752745984,\"choices\":[{\"logprobs\":null,\"finish_reason\":\"stop\",\"native_finish_reason\":\"stop\",\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"4\",\"refusal\":null,\"reasoning\":null}}],\"usage\":{\"prompt_tokens\":16,\"completion_tokens\":5,\"total_tokens\":21}}"
+  recorded_at: Thu, 17 Jul 2025 09:53:04 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/ruby_llm/chat_spec.rb b/spec/ruby_llm/chat_spec.rb
index 4962da633..2612dcd1e 100644
--- a/spec/ruby_llm/chat_spec.rb
+++ b/spec/ruby_llm/chat_spec.rb
@@ -19,6 +19,17 @@
       expect(response.output_tokens).to be_positive
     end
 
+    it "#{provider}/#{model} returns raw responses" do # rubocop:disable RSpec/ExampleLength,RSpec/MultipleExpectations
+      chat = RubyLLM.chat(model: model, provider: provider)
+      response = chat.ask('What is the capital of France?')
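+      # response.raw is the Faraday::Response behind this message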
\n{\"id\":\"gen-1752745984-II6cU8xU6l9CO2zMxeT5\",\"provider\":\"Google\",\"model\":\"anthropic/claude-3.5-haiku\",\"object\":\"chat.completion\",\"created\":1752745984,\"choices\":[{\"logprobs\":null,\"finish_reason\":\"stop\",\"native_finish_reason\":\"stop\",\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"4\",\"refusal\":null,\"reasoning\":null}}],\"usage\":{\"prompt_tokens\":16,\"completion_tokens\":5,\"total_tokens\":21}}" + recorded_at: Thu, 17 Jul 2025 09:53:04 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/ruby_llm/chat_spec.rb b/spec/ruby_llm/chat_spec.rb index 4962da633..2612dcd1e 100644 --- a/spec/ruby_llm/chat_spec.rb +++ b/spec/ruby_llm/chat_spec.rb @@ -19,6 +19,17 @@ expect(response.output_tokens).to be_positive end + it "#{provider}/#{model} returns raw responses" do # rubocop:disable RSpec/ExampleLength,RSpec/MultipleExpectations + chat = RubyLLM.chat(model: model, provider: provider) + response = chat.ask('What is the capital of France?') + expect(response.raw).to be_present + expect(response.raw.headers).to be_present + expect(response.raw.body).to be_present + expect(response.raw.status).to be_present + expect(response.raw.status).to eq(200) + expect(response.raw.env.request_body).to be_present + end + it "#{provider}/#{model} can handle multi-turn conversations" do # rubocop:disable RSpec/MultipleExpectations chat = RubyLLM.chat(model: model, provider: provider) diff --git a/spec/ruby_llm/chat_streaming_spec.rb b/spec/ruby_llm/chat_streaming_spec.rb index 8ea5f6143..a2de23ad5 100644 --- a/spec/ruby_llm/chat_streaming_spec.rb +++ b/spec/ruby_llm/chat_streaming_spec.rb @@ -13,12 +13,18 @@ chat = RubyLLM.chat(model: model, provider: provider) chunks = [] - chat.ask('Count from 1 to 3') do |chunk| + response = chat.ask('Count from 1 to 3') do |chunk| chunks << chunk end expect(chunks).not_to be_empty expect(chunks.first).to be_a(RubyLLM::Chunk) + expect(response.raw).to be_present + expect(response.raw.headers).to be_present + expect(response.raw.status).to be_present + expect(response.raw.status).to eq(200) + expect(response.raw.env.request_body).to be_present + puts response.raw.env.request_body end it "#{provider}/#{model} reports consistent token counts compared to non-streaming" do # rubocop:disable RSpec/ExampleLength,RSpec/MultipleExpectations