Skip to content
Draft
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
81ed712
feat: wip - add thinking content to messages
rhys117 May 14, 2025
a47870a
Merge branch 'main' into 154-thinking
rhys117 Jun 6, 2025
b6e1bb0
chore: add thinking to capabilities
rhys117 Jun 6, 2025
ecb69c9
chore: pass thinking through from chat initialisation
rhys117 Jun 6, 2025
a014b77
chore: add very basic config for thinking budget through global confi…
rhys117 Jun 6, 2025
ddb0ae1
bug: fix config missing comma
rhys117 Jun 6, 2025
6d66491
chore: add streaming content
rhys117 Jun 6, 2025
7da672e
chore: rename to use existing reasoning capability
rhys117 Jun 6, 2025
c948b0e
Merge branch 'main' into 154-thinking
rhys117 Jun 22, 2025
6b4fb83
chore: rename to thinking
rhys117 Jun 22, 2025
7ec6733
Get thinking working with bedrock
hiemanshu Jun 27, 2025
8709018
Merge branch 'main' into 154-thinking
crmne Jul 16, 2025
b8fb932
Merge pull request #1 from recitalsoftware/154-thinking
rhys117 Jul 17, 2025
5577bae
chore: update anthropic capabilities with thinking
rhys117 Jul 18, 2025
5c02af2
chore: move temperature setting to param
rhys117 Jul 18, 2025
153440c
chore: use 'thinking' capability instead of reasoning in Model::Info
rhys117 Jul 18, 2025
627ffe0
chore: allow thinking capabilties on assumed models
rhys117 Jul 18, 2025
8a6453d
bug: fix call to check if thinking supported in 'with_thinking'
rhys117 Jul 18, 2025
cc1ce5f
test: add basic spec for anthropic models
rhys117 Jul 18, 2025
87fa6a5
Merge branch 'main' into 154-thinking
rhys117 Jul 18, 2025
06daa1c
bug: ensure render_payload args compatibility across all providers
rhys117 Jul 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions lib/ruby_llm/chat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def initialize(model: nil, provider: nil, assume_model_exists: false, context: n
@config = context&.config || RubyLLM.config
model_id = model || @config.default_model
with_model(model_id, provider: provider, assume_exists: assume_model_exists)
@reasoning = false
@temperature = 0.7
@messages = []
@tools = {}
Expand Down Expand Up @@ -63,6 +64,8 @@ def with_tools(*tools)
# Resolves and assigns the model/provider pair, then (re)builds the provider
# connection from the current context (if any) or the global config.
# Returns self so calls can be chained.
def with_model(model_id, provider: nil, assume_exists: false)
@model, @provider = Models.resolve(model_id, provider:, assume_exists:)
@connection = @context ? @context.connection_for(@provider) : @provider.connection(@config)
# TODO: Currently the unsupported errors will not retrigger after model reassignment.

self
end

Expand All @@ -71,6 +74,15 @@ def with_temperature(temperature)
self
end

# Enables or disables extended reasoning ("thinking") for subsequent requests.
#
# reasoning - Boolean toggle, defaults to true.
#
# Raises UnsupportedReasoningError when enabling reasoning on a model that
# lacks the capability. Returns self for chaining.
def with_reasoning(reasoning = true)
  # NOTE(review): the scraped diff garbled this guard ("[email protected]?");
  # reconstructed as a capability check on the current model — confirm against
  # Model::Info's capability predicate.
  if reasoning && !@model.reasoning?
    raise UnsupportedReasoningError, "Model #{@model.id} doesn't support reasoning"
  end

  @reasoning = reasoning
  self
end

def with_context(context)
@context = context
@config = context.config
Expand Down Expand Up @@ -99,6 +111,7 @@ def complete(&)
tools: @tools,
temperature: @temperature,
model: @model.id,
reasoning: @reasoning,
connection: @connection,
&
)
Expand All @@ -122,6 +135,10 @@ def reset_messages!
@messages.clear
end

# Whether reasoning/thinking is currently enabled for this chat.
#
# Bug fix: previously read the never-assigned @thinking ivar, so this always
# returned nil. The flag is stored in @reasoning (set in #initialize and
# #with_reasoning, and forwarded to the provider in #complete).
def thinking?
  @reasoning
end

private

def handle_tool_calls(response, &)
Expand Down
2 changes: 2 additions & 0 deletions lib/ruby_llm/configuration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class Configuration
:default_model,
:default_embedding_model,
:default_image_model,
:default_reasoning_budget,
# Connection configuration
:request_timeout,
:max_retries,
Expand All @@ -53,6 +54,7 @@ def initialize
@default_model = 'gpt-4.1-nano'
@default_embedding_model = 'text-embedding-3-small'
@default_image_model = 'dall-e-3'
@default_reasoning_budget = 1024

# Logging configuration
@log_file = $stdout
Expand Down
1 change: 1 addition & 0 deletions lib/ruby_llm/error.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ class InvalidRoleError < StandardError; end
class ModelNotFoundError < StandardError; end
class UnsupportedFunctionsError < StandardError; end
class UnsupportedAttachmentError < StandardError; end
# Raised when reasoning is requested on a model that lacks the capability
# (see Chat#with_reasoning).
class UnsupportedReasoningError < StandardError; end

# Error classes for different HTTP status codes
class BadRequestError < Error; end
Expand Down
3 changes: 2 additions & 1 deletion lib/ruby_llm/message.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@ module RubyLLM
class Message
ROLES = %i[system user assistant tool].freeze

attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id
attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id, :reasoning_content

def initialize(options = {})
@role = options.fetch(:role).to_sym
@content = normalize_content(options.fetch(:content))
@reasoning_content = options[:reasoning_content]
@tool_calls = options[:tool_calls]
@input_tokens = options[:input_tokens]
@output_tokens = options[:output_tokens]
Expand Down
12 changes: 8 additions & 4 deletions lib/ruby_llm/models.json
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,8 @@
"output": []
},
"capabilities": [
"function_calling"
"function_calling",
"reasoning"
],
"pricing": {
"text_tokens": {
Expand Down Expand Up @@ -287,7 +288,8 @@
]
},
"capabilities": [
"function_calling"
"function_calling",
"reasoning"
],
"pricing": {
"text_tokens": {
Expand Down Expand Up @@ -319,7 +321,8 @@
]
},
"capabilities": [
"function_calling"
"function_calling",
"reasoning"
],
"pricing": {
"text_tokens": {
Expand Down Expand Up @@ -9512,7 +9515,8 @@
},
"capabilities": [
"streaming",
"function_calling"
"function_calling",
"reasoning"
],
"pricing": {
"text_tokens": {
Expand Down
3 changes: 2 additions & 1 deletion lib/ruby_llm/provider.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,14 @@ module Provider
module Methods
extend Streaming

def complete(messages, tools:, temperature:, model:, connection:, &)
def complete(messages, tools:, temperature:, model:, reasoning:, connection:, &) # rubocop:disable Metrics/ParameterLists
normalized_temperature = maybe_normalize_temperature(temperature, model)

payload = render_payload(messages,
tools: tools,
temperature: normalized_temperature,
model: model,
reasoning: reasoning,
stream: block_given?)

if block_given?
Expand Down
25 changes: 20 additions & 5 deletions lib/ruby_llm/providers/anthropic/chat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ def completion_url
'/v1/messages'
end

# Builds the request body for Anthropic's /v1/messages endpoint.
# (Cleaned up: the scraped diff left the superseded pre-change lines
# interleaved with the new ones; this is the post-change method.)
#
# messages    - Array of Message objects (system + chat).
# tools:      - Hash of registered tools.
# temperature:- Numeric sampling temperature (already normalized by caller).
# model:      - String model id.
# reasoning:  - Boolean; enables Anthropic extended thinking when true.
# stream:     - Boolean; whether this is a streaming request.
def render_payload(messages, tools:, temperature:, model:, reasoning:, stream: false) # rubocop:disable Metrics/ParameterLists
  system_messages, chat_messages = separate_messages(messages)
  system_content = build_system_content(system_messages)

  build_base_payload(chat_messages, temperature, model, stream).tap do |payload|
    add_optional_fields(payload, system_content:, tools:, reasoning:)
  end
end

Expand Down Expand Up @@ -45,30 +45,45 @@ def build_base_payload(chat_messages, temperature, model, stream)
}
end

# Adds optional top-level fields to the Anthropic request payload (mutates it).
#
# payload        - Hash request body.
# system_content:- String system prompt; only set when non-empty.
# tools:         - Hash of tools; serialized only when any are registered.
# reasoning:     - Boolean; when true, enables Anthropic "thinking" with the
#                  globally configured token budget (1024 fallback).
def add_optional_fields(payload, system_content:, tools:, reasoning:)
  payload[:tools] = tools.values.map { |t| Tools.function_for(t) } if tools.any?
  payload[:system] = system_content unless system_content.empty?
  return unless reasoning

  payload[:thinking] = {
    type: 'enabled',
    budget_tokens: RubyLLM.config.default_reasoning_budget || 1024
  }
end

# Parses a non-streaming Anthropic response body into a Message, extracting
# text, thinking (reasoning) content, and any tool-use block.
# (Cleaned up: removed the superseded pre-change build_message call that the
# scraped diff left interleaved.)
def parse_completion_response(response)
  data = response.body
  RubyLLM.logger.debug("Anthropic response: #{data}")

  content_blocks = data['content'] || []

  reasoning_content = extract_reasoning_content(content_blocks)
  text_content = extract_text_content(content_blocks)
  tool_use = Tools.find_tool_use(content_blocks)

  build_message(data, text_content, tool_use, reasoning_content)
end

# Concatenates the text of every 'thinking' content block into one string.
# Returns '' when no thinking blocks are present.
def extract_reasoning_content(blocks)
  blocks.filter_map { |block| block['thinking'] if block['type'] == 'thinking' }.join
end

# Concatenates the text of every 'text' content block into one string.
# Returns '' when no text blocks are present.
def extract_text_content(blocks)
  blocks.filter_map { |block| block['text'] if block['type'] == 'text' }.join
end

def build_message(data, content, tool_use)
def build_message(data, content, tool_use, reasoning_content)
Message.new(
role: :assistant,
content: content,
reasoning_content: reasoning_content,
tool_calls: Tools.parse_tool_calls(tool_use),
input_tokens: data.dig('usage', 'input_tokens'),
output_tokens: data.dig('usage', 'output_tokens'),
Expand Down
1 change: 1 addition & 0 deletions lib/ruby_llm/providers/anthropic/streaming.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def build_chunk(data)
role: :assistant,
model_id: extract_model_id(data),
content: data.dig('delta', 'text'),
reasoning_content: data.dig('delta', 'thinking'),
input_tokens: extract_input_tokens(data),
output_tokens: extract_output_tokens(data),
tool_calls: extract_tool_calls(data)
Expand Down
2 changes: 1 addition & 1 deletion lib/ruby_llm/providers/bedrock/chat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def completion_url
"model/#{@model_id}/invoke"
end

def render_payload(messages, tools:, temperature:, model:, stream: false) # rubocop:disable Lint/UnusedMethodArgument
def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists
# Hold model_id in instance variable for use in completion_url and stream_url
@model_id = model

Expand Down
2 changes: 1 addition & 1 deletion lib/ruby_llm/providers/gemini/chat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def completion_url
"models/#{@model}:generateContent"
end

def render_payload(messages, tools:, temperature:, model:, stream: false) # rubocop:disable Lint/UnusedMethodArgument
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Discarding unused params with '**' is my preference here, but I'd be keen to hear others' opinions, please.

def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists
@model = model # Store model for completion_url/stream_url
payload = {
contents: format_messages(messages),
Expand Down
2 changes: 1 addition & 1 deletion lib/ruby_llm/providers/openai/chat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def completion_url

module_function

def render_payload(messages, tools:, temperature:, model:, stream: false)
def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists
{
model: model,
messages: format_messages(messages),
Expand Down
3 changes: 3 additions & 0 deletions lib/ruby_llm/stream_accumulator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ class StreamAccumulator

def initialize
@content = String.new
@reasoning_content = String.new
@tool_calls = {}
@input_tokens = 0
@output_tokens = 0
Expand All @@ -23,6 +24,7 @@ def add(chunk)
accumulate_tool_calls chunk.tool_calls
else
@content << (chunk.content || '')
@reasoning_content << (chunk.reasoning_content || '')
end

count_tokens chunk
Expand All @@ -33,6 +35,7 @@ def to_message
Message.new(
role: :assistant,
content: content.empty? ? nil : content,
reasoning_content: @reasoning_content.empty? ? nil : @reasoning_content,
model_id: model_id,
tool_calls: tool_calls_from_stream,
input_tokens: @input_tokens.positive? ? @input_tokens : nil,
Expand Down