Skip to content
Draft
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
81ed712
feat: wip - add thinking content to messages
rhys117 May 14, 2025
a47870a
Merge branch 'main' into 154-thinking
rhys117 Jun 6, 2025
b6e1bb0
chore: add thinking to capabilities
rhys117 Jun 6, 2025
ecb69c9
chore: pass thinking through from chat initialisation
rhys117 Jun 6, 2025
a014b77
chore: add very basic config for thinking budget through global confi…
rhys117 Jun 6, 2025
ddb0ae1
bug: fix config missing comma
rhys117 Jun 6, 2025
6d66491
chore: add streaming content
rhys117 Jun 6, 2025
7da672e
chore: rename to use existing reasoning capability
rhys117 Jun 6, 2025
c948b0e
Merge branch 'main' into 154-thinking
rhys117 Jun 22, 2025
6b4fb83
chore: rename to thinking
rhys117 Jun 22, 2025
7ec6733
Get thinking working with bedrock
hiemanshu Jun 27, 2025
8709018
Merge branch 'main' into 154-thinking
crmne Jul 16, 2025
b8fb932
Merge pull request #1 from recitalsoftware/154-thinking
rhys117 Jul 17, 2025
5577bae
chore: update anthropic capabilities with thinking
rhys117 Jul 18, 2025
5c02af2
chore: move temperature setting to param
rhys117 Jul 18, 2025
153440c
chore: use 'thinking' capability instead of reasoning in Model::Info
rhys117 Jul 18, 2025
627ffe0
chore: allow thinking capabilities on assumed models
rhys117 Jul 18, 2025
8a6453d
bug: fix call to check if thinking supported in 'with_thinking'
rhys117 Jul 18, 2025
cc1ce5f
test: add basic spec for anthropic models
rhys117 Jul 18, 2025
87fa6a5
Merge branch 'main' into 154-thinking
rhys117 Jul 18, 2025
06daa1c
bug: ensure render_payload args compatibility across all providers
rhys117 Jul 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions lib/ruby_llm/chat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class Chat

attr_reader :model, :messages, :tools

def initialize(model: nil, provider: nil, assume_model_exists: false, context: nil)
def initialize(model: nil, provider: nil, assume_model_exists: false, context: nil, thinking: false)
if assume_model_exists && !provider
raise ArgumentError, 'Provider must be specified if assume_model_exists is true'
end
Expand All @@ -22,6 +22,7 @@ def initialize(model: nil, provider: nil, assume_model_exists: false, context: n
@config = context&.config || RubyLLM.config
model_id = model || @config.default_model
with_model(model_id, provider: provider, assume_exists: assume_model_exists)
@thinking = thinking
@temperature = 0.7
@messages = []
@tools = {}
Expand Down Expand Up @@ -60,9 +61,15 @@ def with_tools(*tools)
self
end

def with_model(model_id, provider: nil, assume_exists: false)
# Switches this chat to another model/provider pair and refreshes the
# connection accordingly. The thinking flag is only overwritten when the
# caller passes it explicitly; otherwise the value chosen at
# initialization is preserved.
def with_model(model_id, provider: nil, thinking: nil, assume_exists: false)
  @model, @provider = Models.resolve(model_id, provider:, assume_exists:)
  @connection = if @context
                  @context.connection_for(@provider)
                else
                  @provider.connection(@config)
                end

  # Preserve thinking state from initialization unless explicitly overridden
  @thinking = thinking unless thinking.nil?

  self
end

Expand Down Expand Up @@ -99,6 +106,7 @@ def complete(&)
tools: @tools,
temperature: @temperature,
model: @model.id,
thinking: @thinking,
connection: @connection,
&
)
Expand Down
2 changes: 2 additions & 0 deletions lib/ruby_llm/configuration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class Configuration
:default_model,
:default_embedding_model,
:default_image_model,
:default_thinking_budget,
# Connection configuration
:request_timeout,
:max_retries,
Expand All @@ -53,6 +54,7 @@ def initialize
@default_model = 'gpt-4.1-nano'
@default_embedding_model = 'text-embedding-3-small'
@default_image_model = 'dall-e-3'
@default_thinking_budget = 1024

# Logging configuration
@log_file = $stdout
Expand Down
3 changes: 2 additions & 1 deletion lib/ruby_llm/message.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@ module RubyLLM
class Message
ROLES = %i[system user assistant tool].freeze

attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id
attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id, :thinking_content

def initialize(options = {})
@role = options.fetch(:role).to_sym
@content = normalize_content(options.fetch(:content))
@thinking_content = options[:thinking_content]
@tool_calls = options[:tool_calls]
@input_tokens = options[:input_tokens]
@output_tokens = options[:output_tokens]
Expand Down
2 changes: 1 addition & 1 deletion lib/ruby_llm/model/info.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def supports?(capability)
capabilities.include?(capability.to_s)
end

%w[function_calling structured_output batch reasoning citations streaming].each do |cap|
%w[function_calling structured_output batch reasoning citations streaming thinking].each do |cap|
define_method "#{cap}?" do
supports?(cap)
end
Expand Down
12 changes: 8 additions & 4 deletions lib/ruby_llm/models.json
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,8 @@
"output": []
},
"capabilities": [
"function_calling"
"function_calling",
"thinking"
],
"pricing": {
"text_tokens": {
Expand Down Expand Up @@ -287,7 +288,8 @@
]
},
"capabilities": [
"function_calling"
"function_calling",
"thinking"
],
"pricing": {
"text_tokens": {
Expand Down Expand Up @@ -319,7 +321,8 @@
]
},
"capabilities": [
"function_calling"
"function_calling",
"thinking"
],
"pricing": {
"text_tokens": {
Expand Down Expand Up @@ -9512,7 +9515,8 @@
},
"capabilities": [
"streaming",
"function_calling"
"function_calling",
"thinking"
],
"pricing": {
"text_tokens": {
Expand Down
3 changes: 2 additions & 1 deletion lib/ruby_llm/provider.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,14 @@ module Provider
module Methods
extend Streaming

def complete(messages, tools:, temperature:, model:, connection:, &)
def complete(messages, tools:, temperature:, model:, thinking:, connection:, &)
normalized_temperature = maybe_normalize_temperature(temperature, model)

payload = render_payload(messages,
tools: tools,
temperature: normalized_temperature,
model: model,
thinking: thinking,
stream: block_given?)

if block_given?
Expand Down
27 changes: 21 additions & 6 deletions lib/ruby_llm/providers/anthropic/chat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ def completion_url
'/v1/messages'
end

def render_payload(messages, tools:, temperature:, model:, stream: false)
# Builds the Anthropic /v1/messages request body: splits system prompts
# from chat turns, assembles the base payload, then layers on the
# optional fields (tools, system prompt, extended thinking).
def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false)
  system_messages, chat_messages = separate_messages(messages)

  payload = build_base_payload(chat_messages, temperature, model, stream)
  add_optional_fields(payload,
                      system_content: build_system_content(system_messages),
                      tools: tools,
                      thinking: thinking)
  payload
end

Expand All @@ -39,36 +39,51 @@ def build_base_payload(chat_messages, temperature, model, stream)
{
model: model,
messages: chat_messages.map { |msg| format_message(msg) },
temperature: temperature,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO:

temperature: 1, # TODO: Ensure to maintain this as being configurable - but must be set to 1 to enable thinking
stream: stream,
max_tokens: RubyLLM.models.find(model)&.max_tokens || 4096
}
end

def add_optional_fields(payload, system_content:, tools:)
# Mutates +payload+ in place, attaching the non-mandatory request fields:
# tool definitions (when any tools are registered), the system prompt
# (when non-empty), and the extended-thinking block (when enabled).
def add_optional_fields(payload, system_content:, tools:, thinking:)
  payload[:tools] = tools.values.map { |tool| Tools.function_for(tool) } if tools.any?
  payload[:system] = system_content unless system_content.empty?
  return unless thinking

  payload[:thinking] = {
    type: 'enabled',
    budget_tokens: RubyLLM.config.default_thinking_budget || 1024
  }
end

# Turns a raw Anthropic completion response into a Message, pulling the
# thinking text, the visible text, and any tool-use block out of the
# response's content array.
def parse_completion_response(response)
  data = response.body
  RubyLLM.logger.debug("Anthropic response: #{data}")

  blocks = data['content'] || []

  build_message(
    data,
    extract_text_content(blocks),
    Tools.find_tool_use(blocks),
    extract_thinking_content(blocks)
  )
end

# Concatenates the text of every 'thinking' content block into one string.
def extract_thinking_content(blocks)
  blocks
    .filter_map { |block| block['thinking'] if block['type'] == 'thinking' }
    .join
end

# Concatenates the text of every 'text' content block into one string.
def extract_text_content(blocks)
  blocks
    .filter_map { |block| block['text'] if block['type'] == 'text' }
    .join
end

def build_message(data, content, tool_use)
def build_message(data, content, tool_use, thinking_content)
Message.new(
role: :assistant,
content: content,
thinking_content: thinking_content,
tool_calls: Tools.parse_tool_calls(tool_use),
input_tokens: data.dig('usage', 'input_tokens'),
output_tokens: data.dig('usage', 'output_tokens'),
Expand Down
1 change: 1 addition & 0 deletions lib/ruby_llm/providers/anthropic/streaming.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def build_chunk(data)
role: :assistant,
model_id: extract_model_id(data),
content: data.dig('delta', 'text'),
thinking_content: data.dig('delta', 'thinking'),
input_tokens: extract_input_tokens(data),
output_tokens: extract_output_tokens(data),
tool_calls: extract_tool_calls(data)
Expand Down
2 changes: 1 addition & 1 deletion lib/ruby_llm/providers/bedrock/chat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def completion_url
"model/#{@model_id}/invoke"
end

def render_payload(messages, tools:, temperature:, model:, stream: false) # rubocop:disable Lint/UnusedMethodArgument
def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) # rubocop:disable Lint/UnusedMethodArgument
# Hold model_id in instance variable for use in completion_url and stream_url
@model_id = model

Expand Down
2 changes: 1 addition & 1 deletion lib/ruby_llm/providers/gemini/chat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def completion_url
"models/#{@model}:generateContent"
end

def render_payload(messages, tools:, temperature:, model:, stream: false) # rubocop:disable Lint/UnusedMethodArgument
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Discarding unused params using '**' is my preference here, but I would be keen to hear others' opinions, please.

def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) # rubocop:disable Lint/UnusedMethodArgument
@model = model # Store model for completion_url/stream_url
payload = {
contents: format_messages(messages),
Expand Down
2 changes: 1 addition & 1 deletion lib/ruby_llm/providers/openai/chat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def completion_url

module_function

def render_payload(messages, tools:, temperature:, model:, stream: false)
def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false)
{
model: model,
messages: format_messages(messages),
Expand Down
3 changes: 3 additions & 0 deletions lib/ruby_llm/stream_accumulator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ class StreamAccumulator

def initialize
@content = String.new
@thinking_content = String.new
@tool_calls = {}
@input_tokens = 0
@output_tokens = 0
Expand All @@ -23,6 +24,7 @@ def add(chunk)
accumulate_tool_calls chunk.tool_calls
else
@content << (chunk.content || '')
@thinking_content << (chunk.thinking_content || '')
end

count_tokens chunk
Expand All @@ -33,6 +35,7 @@ def to_message
Message.new(
role: :assistant,
content: content.empty? ? nil : content,
thinking_content: @thinking_content.empty? ? nil : @thinking_content,
model_id: model_id,
tool_calls: tool_calls_from_stream,
input_tokens: @input_tokens.positive? ? @input_tokens : nil,
Expand Down