Skip to content
Draft
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
81ed712
feat: wip - add thinking content to messages
rhys117 May 14, 2025
a47870a
Merge branch 'main' into 154-thinking
rhys117 Jun 6, 2025
b6e1bb0
chore: add thinking to capabilities
rhys117 Jun 6, 2025
ecb69c9
chore: pass thinking through from chat initialisation
rhys117 Jun 6, 2025
a014b77
chore: add very basic config for thinking budget through global confi…
rhys117 Jun 6, 2025
ddb0ae1
bug: fix config missing comma
rhys117 Jun 6, 2025
6d66491
chore: add streaming content
rhys117 Jun 6, 2025
7da672e
chore: rename to use existing reasoning capability
rhys117 Jun 6, 2025
c948b0e
Merge branch 'main' into 154-thinking
rhys117 Jun 22, 2025
6b4fb83
chore: rename to thinking
rhys117 Jun 22, 2025
7ec6733
Get thinking working with bedrock
hiemanshu Jun 27, 2025
8709018
Merge branch 'main' into 154-thinking
crmne Jul 16, 2025
b8fb932
Merge pull request #1 from recitalsoftware/154-thinking
rhys117 Jul 17, 2025
5577bae
chore: update anthropic capabilities with thinking
rhys117 Jul 18, 2025
5c02af2
chore: move temperature setting to param
rhys117 Jul 18, 2025
153440c
chore: use 'thinking' capability instead of reasoning in Model::Info
rhys117 Jul 18, 2025
627ffe0
chore: allow thinking capabilities on assumed models
rhys117 Jul 18, 2025
8a6453d
bug: fix call to check if thinking supported in 'with_thinking'
rhys117 Jul 18, 2025
cc1ce5f
test: add basic spec for anthropic models
rhys117 Jul 18, 2025
87fa6a5
Merge branch 'main' into 154-thinking
rhys117 Jul 18, 2025
06daa1c
bug: ensure render_payload args compatibility across all providers
rhys117 Jul 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions lib/ruby_llm/chat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class Chat

attr_reader :model, :messages, :tools

def initialize(model: nil, provider: nil, assume_model_exists: false, context: nil)
def initialize(model: nil, provider: nil, assume_model_exists: false, context: nil, thinking: false)
if assume_model_exists && !provider
raise ArgumentError, 'Provider must be specified if assume_model_exists is true'
end
Expand All @@ -22,6 +22,7 @@ def initialize(model: nil, provider: nil, assume_model_exists: false, context: n
@config = context&.config || RubyLLM.config
model_id = model || @config.default_model
with_model(model_id, provider: provider, assume_exists: assume_model_exists)
@thinking = thinking
@temperature = 0.7
@messages = []
@tools = {}
Expand Down Expand Up @@ -60,9 +61,15 @@ def with_tools(*tools)
self
end

def with_model(model_id, provider: nil, assume_exists: false)
# Switches this chat to another model/provider pair and refreshes the
# connection accordingly. The thinking flag is only overwritten when the
# caller passes it explicitly; otherwise the value chosen at
# initialization is preserved.
def with_model(model_id, provider: nil, thinking: nil, assume_exists: false)
  @model, @provider = Models.resolve(model_id, provider:, assume_exists:)
  @connection = if @context
                  @context.connection_for(@provider)
                else
                  @provider.connection(@config)
                end

  # Preserve thinking state from initialization unless explicitly overridden
  @thinking = thinking unless thinking.nil?

  self
end

Expand Down Expand Up @@ -99,6 +106,7 @@ def complete(&)
tools: @tools,
temperature: @temperature,
model: @model.id,
thinking: @thinking,
connection: @connection,
&
)
Expand Down
2 changes: 2 additions & 0 deletions lib/ruby_llm/configuration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class Configuration
:default_model,
:default_embedding_model,
:default_image_model,
:default_thinking_budget,
# Connection configuration
:request_timeout,
:max_retries,
Expand All @@ -53,6 +54,7 @@ def initialize
@default_model = 'gpt-4.1-nano'
@default_embedding_model = 'text-embedding-3-small'
@default_image_model = 'dall-e-3'
@default_thinking_budget = 1024

# Logging configuration
@log_file = $stdout
Expand Down
3 changes: 2 additions & 1 deletion lib/ruby_llm/message.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@ module RubyLLM
class Message
ROLES = %i[system user assistant tool].freeze

attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id
attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id, :thinking_content

def initialize(options = {})
@role = options.fetch(:role).to_sym
@content = normalize_content(options.fetch(:content))
@thinking_content = options[:thinking_content]
@tool_calls = options[:tool_calls]
@input_tokens = options[:input_tokens]
@output_tokens = options[:output_tokens]
Expand Down
2 changes: 1 addition & 1 deletion lib/ruby_llm/model/info.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def supports?(capability)
capabilities.include?(capability.to_s)
end

%w[function_calling structured_output batch reasoning citations streaming].each do |cap|
%w[function_calling structured_output batch reasoning citations streaming thinking].each do |cap|
define_method "#{cap}?" do
supports?(cap)
end
Expand Down
12 changes: 8 additions & 4 deletions lib/ruby_llm/models.json
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,8 @@
"output": []
},
"capabilities": [
"function_calling"
"function_calling",
"thinking"
],
"pricing": {
"text_tokens": {
Expand Down Expand Up @@ -287,7 +288,8 @@
]
},
"capabilities": [
"function_calling"
"function_calling",
"thinking"
],
"pricing": {
"text_tokens": {
Expand Down Expand Up @@ -319,7 +321,8 @@
]
},
"capabilities": [
"function_calling"
"function_calling",
"thinking"
],
"pricing": {
"text_tokens": {
Expand Down Expand Up @@ -9512,7 +9515,8 @@
},
"capabilities": [
"streaming",
"function_calling"
"function_calling",
"thinking"
],
"pricing": {
"text_tokens": {
Expand Down
3 changes: 2 additions & 1 deletion lib/ruby_llm/provider.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,14 @@ module Provider
module Methods
extend Streaming

def complete(messages, tools:, temperature:, model:, connection:, &)
def complete(messages, tools:, temperature:, model:, thinking:, connection:, &)
normalized_temperature = maybe_normalize_temperature(temperature, model)

payload = render_payload(messages,
tools: tools,
temperature: normalized_temperature,
model: model,
thinking: thinking,
stream: block_given?)

if block_given?
Expand Down
27 changes: 21 additions & 6 deletions lib/ruby_llm/providers/anthropic/chat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ def completion_url
'/v1/messages'
end

def render_payload(messages, tools:, temperature:, model:, stream: false)
# Builds the Anthropic /v1/messages request body: splits system prompts
# from chat turns, assembles the base payload, then layers on the
# optional fields (tools, system prompt, extended thinking).
def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false)
  system_messages, chat_messages = separate_messages(messages)

  payload = build_base_payload(chat_messages, temperature, model, stream)
  add_optional_fields(payload,
                      system_content: build_system_content(system_messages),
                      tools: tools,
                      thinking: thinking)
  payload
end

Expand All @@ -39,36 +39,51 @@ def build_base_payload(chat_messages, temperature, model, stream)
{
model: model,
messages: chat_messages.map { |msg| format_message(msg) },
temperature: temperature,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO:

temperature: 1, # TODO: Ensure to maintain this as being configurable - but must be set to 1 to enable thinking
stream: stream,
max_tokens: RubyLLM.models.find(model)&.max_tokens || 4096
}
end

def add_optional_fields(payload, system_content:, tools:)
# Mutates +payload+ in place, attaching the non-mandatory request fields:
# tool definitions (when any tools are registered), the system prompt
# (when non-empty), and the extended-thinking block (when enabled).
def add_optional_fields(payload, system_content:, tools:, thinking:)
  payload[:tools] = tools.values.map { |tool| Tools.function_for(tool) } if tools.any?
  payload[:system] = system_content unless system_content.empty?
  return unless thinking

  payload[:thinking] = {
    type: 'enabled',
    budget_tokens: RubyLLM.config.default_thinking_budget || 1024
  }
end

# Turns a raw Anthropic completion response into a Message, pulling the
# thinking text, the visible text, and any tool-use block out of the
# response's content array.
def parse_completion_response(response)
  data = response.body
  RubyLLM.logger.debug("Anthropic response: #{data}")

  blocks = data['content'] || []

  build_message(
    data,
    extract_text_content(blocks),
    Tools.find_tool_use(blocks),
    extract_thinking_content(blocks)
  )
end

# Concatenates the text of every 'thinking' content block into one string.
def extract_thinking_content(blocks)
  blocks
    .filter_map { |block| block['thinking'] if block['type'] == 'thinking' }
    .join
end

# Concatenates the text of every 'text' content block into one string.
def extract_text_content(blocks)
  blocks
    .filter_map { |block| block['text'] if block['type'] == 'text' }
    .join
end

def build_message(data, content, tool_use)
def build_message(data, content, tool_use, thinking_content)
Message.new(
role: :assistant,
content: content,
thinking_content: thinking_content,
tool_calls: Tools.parse_tool_calls(tool_use),
input_tokens: data.dig('usage', 'input_tokens'),
output_tokens: data.dig('usage', 'output_tokens'),
Expand Down
1 change: 1 addition & 0 deletions lib/ruby_llm/providers/anthropic/streaming.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def build_chunk(data)
role: :assistant,
model_id: extract_model_id(data),
content: data.dig('delta', 'text'),
thinking_content: data.dig('delta', 'thinking'),
input_tokens: extract_input_tokens(data),
output_tokens: extract_output_tokens(data),
tool_calls: extract_tool_calls(data)
Expand Down
2 changes: 1 addition & 1 deletion lib/ruby_llm/providers/bedrock/chat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def completion_url
"model/#{@model_id}/invoke"
end

def render_payload(messages, tools:, temperature:, model:, stream: false) # rubocop:disable Lint/UnusedMethodArgument
def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) # rubocop:disable Lint/UnusedMethodArgument
# Hold model_id in instance variable for use in completion_url and stream_url
@model_id = model

Expand Down
2 changes: 1 addition & 1 deletion lib/ruby_llm/providers/gemini/chat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def completion_url
"models/#{@model}:generateContent"
end

def render_payload(messages, tools:, temperature:, model:, stream: false) # rubocop:disable Lint/UnusedMethodArgument
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Discarding unused params using '**' is my preference here, but I would be keen to hear others' opinions, please.

def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) # rubocop:disable Lint/UnusedMethodArgument
@model = model # Store model for completion_url/stream_url
payload = {
contents: format_messages(messages),
Expand Down
2 changes: 1 addition & 1 deletion lib/ruby_llm/providers/openai/chat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def completion_url

module_function

def render_payload(messages, tools:, temperature:, model:, stream: false)
def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false)
{
model: model,
messages: format_messages(messages),
Expand Down
3 changes: 3 additions & 0 deletions lib/ruby_llm/stream_accumulator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ class StreamAccumulator

def initialize
@content = String.new
@thinking_content = String.new
@tool_calls = {}
@input_tokens = 0
@output_tokens = 0
Expand All @@ -23,6 +24,7 @@ def add(chunk)
accumulate_tool_calls chunk.tool_calls
else
@content << (chunk.content || '')
@thinking_content << (chunk.thinking_content || '')
end

count_tokens chunk
Expand All @@ -33,6 +35,7 @@ def to_message
Message.new(
role: :assistant,
content: content.empty? ? nil : content,
thinking_content: @thinking_content.empty? ? nil : @thinking_content,
model_id: model_id,
tool_calls: tool_calls_from_stream,
input_tokens: @input_tokens.positive? ? @input_tokens : nil,
Expand Down