Skip to content
Draft
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
81ed712
feat: wip - add thinking content to messages
rhys117 May 14, 2025
a47870a
Merge branch 'main' into 154-thinking
rhys117 Jun 6, 2025
b6e1bb0
chore: add thinking to capabilities
rhys117 Jun 6, 2025
ecb69c9
chore: pass thinking through from chat initialisation
rhys117 Jun 6, 2025
a014b77
chore: add very basic config for thinking budget through global confi…
rhys117 Jun 6, 2025
ddb0ae1
bug: fix config missing comma
rhys117 Jun 6, 2025
6d66491
chore: add streaming content
rhys117 Jun 6, 2025
7da672e
chore: rename to use existing reasoning capability
rhys117 Jun 6, 2025
c948b0e
Merge branch 'main' into 154-thinking
rhys117 Jun 22, 2025
6b4fb83
chore: rename to thinking
rhys117 Jun 22, 2025
7ec6733
Get thinking working with bedrock
hiemanshu Jun 27, 2025
8709018
Merge branch 'main' into 154-thinking
crmne Jul 16, 2025
b8fb932
Merge pull request #1 from recitalsoftware/154-thinking
rhys117 Jul 17, 2025
5577bae
chore: update anthropic capabilities with thinking
rhys117 Jul 18, 2025
5c02af2
chore: move temperature setting to param
rhys117 Jul 18, 2025
153440c
chore: use 'thinking' capability instead of reasoning in Model::Info
rhys117 Jul 18, 2025
627ffe0
chore: allow thinking capabilties on assumed models
rhys117 Jul 18, 2025
8a6453d
bug: fix call to check if thinking supported in 'with_thinking'
rhys117 Jul 18, 2025
cc1ce5f
test: add basic spec for anthropic models
rhys117 Jul 18, 2025
87fa6a5
Merge branch 'main' into 154-thinking
rhys117 Jul 18, 2025
06daa1c
bug: ensure render_payload args compatibility across all providers
rhys117 Jul 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions lib/ruby_llm/chat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def initialize(model: nil, provider: nil, assume_model_exists: false, context: n
@config = context&.config || RubyLLM.config
model_id = model || @config.default_model
with_model(model_id, provider: provider, assume_exists: assume_model_exists)
@reasoning = false
@temperature = 0.7
@messages = []
@tools = {}
Expand Down Expand Up @@ -63,6 +64,8 @@ def with_tools(*tools)
# Resolves and assigns the model/provider pair, then (re)builds the provider
# connection from the current context (if any) or the global config.
# Returns self so calls can be chained.
def with_model(model_id, provider: nil, assume_exists: false)
@model, @provider = Models.resolve(model_id, provider:, assume_exists:)
@connection = @context ? @context.connection_for(@provider) : @provider.connection(@config)
# TODO: Currently the unsupported errors will not retrigger after model reassignment.

self
end

Expand All @@ -71,6 +74,15 @@ def with_temperature(temperature)
self
end

# Enables or disables extended reasoning ("thinking") for subsequent requests.
#
# reasoning - Boolean toggle, defaults to true.
#
# Raises UnsupportedReasoningError when enabling reasoning on a model that
# lacks the capability. Returns self for chaining.
def with_reasoning(reasoning = true)
  # NOTE(review): the scraped diff garbled this guard ("[email protected]?");
  # reconstructed as a capability check on the current model — confirm against
  # Model::Info's capability predicate.
  if reasoning && !@model.reasoning?
    raise UnsupportedReasoningError, "Model #{@model.id} doesn't support reasoning"
  end

  @reasoning = reasoning
  self
end

def with_context(context)
@context = context
@config = context.config
Expand Down Expand Up @@ -99,6 +111,7 @@ def complete(&)
tools: @tools,
temperature: @temperature,
model: @model.id,
reasoning: @reasoning,
connection: @connection,
&
)
Expand All @@ -122,6 +135,10 @@ def reset_messages!
@messages.clear
end

# Whether reasoning/thinking is currently enabled for this chat.
#
# Bug fix: previously read the never-assigned @thinking ivar, so this always
# returned nil. The flag is stored in @reasoning (set in #initialize and
# #with_reasoning, and forwarded to the provider in #complete).
def thinking?
  @reasoning
end

private

def handle_tool_calls(response, &)
Expand Down
2 changes: 2 additions & 0 deletions lib/ruby_llm/configuration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class Configuration
:default_model,
:default_embedding_model,
:default_image_model,
:default_reasoning_budget,
# Connection configuration
:request_timeout,
:max_retries,
Expand All @@ -53,6 +54,7 @@ def initialize
@default_model = 'gpt-4.1-nano'
@default_embedding_model = 'text-embedding-3-small'
@default_image_model = 'dall-e-3'
@default_reasoning_budget = 1024

# Logging configuration
@log_file = $stdout
Expand Down
1 change: 1 addition & 0 deletions lib/ruby_llm/error.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ class InvalidRoleError < StandardError; end
class ModelNotFoundError < StandardError; end
class UnsupportedFunctionsError < StandardError; end
class UnsupportedAttachmentError < StandardError; end
# Raised when reasoning is requested on a model that lacks the capability
# (see Chat#with_reasoning).
class UnsupportedReasoningError < StandardError; end

# Error classes for different HTTP status codes
class BadRequestError < Error; end
Expand Down
3 changes: 2 additions & 1 deletion lib/ruby_llm/message.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@ module RubyLLM
class Message
ROLES = %i[system user assistant tool].freeze

attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id
attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id, :reasoning_content

def initialize(options = {})
@role = options.fetch(:role).to_sym
@content = normalize_content(options.fetch(:content))
@reasoning_content = options[:reasoning_content]
@tool_calls = options[:tool_calls]
@input_tokens = options[:input_tokens]
@output_tokens = options[:output_tokens]
Expand Down
12 changes: 8 additions & 4 deletions lib/ruby_llm/models.json
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,8 @@
"output": []
},
"capabilities": [
"function_calling"
"function_calling",
"reasoning"
],
"pricing": {
"text_tokens": {
Expand Down Expand Up @@ -287,7 +288,8 @@
]
},
"capabilities": [
"function_calling"
"function_calling",
"reasoning"
],
"pricing": {
"text_tokens": {
Expand Down Expand Up @@ -319,7 +321,8 @@
]
},
"capabilities": [
"function_calling"
"function_calling",
"reasoning"
],
"pricing": {
"text_tokens": {
Expand Down Expand Up @@ -9512,7 +9515,8 @@
},
"capabilities": [
"streaming",
"function_calling"
"function_calling",
"reasoning"
],
"pricing": {
"text_tokens": {
Expand Down
3 changes: 2 additions & 1 deletion lib/ruby_llm/provider.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,14 @@ module Provider
module Methods
extend Streaming

def complete(messages, tools:, temperature:, model:, connection:, &)
def complete(messages, tools:, temperature:, model:, reasoning:, connection:, &) # rubocop:disable Metrics/ParameterLists
normalized_temperature = maybe_normalize_temperature(temperature, model)

payload = render_payload(messages,
tools: tools,
temperature: normalized_temperature,
model: model,
reasoning: reasoning,
stream: block_given?)

if block_given?
Expand Down
25 changes: 20 additions & 5 deletions lib/ruby_llm/providers/anthropic/chat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ def completion_url
'/v1/messages'
end

# Builds the request body for Anthropic's /v1/messages endpoint.
# (Cleaned up: the scraped diff left the superseded pre-change lines
# interleaved with the new ones; this is the post-change method.)
#
# messages    - Array of Message objects (system + chat).
# tools:      - Hash of registered tools.
# temperature:- Numeric sampling temperature (already normalized by caller).
# model:      - String model id.
# reasoning:  - Boolean; enables Anthropic extended thinking when true.
# stream:     - Boolean; whether this is a streaming request.
def render_payload(messages, tools:, temperature:, model:, reasoning:, stream: false) # rubocop:disable Metrics/ParameterLists
  system_messages, chat_messages = separate_messages(messages)
  system_content = build_system_content(system_messages)

  build_base_payload(chat_messages, temperature, model, stream).tap do |payload|
    add_optional_fields(payload, system_content:, tools:, reasoning:)
  end
end

Expand Down Expand Up @@ -45,30 +45,45 @@ def build_base_payload(chat_messages, temperature, model, stream)
}
end

# Adds optional top-level fields to the Anthropic request payload (mutates it).
#
# payload        - Hash request body.
# system_content:- String system prompt; only set when non-empty.
# tools:         - Hash of tools; serialized only when any are registered.
# reasoning:     - Boolean; when true, enables Anthropic "thinking" with the
#                  globally configured token budget (1024 fallback).
def add_optional_fields(payload, system_content:, tools:, reasoning:)
  payload[:tools] = tools.values.map { |t| Tools.function_for(t) } if tools.any?
  payload[:system] = system_content unless system_content.empty?
  return unless reasoning

  payload[:thinking] = {
    type: 'enabled',
    budget_tokens: RubyLLM.config.default_reasoning_budget || 1024
  }
end

# Parses a non-streaming Anthropic response body into a Message, extracting
# text, thinking (reasoning) content, and any tool-use block.
# (Cleaned up: removed the superseded pre-change build_message call that the
# scraped diff left interleaved.)
def parse_completion_response(response)
  data = response.body
  RubyLLM.logger.debug("Anthropic response: #{data}")

  content_blocks = data['content'] || []

  reasoning_content = extract_reasoning_content(content_blocks)
  text_content = extract_text_content(content_blocks)
  tool_use = Tools.find_tool_use(content_blocks)

  build_message(data, text_content, tool_use, reasoning_content)
end

# Concatenates the text of every 'thinking' content block into one string.
# Returns '' when no thinking blocks are present.
def extract_reasoning_content(blocks)
  blocks.filter_map { |block| block['thinking'] if block['type'] == 'thinking' }.join
end

# Concatenates the text of every 'text' content block into one string.
# Returns '' when no text blocks are present.
def extract_text_content(blocks)
  blocks.filter_map { |block| block['text'] if block['type'] == 'text' }.join
end

def build_message(data, content, tool_use)
def build_message(data, content, tool_use, reasoning_content)
Message.new(
role: :assistant,
content: content,
reasoning_content: reasoning_content,
tool_calls: Tools.parse_tool_calls(tool_use),
input_tokens: data.dig('usage', 'input_tokens'),
output_tokens: data.dig('usage', 'output_tokens'),
Expand Down
1 change: 1 addition & 0 deletions lib/ruby_llm/providers/anthropic/streaming.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def build_chunk(data)
role: :assistant,
model_id: extract_model_id(data),
content: data.dig('delta', 'text'),
reasoning_content: data.dig('delta', 'thinking'),
input_tokens: extract_input_tokens(data),
output_tokens: extract_output_tokens(data),
tool_calls: extract_tool_calls(data)
Expand Down
2 changes: 1 addition & 1 deletion lib/ruby_llm/providers/bedrock/chat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def completion_url
"model/#{@model_id}/invoke"
end

def render_payload(messages, tools:, temperature:, model:, stream: false) # rubocop:disable Lint/UnusedMethodArgument
def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists
# Hold model_id in instance variable for use in completion_url and stream_url
@model_id = model

Expand Down
2 changes: 1 addition & 1 deletion lib/ruby_llm/providers/gemini/chat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def completion_url
"models/#{@model}:generateContent"
end

def render_payload(messages, tools:, temperature:, model:, stream: false) # rubocop:disable Lint/UnusedMethodArgument
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Discarding unused params with '**' is my preference here, but I'd be keen to hear others' opinions, please.

def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists
@model = model # Store model for completion_url/stream_url
payload = {
contents: format_messages(messages),
Expand Down
2 changes: 1 addition & 1 deletion lib/ruby_llm/providers/openai/chat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def completion_url

module_function

def render_payload(messages, tools:, temperature:, model:, stream: false)
def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists
{
model: model,
messages: format_messages(messages),
Expand Down
3 changes: 3 additions & 0 deletions lib/ruby_llm/stream_accumulator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ class StreamAccumulator

def initialize
@content = String.new
@reasoning_content = String.new
@tool_calls = {}
@input_tokens = 0
@output_tokens = 0
Expand All @@ -23,6 +24,7 @@ def add(chunk)
accumulate_tool_calls chunk.tool_calls
else
@content << (chunk.content || '')
@reasoning_content << (chunk.reasoning_content || '')
end

count_tokens chunk
Expand All @@ -33,6 +35,7 @@ def to_message
Message.new(
role: :assistant,
content: content.empty? ? nil : content,
reasoning_content: @reasoning_content.empty? ? nil : @reasoning_content,
model_id: model_id,
tool_calls: tool_calls_from_stream,
input_tokens: @input_tokens.positive? ? @input_tokens : nil,
Expand Down