Skip to content

Commit d2f834f

Browse files
committed
Support images in Gemini responses
1 parent e060c00 commit d2f834f

File tree

5 files changed

+94
-22
lines changed

5 files changed

+94
-22
lines changed

lib/ruby_llm/attachment.rb

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,14 @@ def encoded
6565
Base64.strict_encode64(content)
6666
end
6767

68+
# Writes the attachment's bytes to +path+.
#
# Only sources for which +io_like?+ is true are written; for anything else
# the method is a no-op.
#
# @param path [String] destination file path; an existing file is truncated
# @return [nil]
def save(path)
  return unless io_like?

  # The stream may already have been consumed by an earlier read; start from
  # the beginning so the whole payload is written rather than an empty tail.
  @source.rewind if @source.respond_to?(:rewind)

  # Binary mode plus +write+ keeps the bytes intact: text mode can translate
  # line endings and +puts+ appends a newline, both of which corrupt images.
  File.open(path, 'wb') { |file| file.write(@source.read) }

  # Keep the historical return value (the old +puts+-based block yielded nil).
  nil
end
75+
6876
def for_llm
6977
case type
7078
when :text

lib/ruby_llm/connection.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ def setup_logging(faraday)
6565
errors: true,
6666
headers: false,
6767
log_level: :debug do |logger|
68-
logger.filter(%r{[A-Za-z0-9+/=]{100,}}, 'data":"[BASE64 DATA]"')
68+
logger.filter(%r{[A-Za-z0-9+/=]{100,}}, '[BASE64 DATA]')
6969
logger.filter(/[-\d.e,\s]{100,}/, '[EMBEDDINGS ARRAY]')
7070
end
7171
end

lib/ruby_llm/providers/gemini/chat.rb

Lines changed: 6 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -50,22 +50,9 @@ def format_role(role)
5050

5151
def format_parts(msg)
5252
if msg.tool_call?
53-
[{
54-
functionCall: {
55-
name: msg.tool_calls.values.first.name,
56-
args: msg.tool_calls.values.first.arguments
57-
}
58-
}]
53+
format_tool_call(msg)
5954
elsif msg.tool_result?
60-
[{
61-
functionResponse: {
62-
name: msg.tool_call_id,
63-
response: {
64-
name: msg.tool_call_id,
65-
content: Media.format_content(msg.content)
66-
}
67-
}
68-
}]
55+
format_tool_result(msg)
6956
else
7057
Media.format_content(msg.content)
7158
end
@@ -77,7 +64,7 @@ def parse_completion_response(response)
7764

7865
Message.new(
7966
role: :assistant,
80-
content: extract_content(data),
67+
content: parse_content(data),
8168
tool_calls: tool_calls,
8269
input_tokens: data.dig('usageMetadata', 'promptTokenCount'),
8370
output_tokens: calculate_output_tokens(data),
@@ -109,17 +96,16 @@ def normalize_any_of(schema)
10996
{ type: 'string', nullable: true }
11097
end
11198

112-
def extract_content(data)
99+
def parse_content(data)
113100
candidate = data.dig('candidates', 0)
114101
return '' unless candidate
115102

116103
return '' if function_call?(candidate)
117104

118105
parts = candidate.dig('content', 'parts')
119-
text_parts = parts&.select { |p| p['text'] }
120-
return '' unless text_parts&.any?
106+
return '' unless parts&.any?
121107

122-
text_parts.map { |p| p['text'] }.join
108+
build_response_content(parts)
123109
end
124110

125111
def function_call?(candidate)

lib/ruby_llm/providers/gemini/media.rb

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
module RubyLLM
44
module Providers
5-
class Gemini
5+
class Gemini # rubocop:disable Style/Documentation
66
# Media handling methods for the Gemini API integration
77
module Media
88
module_function
@@ -50,6 +50,63 @@ def format_text(text)
5050
}
5151
end
5252
end
53+
54+
# Converts the +parts+ array of a Gemini candidate into message content.
#
# Text parts are concatenated in order. +inlineData+ and +fileData+ parts are
# handed to build_inline_attachment / build_file_attachment together with
# their position in +parts+; nil results from those builders are dropped.
#
# @param parts [Array<Hash>] parts with string keys ('text', 'inlineData', 'fileData')
# @return [String, nil, Content] the joined text (nil when it is empty) if no
#   attachment was built, otherwise a Content carrying the text and attachments
def build_response_content(parts) # rubocop:disable Metrics/PerceivedComplexity
  attachments = []

  chunks = parts.each_with_index.filter_map do |part, position|
    next part['text'] if part['text']

    built = if part['inlineData']
              build_inline_attachment(part['inlineData'], position)
            elsif part['fileData']
              build_file_attachment(part['fileData'], position)
            end
    attachments << built if built
    nil
  end

  joined = chunks.join
  body = joined.empty? ? nil : joined
  return body if attachments.empty?

  Content.new(text: body, attachments: attachments)
end
76+
77+
# Builds an attachment from a Gemini +inlineData+ part.
#
# The base64 payload under 'data' is decoded into an in-memory binary IO and
# wrapped in a RubyLLM::Attachment named via attachment_filename.
#
# @param inline_data [Hash] part payload with 'data' and optionally 'mimeType'
# @param index [Integer] position of the part, forwarded to attachment_filename
# @return [RubyLLM::Attachment, nil] nil when there is no 'data' entry or an
#   ArgumentError is raised while building the attachment
def build_inline_attachment(inline_data, index)
  payload = inline_data['data']
  return unless payload

  # NOTE(review): Base64.decode64 is lenient and does not raise on malformed
  # input, so the rescue below only fires for ArgumentErrors raised elsewhere.
  buffer = StringIO.new(Base64.decode64(payload))
  buffer.set_encoding(Encoding::BINARY) if buffer.respond_to?(:set_encoding)

  RubyLLM::Attachment.new(buffer, filename: attachment_filename(inline_data['mimeType'], index))
rescue ArgumentError => e
  RubyLLM.logger.warn "Failed to decode Gemini inline data attachment: #{e.message}"
  nil
end
92+
93+
# Builds an attachment from a Gemini +fileData+ part, which references the
# file by URI instead of embedding its bytes.
#
# @param file_data [Hash] part payload with 'fileUri' and optionally
#   'filename' and 'mimeType'
# @param index [Integer] position of the part, used for the fallback filename
# @return [RubyLLM::Attachment, nil] nil when the part has no 'fileUri'
def build_file_attachment(file_data, index)
  location = file_data['fileUri']
  return unless location

  # Prefer the name supplied in the part; otherwise derive one from the MIME type.
  name = file_data['filename']
  name ||= attachment_filename(file_data['mimeType'], index)

  RubyLLM::Attachment.new(location, filename: name)
end
100+
101+
# Derives a fallback filename for an attachment from its MIME type.
#
# The extension is the MIME subtype, with 'jpeg' shortened to 'jpg', 'plain'
# mapped to 'txt' and '+' replaced by '.' (so 'svg+xml' becomes 'svg.xml').
# MIME parameters such as '; charset=utf-8' are ignored and the subtype is
# compared case-insensitively. When no subtype can be determined the name has
# no extension.
#
# @param mime_type [String, nil] MIME type, e.g. 'image/png'
# @param index [Integer] zero-based part position; the filename counts from one
# @return [String] e.g. 'gemini_attachment_1.png'
def attachment_filename(mime_type, index)
  base = "gemini_attachment_#{index + 1}"
  return base unless mime_type

  # Strip parameters first so they never leak into the filename.
  essence = mime_type.split(';').first.to_s
  subtype = essence.split('/').last.to_s.strip.downcase
  # Avoid producing a name that ends in a bare dot.
  return base if subtype.empty?

  extension = case subtype
              when 'jpeg' then 'jpg'
              when 'plain' then 'txt'
              else subtype.tr('+', '.')
              end

  "#{base}.#{extension}"
end
53110
end
54111
end
55112
end

lib/ruby_llm/providers/gemini/tools.rb

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,27 @@ def format_tools(tools)
1313
}]
1414
end
1515

16+
# Formats an assistant tool-call message as a Gemini +functionCall+ part.
#
# Only the first entry of +msg.tool_calls+ is emitted.
#
# @param msg [#tool_calls] message whose tool_calls is a hash of objects
#   responding to +name+ and +arguments+
# @return [Array<Hash>] a single-element parts array
def format_tool_call(msg)
  call = msg.tool_calls.values.first

  [{ functionCall: { name: call.name, args: call.arguments } }]
end
24+
25+
# Formats a tool-result message as a Gemini +functionResponse+ part.
#
# The message's +tool_call_id+ is used as the name both on the part and
# inside the response payload; the content goes through Media.format_content.
#
# @param msg [#tool_call_id, #content] the tool result message
# @return [Array<Hash>] a single-element parts array
def format_tool_result(msg)
  call_id = msg.tool_call_id
  payload = { name: call_id, content: Media.format_content(msg.content) }

  [{ functionResponse: { name: call_id, response: payload } }]
end
36+
1637
def extract_tool_calls(data)
1738
return nil unless data
1839

0 commit comments

Comments
 (0)