Skip to content

[OPENAI] Support image edits with gpt-image-1 #152

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Open
6 changes: 6 additions & 0 deletions lib/ruby_llm.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
require 'event_stream_parser'
require 'faraday'
require 'faraday/retry'
require 'faraday/multipart'

require 'json'
require 'logger'
require 'securerandom'
Expand Down Expand Up @@ -50,6 +52,10 @@ def paint(...)
Image.paint(...)
end

# Convenience entry point for image editing: every positional, keyword and
# block argument is forwarded unchanged to Image.edit, and its result is
# returned as-is.
def edit(...)
Image.edit(...)
end

def models
Models.instance
end
Expand Down
16 changes: 16 additions & 0 deletions lib/ruby_llm/connection_multipart.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
module RubyLLM
  # Connection variant whose POST requests are sent as multipart/form-data.
  # NOTE(review): relies on @connection, @provider and @config, which are
  # presumably assigned by the Connection superclass - confirm there.
  class ConnectionMultipart < Connection
    # Posts +payload+ to +url+ through the wrapped connection.
    #
    # Provider headers are merged in only when the provider responds to
    # +headers+; the Content-Type is then forced to multipart/form-data.
    # If the caller supplied a block, the request object is yielded to it
    # last, so the caller can still adjust the request.
    def post(url, payload, &)
      @connection.post(url, payload) do |request|
        if @provider.respond_to?(:headers)
          request.headers.merge!(@provider.headers(@config))
        end
        request.headers['Content-Type'] = 'multipart/form-data'
        yield request if block_given?
      end
    end

    # Runs the superclass middleware setup first, then registers the
    # multipart request middleware on the same builder.
    def setup_middleware(faraday)
      super
      faraday.request(:multipart, content_type: 'multipart/form-data')
    end
  end
end
1 change: 1 addition & 0 deletions lib/ruby_llm/error.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class ConfigurationError < StandardError; end
class InvalidRoleError < StandardError; end
class ModelNotFoundError < StandardError; end
class UnsupportedFunctionsError < StandardError; end
# NOTE(review): derives from StandardError directly, like the classes just
# above it. Nothing in the visible changes raises or rescues this class -
# confirm where it is meant to be used.
class NetworkError < StandardError; end

# Error classes for different HTTP status codes
class BadRequestError < Error; end
Expand Down
39 changes: 36 additions & 3 deletions lib/ruby_llm/image.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@ module RubyLLM
# Provides an interface to image generation capabilities
# from providers like DALL-E and Gemini's Imagen.
class Image
attr_reader :url, :data, :mime_type, :revised_prompt, :model_id
attr_reader :url, :data, :mime_type, :revised_prompt, :model, :usage

# Builds an image result.
#
# The previous and the new signature were both present here, together with a
# stale `@model_id = model_id` assignment. This is the new signature, with the
# old `model_id:` keyword still accepted so existing callers keep working.
#
# @param model [String, nil] identifier of the model that produced the image
# @param url [String, nil] location of the image, when the provider returns one
# @param data [String, nil] image payload, when the provider returns it inline
# @param mime_type [String, nil] MIME type of the image
# @param revised_prompt [String, nil] prompt as rewritten by the provider
# @param usage [Hash, nil] usage figures reported by the provider; nil is
#   stored as an empty hash so readers can always index into it
# @param model_id [String, nil] deprecated alias for +model+
# @raise [ArgumentError] when neither +model+ nor +model_id+ is given
def initialize(model: nil, url: nil, data: nil, mime_type: nil, # rubocop:disable Metrics/ParameterLists
               revised_prompt: nil, usage: {}, model_id: nil)
  @model = model || model_id
  raise ArgumentError, 'missing keyword: :model' if @model.nil?

  # Kept in sync with @model for any reader that still exposes model_id.
  @model_id = @model
  @url = url
  @data = data
  @mime_type = mime_type
  @revised_prompt = revised_prompt
  @usage = usage || {}
end

def base64?
Expand Down Expand Up @@ -51,5 +52,37 @@ def self.paint(prompt, # rubocop:disable Metrics/ParameterLists
connection = context ? context.connection_for(provider) : provider.connection(config)
provider.paint(prompt, model: model_id, size:, connection:)
end

# Edits image(s) according to +prompt+ by delegating to the resolved provider.
#
# The model is resolved through Models.resolve only when +model+ is given;
# otherwise the configured default image model id is used and the provider is
# looked up from that id (unless one was passed in).
#
# NOTE(review): when a +context+ is supplied the connection comes from
# context.connection_for, whereas the no-context path asks the provider for
# its multipart connection - confirm the context connection can send
# multipart bodies.
def self.edit(prompt, # rubocop:disable Metrics/ParameterLists
              model: nil, provider: nil, assume_model_exists: false,
              context: nil, with: {}, options: {})
  config = context&.config || RubyLLM.config

  if model
    model, provider = Models.resolve(model, provider: provider, assume_exists: assume_model_exists)
  end
  model_id = model&.id || config.default_image_model
  provider = provider.nil? ? Provider.for(model_id) : provider

  connection =
    if context
      context.connection_for(provider)
    else
      provider.connection_multipart(config)
    end

  provider.edit(prompt, model: model_id, with: with, connection: connection, options: options)
end

# Sum of the input-side and output-side cost of this image.
def total_cost
  spent_on_input = input_cost
  spent_on_output = output_cost
  spent_on_input + spent_on_output
end

# Looks up the registry entry for this image's model via RubyLLM.models.find
# and caches the result, so later calls reuse the first lookup.
def model_info
  return @model_info if @model_info

  @model_info = RubyLLM.models.find(model)
end

# Cost of the input tokens, priced per million tokens.
#
# A missing usage hash or a missing 'input_tokens' entry counts as zero
# tokens instead of raising on nil. The division is done in floating point
# so an Integer price is not truncated to 0 by integer division.
#
# @return [Float]
def input_cost
  tokens = usage.to_h['input_tokens'] || 0
  tokens * model_info.input_price_per_million / 1_000_000.0
end

# Cost of the output tokens, priced per million tokens.
#
# A missing usage hash or a missing 'output_tokens' entry counts as zero
# tokens instead of raising on nil. The division is done in floating point
# so an Integer price is not truncated to 0 by integer division.
#
# @return [Float]
def output_cost
  tokens = usage.to_h['output_tokens'] || 0
  tokens * model_info.output_price_per_million / 1_000_000.0
end
end
end
2 changes: 1 addition & 1 deletion lib/ruby_llm/models.json
Original file line number Diff line number Diff line change
Expand Up @@ -6073,7 +6073,7 @@
"pricing": {
"text_tokens": {
"standard": {
"input_per_million": 5.0,
"input_per_million": 10.0,
"output_per_million": 40.0
}
}
Expand Down
13 changes: 12 additions & 1 deletion lib/ruby_llm/provider.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,14 @@ def embed(text, model:, connection:, dimensions:)
# Renders the image payload for +prompt+, posts it to the provider's images
# URL over +connection+ and returns the parsed result.
#
# The response is parsed exactly once, with the model passed through. The
# earlier bare `parse_image_response response` call is dropped because the
# parsers now require the +model:+ keyword.
def paint(prompt, model:, size:, connection:)
  payload = render_image_payload(prompt, model:, size:)
  response = connection.post images_url, payload
  parse_image_response(response, model:)
end

# Builds the edit payload, posts it to the provider's edits URL over
# +connection+ and returns whatever parse_edit_response makes of the reply.
def edit(prompt, model:, with:, options:, connection:)
  request_body = render_edit_payload(prompt, model: model, with: with, options: options)
  parse_edit_response(connection.post(edits_url, request_body), model: model)
end

def configured?(config = nil)
Expand Down Expand Up @@ -117,6 +124,10 @@ def connection(config)
@connection ||= Connection.new(self, config)
end

# Returns the multipart connection for this provider, building it on first
# use. The instance is cached, so the +config+ of later calls is not
# consulted once a connection exists.
def connection_multipart(config)
  return @connection_multipart if @connection_multipart

  @connection_multipart = ConnectionMultipart.new(self, config)
end

class << self
def extended(base)
base.extend(Methods)
Expand Down
5 changes: 3 additions & 2 deletions lib/ruby_llm/providers/gemini/images.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def render_image_payload(prompt, model:, size:)
}
end

def parse_image_response(response)
def parse_image_response(response, model:)
data = response.body
image_data = data['predictions']&.first

Expand All @@ -38,7 +38,8 @@ def parse_image_response(response)

Image.new(
data: base64_data,
mime_type: mime_type
mime_type: mime_type,
model:
)
end
end
Expand Down
2 changes: 2 additions & 0 deletions lib/ruby_llm/providers/openai.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ module OpenAI
extend OpenAI::Streaming
extend OpenAI::Tools
extend OpenAI::Images
extend OpenAI::Edits
extend OpenAI::Media

def self.extended(base)
Expand All @@ -23,6 +24,7 @@ def self.extended(base)
base.extend(OpenAI::Streaming)
base.extend(OpenAI::Tools)
base.extend(OpenAI::Images)
base.extend(OpenAI::Edits)
base.extend(OpenAI::Media)
end

Expand Down
4 changes: 3 additions & 1 deletion lib/ruby_llm/providers/openai/capabilities.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ module Capabilities
MODEL_PATTERNS = {
dall_e: /^dall-e/,
chatgpt4o: /^chatgpt-4o/,
gpt_image: /^gpt-image/,
gpt41: /^gpt-4\.1(?!-(?:mini|nano))/,
gpt41_mini: /^gpt-4\.1-mini/,
gpt41_nano: /^gpt-4\.1-nano/,
Expand Down Expand Up @@ -105,6 +106,7 @@ def supports_json_mode?(model_id)
end

PRICES = {
gpt_image_1: { input_text: 5.0, input_image: 10.0, output: 8.0, cached_input: 0.5 },
gpt41: { input: 2.0, output: 8.0, cached_input: 0.5 },
gpt41_mini: { input: 0.4, output: 1.6, cached_input: 0.1 },
gpt41_nano: { input: 0.1, output: 0.4 },
Expand Down Expand Up @@ -168,7 +170,7 @@ def model_type(model_id)
when /embedding/ then 'embedding'
when /^tts|whisper|gpt4o_(?:mini_)?(?:transcribe|tts)$/ then 'audio'
when 'moderation' then 'moderation'
when /dall/ then 'image'
when /dall-e|gpt-image/ then 'image'
else 'chat'
end
end
Expand Down
41 changes: 41 additions & 0 deletions lib/ruby_llm/providers/openai/edits.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# frozen_string_literal: true

module RubyLLM
  module Providers
    module OpenAI
      # Image editing methods for the OpenAI API integration
      module Edits
        module_function

        # Relative path of the image edits endpoint.
        def edits_url
          'images/edits'
        end

        # Builds the request payload for an image edit.
        #
        # +options+ is merged first, so the model, prompt, image and n
        # entries set here win over same-named symbol keys in +options+.
        # The image sources are read from with[:image].
        #
        # Options:
        # - size: '1024x1024'
        # - quality: 'low'
        # - user: 'user_123'
        # See https://platform.openai.com/docs/api-reference/images/createEdit
        def render_edit_payload(prompt, model:, with:, options:)
          options.merge(
            model:,
            prompt:,
            image: ImageAttachments.new(with[:image]).format,
            n: 1
          )
        end

        # Turns an edits response into an Image built from the first entry
        # of the response's 'data' list.
        #
        # Only 'b64_json' is read, so a response that carried just a URL
        # would produce an Image with nil data. A response without a
        # 'usage' section yields an empty usage hash rather than nil, so
        # callers can index into it safely.
        def parse_edit_response(response, model:)
          data = response.body
          image_data = data['data'].first
          Image.new(
            data: image_data['b64_json'],
            mime_type: 'image/png',
            usage: data['usage'] || {},
            model:
          )
        end
      end
    end
  end
end
50 changes: 50 additions & 0 deletions lib/ruby_llm/providers/openai/image_attachments.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# frozen_string_literal: true

require 'open-uri' # Added for fetching URLs

module RubyLLM
  module Providers
    module OpenAI
      # Converts image sources (local file paths or http/https URLs) into
      # Faraday upload parts for a multipart request.
      class ImageAttachments
        # A source is remote only when it starts with an explicit http:// or
        # https:// scheme. A bare 'http' prefix is not enough, otherwise a
        # local path such as 'httpdocs/cat.gif' would be fetched as a URL.
        REMOTE_URL_PATTERN = %r{\Ahttps?://}i

        MIME_TYPES_BY_EXTENSION = {
          'png' => 'image/png',
          'gif' => 'image/gif',
          'webp' => 'image/webp'
        }.freeze

        # Used for every extension not listed above (including none).
        DEFAULT_MIME_TYPE = 'image/jpeg'

        # @param sources [String, Array<String>, nil] one source or a list;
        #   nil is treated as an empty list
        def initialize(sources)
          @sources = Array(sources)
        end

        # @return [Array] one upload part per source, in the given order
        def format
          @sources.map do |source|
            remote_url?(source) ? from_remote_url(source) : from_local_file(source)
          end
        end

        private

        def remote_url?(source)
          source.to_s.match?(REMOTE_URL_PATTERN)
        end

        # Picks a MIME type from the file extension, case-insensitively.
        def mime_type_for_image(path)
          ext = File.extname(path).downcase.delete('.')
          MIME_TYPES_BY_EXTENSION.fetch(ext, DEFAULT_MIME_TYPE)
        end

        def from_local_file(source)
          Faraday::UploadIO.new(source, mime_type_for_image(source), File.basename(source))
        end

        # Downloads the remote image with open-uri and wraps the resulting
        # IO-like object in an upload part, using the content type reported
        # for the download.
        def from_remote_url(source)
          parsed_uri = URI.parse(source.to_s)
          io = parsed_uri.open
          content_type = io.content_type

          # The IO is deliberately left open: the part is read later, when
          # the request body is built.
          # NOTE(review): nothing visible here closes it - confirm who does.
          Faraday::UploadIO.new(io, content_type, remote_filename(parsed_uri, content_type))
        end

        # File name taken from the URL path. URLs with no usable path
        # segment (e.g. 'https://host' or 'https://host/') fall back to
        # 'image.<subtype>' for image/* content types, or plain 'image'.
        def remote_filename(uri, content_type)
          name = File.basename(uri.path.to_s)
          return name unless name.empty? || name == '/'

          subtype = content_type.to_s[%r{\Aimage/([\w.+-]+)}, 1]
          subtype ? "image.#{subtype}" : 'image'
        end
      end
    end
  end
end
4 changes: 2 additions & 2 deletions lib/ruby_llm/providers/openai/images.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@ def render_image_payload(prompt, model:, size:)
}
end

# Builds an Image from the first entry of the response's 'data' list.
#
# The model is taken from the +model:+ argument rather than from the
# response body, and is handed to Image under the +model:+ keyword only.
# The superseded `def` line and the `model_id: data['model']` argument are
# removed.
def parse_image_response(response, model:)
  data = response.body
  image_data = data['data'].first

  Image.new(
    url: image_data['url'],
    mime_type: 'image/png', # DALL-E typically returns PNGs
    revised_prompt: image_data['revised_prompt'],
    model:
  )
end
end
Expand Down

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Loading