crmne · sbounmy · Apr 27, 2025 · Apr 27, 2025 · Apr 28, 2025 · Apr 28, 2025
diff --git a/lib/ruby_llm.rb b/lib/ruby_llm.rb
@@ -4,6 +4,8 @@
 require 'event_stream_parser'
 require 'faraday'
 require 'faraday/retry'
+require 'faraday/multipart'
+
 require 'json'
 require 'logger'
 require 'securerandom'
@@ -50,6 +52,10 @@ def paint(...)
       Image.paint(...)
     end
 
+    def edit(...)
+      Image.edit(...)
+    end
+
     def models
       Models.instance
     end

diff --git a/lib/ruby_llm/connection_multipart.rb b/lib/ruby_llm/connection_multipart.rb
@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  # Connection that sends request bodies as multipart/form-data.
+  # Image.edit obtains one through Provider#connection_multipart for uploads.
+  class ConnectionMultipart < Connection
+    MULTIPART_CONTENT_TYPE = 'multipart/form-data'
+
+    # Posts +payload+ to +url+ through the underlying @connection.
+    # Provider headers are merged first when the provider defines them; the
+    # Content-Type is assigned afterwards so it always ends up multipart.
+    # An optional block receives the request for further per-call changes.
+    def post(url, payload, &)
+      @connection.post url, payload do |req|
+        req.headers.merge! @provider.headers(@config) if @provider.respond_to?(:headers)
+        req.headers['Content-Type'] = MULTIPART_CONTENT_TYPE
+        yield req if block_given?
+      end
+    end
+
+    # Runs the parent's middleware setup, then registers Faraday's multipart
+    # request middleware for the same content type.
+    def setup_middleware(faraday)
+      super
+      faraday.request :multipart, content_type: MULTIPART_CONTENT_TYPE
+    end
+  end
+end
diff --git a/lib/ruby_llm/error.rb b/lib/ruby_llm/error.rb
@@ -24,6 +24,7 @@ class ConfigurationError < StandardError; end
   class InvalidRoleError < StandardError; end
   class ModelNotFoundError < StandardError; end
   class UnsupportedFunctionsError < StandardError; end
+  class NetworkError < StandardError; end
 
   # Error classes for different HTTP status codes
   class BadRequestError < Error; end

diff --git a/lib/ruby_llm/image.rb b/lib/ruby_llm/image.rb
@@ -5,14 +5,15 @@ module RubyLLM
   # Provides an interface to image generation capabilities
   # from providers like DALL-E and Gemini's Imagen.
   class Image
-    attr_reader :url, :data, :mime_type, :revised_prompt, :model_id
+    attr_reader :url, :data, :mime_type, :revised_prompt, :model, :usage
 
-    def initialize(url: nil, data: nil, mime_type: nil, revised_prompt: nil, model_id: nil)
+    def initialize(model:, url: nil, data: nil, mime_type: nil, revised_prompt: nil, usage: {})
       @url = url
       @data = data
       @mime_type = mime_type
       @revised_prompt = revised_prompt
-      @model_id = model_id
+      @usage = usage
+      @model = model
     end
 
     def base64?
@@ -51,5 +52,52 @@ def self.paint(prompt, # rubocop:disable Metrics/ParameterLists
       connection = context ? context.connection_for(provider) : provider.connection(config)
       provider.paint(prompt, model: model_id, size:, connection:)
     end
+
+    # Edits existing image(s) according to +prompt+ and returns the provider's result.
+    #
+    # When a model is given it is resolved (together with its provider) via
+    # Models.resolve; otherwise config.default_image_model is used. A still-missing
+    # provider is looked up with Provider.for. +with+ and +options+ are handed to
+    # provider.edit unchanged.
+    # NOTE(review): without a context the provider's multipart connection is used;
+    # with a context the connection comes from context.connection_for. Confirm
+    # that connection can send multipart bodies as well.
+    def self.edit(prompt, # rubocop:disable Metrics/ParameterLists
+                  model: nil,
+                  provider: nil,
+                  assume_model_exists: false,
+                  context: nil,
+                  with: {},
+                  options: {})
+      config = context&.config || RubyLLM.config
+      model, provider = Models.resolve(model, provider: provider, assume_exists: assume_model_exists) if model
+      model_id = model&.id || config.default_image_model
+
+      provider = Provider.for(model_id) if provider.nil?
+      connection = context ? context.connection_for(provider) : provider.connection_multipart(config)
+      provider.edit(prompt, model: model_id, with:, connection:, options:)
+    end
+
+    # Combined input and output cost (see input_cost / output_cost).
+    def total_cost
+      input_cost + output_cost
+    end
+
+    # Registry entry for this image's model; looked up lazily and cached.
+    def model_info
+      @model_info ||= RubyLLM.models.find(model)
+    end
+
+    # Price of the reported input tokens at the model's per-million rate.
+    # A missing usage hash or token count is treated as zero tokens.
+    def input_cost
+      (usage || {})['input_tokens'].to_i * model_info.input_price_per_million / 1_000_000.0
+    end
+
+    # Price of the reported output tokens at the model's per-million rate.
+    # A missing usage hash or token count is treated as zero tokens.
+    def output_cost
+      (usage || {})['output_tokens'].to_i * model_info.output_price_per_million / 1_000_000.0
+    end
   end
 end
diff --git a/lib/ruby_llm/models.json b/lib/ruby_llm/models.json
@@ -6073,7 +6073,7 @@
     "pricing": {
       "text_tokens": {
         "standard": {
-          "input_per_million": 5.0,
+          "input_per_million": 10.0,
           "output_per_million": 40.0
         }
       }

diff --git a/lib/ruby_llm/provider.rb b/lib/ruby_llm/provider.rb
@@ -40,7 +40,14 @@ def embed(text, model:, connection:, dimensions:)
       def paint(prompt, model:, size:, connection:)
         payload = render_image_payload(prompt, model:, size:)
         response = connection.post images_url, payload
-        parse_image_response response
+        parse_image_response(response, model:)
+      end
+
+      def edit(prompt, model:, with:, options:, connection:)
+        payload = render_edit_payload(prompt, model:, with:, options:)
+
+        response = connection.post(edits_url, payload)
+        parse_edit_response(response, model:)
       end
 
       def configured?(config = nil)
@@ -117,6 +124,10 @@ def connection(config)
       @connection ||= Connection.new(self, config)
     end
 
+    def connection_multipart(config)
+      @connection_multipart ||= ConnectionMultipart.new(self, config)
+    end
+
     class << self
       def extended(base)
         base.extend(Methods)

diff --git a/lib/ruby_llm/providers/gemini/images.rb b/lib/ruby_llm/providers/gemini/images.rb
@@ -24,7 +24,7 @@ def render_image_payload(prompt, model:, size:)
           }
         end
 
-        def parse_image_response(response)
+        def parse_image_response(response, model:)
           data = response.body
           image_data = data['predictions']&.first
 
@@ -38,7 +38,8 @@ def parse_image_response(response)
 
           Image.new(
             data: base64_data,
-            mime_type: mime_type
+            mime_type: mime_type,
+            model:
           )
         end
       end

diff --git a/lib/ruby_llm/providers/openai.rb b/lib/ruby_llm/providers/openai.rb
@@ -13,6 +13,7 @@ module OpenAI
       extend OpenAI::Streaming
       extend OpenAI::Tools
       extend OpenAI::Images
+      extend OpenAI::Edits
       extend OpenAI::Media
 
       def self.extended(base)
@@ -23,6 +24,7 @@ def self.extended(base)
         base.extend(OpenAI::Streaming)
         base.extend(OpenAI::Tools)
         base.extend(OpenAI::Images)
+        base.extend(OpenAI::Edits)
         base.extend(OpenAI::Media)
       end
 

diff --git a/lib/ruby_llm/providers/openai/capabilities.rb b/lib/ruby_llm/providers/openai/capabilities.rb
@@ -10,6 +10,7 @@ module Capabilities
         MODEL_PATTERNS = {
           dall_e: /^dall-e/,
           chatgpt4o: /^chatgpt-4o/,
+          gpt_image: /^gpt-image/,
           gpt41: /^gpt-4\.1(?!-(?:mini|nano))/,
           gpt41_mini: /^gpt-4\.1-mini/,
           gpt41_nano: /^gpt-4\.1-nano/,
@@ -105,6 +106,7 @@ def supports_json_mode?(model_id)
         end
 
         PRICES = {
+          gpt_image_1: { input_text: 5.0, input_image: 10.0, output: 8.0, cached_input: 0.5 },
           gpt41: { input: 2.0, output: 8.0, cached_input: 0.5 },
           gpt41_mini: { input: 0.4, output: 1.6, cached_input: 0.1 },
           gpt41_nano: { input: 0.1, output: 0.4 },
@@ -168,7 +170,7 @@ def model_type(model_id)
           when /embedding/ then 'embedding'
           when /^tts|whisper|gpt4o_(?:mini_)?(?:transcribe|tts)$/ then 'audio'
           when 'moderation' then 'moderation'
-          when /dall/ then 'image'
+          when /dall-e|gpt-image/ then 'image'
           else 'chat'
           end
         end

diff --git a/lib/ruby_llm/providers/openai/edits.rb b/lib/ruby_llm/providers/openai/edits.rb
@@ -0,0 +1,51 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    module OpenAI
+      # Image editing methods for the OpenAI API integration
+      module Edits
+        module_function
+
+        # Relative path of the image edits endpoint.
+        def edits_url
+          'images/edits'
+        end
+
+        # Builds the request fields for an image edit.
+        #
+        # +options+ is forwarded to the API, for example:
+        # - size: '1024x1024'
+        # - quality: 'low'
+        # - user: 'user_123'
+        # See https://platform.openai.com/docs/api-reference/images/createEdit
+        #
+        # model, prompt, image and n are merged last and therefore override
+        # identical keys in +options+. Images are read from with[:image].
+        def render_edit_payload(prompt, model:, with:, options:)
+          options.merge({
+                          model:,
+                          prompt:,
+                          image: ImageAttachments.new(with[:image]).format,
+                          n: 1
+                        })
+        end
+
+        # Builds an Image from the first entry of the response's data array.
+        # Both url and b64_json are passed through because the API returns one
+        # or the other depending on the model and response format.
+        def parse_edit_response(response, model:)
+          data = response.body
+          image_data = data['data'].first
+          Image.new(
+            url: image_data['url'],
+            data: image_data['b64_json'],
+            mime_type: 'image/png', # NOTE(review): fixed value; confirm for non-PNG output formats
+            usage: data['usage'] || {},
+            model:
+          )
+        end
+      end
+    end
+  end
+end
diff --git a/lib/ruby_llm/providers/openai/image_attachments.rb b/lib/ruby_llm/providers/openai/image_attachments.rb
@@ -0,0 +1,67 @@
+# frozen_string_literal: true
+
+require 'open-uri' # Added for fetching URLs
+
+module RubyLLM
+  module Providers
+    module OpenAI
+      # Converts image sources (local file paths or http/https URLs) into
+      # Faraday upload parts for a multipart request.
+      class ImageAttachments
+        REMOTE_URL_PATTERN = %r{\Ahttps?://}i
+        FALLBACK_FILENAME = 'image'
+
+        # @param sources a single source or a list of sources (wrapped with Array())
+        def initialize(sources)
+          @sources = Array(sources)
+        end
+
+        # Returns one upload part per source. Only sources that start with an
+        # http:// or https:// scheme are fetched; everything else is treated as
+        # a local path, so a file named e.g. "http_cat.png" is read from disk.
+        def format
+          @sources.map do |source|
+            location = source.to_s
+            remote?(location) ? from_remote_url(location) : from_local_file(location)
+          end
+        end
+
+        private
+
+        def remote?(location)
+          location.match?(REMOTE_URL_PATTERN)
+        end
+
+        # Picks a MIME type from the file extension; unknown extensions fall back to JPEG.
+        def mime_type_for_image(path)
+          ext = File.extname(path).downcase.delete('.')
+          case ext
+          when 'png' then 'image/png'
+          when 'gif' then 'image/gif'
+          when 'webp' then 'image/webp'
+          else 'image/jpeg'
+          end
+        end
+
+        def from_local_file(source)
+          Faraday::UploadIO.new(source, mime_type_for_image(source), File.basename(source))
+        end
+
+        def from_remote_url(source)
+          parsed_uri = URI.parse(source)
+
+          # open-uri returns an IO-like object (StringIO or Tempfile) that also
+          # exposes the response's content type.
+          io = parsed_uri.open
+          content_type = io.content_type
+
+          # URLs without a usable last path segment ("" or "/") get a generic name.
+          filename = File.basename(parsed_uri.path.to_s)
+          filename = FALLBACK_FILENAME if filename.empty? || filename == '/'
+          # The IO is deliberately left open: it is read later, when the request body is built.
+          Faraday::UploadIO.new(io, content_type, filename)
+        end
+      end
+    end
+  end
+end
diff --git a/lib/ruby_llm/providers/openai/images.rb b/lib/ruby_llm/providers/openai/images.rb
@@ -20,15 +20,15 @@ def render_image_payload(prompt, model:, size:)
           }
         end
 
-        def parse_image_response(response)
+        def parse_image_response(response, model:)
           data = response.body
           image_data = data['data'].first
 
           Image.new(
             url: image_data['url'],
             mime_type: 'image/png', # DALL-E typically returns PNGs
             revised_prompt: image_data['revised_prompt'],
-            model_id: data['model']
+            model:
           )
         end
       end

diff --git a/...tes/RubyLLM_Image/edit_functionality_OpenAI_/with_local_files/customizes_image_output.yml b/...tes/RubyLLM_Image/edit_functionality_OpenAI_/with_local_files/customizes_image_output.yml
diff --git a/...e/edit_functionality_OpenAI_/with_local_files/rejects_edits_with_a_non-PNG_local_file.yml b/...e/edit_functionality_OpenAI_/with_local_files/rejects_edits_with_a_non-PNG_local_file.yml
diff --git a/...it_functionality_OpenAI_/with_local_files/supports_image_edits_with_a_valid_local_PNG.yml b/...it_functionality_OpenAI_/with_local_files/supports_image_edits_with_a_valid_local_PNG.yml