Switch to Responses API for OpenAI #325

Open · wants to merge 26 commits into main

Commits (26):
18025f9 Introduce support of Responses API (redox, Jul 23, 2025)
0a1c980 useless (redox, Jul 23, 2025)
44ab739 Merge branch 'main' into responses-api (tpaulshippy, Aug 5, 2025)
ab2ac42 Start simplifying by moving responses into openai provider only (tpaulshippy, Aug 5, 2025)
79649c6 Introduce new module to hold chat completions API stuff (tpaulshippy, Aug 5, 2025)
fc3945b Add support for attaching media (tpaulshippy, Aug 5, 2025)
99e4d9e Refactor a bit to support audio inputs with fallback (tpaulshippy, Aug 5, 2025)
a693991 Restore use of complete (tpaulshippy, Aug 6, 2025)
d8ff718 Setup response schema for responses API (tpaulshippy, Aug 6, 2025)
447100a Update with params spec (tpaulshippy, Aug 6, 2025)
f8375f5 Update cassettes for chat with_schema (tpaulshippy, Aug 6, 2025)
f48ba3f Remove some extra params (tpaulshippy, Aug 6, 2025)
c0ae2aa OpenAI responses API does not seem to provide token counts on chunks … (tpaulshippy, Aug 6, 2025)
d00cc38 Update error handling with responses (tpaulshippy, Aug 6, 2025)
928b1c1 Handle chunks from responses when streaming (tpaulshippy, Aug 6, 2025)
049c8ea Rubocop fixes (tpaulshippy, Aug 6, 2025)
eb53d58 Update spec for responses (tpaulshippy, Aug 6, 2025)
4f68ebd One more rubocop (tpaulshippy, Aug 6, 2025)
f50cb23 Clean up some methods we don't need to rename or don't need (tpaulshippy, Aug 6, 2025)
bf2c549 Remove extra namespaces (tpaulshippy, Aug 6, 2025)
40e17be Merge branch 'main' into responses-api (tpaulshippy, Aug 7, 2025)
b0dace8 Update some cassettes (tpaulshippy, Aug 7, 2025)
367f341 Rubocop -A (tpaulshippy, Aug 7, 2025)
f25ffdd Merge branch 'main' into responses-api (tpaulshippy, Aug 8, 2025)
ea03496 Merge main into responses-api (tpaulshippy, Aug 13, 2025)
371e3d2 Update some cassettes (tpaulshippy, Aug 14, 2025)
1 change: 1 addition & 0 deletions lib/ruby_llm.rb
@@ -14,6 +14,7 @@
'ruby_llm' => 'RubyLLM',
'llm' => 'LLM',
'openai' => 'OpenAI',
+ 'openai_base' => 'OpenAIBase',
'api' => 'API',
'deepseek' => 'DeepSeek',
'perplexity' => 'Perplexity',
2 changes: 1 addition & 1 deletion lib/ruby_llm/providers/deepseek.rb
@@ -3,7 +3,7 @@
module RubyLLM
module Providers
# DeepSeek API integration.
- class DeepSeek < OpenAI
+ class DeepSeek < OpenAIBase
include DeepSeek::Chat

def api_base
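The subclass changes above all follow one pattern: OpenAI-compatible providers (DeepSeek, GPUStack, Mistral, Ollama) now inherit from `OpenAIBase`, which keeps the generic chat-completions behavior, while `OpenAI` alone layers the Responses API on top. A minimal self-contained sketch of that split, with hypothetical simplified classes (the real provider classes, URLs, and constructor are more involved):

```ruby
# Sketch of the inheritance split introduced by this PR. OpenAIBase keeps
# the chat-completions endpoint; compatible providers subclass it, while
# OpenAI alone can route to the new Responses endpoint.
class OpenAIBase
  def completion_url
    'chat/completions'
  end
end

class DeepSeek < OpenAIBase
  # Hypothetical override; each compatible provider points at its own host.
  def api_base
    'https://api.deepseek.com'
  end
end

class OpenAI < OpenAIBase
  def initialize(use_responses: true)
    @use_responses = use_responses
  end

  # Only the OpenAI provider itself ever targets the Responses API.
  def completion_url
    @use_responses ? 'responses' : super
  end
end
```

The benefit is that the Responses-specific code never leaks into the compatible providers: they inherit the old chat-completions path unchanged.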
2 changes: 1 addition & 1 deletion lib/ruby_llm/providers/gpustack.rb
@@ -3,7 +3,7 @@
module RubyLLM
module Providers
# GPUStack API integration based on Ollama.
- class GPUStack < OpenAI
+ class GPUStack < OpenAIBase
include GPUStack::Chat
include GPUStack::Models

2 changes: 1 addition & 1 deletion lib/ruby_llm/providers/mistral.rb
@@ -3,7 +3,7 @@
module RubyLLM
module Providers
# Mistral API integration.
- class Mistral < OpenAI
+ class Mistral < OpenAIBase
include Mistral::Chat
include Mistral::Models
include Mistral::Embeddings
2 changes: 1 addition & 1 deletion lib/ruby_llm/providers/ollama.rb
@@ -3,7 +3,7 @@
module RubyLLM
module Providers
# Ollama API integration.
- class Ollama < OpenAI
+ class Ollama < OpenAIBase
include Ollama::Chat
include Ollama::Media

54 changes: 27 additions & 27 deletions lib/ruby_llm/providers/openai.rb
@@ -2,41 +2,41 @@

module RubyLLM
module Providers
# OpenAI API integration. Handles chat completion, function calling,
# and OpenAI's unique streaming format. Supports GPT-4, GPT-3.5,
# OpenAI API integration using the new Responses API. Handles response generation,
# function calling, and OpenAI's unique streaming format. Supports GPT-4, GPT-3.5,
# and other OpenAI models.
class OpenAI < Provider
include OpenAI::Chat
include OpenAI::Embeddings
include OpenAI::Models
include OpenAI::Streaming
include OpenAI::Tools
include OpenAI::Images
include OpenAI::Media
class OpenAI < OpenAIBase
include OpenAI::Response
include OpenAI::ResponseMedia

def api_base
@config.openai_api_base || 'https://api.openai.com/v1'
end
def audio_input?(messages)
messages.any? do |message|
next false unless message.respond_to?(:content) && message.content.respond_to?(:attachments)

def headers
{
'Authorization' => "Bearer #{@config.openai_api_key}",
'OpenAI-Organization' => @config.openai_organization_id,
'OpenAI-Project' => @config.openai_project_id
}.compact
message.content.attachments.any? { |attachment| attachment.type == :audio }
end
end

def maybe_normalize_temperature(temperature, model_id)
OpenAI::Capabilities.normalize_temperature(temperature, model_id)
end
def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists
@using_responses_api = !audio_input?(messages)

class << self
def capabilities
OpenAI::Capabilities
if @using_responses_api
render_response_payload(messages, tools: tools, temperature: temperature, model: model, stream: stream,
schema: schema)
else
super
end
end

def completion_url
@using_responses_api ? responses_url : super
end

def configuration_requirements
%i[openai_api_key]
def parse_completion_response(response)
if @using_responses_api
parse_respond_response(response)
else
super
end
end
end
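The `render_payload` override above routes each request at render time: if any message carries an audio attachment, the provider falls back to Chat Completions (the Responses API did not accept audio input when this PR was written); otherwise it uses the Responses endpoint. A standalone sketch of that decision, using hypothetical `Message`/`Content`/`Attachment` stand-ins for the gem's real classes:

```ruby
# Hypothetical stand-ins for RubyLLM's message objects.
Attachment = Struct.new(:type)
Content    = Struct.new(:text, :attachments)
Message    = Struct.new(:content)

# Mirrors the audio_input? check in the diff: true if any message has an
# audio attachment. The respond_to? guards let plain-string content pass.
def audio_input?(messages)
  messages.any? do |message|
    next false unless message.respond_to?(:content) && message.content.respond_to?(:attachments)

    message.content.attachments.any? { |attachment| attachment.type == :audio }
  end
end

# The routing decision made inside render_payload / completion_url.
def endpoint_for(messages)
  audio_input?(messages) ? 'chat/completions' : 'responses'
end
```

Because the flag is set per request (`@using_responses_api`), a single `OpenAI` provider instance can serve both endpoints depending on the conversation's attachments.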
2 changes: 1 addition & 1 deletion lib/ruby_llm/providers/openai/chat.rb
@@ -21,7 +21,7 @@ def render_payload(messages, tools:, temperature:, model:, stream: false, schema
# Only include temperature if it's not nil (some models don't accept it)
payload[:temperature] = temperature unless temperature.nil?

- payload[:tools] = tools.map { |_, tool| tool_for(tool) } if tools.any?
+ payload[:tools] = tools.map { |_, tool| chat_tool_for(tool) } if tools.any?

if schema
# Use strict mode from schema if specified, default to true
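The `tool_for` to `chat_tool_for` rename exists because the two APIs serialize tools differently: Chat Completions nests the definition under a `function` key, while the Responses API flattens it to the top level. A hedged sketch of the two shapes (the `ToolStub` and both helpers below are illustrative, not the gem's actual implementations):

```ruby
# Illustrative tool object; RubyLLM's real Tool class is richer.
ToolStub = Struct.new(:name, :description, :parameters)

# Chat Completions shape: function definition nested under :function.
def chat_tool_for(tool)
  {
    type: 'function',
    function: {
      name: tool.name,
      description: tool.description,
      parameters: tool.parameters
    }
  }
end

# Responses API shape: same fields, but hoisted to the top level.
def response_tool_for(tool)
  {
    type: 'function',
    name: tool.name,
    description: tool.description,
    parameters: tool.parameters
  }
end
```

Keeping two named helpers instead of one overloaded `tool_for` lets each payload renderer pick the right shape without branching.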
115 changes: 115 additions & 0 deletions lib/ruby_llm/providers/openai/response.rb
@@ -0,0 +1,115 @@
# frozen_string_literal: true

module RubyLLM
module Providers
class OpenAI
# Response methods of the OpenAI API integration
module Response
def responses_url
'responses'
end

module_function

def render_response_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists
payload = {
model: model,
input: format_input(messages),
stream: stream
}

# Only include temperature if it's not nil (some models don't accept it)
payload[:temperature] = temperature unless temperature.nil?

payload[:tools] = tools.map { |_, tool| response_tool_for(tool) } if tools.any?

if schema
# Use strict mode from schema if specified, default to true
strict = schema[:strict] != false

payload[:text] = {
format: {
type: 'json_schema',
name: 'response',
schema: schema,
strict: strict
}
}
end

payload
end

def format_input(messages) # rubocop:disable Metrics/PerceivedComplexity
all_tool_calls = messages.flat_map do |m|
m.tool_calls&.values || []
end
messages.flat_map do |msg|
if msg.tool_call?
msg.tool_calls.map do |_, tc|
{
type: 'function_call',
call_id: tc.id,
name: tc.name,
arguments: JSON.generate(tc.arguments),
status: 'completed'
}
end
elsif msg.role == :tool
{
type: 'function_call_output',
call_id: all_tool_calls.detect { |tc| tc.id == msg.tool_call_id }&.id,
output: msg.content,
status: 'completed'
}
else
{
type: 'message',
role: format_role(msg.role),
content: ResponseMedia.format_content(msg.content),
status: 'completed'
}.compact
end
end
end

def format_role(role)
case role
when :system
'developer'
else
role.to_s
end
end

def parse_respond_response(response)
data = response.body
return if data.empty?

raise Error.new(response, data.dig('error', 'message')) if data.dig('error', 'message')

outputs = data['output']
return unless outputs.any?

Message.new(
role: :assistant,
content: all_output_text(outputs),
tool_calls: parse_response_tool_calls(outputs),
input_tokens: data['usage']['input_tokens'],
output_tokens: data['usage']['output_tokens'],
model_id: data['model'],
raw: response
)
end

def all_output_text(outputs)
outputs.select { |o| o['type'] == 'message' }.flat_map do |o|
o['content'].filter_map do |c|
c['type'] == 'output_text' && c['text']
end
end.join("\n")
end
end
end
end
end
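`parse_respond_response` above works because the Responses API returns an `output` array whose `message` items hold `output_text` content parts, possibly interleaved with other item types (reasoning, tool calls). The sketch below reproduces the diff's `all_output_text` and runs it over a hand-written sample payload that mirrors that documented shape (it is not a captured API response):

```ruby
# From the diff: collect the text of every output_text part across all
# message items, joining multiple messages with newlines. filter_map
# drops the false produced for non-text parts (e.g. refusals).
def all_output_text(outputs)
  outputs.select { |o| o['type'] == 'message' }.flat_map do |o|
    o['content'].filter_map do |c|
      c['type'] == 'output_text' && c['text']
    end
  end.join("\n")
end

# Hand-written sample mirroring the Responses API "output" array shape.
sample_output = [
  { 'type' => 'reasoning', 'summary' => [] },
  { 'type' => 'message',
    'content' => [
      { 'type' => 'output_text', 'text' => 'Hello' },
      { 'type' => 'refusal', 'refusal' => 'n/a' }
    ] },
  { 'type' => 'message',
    'content' => [{ 'type' => 'output_text', 'text' => 'world' }] }
]
```

Non-message items and non-text parts fall away, so the assistant `Message` built in `parse_respond_response` ends up with just the concatenated text.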
76 changes: 76 additions & 0 deletions lib/ruby_llm/providers/openai/response_media.rb
@@ -0,0 +1,76 @@
# frozen_string_literal: true

module RubyLLM
module Providers
class OpenAI
# Handles formatting of media content (images, audio) for OpenAI APIs
module ResponseMedia
module_function

def format_content(content)
return content.to_json if content.is_a?(Hash) || content.is_a?(Array)
return content unless content.is_a?(Content)

parts = []
parts << format_text(content.text) if content.text

content.attachments.each do |attachment|
case attachment.type
when :image
parts << format_image(attachment)
when :pdf
parts << format_pdf(attachment)
when :audio
parts << format_audio(attachment)
when :text
parts << format_text_file(attachment)
else
raise UnsupportedAttachmentError, attachment.type
end
end

parts
end

def format_image(image)
{
type: 'input_image',
image_url: image.url? ? image.source : "data:#{image.mime_type};base64,#{image.encoded}"
}
end

def format_pdf(pdf)
{
type: 'input_file',
filename: pdf.filename,
file_data: "data:#{pdf.mime_type};base64,#{pdf.encoded}"
}
end

def format_text_file(text_file)
{
type: 'input_text',
text: Utils.format_text_file_for_llm(text_file)
}
end

def format_audio(audio)
{
type: 'input_audio',
input_audio: {
data: audio.encoded,
format: audio.mime_type.split('/').last
}
}
end

def format_text(text)
{
type: 'input_text',
text: text
}
end
end
end
end
end
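The media formatters above emit the Responses API part types (`input_image`, `input_file`, `input_text`, `input_audio`) rather than the Chat Completions ones, which is why this module exists alongside the old `Media` module. A sketch of the image and text cases, with `ImageStub` as a hypothetical stand-in for the gem's attachment object:

```ruby
# Hypothetical attachment stand-in: a remote image has a source URL;
# a local one has a mime type plus base64-encoded bytes.
class ImageStub
  attr_reader :source, :mime_type, :encoded

  def initialize(source: nil, mime_type: nil, encoded: nil)
    @source = source
    @mime_type = mime_type
    @encoded = encoded
  end

  def url?
    !@source.nil?
  end
end

# From the diff: remote images pass the URL through; local images are
# inlined as a data: URL built from the mime type and base64 payload.
def format_image(image)
  {
    type: 'input_image',
    image_url: image.url? ? image.source : "data:#{image.mime_type};base64,#{image.encoded}"
  }
end

def format_text(text)
  { type: 'input_text', text: text }
end
```

The same data-URL inlining is used for PDFs (`input_file` with `file_data`), so no separate upload step is needed for local attachments.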