32 commits
18025f9
Introduce support of Responses API
redox Jul 23, 2025
0a1c980
useless
redox Jul 23, 2025
44ab739
Merge branch 'main' into responses-api
tpaulshippy Aug 5, 2025
ab2ac42
Start simplifying by moving responses into openai provider only
tpaulshippy Aug 5, 2025
79649c6
Introduce new module to hold chat completions API stuff
tpaulshippy Aug 5, 2025
fc3945b
Add support for attaching media
tpaulshippy Aug 5, 2025
99e4d9e
Refactor a bit to support audio inputs with fallback
tpaulshippy Aug 5, 2025
a693991
Restore use of complete
tpaulshippy Aug 6, 2025
d8ff718
Setup response schema for responses API
tpaulshippy Aug 6, 2025
447100a
Update with params spec
tpaulshippy Aug 6, 2025
f8375f5
Update cassettes for chat with_schema
tpaulshippy Aug 6, 2025
f48ba3f
Remove some extra params
tpaulshippy Aug 6, 2025
c0ae2aa
OpenAI responses API does not seem to provide token counts on chunks …
tpaulshippy Aug 6, 2025
d00cc38
Update error handling with responses
tpaulshippy Aug 6, 2025
928b1c1
Handle chunks from responses when streaming
tpaulshippy Aug 6, 2025
049c8ea
Rubocop fixes
tpaulshippy Aug 6, 2025
eb53d58
Update spec for responses
tpaulshippy Aug 6, 2025
4f68ebd
One more rubocop
tpaulshippy Aug 6, 2025
f50cb23
Clean up some methods we don't need to rename or don't need
tpaulshippy Aug 6, 2025
bf2c549
Remove extra namespaces
tpaulshippy Aug 6, 2025
40e17be
Merge branch 'main' into responses-api
tpaulshippy Aug 7, 2025
b0dace8
Update some cassettes
tpaulshippy Aug 7, 2025
367f341
Rubocop -A
tpaulshippy Aug 7, 2025
f25ffdd
Merge branch 'main' into responses-api
tpaulshippy Aug 8, 2025
ea03496
Merge main into responses-api
tpaulshippy Aug 13, 2025
371e3d2
Update some cassettes
tpaulshippy Aug 14, 2025
31fd54e
Merge branch 'main' into responses-api
tpaulshippy Aug 25, 2025
485a28b
Update cassettes
tpaulshippy Aug 25, 2025
02e1d5c
Unable to generate this cassette for some reason, just restore what w…
tpaulshippy Aug 25, 2025
9133c7c
Cleanup some comments and deprecated stuff
tpaulshippy Aug 27, 2025
bf4b381
Merge branch 'main' into responses-api
tpaulshippy Aug 27, 2025
6cda94f
Fix model id in responses API payload
tpaulshippy Aug 28, 2025
1 change: 1 addition & 0 deletions lib/ruby_llm.rb
@@ -14,6 +14,7 @@
'ruby_llm' => 'RubyLLM',
'llm' => 'LLM',
'openai' => 'OpenAI',
'chat_completions' => 'ChatCompletions',
'api' => 'API',
'deepseek' => 'DeepSeek',
'perplexity' => 'Perplexity',
2 changes: 1 addition & 1 deletion lib/ruby_llm/providers/deepseek.rb
@@ -4,7 +4,7 @@ module RubyLLM
module Providers
# DeepSeek API integration.
module DeepSeek
extend OpenAI
extend OpenAI::ChatCompletions
extend DeepSeek::Chat

module_function
2 changes: 1 addition & 1 deletion lib/ruby_llm/providers/gpustack.rb
@@ -4,7 +4,7 @@ module RubyLLM
module Providers
# GPUStack API integration based on Ollama.
module GPUStack
extend OpenAI
extend OpenAI::ChatCompletions
extend GPUStack::Chat
extend GPUStack::Models

2 changes: 1 addition & 1 deletion lib/ruby_llm/providers/mistral.rb
@@ -4,7 +4,7 @@ module RubyLLM
module Providers
# Mistral API integration.
module Mistral
extend OpenAI
extend OpenAI::ChatCompletions
extend Mistral::Chat
extend Mistral::Models
extend Mistral::Embeddings
2 changes: 1 addition & 1 deletion lib/ruby_llm/providers/ollama.rb
@@ -4,7 +4,7 @@ module RubyLLM
module Providers
# Ollama API integration.
module Ollama
extend OpenAI
extend OpenAI::ChatCompletions
extend Ollama::Chat
extend Ollama::Media

70 changes: 37 additions & 33 deletions lib/ruby_llm/providers/openai.rb
@@ -2,54 +2,58 @@

module RubyLLM
module Providers
# OpenAI API integration. Handles chat completion, function calling,
# and OpenAI's unique streaming format. Supports GPT-4, GPT-3.5,
# OpenAI API integration using the new Responses API. Handles response generation,
# function calling, and OpenAI's unique streaming format. Supports GPT-4, GPT-3.5,
# and other OpenAI models.
module OpenAI
extend Provider
extend OpenAI::Chat
extend OpenAI::Embeddings
extend OpenAI::Models
extend OpenAI::Streaming
extend OpenAI::Tools
extend OpenAI::Images
extend OpenAI::Media
extend OpenAI::ChatCompletions
extend OpenAI::Response
extend OpenAI::ResponseMedia

def self.extended(base)
base.extend(Provider)
base.extend(OpenAI::Chat)
base.extend(OpenAI::Embeddings)
base.extend(OpenAI::Models)
base.extend(OpenAI::Streaming)
base.extend(OpenAI::Tools)
base.extend(OpenAI::Images)
base.extend(OpenAI::Media)
base.extend(OpenAI::ChatCompletions)
base.extend(OpenAI::Response)
base.extend(OpenAI::ResponseMedia)
end

module_function

def api_base(config)
  config.openai_api_base || 'https://api.openai.com/v1'
end

def headers(config)
  {
    'Authorization' => "Bearer #{config.openai_api_key}",
    'OpenAI-Organization' => config.openai_organization_id,
    'OpenAI-Project' => config.openai_project_id
  }.compact
end

# Detect if messages contain audio attachments
def audio_input?(messages)
  messages.any? do |message|
    next false unless message.respond_to?(:content) && message.content.respond_to?(:attachments)

    message.content.attachments.any? { |attachment| attachment.type == :audio }
  end
end

def capabilities
  OpenAI::Capabilities
end

def slug
  'openai'
end

def configuration_requirements
  %i[openai_api_key]
end

# Override render_payload to conditionally route to the Chat Completions or Responses API
def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists
  # Track which API we're using for later methods
  @using_responses_api = !audio_input?(messages)

  if @using_responses_api
    # Use the Responses API for non-audio input
    render_response_payload(messages, tools: tools, temperature: temperature, model: model, stream: stream,
                            schema: schema)
  else
    # Use chat completions for audio - call the original method from ChatCompletions
    super
  end
end

# Override completion_url to conditionally route to the right endpoint
def completion_url
  @using_responses_api ? responses_url : super
end

# Override parse_completion_response to use the right parser
def parse_completion_response(response)
  if @using_responses_api
    parse_respond_response(response)
  else
    super
  end
end
end
end
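The routing pivot in this file is the `audio_input?` check. A minimal standalone sketch of that detection logic, using stand-in `Struct`s rather than the gem's real `Message`/`Content`/`Attachment` classes:

```ruby
# Stand-ins for the gem's message classes (illustrative only)
Attachment = Struct.new(:type)
Content    = Struct.new(:text, :attachments)
Message    = Struct.new(:content)

# Same shape as the audio_input? method in the diff above
def audio_input?(messages)
  messages.any? do |message|
    next false unless message.respond_to?(:content) && message.content.respond_to?(:attachments)

    message.content.attachments.any? { |attachment| attachment.type == :audio }
  end
end

audio_msg = Message.new(Content.new('transcribe this', [Attachment.new(:audio)]))
text_msg  = Message.new(Content.new('hello', []))

puts audio_input?([audio_msg]) # => true  -> falls back to Chat Completions
puts audio_input?([text_msg])  # => false -> routed to the Responses API
```

The `respond_to?` guard lets plain-string messages pass through without raising, which is why the method is safe to call on any conversation history.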
2 changes: 1 addition & 1 deletion lib/ruby_llm/providers/openai/chat.rb
@@ -22,7 +22,7 @@ def render_payload(messages, tools:, temperature:, model:, stream: false, schema
payload[:temperature] = temperature unless temperature.nil?

if tools.any?
payload[:tools] = tools.map { |_, tool| tool_for(tool) }
payload[:tools] = tools.map { |_, tool| chat_tool_for(tool) }
payload[:tool_choice] = 'auto'
end

56 changes: 56 additions & 0 deletions lib/ruby_llm/providers/openai/chat_completions.rb
@@ -0,0 +1,56 @@
# frozen_string_literal: true

module RubyLLM
module Providers
module OpenAI
# OpenAI Chat Completions API integration. This module contains the original
# OpenAI chat completions functionality that is used by providers that extend
# the OpenAI-compatible API (DeepSeek, Mistral, OpenRouter, etc.)
module ChatCompletions
extend Provider
extend OpenAI::Chat
extend OpenAI::Embeddings
extend OpenAI::Models
extend OpenAI::Streaming
extend OpenAI::Tools
extend OpenAI::Images
extend OpenAI::Media

def self.extended(base)
base.extend(Provider)
base.extend(OpenAI::Chat)
base.extend(OpenAI::Embeddings)
base.extend(OpenAI::Models)
base.extend(OpenAI::Streaming)
base.extend(OpenAI::Tools)
base.extend(OpenAI::Images)
base.extend(OpenAI::Media)
end

def api_base(config)
config.openai_api_base || 'https://api.openai.com/v1'
end

def headers(config)
{
'Authorization' => "Bearer #{config.openai_api_key}",
'OpenAI-Organization' => config.openai_organization_id,
'OpenAI-Project' => config.openai_project_id
}.compact
end

def capabilities
OpenAI::Capabilities
end

def slug
'openai'
end

def configuration_requirements
%i[openai_api_key]
end
end
end
end
end
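Both `OpenAI` and the new `ChatCompletions` module pair module-level `extend` calls with a `self.extended` hook, so the helper methods are available on the module itself and propagate to any provider that extends it (DeepSeek, Mistral, Ollama, GPUStack above). A toy illustration of that propagation pattern, with hypothetical module names:

```ruby
module Helpers
  def greeting
    'hello'
  end
end

module BaseProvider
  extend Helpers # BaseProvider.greeting works directly

  # Fires whenever another module does `extend BaseProvider`,
  # re-extending the helpers onto that module as well
  def self.extended(base)
    base.extend(Helpers)
  end
end

module CompatProvider
  extend BaseProvider
end

puts BaseProvider.greeting   # => "hello"
puts CompatProvider.greeting # => "hello"
```

Without the `extended` hook, `CompatProvider` would only gain `BaseProvider`'s own instance methods, not the transitively extended helpers — which is exactly why each provider file above only needs a single `extend OpenAI::ChatCompletions` line.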
118 changes: 118 additions & 0 deletions lib/ruby_llm/providers/openai/response.rb
@@ -0,0 +1,118 @@
# frozen_string_literal: true

module RubyLLM
module Providers
module OpenAI
# Response methods of the OpenAI API integration
module Response
def responses_url
'responses'
end

module_function

def render_response_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists
payload = {
model: model,
input: format_input(messages),
stream: stream
}

# Only include temperature if it's not nil (some models don't accept it)
payload[:temperature] = temperature unless temperature.nil?

if tools.any?
payload[:tools] = tools.map { |_, tool| response_tool_for(tool) }
payload[:tool_choice] = 'auto'
end

if schema
# Use strict mode from schema if specified, default to true
strict = schema[:strict] != false

payload[:text] = {
format: {
type: 'json_schema',
name: 'response',
schema: schema,
strict: strict
}
}
end

payload
end

def format_input(messages) # rubocop:disable Metrics/PerceivedComplexity
all_tool_calls = messages.flat_map do |m|
m.tool_calls&.values || []
end
messages.flat_map do |msg|
if msg.tool_call?
msg.tool_calls.map do |_, tc|
{
type: 'function_call',
call_id: tc.id,
name: tc.name,
arguments: JSON.generate(tc.arguments),
status: 'completed'
}
end
elsif msg.role == :tool
{
type: 'function_call_output',
call_id: all_tool_calls.detect { |tc| tc.id == msg.tool_call_id }&.id,
output: msg.content,
status: 'completed'
}
else
{
type: 'message',
role: format_role(msg.role),
content: ResponseMedia.format_content(msg.content),
status: 'completed'
}.compact
end
end
end

def format_role(role)
case role
when :system
'developer'
else
role.to_s
end
end

def parse_respond_response(response)
data = response.body
return if data.empty?

raise Error.new(response, data.dig('error', 'message')) if data.dig('error', 'message')

outputs = data['output']
return unless outputs.any?

Message.new(
role: :assistant,
content: all_output_text(outputs),
tool_calls: parse_response_tool_calls(outputs),
input_tokens: data['usage']['input_tokens'],
output_tokens: data['usage']['output_tokens'],
model_id: data['model'],
raw: response
)
end

def all_output_text(outputs)
outputs.select { |o| o['type'] == 'message' }.flat_map do |o|
o['content'].filter_map do |c|
c['type'] == 'output_text' && c['text']
end
end.join("\n")
end
end
end
end
end
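For reference, a sketch of the payload shape `render_response_payload` builds for a schema-constrained request. Field names follow the diff above; the model id and schema contents are illustrative, not taken from the PR:

```ruby
require 'json'

# Illustrative JSON schema (not from the PR)
schema = {
  type: 'object',
  properties: { answer: { type: 'string' } },
  required: ['answer']
}

# Payload shape mirroring render_response_payload above
payload = {
  model: 'gpt-4o', # hypothetical model id
  input: [{
    type: 'message',
    role: 'user',
    content: [{ type: 'input_text', text: 'Say hi' }],
    status: 'completed'
  }],
  stream: false,
  text: {
    format: {
      type: 'json_schema',
      name: 'response',
      schema: schema,
      strict: schema[:strict] != false # defaults to true when unspecified
    }
  }
}

puts JSON.pretty_generate(payload)
```

Note the `strict: schema[:strict] != false` idiom from the diff: strict mode stays on unless the caller explicitly sets `strict: false` in the schema.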
77 changes: 77 additions & 0 deletions lib/ruby_llm/providers/openai/response_media.rb
@@ -0,0 +1,77 @@
# frozen_string_literal: true

module RubyLLM
module Providers
module OpenAI
# Handles formatting of media content (images, PDFs, audio, text files) for the OpenAI Responses API
module ResponseMedia
module_function

def format_content(content)
# Convert Hash/Array back to JSON string for API
return content.to_json if content.is_a?(Hash) || content.is_a?(Array)
return content unless content.is_a?(Content)

parts = []
parts << format_text(content.text) if content.text

content.attachments.each do |attachment|
case attachment.type
when :image
parts << format_image(attachment)
when :pdf
parts << format_pdf(attachment)
when :audio
parts << format_audio(attachment)
when :text
parts << format_text_file(attachment)
else
raise UnsupportedAttachmentError, attachment.type
end
end

parts
end

def format_image(image)
{
type: 'input_image',
image_url: image.url? ? image.source : "data:#{image.mime_type};base64,#{image.encoded}"
}
end

def format_pdf(pdf)
{
type: 'input_file',
filename: pdf.filename,
file_data: "data:#{pdf.mime_type};base64,#{pdf.encoded}"
}
end

def format_text_file(text_file)
{
type: 'input_text',
text: Utils.format_text_file_for_llm(text_file)
}
end

def format_audio(audio)
{
type: 'input_audio',
input_audio: {
data: audio.encoded,
format: audio.mime_type.split('/').last
}
}
end

def format_text(text)
{
type: 'input_text',
text: text
}
end
end
end
end
end
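The data-URI fallback in `format_image` can be sketched in isolation; the mime type and bytes here are stand-ins for what the gem's `Attachment` would supply:

```ruby
require 'base64'

mime_type = 'image/png'
bytes     = "\x89PNG".b # pretend image bytes (illustrative)
encoded   = Base64.strict_encode64(bytes)

# Same part shape format_image produces for non-URL attachments
part = {
  type: 'input_image',
  image_url: "data:#{mime_type};base64,#{encoded}"
}

puts part[:image_url] # => "data:image/png;base64,iVBORw=="
```

`strict_encode64` (no line breaks) matters here: the MIME-style `encode64` inserts newlines every 60 characters, which would corrupt the data URI.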