Structured Output & JSON mode response support #131


Closed · wants to merge 1 commit
5 changes: 3 additions & 2 deletions .gitignore
@@ -47,8 +47,8 @@ build-iPhoneSimulator/
# for a library or gem, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
Gemfile.lock
# .ruby-version
# .ruby-gemset
.ruby-version
.ruby-gemset

# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
.rvmrc
@@ -57,3 +57,4 @@ Gemfile.lock
# .rubocop-https?--*

repomix-output.*
/.idea/
1 change: 1 addition & 0 deletions Gemfile
@@ -18,6 +18,7 @@ group :development do
gem 'nokogiri'
gem 'overcommit', '>= 0.66'
gem 'pry', '>= 0.14'
gem 'pry-byebug', '>= 3.11'
gem 'rake', '>= 13.0'
gem 'rdoc'
gem 'reline'
3 changes: 3 additions & 0 deletions README.md
@@ -60,6 +60,9 @@ chat.ask "Tell me a story about a Ruby programmer" do |chunk|
print chunk.content
end

# Get structured responses easily (OpenAI only for now)
chat.with_response_format(:integer).ask("What is 2 + 2?").to_i # => 4

# Generate images
RubyLLM.paint "a sunset over mountains in watercolor style"

50 changes: 49 additions & 1 deletion docs/guides/chat.md
@@ -261,6 +261,54 @@ end
chat.ask "What is metaprogramming in Ruby?"
```

## Receiving Structured Responses

You can ensure responses follow a schema you define, like this:
```ruby
chat = RubyLLM.chat

chat.with_response_format(:integer).ask("What is 2 + 2?").to_i
# => 4

chat.with_response_format(:string).ask("Say 'Hello World' and nothing else.").content
# => "Hello World"

chat.with_response_format(:array, items: { type: :string })
chat.ask('What are the 2 largest countries? Only respond with country names.').content
# => ["Russia", "Canada"]

chat.with_response_format(:object, properties: { age: { type: :integer } })
chat.ask('Provide sample customer age between 10 and 100.').content
# => { "age" => 42 }

chat.with_response_format(
:object,
properties: { hobbies: { type: :array, items: { type: :string, enum: %w[Soccer Golf Hockey] } } }
)
chat.ask('Provide at least 1 hobby.').content
# => { "hobbies" => ["Soccer"] }
```

You can also provide the JSON schema you want directly to the method like this:
```ruby
chat.with_response_format(type: :object, properties: { age: { type: :integer } })
chat.ask('Provide sample customer age between 10 and 100.').content
# => { "age" => 31 }
```
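
Nested schemas work the same way. A quick sketch (the prompt and returned values are illustrative):

```ruby
chat.with_response_format(
  type: :object,
  properties: {
    customers: {
      type: :array,
      items: {
        type: :object,
        properties: { name: { type: :string }, age: { type: :integer } }
      }
    }
  }
)
chat.ask('Provide 2 sample customers with names and ages.').content
# => { "customers" => [{ "name" => "Ada", "age" => 36 }, { "name" => "Grace", "age" => 45 }] }
```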

In this example the code automatically switches to OpenAI's json_mode, since no object properties are requested:
```ruby
chat.with_response_format(:json) # Don't care about structure, just give me JSON

chat.ask('Provide a sample customer data object with name and email keys.').content
# => { "name" => "Tobias", "email" => "[email protected]" }

chat.ask('Provide a sample customer data object with name and email keys.').content
# => { "first_name" => "Michael", "email_address" => "[email protected]" }
```

{: .note }
**OpenAI only for now:** Only OpenAI models currently support this feature. We will add support for other providers shortly.


## Next Steps

This guide covered the core `Chat` interface. Now you might want to explore:
@@ -269,4 +317,4 @@ This guide covered the core `Chat` interface. Now you might want to explore:
* [Using Tools]({% link guides/tools.md %}): Enable the AI to call your Ruby code.
* [Streaming Responses]({% link guides/streaming.md %}): Get real-time feedback from the AI.
* [Rails Integration]({% link guides/rails.md %}): Persist your chat conversations easily.
* [Error Handling]({% link guides/error-handling.md %}): Build robust applications that handle API issues.
27 changes: 19 additions & 8 deletions lib/ruby_llm/active_record/acts_as.rb
@@ -93,6 +93,12 @@ def with_instructions(instructions, replace: false)
self
end

# @see RubyLLM::Chat#with_response_format
def with_response_format(...)
to_llm.with_response_format(...)
self
end

def with_tool(...)
to_llm.with_tool(...)
self
@@ -158,14 +164,19 @@ def persist_message_completion(message) # rubocop:disable Metrics/AbcSize,Metric
end

transaction do
@message.update!(
role: message.role,
content: message.content,
model_id: message.model_id,
tool_call_id: tool_call_id,
input_tokens: message.input_tokens,
output_tokens: message.output_tokens
)
# These are required fields:
@message.role = message.role
@message.content = message.content

# These are optional fields:
@message.try(:model_id=, message.model_id)
@message.try(:tool_call_id=, tool_call_id)
@message.try(:input_tokens=, message.input_tokens)
@message.try(:output_tokens=, message.output_tokens)
@message.try(:content_schema=, message.content_schema)

@message.save!

persist_tool_calls(message.tool_calls) if message.tool_calls.present?
end
end
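Note that `persist_message_completion` assigns the optional attributes with `try`, so persisting the schema only takes effect if your messages table actually has a `content_schema` column. A hypothetical migration for a Rails app using the acts_as integration (the column name matches the setter above; the jsonb type is an assumption):

```ruby
class AddContentSchemaToMessages < ActiveRecord::Migration[7.1]
  def change
    # Stores the JSON schema the assistant response was asked to follow.
    # If this column is absent, @message.try(:content_schema=, ...) is a no-op.
    add_column :messages, :content_schema, :jsonb, null: true
  end
end
```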
72 changes: 64 additions & 8 deletions lib/ruby_llm/chat.rb
@@ -31,6 +31,56 @@ def initialize(model: nil, provider: nil, assume_model_exists: false, context: n
}
end

##
# This method lets you ensure the responses follow a schema you define like this:
#
# chat.with_response_format(:integer).ask("What is 2 + 2?").to_i
# # => 4
# chat.with_response_format(:string).ask("Say 'Hello World' and nothing else.").content
# # => "Hello World"
# chat.with_response_format(:array, items: { type: :string })
# chat.ask('What are the 2 largest countries? Only respond with country names.').content
# # => ["Russia", "Canada"]
# chat.with_response_format(:object, properties: { age: { type: :integer } })
# chat.ask('Provide sample customer age between 10 and 100.').content
# # => { "age" => 42 }
# chat.with_response_format(
# :object,
# properties: { hobbies: { type: :array, items: { type: :string, enum: %w[Soccer Golf Hockey] } } }
# )
# chat.ask('Provide at least 1 hobby.').content
# # => { "hobbies" => ["Soccer"] }
#
# You can also provide the JSON schema you want directly to the method like this:
# chat.with_response_format(type: :object, properties: { age: { type: :integer } })
# chat.ask('Provide sample customer age between 10 and 100.').content
# # => { "age" => 31 }
Comment on lines +44 to +56

@jayelkaake @crmne Thanks for all the work put into this! I've been trying the API and found it a bit surprising; it adds complexity on top of OpenAI's nuances.

Comparing the calls:

  • chat.with_response_format(:object, properties: { age: { type: :integer } })
  • chat.with_response_format(type: :object, properties: { age: { type: :integer } })

Whether or not the type: key is included changes a lot about the way we invoke OpenAI. One relies on json_mode and the other on json_schema, but that's not very clear from the API.

In addition, support for structured output or json_mode also depends on the model used. Older models will not support json_schema, so maybe that's something we want to factor in? I.e. use one or the other based on the model.

OpenAI doesn't recommend JSON mode except for older models, and I do believe its API is a product of its time. The fact that we have to append more instructions to the original prompt and ask for JSON is a sign of that (why do it in English, btw?), so I don't think RubyLLM should default to it, relegating the json_schema response format.

#
# In this example the code automatically switches to OpenAI's json_mode, since no object
# properties are requested:
# chat.with_response_format(:json) # Don't care about structure, just give me JSON
# chat.ask('Provide a sample customer data object with name and email keys.').content
# # => { "name" => "Tobias", "email" => "[email protected]" }
# chat.ask('Provide a sample customer data object with name and email keys.').content
# # => { "first_name" => "Michael", "email_address" => "[email protected]" }
#
# @param type [Symbol, String, Hash, nil] (optional) Any JSON Schema type supported by the API (:integer, :object, etc.), :json for free-form JSON, or a full schema Hash
# @param schema [Hash] The schema for the response format. It can be a JSON schema or a simple hash.
# @return [Chat] (self)
def with_response_format(type = nil, **schema)
schema_hash = if type.is_a?(Symbol) || type.is_a?(String)
{ type: type.to_sym == :json ? :object : type }
elsif type.is_a?(Hash)
type
else
{}
end.merge(schema)

@response_schema = Schema.new(schema_hash)

self
end
alias with_structured_response with_response_format

def ask(message = nil, with: {}, &)
add_message role: :user, content: Content.new(message, with)
complete(&)
@@ -86,17 +136,23 @@ def each(&)

def complete(&) # rubocop:disable Metrics/MethodLength
@on[:new_message]&.call
response = @provider.complete(
messages,
tools: @tools,
temperature: @temperature,
model: @model.id,
connection: @connection,
&
)
response = @provider.with_response_schema(@response_schema) do
jayelkaake (Author) commented on Apr 27, 2025:

@crmne should we reset the @response_schema after the completion so it doesn't apply to subsequent messages in the chat?

Learning from your comment on my other PR about temperature, I realize now that your with_* pattern is meant to apply only to the next completion, so that's why I'm asking.

crmne (Owner) replied:

no, with_* applies to all subsequent messages

jayelkaake (Author) replied:

Oh, I actually already updated the code to reset the response schema after completion, following your comment on my other PR about temperature!

I've been battle-testing this code at my company Osello (which is also now a sponsor of this project) and I realized it does make more sense to reset the response_schema after completion, because in practice subsequent chat messages are most likely not meant to follow the same format:

chat.with_response_format(type: :string, enum: %w[Toronto Ottawa])
    .ask("What's the capital of Canada?")
    .content
# => "Ottawa"

chat.ask("How long has it been the capital?")
# => "Ottawa has been the capital of Canada since 1857."

chat.with_response_format(type: :integer).ask("How many years is that?")
# => 168

sirwolfgang commented on Apr 30, 2025:

Given the design of the with_* prefix, I think it would be best not to break the applies-to-all interface. What about splitting this out to a different prefix?

# Applies to all messages
chat = RubyLLM.chat.with_response_format(type: :string)

chat.ask("What's the capital of Canada?")
# => "Ottawa"

# Applies to current message
chat.as(type: :integer).ask("How many years is that?")
# => 168

# Resets back
chat.ask("How long has it been the capital?")
# => "Ottawa has been the capital of Canada since 1857."

jayelkaake (Author) replied:

@sirwolfgang good idea!

Although as might be a bit too ambiguous. Maybe .with_next_response_in_format(...) or something like that?

jayelkaake (Author) replied:

Another idea: I could make with_response_format take a block, which could be used to reset the format afterwards:

chat.with_response_format(type: :string, enum: %w[Toronto Ottawa]) do
  chat.ask("What's the capital of Canada?").content
end
# => "Ottawa"

chat.ask("How long has it been the capital?")
# => "Ottawa has been the capital of Canada since 1857."

chat.with_response_format(type: :integer)
chat.ask("How many years ago is that?")
# => 168

chat.ask("How many years ago will that be next year?")
# => 169

sirwolfgang replied on Apr 30, 2025:

@jayelkaake I think linguistically/ergonomically it would be better to split it off and not use the .with prefix at all, making it a shorter scan/parse.

Totally open to ideas other than .as, but also curious what you think it might collide with. I don't think it's structurally more ambiguous than .with. The only other as convention I can think of is Rails routes, so I also don't think it's mnemonically overloaded.

Could extend it to .as_format, .as_type, .as_response_type or something else if we want to preserve the root namespace.

Otherwise, if we could delay execution like ActiveRecord does, I can see the argument for making it a post-call setting; something like:

agent.ask("...?").in(type: :integer)
agent.ask("...?").as(type: :integer)
agent.ask("...?").structured_as(type: :integer)
agent.ask("...?").formatted_as(type: :integer)

jayelkaake (Author) replied on Apr 30, 2025:

I like the idea; it just doesn't read well in English ("ask someone to do something as..."), and it should also be clear that you're not modifying the query like you are with ActiveRecord; you're modifying the response.

I like the postfix format better. Maybe something like agent.ask("...?").response_as(:integer), but I think that might require some major updates to this library to get there.

Most APIs don't let you mutate the response format in real time, so LLMs are kind of introducing the need for a new pattern, maybe? I've been scratching my head about these things a lot over the last couple of weeks! 😅

sirwolfgang replied:

Yeah, I think refactoring to support more dynamic method chaining should be a different PR, but we could set up the expected syntax here and build towards that, since it should functionally work in either order:
agent.<token>.ask("...?") => agent.ask("...?").<token>

respond/response feels a little weird to me. I could see this interface also making sense for loading personas, like respond_as(:support_agent), which might be the process of chaining assistants for processing. Like:

timekeeper = RubyLLM.chat.with_tool(TIME)
groot = RubyLLM.chat.with_instructions(GROOT)

timekeeper.ask("What time is it?").respond_as(groot) # => "I am grooooooot"

@provider.complete(
messages,
tools: @tools,
temperature: @temperature,
model: @model.id,
connection: @connection,
&
)
end

@on[:end_message]&.call(response)

add_message response

@response_schema = nil # Reset the response schema after completion of this chat thread

if response.tool_call?
handle_tool_calls(response, &)
else
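Because `complete` clears `@response_schema` after each call, the format is effectively one-shot. A minimal sketch of the resulting behavior (prompts and outputs are illustrative):

```ruby
chat = RubyLLM.chat

# The schema applies to the next completion only...
chat.with_response_format(:integer).ask('What is 2 + 2?').to_i
# => 4

# ...and has been reset afterwards, so this returns ordinary prose.
chat.ask('Explain the answer in one sentence.').content
# => "Adding 2 and 2 gives 4."
```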
28 changes: 27 additions & 1 deletion lib/ruby_llm/message.rb
@@ -7,7 +7,9 @@ module RubyLLM
class Message
ROLES = %i[system user assistant tool].freeze

attr_reader :role, :content, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id
attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id, :content_schema

delegate :to_i, :to_a, :to_s, to: :content

def initialize(options = {})
@role = options[:role].to_sym
@@ -17,10 +19,22 @@ def initialize(options = {})
@output_tokens = options[:output_tokens]
@model_id = options[:model_id]
@tool_call_id = options[:tool_call_id]
@content_schema = options[:content_schema]

ensure_valid_role
end

def content
return @content unless @content_schema.present?
return @content if @content.nil?

if @content_schema[:type].to_s == 'object' && @content_schema[:properties].to_h.empty?
json_response
else
structured_content
end
end

def tool_call?
!tool_calls.nil? && !tool_calls.empty?
end
@@ -47,6 +61,18 @@ def to_h

private

def json_response
return nil if @content.nil?

JSON.parse(@content)
end

def structured_content
return nil if @content.nil?

json_response['result']
end

def normalize_content(content)
case content
when Content then content.format
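To make the `result` envelope concrete, here is a sketch of `Message#content` unwrapping a structured response. Constructing a `Message` by hand with a plain schema hash is for illustration only; in real use the provider sets `content_schema`:

```ruby
raw = '{"result":{"age":42}}' # shape returned under OpenAI's json_schema mode

message = RubyLLM::Message.new(
  role: :assistant,
  content: raw,
  content_schema: { type: :object, properties: { age: { type: :integer } } }
)

message.content # => { "age" => 42 }, unwrapped from the "result" envelope
```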
23 changes: 23 additions & 0 deletions lib/ruby_llm/provider.rb
@@ -31,6 +31,29 @@ def list_models(connection:)
parse_list_models_response response, slug, capabilities
end

##
# @return [::RubyLLM::Schema, NilClass]
def response_schema
Thread.current['RubyLLM::Provider::Methods.response_schema']
end

##
# @param response_schema [::RubyLLM::Schema]
def with_response_schema(response_schema)
prev_response_schema = Thread.current['RubyLLM::Provider::Methods.response_schema']

result = nil
begin
Thread.current['RubyLLM::Provider::Methods.response_schema'] = response_schema

result = yield
ensure
Thread.current['RubyLLM::Provider::Methods.response_schema'] = prev_response_schema
end

result
end

Comment on lines +34 to +56

crmne (Owner) commented:

what's all this about threads? we have contexts now

jayelkaake (Author) replied:

This was a thread-safe way to set the response schema.

I'll be able to refactor it to use context instead (assuming the context system is thread-safe; I haven't looked yet).

def embed(text, model:, connection:, dimensions:)
payload = render_embedding_payload(text, model:, dimensions:)
response = connection.post(embedding_url(model:), payload)
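As the thread above suggests, a refactor could pass the schema through explicitly instead of using a thread-local. A rough sketch only; the method names and signatures below are illustrative, not the library's actual API:

```ruby
# Hypothetical sketch: thread the schema through as an explicit argument
# rather than stashing it in Thread.current.
def complete(messages, tools:, temperature:, model:, connection:, response_schema: nil, &block)
  # render_payload would accept the schema directly (illustrative signature)
  payload = render_payload(messages, tools: tools, temperature: temperature,
                           model: model, response_schema: response_schema)
  # ... POST the payload over `connection` and parse the response as before ...
end
```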
54 changes: 53 additions & 1 deletion lib/ruby_llm/providers/openai/chat.rb
@@ -22,11 +22,14 @@ def render_payload(messages, tools:, temperature:, model:, stream: false) # rubo
payload[:tools] = tools.map { |_, tool| tool_for(tool) }
payload[:tool_choice] = 'auto'
end

add_response_schema_to_payload(payload) if response_schema.present?

payload[:stream_options] = { include_usage: true } if stream
end
end

def parse_completion_response(response) # rubocop:disable Metrics/MethodLength
def parse_completion_response(response) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize -- ABC is high because of the JSON parsing, which is clearer kept in one method
data = response.body
return if data.empty?

@@ -37,6 +40,7 @@

Message.new(
role: :assistant,
content_schema: response_schema,
content: message_data['content'],
tool_calls: parse_tool_calls(message_data['tool_calls']),
input_tokens: data['usage']['prompt_tokens'],
@@ -64,6 +68,54 @@ def format_role(role)
role.to_s
end
end

private

##
# @param [Hash] payload
def add_response_schema_to_payload(payload)
payload[:response_format] = gen_response_format_request

return unless payload[:response_format][:type] == :json_object

# NOTE: this is required by the OpenAI API when requesting arbitrary JSON.
payload[:messages].unshift({ role: :developer, content: <<~GUIDANCE
You must format your output as a valid JSON object.
Format your entire response as valid JSON.
Do not include explanations, markdown formatting, or any text outside the JSON.
GUIDANCE
})
end

##
# @return [Hash]
def gen_response_format_request
if response_schema[:type].to_s == 'object' && response_schema[:properties].to_h.empty?
{ type: :json_object } # Assume we just want json_mode
else
gen_json_schema_format_request
end
end

def gen_json_schema_format_request # rubocop:disable Metrics/MethodLength -- because it's mostly the standard hash
result_schema = response_schema.dup # so we don't modify the original in the thread
result_schema.add_to_each_object_type!(:additionalProperties, false)
result_schema.add_to_each_object_type!(:required, ->(schema) { schema[:properties].to_h.keys })

{
type: :json_schema,
json_schema: {
name: :response,
schema: {
type: :object,
properties: { result: result_schema.to_h },
additionalProperties: false,
required: [:result]
},
strict: true
}
}
end
end
end
end
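
For example, `with_response_format(:integer)` would make `gen_json_schema_format_request` return roughly the following hash, which is sent to OpenAI as the `response_format` field (a sketch traced from the code above):

```ruby
{
  type: :json_schema,
  json_schema: {
    name: :response,
    schema: {
      type: :object,
      properties: { result: { type: :integer } },
      additionalProperties: false,
      required: [:result]
    },
    strict: true
  }
}
```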