8 changes: 8 additions & 0 deletions README.md
@@ -59,6 +59,13 @@ chat.ask "Tell me a story about Ruby" do |chunk|
end
```

```ruby
# Automatic failover when a model is overloaded
chat = RubyLLM.chat(model: "gemini-2.5-flash-lite")
  .with_fallback("gemini-2.5-flash")
response = chat.ask("Classify this email")
```

```ruby
# Generate images
RubyLLM.paint "a sunset over mountains in watercolor style"
@@ -131,6 +138,7 @@ response = chat.with_schema(ProductSchema).ask "Analyze this product", with: "pr
* **Tools:** Let AI call your Ruby methods
* **Agents:** Reusable assistants with `RubyLLM::Agent`
* **Structured output:** JSON schemas that just work
* **Fallback:** Automatic model failover with `with_fallback`
* **Streaming:** Real-time responses with blocks
* **Rails:** ActiveRecord integration with `acts_as_chat`
* **Async:** Fiber-based concurrency
31 changes: 30 additions & 1 deletion docs/_advanced/error-handling.md
@@ -233,9 +233,38 @@ This will cause RubyLLM to log detailed information about API requests and respo
* **Be Specific:** Rescue specific error classes whenever possible for tailored recovery logic.
* **Log Errors:** Always log errors, including relevant context (model used, input data if safe) for debugging. Consider using the `response` attribute on `RubyLLM::Error` for more details.
* **User Feedback:** Provide clear, user-friendly feedback when an AI operation fails. Avoid exposing raw API error messages directly.
* **Fallbacks:** Consider fallback mechanisms (e.g., trying a different model, using cached data, providing a default response) if the AI service is critical to your application's function.
* **Fallbacks:** Use `with_fallback` to automatically try an alternative model when the primary is unavailable (see below).
* **Monitor:** Track the frequency of different error types in production to identify recurring issues with providers or your implementation.

## Model Fallback

When a model is overloaded or unavailable, `with_fallback` automatically switches to an alternative model after retries are exhausted.

```ruby
chat = RubyLLM.chat(model: "gemini-2.5-flash-lite")
  .with_fallback("gemini-2.5-flash")
response = chat.ask("Classify this email")
```

Fallback triggers on transient errors only: `RateLimitError` (429), `ServerError` (500), `ServiceUnavailableError` (502-503), and `OverloadedError` (529). Auth and input errors like `BadRequestError` or `UnauthorizedError` are raised immediately.

```ruby
# Cross-provider fallback
chat = RubyLLM.chat(model: "gemini-2.5-flash-lite")
.with_fallback("claude-haiku-4-5-20251001")

# Works with streaming
chat.ask("Summarize this") { |chunk| print chunk.content }
```

If the fallback model also fails, the original error is re-raised and the chat is restored to its original model. Message history is preserved across fallback attempts.
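
Since the original error is re-raised, application code can rescue it exactly as it would without fallback. A minimal sketch; the `classify_email` helper and the `:unclassified` default are illustrative, not part of RubyLLM:

```ruby
# Hypothetical helper: degrade gracefully when both models fail.
def classify_email(text)
  RubyLLM.chat(model: "gemini-2.5-flash-lite")
         .with_fallback("gemini-2.5-flash")
         .ask("Classify this email: #{text}")
rescue RubyLLM::ServiceUnavailableError, RubyLLM::OverloadedError => e
  # Both primary and fallback were unavailable; return a safe default
  # instead of surfacing the outage to the caller.
  RubyLLM.logger.warn "Both models unavailable: #{e.message}"
  :unclassified
end
```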

When fallback triggers, RubyLLM logs a warning:

```
RubyLLM: RubyLLM::ServiceUnavailableError on gemini-2.5-flash-lite, falling back to gemini-2.5-flash
```
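
The transient errors that trigger fallback are listed in `RubyLLM::Fallback::ERRORS`, so application code can rescue the same set in its own retry logic. A sketch; the `enqueue_for_retry` helper is hypothetical:

```ruby
# Treat the same transient errors as retryable in application code.
def ask_or_enqueue(chat, prompt)
  chat.ask(prompt)
rescue *RubyLLM::Fallback::ERRORS => e
  # Transient failure: retry later rather than dropping the request.
  enqueue_for_retry(prompt, e) # hypothetical job-queue helper
end
```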

## Next Steps

* [Using Tools]({% link _core_features/tools.md %})
10 changes: 10 additions & 0 deletions docs/_core_features/chat.md
@@ -126,6 +126,16 @@ chat.with_model('{{ site.models.anthropic_latest }}')
response2 = chat.ask "Follow-up question..."
```

You can also set a fallback model that kicks in automatically when the primary model is unavailable:

```ruby
chat = RubyLLM.chat(model: "gemini-2.5-flash-lite")
  .with_fallback("gemini-2.5-flash")
response = chat.ask("Classify this email")
```

See [Error Handling]({% link _advanced/error-handling.md %}#model-fallback) for details on which errors trigger fallback.
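
Because `with_fallback` returns the chat, it composes with the other chainable configuration methods. A sketch under that assumption; `build_classifier_chat` is an illustrative name:

```ruby
# Combine fallback with other chainable configuration on one chat.
def build_classifier_chat
  RubyLLM.chat(model: "gemini-2.5-flash-lite")
         .with_temperature(0.2)
         .with_fallback("gemini-2.5-flash")
end
```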

For detailed information about model selection, capabilities, aliases, and working with custom models, see the [Working with Models Guide]({% link _advanced/models.md %}).

## Multi-modal Conversations
5 changes: 5 additions & 0 deletions lib/ruby_llm/active_record/acts_as_legacy.rb
@@ -119,6 +119,11 @@ def with_tools(...)
self
end

def with_fallback(...)
to_llm.with_fallback(...)
self
end

def with_model(...)
update(model_id: to_llm.with_model(...).model.id)
self
12 changes: 11 additions & 1 deletion lib/ruby_llm/active_record/chat_methods.rb
@@ -119,6 +119,11 @@ def with_model(model_name, provider: nil, assume_exists: false)
self
end

def with_fallback(...)
to_llm.with_fallback(...)
self
end

def with_temperature(...)
to_llm.with_temperature(...)
self
@@ -211,7 +216,7 @@ def ask(message = nil, with: nil, &)

def complete(...)
to_llm.complete(...)
rescue RubyLLM::Error => e
rescue *RubyLLM::Fallback::ERRORS => e
cleanup_failed_messages if @message&.persisted? && @message.content.blank?
cleanup_orphaned_tool_results
raise e
@@ -289,6 +294,11 @@ def order_messages_for_llm(messages)
end

def persist_new_message
if @message&.persisted? && @message.content.blank? &&
!@message.tool_calls_association.exists? &&
(!@message.respond_to?(:content_raw) || @message.content_raw.blank?)
@message.destroy
end
@message = messages_association.create!(role: :assistant, content: '')
end

12 changes: 12 additions & 0 deletions lib/ruby_llm/agent.rb
@@ -24,6 +24,7 @@ def inherited(subclass)
subclass.instance_variable_set(:@context, @context)
subclass.instance_variable_set(:@chat_model, @chat_model)
subclass.instance_variable_set(:@input_names, (@input_names || []).dup)
subclass.instance_variable_set(:@fallback, @fallback&.dup)
end

def model(model_id = nil, **options)
@@ -76,6 +77,12 @@ def schema(value = nil, &block)
@schema = block_given? ? block : value
end

def fallback(model_id = nil, provider: nil)
return @fallback if model_id.nil?

@fallback = { model: model_id, provider: provider }
end

def context(value = nil)
return @context if value.nil?

@@ -167,6 +174,7 @@ def apply_configuration(chat_object, input_values:, persist_instructions:)
apply_params(llm_chat, runtime)
apply_headers(llm_chat, runtime)
apply_schema(llm_chat, runtime)
apply_fallback(llm_chat)
end

def apply_context(llm_chat)
@@ -208,6 +216,10 @@ def apply_schema(llm_chat, runtime)
llm_chat.with_schema(value) if value
end

def apply_fallback(llm_chat)
llm_chat.with_fallback(fallback[:model], provider: fallback[:provider]) if fallback
end

def llm_chat_for(chat_object)
chat_object.respond_to?(:to_llm) ? chat_object.to_llm : chat_object
end
57 changes: 31 additions & 26 deletions lib/ruby_llm/chat.rb
@@ -4,6 +4,7 @@ module RubyLLM
# Represents a conversation with an AI model
class Chat
include Enumerable
include Fallback

attr_reader :model, :messages, :tools, :params, :headers, :schema

@@ -23,6 +24,8 @@ def initialize(model: nil, provider: nil, assume_model_exists: false, context: n
@headers = {}
@schema = nil
@thinking = nil
@fallback = nil
@in_fallback = false
@on = {
new_message: nil,
end_message: nil,
@@ -135,35 +138,37 @@ def each(&)
end

def complete(&) # rubocop:disable Metrics/PerceivedComplexity
response = @provider.complete(
messages,
tools: @tools,
temperature: @temperature,
model: @model,
params: @params,
headers: @headers,
schema: @schema,
thinking: @thinking,
&wrap_streaming_block(&)
)

@on[:new_message]&.call unless block_given?

if @schema && response.content.is_a?(String)
begin
response.content = JSON.parse(response.content)
rescue JSON::ParserError
# If parsing fails, keep content as string
with_fallback_protection do
response = @provider.complete(
messages,
tools: @tools,
temperature: @temperature,
model: @model,
params: @params,
headers: @headers,
schema: @schema,
thinking: @thinking,
&wrap_streaming_block(&)
)

@on[:new_message]&.call unless block_given?

if @schema && response.content.is_a?(String)
begin
response.content = JSON.parse(response.content)
rescue JSON::ParserError
# If parsing fails, keep content as string
end
end
end

add_message response
@on[:end_message]&.call(response)
add_message response
@on[:end_message]&.call(response)

if response.tool_call?
handle_tool_calls(response, &)
else
response
if response.tool_call?
handle_tool_calls(response, &)
else
response
end
end
end

67 changes: 67 additions & 0 deletions lib/ruby_llm/fallback.rb
@@ -0,0 +1,67 @@
# frozen_string_literal: true

module RubyLLM
# Handles model-level failover for transient errors.
# Included by Chat to keep fallback logic out of the main conversation flow.
module Fallback
ERRORS = [
RateLimitError,
ServerError,
ServiceUnavailableError,
OverloadedError,
Faraday::TimeoutError,
Faraday::ConnectionFailed
].freeze

def with_fallback(model_id, provider: nil)
@fallback = { model: model_id, provider: provider }
self
end

private

def with_fallback_protection(&)
yield
rescue *ERRORS => e
attempt_fallback(e, &)
end

def attempt_fallback(error, &)
raise error unless @fallback && !@in_fallback

log_fallback(error)

original_model = @model
original_provider = @provider
original_connection = @connection

begin
@in_fallback = true
with_model(@fallback[:model], provider: @fallback[:provider])
yield
rescue *ERRORS => fallback_error
log_fallback_failure(fallback_error)
raise error
ensure
@in_fallback = false
@model = original_model
@provider = original_provider
@connection = original_connection
end
end

def log_fallback(error)
RubyLLM.logger.warn "RubyLLM: #{error.class} on #{sanitize_for_log(@model.id)}, " \
"falling back to #{sanitize_for_log(@fallback[:model])}"
end

def log_fallback_failure(error)
RubyLLM.logger.warn "RubyLLM: Fallback to #{sanitize_for_log(@fallback[:model])} also failed: " \
"#{error.class} - #{sanitize_for_log(error.message)}"
end

def sanitize_for_log(value)
value.to_s.gsub(/[\x00-\x1f\x7f]/, '')
end
end
end