diff --git a/README.md b/README.md
index 52aeab436..ebf96d41e 100644
--- a/README.md
+++ b/README.md
@@ -59,6 +59,13 @@ chat.ask "Tell me a story about Ruby" do |chunk|
 end
 ```
 
+```ruby
+# Automatic failover when a model is overloaded
+response = RubyLLM.chat(model: "gemini-2.5-flash-lite")
+  .with_fallback("gemini-2.5-flash")
+  .ask("Classify this email")
+```
+
 ```ruby
 # Generate images
 RubyLLM.paint "a sunset over mountains in watercolor style"
@@ -131,6 +138,7 @@ response = chat.with_schema(ProductSchema).ask "Analyze this product", with: "pr
 * **Tools:** Let AI call your Ruby methods
 * **Agents:** Reusable assistants with `RubyLLM::Agent`
 * **Structured output:** JSON schemas that just work
+* **Fallback:** Automatic model failover with `with_fallback`
 * **Streaming:** Real-time responses with blocks
 * **Rails:** ActiveRecord integration with `acts_as_chat`
 * **Async:** Fiber-based concurrency
diff --git a/docs/_advanced/error-handling.md b/docs/_advanced/error-handling.md
index 457c00e6a..22402b36d 100644
--- a/docs/_advanced/error-handling.md
+++ b/docs/_advanced/error-handling.md
@@ -233,9 +233,38 @@ This will cause RubyLLM to log detailed information about API requests and respo
 * **Be Specific:** Rescue specific error classes whenever possible for tailored recovery logic.
 * **Log Errors:** Always log errors, including relevant context (model used, input data if safe) for debugging. Consider using the `response` attribute on `RubyLLM::Error` for more details.
 * **User Feedback:** Provide clear, user-friendly feedback when an AI operation fails. Avoid exposing raw API error messages directly.
-* **Fallbacks:** Consider fallback mechanisms (e.g., trying a different model, using cached data, providing a default response) if the AI service is critical to your application's function.
+* **Fallbacks:** Use `with_fallback` to automatically try an alternative model when the primary is unavailable (see below).
 * **Monitor:** Track the frequency of different error types in production to identify recurring issues with providers or your implementation.
+
+## Model Fallback
+
+When a model is overloaded or unavailable, `with_fallback` automatically switches to an alternative model after retries are exhausted.
+
+```ruby
+response = RubyLLM.chat(model: "gemini-2.5-flash-lite")
+  .with_fallback("gemini-2.5-flash")
+  .ask("Classify this email")
+```
+
+Fallback triggers on transient errors only: `RateLimitError` (429), `ServerError` (500), `ServiceUnavailableError` (502-503), and `OverloadedError` (529), plus the network-level `Faraday::TimeoutError` and `Faraday::ConnectionFailed`. Auth and input errors like `BadRequestError` or `UnauthorizedError` are raised immediately.
+
+```ruby
+# Cross-provider fallback
+chat = RubyLLM.chat(model: "gemini-2.5-flash-lite")
+  .with_fallback("claude-haiku-4-5-20251001")
+
+# Works with streaming
+chat.ask("Summarize this") { |chunk| print chunk.content }
+```
+
+If the fallback model also fails, the original error is re-raised and the chat is restored to its original model. Message history is preserved across fallback attempts.
+
+When fallback triggers, RubyLLM logs a warning:
+
+```
+RubyLLM: RubyLLM::ServiceUnavailableError on gemini-2.5-flash-lite, falling back to gemini-2.5-flash
+```
+
 ## Next Steps
 
 * [Using Tools]({% link _core_features/tools.md %})
diff --git a/docs/_core_features/chat.md b/docs/_core_features/chat.md
index fd2fa1412..856ac9e4c 100644
--- a/docs/_core_features/chat.md
+++ b/docs/_core_features/chat.md
@@ -126,6 +126,16 @@ chat.with_model('{{ site.models.anthropic_latest }}')
 response2 = chat.ask "Follow-up question..."
 ```
 
+You can also set a fallback model that kicks in automatically when the primary model is unavailable:
+
+```ruby
+response = RubyLLM.chat(model: "gemini-2.5-flash-lite")
+  .with_fallback("gemini-2.5-flash")
+  .ask("Classify this email")
+```
+
+See [Error Handling]({% link _advanced/error-handling.md %}#model-fallback) for details on which errors trigger fallback.
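The restore-and-re-raise contract documented above can be illustrated with a standalone sketch in plain Ruby. Note this does not use the gem itself: `TransientError`, `FakeChat`, and the model names are illustrative stand-ins, not RubyLLM API.

```ruby
# Standalone sketch of the fallback contract: try the primary model, switch
# to the fallback on a transient error, re-raise the ORIGINAL error if both
# fail, and always restore the original model afterwards.
class TransientError < StandardError; end

class FakeChat
  attr_reader :model

  def initialize(model)
    @model = model
    @fallback = nil
  end

  def with_fallback(model_id)
    @fallback = model_id
    self # chainable, like Chat#with_fallback
  end

  def complete
    yield @model
  rescue TransientError => primary_error
    raise primary_error unless @fallback

    original = @model
    begin
      @model = @fallback
      yield @model
    rescue TransientError
      raise primary_error # surface the original failure, not the fallback's
    ensure
      @model = original # chat always ends up back on its primary model
    end
  end
end

chat = FakeChat.new("primary-model").with_fallback("backup-model")
calls = []
result = chat.complete do |model|
  calls << model
  raise TransientError, "overloaded" if model == "primary-model"
  "answer from #{model}"
end

puts result     # => answer from backup-model
puts chat.model # => primary-model (restored after fallback)
```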
+
 For detailed information about model selection, capabilities, aliases, and working with custom models, see the [Working with Models Guide]({% link _advanced/models.md %}).
 
 ## Multi-modal Conversations
diff --git a/lib/ruby_llm/active_record/acts_as_legacy.rb b/lib/ruby_llm/active_record/acts_as_legacy.rb
index 9215605ba..03dcf1cbd 100644
--- a/lib/ruby_llm/active_record/acts_as_legacy.rb
+++ b/lib/ruby_llm/active_record/acts_as_legacy.rb
@@ -119,6 +119,11 @@ def with_tools(...)
         self
       end
 
+      def with_fallback(...)
+        to_llm.with_fallback(...)
+        self
+      end
+
       def with_model(...)
         update(model_id: to_llm.with_model(...).model.id)
         self
diff --git a/lib/ruby_llm/active_record/chat_methods.rb b/lib/ruby_llm/active_record/chat_methods.rb
index 7782bfef5..569aaa1da 100644
--- a/lib/ruby_llm/active_record/chat_methods.rb
+++ b/lib/ruby_llm/active_record/chat_methods.rb
@@ -119,6 +119,11 @@ def with_model(model_name, provider: nil, assume_exists: false)
       self
     end
 
+    def with_fallback(...)
+      to_llm.with_fallback(...)
+      self
+    end
+
    def with_temperature(...)
      to_llm.with_temperature(...)
      self
@@ -211,7 +216,7 @@ def ask(message = nil, with: nil, &)
 
    def complete(...)
      to_llm.complete(...)
-    rescue RubyLLM::Error => e
+    rescue *RubyLLM::Fallback::ERRORS => e
      cleanup_failed_messages if @message&.persisted? && @message.content.blank?
      cleanup_orphaned_tool_results
      raise e
@@ -289,6 +294,11 @@ def order_messages_for_llm(messages)
    end
 
    def persist_new_message
+      if @message&.persisted? && @message.content.blank? &&
+         !@message.tool_calls_association.exists? &&
+         (!@message.respond_to?(:content_raw) || @message.content_raw.blank?)
+        @message.destroy
+      end
      @message = messages_association.create!(role: :assistant, content: '')
    end
diff --git a/lib/ruby_llm/agent.rb b/lib/ruby_llm/agent.rb
index 2568e638b..621e0a96e 100644
--- a/lib/ruby_llm/agent.rb
+++ b/lib/ruby_llm/agent.rb
@@ -24,6 +24,7 @@ def inherited(subclass)
        subclass.instance_variable_set(:@context, @context)
        subclass.instance_variable_set(:@chat_model, @chat_model)
        subclass.instance_variable_set(:@input_names, (@input_names || []).dup)
+        subclass.instance_variable_set(:@fallback, @fallback&.dup)
      end
 
      def model(model_id = nil, **options)
@@ -76,6 +77,12 @@ def schema(value = nil, &block)
        @schema = block_given? ? block : value
      end
 
+      def fallback(model_id = nil, provider: nil)
+        return @fallback if model_id.nil?
+
+        @fallback = { model: model_id, provider: provider }
+      end
+
      def context(value = nil)
        return @context if value.nil?
 
@@ -167,6 +174,7 @@ def apply_configuration(chat_object, input_values:, persist_instructions:)
      apply_params(llm_chat, runtime)
      apply_headers(llm_chat, runtime)
      apply_schema(llm_chat, runtime)
+      apply_fallback(llm_chat)
    end
 
    def apply_context(llm_chat)
@@ -208,6 +216,10 @@ def apply_schema(llm_chat, runtime)
      llm_chat.with_schema(value) if value
    end
 
+    def apply_fallback(llm_chat)
+      llm_chat.with_fallback(fallback[:model], provider: fallback[:provider]) if fallback
+    end
+
    def llm_chat_for(chat_object)
      chat_object.respond_to?(:to_llm) ? chat_object.to_llm : chat_object
    end
diff --git a/lib/ruby_llm/chat.rb b/lib/ruby_llm/chat.rb
index 79eedd931..4a4a68544 100644
--- a/lib/ruby_llm/chat.rb
+++ b/lib/ruby_llm/chat.rb
@@ -4,6 +4,7 @@ module RubyLLM
  # Represents a conversation with an AI model
  class Chat
    include Enumerable
+    include Fallback
 
    attr_reader :model, :messages, :tools, :params, :headers, :schema
 
@@ -23,6 +24,8 @@ def initialize(model: nil, provider: nil, assume_model_exists: false, context: n
      @headers = {}
      @schema = nil
      @thinking = nil
+      @fallback = nil
+      @in_fallback = false
      @on = {
        new_message: nil,
        end_message: nil,
@@ -135,35 +138,37 @@ def each(&)
    end
 
    def complete(&) # rubocop:disable Metrics/PerceivedComplexity
-      response = @provider.complete(
-        messages,
-        tools: @tools,
-        temperature: @temperature,
-        model: @model,
-        params: @params,
-        headers: @headers,
-        schema: @schema,
-        thinking: @thinking,
-        &wrap_streaming_block(&)
-      )
-
-      @on[:new_message]&.call unless block_given?
-
-      if @schema && response.content.is_a?(String)
-        begin
-          response.content = JSON.parse(response.content)
-        rescue JSON::ParserError
-          # If parsing fails, keep content as string
+      with_fallback_protection do
+        response = @provider.complete(
+          messages,
+          tools: @tools,
+          temperature: @temperature,
+          model: @model,
+          params: @params,
+          headers: @headers,
+          schema: @schema,
+          thinking: @thinking,
+          &wrap_streaming_block(&)
+        )
+
+        @on[:new_message]&.call unless block_given?
+
+        if @schema && response.content.is_a?(String)
+          begin
+            response.content = JSON.parse(response.content)
+          rescue JSON::ParserError
+            # If parsing fails, keep content as string
+          end
        end
-      end
 
-      add_message response
-      @on[:end_message]&.call(response)
+        add_message response
+        @on[:end_message]&.call(response)
 
-      if response.tool_call?
-        handle_tool_calls(response, &)
-      else
-        response
+        if response.tool_call?
+          handle_tool_calls(response, &)
+        else
+          response
+        end
      end
    end
diff --git a/lib/ruby_llm/fallback.rb b/lib/ruby_llm/fallback.rb
new file mode 100644
index 000000000..30e2c2c7a
--- /dev/null
+++ b/lib/ruby_llm/fallback.rb
@@ -0,0 +1,67 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  # Handles model-level failover for transient errors.
+  # Included by Chat to keep fallback logic out of the main conversation flow.
+  module Fallback
+    ERRORS = [
+      RateLimitError,
+      ServerError,
+      ServiceUnavailableError,
+      OverloadedError,
+      Faraday::TimeoutError,
+      Faraday::ConnectionFailed
+    ].freeze
+
+    def with_fallback(model_id, provider: nil)
+      @fallback = { model: model_id, provider: provider }
+      self
+    end
+
+    private
+
+    def with_fallback_protection(&)
+      yield
+    rescue *ERRORS => e
+      attempt_fallback(e, &)
+    end
+
+    def attempt_fallback(error, &)
+      raise error unless @fallback && !@in_fallback
+
+      log_fallback(error)
+
+      original_model = @model
+      original_provider = @provider
+      original_connection = @connection
+
+      begin
+        @in_fallback = true
+        with_model(@fallback[:model], provider: @fallback[:provider])
+        yield
+      rescue *ERRORS => fallback_error
+        log_fallback_failure(fallback_error)
+        raise error
+      ensure
+        @in_fallback = false
+        @model = original_model
+        @provider = original_provider
+        @connection = original_connection
+      end
+    end
+
+    def log_fallback(error)
+      RubyLLM.logger.warn "RubyLLM: #{error.class} on #{sanitize_for_log(@model.id)}, " \
+                          "falling back to #{sanitize_for_log(@fallback[:model])}"
+    end
+
+    def log_fallback_failure(error)
+      RubyLLM.logger.warn "RubyLLM: Fallback to #{sanitize_for_log(@fallback[:model])} also failed: " \
+                          "#{error.class} - #{sanitize_for_log(error.message)}"
+    end
+
+    def sanitize_for_log(value)
+      value.to_s.gsub(/[\x00-\x1f\x7f]/, '')
+    end
+  end
+end
diff --git a/spec/ruby_llm/active_record/acts_as_spec.rb b/spec/ruby_llm/active_record/acts_as_spec.rb
index c9856f362..ff873b969 100644
--- a/spec/ruby_llm/active_record/acts_as_spec.rb
+++ b/spec/ruby_llm/active_record/acts_as_spec.rb
@@ -326,6 +326,87 @@ def execute(input:)
    end
  end
 
+  describe 'streaming fallback phantom message cleanup' do
+    it 'destroys blank assistant message when persist_new_message is called again' do
+      chat = Chat.create!(model: model)
+      chat.to_llm # initialize @chat and persistence callbacks
+
+      # Simulate first streaming attempt: on_new_message creates a blank assistant row
+      chat.send(:persist_new_message)
+      orphaned_message = chat.instance_variable_get(:@message)
+      expect(orphaned_message).to be_persisted
+      expect(orphaned_message.content).to eq('')
+
+      orphaned_id = orphaned_message.id
+
+      # Simulate fallback streaming attempt: on_new_message fires again
+      chat.send(:persist_new_message)
+      new_message = chat.instance_variable_get(:@message)
+
+      # The orphaned blank message should be destroyed
+      expect(Message.exists?(orphaned_id)).to be false
+
+      # A new blank assistant message should exist for the fallback attempt
+      expect(new_message).to be_persisted
+      expect(new_message.content).to eq('')
+      expect(new_message.id).not_to eq(orphaned_id)
+    end
+
+    it 'does not destroy a blank assistant message that has tool calls' do
+      chat = Chat.create!(model: model)
+      chat.to_llm
+
+      chat.send(:persist_new_message)
+      tool_call_message = chat.instance_variable_get(:@message)
+      # Simulate a tool call response: blank content but has tool_call records
+      tool_call_message.tool_calls.create!(
+        tool_call_id: 'call_123',
+        name: 'test_tool',
+        arguments: { foo: 'bar' }
+      )
+      tool_call_id = tool_call_message.id
+
+      # Next on_new_message should NOT destroy this message
+      chat.send(:persist_new_message)
+
+      expect(Message.exists?(tool_call_id)).to be true
+    end
+
+    it 'does not destroy a blank assistant message that has content_raw' do
+      chat = Chat.create!(model: model)
+      chat.to_llm
+
+      chat.send(:persist_new_message)
+      structured_message = chat.instance_variable_get(:@message)
+      # Simulate structured output: blank content but content_raw is set
+      structured_message.update!(content: nil, content_raw: { 'name' => 'Alice', 'age' => 25 })
+      structured_id = structured_message.id
+
+      # Next on_new_message should NOT destroy this message
+      chat.send(:persist_new_message)
+
+      expect(Message.exists?(structured_id)).to be true
+    end
+
+    it 'does not destroy a populated assistant message when persist_new_message is called' do
+      chat = Chat.create!(model: model)
+      chat.to_llm
+
+      # Simulate normal flow: on_new_message creates row, on_end_message populates it
+      chat.send(:persist_new_message)
+      populated_message = chat.instance_variable_get(:@message)
+      populated_message.update!(content: 'Hello, I am the assistant response')
+
+      populated_id = populated_message.id
+
+      # Next on_new_message (e.g., for a tool call follow-up) should NOT destroy the populated message
+      chat.send(:persist_new_message)
+
+      expect(Message.exists?(populated_id)).to be true
+      expect(chat.instance_variable_get(:@message).id).not_to eq(populated_id)
+    end
+  end
+
  # Custom configuration tests with inline models
  describe 'custom configurations' do
    before(:all) do # rubocop:disable RSpec/BeforeAfterAll
diff --git a/spec/ruby_llm/agent_spec.rb b/spec/ruby_llm/agent_spec.rb
index b61a6eec3..1395e3c44 100644
--- a/spec/ruby_llm/agent_spec.rb
+++ b/spec/ruby_llm/agent_spec.rb
@@ -136,4 +136,83 @@ def each(&block)
    agent = Class.new(described_class).new(chat: fake_chat)
    expect(agent.map(&:upcase)).to eq(%w[FIRST SECOND])
  end
+
+  describe 'fallback' do
+    it 'stores and retrieves fallback config via class macro' do
+      agent_class = Class.new(RubyLLM::Agent) do
+        model 'gpt-4.1-nano'
+        fallback 'claude-haiku-4-5-20251001', provider: :anthropic
+      end
+
+      expect(agent_class.fallback).to eq({ model: 'claude-haiku-4-5-20251001', provider: :anthropic })
+    end
+
+    it 'returns nil when no fallback is configured' do
+      agent_class = Class.new(RubyLLM::Agent) do
+        model 'gpt-4.1-nano'
+      end
+
+      expect(agent_class.fallback).to be_nil
+    end
+
+    it 'inherits fallback config to subclasses' do
+      parent_class = Class.new(RubyLLM::Agent) do
+        model 'gpt-4.1-nano'
+        fallback 'claude-haiku-4-5-20251001', provider: :anthropic
+      end
+
+      child_class = Class.new(parent_class)
+
+      expect(child_class.fallback).to eq({ model: 'claude-haiku-4-5-20251001', provider: :anthropic })
+    end
+
+    it 'does not affect parent when child overrides fallback' do
+      parent_class = Class.new(RubyLLM::Agent) do
+        model 'gpt-4.1-nano'
+        fallback 'claude-haiku-4-5-20251001', provider: :anthropic
+      end
+
+      child_class = Class.new(parent_class) do
+        fallback 'gpt-4.1-mini'
+      end
+
+      expect(parent_class.fallback).to eq({ model: 'claude-haiku-4-5-20251001', provider: :anthropic })
+      expect(child_class.fallback).to eq({ model: 'gpt-4.1-mini', provider: nil })
+    end
+
+    it 'applies fallback to the underlying chat via .chat' do
+      agent_class = Class.new(RubyLLM::Agent) do
+        model 'gpt-4.1-nano'
+        fallback 'claude-haiku-4-5-20251001', provider: :anthropic
+      end
+
+      chat = agent_class.chat
+      fallback_config = chat.instance_variable_get(:@fallback)
+
+      expect(fallback_config).to eq({ model: 'claude-haiku-4-5-20251001', provider: :anthropic })
+    end
+
+    it 'applies fallback to the underlying chat via .new' do
+      agent_class = Class.new(RubyLLM::Agent) do
+        model 'gpt-4.1-nano'
+        fallback 'claude-haiku-4-5-20251001', provider: :anthropic
+      end
+
+      agent = agent_class.new
+      fallback_config = agent.chat.instance_variable_get(:@fallback)
+
+      expect(fallback_config).to eq({ model: 'claude-haiku-4-5-20251001', provider: :anthropic })
+    end
+
+    it 'does not apply fallback when none is configured' do
+      agent_class = Class.new(RubyLLM::Agent) do
+        model 'gpt-4.1-nano'
+      end
+
+      chat = agent_class.chat
+      fallback_config = chat.instance_variable_get(:@fallback)
+
+      expect(fallback_config).to be_nil
+    end
+  end
 end
diff --git a/spec/ruby_llm/chat_fallback_spec.rb b/spec/ruby_llm/chat_fallback_spec.rb
new file mode 100644
index 000000000..96d57d35a
--- /dev/null
+++ b/spec/ruby_llm/chat_fallback_spec.rb
@@ -0,0 +1,273 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe RubyLLM::Chat do
+  include_context 'with configured RubyLLM'
+
+  describe '#with_fallback' do
+    let(:primary_provider) { instance_double(RubyLLM::Provider) }
+    let(:fallback_provider) { instance_double(RubyLLM::Provider) }
+    let(:chat) { RubyLLM.chat(model: 'gpt-4.1-nano') }
+
+    before do
+      allow(RubyLLM::Models).to receive(:resolve).and_call_original
+      allow(RubyLLM::Models).to receive(:resolve)
+        .with('gpt-4.1-nano', provider: nil, assume_exists: false, config: anything)
+        .and_return([RubyLLM::Models.find('gpt-4.1-nano'), primary_provider])
+      allow(primary_provider).to receive(:connection).and_return(double)
+      allow(RubyLLM::Models).to receive(:resolve)
+        .with('claude-haiku-4-5-20251001', provider: nil, assume_exists: false, config: anything)
+        .and_return([RubyLLM::Models.find('claude-haiku-4-5-20251001'), fallback_provider])
+      allow(fallback_provider).to receive(:connection).and_return(double)
+    end
+
+    it 'returns self for chaining' do
+      expect(chat.with_fallback('claude-haiku-4-5-20251001')).to eq(chat)
+    end
+
+    it 'tries fallback model on transient errors' do
+      chat.with_fallback('claude-haiku-4-5-20251001')
+
+      call_count = 0
+      allow(primary_provider).to receive(:complete) do
+        call_count += 1
+        raise RubyLLM::ServiceUnavailableError.new(nil, 'model experiencing high demand')
+      end
+
+      allow(fallback_provider).to receive(:complete)
+        .and_return(RubyLLM::Message.new(role: :assistant, content: 'Hello from fallback!'))
+
+      chat.add_message(role: :user, content: 'Hello')
+      response = chat.complete
+
+      expect(response.content).to eq('Hello from fallback!')
+      expect(call_count).to eq(1)
+    end
+
+    it 'raises original error when fallback also fails' do
+      chat.with_fallback('claude-haiku-4-5-20251001')
+
+      original_error = RubyLLM::ServiceUnavailableError.new(nil, 'primary down')
+      allow(primary_provider).to receive(:complete).and_raise(original_error)
+      allow(fallback_provider).to receive(:complete)
+        .and_raise(RubyLLM::ServerError.new(nil, 'fallback also down'))
+
+      chat.add_message(role: :user, content: 'Hello')
+
+      expect { chat.complete }.to raise_error(RubyLLM::ServiceUnavailableError, 'primary down')
+    end
+
+    it 'restores original model after successful fallback' do
+      chat.with_fallback('claude-haiku-4-5-20251001')
+
+      original_model = chat.model
+
+      primary_calls = 0
+      allow(primary_provider).to receive(:complete) do
+        primary_calls += 1
+        if primary_calls == 1
+          raise RubyLLM::OverloadedError.new(nil, 'overloaded')
+        else
+          RubyLLM::Message.new(role: :assistant, content: 'primary restored')
+        end
+      end
+      allow(fallback_provider).to receive(:complete)
+        .and_return(RubyLLM::Message.new(role: :assistant, content: 'ok'))
+
+      chat.add_message(role: :user, content: 'Hello')
+      chat.complete
+
+      expect(chat.model).to eq(original_model)
+
+      # Verify provider restoration: next call routes to primary
+      chat.add_message(role: :user, content: 'Hello again')
+      response = chat.complete
+      expect(response.content).to eq('primary restored')
+    end
+
+    it 'restores original model when fallback fails' do
+      chat.with_fallback('claude-haiku-4-5-20251001')
+
+      original_model = chat.model
+
+      primary_calls = 0
+      allow(primary_provider).to receive(:complete) do
+        primary_calls += 1
+        if primary_calls == 1
+          raise RubyLLM::OverloadedError.new(nil, 'overloaded')
+        else
+          RubyLLM::Message.new(role: :assistant, content: 'primary restored')
+        end
+      end
+      allow(fallback_provider).to receive(:complete)
+        .and_raise(RubyLLM::ServerError.new(nil, 'fallback down'))
+
+      chat.add_message(role: :user, content: 'Hello')
+
+      expect { chat.complete }.to raise_error(RubyLLM::OverloadedError)
+      expect(chat.model).to eq(original_model)
+
+      # Verify provider restoration: next call routes to primary
+      chat.add_message(role: :user, content: 'Hello again')
+      response = chat.complete
+      expect(response.content).to eq('primary restored')
+    end
+
+    it 'does not trigger fallback on non-transient errors' do
+      chat.with_fallback('claude-haiku-4-5-20251001')
+
+      allow(primary_provider).to receive(:complete)
+        .and_raise(RubyLLM::BadRequestError.new(nil, 'invalid request'))
+
+      chat.add_message(role: :user, content: 'Hello')
+
+      expect { chat.complete }.to raise_error(RubyLLM::BadRequestError, 'invalid request')
+    end
+
+    it 'does not trigger fallback on auth errors' do
+      chat.with_fallback('claude-haiku-4-5-20251001')
+
+      allow(primary_provider).to receive(:complete)
+        .and_raise(RubyLLM::UnauthorizedError.new(nil, 'bad key'))
+
+      chat.add_message(role: :user, content: 'Hello')
+
+      expect { chat.complete }.to raise_error(RubyLLM::UnauthorizedError)
+    end
+
+    it 'preserves message history across fallback' do
+      chat.with_fallback('claude-haiku-4-5-20251001')
+      chat.add_message(role: :user, content: 'First message')
+      chat.add_message(role: :assistant, content: 'First reply')
+      chat.add_message(role: :user, content: 'Second message')
+
+      allow(primary_provider).to receive(:complete)
+        .and_raise(RubyLLM::RateLimitError.new(nil, 'rate limited'))
+
+      captured_messages = nil
+      allow(fallback_provider).to receive(:complete) do |messages, **_kwargs|
+        captured_messages = messages.dup
+        RubyLLM::Message.new(role: :assistant, content: 'Fallback reply')
+      end
+
+      chat.complete
+
+      expect(captured_messages.length).to eq(3)
+      expect(captured_messages[0].content).to eq('First message')
+      expect(captured_messages[1].content).to eq('First reply')
+      expect(captured_messages[2].content).to eq('Second message')
+    end
+
+    it 'works with streaming' do
+      chat.with_fallback('claude-haiku-4-5-20251001')
+
+      allow(primary_provider).to receive(:complete)
+        .and_raise(RubyLLM::ServiceUnavailableError.new(nil, 'unavailable'))
+
+      allow(fallback_provider).to receive(:complete) do |_messages, **_kwargs, &block|
+        block&.call(RubyLLM::Chunk.new(role: :assistant, content: 'chunk'))
+        RubyLLM::Message.new(role: :assistant, content: 'streamed reply')
+      end
+
+      chat.add_message(role: :user, content: 'Hello')
+
+      chunks = []
+      response = chat.complete { |chunk| chunks << chunk }
+
+      expect(response.content).to eq('streamed reply')
+      expect(chunks).not_to be_empty
+    end
+
+    it 'does not fallback when no fallback is configured' do
+      allow(primary_provider).to receive(:complete)
+        .and_raise(RubyLLM::ServiceUnavailableError.new(nil, 'unavailable'))
+
+      chat.add_message(role: :user, content: 'Hello')
+
+      expect { chat.complete }.to raise_error(RubyLLM::ServiceUnavailableError)
+    end
+
+    [
+      RubyLLM::RateLimitError,
+      RubyLLM::ServerError,
+      RubyLLM::ServiceUnavailableError,
+      RubyLLM::OverloadedError
+    ].each do |error_class|
+      it "triggers fallback on #{error_class.name.split('::').last}" do
+        chat.with_fallback('claude-haiku-4-5-20251001')
+
+        allow(primary_provider).to receive(:complete)
+          .and_raise(error_class.new(nil, 'error'))
+        allow(fallback_provider).to receive(:complete)
+          .and_return(RubyLLM::Message.new(role: :assistant, content: 'ok'))
+
+        chat.add_message(role: :user, content: 'Hello')
+        response = chat.complete
+
+        expect(response.content).to eq('ok')
+      end
+    end
+
+    it 'triggers fallback on Faraday::TimeoutError' do
+      chat.with_fallback('claude-haiku-4-5-20251001')
+
+      allow(primary_provider).to receive(:complete)
+        .and_raise(Faraday::TimeoutError.new('request timed out'))
+      allow(fallback_provider).to receive(:complete)
+        .and_return(RubyLLM::Message.new(role: :assistant, content: 'ok from fallback'))
+
+      chat.add_message(role: :user, content: 'Hello')
+      response = chat.complete
+
+      expect(response.content).to eq('ok from fallback')
+    end
+
+    it 'triggers fallback on Faraday::ConnectionFailed' do
+      chat.with_fallback('claude-haiku-4-5-20251001')
+
+      allow(primary_provider).to receive(:complete)
+        .and_raise(Faraday::ConnectionFailed.new('connection refused'))
+      allow(fallback_provider).to receive(:complete)
+        .and_return(RubyLLM::Message.new(role: :assistant, content: 'ok from fallback'))
+
+      chat.add_message(role: :user, content: 'Hello')
+      response = chat.complete
+
+      expect(response.content).to eq('ok from fallback')
+    end
+
+    it 'logs warning with fallback error details when both primary and fallback fail' do
+      chat.with_fallback('claude-haiku-4-5-20251001')
+
+      allow(primary_provider).to receive(:complete)
+        .and_raise(RubyLLM::ServiceUnavailableError.new(nil, 'primary down'))
+      allow(fallback_provider).to receive(:complete)
+        .and_raise(RubyLLM::ServerError.new(nil, 'fallback also down'))
+
+      chat.add_message(role: :user, content: 'Hello')
+
+      expect(RubyLLM.logger).to receive(:warn).with(/falling back to/)
+      expect(RubyLLM.logger).to receive(:warn).with(/Fallback to claude-haiku-4-5-20251001 also failed: RubyLLM::ServerError - fallback also down/)
+
+      expect { chat.complete }.to raise_error(RubyLLM::ServiceUnavailableError, 'primary down')
+    end
+
+    it 'sanitizes model IDs with control characters in log output' do
+      chat.with_fallback('claude-haiku-4-5-20251001')
+
+      allow(chat.model).to receive(:id).and_return("gpt-4\nnewline-injected")
+
+      allow(primary_provider).to receive(:complete)
+        .and_raise(RubyLLM::ServiceUnavailableError.new(nil, 'unavailable'))
+      allow(fallback_provider).to receive(:complete)
+        .and_return(RubyLLM::Message.new(role: :assistant, content: 'ok'))
+
+      chat.add_message(role: :user, content: 'Hello')
+
+      expect(RubyLLM.logger).to receive(:warn).with(/gpt-4newline-injected/).and_call_original
+
+      chat.complete
+    end
+  end
+end
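The `persist_new_message` guard exercised by the acts_as specs above destroys a stale assistant row only when it is truly a phantom. Its three conditions can be sketched in plain Ruby, with no ActiveRecord required; `Msg` and the `destroyed` bookkeeping are illustrative stand-ins for the real model, not gem API.

```ruby
# A stale assistant row is a "phantom" only when ALL three hold:
# blank content, no tool-call records, and no structured content_raw payload.
Msg = Struct.new(:content, :tool_calls, :content_raw, :destroyed) do
  def blank_content? = content.nil? || content.empty?
end

def phantom?(msg)
  msg.blank_content? &&
    msg.tool_calls.empty? &&
    (msg.content_raw.nil? || msg.content_raw.empty?)
end

phantom    = Msg.new("", [], nil, false)                       # failed streaming attempt
tool_call  = Msg.new("", [{ name: "test_tool" }], nil, false)  # blank but has tool calls
structured = Msg.new("", [], { "name" => "Alice" }, false)     # blank but has content_raw
populated  = Msg.new("Hello", [], nil, false)                  # normal completed response

# Mark for destruction exactly the messages the guard would destroy
[phantom, tool_call, structured, populated].each { |m| m.destroyed = phantom?(m) }

p phantom.destroyed    # => true
p tool_call.destroyed  # => false
p structured.destroyed # => false
p populated.destroyed  # => false
```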