Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 91 additions & 0 deletions config/initializers/encoding_sanitizer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# frozen_string_literal: true

require "delegate"

# Middleware to handle encoding compatibility issues in requests.
# Fixes Encoding::CompatibilityError when UTF-16LE encoded data is sent in
# query strings or POST bodies (e.g., from certain browsers, bots, or malformed requests).
class EncodingSanitizer
  # Env keys holding URL-derived strings that get normalised before the
  # request is handed to the rest of the stack.
  SANITIZED_ENV_KEYS = %w[QUERY_STRING REQUEST_URI PATH_INFO HTTP_REFERER].freeze

  # Returns +value+ as a valid UTF-8 string.
  #
  # * Already-valid UTF-8 is returned as-is (same object).
  # * Strings tagged UTF-8 or ASCII-8BIT have their bytes reinterpreted as
  #   UTF-8 and any invalid sequences removed. Transcoding from ASCII-8BIT
  #   would instead drop every byte >= 0x80, destroying valid multibyte
  #   characters that merely arrived with a binary tag.
  # * Any other encoding (e.g. UTF-16LE) is transcoded, dropping characters
  #   that cannot be converted.
  #
  # @param value [String]
  # @return [String] a valid UTF-8 string
  def self.to_utf8(value)
    return value if value.encoding == Encoding::UTF_8 && value.valid_encoding?

    if [Encoding::UTF_8, Encoding::ASCII_8BIT].include?(value.encoding)
      value.dup.force_encoding(Encoding::UTF_8).scrub("")
    else
      value.encode(Encoding::UTF_8, invalid: :replace, undef: :replace, replace: "")
    end
  rescue Encoding::UndefinedConversionError,
         Encoding::InvalidByteSequenceError,
         Encoding::ConverterNotFoundError
    # Last resort: keep whatever bytes happen to form valid UTF-8.
    value.dup.force_encoding(Encoding::UTF_8).scrub("")
  end

  # @param app [#call] the next Rack application in the stack
  def initialize(app)
    @app = app
  end

  # Sanitizes the URL-related env entries, wraps +rack.input+ so the body is
  # sanitized as it is read, then calls the next app.
  #
  # @param env [Hash] the Rack environment (mutated in place)
  # @return [Object] whatever the downstream app returns
  def call(env)
    begin
      SANITIZED_ENV_KEYS.each { |key| sanitize_encoding(env, key) }

      env["rack.input"] = SanitizedInput.new(env["rack.input"]) if env["rack.input"]
    rescue => e
      # Best effort: a failure here must never take the request down.
      Rails.logger.error("EncodingSanitizer error: #{e.message}")
    end

    @app.call(env)
  end

  private

  # Replaces env[key] with a valid UTF-8 copy when it is a String that is
  # not already valid UTF-8. Missing and non-String values are left alone.
  def sanitize_encoding(env, key)
    value = env[key]
    return unless value.is_a?(String)
    return if value.encoding == Encoding::UTF_8 && value.valid_encoding?

    env[key] = force_utf8(value)
  end

  # Kept as the instance-level entry point; the conversion rules live in
  # EncodingSanitizer.to_utf8 so the input wrapper can share them.
  def force_utf8(value)
    self.class.to_utf8(value)
  end

  # Wrapper for rack.input that sanitizes every String it hands out through
  # #read, #gets and #each. All other methods are delegated untouched.
  #
  # NOTE(review): every chunk is coerced to UTF-8, so genuinely binary bodies
  # (e.g. file uploads) are still altered, and a fixed-length #read can split
  # a multibyte character across two chunks - confirm this is acceptable for
  # the endpoints behind this middleware.
  class SanitizedInput < SimpleDelegator
    # @return [String, nil] sanitized data, or the underlying result when it
    #   is not a String (e.g. nil at EOF)
    def read(*args)
      data = __getobj__.read(*args)
      return data unless data.is_a?(String)

      clean = sanitize(data)
      buffer = args[1]
      return clean unless buffer.is_a?(String) && !buffer.frozen?

      # read(length, buffer) promises the data ends up in +buffer+; keep the
      # caller's buffer in step with the sanitized return value.
      buffer.equal?(clean) ? buffer : buffer.replace(clean)
    end

    # @return [String, nil] the next sanitized line, or the underlying result
    #   when it is not a String
    def gets(*args)
      data = __getobj__.gets(*args)
      return data unless data.is_a?(String)

      sanitize(data)
    end

    # Yields each line of the underlying input after sanitizing it. Returns
    # an Enumerator when called without a block.
    def each(&block)
      return to_enum(:each) unless block

      __getobj__.each { |line| block.call(sanitize(line)) }
    end

    private

    def sanitize(data)
      EncodingSanitizer.to_utf8(data)
    end
  end
end

# Register the sanitizer at the very top of the middleware stack so request
# data is normalised before any other middleware (including
# ActionDispatch::Static and Rack::MethodOverride, which parses POST bodies)
# gets to read it.
#
# Position 0 is used instead of naming ActionDispatch::Static because that
# middleware is only present when config.public_file_server.enabled is true;
# insert_before raises at boot when its target is missing from the stack.
Rails.application.config.middleware.insert_before 0, EncodingSanitizer
176 changes: 176 additions & 0 deletions spec/middleware/encoding_sanitizer_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
# frozen_string_literal: true

require "rails_helper"

RSpec.describe EncodingSanitizer do
  # Terminal Rack app: hands the env back in the headers slot so each example
  # can inspect what the middleware passed downstream.
  let(:app) { ->(env) { [200, env, "OK"] } }
  let(:middleware) { described_class.new(app) }

  # Builds the minimal Rack env shared by the #call examples.
  def rack_env(query_string, request_uri: "/test")
    {
      "QUERY_STRING" => query_string,
      "REQUEST_URI" => request_uri,
      "PATH_INFO" => "/test"
    }
  end

  describe "#call" do
    context "with UTF-8 encoded query string" do
      it "passes through unchanged" do
        env = rack_env("name=test&value=hello", request_uri: "/test?name=test")

        status, downstream_env, _body = middleware.call(env)

        expect(status).to eq(200)
        expect(downstream_env["QUERY_STRING"]).to eq("name=test&value=hello")
      end
    end

    context "with UTF-16LE encoded query string" do
      it "converts to valid UTF-8" do
        # The string carries a UTF-16LE encoding tag when it reaches the middleware.
        env = rack_env("test=value".encode(Encoding::UTF_16LE))

        status, downstream_env, _body = middleware.call(env)
        query = downstream_env["QUERY_STRING"]

        expect(status).to eq(200)
        expect(query.encoding).to eq(Encoding::UTF_8)
        expect(query).to be_valid_encoding
        expect(query).to include("test")
        expect(query).to include("value")
      end
    end

    context "with invalid byte sequences" do
      it "sanitizes invalid bytes" do
        # \xFF\xFE can never appear in well-formed UTF-8.
        broken_query = (+"test=\xFF\xFEvalue").force_encoding(Encoding::UTF_8)

        status, downstream_env, _body = middleware.call(rack_env(broken_query))

        expect(status).to eq(200)
        expect(downstream_env["QUERY_STRING"]).to be_valid_encoding
      end
    end

    context "with nil values" do
      it "handles nil env values gracefully" do
        env = rack_env(nil, request_uri: nil)

        expect { middleware.call(env) }.not_to raise_error
      end
    end
  end

  describe EncodingSanitizer::SanitizedInput do
    # Wraps +body+ in a StringIO and then in the input wrapper under test.
    def wrap(body)
      described_class.new(StringIO.new(body))
    end

    describe "#read" do
      it "sanitizes UTF-16LE encoded POST body" do
        utf16_body = "name=test&[email protected]".encode(Encoding::UTF_16LE)

        result = wrap(utf16_body).read

        expect(result.encoding).to eq(Encoding::UTF_8)
        expect(result).to be_valid_encoding
      end

      it "passes through valid UTF-8 unchanged" do
        body = "name=test&[email protected]"

        result = wrap(body).read

        expect(result).to eq(body)
        expect(result.encoding).to eq(Encoding::UTF_8)
      end

      it "handles invalid byte sequences in POST body" do
        invalid_body = (+"name=test\xFF\xFE&value=data").force_encoding(Encoding::UTF_8)

        result = wrap(invalid_body).read

        expect(result).to be_valid_encoding
      end
    end

    describe "#rewind" do
      it "delegates to underlying input" do
        sanitized = wrap("test data")

        # Drain the stream, then rewind so the second read sees the data again.
        sanitized.read
        sanitized.rewind

        expect(sanitized.read).to include("test")
      end
    end

    describe "#each" do
      it "sanitizes each line" do
        sanitized = wrap("line1\nline2\nline3")

        collected = []
        sanitized.each { |line| collected.push(line) }

        expect(collected.all?(&:valid_encoding?)).to be true
      end
    end

    describe "#gets" do
      it "sanitizes line-by-line reads" do
        first_line = wrap("line1\nline2").gets

        expect(first_line).to be_valid_encoding
        expect(first_line).to eq("line1\n")
      end
    end

    describe "#close" do
      it "delegates close to underlying input" do
        input = StringIO.new("test")
        allow(input).to receive(:close)

        described_class.new(input).close

        expect(input).to have_received(:close)
      end
    end
  end

  describe "middleware stack position" do
    # Names of the middleware in the booted application's stack, in order.
    let(:stack) { Rails.application.middleware.map(&:name) }
    let(:sanitizer_index) { stack.index("EncodingSanitizer") }

    it "is inserted before ActionDispatch::Static" do
      expect(sanitizer_index).to be < stack.index("ActionDispatch::Static")
    end

    it "runs before Rack::MethodOverride" do
      expect(sanitizer_index).to be < stack.index("Rack::MethodOverride")
    end
  end
end
Loading