From 48eb0224d89d3378974a4a0e35b3126a3b13356f Mon Sep 17 00:00:00 2001 From: metsw24-max Date: Mon, 11 May 2026 13:00:31 +0530 Subject: [PATCH 1/2] Fix ISO Latin-1 decoder sign extension --- src/main/cpp/charsetdecoder.cpp | 2 +- .../cpp/helpers/charsetdecodertestcase.cpp | 30 +++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/main/cpp/charsetdecoder.cpp b/src/main/cpp/charsetdecoder.cpp index b7f852a7f..569e7c5cf 100644 --- a/src/main/cpp/charsetdecoder.cpp +++ b/src/main/cpp/charsetdecoder.cpp @@ -355,7 +355,7 @@ class ISOLatinCharsetDecoder : public CharsetDecoder while (src < srcEnd) { - auto sv = static_cast<unsigned int>(*src++); + auto sv = static_cast<unsigned int>(static_cast<unsigned char>(*src++)); Transcoder::encode(sv, out); } in.increment_position(availableByteCount); diff --git a/src/test/cpp/helpers/charsetdecodertestcase.cpp b/src/test/cpp/helpers/charsetdecodertestcase.cpp index ad3572327..9f2d6f94d 100644 --- a/src/test/cpp/helpers/charsetdecodertestcase.cpp +++ b/src/test/cpp/helpers/charsetdecodertestcase.cpp @@ -40,6 +40,7 @@ LOGUNIT_CLASS(CharsetDecoderTestCase) LOGUNIT_TEST(decode2); LOGUNIT_TEST(decode3); LOGUNIT_TEST(decode4); + LOGUNIT_TEST(testISOLatinHighBytes); #if LOG4CXX_LOGCHAR_IS_WCHAR && LOG4CXX_HAS_MBSRTOWCS LOGUNIT_TEST(testMbstowcsInfiniteLoop); #endif @@ -152,6 +153,35 @@ LOGUNIT_CLASS(CharsetDecoderTestCase) } } + /** + * Decoding ISO-8859-1 must map every byte 0x80..0xFF to the + * code point of the same numeric value. On platforms where plain + * char is signed (default on MSVC/GCC/Clang for x86/x64), a + * static_cast<unsigned int>(*src) sign-extends bytes >= 0x80 into + * 0xFFFFFFxx, which Transcoder::encode then treats as out-of-range + * Unicode and replaces with U+FFFD (or appends garbage on wchar_t + * builds). The .properties configuration loader uses this decoder + * per the Java spec, so the bug silently corrupts any non-ASCII + * Latin-1 byte that appears in a log4cxx configuration file. 
+ */ + void testISOLatinHighBytes() + { + char buf[2]; + auto dec = CharsetDecoder::getISOLatinDecoder(); + for (unsigned int b = 0x80; b <= 0xFF; ++b) + { + buf[0] = static_cast<char>(b); + buf[1] = 0; + ByteBuffer in(buf, 1); + LogString out; + log4cxx_status_t stat = dec->decode(in, out); + LOGUNIT_ASSERT_EQUAL(APR_SUCCESS, stat); + LOGUNIT_ASSERT_EQUAL((size_t) 1, out.size()); + LOGUNIT_ASSERT_EQUAL(static_cast<unsigned int>(b), + static_cast<unsigned int>(out[0])); + } + } + #if LOG4CXX_LOGCHAR_IS_WCHAR && LOG4CXX_HAS_MBSRTOWCS /** * Tests that we don't loop infinitely when mbsrtowcs refuses to consume From 784cfcc0f8182fb398bd8d82f2c9c862651108f5 Mon Sep 17 00:00:00 2001 From: metsw24-max Date: Mon, 11 May 2026 16:51:14 +0530 Subject: [PATCH 2/2] updated --- src/test/cpp/helpers/charsetdecodertestcase.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/test/cpp/helpers/charsetdecodertestcase.cpp b/src/test/cpp/helpers/charsetdecodertestcase.cpp index 9f2d6f94d..12b04a19e 100644 --- a/src/test/cpp/helpers/charsetdecodertestcase.cpp +++ b/src/test/cpp/helpers/charsetdecodertestcase.cpp @@ -166,19 +166,21 @@ LOGUNIT_CLASS(CharsetDecoderTestCase) */ void testISOLatinHighBytes() { - char buf[2]; + char buf[1]; auto dec = CharsetDecoder::getISOLatinDecoder(); for (unsigned int b = 0x80; b <= 0xFF; ++b) { buf[0] = static_cast<char>(b); - buf[1] = 0; ByteBuffer in(buf, 1); LogString out; log4cxx_status_t stat = dec->decode(in, out); LOGUNIT_ASSERT_EQUAL(APR_SUCCESS, stat); - LOGUNIT_ASSERT_EQUAL((size_t) 1, out.size()); - LOGUNIT_ASSERT_EQUAL(static_cast<unsigned int>(b), - static_cast<unsigned int>(out[0])); + + // Build the expected LogString by encoding code point b + // through the same Transcoder path the decoder uses. + LogString expected; + Transcoder::encode(b, expected); + LOGUNIT_ASSERT_EQUAL(expected, out); } }