diff --git a/src/main/cpp/charsetdecoder.cpp b/src/main/cpp/charsetdecoder.cpp
index b7f852a7f..569e7c5cf 100644
--- a/src/main/cpp/charsetdecoder.cpp
+++ b/src/main/cpp/charsetdecoder.cpp
@@ -355,7 +355,7 @@ class ISOLatinCharsetDecoder : public CharsetDecoder
 
 			while (src < srcEnd)
 			{
-				auto sv = static_cast<unsigned int>(*src++);
+				auto sv = static_cast<unsigned int>(static_cast<unsigned char>(*src++));
 				Transcoder::encode(sv, out);
 			}
 			in.increment_position(availableByteCount);
diff --git a/src/test/cpp/helpers/charsetdecodertestcase.cpp b/src/test/cpp/helpers/charsetdecodertestcase.cpp
index ad3572327..12b04a19e 100644
--- a/src/test/cpp/helpers/charsetdecodertestcase.cpp
+++ b/src/test/cpp/helpers/charsetdecodertestcase.cpp
@@ -40,6 +40,7 @@ LOGUNIT_CLASS(CharsetDecoderTestCase)
 	LOGUNIT_TEST(decode2);
 	LOGUNIT_TEST(decode3);
 	LOGUNIT_TEST(decode4);
+	LOGUNIT_TEST(testISOLatinHighBytes);
 #if LOG4CXX_LOGCHAR_IS_WCHAR && LOG4CXX_HAS_MBSRTOWCS
 	LOGUNIT_TEST(testMbstowcsInfiniteLoop);
 #endif
@@ -152,6 +153,37 @@ LOGUNIT_CLASS(CharsetDecoderTestCase)
 		}
 	}
 
+	/**
+	 * Decoding ISO-8859-1 must map every byte 0x80..0xFF to the
+	 * code point of the same numeric value. On platforms where plain
+	 * char is signed (default on MSVC/GCC/Clang for x86/x64), a
+	 * static_cast<unsigned int>(*src) sign-extends bytes >= 0x80 into
+	 * 0xFFFFFFxx, which Transcoder::encode then treats as out-of-range
+	 * Unicode and replaces with U+FFFD (or appends garbage on wchar_t
+	 * builds). The .properties configuration loader uses this decoder
+	 * per the Java spec, so the bug silently corrupts any non-ASCII
+	 * Latin-1 byte that appears in a log4cxx configuration file.
+	 */
+	void testISOLatinHighBytes()
+	{
+		char buf[1];
+		auto dec = CharsetDecoder::getISOLatinDecoder();
+		for (unsigned int b = 0x80; b <= 0xFF; ++b)
+		{
+			buf[0] = static_cast<char>(b);
+			ByteBuffer in(buf, 1);
+			LogString out;
+			log4cxx_status_t stat = dec->decode(in, out);
+			LOGUNIT_ASSERT_EQUAL(APR_SUCCESS, stat);
+
+			// Build the expected LogString by encoding code point b
+			// through the same Transcoder path the decoder uses.
+			LogString expected;
+			Transcoder::encode(b, expected);
+			LOGUNIT_ASSERT_EQUAL(expected, out);
+		}
+	}
+
 #if LOG4CXX_LOGCHAR_IS_WCHAR && LOG4CXX_HAS_MBSRTOWCS
 	/**
 	 * Tests that we don't loop infinitely when mbsrtowcs refuses to consume