diff --git a/src/main/cpp/transcoder.cpp b/src/main/cpp/transcoder.cpp index 82f20c199..02eb520b9 100644 --- a/src/main/cpp/transcoder.cpp +++ b/src/main/cpp/transcoder.cpp @@ -165,7 +165,7 @@ size_t Transcoder::encodeUTF16BE(unsigned int ch, char* dst) unsigned char w = (unsigned char) ((ch >> 16) - 1); dst[0] = (char) (0xD8 + (w >> 2)); dst[1] = (char) (((w & 0x03) << 6) + ((ch >> 10) & 0x3F)); - dst[2] = (char) (0xDC + ((ch & 0x30) >> 4)); + dst[2] = (char) (0xDC + ((ch >> 8) & 0x03)); dst[3] = (char) (ch & 0xFF); return 4; } @@ -194,7 +194,7 @@ size_t Transcoder::encodeUTF16LE(unsigned int ch, char* dst) unsigned char w = (unsigned char) ((ch >> 16) - 1); dst[1] = (char) (0xD8 + (w >> 2)); dst[0] = (char) (((w & 0x03) << 6) + ((ch >> 10) & 0x3F)); - dst[3] = (char) (0xDC + ((ch & 0x30) >> 4)); + dst[3] = (char) (0xDC + ((ch >> 8) & 0x03)); dst[2] = (char) (ch & 0xFF); return 4; } diff --git a/src/test/cpp/helpers/transcodertestcase.cpp b/src/test/cpp/helpers/transcodertestcase.cpp index 23a5caa52..479044562 100644 --- a/src/test/cpp/helpers/transcodertestcase.cpp +++ b/src/test/cpp/helpers/transcodertestcase.cpp @@ -16,6 +16,7 @@ */ #include +#include #include "../insertwide.h" #include "../logunit.h" @@ -63,6 +64,9 @@ LOGUNIT_CLASS(TranscoderTestCase) LOGUNIT_TEST(testDecodeUTF8_2); LOGUNIT_TEST(testDecodeUTF8_3); LOGUNIT_TEST(testDecodeUTF8_4); + LOGUNIT_TEST(testEncodeUTF16BE_BMP); + LOGUNIT_TEST(testEncodeUTF16BE_Supplementary); + LOGUNIT_TEST(testEncodeUTF16LE_Supplementary); #if LOG4CXX_UNICHAR_API LOGUNIT_TEST(udecode2); LOGUNIT_TEST(udecode4); @@ -312,6 +316,44 @@ LOGUNIT_CLASS(TranscoderTestCase) LOGUNIT_ASSERT_EQUAL(true, iter == out.end()); } + void testEncodeUTF16BE_BMP() + { + char raw[4] = { 0, 0, 0, 0 }; + ByteBuffer buf(raw, sizeof(raw)); + Transcoder::encodeUTF16BE(0x4E03, buf); // CJK 七 + LOGUNIT_ASSERT_EQUAL((size_t) 2, buf.position()); + LOGUNIT_ASSERT_EQUAL((unsigned char) 0x4E, (unsigned char) raw[0]); + LOGUNIT_ASSERT_EQUAL((unsigned 
char) 0x03, (unsigned char) raw[1]); + } + + // U+1F600 (GRINNING FACE) encodes to UTF-16BE as D8 3D DE 00. + // Before the fix the low surrogate's high byte was derived from bits 4-5 + // of the code point, yielding 0xDC here instead of 0xDE, so the output was + // the well-formed but wrong pair D83D DC00 (U+1F400), not U+1F600. + void testEncodeUTF16BE_Supplementary() + { + char raw[4] = { 0, 0, 0, 0 }; + ByteBuffer buf(raw, sizeof(raw)); + Transcoder::encodeUTF16BE(0x1F600, buf); + LOGUNIT_ASSERT_EQUAL((size_t) 4, buf.position()); + LOGUNIT_ASSERT_EQUAL((unsigned char) 0xD8, (unsigned char) raw[0]); + LOGUNIT_ASSERT_EQUAL((unsigned char) 0x3D, (unsigned char) raw[1]); + LOGUNIT_ASSERT_EQUAL((unsigned char) 0xDE, (unsigned char) raw[2]); + LOGUNIT_ASSERT_EQUAL((unsigned char) 0x00, (unsigned char) raw[3]); + } + + void testEncodeUTF16LE_Supplementary() + { + char raw[4] = { 0, 0, 0, 0 }; + ByteBuffer buf(raw, sizeof(raw)); + Transcoder::encodeUTF16LE(0x1F600, buf); // same pair as the BE test, bytes swapped within each 16-bit unit: 3D D8 00 DE + LOGUNIT_ASSERT_EQUAL((size_t) 4, buf.position()); + LOGUNIT_ASSERT_EQUAL((unsigned char) 0x3D, (unsigned char) raw[0]); + LOGUNIT_ASSERT_EQUAL((unsigned char) 0xD8, (unsigned char) raw[1]); + LOGUNIT_ASSERT_EQUAL((unsigned char) 0x00, (unsigned char) raw[2]); + LOGUNIT_ASSERT_EQUAL((unsigned char) 0xDE, (unsigned char) raw[3]); + } + #if LOG4CXX_UNICHAR_API void udecode2()