@@ -139,6 +139,7 @@ class JSON_Parser
139139
140140 virtual bool CompleteComment (Token& token);
141141 virtual bool CompleteStringLiteral (Token& token);
142+ int convert_unicode_to_code_point ();
142143 bool handle_unescape_char (Token& token);
143144
144145private:
@@ -652,7 +653,15 @@ bool JSON_StringParser<CharType>::CompleteComment(typename JSON_Parser<CharType>
652653 return true ;
653654}
654655
655- void convert_append_unicode_code_unit (JSON_Parser<wchar_t >::Token& token, utf16char value)
656+ void convert_append_unicode_code_unit (JSON_Parser<utf16char>::Token& token, utf16string value)
657+ {
658+ token.string_val .append (value);
659+ }
660+ void convert_append_unicode_code_unit (JSON_Parser<char >::Token& token, utf16string value)
661+ {
662+ token.string_val .append (::utility::conversions::utf16_to_utf8 (value));
663+ }
664+ void convert_append_unicode_code_unit (JSON_Parser<utf16char>::Token& token, utf16char value)
656665{
657666 token.string_val .push_back (value);
658667}
@@ -662,6 +671,37 @@ void convert_append_unicode_code_unit(JSON_Parser<char>::Token& token, utf16char
662671 token.string_val .append (::utility::conversions::utf16_to_utf8 (utf16));
663672}
664673
674+ template <typename CharType>
675+ int JSON_Parser<CharType>::convert_unicode_to_code_point()
676+ {
677+ // A four-hexdigit Unicode character.
678+ // Transform into a 16 bit code point.
679+ int decoded = 0 ;
680+ for (int i = 0 ; i < 4 ; ++i)
681+ {
682+ auto ch = NextCharacter ();
683+ int ch_int = static_cast <int >(ch);
684+ if (ch_int < 0 || ch_int > 127 ) return -1 ;
685+ #ifdef _WIN32
686+ const int isxdigitResult = _isxdigit_l (ch_int, utility::details::scoped_c_thread_locale::c_locale ());
687+ #else
688+ const int isxdigitResult = isxdigit (ch_int);
689+ #endif
690+ if (!isxdigitResult) return -1 ;
691+
692+ int val = _hexval[static_cast <size_t >(ch_int)];
693+
694+ _ASSERTE (val != -1 );
695+
696+ // Add the input char to the decoded number
697+ decoded |= (val << (4 * (3 - i)));
698+ }
699+ return decoded;
700+ }
701+
// Inclusive bounds of the UTF-16 high-surrogate range (U+D800..U+DBFF).
// A \uXXXX escape whose value falls in this range is the first half of a
// surrogate pair. Typed constants are used instead of object-like macros so
// the names obey normal scoping rules and carry a type.
const int H_SURROGATE_START = 0xD800;
const int H_SURROGATE_END = 0xDBFF;
704+
665705template <typename CharType>
666706inline bool JSON_Parser<CharType>::handle_unescape_char(Token& token)
667707{
@@ -682,26 +722,31 @@ inline bool JSON_Parser<CharType>::handle_unescape_char(Token& token)
682722 case ' t' : token.string_val .push_back (' \t ' ); return true ;
683723 case ' u' :
684724 {
685- // A four-hexdigit Unicode character.
686- // Transform into a 16 bit code point.
687- int decoded = 0 ;
688- for (int i = 0 ; i < 4 ; ++i)
725+ int decoded = convert_unicode_to_code_point ();
726+ if (decoded == -1 )
689727 {
690- ch = NextCharacter ();
691- int ch_int = static_cast <int >(ch);
692- if (ch_int < 0 || ch_int > 127 ) return false ;
693- #ifdef _WIN32
694- const int isxdigitResult = _isxdigit_l (ch_int, utility::details::scoped_c_thread_locale::c_locale ());
695- #else
696- const int isxdigitResult = isxdigit (ch_int);
697- #endif
698- if (!isxdigitResult) return false ;
728+ return false ;
729+ }
730+
731+ // handle multi-block characters that start with a high-surrogate
732+ if (decoded >= H_SURROGATE_START && decoded <= H_SURROGATE_END)
733+ {
734+ // skip escape character '\u'
735+ if (NextCharacter () != ' \\ ' || NextCharacter () != ' u' )
736+ {
737+ return false ;
738+ }
739+ int decoded2 = convert_unicode_to_code_point ();
740+
741+ if (decoded2 == -1 )
742+ {
743+ return false ;
744+ }
699745
700- int val = _hexval[ static_cast <size_t >(ch_int)] ;
701- _ASSERTE (val != - 1 );
746+ utf16string compoundUTF16 = { static_cast <utf16char>(decoded), static_cast <utf16char>(decoded2)} ;
747+ convert_append_unicode_code_unit (token, compoundUTF16 );
702748
703- // Add the input char to the decoded number
704- decoded |= (val << (4 * (3 - i)));
749+ return true ;
705750 }
706751
707752 // Construct the character based on the decoded number
@@ -1015,9 +1060,13 @@ std::unique_ptr<web::json::details::_Value> JSON_Parser<CharType>::_ParseValue(
10151060{
10161061 switch (tkn.kind )
10171062 {
1018- case JSON_Parser<CharType>::Token::TKN_OpenBrace: { return _ParseObject (tkn);
1063+ case JSON_Parser<CharType>::Token::TKN_OpenBrace:
1064+ {
1065+ return _ParseObject (tkn);
10191066 }
1020- case JSON_Parser<CharType>::Token::TKN_OpenBracket: { return _ParseArray (tkn);
1067+ case JSON_Parser<CharType>::Token::TKN_OpenBracket:
1068+ {
1069+ return _ParseArray (tkn);
10211070 }
10221071 case JSON_Parser<CharType>::Token::TKN_StringLiteral:
10231072 {
0 commit comments