Skip to content

Commit 9b3789a

Browse files
committed
[Clang] Do not warn on UTF-16 -> UTF-32 conversions. (#163927)
UTF-16 to UTF-32 conversions seem widespread, and lone surrogates have a distinct representation in UTF-32. Let's not warn on this case, to make the warning easier to adopt. This follows the SG-16 guideline: https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2025/p3695r2.html#changes-since-r1 Fixes #163719
1 parent 222fc11 commit 9b3789a

File tree

2 files changed

+12
-5
lines changed

2 files changed

+12
-5
lines changed

clang/lib/Sema/SemaChecking.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12014,13 +12014,20 @@ static void DiagnoseMixedUnicodeImplicitConversion(Sema &S, const Type *Source,
1201412014
SourceLocation CC) {
1201512015
assert(Source->isUnicodeCharacterType() && Target->isUnicodeCharacterType() &&
1201612016
Source != Target);
12017+
12018+
// Lone surrogates have a distinct representation in UTF-32.
12019+
// Converting between UTF-16 and UTF-32 codepoints seems very widespread,
12020+
// so don't warn on such conversion.
12021+
if (Source->isChar16Type() && Target->isChar32Type())
12022+
return;
12023+
1201712024
Expr::EvalResult Result;
1201812025
if (E->EvaluateAsInt(Result, S.getASTContext(), Expr::SE_AllowSideEffects,
1201912026
S.isConstantEvaluatedContext())) {
1202012027
llvm::APSInt Value(32);
1202112028
Value = Result.Val.getInt();
1202212029
bool IsASCII = Value <= 0x7F;
12023-
bool IsBMP = Value <= 0xD7FF || (Value >= 0xE000 && Value <= 0xFFFF);
12030+
bool IsBMP = Value <= 0xDFFF || (Value >= 0xE000 && Value <= 0xFFFF);
1202412031
bool ConversionPreservesSemantics =
1202512032
IsASCII || (!Source->isChar8Type() && !Target->isChar8Type() && IsBMP);
1202612033

clang/test/SemaCXX/warn-implicit-unicode-conversions.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ void test(char8_t u8, char16_t u16, char32_t u32) {
1414
c16(u32); // expected-warning {{implicit conversion from 'char32_t' to 'char16_t' may lose precision and change the meaning of the represented code unit}}
1515

1616
c32(u8); // expected-warning {{implicit conversion from 'char8_t' to 'char32_t' may change the meaning of the represented code unit}}
17-
c32(u16); // expected-warning {{implicit conversion from 'char16_t' to 'char32_t' may change the meaning of the represented code unit}}
17+
c32(u16);
1818
c32(u32);
1919

2020

@@ -30,7 +30,7 @@ void test(char8_t u8, char16_t u16, char32_t u32) {
3030
c16(char32_t(0x7f));
3131
c16(char32_t(0x80));
3232
c16(char32_t(0xD7FF));
33-
c16(char32_t(0xD800)); // expected-warning {{implicit conversion from 'char32_t' to 'char16_t' changes the meaning of the code unit '<0xD800>'}}
33+
c16(char32_t(0xD800));
3434
c16(char32_t(0xE000));
3535
c16(char32_t(U'🐉')); // expected-warning {{implicit conversion from 'char32_t' to 'char16_t' changes the meaning of the code point '🐉'}}
3636

@@ -44,8 +44,8 @@ void test(char8_t u8, char16_t u16, char32_t u32) {
4444
c32(char16_t(0x80));
4545

4646
c32(char16_t(0xD7FF));
47-
c32(char16_t(0xD800)); // expected-warning {{implicit conversion from 'char16_t' to 'char32_t' changes the meaning of the code unit '<0xD800>'}}
48-
c32(char16_t(0xDFFF)); // expected-warning {{implicit conversion from 'char16_t' to 'char32_t' changes the meaning of the code unit '<0xDFFF>'}}
47+
c32(char16_t(0xD800));
48+
c32(char16_t(0xDFFF));
4949
c32(char16_t(0xE000));
5050
c32(char16_t(u''));
5151

0 commit comments

Comments
 (0)