Skip to content

Commit cec23f3

Browse files
committed
[libc++][windows] Use _wsetlocale() in __locale_guard
Querying the current locale string on Windows should always be done with _wsetlocale(). The OS and the CRT support localized language and country names, for example "Norwegian Bokmål_Norway". Narrow setlocale() internally calls _wsetlocale() and converts the returned wide string using the current LC_CTYPE charset. However the string may not be representable in the current LC_CTYPE charset. Additionally, if the LC_CTYPE charset is changed after the query, the returned string becomes invalidly-encoded and cannot be used to restore the locale. This is a problem for code that temporarily changes the thread locale using RAII methods. Fixes #160478
1 parent 1e84cb5 commit cec23f3

File tree

3 files changed

+78
-6
lines changed

3 files changed

+78
-6
lines changed

libcxx/include/__cxx03/__locale_dir/locale_base_api/locale_guard.h

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,11 @@ struct __libcpp_locale_guard {
4545
// each category. In the second case, we know at least one category won't
4646
// be what we want, so we only have to check the first case.
4747
if (std::strcmp(__l.__get_locale(), __lc) != 0) {
48-
__locale_all = _strdup(__lc);
48+
// Use wsetlocale to query the current locale string. This avoids a lossy
49+
// conversion of the locale string from UTF-16 to the current LC_CTYPE
50+
// charset. The Windows CRT allows language / country strings outside of
51+
// ASCII, e.g. "Norwegian Bokm\u00E5l_Norway.utf8".
52+
__locale_all = _wcsdup(__wsetlocale(nullptr));
4953
if (__locale_all == nullptr)
5054
__throw_bad_alloc();
5155
__setlocale(__l.__get_locale());
@@ -57,7 +61,7 @@ struct __libcpp_locale_guard {
5761
// for the different categories in the same format as returned by
5862
// setlocale(LC_ALL, nullptr).
5963
if (__locale_all != nullptr) {
60-
__setlocale(__locale_all);
64+
__wsetlocale(__locale_all);
6165
free(__locale_all);
6266
}
6367
_configthreadlocale(__status);
@@ -68,8 +72,14 @@ struct __libcpp_locale_guard {
6872
__throw_bad_alloc();
6973
return __new_locale;
7074
}
75+
static const wchar_t* __wsetlocale(const wchar_t* __locale) {
76+
const wchar_t* __new_locale = _wsetlocale(LC_ALL, __locale);
77+
if (__new_locale == nullptr)
78+
__throw_bad_alloc();
79+
return __new_locale;
80+
}
7181
int __status;
72-
char* __locale_all = nullptr;
82+
wchar_t* __locale_all = nullptr;
7383
};
7484
#endif
7585

libcxx/include/__locale_dir/support/windows.h

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,12 @@ inline _LIBCPP_HIDE_FROM_ABI char* __setlocale(int __category, const char* __loc
162162
std::__throw_bad_alloc();
163163
return __new_locale;
164164
}
165+
// Wide-character counterpart of __setlocale: forwards to ::_wsetlocale for
// the given category and locale string. A null result is reported through
// std::__throw_bad_alloc(); otherwise the CRT-returned pointer is passed
// back to the caller as-is.
inline _LIBCPP_HIDE_FROM_ABI wchar_t* __wsetlocale(int __category, const wchar_t* __locale) {
  wchar_t* const __result = ::_wsetlocale(__category, __locale);
  if (!__result)
    std::__throw_bad_alloc();
  return __result;
}
165171
_LIBCPP_EXPORTED_FROM_ABI __lconv_t* __localeconv(__locale_t& __loc);
166172
#endif // _LIBCPP_BUILDING_LIBRARY
167173

@@ -309,7 +315,11 @@ struct __locale_guard {
309315
// each category. In the second case, we know at least one category won't
310316
// be what we want, so we only have to check the first case.
311317
if (std::strcmp(__l.__get_locale(), __lc) != 0) {
312-
__locale_all = _strdup(__lc);
318+
// Use wsetlocale to query the current locale string. This avoids a lossy
319+
// conversion of the locale string from UTF-16 to the current LC_CTYPE
320+
// charset. The Windows CRT allows language / country strings outside of
321+
// ASCII, e.g. "Norwegian Bokm\u00E5l_Norway.utf8".
322+
__locale_all = _wcsdup(__locale::__wsetlocale(LC_ALL, nullptr));
313323
if (__locale_all == nullptr)
314324
std::__throw_bad_alloc();
315325
__locale::__setlocale(LC_ALL, __l.__get_locale());
@@ -321,13 +331,13 @@ struct __locale_guard {
321331
// for the different categories in the same format as returned by
322332
// setlocale(LC_ALL, nullptr).
323333
if (__locale_all != nullptr) {
324-
__locale::__setlocale(LC_ALL, __locale_all);
334+
__locale::__wsetlocale(LC_ALL, __locale_all);
325335
free(__locale_all);
326336
}
327337
_configthreadlocale(__status);
328338
}
329339
int __status;
330-
char* __locale_all = nullptr;
340+
wchar_t* __locale_all = nullptr;
331341
};
332342
#endif // _LIBCPP_BUILDING_LIBRARY
333343

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// <locale>
10+
11+
// REQUIRES: windows
12+
13+
// The C RunTime library on Windows supports locale strings with
14+
// characters outside the ASCII range. This poses challenges for
15+
// code that temporarily sets a custom thread locale.
16+
//
17+
// https://github.com/llvm/llvm-project/issues/160478
18+
19+
#include <locale>
20+
#include <iostream>
21+
#include <iomanip>
22+
#include <algorithm>
23+
24+
#include <cstdlib>
25+
#include <cassert>
26+
#include <clocale>
27+
28+
#include "test_macros.h"
29+
30+
// Replaces everything from the last '.' in `locale_name` onwards with
// `codepage` (which is expected to carry its own leading '.', e.g. ".437").
// The presence of a '.' is checked with LIBCPP_ASSERT; if none is found and
// the check does not fire, `codepage` is simply appended to the full name.
void locale_name_replace_codepage(std::string& locale_name, const std::string& codepage) {
  const std::string::size_type last_dot = locale_name.rfind('.');
  LIBCPP_ASSERT(last_dot != std::string::npos);

  // Build the result in a separate string first so that `codepage` is read
  // before `locale_name` is modified.
  std::string updated(locale_name, 0, last_dot);
  updated += codepage;
  locale_name.swap(updated);
}
36+
37+
// Regression test for https://github.com/llvm/llvm-project/issues/160478.
//
// Selects a per-thread locale whose name contains non-ASCII characters,
// switches that locale to code page 437, and then formats a floating-point
// value through std::cerr imbued with the classic locale.
// NOTE(review): the stream output is presumably what drives the library's
// temporary locale switch (the locale guard); confirm against the library
// sources.
int main(int, char**) {
  _configthreadlocale(_ENABLE_PER_THREAD_LOCALE);

  // std::setlocale returns a null pointer on failure, and constructing a
  // std::string from a null pointer is undefined behavior, so validate the
  // result before using it.
  const char* initial_name = std::setlocale(LC_ALL, "norwegian-bokmal");
  assert(initial_name != nullptr);
  std::string locale_name = initial_name;

  // The scenario under test requires a locale name with at least one byte
  // outside the ASCII range.
  const auto not_ascii = [](char c) { return (c & 0x80) != 0; };
  LIBCPP_ASSERT(std::any_of(locale_name.begin(), locale_name.end(), not_ascii));
  (void)not_ascii;

  locale_name_replace_codepage(locale_name, ".437");

  // Perform the locale switch outside of the assertion macro so that this
  // required side effect does not depend on how LIBCPP_ASSERT expands.
  // NOTE(review): LIBCPP_ASSERT comes from test_macros.h and may expand to a
  // no-op in some configurations; confirm.
  const char* switched_name = std::setlocale(LC_ALL, locale_name.c_str());
  LIBCPP_ASSERT(switched_name != nullptr);
  (void)switched_name;

  std::cerr.imbue(std::locale::classic());
  std::cerr << std::setprecision(2) << 0.1 << std::endl;

  return EXIT_SUCCESS;
}

0 commit comments

Comments
 (0)