From cb5ad5b2d1b82d470e67f15cde147245efbbf559 Mon Sep 17 00:00:00 2001 From: Luca Bacci Date: Tue, 14 Oct 2025 09:56:32 +0200 Subject: [PATCH 1/3] _LocInfo: Use _wsetlocale to query and restore locales _Locinfo changes the locale temporarily and then reverts to the previous locale on destruction. The sequence of setlocale calls looks as follows: 1. oldlocname = setlocale(LC_ALL, nullptr) to query the locale string 2. setlocale(LC_ALL, newlocname) to set the temporary locale 3. setlocale(LC_ALL, oldlocname) to restore the previous locale However, there's a catch here: the fully-qualified locale names returned by setlocale are not always ASCII strings (more on that below). This creates challenges because oldlocname is encoded depending on the "outer" locale, while the setlocale call in step 3 expects an encoding which depends on the "inner" locale, and the two may not match. To solve this issue, use the wide variant of setlocale: _wsetlocale. This way all strings are UTF-16 and there's no issue with varying narrow string encodings. Addendum: Actually, the C runtime library does its best to use ASCII strings! It queries the English name of the locale using GetLocaleInfoEx. MSDN says that the returned string is always ASCII [1], but that's not always the case [2]. Fixes #5780 References: 1. https://learn.microsoft.com/en-us/windows/win32/intl/locale-senglish-constants 2. 
https://developercommunity.visualstudio.com/t/GetLocaleInfoEx-w-LOCALE_SENGLISHLANGUA/10981789 --- stl/inc/xlocinfo | 2 +- stl/src/locale.cpp | 4 +--- stl/src/locale0.cpp | 7 ++----- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/stl/inc/xlocinfo b/stl/inc/xlocinfo index 054c2524fa6..9b2941b0c89 100644 --- a/stl/inc/xlocinfo +++ b/stl/inc/xlocinfo @@ -382,7 +382,7 @@ private: _Yarn _Months; // month names _Yarn _W_Days; // wide weekday names _Yarn _W_Months; // wide month names - _Yarn _Oldlocname; // old locale name to revert to on destruction + _Yarn _Oldlocname; // old locale name to revert to on destruction _Yarn _Newlocname; // new locale name for this object }; _STD_END diff --git a/stl/src/locale.cpp b/stl/src/locale.cpp index e11e106f16f..2f96937742b 100644 --- a/stl/src/locale.cpp +++ b/stl/src/locale.cpp @@ -137,9 +137,7 @@ void __CLRCALL_PURE_OR_CDECL locale::_Locimp::_Locimp_Addfac( void __CLRCALL_PURE_OR_CDECL _Locinfo::_Locinfo_ctor( _Locinfo* pLocinfo, int cat, const char* locname) { // capture a named locale - const char* oldlocname = setlocale(LC_ALL, nullptr); - - pLocinfo->_Oldlocname = oldlocname == nullptr ? "" : oldlocname; + pLocinfo->_Oldlocname = _wsetlocale(LC_ALL, nullptr); _Locinfo_Addcats(pLocinfo, cat, locname); } diff --git a/stl/src/locale0.cpp b/stl/src/locale0.cpp index 81aae0ad22e..0f41dbb0999 100644 --- a/stl/src/locale0.cpp +++ b/stl/src/locale0.cpp @@ -222,9 +222,8 @@ void __CLRCALL_PURE_OR_CDECL locale::_Locimp::_Locimp_dtor(_Locimp* _This) { // void __CLRCALL_PURE_OR_CDECL _Locinfo::_Locinfo_ctor( _Locinfo* pLocinfo, const char* locname) { // switch to a named locale - const char* oldlocname = setlocale(LC_ALL, nullptr); + pLocinfo->_Oldlocname = _wsetlocale(LC_ALL, nullptr); - pLocinfo->_Oldlocname = oldlocname == nullptr ? 
"" : oldlocname; if (locname != nullptr) { locname = setlocale(LC_ALL, locname); } @@ -233,9 +232,7 @@ void __CLRCALL_PURE_OR_CDECL _Locinfo::_Locinfo_ctor( } void __CLRCALL_PURE_OR_CDECL _Locinfo::_Locinfo_dtor(_Locinfo* pLocinfo) { // destroy a _Locinfo object, revert locale - if (!pLocinfo->_Oldlocname._Empty()) { - setlocale(LC_ALL, pLocinfo->_Oldlocname._C_str()); - } + _wsetlocale(LC_ALL, pLocinfo->_Oldlocname._C_str()); } _STD_END From 7cb20fb5106272bbe44a04f0ec8f49163254b3c3 Mon Sep 17 00:00:00 2001 From: "A. Jiang" Date: Thu, 16 Oct 2025 11:18:45 +0800 Subject: [PATCH 2/3] Keep ABI for `_Locinfo` --- stl/inc/xlocinfo | 51 +++++++++++++++++++++++++++++++-------------- stl/src/locale.cpp | 2 +- stl/src/locale0.cpp | 10 +++++++-- 3 files changed, 44 insertions(+), 19 deletions(-) diff --git a/stl/inc/xlocinfo b/stl/inc/xlocinfo index 9b2941b0c89..dcd4801cd13 100644 --- a/stl/inc/xlocinfo +++ b/stl/inc/xlocinfo @@ -134,6 +134,28 @@ private: void* _Timeptr; // pointer to time information }; +template +_Elem* _Ntcts_dup_dbg(const _Elem* const _Ptr) noexcept { + _STL_INTERNAL_STATIC_ASSERT(_Is_any_of_v<_Elem, char, wchar_t, unsigned short>); + + auto _Iter = _Ptr; + while (*_Iter != _Elem{}) { + ++_Iter; + } + const size_t _Count = static_cast(_Iter - _Ptr + 1) * sizeof(_Elem); + +#ifdef _DEBUG + const auto _Result = static_cast<_Elem*>(_CSTD _malloc_dbg(_Count, _CRT_BLOCK, __FILE__, __LINE__)); +#else + const auto _Result = static_cast<_Elem*>(_CSTD malloc(_Count)); +#endif + if (_Result) { + _CSTD memcpy(_Result, _Ptr, _Count); + } + + return _Result; +} + extern "C++" template class _CRTIMP2_PURE_IMPORT _Yarn { // wrap a NTCTS public: @@ -156,26 +178,22 @@ public: _Tidy(); if (_Right) { // new is not empty, copy it - const _Elem* _Ptr = _Right; - while (*_Ptr != _Elem{}) { - ++_Ptr; - } + _Myptr = _STD _Ntcts_dup_dbg(_Right); + } + } - const auto _Count = (++_Ptr - _Right) * sizeof(_Elem); + return *this; + } -#ifdef _DEBUG - _Myptr = 
static_cast<_Elem*>(_malloc_dbg(_Count, _CRT_BLOCK, __FILE__, __LINE__)); -#else - _Myptr = static_cast<_Elem*>(_CSTD malloc(_Count)); -#endif + template , int> = 0> + void _From_wide(const wchar_t* const _Right) noexcept { + if (reinterpret_cast(_Myptr) != _Right) { // new value, discard old and copy new + _Tidy(); - if (_Myptr) { - _CSTD memcpy(_Myptr, _Right, _Count); - } + if (_Right) { // new is not empty, copy it + _Myptr = reinterpret_cast(_STD _Ntcts_dup_dbg(_Right)); } } - - return *this; } __CLR_OR_THIS_CALL ~_Yarn() noexcept { @@ -382,7 +400,8 @@ private: _Yarn _Months; // month names _Yarn _W_Days; // wide weekday names _Yarn _W_Months; // wide month names - _Yarn _Oldlocname; // old locale name to revert to on destruction + // TRANSITION, ABI, `_Oldlocname._Myptr` is reinterpreted as `wchar_t*`. `wchar` should be wrapped instead. + _Yarn _Oldlocname; // old locale name to revert to on destruction _Yarn _Newlocname; // new locale name for this object }; _STD_END diff --git a/stl/src/locale.cpp b/stl/src/locale.cpp index 2f96937742b..75d55c65154 100644 --- a/stl/src/locale.cpp +++ b/stl/src/locale.cpp @@ -137,7 +137,7 @@ void __CLRCALL_PURE_OR_CDECL locale::_Locimp::_Locimp_Addfac( void __CLRCALL_PURE_OR_CDECL _Locinfo::_Locinfo_ctor( _Locinfo* pLocinfo, int cat, const char* locname) { // capture a named locale - pLocinfo->_Oldlocname = _wsetlocale(LC_ALL, nullptr); + pLocinfo->_Oldlocname._From_wide(_wsetlocale(LC_ALL, nullptr)); _Locinfo_Addcats(pLocinfo, cat, locname); } diff --git a/stl/src/locale0.cpp b/stl/src/locale0.cpp index 0f41dbb0999..1974c0e354a 100644 --- a/stl/src/locale0.cpp +++ b/stl/src/locale0.cpp @@ -222,7 +222,7 @@ void __CLRCALL_PURE_OR_CDECL locale::_Locimp::_Locimp_dtor(_Locimp* _This) { // void __CLRCALL_PURE_OR_CDECL _Locinfo::_Locinfo_ctor( _Locinfo* pLocinfo, const char* locname) { // switch to a named locale - pLocinfo->_Oldlocname = _wsetlocale(LC_ALL, nullptr); + pLocinfo->_Oldlocname._From_wide(_wsetlocale(LC_ALL, 
nullptr)); if (locname != nullptr) { locname = setlocale(LC_ALL, locname); @@ -232,7 +232,13 @@ void __CLRCALL_PURE_OR_CDECL _Locinfo::_Locinfo_ctor( } void __CLRCALL_PURE_OR_CDECL _Locinfo::_Locinfo_dtor(_Locinfo* pLocinfo) { // destroy a _Locinfo object, revert locale - _wsetlocale(LC_ALL, pLocinfo->_Oldlocname._C_str()); + if (pLocinfo->_Oldlocname._Empty()) { + // `pLocinfo->_Oldlocname._C_str()` points to a single `char` of value 0 in this case, + // so reinterpret_cast is not reliable. + _wsetlocale(LC_ALL, L""); + } else { + _wsetlocale(LC_ALL, reinterpret_cast(pLocinfo->_Oldlocname._C_str())); + } } _STD_END From 46e08a9784670eb88eef32a4aa4223786d9d4b61 Mon Sep 17 00:00:00 2001 From: Luca Bacci Date: Fri, 17 Oct 2025 18:39:47 +0200 Subject: [PATCH 3/3] Add test for GH #5780 --- tests/std/test.lst | 1 + .../tests/GH_005780_non_ascii_locales/env.lst | 4 ++ .../GH_005780_non_ascii_locales/test.cpp | 41 +++++++++++++++++++ 3 files changed, 46 insertions(+) create mode 100644 tests/std/tests/GH_005780_non_ascii_locales/env.lst create mode 100644 tests/std/tests/GH_005780_non_ascii_locales/test.cpp diff --git a/tests/std/test.lst b/tests/std/test.lst index 25c6dc46df0..0686b089500 100644 --- a/tests/std/test.lst +++ b/tests/std/test.lst @@ -271,6 +271,7 @@ tests\GH_005472_do_not_overlap tests\GH_005546_containers_size_type_cast tests\GH_005553_regex_character_translation tests\GH_005768_pow_accuracy +tests\GH_005780_non_ascii_locales tests\LWG2381_num_get_floating_point tests\LWG2510_tag_classes tests\LWG2597_complex_branch_cut diff --git a/tests/std/tests/GH_005780_non_ascii_locales/env.lst b/tests/std/tests/GH_005780_non_ascii_locales/env.lst new file mode 100644 index 00000000000..19f025bd0e6 --- /dev/null +++ b/tests/std/tests/GH_005780_non_ascii_locales/env.lst @@ -0,0 +1,4 @@ +# Copyright (c) Microsoft Corporation. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +RUNALL_INCLUDE ..\usual_matrix.lst diff --git a/tests/std/tests/GH_005780_non_ascii_locales/test.cpp b/tests/std/tests/GH_005780_non_ascii_locales/test.cpp new file mode 100644 index 00000000000..47a2e17b160 --- /dev/null +++ b/tests/std/tests/GH_005780_non_ascii_locales/test.cpp @@ -0,0 +1,41 @@ +// Copyright (c) Microsoft Corporation. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +#include +#include +#include +#include + +std::string set_locale(const std::string& locale_name) { + const char* ret = std::setlocale(LC_ALL, locale_name.c_str()); + assert(ret != nullptr); + return ret; +} + +std::string query_locale() { + const char* ret = std::setlocale(LC_ALL, nullptr); + assert(ret != nullptr); + return ret; +} + +void assert_string_non_ascii(const std::string& string) { + const auto char_not_ascii = [](const char c) { return (c & 0x80) != 0; }; + assert(std::any_of(string.begin(), string.end(), char_not_ascii)); +} + +void test_gh_5780() { + // https://learn.microsoft.com/en-us/cpp/c-runtime-library/language-strings#supported-language-strings + std::string locale_name = set_locale("norwegian-bokmal.437"); + assert_string_non_ascii(locale_name); + + std::cerr.imbue(std::locale::classic()); + std::cerr << std::setprecision(2) << 0.1 << std::endl; + + assert(query_locale() == locale_name); +} + +int main() { + test_gh_5780(); +}