From 77bf2b7acb82ea930702c8b0587c019fd48dc0f2 Mon Sep 17 00:00:00 2001 From: Charles Zablit Date: Fri, 18 Jul 2025 12:29:31 +0200 Subject: [PATCH 1/8] [windows][lldb] force the console to use a UTF-8 codepage --- .../Platform/Windows/PlatformWindows.cpp | 20 +++++++++++++++++++ .../Platform/Windows/PlatformWindows.h | 8 ++++++++ 2 files changed, 28 insertions(+) diff --git a/lldb/source/Plugins/Platform/Windows/PlatformWindows.cpp b/lldb/source/Plugins/Platform/Windows/PlatformWindows.cpp index c0c26cc5f1954..d3e981de81313 100644 --- a/lldb/source/Plugins/Platform/Windows/PlatformWindows.cpp +++ b/lldb/source/Plugins/Platform/Windows/PlatformWindows.cpp @@ -41,6 +41,10 @@ LLDB_PLUGIN_DEFINE(PlatformWindows) static uint32_t g_initialize_count = 0; +#if defined(_WIN32) +std::optional g_prev_console_cp = std::nullopt; +#endif + PlatformSP PlatformWindows::CreateInstance(bool force, const lldb_private::ArchSpec *arch) { // The only time we create an instance is when we are creating a remote @@ -98,6 +102,7 @@ void PlatformWindows::Initialize() { default_platform_sp->SetSystemArchitecture(HostInfo::GetArchitecture()); Platform::SetHostPlatform(default_platform_sp); #endif + SetConsoleCodePage(); PluginManager::RegisterPlugin( PlatformWindows::GetPluginNameStatic(false), PlatformWindows::GetPluginDescriptionStatic(false), @@ -108,6 +113,7 @@ void PlatformWindows::Initialize() { void PlatformWindows::Terminate() { if (g_initialize_count > 0) { if (--g_initialize_count == 0) { + ResetConsoleCodePage(); PluginManager::UnregisterPlugin(PlatformWindows::CreateInstance); } } @@ -808,3 +814,17 @@ extern "C" { return Status(); } + +void PlatformWindows::SetConsoleCodePage() { + #if defined(_WIN32) + g_prev_console_cp = GetConsoleOutputCP(); + SetConsoleOutputCP(CP_UTF8); + #endif +} + +void PlatformWindows::ResetConsoleCodePage() { + #if defined(_WIN32) + if (g_prev_console_cp) + SetConsoleOutputCP(*g_prev_console_cp); + #endif +} diff --git 
a/lldb/source/Plugins/Platform/Windows/PlatformWindows.h b/lldb/source/Plugins/Platform/Windows/PlatformWindows.h index 771133f341e90..d14aa52e5e1c8 100644 --- a/lldb/source/Plugins/Platform/Windows/PlatformWindows.h +++ b/lldb/source/Plugins/Platform/Windows/PlatformWindows.h @@ -80,6 +80,14 @@ class PlatformWindows : public RemoteAwarePlatform { size_t GetSoftwareBreakpointTrapOpcode(Target &target, BreakpointSite *bp_site) override; + /// Set the current console's code page to UTF-8 and store the previous + /// codepage in \a g_prev_console_cp. + static void SetConsoleCodePage(); + + /// Reset the current console's code page to the value stored + /// in \a g_prev_console_cp if any. + static void ResetConsoleCodePage(); + std::vector m_supported_architectures; private: From fcec07f707c9e854e0cded19d0052bec63e0d26f Mon Sep 17 00:00:00 2001 From: Charles Zablit Date: Fri, 18 Jul 2025 12:39:11 +0200 Subject: [PATCH 2/8] fixup! [windows][lldb] force the console to use a UTF-8 codepage --- .../Plugins/Platform/Windows/PlatformWindows.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lldb/source/Plugins/Platform/Windows/PlatformWindows.cpp b/lldb/source/Plugins/Platform/Windows/PlatformWindows.cpp index d3e981de81313..dffbbc1c2806a 100644 --- a/lldb/source/Plugins/Platform/Windows/PlatformWindows.cpp +++ b/lldb/source/Plugins/Platform/Windows/PlatformWindows.cpp @@ -816,15 +816,15 @@ extern "C" { } void PlatformWindows::SetConsoleCodePage() { - #if defined(_WIN32) - g_prev_console_cp = GetConsoleOutputCP(); - SetConsoleOutputCP(CP_UTF8); - #endif +#if defined(_WIN32) + g_prev_console_cp = GetConsoleOutputCP(); + SetConsoleOutputCP(CP_UTF8); +#endif } void PlatformWindows::ResetConsoleCodePage() { - #if defined(_WIN32) +#if defined(_WIN32) if (g_prev_console_cp) SetConsoleOutputCP(*g_prev_console_cp); - #endif +#endif } From 3e58744cc563cf3d48e12e16820a339852dade46 Mon Sep 17 00:00:00 2001 From: Charles Zablit Date: Thu, 24 Jul 2025 
13:41:45 +0100 Subject: [PATCH 3/8] Revert "fixup! [windows][lldb] force the console to use a UTF-8 codepage" This reverts commit fcec07f707c9e854e0cded19d0052bec63e0d26f. --- .../Plugins/Platform/Windows/PlatformWindows.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lldb/source/Plugins/Platform/Windows/PlatformWindows.cpp b/lldb/source/Plugins/Platform/Windows/PlatformWindows.cpp index dffbbc1c2806a..d3e981de81313 100644 --- a/lldb/source/Plugins/Platform/Windows/PlatformWindows.cpp +++ b/lldb/source/Plugins/Platform/Windows/PlatformWindows.cpp @@ -816,15 +816,15 @@ extern "C" { } void PlatformWindows::SetConsoleCodePage() { -#if defined(_WIN32) - g_prev_console_cp = GetConsoleOutputCP(); - SetConsoleOutputCP(CP_UTF8); -#endif + #if defined(_WIN32) + g_prev_console_cp = GetConsoleOutputCP(); + SetConsoleOutputCP(CP_UTF8); + #endif } void PlatformWindows::ResetConsoleCodePage() { -#if defined(_WIN32) + #if defined(_WIN32) if (g_prev_console_cp) SetConsoleOutputCP(*g_prev_console_cp); -#endif + #endif } From 01bf62f155af9f2a7d253d292518b9175ff731ab Mon Sep 17 00:00:00 2001 From: Charles Zablit Date: Thu, 24 Jul 2025 13:41:50 +0100 Subject: [PATCH 4/8] Revert "[windows][lldb] force the console to use a UTF-8 codepage" This reverts commit 77bf2b7acb82ea930702c8b0587c019fd48dc0f2. 
--- .../Platform/Windows/PlatformWindows.cpp | 20 ------------------- .../Platform/Windows/PlatformWindows.h | 8 -------- 2 files changed, 28 deletions(-) diff --git a/lldb/source/Plugins/Platform/Windows/PlatformWindows.cpp b/lldb/source/Plugins/Platform/Windows/PlatformWindows.cpp index d3e981de81313..c0c26cc5f1954 100644 --- a/lldb/source/Plugins/Platform/Windows/PlatformWindows.cpp +++ b/lldb/source/Plugins/Platform/Windows/PlatformWindows.cpp @@ -41,10 +41,6 @@ LLDB_PLUGIN_DEFINE(PlatformWindows) static uint32_t g_initialize_count = 0; -#if defined(_WIN32) -std::optional g_prev_console_cp = std::nullopt; -#endif - PlatformSP PlatformWindows::CreateInstance(bool force, const lldb_private::ArchSpec *arch) { // The only time we create an instance is when we are creating a remote @@ -102,7 +98,6 @@ void PlatformWindows::Initialize() { default_platform_sp->SetSystemArchitecture(HostInfo::GetArchitecture()); Platform::SetHostPlatform(default_platform_sp); #endif - SetConsoleCodePage(); PluginManager::RegisterPlugin( PlatformWindows::GetPluginNameStatic(false), PlatformWindows::GetPluginDescriptionStatic(false), @@ -113,7 +108,6 @@ void PlatformWindows::Initialize() { void PlatformWindows::Terminate() { if (g_initialize_count > 0) { if (--g_initialize_count == 0) { - ResetConsoleCodePage(); PluginManager::UnregisterPlugin(PlatformWindows::CreateInstance); } } @@ -814,17 +808,3 @@ extern "C" { return Status(); } - -void PlatformWindows::SetConsoleCodePage() { - #if defined(_WIN32) - g_prev_console_cp = GetConsoleOutputCP(); - SetConsoleOutputCP(CP_UTF8); - #endif -} - -void PlatformWindows::ResetConsoleCodePage() { - #if defined(_WIN32) - if (g_prev_console_cp) - SetConsoleOutputCP(*g_prev_console_cp); - #endif -} diff --git a/lldb/source/Plugins/Platform/Windows/PlatformWindows.h b/lldb/source/Plugins/Platform/Windows/PlatformWindows.h index d14aa52e5e1c8..771133f341e90 100644 --- a/lldb/source/Plugins/Platform/Windows/PlatformWindows.h +++ 
b/lldb/source/Plugins/Platform/Windows/PlatformWindows.h @@ -80,14 +80,6 @@ class PlatformWindows : public RemoteAwarePlatform { size_t GetSoftwareBreakpointTrapOpcode(Target &target, BreakpointSite *bp_site) override; - /// Set the current console's code page to UTF-8 and store the previous - /// codepage in \a g_prev_console_cp. - static void SetConsoleCodePage(); - - /// Reset the current console's code page to the value stored - /// in \a g_prev_console_cp if any. - static void ResetConsoleCodePage(); - std::vector m_supported_architectures; private: From 3291838706b9c2660b9899a93920422c04048cdb Mon Sep 17 00:00:00 2001 From: Charles Zablit Date: Thu, 24 Jul 2025 13:44:59 +0100 Subject: [PATCH 5/8] [lldb][windows] use Windows APIs to print to the console --- lldb/include/lldb/Host/File.h | 12 ++++---- lldb/source/Host/common/File.cpp | 37 +++++++++++++++++++++++++ llvm/include/llvm/Support/raw_ostream.h | 2 ++ llvm/lib/Support/raw_ostream.cpp | 2 +- 4 files changed, 45 insertions(+), 8 deletions(-) diff --git a/lldb/include/lldb/Host/File.h b/lldb/include/lldb/Host/File.h index 9e2d0abe0b1af..8406f8d55e58f 100644 --- a/lldb/include/lldb/Host/File.h +++ b/lldb/include/lldb/Host/File.h @@ -382,15 +382,11 @@ class NativeFile : public File { Unowned = false, }; - NativeFile() : m_descriptor(kInvalidDescriptor), m_stream(kInvalidStream) {} + NativeFile(); - NativeFile(FILE *fh, bool transfer_ownership) - : m_descriptor(kInvalidDescriptor), m_own_descriptor(false), m_stream(fh), - m_options(), m_own_stream(transfer_ownership) {} + NativeFile(FILE *fh, bool transfer_ownership); - NativeFile(int fd, OpenOptions options, bool transfer_ownership) - : m_descriptor(fd), m_own_descriptor(transfer_ownership), - m_stream(kInvalidStream), m_options(options), m_own_stream(false) {} + NativeFile(int fd, OpenOptions options, bool transfer_ownership); ~NativeFile() override { Close(); } @@ -455,6 +451,8 @@ class NativeFile : public File { bool m_own_stream = false; std::mutex 
offset_access_mutex; + bool is_windows_console = false; + private: NativeFile(const NativeFile &) = delete; const NativeFile &operator=(const NativeFile &) = delete; diff --git a/lldb/source/Host/common/File.cpp b/lldb/source/Host/common/File.cpp index 23b6dc9fe850d..de22fdb239ea2 100644 --- a/lldb/source/Host/common/File.cpp +++ b/lldb/source/Host/common/File.cpp @@ -36,6 +36,7 @@ #include "llvm/Support/Errno.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Process.h" +#include "llvm/Support/raw_ostream.h" using namespace lldb; using namespace lldb_private; @@ -247,6 +248,28 @@ uint32_t File::GetPermissions(Status &error) const { return file_stats.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO); } +NativeFile::NativeFile() + : m_descriptor(kInvalidDescriptor), m_stream(kInvalidStream) {} + +NativeFile::NativeFile(FILE *fh, bool transfer_ownership) + : m_descriptor(kInvalidDescriptor), m_own_descriptor(false), m_stream(fh), + m_options(), m_own_stream(transfer_ownership) { +#ifdef _WIN32 + int fd = _fileno(fh); + is_windows_console = + ::GetFileType((HANDLE)::_get_osfhandle(fd)) == FILE_TYPE_CHAR; +#endif +} + +NativeFile::NativeFile(int fd, OpenOptions options, bool transfer_ownership) + : m_descriptor(fd), m_own_descriptor(transfer_ownership), + m_stream(kInvalidStream), m_options(options), m_own_stream(false) { +#ifdef _WIN32 + is_windows_console = + ::GetFileType((HANDLE)::_get_osfhandle(fd)) == FILE_TYPE_CHAR; +#endif +} + bool NativeFile::IsValid() const { std::scoped_lock lock(m_descriptor_mutex, m_stream_mutex); return DescriptorIsValidUnlocked() || StreamIsValidUnlocked(); @@ -618,6 +641,13 @@ Status NativeFile::Write(const void *buf, size_t &num_bytes) { ssize_t bytes_written = -1; if (ValueGuard descriptor_guard = DescriptorIsValid()) { +#ifdef _WIN32 + if (is_windows_console && + write_console_impl(m_descriptor, + llvm::StringRef((char *)buf, num_bytes))) { + return error; + } +#endif bytes_written = llvm::sys::RetryAfterSignal(-1, ::write, 
m_descriptor, buf, num_bytes); if (bytes_written == -1) { @@ -629,6 +659,13 @@ Status NativeFile::Write(const void *buf, size_t &num_bytes) { } if (ValueGuard stream_guard = StreamIsValid()) { +#ifdef _WIN32 + if (is_windows_console && + write_console_impl(_fileno(m_stream), + llvm::StringRef((char *)buf, num_bytes))) { + return error; + } +#endif bytes_written = ::fwrite(buf, 1, num_bytes, m_stream); if (bytes_written == 0) { diff --git a/llvm/include/llvm/Support/raw_ostream.h b/llvm/include/llvm/Support/raw_ostream.h index f87344e860518..a1df1e5aef4bf 100644 --- a/llvm/include/llvm/Support/raw_ostream.h +++ b/llvm/include/llvm/Support/raw_ostream.h @@ -853,4 +853,6 @@ raw_ostream &operator<<(raw_ostream &OS, const std::optional &O) { } // end namespace llvm +bool write_console_impl(int FD, llvm::StringRef Data); + #endif // LLVM_SUPPORT_RAW_OSTREAM_H diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp index 07b99896543bd..2d97ca07ad270 100644 --- a/llvm/lib/Support/raw_ostream.cpp +++ b/llvm/lib/Support/raw_ostream.cpp @@ -697,7 +697,7 @@ raw_fd_ostream::~raw_fd_ostream() { // the input is UTF-8 or transcode from the local codepage to UTF-8 before // quoting it. If they don't, this may mess up the encoding, but this is still // probably the best compromise we can make. -static bool write_console_impl(int FD, StringRef Data) { +bool write_console_impl(int FD, StringRef Data) { SmallVector WideText; // Fall back to ::write if it wasn't valid UTF-8. 
From ae790f878d1dcd173b75b7bc0bbe537c8b1d341b Mon Sep 17 00:00:00 2001 From: Charles Zablit Date: Tue, 29 Jul 2025 14:31:23 +0100 Subject: [PATCH 6/8] add default values to NativeFile attributes --- lldb/include/lldb/Host/File.h | 4 ++-- lldb/source/Host/common/File.cpp | 8 +++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/lldb/include/lldb/Host/File.h b/lldb/include/lldb/Host/File.h index 8406f8d55e58f..7402a2231735a 100644 --- a/lldb/include/lldb/Host/File.h +++ b/lldb/include/lldb/Host/File.h @@ -440,11 +440,11 @@ class NativeFile : public File { return ValueGuard(m_stream_mutex, StreamIsValidUnlocked()); } - int m_descriptor; + int m_descriptor = kInvalidDescriptor; bool m_own_descriptor = false; mutable std::mutex m_descriptor_mutex; - FILE *m_stream; + FILE *m_stream = kInvalidStream; mutable std::mutex m_stream_mutex; OpenOptions m_options{}; diff --git a/lldb/source/Host/common/File.cpp b/lldb/source/Host/common/File.cpp index de22fdb239ea2..b87bc6160ad9b 100644 --- a/lldb/source/Host/common/File.cpp +++ b/lldb/source/Host/common/File.cpp @@ -248,12 +248,10 @@ uint32_t File::GetPermissions(Status &error) const { return file_stats.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO); } -NativeFile::NativeFile() - : m_descriptor(kInvalidDescriptor), m_stream(kInvalidStream) {} +NativeFile::NativeFile() = default; NativeFile::NativeFile(FILE *fh, bool transfer_ownership) - : m_descriptor(kInvalidDescriptor), m_own_descriptor(false), m_stream(fh), - m_options(), m_own_stream(transfer_ownership) { + : m_stream(fh), m_own_stream(transfer_ownership) { #ifdef _WIN32 int fd = _fileno(fh); is_windows_console = @@ -263,7 +261,7 @@ NativeFile::NativeFile(FILE *fh, bool transfer_ownership) NativeFile::NativeFile(int fd, OpenOptions options, bool transfer_ownership) : m_descriptor(fd), m_own_descriptor(transfer_ownership), - m_stream(kInvalidStream), m_options(options), m_own_stream(false) { + m_options(options) { #ifdef _WIN32 is_windows_console = 
::GetFileType((HANDLE)::_get_osfhandle(fd)) == FILE_TYPE_CHAR; From 7f88778eb42e6f1e76de885ef65db429bfcae125 Mon Sep 17 00:00:00 2001 From: Charles Zablit Date: Thu, 31 Jul 2025 15:44:47 +0100 Subject: [PATCH 7/8] use raw_fd_ostream --- lldb/source/Host/common/File.cpp | 11 +++++------ llvm/include/llvm/Support/raw_ostream.h | 2 -- llvm/lib/Support/raw_ostream.cpp | 2 +- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/lldb/source/Host/common/File.cpp b/lldb/source/Host/common/File.cpp index b87bc6160ad9b..9ea64fe0fd730 100644 --- a/lldb/source/Host/common/File.cpp +++ b/lldb/source/Host/common/File.cpp @@ -640,9 +640,8 @@ Status NativeFile::Write(const void *buf, size_t &num_bytes) { ssize_t bytes_written = -1; if (ValueGuard descriptor_guard = DescriptorIsValid()) { #ifdef _WIN32 - if (is_windows_console && - write_console_impl(m_descriptor, - llvm::StringRef((char *)buf, num_bytes))) { + if (is_windows_console) { + llvm::raw_fd_ostream(m_descriptor, false).write((char *)buf, num_bytes); return error; } #endif @@ -658,9 +657,9 @@ Status NativeFile::Write(const void *buf, size_t &num_bytes) { if (ValueGuard stream_guard = StreamIsValid()) { #ifdef _WIN32 - if (is_windows_console && - write_console_impl(_fileno(m_stream), - llvm::StringRef((char *)buf, num_bytes))) { + if (is_windows_console) { + llvm::raw_fd_ostream(_fileno(m_stream), false) + .write((char *)buf, num_bytes); return error; } #endif diff --git a/llvm/include/llvm/Support/raw_ostream.h b/llvm/include/llvm/Support/raw_ostream.h index a1df1e5aef4bf..f87344e860518 100644 --- a/llvm/include/llvm/Support/raw_ostream.h +++ b/llvm/include/llvm/Support/raw_ostream.h @@ -853,6 +853,4 @@ raw_ostream &operator<<(raw_ostream &OS, const std::optional &O) { } // end namespace llvm -bool write_console_impl(int FD, llvm::StringRef Data); - #endif // LLVM_SUPPORT_RAW_OSTREAM_H diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp index 2d97ca07ad270..07b99896543bd 100644 --- 
a/llvm/lib/Support/raw_ostream.cpp +++ b/llvm/lib/Support/raw_ostream.cpp @@ -697,7 +697,7 @@ raw_fd_ostream::~raw_fd_ostream() { // the input is UTF-8 or transcode from the local codepage to UTF-8 before // quoting it. If they don't, this may mess up the encoding, but this is still // probably the best compromise we can make. -bool write_console_impl(int FD, StringRef Data) { +static bool write_console_impl(int FD, StringRef Data) { SmallVector WideText; // Fall back to ::write if it wasn't valid UTF-8. From 76a8a4f07ed966fd87ee08a5d83cc6ccf3aa5292 Mon Sep 17 00:00:00 2001 From: Charles Zablit Date: Mon, 4 Aug 2025 18:47:00 +0200 Subject: [PATCH 8/8] add comment --- lldb/source/Host/common/File.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lldb/source/Host/common/File.cpp b/lldb/source/Host/common/File.cpp index 9ea64fe0fd730..8cbbd7a5e14c7 100644 --- a/lldb/source/Host/common/File.cpp +++ b/lldb/source/Host/common/File.cpp @@ -253,6 +253,9 @@ NativeFile::NativeFile() = default; NativeFile::NativeFile(FILE *fh, bool transfer_ownership) : m_stream(fh), m_own_stream(transfer_ownership) { #ifdef _WIN32 + // In order to properly display non-ASCII characters on Windows, we need to + // use Windows APIs to print to the console. This is only required if the + // stream outputs to a console. int fd = _fileno(fh); is_windows_console = ::GetFileType((HANDLE)::_get_osfhandle(fd)) == FILE_TYPE_CHAR; @@ -263,6 +266,9 @@ NativeFile::NativeFile(int fd, OpenOptions options, bool transfer_ownership) : m_descriptor(fd), m_own_descriptor(transfer_ownership), m_options(options) { #ifdef _WIN32 + // In order to properly display non-ASCII characters on Windows, we need to + // use Windows APIs to print to the console. This is only required if the + // file outputs to a console. is_windows_console = ::GetFileType((HANDLE)::_get_osfhandle(fd)) == FILE_TYPE_CHAR; #endif