From 5757dcecd44fd1c123d5f1ed36fb8ce1bc9054ac Mon Sep 17 00:00:00 2001 From: Sriya Pratipati Date: Tue, 15 Jul 2025 22:08:14 +0000 Subject: [PATCH 01/14] started mbstowcs implementation --- libc/src/wchar/mbstowcs.cpp | 29 +++++++++++++++++++++++++++++ libc/src/wchar/mbstowcs.h | 22 ++++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 libc/src/wchar/mbstowcs.cpp create mode 100644 libc/src/wchar/mbstowcs.h diff --git a/libc/src/wchar/mbstowcs.cpp b/libc/src/wchar/mbstowcs.cpp new file mode 100644 index 0000000000000..8b86cb03fe7a3 --- /dev/null +++ b/libc/src/wchar/mbstowcs.cpp @@ -0,0 +1,29 @@ +//===-- Implementation of mbstowcs ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/wchar/mbstowcs.h" + +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/common.h" +#include "src/__support/libc_errno.h" +#include "src/__support/macros/config.h" +#include "src/__support/wchar/mbstate.h" +#include "src/__support/wchar/string_converter.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(int, mbstowcs, + (wchar_t *__restrict pwcs, const char *__restrict s, + size_t n)) { + static internal::mbstate internal_mbstate; + internal::StringConverter str_conv( + reinterpret_cast(pwcs), &internal_mbstate, n); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/mbstowcs.h b/libc/src/wchar/mbstowcs.h new file mode 100644 index 0000000000000..20caaf9f7bcea --- /dev/null +++ b/libc/src/wchar/mbstowcs.h @@ -0,0 +1,22 @@ +//===-- Implementation header for mbstowcs --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_WCHAR_MBSTOWCS_H +#define LLVM_LIBC_SRC_WCHAR_MBSTOWCS_H + +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +int mbstowcs(wchar_t *__restrict pwcs, const char *__restrict s, size_t n); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_WCHAR_MBSTOWCS_H From c9d06fcf9b7eff66efc0bc8127a72717ee3eaf24 Mon Sep 17 00:00:00 2001 From: Sriya Pratipati Date: Tue, 15 Jul 2025 22:16:12 +0000 Subject: [PATCH 02/14] finished mbstowcs implementation --- libc/src/wchar/mbstowcs.cpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/libc/src/wchar/mbstowcs.cpp b/libc/src/wchar/mbstowcs.cpp index 8b86cb03fe7a3..e7cd3416099ae 100644 --- a/libc/src/wchar/mbstowcs.cpp +++ b/libc/src/wchar/mbstowcs.cpp @@ -23,7 +23,20 @@ LLVM_LIBC_FUNCTION(int, mbstowcs, size_t n)) { static internal::mbstate internal_mbstate; internal::StringConverter str_conv( - reinterpret_cast(pwcs), &internal_mbstate, n); + reinterpret_cast(s), &internal_mbstate, n); + int dst_idx = 0; + ErrorOr converted = str_conv.popUTF32(); + while (converted.has_value()) { + if (pwcs != nullptr) + pwcs[dst_idx] = converted.value(); + dst_idx++; + converted = str_conv.popUTF32(); + } + if (converted.error() == -1) // if we hit conversion limit + return dst_idx; + + libc_errno = converted.error(); + return -1; } } // namespace LIBC_NAMESPACE_DECL From 26a67410ff16da7d88d4d439741ecbab49cebe80 Mon Sep 17 00:00:00 2001 From: Sriya Pratipati Date: Wed, 16 Jul 2025 16:38:27 +0000 Subject: [PATCH 03/14] implemented mbsrtowcs --- libc/src/wchar/mbsrtowcs.cpp | 47 ++++++++++++++++++++++++++++++++++++ libc/src/wchar/mbsrtowcs.h | 24 ++++++++++++++++++ libc/src/wchar/mbstowcs.cpp | 2 +- libc/src/wchar/mbstowcs.h | 2 +- 4 files changed, 73 insertions(+), 2 deletions(-) create mode 100644 libc/src/wchar/mbsrtowcs.cpp create mode 100644 libc/src/wchar/mbsrtowcs.h diff --git a/libc/src/wchar/mbsrtowcs.cpp b/libc/src/wchar/mbsrtowcs.cpp new file mode 100644 index 0000000000000..4407daa293f34 --- /dev/null +++ b/libc/src/wchar/mbsrtowcs.cpp @@ -0,0 +1,47 @@ +//===-- Implementation of mbsrtowcs ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/wchar/mbsrtowcs.h" + +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/common.h" +#include "src/__support/libc_errno.h" +#include "src/__support/macros/config.h" +#include "src/__support/wchar/mbstate.h" +#include "src/__support/wchar/string_converter.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(size_t, mbsrtowcs, + (wchar_t *__restrict dst, const char **__restrict src, + size_t len, mbstate_t *__restrict ps)) { + static internal::mbstate internal_mbstate; + internal::StringConverter str_conv( + reinterpret_cast(src), + ps == nullptr ? &internal_mbstate + : reinterpret_cast(ps), + len); + + int dst_idx = 0; + ErrorOr converted = str_conv.popUTF32(); + while (converted.has_value()) { + dst[dst_idx] = converted.value(); + dst_idx++; + converted = str_conv.popUTF32(); + } + + src += str_conv.getSourceIndex(); + if (converted.error() == -1) // if we hit conversion limit + return dst_idx; + + libc_errno = converted.error(); + return -1; +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/mbsrtowcs.h b/libc/src/wchar/mbsrtowcs.h new file mode 100644 index 0000000000000..f8d4cc26e63ae --- /dev/null +++ b/libc/src/wchar/mbsrtowcs.h @@ -0,0 +1,24 @@ +//===-- Implementation header for mbsrtowcs -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H +#define LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H + +#include "hdr/types/mbstate_t.h" +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +size_t mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src, + size_t len, mbstate_t *__restrict ps); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H diff --git a/libc/src/wchar/mbstowcs.cpp b/libc/src/wchar/mbstowcs.cpp index e7cd3416099ae..3b43bcf753793 100644 --- a/libc/src/wchar/mbstowcs.cpp +++ b/libc/src/wchar/mbstowcs.cpp @@ -18,7 +18,7 @@ namespace LIBC_NAMESPACE_DECL { -LLVM_LIBC_FUNCTION(int, mbstowcs, +LLVM_LIBC_FUNCTION(size_t, mbstowcs, (wchar_t *__restrict pwcs, const char *__restrict s, size_t n)) { static internal::mbstate internal_mbstate; diff --git a/libc/src/wchar/mbstowcs.h b/libc/src/wchar/mbstowcs.h index 20caaf9f7bcea..7d08a838b2324 100644 --- a/libc/src/wchar/mbstowcs.h +++ b/libc/src/wchar/mbstowcs.h @@ -15,7 +15,7 @@ namespace LIBC_NAMESPACE_DECL { -int mbstowcs(wchar_t *__restrict pwcs, const char *__restrict s, size_t n); +size_t mbstowcs(wchar_t *__restrict pwcs, const char *__restrict s, size_t n); } // namespace LIBC_NAMESPACE_DECL From 0694e910d92162fea1dc0b72bae82f066472d08f Mon Sep 17 00:00:00 2001 From: Sriya Pratipati Date: Thu, 17 Jul 2025 16:46:53 +0000 Subject: [PATCH 04/14] added tests for mbstowcs --- libc/config/linux/x86_64/entrypoints.txt | 2 + libc/include/wchar.yaml | 17 ++++ libc/src/wchar/CMakeLists.txt | 32 +++++++ libc/src/wchar/mbstowcs.cpp | 6 ++ libc/test/src/wchar/CMakeLists.txt | 13 +++ libc/test/src/wchar/mbstowcs_test.cpp | 117 +++++++++++++++++++++++ 6 files changed, 187 insertions(+) create mode 100644 libc/test/src/wchar/mbstowcs_test.cpp diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 9223911f04a93..e37370fdf47f6 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -1261,6 +1261,8 @@ if(LLVM_LIBC_FULL_BUILD) # wchar.h entrypoints libc.src.wchar.mbrtowc libc.src.wchar.mbtowc + libc.src.wchar.mbstowcs + libc.src.wchar.mbsrtowcs libc.src.wchar.wcrtomb libc.src.wchar.wctomb ) diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml index 123d3440aeec3..3d63d9476e002 100644 --- a/libc/include/wchar.yaml +++ b/libc/include/wchar.yaml @@ -53,6 +53,23 @@ functions: - type: wchar_t *__restrict - type: const char *__restrict - type: size_t + - name: mbstowcs + standards: + - stdc + return_type: size_t + arguments: + - type: wchar_t *__restrict + - type: const char *__restrict + - type: size_t + - name: mbsrtowcs + standards: + - stdc + return_type: size_t + arguments: + - type: wchar_t *__restrict + - type: const char **__restrict + - type: size_t + - type: mbstate_t *__restrict - name: wmemset standards: - stdc diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt index 7ace1a6ca66ba..a70486950d664 100644 --- a/libc/src/wchar/CMakeLists.txt +++ b/libc/src/wchar/CMakeLists.txt @@ -159,6 +159,38 @@ add_entrypoint_object( libc.src.__support.wchar.mbstate ) +add_entrypoint_object( + mbstowcs + SRCS + mbstowcs.cpp + HDRS + mbstowcs.h + DEPENDS + libc.hdr.types.size_t + libc.hdr.types.wchar_t + libc.src.__support.common + libc.src.__support.macros.config + libc.src.__support.libc_errno + libc.src.__support.wchar.string_converter + libc.src.__support.wchar.mbstate +) + +add_entrypoint_object( + mbsrtowcs + SRCS + mbsrtowcs.cpp + HDRS + mbsrtowcs.h + DEPENDS + libc.hdr.types.size_t + libc.hdr.types.wchar_t + libc.src.__support.common + libc.src.__support.macros.config + libc.src.__support.libc_errno + libc.src.__support.wchar.string_converter + libc.src.__support.wchar.mbstate +) + add_entrypoint_object( wmemset SRCS diff --git a/libc/src/wchar/mbstowcs.cpp b/libc/src/wchar/mbstowcs.cpp index 3b43bcf753793..1bba5cb4893e7 100644 --- a/libc/src/wchar/mbstowcs.cpp +++ b/libc/src/wchar/mbstowcs.cpp @@ -21,14 +21,20 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(size_t, mbstowcs, (wchar_t *__restrict pwcs, const char *__restrict s, size_t n)) { + n = pwcs == nullptr ? SIZE_MAX : n; static internal::mbstate internal_mbstate; internal::StringConverter str_conv( reinterpret_cast(s), &internal_mbstate, n); int dst_idx = 0; + ErrorOr converted = str_conv.popUTF32(); + while (converted.has_value()) { if (pwcs != nullptr) pwcs[dst_idx] = converted.value(); + // if it is null terminator, do not count in return value + if (converted.value() == L'\0') + return dst_idx; dst_idx++; converted = str_conv.popUTF32(); } diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt index 176cf7c3487cd..ba1a59211c778 100644 --- a/libc/test/src/wchar/CMakeLists.txt +++ b/libc/test/src/wchar/CMakeLists.txt @@ -64,6 +64,19 @@ add_libc_test( libc.test.UnitTest.ErrnoCheckingTest ) +add_libc_test( + mbstowcs_test + SUITE + libc_wchar_unittests + SRCS + mbstowcs_test.cpp + DEPENDS + libc.src.__support.libc_errno + libc.src.wchar.mbstowcs + libc.hdr.types.wchar_t + libc.test.UnitTest.ErrnoCheckingTest +) + add_libc_test( wctob_test SUITE diff --git a/libc/test/src/wchar/mbstowcs_test.cpp b/libc/test/src/wchar/mbstowcs_test.cpp new file mode 100644 index 0000000000000..6ae31678e45c8 --- /dev/null +++ b/libc/test/src/wchar/mbstowcs_test.cpp @@ -0,0 +1,117 @@ +//===-- Unittests for mbstowcs --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "hdr/types/wchar_t.h" +#include "src/__support/libc_errno.h" +#include "src/wchar/mbstowcs.h" +#include "test/UnitTest/ErrnoCheckingTest.h" +#include "test/UnitTest/Test.h" + +using LlvmLibcMBSToWCSTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest; + +TEST_F(LlvmLibcMBSToWCSTest, OneByteOneChar) { + const char *ch = "A"; + wchar_t dest[2]; + size_t n = LIBC_NAMESPACE::mbstowcs(dest, ch, 1); + ASSERT_EQ(static_cast(*dest), 'A'); + ASSERT_EQ(static_cast(n), 1); + ASSERT_ERRNO_SUCCESS(); + + n = LIBC_NAMESPACE::mbstowcs(dest + 1, ch + 1, 1); + ASSERT_EQ(static_cast(dest[1]), '\0'); + // Should not include null terminator + ASSERT_EQ(static_cast(n), 0); + ASSERT_ERRNO_SUCCESS(); +} + +TEST_F(LlvmLibcMBSToWCSTest, FourByteOneChar) { + const char *src = "\xf0\x9f\x98\xb9"; // laughing cat emoji 😹 + wchar_t dest[2]; + size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 2); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(static_cast(dest[0]), 128569); + ASSERT_TRUE(dest[1] == L'\0'); + // Should not count null terminator in number + ASSERT_EQ(static_cast(n), 1); +} + +TEST_F(LlvmLibcMBSToWCSTest, MultiByteTwoCharacters) { + // Two laughing cat emojis "😹😹" + const char *src = "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; + wchar_t dest[3]; + size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 3); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(static_cast(dest[0]), 128569); + ASSERT_EQ(static_cast(dest[1]), 128569); + ASSERT_TRUE(dest[2] == L'\0'); + // Should not count null terminator in number + ASSERT_EQ(static_cast(n), 2); +} + +TEST_F(LlvmLibcMBSToWCSTest, MixedNumberOfBytes) { + // 'A', sigma symbol 'Σ', recycling symbol '♻', laughing cat emoji '😹' + const char *src = "A\xce\xa3\xe2\x99\xbb\xf0\x9f\x98\xb9"; + wchar_t dest[5]; + size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 5); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(static_cast(dest[0]), 'A'); + ASSERT_EQ(static_cast(dest[1]), 931); + ASSERT_EQ(static_cast(dest[2]), 9851); + ASSERT_EQ(static_cast(dest[3]), 128569); + ASSERT_TRUE(dest[4] == L'\0'); + // Should not count null terminator in number + ASSERT_EQ(static_cast(n), 4); +} + +TEST_F(LlvmLibcMBSToWCSTest, ReadLessThanStringLength) { + // Four laughing cat emojis "😹😹😹😹" + const char *src = + "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; + wchar_t dest[5] = {L'a', L'b', L'c', L'd', L'e'}; + size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 3); + ASSERT_ERRNO_SUCCESS(); + // Should have read 3 emojis + ASSERT_EQ(static_cast(n), 3); + ASSERT_EQ(static_cast(dest[0]), 128569); + ASSERT_EQ(static_cast(dest[1]), 128569); + ASSERT_EQ(static_cast(dest[2]), 128569); + ASSERT_TRUE(dest[3] == L'd'); + ASSERT_TRUE(dest[4] == L'e'); +} + +TEST_F(LlvmLibcMBSToWCSTest, InvalidFirstByte) { + // 0x80 is invalid first byte of mb character + const char *src = + "\x80\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; + wchar_t dest[3]; + size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 3); + // Should return error and set errno + ASSERT_EQ(static_cast(n), -1); + ASSERT_ERRNO_EQ(EILSEQ); +} + +TEST_F(LlvmLibcMBSToWCSTest, InvalidMiddleByte) { + // The 7th byte is invalid for a 4 byte character + const char *src = + "\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; + wchar_t dest[3]; + size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 5); + // Should return error and set errno + ASSERT_EQ(static_cast(n), -1); + ASSERT_ERRNO_EQ(EILSEQ); +} + +TEST_F(LlvmLibcMBSToWCSTest, NullDestination) { + // Four laughing cat emojis "😹😹😹😹" + const char *src = + "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; + size_t n = LIBC_NAMESPACE::mbstowcs(nullptr, src, 2); + ASSERT_ERRNO_SUCCESS(); + // Null destination should ignore len and read till end of string + ASSERT_EQ(static_cast(n), 4); +} From 192135610909f9b7d0d88cbfdee0e61e3368bbc9 Mon Sep 17 00:00:00 2001 From: Sriya Pratipati Date: Thu, 17 Jul 2025 17:11:51 +0000 Subject: [PATCH 05/14] added final test --- libc/test/src/wchar/mbstowcs_test.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/libc/test/src/wchar/mbstowcs_test.cpp b/libc/test/src/wchar/mbstowcs_test.cpp index 6ae31678e45c8..1f6633bdd2766 100644 --- a/libc/test/src/wchar/mbstowcs_test.cpp +++ b/libc/test/src/wchar/mbstowcs_test.cpp @@ -115,3 +115,20 @@ TEST_F(LlvmLibcMBSToWCSTest, NullDestination) { // Null destination should ignore len and read till end of string ASSERT_EQ(static_cast(n), 4); } + +TEST_F(LlvmLibcMBSToWCSTest, ErrnoChecks) { + // Two laughing cat emojis and invalid 3rd mb char (3rd byte of it) + const char *src = + "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9"; + wchar_t dest[5]; + // First two bytes are valid --> should not set errno + size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 2); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(static_cast(n), 2); + ASSERT_EQ(static_cast(dest[0]), 128569); + ASSERT_EQ(static_cast(dest[1]), 128569); + // Trying to read the 3rd byte should set errno + n = LIBC_NAMESPACE::mbstowcs(dest, src, 2); + ASSERT_ERRNO_EQ(EILSEQ); + ASSERT_EQ(static_cast(n), -1); +} From 6bc7497d1aef5dc0a6fc96dcfc600db4ae90b8c9 Mon Sep 17 00:00:00 2001 From: Sriya Pratipati Date: Thu, 17 Jul 2025 18:21:11 +0000 Subject: [PATCH 06/14] added internal mbsnrtowcs --- libc/src/__support/wchar/CMakeLists.txt | 18 +++ libc/src/__support/wchar/mbsnrtowcs.cpp | 57 ++++++++++ libc/src/__support/wchar/mbsnrtowcs.h | 29 +++++ libc/src/wchar/CMakeLists.txt | 2 +- libc/src/wchar/mbsrtowcs.cpp | 9 +- libc/src/wchar/mbstowcs.cpp | 27 ++--- libc/test/src/wchar/CMakeLists.txt | 16 +++ libc/test/src/wchar/mbsrtowcs_test.cpp | 139 ++++++++++++++++++++++++ 8 files changed, 275 insertions(+), 22 deletions(-) create mode 100644 libc/src/__support/wchar/mbsnrtowcs.cpp create mode 100644 libc/src/__support/wchar/mbsnrtowcs.h create mode 100644 libc/test/src/wchar/mbsrtowcs_test.cpp diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt index 802441d37fe92..fa5139a8b68b4 100644 --- a/libc/src/__support/wchar/CMakeLists.txt +++ b/libc/src/__support/wchar/CMakeLists.txt @@ -68,3 +68,21 @@ add_object_library( .character_converter .mbstate ) + +add_object_library( + mbsnrtowcs + HDRS + mbsnrtowcs.h + SRCS + mbsnrtowcs.cpp + DEPENDS + libc.hdr.errno_macros + libc.hdr.types.wchar_t + libc.hdr.types.size_t + libc.src.__support.common + libc.src.__support.error_or + libc.src.__support.macros.config + .character_converter + .mbstate + .string_converter +) diff --git a/libc/src/__support/wchar/mbsnrtowcs.cpp b/libc/src/__support/wchar/mbsnrtowcs.cpp new file mode 100644 index 0000000000000..106a596e59303 --- /dev/null +++ b/libc/src/__support/wchar/mbsnrtowcs.cpp @@ -0,0 +1,57 @@ +//===-- Implementation for mbsnrtowcs function ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/wchar/mbsnrtowcs.h" +#include "hdr/errno_macros.h" +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/common.h" +#include "src/__support/error_or.h" +#include "src/__support/macros/config.h" +#include "src/__support/wchar/character_converter.h" +#include "src/__support/wchar/mbstate.h" +#include "src/__support/wchar/string_converter.h" + +namespace LIBC_NAMESPACE_DECL { +namespace internal { + +ErrorOr mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src, + size_t nmc, size_t len, mbstate *__restrict ps) { + if (*src == nullptr) + return 0; + // Checking if mbstate is valid + CharacterConverter char_conv(ps); + if (!char_conv.isValidState()) + return Error(EINVAL); + + StringConverter str_conv(reinterpret_cast(*src), ps, + len, nmc); + size_t dst_idx = 0; + ErrorOr converted = str_conv.popUTF32(); + while (converted.has_value()) { + if (dst != nullptr) + dst[dst_idx] = converted.value(); + // null terminator should not be counted in return value + if (converted.value() == L'\0') { + src = nullptr; + return dst_idx; + } + dst_idx++; + converted = str_conv.popUTF32(); + } + + *src += str_conv.getSourceIndex(); + if (converted.error() == -1) // if we hit conversion limit + return dst_idx; + + return Error(converted.error()); +} + +} // namespace internal + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/__support/wchar/mbsnrtowcs.h b/libc/src/__support/wchar/mbsnrtowcs.h new file mode 100644 index 0000000000000..8ce497325fdad --- /dev/null +++ b/libc/src/__support/wchar/mbsnrtowcs.h @@ -0,0 +1,29 @@ +//===-- Implementation header for mbsnrtowcs function -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSNRTOWCS +#define LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSNRTOWCS + +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/common.h" +#include "src/__support/error_or.h" +#include "src/__support/macros/config.h" +#include "src/__support/wchar/mbstate.h" + +namespace LIBC_NAMESPACE_DECL { +namespace internal { + +ErrorOr mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src, + size_t nmc, size_t len, mbstate *__restrict ps); + +} // namespace internal + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSNRTOWCS diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt index a70486950d664..2613f6daf111d 100644 --- a/libc/src/wchar/CMakeLists.txt +++ b/libc/src/wchar/CMakeLists.txt @@ -171,8 +171,8 @@ add_entrypoint_object( libc.src.__support.common libc.src.__support.macros.config libc.src.__support.libc_errno - libc.src.__support.wchar.string_converter libc.src.__support.wchar.mbstate + libc.src.__support.wchar.mbsnrtowcs ) add_entrypoint_object( diff --git a/libc/src/wchar/mbsrtowcs.cpp b/libc/src/wchar/mbsrtowcs.cpp index 4407daa293f34..fa00a26692d7c 100644 --- a/libc/src/wchar/mbsrtowcs.cpp +++ b/libc/src/wchar/mbsrtowcs.cpp @@ -23,7 +23,7 @@ LLVM_LIBC_FUNCTION(size_t, mbsrtowcs, size_t len, mbstate_t *__restrict ps)) { static internal::mbstate internal_mbstate; internal::StringConverter str_conv( - reinterpret_cast(src), + reinterpret_cast(*src), ps == nullptr ? &internal_mbstate : reinterpret_cast(ps), len); @@ -32,11 +32,16 @@ LLVM_LIBC_FUNCTION(size_t, mbsrtowcs, ErrorOr converted = str_conv.popUTF32(); while (converted.has_value()) { dst[dst_idx] = converted.value(); + // null terminator should not be counted in return value + if (converted.value() == L'\0') { + src = nullptr; + return dst_idx; + } dst_idx++; converted = str_conv.popUTF32(); } - src += str_conv.getSourceIndex(); + *src += str_conv.getSourceIndex(); if (converted.error() == -1) // if we hit conversion limit return dst_idx; diff --git a/libc/src/wchar/mbstowcs.cpp b/libc/src/wchar/mbstowcs.cpp index 1bba5cb4893e7..66046903a8c6f 100644 --- a/libc/src/wchar/mbstowcs.cpp +++ b/libc/src/wchar/mbstowcs.cpp @@ -13,8 +13,8 @@ #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" +#include "src/__support/wchar/mbsnrtowcs.h" #include "src/__support/wchar/mbstate.h" -#include "src/__support/wchar/string_converter.h" namespace LIBC_NAMESPACE_DECL { @@ -23,26 +23,15 @@ LLVM_LIBC_FUNCTION(size_t, mbstowcs, size_t n)) { n = pwcs == nullptr ? SIZE_MAX : n; static internal::mbstate internal_mbstate; - internal::StringConverter str_conv( - reinterpret_cast(s), &internal_mbstate, n); - int dst_idx = 0; + const char *temp = s; + auto ret = internal::mbsnrtowcs(pwcs, &temp, SIZE_MAX, n, &internal_mbstate); - ErrorOr converted = str_conv.popUTF32(); - - while (converted.has_value()) { - if (pwcs != nullptr) - pwcs[dst_idx] = converted.value(); - // if it is null terminator, do not count in return value - if (converted.value() == L'\0') - return dst_idx; - dst_idx++; - converted = str_conv.popUTF32(); + if (!ret.has_value()) { + // Encoding failure + libc_errno = ret.error(); + return -1; } - if (converted.error() == -1) // if we hit conversion limit - return dst_idx; - - libc_errno = converted.error(); - return -1; + return ret.value(); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt index ba1a59211c778..9ed924eae34c4 100644 --- a/libc/test/src/wchar/CMakeLists.txt +++ b/libc/test/src/wchar/CMakeLists.txt @@ -77,6 +77,22 @@ add_libc_test( libc.test.UnitTest.ErrnoCheckingTest ) +add_libc_test( + mbsrtowcs_test + SUITE + libc_wchar_unittests + SRCS + mbsrtowcs_test.cpp + DEPENDS + libc.src.__support.libc_errno + libc.src.__support.wchar.mbstate + libc.src.string.memset + libc.src.wchar.mbsrtowcs + libc.hdr.types.mbstate_t + libc.hdr.types.wchar_t + libc.test.UnitTest.ErrnoCheckingTest +) + add_libc_test( wctob_test SUITE diff --git a/libc/test/src/wchar/mbsrtowcs_test.cpp b/libc/test/src/wchar/mbsrtowcs_test.cpp new file mode 100644 index 0000000000000..2f6e6e6a71b31 --- /dev/null +++ b/libc/test/src/wchar/mbsrtowcs_test.cpp @@ -0,0 +1,139 @@ +//===-- Unittests for mbsetowcs -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "hdr/types/mbstate_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/libc_errno.h" +#include "src/__support/wchar/mbstate.h" +#include "src/string/memset.h" +#include "src/wchar/mbsrtowcs.h" +#include "test/UnitTest/ErrnoCheckingTest.h" +#include "test/UnitTest/Test.h" + +using LlvmLibcMBSRToWCSTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest; + +TEST_F(LlvmLibcMBSRToWCSTest, OneByteOneChar) { + const char *ch = "A"; + wchar_t dest[2]; + mbstate_t *mb; + LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t)); + size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &ch, 1, mb); + ASSERT_EQ(static_cast(*dest), 'A'); + ASSERT_EQ(static_cast(n), 1); + ASSERT_ERRNO_SUCCESS(); + + n = LIBC_NAMESPACE::mbsrtowcs(dest + 1, &ch, 1, mb); + ASSERT_EQ(static_cast(dest[1]), '\0'); + // Should not include null terminator + ASSERT_EQ(static_cast(n), 0); + ASSERT_ERRNO_SUCCESS(); +} + +// TEST_F(LlvmLibcMBSRToWCSTest, FourByteOneChar) { +// const char *src = "\xf0\x9f\x98\xb9"; // laughing cat emoji 😹 +// wchar_t dest[2]; +// size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 2); +// ASSERT_ERRNO_SUCCESS(); +// ASSERT_EQ(static_cast(dest[0]), 128569); +// ASSERT_TRUE(dest[1] == L'\0'); +// // Should not count null terminator in number +// ASSERT_EQ(static_cast(n), 1); +// } + +// TEST_F(LlvmLibcMBSRToWCSTest, MultiByteTwoCharacters) { +// // Two laughing cat emojis "😹😹" +// const char *src = "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; +// wchar_t dest[3]; +// size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 3); +// ASSERT_ERRNO_SUCCESS(); +// ASSERT_EQ(static_cast(dest[0]), 128569); +// ASSERT_EQ(static_cast(dest[1]), 128569); +// ASSERT_TRUE(dest[2] == L'\0'); +// // Should not count null terminator in number +// ASSERT_EQ(static_cast(n), 2); +// } + +// TEST_F(LlvmLibcMBSRToWCSTest, MixedNumberOfBytes) { +// // 'A', sigma symbol 'Σ', recycling symbol '♻', laughing cat emoji '😹' +// const char *src = "A\xce\xa3\xe2\x99\xbb\xf0\x9f\x98\xb9"; +// wchar_t dest[5]; +// size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 5); +// ASSERT_ERRNO_SUCCESS(); +// ASSERT_EQ(static_cast(dest[0]), 'A'); +// ASSERT_EQ(static_cast(dest[1]), 931); +// ASSERT_EQ(static_cast(dest[2]), 9851); +// ASSERT_EQ(static_cast(dest[3]), 128569); +// ASSERT_TRUE(dest[4] == L'\0'); +// // Should not count null terminator in number +// ASSERT_EQ(static_cast(n), 4); +// } + +// TEST_F(LlvmLibcMBSRToWCSTest, ReadLessThanStringLength) { +// // Four laughing cat emojis "😹😹😹😹" +// const char *src = +// "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; +// wchar_t dest[5] = {L'a', L'b', L'c', L'd', L'e'}; +// size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 3); +// ASSERT_ERRNO_SUCCESS(); +// // Should have read 3 emojis +// ASSERT_EQ(static_cast(n), 3); +// ASSERT_EQ(static_cast(dest[0]), 128569); +// ASSERT_EQ(static_cast(dest[1]), 128569); +// ASSERT_EQ(static_cast(dest[2]), 128569); +// ASSERT_TRUE(dest[3] == L'd'); +// ASSERT_TRUE(dest[4] == L'e'); +// } + +// TEST_F(LlvmLibcMBSRToWCSTest, InvalidFirstByte) { +// // 0x80 is invalid first byte of mb character +// const char *src = +// "\x80\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; +// wchar_t dest[3]; +// size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 3); +// // Should return error and set errno +// ASSERT_EQ(static_cast(n), -1); +// ASSERT_ERRNO_EQ(EILSEQ); +// } + +// TEST_F(LlvmLibcMBSRToWCSTest, InvalidMiddleByte) { +// // The 7th byte is invalid for a 4 byte character +// const char *src = +// "\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; +// wchar_t dest[3]; +// size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 5); +// // Should return error and set errno +// ASSERT_EQ(static_cast(n), -1); +// ASSERT_ERRNO_EQ(EILSEQ); +// } + +// TEST_F(LlvmLibcMBSRToWCSTest, NullDestination) { +// // Four laughing cat emojis "😹😹😹😹" +// const char *src = +// "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; +// size_t n = LIBC_NAMESPACE::mbstowcs(nullptr, src, 2); +// ASSERT_ERRNO_SUCCESS(); +// // Null destination should ignore len and read till end of string +// ASSERT_EQ(static_cast(n), 4); +// } + +// TEST_F(LlvmLibcMBSRToWCSTest, ErrnoChecks) { +// // Two laughing cat emojis and invalid 3rd mb char (3rd byte of it) +// const char *src = +// "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9"; +// wchar_t dest[5]; +// // First two bytes are valid --> should not set errno +// size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 2); +// ASSERT_ERRNO_SUCCESS(); +// ASSERT_EQ(static_cast(n), 2); +// ASSERT_EQ(static_cast(dest[0]), 128569); +// ASSERT_EQ(static_cast(dest[1]), 128569); +// // Trying to read the 3rd byte should set errno +// n = LIBC_NAMESPACE::mbstowcs(dest, src, 2); +// ASSERT_ERRNO_EQ(EILSEQ); +// ASSERT_EQ(static_cast(n), -1); +// } From 40aa6eeee39d8e89118d98a0587c3e10e9f8a575 Mon Sep 17 00:00:00 2001 From: Sriya Pratipati Date: Thu, 17 Jul 2025 23:24:11 +0000 Subject: [PATCH 07/14] implemented mbsnrtowcs and tests --- libc/config/linux/x86_64/entrypoints.txt | 1 + libc/include/wchar.yaml | 10 + libc/src/__support/wchar/mbsnrtowcs.cpp | 8 +- libc/src/wchar/CMakeLists.txt | 20 +- libc/src/wchar/mbsnrtowcs.cpp | 39 ++++ libc/src/wchar/mbsnrtowcs.h | 24 +++ libc/src/wchar/mbsrtowcs.cpp | 35 ++-- libc/src/wchar/mbstowcs.cpp | 1 + libc/test/src/wchar/CMakeLists.txt | 16 ++ libc/test/src/wchar/mbsnrtowcs_test.cpp | 203 ++++++++++++++++++++ libc/test/src/wchar/mbsrtowcs_test.cpp | 229 +++++++++++++---------- libc/test/src/wchar/mbstowcs_test.cpp | 30 ++- 12 files changed, 491 insertions(+), 125 deletions(-) create mode 100644 libc/src/wchar/mbsnrtowcs.cpp create mode 100644 libc/src/wchar/mbsnrtowcs.h create mode 100644 libc/test/src/wchar/mbsnrtowcs_test.cpp diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index e37370fdf47f6..e0a02abef001c 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -1263,6 +1263,7 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.wchar.mbtowc libc.src.wchar.mbstowcs libc.src.wchar.mbsrtowcs + libc.src.wchar.mbsnrtowcs libc.src.wchar.wcrtomb libc.src.wchar.wctomb ) diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml index 3d63d9476e002..d3166d60c19de 100644 --- a/libc/include/wchar.yaml +++ b/libc/include/wchar.yaml @@ -70,6 +70,16 @@ functions: - type: const char **__restrict - type: size_t - type: mbstate_t *__restrict + - name: mbsnrtowcs + standards: + - stdc + return_type: size_t + arguments: + - type: wchar_t *__restrict + - type: const char **__restrict + - type: size_t + - type: size_t + - type: mbstate_t *__restrict - name: wmemset standards: - stdc diff --git a/libc/src/__support/wchar/mbsnrtowcs.cpp b/libc/src/__support/wchar/mbsnrtowcs.cpp index 106a596e59303..9eb2b5423af87 100644 --- a/libc/src/__support/wchar/mbsnrtowcs.cpp +++ b/libc/src/__support/wchar/mbsnrtowcs.cpp @@ -38,16 +38,18 @@ ErrorOr mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src, dst[dst_idx] = converted.value(); // null terminator should not be counted in return value if (converted.value() == L'\0') { - src = nullptr; + if (dst != nullptr) + *src = nullptr; return dst_idx; } dst_idx++; converted = str_conv.popUTF32(); } - *src += str_conv.getSourceIndex(); - if (converted.error() == -1) // if we hit conversion limit + if (converted.error() == -1) { // if we hit conversion limit + *src += str_conv.getSourceIndex(); return dst_idx; + } return Error(converted.error()); } diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt index 2613f6daf111d..ac359064ec10c 100644 --- a/libc/src/wchar/CMakeLists.txt +++ b/libc/src/wchar/CMakeLists.txt @@ -187,7 +187,25 @@ add_entrypoint_object( libc.src.__support.common libc.src.__support.macros.config libc.src.__support.libc_errno - libc.src.__support.wchar.string_converter + libc.src.__support.wchar.mbstate + libc.src.__support.wchar.mbsnrtowcs + libc.src.__support.wchar.mbstate +) + +add_entrypoint_object( + mbsnrtowcs + SRCS + mbsnrtowcs.cpp + HDRS + mbsnrtowcs.h + DEPENDS + libc.hdr.types.size_t + libc.hdr.types.wchar_t + libc.src.__support.common + libc.src.__support.macros.config + libc.src.__support.libc_errno + libc.src.__support.wchar.mbstate + libc.src.__support.wchar.mbsnrtowcs libc.src.__support.wchar.mbstate ) diff --git a/libc/src/wchar/mbsnrtowcs.cpp b/libc/src/wchar/mbsnrtowcs.cpp new file mode 100644 index 0000000000000..28e0ff31b26c4 --- /dev/null +++ b/libc/src/wchar/mbsnrtowcs.cpp @@ -0,0 +1,39 @@ +//===-- Implementation of mbsnrtowcs --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/wchar/mbsnrtowcs.h" + +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/common.h" +#include "src/__support/libc_errno.h" +#include "src/__support/macros/config.h" +#include "src/__support/wchar/mbsnrtowcs.h" +#include "src/__support/wchar/mbstate.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(size_t, mbsnrtowcs, + (wchar_t *__restrict dst, const char **__restrict src, + size_t nmc, size_t len, mbstate_t *__restrict ps)) { + static internal::mbstate internal_mbstate; + // If destination is null, ignore len + len = dst == nullptr ? SIZE_MAX : len; + auto ret = internal::mbsnrtowcs( + dst, src, nmc, len, + ps == nullptr ? &internal_mbstate + : reinterpret_cast(ps)); + if (!ret.has_value()) { + // Encoding failure + libc_errno = ret.error(); + return -1; + } + return ret.value(); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/mbsnrtowcs.h b/libc/src/wchar/mbsnrtowcs.h new file mode 100644 index 0000000000000..5c9687b20b3fa --- /dev/null +++ b/libc/src/wchar/mbsnrtowcs.h @@ -0,0 +1,24 @@ +//===-- Implementation header for mbsnrtowcs ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_WCHAR_MBSNRTOWCS_H +#define LLVM_LIBC_SRC_WCHAR_MBSNRTOWCS_H + +#include "hdr/types/mbstate_t.h" +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src, + size_t nmc, size_t len, mbstate_t *__restrict ps); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_WCHAR_MBSNRTOWCS_H diff --git a/libc/src/wchar/mbsrtowcs.cpp b/libc/src/wchar/mbsrtowcs.cpp index fa00a26692d7c..82ca25a3d863d 100644 --- a/libc/src/wchar/mbsrtowcs.cpp +++ b/libc/src/wchar/mbsrtowcs.cpp @@ -13,8 +13,8 @@ #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" +#include "src/__support/wchar/mbsnrtowcs.h" #include "src/__support/wchar/mbstate.h" -#include "src/__support/wchar/string_converter.h" namespace LIBC_NAMESPACE_DECL { @@ -22,31 +22,18 @@ LLVM_LIBC_FUNCTION(size_t, mbsrtowcs, (wchar_t *__restrict dst, const char **__restrict src, size_t len, mbstate_t *__restrict ps)) { static internal::mbstate internal_mbstate; - internal::StringConverter str_conv( - reinterpret_cast(*src), + // If destination is null, ignore len + len = dst == nullptr ? SIZE_MAX : len; + auto ret = internal::mbsnrtowcs( + dst, src, SIZE_MAX, len, ps == nullptr ? &internal_mbstate - : reinterpret_cast(ps), - len); - - int dst_idx = 0; - ErrorOr converted = str_conv.popUTF32(); - while (converted.has_value()) { - dst[dst_idx] = converted.value(); - // null terminator should not be counted in return value - if (converted.value() == L'\0') { - src = nullptr; - return dst_idx; - } - dst_idx++; - converted = str_conv.popUTF32(); + : reinterpret_cast(ps)); + if (!ret.has_value()) { + // Encoding failure + libc_errno = ret.error(); + return -1; } - - *src += str_conv.getSourceIndex(); - if (converted.error() == -1) // if we hit conversion limit - return dst_idx; - - libc_errno = converted.error(); - return -1; + return ret.value(); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/mbstowcs.cpp b/libc/src/wchar/mbstowcs.cpp index 66046903a8c6f..1a3232847fa71 100644 --- a/libc/src/wchar/mbstowcs.cpp +++ b/libc/src/wchar/mbstowcs.cpp @@ -21,6 +21,7 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(size_t, mbstowcs, (wchar_t *__restrict pwcs, const char *__restrict s, size_t n)) { + // If destination is null, ignore n n = pwcs == nullptr ? SIZE_MAX : n; static internal::mbstate internal_mbstate; const char *temp = s; diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt index 9ed924eae34c4..1a8f9981fc5bb 100644 --- a/libc/test/src/wchar/CMakeLists.txt +++ b/libc/test/src/wchar/CMakeLists.txt @@ -93,6 +93,22 @@ add_libc_test( libc.test.UnitTest.ErrnoCheckingTest ) +add_libc_test( + mbsnrtowcs_test + SUITE + libc_wchar_unittests + SRCS + mbsnrtowcs_test.cpp + DEPENDS + libc.src.__support.libc_errno + libc.src.__support.wchar.mbstate + libc.src.string.memset + libc.src.wchar.mbsnrtowcs + libc.hdr.types.mbstate_t + libc.hdr.types.wchar_t + libc.test.UnitTest.ErrnoCheckingTest +) + add_libc_test( wctob_test SUITE diff --git a/libc/test/src/wchar/mbsnrtowcs_test.cpp b/libc/test/src/wchar/mbsnrtowcs_test.cpp new file mode 100644 index 0000000000000..a042794997b9f --- /dev/null +++ b/libc/test/src/wchar/mbsnrtowcs_test.cpp @@ -0,0 +1,203 @@ +//===-- Unittests for mbsetowcs -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "hdr/types/mbstate_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/libc_errno.h" +#include "src/__support/wchar/mbstate.h" +#include "src/string/memset.h" +#include "src/wchar/mbsnrtowcs.h" +#include "test/UnitTest/ErrnoCheckingTest.h" +#include "test/UnitTest/Test.h" + +using LlvmLibcMBSNRToWCSTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest; + +TEST_F(LlvmLibcMBSNRToWCSTest, OneByteOneChar) { + const char *ch = "A"; + const char *original = ch; + wchar_t dest[2]; + mbstate_t *mb; + LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t)); + size_t n = LIBC_NAMESPACE::mbsnrtowcs(dest, &ch, 1, 1, mb); + ASSERT_EQ(static_cast(*dest), 'A'); + ASSERT_EQ(static_cast(n), 1); + // Should point to null terminator now + ASSERT_EQ(ch, original + 1); + ASSERT_ERRNO_SUCCESS(); + + n = LIBC_NAMESPACE::mbsnrtowcs(dest + 1, &ch, 1, 1, mb); + ASSERT_EQ(static_cast(dest[1]), '\0'); + // Should not include null terminator + ASSERT_EQ(static_cast(n), 0); + // Should now be a nullptr + ASSERT_EQ(ch, nullptr); + ASSERT_ERRNO_SUCCESS(); +} + +TEST_F(LlvmLibcMBSNRToWCSTest, FourByteOneChar) { + const char *src = "\xf0\x9f\x98\xb9"; // laughing cat emoji 😹 + const char *original = src; + wchar_t dest[2]; + mbstate_t *mb; + LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t)); + // Not enough bytes for the full character + size_t n = LIBC_NAMESPACE::mbsnrtowcs(dest, &src, 3, 2, mb); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(static_cast(n), 0); + ASSERT_EQ(src, original + 3); + // Needs 2 more bytes (last byte of cat + null terminator) + n = LIBC_NAMESPACE::mbsnrtowcs(dest, &src, 2, 2, mb); + ASSERT_ERRNO_SUCCESS(); + // Does not include null terminator + ASSERT_EQ(static_cast(n), 1); + ASSERT_EQ(src, nullptr); + ASSERT_EQ(static_cast(dest[0]), 128569); + ASSERT_TRUE(dest[1] == L'\0'); +} + +TEST_F(LlvmLibcMBSNRToWCSTest, MixedNumberOfBytes) { + // 'A', sigma symbol 'Σ', recycling symbol '♻', laughing cat emoji '😹' + const char *src = "A\xce\xa3\xe2\x99\xbb\xf0\x9f\x98\xb9"; + const char *original = src; + wchar_t dest[5]; + mbstate_t *mb; + LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t)); + + // Read 'A' + size_t n = LIBC_NAMESPACE::mbsnrtowcs(dest, &src, 1, 1, mb); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(static_cast(dest[0]), 'A'); + ASSERT_EQ(static_cast(n), 1); + ASSERT_EQ(src, original + 1); + + // Read sigma 'Σ' + n = LIBC_NAMESPACE::mbsnrtowcs(dest + 1, &src, 2, 1, mb); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(static_cast(dest[1]), 931); + ASSERT_EQ(static_cast(n), 1); + ASSERT_EQ(src, original + 3); + + // Read recycling '♻' + n = LIBC_NAMESPACE::mbsnrtowcs(dest + 2, &src, 2, 5, mb); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(static_cast(n), 0); + ASSERT_EQ(src, original + 5); + n = LIBC_NAMESPACE::mbsnrtowcs(dest + 2, &src, 1, 1, mb); +ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(static_cast(n), 1); + ASSERT_EQ(src, original + 6); + ASSERT_EQ(static_cast(dest[2]), 9851); + + // Read laughing cat emoji '😹' + n = LIBC_NAMESPACE::mbsnrtowcs(dest + 3, &src, 4, 5, mb); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(static_cast(n), 1); + ASSERT_EQ(src, original + 10); + ASSERT_EQ(static_cast(dest[3]), 128569); + + + n = LIBC_NAMESPACE::mbsnrtowcs(dest + 4, &src, 4, 4, nullptr); + ASSERT_TRUE(dest[4] == L'\0'); + ASSERT_ERRNO_SUCCESS(); + // Should not count null terminator in number + ASSERT_EQ(static_cast(n), 0); + // Should now be a nullptr + ASSERT_EQ(src, nullptr); +} + +TEST_F(LlvmLibcMBSNRToWCSTest, ReadLessThanStringLength) { + // Four laughing cat emojis "😹😹😹😹" + const char *src = + "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; + const char *original = src; + wchar_t dest[5] = {L'a', L'b', L'c', L'd', L'e'}; + size_t n = LIBC_NAMESPACE::mbsnrtowcs(dest, &src, 100, 3, nullptr); + ASSERT_ERRNO_SUCCESS(); + // Should have read 3 emojis + ASSERT_EQ(static_cast(n), 3); + ASSERT_EQ(static_cast(dest[0]), 128569); + ASSERT_EQ(static_cast(dest[1]), 128569); + ASSERT_EQ(static_cast(dest[2]), 128569); + ASSERT_TRUE(dest[3] == L'd'); + ASSERT_TRUE(dest[4] == L'e'); + // Read three laughing cat emojis, 12 bytes + ASSERT_EQ(src, original + 12); +} + +TEST_F(LlvmLibcMBSNRToWCSTest, InvalidFirstByte) { + // 0x80 is invalid first byte of mb character + const char *src = + "\x80\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; + const char *original = src; + wchar_t dest[3]; + size_t n = LIBC_NAMESPACE::mbsnrtowcs(dest, &src, 88, 88, nullptr); + // Should return error and set errno + ASSERT_EQ(static_cast(n), -1); + ASSERT_ERRNO_EQ(EILSEQ); + // Should not update pointer + ASSERT_EQ(src, original); +} + +TEST_F(LlvmLibcMBSNRToWCSTest, InvalidMiddleByte) { + // The 7th byte is invalid for a 4 byte character + const char *src = + "\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; + const char *original = src; + wchar_t dest[3]; + mbstate_t *mb; + LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t)); + // Successfully read one character and first byte of the second character + size_t n = LIBC_NAMESPACE::mbsnrtowcs(dest, &src, 5, 88, mb); + ASSERT_EQ(static_cast(n), 1); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(src, original + 5); + ASSERT_EQ(static_cast(dest[0]), 128569); + + n = LIBC_NAMESPACE::mbsnrtowcs(dest + 1, &src, 5, 88, mb); + // Should return error, set errno, and not update the pointer + ASSERT_EQ(static_cast(n), -1); + ASSERT_ERRNO_EQ(EILSEQ); + ASSERT_EQ(src, original + 5); +} + +TEST_F(LlvmLibcMBSNRToWCSTest, NullDestination) { + // Four laughing cat emojis "😹😹😹😹" + const char *src = + "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; + const char *original = src; + size_t n = LIBC_NAMESPACE::mbsnrtowcs(nullptr, &src, 88, 88, nullptr); + ASSERT_ERRNO_SUCCESS(); + // Null destination should ignore len and read till end of string + ASSERT_EQ(static_cast(n), 4); + // It should also not change the src pointer + ASSERT_EQ(src, original); +} + +TEST_F(LlvmLibcMBSNRToWCSTest, ErrnoChecks) { + // Two laughing cat emojis and invalid 3rd mb char (3rd byte of it) + const char *src = + "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9"; + const char *original = src; + wchar_t dest[5]; + mbstate_t *mb; + LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t)); + // First two bytes are valid --> should not set errno + size_t n = LIBC_NAMESPACE::mbsnrtowcs(dest, &src, 80, 2, mb); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(static_cast(n), 2); + ASSERT_EQ(static_cast(dest[0]), 128569); + ASSERT_EQ(static_cast(dest[1]), 128569); + ASSERT_EQ(src, original + 8); + + // Trying to read the 3rd byte should set errno + n = LIBC_NAMESPACE::mbsnrtowcs(dest + 2, &src, 4, 2, mb); + ASSERT_ERRNO_EQ(EILSEQ); + ASSERT_EQ(static_cast(n), -1); + // Should not move the pointer + ASSERT_EQ(src, original + 8); +} diff --git a/libc/test/src/wchar/mbsrtowcs_test.cpp b/libc/test/src/wchar/mbsrtowcs_test.cpp index 2f6e6e6a71b31..fae6074487263 100644 --- a/libc/test/src/wchar/mbsrtowcs_test.cpp +++ b/libc/test/src/wchar/mbsrtowcs_test.cpp @@ -19,121 +19,158 @@ using LlvmLibcMBSRToWCSTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest; TEST_F(LlvmLibcMBSRToWCSTest, OneByteOneChar) { const char *ch = "A"; + const char *original = ch; wchar_t dest[2]; mbstate_t *mb; LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t)); size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &ch, 1, mb); ASSERT_EQ(static_cast(*dest), 'A'); ASSERT_EQ(static_cast(n), 1); + // Should point to null terminator now + ASSERT_EQ(ch, original + 1); ASSERT_ERRNO_SUCCESS(); n = LIBC_NAMESPACE::mbsrtowcs(dest + 1, &ch, 1, mb); ASSERT_EQ(static_cast(dest[1]), '\0'); // Should not include null terminator ASSERT_EQ(static_cast(n), 0); + // Should now be a nullptr + ASSERT_EQ(ch, nullptr); ASSERT_ERRNO_SUCCESS(); } -// TEST_F(LlvmLibcMBSRToWCSTest, FourByteOneChar) { -// const char *src = "\xf0\x9f\x98\xb9"; // laughing cat emoji 😹 -// wchar_t dest[2]; -// size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 2); -// ASSERT_ERRNO_SUCCESS(); -// ASSERT_EQ(static_cast(dest[0]), 128569); -// ASSERT_TRUE(dest[1] == L'\0'); -// // Should not count null terminator in number -// ASSERT_EQ(static_cast(n), 1); -// } +TEST_F(LlvmLibcMBSRToWCSTest, FourByteOneChar) { + const char *src = "\xf0\x9f\x98\xb9"; // laughing cat emoji 😹 + wchar_t dest[2]; + mbstate_t *mb; + LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t)); + size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &src, 2, mb); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(static_cast(dest[0]), 128569); + ASSERT_TRUE(dest[1] == L'\0'); + // Should not count null terminator in number + ASSERT_EQ(static_cast(n), 1); + // Should now be a nullptr + ASSERT_EQ(src, nullptr); +} -// TEST_F(LlvmLibcMBSRToWCSTest, MultiByteTwoCharacters) { -// // Two laughing cat emojis "😹😹" -// const char *src = "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; -// wchar_t dest[3]; -// size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 3); -// ASSERT_ERRNO_SUCCESS(); -// ASSERT_EQ(static_cast(dest[0]), 128569); -// ASSERT_EQ(static_cast(dest[1]), 128569); -// ASSERT_TRUE(dest[2] == L'\0'); -// // Should not count null terminator in number -// ASSERT_EQ(static_cast(n), 2); -// } +TEST_F(LlvmLibcMBSRToWCSTest, MultiByteTwoCharacters) { + // Two laughing cat emojis "😹😹" + const char *src = "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; + wchar_t dest[3]; + mbstate_t *mb; + LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t)); + size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &src, 3, mb); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(static_cast(dest[0]), 128569); + ASSERT_EQ(static_cast(dest[1]), 128569); + ASSERT_TRUE(dest[2] == L'\0'); + // Should not count null terminator in number + ASSERT_EQ(static_cast(n), 2); + // Should now be a nullptr + ASSERT_EQ(src, nullptr); +} -// TEST_F(LlvmLibcMBSRToWCSTest, MixedNumberOfBytes) { -// // 'A', sigma symbol 'Σ', recycling symbol '♻', laughing cat emoji '😹' -// const char *src = "A\xce\xa3\xe2\x99\xbb\xf0\x9f\x98\xb9"; -// wchar_t dest[5]; -// size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 5); -// ASSERT_ERRNO_SUCCESS(); -// ASSERT_EQ(static_cast(dest[0]), 'A'); -// ASSERT_EQ(static_cast(dest[1]), 931); -// ASSERT_EQ(static_cast(dest[2]), 9851); -// ASSERT_EQ(static_cast(dest[3]), 128569); -// ASSERT_TRUE(dest[4] == L'\0'); -// // Should not count null terminator in number -// ASSERT_EQ(static_cast(n), 4); -// } +TEST_F(LlvmLibcMBSRToWCSTest, MixedNumberOfBytes) { + // 'A', sigma symbol 'Σ', recycling symbol '♻', laughing cat emoji '😹' + const char *src = "A\xce\xa3\xe2\x99\xbb\xf0\x9f\x98\xb9"; + const char *original = src; + wchar_t dest[5]; + size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &src, 4, nullptr); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(static_cast(dest[0]), 'A'); + ASSERT_EQ(static_cast(dest[1]), 931); + ASSERT_EQ(static_cast(dest[2]), 9851); + ASSERT_EQ(static_cast(dest[3]), 128569); + // Should point to null terminator (byte at 10th index) + ASSERT_EQ(src, original + 10); + ASSERT_EQ(static_cast(n), 4); + n = LIBC_NAMESPACE::mbsrtowcs(dest + 4, &src, 4, nullptr); + ASSERT_TRUE(dest[4] == L'\0'); + ASSERT_ERRNO_SUCCESS(); + // Should not count null terminator in number + ASSERT_EQ(static_cast(n), 0); + // Should now be a nullptr + ASSERT_EQ(src, nullptr); +} -// TEST_F(LlvmLibcMBSRToWCSTest, ReadLessThanStringLength) { -// // Four laughing cat emojis "😹😹😹😹" -// const char *src = -// "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; -// wchar_t dest[5] = {L'a', L'b', L'c', L'd', L'e'}; -// size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 3); -// ASSERT_ERRNO_SUCCESS(); -// // Should have read 3 emojis -// ASSERT_EQ(static_cast(n), 3); -// ASSERT_EQ(static_cast(dest[0]), 128569); -// ASSERT_EQ(static_cast(dest[1]), 128569); -// ASSERT_EQ(static_cast(dest[2]), 128569); -// ASSERT_TRUE(dest[3] == L'd'); -// ASSERT_TRUE(dest[4] == L'e'); -// } +TEST_F(LlvmLibcMBSRToWCSTest, ReadLessThanStringLength) { + // Four laughing cat emojis "😹😹😹😹" + const char *src = + "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; + const char *original = src; + wchar_t dest[5] = {L'a', L'b', L'c', L'd', L'e'}; + size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &src, 3, nullptr); + ASSERT_ERRNO_SUCCESS(); + // Should have read 3 emojis + ASSERT_EQ(static_cast(n), 3); + ASSERT_EQ(static_cast(dest[0]), 128569); + ASSERT_EQ(static_cast(dest[1]), 128569); + ASSERT_EQ(static_cast(dest[2]), 128569); + ASSERT_TRUE(dest[3] == L'd'); + ASSERT_TRUE(dest[4] == L'e'); + // Read three laughing cat emojis, 12 bytes + ASSERT_EQ(src, original + 12); +} -// TEST_F(LlvmLibcMBSRToWCSTest, InvalidFirstByte) { -// // 0x80 is invalid first byte of mb character -// const char *src = -// "\x80\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; -// wchar_t dest[3]; -// size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 3); -// // Should return error and set errno -// ASSERT_EQ(static_cast(n), -1); -// ASSERT_ERRNO_EQ(EILSEQ); -// } +TEST_F(LlvmLibcMBSRToWCSTest, InvalidFirstByte) { + // 0x80 is invalid first byte of mb character + const char *src = + "\x80\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; + const char *original = src; + wchar_t dest[3]; + size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &src, 3, nullptr); + // Should return error and set errno + ASSERT_EQ(static_cast(n), -1); + ASSERT_ERRNO_EQ(EILSEQ); + // Should not update pointer + ASSERT_EQ(src, original); +} -// TEST_F(LlvmLibcMBSRToWCSTest, InvalidMiddleByte) { -// // The 7th byte is invalid for a 4 byte character -// const char *src = -// "\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; -// wchar_t dest[3]; -// size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 5); -// // Should return error and set errno -// ASSERT_EQ(static_cast(n), -1); -// ASSERT_ERRNO_EQ(EILSEQ); -// } +TEST_F(LlvmLibcMBSRToWCSTest, InvalidMiddleByte) { + // The 7th byte is invalid for a 4 byte character + const char *src = + "\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; + const char *original = src; + wchar_t dest[3]; + size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &src, 5, nullptr); + // Should return error, set errno, and not update the pointer + ASSERT_EQ(static_cast(n), -1); + ASSERT_ERRNO_EQ(EILSEQ); + ASSERT_EQ(src, original); +} -// TEST_F(LlvmLibcMBSRToWCSTest, NullDestination) { -// // Four laughing cat emojis "😹😹😹😹" -// const char *src = -// "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; -// size_t n = LIBC_NAMESPACE::mbstowcs(nullptr, src, 2); -// ASSERT_ERRNO_SUCCESS(); -// // Null destination should ignore len and read till end of string -// ASSERT_EQ(static_cast(n), 4); -// } +TEST_F(LlvmLibcMBSRToWCSTest, NullDestination) { + // Four laughing cat emojis "😹😹😹😹" + const char *src = + "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; + const char *original = src; + size_t n = LIBC_NAMESPACE::mbsrtowcs(nullptr, &src, 2, nullptr); + ASSERT_ERRNO_SUCCESS(); + // Null destination should ignore len and read till end of string + ASSERT_EQ(static_cast(n), 4); + // It should also not change the src pointer + ASSERT_EQ(src, original); +} -// TEST_F(LlvmLibcMBSRToWCSTest, ErrnoChecks) { -// // Two laughing cat emojis and invalid 3rd mb char (3rd byte of it) -// const char *src = -// "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9"; -// wchar_t dest[5]; -// // First two bytes are valid --> should not set errno -// size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 2); -// ASSERT_ERRNO_SUCCESS(); -// ASSERT_EQ(static_cast(n), 2); -// ASSERT_EQ(static_cast(dest[0]), 128569); -// ASSERT_EQ(static_cast(dest[1]), 128569); -// // Trying to read the 3rd byte should set errno -// n = LIBC_NAMESPACE::mbstowcs(dest, src, 2); -// ASSERT_ERRNO_EQ(EILSEQ); -// ASSERT_EQ(static_cast(n), -1); -// } +TEST_F(LlvmLibcMBSRToWCSTest, ErrnoChecks) { + // Two laughing cat emojis and invalid 3rd mb char (3rd byte of it) + const char *src = + "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9"; + const char *original = src; + wchar_t dest[5]; + // First two bytes are valid --> should not set errno + size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &src, 2, nullptr); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(static_cast(n), 2); + ASSERT_EQ(static_cast(dest[0]), 128569); + ASSERT_EQ(static_cast(dest[1]), 128569); + ASSERT_EQ(src, original + 8); + + // Trying to read the 3rd byte should set errno + n = LIBC_NAMESPACE::mbsrtowcs(dest, &src, 2, nullptr); + ASSERT_ERRNO_EQ(EILSEQ); + ASSERT_EQ(static_cast(n), -1); + // Should not move the pointer + ASSERT_EQ(src, original + 8); +} diff --git a/libc/test/src/wchar/mbstowcs_test.cpp b/libc/test/src/wchar/mbstowcs_test.cpp index 1f6633bdd2766..0fc84aa7a062e 100644 --- a/libc/test/src/wchar/mbstowcs_test.cpp +++ b/libc/test/src/wchar/mbstowcs_test.cpp @@ -16,21 +16,27 @@ using LlvmLibcMBSToWCSTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest; TEST_F(LlvmLibcMBSToWCSTest, OneByteOneChar) { const char *ch = "A"; + const char *original = ch; wchar_t dest[2]; size_t n = LIBC_NAMESPACE::mbstowcs(dest, ch, 1); ASSERT_EQ(static_cast(*dest), 'A'); ASSERT_EQ(static_cast(n), 1); + // Making sure the pointer is not getting updated + ASSERT_EQ(ch, original); ASSERT_ERRNO_SUCCESS(); n = LIBC_NAMESPACE::mbstowcs(dest + 1, ch + 1, 1); ASSERT_EQ(static_cast(dest[1]), '\0'); // Should not include null terminator ASSERT_EQ(static_cast(n), 0); + // Making sure the pointer is not getting updated + ASSERT_EQ(ch, original); ASSERT_ERRNO_SUCCESS(); } TEST_F(LlvmLibcMBSToWCSTest, FourByteOneChar) { const char *src = "\xf0\x9f\x98\xb9"; // laughing cat emoji 😹 + const char *original = src; wchar_t dest[2]; size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 2); ASSERT_ERRNO_SUCCESS(); @@ -38,11 +44,14 @@ TEST_F(LlvmLibcMBSToWCSTest, FourByteOneChar) { ASSERT_TRUE(dest[1] == L'\0'); // Should not count null terminator in number ASSERT_EQ(static_cast(n), 1); + // Making sure the pointer is not getting updated + ASSERT_EQ(src, original); } TEST_F(LlvmLibcMBSToWCSTest, MultiByteTwoCharacters) { // Two laughing cat emojis "😹😹" const char *src = "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; + const char *original = src; wchar_t dest[3]; size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 3); ASSERT_ERRNO_SUCCESS(); @@ -51,11 +60,14 @@ TEST_F(LlvmLibcMBSToWCSTest, MultiByteTwoCharacters) { ASSERT_TRUE(dest[2] == L'\0'); // Should not count null terminator in number ASSERT_EQ(static_cast(n), 2); + // Making sure the pointer is not getting updated + ASSERT_EQ(src, original); } TEST_F(LlvmLibcMBSToWCSTest, MixedNumberOfBytes) { // 'A', sigma symbol 'Σ', recycling symbol '♻', laughing cat emoji '😹' const char *src = "A\xce\xa3\xe2\x99\xbb\xf0\x9f\x98\xb9"; + const char *original = src; wchar_t dest[5]; size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 5); ASSERT_ERRNO_SUCCESS(); @@ -66,12 +78,15 @@ TEST_F(LlvmLibcMBSToWCSTest, MixedNumberOfBytes) { ASSERT_TRUE(dest[4] == L'\0'); // Should not count null terminator in number ASSERT_EQ(static_cast(n), 4); + // Making sure the pointer is not getting updated + ASSERT_EQ(src, original); } TEST_F(LlvmLibcMBSToWCSTest, ReadLessThanStringLength) { // Four laughing cat emojis "😹😹😹😹" const char *src = "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; + const char *original = src; wchar_t dest[5] = {L'a', L'b', L'c', L'd', L'e'}; size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 3); ASSERT_ERRNO_SUCCESS(); @@ -82,6 +97,8 @@ TEST_F(LlvmLibcMBSToWCSTest, ReadLessThanStringLength) { ASSERT_EQ(static_cast(dest[2]), 128569); ASSERT_TRUE(dest[3] == L'd'); ASSERT_TRUE(dest[4] == L'e'); + // Making sure the pointer is not getting updated + ASSERT_EQ(src, original); } TEST_F(LlvmLibcMBSToWCSTest, InvalidFirstByte) { @@ -99,27 +116,34 @@ TEST_F(LlvmLibcMBSToWCSTest, InvalidMiddleByte) { // The 7th byte is invalid for a 4 byte character const char *src = "\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; + const char *original = src; wchar_t dest[3]; size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 5); // Should return error and set errno ASSERT_EQ(static_cast(n), -1); ASSERT_ERRNO_EQ(EILSEQ); + // Making sure the pointer is not getting updated + ASSERT_EQ(src, original); } TEST_F(LlvmLibcMBSToWCSTest, NullDestination) { // Four laughing cat emojis "😹😹😹😹" const char *src = "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; + const char *original = src; size_t n = LIBC_NAMESPACE::mbstowcs(nullptr, src, 2); ASSERT_ERRNO_SUCCESS(); // Null destination should ignore len and read till end of string ASSERT_EQ(static_cast(n), 4); + // Making sure the pointer is not getting updated + ASSERT_EQ(src, original); } TEST_F(LlvmLibcMBSToWCSTest, ErrnoChecks) { // Two laughing cat emojis and invalid 3rd mb char (3rd byte of it) const char *src = "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9"; + const char *original = src; wchar_t dest[5]; // First two bytes are valid --> should not set errno size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 2); @@ -127,8 +151,12 @@ TEST_F(LlvmLibcMBSToWCSTest, ErrnoChecks) { ASSERT_EQ(static_cast(n), 2); ASSERT_EQ(static_cast(dest[0]), 128569); ASSERT_EQ(static_cast(dest[1]), 128569); + // Making sure the pointer is not getting updated + ASSERT_EQ(src, original); // Trying to read the 3rd byte should set errno - n = LIBC_NAMESPACE::mbstowcs(dest, src, 2); + n = LIBC_NAMESPACE::mbstowcs(dest, src + 2, 2); ASSERT_ERRNO_EQ(EILSEQ); ASSERT_EQ(static_cast(n), -1); + // Making sure the pointer is not getting updated + ASSERT_EQ(src, original); } From b3b7b2a168d794a95eb8004b33c0988cf73bc459 Mon Sep 17 00:00:00 2001 From: Sriya Pratipati Date: Thu, 17 Jul 2025 23:31:58 +0000 Subject: [PATCH 08/14] fixed formatting --- libc/src/wchar/mbsnrtowcs.h | 2 +- libc/test/src/wchar/mbsnrtowcs_test.cpp | 19 +++++++++---------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/libc/src/wchar/mbsnrtowcs.h b/libc/src/wchar/mbsnrtowcs.h index 5c9687b20b3fa..0d66b952db01a 100644 --- a/libc/src/wchar/mbsnrtowcs.h +++ b/libc/src/wchar/mbsnrtowcs.h @@ -17,7 +17,7 @@ namespace LIBC_NAMESPACE_DECL { size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src, - size_t nmc, size_t len, mbstate_t *__restrict ps); + size_t nmc, size_t len, mbstate_t *__restrict ps); } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/test/src/wchar/mbsnrtowcs_test.cpp b/libc/test/src/wchar/mbsnrtowcs_test.cpp index a042794997b9f..be6f4a45fac42 100644 --- a/libc/test/src/wchar/mbsnrtowcs_test.cpp +++ b/libc/test/src/wchar/mbsnrtowcs_test.cpp @@ -67,40 +67,39 @@ TEST_F(LlvmLibcMBSNRToWCSTest, MixedNumberOfBytes) { wchar_t dest[5]; mbstate_t *mb; LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t)); - + // Read 'A' size_t n = LIBC_NAMESPACE::mbsnrtowcs(dest, &src, 1, 1, mb); ASSERT_ERRNO_SUCCESS(); ASSERT_EQ(static_cast(dest[0]), 'A'); - ASSERT_EQ(static_cast(n), 1); + ASSERT_EQ(static_cast(n), 1); ASSERT_EQ(src, original + 1); // Read sigma 'Σ' n = LIBC_NAMESPACE::mbsnrtowcs(dest + 1, &src, 2, 1, mb); ASSERT_ERRNO_SUCCESS(); - ASSERT_EQ(static_cast(dest[1]), 931); - ASSERT_EQ(static_cast(n), 1); + ASSERT_EQ(static_cast(dest[1]), 931); + ASSERT_EQ(static_cast(n), 1); ASSERT_EQ(src, original + 3); // Read recycling '♻' n = LIBC_NAMESPACE::mbsnrtowcs(dest + 2, &src, 2, 5, mb); ASSERT_ERRNO_SUCCESS(); - ASSERT_EQ(static_cast(n), 0); + ASSERT_EQ(static_cast(n), 0); ASSERT_EQ(src, original + 5); n = LIBC_NAMESPACE::mbsnrtowcs(dest + 2, &src, 1, 1, mb); -ASSERT_ERRNO_SUCCESS(); - ASSERT_EQ(static_cast(n), 1); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(static_cast(n), 1); ASSERT_EQ(src, original + 6); ASSERT_EQ(static_cast(dest[2]), 9851); // Read laughing cat emoji '😹' n = LIBC_NAMESPACE::mbsnrtowcs(dest + 3, &src, 4, 5, mb); ASSERT_ERRNO_SUCCESS(); - ASSERT_EQ(static_cast(n), 1); + ASSERT_EQ(static_cast(n), 1); ASSERT_EQ(src, original + 10); ASSERT_EQ(static_cast(dest[3]), 128569); - n = LIBC_NAMESPACE::mbsnrtowcs(dest + 4, &src, 4, 4, nullptr); ASSERT_TRUE(dest[4] == L'\0'); ASSERT_ERRNO_SUCCESS(); @@ -149,7 +148,7 @@ TEST_F(LlvmLibcMBSNRToWCSTest, InvalidMiddleByte) { "\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; const char *original = src; wchar_t dest[3]; - mbstate_t *mb; + mbstate_t *mb; LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t)); // Successfully read one character and first byte of the second character size_t n = LIBC_NAMESPACE::mbsnrtowcs(dest, &src, 5, 88, mb); From d8f2d6fab458d3b6899dbf38dc99aedf9e8b542d Mon Sep 17 00:00:00 2001 From: Sriya Pratipati Date: Fri, 18 Jul 2025 18:03:09 +0000 Subject: [PATCH 09/14] added crash on nullptr for src and moved internal implementation to header file --- libc/src/__support/wchar/CMakeLists.txt | 4 +- libc/src/__support/wchar/mbsnrtowcs.cpp | 59 ------------------------- libc/src/__support/wchar/mbsnrtowcs.h | 44 +++++++++++++++--- libc/src/wchar/CMakeLists.txt | 2 - libc/test/src/wchar/mbsnrtowcs_test.cpp | 45 +++++++++++-------- libc/test/src/wchar/mbsrtowcs_test.cpp | 23 +++++++--- libc/test/src/wchar/mbstowcs_test.cpp | 8 ++++ 7 files changed, 89 insertions(+), 96 deletions(-) delete mode 100644 libc/src/__support/wchar/mbsnrtowcs.cpp diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt index fa5139a8b68b4..607bfe6447c3c 100644 --- a/libc/src/__support/wchar/CMakeLists.txt +++ b/libc/src/__support/wchar/CMakeLists.txt @@ -69,12 +69,10 @@ add_object_library( .mbstate ) -add_object_library( +add_header_library( mbsnrtowcs HDRS mbsnrtowcs.h - SRCS - mbsnrtowcs.cpp DEPENDS libc.hdr.errno_macros libc.hdr.types.wchar_t diff --git a/libc/src/__support/wchar/mbsnrtowcs.cpp b/libc/src/__support/wchar/mbsnrtowcs.cpp deleted file mode 100644 index 9eb2b5423af87..0000000000000 --- a/libc/src/__support/wchar/mbsnrtowcs.cpp +++ /dev/null @@ -1,59 +0,0 @@ -//===-- Implementation for mbsnrtowcs function ------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "src/__support/wchar/mbsnrtowcs.h" -#include "hdr/errno_macros.h" -#include "hdr/types/size_t.h" -#include "hdr/types/wchar_t.h" -#include "src/__support/common.h" -#include "src/__support/error_or.h" -#include "src/__support/macros/config.h" -#include "src/__support/wchar/character_converter.h" -#include "src/__support/wchar/mbstate.h" -#include "src/__support/wchar/string_converter.h" - -namespace LIBC_NAMESPACE_DECL { -namespace internal { - -ErrorOr mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src, - size_t nmc, size_t len, mbstate *__restrict ps) { - if (*src == nullptr) - return 0; - // Checking if mbstate is valid - CharacterConverter char_conv(ps); - if (!char_conv.isValidState()) - return Error(EINVAL); - - StringConverter str_conv(reinterpret_cast(*src), ps, - len, nmc); - size_t dst_idx = 0; - ErrorOr converted = str_conv.popUTF32(); - while (converted.has_value()) { - if (dst != nullptr) - dst[dst_idx] = converted.value(); - // null terminator should not be counted in return value - if (converted.value() == L'\0') { - if (dst != nullptr) - *src = nullptr; - return dst_idx; - } - dst_idx++; - converted = str_conv.popUTF32(); - } - - if (converted.error() == -1) { // if we hit conversion limit - *src += str_conv.getSourceIndex(); - return dst_idx; - } - - return Error(converted.error()); -} - -} // namespace internal - -} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/__support/wchar/mbsnrtowcs.h b/libc/src/__support/wchar/mbsnrtowcs.h index 8ce497325fdad..3aedd08aefea5 100644 --- a/libc/src/__support/wchar/mbsnrtowcs.h +++ b/libc/src/__support/wchar/mbsnrtowcs.h @@ -1,4 +1,4 @@ -//===-- Implementation header for mbsnrtowcs function -----------*- C++ -*-===// +//===-- Implementation for mbsnrtowcs function ------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,24 +6,54 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSNRTOWCS -#define LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSNRTOWCS - +#include "hdr/errno_macros.h" #include "hdr/types/size_t.h" #include "hdr/types/wchar_t.h" #include "src/__support/common.h" #include "src/__support/error_or.h" #include "src/__support/macros/config.h" +#include "src/__support/macros/null_check.h" +#include "src/__support/wchar/character_converter.h" #include "src/__support/wchar/mbstate.h" +#include "src/__support/wchar/string_converter.h" namespace LIBC_NAMESPACE_DECL { namespace internal { ErrorOr mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src, - size_t nmc, size_t len, mbstate *__restrict ps); + size_t nmc, size_t len, mbstate *__restrict ps) { + LIBC_CRASH_ON_NULLPTR(src); + // Checking if mbstate is valid + CharacterConverter char_conv(ps); + if (!char_conv.isValidState()) + return Error(EINVAL); + + StringConverter str_conv(reinterpret_cast(*src), ps, + len, nmc); + size_t dst_idx = 0; + ErrorOr converted = str_conv.popUTF32(); + while (converted.has_value()) { + if (dst != nullptr) + dst[dst_idx] = converted.value(); + // null terminator should not be counted in return value + if (converted.value() == L'\0') { + if (dst != nullptr) + *src = nullptr; + return dst_idx; + } + dst_idx++; + converted = str_conv.popUTF32(); + } + + if (converted.error() == -1) { // if we hit conversion limit + if (dst != nullptr) + *src += str_conv.getSourceIndex(); + return dst_idx; + } + + return Error(converted.error()); +} } // namespace internal } // namespace LIBC_NAMESPACE_DECL - -#endif // LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSNRTOWCS diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt index ac359064ec10c..6df24a810bc6d 100644 --- a/libc/src/wchar/CMakeLists.txt +++ b/libc/src/wchar/CMakeLists.txt @@ -189,7 +189,6 @@ add_entrypoint_object( libc.src.__support.libc_errno libc.src.__support.wchar.mbstate libc.src.__support.wchar.mbsnrtowcs - libc.src.__support.wchar.mbstate ) add_entrypoint_object( @@ -206,7 +205,6 @@ add_entrypoint_object( libc.src.__support.libc_errno libc.src.__support.wchar.mbstate libc.src.__support.wchar.mbsnrtowcs - libc.src.__support.wchar.mbstate ) add_entrypoint_object( diff --git a/libc/test/src/wchar/mbsnrtowcs_test.cpp b/libc/test/src/wchar/mbsnrtowcs_test.cpp index be6f4a45fac42..d1232e36a5880 100644 --- a/libc/test/src/wchar/mbsnrtowcs_test.cpp +++ b/libc/test/src/wchar/mbsnrtowcs_test.cpp @@ -21,16 +21,16 @@ TEST_F(LlvmLibcMBSNRToWCSTest, OneByteOneChar) { const char *ch = "A"; const char *original = ch; wchar_t dest[2]; - mbstate_t *mb; + mbstate_t mb; LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t)); - size_t n = LIBC_NAMESPACE::mbsnrtowcs(dest, &ch, 1, 1, mb); + size_t n = LIBC_NAMESPACE::mbsnrtowcs(dest, &ch, 1, 1, &mb); ASSERT_EQ(static_cast(*dest), 'A'); ASSERT_EQ(static_cast(n), 1); // Should point to null terminator now ASSERT_EQ(ch, original + 1); ASSERT_ERRNO_SUCCESS(); - n = LIBC_NAMESPACE::mbsnrtowcs(dest + 1, &ch, 1, 1, mb); + n = LIBC_NAMESPACE::mbsnrtowcs(dest + 1, &ch, 1, 1, &mb); ASSERT_EQ(static_cast(dest[1]), '\0'); // Should not include null terminator ASSERT_EQ(static_cast(n), 0); @@ -43,15 +43,15 @@ TEST_F(LlvmLibcMBSNRToWCSTest, FourByteOneChar) { const char *src = "\xf0\x9f\x98\xb9"; // laughing cat emoji 😹 const char *original = src; wchar_t dest[2]; - mbstate_t *mb; + mbstate_t mb; LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t)); // Not enough bytes for the full character - size_t n = LIBC_NAMESPACE::mbsnrtowcs(dest, &src, 3, 2, mb); + size_t n = LIBC_NAMESPACE::mbsnrtowcs(dest, &src, 3, 2, &mb); ASSERT_ERRNO_SUCCESS(); ASSERT_EQ(static_cast(n), 0); ASSERT_EQ(src, original + 3); // Needs 2 more bytes (last byte of cat + null terminator) - n = LIBC_NAMESPACE::mbsnrtowcs(dest, &src, 2, 2, mb); + n = LIBC_NAMESPACE::mbsnrtowcs(dest, &src, 2, 2, &mb); ASSERT_ERRNO_SUCCESS(); // Does not include null terminator ASSERT_EQ(static_cast(n), 1); @@ -65,36 +65,36 @@ TEST_F(LlvmLibcMBSNRToWCSTest, MixedNumberOfBytes) { const char *src = "A\xce\xa3\xe2\x99\xbb\xf0\x9f\x98\xb9"; const char *original = src; wchar_t dest[5]; - mbstate_t *mb; + mbstate_t mb; LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t)); // Read 'A' - size_t n = LIBC_NAMESPACE::mbsnrtowcs(dest, &src, 1, 1, mb); + size_t n = LIBC_NAMESPACE::mbsnrtowcs(dest, &src, 1, 1, &mb); ASSERT_ERRNO_SUCCESS(); ASSERT_EQ(static_cast(dest[0]), 'A'); ASSERT_EQ(static_cast(n), 1); ASSERT_EQ(src, original + 1); // Read sigma 'Σ' - n = LIBC_NAMESPACE::mbsnrtowcs(dest + 1, &src, 2, 1, mb); + n = LIBC_NAMESPACE::mbsnrtowcs(dest + 1, &src, 2, 1, &mb); ASSERT_ERRNO_SUCCESS(); ASSERT_EQ(static_cast(dest[1]), 931); ASSERT_EQ(static_cast(n), 1); ASSERT_EQ(src, original + 3); // Read recycling '♻' - n = LIBC_NAMESPACE::mbsnrtowcs(dest + 2, &src, 2, 5, mb); + n = LIBC_NAMESPACE::mbsnrtowcs(dest + 2, &src, 2, 5, &mb); ASSERT_ERRNO_SUCCESS(); ASSERT_EQ(static_cast(n), 0); ASSERT_EQ(src, original + 5); - n = LIBC_NAMESPACE::mbsnrtowcs(dest + 2, &src, 1, 1, mb); + n = LIBC_NAMESPACE::mbsnrtowcs(dest + 2, &src, 1, 1, &mb); ASSERT_ERRNO_SUCCESS(); ASSERT_EQ(static_cast(n), 1); ASSERT_EQ(src, original + 6); ASSERT_EQ(static_cast(dest[2]), 9851); // Read laughing cat emoji '😹' - n = LIBC_NAMESPACE::mbsnrtowcs(dest + 3, &src, 4, 5, mb); + n = LIBC_NAMESPACE::mbsnrtowcs(dest + 3, &src, 4, 5, &mb); ASSERT_ERRNO_SUCCESS(); ASSERT_EQ(static_cast(n), 1); ASSERT_EQ(src, original + 10); @@ -148,16 +148,16 @@ TEST_F(LlvmLibcMBSNRToWCSTest, InvalidMiddleByte) { "\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; const char *original = src; wchar_t dest[3]; - mbstate_t *mb; + mbstate_t mb; LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t)); // Successfully read one character and first byte of the second character - size_t n = LIBC_NAMESPACE::mbsnrtowcs(dest, &src, 5, 88, mb); + size_t n = LIBC_NAMESPACE::mbsnrtowcs(dest, &src, 5, 88, &mb); ASSERT_EQ(static_cast(n), 1); ASSERT_ERRNO_SUCCESS(); ASSERT_EQ(src, original + 5); ASSERT_EQ(static_cast(dest[0]), 128569); - n = LIBC_NAMESPACE::mbsnrtowcs(dest + 1, &src, 5, 88, mb); + n = LIBC_NAMESPACE::mbsnrtowcs(dest + 1, &src, 5, 88, &mb); // Should return error, set errno, and not update the pointer ASSERT_EQ(static_cast(n), -1); ASSERT_ERRNO_EQ(EILSEQ); @@ -183,10 +183,10 @@ TEST_F(LlvmLibcMBSNRToWCSTest, ErrnoChecks) { "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9"; const char *original = src; wchar_t dest[5]; - mbstate_t *mb; + mbstate_t mb; LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t)); // First two bytes are valid --> should not set errno - size_t n = LIBC_NAMESPACE::mbsnrtowcs(dest, &src, 80, 2, mb); + size_t n = LIBC_NAMESPACE::mbsnrtowcs(dest, &src, 80, 2, &mb); ASSERT_ERRNO_SUCCESS(); ASSERT_EQ(static_cast(n), 2); ASSERT_EQ(static_cast(dest[0]), 128569); @@ -194,9 +194,18 @@ TEST_F(LlvmLibcMBSNRToWCSTest, ErrnoChecks) { ASSERT_EQ(src, original + 8); // Trying to read the 3rd byte should set errno - n = LIBC_NAMESPACE::mbsnrtowcs(dest + 2, &src, 4, 2, mb); + n = LIBC_NAMESPACE::mbsnrtowcs(dest + 2, &src, 4, 2, &mb); ASSERT_ERRNO_EQ(EILSEQ); ASSERT_EQ(static_cast(n), -1); // Should not move the pointer ASSERT_EQ(src, original + 8); } + +#if defined(LIBC_ADD_NULL_CHECKS) && !defined(LIBC_HAS_SANITIZER) +TEST(LlvmLibcMBSNRToWCSTest, NullptrCrash) { + // Passing in a nullptr should crash the program. + EXPECT_DEATH( + [] { LIBC_NAMESPACE::mbsnrtowcs(nullptr, nullptr, 1, 1, nullptr); }, + WITH_SIGNAL(-1)); +} +#endif // LIBC_HAS_ADDRESS_SANITIZER diff --git a/libc/test/src/wchar/mbsrtowcs_test.cpp b/libc/test/src/wchar/mbsrtowcs_test.cpp index fae6074487263..00cb1d5cbadcd 100644 --- a/libc/test/src/wchar/mbsrtowcs_test.cpp +++ b/libc/test/src/wchar/mbsrtowcs_test.cpp @@ -21,16 +21,16 @@ TEST_F(LlvmLibcMBSRToWCSTest, OneByteOneChar) { const char *ch = "A"; const char *original = ch; wchar_t dest[2]; - mbstate_t *mb; + mbstate_t mb; LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t)); - size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &ch, 1, mb); + size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &ch, 1, &mb); ASSERT_EQ(static_cast(*dest), 'A'); ASSERT_EQ(static_cast(n), 1); // Should point to null terminator now ASSERT_EQ(ch, original + 1); ASSERT_ERRNO_SUCCESS(); - n = LIBC_NAMESPACE::mbsrtowcs(dest + 1, &ch, 1, mb); + n = LIBC_NAMESPACE::mbsrtowcs(dest + 1, &ch, 1, &mb); ASSERT_EQ(static_cast(dest[1]), '\0'); // Should not include null terminator ASSERT_EQ(static_cast(n), 0); @@ -42,9 +42,9 @@ TEST_F(LlvmLibcMBSRToWCSTest, OneByteOneChar) { TEST_F(LlvmLibcMBSRToWCSTest, FourByteOneChar) { const char *src = "\xf0\x9f\x98\xb9"; // laughing cat emoji 😹 wchar_t dest[2]; - mbstate_t *mb; + mbstate_t mb; LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t)); - size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &src, 2, mb); + size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &src, 2, &mb); ASSERT_ERRNO_SUCCESS(); ASSERT_EQ(static_cast(dest[0]), 128569); ASSERT_TRUE(dest[1] == L'\0'); @@ -58,9 +58,9 @@ TEST_F(LlvmLibcMBSRToWCSTest, MultiByteTwoCharacters) { // Two laughing cat emojis "😹😹" const char *src = "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9"; wchar_t dest[3]; - mbstate_t *mb; + mbstate_t mb; LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t)); - size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &src, 3, mb); + size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &src, 3, &mb); ASSERT_ERRNO_SUCCESS(); ASSERT_EQ(static_cast(dest[0]), 128569); ASSERT_EQ(static_cast(dest[1]), 128569); @@ -174,3 +174,12 @@ TEST_F(LlvmLibcMBSRToWCSTest, ErrnoChecks) { // Should not move the pointer ASSERT_EQ(src, original + 8); } + +#if defined(LIBC_ADD_NULL_CHECKS) && !defined(LIBC_HAS_SANITIZER) +TEST(LlvmLibcMBSRToWCSTest, NullptrCrash) { + // Passing in a nullptr should crash the program. + EXPECT_DEATH( + [] { LIBC_NAMESPACE::mbsrtowcs(nullptr, nullptr, 1, nullptr); }, + WITH_SIGNAL(-1)); +} +#endif // LIBC_HAS_ADDRESS_SANITIZER diff --git a/libc/test/src/wchar/mbstowcs_test.cpp b/libc/test/src/wchar/mbstowcs_test.cpp index 0fc84aa7a062e..db03bc3e38643 100644 --- a/libc/test/src/wchar/mbstowcs_test.cpp +++ b/libc/test/src/wchar/mbstowcs_test.cpp @@ -160,3 +160,11 @@ TEST_F(LlvmLibcMBSToWCSTest, ErrnoChecks) { // Making sure the pointer is not getting updated ASSERT_EQ(src, original); } + +#if defined(LIBC_ADD_NULL_CHECKS) && !defined(LIBC_HAS_SANITIZER) +TEST(LlvmLibcMBSToWCSTest, NullptrCrash) { + // Passing in a nullptr should crash the program. + EXPECT_DEATH([] { LIBC_NAMESPACE::mbstowcs(nullptr, nullptr, 1); }, + WITH_SIGNAL(-1)); +} +#endif // LIBC_HAS_ADDRESS_SANITIZER From 18e2f267b4145237bf07c38f89f2f9104f2d360e Mon Sep 17 00:00:00 2001 From: Sriya Pratipati Date: Fri, 18 Jul 2025 18:07:28 +0000 Subject: [PATCH 10/14] fixed formatting --- libc/test/src/wchar/mbsrtowcs_test.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/libc/test/src/wchar/mbsrtowcs_test.cpp b/libc/test/src/wchar/mbsrtowcs_test.cpp index 00cb1d5cbadcd..73b650fe85484 100644 --- a/libc/test/src/wchar/mbsrtowcs_test.cpp +++ b/libc/test/src/wchar/mbsrtowcs_test.cpp @@ -178,8 +178,7 @@ TEST_F(LlvmLibcMBSRToWCSTest, ErrnoChecks) { #if defined(LIBC_ADD_NULL_CHECKS) && !defined(LIBC_HAS_SANITIZER) TEST(LlvmLibcMBSRToWCSTest, NullptrCrash) { // Passing in a nullptr should crash the program. - EXPECT_DEATH( - [] { LIBC_NAMESPACE::mbsrtowcs(nullptr, nullptr, 1, nullptr); }, - WITH_SIGNAL(-1)); + EXPECT_DEATH([] { LIBC_NAMESPACE::mbsrtowcs(nullptr, nullptr, 1, nullptr); }, + WITH_SIGNAL(-1)); } #endif // LIBC_HAS_ADDRESS_SANITIZER From 84ad6a9cf18680f0e2fc2dbee5786402099e1636 Mon Sep 17 00:00:00 2001 From: Sriya Pratipati Date: Fri, 18 Jul 2025 19:34:18 +0000 Subject: [PATCH 11/14] header file changes --- libc/src/__support/wchar/CMakeLists.txt | 1 + libc/src/__support/wchar/mbsnrtowcs.h | 11 +++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt index 607bfe6447c3c..9e050247124d4 100644 --- a/libc/src/__support/wchar/CMakeLists.txt +++ b/libc/src/__support/wchar/CMakeLists.txt @@ -80,6 +80,7 @@ add_header_library( libc.src.__support.common libc.src.__support.error_or libc.src.__support.macros.config + libc.src.__support.nacros.null_check .character_converter .mbstate .string_converter diff --git a/libc/src/__support/wchar/mbsnrtowcs.h b/libc/src/__support/wchar/mbsnrtowcs.h index 3aedd08aefea5..54e315210d95c 100644 --- a/libc/src/__support/wchar/mbsnrtowcs.h +++ b/libc/src/__support/wchar/mbsnrtowcs.h @@ -6,6 +6,9 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSNRTOWCS_H +#define LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSNRTOWCS_H + #include "hdr/errno_macros.h" #include "hdr/types/size_t.h" #include "hdr/types/wchar_t.h" @@ -20,8 +23,10 @@ namespace LIBC_NAMESPACE_DECL { namespace internal { -ErrorOr mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src, - size_t nmc, size_t len, mbstate *__restrict ps) { +LIBC_INLINE static ErrorOr mbsnrtowcs(wchar_t *__restrict dst, + const char **__restrict src, + size_t nmc, size_t len, + mbstate *__restrict ps) { LIBC_CRASH_ON_NULLPTR(src); // Checking if mbstate is valid CharacterConverter char_conv(ps); @@ -57,3 +62,5 @@ ErrorOr mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src, } // namespace internal } // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSNRTOWCS_H From 6595702442265f55e2a40dc76b3db1902f7c0290 Mon Sep 17 00:00:00 2001 From: Sriya Pratipati Date: Fri, 18 Jul 2025 20:08:24 +0000 Subject: [PATCH 12/14] fixed spacing in cmake file --- libc/src/__support/wchar/CMakeLists.txt | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt index 9e050247124d4..9c5815d1040ab 100644 --- a/libc/src/__support/wchar/CMakeLists.txt +++ b/libc/src/__support/wchar/CMakeLists.txt @@ -74,14 +74,14 @@ add_header_library( HDRS mbsnrtowcs.h DEPENDS - libc.hdr.errno_macros - libc.hdr.types.wchar_t - libc.hdr.types.size_t - libc.src.__support.common - libc.src.__support.error_or - libc.src.__support.macros.config - libc.src.__support.nacros.null_check - .character_converter - .mbstate - .string_converter + libc.hdr.errno_macros + libc.hdr.types.wchar_t + libc.hdr.types.size_t + libc.src.__support.common + libc.src.__support.error_or + libc.src.__support.macros.config + libc.src.__support.nacros.null_check + .character_converter + .mbstate + .string_converter ) From 6a55826bd2b6308bfd5bd12f61feb005faec201c Mon Sep 17 00:00:00 2001 From: Sriya Pratipati Date: Fri, 18 Jul 2025 20:18:06 +0000 Subject: [PATCH 13/14] fixed typo in cmake --- libc/src/__support/wchar/CMakeLists.txt | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt index 9c5815d1040ab..f0ed04a92f1b3 100644 --- a/libc/src/__support/wchar/CMakeLists.txt +++ b/libc/src/__support/wchar/CMakeLists.txt @@ -59,14 +59,14 @@ add_object_library( SRCS mbrtowc.cpp DEPENDS - libc.hdr.errno_macros - libc.hdr.types.wchar_t - libc.hdr.types.size_t - libc.src.__support.common - libc.src.__support.error_or - libc.src.__support.macros.config - .character_converter - .mbstate + libc.hdr.errno_macros + libc.hdr.types.wchar_t + libc.hdr.types.size_t + libc.src.__support.common + libc.src.__support.error_or + libc.src.__support.macros.config + .character_converter + .mbstate ) add_header_library( @@ -80,7 +80,7 @@ add_header_library( libc.src.__support.common libc.src.__support.error_or libc.src.__support.macros.config - libc.src.__support.nacros.null_check + libc.src.__support.macros.null_check .character_converter .mbstate .string_converter From eacce4445a8332c4b6b3b05ee0f3202195fc945c Mon Sep 17 00:00:00 2001 From: Sriya Pratipati Date: Thu, 24 Jul 2025 16:46:26 +0000 Subject: [PATCH 14/14] alphabetized yaml --- libc/include/wchar.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml index d3166d60c19de..781890dc380ce 100644 --- a/libc/include/wchar.yaml +++ b/libc/include/wchar.yaml @@ -53,14 +53,16 @@ functions: - type: wchar_t *__restrict - type: const char *__restrict - type: size_t - - name: mbstowcs + - name: mbsnrtowcs standards: - stdc return_type: size_t arguments: - type: wchar_t *__restrict - - type: const char *__restrict + - type: const char **__restrict + - type: size_t - type: size_t + - type: mbstate_t *__restrict - name: mbsrtowcs standards: - stdc @@ -70,16 +72,14 @@ functions: - type: const char **__restrict - type: size_t - type: mbstate_t *__restrict - - name: mbsnrtowcs + - name: mbstowcs standards: - stdc return_type: size_t arguments: - type: wchar_t *__restrict - - type: const char **__restrict - - type: size_t + - type: const char *__restrict - type: size_t - - type: mbstate_t *__restrict - name: wmemset standards: - stdc