From 953eca5ad64f2c9387705747cfa2b788f8ac3ce7 Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Wed, 16 Jul 2025 16:40:06 +0000 Subject: [PATCH 01/19] implemented public functions --- libc/config/linux/x86_64/entrypoints.txt | 3 ++ libc/include/wchar.yaml | 28 +++++++++++ libc/src/wchar/CMakeLists.txt | 41 +++++++++++++++ libc/src/wchar/wcsnrtombs.cpp | 50 +++++++++++++++++++ libc/src/wchar/wcsnrtombs.h | 23 +++++++++ libc/src/wchar/wcsrtombs.cpp | 50 +++++++++++++++++++ libc/src/wchar/wcsrtombs.h | 23 +++++++++ libc/src/wchar/wcstombs.cpp | 45 +++++++++++++++++ libc/src/wchar/wcstombs.h | 22 +++++++++ libc/test/src/wchar/wcstombs_test.cpp | 63 ++++++++++++++++++++++++ 10 files changed, 348 insertions(+) create mode 100644 libc/src/wchar/wcsnrtombs.cpp create mode 100644 libc/src/wchar/wcsnrtombs.h create mode 100644 libc/src/wchar/wcsrtombs.cpp create mode 100644 libc/src/wchar/wcsrtombs.h create mode 100644 libc/src/wchar/wcstombs.cpp create mode 100644 libc/src/wchar/wcstombs.h create mode 100644 libc/test/src/wchar/wcstombs_test.cpp diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 9223911f04a93..a067e73e80d13 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -1263,6 +1263,9 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.wchar.mbtowc libc.src.wchar.wcrtomb libc.src.wchar.wctomb + libc.src.wchar.wcstombs + libc.src.wchar.wcsrtombs + libc.src.wchar.wcsnrtombs ) endif() diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml index 123d3440aeec3..226f1614ebcc2 100644 --- a/libc/include/wchar.yaml +++ b/libc/include/wchar.yaml @@ -274,3 +274,31 @@ functions: - type: const wchar_t *__restrict - type: wchar_t **__restrict - type: int + - name: wcstombs + standards: + - stdc + return_type: size_t + arguments: + - type: char *__restrict + - type: const wchar_t *__restrict + - type: size_t + - name: wcsrtombs + standards: + - stdc + return_type: size_t + arguments: + - type: char *__restrict + - type: const wchar_t **__restrict + - type: size_t + - type: mbstate_t + - name: wcsnrtombs + standards: + - stdc + return_type: size_t + arguments: + - type: char *__restrict + - type: const wchar_t *__restrict + - type: size_t + - type: size_t + - type: mbstate_t + diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt index 7ace1a6ca66ba..521ea27fdbb26 100644 --- a/libc/src/wchar/CMakeLists.txt +++ b/libc/src/wchar/CMakeLists.txt @@ -159,6 +159,47 @@ add_entrypoint_object( libc.src.__support.wchar.mbstate ) +add_entrypoint_object( + wcstombs + SRCS + wcstombs.cpp + HDRS + wcstombs.h + DEPENDS + libc.hdr.types.wchar_t + libc.src.__support.wchar.mbstate + libc.src.__support.wchar.string_converter + libc.src.__support.libc_errno +) + +add_entrypoint_object( + wcsrtombs + SRCS + wcsrtombs.cpp + HDRS + wcsrtombs.h + DEPENDS + libc.hdr.types.wchar_t + libc.hdr.types.mbstate_t + libc.src.__support.wchar.mbstate + libc.src.__support.wchar.string_converter + libc.src.__support.libc_errno +) + +add_entrypoint_object( + wcstombs + SRCS + wcsnrtombs.cpp + HDRS + wcsnrtombs.h + DEPENDS + libc.hdr.types.wchar_t + libc.hdr.types.mbstate_t + libc.src.__support.wchar.mbstate + libc.src.__support.wchar.string_converter + libc.src.__support.libc_errno +) + add_entrypoint_object( wmemset SRCS diff --git a/libc/src/wchar/wcsnrtombs.cpp b/libc/src/wchar/wcsnrtombs.cpp new file mode 100644 index 0000000000000..3a0ea79053e33 --- /dev/null +++ b/libc/src/wchar/wcsnrtombs.cpp @@ -0,0 +1,50 @@ +//===-- Implementation of wcsnrtombs --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/wchar/wcsrtombs.h" + +#include "hdr/types/char32_t.h" +#include "hdr/types/mbstate_t.h" +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/common.h" +#include "src/__support/libc_errno.h" +#include "src/__support/macros/config.h" +#include "src/__support/wchar/mbstate.h" +#include "src/__support/wchar/string_converter.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(size_t, wcsnrtombs, + (char *__restrict s, const wchar_t **__restrict pwcs, + size_t nwc, size_t len, mbstate_t *ps)) { + static internal::mbstate internal_mbstate; + internal::StringConverter str_conv( + reinterpret_cast(pwcs), + ps == nullptr ? &internal_mbstate + : reinterpret_cast(ps), + len, nwc); + + int dst_idx = 0; + ErrorOr converted = str_conv.popUTF8(); + while (converted.has_value()) { + if (s != nullptr) + s[dst_idx] = converted.value(); + dst_idx++; + converted = str_conv.popUTF8(); + } + + pwcs += str_conv.getSourceIndex(); + if (converted.error() == -1) // if we hit conversion limit + return dst_idx; + + libc_errno = converted.error(); + return -1; +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/wcsnrtombs.h b/libc/src/wchar/wcsnrtombs.h new file mode 100644 index 0000000000000..793d383660f1b --- /dev/null +++ b/libc/src/wchar/wcsnrtombs.h @@ -0,0 +1,23 @@ +//===-- Implementation header for wcsnrtombs -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_WCHAR_WCSNRTOMBS_H +#define LLVM_LIBC_SRC_WCHAR_WCSNRTOMBS_H + +#include "hdr/types/mbstate_t.h" +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +size_t wcsnrtombs(char *__restrict s, const wchar_t **__restrict pwcs, size_t nwc, size_t len, mbstate_t* ps); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_WCHAR_WCSNRTOMBS_H diff --git a/libc/src/wchar/wcsrtombs.cpp b/libc/src/wchar/wcsrtombs.cpp new file mode 100644 index 0000000000000..1903a7104ab64 --- /dev/null +++ b/libc/src/wchar/wcsrtombs.cpp @@ -0,0 +1,50 @@ +//===-- Implementation of wcsrtombs ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/wchar/wcsrtombs.h" + +#include "hdr/types/char32_t.h" +#include "hdr/types/mbstate_t.h" +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/common.h" +#include "src/__support/libc_errno.h" +#include "src/__support/macros/config.h" +#include "src/__support/wchar/mbstate.h" +#include "src/__support/wchar/string_converter.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(size_t, wcsrtombs, + (char *__restrict s, const wchar_t **__restrict pwcs, + size_t n, mbstate_t *ps)) { + static internal::mbstate internal_mbstate; + internal::StringConverter str_conv( + reinterpret_cast(pwcs), + ps == nullptr ? &internal_mbstate + : reinterpret_cast(ps), + n); + + int dst_idx = 0; + ErrorOr converted = str_conv.popUTF8(); + while (converted.has_value()) { + if (s != nullptr) + s[dst_idx] = converted.value(); + dst_idx++; + converted = str_conv.popUTF8(); + } + + pwcs += str_conv.getSourceIndex(); + if (converted.error() == -1) // if we hit conversion limit + return dst_idx; + + libc_errno = converted.error(); + return -1; +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/wcsrtombs.h b/libc/src/wchar/wcsrtombs.h new file mode 100644 index 0000000000000..af69fccdb296a --- /dev/null +++ b/libc/src/wchar/wcsrtombs.h @@ -0,0 +1,23 @@ +//===-- Implementation header for wcsrtombs --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_WCHAR_WCSRTOMBS_H +#define LLVM_LIBC_SRC_WCHAR_WCSRTOMBS_H + +#include "hdr/types/mbstate_t.h" +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +size_t wcsrtombs(char *__restrict s, const wchar_t **__restrict pwcs, size_t n, mbstate_t* ps); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_WCHAR_WCSRTOMBS_H diff --git a/libc/src/wchar/wcstombs.cpp b/libc/src/wchar/wcstombs.cpp new file mode 100644 index 0000000000000..a2aaca8672924 --- /dev/null +++ b/libc/src/wchar/wcstombs.cpp @@ -0,0 +1,45 @@ +//===-- Implementation of wcstombs ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/wchar/wcstombs.h" + +#include "hdr/types/char32_t.h" +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/common.h" +#include "src/__support/libc_errno.h" +#include "src/__support/macros/config.h" +#include "src/__support/wchar/mbstate.h" +#include "src/__support/wchar/string_converter.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(size_t, wcstombs, + (char *__restrict s, const wchar_t *__restrict pwcs, + size_t n)) { + static internal::mbstate internal_mbstate; + internal::StringConverter str_conv( + reinterpret_cast(pwcs), &internal_mbstate, n); + + int dst_idx = 0; + ErrorOr converted = str_conv.popUTF8(); + while (converted.has_value()) { + if (s != nullptr) + s[dst_idx] = converted.value(); + dst_idx++; + converted = str_conv.popUTF8(); + } + + if (converted.error() == -1) // if we hit conversion limit + return dst_idx; + + libc_errno = converted.error(); + return -1; +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/wcstombs.h b/libc/src/wchar/wcstombs.h new file mode 100644 index 0000000000000..cd0008a168d90 --- /dev/null +++ b/libc/src/wchar/wcstombs.h @@ -0,0 +1,22 @@ +//===-- Implementation header for wcstombs --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_WCHAR_WCSTOMBS_H +#define LLVM_LIBC_SRC_WCHAR_WCSTOMBS_H + +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +size_t wcstombs(char *__restrict s, const wchar_t *__restrict pwcs, size_t n); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_WCHAR_WCSTOMBS_H diff --git a/libc/test/src/wchar/wcstombs_test.cpp b/libc/test/src/wchar/wcstombs_test.cpp new file mode 100644 index 0000000000000..2c87678af9c55 --- /dev/null +++ b/libc/test/src/wchar/wcstombs_test.cpp @@ -0,0 +1,63 @@ +//===-- Unittests for wcstombs --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/wchar/wcstombs.h" +#include "test/UnitTest/ErrnoCheckingTest.h" +#include "test/UnitTest/Test.h" + +using LlvmLibcWcstombs = LIBC_NAMESPACE::testing::ErrnoCheckingTest; + +TEST_F(LlvmLibcWcstombs, AllMultibyteLengths) { + /// clown emoji, sigma symbol, y with diaeresis, letter A + const wchar_t src[] = {static_cast(0x1f921), + static_cast(0x2211), + static_cast(0xff), static_cast(0x41), + static_cast(0x0)}; + char mbs[11]; + + ASSERT_EQ(wcstombs(mbs, src, 11), static_cast(11)); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(mbs[0], '\xF0'); // clown begin + ASSERT_EQ(mbs[1], '\x9F'); + ASSERT_EQ(mbs[2], '\xA4'); + ASSERT_EQ(mbs[3], '\xA1'); + ASSERT_EQ(mbs[4], '\xE2'); // sigma begin + ASSERT_EQ(mbs[5], '\x88'); + ASSERT_EQ(mbs[6], '\x91'); + ASSERT_EQ(mbs[7], '\xC3'); // y diaeresis begin + ASSERT_EQ(mbs[8], '\xBF'); + ASSERT_EQ(mbs[9], '\x41'); // A begin + ASSERT_EQ(mbs[10], '\0'); // null terminator +} + +TEST_F(LlvmLibcWcstombs, PartialConversion) { + /// clown emoji, sigma symbol, y with diaeresis, letter A + const wchar_t src[] = {static_cast(0x1f921), + static_cast(0x2211), + static_cast(0xff), static_cast(0x41), + static_cast(0x0)}; + char mbs[11] = {0}; + + ASSERT_EQ(wcstombs(mbs, src, 6), static_cast(4)); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(mbs[0], '\xF0'); // clown begin + ASSERT_EQ(mbs[1], '\x9F'); + ASSERT_EQ(mbs[2], '\xA4'); + ASSERT_EQ(mbs[3], '\xA1'); + ASSERT_EQ(mbs[4], '\0'); + + ASSERT_EQ(wcstombs(mbs, src, 6), static_cast(4)); + + ASSERT_EQ(mbs[4], '\xE2'); // sigma begin + ASSERT_EQ(mbs[5], '\x88'); + ASSERT_EQ(mbs[6], '\x91'); + ASSERT_EQ(mbs[7], '\xC3'); // y diaeresis begin + ASSERT_EQ(mbs[8], '\xBF'); + ASSERT_EQ(mbs[9], '\x41'); // A begin + ASSERT_EQ(mbs[10], '\0'); // null terminator +} From d4a86b2182f1b0673adab4e325c3188ee0e0ae9e Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Wed, 16 Jul 2025 20:12:16 +0000 Subject: [PATCH 02/19] wcstombs test --- libc/src/wchar/CMakeLists.txt | 2 +- libc/src/wchar/wcstombs.cpp | 11 ++++- libc/test/src/wchar/CMakeLists.txt | 12 ++++++ libc/test/src/wchar/wcstombs_test.cpp | 58 ++++++++++++++++++++------- 4 files changed, 66 insertions(+), 17 deletions(-) diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt index 521ea27fdbb26..36969e032594f 100644 --- a/libc/src/wchar/CMakeLists.txt +++ b/libc/src/wchar/CMakeLists.txt @@ -187,7 +187,7 @@ add_entrypoint_object( ) add_entrypoint_object( - wcstombs + wcsnrtombs SRCS wcsnrtombs.cpp HDRS diff --git a/libc/src/wchar/wcstombs.cpp b/libc/src/wchar/wcstombs.cpp index a2aaca8672924..94c6e9a4d6942 100644 --- a/libc/src/wchar/wcstombs.cpp +++ b/libc/src/wchar/wcstombs.cpp @@ -23,15 +23,22 @@ LLVM_LIBC_FUNCTION(size_t, wcstombs, (char *__restrict s, const wchar_t *__restrict pwcs, size_t n)) { static internal::mbstate internal_mbstate; + + if (s == nullptr) + n = SIZE_MAX; + internal::StringConverter str_conv( reinterpret_cast(pwcs), &internal_mbstate, n); int dst_idx = 0; ErrorOr converted = str_conv.popUTF8(); while (converted.has_value()) { - if (s != nullptr) + if (s != nullptr) s[dst_idx] = converted.value(); - dst_idx++; + + if (converted.value() != '\0') + dst_idx++; + converted = str_conv.popUTF8(); } diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt index 176cf7c3487cd..1d2e45c33f84a 100644 --- a/libc/test/src/wchar/CMakeLists.txt +++ b/libc/test/src/wchar/CMakeLists.txt @@ -101,6 +101,18 @@ add_libc_test( libc.hdr.types.wchar_t ) +add_libc_test( + wcstombs_test + SUITE + libc_wchar_unittests + SRCS + wcstombs_test.cpp + DEPENDS + libc.src.wchar.wcstombs + libc.test.UnitTest.ErrnoCheckingTest + libc.hdr.types.wchar_t +) + add_libc_test( wmemset_test SUITE diff --git a/libc/test/src/wchar/wcstombs_test.cpp b/libc/test/src/wchar/wcstombs_test.cpp index 2c87678af9c55..25ff4baa2e8cd 100644 --- a/libc/test/src/wchar/wcstombs_test.cpp +++ b/libc/test/src/wchar/wcstombs_test.cpp @@ -20,7 +20,7 @@ TEST_F(LlvmLibcWcstombs, AllMultibyteLengths) { static_cast(0x0)}; char mbs[11]; - ASSERT_EQ(wcstombs(mbs, src, 11), static_cast(11)); + ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 11), static_cast(10)); ASSERT_ERRNO_SUCCESS(); ASSERT_EQ(mbs[0], '\xF0'); // clown begin ASSERT_EQ(mbs[1], '\x9F'); @@ -35,29 +35,59 @@ TEST_F(LlvmLibcWcstombs, AllMultibyteLengths) { ASSERT_EQ(mbs[10], '\0'); // null terminator } -TEST_F(LlvmLibcWcstombs, PartialConversion) { +TEST_F(LlvmLibcWcstombs, DestLimit) { /// clown emoji, sigma symbol, y with diaeresis, letter A const wchar_t src[] = {static_cast(0x1f921), static_cast(0x2211), static_cast(0xff), static_cast(0x41), static_cast(0x0)}; - char mbs[11] = {0}; + char mbs[11]; + for (int i = 0; i < 11; ++i) + mbs[i] = '\x01'; // dummy initial values - ASSERT_EQ(wcstombs(mbs, src, 6), static_cast(4)); + ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 4), static_cast(4)); ASSERT_ERRNO_SUCCESS(); - ASSERT_EQ(mbs[0], '\xF0'); // clown begin + ASSERT_EQ(mbs[0], '\xF0'); ASSERT_EQ(mbs[1], '\x9F'); ASSERT_EQ(mbs[2], '\xA4'); ASSERT_EQ(mbs[3], '\xA1'); - ASSERT_EQ(mbs[4], '\0'); + ASSERT_EQ(mbs[4], '\x01'); // didn't write more than 4 bytes - ASSERT_EQ(wcstombs(mbs, src, 6), static_cast(4)); + for (int i = 0; i < 11; ++i) + mbs[i] = '\x01'; // dummy initial values - ASSERT_EQ(mbs[4], '\xE2'); // sigma begin - ASSERT_EQ(mbs[5], '\x88'); - ASSERT_EQ(mbs[6], '\x91'); - ASSERT_EQ(mbs[7], '\xC3'); // y diaeresis begin - ASSERT_EQ(mbs[8], '\xBF'); - ASSERT_EQ(mbs[9], '\x41'); // A begin - ASSERT_EQ(mbs[10], '\0'); // null terminator + // not enough bytes to convert the second character, so only converts one + ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 6), static_cast(4)); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(mbs[0], '\xF0'); + ASSERT_EQ(mbs[1], '\x9F'); + ASSERT_EQ(mbs[2], '\xA4'); + ASSERT_EQ(mbs[3], '\xA1'); + ASSERT_EQ(mbs[4], '\x01'); +} + +TEST_F(LlvmLibcWcstombs, NullDest) { + const wchar_t src[] = {static_cast(0x1f921), + static_cast(0x2211), + static_cast(0xff), static_cast(0x41), + static_cast(0x0)}; + + // n parameter ignored when dest is null + ASSERT_EQ(LIBC_NAMESPACE::wcstombs(nullptr, src, 1), static_cast(10)); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(LIBC_NAMESPACE::wcstombs(nullptr, src, 100), static_cast(10)); + ASSERT_ERRNO_SUCCESS(); } + +TEST_F(LlvmLibcWcstombs, ErrnoTest) { + const wchar_t src[] = {static_cast(0x1f921), + static_cast(0x2211), + static_cast(0x12ffff), // invalid widechar + static_cast(0x0)}; + + // n parameter ignored when dest is null + ASSERT_EQ(LIBC_NAMESPACE::wcstombs(nullptr, src, 7), static_cast(7)); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(LIBC_NAMESPACE::wcstombs(nullptr, src, 100), static_cast(-1)); + ASSERT_ERRNO_EQ(EILSEQ); +} \ No newline at end of file From 586497df73a43645c00e0173a283931446d5ad08 Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Thu, 17 Jul 2025 17:22:51 +0000 Subject: [PATCH 03/19] wcsrtombs tests --- libc/src/wchar/wcsrtombs.cpp | 2 +- libc/test/src/wchar/CMakeLists.txt | 14 +++ libc/test/src/wchar/wcsrtombs_test.cpp | 129 +++++++++++++++++++++++++ 3 files changed, 144 insertions(+), 1 deletion(-) create mode 100644 libc/test/src/wchar/wcsrtombs_test.cpp diff --git a/libc/src/wchar/wcsrtombs.cpp b/libc/src/wchar/wcsrtombs.cpp index 1903a7104ab64..af6accd848213 100644 --- a/libc/src/wchar/wcsrtombs.cpp +++ b/libc/src/wchar/wcsrtombs.cpp @@ -25,7 +25,7 @@ LLVM_LIBC_FUNCTION(size_t, wcsrtombs, size_t n, mbstate_t *ps)) { static internal::mbstate internal_mbstate; internal::StringConverter str_conv( - reinterpret_cast(pwcs), + reinterpret_cast(*pwcs), ps == nullptr ? &internal_mbstate : reinterpret_cast(ps), n); diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt index 1d2e45c33f84a..ac97b43e4d876 100644 --- a/libc/test/src/wchar/CMakeLists.txt +++ b/libc/test/src/wchar/CMakeLists.txt @@ -113,6 +113,20 @@ add_libc_test( libc.hdr.types.wchar_t ) +add_libc_test( + wcsrtombs_test + SUITE + libc_wchar_unittests + SRCS + wcsrtombs_test.cpp + DEPENDS + libc.src.wchar.wcsrtombs + libc.test.UnitTest.ErrnoCheckingTest + libc.hdr.types.wchar_t + libc.src.string.memset + libc.hdr.types.mbstate_t +) + add_libc_test( wmemset_test SUITE diff --git a/libc/test/src/wchar/wcsrtombs_test.cpp b/libc/test/src/wchar/wcsrtombs_test.cpp new file mode 100644 index 0000000000000..84d0bc3b40980 --- /dev/null +++ b/libc/test/src/wchar/wcsrtombs_test.cpp @@ -0,0 +1,129 @@ +//===-- Unittests for wcsrtombs -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "hdr/types/mbstate_t.h" +#include "src/string/memset.h" +#include "src/wchar/wcsrtombs.h" +#include "test/UnitTest/ErrnoCheckingTest.h" +#include "test/UnitTest/Test.h" + +using LlvmLibcWcsrtombs = LIBC_NAMESPACE::testing::ErrnoCheckingTest; + +TEST_F(LlvmLibcWcsrtombs, AllMultibyteLengths) { + mbstate_t state; + LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); + + /// clown emoji, sigma symbol, y with diaeresis, letter A + const wchar_t src[] = {static_cast(0x1f921), + static_cast(0x2211), + static_cast(0xff), static_cast(0x41), + static_cast(0x0)}; + const wchar_t *cur = src; + char mbs[11]; + + ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 11, &state), + static_cast(10)); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(cur, nullptr); + ASSERT_EQ(mbs[0], '\xF0'); // clown begin + ASSERT_EQ(mbs[1], '\x9F'); + ASSERT_EQ(mbs[2], '\xA4'); + ASSERT_EQ(mbs[3], '\xA1'); + ASSERT_EQ(mbs[4], '\xE2'); // sigma begin + ASSERT_EQ(mbs[5], '\x88'); + ASSERT_EQ(mbs[6], '\x91'); + ASSERT_EQ(mbs[7], '\xC3'); // y diaeresis begin + ASSERT_EQ(mbs[8], '\xBF'); + ASSERT_EQ(mbs[9], '\x41'); // A begin + ASSERT_EQ(mbs[10], '\0'); // null terminator +} + +TEST_F(LlvmLibcWcsrtombs, DestLimit) { + mbstate_t state; + LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); + + /// clown emoji, sigma symbol, y with diaeresis, letter A + const wchar_t src[] = {static_cast(0x1f921), + static_cast(0x2211), + static_cast(0xff), static_cast(0x41), + static_cast(0x0)}; + const wchar_t *cur = src; + + char mbs[11]; + for (int i = 0; i < 11; ++i) + mbs[i] = '\x01'; // dummy initial values + + ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 4, &state), + static_cast(4)); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(cur, src + 1); + ASSERT_EQ(mbs[0], '\xF0'); + ASSERT_EQ(mbs[1], '\x9F'); + ASSERT_EQ(mbs[2], '\xA4'); + ASSERT_EQ(mbs[3], '\xA1'); + ASSERT_EQ(mbs[4], '\x01'); // didn't write more than 4 bytes + + for (int i = 0; i < 11; ++i) + mbs[i] = '\x01'; // dummy initial values + LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); + + // not enough bytes to convert the second character, so only converts one + ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 6, &state), + static_cast(4)); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(cur, src + 1); + ASSERT_EQ(mbs[0], '\xF0'); + ASSERT_EQ(mbs[1], '\x9F'); + ASSERT_EQ(mbs[2], '\xA4'); + ASSERT_EQ(mbs[3], '\xA1'); + ASSERT_EQ(mbs[4], '\x01'); +} + +TEST_F(LlvmLibcWcsrtombs, NullDest) { + mbstate_t state; + LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); + + const wchar_t src[] = {static_cast(0x1f921), + static_cast(0x2211), + static_cast(0xff), static_cast(0x41), + static_cast(0x0)}; + const wchar_t *cur = src; + + // n parameter ignored when dest is null + ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(nullptr, &cur, 1, &state), + static_cast(10)); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(cur, nullptr); + + LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); + ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(nullptr, &cur, 100, &state), + static_cast(10)); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(cur, nullptr); +} + +TEST_F(LlvmLibcWcsrtombs, ErrnoTest) { + mbstate_t state; + LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); + + const wchar_t src[] = {static_cast(0x1f921), + static_cast(0x2211), + static_cast(0x12ffff), // invalid widechar + static_cast(0x0)}; + const wchar_t *cur = src; + + // n parameter ignored when dest is null + ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(nullptr, &cur, 7, &state), + static_cast(7)); + ASSERT_ERRNO_SUCCESS(); + + LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); + ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(nullptr, &cur, 100, &state), + static_cast(-1)); + ASSERT_ERRNO_EQ(EILSEQ); +} \ No newline at end of file From 7445bf79f948b83f855e5323077a260ae2fe1eb9 Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Thu, 17 Jul 2025 19:52:30 +0000 Subject: [PATCH 04/19] created internal function --- libc/src/__support/wchar/CMakeLists.txt | 19 +++ libc/src/__support/wchar/wcsnrtombs.cpp | 56 ++++++++ libc/src/__support/wchar/wcsnrtombs.h | 27 ++++ libc/src/wchar/wcsrtombs.cpp | 2 +- libc/test/src/__support/wchar/CMakeLists.txt | 17 +++ .../src/__support/wchar/wcsnrtombs_test.cpp | 128 ++++++++++++++++++ 6 files changed, 248 insertions(+), 1 deletion(-) create mode 100644 libc/src/__support/wchar/wcsnrtombs.cpp create mode 100644 libc/src/__support/wchar/wcsnrtombs.h create mode 100644 libc/test/src/__support/wchar/wcsnrtombs_test.cpp diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt index 802441d37fe92..b9efe5888d955 100644 --- a/libc/src/__support/wchar/CMakeLists.txt +++ b/libc/src/__support/wchar/CMakeLists.txt @@ -68,3 +68,22 @@ add_object_library( .character_converter .mbstate ) + +add_object_library( + wcsnrtombs + HDRS + wcsnrtombs.h + SRCS + wcsnrtombs.cpp + DEPENDS + libc.hdr.errno_macros + libc.hdr.types.char8_t + libc.hdr.types.char32_t + libc.hdr.types.size_t + libc.hdr.types.wchar_t + libc.src.__support.error_or + libc.src.__support.common + .string_converter + .character_converter + .mbstate +) diff --git a/libc/src/__support/wchar/wcsnrtombs.cpp b/libc/src/__support/wchar/wcsnrtombs.cpp new file mode 100644 index 0000000000000..b7add66214b5a --- /dev/null +++ b/libc/src/__support/wchar/wcsnrtombs.cpp @@ -0,0 +1,56 @@ +//===-- Implementation of wcsnrtombs --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/wchar/wcsnrtombs.h" + +#include "hdr/types/char32_t.h" +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/common.h" +#include "src/__support/libc_errno.h" +#include "src/__support/macros/config.h" +#include "src/__support/wchar/mbstate.h" +#include "src/__support/wchar/string_converter.h" + +namespace LIBC_NAMESPACE_DECL { +namespace internal { + +ErrorOr wcsnrtombs(char *__restrict s, const wchar_t **__restrict pwcs, + size_t nwc, size_t len, mbstate *ps) { + CharacterConverter cr(ps); + if (!cr.isValidState()) + return Error(EINVAL); + + if (s == nullptr) + len = SIZE_MAX; + + StringConverter str_conv(reinterpret_cast(*pwcs), + ps, len, nwc); + size_t dst_idx = 0; + ErrorOr converted = str_conv.popUTF8(); + while (converted.has_value()) { + if (s != nullptr) + s[dst_idx] = converted.value(); + + if (converted.value() == '\0') { + *pwcs = nullptr; + return dst_idx; + } + + dst_idx++; + converted = str_conv.popUTF8(); + } + + *pwcs += str_conv.getSourceIndex(); + if (converted.error() == -1) // if we hit conversion limit + return dst_idx; + + return Error(converted.error()); +} +} // namespace internal +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/__support/wchar/wcsnrtombs.h b/libc/src/__support/wchar/wcsnrtombs.h new file mode 100644 index 0000000000000..972fbf97e52f8 --- /dev/null +++ b/libc/src/__support/wchar/wcsnrtombs.h @@ -0,0 +1,27 @@ +//===-- Implementation header for wcsnrtombs ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H +#define LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H + +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/error_or.h" +#include "src/__support/macros/config.h" +#include "src/__support/wchar/mbstate.h" + +namespace LIBC_NAMESPACE_DECL { +namespace internal { + +ErrorOr wcsnrtombs(char *__restrict s, const wchar_t **__restrict pwcs, + size_t nwc, size_t len, mbstate *ps); + +} // namespace internal +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H diff --git a/libc/src/wchar/wcsrtombs.cpp b/libc/src/wchar/wcsrtombs.cpp index af6accd848213..cdbb6cb070ec5 100644 --- a/libc/src/wchar/wcsrtombs.cpp +++ b/libc/src/wchar/wcsrtombs.cpp @@ -39,7 +39,7 @@ LLVM_LIBC_FUNCTION(size_t, wcsrtombs, converted = str_conv.popUTF8(); } - pwcs += str_conv.getSourceIndex(); + *pwcs += str_conv.getSourceIndex(); if (converted.error() == -1) // if we hit conversion limit return dst_idx; diff --git a/libc/test/src/__support/wchar/CMakeLists.txt b/libc/test/src/__support/wchar/CMakeLists.txt index f0727451736f9..c112c83dbe9af 100644 --- a/libc/test/src/__support/wchar/CMakeLists.txt +++ b/libc/test/src/__support/wchar/CMakeLists.txt @@ -34,3 +34,20 @@ add_libc_test( libc.hdr.errno_macros libc.hdr.types.char32_t ) + +add_libc_test( + wcsnrtombs_test + SUITE + libc-support-tests + SRCS + wcsnrtombs_test.cpp + DEPENDS + libc.src.__support.wchar.string_converter + libc.src.__support.wchar.character_converter + libc.src.__support.wchar.mbstate + libc.src.__support.error_or + libc.src.__support.wchar.wcsnrtombs + libc.hdr.errno_macros + libc.hdr.types.char32_t + libc.hdr.types.char8_t +) diff --git a/libc/test/src/__support/wchar/wcsnrtombs_test.cpp b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp new file mode 100644 index 0000000000000..0838b8d3ffa32 --- /dev/null +++ b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp @@ -0,0 +1,128 @@ +//===-- Unittests for wcsnrtombs ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "hdr/errno_macros.h" +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/error_or.h" +#include "src/__support/wchar/mbstate.h" +#include "src/__support/wchar/wcsnrtombs.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcWcsnrtombs, AllMultibyteLengths) { + LIBC_NAMESPACE::internal::mbstate state; + + /// clown emoji, sigma symbol, y with diaeresis, letter A + const wchar_t src[] = {static_cast(0x1f921), + static_cast(0x2211), + static_cast(0xff), static_cast(0x41), + static_cast(0x0)}; + const wchar_t *cur = src; + char mbs[11]; + + auto res = LIBC_NAMESPACE::internal::wcsnrtombs(mbs, &cur, 5, 11, &state); + ASSERT_TRUE(res.has_value()); + ASSERT_EQ(res.value(), static_cast(10)); + ASSERT_EQ(cur, nullptr); + ASSERT_EQ(mbs[0], '\xF0'); // clown begin + ASSERT_EQ(mbs[1], '\x9F'); + ASSERT_EQ(mbs[2], '\xA4'); + ASSERT_EQ(mbs[3], '\xA1'); + ASSERT_EQ(mbs[4], '\xE2'); // sigma begin + ASSERT_EQ(mbs[5], '\x88'); + ASSERT_EQ(mbs[6], '\x91'); + ASSERT_EQ(mbs[7], '\xC3'); // y diaeresis begin + ASSERT_EQ(mbs[8], '\xBF'); + ASSERT_EQ(mbs[9], '\x41'); // A begin + ASSERT_EQ(mbs[10], '\0'); // null terminator +} + +TEST(LlvmLibcWcsnrtombs, DestLimit) { + LIBC_NAMESPACE::internal::mbstate state1; + + /// clown emoji, sigma symbol, y with diaeresis, letter A + const wchar_t src[] = {static_cast(0x1f921), + static_cast(0x2211), + static_cast(0xff), static_cast(0x41), + static_cast(0x0)}; + const wchar_t *cur = src; + + char mbs[11]; + for (int i = 0; i < 11; ++i) + mbs[i] = '\x01'; // dummy initial values + + auto res = LIBC_NAMESPACE::internal::wcsnrtombs(mbs, &cur, 5, 4, &state1); + ASSERT_TRUE(res.has_value()); + ASSERT_EQ(res.value(), static_cast(4)); + ASSERT_EQ(cur, src + 1); + ASSERT_EQ(mbs[0], '\xF0'); + ASSERT_EQ(mbs[1], '\x9F'); + ASSERT_EQ(mbs[2], '\xA4'); + ASSERT_EQ(mbs[3], '\xA1'); + ASSERT_EQ(mbs[4], '\x01'); // didn't write more than 4 bytes + + for (int i = 0; i < 11; ++i) + mbs[i] = '\x01'; // dummy initial values + LIBC_NAMESPACE::internal::mbstate state2; + + // not enough bytes to convert the second character, so only converts one + cur = src; + res = LIBC_NAMESPACE::internal::wcsnrtombs(mbs, &cur, 5, 6, &state2); + ASSERT_TRUE(res.has_value()); + ASSERT_EQ(res.value(), static_cast(4)); + ASSERT_EQ(cur, src + 1); + ASSERT_EQ(mbs[0], '\xF0'); + ASSERT_EQ(mbs[1], '\x9F'); + ASSERT_EQ(mbs[2], '\xA4'); + ASSERT_EQ(mbs[3], '\xA1'); + ASSERT_EQ(mbs[4], '\x01'); +} + +TEST(LlvmLibcWcsnrtombs, NullDest) { + LIBC_NAMESPACE::internal::mbstate state1; + + const wchar_t src[] = {static_cast(0x1f921), + static_cast(0x2211), + static_cast(0xff), static_cast(0x41), + static_cast(0x0)}; + const wchar_t *cur = src; + + // n parameter ignored when dest is null + auto res = LIBC_NAMESPACE::internal::wcsnrtombs(nullptr, &cur, 5, 1, &state1); + ASSERT_TRUE(res.has_value()); + ASSERT_EQ(res.value(), static_cast(10)); + ASSERT_EQ(cur, nullptr); + + LIBC_NAMESPACE::internal::mbstate state2; + cur = src; + res = LIBC_NAMESPACE::internal::wcsnrtombs(nullptr, &cur, 5, 100, &state2); + ASSERT_TRUE(res.has_value()); + ASSERT_EQ(res.value(), static_cast(10)); + ASSERT_EQ(cur, nullptr); +} + +TEST(LlvmLibcWcsnrtombs, ErrorTest) { + LIBC_NAMESPACE::internal::mbstate state1; + + const wchar_t src[] = {static_cast(0x1f921), + static_cast(0x2211), + static_cast(0x12ffff), // invalid widechar + static_cast(0x0)}; + const wchar_t *cur = src; + + // n parameter ignored when dest is null + auto res = LIBC_NAMESPACE::internal::wcsnrtombs(nullptr, &cur, 5, 7, &state1); + ASSERT_TRUE(res.has_value()); + ASSERT_EQ(res.value(), static_cast(7)); + + LIBC_NAMESPACE::internal::mbstate state2; + cur = src; + res = LIBC_NAMESPACE::internal::wcsnrtombs(nullptr, &cur, 5, 100, &state2); + ASSERT_FALSE(res.has_value()); + ASSERT_EQ(res.error(), EILSEQ); +} From e2be69afa8caf47216739dccbd278faef0d35b0e Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Thu, 17 Jul 2025 19:55:39 +0000 Subject: [PATCH 05/19] Merge branch 'main' into wcstombs-functions From 843c79a2c1acf9abc55a41f7997beb7bbd90818b Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Thu, 17 Jul 2025 20:12:07 +0000 Subject: [PATCH 06/19] add invalid state test --- .../src/__support/wchar/wcsnrtombs_test.cpp | 67 ++++++++++++++++++- 1 file changed, 64 insertions(+), 3 deletions(-) diff --git a/libc/test/src/__support/wchar/wcsnrtombs_test.cpp b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp index 0838b8d3ffa32..63d83d7d782f1 100644 --- a/libc/test/src/__support/wchar/wcsnrtombs_test.cpp +++ b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp @@ -83,6 +83,50 @@ TEST(LlvmLibcWcsnrtombs, DestLimit) { ASSERT_EQ(mbs[4], '\x01'); } +TEST(LlvmLibcWcsnrtombs, SrcLimit) { + LIBC_NAMESPACE::internal::mbstate state; + + /// clown emoji, sigma symbol, y with diaeresis, letter A + const wchar_t src[] = {static_cast(0x1f921), + static_cast(0x2211), + static_cast(0xff), static_cast(0x41), + static_cast(0x0)}; + const wchar_t *cur = src; + + char mbs[11]; + for (int i = 0; i < 11; ++i) + mbs[i] = '\x01'; // dummy initial values + + auto res = LIBC_NAMESPACE::internal::wcsnrtombs(mbs, &cur, 2, 11, &state); + ASSERT_TRUE(res.has_value()); + ASSERT_EQ(res.value(), static_cast(7)); + ASSERT_EQ(cur, src + 2); + ASSERT_EQ(mbs[0], '\xF0'); // clown begin + ASSERT_EQ(mbs[1], '\x9F'); + ASSERT_EQ(mbs[2], '\xA4'); + ASSERT_EQ(mbs[3], '\xA1'); + ASSERT_EQ(mbs[4], '\xE2'); // sigma begin + ASSERT_EQ(mbs[5], '\x88'); + ASSERT_EQ(mbs[6], '\x91'); + ASSERT_EQ(mbs[7], '\x01'); + + res = LIBC_NAMESPACE::internal::wcsnrtombs(mbs + res.value(), &cur, 100, 11, &state); + ASSERT_TRUE(res.has_value()); + ASSERT_EQ(res.value(), static_cast(3)); + ASSERT_EQ(cur, nullptr); + ASSERT_EQ(mbs[0], '\xF0'); // clown begin + ASSERT_EQ(mbs[1], '\x9F'); + ASSERT_EQ(mbs[2], '\xA4'); + ASSERT_EQ(mbs[3], '\xA1'); + ASSERT_EQ(mbs[4], '\xE2'); // sigma begin + ASSERT_EQ(mbs[5], '\x88'); + ASSERT_EQ(mbs[6], '\x91'); + ASSERT_EQ(mbs[7], '\xC3'); // y diaeresis begin + ASSERT_EQ(mbs[8], '\xBF'); + ASSERT_EQ(mbs[9], '\x41'); // A begin + ASSERT_EQ(mbs[10], '\0'); // null terminator +} + TEST(LlvmLibcWcsnrtombs, NullDest) { LIBC_NAMESPACE::internal::mbstate state1; @@ -106,7 +150,23 @@ TEST(LlvmLibcWcsnrtombs, NullDest) { ASSERT_EQ(cur, nullptr); } -TEST(LlvmLibcWcsnrtombs, ErrorTest) { +TEST(LlvmLibcWcsnrtombs, InvalidState) { + LIBC_NAMESPACE::internal::mbstate state; + state.total_bytes = 100; + + const wchar_t src[] = {static_cast(0x1f921), + static_cast(0x2211), + static_cast(0xff), static_cast(0x41), + static_cast(0x0)}; + const wchar_t *cur = src; + + // n parameter ignored when dest is null + auto res = LIBC_NAMESPACE::internal::wcsnrtombs(nullptr, &cur, 5, 1, &state); + ASSERT_FALSE(res.has_value()); + ASSERT_EQ(res.error(), EINVAL); +} + +TEST(LlvmLibcWcsnrtombs, InvalidCharacter) { LIBC_NAMESPACE::internal::mbstate state1; const wchar_t src[] = {static_cast(0x1f921), @@ -114,15 +174,16 @@ TEST(LlvmLibcWcsnrtombs, ErrorTest) { static_cast(0x12ffff), // invalid widechar static_cast(0x0)}; const wchar_t *cur = src; + char mbs[11]; // n parameter ignored when dest is null - auto res = LIBC_NAMESPACE::internal::wcsnrtombs(nullptr, &cur, 5, 7, &state1); + auto res = LIBC_NAMESPACE::internal::wcsnrtombs(mbs, &cur, 5, 7, &state1); ASSERT_TRUE(res.has_value()); ASSERT_EQ(res.value(), static_cast(7)); LIBC_NAMESPACE::internal::mbstate state2; cur = src; - res = LIBC_NAMESPACE::internal::wcsnrtombs(nullptr, &cur, 5, 100, &state2); + res = LIBC_NAMESPACE::internal::wcsnrtombs(mbs, &cur, 5, 11, &state2); ASSERT_FALSE(res.has_value()); ASSERT_EQ(res.error(), EILSEQ); } From 69ed44c62abe09f12dcd9feb6dad40bd00fe7bfd Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Thu, 17 Jul 2025 20:58:29 +0000 Subject: [PATCH 07/19] public string functions + tests --- libc/src/wchar/CMakeLists.txt | 6 +- libc/src/wchar/wcsnrtombs.cpp | 29 +--- libc/src/wchar/wcsrtombs.cpp | 27 +-- libc/src/wchar/wcstombs.cpp | 33 +--- .../src/__support/wchar/wcsnrtombs_test.cpp | 1 + libc/test/src/wchar/CMakeLists.txt | 14 ++ libc/test/src/wchar/wcsnrtombs_test.cpp | 156 ++++++++++++++++++ libc/test/src/wchar/wcsrtombs_test.cpp | 36 ++-- libc/test/src/wchar/wcstombs_test.cpp | 24 +-- 9 files changed, 218 insertions(+), 108 deletions(-) create mode 100644 libc/test/src/wchar/wcsnrtombs_test.cpp diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt index 36969e032594f..05c91d459bb95 100644 --- a/libc/src/wchar/CMakeLists.txt +++ b/libc/src/wchar/CMakeLists.txt @@ -168,7 +168,7 @@ add_entrypoint_object( DEPENDS libc.hdr.types.wchar_t libc.src.__support.wchar.mbstate - libc.src.__support.wchar.string_converter + libc.src.__support.wchar.wcsnrtombs libc.src.__support.libc_errno ) @@ -182,7 +182,7 @@ add_entrypoint_object( libc.hdr.types.wchar_t libc.hdr.types.mbstate_t libc.src.__support.wchar.mbstate - libc.src.__support.wchar.string_converter + libc.src.__support.wchar.wcsnrtombs libc.src.__support.libc_errno ) @@ -196,7 +196,7 @@ add_entrypoint_object( libc.hdr.types.wchar_t libc.hdr.types.mbstate_t libc.src.__support.wchar.mbstate - libc.src.__support.wchar.string_converter + libc.src.__support.wchar.wcsnrtombs libc.src.__support.libc_errno ) diff --git a/libc/src/wchar/wcsnrtombs.cpp b/libc/src/wchar/wcsnrtombs.cpp index 3a0ea79053e33..fd4724150e927 100644 --- a/libc/src/wchar/wcsnrtombs.cpp +++ b/libc/src/wchar/wcsnrtombs.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "src/wchar/wcsrtombs.h" +#include "src/wchar/wcsnrtombs.h" #include "hdr/types/char32_t.h" #include "hdr/types/mbstate_t.h" @@ -16,7 +16,7 @@ #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" #include "src/__support/wchar/mbstate.h" -#include "src/__support/wchar/string_converter.h" +#include "src/__support/wchar/wcsnrtombs.h" namespace LIBC_NAMESPACE_DECL { @@ -24,27 +24,16 @@ LLVM_LIBC_FUNCTION(size_t, wcsnrtombs, (char *__restrict s, const wchar_t **__restrict pwcs, size_t nwc, size_t len, mbstate_t *ps)) { static internal::mbstate internal_mbstate; - internal::StringConverter str_conv( - reinterpret_cast(pwcs), + auto result = internal::wcsnrtombs( + s, pwcs, nwc, len, ps == nullptr ? &internal_mbstate - : reinterpret_cast(ps), - len, nwc); - - int dst_idx = 0; - ErrorOr converted = str_conv.popUTF8(); - while (converted.has_value()) { - if (s != nullptr) - s[dst_idx] = converted.value(); - dst_idx++; - converted = str_conv.popUTF8(); + : reinterpret_cast(ps)); + if (!result.has_value()) { + libc_errno = result.error(); + return -1; } - pwcs += str_conv.getSourceIndex(); - if (converted.error() == -1) // if we hit conversion limit - return dst_idx; - - libc_errno = converted.error(); - return -1; + return result.value(); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/wcsrtombs.cpp b/libc/src/wchar/wcsrtombs.cpp index cdbb6cb070ec5..b4632a4a436ba 100644 --- a/libc/src/wchar/wcsrtombs.cpp +++ b/libc/src/wchar/wcsrtombs.cpp @@ -16,7 +16,7 @@ #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" #include "src/__support/wchar/mbstate.h" -#include "src/__support/wchar/string_converter.h" +#include "src/__support/wchar/wcsnrtombs.h" namespace LIBC_NAMESPACE_DECL { @@ -24,27 +24,16 @@ LLVM_LIBC_FUNCTION(size_t, wcsrtombs, (char *__restrict s, const wchar_t **__restrict pwcs, size_t n, mbstate_t *ps)) { static internal::mbstate internal_mbstate; - internal::StringConverter str_conv( - reinterpret_cast(*pwcs), + auto result = internal::wcsnrtombs( + s, pwcs, SIZE_MAX, n, ps == nullptr ? &internal_mbstate - : reinterpret_cast(ps), - n); - - int dst_idx = 0; - ErrorOr converted = str_conv.popUTF8(); - while (converted.has_value()) { - if (s != nullptr) - s[dst_idx] = converted.value(); - dst_idx++; - converted = str_conv.popUTF8(); + : reinterpret_cast(ps)); + if (!result.has_value()) { + libc_errno = result.error(); + return -1; } - - *pwcs += str_conv.getSourceIndex(); - if (converted.error() == -1) // if we hit conversion limit - return dst_idx; - libc_errno = converted.error(); - return -1; + return result.value(); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/wcstombs.cpp b/libc/src/wchar/wcstombs.cpp index 94c6e9a4d6942..28e2425d645e7 100644 --- a/libc/src/wchar/wcstombs.cpp +++ b/libc/src/wchar/wcstombs.cpp @@ -15,38 +15,23 @@ #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" #include "src/__support/wchar/mbstate.h" -#include "src/__support/wchar/string_converter.h" +#include "src/__support/wchar/wcsnrtombs.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(size_t, wcstombs, - (char *__restrict s, const wchar_t *__restrict pwcs, + (char *__restrict s, const wchar_t *__restrict wcs, size_t n)) { static internal::mbstate internal_mbstate; - - if (s == nullptr) - n = SIZE_MAX; - - internal::StringConverter str_conv( - reinterpret_cast(pwcs), &internal_mbstate, n); - - int dst_idx = 0; - ErrorOr converted = str_conv.popUTF8(); - while (converted.has_value()) { - if (s != nullptr) - s[dst_idx] = converted.value(); - - if (converted.value() != '\0') - dst_idx++; - - converted = str_conv.popUTF8(); + const wchar_t *wcs_ptr_copy = wcs; + auto result = + internal::wcsnrtombs(s, &wcs_ptr_copy, SIZE_MAX, n, &internal_mbstate); + if (!result.has_value()) { + libc_errno = result.error(); + return -1; } - if (converted.error() == -1) // if we hit conversion limit - return dst_idx; - - libc_errno = converted.error(); - return -1; + return result.value(); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/test/src/__support/wchar/wcsnrtombs_test.cpp b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp index 63d83d7d782f1..710fc4b568ac0 100644 --- a/libc/test/src/__support/wchar/wcsnrtombs_test.cpp +++ b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp @@ -151,6 +151,7 @@ TEST(LlvmLibcWcsnrtombs, NullDest) { } TEST(LlvmLibcWcsnrtombs, InvalidState) { + // this is more thoroughly tested by CharacterConverter LIBC_NAMESPACE::internal::mbstate state; state.total_bytes = 100; diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt index ac97b43e4d876..553821ddaf9af 100644 --- a/libc/test/src/wchar/CMakeLists.txt +++ b/libc/test/src/wchar/CMakeLists.txt @@ -127,6 +127,20 @@ add_libc_test( libc.hdr.types.mbstate_t ) +add_libc_test( + wcsnrtombs_test + SUITE + libc_wchar_unittests + SRCS + wcsnrtombs_test.cpp + DEPENDS + libc.src.wchar.wcsnrtombs + libc.test.UnitTest.ErrnoCheckingTest + libc.hdr.types.wchar_t + libc.src.string.memset + libc.hdr.types.mbstate_t +) + add_libc_test( wmemset_test SUITE diff --git a/libc/test/src/wchar/wcsnrtombs_test.cpp b/libc/test/src/wchar/wcsnrtombs_test.cpp new file mode 100644 index 0000000000000..f6a333964018d --- /dev/null +++ b/libc/test/src/wchar/wcsnrtombs_test.cpp @@ -0,0 +1,156 @@ +//===-- Unittests for wcsnrtombs ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "hdr/types/mbstate_t.h" +#include "src/string/memset.h" +#include "src/wchar/wcsnrtombs.h" +#include "test/UnitTest/ErrnoCheckingTest.h" +#include "test/UnitTest/Test.h" + +using LlvmLibcWcsnrtombs = LIBC_NAMESPACE::testing::ErrnoCheckingTest; + +// these tests are fairly simple as this function just calls into the internal +// wcsnrtombs which is more thoroughly tested + +TEST_F(LlvmLibcWcsnrtombs, AllMultibyteLengths) { + mbstate_t state; + LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); + + /// clown emoji, sigma symbol, y with diaeresis, letter A + const wchar_t src[] = {static_cast(0x1f921), + static_cast(0x2211), + static_cast(0xff), static_cast(0x41), + static_cast(0x0)}; + const wchar_t *cur = src; + char mbs[11]; + + ASSERT_EQ(LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 5, 11, &state), + static_cast(10)); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(cur, nullptr); + ASSERT_EQ(mbs[0], '\xF0'); // clown begin + ASSERT_EQ(mbs[1], '\x9F'); + ASSERT_EQ(mbs[2], '\xA4'); + ASSERT_EQ(mbs[3], '\xA1'); + ASSERT_EQ(mbs[4], '\xE2'); // sigma begin + ASSERT_EQ(mbs[5], '\x88'); + ASSERT_EQ(mbs[6], '\x91'); + ASSERT_EQ(mbs[7], '\xC3'); // y diaeresis begin + ASSERT_EQ(mbs[8], '\xBF'); + ASSERT_EQ(mbs[9], '\x41'); // A begin + ASSERT_EQ(mbs[10], '\0'); // null terminator +} + +TEST_F(LlvmLibcWcsnrtombs, DestLimit) { + mbstate_t state; + LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); + + /// clown emoji, sigma symbol, y with diaeresis, letter A + const wchar_t src[] = {static_cast(0x1f921), + static_cast(0x2211), + static_cast(0xff), static_cast(0x41), + static_cast(0x0)}; + const wchar_t *cur = src; + + char mbs[11]; + for (int i = 0; i < 11; ++i) + mbs[i] = '\x01'; // dummy initial values + + ASSERT_EQ(LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 5, 4, &state), + static_cast(4)); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(cur, src + 1); + ASSERT_EQ(mbs[0], '\xF0'); + ASSERT_EQ(mbs[1], '\x9F'); + ASSERT_EQ(mbs[2], '\xA4'); + ASSERT_EQ(mbs[3], '\xA1'); + ASSERT_EQ(mbs[4], '\x01'); // didn't write more than 4 bytes + + for (int i = 0; i < 11; ++i) + mbs[i] = '\x01'; // dummy initial values + LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); + cur = src; + + // not enough bytes to convert the second character, so only converts one + ASSERT_EQ(LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 5, 6, &state), + static_cast(4)); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(cur, src + 1); + ASSERT_EQ(mbs[0], '\xF0'); + ASSERT_EQ(mbs[1], '\x9F'); + ASSERT_EQ(mbs[2], '\xA4'); + ASSERT_EQ(mbs[3], '\xA1'); + ASSERT_EQ(mbs[4], '\x01'); +} + +TEST(LlvmLibcWcsnrtombs, SrcLimit) { + mbstate_t state; + LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); + + /// clown emoji, sigma symbol, y with diaeresis, letter A + const wchar_t src[] = {static_cast(0x1f921), + static_cast(0x2211), + static_cast(0xff), static_cast(0x41), + static_cast(0x0)}; + const wchar_t *cur = src; + + char mbs[11]; + for (int i = 0; i < 11; ++i) + mbs[i] = '\x01'; // dummy initial values + + auto res = LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 2, 11, &state); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(res, static_cast(7)); + ASSERT_EQ(cur, src + 2); + ASSERT_EQ(mbs[0], '\xF0'); // clown begin + ASSERT_EQ(mbs[1], '\x9F'); + ASSERT_EQ(mbs[2], '\xA4'); + ASSERT_EQ(mbs[3], '\xA1'); + ASSERT_EQ(mbs[4], '\xE2'); // sigma begin + ASSERT_EQ(mbs[5], '\x88'); + ASSERT_EQ(mbs[6], '\x91'); + ASSERT_EQ(mbs[7], '\x01'); + + res = LIBC_NAMESPACE::wcsnrtombs(mbs + res, &cur, 100, 11, &state); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(res, static_cast(3)); + ASSERT_EQ(cur, nullptr); + ASSERT_EQ(mbs[0], '\xF0'); // clown begin + ASSERT_EQ(mbs[1], '\x9F'); + ASSERT_EQ(mbs[2], '\xA4'); + ASSERT_EQ(mbs[3], '\xA1'); + ASSERT_EQ(mbs[4], '\xE2'); // sigma begin + ASSERT_EQ(mbs[5], '\x88'); + ASSERT_EQ(mbs[6], '\x91'); + ASSERT_EQ(mbs[7], '\xC3'); // y diaeresis begin + ASSERT_EQ(mbs[8], '\xBF'); + ASSERT_EQ(mbs[9], '\x41'); // A begin + ASSERT_EQ(mbs[10], '\0'); // null terminator +} + +TEST_F(LlvmLibcWcsnrtombs, ErrnoTest) { + mbstate_t state; + LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); + + const wchar_t src[] = {static_cast(0x1f921), + static_cast(0x2211), + static_cast(0x12ffff), // invalid widechar + static_cast(0x0)}; + const wchar_t *cur = src; + char mbs[11]; + + // n parameter ignored when dest is null + ASSERT_EQ(LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 5, 7, &state), + static_cast(7)); + ASSERT_ERRNO_SUCCESS(); + + LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); + ASSERT_EQ(LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 5, 100, &state), + static_cast(-1)); + ASSERT_ERRNO_EQ(EILSEQ); +} diff --git a/libc/test/src/wchar/wcsrtombs_test.cpp b/libc/test/src/wchar/wcsrtombs_test.cpp index 84d0bc3b40980..00879ee64e25a 100644 --- a/libc/test/src/wchar/wcsrtombs_test.cpp +++ b/libc/test/src/wchar/wcsrtombs_test.cpp @@ -14,6 +14,9 @@ using LlvmLibcWcsrtombs = LIBC_NAMESPACE::testing::ErrnoCheckingTest; +// these tests are fairly simple as this function just calls into the internal +// wcsnrtombs which is more thoroughly tested + TEST_F(LlvmLibcWcsrtombs, AllMultibyteLengths) { mbstate_t state; LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); @@ -73,6 +76,7 @@ TEST_F(LlvmLibcWcsrtombs, DestLimit) { LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); // not enough bytes to convert the second character, so only converts one + cur = src; ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 6, &state), static_cast(4)); ASSERT_ERRNO_SUCCESS(); @@ -84,29 +88,6 @@ TEST_F(LlvmLibcWcsrtombs, DestLimit) { ASSERT_EQ(mbs[4], '\x01'); } -TEST_F(LlvmLibcWcsrtombs, NullDest) { - mbstate_t state; - LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); - - const wchar_t src[] = {static_cast(0x1f921), - static_cast(0x2211), - static_cast(0xff), static_cast(0x41), - static_cast(0x0)}; - const wchar_t *cur = src; - - // n parameter ignored when dest is null - ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(nullptr, &cur, 1, &state), - static_cast(10)); - ASSERT_ERRNO_SUCCESS(); - ASSERT_EQ(cur, nullptr); - - LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); - ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(nullptr, &cur, 100, &state), - static_cast(10)); - ASSERT_ERRNO_SUCCESS(); - ASSERT_EQ(cur, nullptr); -} - TEST_F(LlvmLibcWcsrtombs, ErrnoTest) { mbstate_t state; LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); @@ -116,14 +97,17 @@ TEST_F(LlvmLibcWcsrtombs, ErrnoTest) { static_cast(0x12ffff), // invalid widechar static_cast(0x0)}; const wchar_t *cur = src; + char mbs[11]; // n parameter ignored when dest is null - ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(nullptr, &cur, 7, &state), + ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 7, &state), static_cast(7)); ASSERT_ERRNO_SUCCESS(); LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); - ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(nullptr, &cur, 100, &state), + cur = src; + + ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 100, &state), static_cast(-1)); ASSERT_ERRNO_EQ(EILSEQ); -} \ No newline at end of file +} diff --git a/libc/test/src/wchar/wcstombs_test.cpp b/libc/test/src/wchar/wcstombs_test.cpp index 25ff4baa2e8cd..397ce0460806f 100644 --- a/libc/test/src/wchar/wcstombs_test.cpp +++ b/libc/test/src/wchar/wcstombs_test.cpp @@ -12,6 +12,9 @@ using LlvmLibcWcstombs = LIBC_NAMESPACE::testing::ErrnoCheckingTest; +// these tests are fairly simple as this function just calls into the internal +// wcsnrtombs which is more thoroughly tested + TEST_F(LlvmLibcWcstombs, AllMultibyteLengths) { /// clown emoji, sigma symbol, y with diaeresis, letter A const wchar_t src[] = {static_cast(0x1f921), @@ -66,28 +69,17 @@ TEST_F(LlvmLibcWcstombs, DestLimit) { ASSERT_EQ(mbs[4], '\x01'); } -TEST_F(LlvmLibcWcstombs, NullDest) { - const wchar_t src[] = {static_cast(0x1f921), - static_cast(0x2211), - static_cast(0xff), static_cast(0x41), - static_cast(0x0)}; - - // n parameter ignored when dest is null - ASSERT_EQ(LIBC_NAMESPACE::wcstombs(nullptr, src, 1), static_cast(10)); - ASSERT_ERRNO_SUCCESS(); - ASSERT_EQ(LIBC_NAMESPACE::wcstombs(nullptr, src, 100), static_cast(10)); - ASSERT_ERRNO_SUCCESS(); -} - TEST_F(LlvmLibcWcstombs, ErrnoTest) { const wchar_t src[] = {static_cast(0x1f921), static_cast(0x2211), static_cast(0x12ffff), // invalid widechar static_cast(0x0)}; + char mbs[11]; // n parameter ignored when dest is null - ASSERT_EQ(LIBC_NAMESPACE::wcstombs(nullptr, src, 7), static_cast(7)); + ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 7), static_cast(7)); ASSERT_ERRNO_SUCCESS(); - ASSERT_EQ(LIBC_NAMESPACE::wcstombs(nullptr, src, 100), static_cast(-1)); + ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 100), + static_cast(-1)); ASSERT_ERRNO_EQ(EILSEQ); -} \ No newline at end of file +} From f01702a10634c7b106f348357f99c7fbdf736586 Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Thu, 17 Jul 2025 22:49:26 +0000 Subject: [PATCH 08/19] yaml typo --- libc/include/wchar.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml index 226f1614ebcc2..8c72e1963a425 100644 --- a/libc/include/wchar.yaml +++ b/libc/include/wchar.yaml @@ -297,7 +297,7 @@ functions: return_type: size_t arguments: - type: char *__restrict - - type: const wchar_t *__restrict + - type: const wchar_t **__restrict - type: size_t - type: size_t - type: mbstate_t From ee62e624e03b63005b3ba7ccf3ce3a1e982344b7 Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Thu, 17 Jul 2025 22:53:22 +0000 Subject: [PATCH 09/19] typo --- libc/src/__support/wchar/wcsnrtombs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libc/src/__support/wchar/wcsnrtombs.h b/libc/src/__support/wchar/wcsnrtombs.h index 972fbf97e52f8..f5ba910940692 100644 --- a/libc/src/__support/wchar/wcsnrtombs.h +++ b/libc/src/__support/wchar/wcsnrtombs.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H -#define LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H +#ifndef LLVM_LIBC_SRC__SUPPORT_WCHAR_WCSNRTOMBS_H +#define LLVM_LIBC_SRC__SUPPORT_WCHAR_WCSNRTOMBS_H #include "hdr/types/size_t.h" #include "hdr/types/wchar_t.h" @@ -24,4 +24,4 @@ ErrorOr wcsnrtombs(char *__restrict s, const wchar_t **__restrict pwcs, } // namespace internal } // namespace LIBC_NAMESPACE_DECL -#endif // LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H +#endif // LLVM_LIBC_SRC__SUPPORT_WCHAR_WCSNRTOMBS_H From 1c1a7f981bbbc4ff8a50a39ed9ac421f61f06ea7 Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Fri, 18 Jul 2025 16:52:42 +0000 Subject: [PATCH 10/19] fixed behavior when dest=null (shouldnt update src pointer) --- libc/src/__support/wchar/wcsnrtombs.cpp | 7 +++++-- libc/test/src/__support/wchar/wcsnrtombs_test.cpp | 5 ++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/libc/src/__support/wchar/wcsnrtombs.cpp b/libc/src/__support/wchar/wcsnrtombs.cpp index b7add66214b5a..65c85cffa55c7 100644 --- a/libc/src/__support/wchar/wcsnrtombs.cpp +++ b/libc/src/__support/wchar/wcsnrtombs.cpp @@ -38,7 +38,8 @@ ErrorOr wcsnrtombs(char *__restrict s, const wchar_t **__restrict pwcs, s[dst_idx] = converted.value(); if (converted.value() == '\0') { - *pwcs = nullptr; + if (s != nullptr) + *pwcs = nullptr; return dst_idx; } @@ -46,7 +47,9 @@ ErrorOr wcsnrtombs(char *__restrict s, const wchar_t **__restrict pwcs, converted = str_conv.popUTF8(); } - *pwcs += str_conv.getSourceIndex(); + if (s != nullptr) + *pwcs += str_conv.getSourceIndex(); + if (converted.error() == -1) // if we hit conversion limit return dst_idx; diff --git a/libc/test/src/__support/wchar/wcsnrtombs_test.cpp b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp index 710fc4b568ac0..bec23638e19b7 100644 --- a/libc/test/src/__support/wchar/wcsnrtombs_test.cpp +++ b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp @@ -140,14 +140,13 @@ TEST(LlvmLibcWcsnrtombs, NullDest) { auto res = LIBC_NAMESPACE::internal::wcsnrtombs(nullptr, &cur, 5, 1, &state1); ASSERT_TRUE(res.has_value()); ASSERT_EQ(res.value(), static_cast(10)); - ASSERT_EQ(cur, nullptr); + ASSERT_EQ(cur, src); // pointer not updated when dest = null LIBC_NAMESPACE::internal::mbstate state2; - cur = src; res = LIBC_NAMESPACE::internal::wcsnrtombs(nullptr, &cur, 5, 100, &state2); ASSERT_TRUE(res.has_value()); ASSERT_EQ(res.value(), static_cast(10)); - ASSERT_EQ(cur, nullptr); + ASSERT_EQ(cur, src); } TEST(LlvmLibcWcsnrtombs, InvalidState) { From 61be17f4facde976b16f4a6f37e78db208479d15 Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Fri, 18 Jul 2025 18:14:07 +0000 Subject: [PATCH 11/19] made internal function header only --- libc/src/__support/wchar/CMakeLists.txt | 4 +- libc/src/__support/wchar/wcsnrtombs.cpp | 59 ------------------------- libc/src/__support/wchar/wcsnrtombs.h | 40 ++++++++++++++++- 3 files changed, 39 insertions(+), 64 deletions(-) delete mode 100644 libc/src/__support/wchar/wcsnrtombs.cpp diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt index b9efe5888d955..a3d06ac0dbe6a 100644 --- a/libc/src/__support/wchar/CMakeLists.txt +++ b/libc/src/__support/wchar/CMakeLists.txt @@ -69,12 +69,10 @@ add_object_library( .mbstate ) -add_object_library( +add_header_library( wcsnrtombs HDRS wcsnrtombs.h - SRCS - wcsnrtombs.cpp DEPENDS libc.hdr.errno_macros libc.hdr.types.char8_t diff --git a/libc/src/__support/wchar/wcsnrtombs.cpp b/libc/src/__support/wchar/wcsnrtombs.cpp deleted file mode 100644 index 65c85cffa55c7..0000000000000 --- a/libc/src/__support/wchar/wcsnrtombs.cpp +++ /dev/null @@ -1,59 +0,0 @@ -//===-- Implementation of wcsnrtombs --------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "src/__support/wchar/wcsnrtombs.h" - -#include "hdr/types/char32_t.h" -#include "hdr/types/size_t.h" -#include "hdr/types/wchar_t.h" -#include "src/__support/common.h" -#include "src/__support/libc_errno.h" -#include "src/__support/macros/config.h" -#include "src/__support/wchar/mbstate.h" -#include "src/__support/wchar/string_converter.h" - -namespace LIBC_NAMESPACE_DECL { -namespace internal { - -ErrorOr wcsnrtombs(char *__restrict s, const wchar_t **__restrict pwcs, - size_t nwc, size_t len, mbstate *ps) { - CharacterConverter cr(ps); - if (!cr.isValidState()) - return Error(EINVAL); - - if (s == nullptr) - len = SIZE_MAX; - - StringConverter str_conv(reinterpret_cast(*pwcs), - ps, len, nwc); - size_t dst_idx = 0; - ErrorOr converted = str_conv.popUTF8(); - while (converted.has_value()) { - if (s != nullptr) - s[dst_idx] = converted.value(); - - if (converted.value() == '\0') { - if (s != nullptr) - *pwcs = nullptr; - return dst_idx; - } - - dst_idx++; - converted = str_conv.popUTF8(); - } - - if (s != nullptr) - *pwcs += str_conv.getSourceIndex(); - - if (converted.error() == -1) // if we hit conversion limit - return dst_idx; - - return Error(converted.error()); -} -} // namespace internal -} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/__support/wchar/wcsnrtombs.h b/libc/src/__support/wchar/wcsnrtombs.h index f5ba910940692..4db9dae0b6ad0 100644 --- a/libc/src/__support/wchar/wcsnrtombs.h +++ b/libc/src/__support/wchar/wcsnrtombs.h @@ -9,17 +9,53 @@ #ifndef LLVM_LIBC_SRC__SUPPORT_WCHAR_WCSNRTOMBS_H #define LLVM_LIBC_SRC__SUPPORT_WCHAR_WCSNRTOMBS_H +#include "hdr/types/char32_t.h" #include "hdr/types/size_t.h" #include "hdr/types/wchar_t.h" -#include "src/__support/error_or.h" +#include "src/__support/common.h" +#include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" #include "src/__support/wchar/mbstate.h" +#include "src/__support/wchar/string_converter.h" namespace LIBC_NAMESPACE_DECL { namespace internal { ErrorOr wcsnrtombs(char *__restrict s, const wchar_t **__restrict pwcs, - size_t nwc, size_t len, mbstate *ps); + size_t nwc, size_t len, mbstate *ps) { + CharacterConverter cr(ps); + if (!cr.isValidState()) + return Error(EINVAL); + + if (s == nullptr) + len = SIZE_MAX; + + StringConverter str_conv(reinterpret_cast(*pwcs), + ps, len, nwc); + size_t dst_idx = 0; + ErrorOr converted = str_conv.popUTF8(); + while (converted.has_value()) { + if (s != nullptr) + s[dst_idx] = converted.value(); + + if (converted.value() == '\0') { + if (s != nullptr) + *pwcs = nullptr; + return dst_idx; + } + + dst_idx++; + converted = str_conv.popUTF8(); + } + + if (s != nullptr) + *pwcs += str_conv.getSourceIndex(); + + if (converted.error() == -1) // if we hit conversion limit + return dst_idx; + + return Error(converted.error()); +} } // namespace internal } // namespace LIBC_NAMESPACE_DECL From 2af900190385b24d1eeacbb71ef30d9986b9b914 Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Fri, 18 Jul 2025 18:18:19 +0000 Subject: [PATCH 12/19] formatting --- libc/src/__support/wchar/wcsnrtombs.h | 2 +- libc/src/wchar/wcsnrtombs.h | 5 +++-- libc/src/wchar/wcsrtombs.h | 5 +++-- libc/test/src/__support/wchar/wcsnrtombs_test.cpp | 3 ++- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/libc/src/__support/wchar/wcsnrtombs.h b/libc/src/__support/wchar/wcsnrtombs.h index 4db9dae0b6ad0..d4b5c5a9b5e4e 100644 --- a/libc/src/__support/wchar/wcsnrtombs.h +++ b/libc/src/__support/wchar/wcsnrtombs.h @@ -50,7 +50,7 @@ ErrorOr wcsnrtombs(char *__restrict s, const wchar_t **__restrict pwcs, if (s != nullptr) *pwcs += str_conv.getSourceIndex(); - + if (converted.error() == -1) // if we hit conversion limit return dst_idx; diff --git a/libc/src/wchar/wcsnrtombs.h b/libc/src/wchar/wcsnrtombs.h index 793d383660f1b..bf8add75b2951 100644 --- a/libc/src/wchar/wcsnrtombs.h +++ b/libc/src/wchar/wcsnrtombs.h @@ -1,4 +1,4 @@ -//===-- Implementation header for wcsnrtombs -------------------------------===// +//===-- Implementation header for wcsnrtombs ------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -16,7 +16,8 @@ namespace LIBC_NAMESPACE_DECL { -size_t wcsnrtombs(char *__restrict s, const wchar_t **__restrict pwcs, size_t nwc, size_t len, mbstate_t* ps); +size_t wcsnrtombs(char *__restrict s, const wchar_t **__restrict pwcs, + size_t nwc, size_t len, mbstate_t *ps); } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/wcsrtombs.h b/libc/src/wchar/wcsrtombs.h index af69fccdb296a..d23573f5b9418 100644 --- a/libc/src/wchar/wcsrtombs.h +++ b/libc/src/wchar/wcsrtombs.h @@ -1,4 +1,4 @@ -//===-- Implementation header for wcsrtombs --------------------------------===// +//===-- Implementation header for wcsrtombs -------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -16,7 +16,8 @@ namespace LIBC_NAMESPACE_DECL { -size_t wcsrtombs(char *__restrict s, const wchar_t **__restrict pwcs, size_t n, mbstate_t* ps); +size_t wcsrtombs(char *__restrict s, const wchar_t **__restrict pwcs, size_t n, + mbstate_t *ps); } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/test/src/__support/wchar/wcsnrtombs_test.cpp b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp index bec23638e19b7..3df7b07f90f47 100644 --- a/libc/test/src/__support/wchar/wcsnrtombs_test.cpp +++ b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp @@ -110,7 +110,8 @@ TEST(LlvmLibcWcsnrtombs, SrcLimit) { ASSERT_EQ(mbs[6], '\x91'); ASSERT_EQ(mbs[7], '\x01'); - res = LIBC_NAMESPACE::internal::wcsnrtombs(mbs + res.value(), &cur, 100, 11, &state); + res = LIBC_NAMESPACE::internal::wcsnrtombs(mbs + res.value(), &cur, 100, 11, + &state); ASSERT_TRUE(res.has_value()); ASSERT_EQ(res.value(), static_cast(3)); ASSERT_EQ(cur, nullptr); From f60a0aef681c73784ead1895d77ec65312f02952 Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Fri, 18 Jul 2025 18:22:08 +0000 Subject: [PATCH 13/19] format --- libc/test/src/wchar/wcstombs_test.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libc/test/src/wchar/wcstombs_test.cpp b/libc/test/src/wchar/wcstombs_test.cpp index 397ce0460806f..61e0873dc9711 100644 --- a/libc/test/src/wchar/wcstombs_test.cpp +++ b/libc/test/src/wchar/wcstombs_test.cpp @@ -79,7 +79,6 @@ TEST_F(LlvmLibcWcstombs, ErrnoTest) { // n parameter ignored when dest is null ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 7), static_cast(7)); ASSERT_ERRNO_SUCCESS(); - ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 100), - static_cast(-1)); + ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 100), static_cast(-1)); ASSERT_ERRNO_EQ(EILSEQ); } From 924f32f066e06e0937ae58c0adaa7147c01c5c29 Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Fri, 18 Jul 2025 19:46:41 +0000 Subject: [PATCH 14/19] added tests for src == null and mbstate == null --- libc/src/__support/wchar/wcsnrtombs.h | 10 ++++- libc/src/wchar/wcsnrtombs.cpp | 1 + libc/src/wchar/wcsrtombs.cpp | 1 + libc/src/wchar/wcstombs.cpp | 1 + .../src/__support/wchar/wcsnrtombs_test.cpp | 10 +++++ libc/test/src/wchar/wcsnrtombs_test.cpp | 40 +++++++++++++++++++ libc/test/src/wchar/wcsrtombs_test.cpp | 40 +++++++++++++++++++ 7 files changed, 101 insertions(+), 2 deletions(-) diff --git a/libc/src/__support/wchar/wcsnrtombs.h b/libc/src/__support/wchar/wcsnrtombs.h index d4b5c5a9b5e4e..cf2eda1d2c284 100644 --- a/libc/src/__support/wchar/wcsnrtombs.h +++ b/libc/src/__support/wchar/wcsnrtombs.h @@ -15,14 +15,20 @@ #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" +#include "src/__support/macros/null_check.h" #include "src/__support/wchar/mbstate.h" #include "src/__support/wchar/string_converter.h" namespace LIBC_NAMESPACE_DECL { namespace internal { -ErrorOr wcsnrtombs(char *__restrict s, const wchar_t **__restrict pwcs, - size_t nwc, size_t len, mbstate *ps) { +LIBC_INLINE static ErrorOr wcsnrtombs(char *__restrict s, + const wchar_t **__restrict pwcs, + size_t nwc, size_t len, + mbstate *ps) { + LIBC_CRASH_ON_NULLPTR(pwcs); + LIBC_CRASH_ON_NULLPTR(ps); + CharacterConverter cr(ps); if (!cr.isValidState()) return Error(EINVAL); diff --git a/libc/src/wchar/wcsnrtombs.cpp b/libc/src/wchar/wcsnrtombs.cpp index fd4724150e927..7f25b248a0863 100644 --- a/libc/src/wchar/wcsnrtombs.cpp +++ b/libc/src/wchar/wcsnrtombs.cpp @@ -23,6 +23,7 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(size_t, wcsnrtombs, (char *__restrict s, const wchar_t **__restrict pwcs, size_t nwc, size_t len, mbstate_t *ps)) { + LIBC_CRASH_ON_NULLPTR(pwcs); static internal::mbstate internal_mbstate; auto result = internal::wcsnrtombs( s, pwcs, nwc, len, diff --git a/libc/src/wchar/wcsrtombs.cpp b/libc/src/wchar/wcsrtombs.cpp index b4632a4a436ba..9d2508cb81a8c 100644 --- a/libc/src/wchar/wcsrtombs.cpp +++ b/libc/src/wchar/wcsrtombs.cpp @@ -23,6 +23,7 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(size_t, wcsrtombs, (char *__restrict s, const wchar_t **__restrict pwcs, size_t n, mbstate_t *ps)) { + LIBC_CRASH_ON_NULLPTR(pwcs); static internal::mbstate internal_mbstate; auto result = internal::wcsnrtombs( s, pwcs, SIZE_MAX, n, diff --git a/libc/src/wchar/wcstombs.cpp b/libc/src/wchar/wcstombs.cpp index 28e2425d645e7..c3793cbe912cd 100644 --- a/libc/src/wchar/wcstombs.cpp +++ b/libc/src/wchar/wcstombs.cpp @@ -22,6 +22,7 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(size_t, wcstombs, (char *__restrict s, const wchar_t *__restrict wcs, size_t n)) { + LIBC_CRASH_ON_NULLPTR(wcs); static internal::mbstate internal_mbstate; const wchar_t *wcs_ptr_copy = wcs; auto result = diff --git a/libc/test/src/__support/wchar/wcsnrtombs_test.cpp b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp index 3df7b07f90f47..33d28d791b18c 100644 --- a/libc/test/src/__support/wchar/wcsnrtombs_test.cpp +++ b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp @@ -188,3 +188,13 @@ TEST(LlvmLibcWcsnrtombs, InvalidCharacter) { ASSERT_FALSE(res.has_value()); ASSERT_EQ(res.error(), EILSEQ); } + +TEST(LlvmLibcWcsnrtombs, NullSrc) { + EXPECT_DEATH( + [] { + LIBC_NAMESPACE::internal::mbstate state; + char mbs[10]; + LIBC_NAMESPACE::internal::wcsnrtombs(mbs, nullptr, 1, 1, &state); + }, + WITH_SIGNAL(-1)); +} \ No newline at end of file diff --git a/libc/test/src/wchar/wcsnrtombs_test.cpp b/libc/test/src/wchar/wcsnrtombs_test.cpp index f6a333964018d..73e478fcdd256 100644 --- a/libc/test/src/wchar/wcsnrtombs_test.cpp +++ b/libc/test/src/wchar/wcsnrtombs_test.cpp @@ -154,3 +154,43 @@ TEST_F(LlvmLibcWcsnrtombs, ErrnoTest) { static_cast(-1)); ASSERT_ERRNO_EQ(EILSEQ); } + +TEST_F(LlvmLibcWcsnrtombs, NullState) { + // this test is the same as DestLimit except it uses a nullptr mbstate* + + /// clown emoji, sigma symbol, y with diaeresis, letter A + const wchar_t src[] = {static_cast(0x1f921), + static_cast(0x2211), + static_cast(0xff), static_cast(0x41), + static_cast(0x0)}; + const wchar_t *cur = src; + + char mbs[11]; + for (int i = 0; i < 11; ++i) + mbs[i] = '\x01'; // dummy initial values + + ASSERT_EQ(LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 5, 4, nullptr), + static_cast(4)); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(cur, src + 1); + ASSERT_EQ(mbs[0], '\xF0'); + ASSERT_EQ(mbs[1], '\x9F'); + ASSERT_EQ(mbs[2], '\xA4'); + ASSERT_EQ(mbs[3], '\xA1'); + ASSERT_EQ(mbs[4], '\x01'); // didn't write more than 4 bytes + + for (int i = 0; i < 11; ++i) + mbs[i] = '\x01'; // dummy initial values + + // not enough bytes to convert the second character, so only converts one + cur = src; + ASSERT_EQ(LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 5, 6, nullptr), + static_cast(4)); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(cur, src + 1); + ASSERT_EQ(mbs[0], '\xF0'); + ASSERT_EQ(mbs[1], '\x9F'); + ASSERT_EQ(mbs[2], '\xA4'); + ASSERT_EQ(mbs[3], '\xA1'); + ASSERT_EQ(mbs[4], '\x01'); +} diff --git a/libc/test/src/wchar/wcsrtombs_test.cpp b/libc/test/src/wchar/wcsrtombs_test.cpp index 00879ee64e25a..a18d2438476ea 100644 --- a/libc/test/src/wchar/wcsrtombs_test.cpp +++ b/libc/test/src/wchar/wcsrtombs_test.cpp @@ -111,3 +111,43 @@ TEST_F(LlvmLibcWcsrtombs, ErrnoTest) { static_cast(-1)); ASSERT_ERRNO_EQ(EILSEQ); } + +TEST_F(LlvmLibcWcsrtombs, NullState) { + // this test is the same as DestLimit except it uses a nullptr mbstate* + + /// clown emoji, sigma symbol, y with diaeresis, letter A + const wchar_t src[] = {static_cast(0x1f921), + static_cast(0x2211), + static_cast(0xff), static_cast(0x41), + static_cast(0x0)}; + const wchar_t *cur = src; + + char mbs[11]; + for (int i = 0; i < 11; ++i) + mbs[i] = '\x01'; // dummy initial values + + ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 4, nullptr), + static_cast(4)); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(cur, src + 1); + ASSERT_EQ(mbs[0], '\xF0'); + ASSERT_EQ(mbs[1], '\x9F'); + ASSERT_EQ(mbs[2], '\xA4'); + ASSERT_EQ(mbs[3], '\xA1'); + ASSERT_EQ(mbs[4], '\x01'); // didn't write more than 4 bytes + + for (int i = 0; i < 11; ++i) + mbs[i] = '\x01'; // dummy initial values + + // not enough bytes to convert the second character, so only converts one + cur = src; + ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 6, nullptr), + static_cast(4)); + ASSERT_ERRNO_SUCCESS(); + ASSERT_EQ(cur, src + 1); + ASSERT_EQ(mbs[0], '\xF0'); + ASSERT_EQ(mbs[1], '\x9F'); + ASSERT_EQ(mbs[2], '\xA4'); + ASSERT_EQ(mbs[3], '\xA1'); + ASSERT_EQ(mbs[4], '\x01'); +} From af85c9510d350adf99651dc5b3c4152352d60427 Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Fri, 18 Jul 2025 19:57:48 +0000 Subject: [PATCH 15/19] mssing headers for death tests --- libc/test/src/__support/wchar/wcsnrtombs_test.cpp | 3 ++- libc/test/src/wchar/wcsnrtombs_test.cpp | 1 + libc/test/src/wchar/wcsrtombs_test.cpp | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/libc/test/src/__support/wchar/wcsnrtombs_test.cpp b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp index 33d28d791b18c..4d634a19f9df0 100644 --- a/libc/test/src/__support/wchar/wcsnrtombs_test.cpp +++ b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp @@ -10,6 +10,7 @@ #include "hdr/types/size_t.h" #include "hdr/types/wchar_t.h" #include "src/__support/error_or.h" +#include "src/__support/macros/null_check.h" #include "src/__support/wchar/mbstate.h" #include "src/__support/wchar/wcsnrtombs.h" #include "test/UnitTest/Test.h" @@ -197,4 +198,4 @@ TEST(LlvmLibcWcsnrtombs, NullSrc) { LIBC_NAMESPACE::internal::wcsnrtombs(mbs, nullptr, 1, 1, &state); }, WITH_SIGNAL(-1)); -} \ No newline at end of file +} diff --git a/libc/test/src/wchar/wcsnrtombs_test.cpp b/libc/test/src/wchar/wcsnrtombs_test.cpp index 73e478fcdd256..17c6019b46b5f 100644 --- a/libc/test/src/wchar/wcsnrtombs_test.cpp +++ b/libc/test/src/wchar/wcsnrtombs_test.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "hdr/types/mbstate_t.h" +#include "src/__support/macros/null_check.h" #include "src/string/memset.h" #include "src/wchar/wcsnrtombs.h" #include "test/UnitTest/ErrnoCheckingTest.h" diff --git a/libc/test/src/wchar/wcsrtombs_test.cpp b/libc/test/src/wchar/wcsrtombs_test.cpp index a18d2438476ea..9631f55255d64 100644 --- a/libc/test/src/wchar/wcsrtombs_test.cpp +++ b/libc/test/src/wchar/wcsrtombs_test.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "hdr/types/mbstate_t.h" +#include "src/__support/macros/null_check.h" #include "src/string/memset.h" #include "src/wchar/wcsrtombs.h" #include "test/UnitTest/ErrnoCheckingTest.h" From eb0741aa7d91f9733a4ef523890492ee0f3c90ec Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Fri, 18 Jul 2025 21:49:56 +0000 Subject: [PATCH 16/19] added macro for death tests --- libc/test/src/__support/wchar/wcsnrtombs_test.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libc/test/src/__support/wchar/wcsnrtombs_test.cpp b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp index 4d634a19f9df0..cc4195472e349 100644 --- a/libc/test/src/__support/wchar/wcsnrtombs_test.cpp +++ b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp @@ -190,6 +190,7 @@ TEST(LlvmLibcWcsnrtombs, InvalidCharacter) { ASSERT_EQ(res.error(), EILSEQ); } +#if defined(LIBC_ADD_NULL_CHECKS) && !defined(LIBC_HAS_SANITIZER) TEST(LlvmLibcWcsnrtombs, NullSrc) { EXPECT_DEATH( [] { @@ -199,3 +200,4 @@ TEST(LlvmLibcWcsnrtombs, NullSrc) { }, WITH_SIGNAL(-1)); } +#endif // LIBC_HAS_ADDRESS_SANITIZER From fd70dedf5d282a079de2abf9fb9bd8ac0e537bcd Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Fri, 18 Jul 2025 22:09:17 +0000 Subject: [PATCH 17/19] exclude windows --- libc/test/src/__support/wchar/wcsnrtombs_test.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/libc/test/src/__support/wchar/wcsnrtombs_test.cpp b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp index cc4195472e349..2d431eddf4a6f 100644 --- a/libc/test/src/__support/wchar/wcsnrtombs_test.cpp +++ b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp @@ -11,10 +11,19 @@ #include "hdr/types/wchar_t.h" #include "src/__support/error_or.h" #include "src/__support/macros/null_check.h" +#include "src/__support/macros/properties/os.h" #include "src/__support/wchar/mbstate.h" #include "src/__support/wchar/wcsnrtombs.h" #include "test/UnitTest/Test.h" +// TODO: add support for 16-bit widechars to remove this macro +#ifdef LIBC_TARGET_OS_IS_WINDOWS +TEST(LlvmLibcStringConverterTest, Windows) { + // pass on windows for now +} + +#else + TEST(LlvmLibcWcsnrtombs, AllMultibyteLengths) { LIBC_NAMESPACE::internal::mbstate state; @@ -201,3 +210,4 @@ TEST(LlvmLibcWcsnrtombs, NullSrc) { WITH_SIGNAL(-1)); } #endif // LIBC_HAS_ADDRESS_SANITIZER +#endif From 361141768e64416d1db7b72cab68fef11a89185a Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Tue, 22 Jul 2025 22:56:57 +0000 Subject: [PATCH 18/19] descriptive names; alphabetized yaml; used memset in tests --- libc/include/wchar.yaml | 55 ++++++++++++------------- libc/src/__support/wchar/wcsnrtombs.h | 28 ++++++------- libc/test/src/wchar/wcsnrtombs_test.cpp | 15 +++---- libc/test/src/wchar/wcsrtombs_test.cpp | 12 ++---- 4 files changed, 50 insertions(+), 60 deletions(-) diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml index 8c72e1963a425..9383811b6027b 100644 --- a/libc/include/wchar.yaml +++ b/libc/include/wchar.yaml @@ -189,6 +189,25 @@ functions: - type: wchar_t *__restrict - type: const wchar_t *__restrict - type: size_t + - name: wcsnrtombs + standards: + - stdc + return_type: size_t + arguments: + - type: char *__restrict + - type: const wchar_t **__restrict + - type: size_t + - type: size_t + - type: mbstate_t + - name: wcsrtombs + standards: + - stdc + return_type: size_t + arguments: + - type: char *__restrict + - type: const wchar_t **__restrict + - type: size_t + - type: mbstate_t - name: wcrtomb standards: - stdc @@ -258,6 +277,14 @@ functions: - type: const wchar_t *__restrict - type: wchar_t **__restrict - type: int + - name: wcstombs + standards: + - stdc + return_type: size_t + arguments: + - type: char *__restrict + - type: const wchar_t *__restrict + - type: size_t - name: wcstoul standards: - stdc @@ -274,31 +301,3 @@ functions: - type: const wchar_t *__restrict - type: wchar_t **__restrict - type: int - - name: wcstombs - standards: - - stdc - return_type: size_t - arguments: - - type: char *__restrict - - type: const wchar_t *__restrict - - type: size_t - - name: wcsrtombs - standards: - - stdc - return_type: size_t - arguments: - - type: char *__restrict - - type: const wchar_t **__restrict - - type: size_t - - type: mbstate_t - - name: wcsnrtombs - standards: - - stdc - return_type: size_t - arguments: - - type: char *__restrict - - type: const wchar_t **__restrict - - type: size_t - - type: size_t - - type: mbstate_t - diff --git a/libc/src/__support/wchar/wcsnrtombs.h b/libc/src/__support/wchar/wcsnrtombs.h index cf2eda1d2c284..5286e10c0abc0 100644 --- a/libc/src/__support/wchar/wcsnrtombs.h +++ b/libc/src/__support/wchar/wcsnrtombs.h @@ -22,31 +22,31 @@ namespace LIBC_NAMESPACE_DECL { namespace internal { -LIBC_INLINE static ErrorOr wcsnrtombs(char *__restrict s, - const wchar_t **__restrict pwcs, - size_t nwc, size_t len, +LIBC_INLINE static ErrorOr wcsnrtombs(char *__restrict dest, + const wchar_t **__restrict ptr_to_src, + size_t num_src_widechars, size_t dest_len, mbstate *ps) { - LIBC_CRASH_ON_NULLPTR(pwcs); + LIBC_CRASH_ON_NULLPTR(ptr_to_src); LIBC_CRASH_ON_NULLPTR(ps); CharacterConverter cr(ps); if (!cr.isValidState()) return Error(EINVAL); - if (s == nullptr) - len = SIZE_MAX; + if (dest == nullptr) + dest_len = SIZE_MAX; - StringConverter str_conv(reinterpret_cast(*pwcs), - ps, len, nwc); + StringConverter str_conv(reinterpret_cast(*ptr_to_src), + ps, dest_len, num_src_widechars); size_t dst_idx = 0; ErrorOr converted = str_conv.popUTF8(); while (converted.has_value()) { - if (s != nullptr) - s[dst_idx] = converted.value(); + if (dest != nullptr) + dest[dst_idx] = converted.value(); if (converted.value() == '\0') { - if (s != nullptr) - *pwcs = nullptr; + if (dest != nullptr) + *ptr_to_src = nullptr; return dst_idx; } @@ -54,8 +54,8 @@ LIBC_INLINE static ErrorOr wcsnrtombs(char *__restrict s, converted = str_conv.popUTF8(); } - if (s != nullptr) - *pwcs += str_conv.getSourceIndex(); + if (dest != nullptr) + *ptr_to_src += str_conv.getSourceIndex(); if (converted.error() == -1) // if we hit conversion limit return dst_idx; diff --git a/libc/test/src/wchar/wcsnrtombs_test.cpp b/libc/test/src/wchar/wcsnrtombs_test.cpp index 17c6019b46b5f..04cf426d31cc7 100644 --- a/libc/test/src/wchar/wcsnrtombs_test.cpp +++ b/libc/test/src/wchar/wcsnrtombs_test.cpp @@ -59,8 +59,7 @@ TEST_F(LlvmLibcWcsnrtombs, DestLimit) { const wchar_t *cur = src; char mbs[11]; - for (int i = 0; i < 11; ++i) - mbs[i] = '\x01'; // dummy initial values + LIBC_NAMESPACE::memset(mbs, '\x01', 11); // dummy initial values ASSERT_EQ(LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 5, 4, &state), static_cast(4)); @@ -72,8 +71,7 @@ TEST_F(LlvmLibcWcsnrtombs, DestLimit) { ASSERT_EQ(mbs[3], '\xA1'); ASSERT_EQ(mbs[4], '\x01'); // didn't write more than 4 bytes - for (int i = 0; i < 11; ++i) - mbs[i] = '\x01'; // dummy initial values + LIBC_NAMESPACE::memset(mbs, '\x01', 11); // dummy initial values LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); cur = src; @@ -101,8 +99,7 @@ TEST(LlvmLibcWcsnrtombs, SrcLimit) { const wchar_t *cur = src; char mbs[11]; - for (int i = 0; i < 11; ++i) - mbs[i] = '\x01'; // dummy initial values + LIBC_NAMESPACE::memset(mbs, '\x01', 11); // dummy initial values auto res = LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 2, 11, &state); ASSERT_ERRNO_SUCCESS(); @@ -167,8 +164,7 @@ TEST_F(LlvmLibcWcsnrtombs, NullState) { const wchar_t *cur = src; char mbs[11]; - for (int i = 0; i < 11; ++i) - mbs[i] = '\x01'; // dummy initial values + LIBC_NAMESPACE::memset(mbs, '\x01', 11); // dummy initial values ASSERT_EQ(LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 5, 4, nullptr), static_cast(4)); @@ -180,8 +176,7 @@ TEST_F(LlvmLibcWcsnrtombs, NullState) { ASSERT_EQ(mbs[3], '\xA1'); ASSERT_EQ(mbs[4], '\x01'); // didn't write more than 4 bytes - for (int i = 0; i < 11; ++i) - mbs[i] = '\x01'; // dummy initial values + LIBC_NAMESPACE::memset(mbs, '\x01', 11); // dummy initial values // not enough bytes to convert the second character, so only converts one cur = src; diff --git a/libc/test/src/wchar/wcsrtombs_test.cpp b/libc/test/src/wchar/wcsrtombs_test.cpp index 9631f55255d64..65c69e63aee0c 100644 --- a/libc/test/src/wchar/wcsrtombs_test.cpp +++ b/libc/test/src/wchar/wcsrtombs_test.cpp @@ -59,8 +59,7 @@ TEST_F(LlvmLibcWcsrtombs, DestLimit) { const wchar_t *cur = src; char mbs[11]; - for (int i = 0; i < 11; ++i) - mbs[i] = '\x01'; // dummy initial values + LIBC_NAMESPACE::memset(mbs, '\x01', 11); // dummy initial values ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 4, &state), static_cast(4)); @@ -72,8 +71,7 @@ TEST_F(LlvmLibcWcsrtombs, DestLimit) { ASSERT_EQ(mbs[3], '\xA1'); ASSERT_EQ(mbs[4], '\x01'); // didn't write more than 4 bytes - for (int i = 0; i < 11; ++i) - mbs[i] = '\x01'; // dummy initial values + LIBC_NAMESPACE::memset(mbs, '\x01', 11); // dummy initial values LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); // not enough bytes to convert the second character, so only converts one @@ -124,8 +122,7 @@ TEST_F(LlvmLibcWcsrtombs, NullState) { const wchar_t *cur = src; char mbs[11]; - for (int i = 0; i < 11; ++i) - mbs[i] = '\x01'; // dummy initial values + LIBC_NAMESPACE::memset(mbs, '\x01', 11); // dummy initial values ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 4, nullptr), static_cast(4)); @@ -137,8 +134,7 @@ TEST_F(LlvmLibcWcsrtombs, NullState) { ASSERT_EQ(mbs[3], '\xA1'); ASSERT_EQ(mbs[4], '\x01'); // didn't write more than 4 bytes - for (int i = 0; i < 11; ++i) - mbs[i] = '\x01'; // dummy initial values + LIBC_NAMESPACE::memset(mbs, '\x01', 11); // dummy initial values // not enough bytes to convert the second character, so only converts one cur = src; From d660dcb9b72f7b09856e47b60b180d2ab9502579 Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Tue, 22 Jul 2025 23:04:59 +0000 Subject: [PATCH 19/19] formatting --- libc/src/__support/wchar/wcsnrtombs.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libc/src/__support/wchar/wcsnrtombs.h b/libc/src/__support/wchar/wcsnrtombs.h index 5286e10c0abc0..433097c937a42 100644 --- a/libc/src/__support/wchar/wcsnrtombs.h +++ b/libc/src/__support/wchar/wcsnrtombs.h @@ -22,10 +22,9 @@ namespace LIBC_NAMESPACE_DECL { namespace internal { -LIBC_INLINE static ErrorOr wcsnrtombs(char *__restrict dest, - const wchar_t **__restrict ptr_to_src, - size_t num_src_widechars, size_t dest_len, - mbstate *ps) { +LIBC_INLINE static ErrorOr +wcsnrtombs(char *__restrict dest, const wchar_t **__restrict ptr_to_src, + size_t num_src_widechars, size_t dest_len, mbstate *ps) { LIBC_CRASH_ON_NULLPTR(ptr_to_src); LIBC_CRASH_ON_NULLPTR(ps); @@ -36,8 +35,9 @@ LIBC_INLINE static ErrorOr wcsnrtombs(char *__restrict dest, if (dest == nullptr) dest_len = SIZE_MAX; - StringConverter str_conv(reinterpret_cast(*ptr_to_src), - ps, dest_len, num_src_widechars); + StringConverter str_conv( + reinterpret_cast(*ptr_to_src), ps, dest_len, + num_src_widechars); size_t dst_idx = 0; ErrorOr converted = str_conv.popUTF8(); while (converted.has_value()) {