diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 1451bd286d8aa..e8f59c9d5cd73 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -1268,6 +1268,9 @@ if(LLVM_LIBC_FULL_BUILD)
     libc.src.wchar.mbtowc
     libc.src.wchar.wcrtomb
     libc.src.wchar.wctomb
+    libc.src.wchar.wcstombs
+    libc.src.wchar.wcsrtombs
+    libc.src.wchar.wcsnrtombs
   )
 endif()
 
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index daf05cdcd00c2..0285f1924a2fa 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -204,6 +204,25 @@ functions:
       - type: wchar_t *__restrict
       - type: const wchar_t *__restrict
       - type: size_t
+  - name: wcsnrtombs
+    standards:
+      - stdc
+    return_type: size_t
+    arguments:
+      - type: char *__restrict
+      - type: const wchar_t **__restrict
+      - type: size_t
+      - type: size_t
+      - type: mbstate_t *__restrict
+  - name: wcsrtombs
+    standards:
+      - stdc
+    return_type: size_t
+    arguments:
+      - type: char *__restrict
+      - type: const wchar_t **__restrict
+      - type: size_t
+      - type: mbstate_t *__restrict
   - name: wcrtomb
     standards:
       - stdc
@@ -279,6 +298,14 @@ functions:
       - type: const wchar_t *__restrict
       - type: wchar_t **__restrict
       - type: int
+  - name: wcstombs
+    standards:
+      - stdc
+    return_type: size_t
+    arguments:
+      - type: char *__restrict
+      - type: const wchar_t *__restrict
+      - type: size_t
   - name: wcstoul
     standards:
       - stdc
diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt
index cf3e641d2d2db..e363ad397079c 100644
--- a/libc/src/__support/wchar/CMakeLists.txt
+++ b/libc/src/__support/wchar/CMakeLists.txt
@@ -69,3 +69,20 @@ add_object_library(
     .character_converter
     .mbstate
 )
+
+add_header_library(
+  wcsnrtombs
+  HDRS
+    wcsnrtombs.h
+  DEPENDS
+    libc.hdr.errno_macros
+    libc.hdr.types.char8_t
+    libc.hdr.types.char32_t
+    libc.hdr.types.size_t
+    libc.hdr.types.wchar_t
+    libc.src.__support.error_or
+    libc.src.__support.common
+    .string_converter
+    .character_converter
+    .mbstate
+)
diff --git a/libc/src/__support/wchar/wcsnrtombs.h b/libc/src/__support/wchar/wcsnrtombs.h
new file mode 100644
index 0000000000000..433097c937a42
--- /dev/null
+++ b/libc/src/__support/wchar/wcsnrtombs.h
@@ -0,0 +1,69 @@
+//===-- Implementation header for wcsnrtombs ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC__SUPPORT_WCHAR_WCSNRTOMBS_H
+#define LLVM_LIBC_SRC__SUPPORT_WCHAR_WCSNRTOMBS_H
+
+#include "hdr/types/char32_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/null_check.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/string_converter.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+// Converts *ptr_to_src to multibyte chars in dest; returns the bytes written.
+LIBC_INLINE static ErrorOr<size_t>
+wcsnrtombs(char *__restrict dest, const wchar_t **__restrict ptr_to_src,
+           size_t num_src_widechars, size_t dest_len, mbstate *ps) {
+  LIBC_CRASH_ON_NULLPTR(ptr_to_src);
+  LIBC_CRASH_ON_NULLPTR(ps);
+
+  CharacterConverter cr(ps);
+  if (!cr.isValidState())
+    return Error(EINVAL);
+
+  if (dest == nullptr)
+    dest_len = SIZE_MAX;
+
+  StringConverter<char32_t> str_conv(
+      reinterpret_cast<const char32_t *>(*ptr_to_src), ps, dest_len,
+      num_src_widechars);
+  size_t dst_idx = 0;
+  ErrorOr<char8_t> converted = str_conv.popUTF8();
+  while (converted.has_value()) {
+    if (dest != nullptr)
+      dest[dst_idx] = converted.value();
+
+    if (converted.value() == '\0') {
+      if (dest != nullptr)
+        *ptr_to_src = nullptr;
+      return dst_idx;
+    }
+
+    dst_idx++;
+    converted = str_conv.popUTF8();
+  }
+
+  if (dest != nullptr)
+    *ptr_to_src += str_conv.getSourceIndex();
+
+  if (converted.error() == -1) // -1 signals a conversion limit, not a failure
+    return dst_idx;
+
+  return Error(converted.error());
+}
+
+} // namespace internal
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC__SUPPORT_WCHAR_WCSNRTOMBS_H
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 25319837bdc70..43f44a941d451 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -169,6 +169,47 @@ add_entrypoint_object(
     libc.src.__support.wchar.mbstate
 )
 
+add_entrypoint_object(
+  wcstombs
+  SRCS
+    wcstombs.cpp
+  HDRS
+    wcstombs.h
+  DEPENDS
+    libc.hdr.types.wchar_t
+    libc.src.__support.wchar.mbstate
+    libc.src.__support.wchar.wcsnrtombs
+    libc.src.__support.libc_errno
+)
+
+add_entrypoint_object(
+  wcsrtombs
+  SRCS
+    wcsrtombs.cpp
+  HDRS
+    wcsrtombs.h
+  DEPENDS
+    libc.hdr.types.wchar_t
+    libc.hdr.types.mbstate_t
+    libc.src.__support.wchar.mbstate
+    libc.src.__support.wchar.wcsnrtombs
+    libc.src.__support.libc_errno
+)
+
+add_entrypoint_object(
+  wcsnrtombs
+  SRCS
+    wcsnrtombs.cpp
+  HDRS
+    wcsnrtombs.h
+  DEPENDS
+    libc.hdr.types.wchar_t
+    libc.hdr.types.mbstate_t
+    libc.src.__support.wchar.mbstate
+    libc.src.__support.wchar.wcsnrtombs
+    libc.src.__support.libc_errno
+)
+
 add_entrypoint_object(
   mblen
   SRCS
diff --git a/libc/src/wchar/wcsnrtombs.cpp b/libc/src/wchar/wcsnrtombs.cpp
new file mode 100644
index 0000000000000..7f25b248a0863
--- /dev/null
+++ b/libc/src/wchar/wcsnrtombs.cpp
@@ -0,0 +1,40 @@
+//===-- Implementation of wcsnrtombs --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcsnrtombs.h"
+
+#include "hdr/types/char32_t.h"
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/wcsnrtombs.h"
+
+namespace LIBC_NAMESPACE_DECL {
+// Public entry point; a private static state is used when ps is null.
+LLVM_LIBC_FUNCTION(size_t, wcsnrtombs,
+                   (char *__restrict s, const wchar_t **__restrict pwcs,
+                    size_t nwc, size_t len, mbstate_t *ps)) {
+  LIBC_CRASH_ON_NULLPTR(pwcs);
+  static internal::mbstate internal_mbstate;
+  auto result = internal::wcsnrtombs(
+      s, pwcs, nwc, len,
+      ps == nullptr ? &internal_mbstate
+                    : reinterpret_cast<internal::mbstate *>(ps));
+  if (!result.has_value()) {
+    libc_errno = result.error();
+    return static_cast<size_t>(-1); // error is reported through errno
+  }
+
+  return result.value();
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcsnrtombs.h b/libc/src/wchar/wcsnrtombs.h
new file mode 100644
index 0000000000000..bf8add75b2951
--- /dev/null
+++ b/libc/src/wchar/wcsnrtombs.h
@@ -0,0 +1,24 @@
+//===-- Implementation header for wcsnrtombs ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSNRTOMBS_H
+#define LLVM_LIBC_SRC_WCHAR_WCSNRTOMBS_H
+
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+size_t wcsnrtombs(char *__restrict s, const wchar_t **__restrict pwcs,
+                  size_t nwc, size_t len, mbstate_t *ps);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSNRTOMBS_H
diff --git a/libc/src/wchar/wcsrtombs.cpp b/libc/src/wchar/wcsrtombs.cpp
new file mode 100644
index 0000000000000..9d2508cb81a8c
--- /dev/null
+++ b/libc/src/wchar/wcsrtombs.cpp
@@ -0,0 +1,40 @@
+//===-- Implementation of wcsrtombs ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcsrtombs.h"
+
+#include "hdr/types/char32_t.h"
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/wcsnrtombs.h"
+
+namespace LIBC_NAMESPACE_DECL {
+// Same as wcsnrtombs, but passes SIZE_MAX as the source character limit.
+LLVM_LIBC_FUNCTION(size_t, wcsrtombs,
+                   (char *__restrict s, const wchar_t **__restrict pwcs,
+                    size_t n, mbstate_t *ps)) {
+  LIBC_CRASH_ON_NULLPTR(pwcs);
+  static internal::mbstate internal_mbstate;
+  auto result = internal::wcsnrtombs(
+      s, pwcs, SIZE_MAX, n,
+      ps == nullptr ? &internal_mbstate
+                    : reinterpret_cast<internal::mbstate *>(ps));
+  if (!result.has_value()) {
+    libc_errno = result.error();
+    return static_cast<size_t>(-1); // error is reported through errno
+  }
+
+  return result.value();
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcsrtombs.h b/libc/src/wchar/wcsrtombs.h
new file mode 100644
index 0000000000000..d23573f5b9418
--- /dev/null
+++ b/libc/src/wchar/wcsrtombs.h
@@ -0,0 +1,24 @@
+//===-- Implementation header for wcsrtombs -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSRTOMBS_H
+#define LLVM_LIBC_SRC_WCHAR_WCSRTOMBS_H
+
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+size_t wcsrtombs(char *__restrict s, const wchar_t **__restrict pwcs, size_t n,
+                 mbstate_t *ps);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSRTOMBS_H
diff --git a/libc/src/wchar/wcstombs.cpp b/libc/src/wchar/wcstombs.cpp
new file mode 100644
index 0000000000000..c3793cbe912cd
--- /dev/null
+++ b/libc/src/wchar/wcstombs.cpp
@@ -0,0 +1,38 @@
+//===-- Implementation of wcstombs ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcstombs.h"
+
+#include "hdr/types/char32_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/wcsnrtombs.h"
+
+namespace LIBC_NAMESPACE_DECL {
+// Converts wcs via internal::wcsnrtombs using a function-private static state.
+LLVM_LIBC_FUNCTION(size_t, wcstombs,
+                   (char *__restrict s, const wchar_t *__restrict wcs,
+                    size_t n)) {
+  LIBC_CRASH_ON_NULLPTR(wcs);
+  static internal::mbstate internal_mbstate;
+  const wchar_t *wcs_ptr_copy = wcs; // the helper may modify its source pointer
+  auto result =
+      internal::wcsnrtombs(s, &wcs_ptr_copy, SIZE_MAX, n, &internal_mbstate);
+  if (!result.has_value()) {
+    libc_errno = result.error();
+    return static_cast<size_t>(-1); // error is reported through errno
+  }
+
+  return result.value();
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcstombs.h b/libc/src/wchar/wcstombs.h
new file mode 100644
index 0000000000000..cd0008a168d90
--- /dev/null
+++ b/libc/src/wchar/wcstombs.h
@@ -0,0 +1,22 @@
+//===-- Implementation header for wcstombs --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSTOMBS_H
+#define LLVM_LIBC_SRC_WCHAR_WCSTOMBS_H
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+size_t wcstombs(char *__restrict s, const wchar_t *__restrict wcs, size_t n);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSTOMBS_H
diff --git a/libc/test/src/__support/wchar/CMakeLists.txt b/libc/test/src/__support/wchar/CMakeLists.txt
index f0727451736f9..c112c83dbe9af 100644
--- a/libc/test/src/__support/wchar/CMakeLists.txt
+++ b/libc/test/src/__support/wchar/CMakeLists.txt
@@ -34,3 +34,20 @@ add_libc_test(
     libc.hdr.errno_macros
     libc.hdr.types.char32_t
 )
+
+add_libc_test(
+  wcsnrtombs_test
+  SUITE
+    libc-support-tests
+  SRCS
+    wcsnrtombs_test.cpp
+  DEPENDS
+    libc.src.__support.wchar.string_converter
+    libc.src.__support.wchar.character_converter
+    libc.src.__support.wchar.mbstate
+    libc.src.__support.error_or
+    libc.src.__support.wchar.wcsnrtombs
+    libc.hdr.errno_macros
+    libc.hdr.types.char32_t
+    libc.hdr.types.char8_t
+)
diff --git a/libc/test/src/__support/wchar/wcsnrtombs_test.cpp b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp
new file mode 100644
index 0000000000000..2d431eddf4a6f
--- /dev/null
+++ b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp
@@ -0,0 +1,213 @@
+//===-- Unittests for wcsnrtombs ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/errno_macros.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/error_or.h"
+#include "src/__support/macros/null_check.h"
+#include "src/__support/macros/properties/os.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/wcsnrtombs.h"
+#include "test/UnitTest/Test.h"
+
+// TODO: add support for 16-bit widechars to remove this macro
+#ifdef LIBC_TARGET_OS_IS_WINDOWS
+TEST(LlvmLibcWcsnrtombs, Windows) {
+  // pass on windows for now
+}
+
+#else
+
+TEST(LlvmLibcWcsnrtombs, AllMultibyteLengths) {
+  LIBC_NAMESPACE::internal::mbstate state;
+
+  /// clown emoji, sigma symbol, y with diaeresis, letter A
+  const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+                         static_cast<wchar_t>(0x2211),
+                         static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+                         static_cast<wchar_t>(0x0)};
+  const wchar_t *cur = src;
+  char mbs[11];
+
+  auto res = LIBC_NAMESPACE::internal::wcsnrtombs(mbs, &cur, 5, 11, &state);
+  ASSERT_TRUE(res.has_value());
+  ASSERT_EQ(res.value(), static_cast<size_t>(10));
+  ASSERT_EQ(cur, nullptr);
+  ASSERT_EQ(mbs[0], '\xF0'); // clown begin
+  ASSERT_EQ(mbs[1], '\x9F');
+  ASSERT_EQ(mbs[2], '\xA4');
+  ASSERT_EQ(mbs[3], '\xA1');
+  ASSERT_EQ(mbs[4], '\xE2'); // sigma begin
+  ASSERT_EQ(mbs[5], '\x88');
+  ASSERT_EQ(mbs[6], '\x91');
+  ASSERT_EQ(mbs[7], '\xC3'); // y diaeresis begin
+  ASSERT_EQ(mbs[8], '\xBF');
+  ASSERT_EQ(mbs[9], '\x41'); // A begin
+  ASSERT_EQ(mbs[10], '\0');  // null terminator
+}
+
+TEST(LlvmLibcWcsnrtombs, DestLimit) {
+  LIBC_NAMESPACE::internal::mbstate state1;
+
+  /// clown emoji, sigma symbol, y with diaeresis, letter A
+  const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+                         static_cast<wchar_t>(0x2211),
+                         static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+                         static_cast<wchar_t>(0x0)};
+  const wchar_t *cur = src;
+
+  char mbs[11];
+  for (int i = 0; i < 11; ++i)
+    mbs[i] = '\x01'; // dummy initial values
+
+  auto res = LIBC_NAMESPACE::internal::wcsnrtombs(mbs, &cur, 5, 4, &state1);
+  ASSERT_TRUE(res.has_value());
+  ASSERT_EQ(res.value(), static_cast<size_t>(4));
+  ASSERT_EQ(cur, src + 1);
+  ASSERT_EQ(mbs[0], '\xF0');
+  ASSERT_EQ(mbs[1], '\x9F');
+  ASSERT_EQ(mbs[2], '\xA4');
+  ASSERT_EQ(mbs[3], '\xA1');
+  ASSERT_EQ(mbs[4], '\x01'); // didn't write more than 4 bytes
+
+  for (int i = 0; i < 11; ++i)
+    mbs[i] = '\x01'; // dummy initial values
+  LIBC_NAMESPACE::internal::mbstate state2;
+
+  // not enough bytes to convert the second character, so only converts one
+  cur = src;
+  res = LIBC_NAMESPACE::internal::wcsnrtombs(mbs, &cur, 5, 6, &state2);
+  ASSERT_TRUE(res.has_value());
+  ASSERT_EQ(res.value(), static_cast<size_t>(4));
+  ASSERT_EQ(cur, src + 1);
+  ASSERT_EQ(mbs[0], '\xF0');
+  ASSERT_EQ(mbs[1], '\x9F');
+  ASSERT_EQ(mbs[2], '\xA4');
+  ASSERT_EQ(mbs[3], '\xA1');
+  ASSERT_EQ(mbs[4], '\x01');
+}
+
+TEST(LlvmLibcWcsnrtombs, SrcLimit) {
+  LIBC_NAMESPACE::internal::mbstate state;
+
+  /// clown emoji, sigma symbol, y with diaeresis, letter A
+  const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+                         static_cast<wchar_t>(0x2211),
+                         static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+                         static_cast<wchar_t>(0x0)};
+  const wchar_t *cur = src;
+
+  char mbs[11];
+  for (int i = 0; i < 11; ++i)
+    mbs[i] = '\x01'; // dummy initial values
+
+  auto res = LIBC_NAMESPACE::internal::wcsnrtombs(mbs, &cur, 2, 11, &state);
+  ASSERT_TRUE(res.has_value());
+  ASSERT_EQ(res.value(), static_cast<size_t>(7));
+  ASSERT_EQ(cur, src + 2);
+  ASSERT_EQ(mbs[0], '\xF0'); // clown begin
+  ASSERT_EQ(mbs[1], '\x9F');
+  ASSERT_EQ(mbs[2], '\xA4');
+  ASSERT_EQ(mbs[3], '\xA1');
+  ASSERT_EQ(mbs[4], '\xE2'); // sigma begin
+  ASSERT_EQ(mbs[5], '\x88');
+  ASSERT_EQ(mbs[6], '\x91');
+  ASSERT_EQ(mbs[7], '\x01');
+
+  res = LIBC_NAMESPACE::internal::wcsnrtombs(mbs + res.value(), &cur, 100, 11,
+                                             &state);
+  ASSERT_TRUE(res.has_value());
+  ASSERT_EQ(res.value(), static_cast<size_t>(3));
+  ASSERT_EQ(cur, nullptr);
+  ASSERT_EQ(mbs[0], '\xF0'); // clown begin
+  ASSERT_EQ(mbs[1], '\x9F');
+  ASSERT_EQ(mbs[2], '\xA4');
+  ASSERT_EQ(mbs[3], '\xA1');
+  ASSERT_EQ(mbs[4], '\xE2'); // sigma begin
+  ASSERT_EQ(mbs[5], '\x88');
+  ASSERT_EQ(mbs[6], '\x91');
+  ASSERT_EQ(mbs[7], '\xC3'); // y diaeresis begin
+  ASSERT_EQ(mbs[8], '\xBF');
+  ASSERT_EQ(mbs[9], '\x41'); // A begin
+  ASSERT_EQ(mbs[10], '\0');  // null terminator
+}
+
+TEST(LlvmLibcWcsnrtombs, NullDest) {
+  LIBC_NAMESPACE::internal::mbstate state1;
+
+  const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+                         static_cast<wchar_t>(0x2211),
+                         static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+                         static_cast<wchar_t>(0x0)};
+  const wchar_t *cur = src;
+
+  // n parameter ignored when dest is null
+  auto res = LIBC_NAMESPACE::internal::wcsnrtombs(nullptr, &cur, 5, 1, &state1);
+  ASSERT_TRUE(res.has_value());
+  ASSERT_EQ(res.value(), static_cast<size_t>(10));
+  ASSERT_EQ(cur, src); // pointer not updated when dest = null
+
+  LIBC_NAMESPACE::internal::mbstate state2;
+  res = LIBC_NAMESPACE::internal::wcsnrtombs(nullptr, &cur, 5, 100, &state2);
+  ASSERT_TRUE(res.has_value());
+  ASSERT_EQ(res.value(), static_cast<size_t>(10));
+  ASSERT_EQ(cur, src);
+}
+
+TEST(LlvmLibcWcsnrtombs, InvalidState) {
+  // this is more thoroughly tested by CharacterConverter
+  LIBC_NAMESPACE::internal::mbstate state;
+  state.total_bytes = 100;
+
+  const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+                         static_cast<wchar_t>(0x2211),
+                         static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+                         static_cast<wchar_t>(0x0)};
+  const wchar_t *cur = src;
+
+  // n parameter ignored when dest is null
+  auto res = LIBC_NAMESPACE::internal::wcsnrtombs(nullptr, &cur, 5, 1, &state);
+  ASSERT_FALSE(res.has_value());
+  ASSERT_EQ(res.error(), EINVAL);
+}
+
+TEST(LlvmLibcWcsnrtombs, InvalidCharacter) {
+  LIBC_NAMESPACE::internal::mbstate state1;
+
+  const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+                         static_cast<wchar_t>(0x2211),
+                         static_cast<wchar_t>(0x12ffff), // invalid widechar
+                         static_cast<wchar_t>(0x0)};
+  const wchar_t *cur = src;
+  char mbs[11];
+
+  // a 7-byte limit stops before the invalid third character is converted
+  auto res = LIBC_NAMESPACE::internal::wcsnrtombs(mbs, &cur, 5, 7, &state1);
+  ASSERT_TRUE(res.has_value());
+  ASSERT_EQ(res.value(), static_cast<size_t>(7));
+
+  LIBC_NAMESPACE::internal::mbstate state2;
+  cur = src;
+  res = LIBC_NAMESPACE::internal::wcsnrtombs(mbs, &cur, 5, 11, &state2);
+  ASSERT_FALSE(res.has_value());
+  ASSERT_EQ(res.error(), EILSEQ);
+}
+
+#if defined(LIBC_ADD_NULL_CHECKS) && !defined(LIBC_HAS_SANITIZER)
+TEST(LlvmLibcWcsnrtombs, NullSrc) {
+  EXPECT_DEATH(
+      [] {
+        LIBC_NAMESPACE::internal::mbstate state;
+        char mbs[10];
+        LIBC_NAMESPACE::internal::wcsnrtombs(mbs, nullptr, 1, 1, &state);
+      },
+      WITH_SIGNAL(-1));
+}
+#endif // LIBC_ADD_NULL_CHECKS && !LIBC_HAS_SANITIZER
+#endif // LIBC_TARGET_OS_IS_WINDOWS
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index 9b0c63ad8e07b..f420ecc465a53 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -139,6 +139,46 @@ add_libc_test(
     libc.hdr.types.wchar_t
 )
 
+add_libc_test(
+  wcstombs_test
+  SUITE
+    libc_wchar_unittests
+  SRCS
+    wcstombs_test.cpp
+  DEPENDS
+    libc.src.wchar.wcstombs
+    libc.test.UnitTest.ErrnoCheckingTest
+    libc.hdr.types.wchar_t
+)
+
+add_libc_test(
+  wcsrtombs_test
+  SUITE
+    libc_wchar_unittests
+  SRCS
+    wcsrtombs_test.cpp
+  DEPENDS
+    libc.src.wchar.wcsrtombs
+    libc.test.UnitTest.ErrnoCheckingTest
+    libc.hdr.types.wchar_t
+    libc.src.string.memset
+    libc.hdr.types.mbstate_t
+)
+
+add_libc_test(
+  wcsnrtombs_test
+  SUITE
+    libc_wchar_unittests
+  SRCS
+    wcsnrtombs_test.cpp
+  DEPENDS
+    libc.src.wchar.wcsnrtombs
+    libc.test.UnitTest.ErrnoCheckingTest
+    libc.hdr.types.wchar_t
+    libc.src.string.memset
+    libc.hdr.types.mbstate_t
+)
+
 add_libc_test(
   wmemset_test
   SUITE
diff --git a/libc/test/src/wchar/wcsnrtombs_test.cpp b/libc/test/src/wchar/wcsnrtombs_test.cpp
new file mode 100644
index 0000000000000..04cf426d31cc7
--- /dev/null
+++ b/libc/test/src/wchar/wcsnrtombs_test.cpp
@@ -0,0 +1,192 @@
+//===-- Unittests for wcsnrtombs ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/mbstate_t.h"
+#include "src/__support/macros/null_check.h"
+#include "src/string/memset.h"
+#include "src/wchar/wcsnrtombs.h"
+#include "test/UnitTest/ErrnoCheckingTest.h"
+#include "test/UnitTest/Test.h"
+
+using LlvmLibcWcsnrtombs = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
+
+// these tests are fairly simple as this function just calls into the internal
+// wcsnrtombs which is more thoroughly tested
+
+TEST_F(LlvmLibcWcsnrtombs, AllMultibyteLengths) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+
+  /// clown emoji, sigma symbol, y with diaeresis, letter A
+  const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+                         static_cast<wchar_t>(0x2211),
+                         static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+                         static_cast<wchar_t>(0x0)};
+  const wchar_t *cur = src;
+  char mbs[11];
+
+  ASSERT_EQ(LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 5, 11, &state),
+            static_cast<size_t>(10));
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(cur, nullptr);
+  ASSERT_EQ(mbs[0], '\xF0'); // clown begin
+  ASSERT_EQ(mbs[1], '\x9F');
+  ASSERT_EQ(mbs[2], '\xA4');
+  ASSERT_EQ(mbs[3], '\xA1');
+  ASSERT_EQ(mbs[4], '\xE2'); // sigma begin
+  ASSERT_EQ(mbs[5], '\x88');
+  ASSERT_EQ(mbs[6], '\x91');
+  ASSERT_EQ(mbs[7], '\xC3'); // y diaeresis begin
+  ASSERT_EQ(mbs[8], '\xBF');
+  ASSERT_EQ(mbs[9], '\x41'); // A begin
+  ASSERT_EQ(mbs[10], '\0');  // null terminator
+}
+
+TEST_F(LlvmLibcWcsnrtombs, DestLimit) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+
+  /// clown emoji, sigma symbol, y with diaeresis, letter A
+  const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+                         static_cast<wchar_t>(0x2211),
+                         static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+                         static_cast<wchar_t>(0x0)};
+  const wchar_t *cur = src;
+
+  char mbs[11];
+  LIBC_NAMESPACE::memset(mbs, '\x01', 11); // dummy initial values
+
+  ASSERT_EQ(LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 5, 4, &state),
+            static_cast<size_t>(4));
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(cur, src + 1);
+  ASSERT_EQ(mbs[0], '\xF0');
+  ASSERT_EQ(mbs[1], '\x9F');
+  ASSERT_EQ(mbs[2], '\xA4');
+  ASSERT_EQ(mbs[3], '\xA1');
+  ASSERT_EQ(mbs[4], '\x01'); // didn't write more than 4 bytes
+
+  LIBC_NAMESPACE::memset(mbs, '\x01', 11); // dummy initial values
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+  cur = src;
+
+  // not enough bytes to convert the second character, so only converts one
+  ASSERT_EQ(LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 5, 6, &state),
+            static_cast<size_t>(4));
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(cur, src + 1);
+  ASSERT_EQ(mbs[0], '\xF0');
+  ASSERT_EQ(mbs[1], '\x9F');
+  ASSERT_EQ(mbs[2], '\xA4');
+  ASSERT_EQ(mbs[3], '\xA1');
+  ASSERT_EQ(mbs[4], '\x01');
+}
+
+TEST_F(LlvmLibcWcsnrtombs, SrcLimit) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+
+  /// clown emoji, sigma symbol, y with diaeresis, letter A
+  const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+                         static_cast<wchar_t>(0x2211),
+                         static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+                         static_cast<wchar_t>(0x0)};
+  const wchar_t *cur = src;
+
+  char mbs[11];
+  LIBC_NAMESPACE::memset(mbs, '\x01', 11); // dummy initial values
+
+  auto res = LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 2, 11, &state);
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(res, static_cast<size_t>(7));
+  ASSERT_EQ(cur, src + 2);
+  ASSERT_EQ(mbs[0], '\xF0'); // clown begin
+  ASSERT_EQ(mbs[1], '\x9F');
+  ASSERT_EQ(mbs[2], '\xA4');
+  ASSERT_EQ(mbs[3], '\xA1');
+  ASSERT_EQ(mbs[4], '\xE2'); // sigma begin
+  ASSERT_EQ(mbs[5], '\x88');
+  ASSERT_EQ(mbs[6], '\x91');
+  ASSERT_EQ(mbs[7], '\x01');
+
+  res = LIBC_NAMESPACE::wcsnrtombs(mbs + res, &cur, 100, 11, &state);
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(res, static_cast<size_t>(3));
+  ASSERT_EQ(cur, nullptr);
+  ASSERT_EQ(mbs[0], '\xF0'); // clown begin
+  ASSERT_EQ(mbs[1], '\x9F');
+  ASSERT_EQ(mbs[2], '\xA4');
+  ASSERT_EQ(mbs[3], '\xA1');
+  ASSERT_EQ(mbs[4], '\xE2'); // sigma begin
+  ASSERT_EQ(mbs[5], '\x88');
+  ASSERT_EQ(mbs[6], '\x91');
+  ASSERT_EQ(mbs[7], '\xC3'); // y diaeresis begin
+  ASSERT_EQ(mbs[8], '\xBF');
+  ASSERT_EQ(mbs[9], '\x41'); // A begin
+  ASSERT_EQ(mbs[10], '\0');  // null terminator
+}
+
+TEST_F(LlvmLibcWcsnrtombs, ErrnoTest) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+
+  const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+                         static_cast<wchar_t>(0x2211),
+                         static_cast<wchar_t>(0x12ffff), // invalid widechar
+                         static_cast<wchar_t>(0x0)};
+  const wchar_t *cur = src;
+  char mbs[11];
+
+  // 7 bytes hold the two valid characters, so no error is reported yet
+  ASSERT_EQ(LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 5, 7, &state),
+            static_cast<size_t>(7));
+  ASSERT_ERRNO_SUCCESS();
+
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+  ASSERT_EQ(LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 5, 100, &state),
+            static_cast<size_t>(-1));
+  ASSERT_ERRNO_EQ(EILSEQ);
+}
+
+TEST_F(LlvmLibcWcsnrtombs, NullState) {
+  // this test is the same as DestLimit except it uses a nullptr mbstate*
+
+  /// clown emoji, sigma symbol, y with diaeresis, letter A
+  const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+                         static_cast<wchar_t>(0x2211),
+                         static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+                         static_cast<wchar_t>(0x0)};
+  const wchar_t *cur = src;
+
+  char mbs[11];
+  LIBC_NAMESPACE::memset(mbs, '\x01', 11); // dummy initial values
+
+  ASSERT_EQ(LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 5, 4, nullptr),
+            static_cast<size_t>(4));
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(cur, src + 1);
+  ASSERT_EQ(mbs[0], '\xF0');
+  ASSERT_EQ(mbs[1], '\x9F');
+  ASSERT_EQ(mbs[2], '\xA4');
+  ASSERT_EQ(mbs[3], '\xA1');
+  ASSERT_EQ(mbs[4], '\x01'); // didn't write more than 4 bytes
+
+  LIBC_NAMESPACE::memset(mbs, '\x01', 11); // dummy initial values
+
+  // not enough bytes to convert the second character, so only converts one
+  cur = src;
+  ASSERT_EQ(LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 5, 6, nullptr),
+            static_cast<size_t>(4));
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(cur, src + 1);
+  ASSERT_EQ(mbs[0], '\xF0');
+  ASSERT_EQ(mbs[1], '\x9F');
+  ASSERT_EQ(mbs[2], '\xA4');
+  ASSERT_EQ(mbs[3], '\xA1');
+  ASSERT_EQ(mbs[4], '\x01');
+}
diff --git a/libc/test/src/wchar/wcsrtombs_test.cpp b/libc/test/src/wchar/wcsrtombs_test.cpp
new file mode 100644
index 0000000000000..65c69e63aee0c
--- /dev/null
+++ b/libc/test/src/wchar/wcsrtombs_test.cpp
@@ -0,0 +1,150 @@
+//===-- Unittests for wcsrtombs -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/mbstate_t.h"
+#include "src/__support/macros/null_check.h"
+#include "src/string/memset.h"
+#include "src/wchar/wcsrtombs.h"
+#include "test/UnitTest/ErrnoCheckingTest.h"
+#include "test/UnitTest/Test.h"
+
+using LlvmLibcWcsrtombs = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
+
+// these tests are fairly simple as this function just calls into the internal
+// wcsnrtombs which is more thoroughly tested
+
+TEST_F(LlvmLibcWcsrtombs, AllMultibyteLengths) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+
+  /// clown emoji, sigma symbol, y with diaeresis, letter A
+  const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+                         static_cast<wchar_t>(0x2211),
+                         static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+                         static_cast<wchar_t>(0x0)};
+  const wchar_t *cur = src;
+  char mbs[11];
+
+  ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 11, &state),
+            static_cast<size_t>(10));
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(cur, nullptr);
+  ASSERT_EQ(mbs[0], '\xF0'); // clown begin
+  ASSERT_EQ(mbs[1], '\x9F');
+  ASSERT_EQ(mbs[2], '\xA4');
+  ASSERT_EQ(mbs[3], '\xA1');
+  ASSERT_EQ(mbs[4], '\xE2'); // sigma begin
+  ASSERT_EQ(mbs[5], '\x88');
+  ASSERT_EQ(mbs[6], '\x91');
+  ASSERT_EQ(mbs[7], '\xC3'); // y diaeresis begin
+  ASSERT_EQ(mbs[8], '\xBF');
+  ASSERT_EQ(mbs[9], '\x41'); // A begin
+  ASSERT_EQ(mbs[10], '\0');  // null terminator
+}
+
+TEST_F(LlvmLibcWcsrtombs, DestLimit) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+
+  /// clown emoji, sigma symbol, y with diaeresis, letter A
+  const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+                         static_cast<wchar_t>(0x2211),
+                         static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+                         static_cast<wchar_t>(0x0)};
+  const wchar_t *cur = src;
+
+  char mbs[11];
+  LIBC_NAMESPACE::memset(mbs, '\x01', 11); // dummy initial values
+
+  ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 4, &state),
+            static_cast<size_t>(4));
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(cur, src + 1);
+  ASSERT_EQ(mbs[0], '\xF0');
+  ASSERT_EQ(mbs[1], '\x9F');
+  ASSERT_EQ(mbs[2], '\xA4');
+  ASSERT_EQ(mbs[3], '\xA1');
+  ASSERT_EQ(mbs[4], '\x01'); // didn't write more than 4 bytes
+
+  LIBC_NAMESPACE::memset(mbs, '\x01', 11); // dummy initial values
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+
+  // not enough bytes to convert the second character, so only converts one
+  cur = src;
+  ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 6, &state),
+            static_cast<size_t>(4));
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(cur, src + 1);
+  ASSERT_EQ(mbs[0], '\xF0');
+  ASSERT_EQ(mbs[1], '\x9F');
+  ASSERT_EQ(mbs[2], '\xA4');
+  ASSERT_EQ(mbs[3], '\xA1');
+  ASSERT_EQ(mbs[4], '\x01');
+}
+
+TEST_F(LlvmLibcWcsrtombs, ErrnoTest) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+
+  const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+                         static_cast<wchar_t>(0x2211),
+                         static_cast<wchar_t>(0x12ffff), // invalid widechar
+                         static_cast<wchar_t>(0x0)};
+  const wchar_t *cur = src;
+  char mbs[11];
+
+  // 7 bytes hold the two valid characters, so no error is reported yet
+  ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 7, &state),
+            static_cast<size_t>(7));
+  ASSERT_ERRNO_SUCCESS();
+
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+  cur = src;
+
+  ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 100, &state),
+            static_cast<size_t>(-1));
+  ASSERT_ERRNO_EQ(EILSEQ);
+}
+
+TEST_F(LlvmLibcWcsrtombs, NullState) {
+  // this test is the same as DestLimit except it uses a nullptr mbstate*
+
+  /// clown emoji, sigma symbol, y with diaeresis, letter A
+  const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+                         static_cast<wchar_t>(0x2211),
+                         static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+                         static_cast<wchar_t>(0x0)};
+  const wchar_t *cur = src;
+
+  char mbs[11];
+  LIBC_NAMESPACE::memset(mbs, '\x01', 11); // dummy initial values
+
+  ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 4, nullptr),
+            static_cast<size_t>(4));
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(cur, src + 1);
+  ASSERT_EQ(mbs[0], '\xF0');
+  ASSERT_EQ(mbs[1], '\x9F');
+  ASSERT_EQ(mbs[2], '\xA4');
+  ASSERT_EQ(mbs[3], '\xA1');
+  ASSERT_EQ(mbs[4], '\x01'); // didn't write more than 4 bytes
+
+  LIBC_NAMESPACE::memset(mbs, '\x01', 11); // dummy initial values
+
+  // not enough bytes to convert the second character, so only converts one
+  cur = src;
+  ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 6, nullptr),
+            static_cast<size_t>(4));
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(cur, src + 1);
+  ASSERT_EQ(mbs[0], '\xF0');
+  ASSERT_EQ(mbs[1], '\x9F');
+  ASSERT_EQ(mbs[2], '\xA4');
+  ASSERT_EQ(mbs[3], '\xA1');
+  ASSERT_EQ(mbs[4], '\x01');
+}
diff --git a/libc/test/src/wchar/wcstombs_test.cpp b/libc/test/src/wchar/wcstombs_test.cpp
new file mode 100644
index 0000000000000..61e0873dc9711
--- /dev/null
+++ b/libc/test/src/wchar/wcstombs_test.cpp
@@ -0,0 +1,84 @@
+//===-- Unittests for wcstombs --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcstombs.h"
+#include "test/UnitTest/ErrnoCheckingTest.h"
+#include "test/UnitTest/Test.h"
+
+using LlvmLibcWcstombs = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
+
+// these tests are fairly simple as this function just calls into the internal
+// wcsnrtombs which is more thoroughly tested
+
+TEST_F(LlvmLibcWcstombs, AllMultibyteLengths) {
+  /// clown emoji, sigma symbol, y with diaeresis, letter A
+  const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+                         static_cast<wchar_t>(0x2211),
+                         static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+                         static_cast<wchar_t>(0x0)};
+  char mbs[11];
+
+  ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 11), static_cast<size_t>(10));
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(mbs[0], '\xF0'); // clown begin
+  ASSERT_EQ(mbs[1], '\x9F');
+  ASSERT_EQ(mbs[2], '\xA4');
+  ASSERT_EQ(mbs[3], '\xA1');
+  ASSERT_EQ(mbs[4], '\xE2'); // sigma begin
+  ASSERT_EQ(mbs[5], '\x88');
+  ASSERT_EQ(mbs[6], '\x91');
+  ASSERT_EQ(mbs[7], '\xC3'); // y diaeresis begin
+  ASSERT_EQ(mbs[8], '\xBF');
+  ASSERT_EQ(mbs[9], '\x41'); // A begin
+  ASSERT_EQ(mbs[10], '\0');  // null terminator
+}
+
+TEST_F(LlvmLibcWcstombs, DestLimit) {
+  /// clown emoji, sigma symbol, y with diaeresis, letter A
+  const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+                         static_cast<wchar_t>(0x2211),
+                         static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+                         static_cast<wchar_t>(0x0)};
+  char mbs[11];
+  for (int i = 0; i < 11; ++i)
+    mbs[i] = '\x01'; // dummy initial values
+
+  ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 4), static_cast<size_t>(4));
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(mbs[0], '\xF0');
+  ASSERT_EQ(mbs[1], '\x9F');
+  ASSERT_EQ(mbs[2], '\xA4');
+  ASSERT_EQ(mbs[3], '\xA1');
+  ASSERT_EQ(mbs[4], '\x01'); // didn't write more than 4 bytes
+
+  for (int i = 0; i < 11; ++i)
+    mbs[i] = '\x01'; // dummy initial values
+
+  // not enough bytes to convert the second character, so only converts one
+  ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 6), static_cast<size_t>(4));
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(mbs[0], '\xF0');
+  ASSERT_EQ(mbs[1], '\x9F');
+  ASSERT_EQ(mbs[2], '\xA4');
+  ASSERT_EQ(mbs[3], '\xA1');
+  ASSERT_EQ(mbs[4], '\x01');
+}
+
+TEST_F(LlvmLibcWcstombs, ErrnoTest) {
+  const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+                         static_cast<wchar_t>(0x2211),
+                         static_cast<wchar_t>(0x12ffff), // invalid widechar
+                         static_cast<wchar_t>(0x0)};
+  char mbs[11];
+
+  // 7 bytes hold the two valid characters, so no error is reported yet
+  ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 7), static_cast<size_t>(7));
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 100), static_cast<size_t>(-1));
+  ASSERT_ERRNO_EQ(EILSEQ);
+}