Skip to content

[libc] wchar string conversion functions mb to wc #149423

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Jul 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions libc/config/linux/x86_64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1266,6 +1266,9 @@ if(LLVM_LIBC_FULL_BUILD)
libc.src.wchar.mbrlen
libc.src.wchar.mbrtowc
libc.src.wchar.mbtowc
libc.src.wchar.mbstowcs
libc.src.wchar.mbsrtowcs
libc.src.wchar.mbsnrtowcs
libc.src.wchar.wcrtomb
libc.src.wchar.wctomb
libc.src.wchar.wcstombs
Expand Down
27 changes: 27 additions & 0 deletions libc/include/wchar.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,33 @@ functions:
- type: wchar_t *__restrict
- type: const char *__restrict
- type: size_t
- name: mbsnrtowcs
standards:
- stdc
return_type: size_t
arguments:
- type: wchar_t *__restrict
- type: const char **__restrict
- type: size_t
- type: size_t
- type: mbstate_t *__restrict
- name: mbsrtowcs
standards:
- stdc
return_type: size_t
arguments:
- type: wchar_t *__restrict
- type: const char **__restrict
- type: size_t
- type: mbstate_t *__restrict
- name: mbstowcs
standards:
- stdc
return_type: size_t
arguments:
- type: wchar_t *__restrict
- type: const char *__restrict
- type: size_t
- name: mblen
standards:
- stdc
Expand Down
33 changes: 25 additions & 8 deletions libc/src/__support/wchar/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,31 @@ add_object_library(
SRCS
mbrtowc.cpp
DEPENDS
libc.hdr.errno_macros
libc.hdr.types.wchar_t
libc.hdr.types.size_t
libc.src.__support.common
libc.src.__support.error_or
libc.src.__support.macros.config
.character_converter
.mbstate
libc.hdr.errno_macros
libc.hdr.types.wchar_t
libc.hdr.types.size_t
libc.src.__support.common
libc.src.__support.error_or
libc.src.__support.macros.config
.character_converter
.mbstate
)

add_header_library(
mbsnrtowcs
HDRS
mbsnrtowcs.h
DEPENDS
libc.hdr.errno_macros
libc.hdr.types.wchar_t
libc.hdr.types.size_t
libc.src.__support.common
libc.src.__support.error_or
libc.src.__support.macros.config
libc.src.__support.macros.null_check
.character_converter
.mbstate
.string_converter
)

add_header_library(
Expand Down
66 changes: 66 additions & 0 deletions libc/src/__support/wchar/mbsnrtowcs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
//===-- Implementation for mbsnrtowcs function ------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSNRTOWCS_H
#define LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSNRTOWCS_H

#include "hdr/errno_macros.h"
#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"
#include "src/__support/error_or.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/null_check.h"
#include "src/__support/wchar/character_converter.h"
#include "src/__support/wchar/mbstate.h"
#include "src/__support/wchar/string_converter.h"

namespace LIBC_NAMESPACE_DECL {
namespace internal {

// Converts the multibyte sequence at `*src` into wide characters.
//
// Characters are pulled one at a time from a StringConverter built over `*src`
// with `len` and `nmc` as its limits and `ps` as the conversion state.
//
//   dst  Output buffer. May be null, in which case nothing is stored and
//        `*src` is never modified.
//   src  Must not be null (crashes otherwise). When `dst` is non-null it is
//        set to nullptr once the null terminator has been converted, or
//        advanced by the converter's source index when the conversion limit
//        is reached first.
//   ps   Conversion state; EINVAL is returned if it is not a valid state.
//
// Returns the number of wide characters produced, not counting the null
// terminator, or the error code reported by the converter.
LIBC_INLINE static ErrorOr<size_t> mbsnrtowcs(wchar_t *__restrict dst,
                                              const char **__restrict src,
                                              size_t nmc, size_t len,
                                              mbstate *__restrict ps) {
  LIBC_CRASH_ON_NULLPTR(src);

  // Refuse to start from a conversion state that is not valid.
  CharacterConverter state_probe(ps);
  if (!state_probe.isValidState())
    return Error(EINVAL);

  const bool store_output = dst != nullptr;
  StringConverter<char8_t> converter(
      reinterpret_cast<const char8_t *>(*src), ps, len, nmc);

  size_t produced = 0;
  for (ErrorOr<char32_t> next = converter.popUTF32();;
       next = converter.popUTF32()) {
    if (!next.has_value()) {
      // Anything other than -1 is a genuine conversion error.
      if (next.error() != -1)
        return Error(next.error());

      // -1 signals that the conversion limit was hit: report how far the
      // source was consumed so the caller can resume from there.
      if (store_output)
        *src += converter.getSourceIndex();
      return produced;
    }

    if (store_output)
      dst[produced] = next.value();

    // The terminator is written to dst but excluded from the returned count.
    if (next.value() == L'\0') {
      if (store_output)
        *src = nullptr;
      return produced;
    }

    ++produced;
  }
}

} // namespace internal

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSNRTOWCS_H
48 changes: 48 additions & 0 deletions libc/src/wchar/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,54 @@ add_entrypoint_object(
libc.src.__support.wchar.mbstate
)

add_entrypoint_object(
mbstowcs
SRCS
mbstowcs.cpp
HDRS
mbstowcs.h
DEPENDS
libc.hdr.types.size_t
libc.hdr.types.wchar_t
libc.src.__support.common
libc.src.__support.macros.config
libc.src.__support.libc_errno
libc.src.__support.wchar.mbstate
libc.src.__support.wchar.mbsnrtowcs
)

# Public mbsrtowcs entrypoint. mbsrtowcs.h includes hdr/types/mbstate_t.h, so
# the mbstate_t proxy header is listed as an explicit dependency.
add_entrypoint_object(
  mbsrtowcs
  SRCS
    mbsrtowcs.cpp
  HDRS
    mbsrtowcs.h
  DEPENDS
    libc.hdr.types.mbstate_t
    libc.hdr.types.size_t
    libc.hdr.types.wchar_t
    libc.src.__support.common
    libc.src.__support.macros.config
    libc.src.__support.libc_errno
    libc.src.__support.wchar.mbstate
    libc.src.__support.wchar.mbsnrtowcs
)

# Public mbsnrtowcs entrypoint. mbsnrtowcs.h includes hdr/types/mbstate_t.h, so
# the mbstate_t proxy header is listed as an explicit dependency.
add_entrypoint_object(
  mbsnrtowcs
  SRCS
    mbsnrtowcs.cpp
  HDRS
    mbsnrtowcs.h
  DEPENDS
    libc.hdr.types.mbstate_t
    libc.hdr.types.size_t
    libc.hdr.types.wchar_t
    libc.src.__support.common
    libc.src.__support.macros.config
    libc.src.__support.libc_errno
    libc.src.__support.wchar.mbstate
    libc.src.__support.wchar.mbsnrtowcs
)

add_entrypoint_object(
wcstombs
SRCS
Expand Down
39 changes: 39 additions & 0 deletions libc/src/wchar/mbsnrtowcs.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
//===-- Implementation of mbsnrtowcs --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/wchar/mbsnrtowcs.h"

#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"
#include "src/__support/libc_errno.h"
#include "src/__support/macros/config.h"
#include "src/__support/wchar/mbsnrtowcs.h"
#include "src/__support/wchar/mbstate.h"

namespace LIBC_NAMESPACE_DECL {

LLVM_LIBC_FUNCTION(size_t, mbsnrtowcs,
(wchar_t *__restrict dst, const char **__restrict src,
size_t nmc, size_t len, mbstate_t *__restrict ps)) {
static internal::mbstate internal_mbstate;
// If destination is null, ignore len
len = dst == nullptr ? SIZE_MAX : len;
auto ret = internal::mbsnrtowcs(
dst, src, nmc, len,
ps == nullptr ? &internal_mbstate
: reinterpret_cast<internal::mbstate *>(ps));
if (!ret.has_value()) {
// Encoding failure
libc_errno = ret.error();
return -1;
}
return ret.value();
}

} // namespace LIBC_NAMESPACE_DECL
24 changes: 24 additions & 0 deletions libc/src/wchar/mbsnrtowcs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
//===-- Implementation header for mbsnrtowcs ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_WCHAR_MBSNRTOWCS_H
#define LLVM_LIBC_SRC_WCHAR_MBSNRTOWCS_H

#include "hdr/types/mbstate_t.h"
#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/macros/config.h"

namespace LIBC_NAMESPACE_DECL {

// Converts the multibyte sequence at `*src` into wide characters stored in
// `dst`. `nmc` and `len` are forwarded to the converter as its limits; `len`
// is ignored when `dst` is null. A null `ps` selects an internal conversion
// state. Returns the number of wide characters converted, excluding the null
// terminator, or (size_t)-1 with errno set on failure.
size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
size_t nmc, size_t len, mbstate_t *__restrict ps);

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_WCHAR_MBSNRTOWCS_H
39 changes: 39 additions & 0 deletions libc/src/wchar/mbsrtowcs.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
//===-- Implementation of mbsrtowcs ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/wchar/mbsrtowcs.h"

#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"
#include "src/__support/libc_errno.h"
#include "src/__support/macros/config.h"
#include "src/__support/wchar/mbsnrtowcs.h"
#include "src/__support/wchar/mbstate.h"

namespace LIBC_NAMESPACE_DECL {

LLVM_LIBC_FUNCTION(size_t, mbsrtowcs,
(wchar_t *__restrict dst, const char **__restrict src,
size_t len, mbstate_t *__restrict ps)) {
static internal::mbstate internal_mbstate;
// If destination is null, ignore len
len = dst == nullptr ? SIZE_MAX : len;
auto ret = internal::mbsnrtowcs(
dst, src, SIZE_MAX, len,
ps == nullptr ? &internal_mbstate
: reinterpret_cast<internal::mbstate *>(ps));
if (!ret.has_value()) {
// Encoding failure
libc_errno = ret.error();
return -1;
}
return ret.value();
}

} // namespace LIBC_NAMESPACE_DECL
24 changes: 24 additions & 0 deletions libc/src/wchar/mbsrtowcs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
//===-- Implementation header for mbsrtowcs -------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H
#define LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H

#include "hdr/types/mbstate_t.h"
#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/macros/config.h"

namespace LIBC_NAMESPACE_DECL {

// Converts the multibyte sequence at `*src` into wide characters stored in
// `dst`. `len` is forwarded to the converter as its limit and is ignored when
// `dst` is null. A null `ps` selects an internal conversion state. Returns the
// number of wide characters converted, excluding the null terminator, or
// (size_t)-1 with errno set on failure.
size_t mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src,
size_t len, mbstate_t *__restrict ps);

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H
38 changes: 38 additions & 0 deletions libc/src/wchar/mbstowcs.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
//===-- Implementation of mbstowcs ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/wchar/mbstowcs.h"

#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"
#include "src/__support/libc_errno.h"
#include "src/__support/macros/config.h"
#include "src/__support/wchar/mbsnrtowcs.h"
#include "src/__support/wchar/mbstate.h"

namespace LIBC_NAMESPACE_DECL {

LLVM_LIBC_FUNCTION(size_t, mbstowcs,
(wchar_t *__restrict pwcs, const char *__restrict s,
size_t n)) {
// If destination is null, ignore n
n = pwcs == nullptr ? SIZE_MAX : n;
static internal::mbstate internal_mbstate;
const char *temp = s;
auto ret = internal::mbsnrtowcs(pwcs, &temp, SIZE_MAX, n, &internal_mbstate);

if (!ret.has_value()) {
// Encoding failure
libc_errno = ret.error();
return -1;
}
return ret.value();
}

} // namespace LIBC_NAMESPACE_DECL
22 changes: 22 additions & 0 deletions libc/src/wchar/mbstowcs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//===-- Implementation header for mbstowcs --------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_WCHAR_MBSTOWCS_H
#define LLVM_LIBC_SRC_WCHAR_MBSTOWCS_H

#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/macros/config.h"

namespace LIBC_NAMESPACE_DECL {

// Converts the multibyte string `s` into wide characters stored in `pwcs`.
// `n` is forwarded to the converter as its limit and is ignored when `pwcs` is
// null. Returns the number of wide characters converted, excluding the null
// terminator, or (size_t)-1 with errno set on failure.
size_t mbstowcs(wchar_t *__restrict pwcs, const char *__restrict s, size_t n);

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_WCHAR_MBSTOWCS_H
Loading
Loading