Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
d6fc1ab
transpile: thread expected type through convert_expr
fw-immunant Jul 5, 2025
4b92269
transpile: thread expected types through convert_exprs
fw-immunant Jul 5, 2025
3905f26
transpile: cfg: pass down return type
fw-immunant Jul 5, 2025
f472a99
transpile: pass return type through convert_function_body
fw-immunant Jul 5, 2025
bcbd8ee
transpile: use expected type when translating sizeof/alignof
fw-immunant Jul 5, 2025
b7548d8
transpile: cast DeclRefs to expected type
fw-immunant Jul 5, 2025
108bd26
transpile: use expected type to cast OffsetOf exprs
fw-immunant Jul 5, 2025
656d725
transpile: use expected type when translating unary operators
fw-immunant Jul 10, 2025
6a99bdf
transpile: use expected types when translating binary operators
fw-immunant Jul 5, 2025
bf909d5
transpile: try using expected type even when translating ArrayToPoint…
fw-immunant Aug 6, 2025
2ba191b
transpile: get expected types for direct function calls from decl
fw-immunant Jul 5, 2025
3e11a56
transpile: always cast bitfield accesses to expected type
fw-immunant Jul 5, 2025
7eaa39f
transpile: propagate expected types down to translation of literals
fw-immunant Jul 5, 2025
3b9a408
transpile: add synthetic CTypeKinds for sized types
fw-immunant Jul 1, 2025
dad6e71
create synthetic AST nodes to enable looking up Rust fixed-size types
fw-immunant Jul 9, 2025
db3eee8
transpile: convert typedefs of target-dependent macros to their synth…
fw-immunant Jun 28, 2025
8a0cab6
transpile: keep portable types for typedefs in stdint.h, sys/types.h,…
fw-immunant Jul 1, 2025
f9d6197
transpile: keep portable type for ssize_t
fw-immunant Jul 10, 2025
21aee81
transpile: operators: homogenize types of more binary operators
fw-immunant Jul 7, 2025
ad7309d
ast-exporter: prefer non-implicit declarations when exporting functio…
fw-immunant Jul 5, 2025
d362e77
ast-exporter: add hack for strlen()
fw-immunant Jul 7, 2025
772d269
tests/arrays: use libc crate for wchar_t
fw-immunant Jul 7, 2025
d67eb49
transpile: cast field accesses to expected type
fw-immunant Jul 8, 2025
b52fbcb
transpile: operators: also check if return type is pointer
fw-immunant Jul 9, 2025
1c6d170
transpile: operators: cast the rhs of assignment operators if necessary
fw-immunant Jul 9, 2025
0860531
transpile: cast result of Call expressions if necessary
fw-immunant Jul 9, 2025
50ec00d
transpile: document convert_exprs
fw-immunant Jul 9, 2025
133f647
transpile: create pointer types for modified function pointer types
fw-immunant Jul 8, 2025
98456e2
transpile: apply prefix of expected types based on variadic fn signat…
fw-immunant Jul 9, 2025
0d855b2
transpile: infer arg types from type of called fn ptr
fw-immunant Jul 9, 2025
ce736f8
transpile: handle fiddly cases around function casts
fw-immunant Jul 9, 2025
e706593
transpile: operators: propagate expected type to rhs
fw-immunant Jul 9, 2025
e5bba30
transpile: add cases for macOS header locations
fw-immunant Jul 10, 2025
9367ed2
transpile: add cases for macOS header typedefs
fw-immunant Jul 10, 2025
c6fce75
transpile: fix ssize_t conversion on macOS
fw-immunant Jul 10, 2025
db6d851
transpile: snapshot: don't build tests that use libc
fw-immunant Jul 10, 2025
b6c70ca
transpile: snapshot: add test for type mapping
fw-immunant Jul 10, 2025
bb3a1eb
transpile: tests: add failing portion of arrays/incomplete_arrays.c a…
fw-immunant Jul 10, 2025
0dbf919
transpile: structs: propagate field type down to field initializers
fw-immunant Jul 10, 2025
da42dd2
transpile: operators: propagate expected type to lhs of shifts
fw-immunant Jul 10, 2025
befdab6
transpile: operators: assert to clarify compute/result type semantics
fw-immunant Jul 16, 2025
b4ad7a4
transpile: iterators: generalize NodeVisitor trait
fw-immunant Jul 17, 2025
15b0470
transpile: bubble up unary/binary operator argument types
fw-immunant Jul 17, 2025
df71dee
transpile: prefer synthetic kinds when propagating binary expr types
fw-immunant Jul 17, 2025
5aa6333
transpile: rectify naming conventions around resolved types
fw-immunant Jul 25, 2025
291521c
transpile: tests: snapshots: warn when skipping compiling
fw-immunant Jul 25, 2025
cb7ace2
transpile: refactor and document `TypedAstContext::tys_of_params`
fw-immunant Aug 1, 2025
d91b8e5
transpile: rename `TypedAstContext` methods function -> fn
fw-immunant Aug 1, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 54 additions & 3 deletions c2rust-ast-exporter/src/AstExporter.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
#include <algorithm>
#include <clang/AST/Type.h>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <iterator>
#include <optional>
#include <set>
#include <unordered_map>
#include <unordered_set>
Expand Down Expand Up @@ -98,7 +100,6 @@ Optional<APSInt> getIntegerConstantExpr(const Expr &E, const ASTContext &Ctx) {
#endif // CLANG_VERSION_MAJOR
}
#else
#include <optional>
std::optional<APSInt> getIntegerConstantExpr(const Expr &E,
const ASTContext &Ctx) {
return E.getIntegerConstantExpr(Ctx);
Expand Down Expand Up @@ -1869,6 +1870,15 @@ class TranslateASTVisitor final
auto body =
FD->getBody(paramsFD); // replaces its argument if body exists

// Avoid getting params from an implicit decl if a subsequent non-implicit decl exists.
// Implicit decls will not have names for params, but more importantly, they will never
// reference header-declared typedefs, so we would miss the fact that e.g. malloc is
// declared to accept `size_t` in its stdlib.h declaration, while its implicit declaration
// accepts the built-in `unsigned long`.
if (FD->isImplicit()) {
paramsFD = FD->getMostRecentDecl();
}

std::vector<void *> childIds;
for (auto x : paramsFD->parameters()) {
auto cd = x->getCanonicalDecl();
Expand All @@ -1878,7 +1888,8 @@ class TranslateASTVisitor final

childIds.push_back(body);

auto functionType = FD->getType();
// We prefer non-implicit decls for their type information.
auto functionType = paramsFD->getType();
auto span = paramsFD->getSourceRange();
encode_entry(
FD, TagFunctionDecl, span, childIds, functionType,
Expand Down Expand Up @@ -2257,6 +2268,38 @@ class TranslateASTVisitor final

bool VisitTypedefNameDecl(TypedefNameDecl *D) {
auto typeForDecl = D->getUnderlyingType();
// If this typedef is to a compiler-builtin macro with target-dependent definition, note the
// macro's name so we can map to the appropriate target-independent name (e.g. `size_t`).
auto targetDependentMacro = [&]() -> std::optional<std::string> {
TypeSourceInfo *TSI = D->getTypeSourceInfo();
if (!TSI) {
return std::nullopt;
}

TypeLoc typeLoc = TSI->getTypeLoc();
SourceLocation loc = typeLoc.getBeginLoc();

if (loc.isInvalid()) {
return std::nullopt;
}

// Check if the location is from a macro expansion
if (!loc.isMacroID()) {
return std::nullopt;
}

auto macroName = Lexer::getImmediateMacroName(loc, Context->getSourceManager(), Context->getLangOpts());
// Double-underscore indicates that name is reserved for the implementation,
// so this should not interfere with user code.
#if CLANG_VERSION_MAJOR >= 18
if (!macroName.starts_with("__")) {
#else
if (!macroName.startswith("__")) {
#endif // CLANG_VERSION_MAJOR
return std::nullopt;
}
return std::make_optional(std::string(macroName));
}();
if (!D->isCanonicalDecl()) {
// Emit non-canonical decl so we have a placeholder to attach comments to
std::vector<void *> childIds = {D->getCanonicalDecl()};
Expand All @@ -2267,11 +2310,17 @@ class TranslateASTVisitor final

std::vector<void *> childIds;
encode_entry(D, TagTypedefDecl, childIds, typeForDecl,
[D](CborEncoder *array) {
[D, targetDependentMacro](CborEncoder *array) {
auto name = D->getNameAsString();
cbor_encode_string(array, name);

cbor_encode_boolean(array, D->isImplicit());

if (targetDependentMacro) {
cbor_encode_string(array, targetDependentMacro->data());
} else {
cbor_encode_null(array);
}
});

typeEncoder.VisitQualType(typeForDecl);
Expand Down Expand Up @@ -2746,6 +2795,8 @@ constexpr size_t size(const _Tp (&)[_Sz]) noexcept {
// parsed and string literals are always treated as constant.
static std::vector<const char *> augment_argv(int argc, const char *argv[]) {
const char *const extras[] = {
"-extra-arg=-fno-builtin-strlen", // builtin strlen wrongly returns
// unsigned long despite declaration
"-extra-arg=-fparse-all-comments", // always parse comments
"-extra-arg=-Wwrite-strings", // string literals are constant
"-extra-arg=-D_FORTIFY_SOURCE=0", // we don't want to use checked
Expand Down
151 changes: 150 additions & 1 deletion c2rust-transpile/src/c_ast/conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,14 @@ impl ConversionContext {
self.typed_context.comments.push(comment);
}

// Add Rust fixed-size types.
for rust_type_kind in CTypeKind::PULLBACK_KINDS {
let new_id = self.id_mapper.fresh_id();
self.add_type(new_id, not_located(rust_type_kind));
self.processed_nodes
.insert(new_id, self::node_types::OTHER_TYPE);
}

// Continue popping Clang nodes off of the stack of nodes we have promised to visit
while let Some((node_id, expected_ty)) = self.visit_as.pop() {
// Check if we've already processed this node. If so, ascertain that it has the right
Expand Down Expand Up @@ -472,6 +480,30 @@ impl ConversionContext {
self.visit_node(untyped_context, node_id, new_id, expected_ty)
}

// Function declarations' types look through typedefs, but we want to use the types with
// typedefs intact in some cases during translation. To ensure that these types exist in the
// `TypedAstContext`, iterate over all function decls, compute their adjusted type using
// argument types from arg declarations, and then ensure the existence of both this type and
// the type of pointers to it.
for (_decl_id, located_kind) in self.typed_context.c_decls.iter() {
if let kind @ CDeclKind::Function { .. } = &located_kind.kind {
let new_kind = self.typed_context.fn_decl_ty_with_declared_args(kind);
Comment on lines +483 to +490
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to be clear, are we referring to both parameters and arguments as the same thing here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. Other places are not strict about the formal parameter/actual argument distinction.

if self.typed_context.type_for_kind(&new_kind).is_none() {
Comment on lines +489 to +491
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Would be a bit clearer with early continues instead of extra indentation.

// Create and insert fn type
let new_id = CTypeId(self.id_mapper.fresh_id());
self.typed_context
.c_types
.insert(new_id, not_located(new_kind));
// Create and insert fn ptr type
let ptr_kind = CTypeKind::Pointer(CQualTypeId::new(new_id));
let ptr_id = CTypeId(self.id_mapper.fresh_id());
self.typed_context
.c_types
.insert(ptr_id, not_located(ptr_kind));
}
}
}

// Invert the macro invocations to get a list of macro expansion expressions
for (expr_id, macro_ids) in &self.typed_context.macro_invocations {
for mac_id in macro_ids {
Expand Down Expand Up @@ -687,6 +719,14 @@ impl ConversionContext {
CTypeKind::Function(ret, arguments, is_variadic, is_noreturn, has_proto);
self.add_type(new_id, not_located(function_ty));
self.processed_nodes.insert(new_id, FUNC_TYPE);

// In addition to creating the function type for this node, ensure that a
// corresponding function pointer type is created. We may need to reference this
// type depending on how uses of functions of this type are translated.
let pointer_ty = CTypeKind::Pointer(CQualTypeId::new(CTypeId(new_id)));
let new_id = self.id_mapper.fresh_id();
self.add_type(new_id, not_located(pointer_ty));
self.processed_nodes.insert(new_id, OTHER_TYPE);
Comment on lines +726 to +729
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's this doing exactly? Adding a fn ptr type? And how come a fresh new_id is created here shadowing the original one?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. I'll add a comment explaining what/why. The second new_id is because we're also adding a second type; each type needs its own ID.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

}

TypeTag::TagTypeOfType if expected_ty & TYPE != 0 => {
Expand Down Expand Up @@ -1934,12 +1974,121 @@ impl ConversionContext {
let typ_old = node
.type_id
.expect("Expected to find type on typedef declaration");
let typ = self.visit_qualified_type(typ_old);
let mut typ = self.visit_qualified_type(typ_old);

// Clang injects definitions of the form `#define __SIZE_TYPE__ unsigned int` into
// compilation units based on the target. (See lib/Frontend/InitPreprocessor.cpp
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you use a permalink for this in case clang changes?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With our Clang compatibility, we're programming against a large swath of Clang source history rather than one particular version. The location is more important than the instantaneous contents here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, okay. That makes sense then. Could you mention that briefly in the comment, though?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure what to say. Any time we reference something from Clang, the semantics may differ between Clang versions--this isn't more applicable here than elsewhere in the project.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sometimes we could refer to a particular implementation of something in clang, in which case a permalink is more useful. It's useful to know if that's the intent or were purposefully targeting multiple versions at once (I know we always do that, but some things in clang we're not expecting to change, and would need to patch things if they did).

// in Clang).
//
// Later, headers contain defns like: `typedef __SIZE_TYPE__ size_t;`
//
// We detect these typedefs and alter them replacing the type to which the macro
// expands with a synthetic, portable `CTypeKind` chosen from the macro's name.
//
// This allows us to generate platform-independent Rust types like u64 or usize
// despite the C side internally working with a target-specific type.
let target_dependent_macro: Option<String> = from_value(node.extras[2].clone())
.expect("Expected to find optional target-dependent macro name");

typ = target_dependent_macro
.as_deref()
.and_then(|macro_name| {
let kind = match macro_name {
// Match names in the order Clang defines them.
"__INTMAX_TYPE__" => CTypeKind::IntMax,
"__UINTMAX_TYPE__" => CTypeKind::UIntMax,
"__PTRDIFF_TYPE__" => CTypeKind::PtrDiff,
"__INTPTR_TYPE__" => CTypeKind::IntPtr,
"__SIZE_TYPE__" => CTypeKind::Size,
"__WCHAR_TYPE__" => CTypeKind::WChar,
// __WINT_TYPE__ is defined by Clang but has no obvious translation
// __CHARn_TYPE__ for n ∈ {8, 16, 32} also lack obvious translation
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Couldn't these be u8/u16/u32?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so; these C types may have platform-dependent sizes larger than 8/16/32 bits respectively: https://stackoverflow.com/questions/6440812/c-what-does-the-size-of-char16-t-depend-on

Given that Rust has no support for these types, I don't think we should lose sleep over them for now; we can revisit it if it ever becomes an issue.

"__UINTPTR_TYPE__" => CTypeKind::UIntPtr,
_ => {
log::debug!("Unknown target-dependent macro {macro_name}!");
return None;
}
};
log::trace!("Selected kind {kind} for typedef {name}");
Some(CQualTypeId::new(
self.typed_context.type_for_kind(&kind).unwrap(),
))
})
.unwrap_or(typ);
Comment on lines +1993 to +2017
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it'd help readability to pull a bunch of these out into separate functions. This function is 2000 lines long.

/// Try to match the target-dependent macro to a platform-independent type.
///
/// Clang injects definitions of the form `#define __SIZE_TYPE__ unsigned int`
/// into compilation units based on the target.
/// (See `lib/Frontend/InitPreprocessor.cpp` in Clang).
///
/// Later, headers contain defns like: `typedef __SIZE_TYPE__ size_t;`.
///
/// We detect these `typedef`s and alter them by replacing the type to which the macro
/// expands with a synthetic, portable `CTypeKind` chosen from the macro's name.
///
/// This allows us to generate platform-independent Rust types like [`u64`] or [`usize`]
/// despite the C side internally working with a target-specific type.
fn platform_independent_type_of_target_dependent_macro(macro_name: &str) -> Option<CTypeKind> {
    let kind = match macro_name {
        // Match names in the order Clang defines them.
        "__INTMAX_TYPE__" => CTypeKind::IntMax,
        "__UINTMAX_TYPE__" => CTypeKind::UIntMax,
        "__PTRDIFF_TYPE__" => CTypeKind::PtrDiff,
        "__INTPTR_TYPE__" => CTypeKind::IntPtr,
        "__SIZE_TYPE__" => CTypeKind::Size,
        "__WCHAR_TYPE__" => CTypeKind::WChar,
        // __WINT_TYPE__ is defined by Clang but has no obvious translation
        // __CHARn_TYPE__ for n ∈ {8, 16, 32} also lack obvious translation
        "__UINTPTR_TYPE__" => CTypeKind::UIntPtr,
        _ => {
            log::debug!("Unknown target-dependent macro {macro_name}!");
            return None;
        }
    };
    Some(kind)
}

/// Try to match the `typedef` to a platform-independent type.
///
/// Other platform-independent types are defined by
/// standard headers and their transitive includes without special compiler involvement
/// (see [`platform_independent_type_of_target_dependent_macro`]).
/// In these contexts, we recognize these `typedef`s by their name.
fn platform_independent_type_of_typedef(typedef: &str) -> Option<CTypeKind> {
    let kind = match typedef {
        "intmax_t" => CTypeKind::IntMax,
        "uintmax_t" => CTypeKind::UIntMax,
        "intptr_t" => CTypeKind::IntPtr,
        "uintptr_t" => CTypeKind::UIntPtr,
        // unlike `size_t`, `ssize_t` does not have a clang-provided `#define`.
        "ssize_t" => CTypeKind::SSize,
        // macOS defines `ssize_t` from `__darwin_ssize_t` in many headers,
        // so we should handle `__darwin_ssize_t` itself.
        "__darwin_ssize_t" => CTypeKind::SSize,
        "__uint8_t" => CTypeKind::UInt8,
        "__uint16_t" => CTypeKind::UInt16,
        "__uint32_t" => CTypeKind::UInt32,
        "__uint64_t" => CTypeKind::UInt64,
        "__uint128_t" => CTypeKind::UInt128,
        "__int8_t" => CTypeKind::Int8,
        "__int16_t" => CTypeKind::Int16,
        "__int32_t" => CTypeKind::Int32,
        "__int64_t" => CTypeKind::Int64,
        "__int128_t" => CTypeKind::Int128,
        // macOS stdint.h typedefs [u]intN_t directly:
        "int8_t" => CTypeKind::Int8,
        "int16_t" => CTypeKind::Int16,
        "int32_t" => CTypeKind::Int32,
        "int64_t" => CTypeKind::Int64,
        "uint8_t" => CTypeKind::UInt8,
        "uint16_t" => CTypeKind::UInt16,
        "uint32_t" => CTypeKind::UInt32,
        "uint64_t" => CTypeKind::UInt64,
        _ => {
            log::debug!("Unknown platform-independent typedef {typedef}!");
            return None;
        }
    };
    Some(kind)
}

/// Check if `src` is a standard header that contains platform-independent `typedef`s.
fn is_platform_independent_type_header(src: &SrcFile) -> bool {
    let filename = src
        .path
        .as_ref()
        .and_then(|path| path.file_name())
        .and_then(|filename| filename.to_str())
        .unwrap_or("");
    // `_types.h`, `_int*`, `_uint*` are for macOS.
    matches!(filename, "stdint.h" | "types.h" | "_types.h")
        || filename.starts_with("__stddef__")
        || filename.starts_with("_int")
        || filename.starts_with("_uint")
}

/// Try to recognize a platform-independent type given the `typedef` name,
/// its `target_dependent_macro` (see [`platform_independent_type_of_target_dependent_macro`]),
/// and the `src` file it was defined in.
fn platform_independent_type(
    typedef: &str,
    target_dependent_macro: Option<&str>,
    src: &SrcFile,
) -> Option<CTypeKind> {
    if let Some(macro_name) = target_dependent_macro {
        if let Some(ty) = platform_independent_type_of_target_dependent_macro(macro_name) {
            return Some(ty);
        }
    }
    if is_platform_independent_type_header(src) {
        if let Some(ty) = platform_independent_type_of_typedef(typedef) {
            return Some(ty);
        }
    }
    None
}

and then use it like this:

                    let target_dependent_macro: Option<String> = from_value(node.extras[2].clone())
                        .expect("Expected to find optional target-dependent macro name");

                    let file = self
                        .typed_context
                        .files
                        .get(self.typed_context.file_map[node.loc.fileid as usize])
                        .expect("missing SrcFile for typedef");
                    if let Some(kind) =
                        platform_independent_type(&name, target_dependent_macro.as_deref(), file)
                    {
                        log::trace!("Selected kind {kind} for typedef {name}");
                        typ = CQualTypeId::new(self.typed_context.type_for_kind(&kind).unwrap());
                    }

Also, I think platform-independent is a more accurate descriptor than fixed-width. Some of these types aren't fixed-width, like size_t. What we care about is that they are platform-independent but that C doesn't treat them internally as such.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR isn't the place for general refactors, and the comments here already explain what's happening as well as splitting out separate functions would.

With respect to naming, this handling covers both fixed-width, platform-independent types like uint16_t and ones that are platform-dependent and not fixed width like ssize_t. So platform-independent isn't really accurate for describing the types themselves (though none of the types handled by the first block here are fixed-size/platform-independent). We are adding logic to ensure that these types are translated in a platform-independent way, and to names that are not platform-dependent. But I don't know if it's worthwhile to bikeshed names here too much.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR isn't the place for general refactors

Not sure how this is a general refactor. This is all new code.

and the comments here already explain what's happening as well as splitting out separate functions would.

That's more subjective, and I don't think it does. I think it's better to err on the side of being clearer and the reviewer when this doesn't really cost anything.

With respect to naming, this handling covers both fixed-width, platform-independent types like uint16_t and ones that are platform-dependent and not fixed width like ssize_t. So platform-independent isn't really accurate for describing the types themselves (though none of the types handled by the first block here are fixed-size/platform-independent). We are adding logic to ensure that these types are translated in a platform-independent way, and to names that are not platform-dependent. But I don't know if it's worthwhile to bikeshed names here too much.

ssize_t isn't technically platform-dependent, but it basically is, and is for platforms we support.

I think consistent naming is really critical to being able to understand code. Referring to the same and different things with slightly different names all over the place is very confusing. And fixed-width is definitely inaccurate for many of these types, while platform-independent is largely correct and captures the intent of what we're trying to do: emit code that is platform-independent.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What I mean is that this function is already well over a thousand lines. Outlining a couple functions from the code this PR adds isn't going to fix that, but will require jumping around to see the functions in their calling context.

With respect to naming, we no longer refer to these as "fixed-width".


// Other fixed-size types are defined without special compiler involvement, by
// standard headers and their transitive includes. In these contexts, we
// recognize these typedefs by the name of the typedef type.
let id_for_name = |name| -> Option<_> {
let kind = match name {
"intmax_t" => CTypeKind::IntMax,
"uintmax_t" => CTypeKind::UIntMax,
"intptr_t" => CTypeKind::IntPtr,
"uintptr_t" => CTypeKind::UIntPtr,
// unlike `size_t`, `ssize_t` does not have a clang-provided `#define`.
"ssize_t" => CTypeKind::SSize,
// macOS defines `ssize_t` from `__darwin_ssize_t` in many headers,
// so we should handle `__darwin_ssize_t` itself.
"__darwin_ssize_t" => CTypeKind::SSize,
"__uint8_t" => CTypeKind::UInt8,
"__uint16_t" => CTypeKind::UInt16,
"__uint32_t" => CTypeKind::UInt32,
"__uint64_t" => CTypeKind::UInt64,
"__uint128_t" => CTypeKind::UInt128,
"__int8_t" => CTypeKind::Int8,
"__int16_t" => CTypeKind::Int16,
"__int32_t" => CTypeKind::Int32,
"__int64_t" => CTypeKind::Int64,
"__int128_t" => CTypeKind::Int128,
// macOS stdint.h typedefs [u]intN_t directly:
"int8_t" => CTypeKind::Int8,
"int16_t" => CTypeKind::Int16,
"int32_t" => CTypeKind::Int32,
"int64_t" => CTypeKind::Int64,
"uint8_t" => CTypeKind::UInt8,
"uint16_t" => CTypeKind::UInt16,
"uint32_t" => CTypeKind::UInt32,
"uint64_t" => CTypeKind::UInt64,
_ => {
log::debug!("Unknown fixed-size type typedef {name}!");
return None;
}
};
log::trace!("Selected kind {kind} for typedef {name}");
Some(CQualTypeId::new(
self.typed_context.type_for_kind(&kind).unwrap(),
))
};
let file = self
.typed_context
.files
.get(self.typed_context.file_map[node.loc.fileid as usize]);
let file = file.unwrap();
if let Some(path) = file.path.as_ref() {
if let Some(filename) = path.file_name() {
if filename == "stdint.h"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do all of these headers come from clang, or are some from glibc? https://github.com/llvm/llvm-project/tree/main/clang/lib/Headers seems to have stdint.h, stddef.h and inttypes.h, but I'm not sure what gets installed.

Other C libraries structure these headers a bit differently. See for example musl's definitions at https://git.musl-libc.org/cgit/musl/tree/include/alltypes.h.in.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At least some of these are expected to come from the C library, not Clang. It's likely that we might need additional handling to support musl and other BSDs. I'm not particularly happy with this unportable logic, but I'm not aware of a predicate that catches exactly what we want here (headers used in the implementation of standard types' definitions) in a portable way.

|| filename == "types.h"
|| filename
.to_str()
.map(|s| s.starts_with("__stddef_"))
.unwrap_or(false)
// for macos
|| filename == "_types.h"
|| filename
.to_str()
.map(|s| s.starts_with("_int") || s.starts_with("_uint"))
.unwrap_or(false)
{
typ = id_for_name(&*name).unwrap_or(typ);
}
}
}

let typdef_decl = CDeclKind::Typedef {
name,
typ,
is_implicit,
target_dependent_macro,
};

self.add_decl(new_id, located(node, typdef_decl));
Expand Down
19 changes: 14 additions & 5 deletions c2rust-transpile/src/c_ast/iterators.rs
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,9 @@ fn immediate_type_children(kind: &CTypeKind) -> Vec<SomeId> {
TypeOfExpr(e) => intos![e],
Void | Bool | Short | Int | Long | LongLong | UShort | UInt | ULong | ULongLong | SChar
| UChar | Char | Double | LongDouble | Float | Int128 | UInt128 | BuiltinFn | Half
| BFloat16 | UnhandledSveType | Float128 => {
| BFloat16 | UnhandledSveType | Float128 | Int8 | Int16 | Int32 | Int64 | IntPtr
| UInt8 | UInt16 | UInt32 | UInt64 | UIntPtr | IntMax | UIntMax | Size | SSize
| PtrDiff | WChar => {
vec![]
}

Expand Down Expand Up @@ -426,14 +428,22 @@ impl VisitNode {
}

pub trait NodeVisitor {
/// Visit nodes in pre-order traversal. Returns true if we should traverse
/// Return the immediate child nodes of a given node.
///
/// Generally implemented via `immediate_children[_all_types]`.
fn children(&mut self, _id: SomeId) -> Vec<SomeId>;
/// Visits nodes in pre-order traversal. Returns true if we should traverse
Comment on lines +434 to +435
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you leave newlines between the methods if they have docs?

Suggested change
fn children(&mut self, _id: SomeId) -> Vec<SomeId>;
/// Visits nodes in pre-order traversal. Returns true if we should traverse
fn children(&mut self, _id: SomeId) -> Vec<SomeId>;
/// Visits nodes in pre-order traversal. Returns true if we should traverse

/// children. If we are not traversing children, the node will still be
/// visited by `post`.
fn pre(&mut self, _id: SomeId) -> bool {
true
}
/// Visits nodes in post-order traversal.
fn post(&mut self, _id: SomeId) {}
fn visit_tree(&mut self, context: &TypedAstContext, root: SomeId) {
/// Perform a traversal of the nodes in the AST starting at the given root node.
/// The `pre` method will be called on each node before visiting its children, and the `post`
/// method afterward.
fn visit_tree(&mut self, root: SomeId) {
let mut stack = vec![VisitNode::new(root)];
while let Some(mut node) = stack.pop() {
if !node.seen {
Expand All @@ -442,9 +452,8 @@ pub trait NodeVisitor {
stack.push(node);

if self.pre(id) {
let children = immediate_children_all_types(context, id);
// Add children in reverse order since we visit the end of the stack first
stack.extend(children.into_iter().rev().map(VisitNode::new));
stack.extend(self.children(id).into_iter().rev().map(VisitNode::new));
}
} else {
self.post(node.id);
Expand Down
Loading