-
Notifications
You must be signed in to change notification settings - Fork 271
Translate C integer types to portable Rust types #1266
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
d6fc1ab
4b92269
3905f26
f472a99
bcbd8ee
b7548d8
108bd26
656d725
6a99bdf
bf909d5
2ba191b
3e11a56
7eaa39f
3b9a408
dad6e71
db3eee8
8a0cab6
f9d6197
21aee81
ad7309d
d362e77
772d269
d67eb49
b52fbcb
1c6d170
0860531
50ec00d
133f647
98456e2
0d855b2
ce736f8
e706593
e5bba30
9367ed2
c6fce75
db6d851
b6c70ca
bb3a1eb
0dbf919
da42dd2
befdab6
b4ad7a4
15b0470
df71dee
5aa6333
291521c
cb7ace2
d91b8e5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -443,6 +443,14 @@ impl ConversionContext { | |
self.typed_context.comments.push(comment); | ||
} | ||
|
||
// Add Rust fixed-size types. | ||
for rust_type_kind in CTypeKind::PULLBACK_KINDS { | ||
let new_id = self.id_mapper.fresh_id(); | ||
self.add_type(new_id, not_located(rust_type_kind)); | ||
self.processed_nodes | ||
.insert(new_id, self::node_types::OTHER_TYPE); | ||
} | ||
|
||
// Continue popping Clang nodes off of the stack of nodes we have promised to visit | ||
while let Some((node_id, expected_ty)) = self.visit_as.pop() { | ||
// Check if we've already processed this node. If so, ascertain that it has the right | ||
|
@@ -472,6 +480,30 @@ impl ConversionContext { | |
self.visit_node(untyped_context, node_id, new_id, expected_ty) | ||
} | ||
|
||
// Function declarations' types look through typedefs, but we want to use the types with | ||
// typedefs intact in some cases during translation. To ensure that these types exist in the | ||
// `TypedAstContext`, iterate over all function decls, compute their adjusted type using | ||
// argument types from arg declarations, and then ensure the existence of both this type and | ||
// the type of pointers to it. | ||
for (_decl_id, located_kind) in self.typed_context.c_decls.iter() { | ||
if let kind @ CDeclKind::Function { .. } = &located_kind.kind { | ||
let new_kind = self.typed_context.fn_decl_ty_with_declared_args(kind); | ||
Comment on lines
+483
to
+490
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just to be clear, are we referring to both parameters and arguments as the same thing here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. Other places are not strict about the formal parameter/actual argument distinction. |
||
if self.typed_context.type_for_kind(&new_kind).is_none() { | ||
Comment on lines
+489
to
+491
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: Would be a bit clearer with early |
||
// Create and insert fn type | ||
let new_id = CTypeId(self.id_mapper.fresh_id()); | ||
self.typed_context | ||
.c_types | ||
.insert(new_id, not_located(new_kind)); | ||
// Create and insert fn ptr type | ||
let ptr_kind = CTypeKind::Pointer(CQualTypeId::new(new_id)); | ||
let ptr_id = CTypeId(self.id_mapper.fresh_id()); | ||
self.typed_context | ||
.c_types | ||
.insert(ptr_id, not_located(ptr_kind)); | ||
fw-immunant marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
} | ||
} | ||
|
||
// Invert the macro invocations to get a list of macro expansion expressions | ||
for (expr_id, macro_ids) in &self.typed_context.macro_invocations { | ||
for mac_id in macro_ids { | ||
|
@@ -687,6 +719,14 @@ impl ConversionContext { | |
CTypeKind::Function(ret, arguments, is_variadic, is_noreturn, has_proto); | ||
self.add_type(new_id, not_located(function_ty)); | ||
self.processed_nodes.insert(new_id, FUNC_TYPE); | ||
|
||
// In addition to creating the function type for this node, ensure that a | ||
// corresponding function pointer type is created. We may need to reference this | ||
// type depending on how uses of functions of this type are translated. | ||
let pointer_ty = CTypeKind::Pointer(CQualTypeId::new(CTypeId(new_id))); | ||
let new_id = self.id_mapper.fresh_id(); | ||
self.add_type(new_id, not_located(pointer_ty)); | ||
self.processed_nodes.insert(new_id, OTHER_TYPE); | ||
Comment on lines
+726
to
+729
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's this doing exactly? Adding a fn ptr type? And how come a fresh There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. I'll add a comment explaining what/why. The second There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
} | ||
|
||
TypeTag::TagTypeOfType if expected_ty & TYPE != 0 => { | ||
|
@@ -1934,12 +1974,121 @@ impl ConversionContext { | |
let typ_old = node | ||
.type_id | ||
.expect("Expected to find type on typedef declaration"); | ||
let typ = self.visit_qualified_type(typ_old); | ||
let mut typ = self.visit_qualified_type(typ_old); | ||
|
||
// Clang injects definitions of the form `#define __SIZE_TYPE__ unsigned int` into | ||
// compilation units based on the target. (See lib/Frontend/InitPreprocessor.cpp | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you use a permalink for this in case clang changes? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. With our Clang compatibility, we're programming against a large swath of Clang source history rather than one particular version. The location is more important than the instantaneous contents here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, okay. That makes sense then. Could you mention that briefly in the comment, though? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure what to say. Any time we reference something from Clang, the semantics may differ between Clang versions--this isn't more applicable here than elsewhere in the project. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sometimes we could refer to a particular implementation of something in clang, in which case a permalink is more useful. It's useful to know if that's the intent or were purposefully targeting multiple versions at once (I know we always do that, but some things in clang we're not expecting to change, and would need to patch things if they did). |
||
// in Clang). | ||
// | ||
// Later, headers contain defns like: `typedef __SIZE_TYPE__ size_t;` | ||
// | ||
// We detect these typedefs and alter them replacing the type to which the macro | ||
// expands with a synthetic, portable `CTypeKind` chosen from the macro's name. | ||
// | ||
// This allows us to generate platform-independent Rust types like u64 or usize | ||
// despite the C side internally working with a target-specific type. | ||
let target_dependent_macro: Option<String> = from_value(node.extras[2].clone()) | ||
.expect("Expected to find optional target-dependent macro name"); | ||
|
||
typ = target_dependent_macro | ||
.as_deref() | ||
.and_then(|macro_name| { | ||
let kind = match macro_name { | ||
// Match names in the order Clang defines them. | ||
"__INTMAX_TYPE__" => CTypeKind::IntMax, | ||
"__UINTMAX_TYPE__" => CTypeKind::UIntMax, | ||
"__PTRDIFF_TYPE__" => CTypeKind::PtrDiff, | ||
"__INTPTR_TYPE__" => CTypeKind::IntPtr, | ||
"__SIZE_TYPE__" => CTypeKind::Size, | ||
"__WCHAR_TYPE__" => CTypeKind::WChar, | ||
// __WINT_TYPE__ is defined by Clang but has no obvious translation | ||
// __CHARn_TYPE__ for n ∈ {8, 16, 32} also lack obvious translation | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Couldn't these be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think so; these C types may have platform-dependent sizes larger than 8/16/32 bits respectively: https://stackoverflow.com/questions/6440812/c-what-does-the-size-of-char16-t-depend-on Given that Rust has no support for these types, I don't think we should lose sleep over them for now; we can revisit it if it ever becomes an issue. |
||
"__UINTPTR_TYPE__" => CTypeKind::UIntPtr, | ||
_ => { | ||
log::debug!("Unknown target-dependent macro {macro_name}!"); | ||
return None; | ||
} | ||
}; | ||
log::trace!("Selected kind {kind} for typedef {name}"); | ||
Some(CQualTypeId::new( | ||
self.typed_context.type_for_kind(&kind).unwrap(), | ||
)) | ||
}) | ||
.unwrap_or(typ); | ||
Comment on lines
+1993
to
+2017
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it'd help readability to pull a bunch of these out into separate functions. This function is 2000 lines long. /// Try to match the target-dependent macro to a platform-independent type.
///
/// Clang injects definitions of the form `#define __SIZE_TYPE__ unsigned int`
/// into compilation units based on the target.
/// (See `lib/Frontend/InitPreprocessor.cpp` in Clang).
///
/// Later, headers contain defns like: `typedef __SIZE_TYPE__ size_t;`.
///
/// We detect these `typedef`s and alter them by replacing the type to which the macro
/// expands with a synthetic, portable `CTypeKind` chosen from the macro's name.
///
/// This allows us to generate platform-independent Rust types like [`u64`] or [`usize`]
/// despite the C side internally working with a target-specific type.
fn platform_independent_type_of_target_dependent_macro(macro_name: &str) -> Option<CTypeKind> {
let kind = match macro_name {
// Match names in the order Clang defines them.
"__INTMAX_TYPE__" => CTypeKind::IntMax,
"__UINTMAX_TYPE__" => CTypeKind::UIntMax,
"__PTRDIFF_TYPE__" => CTypeKind::PtrDiff,
"__INTPTR_TYPE__" => CTypeKind::IntPtr,
"__SIZE_TYPE__" => CTypeKind::Size,
"__WCHAR_TYPE__" => CTypeKind::WChar,
// __WINT_TYPE__ is defined by Clang but has no obvious translation
// __CHARn_TYPE__ for n ∈ {8, 16, 32} also lack obvious translation
"__UINTPTR_TYPE__" => CTypeKind::UIntPtr,
_ => {
log::debug!("Unknown target-dependent macro {macro_name}!");
return None;
}
};
Some(kind)
}
/// Try to match the `typedef` to a platform-independent type.
///
/// Other platform-independent types are defined by
/// standard headers and their transitive includes without special compiler involvement
/// (see [`platform_independent_type_of_target_dependent_macro`]).
/// In these contexts, we recognize these `typedef`s by their name.
fn platform_independent_type_of_typedef(typedef: &str) -> Option<CTypeKind> {
let kind = match typedef {
"intmax_t" => CTypeKind::IntMax,
"uintmax_t" => CTypeKind::UIntMax,
"intptr_t" => CTypeKind::IntPtr,
"uintptr_t" => CTypeKind::UIntPtr,
// unlike `size_t`, `ssize_t` does not have a clang-provided `#define`.
"ssize_t" => CTypeKind::SSize,
// macOS defines `ssize_t` from `__darwin_ssize_t` in many headers,
// so we should handle `__darwin_ssize_t` itself.
"__darwin_ssize_t" => CTypeKind::SSize,
"__uint8_t" => CTypeKind::UInt8,
"__uint16_t" => CTypeKind::UInt16,
"__uint32_t" => CTypeKind::UInt32,
"__uint64_t" => CTypeKind::UInt64,
"__uint128_t" => CTypeKind::UInt128,
"__int8_t" => CTypeKind::Int8,
"__int16_t" => CTypeKind::Int16,
"__int32_t" => CTypeKind::Int32,
"__int64_t" => CTypeKind::Int64,
"__int128_t" => CTypeKind::Int128,
// macOS stdint.h typedefs [u]intN_t directly:
"int8_t" => CTypeKind::Int8,
"int16_t" => CTypeKind::Int16,
"int32_t" => CTypeKind::Int32,
"int64_t" => CTypeKind::Int64,
"uint8_t" => CTypeKind::UInt8,
"uint16_t" => CTypeKind::UInt16,
"uint32_t" => CTypeKind::UInt32,
"uint64_t" => CTypeKind::UInt64,
_ => {
log::debug!("Unknown platform-independent typedef {typedef}!");
return None;
}
};
Some(kind)
}
/// Check if `src` is a standard header that contains platform-independent `typedef`s.
fn is_platform_independent_type_header(src: &SrcFile) -> bool {
let filename = src
.path
.as_ref()
.and_then(|path| path.file_name())
.and_then(|filename| filename.to_str())
.unwrap_or("");
// `_types.h`, `_int*`, `_uint*` are for macOS.
matches!(filename, "stdint.h" | "types.h" | "_types.h")
|| filename.starts_with("__stddef__")
|| filename.starts_with("_int")
|| filename.starts_with("_uint")
}
/// Try to recognize a platform-independent type given the `typedef` name,
/// its `target_dependent_macro` (see [`platform_independent_type_of_target_dependent_macro`]),
/// and the `src` file it was defined in.
fn platform_independent_type(
typedef: &str,
target_dependent_macro: Option<&str>,
src: &SrcFile,
) -> Option<CTypeKind> {
if let Some(macro_name) = target_dependent_macro {
if let Some(ty) = platform_independent_type_of_target_dependent_macro(macro_name) {
return Some(ty);
}
}
if is_platform_independent_type_header(src) {
if let Some(ty) = platform_independent_type_of_typedef(typedef) {
return Some(ty);
}
}
None
} and then use it like this: let target_dependent_macro: Option<String> = from_value(node.extras[2].clone())
.expect("Expected to find optional target-dependent macro name");
let file = self
.typed_context
.files
.get(self.typed_context.file_map[node.loc.fileid as usize])
.expect("missing SrcFile for typedef");
if let Some(kind) =
platform_independent_type(&name, target_dependent_macro.as_deref(), file)
{
log::trace!("Selected kind {kind} for typedef {name}");
typ = CQualTypeId::new(self.typed_context.type_for_kind(&kind).unwrap());
} Also, I think platform-independent is a more accurate descriptor than fixed-width. Some of these types aren't fixed-width, like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This PR isn't the place for general refactors, and the comments here already explain what's happening as well as splitting out separate functions would. With respect to naming, this handling covers both fixed-width, platform-independent types like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Not sure how this is a general refactor. This is all new code.
That's more subjective, and I don't think it does. I think it's better to err on the side of being clearer and the reviewer when this doesn't really cost anything.
I think consistent naming is really critical to being able to understand code. Referring to the same and different things with slightly different names all over the place is very confusing. And fixed-width is definitely inaccurate for many of these types, while platform-independent is largely correct and captures the intent of what we're trying to do: emit code that is platform-independent. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What I mean is that this function is already well over a thousand lines. Outlining a couple functions from the code this PR adds isn't going to fix that, but will require jumping around to see the functions in their calling context. With respect to naming, we no longer refer to these as "fixed-width". |
||
|
||
// Other fixed-size types are defined without special compiler involvement, by | ||
// standard headers and their transitive includes. In these contexts, we | ||
// recognize these typedefs by the name of the typedef type. | ||
let id_for_name = |name| -> Option<_> { | ||
let kind = match name { | ||
"intmax_t" => CTypeKind::IntMax, | ||
"uintmax_t" => CTypeKind::UIntMax, | ||
"intptr_t" => CTypeKind::IntPtr, | ||
"uintptr_t" => CTypeKind::UIntPtr, | ||
// unlike `size_t`, `ssize_t` does not have a clang-provided `#define`. | ||
"ssize_t" => CTypeKind::SSize, | ||
// macOS defines `ssize_t` from `__darwin_ssize_t` in many headers, | ||
// so we should handle `__darwin_ssize_t` itself. | ||
"__darwin_ssize_t" => CTypeKind::SSize, | ||
"__uint8_t" => CTypeKind::UInt8, | ||
"__uint16_t" => CTypeKind::UInt16, | ||
"__uint32_t" => CTypeKind::UInt32, | ||
"__uint64_t" => CTypeKind::UInt64, | ||
"__uint128_t" => CTypeKind::UInt128, | ||
"__int8_t" => CTypeKind::Int8, | ||
"__int16_t" => CTypeKind::Int16, | ||
"__int32_t" => CTypeKind::Int32, | ||
"__int64_t" => CTypeKind::Int64, | ||
"__int128_t" => CTypeKind::Int128, | ||
// macOS stdint.h typedefs [u]intN_t directly: | ||
"int8_t" => CTypeKind::Int8, | ||
"int16_t" => CTypeKind::Int16, | ||
"int32_t" => CTypeKind::Int32, | ||
"int64_t" => CTypeKind::Int64, | ||
"uint8_t" => CTypeKind::UInt8, | ||
"uint16_t" => CTypeKind::UInt16, | ||
"uint32_t" => CTypeKind::UInt32, | ||
"uint64_t" => CTypeKind::UInt64, | ||
_ => { | ||
log::debug!("Unknown fixed-size type typedef {name}!"); | ||
kkysen marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return None; | ||
} | ||
}; | ||
log::trace!("Selected kind {kind} for typedef {name}"); | ||
Some(CQualTypeId::new( | ||
self.typed_context.type_for_kind(&kind).unwrap(), | ||
)) | ||
}; | ||
let file = self | ||
.typed_context | ||
.files | ||
.get(self.typed_context.file_map[node.loc.fileid as usize]); | ||
let file = file.unwrap(); | ||
if let Some(path) = file.path.as_ref() { | ||
if let Some(filename) = path.file_name() { | ||
if filename == "stdint.h" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do all of these headers come from clang, or are some from glibc? https://github.com/llvm/llvm-project/tree/main/clang/lib/Headers seems to have Other C libraries structure these headers a bit differently. See for example musl's definitions at https://git.musl-libc.org/cgit/musl/tree/include/alltypes.h.in. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. At least some of these are expected to come from the C library, not Clang. It's likely that we might need additional handling to support musl and other BSDs. I'm not particularly happy with this unportable logic, but I'm not aware of a predicate that catches exactly what we want here (headers used in the implementation of standard types' definitions) in a portable way. |
||
|| filename == "types.h" | ||
|| filename | ||
.to_str() | ||
.map(|s| s.starts_with("__stddef_")) | ||
.unwrap_or(false) | ||
// for macos | ||
|| filename == "_types.h" | ||
|| filename | ||
.to_str() | ||
.map(|s| s.starts_with("_int") || s.starts_with("_uint")) | ||
.unwrap_or(false) | ||
{ | ||
typ = id_for_name(&*name).unwrap_or(typ); | ||
} | ||
} | ||
} | ||
|
||
let typdef_decl = CDeclKind::Typedef { | ||
name, | ||
typ, | ||
is_implicit, | ||
target_dependent_macro, | ||
}; | ||
|
||
self.add_decl(new_id, located(node, typdef_decl)); | ||
|
Original file line number | Diff line number | Diff line change | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -285,7 +285,9 @@ fn immediate_type_children(kind: &CTypeKind) -> Vec<SomeId> { | |||||||||||
TypeOfExpr(e) => intos![e], | ||||||||||||
Void | Bool | Short | Int | Long | LongLong | UShort | UInt | ULong | ULongLong | SChar | ||||||||||||
| UChar | Char | Double | LongDouble | Float | Int128 | UInt128 | BuiltinFn | Half | ||||||||||||
| BFloat16 | UnhandledSveType | Float128 => { | ||||||||||||
| BFloat16 | UnhandledSveType | Float128 | Int8 | Int16 | Int32 | Int64 | IntPtr | ||||||||||||
| UInt8 | UInt16 | UInt32 | UInt64 | UIntPtr | IntMax | UIntMax | Size | SSize | ||||||||||||
| PtrDiff | WChar => { | ||||||||||||
vec![] | ||||||||||||
} | ||||||||||||
|
||||||||||||
|
@@ -426,14 +428,22 @@ impl VisitNode { | |||||||||||
} | ||||||||||||
|
||||||||||||
pub trait NodeVisitor { | ||||||||||||
/// Visit nodes in pre-order traversal. Returns true if we should traverse | ||||||||||||
/// Return the immediate child nodes of a given node. | ||||||||||||
/// | ||||||||||||
/// Generally implemented via `immediate_children[_all_types]`. | ||||||||||||
fw-immunant marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||
fn children(&mut self, _id: SomeId) -> Vec<SomeId>; | ||||||||||||
/// Visits nodes in pre-order traversal. Returns true if we should traverse | ||||||||||||
Comment on lines
+434
to
+435
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you leave newlines between the methods if they have docs?
Suggested change
|
||||||||||||
/// children. If we are not traversing children, the node will still be | ||||||||||||
/// visited by `post`. | ||||||||||||
fn pre(&mut self, _id: SomeId) -> bool { | ||||||||||||
true | ||||||||||||
} | ||||||||||||
/// Visits nodes in post-order traversal. | ||||||||||||
fn post(&mut self, _id: SomeId) {} | ||||||||||||
fn visit_tree(&mut self, context: &TypedAstContext, root: SomeId) { | ||||||||||||
/// Perform a traversal of the nodes in the AST starting at the given root node. | ||||||||||||
/// The `pre` method will be called on each node before visiting its children, and the `post` | ||||||||||||
/// method afterward. | ||||||||||||
fn visit_tree(&mut self, root: SomeId) { | ||||||||||||
let mut stack = vec![VisitNode::new(root)]; | ||||||||||||
while let Some(mut node) = stack.pop() { | ||||||||||||
if !node.seen { | ||||||||||||
|
@@ -442,9 +452,8 @@ pub trait NodeVisitor { | |||||||||||
stack.push(node); | ||||||||||||
|
||||||||||||
if self.pre(id) { | ||||||||||||
let children = immediate_children_all_types(context, id); | ||||||||||||
// Add children in reverse order since we visit the end of the stack first | ||||||||||||
stack.extend(children.into_iter().rev().map(VisitNode::new)); | ||||||||||||
stack.extend(self.children(id).into_iter().rev().map(VisitNode::new)); | ||||||||||||
} | ||||||||||||
} else { | ||||||||||||
self.post(node.id); | ||||||||||||
|
Uh oh!
There was an error while loading. Please reload this page.