From 5d46a45feac21a6f6d0a367f201dd77d88ae5a96 Mon Sep 17 00:00:00 2001 From: Nia Espera Date: Sun, 13 Jul 2025 18:40:27 +0200 Subject: [PATCH 1/6] native-lib: pass structs to native code --- Cargo.toml | 2 +- src/shims/native_lib/ffi.rs | 113 +++++++ src/shims/native_lib/mod.rs | 290 ++++++++++-------- tests/native-lib/fail/struct_not_extern_c.rs | 19 ++ .../fail/struct_not_extern_c.stderr | 15 + tests/native-lib/pass/scalar_arguments.rs | 49 +++ tests/native-lib/scalar_arguments.c | 37 +++ 7 files changed, 399 insertions(+), 126 deletions(-) create mode 100644 src/shims/native_lib/ffi.rs create mode 100644 tests/native-lib/fail/struct_not_extern_c.rs create mode 100644 tests/native-lib/fail/struct_not_extern_c.stderr diff --git a/Cargo.toml b/Cargo.toml index 99111092d3..924dfed2bc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,7 +39,7 @@ features = ['unprefixed_malloc_on_supported_platforms'] [target.'cfg(unix)'.dependencies] libc = "0.2" # native-lib dependencies -libffi = { version = "4.0.0", optional = true } +libffi = { version = "4.1.1", optional = true } libloading = { version = "0.8", optional = true } serde = { version = "1.0.219", features = ["derive"], optional = true } diff --git a/src/shims/native_lib/ffi.rs b/src/shims/native_lib/ffi.rs new file mode 100644 index 0000000000..1f9adc6222 --- /dev/null +++ b/src/shims/native_lib/ffi.rs @@ -0,0 +1,113 @@ +use libffi::low::CodePtr; +use libffi::middle::{Arg as ArgPtr, Cif, Type as FfiType}; + +/// Perform the actual FFI call. +/// +/// SAFETY: The `FfiArg`s passed must have been correctly instantiated (i.e. their +/// type layout must match the data they point to), and the safety invariants of +/// the foreign function being called must be upheld (if any). +pub unsafe fn call<'a, R: libffi::high::CType>(fun: CodePtr, args: Vec>) -> R { + let mut arg_tys = vec![]; + let mut arg_ptrs = vec![]; + for arg in args { + arg_tys.push(arg.ty); + arg_ptrs.push(arg.ptr) + } + let cif = Cif::new(arg_tys, R::reify().into_middle()); + unsafe { cif.call(fun, &arg_ptrs) } +} + +/// A wrapper type for `libffi::middle::Type` which also holds a pointer to the data. +pub struct FfiArg<'a> { + /// The type layout information for the pointed-to data. + ty: FfiType, + /// A pointer to the data described in `ty`. + ptr: ArgPtr, + /// Lifetime of the actual pointed-to data. + _p: std::marker::PhantomData<&'a [u8]>, +} + +impl<'a> FfiArg<'a> { + fn new(ty: FfiType, ptr: ArgPtr) -> Self { + Self { ty, ptr, _p: std::marker::PhantomData } + } +} + +/// An owning form of `FfiArg`. +/// We introduce this enum instead of just calling `Arg::new` and storing a list of +/// `libffi::middle::Arg` directly, because the `libffi::middle::Arg` just wraps a reference to +/// the value it represents and we need to store a copy of the value, and pass a reference to +/// this copy to C instead. +#[derive(Debug, Clone)] +pub enum CArg { + /// Primitive type. + Primitive(CPrimitive), + /// Struct with its computed type layout and bytes. + Struct(FfiType, Box<[u8]>), +} + +impl CArg { + /// Convert a `CArg` to the required FFI argument type. + pub fn arg_downcast<'a>(&'a self) -> FfiArg<'a> { + match self { + CArg::Primitive(cprim) => cprim.arg_downcast(), + // FIXME: Using `&items[0]` to reference the whole array is definitely + // unsound under SB, but we're waiting on + // https://github.com/libffi-rs/libffi-rs/commit/112a37b3b6ffb35bd75241fbcc580de40ba74a73 + // to land in a release so that we don't need to do this. + CArg::Struct(cstruct, items) => FfiArg::new(cstruct.clone(), ArgPtr::new(&items[0])), + } + } +} + +impl From for CArg { + fn from(prim: CPrimitive) -> Self { + Self::Primitive(prim) + } +} + +#[derive(Debug, Clone)] +/// Enum of supported primitive arguments to external C functions. +pub enum CPrimitive { + /// 8-bit signed integer. + Int8(i8), + /// 16-bit signed integer. + Int16(i16), + /// 32-bit signed integer. + Int32(i32), + /// 64-bit signed integer. + Int64(i64), + /// isize. + ISize(isize), + /// 8-bit unsigned integer. + UInt8(u8), + /// 16-bit unsigned integer. + UInt16(u16), + /// 32-bit unsigned integer. + UInt32(u32), + /// 64-bit unsigned integer. + UInt64(u64), + /// usize. + USize(usize), + /// Raw pointer, stored as C's `void*`. + RawPtr(*mut std::ffi::c_void), +} + +impl CPrimitive { + /// Convert a primitive to the required FFI argument type. + fn arg_downcast<'a>(&'a self) -> FfiArg<'a> { + match self { + CPrimitive::Int8(i) => FfiArg::new(FfiType::i8(), ArgPtr::new(i)), + CPrimitive::Int16(i) => FfiArg::new(FfiType::i16(), ArgPtr::new(i)), + CPrimitive::Int32(i) => FfiArg::new(FfiType::i32(), ArgPtr::new(i)), + CPrimitive::Int64(i) => FfiArg::new(FfiType::i64(), ArgPtr::new(i)), + CPrimitive::ISize(i) => FfiArg::new(FfiType::isize(), ArgPtr::new(i)), + CPrimitive::UInt8(i) => FfiArg::new(FfiType::u8(), ArgPtr::new(i)), + CPrimitive::UInt16(i) => FfiArg::new(FfiType::u16(), ArgPtr::new(i)), + CPrimitive::UInt32(i) => FfiArg::new(FfiType::u32(), ArgPtr::new(i)), + CPrimitive::UInt64(i) => FfiArg::new(FfiType::u64(), ArgPtr::new(i)), + CPrimitive::USize(i) => FfiArg::new(FfiType::usize(), ArgPtr::new(i)), + CPrimitive::RawPtr(i) => FfiArg::new(FfiType::pointer(), ArgPtr::new(i)), + } + } +} diff --git a/src/shims/native_lib/mod.rs b/src/shims/native_lib/mod.rs index 74b9b704fe..16aa475cb0 100644 --- a/src/shims/native_lib/mod.rs +++ b/src/shims/native_lib/mod.rs @@ -2,14 +2,15 @@ use std::ops::Deref; -use libffi::high::call as ffi; use libffi::low::CodePtr; -use rustc_abi::{BackendRepr, HasDataLayout, Size}; -use rustc_middle::mir::interpret::Pointer; -use rustc_middle::ty::{self as ty, IntTy, UintTy}; +use libffi::middle::Type as FfiType; +use rustc_abi::Size; +use rustc_middle::ty::{self as ty, IntTy, Ty, UintTy}; use rustc_span::Symbol; use serde::{Deserialize, Serialize}; +mod ffi; + #[cfg_attr( not(all( target_os = "linux", @@ -20,6 +21,7 @@ use serde::{Deserialize, Serialize}; )] pub mod trace; +use self::ffi::{CArg, CPrimitive, FfiArg}; use crate::*; /// The final results of an FFI trace, containing every relevant event detected @@ -75,7 +77,7 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { link_name: Symbol, dest: &MPlaceTy<'tcx>, ptr: CodePtr, - libffi_args: Vec>, + libffi_args: Vec>, ) -> InterpResult<'tcx, (crate::ImmTy<'tcx>, Option)> { let this = self.eval_context_mut(); #[cfg(target_os = "linux")] @@ -93,55 +95,55 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { // Unsafe because of the call to native code. // Because this is calling a C function it is not necessarily sound, // but there is no way around this and we've checked as much as we can. - let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; + let x = unsafe { ffi::call::(ptr, libffi_args) }; Scalar::from_i8(x) } ty::Int(IntTy::I16) => { - let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; + let x = unsafe { ffi::call::(ptr, libffi_args) }; Scalar::from_i16(x) } ty::Int(IntTy::I32) => { - let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; + let x = unsafe { ffi::call::(ptr, libffi_args) }; Scalar::from_i32(x) } ty::Int(IntTy::I64) => { - let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; + let x = unsafe { ffi::call::(ptr, libffi_args) }; Scalar::from_i64(x) } ty::Int(IntTy::Isize) => { - let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; + let x = unsafe { ffi::call::(ptr, libffi_args) }; Scalar::from_target_isize(x.try_into().unwrap(), this) } // uints ty::Uint(UintTy::U8) => { - let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; + let x = unsafe { ffi::call::(ptr, libffi_args) }; Scalar::from_u8(x) } ty::Uint(UintTy::U16) => { - let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; + let x = unsafe { ffi::call::(ptr, libffi_args) }; Scalar::from_u16(x) } ty::Uint(UintTy::U32) => { - let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; + let x = unsafe { ffi::call::(ptr, libffi_args) }; Scalar::from_u32(x) } ty::Uint(UintTy::U64) => { - let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; + let x = unsafe { ffi::call::(ptr, libffi_args) }; Scalar::from_u64(x) } ty::Uint(UintTy::Usize) => { - let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; + let x = unsafe { ffi::call::(ptr, libffi_args) }; Scalar::from_target_usize(x.try_into().unwrap(), this) } // Functions with no declared return type (i.e., the default return) // have the output_type `Tuple([])`. ty::Tuple(t_list) if (*t_list).deref().is_empty() => { - unsafe { ffi::call::<()>(ptr, libffi_args.as_slice()) }; + unsafe { ffi::call::<()>(ptr, libffi_args) }; return interp_ok(ImmTy::uninit(dest.layout)); } ty::RawPtr(..) => { - let x = unsafe { ffi::call::<*const ()>(ptr, libffi_args.as_slice()) }; - let ptr = Pointer::new(Provenance::Wildcard, Size::from_bytes(x.addr())); + let x = unsafe { ffi::call::<*const ()>(ptr, libffi_args) }; + let ptr = StrictPointer::new(Provenance::Wildcard, Size::from_bytes(x.addr())); Scalar::from_pointer(ptr, this) } _ => @@ -267,6 +269,147 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { interp_ok(()) } + + /// Extract the value from the result of reading an operand from the machine + /// and convert it to a `CArg`. + fn op_to_carg(&self, v: &OpTy<'tcx>, tracing: bool) -> InterpResult<'tcx, CArg> { + let this = self.eval_context_ref(); + let scalar = |v| interp_ok(this.read_immediate(v)?.to_scalar()); + interp_ok(match v.layout.ty.kind() { + // If the primitive provided can be converted to a type matching the type pattern + // then create a `CArg` of this primitive value with the corresponding `CArg` constructor. + // the ints + ty::Int(IntTy::I8) => CPrimitive::Int8(scalar(v)?.to_i8()?).into(), + ty::Int(IntTy::I16) => CPrimitive::Int16(scalar(v)?.to_i16()?).into(), + ty::Int(IntTy::I32) => CPrimitive::Int32(scalar(v)?.to_i32()?).into(), + ty::Int(IntTy::I64) => CPrimitive::Int64(scalar(v)?.to_i64()?).into(), + ty::Int(IntTy::Isize) => + CPrimitive::ISize(scalar(v)?.to_target_isize(this)?.try_into().unwrap()).into(), + // the uints + ty::Uint(UintTy::U8) => CPrimitive::UInt8(scalar(v)?.to_u8()?).into(), + ty::Uint(UintTy::U16) => CPrimitive::UInt16(scalar(v)?.to_u16()?).into(), + ty::Uint(UintTy::U32) => CPrimitive::UInt32(scalar(v)?.to_u32()?).into(), + ty::Uint(UintTy::U64) => CPrimitive::UInt64(scalar(v)?.to_u64()?).into(), + ty::Uint(UintTy::Usize) => + CPrimitive::USize(scalar(v)?.to_target_usize(this)?.try_into().unwrap()).into(), + ty::RawPtr(..) => { + let ptr = scalar(v)?.to_pointer(this)?; + // Pointer without provenance may not access any memory anyway, skip. + if let Some(prov) = ptr.provenance { + // The first time this happens, print a warning. + if !this.machine.native_call_mem_warned.replace(true) { + // Newly set, so first time we get here. + this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem { tracing }); + } + + this.expose_provenance(prov)?; + }; + + // This relies on the `expose_provenance` in the `visit_reachable_allocs` callback + // below to expose the actual interpreter-level allocation. + CPrimitive::RawPtr(std::ptr::with_exposed_provenance_mut(ptr.addr().bytes_usize())) + .into() + } + // For ADTs, create an FfiType from their fields. + ty::Adt(adt_def, args) => { + let strukt = this.adt_to_ffitype(v.layout.ty, *adt_def, args)?; + + // Copy the raw bytes backing this arg. + let bytes = match v.as_mplace_or_imm() { + either::Either::Left(mplace) => { + // We do all of this to grab the bytes without actually + // stripping provenance from them, since it'll later be + // exposed recursively. + let ptr = mplace.ptr(); + // Make sure the provenance of this allocation is exposed; + // there must be one for this mplace to be valid at all. + // The interpreter-level allocation itself is exposed in + // visit_reachable_allocs. + this.expose_provenance(ptr.provenance.unwrap())?; + // Then get the actual bytes. + let id = this + .alloc_id_from_addr( + ptr.addr().bytes(), + mplace.layout.size.bytes_usize().try_into().unwrap(), + /* only_exposed_allocations */ true, + ) + .unwrap(); + let ptr_raw = this.get_alloc_bytes_unchecked_raw(id)?; + // SAFETY: We know for sure that at ptr_raw the next layout.size bytes + // are part of this allocation and initialised. They might be marked as + // uninit in Miri, but all bytes returned by the isolated allocator are + // initialised. + unsafe { + std::slice::from_raw_parts(ptr_raw, mplace.layout.size.bytes_usize()) + .to_vec() + .into_boxed_slice() + } + } + either::Either::Right(imm) => { + // For immediates, we know the backing scalar is going to be 128 bits, + // so we can just copy that. + // TODO: is it possible for this to be a scalar pair? + let scalar = imm.to_scalar(); + if scalar.size().bytes() > 0 { + let bits = scalar.to_bits(scalar.size())?; + bits.to_ne_bytes().to_vec().into_boxed_slice() + } else { + throw_ub_format!("attempting to pass a ZST over FFI: {}", imm.layout.ty) + } + } + }; + + ffi::CArg::Struct(strukt, bytes) + } + _ => throw_unsup_format!("unsupported argument type for native call: {}", v.layout.ty), + }) + } + + /// Parses an ADT to construct the matching libffi type. + fn adt_to_ffitype( + &self, + orig_ty: Ty<'_>, + adt_def: ty::AdtDef<'tcx>, + args: &'tcx ty::List>, + ) -> InterpResult<'tcx, FfiType> { + // TODO: is this correct? Maybe `repr(transparent)` when the inner field + // is itself `repr(c)` is ok? + if !adt_def.repr().c() { + throw_ub_format!("passing a non-#[repr(C)] struct over FFI: {orig_ty}") + } + // TODO: unions, etc. + if !adt_def.is_struct() { + throw_unsup_format!("unsupported argument type for native call: {orig_ty}"); + } + + let this = self.eval_context_ref(); + let mut fields = vec![]; + for field in adt_def.all_fields() { + fields.push(this.ty_to_ffitype(field.ty(*this.tcx, args))?); + } + + interp_ok(FfiType::structure(fields)) + } + + /// Gets the matching libffi type for a given Ty. + fn ty_to_ffitype(&self, ty: Ty<'tcx>) -> InterpResult<'tcx, FfiType> { + interp_ok(match ty.kind() { + ty::Int(IntTy::I8) => FfiType::i8(), + ty::Int(IntTy::I16) => FfiType::i16(), + ty::Int(IntTy::I32) => FfiType::i32(), + ty::Int(IntTy::I64) => FfiType::i64(), + ty::Int(IntTy::Isize) => FfiType::isize(), + // the uints + ty::Uint(UintTy::U8) => FfiType::u8(), + ty::Uint(UintTy::U16) => FfiType::u16(), + ty::Uint(UintTy::U32) => FfiType::u32(), + ty::Uint(UintTy::U64) => FfiType::u64(), + ty::Uint(UintTy::Usize) => FfiType::usize(), + ty::RawPtr(..) => FfiType::pointer(), + ty::Adt(adt_def, args) => self.adt_to_ffitype(ty, *adt_def, args)?, + _ => throw_unsup_format!("unsupported argument type for native call: {}", ty), + }) + } } impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {} @@ -295,36 +438,13 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // Do we have ptrace? let tracing = trace::Supervisor::is_enabled(); - // Get the function arguments, and convert them to `libffi`-compatible form. + // Get the function arguments, copy them, and prepare the type descriptions. let mut libffi_args = Vec::::with_capacity(args.len()); for arg in args.iter() { - if !matches!(arg.layout.backend_repr, BackendRepr::Scalar(_)) { - throw_unsup_format!("only scalar argument types are supported for native calls") - } - let imm = this.read_immediate(arg)?; - libffi_args.push(imm_to_carg(&imm, this)?); - // If we are passing a pointer, expose its provenance. Below, all exposed memory - // (previously exposed and new exposed) will then be properly prepared. - if matches!(arg.layout.ty.kind(), ty::RawPtr(..)) { - let ptr = imm.to_scalar().to_pointer(this)?; - let Some(prov) = ptr.provenance else { - // Pointer without provenance may not access any memory anyway, skip. - continue; - }; - // The first time this happens, print a warning. - if !this.machine.native_call_mem_warned.replace(true) { - // Newly set, so first time we get here. - this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem { tracing }); - } - - this.expose_provenance(prov)?; - } + libffi_args.push(this.op_to_carg(arg, tracing)?); } - // Convert arguments to `libffi::high::Arg` type. - let libffi_args = libffi_args - .iter() - .map(|arg| arg.arg_downcast()) - .collect::>>(); + // Convert arguments to a libffi-compatible type. + let libffi_args = libffi_args.iter().map(|arg| arg.arg_downcast()).collect::>(); // Prepare all exposed memory (both previously exposed, and just newly exposed since a // pointer was passed as argument). Uninitialised memory is left as-is, but any data @@ -377,83 +497,3 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { interp_ok(true) } } - -#[derive(Debug, Clone)] -/// Enum of supported arguments to external C functions. -// We introduce this enum instead of just calling `ffi::arg` and storing a list -// of `libffi::high::Arg` directly, because the `libffi::high::Arg` just wraps a reference -// to the value it represents: https://docs.rs/libffi/latest/libffi/high/call/struct.Arg.html -// and we need to store a copy of the value, and pass a reference to this copy to C instead. -enum CArg { - /// 8-bit signed integer. - Int8(i8), - /// 16-bit signed integer. - Int16(i16), - /// 32-bit signed integer. - Int32(i32), - /// 64-bit signed integer. - Int64(i64), - /// isize. - ISize(isize), - /// 8-bit unsigned integer. - UInt8(u8), - /// 16-bit unsigned integer. - UInt16(u16), - /// 32-bit unsigned integer. - UInt32(u32), - /// 64-bit unsigned integer. - UInt64(u64), - /// usize. - USize(usize), - /// Raw pointer, stored as C's `void*`. - RawPtr(*mut std::ffi::c_void), -} - -impl<'a> CArg { - /// Convert a `CArg` to a `libffi` argument type. - fn arg_downcast(&'a self) -> libffi::high::Arg<'a> { - match self { - CArg::Int8(i) => ffi::arg(i), - CArg::Int16(i) => ffi::arg(i), - CArg::Int32(i) => ffi::arg(i), - CArg::Int64(i) => ffi::arg(i), - CArg::ISize(i) => ffi::arg(i), - CArg::UInt8(i) => ffi::arg(i), - CArg::UInt16(i) => ffi::arg(i), - CArg::UInt32(i) => ffi::arg(i), - CArg::UInt64(i) => ffi::arg(i), - CArg::USize(i) => ffi::arg(i), - CArg::RawPtr(i) => ffi::arg(i), - } - } -} - -/// Extract the scalar value from the result of reading a scalar from the machine, -/// and convert it to a `CArg`. -fn imm_to_carg<'tcx>(v: &ImmTy<'tcx>, cx: &impl HasDataLayout) -> InterpResult<'tcx, CArg> { - interp_ok(match v.layout.ty.kind() { - // If the primitive provided can be converted to a type matching the type pattern - // then create a `CArg` of this primitive value with the corresponding `CArg` constructor. - // the ints - ty::Int(IntTy::I8) => CArg::Int8(v.to_scalar().to_i8()?), - ty::Int(IntTy::I16) => CArg::Int16(v.to_scalar().to_i16()?), - ty::Int(IntTy::I32) => CArg::Int32(v.to_scalar().to_i32()?), - ty::Int(IntTy::I64) => CArg::Int64(v.to_scalar().to_i64()?), - ty::Int(IntTy::Isize) => - CArg::ISize(v.to_scalar().to_target_isize(cx)?.try_into().unwrap()), - // the uints - ty::Uint(UintTy::U8) => CArg::UInt8(v.to_scalar().to_u8()?), - ty::Uint(UintTy::U16) => CArg::UInt16(v.to_scalar().to_u16()?), - ty::Uint(UintTy::U32) => CArg::UInt32(v.to_scalar().to_u32()?), - ty::Uint(UintTy::U64) => CArg::UInt64(v.to_scalar().to_u64()?), - ty::Uint(UintTy::Usize) => - CArg::USize(v.to_scalar().to_target_usize(cx)?.try_into().unwrap()), - ty::RawPtr(..) => { - let s = v.to_scalar().to_pointer(cx)?.addr(); - // This relies on the `expose_provenance` in the `visit_reachable_allocs` callback - // above. - CArg::RawPtr(std::ptr::with_exposed_provenance_mut(s.bytes_usize())) - } - _ => throw_unsup_format!("unsupported argument type for native call: {}", v.layout.ty), - }) -} diff --git a/tests/native-lib/fail/struct_not_extern_c.rs b/tests/native-lib/fail/struct_not_extern_c.rs new file mode 100644 index 0000000000..2baa40489f --- /dev/null +++ b/tests/native-lib/fail/struct_not_extern_c.rs @@ -0,0 +1,19 @@ +// Only works on Unix targets +//@ignore-target: windows wasm +//@only-on-host + +#![allow(improper_ctypes)] + +pub struct PassMe { + pub value: i32, + pub other_value: i16, +} + +extern "C" { + fn pass_struct(s: PassMe) -> i32; +} + +fn main() { + let pass_me = PassMe { value: 42, other_value: 1337 }; + unsafe { pass_struct(pass_me) }; //~ ERROR: Undefined Behavior: passing a non-#[repr(C)] struct over FFI +} diff --git a/tests/native-lib/fail/struct_not_extern_c.stderr b/tests/native-lib/fail/struct_not_extern_c.stderr new file mode 100644 index 0000000000..eb0d37645c --- /dev/null +++ b/tests/native-lib/fail/struct_not_extern_c.stderr @@ -0,0 +1,15 @@ +error: Undefined Behavior: passing a non-#[repr(C)] struct over FFI: PassMe + --> tests/native-lib/fail/struct_not_extern_c.rs:LL:CC + | +LL | unsafe { pass_struct(pass_me) }; + | ^^^^^^^^^^^^^^^^^^^^ Undefined Behavior occurred here + | + = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior + = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + = note: BACKTRACE: + = note: inside `main` at tests/native-lib/fail/struct_not_extern_c.rs:LL:CC + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to 1 previous error + diff --git a/tests/native-lib/pass/scalar_arguments.rs b/tests/native-lib/pass/scalar_arguments.rs index 9e99977a69..f885fdacad 100644 --- a/tests/native-lib/pass/scalar_arguments.rs +++ b/tests/native-lib/pass/scalar_arguments.rs @@ -39,5 +39,54 @@ fn main() { // test void function that prints from C printer(); + + test_pass_struct(); + test_pass_struct_complex(); + } +} + +/// Test passing a basic struct as an argument. +fn test_pass_struct() { + #[repr(C)] + struct PassMe { + value: i32, + other_value: i16, } + + extern "C" { + fn pass_struct(s: PassMe) -> i32; + } + + let pass_me = PassMe { value: 42, other_value: 1337 }; + assert_eq!(unsafe { pass_struct(pass_me) }, 42 + 1337); +} + +/// Test passing a more complex struct as an argument. +fn test_pass_struct_complex() { + #[repr(C)] + struct ComplexStruct { + part_1: Part1, + part_2: Part2, + part_3: u32, + } + #[repr(C)] + struct Part1 { + high: u16, + low: u16, + } + #[repr(C)] + struct Part2 { + bits: u32, + } + + extern "C" { + fn pass_struct_complex(s: ComplexStruct) -> i32; + } + + let complex = ComplexStruct { + part_1: Part1 { high: 0xabcd, low: 0xef01 }, + part_2: Part2 { bits: 0xabcdef01 }, + part_3: 0xabcdef01, + }; + assert_eq!(unsafe { pass_struct_complex(complex) }, 0); } diff --git a/tests/native-lib/scalar_arguments.c b/tests/native-lib/scalar_arguments.c index 8cf38f7441..56f41b133a 100644 --- a/tests/native-lib/scalar_arguments.c +++ b/tests/native-lib/scalar_arguments.c @@ -30,6 +30,43 @@ EXPORT int64_t add_short_to_long(int16_t x, int64_t y) { return x + y; } +/* Test: test_pass_struct */ + +typedef struct PassMe { + int32_t value; + int16_t other_value; +} PassMe; + +EXPORT int32_t pass_struct(const PassMe pass_me) { + return pass_me.value + pass_me.other_value; +} + +/* Test: test_pass_struct_complex */ + +typedef struct Part1 { + uint16_t high; + uint16_t low; +} Part1; + +typedef struct Part2 { + uint32_t bits; +} Part2; + +typedef struct ComplexStruct { + Part1 part_1; + Part2 part_2; + uint32_t part_3; +} ComplexStruct; + +EXPORT int32_t pass_struct_complex(const ComplexStruct complex) { + if ((((uint32_t)complex.part_1.high) << 16 | (uint32_t)complex.part_1.low) == complex.part_2.bits + && complex.part_2.bits == complex.part_3) + return 0; + else { + return 1; + } +} + // To test that functions not marked with EXPORT cannot be called by Miri. int32_t not_exported(void) { return 0; From c6765bff48911488068839b5121bf39fec597e1f Mon Sep 17 00:00:00 2001 From: Nia Date: Tue, 22 Jul 2025 21:10:41 +0200 Subject: [PATCH 2/6] Apply suggestions from code review Co-authored-by: Ralf Jung --- src/shims/native_lib/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/shims/native_lib/mod.rs b/src/shims/native_lib/mod.rs index 16aa475cb0..ee23ba458f 100644 --- a/src/shims/native_lib/mod.rs +++ b/src/shims/native_lib/mod.rs @@ -272,7 +272,7 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { /// Extract the value from the result of reading an operand from the machine /// and convert it to a `CArg`. - fn op_to_carg(&self, v: &OpTy<'tcx>, tracing: bool) -> InterpResult<'tcx, CArg> { + fn op_to_ffi_arg(&self, v: &OpTy<'tcx>, tracing: bool) -> InterpResult<'tcx, CArg> { let this = self.eval_context_ref(); let scalar = |v| interp_ok(this.read_immediate(v)?.to_scalar()); interp_ok(match v.layout.ty.kind() { @@ -337,7 +337,7 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { let ptr_raw = this.get_alloc_bytes_unchecked_raw(id)?; // SAFETY: We know for sure that at ptr_raw the next layout.size bytes // are part of this allocation and initialised. They might be marked as - // uninit in Miri, but all bytes returned by the isolated allocator are + // uninit in Miri, but all bytes returned by `MiriAllocBytes` are // initialised. unsafe { std::slice::from_raw_parts(ptr_raw, mplace.layout.size.bytes_usize()) @@ -379,7 +379,7 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { } // TODO: unions, etc. if !adt_def.is_struct() { - throw_unsup_format!("unsupported argument type for native call: {orig_ty}"); + throw_unsup_format!("unsupported argument type for native call: {orig_ty} is an enum or union"); } let this = self.eval_context_ref(); From 9a8af0cabb540818a9911ff01c15a72bd61aeb50 Mon Sep 17 00:00:00 2001 From: Nia Espera Date: Sun, 17 Aug 2025 20:41:54 +0200 Subject: [PATCH 3/6] address review, refactor ffi logic --- src/shims/native_lib/ffi.rs | 118 +++++++------- src/shims/native_lib/mod.rs | 146 +++++++++--------- tests/native-lib/aggregate_arguments.c | 42 +++++ tests/native-lib/fail/multi_struct_alloc.rs | 21 +++ .../native-lib/fail/multi_struct_alloc.stderr | 15 ++ tests/native-lib/fail/struct_not_extern_c.rs | 2 +- .../fail/struct_not_extern_c.stderr | 7 +- tests/native-lib/fail/uninit_struct.rs | 27 ++++ tests/native-lib/fail/uninit_struct.stderr | 15 ++ tests/native-lib/pass/aggregate_arguments.rs | 51 ++++++ tests/native-lib/pass/ptr_read_access.rs | 20 +++ tests/native-lib/pass/scalar_arguments.rs | 49 ------ tests/native-lib/ptr_read_access.c | 10 ++ tests/native-lib/scalar_arguments.c | 37 ----- tests/ui.rs | 1 + 15 files changed, 347 insertions(+), 214 deletions(-) create mode 100644 tests/native-lib/aggregate_arguments.c create mode 100644 tests/native-lib/fail/multi_struct_alloc.rs create mode 100644 tests/native-lib/fail/multi_struct_alloc.stderr create mode 100644 tests/native-lib/fail/uninit_struct.rs create mode 100644 tests/native-lib/fail/uninit_struct.stderr create mode 100644 tests/native-lib/pass/aggregate_arguments.rs diff --git a/src/shims/native_lib/ffi.rs b/src/shims/native_lib/ffi.rs index 1f9adc6222..7991c9a546 100644 --- a/src/shims/native_lib/ffi.rs +++ b/src/shims/native_lib/ffi.rs @@ -3,72 +3,65 @@ use libffi::middle::{Arg as ArgPtr, Cif, Type as FfiType}; /// Perform the actual FFI call. /// -/// SAFETY: The `FfiArg`s passed must have been correctly instantiated (i.e. their -/// type layout must match the data they point to), and the safety invariants of -/// the foreign function being called must be upheld (if any). -pub unsafe fn call<'a, R: libffi::high::CType>(fun: CodePtr, args: Vec>) -> R { +/// SAFETY: The safety invariants of the foreign function being called must be +/// upheld (if any). +pub unsafe fn call(fun: CodePtr, args: &[OwnedArg]) -> R { let mut arg_tys = vec![]; let mut arg_ptrs = vec![]; for arg in args { - arg_tys.push(arg.ty); - arg_ptrs.push(arg.ptr) + arg_tys.push(arg.ty()); + arg_ptrs.push(arg.ptr()) } let cif = Cif::new(arg_tys, R::reify().into_middle()); + // SAFETY: Caller upholds that the function is safe to call, and since we + // were passed a slice reference we know the `OwnedArg`s won't have been + // by this point. unsafe { cif.call(fun, &arg_ptrs) } } -/// A wrapper type for `libffi::middle::Type` which also holds a pointer to the data. -pub struct FfiArg<'a> { - /// The type layout information for the pointed-to data. - ty: FfiType, - /// A pointer to the data described in `ty`. - ptr: ArgPtr, - /// Lifetime of the actual pointed-to data. - _p: std::marker::PhantomData<&'a [u8]>, -} - -impl<'a> FfiArg<'a> { - fn new(ty: FfiType, ptr: ArgPtr) -> Self { - Self { ty, ptr, _p: std::marker::PhantomData } - } -} - -/// An owning form of `FfiArg`. -/// We introduce this enum instead of just calling `Arg::new` and storing a list of -/// `libffi::middle::Arg` directly, because the `libffi::middle::Arg` just wraps a reference to -/// the value it represents and we need to store a copy of the value, and pass a reference to -/// this copy to C instead. +/// An argument for an FFI call. #[derive(Debug, Clone)] -pub enum CArg { +pub enum OwnedArg { /// Primitive type. - Primitive(CPrimitive), - /// Struct with its computed type layout and bytes. - Struct(FfiType, Box<[u8]>), + Primitive(ScalarArg), + /// ADT with its computed type layout and bytes. + Adt(FfiType, Box<[u8]>), } -impl CArg { - /// Convert a `CArg` to the required FFI argument type. - pub fn arg_downcast<'a>(&'a self) -> FfiArg<'a> { +impl OwnedArg { + /// Gets the libffi type descriptor for this argument. + fn ty(&self) -> FfiType { match self { - CArg::Primitive(cprim) => cprim.arg_downcast(), + OwnedArg::Primitive(scalar_arg) => scalar_arg.ty(), + OwnedArg::Adt(ty, _) => ty.clone(), + } + } + + /// Instantiates a libffi argument pointer pointing to this argument's bytes. + /// NB: Since `libffi::middle::Arg` ignores the lifetime of the reference + /// it's derived from, it is up to the caller to ensure the `OwnedArg` is + /// not dropped before unsafely calling `libffi::middle::Cif::call()`! + fn ptr(&self) -> ArgPtr { + match self { + OwnedArg::Primitive(scalar_arg) => scalar_arg.ptr(), // FIXME: Using `&items[0]` to reference the whole array is definitely // unsound under SB, but we're waiting on // https://github.com/libffi-rs/libffi-rs/commit/112a37b3b6ffb35bd75241fbcc580de40ba74a73 // to land in a release so that we don't need to do this. - CArg::Struct(cstruct, items) => FfiArg::new(cstruct.clone(), ArgPtr::new(&items[0])), + OwnedArg::Adt(_, items) => ArgPtr::new(&items[0]), } } } -impl From for CArg { - fn from(prim: CPrimitive) -> Self { +impl From for OwnedArg { + fn from(prim: ScalarArg) -> Self { Self::Primitive(prim) } } #[derive(Debug, Clone)] -/// Enum of supported primitive arguments to external C functions. -pub enum CPrimitive { +/// Enum of supported scalar arguments to external C functions. +pub enum ScalarArg { /// 8-bit signed integer. Int8(i8), /// 16-bit signed integer. @@ -93,21 +86,38 @@ pub enum CPrimitive { RawPtr(*mut std::ffi::c_void), } -impl CPrimitive { - /// Convert a primitive to the required FFI argument type. - fn arg_downcast<'a>(&'a self) -> FfiArg<'a> { +impl ScalarArg { + /// See `OwnedArg::ty()`. + fn ty(&self) -> FfiType { + match self { + ScalarArg::Int8(_) => FfiType::i8(), + ScalarArg::Int16(_) => FfiType::i16(), + ScalarArg::Int32(_) => FfiType::i32(), + ScalarArg::Int64(_) => FfiType::i64(), + ScalarArg::ISize(_) => FfiType::isize(), + ScalarArg::UInt8(_) => FfiType::u8(), + ScalarArg::UInt16(_) => FfiType::u16(), + ScalarArg::UInt32(_) => FfiType::u32(), + ScalarArg::UInt64(_) => FfiType::u64(), + ScalarArg::USize(_) => FfiType::usize(), + ScalarArg::RawPtr(_) => FfiType::pointer(), + } + } + + /// See `OwnedArg::ptr()`. + fn ptr(&self) -> ArgPtr { match self { - CPrimitive::Int8(i) => FfiArg::new(FfiType::i8(), ArgPtr::new(i)), - CPrimitive::Int16(i) => FfiArg::new(FfiType::i16(), ArgPtr::new(i)), - CPrimitive::Int32(i) => FfiArg::new(FfiType::i32(), ArgPtr::new(i)), - CPrimitive::Int64(i) => FfiArg::new(FfiType::i64(), ArgPtr::new(i)), - CPrimitive::ISize(i) => FfiArg::new(FfiType::isize(), ArgPtr::new(i)), - CPrimitive::UInt8(i) => FfiArg::new(FfiType::u8(), ArgPtr::new(i)), - CPrimitive::UInt16(i) => FfiArg::new(FfiType::u16(), ArgPtr::new(i)), - CPrimitive::UInt32(i) => FfiArg::new(FfiType::u32(), ArgPtr::new(i)), - CPrimitive::UInt64(i) => FfiArg::new(FfiType::u64(), ArgPtr::new(i)), - CPrimitive::USize(i) => FfiArg::new(FfiType::usize(), ArgPtr::new(i)), - CPrimitive::RawPtr(i) => FfiArg::new(FfiType::pointer(), ArgPtr::new(i)), + ScalarArg::Int8(i) => ArgPtr::new(i), + ScalarArg::Int16(i) => ArgPtr::new(i), + ScalarArg::Int32(i) => ArgPtr::new(i), + ScalarArg::Int64(i) => ArgPtr::new(i), + ScalarArg::ISize(i) => ArgPtr::new(i), + ScalarArg::UInt8(i) => ArgPtr::new(i), + ScalarArg::UInt16(i) => ArgPtr::new(i), + ScalarArg::UInt32(i) => ArgPtr::new(i), + ScalarArg::UInt64(i) => ArgPtr::new(i), + ScalarArg::USize(i) => ArgPtr::new(i), + ScalarArg::RawPtr(i) => ArgPtr::new(i), } } } diff --git a/src/shims/native_lib/mod.rs b/src/shims/native_lib/mod.rs index ee23ba458f..92e2aa0be7 100644 --- a/src/shims/native_lib/mod.rs +++ b/src/shims/native_lib/mod.rs @@ -21,7 +21,7 @@ mod ffi; )] pub mod trace; -use self::ffi::{CArg, CPrimitive, FfiArg}; +use self::ffi::{OwnedArg, ScalarArg}; use crate::*; /// The final results of an FFI trace, containing every relevant event detected @@ -72,12 +72,12 @@ impl AccessRange { impl<'tcx> EvalContextExtPriv<'tcx> for crate::MiriInterpCx<'tcx> {} trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { /// Call native host function and return the output as an immediate. - fn call_native_with_args<'a>( + fn call_native_with_args( &mut self, link_name: Symbol, dest: &MPlaceTy<'tcx>, ptr: CodePtr, - libffi_args: Vec>, + libffi_args: &[OwnedArg], ) -> InterpResult<'tcx, (crate::ImmTy<'tcx>, Option)> { let this = self.eval_context_mut(); #[cfg(target_os = "linux")] @@ -271,95 +271,90 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { } /// Extract the value from the result of reading an operand from the machine - /// and convert it to a `CArg`. - fn op_to_ffi_arg(&self, v: &OpTy<'tcx>, tracing: bool) -> InterpResult<'tcx, CArg> { + /// and convert it to a `OwnedArg`. + fn op_to_ffi_arg(&self, v: &OpTy<'tcx>, tracing: bool) -> InterpResult<'tcx, OwnedArg> { let this = self.eval_context_ref(); let scalar = |v| interp_ok(this.read_immediate(v)?.to_scalar()); interp_ok(match v.layout.ty.kind() { // If the primitive provided can be converted to a type matching the type pattern - // then create a `CArg` of this primitive value with the corresponding `CArg` constructor. + // then create a `OwnedArg` of this primitive value with the corresponding `OwnedArg` constructor. // the ints - ty::Int(IntTy::I8) => CPrimitive::Int8(scalar(v)?.to_i8()?).into(), - ty::Int(IntTy::I16) => CPrimitive::Int16(scalar(v)?.to_i16()?).into(), - ty::Int(IntTy::I32) => CPrimitive::Int32(scalar(v)?.to_i32()?).into(), - ty::Int(IntTy::I64) => CPrimitive::Int64(scalar(v)?.to_i64()?).into(), + ty::Int(IntTy::I8) => ScalarArg::Int8(scalar(v)?.to_i8()?).into(), + ty::Int(IntTy::I16) => ScalarArg::Int16(scalar(v)?.to_i16()?).into(), + ty::Int(IntTy::I32) => ScalarArg::Int32(scalar(v)?.to_i32()?).into(), + ty::Int(IntTy::I64) => ScalarArg::Int64(scalar(v)?.to_i64()?).into(), ty::Int(IntTy::Isize) => - CPrimitive::ISize(scalar(v)?.to_target_isize(this)?.try_into().unwrap()).into(), + ScalarArg::ISize(scalar(v)?.to_target_isize(this)?.try_into().unwrap()).into(), // the uints - ty::Uint(UintTy::U8) => CPrimitive::UInt8(scalar(v)?.to_u8()?).into(), - ty::Uint(UintTy::U16) => CPrimitive::UInt16(scalar(v)?.to_u16()?).into(), - ty::Uint(UintTy::U32) => CPrimitive::UInt32(scalar(v)?.to_u32()?).into(), - ty::Uint(UintTy::U64) => CPrimitive::UInt64(scalar(v)?.to_u64()?).into(), + ty::Uint(UintTy::U8) => ScalarArg::UInt8(scalar(v)?.to_u8()?).into(), + ty::Uint(UintTy::U16) => ScalarArg::UInt16(scalar(v)?.to_u16()?).into(), + ty::Uint(UintTy::U32) => ScalarArg::UInt32(scalar(v)?.to_u32()?).into(), + ty::Uint(UintTy::U64) => ScalarArg::UInt64(scalar(v)?.to_u64()?).into(), ty::Uint(UintTy::Usize) => - CPrimitive::USize(scalar(v)?.to_target_usize(this)?.try_into().unwrap()).into(), + ScalarArg::USize(scalar(v)?.to_target_usize(this)?.try_into().unwrap()).into(), ty::RawPtr(..) => { let ptr = scalar(v)?.to_pointer(this)?; - // Pointer without provenance may not access any memory anyway, skip. - if let Some(prov) = ptr.provenance { - // The first time this happens, print a warning. - if !this.machine.native_call_mem_warned.replace(true) { - // Newly set, so first time we get here. - this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem { tracing }); - } - - this.expose_provenance(prov)?; - }; + this.expose_and_warn(ptr.provenance, tracing)?; // This relies on the `expose_provenance` in the `visit_reachable_allocs` callback // below to expose the actual interpreter-level allocation. - CPrimitive::RawPtr(std::ptr::with_exposed_provenance_mut(ptr.addr().bytes_usize())) + ScalarArg::RawPtr(std::ptr::with_exposed_provenance_mut(ptr.addr().bytes_usize())) .into() } // For ADTs, create an FfiType from their fields. ty::Adt(adt_def, args) => { - let strukt = this.adt_to_ffitype(v.layout.ty, *adt_def, args)?; + let adt = this.adt_to_ffitype(v.layout.ty, *adt_def, args)?; // Copy the raw bytes backing this arg. let bytes = match v.as_mplace_or_imm() { either::Either::Left(mplace) => { - // We do all of this to grab the bytes without actually - // stripping provenance from them, since it'll later be - // exposed recursively. - let ptr = mplace.ptr(); - // Make sure the provenance of this allocation is exposed; - // there must be one for this mplace to be valid at all. - // The interpreter-level allocation itself is exposed in - // visit_reachable_allocs. - this.expose_provenance(ptr.provenance.unwrap())?; - // Then get the actual bytes. + // Get the alloc id corresponding to this mplace, alongside + // a pointer that's offset to point to this particular + // mplace (not one at the base addr of the allocation). + let mplace_ptr = mplace.ptr(); + let sz = mplace.layout.size.bytes_usize(); let id = this .alloc_id_from_addr( - ptr.addr().bytes(), - mplace.layout.size.bytes_usize().try_into().unwrap(), - /* only_exposed_allocations */ true, + mplace_ptr.addr().bytes(), + sz.try_into().unwrap(), + /* only_exposed_allocations */ false, ) .unwrap(); - let ptr_raw = this.get_alloc_bytes_unchecked_raw(id)?; - // SAFETY: We know for sure that at ptr_raw the next layout.size bytes - // are part of this allocation and initialised. They might be marked as - // uninit in Miri, but all bytes returned by `MiriAllocBytes` are + // Expose all provenances in the allocation within the byte + // range of the struct, if any. + let alloc = this.get_alloc_raw(id)?; + let alloc_ptr = this.get_alloc_bytes_unchecked_raw(id)?; + let start_addr = + mplace_ptr.addr().bytes_usize().strict_sub(alloc_ptr.addr()); + for byte in start_addr..start_addr.strict_add(sz) { + if let Some(prov) = alloc.provenance().get(Size::from_bytes(byte), this) + { + this.expose_provenance(prov)?; + } + } + // SAFETY: We know for sure that at mplace_ptr.addr() the next layout.size + // bytes are part of this allocation and initialised. They might be marked + // as uninit in Miri, but all bytes returned by `MiriAllocBytes` are // initialised. unsafe { - std::slice::from_raw_parts(ptr_raw, mplace.layout.size.bytes_usize()) - .to_vec() - .into_boxed_slice() + std::slice::from_raw_parts( + alloc_ptr.with_addr(mplace_ptr.addr().bytes_usize()), + mplace.layout.size.bytes_usize(), + ) + .to_vec() + .into_boxed_slice() } } - either::Either::Right(imm) => { - // For immediates, we know the backing scalar is going to be 128 bits, - // so we can just copy that. - // TODO: is it possible for this to be a scalar pair? - let scalar = imm.to_scalar(); - if scalar.size().bytes() > 0 { - let bits = scalar.to_bits(scalar.size())?; - bits.to_ne_bytes().to_vec().into_boxed_slice() - } else { - throw_ub_format!("attempting to pass a ZST over FFI: {}", imm.layout.ty) - } + either::Either::Right(_) => { + // TODO: support this! + throw_unsup_format!( + "Immediate structs can't be passed over FFI: {}", + v.layout.ty + ) } }; - ffi::CArg::Struct(strukt, bytes) + ffi::OwnedArg::Adt(adt, bytes) } _ => throw_unsup_format!("unsupported argument type for native call: {}", v.layout.ty), }) @@ -372,14 +367,15 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { adt_def: ty::AdtDef<'tcx>, args: &'tcx ty::List>, ) -> InterpResult<'tcx, FfiType> { - // TODO: is this correct? Maybe `repr(transparent)` when the inner field - // is itself `repr(c)` is ok? + // TODO: Certain non-C reprs should be okay also. if !adt_def.repr().c() { - throw_ub_format!("passing a non-#[repr(C)] struct over FFI: {orig_ty}") + throw_unsup_format!("passing a non-#[repr(C)] struct over FFI: {orig_ty}") } // TODO: unions, etc. if !adt_def.is_struct() { - throw_unsup_format!("unsupported argument type for native call: {orig_ty} is an enum or union"); + throw_unsup_format!( + "unsupported argument type for native call: {orig_ty} is an enum or union" + ); } let this = self.eval_context_ref(); @@ -410,6 +406,20 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { _ => throw_unsup_format!("unsupported argument type for native call: {}", ty), }) } + + fn expose_and_warn(&self, prov: Option, tracing: bool) -> InterpResult<'tcx> { + let this = self.eval_context_ref(); + if let Some(prov) = prov { + // The first time this happens, print a warning. + if !this.machine.native_call_mem_warned.replace(true) { + // Newly set, so first time we get here. + this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem { tracing }); + } + + this.expose_provenance(prov)?; + }; + interp_ok(()) + } } impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {} @@ -439,12 +449,10 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let tracing = trace::Supervisor::is_enabled(); // Get the function arguments, copy them, and prepare the type descriptions. - let mut libffi_args = Vec::::with_capacity(args.len()); + let mut libffi_args = Vec::::with_capacity(args.len()); for arg in args.iter() { - libffi_args.push(this.op_to_carg(arg, tracing)?); + libffi_args.push(this.op_to_ffi_arg(arg, tracing)?); } - // Convert arguments to a libffi-compatible type. - let libffi_args = libffi_args.iter().map(|arg| arg.arg_downcast()).collect::>(); // Prepare all exposed memory (both previously exposed, and just newly exposed since a // pointer was passed as argument). Uninitialised memory is left as-is, but any data @@ -487,7 +495,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // Call the function and store output, depending on return type in the function signature. let (ret, maybe_memevents) = - this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?; + this.call_native_with_args(link_name, dest, code_ptr, &libffi_args)?; if tracing { this.tracing_apply_accesses(maybe_memevents.unwrap())?; diff --git a/tests/native-lib/aggregate_arguments.c b/tests/native-lib/aggregate_arguments.c new file mode 100644 index 0000000000..01ba186127 --- /dev/null +++ b/tests/native-lib/aggregate_arguments.c @@ -0,0 +1,42 @@ +#include + +// See comments in build_native_lib() +#define EXPORT __attribute__((visibility("default"))) + +/* Test: test_pass_struct */ + +typedef struct PassMe { + int32_t value; + int16_t other_value; +} PassMe; + +EXPORT int32_t pass_struct(const PassMe pass_me) { + return pass_me.value + pass_me.other_value; +} + +/* Test: test_pass_struct_complex */ + +typedef struct Part1 { + uint16_t high; + uint16_t low; +} Part1; + +typedef struct Part2 { + uint32_t bits; +} Part2; + +typedef struct ComplexStruct { + Part1 part_1; + Part2 part_2; + uint32_t part_3; +} ComplexStruct; + +EXPORT int32_t pass_struct_complex(const ComplexStruct complex, uint16_t high, uint16_t low, uint32_t bits) { + if (complex.part_1.high == high && complex.part_1.low == low + && complex.part_2.bits == bits + && complex.part_3 == bits) + return 0; + else { + return 1; + } +} diff --git a/tests/native-lib/fail/multi_struct_alloc.rs b/tests/native-lib/fail/multi_struct_alloc.rs new file mode 100644 index 0000000000..058beab059 --- /dev/null +++ b/tests/native-lib/fail/multi_struct_alloc.rs @@ -0,0 +1,21 @@ +//@compile-flags: -Zmiri-permissive-provenance + +#[repr(C)] +#[derive(Copy, Clone)] +struct HasPointer { + ptr: *const u8, +} + +extern "C" { + fn access_struct_ptr(s: HasPointer) -> u8; +} + +fn main() { + let vals = [10u8, 20u8]; + let structs = + vec![HasPointer { ptr: &raw const vals[0] }, HasPointer { ptr: &raw const vals[1] }]; + unsafe { + access_struct_ptr(structs[1]); + let _val = *std::ptr::with_exposed_provenance::(structs[0].ptr.addr()); //~ ERROR: Undefined Behavior: attempting a read access using + }; +} diff --git a/tests/native-lib/fail/multi_struct_alloc.stderr b/tests/native-lib/fail/multi_struct_alloc.stderr new file mode 100644 index 0000000000..44656f9ee5 --- /dev/null +++ b/tests/native-lib/fail/multi_struct_alloc.stderr @@ -0,0 +1,15 @@ +error: Undefined Behavior: attempting a read access using at ALLOC[0x0], but no exposed tags have suitable permission in the borrow stack for this location + --> tests/native-lib/fail/multi_struct_alloc.rs:LL:CC + | +LL | ...al = *std::ptr::with_exposed_provenance::(structs[0].ptr.addr()); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ this error occurs as part of an access at ALLOC[0x0..0x1] + | + = help: this indicates a potential bug in the program: it performed an invalid operation, but the Stacked Borrows rules it violated are still experimental + = help: see https://github.com/rust-lang/unsafe-code-guidelines/blob/master/wip/stacked-borrows.md for further information + = note: BACKTRACE: + = note: inside `main` at tests/native-lib/fail/multi_struct_alloc.rs:LL:CC + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to 1 previous error + diff --git a/tests/native-lib/fail/struct_not_extern_c.rs b/tests/native-lib/fail/struct_not_extern_c.rs index 2baa40489f..e09930dc75 100644 --- a/tests/native-lib/fail/struct_not_extern_c.rs +++ b/tests/native-lib/fail/struct_not_extern_c.rs @@ -15,5 +15,5 @@ extern "C" { fn main() { let pass_me = PassMe { value: 42, other_value: 1337 }; - unsafe { pass_struct(pass_me) }; //~ ERROR: Undefined Behavior: passing a non-#[repr(C)] struct over FFI + unsafe { pass_struct(pass_me) }; //~ ERROR: unsupported operation: passing a non-#[repr(C)] struct over FFI } diff --git a/tests/native-lib/fail/struct_not_extern_c.stderr b/tests/native-lib/fail/struct_not_extern_c.stderr index eb0d37645c..90e59a31da 100644 --- a/tests/native-lib/fail/struct_not_extern_c.stderr +++ b/tests/native-lib/fail/struct_not_extern_c.stderr @@ -1,11 +1,10 @@ -error: Undefined Behavior: passing a non-#[repr(C)] struct over FFI: PassMe +error: unsupported operation: passing a non-#[repr(C)] struct over FFI: PassMe --> tests/native-lib/fail/struct_not_extern_c.rs:LL:CC | LL | unsafe { pass_struct(pass_me) }; - | ^^^^^^^^^^^^^^^^^^^^ Undefined Behavior occurred here + | ^^^^^^^^^^^^^^^^^^^^ unsupported operation occurred here | - = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior - = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + = help: this is likely not a bug in the program; it indicates that the program performed an operation that Miri does not support = note: BACKTRACE: = note: inside `main` at tests/native-lib/fail/struct_not_extern_c.rs:LL:CC diff --git a/tests/native-lib/fail/uninit_struct.rs b/tests/native-lib/fail/uninit_struct.rs new file mode 100644 index 0000000000..cf61c7f391 --- /dev/null +++ b/tests/native-lib/fail/uninit_struct.rs @@ -0,0 +1,27 @@ +#[repr(C)] +#[derive(Copy, Clone)] +struct ComplexStruct { + part_1: Part1, + part_2: Part2, + part_3: u32, +} +#[repr(C)] +#[derive(Copy, Clone)] +struct Part1 { + high: u16, + low: u16, +} +#[repr(C)] +#[derive(Copy, Clone)] +struct Part2 { + bits: u32, +} + +extern "C" { + fn pass_struct_complex(s: ComplexStruct, high: u16, low: u16, bits: u32) -> i32; +} + +fn main() { + let arg = std::mem::MaybeUninit::::uninit(); + unsafe { pass_struct_complex(*arg.as_ptr(), 0, 0, 0) }; //~ ERROR: Undefined Behavior: constructing invalid value +} diff --git a/tests/native-lib/fail/uninit_struct.stderr b/tests/native-lib/fail/uninit_struct.stderr new file mode 100644 index 0000000000..0fe6ad9c77 --- /dev/null +++ b/tests/native-lib/fail/uninit_struct.stderr @@ -0,0 +1,15 @@ +error: Undefined Behavior: constructing invalid value at .part_1.high: encountered uninitialized memory, but expected an integer + --> tests/native-lib/fail/uninit_struct.rs:LL:CC + | +LL | unsafe { pass_struct_complex(*arg.as_ptr(), 0, 0, 0) }; + | ^^^^^^^^^^^^^ Undefined Behavior occurred here + | + = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior + = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + = note: BACKTRACE: + = note: inside `main` at tests/native-lib/fail/uninit_struct.rs:LL:CC + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to 1 previous error + diff --git a/tests/native-lib/pass/aggregate_arguments.rs b/tests/native-lib/pass/aggregate_arguments.rs new file mode 100644 index 0000000000..85b046665a --- /dev/null +++ b/tests/native-lib/pass/aggregate_arguments.rs @@ -0,0 +1,51 @@ +fn main() { + test_pass_struct(); + test_pass_struct_complex(); +} + +/// Test passing a basic struct as an argument. +fn test_pass_struct() { + #[repr(C)] + struct PassMe { + value: i32, + other_value: i16, + } + + extern "C" { + fn pass_struct(s: PassMe) -> i32; + } + + let pass_me = PassMe { value: 42, other_value: 1337 }; + assert_eq!(unsafe { pass_struct(pass_me) }, 42 + 1337); +} + +/// Test passing a more complex struct as an argument. +fn test_pass_struct_complex() { + #[repr(C)] + struct ComplexStruct { + part_1: Part1, + part_2: Part2, + part_3: u32, + } + #[repr(C)] + struct Part1 { + high: u16, + low: u16, + } + #[repr(C)] + struct Part2 { + bits: u32, + } + + extern "C" { + fn pass_struct_complex(s: ComplexStruct, high: u16, low: u16, bits: u32) -> i32; + } + + let high = 0xabcd; + let low = 0xef01; + let bits = 0xabcdef01; + + let complex = + ComplexStruct { part_1: Part1 { high, low }, part_2: Part2 { bits }, part_3: bits }; + assert_eq!(unsafe { pass_struct_complex(complex, high, low, bits) }, 0); +} diff --git a/tests/native-lib/pass/ptr_read_access.rs b/tests/native-lib/pass/ptr_read_access.rs index 4f3c37f00c..49750a734d 100644 --- a/tests/native-lib/pass/ptr_read_access.rs +++ b/tests/native-lib/pass/ptr_read_access.rs @@ -1,12 +1,14 @@ //@revisions: trace notrace //@[trace] only-target: x86_64-unknown-linux-gnu i686-unknown-linux-gnu //@[trace] compile-flags: -Zmiri-native-lib-enable-tracing +//@compile-flags: -Zmiri-permissive-provenance fn main() { test_access_pointer(); test_access_simple(); test_access_nested(); test_access_static(); + test_access_struct_ptr(); } /// Test function that dereferences an int pointer and prints its contents from C. @@ -74,3 +76,21 @@ fn test_access_static() { assert_eq!(unsafe { access_static(&STATIC) }, 9001); } + +/// Test exposing provenance from a field within a struct. +fn test_access_struct_ptr() { + #[repr(C)] + struct HasPointer { + ptr: *const u8, + } + + extern "C" { + // Return value exists only so the access isn't optimised away. + fn access_struct_ptr(s: HasPointer) -> u8; + } + + let some_val = 42u8; + let ptr = &raw const some_val; + unsafe { access_struct_ptr(HasPointer { ptr }) }; + assert_eq!(some_val, unsafe { *std::ptr::with_exposed_provenance::(ptr.addr()) }) +} diff --git a/tests/native-lib/pass/scalar_arguments.rs b/tests/native-lib/pass/scalar_arguments.rs index f885fdacad..9e99977a69 100644 --- a/tests/native-lib/pass/scalar_arguments.rs +++ b/tests/native-lib/pass/scalar_arguments.rs @@ -39,54 +39,5 @@ fn main() { // test void function that prints from C printer(); - - test_pass_struct(); - test_pass_struct_complex(); - } -} - -/// Test passing a basic struct as an argument. -fn test_pass_struct() { - #[repr(C)] - struct PassMe { - value: i32, - other_value: i16, } - - extern "C" { - fn pass_struct(s: PassMe) -> i32; - } - - let pass_me = PassMe { value: 42, other_value: 1337 }; - assert_eq!(unsafe { pass_struct(pass_me) }, 42 + 1337); -} - -/// Test passing a more complex struct as an argument. -fn test_pass_struct_complex() { - #[repr(C)] - struct ComplexStruct { - part_1: Part1, - part_2: Part2, - part_3: u32, - } - #[repr(C)] - struct Part1 { - high: u16, - low: u16, - } - #[repr(C)] - struct Part2 { - bits: u32, - } - - extern "C" { - fn pass_struct_complex(s: ComplexStruct) -> i32; - } - - let complex = ComplexStruct { - part_1: Part1 { high: 0xabcd, low: 0xef01 }, - part_2: Part2 { bits: 0xabcdef01 }, - part_3: 0xabcdef01, - }; - assert_eq!(unsafe { pass_struct_complex(complex) }, 0); } diff --git a/tests/native-lib/ptr_read_access.c b/tests/native-lib/ptr_read_access.c index 021eb6adca..2107d2bc21 100644 --- a/tests/native-lib/ptr_read_access.c +++ b/tests/native-lib/ptr_read_access.c @@ -55,3 +55,13 @@ EXPORT int32_t access_static(const Static *s_ptr) { EXPORT uintptr_t do_one_deref(const int32_t ***ptr) { return (uintptr_t)*ptr; } + +/* Test: test_access_struct_ptr */ + +typedef struct HasPointer { + uint8_t *ptr; +} HasPointer; + +EXPORT uint8_t access_struct_ptr(const HasPointer s) { + return *s.ptr; +} diff --git a/tests/native-lib/scalar_arguments.c b/tests/native-lib/scalar_arguments.c index 56f41b133a..8cf38f7441 100644 --- a/tests/native-lib/scalar_arguments.c +++ b/tests/native-lib/scalar_arguments.c @@ -30,43 +30,6 @@ EXPORT int64_t add_short_to_long(int16_t x, int64_t y) { return x + y; } -/* Test: test_pass_struct */ - -typedef struct PassMe { - int32_t value; - int16_t other_value; -} PassMe; - -EXPORT int32_t pass_struct(const PassMe pass_me) { - return pass_me.value + pass_me.other_value; -} - -/* Test: test_pass_struct_complex */ - -typedef struct Part1 { - uint16_t high; - uint16_t low; -} Part1; - -typedef struct Part2 { - uint32_t bits; -} Part2; - -typedef struct ComplexStruct { - Part1 part_1; - Part2 part_2; - uint32_t part_3; -} ComplexStruct; - -EXPORT int32_t pass_struct_complex(const ComplexStruct complex) { - if ((((uint32_t)complex.part_1.high) << 16 | (uint32_t)complex.part_1.low) == complex.part_2.bits - && complex.part_2.bits == complex.part_3) - return 0; - else { - return 1; - } -} - // To test that functions not marked with EXPORT cannot be called by Miri. int32_t not_exported(void) { return 0; diff --git a/tests/ui.rs b/tests/ui.rs index f021d5194c..b7286d9a36 100644 --- a/tests/ui.rs +++ b/tests/ui.rs @@ -60,6 +60,7 @@ fn build_native_lib(target: &str) -> PathBuf { native_lib_path.to_str().unwrap(), // FIXME: Automate gathering of all relevant C source files in the directory. "tests/native-lib/scalar_arguments.c", + "tests/native-lib/aggregate_arguments.c", "tests/native-lib/ptr_read_access.c", "tests/native-lib/ptr_write_access.c", // Ensure we notice serious problems in the C code. From 737875c42763f5dff321852c99468e6234084f0c Mon Sep 17 00:00:00 2001 From: Nia Espera Date: Sun, 24 Aug 2025 19:10:46 +0200 Subject: [PATCH 4/6] a word --- src/shims/native_lib/ffi.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shims/native_lib/ffi.rs b/src/shims/native_lib/ffi.rs index 7991c9a546..1447afcdca 100644 --- a/src/shims/native_lib/ffi.rs +++ b/src/shims/native_lib/ffi.rs @@ -15,7 +15,7 @@ pub unsafe fn call(fun: CodePtr, args: &[OwnedArg]) -> R let cif = Cif::new(arg_tys, R::reify().into_middle()); // SAFETY: Caller upholds that the function is safe to call, and since we // were passed a slice reference we know the `OwnedArg`s won't have been - // by this point. + // dropped by this point. unsafe { cif.call(fun, &arg_ptrs) } } From a4e189b1d30a46beae261db4b5a8c43fe4fab8b6 Mon Sep 17 00:00:00 2001 From: Nia Espera Date: Sun, 24 Aug 2025 19:52:30 +0200 Subject: [PATCH 5/6] use get_range, but seems like structs can be imm now --- src/shims/native_lib/mod.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/shims/native_lib/mod.rs b/src/shims/native_lib/mod.rs index 92e2aa0be7..f9dcb3363d 100644 --- a/src/shims/native_lib/mod.rs +++ b/src/shims/native_lib/mod.rs @@ -326,11 +326,11 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { let alloc_ptr = this.get_alloc_bytes_unchecked_raw(id)?; let start_addr = mplace_ptr.addr().bytes_usize().strict_sub(alloc_ptr.addr()); - for byte in start_addr..start_addr.strict_add(sz) { - if let Some(prov) = alloc.provenance().get(Size::from_bytes(byte), this) - { - this.expose_provenance(prov)?; - } + for prov in alloc + .provenance() + .get_range(this, (start_addr..start_addr.strict_add(sz)).into()) + { + this.expose_provenance(prov)?; } // SAFETY: We know for sure that at mplace_ptr.addr() the next layout.size // bytes are part of this allocation and initialised. They might be marked From 1cb641c0e96bc828fcaeb243378885fa5e8d3fc9 Mon Sep 17 00:00:00 2001 From: Nia Espera Date: Sun, 24 Aug 2025 19:10:31 +0200 Subject: [PATCH 6/6] rework + review --- src/shims/native_lib/ffi.rs | 111 ++--------- src/shims/native_lib/mod.rs | 195 +++++++++++-------- tests/native-lib/aggregate_arguments.c | 4 +- tests/native-lib/fail/struct_not_extern_c.rs | 4 +- tests/native-lib/pass/aggregate_arguments.rs | 4 +- 5 files changed, 141 insertions(+), 177 deletions(-) diff --git a/src/shims/native_lib/ffi.rs b/src/shims/native_lib/ffi.rs index 1447afcdca..b2615cedae 100644 --- a/src/shims/native_lib/ffi.rs +++ b/src/shims/native_lib/ffi.rs @@ -5,11 +5,11 @@ use libffi::middle::{Arg as ArgPtr, Cif, Type as FfiType}; /// /// SAFETY: The safety invariants of the foreign function being called must be /// upheld (if any). -pub unsafe fn call(fun: CodePtr, args: &[OwnedArg]) -> R { +pub unsafe fn call(fun: CodePtr, args: &mut [OwnedArg]) -> R { let mut arg_tys = vec![]; let mut arg_ptrs = vec![]; for arg in args { - arg_tys.push(arg.ty()); + arg_tys.push(arg.take_ty()); arg_ptrs.push(arg.ptr()) } let cif = Cif::new(arg_tys, R::reify().into_middle()); @@ -21,20 +21,23 @@ pub unsafe fn call(fun: CodePtr, args: &[OwnedArg]) -> R /// An argument for an FFI call. #[derive(Debug, Clone)] -pub enum OwnedArg { - /// Primitive type. - Primitive(ScalarArg), - /// ADT with its computed type layout and bytes. - Adt(FfiType, Box<[u8]>), +pub struct OwnedArg { + /// The type descriptor for this argument. + ty: Option, + /// Corresponding bytes for the value. + bytes: Box<[u8]>, } impl OwnedArg { - /// Gets the libffi type descriptor for this argument. - fn ty(&self) -> FfiType { - match self { - OwnedArg::Primitive(scalar_arg) => scalar_arg.ty(), - OwnedArg::Adt(ty, _) => ty.clone(), - } + /// Instantiates an argument from a type descriptor and bytes. + pub fn new(ty: FfiType, bytes: Box<[u8]>) -> Self { + Self { ty: Some(ty), bytes } + } + + /// Gets the libffi type descriptor for this argument. Should only be + /// called once on a given `OwnedArg`. + fn take_ty(&mut self) -> FfiType { + self.ty.take().unwrap() } /// Instantiates a libffi argument pointer pointing to this argument's bytes. @@ -42,82 +45,10 @@ impl OwnedArg { /// it's derived from, it is up to the caller to ensure the `OwnedArg` is /// not dropped before unsafely calling `libffi::middle::Cif::call()`! fn ptr(&self) -> ArgPtr { - match self { - OwnedArg::Primitive(scalar_arg) => scalar_arg.ptr(), - // FIXME: Using `&items[0]` to reference the whole array is definitely - // unsound under SB, but we're waiting on - // https://github.com/libffi-rs/libffi-rs/commit/112a37b3b6ffb35bd75241fbcc580de40ba74a73 - // to land in a release so that we don't need to do this. - OwnedArg::Adt(_, items) => ArgPtr::new(&items[0]), - } - } -} - -impl From for OwnedArg { - fn from(prim: ScalarArg) -> Self { - Self::Primitive(prim) - } -} - -#[derive(Debug, Clone)] -/// Enum of supported scalar arguments to external C functions. -pub enum ScalarArg { - /// 8-bit signed integer. - Int8(i8), - /// 16-bit signed integer. - Int16(i16), - /// 32-bit signed integer. - Int32(i32), - /// 64-bit signed integer. - Int64(i64), - /// isize. - ISize(isize), - /// 8-bit unsigned integer. - UInt8(u8), - /// 16-bit unsigned integer. - UInt16(u16), - /// 32-bit unsigned integer. - UInt32(u32), - /// 64-bit unsigned integer. - UInt64(u64), - /// usize. - USize(usize), - /// Raw pointer, stored as C's `void*`. - RawPtr(*mut std::ffi::c_void), -} - -impl ScalarArg { - /// See `OwnedArg::ty()`. - fn ty(&self) -> FfiType { - match self { - ScalarArg::Int8(_) => FfiType::i8(), - ScalarArg::Int16(_) => FfiType::i16(), - ScalarArg::Int32(_) => FfiType::i32(), - ScalarArg::Int64(_) => FfiType::i64(), - ScalarArg::ISize(_) => FfiType::isize(), - ScalarArg::UInt8(_) => FfiType::u8(), - ScalarArg::UInt16(_) => FfiType::u16(), - ScalarArg::UInt32(_) => FfiType::u32(), - ScalarArg::UInt64(_) => FfiType::u64(), - ScalarArg::USize(_) => FfiType::usize(), - ScalarArg::RawPtr(_) => FfiType::pointer(), - } - } - - /// See `OwnedArg::ptr()`. - fn ptr(&self) -> ArgPtr { - match self { - ScalarArg::Int8(i) => ArgPtr::new(i), - ScalarArg::Int16(i) => ArgPtr::new(i), - ScalarArg::Int32(i) => ArgPtr::new(i), - ScalarArg::Int64(i) => ArgPtr::new(i), - ScalarArg::ISize(i) => ArgPtr::new(i), - ScalarArg::UInt8(i) => ArgPtr::new(i), - ScalarArg::UInt16(i) => ArgPtr::new(i), - ScalarArg::UInt32(i) => ArgPtr::new(i), - ScalarArg::UInt64(i) => ArgPtr::new(i), - ScalarArg::USize(i) => ArgPtr::new(i), - ScalarArg::RawPtr(i) => ArgPtr::new(i), - } + // FIXME: Using `&self.bytes[0]` to reference the whole array is + // definitely unsound under SB, but we're waiting on + // https://github.com/libffi-rs/libffi-rs/commit/112a37b3b6ffb35bd75241fbcc580de40ba74a73 + // to land in a release so that we don't need to do this. + ArgPtr::new(&self.bytes[0]) } } diff --git a/src/shims/native_lib/mod.rs b/src/shims/native_lib/mod.rs index f9dcb3363d..756ff992ae 100644 --- a/src/shims/native_lib/mod.rs +++ b/src/shims/native_lib/mod.rs @@ -1,10 +1,11 @@ //! Implements calling functions from a native library. +use std::io::Write; use std::ops::Deref; use libffi::low::CodePtr; use libffi::middle::Type as FfiType; -use rustc_abi::Size; +use rustc_abi::{HasDataLayout, Size}; use rustc_middle::ty::{self as ty, IntTy, Ty, UintTy}; use rustc_span::Symbol; use serde::{Deserialize, Serialize}; @@ -21,7 +22,7 @@ mod ffi; )] pub mod trace; -use self::ffi::{OwnedArg, ScalarArg}; +use self::ffi::OwnedArg; use crate::*; /// The final results of an FFI trace, containing every relevant event detected @@ -77,7 +78,7 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { link_name: Symbol, dest: &MPlaceTy<'tcx>, ptr: CodePtr, - libffi_args: &[OwnedArg], + libffi_args: &mut [OwnedArg], ) -> InterpResult<'tcx, (crate::ImmTy<'tcx>, Option)> { let this = self.eval_context_mut(); #[cfg(target_os = "linux")] @@ -274,90 +275,122 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { /// and convert it to a `OwnedArg`. fn op_to_ffi_arg(&self, v: &OpTy<'tcx>, tracing: bool) -> InterpResult<'tcx, OwnedArg> { let this = self.eval_context_ref(); - let scalar = |v| interp_ok(this.read_immediate(v)?.to_scalar()); - interp_ok(match v.layout.ty.kind() { - // If the primitive provided can be converted to a type matching the type pattern - // then create a `OwnedArg` of this primitive value with the corresponding `OwnedArg` constructor. - // the ints - ty::Int(IntTy::I8) => ScalarArg::Int8(scalar(v)?.to_i8()?).into(), - ty::Int(IntTy::I16) => ScalarArg::Int16(scalar(v)?.to_i16()?).into(), - ty::Int(IntTy::I32) => ScalarArg::Int32(scalar(v)?.to_i32()?).into(), - ty::Int(IntTy::I64) => ScalarArg::Int64(scalar(v)?.to_i64()?).into(), - ty::Int(IntTy::Isize) => - ScalarArg::ISize(scalar(v)?.to_target_isize(this)?.try_into().unwrap()).into(), - // the uints - ty::Uint(UintTy::U8) => ScalarArg::UInt8(scalar(v)?.to_u8()?).into(), - ty::Uint(UintTy::U16) => ScalarArg::UInt16(scalar(v)?.to_u16()?).into(), - ty::Uint(UintTy::U32) => ScalarArg::UInt32(scalar(v)?.to_u32()?).into(), - ty::Uint(UintTy::U64) => ScalarArg::UInt64(scalar(v)?.to_u64()?).into(), - ty::Uint(UintTy::Usize) => - ScalarArg::USize(scalar(v)?.to_target_usize(this)?.try_into().unwrap()).into(), - ty::RawPtr(..) => { - let ptr = scalar(v)?.to_pointer(this)?; - this.expose_and_warn(ptr.provenance, tracing)?; - - // This relies on the `expose_provenance` in the `visit_reachable_allocs` callback - // below to expose the actual interpreter-level allocation. - ScalarArg::RawPtr(std::ptr::with_exposed_provenance_mut(ptr.addr().bytes_usize())) - .into() + + // This should go first so that we emit unsupported before doing a bunch + // of extra work for types that aren't supported yet. + let ty = this.ty_to_ffitype(v.layout.ty)?; + + // Now grab the bytes of the argument. + let bytes = match v.as_mplace_or_imm() { + either::Either::Left(mplace) => { + // Get the alloc id corresponding to this mplace, alongside + // a pointer that's offset to point to this particular + // mplace (not one at the base addr of the allocation). + let mplace_ptr = mplace.ptr(); + let sz = mplace.layout.size.bytes_usize(); + if sz == 0 { + throw_unsup_format!("Attempting to pass a ZST over FFI"); + } + let (id, ofs, _) = this.ptr_get_alloc_id(mplace_ptr, sz.try_into().unwrap())?; + let ofs = ofs.bytes_usize(); + // Expose all provenances in the allocation within the byte + // range of the struct, if any. + let alloc = this.get_alloc_raw(id)?; + let alloc_ptr = this.get_alloc_bytes_unchecked_raw(id)?; + for prov in alloc.provenance().get_range(this, (ofs..ofs.strict_add(sz)).into()) { + this.expose_provenance(prov)?; + } + // SAFETY: We know for sure that at alloc_ptr + ofs the next layout.size + // bytes are part of this allocation and initialised. They might be marked + // as uninit in Miri, but all bytes returned by `MiriAllocBytes` are + // initialised. + unsafe { + Box::from(std::slice::from_raw_parts( + alloc_ptr.add(ofs), + mplace.layout.size.bytes_usize(), + )) + } } - // For ADTs, create an FfiType from their fields. - ty::Adt(adt_def, args) => { - let adt = this.adt_to_ffitype(v.layout.ty, *adt_def, args)?; - - // Copy the raw bytes backing this arg. - let bytes = match v.as_mplace_or_imm() { - either::Either::Left(mplace) => { - // Get the alloc id corresponding to this mplace, alongside - // a pointer that's offset to point to this particular - // mplace (not one at the base addr of the allocation). - let mplace_ptr = mplace.ptr(); - let sz = mplace.layout.size.bytes_usize(); - let id = this - .alloc_id_from_addr( - mplace_ptr.addr().bytes(), - sz.try_into().unwrap(), - /* only_exposed_allocations */ false, - ) - .unwrap(); - // Expose all provenances in the allocation within the byte - // range of the struct, if any. - let alloc = this.get_alloc_raw(id)?; - let alloc_ptr = this.get_alloc_bytes_unchecked_raw(id)?; - let start_addr = - mplace_ptr.addr().bytes_usize().strict_sub(alloc_ptr.addr()); - for prov in alloc - .provenance() - .get_range(this, (start_addr..start_addr.strict_add(sz)).into()) - { - this.expose_provenance(prov)?; + either::Either::Right(imm) => { + let (first, maybe_second) = imm.to_scalar_and_meta(); + // If a scalar is a pointer, then expose its provenance. + if let interpret::Scalar::Ptr(p, _) = first { + // This relies on the `expose_provenance` in the `visit_reachable_allocs` callback + // below to expose the actual interpreter-level allocation. + this.expose_and_warn(Some(p.provenance), tracing)?; + } + + // Turn the scalar(s) into u128s so we can write their bytes + // into the buffer. + let (sc_int_first, sz_first) = { + let sc = first.to_scalar_int()?; + (sc.to_bits_unchecked(), sc.size().bytes_usize()) + }; + let (sc_int_second, sz_second) = match maybe_second { + MemPlaceMeta::Meta(sc) => { + // Might also be a pointer. + if let interpret::Scalar::Ptr(p, _) = first { + this.expose_and_warn(Some(p.provenance), tracing)?; } - // SAFETY: We know for sure that at mplace_ptr.addr() the next layout.size - // bytes are part of this allocation and initialised. They might be marked - // as uninit in Miri, but all bytes returned by `MiriAllocBytes` are - // initialised. - unsafe { - std::slice::from_raw_parts( - alloc_ptr.with_addr(mplace_ptr.addr().bytes_usize()), - mplace.layout.size.bytes_usize(), - ) - .to_vec() - .into_boxed_slice() + let sc = sc.to_scalar_int()?; + (sc.to_bits_unchecked(), sc.size().bytes_usize()) + } + MemPlaceMeta::None => (0, 0), + }; + let sz = imm.layout.size.bytes_usize(); + // TODO: Is this actually ok? Seems like the only way to figure + // out how the scalars are laid out relative to each other. + let align_second = match imm.layout.backend_repr { + rustc_abi::BackendRepr::Scalar(_) => 1, + rustc_abi::BackendRepr::ScalarPair(_, sc2) => sc2.align(this).bytes_usize(), + _ => unreachable!(), + }; + // How many bytes to skip between scalars if necessary for alignment. + let skip = sz_first.next_multiple_of(align_second).strict_sub(sz_first); + + let mut bytes: Box<[u8]> = (0..sz).map(|_| 0u8).collect(); + + // Copy over the bytes in an endianness-agnostic way. Since each + // scalar may be up to 128 bits and write_target_uint doesn't + // give us an easy way to do multiple writes in a row, we + // adapt its logic for two consecutive writes. + let mut bytes_wr = bytes.as_mut(); + match this.data_layout().endian { + rustc_abi::Endian::Little => { + // Only write as many bytes as specified, not all of the u128. + let wr = bytes_wr.write(&sc_int_first.to_le_bytes()[..sz_first]).unwrap(); + assert_eq!(wr, sz_first); + // If the second scalar is zeroed, it's more efficient to skip it. + if sc_int_second != 0 { + bytes_wr = bytes_wr.split_at_mut(skip).1; + let wr = + bytes_wr.write(&sc_int_second.to_le_bytes()[..sz_second]).unwrap(); + assert_eq!(wr, sz_second); } } - either::Either::Right(_) => { - // TODO: support this! - throw_unsup_format!( - "Immediate structs can't be passed over FFI: {}", - v.layout.ty - ) + rustc_abi::Endian::Big => { + // TODO: My gut says this is wrong, let's see if CI complains. + let wr = bytes_wr + .write(&sc_int_first.to_be_bytes()[16usize.strict_sub(sz_first)..]) + .unwrap(); + assert_eq!(wr, sz_first); + if sc_int_second != 0 { + bytes_wr = bytes_wr.split_at_mut(skip).1; + let wr = bytes_wr + .write( + &sc_int_second.to_be_bytes()[16usize.strict_sub(sz_second)..], + ) + .unwrap(); + assert_eq!(wr, sz_second); + } } - }; + } + // Any remaining bytes are padding, so ignore. - ffi::OwnedArg::Adt(adt, bytes) + bytes } - _ => throw_unsup_format!("unsupported argument type for native call: {}", v.layout.ty), - }) + }; + interp_ok(OwnedArg::new(ty, bytes)) } /// Parses an ADT to construct the matching libffi type. @@ -495,7 +528,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // Call the function and store output, depending on return type in the function signature. let (ret, maybe_memevents) = - this.call_native_with_args(link_name, dest, code_ptr, &libffi_args)?; + this.call_native_with_args(link_name, dest, code_ptr, &mut libffi_args)?; if tracing { this.tracing_apply_accesses(maybe_memevents.unwrap())?; diff --git a/tests/native-lib/aggregate_arguments.c b/tests/native-lib/aggregate_arguments.c index 01ba186127..9c29485e79 100644 --- a/tests/native-lib/aggregate_arguments.c +++ b/tests/native-lib/aggregate_arguments.c @@ -7,10 +7,10 @@ typedef struct PassMe { int32_t value; - int16_t other_value; + int64_t other_value; } PassMe; -EXPORT int32_t pass_struct(const PassMe pass_me) { +EXPORT int64_t pass_struct(const PassMe pass_me) { return pass_me.value + pass_me.other_value; } diff --git a/tests/native-lib/fail/struct_not_extern_c.rs b/tests/native-lib/fail/struct_not_extern_c.rs index e09930dc75..cf8315e0fd 100644 --- a/tests/native-lib/fail/struct_not_extern_c.rs +++ b/tests/native-lib/fail/struct_not_extern_c.rs @@ -6,11 +6,11 @@ pub struct PassMe { pub value: i32, - pub other_value: i16, + pub other_value: i64, } extern "C" { - fn pass_struct(s: PassMe) -> i32; + fn pass_struct(s: PassMe) -> i64; } fn main() { diff --git a/tests/native-lib/pass/aggregate_arguments.rs b/tests/native-lib/pass/aggregate_arguments.rs index 85b046665a..15137c37d7 100644 --- a/tests/native-lib/pass/aggregate_arguments.rs +++ b/tests/native-lib/pass/aggregate_arguments.rs @@ -8,11 +8,11 @@ fn test_pass_struct() { #[repr(C)] struct PassMe { value: i32, - other_value: i16, + other_value: i64, } extern "C" { - fn pass_struct(s: PassMe) -> i32; + fn pass_struct(s: PassMe) -> i64; } let pass_me = PassMe { value: 42, other_value: 1337 };