From 984b0b05cf1e508659e3fc7e4f88fc58124da985 Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Wed, 18 Sep 2019 12:17:37 +0200 Subject: [PATCH 01/25] Move to a NonNull-based exhume() interface --- src/lib.rs | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 2e617dc..f857aab 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -40,6 +40,7 @@ use std::io::Write; // for bytes.write_all; push_all is unstable and extend is s use std::io::Result as IOResult; use std::marker::PhantomData; use std::num::*; +use std::ptr::NonNull; pub mod abomonated; @@ -124,9 +125,9 @@ pub unsafe fn decode(bytes: &mut [u8]) -> Option<(&T, &mut [u8]) if bytes.len() < mem::size_of::() { None } else { let (split1, split2) = bytes.split_at_mut(mem::size_of::()); - let result: &mut T = mem::transmute(split1.get_unchecked_mut(0)); - if let Some(remaining) = result.exhume(split2) { - Some((result, remaining)) + let result: NonNull = mem::transmute(split1.get_unchecked_mut(0)); + if let Some(remaining) = T::exhume(result, split2) { + Some((&*result.as_ptr(), remaining)) } else { None @@ -165,10 +166,17 @@ pub trait Abomonation { /// reports any failures in writing to `write`. #[inline(always)] unsafe fn entomb(&self, _write: &mut W) -> IOResult<()> { Ok(()) } - /// Recover any information for `&mut self` not evident from its binary representation. + /// Recover any information for `self_` not evident from its binary representation. /// /// Most commonly this populates pointers with valid references into `bytes`. - #[inline(always)] unsafe fn exhume<'a,'b>(&'a mut self, bytes: &'b mut [u8]) -> Option<&'b mut [u8]> { Some(bytes) } + /// + /// Implementors should take note that `self_` is initially in an invalid state, as its inner + /// pointers may be dangling. As Rust references come with a data validity invariant, building + /// references to invalid state is undefined behavior, so one should strive to implement + /// `exhume` using raw pointer operations as much as feasible. + // + // FIXME: Replace self_ with self once Rust has arbitrary self types + #[inline(always)] unsafe fn exhume<'a>(_self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { Some(bytes) } /// Reports the number of further bytes required to entomb `self`. #[inline(always)] fn extent(&self) -> usize { 0 } @@ -227,7 +235,7 @@ macro_rules! unsafe_abomonate { $( self.$field.entomb(write)?; )* Ok(()) } - #[inline] unsafe fn exhume<'a,'b>(&'a mut self, mut bytes: &'b mut [u8]) -> Option<&'b mut [u8]> { + #[inline] unsafe fn exhume<'a>(self_: ::std::ptr::NonNull, mut bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { $( let temp = bytes; bytes = self.$field.exhume(temp)?; )* Some(bytes) } @@ -251,7 +259,7 @@ macro_rules! tuple_abomonate { Ok(()) } #[allow(non_snake_case)] - #[inline(always)] unsafe fn exhume<'a,'b>(&'a mut self, mut bytes: &'b mut [u8]) -> Option<&'b mut [u8]> { + #[inline(always)] unsafe fn exhume<'a>(self_: NonNull, mut bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { let ($(ref mut $name,)*) = *self; $( let temp = bytes; bytes = $name.exhume(temp)?; )* Some(bytes) @@ -314,7 +322,7 @@ impl Abomonation for Option { } Ok(()) } - #[inline(always)] unsafe fn exhume<'a, 'b>(&'a mut self, mut bytes: &'b mut[u8]) -> Option<&'b mut [u8]> { + #[inline(always)] unsafe fn exhume<'a>(self_: NonNull, mut bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { if let &mut Some(ref mut inner) = self { let tmp = bytes; bytes = inner.exhume(tmp)?; } @@ -333,7 +341,7 @@ impl Abomonation for Result { }; Ok(()) } - #[inline(always)] unsafe fn exhume<'a, 'b>(&'a mut self, bytes: &'b mut[u8]) -> Option<&'b mut [u8]> { + #[inline(always)] unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { match self { &mut Ok(ref mut inner) => inner.exhume(bytes), &mut Err(ref mut inner) => inner.exhume(bytes), @@ -390,7 +398,7 @@ macro_rules! array_abomonate { Ok(()) } #[inline(always)] - unsafe fn exhume<'a, 'b>(&'a mut self, mut bytes: &'b mut[u8]) -> Option<&'b mut [u8]> { + unsafe fn exhume<'a>(self_: NonNull, mut bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { for element in self { let tmp = bytes; bytes = element.exhume(tmp)?; } @@ -448,7 +456,7 @@ impl Abomonation for String { Ok(()) } #[inline] - unsafe fn exhume<'a,'b>(&'a mut self, bytes: &'b mut [u8]) -> Option<&'b mut [u8]> { + unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { if self.len() > bytes.len() { None } else { let (mine, rest) = bytes.split_at_mut(self.len()); @@ -469,7 +477,7 @@ impl Abomonation for Vec { Ok(()) } #[inline] - unsafe fn exhume<'a,'b>(&'a mut self, bytes: &'b mut [u8]) -> Option<&'b mut [u8]> { + unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { // extract memory from bytes to back our vector let binary_len = self.len() * mem::size_of::(); @@ -503,7 +511,7 @@ impl Abomonation for Box { Ok(()) } #[inline] - unsafe fn exhume<'a,'b>(&'a mut self, bytes: &'b mut [u8]) -> Option<&'b mut [u8]> { + unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { let binary_len = mem::size_of::(); if binary_len > bytes.len() { None } else { From e97de1bd5051adc36bf53f99e34ce967d11a0b28 Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Wed, 18 Sep 2019 15:03:58 +0200 Subject: [PATCH 02/25] Port unsafe_abomonate to NonNull-based interface --- src/lib.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index f857aab..48d9491 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -235,10 +235,17 @@ macro_rules! unsafe_abomonate { $( self.$field.entomb(write)?; )* Ok(()) } + #[inline] unsafe fn exhume<'a>(self_: ::std::ptr::NonNull, mut bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { - $( let temp = bytes; bytes = self.$field.exhume(temp)?; )* + $( + // FIXME: This (briefly) constructs an &mut _ to invalid data, which is UB. + // The proposed &raw mut operator would allow avoiding this. + let field_ptr: ::std::ptr::NonNull<_> = From::from(&mut (*self_.as_ptr()).$field); + bytes = Abomonation::exhume(field_ptr, bytes)?; + )* Some(bytes) } + #[inline] fn extent(&self) -> usize { let mut size = 0; $( size += self.$field.extent(); )* From 5fd495799695bb60f8885cb5a63e280120113c2a Mon Sep 17 00:00:00 2001 From: Hadrien G Date: Wed, 25 Sep 2019 22:53:39 +0200 Subject: [PATCH 03/25] Clarify that the tuple_abomonate macro takes types as input --- src/lib.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 48d9491..1d5c034 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -257,25 +257,25 @@ macro_rules! unsafe_abomonate { // general code for tuples (can't use '0', '1', ... as field identifiers) macro_rules! tuple_abomonate { - ( $($name:ident)+) => ( - impl<$($name: Abomonation),*> Abomonation for ($($name,)*) { + ( $($ty:ident)+) => ( + impl<$($ty: Abomonation),*> Abomonation for ($($ty,)*) { #[allow(non_snake_case)] #[inline(always)] unsafe fn entomb(&self, write: &mut WRITE) -> IOResult<()> { - let ($(ref $name,)*) = *self; - $($name.entomb(write)?;)* + let ($(ref $ty,)*) = *self; + $($ty.entomb(write)?;)* Ok(()) } #[allow(non_snake_case)] #[inline(always)] unsafe fn exhume<'a>(self_: NonNull, mut bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { - let ($(ref mut $name,)*) = *self; - $( let temp = bytes; bytes = $name.exhume(temp)?; )* + let ($(ref mut $ty,)*) = *self; + $( let temp = bytes; bytes = $ty.exhume(temp)?; )* Some(bytes) } #[allow(non_snake_case)] #[inline(always)] fn extent(&self) -> usize { let mut size = 0; - let ($(ref $name,)*) = *self; - $( size += $name.extent(); )* + let ($(ref $ty,)*) = *self; + $( size += $ty.extent(); )* size } } From b182a3877f7b4c80ad6181ecdfdaa3854fe47abe Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Wed, 18 Sep 2019 15:04:35 +0200 Subject: [PATCH 04/25] Port tuple_abomonate to NonNull-based interface --- src/lib.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 1d5c034..252f654 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -265,12 +265,20 @@ macro_rules! tuple_abomonate { $($ty.entomb(write)?;)* Ok(()) } + #[allow(non_snake_case)] #[inline(always)] unsafe fn exhume<'a>(self_: NonNull, mut bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { - let ($(ref mut $ty,)*) = *self; - $( let temp = bytes; bytes = $ty.exhume(temp)?; )* + // FIXME: This (briefly) constructs a "ref mut" to invalid data, which is UB. + // I think avoiding this would require a cleaner way to iterate over tuple fields. + // One possibility would be a C++11-style combination of variadic generics and recursion. + let ($(ref mut $ty,)*) = *self_.as_ptr(); + $( + let field_ptr : NonNull<$ty> = From::from($ty); + bytes = $ty::exhume(field_ptr, bytes)?; + )* Some(bytes) } + #[allow(non_snake_case)] #[inline(always)] fn extent(&self) -> usize { let mut size = 0; From d4a78e4b85312d63f390246f8365d048bcfdb240 Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Wed, 18 Sep 2019 15:04:58 +0200 Subject: [PATCH 05/25] Port Box abomonation to NonNull-based interface --- src/lib.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 252f654..0950f44 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -525,17 +525,20 @@ impl Abomonation for Box { (**self).entomb(bytes)?; Ok(()) } + #[inline] unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { let binary_len = mem::size_of::(); if binary_len > bytes.len() { None } else { let (mine, mut rest) = bytes.split_at_mut(binary_len); - std::ptr::write(self, mem::transmute(mine.as_mut_ptr() as *mut T)); - let temp = rest; rest = (**self).exhume(temp)?; + let box_target : NonNull = NonNull::new_unchecked(mine.as_mut_ptr() as *mut T); + rest = T::exhume(box_target, rest)?; + self_.as_ptr().write(Box::from_raw(box_target.as_ptr())); Some(rest) } } + #[inline] fn extent(&self) -> usize { mem::size_of::() + (&**self).extent() } From 421a486b2f11ebc5804be536b044be191598ae00 Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Wed, 18 Sep 2019 19:43:53 +0200 Subject: [PATCH 06/25] Port Rust enum abomonation to NonNull-based interface --- src/lib.rs | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 0950f44..c681214 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -337,12 +337,17 @@ impl Abomonation for Option { } Ok(()) } + #[inline(always)] unsafe fn exhume<'a>(self_: NonNull, mut bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { - if let &mut Some(ref mut inner) = self { - let tmp = bytes; bytes = inner.exhume(tmp)?; + // FIXME: This (briefly) constructs a "ref mut" to invalid data, which is UB. + // I'm not sure if this can be fully resolved without relying on enum implementation details. + if let Some(ref mut inner) = *self_.as_ptr() { + let inner_ptr : NonNull = From::from(inner); + bytes = T::exhume(inner_ptr, bytes)?; } Some(bytes) } + #[inline] fn extent(&self) -> usize { self.as_ref().map(|inner| inner.extent()).unwrap_or(0) } @@ -356,12 +361,22 @@ impl Abomonation for Result { }; Ok(()) } + #[inline(always)] unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { - match self { - &mut Ok(ref mut inner) => inner.exhume(bytes), - &mut Err(ref mut inner) => inner.exhume(bytes), + // FIXME: This (briefly) constructs a "ref mut" to invalid data, which is UB. + // I'm not sure if this can be fully resolved without relying on enum implementation details. + match *self_.as_ptr() { + Ok(ref mut inner) => { + let inner_ptr : NonNull = From::from(inner); + T::exhume(inner_ptr, bytes) + } + Err(ref mut inner) => { + let inner_ptr : NonNull = From::from(inner); + E::exhume(inner_ptr, bytes) + } } } + #[inline] fn extent(&self) -> usize { match self { &Ok(ref inner) => inner.extent(), From 5ef07a7d460d168e3c1dd12a425fba257043c6b4 Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Thu, 19 Sep 2019 13:48:14 +0200 Subject: [PATCH 07/25] Port slice-like abomonations to NonNull-based interface (and deduplicate them) --- src/lib.rs | 78 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 47 insertions(+), 31 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index c681214..e5750eb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -424,22 +424,16 @@ macro_rules! array_abomonate { impl Abomonation for [T; $size] { #[inline(always)] unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { - for element in self { element.entomb(write)?; } - Ok(()) + entomb_slice(&self[..], write) } + #[inline(always)] - unsafe fn exhume<'a>(self_: NonNull, mut bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { - for element in self { - let tmp = bytes; bytes = element.exhume(tmp)?; - } - Some(bytes) + unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { + exhume_slice(self_.as_ptr() as *mut T, $size, bytes) } + #[inline(always)] fn extent(&self) -> usize { - let mut size = 0; - for element in self { - size += element.extent(); - } - size + slice_extent(&self[..]) } } ) @@ -485,15 +479,20 @@ impl Abomonation for String { write.write_all(self.as_bytes())?; Ok(()) } + #[inline] unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { - if self.len() > bytes.len() { None } + // FIXME: This (briefly) constructs an &String to invalid data, which is UB. + // I'm not sure if this can be fully resolved without relying on String implementation details. + let self_len = self_.as_ref().len(); + if self_len > bytes.len() { None } else { - let (mine, rest) = bytes.split_at_mut(self.len()); - std::ptr::write(self, String::from_raw_parts(mem::transmute(mine.as_ptr()), self.len(), self.len())); + let (mine, rest) = bytes.split_at_mut(self_len); + self_.as_ptr().write(String::from_raw_parts(mine.as_mut_ptr(), self_len, self_len)); Some(rest) } } + #[inline] fn extent(&self) -> usize { self.len() } @@ -503,33 +502,28 @@ impl Abomonation for Vec { #[inline] unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { write.write_all(typed_to_bytes(&self[..]))?; - for element in self.iter() { element.entomb(write)?; } - Ok(()) + entomb_slice(&self[..], write) } + #[inline] unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { - - // extract memory from bytes to back our vector - let binary_len = self.len() * mem::size_of::(); + // FIXME: This (briefly) constructs an &Vec to invalid data, which is UB. + // I'm not sure if this can be fully resolved without relying on Vec implementation details. + let self_len = self_.as_ref().len(); + let binary_len = self_len * mem::size_of::(); if binary_len > bytes.len() { None } else { let (mine, mut rest) = bytes.split_at_mut(binary_len); - let slice = std::slice::from_raw_parts_mut(mine.as_mut_ptr() as *mut T, self.len()); - std::ptr::write(self, Vec::from_raw_parts(slice.as_mut_ptr(), self.len(), self.len())); - for element in self.iter_mut() { - let temp = rest; // temp variable explains lifetimes (mysterious!) - rest = element.exhume(temp)?; - } + let first_ptr = mine.as_mut_ptr() as *mut T; + rest = exhume_slice(first_ptr, self_len, rest)?; + self_.as_ptr().write(Vec::from_raw_parts(first_ptr, self_len, self_len)); Some(rest) } } + #[inline] fn extent(&self) -> usize { - let mut sum = mem::size_of::() * self.len(); - for element in self.iter() { - sum += element.extent(); - } - sum + mem::size_of::() * self.len() + slice_extent(&self[..]) } } @@ -564,6 +558,28 @@ impl Abomonation for Box { std::slice::from_raw_parts(slice.as_ptr() as *const u8, slice.len() * mem::size_of::()) } +// Common subset of "entomb" for all [T]-like types +unsafe fn entomb_slice(slice: &[T], write: &mut W) -> IOResult<()> { + for element in slice { element.entomb(write)?; } + Ok(()) +} + +// Common subset of "exhume" for all [T]-like types +// (I'd gladly take a NonNull<[T]>, but it is too difficult to build raw pointers to slices) +#[inline] +unsafe fn exhume_slice<'a, T: Abomonation>(first_ptr: *mut T, length: usize, mut bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { + for i in 0..length { + let element_ptr: NonNull = NonNull::new_unchecked(first_ptr.add(i)); + bytes = T::exhume(element_ptr, bytes)?; + } + Some(bytes) +} + +// Common subset of "extent" for all [T]-like types +fn slice_extent(slice: &[T]) -> usize { + slice.iter().map(T::extent).sum() +} + mod network { use Abomonation; use std::net::{SocketAddr, SocketAddrV4, SocketAddrV6, IpAddr, Ipv4Addr, Ipv6Addr}; From b23d2dc2acb83438eb3716255c7b27e1097a4dc9 Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Thu, 19 Sep 2019 18:22:33 +0200 Subject: [PATCH 08/25] Require Debug and use assert_eq for improved test failure messages --- tests/tests.rs | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/tests/tests.rs b/tests/tests.rs index 340e776..3ff0dec 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -1,6 +1,7 @@ extern crate abomonation; use abomonation::*; +use std::fmt::Debug; #[test] fn test_array() { _test_pass(vec![[0, 1, 2]; 1024]); } #[test] fn test_nonzero() { _test_pass(vec![[std::num::NonZeroI32::new(1)]; 1024]); } @@ -38,21 +39,21 @@ fn test_phantom_data_for_non_abomonatable_type() { _test_pass(PhantomData::::default()); } -fn _test_pass(record: T) { +fn _test_pass(record: T) { let mut bytes = Vec::new(); unsafe { encode(&record, &mut bytes).unwrap(); } { let (result, rest) = unsafe { decode::(&mut bytes[..]) }.unwrap(); - assert!(&record == result); - assert!(rest.len() == 0); + assert_eq!(&record, result); + assert_eq!(rest.len(), 0); } } -fn _test_fail(record: T) { +fn _test_fail(record: T) { let mut bytes = Vec::new(); unsafe { encode(&record, &mut bytes).unwrap(); } bytes.pop(); - assert!(unsafe { decode::(&mut bytes[..]) }.is_none()); + assert_eq!(unsafe { decode::(&mut bytes[..]) }, None); } fn _test_size(record: T) { @@ -62,7 +63,7 @@ fn _test_size(record: T) { } -#[derive(Eq, PartialEq)] +#[derive(Debug, Eq, PartialEq)] struct MyStruct { a: String, b: u64, @@ -82,8 +83,8 @@ fn test_macro() { // decode a &Vec<(u64, String)> from binary data if let Some((result, rest)) = unsafe { decode::(&mut bytes) } { - assert!(result == &record); - assert!(rest.len() == 0); + assert_eq!(result, &record); + assert_eq!(rest.len(), 0); } } @@ -106,10 +107,10 @@ fn test_multiple_encode_decode() { unsafe { encode(&vec![1,2,3], &mut bytes).unwrap(); } unsafe { encode(&"grawwwwrr".to_owned(), &mut bytes).unwrap(); } - let (t, r) = unsafe { decode::(&mut bytes) }.unwrap(); assert!(*t == 0); - let (t, r) = unsafe { decode::(r) }.unwrap(); assert!(*t == 7); - let (t, r) = unsafe { decode::>(r) }.unwrap(); assert!(*t == vec![1,2,3]); - let (t, _r) = unsafe { decode::(r) }.unwrap(); assert!(*t == "grawwwwrr".to_owned()); + let (t, r) = unsafe { decode::(&mut bytes) }.unwrap(); assert_eq!(*t, 0); + let (t, r) = unsafe { decode::(r) }.unwrap(); assert_eq!(*t, 7); + let (t, r) = unsafe { decode::>(r) }.unwrap(); assert_eq!(*t, vec![1,2,3]); + let (t, _r) = unsafe { decode::(r) }.unwrap(); assert_eq!(*t, "grawwwwrr".to_owned()); } #[test] @@ -125,6 +126,6 @@ fn test_net_types() { unsafe { encode(&socket_addr4, &mut bytes).unwrap(); } unsafe { encode(&socket_addr6, &mut bytes).unwrap(); } - let (t, r) = unsafe { decode::(&mut bytes) }.unwrap(); assert!(*t == socket_addr4); - let (t, _r) = unsafe { decode::(r) }.unwrap(); assert!(*t == socket_addr6); + let (t, r) = unsafe { decode::(&mut bytes) }.unwrap(); assert_eq!(*t, socket_addr4); + let (t, _r) = unsafe { decode::(r) }.unwrap(); assert_eq!(*t, socket_addr6); } From b5f7b71bdd7fdea8fa11c48a09cfdb5901b1a0c4 Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Thu, 19 Sep 2019 18:23:11 +0200 Subject: [PATCH 09/25] Clarify invalid data as a source of UB --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1c8565c..8afb905 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ A mortifying serialization library for Rust Abomonation (spelling intentional) is a serialization library for Rust based on the very simple idea that if someone presents data for serialization it will copy those exact bits, and then follow any pointers and copy those bits, and so on. When deserializing it recovers the exact bits, and then corrects pointers to aim at the serialized forms of the chased data. -**Warning**: Abomonation should not be used on any data you care strongly about, or from any computer you value the data on. The `encode` and `decode` methods do things that may be undefined behavior, and you shouldn't stand for that. Specifically, `encode` exposes padding bytes to `memcpy`, and `decode` doesn't much respect alignment. +**Warning**: Abomonation should not be used on any data you care strongly about, or from any computer you value the data on. The `encode` and `decode` methods do things that may be undefined behavior, and you shouldn't stand for that. Specifically, `encode` exposes padding bytes to `memcpy`, and `decode` doesn't much respect alignment and may need to construct Rust references to invalid data. Please consult the [abomonation documentation](https://frankmcsherry.github.com/abomonation) for more specific information. From e4a4963073b32646169a6cc8637447ea7adf6aa4 Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Thu, 19 Sep 2019 18:23:26 +0200 Subject: [PATCH 10/25] Remove outdated Abomonable notion --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8afb905..13de90c 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,7 @@ Be warned that these numbers are not *goodput*, but rather the total number of b ## unsafe_abomonate! -Abomonation comes with the `unsafe_abomonate!` macro implementing `Abomonation` for structs which are essentially equivalent to a tuple of other `Abomonable` types. To use the macro, you must put the `#[macro_use]` modifier before `extern crate abomonation;`. +Abomonation comes with the `unsafe_abomonate!` macro implementing `Abomonation` for structs which are essentially equivalent to a tuple of other `Abomonation` types. To use the macro, you must put the `#[macro_use]` modifier before `extern crate abomonation;`. Please note that `unsafe_abomonate!` synthesizes unsafe implementations of `Abomonation`, and it is should be considered unsafe to invoke. @@ -82,4 +82,4 @@ if let Some((result, remaining)) = unsafe { decode::(&mut bytes) } { } ``` -Be warned that implementing `Abomonable` for types can be a giant disaster and is entirely discouraged. +Be warned that implementing `Abomonation` for types can be a giant disaster and is entirely discouraged. From b8629a300d1bc71bb8e5f8f47ec81931fe2ce399 Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Fri, 20 Sep 2019 01:11:12 +0200 Subject: [PATCH 11/25] Make sure that black_box does its job --- benches/bench.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benches/bench.rs b/benches/bench.rs index 9ce8a3c..c106228 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -40,7 +40,7 @@ fn _bench_enc(bencher: &mut Bencher, record: T) { bencher.bytes = bytes.len() as u64; bencher.iter(|| { bytes.clear(); - unsafe { encode(&record, &mut bytes).unwrap(); } + unsafe { encode(&record, &mut bytes).unwrap() } }); } From 48b2b9aa9d64cff1bd25b264fd9a8c092e30b4bb Mon Sep 17 00:00:00 2001 From: Hadrien G Date: Thu, 26 Sep 2019 07:49:52 +0200 Subject: [PATCH 12/25] Remove some strange whitespace --- benches/bench.rs | 2 -- benches/clone.rs | 2 -- benches/recycler.rs | 2 -- 3 files changed, 6 deletions(-) diff --git a/benches/bench.rs b/benches/bench.rs index c106228..4492541 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -31,7 +31,6 @@ use test::Bencher; #[bench] fn vec_u_vn_s_dec(bencher: &mut Bencher) { _bench_dec(bencher, vec![vec![(0u64, vec![(); 1 << 40], format!("grawwwwrr!")); 32]; 32]); } fn _bench_enc(bencher: &mut Bencher, record: T) { - // prepare encoded data for bencher.bytes let mut bytes = Vec::new(); unsafe { encode(&record, &mut bytes).unwrap(); } @@ -45,7 +44,6 @@ fn _bench_enc(bencher: &mut Bencher, record: T) { } fn _bench_dec(bencher: &mut Bencher, record: T) { - // prepare encoded data let mut bytes = Vec::new(); unsafe { encode(&record, &mut bytes).unwrap(); } diff --git a/benches/clone.rs b/benches/clone.rs index a040895..a5f5b2e 100644 --- a/benches/clone.rs +++ b/benches/clone.rs @@ -28,7 +28,6 @@ use test::Bencher; #[bench] fn vec_u_vn_s_cln(bencher: &mut Bencher) { _bench_cln(bencher, vec![vec![(0u64, vec![(); 1 << 40], format!("grawwwwrr!")); 32]; 32]); } fn _bench_e_d(bencher: &mut Bencher, record: T) { - // prepare encoded data for bencher.bytes let mut bytes = Vec::new(); unsafe { encode(&record, &mut bytes).unwrap(); } @@ -43,7 +42,6 @@ fn _bench_e_d(bencher: &mut Bencher, record: T) { } fn _bench_cln(bencher: &mut Bencher, record: T) { - // prepare encoded data let mut bytes = Vec::new(); unsafe { encode(&record, &mut bytes).unwrap(); } diff --git a/benches/recycler.rs b/benches/recycler.rs index cc9ae94..96391b5 100644 --- a/benches/recycler.rs +++ b/benches/recycler.rs @@ -28,7 +28,6 @@ use test::Bencher; // #[bench] fn vec_u_vn_s_rec(bencher: &mut Bencher) { _bench_rec(bencher, vec![vec![(0u64, vec![(); 1 << 40], format!("grawwwwrr!")); 32]; 32]); } fn _bench_own(bencher: &mut Bencher, record: T) { - // prepare encoded data let mut bytes = Vec::new(); unsafe { encode(&record, &mut bytes).unwrap(); } @@ -43,7 +42,6 @@ fn _bench_own(bencher: &mut Bencher, record: T) { fn _bench_rec(bencher: &mut Bencher, record: T) { - // prepare encoded data let mut bytes = Vec::new(); unsafe { encode(&record, &mut bytes).unwrap(); } From 7388af5ab46724ef68398d79e5206cd08c33be81 Mon Sep 17 00:00:00 2001 From: Hadrien G Date: Thu, 26 Sep 2019 09:39:22 +0200 Subject: [PATCH 13/25] Snipe some unnecessary usage of Ok(()) --- src/lib.rs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index e5750eb..3c1c817 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -75,8 +75,7 @@ pub mod abomonated; pub unsafe fn encode(typed: &T, write: &mut W) -> IOResult<()> { let slice = std::slice::from_raw_parts(mem::transmute(typed), mem::size_of::()); write.write_all(slice)?; - typed.entomb(write)?; - Ok(()) + typed.entomb(write) } /// Decodes a mutable binary slice into an immutable typed reference. @@ -262,7 +261,7 @@ macro_rules! tuple_abomonate { #[allow(non_snake_case)] #[inline(always)] unsafe fn entomb(&self, write: &mut WRITE) -> IOResult<()> { let ($(ref $ty,)*) = *self; - $($ty.entomb(write)?;)* + $( $ty.entomb(write)?; )* Ok(()) } @@ -356,10 +355,9 @@ impl Abomonation for Option { impl Abomonation for Result { #[inline(always)] unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { match self { - &Ok(ref inner) => inner.entomb(write)?, - &Err(ref inner) => inner.entomb(write)?, - }; - Ok(()) + &Ok(ref inner) => inner.entomb(write), + &Err(ref inner) => inner.entomb(write), + } } #[inline(always)] unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { @@ -476,8 +474,7 @@ array_abomonate!(32); impl Abomonation for String { #[inline] unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { - write.write_all(self.as_bytes())?; - Ok(()) + write.write_all(self.as_bytes()) } #[inline] @@ -531,8 +528,7 @@ impl Abomonation for Box { #[inline] unsafe fn entomb(&self, bytes: &mut W) -> IOResult<()> { bytes.write_all(std::slice::from_raw_parts(mem::transmute(&**self), mem::size_of::()))?; - (**self).entomb(bytes)?; - Ok(()) + (**self).entomb(bytes) } #[inline] From 3f847833809249459a65b57b143b2b08776bce70 Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Fri, 20 Sep 2019 01:11:30 +0200 Subject: [PATCH 14/25] Don't use manual inlining where benchmarks don't show a benefit --- src/lib.rs | 50 ++++++++++++++++++++------------------------------ 1 file changed, 20 insertions(+), 30 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 3c1c817..dc1a5e6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -119,7 +119,6 @@ pub unsafe fn encode(typed: &T, write: &mut W) -> IORe /// assert!(remaining.len() == 0); /// } /// ``` -#[inline] pub unsafe fn decode(bytes: &mut [u8]) -> Option<(&T, &mut [u8])> { if bytes.len() < mem::size_of::() { None } else { @@ -139,7 +138,6 @@ pub unsafe fn decode(bytes: &mut [u8]) -> Option<(&T, &mut [u8]) /// # Safety /// /// The `measure` method is safe. It neither produces nor consults serialized representations. -#[inline] pub fn measure(typed: &T) -> usize { mem::size_of::() + typed.extent() } @@ -163,7 +161,7 @@ pub trait Abomonation { /// /// Most commonly this is owned data on the other end of pointers in `&self`. The return value /// reports any failures in writing to `write`. - #[inline(always)] unsafe fn entomb(&self, _write: &mut W) -> IOResult<()> { Ok(()) } + unsafe fn entomb(&self, _write: &mut W) -> IOResult<()> { Ok(()) } /// Recover any information for `self_` not evident from its binary representation. /// @@ -175,10 +173,10 @@ pub trait Abomonation { /// `exhume` using raw pointer operations as much as feasible. // // FIXME: Replace self_ with self once Rust has arbitrary self types - #[inline(always)] unsafe fn exhume<'a>(_self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { Some(bytes) } + unsafe fn exhume<'a>(_self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { Some(bytes) } /// Reports the number of further bytes required to entomb `self`. - #[inline(always)] fn extent(&self) -> usize { 0 } + fn extent(&self) -> usize { 0 } } /// The `unsafe_abomonate!` macro takes a type name with an optional list of fields, and implements @@ -230,12 +228,12 @@ macro_rules! unsafe_abomonate { }; ($t:ty : $($field:ident),*) => { impl Abomonation for $t { - #[inline] unsafe fn entomb(&self, write: &mut W) -> ::std::io::Result<()> { + unsafe fn entomb(&self, write: &mut W) -> ::std::io::Result<()> { $( self.$field.entomb(write)?; )* Ok(()) } - #[inline] unsafe fn exhume<'a>(self_: ::std::ptr::NonNull, mut bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { + unsafe fn exhume<'a>(self_: ::std::ptr::NonNull, mut bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { $( // FIXME: This (briefly) constructs an &mut _ to invalid data, which is UB. // The proposed &raw mut operator would allow avoiding this. @@ -245,7 +243,7 @@ macro_rules! unsafe_abomonate { Some(bytes) } - #[inline] fn extent(&self) -> usize { + fn extent(&self) -> usize { let mut size = 0; $( size += self.$field.extent(); )* size @@ -259,14 +257,14 @@ macro_rules! tuple_abomonate { ( $($ty:ident)+) => ( impl<$($ty: Abomonation),*> Abomonation for ($($ty,)*) { #[allow(non_snake_case)] - #[inline(always)] unsafe fn entomb(&self, write: &mut WRITE) -> IOResult<()> { + unsafe fn entomb(&self, write: &mut WRITE) -> IOResult<()> { let ($(ref $ty,)*) = *self; $( $ty.entomb(write)?; )* Ok(()) } #[allow(non_snake_case)] - #[inline(always)] unsafe fn exhume<'a>(self_: NonNull, mut bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { + unsafe fn exhume<'a>(self_: NonNull, mut bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { // FIXME: This (briefly) constructs a "ref mut" to invalid data, which is UB. // I think avoiding this would require a cleaner way to iterate over tuple fields. // One possibility would be a C++11-style combination of variadic generics and recursion. @@ -279,7 +277,7 @@ macro_rules! tuple_abomonate { } #[allow(non_snake_case)] - #[inline(always)] fn extent(&self) -> usize { + fn extent(&self) -> usize { let mut size = 0; let ($(ref $ty,)*) = *self; $( size += $ty.extent(); )* @@ -330,14 +328,14 @@ impl Abomonation for ::std::time::Duration { } impl Abomonation for PhantomData {} impl Abomonation for Option { - #[inline(always)] unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { + unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { if let &Some(ref inner) = self { inner.entomb(write)?; } Ok(()) } - #[inline(always)] unsafe fn exhume<'a>(self_: NonNull, mut bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { + unsafe fn exhume<'a>(self_: NonNull, mut bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { // FIXME: This (briefly) constructs a "ref mut" to invalid data, which is UB. // I'm not sure if this can be fully resolved without relying on enum implementation details. if let Some(ref mut inner) = *self_.as_ptr() { @@ -347,20 +345,20 @@ impl Abomonation for Option { Some(bytes) } - #[inline] fn extent(&self) -> usize { + fn extent(&self) -> usize { self.as_ref().map(|inner| inner.extent()).unwrap_or(0) } } impl Abomonation for Result { - #[inline(always)] unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { + unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { match self { &Ok(ref inner) => inner.entomb(write), &Err(ref inner) => inner.entomb(write), } } - #[inline(always)] unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { + unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { // FIXME: This (briefly) constructs a "ref mut" to invalid data, which is UB. // I'm not sure if this can be fully resolved without relying on enum implementation details. match *self_.as_ptr() { @@ -375,7 +373,7 @@ impl Abomonation for Result { } } - #[inline] fn extent(&self) -> usize { + fn extent(&self) -> usize { match self { &Ok(ref inner) => inner.extent(), &Err(ref inner) => inner.extent(), @@ -416,21 +414,18 @@ tuple_abomonate!(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA AB AC AD tuple_abomonate!(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA AB AC AD AE); tuple_abomonate!(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA AB AC AD AE AF); - macro_rules! array_abomonate { ($size:expr) => ( impl Abomonation for [T; $size] { - #[inline(always)] unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { entomb_slice(&self[..], write) } - #[inline(always)] unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { exhume_slice(self_.as_ptr() as *mut T, $size, bytes) } - #[inline(always)] fn extent(&self) -> usize { + fn extent(&self) -> usize { slice_extent(&self[..]) } } @@ -472,7 +467,6 @@ array_abomonate!(31); array_abomonate!(32); impl Abomonation for String { - #[inline] unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { write.write_all(self.as_bytes()) } @@ -490,13 +484,12 @@ impl Abomonation for String { } } - #[inline] fn extent(&self) -> usize { + fn extent(&self) -> usize { self.len() } } impl Abomonation for Vec { - #[inline] unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { write.write_all(typed_to_bytes(&self[..]))?; entomb_slice(&self[..], write) @@ -518,20 +511,17 @@ impl Abomonation for Vec { } } - #[inline] fn extent(&self) -> usize { mem::size_of::() * self.len() + slice_extent(&self[..]) } } impl Abomonation for Box { - #[inline] unsafe fn entomb(&self, bytes: &mut W) -> IOResult<()> { bytes.write_all(std::slice::from_raw_parts(mem::transmute(&**self), mem::size_of::()))?; (**self).entomb(bytes) } - #[inline] unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { let binary_len = mem::size_of::(); if binary_len > bytes.len() { None } @@ -544,13 +534,13 @@ impl Abomonation for Box { } } - #[inline] fn extent(&self) -> usize { + fn extent(&self) -> usize { mem::size_of::() + (&**self).extent() } } // This method currently enables undefined behavior, by exposing padding bytes. -#[inline] unsafe fn typed_to_bytes(slice: &[T]) -> &[u8] { +unsafe fn typed_to_bytes(slice: &[T]) -> &[u8] { std::slice::from_raw_parts(slice.as_ptr() as *const u8, slice.len() * mem::size_of::()) } @@ -587,4 +577,4 @@ mod network { impl Abomonation for SocketAddr { } impl Abomonation for SocketAddrV4 { } impl Abomonation for SocketAddrV6 { } -} \ No newline at end of file +} From cb8e2af5eb9810e4eda4156eecf33cac2d998f7b Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Fri, 20 Sep 2019 01:38:36 +0200 Subject: [PATCH 15/25] Add inline(always) where it matters in benchmarks --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index dc1a5e6..c0aa555 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -71,7 +71,7 @@ pub mod abomonated; /// } /// ``` /// -#[inline] +#[inline(always)] pub unsafe fn encode(typed: &T, write: &mut W) -> IOResult<()> { let slice = std::slice::from_raw_parts(mem::transmute(typed), mem::size_of::()); write.write_all(slice)?; From 17a03542b2c0b64d67e8859e6ac0543695fa2bc3 Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Fri, 20 Sep 2019 01:56:57 +0200 Subject: [PATCH 16/25] Avoid multiple codegen units in benchmarks --- Cargo.toml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 7c2f62d..9ee1ca1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,3 +14,9 @@ license = "MIT" [dev-dependencies] recycler="0.1.4" + +[profile.bench] +# Multiple codegen units speed up compilation, but make compilation output less +# deteministic and generally decrease codegen quality through worse inlining. +# Let's turn it off for benchmarking. +codegen-units = 1 From ee283b030f44b98a55bf4514fba4e26f125c988e Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Fri, 20 Sep 2019 11:24:56 +0200 Subject: [PATCH 17/25] Improve Abomonation documentation and make the trait unsafe By its very nature, Abomonation does very unsafe (and UB-ish) things. But we should strive to explain these as well as we can in the current state of the unsafe Rust formalization effort in order to reduce the potential for known misuse. --- src/lib.rs | 114 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 64 insertions(+), 50 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index c0aa555..a01bac7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -149,14 +149,28 @@ pub fn measure(typed: &T) -> usize { /// /// # Safety /// -/// Abomonation has no safe methods. Please do not call them. They should be called only by +/// `entomb` and `exhume` are not meant to be called directly. They should be called only by /// `encode` and `decode`, each of which impose restrictions on ownership and lifetime of the data -/// they take as input and return as output. +/// they take as input and return as output, thus improving safety. +/// +/// Not all Rust types are abomonable, and one should think carefully about the implications of +/// implementing `Abomonation` for a type. To lend itself to this exercise, a type must... +/// +/// - Provide exhaustive access to its internal representation +/// - Allow reconstruction from said representation +/// - Survive having its heap allocations being silently replaced by inline pointers to +/// the same storage block, as long as only a shared reference is observed. +/// +/// The last point is the reason why `Abomonation` only provides a shared reference to the +/// reconstructed object. Without this, it would be trivial to trigger, say, a `Box` destructor +/// that tries to call `free()` on the inner pointer. But the use of a shared reference only +/// provides minimal sanity, and for types with internal mutability (those with an `UnsafeCell` +/// inside), this precaution is insufficient. `Abomonation` is generally not safely implementable +/// for such types, but may work in particular cases like atomics. /// /// If you are concerned about safety, it may be best to avoid Abomonation all together. It does /// several things that may be undefined behavior, depending on how undefined behavior is defined. -pub trait Abomonation { - +pub unsafe trait Abomonation { /// Write any additional information about `&self` beyond its binary representation. /// /// Most commonly this is owned data on the other end of pointers in `&self`. The return value @@ -224,10 +238,10 @@ pub trait Abomonation { #[deprecated(since="0.5", note="please use the abomonation_derive crate")] macro_rules! unsafe_abomonate { ($t:ty) => { - impl Abomonation for $t { } + unsafe impl Abomonation for $t { } }; ($t:ty : $($field:ident),*) => { - impl Abomonation for $t { + unsafe impl Abomonation for $t { unsafe fn entomb(&self, write: &mut W) -> ::std::io::Result<()> { $( self.$field.entomb(write)?; )* Ok(()) @@ -255,7 +269,7 @@ macro_rules! unsafe_abomonate { // general code for tuples (can't use '0', '1', ... as field identifiers) macro_rules! tuple_abomonate { ( $($ty:ident)+) => ( - impl<$($ty: Abomonation),*> Abomonation for ($($ty,)*) { + unsafe impl<$($ty: Abomonation),*> Abomonation for ($($ty,)*) { #[allow(non_snake_case)] unsafe fn entomb(&self, write: &mut WRITE) -> IOResult<()> { let ($(ref $ty,)*) = *self; @@ -287,47 +301,47 @@ macro_rules! tuple_abomonate { ); } -impl Abomonation for u8 { } -impl Abomonation for u16 { } -impl Abomonation for u32 { } -impl Abomonation for u64 { } -impl Abomonation for u128 { } -impl Abomonation for usize { } +unsafe impl Abomonation for u8 { } +unsafe impl Abomonation for u16 { } +unsafe impl Abomonation for u32 { } +unsafe impl Abomonation for u64 { } +unsafe impl Abomonation for u128 { } +unsafe impl Abomonation for usize { } -impl Abomonation for i8 { } -impl Abomonation for i16 { } -impl Abomonation for i32 { } -impl Abomonation for i64 { } -impl Abomonation for i128 { } -impl Abomonation for isize { } +unsafe impl Abomonation for i8 { } +unsafe impl Abomonation for i16 { } +unsafe impl Abomonation for i32 { } +unsafe impl Abomonation for i64 { } +unsafe impl Abomonation for i128 { } +unsafe impl Abomonation for isize { } -impl Abomonation for NonZeroU8 { } -impl Abomonation for NonZeroU16 { } -impl Abomonation for NonZeroU32 { } -impl Abomonation for NonZeroU64 { } -impl Abomonation for NonZeroU128 { } -impl Abomonation for NonZeroUsize { } +unsafe impl Abomonation for NonZeroU8 { } +unsafe impl Abomonation for NonZeroU16 { } +unsafe impl Abomonation for NonZeroU32 { } +unsafe impl Abomonation for NonZeroU64 { } +unsafe impl Abomonation for NonZeroU128 { } +unsafe impl Abomonation for NonZeroUsize { } -impl Abomonation for NonZeroI8 { } -impl Abomonation for NonZeroI16 { } -impl Abomonation for NonZeroI32 { } -impl Abomonation for NonZeroI64 { } -impl Abomonation for NonZeroI128 { } -impl Abomonation for NonZeroIsize { } +unsafe impl Abomonation for NonZeroI8 { } +unsafe impl Abomonation for NonZeroI16 { } +unsafe impl Abomonation for NonZeroI32 { } +unsafe impl Abomonation for NonZeroI64 { } +unsafe impl Abomonation for NonZeroI128 { } +unsafe impl Abomonation for NonZeroIsize { } -impl Abomonation for f32 { } -impl Abomonation for f64 { } +unsafe impl Abomonation for f32 { } +unsafe impl Abomonation for f64 { } -impl Abomonation for bool { } -impl Abomonation for () { } +unsafe impl Abomonation for bool { } +unsafe impl Abomonation for () { } -impl Abomonation for char { } +unsafe impl Abomonation for char { } -impl Abomonation for ::std::time::Duration { } +unsafe impl Abomonation for ::std::time::Duration { } -impl Abomonation for PhantomData {} +unsafe impl Abomonation for PhantomData {} -impl Abomonation for Option { +unsafe impl Abomonation for Option { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { if let &Some(ref inner) = self { inner.entomb(write)?; @@ -350,7 +364,7 @@ impl Abomonation for Option { } } -impl Abomonation for Result { +unsafe impl Abomonation for Result { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { match self { &Ok(ref inner) => inner.entomb(write), @@ -416,7 +430,7 @@ tuple_abomonate!(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA AB AC AD macro_rules! array_abomonate { ($size:expr) => ( - impl Abomonation for [T; $size] { + unsafe impl Abomonation for [T; $size] { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { entomb_slice(&self[..], write) } @@ -466,7 +480,7 @@ array_abomonate!(30); array_abomonate!(31); array_abomonate!(32); -impl Abomonation for String { +unsafe impl Abomonation for String { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { write.write_all(self.as_bytes()) } @@ -489,7 +503,7 @@ impl Abomonation for String { } } -impl Abomonation for Vec { +unsafe impl Abomonation for Vec { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { write.write_all(typed_to_bytes(&self[..]))?; entomb_slice(&self[..], write) @@ -516,7 +530,7 @@ impl Abomonation for Vec { } } -impl Abomonation for Box { +unsafe impl Abomonation for Box { unsafe fn entomb(&self, bytes: &mut W) -> IOResult<()> { bytes.write_all(std::slice::from_raw_parts(mem::transmute(&**self), mem::size_of::()))?; (**self).entomb(bytes) @@ -570,11 +584,11 @@ mod network { use Abomonation; use std::net::{SocketAddr, SocketAddrV4, SocketAddrV6, IpAddr, Ipv4Addr, Ipv6Addr}; - impl Abomonation for IpAddr { } - impl Abomonation for Ipv4Addr { } - impl Abomonation for Ipv6Addr { } + unsafe impl Abomonation for IpAddr { } + unsafe impl Abomonation for Ipv4Addr { } + unsafe impl Abomonation for Ipv6Addr { } - impl Abomonation for SocketAddr { } - impl Abomonation for SocketAddrV4 { } - impl Abomonation for SocketAddrV6 { } + unsafe impl Abomonation for SocketAddr { } + unsafe impl Abomonation for SocketAddrV4 { } + unsafe impl Abomonation for SocketAddrV6 { } } From 6c12b2e05c5325e88335f7e90fe119a26f8da0fa Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Fri, 20 Sep 2019 11:58:05 +0200 Subject: [PATCH 18/25] Take writer by value --- src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index a01bac7..aba9051 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -72,10 +72,10 @@ pub mod abomonated; /// ``` /// #[inline(always)] -pub unsafe fn encode(typed: &T, write: &mut W) -> IOResult<()> { +pub unsafe fn encode(typed: &T, mut write: W) -> IOResult<()> { let slice = std::slice::from_raw_parts(mem::transmute(typed), mem::size_of::()); write.write_all(slice)?; - typed.entomb(write) + typed.entomb(&mut write) } /// Decodes a mutable binary slice into an immutable typed reference. From a4377f464121e1b45bb8879e970826baa9e5b0ab Mon Sep 17 00:00:00 2001 From: Hadrien G Date: Wed, 25 Sep 2019 22:34:32 +0200 Subject: [PATCH 19/25] Do not rely on autoderef to select the right Abomonation impl --- src/lib.rs | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index aba9051..6611e3d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -75,7 +75,7 @@ pub mod abomonated; pub unsafe fn encode(typed: &T, mut write: W) -> IOResult<()> { let slice = std::slice::from_raw_parts(mem::transmute(typed), mem::size_of::()); write.write_all(slice)?; - typed.entomb(&mut write) + T::entomb(typed, &mut write) } /// Decodes a mutable binary slice into an immutable typed reference. @@ -139,7 +139,7 @@ pub unsafe fn decode(bytes: &mut [u8]) -> Option<(&T, &mut [u8]) /// /// The `measure` method is safe. It neither produces nor consults serialized representations. pub fn measure(typed: &T) -> usize { - mem::size_of::() + typed.extent() + mem::size_of::() + T::extent(&typed) } /// Abomonation provides methods to serialize any heap data the implementor owns. @@ -243,7 +243,7 @@ macro_rules! unsafe_abomonate { ($t:ty : $($field:ident),*) => { unsafe impl Abomonation for $t { unsafe fn entomb(&self, write: &mut W) -> ::std::io::Result<()> { - $( self.$field.entomb(write)?; )* + $( Abomonation::entomb(&self.$field, write)?; )* Ok(()) } @@ -259,7 +259,7 @@ macro_rules! unsafe_abomonate { fn extent(&self) -> usize { let mut size = 0; - $( size += self.$field.extent(); )* + $( size += Abomonation::extent(&self.$field); )* size } } @@ -273,7 +273,7 @@ macro_rules! tuple_abomonate { #[allow(non_snake_case)] unsafe fn entomb(&self, write: &mut WRITE) -> IOResult<()> { let ($(ref $ty,)*) = *self; - $( $ty.entomb(write)?; )* + $( $ty::entomb($ty, write)?; )* Ok(()) } @@ -294,7 +294,7 @@ macro_rules! tuple_abomonate { fn extent(&self) -> usize { let mut size = 0; let ($(ref $ty,)*) = *self; - $( size += $ty.extent(); )* + $( size += $ty::extent($ty); )* size } } @@ -344,7 +344,7 @@ unsafe impl Abomonation for PhantomData {} unsafe impl Abomonation for Option { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { if let &Some(ref inner) = self { - inner.entomb(write)?; + T::entomb(inner, write)?; } Ok(()) } @@ -360,15 +360,15 @@ unsafe impl Abomonation for Option { } fn extent(&self) -> usize { - self.as_ref().map(|inner| inner.extent()).unwrap_or(0) + self.as_ref().map(T::extent).unwrap_or(0) } } unsafe impl Abomonation for Result { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { match self { - &Ok(ref inner) => inner.entomb(write), - &Err(ref inner) => inner.entomb(write), + &Ok(ref inner) => T::entomb(inner, write), + &Err(ref inner) => E::entomb(inner, write), } } @@ -389,8 +389,8 @@ unsafe impl Abomonation for Result { fn extent(&self) -> usize { match self { - &Ok(ref inner) => inner.extent(), - &Err(ref inner) => inner.extent(), + &Ok(ref inner) => T::extent(inner), + &Err(ref inner) => E::extent(inner), } } } @@ -533,7 +533,7 @@ unsafe impl Abomonation for Vec { unsafe impl Abomonation for Box { unsafe fn entomb(&self, bytes: &mut W) -> IOResult<()> { bytes.write_all(std::slice::from_raw_parts(mem::transmute(&**self), mem::size_of::()))?; - (**self).entomb(bytes) + T::entomb(&**self, bytes) } unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { @@ -549,7 +549,7 @@ unsafe impl Abomonation for Box { } fn extent(&self) -> usize { - mem::size_of::() + (&**self).extent() + mem::size_of::() + T::extent(&**self) } } @@ -560,7 +560,7 @@ unsafe fn typed_to_bytes(slice: &[T]) -> &[u8] { // Common subset of "entomb" for all [T]-like types unsafe fn entomb_slice(slice: &[T], write: &mut W) -> IOResult<()> { - for element in slice { element.entomb(write)?; } + for element in slice { T::entomb(element, write)?; } Ok(()) } From cda9ebb1479c7eed45ef5e2d9cdb6d0f7bf4f284 Mon Sep 17 00:00:00 2001 From: Hadrien G Date: Thu, 26 Sep 2019 00:01:22 +0200 Subject: [PATCH 20/25] Make Box::entomb simpler and more consistent with others --- src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 6611e3d..aacd8f1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -531,9 +531,9 @@ unsafe impl Abomonation for Vec { } unsafe impl Abomonation for Box { - unsafe fn entomb(&self, bytes: &mut W) -> IOResult<()> { - bytes.write_all(std::slice::from_raw_parts(mem::transmute(&**self), mem::size_of::()))?; - T::entomb(&**self, bytes) + unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { + write.write_all(typed_to_bytes(std::slice::from_ref(&**self)))?; + T::entomb(&**self, write) } unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { From e63516bcaac2ced8ac31fd24c973d2d036586908 Mon Sep 17 00:00:00 2001 From: Hadrien G Date: Sun, 29 Sep 2019 16:58:34 +0200 Subject: [PATCH 21/25] Add basic support for types which contain references --- benches/bench.rs | 8 +- benches/clone.rs | 8 +- benches/recycler.rs | 8 +- src/abomonated.rs | 93 +++++++++---- src/lib.rs | 330 ++++++++++++++++++++++++++++---------------- tests/tests.rs | 170 ++++++++++++----------- 6 files changed, 384 insertions(+), 233 deletions(-) diff --git a/benches/bench.rs b/benches/bench.rs index 4492541..e5d0c2b 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -30,7 +30,9 @@ use test::Bencher; #[bench] fn vec_u_vn_s_enc(bencher: &mut Bencher) { _bench_enc(bencher, vec![vec![(0u64, vec![(); 1 << 40], format!("grawwwwrr!")); 32]; 32]); } #[bench] fn vec_u_vn_s_dec(bencher: &mut Bencher) { _bench_dec(bencher, vec![vec![(0u64, vec![(); 1 << 40], format!("grawwwwrr!")); 32]; 32]); } -fn _bench_enc(bencher: &mut Bencher, record: T) { +fn _bench_enc(bencher: &mut Bencher, record: T) + where for<'de> T: Abomonation<'de> +{ // prepare encoded data for bencher.bytes let mut bytes = Vec::new(); unsafe { encode(&record, &mut bytes).unwrap(); } @@ -43,7 +45,9 @@ fn _bench_enc(bencher: &mut Bencher, record: T) { }); } -fn _bench_dec(bencher: &mut Bencher, record: T) { +fn _bench_dec(bencher: &mut Bencher, record: T) + where for<'de> T: Abomonation<'de> + Eq +{ // prepare encoded data let mut bytes = Vec::new(); unsafe { encode(&record, &mut bytes).unwrap(); } diff --git a/benches/clone.rs b/benches/clone.rs index a5f5b2e..7078d3d 100644 --- a/benches/clone.rs +++ b/benches/clone.rs @@ -27,7 +27,9 @@ use test::Bencher; #[bench] fn vec_u_vn_s_e_d(bencher: &mut Bencher) { _bench_e_d(bencher, vec![vec![(0u64, vec![(); 1 << 40], format!("grawwwwrr!")); 32]; 32]); } #[bench] fn vec_u_vn_s_cln(bencher: &mut Bencher) { _bench_cln(bencher, vec![vec![(0u64, vec![(); 1 << 40], format!("grawwwwrr!")); 32]; 32]); } -fn _bench_e_d(bencher: &mut Bencher, record: T) { +fn _bench_e_d(bencher: &mut Bencher, record: T) + where for<'de> T: Abomonation<'de> +{ // prepare encoded data for bencher.bytes let mut bytes = Vec::new(); unsafe { encode(&record, &mut bytes).unwrap(); } @@ -41,7 +43,9 @@ fn _bench_e_d(bencher: &mut Bencher, record: T) { }); } -fn _bench_cln(bencher: &mut Bencher, record: T) { +fn _bench_cln(bencher: &mut Bencher, record: T) + where for<'de> T: Abomonation<'de> + Clone +{ // prepare encoded data let mut bytes = Vec::new(); unsafe { encode(&record, &mut bytes).unwrap(); } diff --git a/benches/recycler.rs b/benches/recycler.rs index 96391b5..fef1bee 100644 --- a/benches/recycler.rs +++ b/benches/recycler.rs @@ -27,7 +27,9 @@ use test::Bencher; // TODO : this reveals that working with a `vec![(); 1 << 40]` does not get optimized away. // #[bench] fn vec_u_vn_s_rec(bencher: &mut Bencher) { _bench_rec(bencher, vec![vec![(0u64, vec![(); 1 << 40], format!("grawwwwrr!")); 32]; 32]); } -fn _bench_own(bencher: &mut Bencher, record: T) { +fn _bench_own(bencher: &mut Bencher, record: T) + where for<'de> T: Abomonation<'de> + Clone +{ // prepare encoded data let mut bytes = Vec::new(); unsafe { encode(&record, &mut bytes).unwrap(); } @@ -41,7 +43,9 @@ fn _bench_own(bencher: &mut Bencher, record: T) { } -fn _bench_rec(bencher: &mut Bencher, record: T) { +fn _bench_rec(bencher: &mut Bencher, record: T) + where for<'de> T: Abomonation<'de> + Recyclable +{ // prepare encoded data let mut bytes = Vec::new(); unsafe { encode(&record, &mut bytes).unwrap(); } diff --git a/src/abomonated.rs b/src/abomonated.rs index 328b195..02732ca 100644 --- a/src/abomonated.rs +++ b/src/abomonated.rs @@ -1,5 +1,4 @@ -use std::mem::transmute; use std::marker::PhantomData; use std::ops::{Deref, DerefMut}; @@ -7,17 +6,22 @@ use super::{Abomonation, decode}; /// A type wrapping owned decoded abomonated data. /// -/// This type ensures that decoding and pointer correction has already happened, -/// and implements `Deref` using a pointer cast (transmute). +/// This type ensures that decoding and pointer correction has already happened. +/// It provides a way to move the deserialized data around, while keeping +/// on-demand access to it via the `as_ref()` method. +/// +/// As an extra convenience, `Deref` is also implemented if T does +/// not contain references. Unfortunately, this trait cannot be safely +/// implemented when T does contain references. /// /// #Safety /// -/// The safety of this type, and in particular its `transute` implementation of -/// the `Deref` trait, relies on the owned bytes not being externally mutated -/// once provided. You could imagine a new type implementing `DerefMut` as required, -/// but which also retains the ability (e.g. through `RefCell`) to mutate the bytes. -/// This would be very bad, but seems hard to prevent in the type system. Please -/// don't do this. +/// The safety of this type, and in particular of access to the deserialized +/// data, relies on the owned bytes not being externally mutated after the +/// `Abomonated` is constructed. You could imagine a new type implementing +/// `DerefMut` as required, but which also retains the ability (e.g. through +/// `RefCell`) to mutate the bytes. This would be very bad, but seems hard to +/// prevent in the type system. Please don't do this. /// /// You must also use a type `S` whose bytes have a fixed location in memory. /// Otherwise moving an instance of `Abomonated` may invalidate decoded @@ -54,8 +58,11 @@ pub struct Abomonated> { decoded: S, } -impl> Abomonated { - +impl<'s, 't, T, S> Abomonated + where S: DerefMut + 's, + T: Abomonation<'t>, + 's: 't +{ /// Attempts to create decoded data from owned mutable bytes. /// /// This method will return `None` if it is unable to decode the data with @@ -94,34 +101,64 @@ impl> Abomonated { /// not change if the `bytes: S` instance is moved. Good examples are /// `Vec` whereas bad examples are `[u8; 16]`. pub unsafe fn new(mut bytes: S) -> Option { + // Fix type `T`'s inner pointers. Will return `None` on failure. + // + // FIXME: `slice::from_raw_parts_mut` is used to work around the borrow + // checker marking `bytes` as forever borrowed if `&mut bytes` is + // directly passed as input to `decode()`. But that is itself a + // byproduct of the API contract specified by the `where` clause + // above, which allows S to be `&'t mut [u8]` (and therefore + // require such a perpetual borrow) in the worst case. + // + // A different API contract might allow us to achieve the same + // result without resorting to such evil unsafe tricks. + // + decode::(std::slice::from_raw_parts_mut(bytes.as_mut_ptr(), + bytes.len()))?; - // performs the underlying pointer correction, indicates success. - let decoded = decode::(bytes.deref_mut()).is_some(); - - if decoded { - Some(Abomonated { - phantom: PhantomData, - decoded: bytes, - }) - } - else { - None - } + // Build the Abomonated structure + Some(Abomonated { + phantom: PhantomData, + decoded: bytes, + }) } } -impl> Abomonated { +impl<'t, T, S> Abomonated + where S: DerefMut, + T: Abomonation<'t> +{ + /// Get a read-only view on the deserialized bytes pub fn as_bytes(&self) -> &[u8] { &self.decoded } -} + /// Get a read-only view on the deserialized data + // + // NOTE: This method can be safely used even if type T contains references, + // because it makes sure that the borrow of `self` lasts long enough + // to encompass the lifetime of these references. + // + // Otherwise, it would be possible to extract an `&'static Something` + // from a short-lived borrow of a `Box<[u8]>`, then drop the `Box`, + // and end up with a dangling reference. + // + pub fn as_ref<'a: 't>(&'a self) -> &'a T { + unsafe { &*(self.decoded.as_ptr() as *const T) } + } +} -impl> Deref for Abomonated { +// NOTE: The lifetime constraint that was applied to `as_ref()` cannot be +// applied to a `Deref` implementation. Therefore, `Deref` can only be +// used on types T which do not contain references, as enforced by the +// higher-ranked trait bound below. +impl Deref for Abomonated + where S: DerefMut, + for<'t> T: Abomonation<'t>, +{ type Target = T; #[inline] fn deref(&self) -> &T { - let result: &T = unsafe { transmute(self.decoded.get_unchecked(0)) }; - result + self.as_ref() } } diff --git a/src/lib.rs b/src/lib.rs index aacd8f1..f78bec4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -72,7 +72,7 @@ pub mod abomonated; /// ``` /// #[inline(always)] -pub unsafe fn encode(typed: &T, mut write: W) -> IOResult<()> { +pub unsafe fn encode(typed: &T, mut write: W) -> IOResult<()> { let slice = std::slice::from_raw_parts(mem::transmute(typed), mem::size_of::()); write.write_all(slice)?; T::entomb(typed, &mut write) @@ -119,7 +119,9 @@ pub unsafe fn encode(typed: &T, mut write: W) -> IORes /// assert!(remaining.len() == 0); /// } /// ``` -pub unsafe fn decode(bytes: &mut [u8]) -> Option<(&T, &mut [u8])> { +pub unsafe fn decode<'bytes, T>(bytes: &'bytes mut [u8]) -> Option<(&'bytes T, &'bytes mut [u8])> + where T: Exhume<'bytes> +{ if bytes.len() < mem::size_of::() { None } else { let (split1, split2) = bytes.split_at_mut(mem::size_of::()); @@ -138,12 +140,24 @@ pub unsafe fn decode(bytes: &mut [u8]) -> Option<(&T, &mut [u8]) /// # Safety /// /// The `measure` method is safe. It neither produces nor consults serialized representations. -pub fn measure(typed: &T) -> usize { +pub fn measure(typed: &T) -> usize { mem::size_of::() + T::extent(&typed) } /// Abomonation provides methods to serialize any heap data the implementor owns. /// +/// The trait's lifetime parameter `'de` represents the set of slices of bytes `&'de mut [u8]` +/// from which it is valid to deserialize an `&'de impl Abomonation<'de>`. Types which own all of +/// their data may be deserialized freely, and implement this trait for all `'de`. However, we +/// need to enforce some deserialization restrictions on types which contain references. +/// +/// The reason is that abomonation performes in-place deserialization. To do that, it has to +/// patch a type's reference to point to other serialized data. Where a type _should_ contain an +/// `&'a T`, abomonation patches that into an `&'de T`. For this substitution to be valid, we +/// need `'de` to outlive `'a`. Otherwise, a careless user could ask abomonation to deserialize a +/// `&'static T` and get a `&'de T` which masquerades as an `&'static T` instead. The user could +/// then copy this reference, drop the bytes, and get use-after-free undefined behavior. +/// /// The default implementations for Abomonation's methods are all empty. Many types have no owned /// data to transcribe. Some do, however, and need to carefully implement these unsafe methods. /// @@ -170,13 +184,35 @@ pub fn measure(typed: &T) -> usize { /// /// If you are concerned about safety, it may be best to avoid Abomonation all together. It does /// several things that may be undefined behavior, depending on how undefined behavior is defined. -pub unsafe trait Abomonation { +pub unsafe trait Abomonation<'de> : Entomb + Exhume<'de> {} +unsafe impl<'de, T: Entomb + Exhume<'de>> Abomonation<'de> for T {} + +/// Types which can be serialized into bytes by abomonation +/// +/// Please consult the Abomonation trait's documentation for more details. Most +/// types which are serializable by abomonation are also deserializable by +/// abomonation, but we need to have a separate serialization and +/// deserialization trait for obscure lifetime-related reasons. +/// +pub unsafe trait Entomb { /// Write any additional information about `&self` beyond its binary representation. /// /// Most commonly this is owned data on the other end of pointers in `&self`. The return value /// reports any failures in writing to `write`. unsafe fn entomb(&self, _write: &mut W) -> IOResult<()> { Ok(()) } + /// Reports the number of further bytes required to entomb `self`. + fn extent(&self) -> usize { 0 } +} + +/// Types which can be deserialized from `&'de mut [u8]` to `&'de T` by abomonation +/// +/// Please consult the Abomonation trait's documentation for more details. Most +/// types which are serializable by abomonation are also deserializable by +/// abomonation, but we need to have a separate serialization and +/// deserialization trait for obscure lifetime-related reasons. +/// +pub unsafe trait Exhume<'de> : Entomb + 'de { /// Recover any information for `self_` not evident from its binary representation. /// /// Most commonly this populates pointers with valid references into `bytes`. @@ -187,10 +223,7 @@ pub unsafe trait Abomonation { /// `exhume` using raw pointer operations as much as feasible. // // FIXME: Replace self_ with self once Rust has arbitrary self types - unsafe fn exhume<'a>(_self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { Some(bytes) } - - /// Reports the number of further bytes required to entomb `self`. - fn extent(&self) -> usize { 0 } + unsafe fn exhume(_self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { Some(bytes) } } /// The `unsafe_abomonate!` macro takes a type name with an optional list of fields, and implements @@ -238,30 +271,33 @@ pub unsafe trait Abomonation { #[deprecated(since="0.5", note="please use the abomonation_derive crate")] macro_rules! unsafe_abomonate { ($t:ty) => { - unsafe impl Abomonation for $t { } + unsafe impl $crate::Entomb for $t { } + unsafe impl $crate::Exhume<'_> for $t { } }; ($t:ty : $($field:ident),*) => { - unsafe impl Abomonation for $t { + unsafe impl $crate::Entomb for $t { unsafe fn entomb(&self, write: &mut W) -> ::std::io::Result<()> { - $( Abomonation::entomb(&self.$field, write)?; )* + $( $crate::Entomb::entomb(&self.$field, write)?; )* Ok(()) } - unsafe fn exhume<'a>(self_: ::std::ptr::NonNull, mut bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { + fn extent(&self) -> usize { + let mut size = 0; + $( size += $crate::Entomb::extent(&self.$field); )* + size + } + } + + unsafe impl<'de> $crate::Exhume<'de> for $t { + unsafe fn exhume(self_: ::std::ptr::NonNull, mut bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { $( // FIXME: This (briefly) constructs an &mut _ to invalid data, which is UB. // The proposed &raw mut operator would allow avoiding this. let field_ptr: ::std::ptr::NonNull<_> = From::from(&mut (*self_.as_ptr()).$field); - bytes = Abomonation::exhume(field_ptr, bytes)?; + bytes = $crate::Exhume::exhume(field_ptr, bytes)?; )* Some(bytes) } - - fn extent(&self) -> usize { - let mut size = 0; - $( size += Abomonation::extent(&self.$field); )* - size - } } }; } @@ -269,7 +305,7 @@ macro_rules! unsafe_abomonate { // general code for tuples (can't use '0', '1', ... as field identifiers) macro_rules! tuple_abomonate { ( $($ty:ident)+) => ( - unsafe impl<$($ty: Abomonation),*> Abomonation for ($($ty,)*) { + unsafe impl<$($ty: Entomb),*> Entomb for ($($ty,)*) { #[allow(non_snake_case)] unsafe fn entomb(&self, write: &mut WRITE) -> IOResult<()> { let ($(ref $ty,)*) = *self; @@ -278,7 +314,17 @@ macro_rules! tuple_abomonate { } #[allow(non_snake_case)] - unsafe fn exhume<'a>(self_: NonNull, mut bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { + fn extent(&self) -> usize { + let mut size = 0; + let ($(ref $ty,)*) = *self; + $( size += $ty::extent($ty); )* + size + } + } + + unsafe impl<'de, $($ty: Exhume<'de>),*> Exhume<'de> for ($($ty,)*) { + #[allow(non_snake_case)] + unsafe fn exhume(self_: NonNull, mut bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { // FIXME: This (briefly) constructs a "ref mut" to invalid data, which is UB. // I think avoiding this would require a cleaner way to iterate over tuple fields. // One possibility would be a C++11-style combination of variadic generics and recursion. @@ -289,59 +335,82 @@ macro_rules! tuple_abomonate { )* Some(bytes) } - - #[allow(non_snake_case)] - fn extent(&self) -> usize { - let mut size = 0; - let ($(ref $ty,)*) = *self; - $( size += $ty::extent($ty); )* - size - } } ); } -unsafe impl Abomonation for u8 { } -unsafe impl Abomonation for u16 { } -unsafe impl Abomonation for u32 { } -unsafe impl Abomonation for u64 { } -unsafe impl Abomonation for u128 { } -unsafe impl Abomonation for usize { } - -unsafe impl Abomonation for i8 { } -unsafe impl Abomonation for i16 { } -unsafe impl Abomonation for i32 { } -unsafe impl Abomonation for i64 { } -unsafe impl Abomonation for i128 { } -unsafe impl Abomonation for isize { } - -unsafe impl Abomonation for NonZeroU8 { } -unsafe impl Abomonation for NonZeroU16 { } -unsafe impl Abomonation for NonZeroU32 { } -unsafe impl Abomonation for NonZeroU64 { } -unsafe impl Abomonation for NonZeroU128 { } -unsafe impl Abomonation for NonZeroUsize { } - -unsafe impl Abomonation for NonZeroI8 { } -unsafe impl Abomonation for NonZeroI16 { } -unsafe impl Abomonation for NonZeroI32 { } -unsafe impl Abomonation for NonZeroI64 { } -unsafe impl Abomonation for NonZeroI128 { } -unsafe impl Abomonation for NonZeroIsize { } - -unsafe impl Abomonation for f32 { } -unsafe impl Abomonation for f64 { } - -unsafe impl Abomonation for bool { } -unsafe impl Abomonation for () { } - -unsafe impl Abomonation for char { } - -unsafe impl Abomonation for ::std::time::Duration { } - -unsafe impl Abomonation for PhantomData {} - -unsafe impl Abomonation for Option { +unsafe impl Entomb for u8 {} +unsafe impl Exhume<'_> for u8 {} +unsafe impl Entomb for u16 {} +unsafe impl Exhume<'_> for u16 {} +unsafe impl Entomb for u32 {} +unsafe impl Exhume<'_> for u32 {} +unsafe impl Entomb for u64 {} +unsafe impl Exhume<'_> for u64 {} +unsafe impl Entomb for u128 {} +unsafe impl Exhume<'_> for u128 {} +unsafe impl Entomb for usize {} +unsafe impl Exhume<'_> for usize {} + +unsafe impl Entomb for i8 {} +unsafe impl Exhume<'_> for i8 {} +unsafe impl Entomb for i16 {} +unsafe impl Exhume<'_> for i16 {} +unsafe impl Entomb for i32 {} +unsafe impl Exhume<'_> for i32 {} +unsafe impl Entomb for i64 {} +unsafe impl Exhume<'_> for i64 {} +unsafe impl Entomb for i128 {} +unsafe impl Exhume<'_> for i128 {} +unsafe impl Entomb for isize {} +unsafe impl Exhume<'_> for isize {} + +unsafe impl Entomb for NonZeroU8 {} +unsafe impl Exhume<'_> for NonZeroU8 {} +unsafe impl Entomb for NonZeroU16 {} +unsafe impl Exhume<'_> for NonZeroU16 {} +unsafe impl Entomb for NonZeroU32 {} +unsafe impl Exhume<'_> for NonZeroU32 {} +unsafe impl Entomb for NonZeroU64 {} +unsafe impl Exhume<'_> for NonZeroU64 {} +unsafe impl Entomb for NonZeroU128 {} +unsafe impl Exhume<'_> for NonZeroU128 {} +unsafe impl Entomb for NonZeroUsize {} +unsafe impl Exhume<'_> for NonZeroUsize {} + +unsafe impl Entomb for NonZeroI8 {} +unsafe impl Exhume<'_> for NonZeroI8 {} +unsafe impl Entomb for NonZeroI16 {} +unsafe impl Exhume<'_> for NonZeroI16 {} +unsafe impl Entomb for NonZeroI32 {} +unsafe impl Exhume<'_> for NonZeroI32 {} +unsafe impl Entomb for NonZeroI64 {} +unsafe impl Exhume<'_> for NonZeroI64 {} +unsafe impl Entomb for NonZeroI128 {} +unsafe impl Exhume<'_> for NonZeroI128 {} +unsafe impl Entomb for NonZeroIsize {} +unsafe impl Exhume<'_> for NonZeroIsize {} + +unsafe impl Entomb for f32 {} +unsafe impl Exhume<'_> for f32 {} +unsafe impl Entomb for f64 {} +unsafe impl Exhume<'_> for f64 {} + +unsafe impl Entomb for bool {} +unsafe impl Exhume<'_> for bool {} +unsafe impl Entomb for () {} +unsafe impl Exhume<'_> for () {} + +unsafe impl Entomb for char {} +unsafe impl Exhume<'_> for char {} + +unsafe impl Entomb for ::std::time::Duration {} +unsafe impl Exhume<'_> for ::std::time::Duration {} + +unsafe impl Entomb for PhantomData {} +unsafe impl<'de, T: 'de> Exhume<'de> for PhantomData {} + +unsafe impl Entomb for Option { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { if let &Some(ref inner) = self { T::entomb(inner, write)?; @@ -349,7 +418,12 @@ unsafe impl Abomonation for Option { Ok(()) } - unsafe fn exhume<'a>(self_: NonNull, mut bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { + fn extent(&self) -> usize { + self.as_ref().map(T::extent).unwrap_or(0) + } +} +unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for Option { + unsafe fn exhume(self_: NonNull, mut bytes: &'de mut[u8]) -> Option<&'de mut [u8]> { // FIXME: This (briefly) constructs a "ref mut" to invalid data, which is UB. // I'm not sure if this can be fully resolved without relying on enum implementation details. if let Some(ref mut inner) = *self_.as_ptr() { @@ -358,13 +432,9 @@ unsafe impl Abomonation for Option { } Some(bytes) } - - fn extent(&self) -> usize { - self.as_ref().map(T::extent).unwrap_or(0) - } } -unsafe impl Abomonation for Result { +unsafe impl Entomb for Result { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { match self { &Ok(ref inner) => T::entomb(inner, write), @@ -372,7 +442,15 @@ unsafe impl Abomonation for Result { } } - unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { + fn extent(&self) -> usize { + match self { + &Ok(ref inner) => T::extent(inner), + &Err(ref inner) => E::extent(inner), + } + } +} +unsafe impl<'de, T: Exhume<'de>, E: Exhume<'de>> Exhume<'de> for Result { + unsafe fn exhume(self_: NonNull, bytes: &'de mut[u8]) -> Option<&'de mut [u8]> { // FIXME: This (briefly) constructs a "ref mut" to invalid data, which is UB. // I'm not sure if this can be fully resolved without relying on enum implementation details. match *self_.as_ptr() { @@ -386,13 +464,6 @@ unsafe impl Abomonation for Result { } } } - - fn extent(&self) -> usize { - match self { - &Ok(ref inner) => T::extent(inner), - &Err(ref inner) => E::extent(inner), - } - } } tuple_abomonate!(A); @@ -430,19 +501,21 @@ tuple_abomonate!(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA AB AC AD macro_rules! array_abomonate { ($size:expr) => ( - unsafe impl Abomonation for [T; $size] { + unsafe impl Entomb for [T; $size] { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { entomb_slice(&self[..], write) } - unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { - exhume_slice(self_.as_ptr() as *mut T, $size, bytes) - } - fn extent(&self) -> usize { slice_extent(&self[..]) } } + + unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for [T; $size] { + unsafe fn exhume(self_: NonNull, bytes: &'de mut[u8]) -> Option<&'de mut [u8]> { + exhume_slice(self_.as_ptr() as *mut T, $size, bytes) + } + } ) } @@ -480,13 +553,18 @@ array_abomonate!(30); array_abomonate!(31); array_abomonate!(32); -unsafe impl Abomonation for String { +unsafe impl Entomb for String { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { write.write_all(self.as_bytes()) } + fn extent(&self) -> usize { + self.len() + } +} +unsafe impl<'de> Exhume<'de> for String { #[inline] - unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { + unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { // FIXME: This (briefly) constructs an &String to invalid data, which is UB. // I'm not sure if this can be fully resolved without relying on String implementation details. let self_len = self_.as_ref().len(); @@ -497,20 +575,21 @@ unsafe impl Abomonation for String { Some(rest) } } - - fn extent(&self) -> usize { - self.len() - } } -unsafe impl Abomonation for Vec { +unsafe impl Entomb for Vec { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { write.write_all(typed_to_bytes(&self[..]))?; entomb_slice(&self[..], write) } + fn extent(&self) -> usize { + mem::size_of::() * self.len() + slice_extent(&self[..]) + } +} +unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for Vec { #[inline] - unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { + unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { // FIXME: This (briefly) constructs an &Vec to invalid data, which is UB. // I'm not sure if this can be fully resolved without relying on Vec implementation details. let self_len = self_.as_ref().len(); @@ -524,19 +603,20 @@ unsafe impl Abomonation for Vec { Some(rest) } } - - fn extent(&self) -> usize { - mem::size_of::() * self.len() + slice_extent(&self[..]) - } } -unsafe impl Abomonation for Box { +unsafe impl Entomb for Box { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { write.write_all(typed_to_bytes(std::slice::from_ref(&**self)))?; T::entomb(&**self, write) } - unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { + fn extent(&self) -> usize { + mem::size_of::() + T::extent(&**self) + } +} +unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for Box { + unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { let binary_len = mem::size_of::(); if binary_len > bytes.len() { None } else { @@ -547,10 +627,6 @@ unsafe impl Abomonation for Box { Some(rest) } } - - fn extent(&self) -> usize { - mem::size_of::() + T::extent(&**self) - } } // This method currently enables undefined behavior, by exposing padding bytes. @@ -559,15 +635,24 @@ unsafe fn typed_to_bytes(slice: &[T]) -> &[u8] { } // Common subset of "entomb" for all [T]-like types -unsafe fn entomb_slice(slice: &[T], write: &mut W) -> IOResult<()> { +unsafe fn entomb_slice(slice: &[T], write: &mut W) -> IOResult<()> { for element in slice { T::entomb(element, write)?; } Ok(()) } +// Common subset of "extent" for all [T]-like types +fn slice_extent(slice: &[T]) -> usize { + slice.iter().map(T::extent).sum() +} + // Common subset of "exhume" for all [T]-like types // (I'd gladly take a NonNull<[T]>, but it is too difficult to build raw pointers to slices) #[inline] -unsafe fn exhume_slice<'a, T: Abomonation>(first_ptr: *mut T, length: usize, mut bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { +unsafe fn exhume_slice<'de, T: Exhume<'de>>( + first_ptr: *mut T, + length: usize, + mut bytes: &'de mut [u8] +) -> Option<&'de mut [u8]> { for i in 0..length { let element_ptr: NonNull = NonNull::new_unchecked(first_ptr.add(i)); bytes = T::exhume(element_ptr, bytes)?; @@ -575,20 +660,21 @@ unsafe fn exhume_slice<'a, T: Abomonation>(first_ptr: *mut T, length: usize, mut Some(bytes) } -// Common subset of "extent" for all [T]-like types -fn slice_extent(slice: &[T]) -> usize { - slice.iter().map(T::extent).sum() -} - mod network { - use Abomonation; + use super::{Entomb, Exhume}; use std::net::{SocketAddr, SocketAddrV4, SocketAddrV6, IpAddr, Ipv4Addr, Ipv6Addr}; - unsafe impl Abomonation for IpAddr { } - unsafe impl Abomonation for Ipv4Addr { } - unsafe impl Abomonation for Ipv6Addr { } - - unsafe impl Abomonation for SocketAddr { } - unsafe impl Abomonation for SocketAddrV4 { } - unsafe impl Abomonation for SocketAddrV6 { } + unsafe impl Entomb for IpAddr {} + unsafe impl Exhume<'_> for IpAddr {} + unsafe impl Entomb for Ipv4Addr {} + unsafe impl Exhume<'_> for Ipv4Addr {} + unsafe impl Entomb for Ipv6Addr {} + unsafe impl Exhume<'_> for Ipv6Addr {} + + unsafe impl Entomb for SocketAddr {} + unsafe impl Exhume<'_> for SocketAddr {} + unsafe impl Entomb for SocketAddrV4 {} + unsafe impl Exhume<'_> for SocketAddrV4 {} + unsafe impl Entomb for SocketAddrV6 {} + unsafe impl Exhume<'_> for SocketAddrV6 {} } diff --git a/tests/tests.rs b/tests/tests.rs index 3ff0dec..6ffafce 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -3,44 +3,92 @@ extern crate abomonation; use abomonation::*; use std::fmt::Debug; -#[test] fn test_array() { _test_pass(vec![[0, 1, 2]; 1024]); } -#[test] fn test_nonzero() { _test_pass(vec![[std::num::NonZeroI32::new(1)]; 1024]); } -#[test] fn test_opt_vec() { _test_pass(vec![Some(vec![0,1,2]), None]); } -#[test] fn test_alignment() { _test_pass(vec![(format!("x"), vec![1,2,3]); 1024]); } -#[test] fn test_alignment_128() { _test_pass(vec![(format!("x"), vec![1u128,2,3]); 1024]); } -#[test] fn test_option_box_u64() { _test_pass(vec![Some(Box::new(0u64))]); } -#[test] fn test_option_vec() { _test_pass(vec![Some(vec![0, 1, 2])]); } -#[test] fn test_u32x4_pass() { _test_pass(vec![((1,2,3),vec![(0u32, 0u32, 0u32, 0u32); 1024])]); } -#[test] fn test_u64_pass() { _test_pass(vec![0u64; 1024]); } -#[test] fn test_u128_pass() { _test_pass(vec![0u128; 1024]); } -#[test] fn test_string_pass() { _test_pass(vec![format!("grawwwwrr!"); 1024]); } -#[test] fn test_vec_u_s_pass() { _test_pass(vec![vec![(0u64, format!("grawwwwrr!")); 32]; 32]); } - -#[test] fn test_u64_fail() { _test_fail(vec![0u64; 1024]); } -#[test] fn test_u128_fail() { _test_fail(vec![0u128; 1024]); } -#[test] fn test_string_fail() { _test_fail(vec![format!("grawwwwrr!"); 1024]); } -#[test] fn test_vec_u_s_fail() { _test_fail(vec![vec![(0u64, format!("grawwwwrr!")); 32]; 32]); } - -#[test] fn test_array_size() { _test_size(vec![[0, 1, 2]; 1024]); } -#[test] fn test_opt_vec_size() { _test_size(vec![Some(vec![0,1,2]), None]); } -#[test] fn test_alignment_size() { _test_size(vec![(format!("x"), vec![1,2,3]); 1024]); } -#[test] fn test_option_box_u64_size() { _test_size(vec![Some(Box::new(0u64))]); } -#[test] fn test_option_vec_size() { _test_size(vec![Some(vec![0, 1, 2])]); } -#[test] fn test_u32x4_size() { _test_size(vec![((1,2,3),vec![(0u32, 0u32, 0u32, 0u32); 1024])]); } -#[test] fn test_u64_size() { _test_size(vec![0u64; 1024]); } -#[test] fn test_u128_size() { _test_size(vec![0u128; 1024]); } -#[test] fn test_string_size() { _test_size(vec![format!("grawwwwrr!"); 1024]); } -#[test] fn test_vec_u_s_size() { _test_size(vec![vec![(0u64, format!("grawwwwrr!")); 32]; 32]); } +// Test struct for the unsafe_abomonate macro +#[derive(Clone, Debug, Eq, PartialEq)] +struct MyStruct { + a: String, + b: u64, + c: Vec, +} +unsafe_abomonate!(MyStruct : a, b, c); -#[test] -fn test_phantom_data_for_non_abomonatable_type() { - use std::marker::PhantomData; - struct NotAbomonatable; - _test_pass(PhantomData::::default()); +// Test for PhantomData abomonation, which has no Abomonation bound +struct NotAbomonatable; +type PhantomNotAbo = std::marker::PhantomData::; + +// Generic serialization/deserialization testing procedure, add your data here. +macro_rules! gen_tests { + ( + $( $data:expr => ($pass:ident, $fail:ident, $size:ident) ), *$(,)* + ) => { + $( + #[test] fn $pass() { _test_pass(&mut Vec::new(), vec![$data; 1024]); } + #[test] fn $fail() { _test_fail(&mut Vec::new(), vec![$data; 1024]); } + #[test] fn $size() { _test_size(&mut Vec::new(), vec![$data; 1024]); } + )* + }; +} +gen_tests!{ + [4, 1, 2] => (test_array_pass, + test_array_fail, + test_array_size), + + std::num::NonZeroI32::new(1) => (test_nonzero_pass, + test_nonzero_fail, + test_nonzero_size), + + Some(vec![8, 1, 2]) => (test_option_vec_pass, + test_option_vec_fail, + test_option_vec_size), + + (format!("x"), vec![1, 2, 3]) => (test_alignment_pass, + test_alignment_fail, + test_alignment_size), + + (format!("x"), vec![1u128, 2, 3]) => (test_alignment_128_pass, + test_alignment_128_fail, + test_alignment_128_size), + + Some(Box::new(9u64)) => (test_option_box_u64_pass, + test_option_box_u64_fail, + test_option_box_u64_size), + + (3u32, 8u32, 1u32, 7u32) => (test_u32x4_pass, + test_u32x4_fail, + test_u32x4_size), + + 42u64 => (test_u64_pass, + test_u64_fail, + test_u64_size), + + 687u128 => (test_u128_pass, + test_u128_fail, + test_u128_size), + + format!("grawwwwrr!") => (test_string_pass, + test_string_fail, + test_string_size), + + vec![(0u64, format!("grawwwwrr!")); 32] => (test_vec_u_s_pass, + test_vec_u_s_fail, + test_vec_u_s_size), + + MyStruct{ a: "test".to_owned(), + b: 0, + c: vec![0, 1, 2] } => (test_macro_pass, + test_macro_fail, + test_macro_size), + + PhantomNotAbo::default() => (test_phantom_notabo_pass, + test_phantom_notabo_fail, + test_phantom_notabo_size), } -fn _test_pass(record: T) { - let mut bytes = Vec::new(); +// FIXME: I could not find an API which allows _test_pass to allocate a Vec +// internally without restricting the set of allowed Abomonation impls. +fn _test_pass<'bytes, T>(mut bytes: &'bytes mut Vec, record: T) + where T: Abomonation<'bytes> + Debug + Eq +{ unsafe { encode(&record, &mut bytes).unwrap(); } { let (result, rest) = unsafe { decode::(&mut bytes[..]) }.unwrap(); @@ -49,52 +97,20 @@ fn _test_pass(record: T) { } } -fn _test_fail(record: T) { - let mut bytes = Vec::new(); - unsafe { encode(&record, &mut bytes).unwrap(); } - bytes.pop(); - assert_eq!(unsafe { decode::(&mut bytes[..]) }, None); -} - -fn _test_size(record: T) { - let mut bytes = Vec::new(); +// FIXME: I could not find an API which allows _test_fail to allocate a Vec +// internally without restricting the set of allowed Abomonation impls. +fn _test_fail<'bytes, T>(mut bytes: &'bytes mut Vec, record: T) + where T: Abomonation<'bytes> + Debug + Eq +{ unsafe { encode(&record, &mut bytes).unwrap(); } - assert_eq!(bytes.len(), measure(&record)); -} - - -#[derive(Debug, Eq, PartialEq)] -struct MyStruct { - a: String, - b: u64, - c: Vec, -} - -unsafe_abomonate!(MyStruct : a, b, c); - -#[test] -fn test_macro() { - // create some test data out of abomonation-approved types - let record = MyStruct{ a: "test".to_owned(), b: 0, c: vec![0, 1, 2] }; - - // encode vector into a Vec - let mut bytes = Vec::new(); - unsafe { encode(&record, &mut bytes).unwrap(); } - - // decode a &Vec<(u64, String)> from binary data - if let Some((result, rest)) = unsafe { decode::(&mut bytes) } { - assert_eq!(result, &record); - assert_eq!(rest.len(), 0); + if bytes.pop().is_some() { + assert_eq!(unsafe { decode::(&mut bytes[..]) }, None); } } -#[test] -fn test_macro_size() { - // create some test data out of abomonation-approved types - let record = MyStruct{ a: "test".to_owned(), b: 0, c: vec![0, 1, 2] }; - - // encode vector into a Vec - let mut bytes = Vec::new(); +// FIXME: I could not find an API which allows _test_size to allocate a Vec +// internally without restricting the set of allowed Abomonation impls. +fn _test_size<'bytes, T: Abomonation<'bytes>>(mut bytes: &'bytes mut Vec, record: T) { unsafe { encode(&record, &mut bytes).unwrap(); } assert_eq!(bytes.len(), measure(&record)); } From 7a326fe4791064cab6ed2deebb9e728a3897a5de Mon Sep 17 00:00:00 2001 From: Hadrien G Date: Thu, 3 Oct 2019 21:57:46 +0200 Subject: [PATCH 22/25] Add support for this &[mut] T --- src/lib.rs | 74 +++++++++++++++++++++++++++++++++++++++++++------- tests/tests.rs | 4 +++ 2 files changed, 68 insertions(+), 10 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index f78bec4..797de38 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -605,7 +605,21 @@ unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for Vec { } } -unsafe impl Entomb for Box { +// NOTE: While it might be tempting to decouple 'de from the reference target +// and implement Exhume<'de> for &'target T, the two lifetimes actually +// have to be exactly the same. Here's proof : +// +// - Deserialization would produce an &'de &'target T. A reference is +// only valid if the target is longer-lived, so we need 'target: 'de. +// - The deserializer will silently patch &'target T into &'de T. This is +// only safe to do if &'de T : &'target T, so we also need 'de: 'target. +// +// If 'target must outlive 'de and 'de must outlive 'target, then the two +// lifetimes actually must be exactly the same. Which kind of makes sense: +// we start from 'de bytes, and we end up producing references that point +// into those same bytes. +// +unsafe impl<'de, T: Entomb> Entomb for &'de T { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { write.write_all(typed_to_bytes(std::slice::from_ref(&**self)))?; T::entomb(&**self, write) @@ -615,17 +629,45 @@ unsafe impl Entomb for Box { mem::size_of::() + T::extent(&**self) } } +unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for &'de T { + unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { + let (target, rest) = exhume_ref(bytes)?; + self_.as_ptr().write(target); + Some(rest) + } +} + +unsafe impl<'de, T: Entomb> Entomb for &'de mut T { + unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { + <&T>::entomb(&&**self, write) + } + + fn extent(&self) -> usize { + <&T>::extent(&&**self) + } +} +unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for &'de mut T { + unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { + let (target, rest) = exhume_ref(bytes)?; + self_.as_ptr().write(target); + Some(rest) + } +} + +unsafe impl Entomb for Box { + unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { + <&T>::entomb(&self.as_ref(), write) + } + + fn extent(&self) -> usize { + <&T>::extent(&self.as_ref()) + } +} unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for Box { unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { - let binary_len = mem::size_of::(); - if binary_len > bytes.len() { None } - else { - let (mine, mut rest) = bytes.split_at_mut(binary_len); - let box_target : NonNull = NonNull::new_unchecked(mine.as_mut_ptr() as *mut T); - rest = T::exhume(box_target, rest)?; - self_.as_ptr().write(Box::from_raw(box_target.as_ptr())); - Some(rest) - } + let (target, rest) = exhume_ref(bytes)?; + self_.as_ptr().write(Box::from_raw(target as *mut _)); + Some(rest) } } @@ -660,6 +702,18 @@ unsafe fn exhume_slice<'de, T: Exhume<'de>>( Some(bytes) } +// Common subset of "exhume" for all &mut T-like types +unsafe fn exhume_ref<'de, T: Exhume<'de>>(bytes: &'de mut [u8]) -> Option<(&'de mut T, &'de mut [u8])> { + let binary_len = mem::size_of::(); + if binary_len > bytes.len() { None } + else { + let (mine, mut rest) = bytes.split_at_mut(binary_len); + let target : NonNull = NonNull::new_unchecked(mine.as_mut_ptr() as *mut T); + rest = T::exhume(target, rest)?; + Some((&mut *target.as_ptr(), rest)) + } +} + mod network { use super::{Entomb, Exhume}; use std::net::{SocketAddr, SocketAddrV4, SocketAddrV6, IpAddr, Ipv4Addr, Ipv6Addr}; diff --git a/tests/tests.rs b/tests/tests.rs index 6ffafce..8028b0b 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -82,6 +82,10 @@ gen_tests!{ PhantomNotAbo::default() => (test_phantom_notabo_pass, test_phantom_notabo_fail, test_phantom_notabo_size), + + Some(&42u64) => (test_ref_u64_pass, + test_ref_u64_fail, + test_ref_u64_size), } // FIXME: I could not find an API which allows _test_pass to allocate a Vec From f2fe84daa864b7ed60d1de8eaf198a6ca0d7461b Mon Sep 17 00:00:00 2001 From: Hadrien G Date: Thu, 3 Oct 2019 22:16:31 +0200 Subject: [PATCH 23/25] Add support for str and &[mut] str --- src/lib.rs | 64 ++++++++++++++++++++++++++++++++++++++++++++------ tests/tests.rs | 4 ++++ 2 files changed, 61 insertions(+), 7 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 797de38..ae1343e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -403,6 +403,8 @@ unsafe impl Exhume<'_> for () {} unsafe impl Entomb for char {} unsafe impl Exhume<'_> for char {} +unsafe impl Entomb for str {} +unsafe impl Exhume<'_> for str {} unsafe impl Entomb for ::std::time::Duration {} unsafe impl Exhume<'_> for ::std::time::Duration {} @@ -553,7 +555,7 @@ array_abomonate!(30); array_abomonate!(31); array_abomonate!(32); -unsafe impl Entomb for String { +unsafe impl<'de> Entomb for &'de str { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { write.write_all(self.as_bytes()) } @@ -562,18 +564,57 @@ unsafe impl Entomb for String { self.len() } } +unsafe impl<'de> Exhume<'de> for &'de str { + #[inline] + unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { + // FIXME: This (briefly) constructs an &str to invalid data, which is UB. + // I'm not sure if this can be fully resolved without relying on &str implementation details. + let self_len = self_.as_ref().len(); + let (s, rest) = exhume_str_ref(self_len, bytes)?; + self_.as_ptr().write(s); + Some(rest) + } +} + +unsafe impl<'de> Entomb for &'de mut str { + unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { + <&str>::entomb(&self.as_ref(), write) + } + + fn extent(&self) -> usize { + <&str>::extent(&self.as_ref()) + } +} +unsafe impl<'de> Exhume<'de> for &'de mut str { + #[inline] + unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { + // FIXME: This (briefly) constructs an &mut str to invalid data, which is UB. + // I'm not sure if this can be fully resolved without relying on &str implementation details. + let self_len = self_.as_ref().len(); + let (s, rest) = exhume_str_ref(self_len, bytes)?; + self_.as_ptr().write(s); + Some(rest) + } +} + +unsafe impl Entomb for String { + unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { + <&str>::entomb(&self.as_ref(), write) + } + + fn extent(&self) -> usize { + <&str>::extent(&self.as_ref()) + } +} unsafe impl<'de> Exhume<'de> for String { #[inline] unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { // FIXME: This (briefly) constructs an &String to invalid data, which is UB. // I'm not sure if this can be fully resolved without relying on String implementation details. let self_len = self_.as_ref().len(); - if self_len > bytes.len() { None } - else { - let (mine, rest) = bytes.split_at_mut(self_len); - self_.as_ptr().write(String::from_raw_parts(mine.as_mut_ptr(), self_len, self_len)); - Some(rest) - } + let (s, rest) = exhume_str_ref(self_len, bytes)?; + self_.as_ptr().write(String::from_raw_parts(s.as_mut_ptr(), s.len(), s.len())); + Some(rest) } } @@ -714,6 +755,15 @@ unsafe fn exhume_ref<'de, T: Exhume<'de>>(bytes: &'de mut [u8]) -> Option<(&'de } } +// Common subset of "exhume" for all &str-like types +unsafe fn exhume_str_ref<'de>(length: usize, bytes: &'de mut [u8]) -> Option<(&'de mut str, &'de mut [u8])> { + if length > bytes.len() { None } + else { + let (mine, rest) = bytes.split_at_mut(length); + Some((std::str::from_utf8_unchecked_mut(mine), rest)) + } +} + mod network { use super::{Entomb, Exhume}; use std::net::{SocketAddr, SocketAddrV4, SocketAddrV6, IpAddr, Ipv4Addr, Ipv6Addr}; diff --git a/tests/tests.rs b/tests/tests.rs index 8028b0b..85e8708 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -86,6 +86,10 @@ gen_tests!{ Some(&42u64) => (test_ref_u64_pass, test_ref_u64_fail, test_ref_u64_size), + + &"grawwwwrr!" => (test_str_pass, + test_str_fail, + test_str_size), } // FIXME: I could not find an API which allows _test_pass to allocate a Vec From 1b7166bff5d8c6f206055c0fb19df5d253f7c6bf Mon Sep 17 00:00:00 2001 From: Hadrien G Date: Thu, 3 Oct 2019 22:30:41 +0200 Subject: [PATCH 24/25] Add support for [T] and &[mut] [T] --- src/lib.rs | 112 +++++++++++++++++++++++++++++++++++++------------ tests/tests.rs | 4 ++ 2 files changed, 90 insertions(+), 26 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index ae1343e..a9c3822 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -501,15 +501,34 @@ tuple_abomonate!(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA AB AC AD tuple_abomonate!(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA AB AC AD AE); tuple_abomonate!(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA AB AC AD AE AF); +unsafe impl Entomb for [T] { + unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { + for element in self { T::entomb(element, write)?; } + Ok(()) + } + + fn extent(&self) -> usize { + self.iter().map(T::extent).sum() + } +} +unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for [T] { + unsafe fn exhume(self_: NonNull, bytes: &'de mut[u8]) -> Option<&'de mut [u8]> { + // FIXME: This constructs an &[T] to invalid data, which is UB. + // I'm not sure if this can be fully resolved without relying on slice implementation details. + let self_len = self_.as_ref().len(); + exhume_slice(self_.as_ptr() as *mut T, self_len, bytes) + } +} + macro_rules! array_abomonate { ($size:expr) => ( unsafe impl Entomb for [T; $size] { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { - entomb_slice(&self[..], write) + <[T]>::entomb(&self[..], write) } fn extent(&self) -> usize { - slice_extent(&self[..]) + <[T]>::extent(&self[..]) } } @@ -618,14 +637,56 @@ unsafe impl<'de> Exhume<'de> for String { } } -unsafe impl Entomb for Vec { +unsafe impl<'de, T: Entomb> Entomb for &'de [T] { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { write.write_all(typed_to_bytes(&self[..]))?; - entomb_slice(&self[..], write) + <[T]>::entomb(&self[..], write) + } + + fn extent(&self) -> usize { + mem::size_of::() * self.len() + <[T]>::extent(&self[..]) + } +} +unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for &'de [T] { + #[inline] + unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { + // FIXME: This (briefly) constructs an &[T] to invalid data, which is UB. + // I'm not sure if this can be fully resolved without relying on slice implementation details. + let self_len = self_.as_ref().len(); + let (s, rest) = exhume_slice_ref(self_len, bytes)?; + self_.as_ptr().write(s); + Some(rest) + } +} + +unsafe impl<'de, T: Entomb> Entomb for &'de mut [T] { + unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { + <&[T]>::entomb(&&self[..], write) + } + + fn extent(&self) -> usize { + <&[T]>::extent(&&self[..]) + } +} +unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for &'de mut [T] { + #[inline] + unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { + // FIXME: This (briefly) constructs an &mut [T] to invalid data, which is UB. + // I'm not sure if this can be fully resolved without relying on slice implementation details. + let self_len = self_.as_ref().len(); + let (s, rest) = exhume_slice_ref(self_len, bytes)?; + self_.as_ptr().write(s); + Some(rest) + } +} + +unsafe impl Entomb for Vec { + unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { + <&[T]>::entomb(&&self[..], write) } fn extent(&self) -> usize { - mem::size_of::() * self.len() + slice_extent(&self[..]) + <&[T]>::extent(&&self[..]) } } unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for Vec { @@ -634,15 +695,9 @@ unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for Vec { // FIXME: This (briefly) constructs an &Vec to invalid data, which is UB. // I'm not sure if this can be fully resolved without relying on Vec implementation details. let self_len = self_.as_ref().len(); - let binary_len = self_len * mem::size_of::(); - if binary_len > bytes.len() { None } - else { - let (mine, mut rest) = bytes.split_at_mut(binary_len); - let first_ptr = mine.as_mut_ptr() as *mut T; - rest = exhume_slice(first_ptr, self_len, rest)?; - self_.as_ptr().write(Vec::from_raw_parts(first_ptr, self_len, self_len)); - Some(rest) - } + let (s, rest) = exhume_slice_ref(self_len, bytes)?; + self_.as_ptr().write(Vec::from_raw_parts(s.as_mut_ptr(), self_len, self_len)); + Some(rest) } } @@ -717,19 +772,8 @@ unsafe fn typed_to_bytes(slice: &[T]) -> &[u8] { std::slice::from_raw_parts(slice.as_ptr() as *const u8, slice.len() * mem::size_of::()) } -// Common subset of "entomb" for all [T]-like types -unsafe fn entomb_slice(slice: &[T], write: &mut W) -> IOResult<()> { - for element in slice { T::entomb(element, write)?; } - Ok(()) -} - -// Common subset of "extent" for all [T]-like types -fn slice_extent(slice: &[T]) -> usize { - slice.iter().map(T::extent).sum() -} - // Common subset of "exhume" for all [T]-like types -// (I'd gladly take a NonNull<[T]>, but it is too difficult to build raw pointers to slices) +// (I'd gladly move this to [T]::exhume, but building a NonNull<[T]> is currently too difficult) #[inline] unsafe fn exhume_slice<'de, T: Exhume<'de>>( first_ptr: *mut T, @@ -743,6 +787,22 @@ unsafe fn exhume_slice<'de, T: Exhume<'de>>( Some(bytes) } +// Common subset of "exhume" for all &[T]-like types +#[inline] +unsafe fn exhume_slice_ref<'de, T: Exhume<'de>>( + length: usize, + bytes: &'de mut [u8] +) -> Option<(&'de mut [T], &'de mut [u8])> { + let binary_len = length * mem::size_of::(); + if binary_len > bytes.len() { None } + else { + let (mine, mut rest) = bytes.split_at_mut(binary_len); + let first_ptr = mine.as_mut_ptr() as *mut T; + rest = exhume_slice(first_ptr, length, rest)?; + Some((std::slice::from_raw_parts_mut(first_ptr, length).into(), rest)) + } +} + // Common subset of "exhume" for all &mut T-like types unsafe fn exhume_ref<'de, T: Exhume<'de>>(bytes: &'de mut [u8]) -> Option<(&'de mut T, &'de mut [u8])> { let binary_len = mem::size_of::(); diff --git a/tests/tests.rs b/tests/tests.rs index 85e8708..ad16bf3 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -90,6 +90,10 @@ gen_tests!{ &"grawwwwrr!" => (test_str_pass, test_str_fail, test_str_size), + + &[0, 1, 2] => (test_slice_pass, + test_slice_fail, + test_slice_size), } // FIXME: I could not find an API which allows _test_pass to allocate a Vec From 8ba2a086947a9e0d7e5867cdfc3dace78ff72735 Mon Sep 17 00:00:00 2001 From: Hadrien G Date: Fri, 11 Oct 2019 21:04:52 +0200 Subject: [PATCH 25/25] Add tests for Abomonated of reference --- tests/tests.rs | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/tests.rs b/tests/tests.rs index ad16bf3..26b43cb 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -157,3 +157,33 @@ fn test_net_types() { let (t, r) = unsafe { decode::(&mut bytes) }.unwrap(); assert_eq!(*t, socket_addr4); let (t, _r) = unsafe { decode::(r) }.unwrap(); assert_eq!(*t, socket_addr6); } + +#[test] +fn test_abomonated_owned() { + use abomonation::abomonated::Abomonated; + + let s_owned = "This is an owned string".to_owned(); + + let mut bytes = Vec::new(); + unsafe { encode::(&s_owned, &mut bytes).unwrap(); } + + let abo = unsafe { Abomonated::::new(bytes).unwrap() }; + assert_eq!(abo.as_ref(), &s_owned); + assert_eq!(&*abo, &s_owned); +} + +#[test] +fn test_abomonated_ref() { + use abomonation::abomonated::Abomonated; + + let s_owned = "This is an owned string".to_owned(); + let s_borrow = &s_owned[11..16]; // "owned" + + let mut bytes = Vec::new(); + unsafe { encode::<&str, _>(&s_borrow, &mut bytes).unwrap(); } + + let abo = unsafe { Abomonated::<&str, _>::new(bytes).unwrap() }; + assert_eq!(abo.as_ref(), &s_borrow); + // NOTE: Cannot use Deref here because &str contains a reference + // FIXME: Figure out a way to add a compile_fail test for this +}