Skip to content

Commit 49d4a09

Browse files
committed
Avoid zeroing in encode_latin1_lossy by using pointers
1 parent 841a57c commit 49d4a09

File tree

1 file changed

+21
-8
lines changed

1 file changed

+21
-8
lines changed

src/mem.rs

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1859,7 +1859,7 @@ unsafe fn convert_latin1_to_utf8_partial_raw(
18591859
/// a `&mut str`, use `convert_utf16_to_str()` instead of this function.
18601860
#[inline]
18611861
pub fn convert_latin1_to_utf8(src: &[u8], dst: &mut [u8]) -> usize {
1862-
// SAFETY: the slices satisfy convert_latin1_to_utf8_raw's requirements
1862+
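// SAFETY: `src` and `dst` are live slices, so `src.as_ptr()` is valid for reads of `src.len()` bytes and `dst.as_mut_ptr()` is valid for writes of `dst.len()` bytes.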
18631863
unsafe { convert_latin1_to_utf8_raw(src.as_ptr(), src.len(), dst.as_mut_ptr(), dst.len()) }
18641864
}
18651865

@@ -1956,15 +1956,24 @@ pub fn convert_latin1_to_str(src: &[u8], dst: &mut str) -> usize {
19561956
///
19571957
/// If debug assertions are enabled (and not fuzzing) and the input is
19581958
/// not in the range U+0000 to U+00FF, inclusive.
1959+
#[inline]
19591960
pub fn convert_utf8_to_latin1_lossy(src: &[u8], dst: &mut [u8]) -> usize {
1961+
// SAFETY: `dst` is a live mutable slice, so `dst.as_mut_ptr()` is valid for writes at offsets `0..dst.len()`.
1962+
unsafe { convert_utf8_to_latin1_lossy_raw(src, dst.as_mut_ptr(), dst.len()) }
1963+
}
1964+
1965+
/// # Safety
1966+
/// `dst_ptr` must be valid for writes at offsets `0..dst_len`.
1967+
///
1968+
/// NOTE: this function never reads through `dst_ptr`, so `dst_ptr` may point to uninitialized memory.
1969+
unsafe fn convert_utf8_to_latin1_lossy_raw(src: &[u8], dst_ptr: *mut u8, dst_len: usize) -> usize {
19601970
assert!(
1961-
dst.len() >= src.len(),
1971+
dst_len >= src.len(),
19621972
"Destination must not be shorter than the source."
19631973
);
19641974
non_fuzz_debug_assert!(is_utf8_latin1(src));
19651975
let src_len = src.len();
19661976
let src_ptr = src.as_ptr();
1967-
let dst_ptr = dst.as_mut_ptr();
19681977
let mut total_read = 0usize;
19691978
let mut total_written = 0usize;
19701979
loop {
@@ -1987,7 +1996,9 @@ pub fn convert_utf8_to_latin1_lossy(src: &[u8], dst: &mut [u8]) -> usize {
19871996
let trail = src[total_read];
19881997
total_read += 1;
19891998

1990-
dst[total_written] = ((non_ascii & 0x1F) << 6) | (trail & 0x3F);
1999+
dst_ptr
2000+
.add(total_written)
2001+
.write(((non_ascii & 0x1F) << 6) | (trail & 0x3F));
19912002
total_written += 1;
19922003
continue;
19932004
}
@@ -2091,11 +2102,13 @@ pub fn encode_latin1_lossy<'a>(string: &'a str) -> Cow<'a, [u8]> {
20912102
}
20922103
let (head, tail) = bytes.split_at(up_to);
20932104
let capacity = bytes.len();
2094-
let mut vec = Vec::with_capacity(capacity);
2105+
let mut vec = Vec::<u8>::with_capacity(capacity);
20952106
vec.extend(head);
2096-
vec.resize(capacity, 0);
2097-
let written = convert_utf8_to_latin1_lossy(tail, &mut vec[up_to..]);
2098-
vec.truncate(up_to + written);
2107+
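// SAFETY: `vec` was allocated with at least `capacity` bytes and `up_to <= capacity` (checked by `split_at`), so `vec.as_mut_ptr().add(up_to)` is in bounds and valid for writes of `capacity - up_to` bytes.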
2108+
let written = unsafe {
2109+
convert_utf8_to_latin1_lossy_raw(tail, vec.as_mut_ptr().add(up_to), capacity - up_to)
2110+
};
2111+
unsafe { vec.set_len(up_to + written) };
20992112
Cow::Owned(vec)
21002113
}
21012114

0 commit comments

Comments
 (0)