Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions compiler/rustc_ast/src/util/literal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ impl LitKind {
// programs with many long strings containing escapes.
unescape_str(
s,
&mut #[inline(always)]
#[inline(always)]
|_, res| match res {
Ok(c) => buf.push(c),
Err(err) => {
Expand All @@ -110,7 +110,7 @@ impl LitKind {
token::ByteStr => {
let s = symbol.as_str();
let mut buf = Vec::with_capacity(s.len());
unescape_byte_str(s, &mut |_, res| match res {
unescape_byte_str(s, |_, res| match res {
Ok(b) => buf.push(b),
Err(err) => {
assert!(!err.is_fatal(), "failed to unescape string literal")
Expand All @@ -127,7 +127,7 @@ impl LitKind {
token::CStr => {
let s = symbol.as_str();
let mut buf = Vec::with_capacity(s.len());
unescape_cstr(s, &mut |_span, c| match c {
unescape_cstr(s, |_span, c| match c {
Ok(MixedUnit::Char(c)) => {
buf.extend_from_slice(c.get().encode_utf8(&mut [0; 4]).as_bytes())
}
Expand Down
67 changes: 44 additions & 23 deletions compiler/rustc_lexer/src/unescape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ macro_rules! check {
" literal (without quotes) and produce a sequence of results of ",
stringify!($unit_ty), " or error (returned via `callback`).",
"\nNB: Raw strings don't do any unescaping, but do produce errors on bare CR.")]
pub fn $check(src: &str, callback: &mut impl FnMut(Range<usize>, Result<$unit, EscapeError>))
pub fn $check(src: &str, mut callback: impl FnMut(Range<usize>, Result<$unit, EscapeError>))
{
src.char_indices().for_each(|(pos, c)| {
callback(
Expand All @@ -162,7 +162,7 @@ macro_rules! unescape {
#[doc = concat!("Take the contents of a ", stringify!($string_ty),
" literal (without quotes) and produce a sequence of results of escaped ",
stringify!($unit_ty), " or error (returned via `callback`).")]
pub fn $unescape(src: &str, callback: &mut impl FnMut(Range<usize>, Result<$unit, EscapeError>))
pub fn $unescape(src: &str, mut callback: impl FnMut(Range<usize>, Result<$unit, EscapeError>))
{
let mut chars = src.chars();
while let Some(c) = chars.next() {
Expand Down Expand Up @@ -356,36 +356,57 @@ fn unicode_escape(chars: &mut impl Iterator<Item = char>) -> Result<u32, EscapeE
}
}

/// Takes the contents of a unicode-only (non-mixed-utf8) literal (without quotes)
/// and produces a sequence of unescaped characters or errors,
/// Takes the contents of a literal (without quotes)
/// and produces a sequence of errors,
/// which are returned by invoking `error_callback`.
///
/// For `Char` and `Byte` modes, the callback will be called at most once (only if the literal contains an error).
pub fn unescape_unicode<F>(src: &str, mode: Mode, callback: &mut F)
where
F: FnMut(Range<usize>, Result<char, EscapeError>),
{
let mut byte_callback =
|range, res: Result<u8, EscapeError>| callback(range, res.map(char::from));
pub fn unescape_for_errors(
src: &str,
mode: Mode,
mut error_callback: impl FnMut(Range<usize>, EscapeError),
) {
match mode {
Char => {
let mut chars = src.chars();
let res = unescape_char_iter(&mut chars);
callback(0..(src.len() - chars.as_str().len()), res);
if let Err(e) = unescape_char_iter(&mut chars) {
error_callback(0..(src.len() - chars.as_str().len()), e);
}
}
Byte => {
let mut chars = src.chars();
let res = unescape_byte_iter(&mut chars).map(char::from);
callback(0..(src.len() - chars.as_str().len()), res);
if let Err(e) = unescape_byte_iter(&mut chars) {
error_callback(0..(src.len() - chars.as_str().len()), e);
}
}
Str => unescape_str(src, callback),
ByteStr => unescape_byte_str(src, &mut byte_callback),
RawStr => check_raw_str(src, callback),
RawByteStr => check_raw_byte_str(src, &mut byte_callback),
RawCStr => check_raw_cstr(src, &mut |r, res: Result<NonZero<char>, EscapeError>| {
callback(r, res.map(|c| c.get()))
Str => unescape_str(src, |range, res| {
if let Err(e) = res {
error_callback(range, e);
}
}),
ByteStr => unescape_byte_str(src, |range, res| {
if let Err(e) = res {
error_callback(range, e);
}
}),
CStr => unescape_cstr(src, |range, res| {
if let Err(e) = res {
error_callback(range, e);
}
}),
RawStr => check_raw_str(src, |range, res| {
if let Err(e) = res {
error_callback(range, e);
}
}),
RawByteStr => check_raw_byte_str(src, |range, res| {
if let Err(e) = res {
error_callback(range, e);
}
}),
RawCStr => check_raw_cstr(src, |range, res| {
if let Err(e) = res {
error_callback(range, e);
}
}),
CStr => unreachable!(),
}
}

Expand Down
14 changes: 7 additions & 7 deletions compiler/rustc_lexer/src/unescape/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ fn test_unescape_char_good() {
fn test_unescape_str_warn() {
fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) {
let mut unescaped = Vec::with_capacity(literal.len());
unescape_unicode(literal, Mode::Str, &mut |range, res| unescaped.push((range, res)));
unescape_str(literal, |range, res| unescaped.push((range, res)));
assert_eq!(unescaped, expected);
}

Expand All @@ -124,7 +124,7 @@ fn test_unescape_str_warn() {
fn test_unescape_str_good() {
fn check(literal_text: &str, expected: &str) {
let mut buf = Ok(String::with_capacity(literal_text.len()));
unescape_unicode(literal_text, Mode::Str, &mut |range, c| {
unescape_str(literal_text, |range, c| {
if let Ok(b) = &mut buf {
match c {
Ok(c) => b.push(c),
Expand Down Expand Up @@ -241,7 +241,7 @@ fn test_unescape_byte_good() {
fn test_unescape_byte_str_good() {
fn check(literal_text: &str, expected: &[u8]) {
let mut buf = Ok(Vec::with_capacity(literal_text.len()));
unescape_unicode(literal_text, Mode::ByteStr, &mut |range, c| {
unescape_byte_str(literal_text, |range, c| {
if let Ok(b) = &mut buf {
match c {
Ok(c) => b.push(c as u8),
Expand All @@ -264,7 +264,7 @@ fn test_unescape_byte_str_good() {
fn test_unescape_raw_str() {
fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) {
let mut unescaped = Vec::with_capacity(literal.len());
unescape_unicode(literal, Mode::RawStr, &mut |range, res| unescaped.push((range, res)));
check_raw_str(literal, |range, res| unescaped.push((range, res)));
assert_eq!(unescaped, expected);
}

Expand All @@ -274,13 +274,13 @@ fn test_unescape_raw_str() {

#[test]
fn test_unescape_raw_byte_str() {
fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) {
fn check(literal: &str, expected: &[(Range<usize>, Result<u8, EscapeError>)]) {
let mut unescaped = Vec::with_capacity(literal.len());
unescape_unicode(literal, Mode::RawByteStr, &mut |range, res| unescaped.push((range, res)));
check_raw_byte_str(literal, |range, res| unescaped.push((range, res)));
assert_eq!(unescaped, expected);
}

check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))]);
check("🦀", &[(0..4, Err(EscapeError::NonAsciiCharInByte))]);
check("🦀a", &[(0..4, Err(EscapeError::NonAsciiCharInByte)), (4..5, Ok('a'))]);
check("🦀a", &[(0..4, Err(EscapeError::NonAsciiCharInByte)), (4..5, Ok(b'a'))]);
}
90 changes: 27 additions & 63 deletions compiler/rustc_parse/src/lexer/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
use std::ops::Range;

use rustc_ast::ast::{self, AttrStyle};
use rustc_ast::token::{self, CommentKind, Delimiter, IdentIsRaw, Token, TokenKind};
use rustc_ast::tokenstream::TokenStream;
Expand Down Expand Up @@ -525,7 +523,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
}
err.emit()
}
self.cook_unicode(token::Char, Mode::Char, start, end, 1, 1) // ' '
self.cook_quoted(token::Char, Mode::Char, start, end, 1, 1) // ' '
}
rustc_lexer::LiteralKind::Byte { terminated } => {
if !terminated {
Expand All @@ -537,7 +535,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
.with_code(E0763)
.emit()
}
self.cook_unicode(token::Byte, Mode::Byte, start, end, 2, 1) // b' '
self.cook_quoted(token::Byte, Mode::Byte, start, end, 2, 1) // b' '
}
rustc_lexer::LiteralKind::Str { terminated } => {
if !terminated {
Expand All @@ -549,7 +547,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
.with_code(E0765)
.emit()
}
self.cook_unicode(token::Str, Mode::Str, start, end, 1, 1) // " "
self.cook_quoted(token::Str, Mode::Str, start, end, 1, 1) // " "
}
rustc_lexer::LiteralKind::ByteStr { terminated } => {
if !terminated {
Expand All @@ -561,7 +559,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
.with_code(E0766)
.emit()
}
self.cook_unicode(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" "
self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" "
}
rustc_lexer::LiteralKind::CStr { terminated } => {
if !terminated {
Expand All @@ -573,13 +571,13 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
.with_code(E0767)
.emit()
}
self.cook_mixed(token::CStr, Mode::CStr, start, end, 2, 1) // c" "
self.cook_quoted(token::CStr, Mode::CStr, start, end, 2, 1) // c" "
}
rustc_lexer::LiteralKind::RawStr { n_hashes } => {
if let Some(n_hashes) = n_hashes {
let n = u32::from(n_hashes);
let kind = token::StrRaw(n_hashes);
self.cook_unicode(kind, Mode::RawStr, start, end, 2 + n, 1 + n) // r##" "##
self.cook_quoted(kind, Mode::RawStr, start, end, 2 + n, 1 + n) // r##" "##
} else {
self.report_raw_str_error(start, 1);
}
Expand All @@ -588,7 +586,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
if let Some(n_hashes) = n_hashes {
let n = u32::from(n_hashes);
let kind = token::ByteStrRaw(n_hashes);
self.cook_unicode(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n) // br##" "##
self.cook_quoted(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n) // br##" "##
} else {
self.report_raw_str_error(start, 2);
}
Expand All @@ -597,7 +595,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
if let Some(n_hashes) = n_hashes {
let n = u32::from(n_hashes);
let kind = token::CStrRaw(n_hashes);
self.cook_unicode(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "##
self.cook_quoted(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "##
} else {
self.report_raw_str_error(start, 2);
}
Expand Down Expand Up @@ -913,40 +911,36 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
self.dcx().emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num });
}

fn cook_common(
fn cook_quoted(
&self,
mut kind: token::LitKind,
mode: Mode,
start: BytePos,
end: BytePos,
prefix_len: u32,
postfix_len: u32,
unescape: fn(&str, Mode, &mut dyn FnMut(Range<usize>, Result<(), EscapeError>)),
) -> (token::LitKind, Symbol) {
let content_start = start + BytePos(prefix_len);
let content_end = end - BytePos(postfix_len);
let lit_content = self.str_from_to(content_start, content_end);
unescape(lit_content, mode, &mut |range, result| {
// Here we only check for errors. The actual unescaping is done later.
if let Err(err) = result {
let span_with_quotes = self.mk_sp(start, end);
let (start, end) = (range.start as u32, range.end as u32);
let lo = content_start + BytePos(start);
let hi = lo + BytePos(end - start);
let span = self.mk_sp(lo, hi);
let is_fatal = err.is_fatal();
if let Some(guar) = emit_unescape_error(
self.dcx(),
lit_content,
span_with_quotes,
span,
mode,
range,
err,
) {
assert!(is_fatal);
kind = token::Err(guar);
}
unescape::unescape_for_errors(lit_content, mode, |range, err| {
let span_with_quotes = self.mk_sp(start, end);
let (start, end) = (range.start as u32, range.end as u32);
let lo = content_start + BytePos(start);
let hi = lo + BytePos(end - start);
let span = self.mk_sp(lo, hi);
let is_fatal = err.is_fatal();
if let Some(guar) = emit_unescape_error(
self.dcx(),
lit_content,
span_with_quotes,
span,
mode,
range,
err,
) {
assert!(is_fatal);
kind = token::Err(guar);
}
});

Expand All @@ -959,36 +953,6 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
};
(kind, sym)
}

fn cook_unicode(
&self,
kind: token::LitKind,
mode: Mode,
start: BytePos,
end: BytePos,
prefix_len: u32,
postfix_len: u32,
) -> (token::LitKind, Symbol) {
self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
unescape::unescape_unicode(src, mode, &mut |span, result| {
callback(span, result.map(drop))
})
})
}

fn cook_mixed(
&self,
kind: token::LitKind,
mode: Mode,
start: BytePos,
end: BytePos,
prefix_len: u32,
postfix_len: u32,
) -> (token::LitKind, Symbol) {
self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, _mode, callback| {
unescape::unescape_cstr(src, &mut |span, result| callback(span, result.map(drop)))
})
}
}

pub fn nfc_normalize(string: &str) -> Symbol {
Expand Down
8 changes: 3 additions & 5 deletions compiler/rustc_parse_format/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1094,11 +1094,9 @@ fn find_width_map_from_snippet(
fn unescape_string(string: &str) -> Option<String> {
let mut buf = String::new();
let mut ok = true;
unescape::unescape_unicode(string, unescape::Mode::Str, &mut |_, unescaped_char| {
match unescaped_char {
Ok(c) => buf.push(c),
Err(_) => ok = false,
}
unescape::unescape_str(string, &mut |_, res| match res {
Ok(c) => buf.push(c),
Err(_) => ok = false,
});

ok.then_some(buf)
Expand Down
2 changes: 1 addition & 1 deletion src/tools/clippy/clippy_dev/src/update_lints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -830,7 +830,7 @@ fn remove_line_splices(s: &str) -> String {
.and_then(|s| s.strip_suffix('"'))
.unwrap_or_else(|| panic!("expected quoted string, found `{s}`"));
let mut res = String::with_capacity(s.len());
unescape::unescape_unicode(s, unescape::Mode::Str, &mut |range, ch| {
unescape::unescape_str(s, |range, ch| {
if ch.is_ok() {
res.push_str(&s[range]);
}
Expand Down
2 changes: 1 addition & 1 deletion src/tools/rust-analyzer/crates/hir-expand/src/attrs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ fn unescape(s: &str) -> Option<Cow<'_, str>> {
let mut buf = String::new();
let mut prev_end = 0;
let mut has_error = false;
unescape::unescape_unicode(s, unescape::Mode::Str, &mut |char_range, unescaped_char| match (
unescape::unescape_str(s, |char_range, unescaped_char| match (
unescaped_char,
buf.capacity() == 0,
) {
Expand Down
Loading
Loading