Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions compiler/rustc_parse/messages.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -967,6 +967,7 @@ parse_unknown_start_of_token = unknown start of token: {$escaped}
.sugg_quotes = Unicode characters '“' (Left Double Quotation Mark) and '”' (Right Double Quotation Mark) look like '{$ascii_str}' ({$ascii_name}), but are not
.sugg_other = Unicode character '{$ch}' ({$u_name}) looks like '{$ascii_str}' ({$ascii_name}), but it is not
.help_null = source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used
.help_invisible_char = invisible characters like '{$escaped}' are not usually visible in text editors
.note_repeats = character appears {$repeats ->
[one] once more
*[other] {$repeats} more times
Expand Down
6 changes: 6 additions & 0 deletions compiler/rustc_parse/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2369,6 +2369,8 @@ pub(crate) struct UnknownTokenStart {
pub null: Option<UnknownTokenNull>,
#[subdiagnostic]
pub repeat: Option<UnknownTokenRepeat>,
#[subdiagnostic]
pub invisible: Option<InvisibleCharacter>,
}

#[derive(Subdiagnostic)]
Expand Down Expand Up @@ -2409,6 +2411,10 @@ pub(crate) struct UnknownTokenRepeat {
pub repeats: usize,
}

// Extra `help` for the "unknown start of token" error, carried by the optional
// `invisible` field of `UnknownTokenStart`. The lexer sets it when the offending
// character is one of `INVISIBLE_CHARACTERS`; the text comes from the
// `help_invisible_char` Fluent attribute and tells the user that such characters
// are not usually visible in text editors.
#[derive(Subdiagnostic)]
#[help(parse_help_invisible_char)]
pub(crate) struct InvisibleCharacter;

// Extra `help` for the "unknown start of token" error, carried by the optional
// `null` field of `UnknownTokenStart`. The lexer sets it when the offending
// character is `'\x00'`; the text comes from the `help_null` Fluent attribute and
// explains that source files must be UTF-8 and that stray null bytes can appear
// when a different encoding is used.
#[derive(Subdiagnostic)]
#[help(parse_help_null)]
pub(crate) struct UnknownTokenNull;
Expand Down
5 changes: 5 additions & 0 deletions compiler/rustc_parse/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ use unescape_error_reporting::{emit_unescape_error, escaped_char};
// Only on targets with 64-bit pointers: statically assert that `rustc_lexer::Token`
// has size 12 (presumably bytes; confirm against the `static_assert_size!` definition).
#[cfg(target_pointer_width = "64")]
rustc_data_structures::static_assert_size!(rustc_lexer::Token, 12);

/// Code points that are typically rendered as nothing at all by text editors.
///
/// When the lexer hits an unknown start of token and the offending character is
/// in this list, it adds a `help` explaining that the character is invisible.
const INVISIBLE_CHARACTERS: [char; 8] = [
    '\u{200b}', // ZERO WIDTH SPACE
    '\u{200c}', // ZERO WIDTH NON-JOINER
    '\u{2060}', // WORD JOINER
    '\u{2061}', // FUNCTION APPLICATION
    '\u{2062}', // INVISIBLE TIMES
    '\u{00ad}', // SOFT HYPHEN
    '\u{034f}', // COMBINING GRAPHEME JOINER
    '\u{061c}', // ARABIC LETTER MARK
];

#[derive(Clone, Debug)]
pub(crate) struct UnmatchedDelim {
pub found_delim: Option<Delimiter>,
Expand Down Expand Up @@ -456,6 +460,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
escaped: escaped_char(c),
sugg,
null: if c == '\x00' { Some(errors::UnknownTokenNull) } else { None },
invisible: if INVISIBLE_CHARACTERS.contains(&c) { Some(errors::InvisibleCharacter) } else { None },
repeat: if repeats > 0 {
swallow_next_invalid = repeats;
Some(errors::UnknownTokenRepeat { repeats })
Expand Down
6 changes: 6 additions & 0 deletions tests/ui/lexer/lex-invisible-characters.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// Check that the lexer provides extra help when the source contains an invisible character (here U+200B ZERO WIDTH SPACE right after `main`)

fn main​() {
//~^ ERROR unknown start of token: \u{200b}
//~| HELP invisible characters like '\u{200b}' are not usually visible in text editors
}
10 changes: 10 additions & 0 deletions tests/ui/lexer/lex-invisible-characters.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
error: unknown start of token: \u{200b}
--> $DIR/lex-invisible-characters.rs:3:8
|
LL | fn main​() {
| ^
|
= help: invisible characters like '\u{200b}' are not usually visible in text editors

error: aborting due to 1 previous error

Loading