From bcf9756ea6bb364f1a4b131c3042b766f21caea7 Mon Sep 17 00:00:00 2001 From: Chayim Refael Friedman Date: Thu, 3 Jul 2025 19:52:32 +0300 Subject: [PATCH 1/2] Include a per-token edition in the parser input Because, as it turns out, this is necessary to determine the correct edition (it is not global). The next commits will make use of it. --- crates/parser/src/input.rs | 18 +++++++++---- crates/parser/src/shortcuts.rs | 3 ++- crates/syntax-bridge/src/to_parser_input.rs | 28 ++++++++++----------- 3 files changed, 29 insertions(+), 20 deletions(-) diff --git a/crates/parser/src/input.rs b/crates/parser/src/input.rs index 4490956f9704..7a0dba73e60b 100644 --- a/crates/parser/src/input.rs +++ b/crates/parser/src/input.rs @@ -1,5 +1,7 @@ //! See [`Input`]. +use edition::Edition; + use crate::SyntaxKind; #[allow(non_camel_case_types)] @@ -16,6 +18,7 @@ pub struct Input { kind: Vec, joint: Vec, contextual_kind: Vec, + edition: Vec, } /// `pub` impl used by callers to create `Tokens`. @@ -26,15 +29,16 @@ impl Input { kind: Vec::with_capacity(capacity), joint: Vec::with_capacity(capacity / size_of::()), contextual_kind: Vec::with_capacity(capacity), + edition: Vec::with_capacity(capacity), } } #[inline] - pub fn push(&mut self, kind: SyntaxKind) { - self.push_impl(kind, SyntaxKind::EOF) + pub fn push(&mut self, kind: SyntaxKind, edition: Edition) { + self.push_impl(kind, SyntaxKind::EOF, edition) } #[inline] - pub fn push_ident(&mut self, contextual_kind: SyntaxKind) { - self.push_impl(SyntaxKind::IDENT, contextual_kind) + pub fn push_ident(&mut self, contextual_kind: SyntaxKind, edition: Edition) { + self.push_impl(SyntaxKind::IDENT, contextual_kind, edition) } /// Sets jointness for the last token we've pushed. /// @@ -59,13 +63,14 @@ impl Input { self.joint[idx] |= 1 << b_idx; } #[inline] - fn push_impl(&mut self, kind: SyntaxKind, contextual_kind: SyntaxKind) { + fn push_impl(&mut self, kind: SyntaxKind, contextual_kind: SyntaxKind, edition: Edition) { let idx = self.len(); if idx % (bits::BITS as usize) == 0 { self.joint.push(0); } self.kind.push(kind); self.contextual_kind.push(contextual_kind); + self.edition.push(edition); } } @@ -77,6 +82,9 @@ impl Input { pub(crate) fn contextual_kind(&self, idx: usize) -> SyntaxKind { self.contextual_kind.get(idx).copied().unwrap_or(SyntaxKind::EOF) } + pub(crate) fn edition(&self, idx: usize) -> Edition { + self.edition[idx] + } pub(crate) fn is_joint(&self, n: usize) -> bool { let (idx, b_idx) = self.bit_index(n); self.joint[idx] & (1 << b_idx) != 0 diff --git a/crates/parser/src/shortcuts.rs b/crates/parser/src/shortcuts.rs index e2baec890c3a..3db000043e78 100644 --- a/crates/parser/src/shortcuts.rs +++ b/crates/parser/src/shortcuts.rs @@ -38,12 +38,13 @@ impl LexedStr<'_> { res.push_ident( SyntaxKind::from_contextual_keyword(token_text, edition) .unwrap_or(SyntaxKind::IDENT), + edition, ) } else { if was_joint { res.was_joint(); } - res.push(kind); + res.push(kind, edition); // Tag the token as joint if it is float with a fractional part // we use this jointness to inform the parser about what token split // event to emit when we encounter a float literal in a field access diff --git a/crates/syntax-bridge/src/to_parser_input.rs b/crates/syntax-bridge/src/to_parser_input.rs index 021dc6595f9b..46e518faf16d 100644 --- a/crates/syntax-bridge/src/to_parser_input.rs +++ b/crates/syntax-bridge/src/to_parser_input.rs @@ -16,6 +16,8 @@ pub fn to_parser_input( let mut current = buffer.cursor(); let mut syntax_context_to_edition_cache = FxHashMap::default(); + let mut ctx_edition = + |ctx| *syntax_context_to_edition_cache.entry(ctx).or_insert_with(|| span_to_edition(ctx)); while !current.eof() { let tt = current.token_tree(); @@ -25,8 +27,8 @@ pub fn to_parser_input( if punct.char == '\'' { current.bump(); match current.token_tree() { - Some(tt::TokenTree::Leaf(tt::Leaf::Ident(_ident))) => { - res.push(LIFETIME_IDENT); + Some(tt::TokenTree::Leaf(tt::Leaf::Ident(ident))) => { + res.push(LIFETIME_IDENT, ctx_edition(ident.span.ctx)); current.bump(); continue; } @@ -51,7 +53,7 @@ pub fn to_parser_input( tt::LitKind::CStr | tt::LitKind::CStrRaw(_) => SyntaxKind::C_STRING, tt::LitKind::Err(_) => SyntaxKind::ERROR, }; - res.push(kind); + res.push(kind, ctx_edition(lit.span.ctx)); if kind == FLOAT_NUMBER && !lit.symbol.as_str().ends_with('.') { // Tag the token as joint if it is float with a fractional part @@ -61,20 +63,18 @@ pub fn to_parser_input( } } tt::Leaf::Ident(ident) => { - let edition = *syntax_context_to_edition_cache - .entry(ident.span.ctx) - .or_insert_with(|| span_to_edition(ident.span.ctx)); + let edition = ctx_edition(ident.span.ctx); match ident.sym.as_str() { - "_" => res.push(T![_]), - i if i.starts_with('\'') => res.push(LIFETIME_IDENT), - _ if ident.is_raw.yes() => res.push(IDENT), + "_" => res.push(T![_], edition), + i if i.starts_with('\'') => res.push(LIFETIME_IDENT, edition), + _ if ident.is_raw.yes() => res.push(IDENT, edition), text => match SyntaxKind::from_keyword(text, edition) { - Some(kind) => res.push(kind), + Some(kind) => res.push(kind, edition), None => { let contextual_keyword = SyntaxKind::from_contextual_keyword(text, edition) .unwrap_or(SyntaxKind::IDENT); - res.push_ident(contextual_keyword); + res.push_ident(contextual_keyword, edition); } }, } @@ -82,7 +82,7 @@ pub fn to_parser_input( tt::Leaf::Punct(punct) => { let kind = SyntaxKind::from_char(punct.char) .unwrap_or_else(|| panic!("{punct:#?} is not a valid punct")); - res.push(kind); + res.push(kind, ctx_edition(punct.span.ctx)); if punct.spacing == tt::Spacing::Joint { res.was_joint(); } @@ -97,7 +97,7 @@ pub fn to_parser_input( tt::DelimiterKind::Bracket => Some(T!['[']), tt::DelimiterKind::Invisible => None, } { - res.push(kind); + res.push(kind, ctx_edition(subtree.delimiter.open.ctx)); } current.bump(); } @@ -109,7 +109,7 @@ pub fn to_parser_input( tt::DelimiterKind::Bracket => Some(T![']']), tt::DelimiterKind::Invisible => None, } { - res.push(kind); + res.push(kind, ctx_edition(subtree.delimiter.close.ctx)); } } }; From 8b53dd059eaec13d3f9bc695a3e4ecf96e006d0e Mon Sep 17 00:00:00 2001 From: Chayim Refael Friedman Date: Thu, 3 Jul 2025 20:04:35 +0300 Subject: [PATCH 2/2] Use per-token edition in the parser --- crates/hir-def/src/macro_expansion_tests/mod.rs | 1 - crates/hir-expand/src/builtin/derive_macro.rs | 7 +------ crates/hir-expand/src/db.rs | 8 ++------ crates/hir-expand/src/fixup.rs | 1 - crates/mbe/src/lib.rs | 2 +- crates/mbe/src/tests.rs | 8 ++------ crates/parser/src/grammar/generic_args.rs | 2 +- crates/parser/src/grammar/types.rs | 4 +++- crates/parser/src/lib.rs | 12 ++++++------ crates/parser/src/parser.rs | 9 ++++----- crates/parser/src/tests.rs | 2 +- crates/parser/src/tests/prefix_entries.rs | 2 +- crates/syntax-bridge/src/lib.rs | 3 +-- crates/syntax/src/parsing.rs | 4 ++-- crates/syntax/src/parsing/reparsing.rs | 2 +- 15 files changed, 26 insertions(+), 41 deletions(-) diff --git a/crates/hir-def/src/macro_expansion_tests/mod.rs b/crates/hir-def/src/macro_expansion_tests/mod.rs index 1c69b37f164f..dc127fd58ba4 100644 --- a/crates/hir-def/src/macro_expansion_tests/mod.rs +++ b/crates/hir-def/src/macro_expansion_tests/mod.rs @@ -373,7 +373,6 @@ impl ProcMacroExpander for IdentityWhenValidProcMacroExpander { subtree, syntax_bridge::TopEntryPoint::MacroItems, &mut |_| span::Edition::CURRENT, - span::Edition::CURRENT, ); if parse.errors().is_empty() { Ok(subtree.clone()) diff --git a/crates/hir-expand/src/builtin/derive_macro.rs b/crates/hir-expand/src/builtin/derive_macro.rs index d135584a0809..9349cb497153 100644 --- a/crates/hir-expand/src/builtin/derive_macro.rs +++ b/crates/hir-expand/src/builtin/derive_macro.rs @@ -392,12 +392,7 @@ fn to_adt_syntax( tt: &tt::TopSubtree, call_site: Span, ) -> Result<(ast::Adt, span::SpanMap), ExpandError> { - let (parsed, tm) = crate::db::token_tree_to_syntax_node( - db, - tt, - crate::ExpandTo::Items, - parser::Edition::CURRENT_FIXME, - ); + let (parsed, tm) = crate::db::token_tree_to_syntax_node(db, tt, crate::ExpandTo::Items); let macro_items = ast::MacroItems::cast(parsed.syntax_node()) .ok_or_else(|| ExpandError::other(call_site, "invalid item definition"))?; let item = diff --git a/crates/hir-expand/src/db.rs b/crates/hir-expand/src/db.rs index 888c1405a6bb..60207960ceb5 100644 --- a/crates/hir-expand/src/db.rs +++ b/crates/hir-expand/src/db.rs @@ -315,8 +315,7 @@ pub fn expand_speculative( let expand_to = loc.expand_to(); fixup::reverse_fixups(&mut speculative_expansion.value, &undo_info); - let (node, rev_tmap) = - token_tree_to_syntax_node(db, &speculative_expansion.value, expand_to, loc.def.edition); + let (node, rev_tmap) = token_tree_to_syntax_node(db, &speculative_expansion.value, expand_to); let syntax_node = node.syntax_node(); let token = rev_tmap @@ -358,7 +357,6 @@ fn parse_macro_expansion( ) -> ExpandResult<(Parse, Arc)> { let _p = tracing::info_span!("parse_macro_expansion").entered(); let loc = db.lookup_intern_macro_call(macro_file); - let def_edition = loc.def.edition; let expand_to = loc.expand_to(); let mbe::ValueResult { value: (tt, matched_arm), err } = macro_expand(db, macro_file, loc); @@ -369,7 +367,6 @@ fn parse_macro_expansion( CowArc::Owned(it) => it, }, expand_to, - def_edition, ); rev_token_map.matched_arm = matched_arm; @@ -733,7 +730,6 @@ pub(crate) fn token_tree_to_syntax_node( db: &dyn ExpandDatabase, tt: &tt::TopSubtree, expand_to: ExpandTo, - edition: parser::Edition, ) -> (Parse, ExpansionSpanMap) { let entry_point = match expand_to { ExpandTo::Statements => syntax_bridge::TopEntryPoint::MacroStmts, @@ -742,7 +738,7 @@ pub(crate) fn token_tree_to_syntax_node( ExpandTo::Type => syntax_bridge::TopEntryPoint::Type, ExpandTo::Expr => syntax_bridge::TopEntryPoint::Expr, }; - syntax_bridge::token_tree_to_syntax_node(tt, entry_point, &mut |ctx| ctx.edition(db), edition) + syntax_bridge::token_tree_to_syntax_node(tt, entry_point, &mut |ctx| ctx.edition(db)) } fn check_tt_count(tt: &tt::TopSubtree) -> Result<(), ExpandResult<()>> { diff --git a/crates/hir-expand/src/fixup.rs b/crates/hir-expand/src/fixup.rs index 4a4a3e52aea4..7e8e4525ed80 100644 --- a/crates/hir-expand/src/fixup.rs +++ b/crates/hir-expand/src/fixup.rs @@ -537,7 +537,6 @@ mod tests { &tt, syntax_bridge::TopEntryPoint::MacroItems, &mut |_| parser::Edition::CURRENT, - parser::Edition::CURRENT, ); assert!( parse.errors().is_empty(), diff --git a/crates/mbe/src/lib.rs b/crates/mbe/src/lib.rs index 9f9fa36abd46..068f3f43efdb 100644 --- a/crates/mbe/src/lib.rs +++ b/crates/mbe/src/lib.rs @@ -371,7 +371,7 @@ pub fn expect_fragment<'t>( let buffer = tt_iter.remaining(); // FIXME: Pass the correct edition per token. Due to the split between mbe and hir-expand it's complicated. let parser_input = to_parser_input(buffer, &mut |_ctx| edition); - let tree_traversal = entry_point.parse(&parser_input, edition); + let tree_traversal = entry_point.parse(&parser_input); let mut cursor = buffer.cursor(); let mut error = false; for step in tree_traversal.iter() { diff --git a/crates/mbe/src/tests.rs b/crates/mbe/src/tests.rs index 56034516ef3b..2823915c17d4 100644 --- a/crates/mbe/src/tests.rs +++ b/crates/mbe/src/tests.rs @@ -65,12 +65,8 @@ fn check_( if render_debug { format_to!(expect_res, "{:#?}\n\n", res.value.0); } - let (node, _) = syntax_bridge::token_tree_to_syntax_node( - &res.value.0, - parse, - &mut |_| def_edition, - def_edition, - ); + let (node, _) = + syntax_bridge::token_tree_to_syntax_node(&res.value.0, parse, &mut |_| def_edition); format_to!( expect_res, "{}", diff --git a/crates/parser/src/grammar/generic_args.rs b/crates/parser/src/grammar/generic_args.rs index b9d5bff66301..8f74acd1ba2a 100644 --- a/crates/parser/src/grammar/generic_args.rs +++ b/crates/parser/src/grammar/generic_args.rs @@ -59,7 +59,7 @@ pub(crate) fn generic_arg(p: &mut Parser<'_>) -> bool { // test edition_2015_dyn_prefix_inside_generic_arg 2015 // type A = Foo; - T![ident] if !p.edition().at_least_2018() && types::is_dyn_weak(p) => type_arg(p), + T![ident] if !p.current_edition().at_least_2018() && types::is_dyn_weak(p) => type_arg(p), // test macro_inside_generic_arg // type A = Foo; k if PATH_NAME_REF_KINDS.contains(k) => { diff --git a/crates/parser/src/grammar/types.rs b/crates/parser/src/grammar/types.rs index 908440b5d056..eec2b5c2efd5 100644 --- a/crates/parser/src/grammar/types.rs +++ b/crates/parser/src/grammar/types.rs @@ -54,7 +54,9 @@ fn type_with_bounds_cond(p: &mut Parser<'_>, allow_bounds: bool) { T![dyn] => dyn_trait_type(p), // Some path types are not allowed to have bounds (no plus) T![<] => path_type_bounds(p, allow_bounds), - T![ident] if !p.edition().at_least_2018() && is_dyn_weak(p) => dyn_trait_type_weak(p), + T![ident] if !p.current_edition().at_least_2018() && is_dyn_weak(p) => { + dyn_trait_type_weak(p) + } _ if paths::is_path_start(p) => path_or_macro_type(p, allow_bounds), LIFETIME_IDENT if p.nth_at(1, T![+]) => bare_dyn_trait_type(p), _ => { diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs index 7963f00bb25c..e5b1ffd6c36e 100644 --- a/crates/parser/src/lib.rs +++ b/crates/parser/src/lib.rs @@ -87,7 +87,7 @@ pub enum TopEntryPoint { } impl TopEntryPoint { - pub fn parse(&self, input: &Input, edition: Edition) -> Output { + pub fn parse(&self, input: &Input) -> Output { let _p = tracing::info_span!("TopEntryPoint::parse", ?self).entered(); let entry_point: fn(&'_ mut parser::Parser<'_>) = match self { TopEntryPoint::SourceFile => grammar::entry::top::source_file, @@ -98,7 +98,7 @@ impl TopEntryPoint { TopEntryPoint::Expr => grammar::entry::top::expr, TopEntryPoint::MetaItem => grammar::entry::top::meta_item, }; - let mut p = parser::Parser::new(input, edition); + let mut p = parser::Parser::new(input); entry_point(&mut p); let events = p.finish(); let res = event::process(events); @@ -150,7 +150,7 @@ pub enum PrefixEntryPoint { } impl PrefixEntryPoint { - pub fn parse(&self, input: &Input, edition: Edition) -> Output { + pub fn parse(&self, input: &Input) -> Output { let entry_point: fn(&'_ mut parser::Parser<'_>) = match self { PrefixEntryPoint::Vis => grammar::entry::prefix::vis, PrefixEntryPoint::Block => grammar::entry::prefix::block, @@ -163,7 +163,7 @@ impl PrefixEntryPoint { PrefixEntryPoint::Item => grammar::entry::prefix::item, PrefixEntryPoint::MetaItem => grammar::entry::prefix::meta_item, }; - let mut p = parser::Parser::new(input, edition); + let mut p = parser::Parser::new(input); entry_point(&mut p); let events = p.finish(); event::process(events) @@ -187,9 +187,9 @@ impl Reparser { /// /// Tokens must start with `{`, end with `}` and form a valid brace /// sequence. - pub fn parse(self, tokens: &Input, edition: Edition) -> Output { + pub fn parse(self, tokens: &Input) -> Output { let Reparser(r) = self; - let mut p = parser::Parser::new(tokens, edition); + let mut p = parser::Parser::new(tokens); r(&mut p); let events = p.finish(); event::process(events) diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs index 36a363afe93a..f09b7f64ca59 100644 --- a/crates/parser/src/parser.rs +++ b/crates/parser/src/parser.rs @@ -26,14 +26,13 @@ pub(crate) struct Parser<'t> { pos: usize, events: Vec, steps: Cell, - edition: Edition, } const PARSER_STEP_LIMIT: usize = 15_000_000; impl<'t> Parser<'t> { - pub(super) fn new(inp: &'t Input, edition: Edition) -> Parser<'t> { - Parser { inp, pos: 0, events: Vec::new(), steps: Cell::new(0), edition } + pub(super) fn new(inp: &'t Input) -> Parser<'t> { + Parser { inp, pos: 0, events: Vec::new(), steps: Cell::new(0) } } pub(crate) fn finish(self) -> Vec { @@ -288,8 +287,8 @@ impl<'t> Parser<'t> { self.events.push(event); } - pub(crate) fn edition(&self) -> Edition { - self.edition + pub(crate) fn current_edition(&self) -> Edition { + self.inp.edition(self.pos) } } diff --git a/crates/parser/src/tests.rs b/crates/parser/src/tests.rs index 4b19ddc752a0..cec50aa54b7f 100644 --- a/crates/parser/src/tests.rs +++ b/crates/parser/src/tests.rs @@ -80,7 +80,7 @@ fn parse_err() { fn parse(entry: TopEntryPoint, text: &str, edition: Edition) -> (String, bool) { let lexed = LexedStr::new(edition, text); let input = lexed.to_input(edition); - let output = entry.parse(&input, edition); + let output = entry.parse(&input); let mut buf = String::new(); let mut errors = Vec::new(); diff --git a/crates/parser/src/tests/prefix_entries.rs b/crates/parser/src/tests/prefix_entries.rs index e2268eed60ab..9d93a2ae3157 100644 --- a/crates/parser/src/tests/prefix_entries.rs +++ b/crates/parser/src/tests/prefix_entries.rs @@ -86,7 +86,7 @@ fn check(entry: PrefixEntryPoint, input: &str, prefix: &str) { let input = lexed.to_input(Edition::CURRENT); let mut n_tokens = 0; - for step in entry.parse(&input, Edition::CURRENT).iter() { + for step in entry.parse(&input).iter() { match step { Step::Token { n_input_tokens, .. } => n_tokens += n_input_tokens as usize, Step::FloatSplit { .. } => n_tokens += 1, diff --git a/crates/syntax-bridge/src/lib.rs b/crates/syntax-bridge/src/lib.rs index d59229952f52..65b5215a2c7d 100644 --- a/crates/syntax-bridge/src/lib.rs +++ b/crates/syntax-bridge/src/lib.rs @@ -143,7 +143,6 @@ pub fn token_tree_to_syntax_node( tt: &tt::TopSubtree>, entry_point: parser::TopEntryPoint, span_to_edition: &mut dyn FnMut(Ctx) -> Edition, - top_edition: Edition, ) -> (Parse, SpanMap) where Ctx: Copy + fmt::Debug + PartialEq + PartialEq + Eq + Hash, @@ -151,7 +150,7 @@ where let buffer = tt.view().strip_invisible(); let parser_input = to_parser_input(buffer, span_to_edition); // It matters what edition we parse with even when we escape all identifiers correctly. - let parser_output = entry_point.parse(&parser_input, top_edition); + let parser_output = entry_point.parse(&parser_input); let mut tree_sink = TtTreeSink::new(buffer.cursor()); for event in parser_output.iter() { match event { diff --git a/crates/syntax/src/parsing.rs b/crates/syntax/src/parsing.rs index 9e286edc5f98..249c81a1f37b 100644 --- a/crates/syntax/src/parsing.rs +++ b/crates/syntax/src/parsing.rs @@ -13,7 +13,7 @@ pub(crate) fn parse_text(text: &str, edition: parser::Edition) -> (GreenNode, Ve let _p = tracing::info_span!("parse_text").entered(); let lexed = parser::LexedStr::new(edition, text); let parser_input = lexed.to_input(edition); - let parser_output = parser::TopEntryPoint::SourceFile.parse(&parser_input, edition); + let parser_output = parser::TopEntryPoint::SourceFile.parse(&parser_input); let (node, errors, _eof) = build_tree(lexed, parser_output); (node, errors) } @@ -26,7 +26,7 @@ pub(crate) fn parse_text_at( let _p = tracing::info_span!("parse_text_at").entered(); let lexed = parser::LexedStr::new(edition, text); let parser_input = lexed.to_input(edition); - let parser_output = entry.parse(&parser_input, edition); + let parser_output = entry.parse(&parser_input); let (node, errors, _eof) = build_tree(lexed, parser_output); (node, errors) } diff --git a/crates/syntax/src/parsing/reparsing.rs b/crates/syntax/src/parsing/reparsing.rs index c54f14366fa1..5f193f01bc73 100644 --- a/crates/syntax/src/parsing/reparsing.rs +++ b/crates/syntax/src/parsing/reparsing.rs @@ -109,7 +109,7 @@ fn reparse_block( return None; } - let tree_traversal = reparser.parse(&parser_input, edition); + let tree_traversal = reparser.parse(&parser_input); let (green, new_parser_errors, _eof) = build_tree(lexed, tree_traversal);