-
-
Notifications
You must be signed in to change notification settings - Fork 1.4k
refactor(es/minifier): Improve tpl to str #11415
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
7c9954b
f5de72d
5cc1a73
f4d184e
2d8914f
60c7c55
fe6239c
9632cd1
b51f32b
da57de6
2a37002
3bd5b1a
2ed2f08
d0ddc97
09a49dc
02a7aab
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| swc_ecma_ast: major | ||
| --- | ||
|
|
||
| refactor(es/minifier): Improve tpl to str |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1 +1 @@ | ||
| export const use=`\xffathjax{$login`; | ||
| export const use="\xffathjax{$login"; |
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -6,10 +6,13 @@ use std::{ | |||||||||||||||||||
|
|
||||||||||||||||||||
| use is_macro::Is; | ||||||||||||||||||||
| use num_bigint::BigInt as BigIntValue; | ||||||||||||||||||||
| use swc_atoms::{Atom, Wtf8Atom}; | ||||||||||||||||||||
| use swc_common::{ast_node, util::take::Take, EqIgnoreSpan, Span, DUMMY_SP}; | ||||||||||||||||||||
| use swc_atoms::{ | ||||||||||||||||||||
| wtf8::{CodePoint, Wtf8Buf}, | ||||||||||||||||||||
| Atom, Wtf8Atom, | ||||||||||||||||||||
| }; | ||||||||||||||||||||
| use swc_common::{ast_node, errors::HANDLER, util::take::Take, EqIgnoreSpan, Span, DUMMY_SP}; | ||||||||||||||||||||
|
|
||||||||||||||||||||
| use crate::jsx::JSXText; | ||||||||||||||||||||
| use crate::{jsx::JSXText, TplElement}; | ||||||||||||||||||||
|
|
||||||||||||||||||||
| #[ast_node] | ||||||||||||||||||||
| #[derive(Eq, Hash, EqIgnoreSpan, Is)] | ||||||||||||||||||||
|
|
@@ -253,53 +256,189 @@ impl<'a> arbitrary::Arbitrary<'a> for Str { | |||||||||||||||||||
| } | ||||||||||||||||||||
| } | ||||||||||||||||||||
|
|
||||||||||||||||||||
| fn emit_span_error(span: Span, msg: &str) { | ||||||||||||||||||||
| HANDLER.with(|handler| { | ||||||||||||||||||||
| handler.struct_span_err(span, msg).emit(); | ||||||||||||||||||||
| }); | ||||||||||||||||||||
| } | ||||||||||||||||||||
|
|
||||||||||||||||||||
| impl Str { | ||||||||||||||||||||
| #[inline] | ||||||||||||||||||||
| pub fn is_empty(&self) -> bool { | ||||||||||||||||||||
| self.value.is_empty() | ||||||||||||||||||||
| } | ||||||||||||||||||||
|
|
||||||||||||||||||||
| pub fn from_tpl_raw(tpl_raw: &str) -> Atom { | ||||||||||||||||||||
| let mut buf = String::with_capacity(tpl_raw.len()); | ||||||||||||||||||||
|
|
||||||||||||||||||||
| pub fn from_tpl_raw(tpl: &TplElement) -> Wtf8Atom { | ||||||||||||||||||||
| let tpl_raw = &tpl.raw; | ||||||||||||||||||||
| let span = tpl.span; | ||||||||||||||||||||
| let mut buf: Wtf8Buf = Wtf8Buf::with_capacity(tpl_raw.len()); | ||||||||||||||||||||
| let mut iter = tpl_raw.chars(); | ||||||||||||||||||||
|
|
||||||||||||||||||||
| // A real prev_result is always less than 0xdc00, | ||||||||||||||||||||
| // so 0xdc00 is used as the "no previous result" sentinel. | ||||||||||||||||||||
| const NO_PREV_RESULT: u32 = 0xdc00; | ||||||||||||||||||||
| let mut prev_result: u32 = NO_PREV_RESULT; | ||||||||||||||||||||
| while let Some(c) = iter.next() { | ||||||||||||||||||||
| match c { | ||||||||||||||||||||
| '\\' => { | ||||||||||||||||||||
| if let Some(next) = iter.next() { | ||||||||||||||||||||
| match next { | ||||||||||||||||||||
| if let Some(c) = iter.next() { | ||||||||||||||||||||
| match c { | ||||||||||||||||||||
| '`' | '$' | '\\' => { | ||||||||||||||||||||
| buf.push(next); | ||||||||||||||||||||
| buf.push_char(c); | ||||||||||||||||||||
| } | ||||||||||||||||||||
| 'b' => { | ||||||||||||||||||||
| buf.push('\u{0008}'); | ||||||||||||||||||||
| buf.push_char('\u{0008}'); | ||||||||||||||||||||
| } | ||||||||||||||||||||
| 'f' => { | ||||||||||||||||||||
| buf.push('\u{000C}'); | ||||||||||||||||||||
| buf.push_char('\u{000C}'); | ||||||||||||||||||||
| } | ||||||||||||||||||||
| 'n' => { | ||||||||||||||||||||
| buf.push('\n'); | ||||||||||||||||||||
| buf.push_char('\n'); | ||||||||||||||||||||
| } | ||||||||||||||||||||
| 'r' => { | ||||||||||||||||||||
| buf.push('\r'); | ||||||||||||||||||||
| buf.push_char('\r'); | ||||||||||||||||||||
| } | ||||||||||||||||||||
| 't' => { | ||||||||||||||||||||
| buf.push('\t'); | ||||||||||||||||||||
| buf.push_char('\t'); | ||||||||||||||||||||
| } | ||||||||||||||||||||
| 'v' => { | ||||||||||||||||||||
| buf.push('\u{000B}'); | ||||||||||||||||||||
| buf.push_char('\u{000B}'); | ||||||||||||||||||||
| } | ||||||||||||||||||||
| '\r' => { | ||||||||||||||||||||
| let mut next_iter = iter.clone(); | ||||||||||||||||||||
| if let Some('\n') = next_iter.next() { | ||||||||||||||||||||
| iter = next_iter; | ||||||||||||||||||||
| } | ||||||||||||||||||||
| } | ||||||||||||||||||||
| '\n' | '\u{2028}' | '\u{2029}' => {} | ||||||||||||||||||||
| 'u' | 'x' => { | ||||||||||||||||||||
| let mut count: u8 = 0; | ||||||||||||||||||||
| // result accumulates the hex digits of the escape (2 for \x, 4 for \uXXXX, up to 6 for \u{...}) | ||||||||||||||||||||
| let mut result: u32 = 0; | ||||||||||||||||||||
| let mut max_len = if c == 'u' { 4 } else { 2 }; | ||||||||||||||||||||
| for c in &mut iter { | ||||||||||||||||||||
| match c { | ||||||||||||||||||||
| '{' if max_len == 4 && count == 0 => { | ||||||||||||||||||||
| max_len = 6; | ||||||||||||||||||||
| continue; | ||||||||||||||||||||
| } | ||||||||||||||||||||
| '}' if max_len == 6 => { | ||||||||||||||||||||
| break; | ||||||||||||||||||||
| } | ||||||||||||||||||||
| '0'..='9' => { | ||||||||||||||||||||
| result = (result << 4) | (c as u32 - '0' as u32); | ||||||||||||||||||||
| count += 1; | ||||||||||||||||||||
| } | ||||||||||||||||||||
| 'a'..='f' => { | ||||||||||||||||||||
| result = (result << 4) | (c as u32 - 'a' as u32 + 10); | ||||||||||||||||||||
| count += 1; | ||||||||||||||||||||
| } | ||||||||||||||||||||
| 'A'..='F' => { | ||||||||||||||||||||
| result = (result << 4) | (c as u32 - 'A' as u32 + 10); | ||||||||||||||||||||
| count += 1; | ||||||||||||||||||||
| } | ||||||||||||||||||||
| _ => emit_span_error( | ||||||||||||||||||||
| span, | ||||||||||||||||||||
| "Uncaught SyntaxError: Invalid Unicode escape sequence", | ||||||||||||||||||||
| ), | ||||||||||||||||||||
| } | ||||||||||||||||||||
| if count >= max_len { | ||||||||||||||||||||
| if result > 0x10ffff { | ||||||||||||||||||||
| emit_span_error( | ||||||||||||||||||||
| span, | ||||||||||||||||||||
| "Uncaught SyntaxError: Undefined Unicode \ | ||||||||||||||||||||
| code-point", | ||||||||||||||||||||
| ) | ||||||||||||||||||||
| } else { | ||||||||||||||||||||
| break; | ||||||||||||||||||||
| } | ||||||||||||||||||||
| } | ||||||||||||||||||||
|
Comment on lines
+350
to
+354
|
||||||||||||||||||||
| ) | |
| } else { | |
| break; | |
| } | |
| } | |
| ); | |
| } | |
| break; | |
| } |
cuyl marked this conversation as resolved.
Show resolved
Hide resolved
cuyl marked this conversation as resolved.
Show resolved
Hide resolved
Copilot
AI
Jan 12, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
After pushing the previous high surrogate at lines 369-371, `prev_result` should be reset to `NO_PREV_RESULT` to avoid pushing it again at line 396. Without this reset, if the current result is also a high surrogate but is not followed by `\u`, the previous surrogate will be pushed twice. Add `prev_result = NO_PREV_RESULT;` after line 371.
| }); | |
| }); | |
| prev_result = NO_PREV_RESULT; |
cuyl marked this conversation as resolved.
Show resolved
Hide resolved
cuyl marked this conversation as resolved.
Show resolved
Hide resolved
cuyl marked this conversation as resolved.
Show resolved
Hide resolved
cuyl marked this conversation as resolved.
Show resolved
Hide resolved
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,133 @@ | ||
| use swc_atoms::{wtf8::Wtf8, Atom}; | ||
| use swc_common::DUMMY_SP; | ||
| use swc_ecma_ast::{Str, TplElement}; | ||
|
|
||
| /// Convert Wtf8 to a string representation, escaping invalid surrogates. | ||
| fn convert_wtf8_to_raw(s: &Wtf8) -> String { | ||
| let mut result = String::new(); | ||
| let iter = s.code_points(); | ||
|
|
||
| for code_point in iter { | ||
| if let Some(c) = code_point.to_char() { | ||
| result.push(c); | ||
| } else { | ||
| result.push_str(format!("\\u{:04X}", code_point.to_u32()).as_str()); | ||
| } | ||
| } | ||
|
|
||
| result | ||
| } | ||
|
|
||
| /// Helper function to test `Str::from_tpl_raw` | ||
| fn test_from_tpl_raw(raw: &str, expected: &str) { | ||
| let tpl = TplElement { | ||
| span: DUMMY_SP, | ||
| tail: true, | ||
| raw: Atom::new(raw.to_string()), | ||
| cooked: None, | ||
| }; | ||
|
|
||
| let result = Str::from_tpl_raw(&tpl); | ||
| let result_str = convert_wtf8_to_raw(&result); | ||
|
|
||
| assert_eq!(result_str, expected, "Input: {raw}"); | ||
| } | ||
|
|
||
| #[test] | ||
| fn basic_escape_sequences() { | ||
| test_from_tpl_raw("hello world", "hello world"); | ||
| } | ||
|
|
||
| #[test] | ||
| fn hex_escape_sequences() { | ||
| test_from_tpl_raw("\\xff", "\u{ff}"); | ||
| } | ||
|
|
||
| #[test] | ||
| fn emoji_literals() { | ||
| test_from_tpl_raw("🦀", "🦀"); | ||
| test_from_tpl_raw("🚀", "🚀"); | ||
| } | ||
|
|
||
| #[test] | ||
| fn unicode_escape_sequences() { | ||
| test_from_tpl_raw("\\uD83E\\uDD80", "🦀"); | ||
| test_from_tpl_raw("\\u{D83E}\\u{DD80}", "🦀"); | ||
| test_from_tpl_raw("\\u{1F980}", "🦀"); | ||
| test_from_tpl_raw("\\uD800\\uDC00", "𐀀"); | ||
| } | ||
| #[test] | ||
| fn surrogate_pair_boundary_cases() { | ||
| // First supplementary plane character | ||
| test_from_tpl_raw("\\uD800\\uDC00", "\u{10000}"); | ||
|
|
||
| // Last valid character | ||
| test_from_tpl_raw("\\uDBFF\\uDFFF", "\u{10FFFF}"); | ||
|
|
||
| // Various combinations to ensure formula is correct | ||
| test_from_tpl_raw("\\uD801\\uDC37", "\u{10437}"); // 𐐷 | ||
| test_from_tpl_raw("\\uD852\\uDF62", "\u{24B62}"); // 𤭢 | ||
| } | ||
|
|
||
| #[test] | ||
| fn invalid_surrogate_pairs() { | ||
| test_from_tpl_raw("\\u{d800}", "\\uD800"); | ||
| test_from_tpl_raw("\\u{dc00}", "\\uDC00"); | ||
| test_from_tpl_raw("\\u{dFFF}", "\\uDFFF"); | ||
| } | ||
|
|
||
| #[test] | ||
| fn surrogate_pair_combinations() { | ||
| test_from_tpl_raw("\\u{d800}\\uD83E\\uDD80", "\\uD800🦀"); | ||
| test_from_tpl_raw("\\uD83E-", "\\uD83E-"); | ||
| } | ||
|
|
||
| #[test] | ||
| fn single_high_surrogate() { | ||
| test_from_tpl_raw("\\uD83E", "\\uD83E"); | ||
| } | ||
|
|
||
| #[test] | ||
| fn common_escape_sequences() { | ||
| test_from_tpl_raw("\\0", "\u{0000}"); | ||
| test_from_tpl_raw("\\b", "\u{0008}"); | ||
| test_from_tpl_raw("\\f", "\u{000C}"); | ||
| test_from_tpl_raw("\\n", "\n"); | ||
| test_from_tpl_raw("\\r", "\r"); | ||
| test_from_tpl_raw("\\t", "\t"); | ||
| test_from_tpl_raw("\\v", "\u{000B}"); | ||
| } | ||
|
|
||
| #[test] | ||
| fn escaped_template_chars() { | ||
| test_from_tpl_raw("\\`", "`"); | ||
| test_from_tpl_raw("\\$", "$"); | ||
| test_from_tpl_raw("\\\\", "\\"); | ||
| test_from_tpl_raw("Hello \\\nworld!", "Hello world!"); | ||
| } | ||
|
|
||
| #[test] | ||
| fn combined_escapes() { | ||
| test_from_tpl_raw("hello\\nworld", "hello\nworld"); | ||
| test_from_tpl_raw("\\t\\tindented", "\t\tindented"); | ||
| } | ||
|
|
||
| // Tests for octal escape sequences that should be rejected. | ||
| // These will panic because octal escapes are not allowed in template strings. | ||
| #[test] | ||
| #[should_panic] | ||
| fn should_panic_octal_01() { | ||
| test_from_tpl_raw("\\01", ""); | ||
| } | ||
|
|
||
| #[test] | ||
| #[should_panic] | ||
| fn should_panic_octal_2() { | ||
| test_from_tpl_raw("\\2", ""); | ||
| } | ||
|
|
||
| #[test] | ||
| #[should_panic] | ||
| fn should_panic_octal_7() { | ||
| test_from_tpl_raw("\\7", ""); | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The comment "result is a 4 digit hex value" is misleading because the result can be 2 digits for `\x` escapes, 4 digits for `\uXXXX` escapes, or up to 6 digits for `\u{...}` escapes. Consider updating the comment to be more accurate, such as "result accumulates the hex digits".