Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/wise-dogs-tease.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
swc_ecma_ast: major
---

refactor(es/minifier): Improve tpl to str
2 changes: 1 addition & 1 deletion crates/swc/tests/fixture/issues-8xxx/8496/output/8.js
Original file line number Diff line number Diff line change
@@ -1 +1 @@
export const use=`\xffathjax{$login`;
export const use="\xffathjax{$login";
177 changes: 158 additions & 19 deletions crates/swc_ecma_ast/src/lit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,13 @@ use std::{

use is_macro::Is;
use num_bigint::BigInt as BigIntValue;
use swc_atoms::{Atom, Wtf8Atom};
use swc_common::{ast_node, util::take::Take, EqIgnoreSpan, Span, DUMMY_SP};
use swc_atoms::{
wtf8::{CodePoint, Wtf8Buf},
Atom, Wtf8Atom,
};
use swc_common::{ast_node, errors::HANDLER, util::take::Take, EqIgnoreSpan, Span, DUMMY_SP};

use crate::jsx::JSXText;
use crate::{jsx::JSXText, TplElement};

#[ast_node]
#[derive(Eq, Hash, EqIgnoreSpan, Is)]
Expand Down Expand Up @@ -253,53 +256,189 @@ impl<'a> arbitrary::Arbitrary<'a> for Str {
}
}

/// Reports `msg` as an error diagnostic attached to `span`, using the
/// handler made available through `HANDLER`.
fn emit_span_error(span: Span, msg: &str) {
    HANDLER.with(|reporter| {
        let mut diagnostic = reporter.struct_span_err(span, msg);
        diagnostic.emit();
    });
}

impl Str {
/// Returns `true` if this string literal's `value` is empty.
#[inline]
pub fn is_empty(&self) -> bool {
self.value.is_empty()
}

pub fn from_tpl_raw(tpl_raw: &str) -> Atom {
let mut buf = String::with_capacity(tpl_raw.len());

pub fn from_tpl_raw(tpl: &TplElement) -> Wtf8Atom {
let tpl_raw = &tpl.raw;
let span = tpl.span;
let mut buf: Wtf8Buf = Wtf8Buf::with_capacity(tpl_raw.len());
let mut iter = tpl_raw.chars();

// A pending high surrogate stored in `prev_result` is always less than 0xdc00,
// so 0xdc00 can serve as the "no previous result" sentinel.
const NO_PREV_RESULT: u32 = 0xdc00;
let mut prev_result: u32 = NO_PREV_RESULT;
while let Some(c) = iter.next() {
match c {
'\\' => {
if let Some(next) = iter.next() {
match next {
if let Some(c) = iter.next() {
match c {
'`' | '$' | '\\' => {
buf.push(next);
buf.push_char(c);
}
'b' => {
buf.push('\u{0008}');
buf.push_char('\u{0008}');
}
'f' => {
buf.push('\u{000C}');
buf.push_char('\u{000C}');
}
'n' => {
buf.push('\n');
buf.push_char('\n');
}
'r' => {
buf.push('\r');
buf.push_char('\r');
}
't' => {
buf.push('\t');
buf.push_char('\t');
}
'v' => {
buf.push('\u{000B}');
buf.push_char('\u{000B}');
}
'\r' => {
let mut next_iter = iter.clone();
if let Some('\n') = next_iter.next() {
iter = next_iter;
}
}
'\n' | '\u{2028}' | '\u{2029}' => {}
'u' | 'x' => {
let mut count: u8 = 0;
// result accumulates the hex digits of the escape sequence
// (2 for \xHH, 4 for \uHHHH, up to 6 for \u{...})
Copy link

Copilot AI Jan 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment "result is a 4 digit hex value" is misleading because the result can be 2 digits for \x escapes, 4 digits for \uXXXX escapes, or up to 6 digits for \u{...} escapes. Consider updating the comment to be more accurate, such as "result accumulates the hex digits".

Suggested change
// result is a 4 digit hex value
// result accumulates the hex digits of the escape sequence

Copilot uses AI. Check for mistakes.
let mut result: u32 = 0;
let mut max_len = if c == 'u' { 4 } else { 2 };
for c in &mut iter {
match c {
'{' if max_len == 4 && count == 0 => {
max_len = 6;
continue;
}
'}' if max_len == 6 => {
break;
}
'0'..='9' => {
result = (result << 4) | (c as u32 - '0' as u32);
count += 1;
}
'a'..='f' => {
result = (result << 4) | (c as u32 - 'a' as u32 + 10);
count += 1;
}
'A'..='F' => {
result = (result << 4) | (c as u32 - 'A' as u32 + 10);
count += 1;
}
_ => emit_span_error(
span,
"Uncaught SyntaxError: Invalid Unicode escape sequence",
),
}
if count >= max_len {
if result > 0x10ffff {
emit_span_error(
span,
"Uncaught SyntaxError: Undefined Unicode \
code-point",
)
} else {
break;
}
}
Comment on lines +350 to +354
Copy link

Copilot AI Jan 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When count >= max_len and result > 0x10ffff, the code emits an error but doesn't break out of the loop, allowing it to continue parsing more hex digits beyond the maximum length. This should break after emitting the error to prevent parsing extra digits. Change line 350 to include a break statement or restructure the logic to break after the if-else at line 351-353.

Suggested change
)
} else {
break;
}
}
);
}
break;
}

Copilot uses AI. Check for mistakes.
}
if max_len == 2 && max_len != count {
emit_span_error(
span,
"Uncaught SyntaxError: Invalid hexadecimal escape sequence",
);
}
if (0xd800..=0xdfff).contains(&result) {
// Handle UTF-16 surrogate pair
if result < 0xdc00 {
// High surrogate pair
if prev_result != NO_PREV_RESULT {
// If the previous result is a high surrogate
// We can be sure `prev_result` is less than 0xdc00
buf.push(unsafe {
CodePoint::from_u32_unchecked(prev_result)
});
Copy link

Copilot AI Jan 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After pushing the previous high surrogate at lines 369-371, prev_result should be reset to NO_PREV_RESULT to avoid pushing it again at line 396. Without this reset, if the current result is also a high surrogate but not followed by \u, the previous surrogate will be pushed twice. Add prev_result = NO_PREV_RESULT; after line 371.

Suggested change
});
});
prev_result = NO_PREV_RESULT;

Copilot uses AI. Check for mistakes.
}
let mut iter = iter.clone();
if let Some('\\') = iter.next() {
if let Some('u') = iter.next() {
// less than 0xdc00
prev_result = result;
continue;
}
}
} else if prev_result != NO_PREV_RESULT {
// Low surrogate pair
// Decode to supplementary plane code point
// (0x10000-0x10FFFF)
result = 0x10000
+ ((result & 0x3ff) | ((prev_result & 0x3ff) << 10));
// We can be sure result is a valid code point here
buf.push(unsafe { CodePoint::from_u32_unchecked(result) });
prev_result = NO_PREV_RESULT;
continue;
}
}
if prev_result != NO_PREV_RESULT {
// Could not find a valid low surrogate pair
// We can be sure `prev_result` is less than 0xdc00
buf.push(unsafe { CodePoint::from_u32_unchecked(prev_result) });
prev_result = NO_PREV_RESULT;
}
if result <= 0x10ffff {
// We can be sure result is a valid code point here
buf.push(unsafe { CodePoint::from_u32_unchecked(result) });
} else {
emit_span_error(
span,
"Uncaught SyntaxError: Undefined Unicode code-point",
);
}
}
'0'..='7' => {
let next = iter.clone().next();
if c == '0' {
match next {
Some(next) => {
if !next.is_digit(8) {
buf.push_char('\u{0000}');
continue;
}
}
// \0 is not an octal literal nor decimal literal.
_ => {
buf.push_char('\u{0000}');
continue;
}
}
}
emit_span_error(
span,
"Uncaught SyntaxError: Octal escape sequences are not allowed \
in template strings.",
);
}
_ => {
buf.push('\\');
buf.push(next);
// output raw value when this is not supported
buf.push_char(c);
}
}
}
}

c => {
buf.push(c);
buf.push_char(c);
}
}
}
Expand Down
133 changes: 133 additions & 0 deletions crates/swc_ecma_ast/tests/lit.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
use swc_atoms::{wtf8::Wtf8, Atom};
use swc_common::DUMMY_SP;
use swc_ecma_ast::{Str, TplElement};

/// Renders a WTF-8 string as a regular `String`.
///
/// Code points that map to a `char` are copied through unchanged; any code
/// point without a `char` form (a lone surrogate) is written as a literal
/// `\uXXXX` escape with four upper-case hex digits.
fn convert_wtf8_to_raw(s: &Wtf8) -> String {
    s.code_points().fold(String::new(), |mut rendered, cp| {
        match cp.to_char() {
            Some(ch) => rendered.push(ch),
            None => rendered.push_str(&format!("\\u{:04X}", cp.to_u32())),
        }
        rendered
    })
}

/// Runs `Str::from_tpl_raw` on a tail template element whose raw text is
/// `raw`, and asserts that the result, rendered by `convert_wtf8_to_raw`,
/// equals `expected`.
fn test_from_tpl_raw(raw: &str, expected: &str) {
    let element = TplElement {
        span: DUMMY_SP,
        tail: true,
        raw: Atom::new(raw.to_string()),
        cooked: None,
    };

    let cooked = Str::from_tpl_raw(&element);
    let actual = convert_wtf8_to_raw(&cooked);

    assert_eq!(actual, expected, "Input: {raw}");
}

#[test]
fn basic_escape_sequences() {
// Baseline: raw text that contains no escape sequence is expected to come
// back unchanged.
test_from_tpl_raw("hello world", "hello world");
}

#[test]
fn hex_escape_sequences() {
// The raw text is the four characters `\xff`; the expected result is the
// single character U+00FF.
test_from_tpl_raw("\\xff", "\u{ff}");
}

#[test]
fn emoji_literals() {
    // Literal (unescaped) emoji are expected to come back unchanged.
    for emoji in ["🦀", "🚀"] {
        test_from_tpl_raw(emoji, emoji);
    }
}

#[test]
fn unicode_escape_sequences() {
    // Each pair is (raw template text, expected cooked text).
    let cases = [
        // Surrogate pair written as two `\uXXXX` escapes
        ("\\uD83E\\uDD80", "🦀"),
        // Surrogate pair written as two braced escapes
        ("\\u{D83E}\\u{DD80}", "🦀"),
        // Single braced escape for the full code point
        ("\\u{1F980}", "🦀"),
        ("\\uD800\\uDC00", "𐀀"),
    ];

    for (raw, expected) in cases {
        test_from_tpl_raw(raw, expected);
    }
}
#[test]
fn surrogate_pair_boundary_cases() {
    // Each pair is (raw surrogate-pair escapes, expected combined character).
    let cases = [
        // First supplementary plane character
        ("\\uD800\\uDC00", "\u{10000}"),
        // Last valid character
        ("\\uDBFF\\uDFFF", "\u{10FFFF}"),
        // Various combinations to ensure formula is correct
        ("\\uD801\\uDC37", "\u{10437}"), // 𐐷
        ("\\uD852\\uDF62", "\u{24B62}"), // 𤭢
    ];

    for (raw, expected) in cases {
        test_from_tpl_raw(raw, expected);
    }
}

#[test]
fn invalid_surrogate_pairs() {
    // A lone surrogate has no `char` form, so `convert_wtf8_to_raw` renders it
    // as a literal `\uXXXX` escape in the expected text.
    let cases = [
        ("\\u{d800}", "\\uD800"),
        ("\\u{dc00}", "\\uDC00"),
        ("\\u{dFFF}", "\\uDFFF"),
    ];

    for (raw, expected) in cases {
        test_from_tpl_raw(raw, expected);
    }
}

#[test]
fn surrogate_pair_combinations() {
    let cases = [
        // Lone high surrogate followed by a complete surrogate pair
        ("\\u{d800}\\uD83E\\uDD80", "\\uD800🦀"),
        // Lone high surrogate followed by an ordinary character
        ("\\uD83E-", "\\uD83E-"),
    ];

    for (raw, expected) in cases {
        test_from_tpl_raw(raw, expected);
    }
}

#[test]
fn single_high_surrogate() {
// A high surrogate at the very end of the input has nothing to pair with;
// the expected text is its literal `\uXXXX` rendering from
// `convert_wtf8_to_raw`.
test_from_tpl_raw("\\uD83E", "\\uD83E");
}

#[test]
fn common_escape_sequences() {
    // Each pair is (raw single-character escape, expected control character).
    let cases = [
        ("\\0", "\u{0000}"),
        ("\\b", "\u{0008}"),
        ("\\f", "\u{000C}"),
        ("\\n", "\n"),
        ("\\r", "\r"),
        ("\\t", "\t"),
        ("\\v", "\u{000B}"),
    ];

    for (raw, expected) in cases {
        test_from_tpl_raw(raw, expected);
    }
}

#[test]
fn escaped_template_chars() {
    let cases = [
        // Escaped backtick, dollar sign and backslash yield the bare character
        ("\\`", "`"),
        ("\\$", "$"),
        ("\\\\", "\\"),
        // A backslash followed by a newline is a line continuation
        ("Hello \\\nworld!", "Hello world!"),
    ];

    for (raw, expected) in cases {
        test_from_tpl_raw(raw, expected);
    }
}

#[test]
fn combined_escapes() {
    // Escapes mixed with ordinary text, and escapes back to back.
    let cases = [
        ("hello\\nworld", "hello\nworld"),
        ("\\t\\tindented", "\t\tindented"),
    ];

    for (raw, expected) in cases {
        test_from_tpl_raw(raw, expected);
    }
}

// Tests for octal escape sequences, which are not allowed in template strings.
// NOTE(review): `from_tpl_raw` reports these through `emit_span_error` rather
// than panicking directly; the panic expected here presumably comes from
// `HANDLER` not being set in this test binary — confirm. The `expected`
// argument is never compared because the call panics first.
#[test]
#[should_panic]
fn should_panic_octal_01() {
test_from_tpl_raw("\\01", "");
}

// `\2` is an octal escape and is expected to be rejected.
#[test]
#[should_panic]
fn should_panic_octal_2() {
test_from_tpl_raw("\\2", "");
}

// `\7`, the largest single-digit octal escape, is expected to be rejected.
#[test]
#[should_panic]
fn should_panic_octal_7() {
test_from_tpl_raw("\\7", "");
}
1 change: 0 additions & 1 deletion crates/swc_ecma_minifier/src/compress/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,6 @@ impl Compressor<'_> {
self.marks,
PureOptimizerConfig {
enable_join_vars: self.pass > 1,
force_str_for_tpl: self.mode.force_str_for_tpl(),
},
);
n.visit_mut_with(&mut visitor);
Expand Down
Loading
Loading