From 7110c7b94c744ac9558e6f91a4fa40d76edae1fa Mon Sep 17 00:00:00 2001 From: Boshen <1430279+Boshen@users.noreply.github.com> Date: Wed, 25 Dec 2024 09:19:55 +0000 Subject: [PATCH] refactor(codegen): add `print_quoted_utf16` and `print_unquoted_utf16` methods (#8107) --- crates/oxc_codegen/src/gen.rs | 125 +-------------------------------- crates/oxc_codegen/src/lib.rs | 126 ++++++++++++++++++++++++++++++---- 2 files changed, 115 insertions(+), 136 deletions(-) diff --git a/crates/oxc_codegen/src/gen.rs b/crates/oxc_codegen/src/gen.rs index 549624b3f1d98..937524494dd87 100644 --- a/crates/oxc_codegen/src/gen.rs +++ b/crates/oxc_codegen/src/gen.rs @@ -4,7 +4,6 @@ use cow_utils::CowUtils; use oxc_ast::ast::*; use oxc_span::GetSpan; use oxc_syntax::{ - identifier::{LS, PS}, operator::UnaryOperator, precedence::{GetPrecedence, Precedence}, }; @@ -1334,133 +1333,11 @@ impl Gen for RegExpLiteral<'_> { } } -fn print_unquoted_str(s: &str, quote: u8, p: &mut Codegen) { - let mut chars = s.chars().peekable(); - - while let Some(c) = chars.next() { - match c { - '\x00' => { - if chars.peek().is_some_and(|&next| next.is_ascii_digit()) { - p.print_str("\\x00"); - } else { - p.print_str("\\0"); - } - } - '\x07' => { - p.print_str("\\x07"); - } - // \b - '\u{8}' => { - p.print_str("\\b"); - } - // \v - '\u{b}' => { - p.print_str("\\v"); - } - // \f - '\u{c}' => { - p.print_str("\\f"); - } - '\n' => { - p.print_str("\\n"); - } - '\r' => { - p.print_str("\\r"); - } - '\x1B' => { - p.print_str("\\x1B"); - } - '\\' => { - p.print_str("\\\\"); - } - '\'' => { - if quote == b'\'' { - p.print_ascii_byte(b'\\'); - } - p.print_ascii_byte(b'\''); - } - '\"' => { - if quote == b'"' { - p.print_ascii_byte(b'\\'); - } - p.print_ascii_byte(b'"'); - } - '`' => { - if quote == b'`' { - p.print_ascii_byte(b'\\'); - } - p.print_ascii_byte(b'`'); - } - '$' => { - if chars.peek() == Some(&'{') { - p.print_ascii_byte(b'\\'); - } - p.print_ascii_byte(b'$'); - } - // Allow `U+2028` and `U+2029` in string literals - // - // - LS => p.print_str("\\u2028"), - PS => p.print_str("\\u2029"), - '\u{a0}' => { - p.print_str("\\xA0"); - } - _ => { - p.print_str(c.encode_utf8([0; 4].as_mut())); - } - } - } -} - impl Gen for StringLiteral<'_> { fn gen(&self, p: &mut Codegen, _ctx: Context) { p.add_source_mapping(self.span); let s = self.value.as_str(); - - let quote = if p.options.minify { - let mut single_cost: u32 = 0; - let mut double_cost: u32 = 0; - let mut backtick_cost: u32 = 0; - let mut bytes = s.as_bytes().iter().peekable(); - while let Some(b) = bytes.next() { - match b { - b'\n' if p.options.minify => { - backtick_cost = backtick_cost.saturating_sub(1); - } - b'\'' => { - single_cost += 1; - } - b'"' => { - double_cost += 1; - } - b'`' => { - backtick_cost += 1; - } - b'$' => { - if bytes.peek() == Some(&&b'{') { - backtick_cost += 1; - } - } - _ => {} - } - } - let mut quote = b'"'; - if double_cost > single_cost { - quote = b'\''; - if single_cost > backtick_cost { - quote = b'`'; - } - } else if double_cost > backtick_cost { - quote = b'`'; - } - quote - } else { - p.quote - }; - - p.print_ascii_byte(quote); - print_unquoted_str(s, quote, p); - p.print_ascii_byte(quote); + p.print_quoted_utf16(s); } } diff --git a/crates/oxc_codegen/src/lib.rs b/crates/oxc_codegen/src/lib.rs index c26c14f61b84e..daf8d22ca0129 100644 --- a/crates/oxc_codegen/src/lib.rs +++ b/crates/oxc_codegen/src/lib.rs @@ -21,7 +21,7 @@ use oxc_ast::ast::{ use oxc_mangler::Mangler; use oxc_span::{GetSpan, Span, SPAN}; use oxc_syntax::{ - identifier::{is_identifier_part, is_identifier_part_ascii}, + identifier::{is_identifier_part, is_identifier_part_ascii, LS, PS}, operator::{BinaryOperator, UnaryOperator, UpdateOperator}, precedence::Precedence, }; @@ -348,6 +348,17 @@ impl<'a> Codegen<'a> { } } + #[inline] + fn wrap(&mut self, wrap: bool, mut f: F) { + if wrap { + self.print_ascii_byte(b'('); + } + f(self); + if wrap { + self.print_ascii_byte(b')'); + } + } + #[inline] fn print_indent(&mut self) { if self.options.minify { @@ -576,6 +587,108 @@ impl<'a> Codegen<'a> { } } + fn print_quoted_utf16(&mut self, s: &str) { + let quote = if self.options.minify { + let mut single_cost: u32 = 0; + let mut double_cost: u32 = 0; + let mut backtick_cost: u32 = 0; + let mut bytes = s.as_bytes().iter().peekable(); + while let Some(b) = bytes.next() { + match b { + b'\n' if self.options.minify => { + backtick_cost = backtick_cost.saturating_sub(1); + } + b'\'' => { + single_cost += 1; + } + b'"' => { + double_cost += 1; + } + b'`' => { + backtick_cost += 1; + } + b'$' => { + if bytes.peek() == Some(&&b'{') { + backtick_cost += 1; + } + } + _ => {} + } + } + let mut quote = b'"'; + if double_cost > single_cost { + quote = b'\''; + if single_cost > backtick_cost { + quote = b'`'; + } + } else if double_cost > backtick_cost { + quote = b'`'; + } + quote + } else { + self.quote + }; + + self.print_ascii_byte(quote); + self.print_unquoted_utf16(s, quote); + self.print_ascii_byte(quote); + } + + fn print_unquoted_utf16(&mut self, s: &str, quote: u8) { + let mut chars = s.chars().peekable(); + + while let Some(c) = chars.next() { + match c { + '\x00' => { + if chars.peek().is_some_and(|&next| next.is_ascii_digit()) { + self.print_str("\\x00"); + } else { + self.print_str("\\0"); + } + } + '\x07' => self.print_str("\\x07"), + '\u{8}' => self.print_str("\\b"), // \b + '\u{b}' => self.print_str("\\v"), // \v + '\u{c}' => self.print_str("\\f"), // \f + '\n' => self.print_str("\\n"), + '\r' => self.print_str("\\r"), + '\x1B' => self.print_str("\\x1B"), + '\\' => self.print_str("\\\\"), + // Allow `U+2028` and `U+2029` in string literals + // + // + LS => self.print_str("\\u2028"), + PS => self.print_str("\\u2029"), + '\u{a0}' => self.print_str("\\xA0"), + '\'' => { + if quote == b'\'' { + self.print_ascii_byte(b'\\'); + } + self.print_ascii_byte(b'\''); + } + '\"' => { + if quote == b'"' { + self.print_ascii_byte(b'\\'); + } + self.print_ascii_byte(b'"'); + } + '`' => { + if quote == b'`' { + self.print_ascii_byte(b'\\'); + } + self.print_ascii_byte(b'`'); + } + '$' => { + if chars.peek() == Some(&'{') { + self.print_ascii_byte(b'\\'); + } + self.print_ascii_byte(b'$'); + } + _ => self.print_str(c.encode_utf8([0; 4].as_mut())), + } + } + } + // `get_minified_number` from terser // https://github.com/terser/terser/blob/c5315c3fd6321d6b2e076af35a70ef532f498505/lib/output.js#L2418 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss, clippy::cast_possible_wrap)] @@ -630,17 +743,6 @@ impl<'a> Codegen<'a> { candidates.into_iter().min_by_key(String::len).unwrap() } - #[inline] - fn wrap(&mut self, wrap: bool, mut f: F) { - if wrap { - self.print_ascii_byte(b'('); - } - f(self); - if wrap { - self.print_ascii_byte(b')'); - } - } - fn add_source_mapping(&mut self, span: Span) { if span == SPAN { return;