refactor(formatter): preserve useless escaped quotes and single quote…

…s in CSS charset (#5237)
biomejs · Mar 2, 2025 · aeb17b7 · aeb17b7
1 parent 3ab73ff
commit aeb17b7
Show file tree

Hide file tree

Showing 20 changed files with 141 additions and 243 deletions.
diff --git a/crates/biome_css_formatter/src/utils/string_utils.rs b/crates/biome_css_formatter/src/utils/string_utils.rs
@@ -125,6 +125,8 @@ impl Format<CssFormatContext> for FormatLiteralStringToken<'_> {
 /// Data structure of convenience to store some information about the
 /// string that has been processed
 struct StringInformation {
+    /// Currently used quote or `None` if it is not a string
+    current_quote: Option<QuoteStyle>,
     /// This is the quote that the is calculated and eventually used inside the string.
     /// It could be different from the one inside the formatter options
     preferred_quote: QuoteStyle,
@@ -162,6 +164,7 @@ impl FormatLiteralStringToken<'_> {
         // preferred quote style without having to check the content.
         if !matches!(self.token().kind(), CSS_STRING_LITERAL) {
             return StringInformation {
+                current_quote: None,
                 preferred_quote: chosen_quote,
             };
         }
@@ -185,7 +188,10 @@ impl FormatLiteralStringToken<'_> {
             },
         );
 
+        let current_quote = literal.bytes().next().and_then(QuoteStyle::from_byte);
+
         StringInformation {
+            current_quote,
             preferred_quote: if chosen_quote_count > alternate_quote_count {
                 alternate_quote
             } else {
@@ -218,10 +224,9 @@ impl<'token> LiteralStringNormaliser<'token> {
     fn normalise_text(&mut self) -> Cow<'token, str> {
         match self.token.parent_kind {
             StringLiteralParentKind::CharsetAtRule => {
-                let string_information = StringInformation {
-                    preferred_quote: QuoteStyle::Double,
-                };
-                self.normalise_tokens(string_information)
+                // `@charset` should use double quotes.
+                // However, Prettier preserve single quotes.
+                Cow::Borrowed(self.get_token().text_trimmed())
             }
             StringLiteralParentKind::Others => {
                 let string_information = self
@@ -246,7 +251,11 @@ impl<'token> LiteralStringNormaliser<'token> {
 
     fn normalise_tokens(&self, string_information: StringInformation) -> Cow<'token, str> {
         let preferred_quote = string_information.preferred_quote;
-        let polished_raw_content = self.normalize_string(&string_information);
+        let polished_raw_content = normalize_string(
+            self.raw_content(),
+            string_information.preferred_quote.into(),
+            string_information.current_quote != Some(string_information.preferred_quote),
+        );
 
         match polished_raw_content {
             Cow::Borrowed(raw_content) => self.swap_quotes(raw_content, &string_information),
@@ -260,12 +269,6 @@ impl<'token> LiteralStringNormaliser<'token> {
         }
     }
 
-    fn normalize_string(&self, string_information: &StringInformation) -> Cow<'token, str> {
-        let raw_content = self.raw_content();
-
-        normalize_string(raw_content, string_information.preferred_quote.into(), true)
-    }
-
     fn raw_content(&self) -> &'token str {
         let token = self.get_token();
         match token.kind() {

diff --git a/crates/biome_css_formatter/tests/specs/prettier/css/atrule/charset.css.snap b/crates/biome_css_formatter/tests/specs/prettier/css/atrule/charset.css.snap
diff --git a/crates/biome_css_formatter/tests/specs/prettier/css/quotes/quotes.css.prettier-snap b/crates/biome_css_formatter/tests/specs/prettier/css/quotes/quotes.css.prettier-snap
@@ -34,7 +34,7 @@
     content: "\"'";
 
     /* One of each with unnecessary escapes. */
-    content: "\"'";
+    content: "\"\'";
     content: "\"'";
 
     /* More double quotes than single quotes. */
@@ -54,8 +54,8 @@
     content: "\\";
 
     /* Backslases. */
-    content: "\"\\\"\\\\\" ''\\'\\'\\\\'";
-    content: '\'\\\'\\\\\' ""\\"\\"\\\\"';
+    content: "\"\\\"\\\\\" '\'\\'\\\'\\\\'";
+    content: '\'\\\'\\\\\' "\"\\"\\\"\\\\"';
 
     /* Somewhat more real-word example. */
     content: "He's sayin': \"How's it goin'?\" Don't ask me why.";

diff --git a/crates/biome_css_formatter/tests/specs/prettier/css/quotes/quotes.css.snap b/crates/biome_css_formatter/tests/specs/prettier/css/quotes/quotes.css.snap
@@ -105,15 +105,6 @@ one 'two' three {}
 ```diff
 --- Prettier
 +++ Biome
-@@ -1,7 +1,7 @@
- @charset "UTF-8";
- /* @charset must always have double quotes: https://www.w3.org/TR/css-syntax-3/#determine-the-fallback-encoding */
- /* Also, it has to be the very first thing in the file, but here are some more tests anyway: */
--@charset 'UTF-8'; /* Single quotes are invalid here. Keep them since we don't know what the user is doing. */
-+@charset "UTF-8"; /* Single quotes are invalid here. Keep them since we don't know what the user is doing. */
-
- @supports (content: one "two" three "four") {
-   a[href="foo" y],
 @@ -87,7 +87,5 @@
  @foo ('one');
  @foo ("one" two 'three');
@@ -132,7 +123,7 @@ one 'two' three {}
 @charset "UTF-8";
 /* @charset must always have double quotes: https://www.w3.org/TR/css-syntax-3/#determine-the-fallback-encoding */
 /* Also, it has to be the very first thing in the file, but here are some more tests anyway: */
-@charset "UTF-8"; /* Single quotes are invalid here. Keep them since we don't know what the user is doing. */
+@charset 'UTF-8'; /* Single quotes are invalid here. Keep them since we don't know what the user is doing. */
 
 @supports (content: one "two" three "four") {
   a[href="foo" y],
@@ -165,7 +156,7 @@ one 'two' three {}
     content: "\"'";
 
     /* One of each with unnecessary escapes. */
-    content: "\"'";
+    content: "\"\'";
     content: "\"'";
 
     /* More double quotes than single quotes. */
@@ -185,8 +176,8 @@ one 'two' three {}
     content: "\\";
 
     /* Backslases. */
-    content: "\"\\\"\\\\\" ''\\'\\'\\\\'";
-    content: '\'\\\'\\\\\' ""\\"\\"\\\\"';
+    content: "\"\\\"\\\\\" '\'\\'\\\'\\\\'";
+    content: '\'\\\'\\\\\' "\"\\"\\\"\\\\"';
 
     /* Somewhat more real-word example. */
     content: "He's sayin': \"How's it goin'?\" Don't ask me why.";
@@ -309,5 +300,5 @@ quotes.css:91:11 parse ━━━━━━━━━━━━━━━━━━━
 ```
     2: /* @charset must always have double quotes: https://www.w3.org/TR/css-syntax-3/#determine-the-fallback-encoding */
     3: /* Also, it has to be the very first thing in the file, but here are some more tests anyway: */
-    4: @charset "UTF-8"; /* Single quotes are invalid here. Keep them since we don't know what the user is doing. */
+    4: @charset 'UTF-8'; /* Single quotes are invalid here. Keep them since we don't know what the user is doing. */
 ```
diff --git a/crates/biome_formatter/src/lib.rs b/crates/biome_formatter/src/lib.rs
@@ -495,6 +495,14 @@ pub enum QuoteStyle {
 }
 
 impl QuoteStyle {
+    pub fn from_byte(byte: u8) -> Option<QuoteStyle> {
+        match byte {
+            b'"' => Some(QuoteStyle::Double),
+            b'\'' => Some(QuoteStyle::Single),
+            _ => None,
+        }
+    }
+
     pub fn as_char(&self) -> char {
         match self {
             QuoteStyle::Double => '"',

diff --git a/crates/biome_formatter/src/token/string.rs b/crates/biome_formatter/src/token/string.rs
@@ -29,26 +29,25 @@ impl Quote {
 
 /// This function is responsible of:
 ///
-/// - escape `preferred_quote`
-/// - unescape alternate quotes of `preferred_quote`
-/// - remove unneed escapes (if `is_escape_preserved` is `true`)
+/// - escaping `preferred_quote`
+/// - unescape alternate quotes of `preferred_quote` if `quotes_will_change`
 /// - normalise the new lines by replacing `\r\n` with `\n`.
 ///
 /// The function allocates a new string only if at least one change is performed.
 ///
-/// In the followinf example `"` is escaped, `\'` and `\l` are unescaped, and the newline is normalized.
+/// In the following example `"` is escaped and the newline is normalized.
 ///
 /// ```
 /// use biome_formatter::token::string::{normalize_string, Quote};
 /// assert_eq!(
-///     normalize_string(" \"He\\llo\\tworld\" \\' \\' \r\n ", Quote::Double, false),
-///     " \\\"Hello\\tworld\\\" ' ' \n ",
+///     normalize_string(" \"He\\llo\\tworld\" \\' \\' \r\n ", Quote::Double, true),
+///     " \\\"He\\llo\\tworld\\\" ' ' \n ",
 /// );
 /// ```
 pub fn normalize_string(
     raw_content: &str,
     preferred_quote: Quote,
-    is_escape_preserved: bool,
+    quotes_will_change: bool,
 ) -> Cow<str> {
     let alternate_quote = preferred_quote.other().as_byte();
     let preferred_quote = preferred_quote.as_byte();
@@ -60,50 +59,14 @@ pub fn normalize_string(
             // If the next character is escaped
             b'\\' => {
                 if let Some((escaped_index, escaped)) = bytes.next() {
-                    // If we encounter an alternate quote that is escaped, we have to
-                    // remove the escape from it.
-                    // This is done because of how the enclosed strings can change.
-                    // Check `computed_preferred_quote` for more details.
-                    let should_unescape = match escaped {
-                        // The next character is another backslash, or
-                        // a character that should be kept in the next iteration
-                        b'^'
-                        | b'\n'
-                        | b'0'..=b'7'
-                        | b'\\'
-                        | b'b'
-                        | b'f'
-                        | b'n'
-                        | b'r'
-                        | b't'
-                        | b'u'
-                        | b'v'
-                        | b'x' => false,
-                        b'\r' => {
-                            // If we encounter the sequence "\r\n", then skip '\r'
-                            if let Some((next_byte_index, b'\n')) = bytes.next() {
-                                reduced_string.push_str(&raw_content[copy_start..escaped_index]);
-                                copy_start = next_byte_index;
-                            }
-                            false
+                    if escaped == b'\r' {
+                        // If we encounter the sequence "\r\n", then skip '\r'
+                        if let Some((next_byte_index, b'\n')) = bytes.next() {
+                            reduced_string.push_str(&raw_content[copy_start..escaped_index]);
+                            copy_start = next_byte_index;
                         }
-                        0xE2 => {
-                            // Preserve escaping of Unicode characters U+2028 and U+2029
-                            !(matches!(bytes.next(), Some((_, 0x80)))
-                                && matches!(bytes.next(), Some((_, 0xA8 | 0xA9))))
-                        }
-                        _ => {
-                            // these, usually characters that can have their
-                            // escape removed: "\a" => "a"
-                            // So we ignore the current slash and we continue
-                            // to the next iteration
-                            //
-                            // We always unescape alternate quotes regardless of `is_escape_preserved`.
-                            escaped == alternate_quote
-                                || (escaped != preferred_quote && !is_escape_preserved)
-                        }
-                    };
-                    if should_unescape {
+                    } else if quotes_will_change && escaped == alternate_quote {
+                        // Unescape alternate quotes if quotes are changing
                         reduced_string.push_str(&raw_content[copy_start..byte_index]);
                         copy_start = escaped_index;
                     }
@@ -145,46 +108,43 @@ mod tests {
     #[test]
     fn normalize_newline() {
         assert_eq!(normalize_string("a\nb", Quote::Double, true), "a\nb");
-        assert_eq!(normalize_string("a\r\nb", Quote::Double, false), "a\nb");
-        assert_eq!(normalize_string("a\\\r\nb", Quote::Double, false), "a\\\nb");
+        assert_eq!(normalize_string("a\r\nb", Quote::Double, true), "a\nb");
+        assert_eq!(normalize_string("a\\\r\nb", Quote::Double, true), "a\\\nb");
     }
 
     #[test]
     fn normalize_escapes() {
-        assert_eq!(normalize_string("\\", Quote::Double, false), "\\");
-        assert_eq!(normalize_string("\\t", Quote::Double, false), "\\t");
+        assert_eq!(normalize_string("\\", Quote::Double, true), "\\");
+        assert_eq!(normalize_string("\\t", Quote::Double, true), "\\t");
         assert_eq!(
-            normalize_string("\\\u{2028}", Quote::Double, false),
+            normalize_string("\\\u{2028}", Quote::Double, true),
             "\\\u{2028}"
         );
         assert_eq!(
-            normalize_string("\\\u{2029}", Quote::Double, false),
+            normalize_string("\\\u{2029}", Quote::Double, true),
             "\\\u{2029}"
         );
 
-        assert_eq!(normalize_string("a\\a", Quote::Double, false), "aa");
-        assert_eq!(normalize_string("👍\\👍", Quote::Single, false), "👍👍");
+        assert_eq!(normalize_string(r"a\a", Quote::Double, true), r"a\a");
+        assert_eq!(normalize_string(r"👍\👍", Quote::Single, true), r"👍\👍");
         assert_eq!(
-            normalize_string("\\\u{2027}", Quote::Double, false),
-            "\u{2027}"
+            normalize_string("\\\u{2027}", Quote::Double, true),
+            "\\\u{2027}"
         );
         assert_eq!(
-            normalize_string("\\\u{2030}", Quote::Double, false),
-            "\u{2030}"
+            normalize_string("\\\u{2030}", Quote::Double, true),
+            "\\\u{2030}"
         );
-
-        assert_eq!(normalize_string("a\\a", Quote::Double, true), "a\\a");
-        assert_eq!(normalize_string("👍\\👍", Quote::Single, true), "👍\\👍");
     }
 
     #[test]
     fn normalize_quotes() {
-        assert_eq!(normalize_string("\"", Quote::Double, false), "\\\"");
-        assert_eq!(normalize_string("\'", Quote::Double, false), "'");
-        assert_eq!(normalize_string("\\'", Quote::Double, false), "'");
+        assert_eq!(normalize_string("\"", Quote::Double, true), "\\\"");
+        assert_eq!(normalize_string(r"\'", Quote::Double, true), r"'");
 
+        assert_eq!(normalize_string(r"\'", Quote::Double, false), r"\'");
         assert_eq!(normalize_string("\"", Quote::Single, false), "\"");
         assert_eq!(normalize_string("\\'", Quote::Single, false), "\\'");
-        assert_eq!(normalize_string("\\\"", Quote::Single, false), "\"");
+        assert_eq!(normalize_string("\\\"", Quote::Single, false), "\\\"");
     }
 }