Skip to content

Commit

Permalink
refactor(formatter): preserve useless escaped quotes and single quote…
Browse files Browse the repository at this point in the history
…s in CSS charset (#5237)
  • Loading branch information
Conaclos authored Mar 2, 2025
1 parent 3ab73ff commit aeb17b7
Show file tree
Hide file tree
Showing 20 changed files with 141 additions and 243 deletions.
25 changes: 14 additions & 11 deletions crates/biome_css_formatter/src/utils/string_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@ impl Format<CssFormatContext> for FormatLiteralStringToken<'_> {
/// Data structure of convenience to store some information about the
/// string that has been processed
struct StringInformation {
/// Currently used quote or `None` if it is not a string
current_quote: Option<QuoteStyle>,
/// This is the quote that the is calculated and eventually used inside the string.
/// It could be different from the one inside the formatter options
preferred_quote: QuoteStyle,
Expand Down Expand Up @@ -162,6 +164,7 @@ impl FormatLiteralStringToken<'_> {
// preferred quote style without having to check the content.
if !matches!(self.token().kind(), CSS_STRING_LITERAL) {
return StringInformation {
current_quote: None,
preferred_quote: chosen_quote,
};
}
Expand All @@ -185,7 +188,10 @@ impl FormatLiteralStringToken<'_> {
},
);

let current_quote = literal.bytes().next().and_then(QuoteStyle::from_byte);

StringInformation {
current_quote,
preferred_quote: if chosen_quote_count > alternate_quote_count {
alternate_quote
} else {
Expand Down Expand Up @@ -218,10 +224,9 @@ impl<'token> LiteralStringNormaliser<'token> {
fn normalise_text(&mut self) -> Cow<'token, str> {
match self.token.parent_kind {
StringLiteralParentKind::CharsetAtRule => {
let string_information = StringInformation {
preferred_quote: QuoteStyle::Double,
};
self.normalise_tokens(string_information)
// `@charset` should use double quotes.
// However, Prettier preserve single quotes.
Cow::Borrowed(self.get_token().text_trimmed())
}
StringLiteralParentKind::Others => {
let string_information = self
Expand All @@ -246,7 +251,11 @@ impl<'token> LiteralStringNormaliser<'token> {

fn normalise_tokens(&self, string_information: StringInformation) -> Cow<'token, str> {
let preferred_quote = string_information.preferred_quote;
let polished_raw_content = self.normalize_string(&string_information);
let polished_raw_content = normalize_string(
self.raw_content(),
string_information.preferred_quote.into(),
string_information.current_quote != Some(string_information.preferred_quote),
);

match polished_raw_content {
Cow::Borrowed(raw_content) => self.swap_quotes(raw_content, &string_information),
Expand All @@ -260,12 +269,6 @@ impl<'token> LiteralStringNormaliser<'token> {
}
}

fn normalize_string(&self, string_information: &StringInformation) -> Cow<'token, str> {
let raw_content = self.raw_content();

normalize_string(raw_content, string_information.preferred_quote.into(), true)
}

fn raw_content(&self) -> &'token str {
let token = self.get_token();
match token.kind() {
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
content: "\"'";

/* One of each with unnecessary escapes. */
content: "\"'";
content: "\"\'";
content: "\"'";

/* More double quotes than single quotes. */
Expand All @@ -54,8 +54,8 @@
content: "\\";

/* Backslases. */
content: "\"\\\"\\\\\" ''\\'\\'\\\\'";
content: '\'\\\'\\\\\' ""\\"\\"\\\\"';
content: "\"\\\"\\\\\" '\'\\'\\\'\\\\'";
content: '\'\\\'\\\\\' "\"\\"\\\"\\\\"';

/* Somewhat more real-word example. */
content: "He's sayin': \"How's it goin'?\" Don't ask me why.";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,15 +105,6 @@ one 'two' three {}
```diff
--- Prettier
+++ Biome
@@ -1,7 +1,7 @@
@charset "UTF-8";
/* @charset must always have double quotes: https://www.w3.org/TR/css-syntax-3/#determine-the-fallback-encoding */
/* Also, it has to be the very first thing in the file, but here are some more tests anyway: */
-@charset 'UTF-8'; /* Single quotes are invalid here. Keep them since we don't know what the user is doing. */
+@charset "UTF-8"; /* Single quotes are invalid here. Keep them since we don't know what the user is doing. */

@supports (content: one "two" three "four") {
a[href="foo" y],
@@ -87,7 +87,5 @@
@foo ('one');
@foo ("one" two 'three');
Expand All @@ -132,7 +123,7 @@ one 'two' three {}
@charset "UTF-8";
/* @charset must always have double quotes: https://www.w3.org/TR/css-syntax-3/#determine-the-fallback-encoding */
/* Also, it has to be the very first thing in the file, but here are some more tests anyway: */
@charset "UTF-8"; /* Single quotes are invalid here. Keep them since we don't know what the user is doing. */
@charset 'UTF-8'; /* Single quotes are invalid here. Keep them since we don't know what the user is doing. */

@supports (content: one "two" three "four") {
a[href="foo" y],
Expand Down Expand Up @@ -165,7 +156,7 @@ one 'two' three {}
content: "\"'";

/* One of each with unnecessary escapes. */
content: "\"'";
content: "\"\'";
content: "\"'";

/* More double quotes than single quotes. */
Expand All @@ -185,8 +176,8 @@ one 'two' three {}
content: "\\";

/* Backslases. */
content: "\"\\\"\\\\\" ''\\'\\'\\\\'";
content: '\'\\\'\\\\\' ""\\"\\"\\\\"';
content: "\"\\\"\\\\\" '\'\\'\\\'\\\\'";
content: '\'\\\'\\\\\' "\"\\"\\\"\\\\"';

/* Somewhat more real-word example. */
content: "He's sayin': \"How's it goin'?\" Don't ask me why.";
Expand Down Expand Up @@ -309,5 +300,5 @@ quotes.css:91:11 parse ━━━━━━━━━━━━━━━━━━━
```
2: /* @charset must always have double quotes: https://www.w3.org/TR/css-syntax-3/#determine-the-fallback-encoding */
3: /* Also, it has to be the very first thing in the file, but here are some more tests anyway: */
4: @charset "UTF-8"; /* Single quotes are invalid here. Keep them since we don't know what the user is doing. */
4: @charset 'UTF-8'; /* Single quotes are invalid here. Keep them since we don't know what the user is doing. */
```
8 changes: 8 additions & 0 deletions crates/biome_formatter/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,14 @@ pub enum QuoteStyle {
}

impl QuoteStyle {
pub fn from_byte(byte: u8) -> Option<QuoteStyle> {
match byte {
b'"' => Some(QuoteStyle::Double),
b'\'' => Some(QuoteStyle::Single),
_ => None,
}
}

pub fn as_char(&self) -> char {
match self {
QuoteStyle::Double => '"',
Expand Down
98 changes: 29 additions & 69 deletions crates/biome_formatter/src/token/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,26 +29,25 @@ impl Quote {

/// This function is responsible of:
///
/// - escape `preferred_quote`
/// - unescape alternate quotes of `preferred_quote`
/// - remove unneed escapes (if `is_escape_preserved` is `true`)
/// - escaping `preferred_quote`
/// - unescape alternate quotes of `preferred_quote` if `quotes_will_change`
/// - normalise the new lines by replacing `\r\n` with `\n`.
///
/// The function allocates a new string only if at least one change is performed.
///
/// In the followinf example `"` is escaped, `\'` and `\l` are unescaped, and the newline is normalized.
/// In the following example `"` is escaped and the newline is normalized.
///
/// ```
/// use biome_formatter::token::string::{normalize_string, Quote};
/// assert_eq!(
/// normalize_string(" \"He\\llo\\tworld\" \\' \\' \r\n ", Quote::Double, false),
/// " \\\"Hello\\tworld\\\" ' ' \n ",
/// normalize_string(" \"He\\llo\\tworld\" \\' \\' \r\n ", Quote::Double, true),
/// " \\\"He\\llo\\tworld\\\" ' ' \n ",
/// );
/// ```
pub fn normalize_string(
raw_content: &str,
preferred_quote: Quote,
is_escape_preserved: bool,
quotes_will_change: bool,
) -> Cow<str> {
let alternate_quote = preferred_quote.other().as_byte();
let preferred_quote = preferred_quote.as_byte();
Expand All @@ -60,50 +59,14 @@ pub fn normalize_string(
// If the next character is escaped
b'\\' => {
if let Some((escaped_index, escaped)) = bytes.next() {
// If we encounter an alternate quote that is escaped, we have to
// remove the escape from it.
// This is done because of how the enclosed strings can change.
// Check `computed_preferred_quote` for more details.
let should_unescape = match escaped {
// The next character is another backslash, or
// a character that should be kept in the next iteration
b'^'
| b'\n'
| b'0'..=b'7'
| b'\\'
| b'b'
| b'f'
| b'n'
| b'r'
| b't'
| b'u'
| b'v'
| b'x' => false,
b'\r' => {
// If we encounter the sequence "\r\n", then skip '\r'
if let Some((next_byte_index, b'\n')) = bytes.next() {
reduced_string.push_str(&raw_content[copy_start..escaped_index]);
copy_start = next_byte_index;
}
false
if escaped == b'\r' {
// If we encounter the sequence "\r\n", then skip '\r'
if let Some((next_byte_index, b'\n')) = bytes.next() {
reduced_string.push_str(&raw_content[copy_start..escaped_index]);
copy_start = next_byte_index;
}
0xE2 => {
// Preserve escaping of Unicode characters U+2028 and U+2029
!(matches!(bytes.next(), Some((_, 0x80)))
&& matches!(bytes.next(), Some((_, 0xA8 | 0xA9))))
}
_ => {
// these, usually characters that can have their
// escape removed: "\a" => "a"
// So we ignore the current slash and we continue
// to the next iteration
//
// We always unescape alternate quotes regardless of `is_escape_preserved`.
escaped == alternate_quote
|| (escaped != preferred_quote && !is_escape_preserved)
}
};
if should_unescape {
} else if quotes_will_change && escaped == alternate_quote {
// Unescape alternate quotes if quotes are changing
reduced_string.push_str(&raw_content[copy_start..byte_index]);
copy_start = escaped_index;
}
Expand Down Expand Up @@ -145,46 +108,43 @@ mod tests {
#[test]
fn normalize_newline() {
assert_eq!(normalize_string("a\nb", Quote::Double, true), "a\nb");
assert_eq!(normalize_string("a\r\nb", Quote::Double, false), "a\nb");
assert_eq!(normalize_string("a\\\r\nb", Quote::Double, false), "a\\\nb");
assert_eq!(normalize_string("a\r\nb", Quote::Double, true), "a\nb");
assert_eq!(normalize_string("a\\\r\nb", Quote::Double, true), "a\\\nb");
}

#[test]
fn normalize_escapes() {
assert_eq!(normalize_string("\\", Quote::Double, false), "\\");
assert_eq!(normalize_string("\\t", Quote::Double, false), "\\t");
assert_eq!(normalize_string("\\", Quote::Double, true), "\\");
assert_eq!(normalize_string("\\t", Quote::Double, true), "\\t");
assert_eq!(
normalize_string("\\\u{2028}", Quote::Double, false),
normalize_string("\\\u{2028}", Quote::Double, true),
"\\\u{2028}"
);
assert_eq!(
normalize_string("\\\u{2029}", Quote::Double, false),
normalize_string("\\\u{2029}", Quote::Double, true),
"\\\u{2029}"
);

assert_eq!(normalize_string("a\\a", Quote::Double, false), "aa");
assert_eq!(normalize_string("👍\\👍", Quote::Single, false), "👍👍");
assert_eq!(normalize_string(r"a\a", Quote::Double, true), r"a\a");
assert_eq!(normalize_string(r"👍\👍", Quote::Single, true), r"👍\👍");
assert_eq!(
normalize_string("\\\u{2027}", Quote::Double, false),
"\u{2027}"
normalize_string("\\\u{2027}", Quote::Double, true),
"\\\u{2027}"
);
assert_eq!(
normalize_string("\\\u{2030}", Quote::Double, false),
"\u{2030}"
normalize_string("\\\u{2030}", Quote::Double, true),
"\\\u{2030}"
);

assert_eq!(normalize_string("a\\a", Quote::Double, true), "a\\a");
assert_eq!(normalize_string("👍\\👍", Quote::Single, true), "👍\\👍");
}

#[test]
fn normalize_quotes() {
assert_eq!(normalize_string("\"", Quote::Double, false), "\\\"");
assert_eq!(normalize_string("\'", Quote::Double, false), "'");
assert_eq!(normalize_string("\\'", Quote::Double, false), "'");
assert_eq!(normalize_string("\"", Quote::Double, true), "\\\"");
assert_eq!(normalize_string(r"\'", Quote::Double, true), r"'");

assert_eq!(normalize_string(r"\'", Quote::Double, false), r"\'");
assert_eq!(normalize_string("\"", Quote::Single, false), "\"");
assert_eq!(normalize_string("\\'", Quote::Single, false), "\\'");
assert_eq!(normalize_string("\\\"", Quote::Single, false), "\"");
assert_eq!(normalize_string("\\\"", Quote::Single, false), "\\\"");
}
}
Loading

0 comments on commit aeb17b7

Please sign in to comment.