Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Memoize text width #6552

Merged
merged 2 commits into from
Sep 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion crates/ruff_formatter/src/arguments.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ mod tests {

#[test]
fn test_nesting() {
let mut context = FormatState::new(());
let mut context = FormatState::new(SimpleFormatContext::default());
let mut buffer = VecBuffer::new(&mut context);

write!(
Expand Down
56 changes: 13 additions & 43 deletions crates/ruff_formatter/src/builders.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ use Tag::*;
use crate::format_element::tag::{Condition, Tag};
use crate::prelude::tag::{DedentMode, GroupMode, LabelId};
use crate::prelude::*;
use crate::{format_element, write, Argument, Arguments, FormatContext, GroupId, TextSize};
use crate::{
format_element, write, Argument, Arguments, FormatContext, FormatOptions, GroupId, TextSize,
};
use crate::{Buffer, VecBuffer};

/// A line break that only gets printed if the enclosing `Group` doesn't fit on a single line.
Expand Down Expand Up @@ -348,14 +350,18 @@ pub struct Text<'a> {
position: Option<TextSize>,
}

impl<Context> Format<Context> for Text<'_> {
impl<Context> Format<Context> for Text<'_>
where
Context: FormatContext,
{
fn fmt(&self, f: &mut Formatter<Context>) -> FormatResult<()> {
if let Some(source_position) = self.position {
f.write_element(FormatElement::SourcePosition(source_position));
}

f.write_element(FormatElement::Text {
text: self.text.to_string().into_boxed_str(),
text_width: TextWidth::from_text(self.text, f.options().tab_width()),
});

Ok(())
Expand All @@ -369,31 +375,13 @@ impl std::fmt::Debug for Text<'_> {
}

/// Emits a text as it is written in the source document. Optimized to avoid allocations.
pub const fn source_text_slice(
range: TextRange,
newlines: ContainsNewlines,
) -> SourceTextSliceBuilder {
SourceTextSliceBuilder {
range,
new_lines: newlines,
}
}

#[derive(Copy, Clone, Eq, PartialEq, Debug)]
pub enum ContainsNewlines {
/// The string contains newline characters
Yes,
/// The string contains no newline characters
No,

/// The string may contain newline characters, search the string to determine if there are any newlines.
Detect,
pub const fn source_text_slice(range: TextRange) -> SourceTextSliceBuilder {
SourceTextSliceBuilder { range }
}

#[derive(Eq, PartialEq, Debug)]
pub struct SourceTextSliceBuilder {
range: TextRange,
new_lines: ContainsNewlines,
}

impl<Context> Format<Context> for SourceTextSliceBuilder
Expand All @@ -405,28 +393,10 @@ where
let slice = source_code.slice(self.range);
debug_assert_no_newlines(slice.text(source_code));

let contains_newlines = match self.new_lines {
ContainsNewlines::Yes => {
debug_assert!(
slice.text(source_code).contains('\n'),
"Text contains no new line characters but the caller specified that it does."
);
true
}
ContainsNewlines::No => {
debug_assert!(
!slice.text(source_code).contains('\n'),
"Text contains new line characters but the caller specified that it does not."
);
false
}
ContainsNewlines::Detect => slice.text(source_code).contains('\n'),
};
let text_width =
TextWidth::from_text(slice.text(source_code), f.context().options().tab_width());

f.write_element(FormatElement::SourceCodeSlice {
slice,
contains_newlines,
});
f.write_element(FormatElement::SourceCodeSlice { slice, text_width });

Ok(())
}
Expand Down
95 changes: 77 additions & 18 deletions crates/ruff_formatter/src/format_element.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@ pub mod tag;

use std::borrow::Cow;
use std::hash::{Hash, Hasher};
use std::num::NonZeroU32;
use std::ops::Deref;
use std::rc::Rc;
use unicode_width::UnicodeWidthChar;

use crate::format_element::tag::{GroupMode, LabelId, Tag};
use crate::source_code::SourceCodeSlice;
use crate::TagKind;
use crate::{TabWidth, TagKind};
use ruff_text_size::TextSize;

/// Language agnostic IR for formatting source code.
Expand Down Expand Up @@ -37,13 +39,13 @@ pub enum FormatElement {
Text {
/// There's no need for the text to be mutable, using `Box<str>` safes 8 bytes over `String`.
text: Box<str>,
text_width: TextWidth,
},

/// Text that gets emitted as it is in the source code. Optimized to avoid any allocations.
SourceCodeSlice {
slice: SourceCodeSlice,
/// Whether the string contains any new line characters
contains_newlines: bool,
text_width: TextWidth,
},

/// Prevents that line suffixes move past this boundary. Forces the printer to print any pending
Expand Down Expand Up @@ -73,13 +75,10 @@ impl std::fmt::Debug for FormatElement {
FormatElement::ExpandParent => write!(fmt, "ExpandParent"),
FormatElement::Token { text } => fmt.debug_tuple("Token").field(text).finish(),
FormatElement::Text { text, .. } => fmt.debug_tuple("DynamicText").field(text).finish(),
FormatElement::SourceCodeSlice {
slice,
contains_newlines,
} => fmt
FormatElement::SourceCodeSlice { slice, text_width } => fmt
.debug_tuple("Text")
.field(slice)
.field(contains_newlines)
.field(text_width)
.finish(),
FormatElement::LineSuffixBoundary => write!(fmt, "LineSuffixBoundary"),
FormatElement::BestFitting { variants, mode } => fmt
Expand Down Expand Up @@ -255,11 +254,8 @@ impl FormatElements for FormatElement {
FormatElement::ExpandParent => true,
FormatElement::Tag(Tag::StartGroup(group)) => !group.mode().is_flat(),
FormatElement::Line(line_mode) => matches!(line_mode, LineMode::Hard | LineMode::Empty),

FormatElement::Text { text, .. } => text.contains('\n'),
FormatElement::SourceCodeSlice {
contains_newlines, ..
} => *contains_newlines,
FormatElement::Text { text_width, .. } => text_width.is_multiline(),
FormatElement::SourceCodeSlice { text_width, .. } => text_width.is_multiline(),
FormatElement::Interned(interned) => interned.will_break(),
// Traverse into the most flat version because the content is guaranteed to expand when even
// the most flat version contains some content that forces a break.
Expand Down Expand Up @@ -403,6 +399,67 @@ pub trait FormatElements {
fn end_tag(&self, kind: TagKind) -> Option<&Tag>;
}

/// New-type wrapper for a single-line text unicode width.
/// Mainly to prevent access to the inner value.
///
/// ## Representation
///
/// Represents the width by adding 1 to the actual width so that the width can be represented by a [`NonZeroU32`],
/// allowing [`TextWidth`] or [`Option<Width>`] fit in 4 bytes rather than 8.
///
/// This means that 2^32 can not be precisely represented and instead has the same value as 2^32-1.
/// This imprecision shouldn't matter in practice because either text are longer than any configured line width
/// and thus, the text should break.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct Width(NonZeroU32);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added this newtype wrapper to lock down the access to the inner NonZeroU32. Not that someone uses it and then gets values that are off by one.


impl Width {
pub(crate) const fn new(width: u32) -> Self {
Width(NonZeroU32::MIN.saturating_add(width))
}

pub const fn value(self) -> u32 {
self.0.get() - 1
}
}

/// The pre-computed unicode width of a text if it is a single-line text or a marker
/// that it is a multiline text if it contains a line feed.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum TextWidth {
Width(Width),
Multiline,
}

impl TextWidth {
pub fn from_text(text: &str, tab_width: TabWidth) -> TextWidth {
let mut width = 0u32;

for c in text.chars() {
let char_width = match c {
'\t' => tab_width.value(),
'\n' => return TextWidth::Multiline,
#[allow(clippy::cast_possible_truncation)]
c => c.width().unwrap_or(0) as u32,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When does c not have a width and why would we go with 0 instead, is this about control characters?

Copy link
Member Author

@MichaReiser MichaReiser Sep 6, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From the unicode width documentation

Returns the character's displayed width in columns, or None if the character is a control character other than '\x00'.

So yes, this is about control characters and using 0 seems reasonable to me (this is the same logic as applied by the printer today)

};
width += char_width;
}

Self::Width(Width::new(width))
}

pub const fn width(self) -> Option<Width> {
match self {
TextWidth::Width(width) => Some(width),
TextWidth::Multiline => None,
}
}

pub(crate) const fn is_multiline(self) -> bool {
matches!(self, TextWidth::Multiline)
}
}

#[cfg(test)]
mod tests {

Expand Down Expand Up @@ -430,19 +487,21 @@ mod sizes {
// be recomputed at a later point in time?
// You reduced the size of a format element? Excellent work!

use super::{BestFittingVariants, Interned, TextWidth};
use static_assertions::assert_eq_size;

assert_eq_size!(ruff_text_size::TextRange, [u8; 8]);
assert_eq_size!(crate::prelude::tag::VerbatimKind, [u8; 8]);
assert_eq_size!(crate::prelude::Interned, [u8; 16]);
assert_eq_size!(crate::format_element::BestFittingVariants, [u8; 16]);
assert_eq_size!(TextWidth, [u8; 4]);
assert_eq_size!(super::tag::VerbatimKind, [u8; 8]);
assert_eq_size!(Interned, [u8; 16]);
assert_eq_size!(BestFittingVariants, [u8; 16]);

#[cfg(not(debug_assertions))]
assert_eq_size!(crate::SourceCodeSlice, [u8; 8]);

#[cfg(not(debug_assertions))]
assert_eq_size!(crate::format_element::Tag, [u8; 16]);
assert_eq_size!(super::Tag, [u8; 16]);

#[cfg(not(debug_assertions))]
assert_eq_size!(crate::FormatElement, [u8; 24]);
assert_eq_size!(super::FormatElement, [u8; 24]);
}
35 changes: 18 additions & 17 deletions crates/ruff_formatter/src/format_element/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,11 @@ impl Document {
expands = false;
continue;
}
FormatElement::Text { text, .. } => text.contains('\n'),
FormatElement::SourceCodeSlice {
contains_newlines, ..
} => *contains_newlines,
FormatElement::Text {
text: _,
text_width,
} => text_width.is_multiline(),
FormatElement::SourceCodeSlice { text_width, .. } => text_width.is_multiline(),
FormatElement::ExpandParent
| FormatElement::Line(LineMode::Hard | LineMode::Empty) => true,
_ => false,
Expand Down Expand Up @@ -259,18 +260,24 @@ impl Format<IrFormatContext<'_>> for &[FormatElement] {
| FormatElement::Text { .. }
| FormatElement::SourceCodeSlice { .. }) => {
fn write_escaped(element: &FormatElement, f: &mut Formatter<IrFormatContext>) {
let text = match element {
FormatElement::Token { text } => text,
FormatElement::Text { text } => text.as_ref(),
FormatElement::SourceCodeSlice { slice, .. } => {
slice.text(f.context().source_code())
let (text, text_width) = match element {
#[allow(clippy::cast_possible_truncation)]
FormatElement::Token { text } => {
(*text, TextWidth::Width(Width::new(text.len() as u32)))
}
FormatElement::Text { text, text_width } => {
(text.as_ref(), *text_width)
}
FormatElement::SourceCodeSlice { slice, text_width } => {
(slice.text(f.context().source_code()), *text_width)
}
_ => unreachable!(),
};

if text.contains('"') {
f.write_element(FormatElement::Text {
text: text.replace('"', r#"\""#).into(),
text_width,
});
} else {
f.write_element(element.clone());
Expand Down Expand Up @@ -854,15 +861,9 @@ mod tests {
[group(&format_args![
token("("),
soft_block_indent(&format_args![
source_text_slice(
TextRange::at(TextSize::new(0), TextSize::new(19)),
ContainsNewlines::No
),
source_text_slice(TextRange::at(TextSize::new(0), TextSize::new(19)),),
space(),
source_text_slice(
TextRange::at(TextSize::new(20), TextSize::new(28)),
ContainsNewlines::No
),
source_text_slice(TextRange::at(TextSize::new(20), TextSize::new(28)),),
])
])]
)
Expand Down
8 changes: 4 additions & 4 deletions crates/ruff_formatter/src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -343,15 +343,15 @@ mod tests {

struct TestFormat;

impl Format<()> for TestFormat {
fn fmt(&self, f: &mut Formatter<()>) -> FormatResult<()> {
impl Format<SimpleFormatContext> for TestFormat {
fn fmt(&self, f: &mut Formatter<SimpleFormatContext>) -> FormatResult<()> {
write!(f, [token("test")])
}
}

#[test]
fn test_single_element() {
let mut state = FormatState::new(());
let mut state = FormatState::new(SimpleFormatContext::default());
let mut buffer = VecBuffer::new(&mut state);

write![&mut buffer, [TestFormat]].unwrap();
Expand All @@ -364,7 +364,7 @@ mod tests {

#[test]
fn test_multiple_elements() {
let mut state = FormatState::new(());
let mut state = FormatState::new(SimpleFormatContext::default());
let mut buffer = VecBuffer::new(&mut state);

write![
Expand Down
Loading