-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Memoize text width #6552
Memoize text width #6552
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,12 +3,14 @@ pub mod tag; | |
|
||
use std::borrow::Cow; | ||
use std::hash::{Hash, Hasher}; | ||
use std::num::NonZeroU32; | ||
use std::ops::Deref; | ||
use std::rc::Rc; | ||
use unicode_width::UnicodeWidthChar; | ||
|
||
use crate::format_element::tag::{GroupMode, LabelId, Tag}; | ||
use crate::source_code::SourceCodeSlice; | ||
use crate::TagKind; | ||
use crate::{TabWidth, TagKind}; | ||
use ruff_text_size::TextSize; | ||
|
||
/// Language agnostic IR for formatting source code. | ||
|
@@ -37,13 +39,13 @@ pub enum FormatElement { | |
Text { | ||
/// There's no need for the text to be mutable, using `Box<str>` saves 8 bytes over `String`. | ||
text: Box<str>, | ||
text_width: TextWidth, | ||
}, | ||
|
||
/// Text that gets emitted as it is in the source code. Optimized to avoid any allocations. | ||
SourceCodeSlice { | ||
slice: SourceCodeSlice, | ||
/// Whether the string contains any new line characters | ||
contains_newlines: bool, | ||
text_width: TextWidth, | ||
}, | ||
|
||
/// Prevents that line suffixes move past this boundary. Forces the printer to print any pending | ||
|
@@ -73,13 +75,10 @@ impl std::fmt::Debug for FormatElement { | |
FormatElement::ExpandParent => write!(fmt, "ExpandParent"), | ||
FormatElement::Token { text } => fmt.debug_tuple("Token").field(text).finish(), | ||
FormatElement::Text { text, .. } => fmt.debug_tuple("DynamicText").field(text).finish(), | ||
FormatElement::SourceCodeSlice { | ||
slice, | ||
contains_newlines, | ||
} => fmt | ||
FormatElement::SourceCodeSlice { slice, text_width } => fmt | ||
.debug_tuple("Text") | ||
.field(slice) | ||
.field(contains_newlines) | ||
.field(text_width) | ||
.finish(), | ||
FormatElement::LineSuffixBoundary => write!(fmt, "LineSuffixBoundary"), | ||
FormatElement::BestFitting { variants, mode } => fmt | ||
|
@@ -255,11 +254,8 @@ impl FormatElements for FormatElement { | |
FormatElement::ExpandParent => true, | ||
FormatElement::Tag(Tag::StartGroup(group)) => !group.mode().is_flat(), | ||
FormatElement::Line(line_mode) => matches!(line_mode, LineMode::Hard | LineMode::Empty), | ||
|
||
FormatElement::Text { text, .. } => text.contains('\n'), | ||
FormatElement::SourceCodeSlice { | ||
contains_newlines, .. | ||
} => *contains_newlines, | ||
FormatElement::Text { text_width, .. } => text_width.is_multiline(), | ||
FormatElement::SourceCodeSlice { text_width, .. } => text_width.is_multiline(), | ||
FormatElement::Interned(interned) => interned.will_break(), | ||
// Traverse into the most flat version because the content is guaranteed to expand when even | ||
// the most flat version contains some content that forces a break. | ||
|
@@ -403,6 +399,67 @@ pub trait FormatElements { | |
fn end_tag(&self, kind: TagKind) -> Option<&Tag>; | ||
} | ||
|
||
/// New-type wrapper for the unicode width of a single-line text.
/// It exists mainly to prevent direct access to the inner value, which is stored with an offset.
///
/// ## Representation
///
/// The width is stored as `width + 1` so that it can be represented by a [`NonZeroU32`],
/// allowing [`TextWidth`] and [`Option<Width>`] to fit in 4 bytes rather than 8.
///
/// The offset costs one value at the top of the range: a width of `u32::MAX` (2^32 - 1) cannot be
/// represented precisely and instead has the same value as `u32::MAX - 1` (2^32 - 2).
/// This imprecision shouldn't matter in practice because such a text is longer than any configured
/// line width and thus should break either way.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct Width(NonZeroU32);

impl Width {
    /// Creates a new width, storing `width + 1` and saturating at `u32::MAX`.
    pub(crate) const fn new(width: u32) -> Self {
        Width(NonZeroU32::MIN.saturating_add(width))
    }

    /// Returns the actual width by undoing the `+ 1` storage offset.
    pub const fn value(self) -> u32 {
        // Cannot underflow: the stored value is non-zero, i.e. at least 1.
        self.0.get() - 1
    }
}
|
||
/// The pre-computed unicode width of a text if it is a single-line text or a marker | ||
/// that it is a multiline text if it contains a line feed. | ||
#[derive(Copy, Clone, Debug, Eq, PartialEq)] | ||
pub enum TextWidth { | ||
MichaReiser marked this conversation as resolved.
Show resolved
Hide resolved
|
||
Width(Width), | ||
Multiline, | ||
} | ||
|
||
impl TextWidth { | ||
pub fn from_text(text: &str, tab_width: TabWidth) -> TextWidth { | ||
let mut width = 0u32; | ||
|
||
for c in text.chars() { | ||
let char_width = match c { | ||
'\t' => tab_width.value(), | ||
'\n' => return TextWidth::Multiline, | ||
#[allow(clippy::cast_possible_truncation)] | ||
c => c.width().unwrap_or(0) as u32, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When does There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From the unicode width documentation
So yes, this is about control characters and using 0 seems reasonable to me (this is the same logic as applied by the printer today) |
||
}; | ||
width += char_width; | ||
} | ||
|
||
Self::Width(Width::new(width)) | ||
} | ||
|
||
pub const fn width(self) -> Option<Width> { | ||
match self { | ||
TextWidth::Width(width) => Some(width), | ||
TextWidth::Multiline => None, | ||
} | ||
} | ||
|
||
pub(crate) const fn is_multiline(self) -> bool { | ||
matches!(self, TextWidth::Multiline) | ||
} | ||
} | ||
|
||
#[cfg(test)] | ||
mod tests { | ||
|
||
|
@@ -430,19 +487,21 @@ mod sizes { | |
// be recomputed at a later point in time? | ||
// You reduced the size of a format element? Excellent work! | ||
|
||
use super::{BestFittingVariants, Interned, TextWidth}; | ||
use static_assertions::assert_eq_size; | ||
|
||
assert_eq_size!(ruff_text_size::TextRange, [u8; 8]); | ||
assert_eq_size!(crate::prelude::tag::VerbatimKind, [u8; 8]); | ||
assert_eq_size!(crate::prelude::Interned, [u8; 16]); | ||
assert_eq_size!(crate::format_element::BestFittingVariants, [u8; 16]); | ||
assert_eq_size!(TextWidth, [u8; 4]); | ||
assert_eq_size!(super::tag::VerbatimKind, [u8; 8]); | ||
assert_eq_size!(Interned, [u8; 16]); | ||
assert_eq_size!(BestFittingVariants, [u8; 16]); | ||
|
||
#[cfg(not(debug_assertions))] | ||
assert_eq_size!(crate::SourceCodeSlice, [u8; 8]); | ||
|
||
#[cfg(not(debug_assertions))] | ||
assert_eq_size!(crate::format_element::Tag, [u8; 16]); | ||
assert_eq_size!(super::Tag, [u8; 16]); | ||
|
||
#[cfg(not(debug_assertions))] | ||
assert_eq_size!(crate::FormatElement, [u8; 24]); | ||
assert_eq_size!(super::FormatElement, [u8; 24]); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I added this newtype wrapper to lock down access to the inner `NonZeroU32`, so that nobody uses it directly and ends up with values that are off by one.