Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix double-width characters disappearing when wrapping #3180

Merged
merged 22 commits into from
Nov 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
01d787b
Merge branch 'master' of github.com:Textualize/rich
darrenburns May 31, 2023
8fb36b0
Merge remote-tracking branch 'origin/master'
darrenburns Oct 31, 2023
c8e478e
Update docstring for `Text.wrap`s width parameter to indicate that it…
darrenburns Oct 31, 2023
f7d2a1a
Working on double width wrapping fixes
darrenburns Oct 31, 2023
70ce4db
Chop cells to fit to width
darrenburns Nov 1, 2023
85d89d0
Fix folding when theres already text on line
darrenburns Nov 1, 2023
9f93126
Update wrapping logic to fix issues with CJK charcters disappearing w…
darrenburns Nov 1, 2023
4217cd3
Merge branch 'master' of github.com:willmcgugan/rich into double-widt…
darrenburns Nov 1, 2023
4555def
Remove old TODO comments
darrenburns Nov 1, 2023
8d6507b
Add regression test note
darrenburns Nov 1, 2023
8007cd3
Rename function to avoid breaking change
darrenburns Nov 1, 2023
f1cc1d9
Update CHANGELOG
darrenburns Nov 1, 2023
9eed638
Remove old comment that is no longer relevant
darrenburns Nov 1, 2023
df877f2
Cover off some wrapping edge cases
darrenburns Nov 1, 2023
0ab4732
Adding docstrings to tests explaining their purpose
darrenburns Nov 1, 2023
2053eb0
Renaming a local, function scope function alias
darrenburns Nov 1, 2023
077ef44
Update rich/_wrap.py
darrenburns Nov 2, 2023
0280567
PR feedback
darrenburns Nov 7, 2023
3863739
Merge branch 'double-width-wrapping-fix' of github.com:willmcgugan/ri…
darrenburns Nov 7, 2023
55972a9
Testing wrapping with trailing and leading whitespace
darrenburns Nov 7, 2023
f82b464
Improve docstring wording
darrenburns Nov 7, 2023
05c5dfc
Merge branch 'master' into double-width-wrapping-fix
darrenburns Nov 7, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,5 @@ venv.bak/
# airspeed velocity
benchmarks/env/
benchmarks/html/

sandbox/
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed

- Some text goes missing during wrapping when it contains double width characters https://github.com/Textualize/rich/issues/3176
- Ensure font is correctly inherited in exported HTML https://github.com/Textualize/rich/issues/3104

## [13.6.0] - 2023-09-30
Expand Down
73 changes: 55 additions & 18 deletions rich/_wrap.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
from __future__ import annotations

import re
from typing import Iterable, List, Tuple
from typing import Iterable

from ._loop import loop_last
from .cells import cell_len, chop_cells

re_word = re.compile(r"\s*\S+\s*")


def words(text: str) -> Iterable[Tuple[int, int, str]]:
def words(text: str) -> Iterable[tuple[int, int, str]]:
darrenburns marked this conversation as resolved.
Show resolved Hide resolved
"""Yields each word from the text as a tuple
containing (start_index, end_index, word). A "word" in this context may
include the actual word and any whitespace to the right.
"""
position = 0
word_match = re_word.match(text, position)
while word_match is not None:
Expand All @@ -17,40 +23,71 @@ def words(text: str) -> Iterable[Tuple[int, int, str]]:
word_match = re_word.match(text, end)


def divide_line(text: str, width: int, fold: bool = True) -> List[int]:
divides: List[int] = []
append = divides.append
line_position = 0
def divide_line(text: str, width: int, fold: bool = True) -> list[int]:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm assuming cell_offset is keeping track of how much space is already occupied in the line we're currently filling, right?
I personally don't love the name but maybe that could be mitigated with a short comment next to it?

"""Given a string of text, and a width (measured in cells), return a list
of cell offsets which the string should be split at in order for it to fit
within the given width.

Args:
text: The text to examine.
width: The available cell width.
fold: If True, words longer than `width` will be folded onto a new line.

Returns:
A list of indices to break the line at.
"""
break_positions: list[int] = [] # offsets to insert the breaks at
append = break_positions.append
cell_offset = 0
_cell_len = cell_len
rodrigogiraoserrao marked this conversation as resolved.
Show resolved Hide resolved

for start, _end, word in words(text):
word_length = _cell_len(word.rstrip())
if line_position + word_length > width:
remaining_space = width - cell_offset
word_fits_remaining_space = remaining_space >= word_length

if word_fits_remaining_space:
# Simplest case - the word fits within the remaining width for this line.
cell_offset += _cell_len(word)
else:
# Not enough space remaining for this word on the current line.
if word_length > width:
# The word doesn't fit on any line, so we can't simply
# place it on the next line...
if fold:
chopped_words = chop_cells(word, max_size=width, position=0)
for last, line in loop_last(chopped_words):
# Fold the word across multiple lines.
folded_word = chop_cells(word, width=width)
for last, line in loop_last(folded_word):
if start:
append(start)

if last:
line_position = _cell_len(line)
cell_offset = _cell_len(line)
else:
start += len(line)
else:
# Folding isn't allowed, so crop the word.
if start:
append(start)
line_position = _cell_len(word)
elif line_position and start:
cell_offset = _cell_len(word)
elif cell_offset and start:
# The word doesn't fit within the remaining space on the current
# line, but it *can* fit on to the next (empty) line.
append(start)
line_position = _cell_len(word)
else:
line_position += _cell_len(word)
return divides
cell_offset = _cell_len(word)

return break_positions


if __name__ == "__main__": # pragma: no cover
from .console import Console

console = Console(width=10)
console.print("12345 abcdefghijklmnopqrstuvwyxzABCDEFGHIJKLMNOPQRSTUVWXYZ 12345")
print(chop_cells("abcdefghijklmnopqrstuvwxyz", 10, position=2))
print(chop_cells("abcdefghijklmnopqrstuvwxyz", 10))

console = Console(width=20)
console.rule()
console.print("TextualはPythonの高速アプリケーション開発フレームワークです")

console.rule()
console.print("アプリケーションは1670万色を使用でき")
54 changes: 34 additions & 20 deletions rich/cells.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from __future__ import annotations

import re
from functools import lru_cache
from typing import Callable, List
from typing import Callable

from ._cell_widths import CELL_WIDTHS

Expand Down Expand Up @@ -119,27 +121,39 @@ def set_cell_size(text: str, total: int) -> str:
start = pos


# TODO: This is inefficient
# TODO: This might not work with CWJ type characters
def chop_cells(text: str, max_size: int, position: int = 0) -> List[str]:
"""Break text in to equal (cell) length strings, returning the characters in reverse
order"""
def chop_cells(
text: str,
width: int,
) -> list[str]:
"""Split text into lines such that each line fits within the available (cell) width.

Args:
text: The text to fold such that it fits in the given width.
width: The width available (number of cells).

Returns:
A list of strings such that each string in the list has cell width
less than or equal to the available width.
"""
_get_character_cell_size = get_character_cell_size
characters = [
(character, _get_character_cell_size(character)) for character in text
]
total_size = position
lines: List[List[str]] = [[]]
append = lines[-1].append

for character, size in reversed(characters):
if total_size + size > max_size:
lines.append([character])
append = lines[-1].append
total_size = size
lines: list[list[str]] = [[]]

append_new_line = lines.append
append_to_last_line = lines[-1].append

total_width = 0

for character in text:
cell_width = _get_character_cell_size(character)
char_doesnt_fit = total_width + cell_width > width

if char_doesnt_fit:
append_new_line([character])
append_to_last_line = lines[-1].append
total_width = cell_width
else:
total_size += size
append(character)
append_to_last_line(character)
total_width += cell_width

return ["".join(line) for line in lines]

Expand Down
19 changes: 19 additions & 0 deletions tests/test_cells.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from rich import cells
from rich.cells import chop_cells


def test_cell_len_long_string():
Expand Down Expand Up @@ -40,3 +41,21 @@ def test_set_cell_size_infinite():
)
== size
)


def test_chop_cells():
"""Simple example of splitting cells into lines of width 3."""
text = "abcdefghijk"
assert chop_cells(text, 3) == ["abc", "def", "ghi", "jk"]


def test_chop_cells_double_width_boundary():
"""The available width lies within a double-width character."""
text = "ありがとう"
assert chop_cells(text, 3) == ["あ", "り", "が", "と", "う"]


def test_chop_cells_mixed_width():
"""Mixed single and double-width characters."""
text = "あ1り234が5と6う78"
assert chop_cells(text, 3) == ["あ1", "り2", "34", "が5", "と6", "う7", "8"]
66 changes: 66 additions & 0 deletions tests/test_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,20 @@ def test_wrap_cjk_width_mid_character():
]


def test_wrap_cjk_mixed():
"""Regression test covering https://github.com/Textualize/rich/issues/3176 and
https://github.com/Textualize/textual/issues/3567 - double width characters could
result in text going missing when wrapping."""
text = Text("123ありがとうございました")
console = Console(width=20) # let's ensure the width passed to wrap() wins.

wrapped_lines = text.wrap(console, width=8)
with console.capture() as capture:
console.print(wrapped_lines)

assert capture.get() == "123あり\nがとうご\nざいまし\nた\n"


def test_wrap_long():
text = Text("abracadabra", justify="left")
lines = text.wrap(Console(), 4)
Expand Down Expand Up @@ -497,6 +511,47 @@ def test_wrap_long_words_2():
]


def test_wrap_long_words_followed_by_other_words():
"""After folding a word across multiple lines, we should continue from
the next word immediately after the folded word (don't take a newline
following completion of the folded word)."""
text = Text("123 12345678 123 123")
lines = text.wrap(Console(), 6)
assert lines._lines == [
Text("123 "),
Text("123456"),
Text("78 123"),
Text("123"),
]


def test_wrap_long_word_preceeded_by_word_of_full_line_length():
"""The width of the first word is the same as the available width.
Ensures that folding works correctly when there's no space available
on the current line."""
text = Text("123456 12345678 123 123")
lines = text.wrap(Console(), 6)
assert lines._lines == [
Text("123456"),
Text("123456"),
Text("78 123"),
Text("123"),
]


def test_wrap_multiple_consecutive_spaces():
"""Adding multiple consecutive spaces at the end of a line does not impact
the location at which a break will be added during the process of wrapping."""
text = Text("123456 12345678 123 123")
lines = text.wrap(Console(), 6)
assert lines._lines == [
Text("123456"),
Text("123456"),
Text("78 123"),
Text("123"),
]


def test_wrap_long_words_justify_left():
text = Text("X 123456789", justify="left")
lines = text.wrap(Console(), 4)
Expand All @@ -508,6 +563,17 @@ def test_wrap_long_words_justify_left():
assert lines[3] == Text("9 ")


def test_wrap_leading_and_trailing_whitespace():
text = Text(" 123 456 789 ")
lines = text.wrap(Console(), 4)
assert lines._lines == [
Text(" 1"),
Text("23 "),
Text("456 "),
Text("789 "),
]


def test_no_wrap_no_crop():
text = Text("Hello World!" * 3)

Expand Down