Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REF: type change escape in Styler.format to str to allow "html" and "latex" #41619

Merged
merged 16 commits into from
May 27, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions doc/source/user_guide/style.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1462,7 +1462,7 @@
"metadata": {},
"outputs": [],
"source": [
"df4.style.format(escape=True)"
"df4.style.format(escape=\"html\")"
]
},
{
Expand All @@ -1471,7 +1471,7 @@
"metadata": {},
"outputs": [],
"source": [
"df4.style.format('<a href=\"https://pandas.pydata.org\" target=\"_blank\">{}</a>', escape=True)"
"df4.style.format('<a href=\"https://pandas.pydata.org\" target=\"_blank\">{}</a>', escape=\"html\")"
]
},
{
Expand Down
13 changes: 8 additions & 5 deletions pandas/io/formats/style.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def _mpl(func: Callable):


class Styler(StylerRenderer):
"""
r"""
Helps style a DataFrame or Series according to the data with HTML and CSS.

Parameters
Expand Down Expand Up @@ -119,9 +119,12 @@ class Styler(StylerRenderer):

.. versionadded:: 1.3.0

escape : bool, default False
Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display
strings with HTML-safe sequences.
escape : str, optional
Use 'html' to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"``
in cell display string with HTML-safe sequences.
Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``,
``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with
LaTeX-safe sequences.

.. versionadded:: 1.3.0

Expand Down Expand Up @@ -179,7 +182,7 @@ def __init__(
uuid_len: int = 5,
decimal: str = ".",
thousands: str | None = None,
escape: bool = False,
escape: str | None = None,
):
super().__init__(
data=data,
Expand Down
87 changes: 73 additions & 14 deletions pandas/io/formats/style_render.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,9 +457,9 @@ def format(
precision: int | None = None,
decimal: str = ".",
thousands: str | None = None,
escape: bool = False,
escape: str | None = None,
) -> StylerRenderer:
"""
r"""
Format the text display value of cells.

Parameters
Expand Down Expand Up @@ -492,9 +492,13 @@ def format(

.. versionadded:: 1.3.0

escape : bool, default False
Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display
string with HTML-safe sequences. Escaping is done before ``formatter``.
escape : str, optional
Use 'html' to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"``
in cell display string with HTML-safe sequences.
Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``,
``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with
LaTeX-safe sequences.
Escaping is done before ``formatter``.

.. versionadded:: 1.3.0

Expand Down Expand Up @@ -571,13 +575,26 @@ def format(
Using a ``formatter`` with HTML ``escape`` and ``na_rep``.

>>> df = pd.DataFrame([['<div></div>', '"A&B"', None]])
>>> s = df.style.format('<a href="a.com/{0}">{0}</a>', escape=True, na_rep="NA")
>>> s = df.style.format(
... '<a href="a.com/{0}">{0}</a>', escape="html", na_rep="NA"
... )
>>> s.render()
...
<td .. ><a href="a.com/&lt;div&gt;&lt;/div&gt;">&lt;div&gt;&lt;/div&gt;</a></td>
<td .. ><a href="a.com/&#34;A&amp;B&#34;">&#34;A&amp;B&#34;</a></td>
<td .. >NA</td>
...

Using a ``formatter`` with LaTeX ``escape``.

>>> df = pd.DataFrame([["123"], ["~ ^"], ["$%#"]])
>>> s = df.style.format("\\textbf{{{}}}", escape="latex")
>>> print(s.to_latex())
\begin{tabular}{ll}
{} & {0} \\
0 & \textbf{123} \\
1 & \textbf{\textasciitilde \space \textasciicircum } \\
2 & \textbf{\$\%\#} \\
\end{tabular}
"""
if all(
(
Expand All @@ -587,7 +604,7 @@ def format(
decimal == ".",
thousands is None,
na_rep is None,
escape is False,
escape is None,
)
):
self._display_funcs.clear()
Expand Down Expand Up @@ -771,10 +788,17 @@ def wrapper(x):
return wrapper


def _str_escape_html(x):
"""if escaping html: only use on str, else return input"""
def _str_escape(x, escape):
"""if escaping: only use on str, else return input"""
if isinstance(x, str):
return escape_html(x)
if escape == "html":
return escape_html(x)
elif escape == "latex":
return _escape_latex(x)
jreback marked this conversation as resolved.
Show resolved Hide resolved
else:
raise ValueError(
f"`escape` only permitted in {{'html', 'latex'}}, got {escape}"
)
return x


Expand All @@ -784,7 +808,7 @@ def _maybe_wrap_formatter(
precision: int | None = None,
decimal: str = ".",
thousands: str | None = None,
escape: bool = False,
escape: str | None = None,
) -> Callable:
"""
Allows formatters to be expressed as str, callable or None, where None returns
Expand All @@ -804,9 +828,9 @@ def _maybe_wrap_formatter(
else:
raise TypeError(f"'formatter' expected str or callable, got {type(formatter)}")

# Replace HTML chars if escaping
if escape:
func_1 = lambda x: func_0(_str_escape_html(x))
# Replace chars if escaping
if escape is not None:
func_1 = lambda x: func_0(_str_escape(x, escape=escape))
else:
func_1 = func_0

Expand Down Expand Up @@ -1187,3 +1211,38 @@ def _parse_latex_options_strip(value: str | int | float, arg: str) -> str:
For example: 'red /* --wrap */ ' --> 'red'
"""
return str(value).replace(arg, "").replace("/*", "").replace("*/", "").strip()


def _escape_latex(s):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

where is eacape_html?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

imported from markupsafe which is a jinja2 dependency

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it is a very simple function, can write here instead of having dependency to markupsafe if preferred?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh its fine, just not completely obvious where it is (but its already there so its fine)

r"""
Replace the characters ``&``, ``%``, ``$``, ``#``, ``_``, ``{``, ``}``,
``~``, ``^``, and ``\`` in the string with LaTeX-safe sequences.

Use this if you need to display text that might contain such characters in LaTeX.

Parameters
----------
s : str
Input to be escaped

Return
------
str :
Escaped string
"""
return (
s.replace("\\", "ab2§=§8yz") # rare string for final conversion: avoid \\ clash
.replace("ab2§=§8yz ", "ab2§=§8yz\\space ") # since \backslash gobbles spaces
.replace("&", "\\&")
.replace("%", "\\%")
.replace("$", "\\$")
.replace("#", "\\#")
.replace("_", "\\_")
.replace("{", "\\{")
.replace("}", "\\}")
.replace("~ ", "~\\space ") # since \textasciitilde gobbles spaces
.replace("~", "\\textasciitilde ")
.replace("^ ", "^\\space ") # since \textasciicircum gobbles spaces
.replace("^", "\\textasciicircum ")
.replace("ab2§=§8yz", "\\textbackslash ")
)
46 changes: 36 additions & 10 deletions pandas/tests/io/formats/style/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

pytest.importorskip("jinja2")
from pandas.io.formats.style import Styler
from pandas.io.formats.style_render import _str_escape


@pytest.fixture
Expand Down Expand Up @@ -106,22 +107,36 @@ def test_format_clear(styler):
assert (0, 0) not in styler._display_funcs # formatter cleared to default


def test_format_escape():
df = DataFrame([['<>&"']])
s = Styler(df, uuid_len=0).format("X&{0}>X", escape=False)
expected = '<td id="T__row0_col0" class="data row0 col0" >X&<>&">X</td>'
@pytest.mark.parametrize(
"escape, exp",
[
("html", "&lt;&gt;&amp;&#34;%$#_{}~^\\~ ^ \\ "),
(
"latex",
'<>\\&"\\%\\$\\#\\_\\{\\}\\textasciitilde \\textasciicircum '
"\\textbackslash \\textasciitilde \\space \\textasciicircum \\space "
"\\textbackslash \\space ",
),
],
)
def test_format_escape_html(escape, exp):
chars = '<>&"%$#_{}~^\\~ ^ \\ '
df = DataFrame([[chars]])

s = Styler(df, uuid_len=0).format("&{0}&", escape=None)
expected = f'<td id="T__row0_col0" class="data row0 col0" >&{chars}&</td>'
assert expected in s.render()

# only the value should be escaped before passing to the formatter
s = Styler(df, uuid_len=0).format("X&{0}>X", escape=True)
ex = '<td id="T__row0_col0" class="data row0 col0" >X&&lt;&gt;&amp;&#34;>X</td>'
assert ex in s.render()
s = Styler(df, uuid_len=0).format("&{0}&", escape=escape)
expected = f'<td id="T__row0_col0" class="data row0 col0" >&{exp}&</td>'
assert expected in s.render()


def test_format_escape_na_rep():
# tests the na_rep is not escaped
df = DataFrame([['<>&"', None]])
s = Styler(df, uuid_len=0).format("X&{0}>X", escape=True, na_rep="&")
s = Styler(df, uuid_len=0).format("X&{0}>X", escape="html", na_rep="&")
ex = '<td id="T__row0_col0" class="data row0 col0" >X&&lt;&gt;&amp;&#34;>X</td>'
expected2 = '<td id="T__row0_col1" class="data row0 col1" >&</td>'
assert ex in s.render()
Expand All @@ -130,11 +145,11 @@ def test_format_escape_na_rep():

def test_format_escape_floats(styler):
# test given formatter for number format is not impacted by escape
s = styler.format("{:.1f}", escape=True)
s = styler.format("{:.1f}", escape="html")
for expected in [">0.0<", ">1.0<", ">-1.2<", ">-0.6<"]:
assert expected in s.render()
# tests precision of floats is not impacted by escape
s = styler.format(precision=1, escape=True)
s = styler.format(precision=1, escape="html")
for expected in [">0<", ">1<", ">-1.2<", ">-0.6<"]:
assert expected in s.render()

Expand Down Expand Up @@ -239,3 +254,14 @@ def test_format_decimal(formatter, thousands, precision):
decimal="_", formatter=formatter, thousands=thousands, precision=precision
)._translate(True, True)
assert "000_123" in result["body"][0][1]["display_value"]


def test_str_escape_error():
    """Unknown ``escape`` modes raise for str input; non-str input passes through."""
    msg = "`escape` only permitted in {'html', 'latex'}, got "

    # an unrecognised mode given as a string
    with pytest.raises(ValueError, match=msg):
        _str_escape("text", "bad_escape")

    # an unrecognised mode given as a non-string object
    with pytest.raises(ValueError, match=msg):
        _str_escape("text", [])

    # OK since dtype is float: the mode is never inspected for non-str values,
    # and the value must come back unchanged
    assert _str_escape(2.00, "bad_escape") == 2.00