Skip to content

Commit

Permalink
REF: type change escape in Styler.format to str to allow "html" a…
Browse files Browse the repository at this point in the history
…nd "latex" (pandas-dev#41619)
  • Loading branch information
attack68 authored and TLouf committed Jun 1, 2021
1 parent 94a39df commit 3946a96
Show file tree
Hide file tree
Showing 4 changed files with 119 additions and 31 deletions.
4 changes: 2 additions & 2 deletions doc/source/user_guide/style.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1462,7 +1462,7 @@
"metadata": {},
"outputs": [],
"source": [
"df4.style.format(escape=True)"
"df4.style.format(escape=\"html\")"
]
},
{
Expand All @@ -1471,7 +1471,7 @@
"metadata": {},
"outputs": [],
"source": [
"df4.style.format('<a href=\"https://pandas.pydata.org\" target=\"_blank\">{}</a>', escape=True)"
"df4.style.format('<a href=\"https://pandas.pydata.org\" target=\"_blank\">{}</a>', escape=\"html\")"
]
},
{
Expand Down
13 changes: 8 additions & 5 deletions pandas/io/formats/style.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def _mpl(func: Callable):


class Styler(StylerRenderer):
"""
r"""
Helps style a DataFrame or Series according to the data with HTML and CSS.
Parameters
Expand Down Expand Up @@ -119,9 +119,12 @@ class Styler(StylerRenderer):
.. versionadded:: 1.3.0
escape : bool, default False
Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display
strings with HTML-safe sequences.
escape : str, optional
Use 'html' to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"``
in cell display string with HTML-safe sequences.
Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``,
``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with
LaTeX-safe sequences.
.. versionadded:: 1.3.0
Expand Down Expand Up @@ -179,7 +182,7 @@ def __init__(
uuid_len: int = 5,
decimal: str = ".",
thousands: str | None = None,
escape: bool = False,
escape: str | None = None,
):
super().__init__(
data=data,
Expand Down
87 changes: 73 additions & 14 deletions pandas/io/formats/style_render.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,9 +457,9 @@ def format(
precision: int | None = None,
decimal: str = ".",
thousands: str | None = None,
escape: bool = False,
escape: str | None = None,
) -> StylerRenderer:
"""
r"""
Format the text display value of cells.
Parameters
Expand Down Expand Up @@ -492,9 +492,13 @@ def format(
.. versionadded:: 1.3.0
escape : bool, default False
Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display
string with HTML-safe sequences. Escaping is done before ``formatter``.
escape : str, optional
Use 'html' to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"``
in cell display string with HTML-safe sequences.
Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``,
``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with
LaTeX-safe sequences.
Escaping is done before ``formatter``.
.. versionadded:: 1.3.0
Expand Down Expand Up @@ -571,13 +575,26 @@ def format(
Using a ``formatter`` with HTML ``escape`` and ``na_rep``.
>>> df = pd.DataFrame([['<div></div>', '"A&B"', None]])
>>> s = df.style.format('<a href="a.com/{0}">{0}</a>', escape=True, na_rep="NA")
>>> s = df.style.format(
... '<a href="a.com/{0}">{0}</a>', escape="html", na_rep="NA"
... )
>>> s.render()
...
<td .. ><a href="a.com/&lt;div&gt;&lt;/div&gt;">&lt;div&gt;&lt;/div&gt;</a></td>
<td .. ><a href="a.com/&#34;A&amp;B&#34;">&#34;A&amp;B&#34;</a></td>
<td .. >NA</td>
...
Using a ``formatter`` with LaTeX ``escape``.
>>> df = pd.DataFrame([["123"], ["~ ^"], ["$%#"]])
>>> print(df.style.format("\\textbf{{{}}}", escape="latex").to_latex())
\begin{tabular}{ll}
{} & {0} \\
0 & \textbf{123} \\
1 & \textbf{\textasciitilde \space \textasciicircum } \\
2 & \textbf{\$\%\#} \\
\end{tabular}
"""
if all(
(
Expand All @@ -587,7 +604,7 @@ def format(
decimal == ".",
thousands is None,
na_rep is None,
escape is False,
escape is None,
)
):
self._display_funcs.clear()
Expand Down Expand Up @@ -771,10 +788,17 @@ def wrapper(x):
return wrapper


def _str_escape_html(x):
"""if escaping html: only use on str, else return input"""
def _str_escape(x, escape):
"""if escaping: only use on str, else return input"""
if isinstance(x, str):
return escape_html(x)
if escape == "html":
return escape_html(x)
elif escape == "latex":
return _escape_latex(x)
else:
raise ValueError(
f"`escape` only permitted in {{'html', 'latex'}}, got {escape}"
)
return x


Expand All @@ -784,7 +808,7 @@ def _maybe_wrap_formatter(
precision: int | None = None,
decimal: str = ".",
thousands: str | None = None,
escape: bool = False,
escape: str | None = None,
) -> Callable:
"""
Allows formatters to be expressed as str, callable or None, where None returns
Expand All @@ -804,9 +828,9 @@ def _maybe_wrap_formatter(
else:
raise TypeError(f"'formatter' expected str or callable, got {type(formatter)}")

# Replace HTML chars if escaping
if escape:
func_1 = lambda x: func_0(_str_escape_html(x))
# Replace chars if escaping
if escape is not None:
func_1 = lambda x: func_0(_str_escape(x, escape=escape))
else:
func_1 = func_0

Expand Down Expand Up @@ -1187,3 +1211,38 @@ def _parse_latex_options_strip(value: str | int | float, arg: str) -> str:
For example: 'red /* --wrap */ ' --> 'red'
"""
return str(value).replace(arg, "").replace("/*", "").replace("*/", "").strip()


def _escape_latex(s):
r"""
Replace the characters ``&``, ``%``, ``$``, ``#``, ``_``, ``{``, ``}``,
``~``, ``^``, and ``\`` in the string with LaTeX-safe sequences.
Use this if you need to display text that might contain such characters in LaTeX.
Parameters
----------
s : str
Input to be escaped
Return
------
str :
Escaped string
"""
return (
s.replace("\\", "ab2§=§8yz") # rare string for final conversion: avoid \\ clash
.replace("ab2§=§8yz ", "ab2§=§8yz\\space ") # since \backslash gobbles spaces
.replace("&", "\\&")
.replace("%", "\\%")
.replace("$", "\\$")
.replace("#", "\\#")
.replace("_", "\\_")
.replace("{", "\\{")
.replace("}", "\\}")
.replace("~ ", "~\\space ") # since \textasciitilde gobbles spaces
.replace("~", "\\textasciitilde ")
.replace("^ ", "^\\space ") # since \textasciicircum gobbles spaces
.replace("^", "\\textasciicircum ")
.replace("ab2§=§8yz", "\\textbackslash ")
)
46 changes: 36 additions & 10 deletions pandas/tests/io/formats/style/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

pytest.importorskip("jinja2")
from pandas.io.formats.style import Styler
from pandas.io.formats.style_render import _str_escape


@pytest.fixture
Expand Down Expand Up @@ -106,22 +107,36 @@ def test_format_clear(styler):
assert (0, 0) not in styler._display_funcs # formatter cleared to default


@pytest.mark.parametrize(
    "escape, exp",
    [
        ("html", "&lt;&gt;&amp;&#34;%$#_{}~^\\~ ^ \\ "),
        (
            "latex",
            '<>\\&"\\%\\$\\#\\_\\{\\}\\textasciitilde \\textasciicircum '
            "\\textbackslash \\textasciitilde \\space \\textasciicircum \\space "
            "\\textbackslash \\space ",
        ),
    ],
)
def test_format_escape_html(escape, exp):
    # one cell holding every character that either escape mode rewrites
    chars = '<>&"%$#_{}~^\\~ ^ \\ '
    df = DataFrame([[chars]])

    # without escaping the raw value is substituted into the formatter verbatim
    s = Styler(df, uuid_len=0).format("&{0}&", escape=None)
    expected = f'<td id="T__row0_col0" class="data row0 col0" >&{chars}&</td>'
    assert expected in s.render()

    # only the value should be escaped before passing to the formatter
    s = Styler(df, uuid_len=0).format("&{0}&", escape=escape)
    expected = f'<td id="T__row0_col0" class="data row0 col0" >&{exp}&</td>'
    assert expected in s.render()


def test_format_escape_na_rep():
# tests the na_rep is not escaped
df = DataFrame([['<>&"', None]])
s = Styler(df, uuid_len=0).format("X&{0}>X", escape=True, na_rep="&")
s = Styler(df, uuid_len=0).format("X&{0}>X", escape="html", na_rep="&")
ex = '<td id="T__row0_col0" class="data row0 col0" >X&&lt;&gt;&amp;&#34;>X</td>'
expected2 = '<td id="T__row0_col1" class="data row0 col1" >&</td>'
assert ex in s.render()
Expand All @@ -130,11 +145,11 @@ def test_format_escape_na_rep():

def test_format_escape_floats(styler):
    # test given formatter for number format is not impacted by escape
    rendered = styler.format("{:.1f}", escape="html").render()
    for expected in [">0.0<", ">1.0<", ">-1.2<", ">-0.6<"]:
        assert expected in rendered
    # tests precision of floats is not impacted by escape
    rendered = styler.format(precision=1, escape="html").render()
    for expected in [">0<", ">1<", ">-1.2<", ">-0.6<"]:
        assert expected in rendered

Expand Down Expand Up @@ -239,3 +254,14 @@ def test_format_decimal(formatter, thousands, precision):
decimal="_", formatter=formatter, thousands=thousands, precision=precision
)._translate(True, True)
assert "000_123" in result["body"][0][1]["display_value"]


def test_str_escape_error():
    # a str value combined with an unrecognised ``escape`` must raise
    pattern = "`escape` only permitted in {'html', 'latex'}, got "
    for bad_escape in ("bad_escape", []):
        with pytest.raises(ValueError, match=pattern):
            _str_escape("text", bad_escape)

    _str_escape(2.00, "bad_escape")  # OK since dtype is float

0 comments on commit 3946a96

Please sign in to comment.