diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb index 219b74407fae4b..7d8d8e90dfbdac 100644 --- a/doc/source/user_guide/style.ipynb +++ b/doc/source/user_guide/style.ipynb @@ -1462,7 +1462,7 @@ "metadata": {}, "outputs": [], "source": [ - "df4.style.format(escape=True)" + "df4.style.format(escape=\"html\")" ] }, { @@ -1471,7 +1471,7 @@ "metadata": {}, "outputs": [], "source": [ - "df4.style.format('{}', escape=True)" + "df4.style.format('{}', escape=\"html\")" ] }, { diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 5a9b5e3c81e84b..73924631aea5c1 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -74,7 +74,7 @@ def _mpl(func: Callable): class Styler(StylerRenderer): - """ + r""" Helps style a DataFrame or Series according to the data with HTML and CSS. Parameters @@ -119,9 +119,12 @@ class Styler(StylerRenderer): .. versionadded:: 1.3.0 - escape : bool, default False - Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display - strings with HTML-safe sequences. + escape : str, optional + Use 'html' to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` + in cell display string with HTML-safe sequences. + Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``, + ``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with + LaTeX-safe sequences. ... 
versionadded:: 1.3.0 @@ -179,7 +182,7 @@ def __init__( uuid_len: int = 5, decimal: str = ".", thousands: str | None = None, - escape: bool = False, + escape: str | None = None, ): super().__init__( data=data, diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index ce328f00cf7944..41733b77cbbd38 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -457,9 +457,9 @@ def format( precision: int | None = None, decimal: str = ".", thousands: str | None = None, - escape: bool = False, + escape: str | None = None, ) -> StylerRenderer: - """ + r""" Format the text display value of cells. Parameters @@ -492,9 +492,13 @@ def format( .. versionadded:: 1.3.0 - escape : bool, default False - Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display - string with HTML-safe sequences. Escaping is done before ``formatter``. + escape : str, optional + Use 'html' to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` + in cell display string with HTML-safe sequences. + Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``, + ``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with + LaTeX-safe sequences. + Escaping is done before ``formatter``. .. versionadded:: 1.3.0 @@ -571,13 +575,26 @@ def format( Using a ``formatter`` with HTML ``escape`` and ``na_rep``. >>> df = pd.DataFrame([['
', '"A&B"', None]]) - >>> s = df.style.format('{0}', escape=True, na_rep="NA") + >>> s = df.style.format( + ... '{0}', escape="html", na_rep="NA" + ... ) >>> s.render() ... <div></div> "A&B" NA ... + + Using a ``formatter`` with LaTeX ``escape``. + + >>> df = pd.DataFrame([["123"], ["~ ^"], ["$%#"]]) + >>> s = df.style.format("\\textbf{{{}}}", escape="latex").to_latex() + \begin{tabular}{ll} + {} & {0} \\ + 0 & \textbf{123} \\ + 1 & \textbf{\textasciitilde \space \textasciicircum } \\ + 2 & \textbf{\$\%\#} \\ + \end{tabular} """ if all( ( @@ -587,7 +604,7 @@ def format( decimal == ".", thousands is None, na_rep is None, - escape is False, + escape is None, ) ): self._display_funcs.clear() @@ -771,10 +788,17 @@ def wrapper(x): return wrapper -def _str_escape_html(x): - """if escaping html: only use on str, else return input""" +def _str_escape(x, escape): + """if escaping: only use on str, else return input""" if isinstance(x, str): - return escape_html(x) + if escape == "html": + return escape_html(x) + elif escape == "latex": + return _escape_latex(x) + else: + raise ValueError( + f"`escape` only permitted in {{'html', 'latex'}}, got {escape}" + ) return x @@ -784,7 +808,7 @@ def _maybe_wrap_formatter( precision: int | None = None, decimal: str = ".", thousands: str | None = None, - escape: bool = False, + escape: str | None = None, ) -> Callable: """ Allows formatters to be expressed as str, callable or None, where None returns @@ -804,9 +828,9 @@ def _maybe_wrap_formatter( else: raise TypeError(f"'formatter' expected str or callable, got {type(formatter)}") - # Replace HTML chars if escaping - if escape: - func_1 = lambda x: func_0(_str_escape_html(x)) + # Replace chars if escaping + if escape is not None: + func_1 = lambda x: func_0(_str_escape(x, escape=escape)) else: func_1 = func_0 @@ -1187,3 +1211,38 @@ def _parse_latex_options_strip(value: str | int | float, arg: str) -> str: For example: 'red /* --wrap */ ' --> 'red' """ return str(value).replace(arg, 
"").replace("/*", "").replace("*/", "").strip() + + +def _escape_latex(s): + r""" + Replace the characters ``&``, ``%``, ``$``, ``#``, ``_``, ``{``, ``}``, + ``~``, ``^``, and ``\`` in the string with LaTeX-safe sequences. + + Use this if you need to display text that might contain such characters in LaTeX. + + Parameters + ---------- + s : str + Input to be escaped + + Return + ------ + str : + Escaped string + """ + return ( + s.replace("\\", "ab2§=§8yz") # rare string for final conversion: avoid \\ clash + .replace("ab2§=§8yz ", "ab2§=§8yz\\space ") # since \backslash gobbles spaces + .replace("&", "\\&") + .replace("%", "\\%") + .replace("$", "\\$") + .replace("#", "\\#") + .replace("_", "\\_") + .replace("{", "\\{") + .replace("}", "\\}") + .replace("~ ", "~\\space ") # since \textasciitilde gobbles spaces + .replace("~", "\\textasciitilde ") + .replace("^ ", "^\\space ") # since \textasciicircum gobbles spaces + .replace("^", "\\textasciicircum ") + .replace("ab2§=§8yz", "\\textbackslash ") + ) diff --git a/pandas/tests/io/formats/style/test_format.py b/pandas/tests/io/formats/style/test_format.py index 9db27689a53f5a..77a547098036c6 100644 --- a/pandas/tests/io/formats/style/test_format.py +++ b/pandas/tests/io/formats/style/test_format.py @@ -11,6 +11,7 @@ pytest.importorskip("jinja2") from pandas.io.formats.style import Styler +from pandas.io.formats.style_render import _str_escape @pytest.fixture @@ -106,22 +107,36 @@ def test_format_clear(styler): assert (0, 0) not in styler._display_funcs # formatter cleared to default -def test_format_escape(): - df = DataFrame([['<>&"']]) - s = Styler(df, uuid_len=0).format("X&{0}>X", escape=False) - expected = 'X&<>&">X' +@pytest.mark.parametrize( + "escape, exp", + [ + ("html", "<>&"%$#_{}~^\\~ ^ \\ "), + ( + "latex", + '<>\\&"\\%\\$\\#\\_\\{\\}\\textasciitilde \\textasciicircum ' + "\\textbackslash \\textasciitilde \\space \\textasciicircum \\space " + "\\textbackslash \\space ", + ), + ], +) +def 
def test_str_escape_error():
    """Check ``_str_escape`` rejects unknown modes for strings only."""
    # local import so the test does not depend on the module's import block
    import re

    # the message contains ``{`` and ``}``, which are regex syntax for ``match``
    msg = re.escape("`escape` only permitted in {'html', 'latex'}, got ")
    for bad_escape in ("bad_escape", []):
        with pytest.raises(ValueError, match=msg):
            _str_escape("text", bad_escape)

    # a float is returned as-is, so the invalid mode must not raise
    assert _str_escape(2.00, "bad_escape") == 2.00