REF: type change escape in Styler.format to str to allow "html" and "latex" (pandas-dev#41619)
TLouf · Jun 1, 2021 · 3946a96 · 3946a96
1 parent 94a39df
commit 3946a96
Show file tree

Hide file tree

Showing 4 changed files with 119 additions and 31 deletions.
diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb
@@ -1462,7 +1462,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "df4.style.format(escape=True)"
+    "df4.style.format(escape=\"html\")"
    ]
   },
   {
@@ -1471,7 +1471,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "df4.style.format('<a href=\"https://pandas.pydata.org\" target=\"_blank\">{}</a>', escape=True)"
+    "df4.style.format('<a href=\"https://pandas.pydata.org\" target=\"_blank\">{}</a>', escape=\"html\")"
    ]
   },
   {

diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py
@@ -74,7 +74,7 @@ def _mpl(func: Callable):
 
 
 class Styler(StylerRenderer):
-    """
+    r"""
     Helps style a DataFrame or Series according to the data with HTML and CSS.
 
     Parameters
@@ -119,9 +119,12 @@ class Styler(StylerRenderer):
 
         .. versionadded:: 1.3.0
 
-    escape : bool, default False
-        Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display
-        strings with HTML-safe sequences.
+    escape : str, optional
+        Use 'html' to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"``
+        in cell display string with HTML-safe sequences.
+        Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``,
+        ``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with
+        LaTeX-safe sequences.
 
         ... versionadded:: 1.3.0
 
@@ -179,7 +182,7 @@ def __init__(
         uuid_len: int = 5,
         decimal: str = ".",
         thousands: str | None = None,
-        escape: bool = False,
+        escape: str | None = None,
     ):
         super().__init__(
             data=data,

diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py
@@ -457,9 +457,9 @@ def format(
         precision: int | None = None,
         decimal: str = ".",
         thousands: str | None = None,
-        escape: bool = False,
+        escape: str | None = None,
     ) -> StylerRenderer:
-        """
+        r"""
         Format the text display value of cells.
 
         Parameters
@@ -492,9 +492,13 @@ def format(
 
             .. versionadded:: 1.3.0
 
-        escape : bool, default False
-            Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display
-            string with HTML-safe sequences. Escaping is done before ``formatter``.
+        escape : str, optional
+            Use 'html' to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"``
+            in cell display string with HTML-safe sequences.
+            Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``,
+            ``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with
+            LaTeX-safe sequences.
+            Escaping is done before ``formatter``.
 
             .. versionadded:: 1.3.0
 
@@ -571,13 +575,26 @@ def format(
         Using a ``formatter`` with HTML ``escape`` and ``na_rep``.
 
         >>> df = pd.DataFrame([['<div></div>', '"A&B"', None]])
-        >>> s = df.style.format('<a href="a.com/{0}">{0}</a>', escape=True, na_rep="NA")
+        >>> s = df.style.format(
+        ...     '<a href="a.com/{0}">{0}</a>', escape="html", na_rep="NA"
+        ...     )
         >>> s.render()
         ...
         <td .. ><a href="a.com/&lt;div&gt;&lt;/div&gt;">&lt;div&gt;&lt;/div&gt;</a></td>
         <td .. ><a href="a.com/&#34;A&amp;B&#34;">&#34;A&amp;B&#34;</a></td>
         <td .. >NA</td>
         ...
+
+        Using a ``formatter`` with LaTeX ``escape``.
+
+        >>> df = pd.DataFrame([["123"], ["~ ^"], ["$%#"]])
+        >>> print(df.style.format("\\textbf{{{}}}", escape="latex").to_latex())
+        \begin{tabular}{ll}
+        {} & {0} \\
+        0 & \textbf{123} \\
+        1 & \textbf{\textasciitilde \space \textasciicircum } \\
+        2 & \textbf{\$\%\#} \\
+        \end{tabular}
         """
         if all(
             (
@@ -587,7 +604,7 @@ def format(
                 decimal == ".",
                 thousands is None,
                 na_rep is None,
-                escape is False,
+                escape is None,
             )
         ):
             self._display_funcs.clear()
@@ -771,10 +788,17 @@ def wrapper(x):
     return wrapper
 
 
-def _str_escape_html(x):
-    """if escaping html: only use on str, else return input"""
+def _str_escape(x, escape):
+    """if escaping: only use on str, else return input"""
     if isinstance(x, str):
-        return escape_html(x)
+        if escape == "html":
+            return escape_html(x)
+        elif escape == "latex":
+            return _escape_latex(x)
+        else:
+            raise ValueError(
+                f"`escape` only permitted in {{'html', 'latex'}}, got {escape}"
+            )
     return x
 
 
@@ -784,7 +808,7 @@ def _maybe_wrap_formatter(
     precision: int | None = None,
     decimal: str = ".",
     thousands: str | None = None,
-    escape: bool = False,
+    escape: str | None = None,
 ) -> Callable:
     """
     Allows formatters to be expressed as str, callable or None, where None returns
@@ -804,9 +828,9 @@ def _maybe_wrap_formatter(
     else:
         raise TypeError(f"'formatter' expected str or callable, got {type(formatter)}")
 
-    # Replace HTML chars if escaping
-    if escape:
-        func_1 = lambda x: func_0(_str_escape_html(x))
+    # Replace chars if escaping
+    if escape is not None:
+        func_1 = lambda x: func_0(_str_escape(x, escape=escape))
     else:
         func_1 = func_0
 
@@ -1187,3 +1211,38 @@ def _parse_latex_options_strip(value: str | int | float, arg: str) -> str:
     For example: 'red /* --wrap */  ' --> 'red'
     """
     return str(value).replace(arg, "").replace("/*", "").replace("*/", "").strip()
+
+
+def _escape_latex(s):
+    r"""
+    Replace the characters ``&``, ``%``, ``$``, ``#``, ``_``, ``{``, ``}``,
+    ``~``, ``^``, and ``\`` in the string with LaTeX-safe sequences.
+
+    Use this if you need to display text that might contain such characters in LaTeX.
+
+    Parameters
+    ----------
+    s : str
+        Input to be escaped
+
+    Returns
+    -------
+    str :
+        Escaped string
+    """
+    return (
+        s.replace("\\", "ab2§=§8yz")  # rare string for final conversion: avoid \\ clash
+        .replace("ab2§=§8yz ", "ab2§=§8yz\\space ")  # since \backslash gobbles spaces
+        .replace("&", "\\&")
+        .replace("%", "\\%")
+        .replace("$", "\\$")
+        .replace("#", "\\#")
+        .replace("_", "\\_")
+        .replace("{", "\\{")
+        .replace("}", "\\}")
+        .replace("~ ", "~\\space ")  # since \textasciitilde gobbles spaces
+        .replace("~", "\\textasciitilde ")
+        .replace("^ ", "^\\space ")  # since \textasciicircum gobbles spaces
+        .replace("^", "\\textasciicircum ")
+        .replace("ab2§=§8yz", "\\textbackslash ")
+    )
diff --git a/pandas/tests/io/formats/style/test_format.py b/pandas/tests/io/formats/style/test_format.py
@@ -11,6 +11,7 @@
 
 pytest.importorskip("jinja2")
 from pandas.io.formats.style import Styler
+from pandas.io.formats.style_render import _str_escape
 
 
 @pytest.fixture
@@ -106,22 +107,36 @@ def test_format_clear(styler):
     assert (0, 0) not in styler._display_funcs  # formatter cleared to default
 
 
-def test_format_escape():
-    df = DataFrame([['<>&"']])
-    s = Styler(df, uuid_len=0).format("X&{0}>X", escape=False)
-    expected = '<td id="T__row0_col0" class="data row0 col0" >X&<>&">X</td>'
+@pytest.mark.parametrize(
+    "escape, exp",
+    [
+        ("html", "&lt;&gt;&amp;&#34;%$#_{}~^\\~ ^ \\ "),
+        (
+            "latex",
+            '<>\\&"\\%\\$\\#\\_\\{\\}\\textasciitilde \\textasciicircum '
+            "\\textbackslash \\textasciitilde \\space \\textasciicircum \\space "
+            "\\textbackslash \\space ",
+        ),
+    ],
+)
+def test_format_escape_html(escape, exp):
+    chars = '<>&"%$#_{}~^\\~ ^ \\ '
+    df = DataFrame([[chars]])
+
+    s = Styler(df, uuid_len=0).format("&{0}&", escape=None)
+    expected = f'<td id="T__row0_col0" class="data row0 col0" >&{chars}&</td>'
     assert expected in s.render()
 
     # only the value should be escaped before passing to the formatter
-    s = Styler(df, uuid_len=0).format("X&{0}>X", escape=True)
-    ex = '<td id="T__row0_col0" class="data row0 col0" >X&&lt;&gt;&amp;&#34;>X</td>'
-    assert ex in s.render()
+    s = Styler(df, uuid_len=0).format("&{0}&", escape=escape)
+    expected = f'<td id="T__row0_col0" class="data row0 col0" >&{exp}&</td>'
+    assert expected in s.render()
 
 
 def test_format_escape_na_rep():
     # tests the na_rep is not escaped
     df = DataFrame([['<>&"', None]])
-    s = Styler(df, uuid_len=0).format("X&{0}>X", escape=True, na_rep="&")
+    s = Styler(df, uuid_len=0).format("X&{0}>X", escape="html", na_rep="&")
     ex = '<td id="T__row0_col0" class="data row0 col0" >X&&lt;&gt;&amp;&#34;>X</td>'
     expected2 = '<td id="T__row0_col1" class="data row0 col1" >&</td>'
     assert ex in s.render()
@@ -130,11 +145,11 @@ def test_format_escape_na_rep():
 
 def test_format_escape_floats(styler):
     # test given formatter for number format is not impacted by escape
-    s = styler.format("{:.1f}", escape=True)
+    s = styler.format("{:.1f}", escape="html")
     for expected in [">0.0<", ">1.0<", ">-1.2<", ">-0.6<"]:
         assert expected in s.render()
     # tests precision of floats is not impacted by escape
-    s = styler.format(precision=1, escape=True)
+    s = styler.format(precision=1, escape="html")
     for expected in [">0<", ">1<", ">-1.2<", ">-0.6<"]:
         assert expected in s.render()
 
@@ -239,3 +254,14 @@ def test_format_decimal(formatter, thousands, precision):
         decimal="_", formatter=formatter, thousands=thousands, precision=precision
     )._translate(True, True)
     assert "000_123" in result["body"][0][1]["display_value"]
+
+
+def test_str_escape_error():
+    msg = "`escape` only permitted in {'html', 'latex'}, got "
+    with pytest.raises(ValueError, match=msg):
+        _str_escape("text", "bad_escape")
+
+    with pytest.raises(ValueError, match=msg):
+        _str_escape("text", [])
+
+    _str_escape(2.00, "bad_escape")  # OK since dtype is float