diff --git a/doc/source/reference/style.rst b/doc/source/reference/style.rst index 77e1b0abae0c46..5144f12fa373a0 100644 --- a/doc/source/reference/style.rst +++ b/doc/source/reference/style.rst @@ -41,6 +41,7 @@ Style application Styler.applymap_index Styler.format Styler.format_index + Styler.relabel_index Styler.hide Styler.concat Styler.set_td_classes diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 959febb4267fe1..578431e9ab2d83 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -44,6 +44,9 @@ e.g. totals and counts etc. (:issue:`43875`, :issue:`46186`) Additionally there is an alternative output method :meth:`.Styler.to_string`, which allows using the Styler's formatting methods to create, for example, CSVs (:issue:`44502`). +A new feature :meth:`.Styler.relabel_index` is also made available to provide full customisation of the display of +index or column headers (:issue:`47864`) + Minor feature improvements are: - Adding the ability to render ``border`` and ``border-{side}`` CSS properties in Excel (:issue:`42276`) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 414bd3b76bd0d7..7631ae24055859 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -22,7 +22,10 @@ from pandas._config import get_option from pandas._libs import lib -from pandas._typing import Level +from pandas._typing import ( + Axis, + Level, +) from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.common import ( @@ -1339,6 +1342,164 @@ def format_index( return self + def relabel_index( + self, + labels: Sequence | Index, + axis: Axis = 0, + level: Level | list[Level] | None = None, + ) -> StylerRenderer: + r""" + Relabel the index, or column header, keys to display a set of specified values. + + .. versionadded:: 1.5.0 + + Parameters + ---------- + labels : list-like or Index + New labels to display. Must have same length as the underlying values not + hidden. + axis : {"index", 0, "columns", 1} + Apply to the index or columns. + level : int, str, list, optional + The level(s) over which to apply the new labels. If `None` will apply + to all levels of an Index or MultiIndex which are not hidden. + + Returns + ------- + self : Styler + + See Also + -------- + Styler.format_index: Format the text display value of index or column headers. + Styler.hide: Hide the index, column headers, or specified data from display. + + Notes + ----- + As part of Styler, this method allows the display of an index to be + completely user-specified without affecting the underlying DataFrame data, + index, or column headers. This means that the flexibility of indexing is + maintained whilst the final display is customisable. + + Since Styler is designed to be progressively constructed with method chaining, + this method is adapted to react to the **currently specified hidden elements**. + This is useful because it means one does not have to specify all the new + labels if the majority of an index, or column headers, have already been hidden. + The following produce equivalent display (note the length of ``labels`` in + each case). + + .. code-block:: python + + # relabel first, then hide + df = pd.DataFrame({"col": ["a", "b", "c"]}) + df.style.relabel_index(["A", "B", "C"]).hide([0,1]) + # hide first, then relabel + df = pd.DataFrame({"col": ["a", "b", "c"]}) + df.style.hide([0,1]).relabel_index(["C"]) + + This method should be used, rather than :meth:`Styler.format_index`, in one of + the following cases (see examples): + + - A specified set of labels are required which are not a function of the + underlying index keys. + - The function of the underlying index keys requires a counter variable, + such as those available upon enumeration. + + Examples + -------- + Basic use + + >>> df = pd.DataFrame({"col": ["a", "b", "c"]}) + >>> df.style.relabel_index(["A", "B", "C"]) # doctest: +SKIP + col + A a + B b + C c + + Chaining with pre-hidden elements + + >>> df.style.hide([0,1]).relabel_index(["C"]) # doctest: +SKIP + col + C c + + Using a MultiIndex + + >>> midx = pd.MultiIndex.from_product([[0, 1], [0, 1], [0, 1]]) + >>> df = pd.DataFrame({"col": list(range(8))}, index=midx) + >>> styler = df.style # doctest: +SKIP + col + 0 0 0 0 + 1 1 + 1 0 2 + 1 3 + 1 0 0 4 + 1 5 + 1 0 6 + 1 7 + >>> styler.hide((midx.get_level_values(0)==0)|(midx.get_level_values(1)==0)) + ... # doctest: +SKIP + >>> styler.hide(level=[0,1]) # doctest: +SKIP + >>> styler.relabel_index(["binary6", "binary7"]) # doctest: +SKIP + col + binary6 6 + binary7 7 + + We can also achieve the above by indexing first and then re-labeling + + >>> styler = df.loc[[(1,1,0), (1,1,1)]].style + >>> styler.hide(level=[0,1]).relabel_index(["binary6", "binary7"]) + ... # doctest: +SKIP + col + binary6 6 + binary7 7 + + Defining a formatting function which uses an enumeration counter. Also note + that the value of the index key is passed in the case of string labels so it + can also be inserted into the label, using curly brackets (or double curly + brackets if the string if pre-formatted), + + >>> df = pd.DataFrame({"samples": np.random.rand(10)}) + >>> styler = df.loc[np.random.randint(0,10,3)].style + >>> styler.relabel_index([f"sample{i+1} ({{}})" for i in range(3)]) + ... # doctest: +SKIP + samples + sample1 (5) 0.315811 + sample2 (0) 0.495941 + sample3 (2) 0.067946 + """ + axis = self.data._get_axis_number(axis) + if axis == 0: + display_funcs_, obj = self._display_funcs_index, self.index + hidden_labels, hidden_lvls = self.hidden_rows, self.hide_index_ + else: + display_funcs_, obj = self._display_funcs_columns, self.columns + hidden_labels, hidden_lvls = self.hidden_columns, self.hide_columns_ + visible_len = len(obj) - len(set(hidden_labels)) + if len(labels) != visible_len: + raise ValueError( + "``labels`` must be of length equal to the number of " + f"visible labels along ``axis`` ({visible_len})." + ) + + if level is None: + level = [i for i in range(obj.nlevels) if not hidden_lvls[i]] + levels_ = refactor_levels(level, obj) + + def alias_(x, value): + if isinstance(value, str): + return value.format(x) + return value + + for ai, i in enumerate([i for i in range(len(obj)) if i not in hidden_labels]): + if len(levels_) == 1: + idx = (i, levels_[0]) if axis == 0 else (levels_[0], i) + display_funcs_[idx] = partial(alias_, value=labels[ai]) + else: + for aj, lvl in enumerate(levels_): + idx = (i, lvl) if axis == 0 else (lvl, i) + display_funcs_[idx] = partial(alias_, value=labels[ai][aj]) + + return self + def _element( html_element: str, diff --git a/pandas/tests/io/formats/style/test_format.py b/pandas/tests/io/formats/style/test_format.py index a52c679e16ad56..0b114ea128b0b7 100644 --- a/pandas/tests/io/formats/style/test_format.py +++ b/pandas/tests/io/formats/style/test_format.py @@ -30,6 +30,20 @@ def styler(df): return Styler(df, uuid_len=0) +@pytest.fixture +def df_multi(): + return DataFrame( + data=np.arange(16).reshape(4, 4), + columns=MultiIndex.from_product([["A", "B"], ["a", "b"]]), + index=MultiIndex.from_product([["X", "Y"], ["x", "y"]]), + ) + + +@pytest.fixture +def styler_multi(df_multi): + return Styler(df_multi, uuid_len=0) + + def test_display_format(styler): ctx = styler.format("{:0.1f}")._translate(True, True) assert all(["display_value" in c for c in row] for row in ctx["body"]) @@ -442,3 +456,46 @@ def test_boolean_format(): ctx = df.style._translate(True, True) assert ctx["body"][0][1]["display_value"] is True assert ctx["body"][0][2]["display_value"] is False + + +@pytest.mark.parametrize( + "hide, labels", + [ + (False, [1, 2]), + (True, [1, 2, 3, 4]), + ], +) +def test_relabel_raise_length(styler_multi, hide, labels): + if hide: + styler_multi.hide(axis=0, subset=[("X", "x"), ("Y", "y")]) + with pytest.raises(ValueError, match="``labels`` must be of length equal"): + styler_multi.relabel_index(labels=labels) + + +def test_relabel_index(styler_multi): + labels = [(1, 2), (3, 4)] + styler_multi.hide(axis=0, subset=[("X", "x"), ("Y", "y")]) + styler_multi.relabel_index(labels=labels) + ctx = styler_multi._translate(True, True) + assert {"value": "X", "display_value": 1}.items() <= ctx["body"][0][0].items() + assert {"value": "y", "display_value": 2}.items() <= ctx["body"][0][1].items() + assert {"value": "Y", "display_value": 3}.items() <= ctx["body"][1][0].items() + assert {"value": "x", "display_value": 4}.items() <= ctx["body"][1][1].items() + + +def test_relabel_columns(styler_multi): + labels = [(1, 2), (3, 4)] + styler_multi.hide(axis=1, subset=[("A", "a"), ("B", "b")]) + styler_multi.relabel_index(axis=1, labels=labels) + ctx = styler_multi._translate(True, True) + assert {"value": "A", "display_value": 1}.items() <= ctx["head"][0][3].items() + assert {"value": "B", "display_value": 3}.items() <= ctx["head"][0][4].items() + assert {"value": "b", "display_value": 2}.items() <= ctx["head"][1][3].items() + assert {"value": "a", "display_value": 4}.items() <= ctx["head"][1][4].items() + + +def test_relabel_roundtrip(styler): + styler.relabel_index(["{}", "{}"]) + ctx = styler._translate(True, True) + assert {"value": "x", "display_value": "x"}.items() <= ctx["body"][0][0].items() + assert {"value": "y", "display_value": "y"}.items() <= ctx["body"][1][0].items()