diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 51fceb1f09a62..b1ca7557c11ca 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3266,7 +3266,7 @@ def to_html( ... ''' >>> assert html_string == df.to_html() """ - if justify is not None and justify not in fmt._VALID_JUSTIFY_PARAMETERS: + if justify is not None and justify not in fmt.VALID_JUSTIFY_PARAMETERS: raise ValueError("Invalid value for justify parameter") formatter = fmt.DataFrameFormatter( diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 6310eb070247e..356db34918447 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -20,7 +20,6 @@ import re from shutil import get_terminal_size from typing import ( - IO, TYPE_CHECKING, Any, Callable, @@ -172,7 +171,7 @@ Character recognized as decimal separator, e.g. ',' in Europe. """ -_VALID_JUSTIFY_PARAMETERS = ( +VALID_JUSTIFY_PARAMETERS = ( "left", "right", "center", @@ -196,10 +195,15 @@ class SeriesFormatter: + """ + Implement the main logic of Series.to_string, which underlies + Series.__repr__. 
+ """ + def __init__( self, series: Series, - buf: IO[str] | None = None, + *, length: bool | str = True, header: bool = True, index: bool = True, @@ -211,7 +215,7 @@ def __init__( min_rows: int | None = None, ) -> None: self.series = series - self.buf = buf if buf is not None else StringIO() + self.buf = StringIO() self.name = name self.na_rep = na_rep self.header = header @@ -355,7 +359,7 @@ def to_string(self) -> str: return str("".join(result)) -class TextAdjustment: +class _TextAdjustment: def __init__(self) -> None: self.encoding = get_option("display.encoding") @@ -371,7 +375,7 @@ def adjoin(self, space: int, *lists, **kwargs) -> str: ) -class EastAsianTextAdjustment(TextAdjustment): +class _EastAsianTextAdjustment(_TextAdjustment): def __init__(self) -> None: super().__init__() if get_option("display.unicode.ambiguous_as_wide"): @@ -410,12 +414,12 @@ def _get_pad(t): return [x.rjust(_get_pad(x)) for x in texts] -def get_adjustment() -> TextAdjustment: +def get_adjustment() -> _TextAdjustment: use_east_asian_width = get_option("display.unicode.east_asian_width") if use_east_asian_width: - return EastAsianTextAdjustment() + return _EastAsianTextAdjustment() else: - return TextAdjustment() + return _TextAdjustment() def get_dataframe_repr_params() -> dict[str, Any]: @@ -469,16 +473,9 @@ def get_series_repr_params() -> dict[str, Any]: True """ width, height = get_terminal_size() - max_rows = ( - height - if get_option("display.max_rows") == 0 - else get_option("display.max_rows") - ) - min_rows = ( - height - if get_option("display.max_rows") == 0 - else get_option("display.min_rows") - ) + max_rows_opt = get_option("display.max_rows") + max_rows = height if max_rows_opt == 0 else max_rows_opt + min_rows = height if max_rows_opt == 0 else get_option("display.min_rows") return { "name": True, @@ -490,7 +487,11 @@ def get_series_repr_params() -> dict[str, Any]: class DataFrameFormatter: - """Class for processing dataframe formatting options and data.""" + """ + 
Class for processing dataframe formatting options and data. + + Used by DataFrame.to_string, which backs DataFrame.__repr__. + """ __doc__ = __doc__ if __doc__ else "" __doc__ += common_docstring + return_docstring @@ -1102,16 +1103,16 @@ def save_to_buffer( """ Perform serialization. Write to buf or return as string if buf is None. """ - with get_buffer(buf, encoding=encoding) as f: - f.write(string) + with _get_buffer(buf, encoding=encoding) as fd: + fd.write(string) if buf is None: # error: "WriteBuffer[str]" has no attribute "getvalue" - return f.getvalue() # type: ignore[attr-defined] + return fd.getvalue() # type: ignore[attr-defined] return None @contextmanager -def get_buffer( +def _get_buffer( buf: FilePath | WriteBuffer[str] | None, encoding: str | None = None ) -> Generator[WriteBuffer[str], None, None] | Generator[StringIO, None, None]: """ @@ -1188,24 +1189,24 @@ def format_array( ------- List[str] """ - fmt_klass: type[GenericArrayFormatter] + fmt_klass: type[_GenericArrayFormatter] if lib.is_np_dtype(values.dtype, "M"): - fmt_klass = Datetime64Formatter + fmt_klass = _Datetime64Formatter values = cast(DatetimeArray, values) elif isinstance(values.dtype, DatetimeTZDtype): - fmt_klass = Datetime64TZFormatter + fmt_klass = _Datetime64TZFormatter values = cast(DatetimeArray, values) elif lib.is_np_dtype(values.dtype, "m"): - fmt_klass = Timedelta64Formatter + fmt_klass = _Timedelta64Formatter values = cast(TimedeltaArray, values) elif isinstance(values.dtype, ExtensionDtype): - fmt_klass = ExtensionArrayFormatter + fmt_klass = _ExtensionArrayFormatter elif lib.is_np_dtype(values.dtype, "fc"): fmt_klass = FloatArrayFormatter elif lib.is_np_dtype(values.dtype, "iu"): - fmt_klass = IntArrayFormatter + fmt_klass = _IntArrayFormatter else: - fmt_klass = GenericArrayFormatter + fmt_klass = _GenericArrayFormatter if space is None: space = 12 @@ -1233,7 +1234,7 @@ def format_array( return fmt_obj.get_result() -class GenericArrayFormatter: +class 
_GenericArrayFormatter: def __init__( self, values: ArrayLike, @@ -1315,7 +1316,7 @@ def _format(x): vals = extract_array(self.values, extract_numpy=True) if not isinstance(vals, np.ndarray): raise TypeError( - "ExtensionArray formatting should use ExtensionArrayFormatter" + "ExtensionArray formatting should use _ExtensionArrayFormatter" ) inferred = lib.map_infer(vals, is_float) is_float_type = ( @@ -1345,7 +1346,7 @@ def _format(x): return fmt_values -class FloatArrayFormatter(GenericArrayFormatter): +class FloatArrayFormatter(_GenericArrayFormatter): def __init__(self, *args, **kwargs) -> None: super().__init__(*args, **kwargs) @@ -1546,7 +1547,7 @@ def _format_strings(self) -> list[str]: return list(self.get_result_as_array()) -class IntArrayFormatter(GenericArrayFormatter): +class _IntArrayFormatter(_GenericArrayFormatter): def _format_strings(self) -> list[str]: if self.leading_space is False: formatter_str = lambda x: f"{x:d}".format(x=x) @@ -1557,7 +1558,7 @@ def _format_strings(self) -> list[str]: return fmt_values -class Datetime64Formatter(GenericArrayFormatter): +class _Datetime64Formatter(_GenericArrayFormatter): values: DatetimeArray def __init__( @@ -1586,7 +1587,7 @@ def _format_strings(self) -> list[str]: return fmt_values.tolist() -class ExtensionArrayFormatter(GenericArrayFormatter): +class _ExtensionArrayFormatter(_GenericArrayFormatter): values: ExtensionArray def _format_strings(self) -> list[str]: @@ -1727,7 +1728,7 @@ def get_format_datetime64( return lambda x: _format_datetime64(x, nat_rep=nat_rep) -class Datetime64TZFormatter(Datetime64Formatter): +class _Datetime64TZFormatter(_Datetime64Formatter): values: DatetimeArray def _format_strings(self) -> list[str]: @@ -1742,7 +1743,7 @@ def _format_strings(self) -> list[str]: return fmt_values -class Timedelta64Formatter(GenericArrayFormatter): +class _Timedelta64Formatter(_GenericArrayFormatter): values: TimedeltaArray def __init__( @@ -1809,7 +1810,7 @@ def _make_fixed_width( strings: 
list[str], justify: str = "right", minimum: int | None = None, - adj: TextAdjustment | None = None, + adj: _TextAdjustment | None = None, ) -> list[str]: if len(strings) == 0 or justify == "all": return strings diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 642ee6446e200..0087149021895 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -2933,9 +2933,9 @@ def test_to_string_empty_col(self): class TestGenericArrayFormatter: def test_1d_array(self): - # GenericArrayFormatter is used on types for which there isn't a dedicated + # _GenericArrayFormatter is used on types for which there isn't a dedicated # formatter. np.bool_ is one of those types. - obj = fmt.GenericArrayFormatter(np.array([True, False])) + obj = fmt._GenericArrayFormatter(np.array([True, False])) res = obj.get_result() assert len(res) == 2 # Results should be right-justified. @@ -2943,14 +2943,14 @@ def test_1d_array(self): assert res[1] == " False" def test_2d_array(self): - obj = fmt.GenericArrayFormatter(np.array([[True, False], [False, True]])) + obj = fmt._GenericArrayFormatter(np.array([[True, False], [False, True]])) res = obj.get_result() assert len(res) == 2 assert res[0] == " [True, False]" assert res[1] == " [False, True]" def test_3d_array(self): - obj = fmt.GenericArrayFormatter( + obj = fmt._GenericArrayFormatter( np.array([[[True, True], [False, False]], [[False, True], [True, False]]]) ) res = obj.get_result() @@ -3187,64 +3187,64 @@ def test_all(self): class TestTimedelta64Formatter: def test_days(self): x = pd.to_timedelta(list(range(5)) + [NaT], unit="D") - result = fmt.Timedelta64Formatter(x, box=True).get_result() + result = fmt._Timedelta64Formatter(x, box=True).get_result() assert result[0].strip() == "'0 days'" assert result[1].strip() == "'1 days'" - result = fmt.Timedelta64Formatter(x[1:2], box=True).get_result() + result = fmt._Timedelta64Formatter(x[1:2], 
box=True).get_result() assert result[0].strip() == "'1 days'" - result = fmt.Timedelta64Formatter(x, box=False).get_result() + result = fmt._Timedelta64Formatter(x, box=False).get_result() assert result[0].strip() == "0 days" assert result[1].strip() == "1 days" - result = fmt.Timedelta64Formatter(x[1:2], box=False).get_result() + result = fmt._Timedelta64Formatter(x[1:2], box=False).get_result() assert result[0].strip() == "1 days" def test_days_neg(self): x = pd.to_timedelta(list(range(5)) + [NaT], unit="D") - result = fmt.Timedelta64Formatter(-x, box=True).get_result() + result = fmt._Timedelta64Formatter(-x, box=True).get_result() assert result[0].strip() == "'0 days'" assert result[1].strip() == "'-1 days'" def test_subdays(self): y = pd.to_timedelta(list(range(5)) + [NaT], unit="s") - result = fmt.Timedelta64Formatter(y, box=True).get_result() + result = fmt._Timedelta64Formatter(y, box=True).get_result() assert result[0].strip() == "'0 days 00:00:00'" assert result[1].strip() == "'0 days 00:00:01'" def test_subdays_neg(self): y = pd.to_timedelta(list(range(5)) + [NaT], unit="s") - result = fmt.Timedelta64Formatter(-y, box=True).get_result() + result = fmt._Timedelta64Formatter(-y, box=True).get_result() assert result[0].strip() == "'0 days 00:00:00'" assert result[1].strip() == "'-1 days +23:59:59'" def test_zero(self): x = pd.to_timedelta(list(range(1)) + [NaT], unit="D") - result = fmt.Timedelta64Formatter(x, box=True).get_result() + result = fmt._Timedelta64Formatter(x, box=True).get_result() assert result[0].strip() == "'0 days'" x = pd.to_timedelta(list(range(1)), unit="D") - result = fmt.Timedelta64Formatter(x, box=True).get_result() + result = fmt._Timedelta64Formatter(x, box=True).get_result() assert result[0].strip() == "'0 days'" class TestDatetime64Formatter: def test_mixed(self): x = Series([datetime(2013, 1, 1), datetime(2013, 1, 1, 12), NaT]) - result = fmt.Datetime64Formatter(x).get_result() + result = 
fmt._Datetime64Formatter(x).get_result() assert result[0].strip() == "2013-01-01 00:00:00" assert result[1].strip() == "2013-01-01 12:00:00" def test_dates(self): x = Series([datetime(2013, 1, 1), datetime(2013, 1, 2), NaT]) - result = fmt.Datetime64Formatter(x).get_result() + result = fmt._Datetime64Formatter(x).get_result() assert result[0].strip() == "2013-01-01" assert result[1].strip() == "2013-01-02" def test_date_nanos(self): x = Series([Timestamp(200)]) - result = fmt.Datetime64Formatter(x).get_result() + result = fmt._Datetime64Formatter(x).get_result() assert result[0].strip() == "1970-01-01 00:00:00.000000200" def test_dates_display(self): @@ -3252,35 +3252,35 @@ def test_dates_display(self): # make sure that we are consistently display date formatting x = Series(date_range("20130101 09:00:00", periods=5, freq="D")) x.iloc[1] = np.nan - result = fmt.Datetime64Formatter(x).get_result() + result = fmt._Datetime64Formatter(x).get_result() assert result[0].strip() == "2013-01-01 09:00:00" assert result[1].strip() == "NaT" assert result[4].strip() == "2013-01-05 09:00:00" x = Series(date_range("20130101 09:00:00", periods=5, freq="s")) x.iloc[1] = np.nan - result = fmt.Datetime64Formatter(x).get_result() + result = fmt._Datetime64Formatter(x).get_result() assert result[0].strip() == "2013-01-01 09:00:00" assert result[1].strip() == "NaT" assert result[4].strip() == "2013-01-01 09:00:04" x = Series(date_range("20130101 09:00:00", periods=5, freq="ms")) x.iloc[1] = np.nan - result = fmt.Datetime64Formatter(x).get_result() + result = fmt._Datetime64Formatter(x).get_result() assert result[0].strip() == "2013-01-01 09:00:00.000" assert result[1].strip() == "NaT" assert result[4].strip() == "2013-01-01 09:00:00.004" x = Series(date_range("20130101 09:00:00", periods=5, freq="us")) x.iloc[1] = np.nan - result = fmt.Datetime64Formatter(x).get_result() + result = fmt._Datetime64Formatter(x).get_result() assert result[0].strip() == "2013-01-01 09:00:00.000000" assert 
result[1].strip() == "NaT" assert result[4].strip() == "2013-01-01 09:00:00.000004" x = Series(date_range("20130101 09:00:00", periods=5, freq="ns")) x.iloc[1] = np.nan - result = fmt.Datetime64Formatter(x).get_result() + result = fmt._Datetime64Formatter(x).get_result() assert result[0].strip() == "2013-01-01 09:00:00.000000000" assert result[1].strip() == "NaT" assert result[4].strip() == "2013-01-01 09:00:00.000000004" @@ -3291,7 +3291,7 @@ def test_datetime64formatter_yearmonth(self): def format_func(x): return x.strftime("%Y-%m") - formatter = fmt.Datetime64Formatter(x, formatter=format_func) + formatter = fmt._Datetime64Formatter(x, formatter=format_func) result = formatter.get_result() assert result == ["2016-01", "2016-02"] @@ -3303,7 +3303,7 @@ def test_datetime64formatter_hoursecond(self): def format_func(x): return x.strftime("%H:%M") - formatter = fmt.Datetime64Formatter(x, formatter=format_func) + formatter = fmt._Datetime64Formatter(x, formatter=format_func) result = formatter.get_result() assert result == ["10:10", "12:12"] @@ -3315,7 +3315,7 @@ def test_datetime64formatter_tz_ms(self): .dt.tz_localize("US/Pacific") ._values ) - result = fmt.Datetime64TZFormatter(x).get_result() + result = fmt._Datetime64TZFormatter(x).get_result() assert result[0].strip() == "2999-01-01 00:00:00-08:00" assert result[1].strip() == "2999-01-02 00:00:00-08:00" diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py index 2d0dc0d937709..492657587ae0e 100644 --- a/pandas/tests/io/formats/test_printing.py +++ b/pandas/tests/io/formats/test_printing.py @@ -48,7 +48,7 @@ def test_adjoin_unicode(self): adjoined = printing.adjoin(2, *data) assert adjoined == expected - adj = fmt.EastAsianTextAdjustment() + adj = fmt._EastAsianTextAdjustment() expected = """あ dd ggg b ええ hhh @@ -73,7 +73,7 @@ def test_adjoin_unicode(self): assert adj.len(cols[2]) == 26 def test_justify(self): - adj = fmt.EastAsianTextAdjustment() + adj = 
fmt._EastAsianTextAdjustment() def just(x, *args, **kwargs): # wrapper to test single str @@ -95,7 +95,7 @@ def just(x, *args, **kwargs): assert just("パンダ", 10, mode="right") == " パンダ" def test_east_asian_len(self): - adj = fmt.EastAsianTextAdjustment() + adj = fmt._EastAsianTextAdjustment() assert adj.len("abc") == 3 assert adj.len("abc") == 3 @@ -106,11 +106,11 @@ def test_east_asian_len(self): assert adj.len("パンダpanda") == 10 def test_ambiguous_width(self): - adj = fmt.EastAsianTextAdjustment() + adj = fmt._EastAsianTextAdjustment() assert adj.len("¡¡ab") == 4 with cf.option_context("display.unicode.ambiguous_as_wide", True): - adj = fmt.EastAsianTextAdjustment() + adj = fmt._EastAsianTextAdjustment() assert adj.len("¡¡ab") == 6 data = [["あ", "b", "c"], ["dd", "ええ", "ff"], ["ggg", "¡¡ab", "いいい"]] diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 5811485406b86..38a2bb52930e3 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -68,7 +68,7 @@ def biggie_df_fixture(request): return df -@pytest.fixture(params=fmt._VALID_JUSTIFY_PARAMETERS) +@pytest.fixture(params=fmt.VALID_JUSTIFY_PARAMETERS) def justify(request): return request.param