From 322f56fa7eb549b22c4dc14ae45e75b64fd1f7de Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Tue, 20 Sep 2022 17:18:21 +0800 Subject: [PATCH 1/3] init --- python/pyspark/pandas/internal.py | 2 +- python/pyspark/pandas/namespace.py | 2 +- python/pyspark/sql/pandas/conversion.py | 6 +++--- python/pyspark/sql/tests/test_pandas_grouped_map.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/python/pyspark/pandas/internal.py b/python/pyspark/pandas/internal.py index b2e67492829da..debc68dbd06a2 100644 --- a/python/pyspark/pandas/internal.py +++ b/python/pyspark/pandas/internal.py @@ -1579,7 +1579,7 @@ def prepare_pandas_frame( nullable=bool(col.isnull().any()), ), ) - for (name, col), dtype in zip(reset_index.iteritems(), index_dtypes + data_dtypes) + for (name, col), dtype in zip(reset_index.items(), index_dtypes + data_dtypes) ] return ( diff --git a/python/pyspark/pandas/namespace.py b/python/pyspark/pandas/namespace.py index 67e1af38c432b..21468855858ed 100644 --- a/python/pyspark/pandas/namespace.py +++ b/python/pyspark/pandas/namespace.py @@ -1191,7 +1191,7 @@ def output_func(pdf: pd.DataFrame) -> pd.DataFrame: ) reset_index = pdf.reset_index() - for name, col in reset_index.iteritems(): + for name, col in reset_index.items(): dt = col.dtype if is_datetime64_dtype(dt) or is_datetime64tz_dtype(dt): continue diff --git a/python/pyspark/sql/pandas/conversion.py b/python/pyspark/sql/pandas/conversion.py index d7f1ee5b8dac2..afbb4b6788b4c 100644 --- a/python/pyspark/sql/pandas/conversion.py +++ b/python/pyspark/sql/pandas/conversion.py @@ -471,7 +471,7 @@ def _convert_from_pandas( pdf[field.name] = s else: should_localize = not is_timestamp_ntz_preferred() - for column, series in pdf.iteritems(): + for column, series in pdf.items(): s = series if should_localize and is_datetime64tz_dtype(s.dtype) and s.dt.tz is not None: s = _check_series_convert_timestamps_tz_local(series, timezone) @@ -483,7 +483,7 @@ def _convert_from_pandas( copied = True pdf[column] = s - for column, series in pdf.iteritems(): + for column, series in pdf.items(): if is_timedelta64_dtype(series): if not copied: pdf = pdf.copy() @@ -601,7 +601,7 @@ def _create_from_pandas_with_arrow( # Create list of Arrow (columns, type) for serializer dump_stream arrow_data = [ - [(c, t) for (_, c), t in zip(pdf_slice.iteritems(), arrow_types)] + [(c, t) for (_, c), t in zip(pdf_slice.items(), arrow_types)] for pdf_slice in pdf_slices ] diff --git a/python/pyspark/sql/tests/test_pandas_grouped_map.py b/python/pyspark/sql/tests/test_pandas_grouped_map.py index 4fd5207f73a7b..b05c8fd86a93b 100644 --- a/python/pyspark/sql/tests/test_pandas_grouped_map.py +++ b/python/pyspark/sql/tests/test_pandas_grouped_map.py @@ -708,7 +708,7 @@ def f(key, pdf): window_range = key[1] # Make sure the key with group and window values are correct - for _, i in pdf.id.iteritems(): + for _, i in pdf.id.items(): assert expected_key[i][0] == group, "{} != {}".format(expected_key[i][0], group) assert expected_key[i][1] == window_range, "{} != {}".format( expected_key[i][1], window_range From 65f71525bda2fe0b0c87ebc099c871873c98c100 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Wed, 21 Sep 2022 10:02:04 +0800 Subject: [PATCH 2/3] deprecate iteritems --- python/pyspark/pandas/frame.py | 15 +++++++++++---- python/pyspark/pandas/series.py | 15 +++++++++++---- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py index 014fc175315a0..716d7f3383c06 100644 --- a/python/pyspark/pandas/frame.py +++ b/python/pyspark/pandas/frame.py @@ -1850,7 +1850,7 @@ def corrwith( sser.name = None return sser - def iteritems(self) -> Iterator[Tuple[Name, "Series"]]: + def items(self) -> Iterator[Tuple[Name, "Series"]]: """ Iterator over (column name, Series) pairs. @@ -2054,9 +2054,16 @@ def extract_kv_from_spark_row(row: Row) -> Tuple[Name, Any]: ): yield tuple(([k] if index else []) + list(v)) - def items(self) -> Iterator[Tuple[Name, "Series"]]: - """This is an alias of ``iteritems``.""" - return self.iteritems() + def iteritems(self) -> Iterator[Tuple[Name, "Series"]]: + """ + This is an alias of ``items``. + + .. deprecated:: 3.4.0 + iteritems is deprecated and will be removed in a future version. + Use .items instead. + """ + warnings.warn("Deprecated in 3.4, Use DataFrame.items instead.", FutureWarning) + return self.items() def to_clipboard(self, excel: bool = True, sep: Optional[str] = None, **kwargs: Any) -> None: """ diff --git a/python/pyspark/pandas/series.py b/python/pyspark/pandas/series.py index 5222198ec5ed6..ba954b161c431 100644 --- a/python/pyspark/pandas/series.py +++ b/python/pyspark/pandas/series.py @@ -5951,7 +5951,7 @@ def item(self) -> Scalar: """ return self.head(2)._to_internal_pandas().item() - def iteritems(self) -> Iterable[Tuple[Name, Any]]: + def items(self) -> Iterable[Tuple[Name, Any]]: """ Lazily iterate over (index, value) tuples. @@ -5998,9 +5998,16 @@ def extract_kv_from_spark_row(row: Row) -> Tuple[Name, Any]: ): yield k, v - def items(self) -> Iterable[Tuple[Name, Any]]: - """This is an alias of ``iteritems``.""" - return self.iteritems() + def iteritems(self) -> Iterable[Tuple[Name, Any]]: + """ + This is an alias of ``items``. + + .. deprecated:: 3.4.0 + iteritems is deprecated and will be removed in a future version. + Use .items instead. + """ + warnings.warn("Deprecated in 3.4, Use Series.items instead.", FutureWarning) + return self.items() def droplevel(self, level: Union[int, Name, List[Union[int, Name]]]) -> "Series": """ From 7343632338c3f43a8717e33a193708d97f72fddf Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 21 Sep 2022 11:06:12 +0900 Subject: [PATCH 3/3] Update python/pyspark/pandas/frame.py --- python/pyspark/pandas/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py index 716d7f3383c06..8061ea8257fca 100644 --- a/python/pyspark/pandas/frame.py +++ b/python/pyspark/pandas/frame.py @@ -2062,7 +2062,7 @@ def iteritems(self) -> Iterator[Tuple[Name, "Series"]]: iteritems is deprecated and will be removed in a future version. Use .items instead. """ - warnings.warn("Deprecated in 3.4, Use DataFrame.items instead.", FutureWarning) + warnings.warn("Deprecated in 3.4.0, Use DataFrame.items instead.", FutureWarning) return self.items() def to_clipboard(self, excel: bool = True, sep: Optional[str] = None, **kwargs: Any) -> None: