From 0fcde87aadc9a92e138f11583119465ca4b5c518 Mon Sep 17 00:00:00 2001 From: byakuinss Date: Tue, 15 Aug 2017 00:41:01 +0900 Subject: [PATCH] [SPARK-21658][SQL][PYSPARK] Add default None for value in na.replace in PySpark ## What changes were proposed in this pull request? JIRA issue: https://issues.apache.org/jira/browse/SPARK-21658 Add default None for value in `na.replace` since `Dataframe.replace` and `DataframeNaFunctions.replace` are alias. The default values are the same now. ``` >>> df = sqlContext.createDataFrame([('Alice', 10, 80.0)]) >>> df.replace({"Alice": "a"}).first() Row(_1=u'a', _2=10, _3=80.0) >>> df.na.replace({"Alice": "a"}).first() Row(_1=u'a', _2=10, _3=80.0) ``` ## How was this patch tested? Existing tests. cc viirya Author: byakuinss Closes #18895 from byakuinss/SPARK-21658. --- python/pyspark/sql/dataframe.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index edc7ca6f5146f..5cd208bb525a3 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -1403,6 +1403,16 @@ def replace(self, to_replace, value=None, subset=None): |null| null|null| +----+------+----+ + >>> df4.na.replace('Alice').show() + +----+------+----+ + | age|height|name| + +----+------+----+ + | 10| 80|null| + | 5| null| Bob| + |null| null| Tom| + |null| null|null| + +----+------+----+ + >>> df4.na.replace(['Alice', 'Bob'], ['A', 'B'], 'name').show() +----+------+----+ | age|height|name| @@ -1837,7 +1847,7 @@ def fill(self, value, subset=None): fill.__doc__ = DataFrame.fillna.__doc__ - def replace(self, to_replace, value, subset=None): + def replace(self, to_replace, value=None, subset=None): return self.df.replace(to_replace, value, subset) replace.__doc__ = DataFrame.replace.__doc__