apache · HyukjinKwon · Jan 11, 2018 · Jan 12, 2018 · gatorsmile · Jan 13, 2018
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
@@ -2184,6 +2184,11 @@ def pandas_udf(f=None, returnType=None, functionType=None):
        |         8|      JOHN DOE|          22|
        +----------+--------------+------------+
 
+       .. note:: The length of the `pandas.Series` passed to a scalar UDF is not that of the
+           whole input column, but the length of an internal batch used for each call to the
+           function. It can therefore be used, for example, to determine the length of each
+           returned `pandas.Series`, but it cannot be relied on as the column length.
+
     2. GROUP_MAP
 
        A group map UDF defines transformation: A `pandas.DataFrame` -> A `pandas.DataFrame`