# Micro-benchmark: time `count()` over a Spark DataFrame that carries a
# pandas UDF column, using a locally generated pandas DataFrame as input.

size = 1000000  # number of rows in the generated dataset

# Random fixed-width strings for column "c".
# `pandas.util.testing.rands_array` was deprecated in pandas 1.0 and removed
# in 2.0, so the same kind of data (alphanumeric strings of a fixed length)
# is built directly with NumPy: draw `size * string_length` single
# characters, then reinterpret each run of `string_length` characters as one
# string.
string_length = 10
alphabet = np.array(
    list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
)
random_strings = (
    np.random.choice(alphabet, size=string_length * size)
    .view((np.str_, string_length))
    .astype(object)  # plain Python str objects, as rands_array returned
)

pdf = pd.DataFrame(
    {
        "a": np.random.rand(size),
        "b": np.random.randint(0, high=999999, size=size),
        "c": random_strings,
    }
)

df = spark.createDataFrame(
    pdf,
    schema=StructType(
        [
            StructField("a", DoubleType()),
            StructField("b", IntegerType()),
            StructField("c", StringType()),
        ]
    ),
)

# The UDF receives all three columns but returns the first one unchanged.
f = pandas_udf(lambda x, y, z: x, returnType=DoubleType())

# `np.random.rand` samples from [0, 1), so this filter keeps every row.
# NOTE(review): presumably the filter exists so that `count()` has to
# evaluate the UDF column -- confirm against the query plan.
result = df.withColumn("result", f(col("a"), col("b"), col("c"))).filter(
    "result < 1.0"
)

# 10 measurements; each one is the total time of 3 consecutive count() calls.
t = timeit.repeat(lambda: result.count(), repeat=10, number=3)
print(pd.Series(t).describe())