narwhals-dev · MarcoGorelli · Nov 23, 2024 · Oct 14, 2024 · Oct 14, 2024 · Oct 14, 2024
diff --git a/docs/api-reference/expr.md b/docs/api-reference/expr.md
@@ -43,6 +43,7 @@
         - sample
         - shift
+        - skew
         - sort
         - std
         - sum
         - tail

diff --git a/docs/api-reference/series.md b/docs/api-reference/series.md
@@ -52,6 +52,7 @@
         - shape
         - shift
+        - skew
         - sort
         - std
         - sum
         - tail

diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py
@@ -219,6 +219,9 @@ def n_unique(self) -> Self:
     def std(self, ddof: int = 1) -> Self:
         return reuse_series_implementation(self, "std", ddof=ddof, returns_scalar=True)
 
+    def skew(self: Self) -> Self:
+        # Reuses the series-level ``skew`` implementation by name; the result is
+        # flagged as a scalar (one value per column) via ``returns_scalar=True``.
+        return reuse_series_implementation(self, "skew", returns_scalar=True)
+
     def cast(self, dtype: DType) -> Self:
         return reuse_series_implementation(self, "cast", dtype)
 

diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py
@@ -305,11 +305,31 @@ def shift(self, n: int) -> Self:
             result = ca
         return self._from_native_series(result)
 
-    def std(self, ddof: int = 1) -> int:
+    def std(self, ddof: int = 1) -> float:
         import pyarrow.compute as pc  # ignore-banned-import()
 
         return pc.stddev(self._native_series, ddof=ddof)  # type: ignore[no-any-return]
 
+    def skew(self: Self) -> float | None:
+        import pyarrow.compute as pc  # ignore-banned-import()
+
+        ser = self._native_series
+        ser_not_null = pc.drop_null(ser)
+        if len(ser_not_null) == 0:
+            return None
+        elif len(ser_not_null) == 1:
+            return float("nan")
+        elif len(ser_not_null) == 2:
+            return 0.0
+        else:
+            m = pc.subtract(ser_not_null, pc.mean(ser_not_null))
+            m2 = pc.mean(pc.power(m, 2))
+            m3 = pc.mean(pc.power(m, 3))
+            m2_py = m2.as_py()
+            m3_py = m3.as_py()
+            g1 = float(m3_py) / (float(m2_py) ** 1.5) if float(m2_py) != 0 else 0
+            return float(g1)  # Biased population skewness
+
     def count(self) -> int:
         import pyarrow.compute as pc  # ignore-banned-import()
 

diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py
@@ -426,6 +426,13 @@ def std(self, ddof: int = 1) -> Self:
             returns_scalar=True,
         )
 
+    def skew(self: Self) -> Self:
+        # Defers to the native object's own ``skew()`` and flags the result as a
+        # scalar-returning reduction.
+        # NOTE(review): unlike the pandas-like / PyArrow series implementations, no
+        # special-casing of 0, 1 or 2 non-null values happens here - confirm parity.
+        return self._from_call(
+            lambda _input: _input.skew(),
+            "skew",
+            returns_scalar=True,
+        )
+
     def shift(self, n: int) -> Self:
         return self._from_call(
             lambda _input, n: _input.shift(n),

diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py
@@ -228,6 +228,9 @@ def median(self) -> Self:
     def std(self, *, ddof: int = 1) -> Self:
         return reuse_series_implementation(self, "std", ddof=ddof, returns_scalar=True)
 
+    def skew(self: Self) -> Self:
+        # Reuses the series-level ``skew`` implementation by name; the result is
+        # flagged as a scalar (one value per column) via ``returns_scalar=True``.
+        return reuse_series_implementation(self, "skew", returns_scalar=True)
+
     def any(self) -> Self:
         return reuse_series_implementation(self, "any", returns_scalar=True)
 

diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py
@@ -435,13 +435,25 @@ def median(self) -> Any:
         ser = self._native_series
         return ser.median()
 
-    def std(
-        self,
-        *,
-        ddof: int = 1,
-    ) -> Any:
+    def std(self: Self, *, ddof: int = 1) -> float:
         ser = self._native_series
-        return ser.std(ddof=ddof)
+        return ser.std(ddof=ddof)  # type: ignore[no-any-return]
+
+    def skew(self: Self) -> float | None:
+        ser = self._native_series
+        ser_not_null = ser.dropna()
+        if len(ser_not_null) == 0:
+            return None
+        elif len(ser_not_null) == 1:
+            return float("nan")
+        elif len(ser_not_null) == 2:
+            return 0.0
+        else:
+            m = ser_not_null - ser_not_null.mean()
+            m2 = (m**2).mean()
+            m3 = (m**3).mean()
+            g1 = m3 / (m2**1.5) if m2 != 0 else 0
+            return float(g1)  # Biased population skewness
 
     def len(self) -> Any:
         return len(self._native_series)

diff --git a/narwhals/expr.py b/narwhals/expr.py
@@ -496,7 +496,7 @@ def std(self, *, ddof: int = 1) -> Self:
         Get standard deviation.
 
         Arguments:
-            ddof: “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
+            ddof: "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof,
                      where N represents the number of elements. By default ddof is 1.
 
         Examples:
@@ -608,6 +608,52 @@ def map_batches(
             )
         )
 
+    def skew(self: Self) -> Self:
+        """
+        Calculate the sample skewness of a column.
+
+        Returns:
+            An expression representing the sample skewness of the column.
+
+        Examples:
+            >>> import pandas as pd
+            >>> import polars as pl
+            >>> import pyarrow as pa
+            >>> import narwhals as nw
+            >>> df_pd = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 2, 10, 100]})
+            >>> df_pl = pl.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 2, 10, 100]})
+            >>> df_pa = pa.Table.from_pandas(df_pd)
+
+            Let's define a dataframe-agnostic function:
+
+            >>> @nw.narwhalify
+            ... def func(df):
+            ...     return df.select(nw.col("a", "b").skew())
+
+            We can then pass pandas, Polars, or PyArrow to `func`:
+
+            >>> func(df_pd)
+                 a         b
+            0  0.0  1.472427
+            >>> func(df_pl)
+            shape: (1, 2)
+            ┌─────┬──────────┐
+            │ a   ┆ b        │
+            │ --- ┆ ---      │
+            │ f64 ┆ f64      │
+            ╞═════╪══════════╡
+            │ 0.0 ┆ 1.472427 │
+            └─────┴──────────┘
+            >>> func(df_pa)
+            pyarrow.Table
+            a: double
+            b: double
+            ----
+            a: [[0]]
+            b: [[1.4724267269058975]]
+
+        Notes:
+            Skewness measures the asymmetry of a distribution; a perfectly
+            symmetric column (such as `a` above) has a skewness of 0.
+            For pandas-like and PyArrow inputs, null values are dropped and the
+            biased estimator is used: the third central moment divided by the
+            second central moment raised to the power 1.5.
+        """
+        return self.__class__(lambda plx: self._call(plx).skew())
+
     def sum(self) -> Expr:
         """
         Return the sum value.

diff --git a/narwhals/series.py b/narwhals/series.py
@@ -559,6 +559,42 @@ def median(self) -> Any:
         """
         return self._compliant_series.median()
 
+    def skew(self: Self) -> Any:
+        """
+        Calculate the sample skewness of the Series.
+
+        Returns:
+            The sample skewness of the Series.
+
+        Examples:
+            >>> import pandas as pd
+            >>> import polars as pl
+            >>> import pyarrow as pa
+            >>> import narwhals as nw
+            >>> s = [1, 1, 2, 10, 100]
+            >>> s_pd = pd.Series(s)
+            >>> s_pl = pl.Series(s)
+            >>> s_pa = pa.chunked_array([s])
+
+            We define a library agnostic function:
+
+            >>> @nw.narwhalify
+            ... def func(s):
+            ...     return s.skew()
+
+            We can pass any supported library such as pandas, Polars, or PyArrow to `func`:
+
+            >>> func(s_pd)
+            1.4724267269058975
+            >>> func(s_pl)
+            1.4724267269058975
+            >>> func(s_pa)
+            1.4724267269058975
+
+        Notes:
+            The skewness is a measure of the asymmetry of the probability distribution.
+            A perfectly symmetric distribution has a skewness of 0.
+        """
+        return self._compliant_series.skew()
+
     def count(self) -> Any:
         """
         Returns the number of non-null elements in the Series.

diff --git a/tests/expr_and_series/skew_test.py b/tests/expr_and_series/skew_test.py
@@ -0,0 +1,29 @@
+from __future__ import annotations
+
+import pytest
+
+import narwhals.stable.v1 as nw
+from tests.utils import ConstructorEager
+from tests.utils import assert_equal_data
+
+data = [1, 2, 3, 2, 1]
+
+
+@pytest.mark.parametrize(
+    ("size", "expected"),
+    [
+        (0, None),
+        (1, float("nan")),
+        (2, 0.0),
+        (5, 0.343622),
+    ],
+)
+def test_skew_series(
+    constructor_eager: ConstructorEager, size: int, expected: float | None
+) -> None:
+    result = (
+        nw.from_native(constructor_eager({"a": data}), eager_only=True)
+        .head(size)["a"]
+        .skew()
+    )
+    assert_equal_data({"a": [result]}, {"a": [expected]})
-Original file line number
+Diff line change
@@ Expand Up / @@ -43,6 +43,7 @@ @@
             - sample
             - shift
             - sort
+            - skew
             - std
             - sum
             - tail
@@ Expand Down @@