Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

docs: use type hints + from_native/to_native in dataframe.py #1425

Merged
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 80 additions & 54 deletions narwhals/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -958,21 +958,23 @@ def row(self, index: int) -> tuple[Any, ...]:
>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> from narwhals.typing import IntoDataFrame
>>>
>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)

Let's define a library-agnostic function to get the second row.

>>> @nw.narwhalify
... def func(df):
>>> def agnostic_row(df_native: IntoDataFrame) -> tuple[Any, ...]:
MarcoGorelli marked this conversation as resolved.
Show resolved Hide resolved
... df = nw.from_native(df_native)
... return df.row(1)

We can then pass pandas / Polars / any other supported library:

>>> func(df_pd)
>>> agnostic_row(df_pd)
(2, 5)
>>> func(df_pl)
>>> agnostic_row(df_pl)
(2, 5)
"""
return self._compliant_frame.row(index) # type: ignore[no-any-return]
Expand All @@ -985,26 +987,30 @@ def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Se
>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrame
>>>
>>> data = {"a": [1, 2, 3], "ba": [4, 5, 6]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
>>> def agnostic_pipe(df_native: IntoFrame) -> IntoFrame:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we use IntoFrameT? this signals that we return the same kind of object that we started with

... df = nw.from_native(df_native)
... return df.pipe(
... lambda _df: _df.select([x for x in _df.columns if len(x) == 1])
... lambda _df: _df.select(
... [x for x in _df.columns if len(x) == 1]
... ).to_native()
... )

We can then pass either pandas or Polars:

>>> func(df_pd)
>>> agnostic_pipe(df_pd)
a
0 1
1 2
2 3
>>> func(df_pl)
>>> agnostic_pipe(df_pl)
shape: (3, 1)
┌─────┐
│ a   │
Expand Down Expand Up @@ -1033,22 +1039,24 @@ def drop_nulls(self: Self, subset: str | list[str] | None = None) -> Self:
>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrame
>>>
>>> data = {"a": [1.0, 2.0, None], "ba": [1.0, None, 2.0]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
... return df.drop_nulls()
>>> def agnostic_drop_nulls(df_native: IntoFrame) -> IntoFrame:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same

... df = nw.from_native(df_native)
... return df.drop_nulls().to_native()

We can then pass either pandas or Polars:

>>> func(df_pd)
>>> agnostic_drop_nulls(df_pd)
a ba
0 1.0 1.0
>>> func(df_pl)
>>> agnostic_drop_nulls(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a   ┆ ba  │
Expand All @@ -1069,24 +1077,26 @@ def with_row_index(self, name: str = "index") -> Self:
>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrame
>>>
>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
... return df.with_row_index()
>>> def agnostic_with_row_index(df_native: IntoFrame) -> IntoFrame:
... df = nw.from_native(df_native)
... return df.with_row_index().to_native()

We can then pass either pandas or Polars:

>>> func(df_pd)
>>> agnostic_with_row_index(df_pd)
index a b
0 0 1 4
1 1 2 5
2 2 3 6
>>> func(df_pl)
>>> agnostic_with_row_index(df_pl)
shape: (3, 3)
┌───────┬─────┬─────┐
│ index ┆ a   ┆ b   │
Expand All @@ -1108,6 +1118,9 @@ def schema(self) -> Schema:
>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> from narwhals.schema import Schema
>>> from narwhals.typing import IntoFrame
>>>
>>> data = {
... "foo": [1, 2, 3],
... "bar": [6.0, 7.0, 8.0],
Expand All @@ -1118,17 +1131,17 @@ def schema(self) -> Schema:

We define a library agnostic function:

>>> @nw.narwhalify
... def func(df):
>>> def agnostic_schema(df_native: IntoFrame) -> Schema:
... df = nw.from_native(df_native)
... return df.schema

You can pass either pandas or Polars to `func`:
You can pass either pandas or Polars to `agnostic_schema`:

>>> df_pd_schema = func(df_pd)
>>> df_pd_schema = agnostic_schema(df_pd)
>>> df_pd_schema
Schema({'foo': Int64, 'bar': Float64, 'ham': String})

>>> df_pl_schema = func(df_pl)
>>> df_pl_schema = agnostic_schema(df_pl)
>>> df_pl_schema
Schema({'foo': Int64, 'bar': Float64, 'ham': String})
"""
Expand All @@ -1141,6 +1154,9 @@ def collect_schema(self: Self) -> Schema:
>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> from narwhals.schema import Schema
>>> from narwhals.typing import IntoFrame
>>>
>>> data = {
... "foo": [1, 2, 3],
... "bar": [6.0, 7.0, 8.0],
Expand All @@ -1151,17 +1167,17 @@ def collect_schema(self: Self) -> Schema:

We define a library agnostic function:

>>> @nw.narwhalify
... def func(df):
>>> def agnostic_collect_schema(df_native: IntoFrame) -> Schema:
... df = nw.from_native(df_native)
... return df.collect_schema()

You can pass either pandas or Polars to `func`:
You can pass either pandas or Polars to `agnostic_collect_schema`:

>>> df_pd_schema = func(df_pd)
>>> df_pd_schema = agnostic_collect_schema(df_pd)
>>> df_pd_schema
Schema({'foo': Int64, 'bar': Float64, 'ham': String})

>>> df_pl_schema = func(df_pl)
>>> df_pl_schema = agnostic_collect_schema(df_pl)
>>> df_pl_schema
Schema({'foo': Int64, 'bar': Float64, 'ham': String})
"""
Expand All @@ -1176,24 +1192,26 @@ def columns(self) -> list[str]:
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrame
>>>
>>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
>>> df_pd = pd.DataFrame(df)
>>> df_pl = pl.DataFrame(df)
>>> df_pa = pa.table(df)

We define a library agnostic function:

>>> @nw.narwhalify
... def func(df):
>>> def agnostic_columns(df_native: IntoFrame) -> list[str]:
... df = nw.from_native(df_native)
... return df.columns

We can pass any supported library such as pandas, Polars, or PyArrow to `agnostic_columns`:

>>> func(df_pd)
>>> agnostic_columns(df_pd)
['foo', 'bar', 'ham']
>>> func(df_pl)
>>> agnostic_columns(df_pl)
['foo', 'bar', 'ham']
>>> func(df_pa)
>>> agnostic_columns(df_pa)
['foo', 'bar', 'ham']
"""
return super().columns
Expand Down Expand Up @@ -2952,24 +2970,26 @@ def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Se
>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrame
>>>
>>> data = {"a": [1, 2, 3], "ba": [4, 5, 6]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.LazyFrame(data)

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
... return df.pipe(lambda _df: _df.select("a"))
>>> def agnostic_pipe(df_native: IntoFrame) -> IntoFrame:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IntoFrameT

... df = nw.from_native(df_native)
... return df.pipe(lambda _df: _df.select("a")).to_native()

We can then pass either pandas or Polars:

>>> func(df_pd)
>>> agnostic_pipe(df_pd)
a
0 1
1 2
2 3
>>> func(df_pl).collect()
>>> agnostic_pipe(df_pl).collect()
shape: (3, 1)
┌─────┐
│ a   │
Expand Down Expand Up @@ -2998,22 +3018,24 @@ def drop_nulls(self: Self, subset: str | list[str] | None = None) -> Self:
>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrame
>>>
>>> data = {"a": [1.0, 2.0, None], "ba": [1.0, None, 2.0]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.LazyFrame(data)

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
... return df.drop_nulls()
>>> def agnostic_drop_nulls(df_native: IntoFrame) -> IntoFrame:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same

... df = nw.from_native(df_native)
... return df.drop_nulls().to_native()

We can then pass either pandas or Polars:

>>> func(df_pd)
>>> agnostic_drop_nulls(df_pd)
a ba
0 1.0 1.0
>>> func(df_pl).collect()
>>> agnostic_drop_nulls(df_pl).collect()
shape: (1, 2)
┌─────┬─────┐
│ a   ┆ ba  │
Expand All @@ -3032,24 +3054,26 @@ def with_row_index(self, name: str = "index") -> Self:
>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrame
>>>
>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.LazyFrame(data)

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
... return df.with_row_index()
>>> def agnostic_with_row_index(df_native: IntoFrame) -> IntoFrame:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same

... df = nw.from_native(df_native)
... return df.with_row_index().to_native()

We can then pass either pandas or Polars:

>>> func(df_pd)
>>> agnostic_with_row_index(df_pd)
index a b
0 0 1 4
1 1 2 5
2 2 3 6
>>> func(df_pl).collect()
>>> agnostic_with_row_index(df_pl).collect()
shape: (3, 3)
┌───────┬─────┬─────┐
│ index ┆ a   ┆ b   │
Expand Down Expand Up @@ -3110,21 +3134,23 @@ def columns(self) -> list[str]:
>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrame
>>>
>>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
>>> df_pd = pd.DataFrame(df)
>>> lf_pl = pl.LazyFrame(df)

We define a library agnostic function:

>>> @nw.narwhalify
... def func(df):
>>> def agnostic_columns(df_native: IntoFrame) -> list[str]:
... df = nw.from_native(df_native)
... return df.columns

We can then pass either pandas or Polars to `func`:
We can then pass either pandas or Polars to `agnostic_columns`:

>>> func(df_pd)
>>> agnostic_columns(df_pd)
['foo', 'bar', 'ham']
>>> func(lf_pl) # doctest: +SKIP
>>> agnostic_columns(lf_pl) # doctest: +SKIP
['foo', 'bar', 'ham']
"""
return super().columns
Expand Down
Loading