From a02b2fd7a7f751faf4e1d6d1166cbe45fdedd946 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 22 Nov 2024 17:47:15 +0000 Subject: [PATCH 1/6] chore: use `pass_through` instead of `strict` in `narwhals.from_native`, use stable.v1 API more --- altair/_magics.py | 4 ++-- altair/utils/_vegafusion_data.py | 8 +++++--- altair/utils/core.py | 9 ++++----- altair/utils/data.py | 19 +++++++++---------- altair/vegalite/v5/api.py | 2 +- tests/utils/test_schemapi.py | 2 +- tools/generate_schema_wrapper.py | 2 +- 7 files changed, 23 insertions(+), 23 deletions(-) diff --git a/altair/_magics.py b/altair/_magics.py index 8f40080db..61abf51e2 100644 --- a/altair/_magics.py +++ b/altair/_magics.py @@ -7,8 +7,8 @@ from importlib.util import find_spec from typing import Any +import narwhals.stable.v1 as nw from IPython.core import magic_arguments -from narwhals.dependencies import is_pandas_dataframe as _is_pandas_dataframe from altair.vegalite import v5 as vegalite_v5 @@ -32,7 +32,7 @@ def _prepare_data(data, data_transformers): """Convert input data to data for use within schema.""" if data is None or isinstance(data, dict): return data - elif _is_pandas_dataframe(data): + elif nw.dependencies.is_pandas_dataframe(data): if func := data_transformers.get(): data = func(data) return data diff --git a/altair/utils/_vegafusion_data.py b/altair/utils/_vegafusion_data.py index 21ca6833f..401258e8c 100644 --- a/altair/utils/_vegafusion_data.py +++ b/altair/utils/_vegafusion_data.py @@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Any, Callable, Final, TypedDict, Union, overload from weakref import WeakValueDictionary -from narwhals.dependencies import is_into_dataframe +import narwhals.stable.v1 as nw from packaging.version import Version from altair.utils._importers import import_vegafusion @@ -22,7 +22,7 @@ import sys from collections.abc import MutableMapping - from narwhals.typing import IntoDataFrame + from narwhals.stable.v1.typing import IntoDataFrame from vegafusion.runtime import ChartState @@ -54,7 +54,9 @@ def is_supported_by_vf(data: Any) -> TypeIs[DataFrameLike]: # Test whether VegaFusion supports the data type # VegaFusion v2 support narwhals-compatible DataFrames - return isinstance(data, DataFrameLike) or is_into_dataframe(data) + return isinstance(data, DataFrameLike) or nw.dependencies.is_into_dataframe( + data + ) else: diff --git a/altair/utils/core.py b/altair/utils/core.py index 669b81a06..689ff1d41 100644 --- a/altair/utils/core.py +++ b/altair/utils/core.py @@ -16,8 +16,7 @@ import jsonschema import narwhals.stable.v1 as nw -from narwhals.dependencies import get_polars, is_pandas_dataframe -from narwhals.typing import IntoDataFrame +from narwhals.stable.v1.typing import IntoDataFrame from altair.utils.schemapi import SchemaBase, SchemaLike, Undefined @@ -35,7 +34,7 @@ import typing as t import pandas as pd - from narwhals.typing import IntoExpr + from narwhals.stable.v1.typing import IntoExpr from altair.utils._dfi_types import DataFrame as DfiDataFrame from altair.vegalite.v5.schema._typing import StandardType_T as InferredVegaLiteType @@ -471,7 +470,7 @@ def sanitize_narwhals_dataframe( # See https://github.com/vega/altair/issues/1027 for why this is necessary. local_iso_fmt_string = "%Y-%m-%dT%H:%M:%S" for name, dtype in schema.items(): - if dtype == nw.Date and nw.get_native_namespace(data) is get_polars(): + if dtype == nw.Date and nw.dependencies.is_polars_dataframe(data): # Polars doesn't allow formatting `Date` with time directives. # The date -> datetime cast is extremely fast compared with `to_string` columns.append( @@ -673,7 +672,7 @@ def parse_shorthand( # noqa: C901 if schema[unescaped_field] in { nw.Object, nw.Unknown, - } and is_pandas_dataframe(nw.to_native(data_nw)): + } and nw.dependencies.is_pandas_dataframe(nw.to_native(data_nw)): attrs["type"] = infer_vegalite_type_for_pandas(nw.to_native(column)) else: attrs["type"] = infer_vegalite_type_for_narwhals(column) diff --git a/altair/utils/data.py b/altair/utils/data.py index bb6868b07..200cf5e4f 100644 --- a/altair/utils/data.py +++ b/altair/utils/data.py @@ -19,8 +19,7 @@ ) import narwhals.stable.v1 as nw -from narwhals.dependencies import is_pandas_dataframe as _is_pandas_dataframe -from narwhals.typing import IntoDataFrame +from narwhals.stable.v1.typing import IntoDataFrame from ._importers import import_pyarrow_interchange from .core import ( @@ -76,7 +75,7 @@ class SupportsGeoInterface(Protocol): def is_data_type(obj: Any) -> TypeIs[DataType]: return isinstance(obj, (dict, SupportsGeoInterface)) or isinstance( - nw.from_native(obj, eager_or_interchange_only=True, strict=False), + nw.from_native(obj, eager_or_interchange_only=True, pass_through=True), nw.DataFrame, ) @@ -188,7 +187,7 @@ def sample( if data is None: return partial(sample, n=n, frac=frac) check_data_type(data) - if _is_pandas_dataframe(data): + if nw.dependencies.is_pandas_dataframe(data): return data.sample(n=n, frac=frac) elif isinstance(data, dict): if "values" in data: @@ -319,11 +318,11 @@ def _to_text_kwds(prefix: str, extension: str, filename: str, urlpath: str, /) - def to_values(data: DataType) -> ToValuesReturnType: """Replace a DataFrame by a data model with values.""" check_data_type(data) - # `strict=False` passes `data` through as-is if it is not a Narwhals object. - data_native = nw.to_native(data, strict=False) + # `pass_through=True` passes `data` through as-is if it is not a Narwhals object. + data_native = nw.to_native(data, pass_through=True) if isinstance(data_native, SupportsGeoInterface): return {"values": _from_geo_interface(data_native)} - elif _is_pandas_dataframe(data_native): + elif nw.dependencies.is_pandas_dataframe(data_native): data_native = sanitize_pandas_dataframe(data_native) return {"values": data_native.to_dict(orient="records")} elif isinstance(data_native, dict): @@ -364,7 +363,7 @@ def _from_geo_interface(data: SupportsGeoInterface | Any) -> dict[str, Any]: - ``typing.TypeGuard`` - ``pd.DataFrame.__getattr__`` """ - if _is_pandas_dataframe(data): + if nw.dependencies.is_pandas_dataframe(data): data = sanitize_pandas_dataframe(data) return sanitize_geo_interface(data.__geo_interface__) @@ -374,7 +373,7 @@ def _data_to_json_string(data: DataType) -> str: check_data_type(data) if isinstance(data, SupportsGeoInterface): return json.dumps(_from_geo_interface(data)) - elif _is_pandas_dataframe(data): + elif nw.dependencies.is_pandas_dataframe(data): data = sanitize_pandas_dataframe(data) return data.to_json(orient="records", double_precision=15) elif isinstance(data, dict): @@ -401,7 +400,7 @@ def _data_to_csv_string(data: DataType) -> str: f"See https://github.com/vega/altair/issues/3441" ) raise NotImplementedError(msg) - elif _is_pandas_dataframe(data): + elif nw.dependencies.is_pandas_dataframe(data): data = sanitize_pandas_dataframe(data) return data.to_csv(index=False) elif isinstance(data, dict): diff --git a/altair/vegalite/v5/api.py b/altair/vegalite/v5/api.py index e67308004..909ed621e 100644 --- a/altair/vegalite/v5/api.py +++ b/altair/vegalite/v5/api.py @@ -280,7 +280,7 @@ def _prepare_data( # convert dataframes or objects with __geo_interface__ to dict elif not isinstance(data, dict) and _is_data_type(data): if func := data_transformers.get(): - data = func(nw.to_native(data, strict=False)) + data = func(nw.to_native(data, pass_through=True)) # convert string input to a URLData elif isinstance(data, str): diff --git a/tests/utils/test_schemapi.py b/tests/utils/test_schemapi.py index 8772f8196..72d01c0fb 100644 --- a/tests/utils/test_schemapi.py +++ b/tests/utils/test_schemapi.py @@ -37,7 +37,7 @@ if TYPE_CHECKING: from collections.abc import Iterable, Sequence - from narwhals.typing import IntoDataFrame + from narwhals.stable.v1.typing import IntoDataFrame _JSON_SCHEMA_DRAFT_URL = load_schema()["$schema"] # Make tests inherit from _TestSchema, so that when we test from_dict it won't diff --git a/tools/generate_schema_wrapper.py b/tools/generate_schema_wrapper.py index e024c2ca1..e0e7a4d54 100644 --- a/tools/generate_schema_wrapper.py +++ b/tools/generate_schema_wrapper.py @@ -751,7 +751,7 @@ def generate_vegalite_schema_wrapper(fp: Path, /) -> ModuleDef[str]: "from typing import Any, Literal, Union, Protocol, Sequence, List, Iterator, TYPE_CHECKING", "import pkgutil", "import json\n", - "from narwhals.dependencies import is_pandas_dataframe as _is_pandas_dataframe", + "import narwhals.stable.v1 as nw\n", "from altair.utils.schemapi import SchemaBase, Undefined, UndefinedType, _subclasses # noqa: F401\n", import_type_checking( "from datetime import date, datetime", From 308cb7a1d8c29a38d05239609b1b76d95bf6f614 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 22 Nov 2024 17:56:20 +0000 Subject: [PATCH 2/6] unrelated(?) ruff fixes --- pyproject.toml | 2 +- tests/vegalite/v5/test_params.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4890bfd13..1b8d03082 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ keywords = [ ] requires-python = ">=3.9" dynamic = ["version"] -license-files = { paths = ["LICENSE"] } +license = {file = "LICENSE"} classifiers = [ "Development Status :: 5 - Production/Stable", "Environment :: Console", diff --git a/tests/vegalite/v5/test_params.py b/tests/vegalite/v5/test_params.py index d380586a8..dd79e9ce5 100644 --- a/tests/vegalite/v5/test_params.py +++ b/tests/vegalite/v5/test_params.py @@ -107,7 +107,7 @@ def test_parameter_naming(): # test automatic naming which has the form such as param_5 prm0, prm1, prm2 = (alt.param() for _ in range(3)) - res = re.match("param_([0-9]+)", prm0.param.name) + res = re.match(r"param_([0-9]+)", prm0.param.name) assert res From d6986f4a860a32ccc9b60369ca4af3d88d2e5e83 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 22 Nov 2024 18:45:35 +0000 Subject: [PATCH 3/6] fixup, increase coverage --- altair/utils/core.py | 2 +- tests/vegalite/v5/test_api.py | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/altair/utils/core.py b/altair/utils/core.py index 689ff1d41..3f8496e53 100644 --- a/altair/utils/core.py +++ b/altair/utils/core.py @@ -470,7 +470,7 @@ def sanitize_narwhals_dataframe( # See https://github.com/vega/altair/issues/1027 for why this is necessary. local_iso_fmt_string = "%Y-%m-%dT%H:%M:%S" for name, dtype in schema.items(): - if dtype == nw.Date and nw.dependencies.is_polars_dataframe(data): + if dtype == nw.Date and nw.dependencies.is_polars_dataframe(data.to_native()): # Polars doesn't allow formatting `Date` with time directives. # The date -> datetime cast is extremely fast compared with `to_string` columns.append( diff --git a/tests/vegalite/v5/test_api.py b/tests/vegalite/v5/test_api.py index 6bb4ac9ef..390a7217f 100644 --- a/tests/vegalite/v5/test_api.py +++ b/tests/vegalite/v5/test_api.py @@ -1723,6 +1723,18 @@ def test_polars_with_pandas_nor_pyarrow(monkeypatch: pytest.MonkeyPatch): assert "numpy" not in sys.modules +def test_polars_date_32(): + df = pl.DataFrame( + {"a": [1, 2, 3], "b": [date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)]} + ) + result = alt.Chart(df).mark_line().encode(x="a", y="b").to_dict() + assert next(iter(result["datasets"].values())) == [ + {"a": 1, "b": "2020-01-01T00:00:00"}, + {"a": 2, "b": "2020-01-02T00:00:00"}, + {"a": 3, "b": "2020-01-03T00:00:00"}, + ] + + @skip_requires_pyarrow(requires_tzdata=True) def test_interchange_with_date_32(): # Test that objects which Narwhals only supports at the interchange From 8ec60f81e7b2a7255c0fe73ac5b3c49a0da06122 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 22 Nov 2024 18:48:50 +0000 Subject: [PATCH 4/6] use to_native method --- altair/utils/core.py | 4 ++-- altair/utils/data.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/altair/utils/core.py b/altair/utils/core.py index 3f8496e53..c6d235581 100644 --- a/altair/utils/core.py +++ b/altair/utils/core.py @@ -672,8 +672,8 @@ def parse_shorthand( # noqa: C901 if schema[unescaped_field] in { nw.Object, nw.Unknown, - } and nw.dependencies.is_pandas_dataframe(nw.to_native(data_nw)): - attrs["type"] = infer_vegalite_type_for_pandas(nw.to_native(column)) + } and nw.dependencies.is_pandas_dataframe(data_nw.to_native()): + attrs["type"] = infer_vegalite_type_for_pandas(column.to_native()) else: attrs["type"] = infer_vegalite_type_for_narwhals(column) if isinstance(attrs["type"], tuple): diff --git a/altair/utils/data.py b/altair/utils/data.py index 200cf5e4f..ca48abb79 100644 --- a/altair/utils/data.py +++ b/altair/utils/data.py @@ -209,7 +209,7 @@ def sample( raise ValueError(msg) n = int(frac * len(data)) indices = random.sample(range(len(data)), n) - return nw.to_native(data[indices]) + return data[indices].to_native() _FormatType = Literal["csv", "json"] From bec91b0237f72ec363782be31781334f4dd2d16e Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 22 Nov 2024 18:58:28 +0000 Subject: [PATCH 5/6] add flake8-tidy-imports rule --- pyproject.toml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 1b8d03082..8871a0bf9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -398,6 +398,15 @@ Use `Union[T, None]` instead. which have a similar but different semantic meaning. See https://github.com/vega/altair/pull/3449 """ +"narwhals.dependencies".msg = """ +Import `dependencies` from `narwhals.stable.v1` instead. +""" +"narwhals.typing".msg = """ +Import `typing` from `narwhals.stable.v1` instead. +""" +"narwhals.dtypes".msg = """ +Import `dtypes` from `narwhals.stable.v1` instead. +""" [tool.ruff.lint.per-file-ignores] # Only enforce type annotation rules on public api From c38799db827d765664565b69f2c5f2e7863caf60 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 22 Nov 2024 18:59:42 +0000 Subject: [PATCH 6/6] take check out of loop --- altair/utils/core.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/altair/utils/core.py b/altair/utils/core.py index c6d235581..c675f98dc 100644 --- a/altair/utils/core.py +++ b/altair/utils/core.py @@ -469,8 +469,9 @@ def sanitize_narwhals_dataframe( columns: list[IntoExpr] = [] # See https://github.com/vega/altair/issues/1027 for why this is necessary. local_iso_fmt_string = "%Y-%m-%dT%H:%M:%S" + is_polars_dataframe = nw.dependencies.is_polars_dataframe(data.to_native()) for name, dtype in schema.items(): - if dtype == nw.Date and nw.dependencies.is_polars_dataframe(data.to_native()): + if dtype == nw.Date and is_polars_dataframe: # Polars doesn't allow formatting `Date` with time directives. # The date -> datetime cast is extremely fast compared with `to_string` columns.append(