feat: make pandas optional dependency (#30)

eakmanrq · May 25, 2024 · 16085ef
1 parent 80591aa
commit 16085ef
Show file tree

Hide file tree

Showing 7 changed files with 27 additions and 12 deletions.
diff --git a/Makefile b/Makefile
@@ -1,5 +1,5 @@
 install-dev:
-	pip install -e ".[dev,docs,duckdb,postgres,redshift,bigquery,snowflake,spark]"
+	pip install -e ".[bigquery,dev,docs,duckdb,pandas,postgres,redshift,snowflake,spark]"
 
 install-pre-commit:
 	pre-commit install

diff --git a/setup.py b/setup.py
@@ -26,7 +26,6 @@
         "bigquery": [
             "google-cloud-bigquery[pandas]>=3,<4",
             "google-cloud-bigquery-storage>=2,<3",
-            "pandas>=2,<3",
         ],
         "dev": [
             "duckdb>=0.9,<0.11",
@@ -56,17 +55,17 @@
             "duckdb>=0.9,<0.11",
             "pandas>=2,<3",
         ],
-        "postgres": [
+        "pandas": [
             "pandas>=2,<3",
+        ],
+        "postgres": [
             "psycopg2>=2.8,<3",
         ],
         "redshift": [
-            "pandas>=2,<3",
             "redshift_connector>=2.1.1,<2.2.0",
         ],
         "snowflake": [
-            "pandas>=2,<3",
-            "snowflake-connector-python[pandas,secure-local-storage]>=3.10.0,<3.11",
+            "snowflake-connector-python[secure-local-storage]>=3.10.0,<3.11",
         ],
         "spark": [
             "pyspark>=2,<3.6",

diff --git a/sqlframe/base/mixins/catalog_mixins.py b/sqlframe/base/mixins/catalog_mixins.py
@@ -13,7 +13,7 @@
     _BaseCatalog,
 )
 from sqlframe.base.decorators import normalize
-from sqlframe.base.util import decoded_str, schema_, to_schema
+from sqlframe.base.util import schema_, to_schema
 
 
 class _BaseInfoSchemaMixin(_BaseCatalog, t.Generic[SESSION, DF]):

diff --git a/sqlframe/base/mixins/readwriter_mixins.py b/sqlframe/base/mixins/readwriter_mixins.py
@@ -3,8 +3,6 @@
 import pathlib
 import typing as t
 
-import pandas as pd
-
 from sqlframe.base.exceptions import UnsupportedOperationError
 from sqlframe.base.readerwriter import (
     DF,
@@ -13,7 +11,7 @@
     _BaseDataFrameWriter,
     _infer_format,
 )
-from sqlframe.base.util import pandas_to_spark_schema
+from sqlframe.base.util import pandas_to_spark_schema, verify_pandas_installed
 
 if t.TYPE_CHECKING:
     from sqlframe.base._typing import OptionalPrimitiveType, PathOrPaths
@@ -72,6 +70,9 @@ def load(
         |100|NULL|
         +---+----+
         """
+        verify_pandas_installed()
+        import pandas as pd
+
         assert path is not None, "path is required"
         assert isinstance(path, str), "path must be a string"
         format = format or _infer_format(path)

diff --git a/sqlframe/base/session.py b/sqlframe/base/session.py
@@ -24,7 +24,10 @@
 from sqlframe.base.catalog import _BaseCatalog
 from sqlframe.base.dataframe import _BaseDataFrame
 from sqlframe.base.readerwriter import _BaseDataFrameReader, _BaseDataFrameWriter
-from sqlframe.base.util import get_column_mapping_from_schema_input
+from sqlframe.base.util import (
+    get_column_mapping_from_schema_input,
+    verify_pandas_installed,
+)
 
 if sys.version_info >= (3, 11):
     from typing import Self
@@ -464,6 +467,7 @@ def _dict_to_row(row: t.Dict[str, t.Any]) -> Row:
     def _fetchdf(
         self, sql: t.Union[str, exp.Expression], *, quote_identifiers: bool = True
     ) -> pd.DataFrame:
+        verify_pandas_installed()
         from pandas.io.sql import read_sql_query
 
         return read_sql_query(self._to_sql(sql, quote_identifiers=quote_identifiers), self._conn)

diff --git a/sqlframe/base/util.py b/sqlframe/base/util.py
@@ -240,3 +240,12 @@ def soundex(s):
 
     result += "0" * (4 - count)
     return "".join(result)
+
+
+def verify_pandas_installed():
+    try:
+        import pandas  # noqa
+    except ImportError:
+        raise ImportError(
+            """Pandas is required for this functionality. `pip install "sqlframe[pandas]"` (also include your engine if needed) to install pandas."""
+        )
diff --git a/sqlframe/spark/session.py b/sqlframe/spark/session.py
@@ -3,7 +3,6 @@
 import typing as t
 import warnings
 
-import pandas as pd
 from sqlglot import exp
 
 from sqlframe.base.session import _BaseSession
@@ -15,6 +14,9 @@
 )
 from sqlframe.spark.types import Row
 
+if t.TYPE_CHECKING:
+    import pandas as pd
+
 
 class SparkSession(
     _BaseSession[  # type: ignore