Skip to content

Commit

Permalink
perf: lazily import pyiceberg and unity catalog if available (#3565)
Browse files Browse the repository at this point in the history
`memray` reports that a non-trivial amount of memory is consumed by
our catalog module at import time:

<img width="1179" alt="image"
src="https://github.com/user-attachments/assets/f2b88810-bc6b-4631-bbdc-b75f0848b63c"
/>

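This PR makes those imports lazy: `pyiceberg` and `daft.unity_catalog` are now imported at module level only under `TYPE_CHECKING`, and the real imports are deferred to the body of `register_python_catalog`.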

Co-authored-by: Jay Chia <jaychia94@gmail.com@users.noreply.github.com>
  • Loading branch information
jaychia and Jay Chia authored Dec 14, 2024
1 parent 35ed63c commit 95a61d2
Showing 1 changed file with 19 additions and 12 deletions.
31 changes: 19 additions & 12 deletions daft/catalog/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,21 +45,12 @@

from daft.dataframe import DataFrame

_PYICEBERG_AVAILABLE = False
try:
from pyiceberg.catalog import Catalog as PyIcebergCatalog

_PYICEBERG_AVAILABLE = True
except ImportError:
pass
from typing import TYPE_CHECKING

_UNITY_AVAILABLE = False
try:
if TYPE_CHECKING:
from pyiceberg.catalog import Catalog as PyIcebergCatalog
from daft.unity_catalog import UnityCatalog

_UNITY_AVAILABLE = True
except ImportError:
pass

__all__ = [
"read_table",
Expand Down Expand Up @@ -136,6 +127,22 @@ def register_python_catalog(catalog: PyIcebergCatalog | UnityCatalog, name: str
>>> daft.catalog.register_python_catalog(catalog, "my_daft_catalog")
"""
_PYICEBERG_AVAILABLE = False
try:
from pyiceberg.catalog import Catalog as PyIcebergCatalog

_PYICEBERG_AVAILABLE = True
except ImportError:
pass

_UNITY_AVAILABLE = False
try:
from daft.unity_catalog import UnityCatalog

_UNITY_AVAILABLE = True
except ImportError:
pass

python_catalog: PyIcebergCatalog
if _PYICEBERG_AVAILABLE and isinstance(catalog, PyIcebergCatalog):
from daft.catalog.pyiceberg import PyIcebergCatalogAdaptor
Expand Down

0 comments on commit 95a61d2

Please sign in to comment.