Skip to content

Commit

Permalink
improved exception handling without reraising
Browse files Browse the repository at this point in the history
  • Loading branch information
zilto committed Jan 29, 2025
1 parent 03a708b commit 28b2244
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 25 deletions.
58 changes: 47 additions & 11 deletions dlt/common/libs/pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,27 +47,61 @@


class UnsupportedArrowTypeException(DltException):
"""Exception raised when an Arrow type conversion fails.
The setters are used to update the exception with more context,
such as the relevant field and table, as it is caught downstream.
"""

def __init__(
self,
arrow_type: pyarrow.DataType,
field_name: Optional[str] = None,
table_name: Optional[str] = None,
column_name: Optional[str] = None,
) -> None:
self.arrow_type = arrow_type
self.column_name = column_name if column_name else ""
self.table_name = table_name if table_name else ""
self._field_name = field_name if field_name else ""
self._table_name = table_name if table_name else ""

msg = self.generate_message(self.arrow_type, self._field_name, self._table_name)
super().__init__(msg)

msg = f"Arrow type `{self.arrow_type}`"
if self.column_name:
msg += f" for column `{self.column_name}`"
if self.table_name:
msg += f" in table `{self.table_name}`"
@staticmethod
def generate_message(arrow_type: pyarrow.DataType, field_name: str, table_name: str) -> str:
msg = f"Arrow type `{arrow_type}`"
if field_name:
msg += f" for field `{field_name}`"
if table_name:
msg += f" in table `{table_name}`"

msg += (
" is unsupported by dlt. See documentation:"
" https://dlthub.com/docs/dlt-ecosystem/verified-sources/arrow-pandas#supported-arrow-data-types"
)
super().__init__(None, msg)
return msg

def _update_message(self) -> None:
"""Modify the `Exception.args` tuple to update message."""
msg = self.generate_message(self.arrow_type, self.field_name, self.table_name)
self.args = (msg,) # must be a tuple

@property
def field_name(self) -> str:
return self._field_name

@field_name.setter
def field_name(self, value: str) -> None:
self._field_name = value
self._update_message()

@property
def table_name(self) -> str:
return self._table_name

@table_name.setter
def table_name(self, value: str) -> None:
self._table_name = value
self._update_message()


def get_py_arrow_datatype(
Expand Down Expand Up @@ -426,8 +460,10 @@ def py_arrow_to_table_schema_columns(schema: pyarrow.Schema) -> TTableSchemaColu
for field in schema:
try:
converted_type = get_column_type_from_py_arrow(field.type)
except UnsupportedArrowTypeException:
raise UnsupportedArrowTypeException(arrow_type=field.type, column_name=field.name)
except UnsupportedArrowTypeException as e:
# modify attributes in place to add context instead of re-raising with `raise e`
e.field_name = field.name
raise

result[field.name] = {
"name": field.name,
Expand Down
7 changes: 2 additions & 5 deletions dlt/extract/extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,11 +420,8 @@ def _compute_table(
try:
arrow_table["columns"] = pyarrow.py_arrow_to_table_schema_columns(item.schema)
except pyarrow.UnsupportedArrowTypeException as e:
raise UnsupportedArrowTypeException(
arrow_type=e.arrow_type,
table_name=arrow_table["name"],
column_name=e.column_name,
) from e
e.table_name = str(arrow_table.get("name"))
raise

# Add load_id column if needed
dlt_load_id = self.naming.normalize_identifier(C_DLT_LOAD_ID)
Expand Down
10 changes: 4 additions & 6 deletions dlt/normalize/items_normalizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,12 +327,10 @@ def _fix_schema_precisions(
data_type = pyarrow.get_column_type_from_py_arrow(
arrow_schema.field(key).type
)
except pyarrow.UnsupportedArrowTypeException:
raise pyarrow.UnsupportedArrowTypeException(
arrow_type=arrow_schema.field(key).type,
table_name=root_table_name,
column_name=key,
)
except pyarrow.UnsupportedArrowTypeException as e:
e.field_name = key
e.table_name = root_table_name
raise

if data_type["data_type"] in ("timestamp", "time"):
prec = data_type["precision"]
Expand Down
4 changes: 1 addition & 3 deletions tests/libs/pyarrow/test_pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,7 @@ def test_exception_for_schema_with_unsupported_arrow_type() -> None:
with pytest.raises(UnsupportedArrowTypeException) as excinfo:
py_arrow_to_table_schema_columns(table.schema)

# this unpacking seems specific to subtypes of DltException
# assert the faulty column name is included in the exception message
_, msg = excinfo.value.args
(msg,) = excinfo.value.args
assert "duration" in msg
assert "col2" in msg

Expand Down

0 comments on commit 28b2244

Please sign in to comment.