Skip to content

Commit

Permalink
fix support for drops
Browse files Browse the repository at this point in the history
  • Loading branch information
hsheth2 committed Aug 7, 2024
1 parent 09f5c94 commit 0d4cd6a
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 27 deletions.
63 changes: 37 additions & 26 deletions metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,32 +189,43 @@ def _table_level_lineage(
statement: sqlglot.Expression, dialect: sqlglot.Dialect
) -> Tuple[Set[_TableName], Set[_TableName]]:
# Generate table-level lineage.
modified = {
_TableName.from_sqlglot_table(expr.this)
for expr in statement.find_all(
sqlglot.exp.Create,
sqlglot.exp.Insert,
sqlglot.exp.Update,
sqlglot.exp.Delete,
sqlglot.exp.Merge,
sqlglot.exp.AlterTable,
sqlglot.exp.Drop,
)
# In some cases like "MERGE ... then INSERT (col1, col2) VALUES (col1, col2)",
# the `this` on the INSERT part isn't a table.
if isinstance(expr.this, sqlglot.exp.Table)
} | {
# For statements that include a column list, like
# CREATE DDL statements and `INSERT INTO table (col1, col2) SELECT ...`
# the table name is nested inside a Schema object.
_TableName.from_sqlglot_table(expr.this.this)
for expr in statement.find_all(
sqlglot.exp.Create,
sqlglot.exp.Insert,
)
if isinstance(expr.this, sqlglot.exp.Schema)
and isinstance(expr.this.this, sqlglot.exp.Table)
}
modified = (
{
_TableName.from_sqlglot_table(expr.this)
for expr in statement.find_all(
sqlglot.exp.Create,
sqlglot.exp.Insert,
sqlglot.exp.Update,
sqlglot.exp.Delete,
sqlglot.exp.Merge,
sqlglot.exp.AlterTable,
)
# In some cases like "MERGE ... then INSERT (col1, col2) VALUES (col1, col2)",
# the `this` on the INSERT part isn't a table.
if isinstance(expr.this, sqlglot.exp.Table)
}
| {
# For statements that include a column list, like
# CREATE DDL statements and `INSERT INTO table (col1, col2) SELECT ...`
# the table name is nested inside a Schema object.
_TableName.from_sqlglot_table(expr.this.this)
for expr in statement.find_all(
sqlglot.exp.Create,
sqlglot.exp.Insert,
)
if isinstance(expr.this, sqlglot.exp.Schema)
and isinstance(expr.this.this, sqlglot.exp.Table)
}
| {
# For drop statements, we only want it if a table/view is being dropped.
# Other "kinds" will not have table.name populated.
_TableName.from_sqlglot_table(expr.this)
for expr in statement.find_all(sqlglot.exp.Drop)
if isinstance(expr.this, sqlglot.exp.Table)
and expr.this.this
and expr.this.name
}
)

tables = (
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
"query_fingerprint": "7d04253c3add0194c557942ef9b7485f38e68762d300dad364b9cec8656035b3",
"in_tables": [],
"out_tables": [
"urn:li:dataset:(urn:li:dataPlatform:bigquery,my-bq-project.covid_data.covid_deaths,PROD)"
"urn:li:dataset:(urn:li:dataPlatform:bigquery,my-bq-project.covid_data.covid_deaths,PROD)",
"urn:li:dataset:(urn:li:dataPlatform:bigquery,patient_name,PROD)"
],
"column_lineage": null,
"debug_info": {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"query_type": "UNKNOWN",
"query_type_props": {},
"query_fingerprint": "4eefab57619a812a94030acce0071857561265945e79d798563adb53bd0b9646",
"in_tables": [],
"out_tables": [],
"column_lineage": null,
"debug_info": {
"confidence": 0.9,
"generalized_statement": "DROP SCHEMA my_schema"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"query_type": "UNKNOWN",
"query_type_props": {},
"query_fingerprint": "35a3c60e7ed98884dde3f1f5fe9079f844832430589a3326b97d617b8303f191",
"in_tables": [],
"out_tables": [
"urn:li:dataset:(urn:li:dataPlatform:sqlite,my_schema.my_view,PROD)"
],
"column_lineage": null,
"debug_info": {
"confidence": 0.2,
"generalized_statement": "DROP VIEW my_schema.my_view"
}
}
20 changes: 20 additions & 0 deletions metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py
Original file line number Diff line number Diff line change
Expand Up @@ -1233,3 +1233,23 @@ def test_sqlite_drop_table() -> None:
dialect="sqlite",
expected_file=RESOURCE_DIR / "test_sqlite_drop_table.json",
)


def test_sqlite_drop_view() -> None:
assert_sql_result(
"""\
DROP VIEW my_schema.my_view
""",
dialect="sqlite",
expected_file=RESOURCE_DIR / "test_sqlite_drop_view.json",
)


def test_snowflake_drop_schema() -> None:
assert_sql_result(
"""\
DROP SCHEMA my_schema
""",
dialect="snowflake",
expected_file=RESOURCE_DIR / "test_snowflake_drop_schema.json",
)

0 comments on commit 0d4cd6a

Please sign in to comment.