-
Notifications
You must be signed in to change notification settings - Fork 87
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Make delta format case insensitive #2861
Changes from all commits
120f546
c64732c
793633e
6e21d06
f9b08fe
66effd6
a57a3fa
53c2fa3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -48,7 +48,7 @@ class AclMigrationWhat(Enum): | |
|
||
|
||
@dataclass | ||
class Table: | ||
class Table: # pylint: disable=too-many-public-methods | ||
catalog: str | ||
database: str | ||
name: str | ||
|
@@ -80,12 +80,22 @@ class Table: | |
|
||
UPGRADED_FROM_WS_PARAM: typing.ClassVar[str] = "upgraded_from_workspace_id" | ||
|
||
def __post_init__(self) -> None: | ||
if isinstance(self.table_format, str): # Should not happen according to type hint, still safer | ||
self.table_format = self.table_format.upper() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. okay...
|
||
|
||
@property | ||
def is_delta(self) -> bool: | ||
if self.table_format is None: | ||
return False | ||
return self.table_format.upper() == "DELTA" | ||
|
||
@property | ||
def is_hive(self) -> bool: | ||
if self.table_format is None: | ||
return False | ||
return self.table_format.upper() == "HIVE" | ||
|
||
@property | ||
def key(self) -> str: | ||
if self.is_table_in_mount: | ||
|
@@ -163,13 +173,13 @@ def what(self) -> What: | |
return What.DB_DATASET | ||
if self.is_table_in_mount: | ||
return What.TABLE_IN_MOUNT | ||
if self.is_dbfs_root and self.table_format == "DELTA": | ||
if self.is_dbfs_root and self.is_delta: | ||
return What.DBFS_ROOT_DELTA | ||
if self.is_dbfs_root: | ||
return What.DBFS_ROOT_NON_DELTA | ||
if self.kind == "TABLE" and self.is_format_supported_for_sync: | ||
return What.EXTERNAL_SYNC | ||
if self.kind == "TABLE" and self.table_format.upper() == "HIVE": | ||
if self.kind == "TABLE" and self.is_hive: | ||
return What.EXTERNAL_HIVESERDE | ||
if self.kind == "TABLE": | ||
return What.EXTERNAL_NO_SYNC | ||
|
@@ -194,7 +204,7 @@ def sql_migrate_ctas_managed(self, target_table_key) -> str: | |
) | ||
|
||
def hiveserde_type(self, backend: SqlBackend) -> HiveSerdeType: | ||
if self.table_format != "HIVE": | ||
if not self.is_hive: | ||
return HiveSerdeType.NOT_HIVESERDE | ||
# Extract hive serde info, ideally this should be done by table crawler. | ||
# But doing here to avoid breaking change to the `tables` table in the inventory schema. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,7 +3,7 @@ WITH table_stats AS ( | |
SELECT | ||
`database`, | ||
object_type, | ||
table_format AS `format`, | ||
UPPER(table_format) AS `format`, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Required for backwards compatibility, i.e. users that already populated the UCX inventory |
||
`location`, | ||
IF(object_type IN ('MANAGED', 'EXTERNAL'), 1, 0) AS is_table, | ||
IF(object_type = 'VIEW', 1, 0) AS is_view, | ||
|
@@ -15,7 +15,7 @@ WITH table_stats AS ( | |
ELSE 0 | ||
END AS is_dbfs_root, | ||
CASE WHEN STARTSWITH(location, 'wasb') THEN 1 WHEN STARTSWITH(location, 'adl') THEN 1 ELSE 0 END AS is_unsupported, | ||
IF(table_format = 'DELTA', 1, 0) AS is_delta | ||
IF(UPPER(table_format) = 'DELTA', 1, 0) AS is_delta | ||
FROM inventory.tables | ||
), database_stats AS ( | ||
SELECT | ||
|
@@ -71,4 +71,4 @@ FROM database_stats | |
FULL JOIN grant_stats | ||
USING (`database`) | ||
ORDER BY | ||
tables DESC | ||
tables DESC |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -195,6 +195,7 @@ def test_tables_returning_error_when_show_tables(caplog): | |
'table,dbfs_root,what', | ||
[ | ||
(Table("a", "b", "c", "MANAGED", "DELTA", location="dbfs:/somelocation/tablename"), True, What.DBFS_ROOT_DELTA), | ||
(Table("a", "b", "c", "MANAGED", "delta", location="dbfs:/somelocation/tablename"), True, What.DBFS_ROOT_DELTA), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @HariGS-DB : This test fails without the code changes in the PR. Could be responsible for #2840 |
||
( | ||
Table("a", "b", "c", "MANAGED", "PARQUET", location="dbfs:/somelocation/tablename"), | ||
True, | ||
|
@@ -225,7 +226,7 @@ def test_tables_returning_error_when_show_tables(caplog): | |
(Table("a", "b", "c", "MANAGED", "DELTA", location="adls:/somelocation/tablename"), False, What.EXTERNAL_SYNC), | ||
], | ||
) | ||
def test_is_dbfs_root(table, dbfs_root, what): | ||
def test_is_dbfs_root(table, dbfs_root, what) -> None: | ||
assert table.is_dbfs_root == dbfs_root | ||
assert table.what == what | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@nfx: I added this fall safe to always have the format as an upper case string.