Skip to content

Commit

Permalink
Added upgraded_from_workspace_id property to migrated tables to ind…
Browse files Browse the repository at this point in the history
…icated the source workspace. (#987)

## Changes
Added table parameter `upgraded_from_ws` to migrated tables. The
parameters contains the sources workspace id.

Resolves #899 

### Functionality 

- [ ] added relevant user documentation
- [ ] added new CLI command
- [ ] modified existing command: `databricks labs ucx ...`
- [ ] added a new workflow
- [ ] modified existing workflow: `...`
- [ ] added a new table
- [ ] modified existing table: `...`

### Tests
<!-- How is this tested? Please see the checklist below and also
describe any other relevant tests -->

- [x] manually tested
- [x] added unit tests
- [x] added integration tests
- [x] verified on staging environment (screenshot attached)
  • Loading branch information
FastLee authored and nkvuong committed Mar 6, 2024
1 parent e034cf5 commit 9dba8ee
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 12 deletions.
5 changes: 3 additions & 2 deletions src/databricks/labs/ucx/hive_metastore/table_migrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def _migrate_external_table(self, src_table: Table, rule: Rule):
table_migrate_sql = src_table.sql_migrate_external(target_table_key)
logger.debug(f"Migrating external table {src_table.key} to using SQL query: {table_migrate_sql}")
self._backend.execute(table_migrate_sql)
self._backend.execute(src_table.sql_alter_from(rule.as_uc_table_key, self._ws.get_workspace_id()))
return True

def _migrate_dbfs_root_table(self, src_table: Table, rule: Rule):
Expand All @@ -80,7 +81,7 @@ def _migrate_dbfs_root_table(self, src_table: Table, rule: Rule):
logger.debug(f"Migrating managed table {src_table.key} to using SQL query: {table_migrate_sql}")
self._backend.execute(table_migrate_sql)
self._backend.execute(src_table.sql_alter_to(rule.as_uc_table_key))
self._backend.execute(src_table.sql_alter_from(rule.as_uc_table_key))
self._backend.execute(src_table.sql_alter_from(rule.as_uc_table_key, self._ws.get_workspace_id()))
return True

def _migrate_view(self, src_table: Table, rule: Rule):
Expand All @@ -89,7 +90,7 @@ def _migrate_view(self, src_table: Table, rule: Rule):
logger.debug(f"Migrating view {src_table.key} to using SQL query: {table_migrate_sql}")
self._backend.execute(table_migrate_sql)
self._backend.execute(src_table.sql_alter_to(rule.as_uc_table_key))
self._backend.execute(src_table.sql_alter_from(rule.as_uc_table_key))
self._backend.execute(src_table.sql_alter_from(rule.as_uc_table_key, self._ws.get_workspace_id()))
return True

def _init_seen_tables(self):
Expand Down
10 changes: 8 additions & 2 deletions src/databricks/labs/ucx/hive_metastore/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ class Table:
"dbfs:/databricks-datasets",
]

UPGRADED_FROM_WS_PARAM: typing.ClassVar[str] = "upgraded_from_workspace_id"

@property
def is_delta(self) -> bool:
if self.table_format is None:
Expand All @@ -71,8 +73,12 @@ def kind(self) -> str:
def sql_alter_to(self, target_table_key):
return f"ALTER {self.kind} {self.key} SET TBLPROPERTIES ('upgraded_to' = '{target_table_key}');"

def sql_alter_from(self, target_table_key):
return f"ALTER {self.kind} {target_table_key} SET TBLPROPERTIES ('upgraded_from' = '{self.key}');"
def sql_alter_from(self, target_table_key, ws_id):
return (
f"ALTER {self.kind} {target_table_key} SET TBLPROPERTIES "
f"('upgraded_from' = '{self.key}'"
f" , '{self.UPGRADED_FROM_WS_PARAM}' = '{ws_id}');"
)

def sql_unset_upgraded_to(self):
return f"ALTER {self.kind} {self.key} UNSET TBLPROPERTIES IF EXISTS('upgraded_to');"
Expand Down
5 changes: 5 additions & 0 deletions tests/integration/hive_metastore/test_migrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from databricks.labs.ucx.hive_metastore.mapping import Rule
from databricks.labs.ucx.hive_metastore.table_migrate import TablesMigrate
from databricks.labs.ucx.hive_metastore.tables import Table

from ..conftest import StaticTableMapping, StaticTablesCrawler

Expand Down Expand Up @@ -46,6 +47,7 @@ def test_migrate_managed_tables(ws, sql_backend, inventory_schema, make_catalog,

target_table_properties = ws.tables.get(f"{dst_schema.full_name}.{src_managed_table.name}").properties
assert target_table_properties["upgraded_from"] == src_managed_table.full_name
assert target_table_properties[Table.UPGRADED_FROM_WS_PARAM] == str(ws.get_workspace_id())


@retried(on=[NotFound], timeout=timedelta(minutes=5))
Expand Down Expand Up @@ -136,6 +138,9 @@ def test_migrate_external_table(ws, sql_backend, inventory_schema, make_catalog,

target_tables = list(sql_backend.fetch(f"SHOW TABLES IN {dst_schema.full_name}"))
assert len(target_tables) == 1
target_table_properties = ws.tables.get(f"{dst_schema.full_name}.{src_external_table.name}").properties
assert target_table_properties["upgraded_from"] == src_external_table.full_name
assert target_table_properties[Table.UPGRADED_FROM_WS_PARAM] == str(ws.get_workspace_id())


@retried(on=[NotFound], timeout=timedelta(minutes=5))
Expand Down
23 changes: 15 additions & 8 deletions tests/unit/hive_metastore/test_table_migrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ def test_migrate_dbfs_root_tables_should_produce_proper_queries():
rows = {}
backend = MockBackend(fails_on_first=errors, rows=rows)
table_crawler = TablesCrawler(backend, "inventory_database")
client = MagicMock()
client = create_autospec(WorkspaceClient)
client.get_workspace_id.return_value = "12345"
table_mapping = create_autospec(TableMapping)
table_mapping.get_tables_to_migrate.return_value = [
TableToMigrate(
Expand Down Expand Up @@ -54,8 +55,8 @@ def test_migrate_dbfs_root_tables_should_produce_proper_queries():
"SET TBLPROPERTIES ('upgraded_to' = 'ucx_default.db1_dst.managed_dbfs');"
) in list(backend.queries)
assert (
"ALTER TABLE ucx_default.db1_dst.managed_dbfs "
"SET TBLPROPERTIES ('upgraded_from' = 'hive_metastore.db1_src.managed_dbfs');"
f"ALTER TABLE ucx_default.db1_dst.managed_dbfs "
f"SET TBLPROPERTIES ('upgraded_from' = 'hive_metastore.db1_src.managed_dbfs' , '{Table.UPGRADED_FROM_WS_PARAM}' = '12345');"
) in list(backend.queries)
assert "SYNC TABLE ucx_default.db1_dst.managed_other FROM hive_metastore.db1_src.managed_other;" in list(
backend.queries
Expand Down Expand Up @@ -86,7 +87,8 @@ def test_migrate_external_tables_should_produce_proper_queries():
rows = {}
backend = MockBackend(fails_on_first=errors, rows=rows)
table_crawler = TablesCrawler(backend, "inventory_database")
client = MagicMock()
client = create_autospec(WorkspaceClient)
client.get_workspace_id.return_value = "12345"
table_mapping = create_autospec(TableMapping)
table_mapping.get_tables_to_migrate.return_value = [
TableToMigrate(
Expand All @@ -98,7 +100,11 @@ def test_migrate_external_tables_should_produce_proper_queries():
table_migrate.migrate_tables()

assert (list(backend.queries)) == [
"SYNC TABLE ucx_default.db1_dst.external_dst FROM hive_metastore.db1_src.external_src;"
"SYNC TABLE ucx_default.db1_dst.external_dst FROM hive_metastore.db1_src.external_src;",
(
f"ALTER TABLE ucx_default.db1_dst.external_dst "
f"SET TBLPROPERTIES ('upgraded_from' = 'hive_metastore.db1_src.external_src' , '{Table.UPGRADED_FROM_WS_PARAM}' = '12345');"
),
]


Expand Down Expand Up @@ -159,7 +165,8 @@ def test_migrate_view_should_produce_proper_queries():
rows = {}
backend = MockBackend(fails_on_first=errors, rows=rows)
table_crawler = TablesCrawler(backend, "inventory_database")
client = MagicMock()
client = create_autospec(WorkspaceClient)
client.get_workspace_id.return_value = "12345"
table_mapping = create_autospec(TableMapping)
table_mapping.get_tables_to_migrate.return_value = [
TableToMigrate(
Expand All @@ -176,8 +183,8 @@ def test_migrate_view_should_produce_proper_queries():
"SET TBLPROPERTIES ('upgraded_to' = 'ucx_default.db1_dst.view_dst');"
) in list(backend.queries)
assert (
"ALTER VIEW ucx_default.db1_dst.view_dst "
"SET TBLPROPERTIES ('upgraded_from' = 'hive_metastore.db1_src.view_src');"
f"ALTER VIEW ucx_default.db1_dst.view_dst "
f"SET TBLPROPERTIES ('upgraded_from' = 'hive_metastore.db1_src.view_src' , '{Table.UPGRADED_FROM_WS_PARAM}' = '12345');"
) in list(backend.queries)


Expand Down

0 comments on commit 9dba8ee

Please sign in to comment.