Skip to content

Commit

Permalink
Added deduplication for mount point list (#569)
Browse files Browse the repository at this point in the history
Added deduplication for mount point list.

Fixes #544
  • Loading branch information
dipankarkush-db authored Nov 10, 2023
1 parent fb769c4 commit 7c2fb14
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 1 deletion.
13 changes: 12 additions & 1 deletion src/databricks/labs/ucx/hive_metastore/mounts.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,25 @@ def __init__(self, backend: SqlBackend, ws: WorkspaceClient, inventory_database:
super().__init__(backend, "hive_metastore", inventory_database, "mounts", Mount)
self._dbutils = ws.dbutils

def _deduplicate_mounts(self, mounts: list) -> list:
seen = set()
deduplicated_mounts = []

for obj in mounts:
obj_tuple = (obj.name, obj.source)
if obj_tuple not in seen:
seen.add(obj_tuple)
deduplicated_mounts.append(obj)
return deduplicated_mounts

def inventorize_mounts(self):
    """List the current mounts and hand them to ``self._append_records``."""
    current_mounts = self._list_mounts()
    self._append_records(current_mounts)

def _list_mounts(self):
    """Return the workspace mounts as deduplicated ``Mount`` records.

    Each entry yielded by ``self._dbutils.fs.mounts()`` is unpacked as a
    3-tuple; the first two fields (mount point and source) are used to
    build a ``Mount`` and the third field is ignored. The resulting list is
    passed through ``self._deduplicate_mounts`` so repeated
    ``(name, source)`` pairs appear only once.
    """
    mounts = [Mount(mount_point, source) for mount_point, source, _ in self._dbutils.fs.mounts()]
    # The deduplicated list must be the only return: an earlier bare
    # ``return mounts`` would make this call unreachable.
    return self._deduplicate_mounts(mounts)

def snapshot(self) -> list[Mount]:
    """Return the mounts produced by ``self._snapshot``.

    ``self._try_fetch`` and ``self._list_mounts`` are passed, uncalled, as
    the two positional arguments.
    NOTE(review): presumably the first reads previously stored rows and the
    second is the fallback that lists them afresh; confirm against
    ``_snapshot``.
    """
    fetcher = self._try_fetch
    loader = self._list_mounts
    return self._snapshot(fetcher, loader)
Expand Down
18 changes: 18 additions & 0 deletions tests/unit/hive_metastore/test_mounts.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,24 @@ def test_list_mounts_should_return_a_list_of_mount_without_encryption_type():
client.dbutils.fs.mounts.return_value = [
MountInfo("mp_1", "path_1", "info_1"),
MountInfo("mp_2", "path_2", "info_2"),
MountInfo("mp_3", "path_3", "info_3"),
]

backend = MockBackend()
instance = Mounts(backend, client, "test")

instance.inventorize_mounts()

expected = [Mount("mp_1", "path_1"), Mount("mp_2", "path_2"), Mount("mp_3", "path_3")]
assert expected == backend.rows_written_for("hive_metastore.test.mounts", "append")


def test_list_mounts_should_return_a_deduped_list_of_mount_without_encryption_type():
client = MagicMock()
client.dbutils.fs.mounts.return_value = [
MountInfo("mp_1", "path_1", "info_1"),
MountInfo("mp_2", "path_2", "info_2"),
MountInfo("mp_2", "path_2", "info_2"),
]

backend = MockBackend()
Expand Down

0 comments on commit 7c2fb14

Please sign in to comment.