diff --git a/pyproject.toml b/pyproject.toml index e4540ea7..c7ea859e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,10 +21,9 @@ classifiers = [ requires-python = ">=3.9" dynamic = ["version"] dependencies = [ - "funcy>=1.14", + "funcy>=1.14; python_version < '3.12'", "dictdiffer>=0.8.1", "pygtrie>=2.3.2", - "shortuuid>=0.5.0", "dvc-objects>=4.0.1,<6", "fsspec>=2024.2.0", "diskcache>=5.2.1", @@ -191,8 +190,8 @@ parametrize-names-type = "csv" [tool.ruff.lint.flake8-tidy-imports] [tool.ruff.lint.flake8-tidy-imports.banned-api] -"funcy.cached_property" = {msg = "use `from dvc_data.utils import cached_property` instead."} -"functools.cached_property" = {msg = "use `from dvc_data.utils import cached_property` instead."} +"funcy.cached_property" = {msg = "use `from dvc_data.compat import cached_property` instead."} +"functools.cached_property" = {msg = "use `from dvc_data.compat import cached_property` instead."} [tool.ruff.lint.flake8-type-checking] strict = true diff --git a/src/dvc_data/utils.py b/src/dvc_data/compat.py similarity index 75% rename from src/dvc_data/utils.py rename to src/dvc_data/compat.py index 6ef6405d..2ba12f35 100644 --- a/src/dvc_data/utils.py +++ b/src/dvc_data/compat.py @@ -1,6 +1,7 @@ +import sys from typing import TYPE_CHECKING -if TYPE_CHECKING: +if sys.version_info >= (3, 12) or TYPE_CHECKING: from functools import cached_property # noqa: TID251 else: from funcy import cached_property # noqa: TID251 diff --git a/src/dvc_data/hashfile/db/local.py b/src/dvc_data/hashfile/db/local.py index d3f97eb4..388575ec 100644 --- a/src/dvc_data/hashfile/db/local.py +++ b/src/dvc_data/hashfile/db/local.py @@ -6,9 +6,8 @@ from dvc_objects.db import noop, wrap_iter from dvc_objects.errors import ObjectDBError, ObjectFormatError -from dvc_objects.fs.utils import copyfile, remove +from dvc_objects.fs.utils import copyfile, remove, tmp_fname from fsspec.callbacks import DEFAULT_CALLBACK -from shortuuid import uuid from . import HashFileDB @@ -83,7 +82,7 @@ def _remove_unpacked_dir(self, hash_): def _unprotect_file(self, path, callback=DEFAULT_CALLBACK): if self.fs.is_symlink(path) or self.fs.is_hardlink(path): logger.debug("Unprotecting '%s'", path) - tmp = os.path.join(os.path.dirname(path), "." + uuid()) + tmp = os.path.join(os.path.dirname(path), tmp_fname()) # The operations order is important here - if some application # would access the file during the process of copyfile then it diff --git a/src/dvc_data/hashfile/transfer.py b/src/dvc_data/hashfile/transfer.py index 42702ec2..df3ea232 100644 --- a/src/dvc_data/hashfile/transfer.py +++ b/src/dvc_data/hashfile/transfer.py @@ -11,7 +11,6 @@ ) from fsspec.callbacks import DEFAULT_CALLBACK -from funcy import split from .hash_info import HashInfo @@ -56,7 +55,7 @@ def find_tree_by_obj_id( return None -def _do_transfer( +def _do_transfer( # noqa: C901 src: "HashFileDB", dest: "HashFileDB", obj_ids: Iterable["HashInfo"], @@ -71,18 +70,23 @@ def _do_transfer( Returns: Set containing any hash_infos which failed to transfer. """ - dir_ids, file_ids = split(lambda hash_info: hash_info.isdir, obj_ids) + dir_ids, file_ids = set(), set() + for hash_info in obj_ids: + if hash_info.isdir: + dir_ids.add(hash_info) + else: + file_ids.add(hash_info) + failed_ids: set["HashInfo"] = set() succeeded_dir_objs = [] - all_file_ids = set(file_ids) for dir_hash in dir_ids: dir_obj = find_tree_by_obj_id([cache_odb, src], dir_hash) assert dir_obj entry_ids = {oid for _, _, oid in dir_obj} - bound_file_ids = all_file_ids & entry_ids - all_file_ids -= entry_ids + bound_file_ids = file_ids & entry_ids + file_ids -= entry_ids logger.debug("transfer dir: %s with %d files", dir_hash, len(bound_file_ids)) @@ -114,7 +118,7 @@ def _do_transfer( succeeded_dir_objs.append(dir_obj) # insert the rest - failed_ids.update(_add(src, dest, all_file_ids, **kwargs)) + failed_ids.update(_add(src, dest, file_ids, **kwargs)) if failed_ids: if src_index: src_index.clear() diff --git a/src/dvc_data/hashfile/tree.py b/src/dvc_data/hashfile/tree.py index 57235979..75361f37 100644 --- a/src/dvc_data/hashfile/tree.py +++ b/src/dvc_data/hashfile/tree.py @@ -6,10 +6,10 @@ from dvc_objects.errors import ObjectFormatError +from dvc_data.compat import cached_property from dvc_data.hashfile.hash import DEFAULT_ALGORITHM, hash_file from dvc_data.hashfile.meta import Meta from dvc_data.hashfile.obj import HashFile -from dvc_data.utils import cached_property if TYPE_CHECKING: from pygtrie import Trie diff --git a/src/dvc_data/index/index.py b/src/dvc_data/index/index.py index 120541c5..71bc5156 100644 --- a/src/dvc_data/index/index.py +++ b/src/dvc_data/index/index.py @@ -3,26 +3,15 @@ import os from abc import ABC, abstractmethod from collections.abc import Iterator, MutableMapping -from typing import ( - TYPE_CHECKING, - Any, - Callable, - Optional, - cast, -) +from typing import TYPE_CHECKING, Any, Callable, Optional, cast import attrs -from sqltrie import ( - JSONTrie, - PyGTrie, - ShortKeyError, - SQLiteTrie, -) +from sqltrie import JSONTrie, PyGTrie, ShortKeyError, SQLiteTrie +from dvc_data.compat import cached_property from dvc_data.hashfile.hash_info import HashInfo from dvc_data.hashfile.meta import Meta from dvc_data.hashfile.tree import Tree -from dvc_data.utils import cached_property if TYPE_CHECKING: from dvc_objects.fs.base import FileSystem diff --git a/tests/hashfile/test_db_index.py b/tests/hashfile/test_db_index.py index 00e0f016..426b8fa7 100644 --- a/tests/hashfile/test_db_index.py +++ b/tests/hashfile/test_db_index.py @@ -1,5 +1,4 @@ import pytest -from funcy import first from dvc_data.hashfile.db.index import ObjectDBIndex @@ -22,7 +21,7 @@ def test_roundtrip(tmp_upath, index): def test_clear(index): index.update(["1234.dir"], ["5678"]) index.clear() - assert first(index.hashes()) is None + assert not list(index.hashes()) def test_update(index):