diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 56f6f984..393a9449 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -7,12 +7,6 @@ updates:
       interval: "weekly"
     labels:
       - "maintenance"
-    # Update via cruft
-    ignore:
-      - dependency-name: "mkdocs*"
-      - dependency-name: "pytest*"
-      - dependency-name: "pylint"
-      - dependency-name: "mypy"
 
   - directory: "/"
     package-ecosystem: "github-actions"
@@ -20,10 +14,3 @@ updates:
       interval: "weekly"
     labels:
       - "maintenance"
-    # Update via cruft
-    ignore:
-      - dependency-name: "actions/checkout"
-      - dependency-name: "actions/setup-python"
-      - dependency-name: "pypa/gh-action-pypi-publish"
-      - dependency-name: "codecov/codecov-action"
-      - dependency-name: "peter-evans/create-pull-request"
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index dc0d9e81..118b5d65 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -21,18 +21,18 @@ jobs:
     env:
       PYTEST_BENCHMARK_STORAGE: file://${{ github.workspace }}/.benchmarks
     steps:
-      - name: Set up Python 3.10
-        uses: actions/setup-python@v4
+      - name: Set up Python 3.11
+        uses: actions/setup-python@v5
         with:
-          python-version: '3.10'
+          python-version: '3.11'
 
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
         with:
           ref: ${{ github.event.pull_request.base.sha }}
           fetch-depth: 0
           path: base
 
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
         with:
           fetch-depth: 0
           path: pr
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 20e499d8..85a370f7 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -21,10 +21,10 @@ jobs:
         with:
           fetch-depth: 0
 
-      - name: Set up Python 3.10
-        uses: actions/setup-python@v4
+      - name: Set up Python 3.11
+        uses: actions/setup-python@v5
         with:
-          python-version: '3.10'
+          python-version: '3.11'
 
       - name: Upgrade pip and nox
         run: |
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index b4d9ecca..98026353 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -32,7 +32,7 @@ jobs:
           fetch-depth: 0
 
       - name: Set up Python ${{ matrix.pyv }}
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.pyv }}
 
@@ -49,7 +49,7 @@ jobs:
         run: nox -s tests-${{ matrix.nox_pyv || matrix.pyv }} -- --cov-report=xml
 
       - name: Upload coverage report
-        uses: codecov/codecov-action@v3.1.4
+        uses: codecov/codecov-action@v3
 
       - name: Build package
         run: nox -s build
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 3c44cc02..468bcd88 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -2,7 +2,7 @@ default_language_version:
   python: python3
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.4.0
+    rev: v4.5.0
     hooks:
       - id: check-added-large-files
       - id: check-case-conflict
@@ -20,18 +20,18 @@ repos:
       - id: sort-simple-yaml
       - id: trailing-whitespace
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: 'v0.1.5'
+    rev: 'v0.1.7'
     hooks:
      - id: ruff
        args: [--fix, --exit-non-zero-on-fix]
      - id: ruff-format
   - repo: https://github.com/codespell-project/codespell
-    rev: v2.2.5
+    rev: v2.2.6
     hooks:
       - id: codespell
         additional_dependencies: ["tomli"]
   - repo: https://github.com/asottile/pyupgrade
-    rev: v3.10.1
+    rev: v3.15.0
     hooks:
       - id: pyupgrade
         args: [--py38-plus]
diff --git a/pyproject.toml b/pyproject.toml
index 86ca5b12..fc6da9ae 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "dvc-data"
-description = "dvc data"
+description = "DVC's data management subsystem"
 readme = "README.rst"
 license = {text = "Apache-2.0"}
 authors = [{ name = "Iterative", email = "support@dvc.org" }]
@@ -44,12 +44,12 @@ all = [
   "dvc-data[cli]",
 ]
 tests = [
-  "pytest==7.2.0",
-  "pytest-sugar==0.9.6",
-  "pytest-cov==4.0.0",
-  "pytest-mock==3.10.0",
-  "pytest-benchmark==4.0.0",
-  "mypy==1.5.1",
+  "pytest<8,>=7",
+  "pytest-sugar",
+  "pytest-cov>=4.1.0",
+  "pytest-mock",
+  "pytest-benchmark",
+  "mypy==1.7.1",
   "pytest-servers[s3]==0.1.3",
 ]
 dev = [
@@ -62,7 +62,7 @@ dev = [
 dvc-data = "dvc_data.__main__:main"
 
 [tool.setuptools.package-data]
-dvc_objects = ["py.typed"]
+dvc_data = ["py.typed"]
 
 [tool.setuptools.packages.find]
 where = ["src"]
@@ -121,3 +121,79 @@ module = [
 
 [tool.codespell]
 ignore-words-list = "fo"
+
+[tool.ruff]
+ignore = [
+    "ISC001",  # single-line-implicit-string-concatenation
+    "PLR2004",  # magic-value-comparison
+    "PLW2901",  # redefined-loop-name
+    "RET501",  # unnecessary-return-none
+    "RET502",  # implicit-return-value
+    "RET503",  # implicit-return
+    "S101",  # assert
+    "SIM105",  # suppressible-exception
+    "SIM108",  # if-else-block-instead-of-if-exp
+    "SIM117",  # multiple-with-statements
+]
+select = [
+    "A",  # flake8-builtins
+    "ASYNC",  # flake8-async
+    "B",  # flake8-bugbear
+    "BLE",  # flake8-blind-except
+    "C4",  # flake8-comprehensions
+    "C90",  # mccabe
+    "DTZ",  # flake8-datetimez
+    "E",  # pycodestyle - Error
+    "EXE",  # flake8-executable
+    "F",  # pyflakes
+    "FLY",  # flynt-rules
+    "G",  # flake8-logging-format
+    "I",  # isort
+    "ICN",  # flake8-import-conventions
+    "INP",  # flake8-no-pep420
+    "ISC",  # flake8-implicit-str-concat
+    "N",  # pep8-naming
+    "PERF101",  # perflint
+    "PGH",  # pygrep-hooks
+    "PIE",  # flake8-pie
+    "PL",  # pylint
+    "PT",  # flake8-pytest-style
+    "PYI",  # flake8-pyi
+    "Q",  # flake8-quotes
+    "RET",  # flake8-return
+    "RSE",  # flake8-raise
+    "RUF",  # ruff
+    "S",  # flake8-bandit
+    "SIM",  # flake8-simplify
+    "SLOT",  # flake8-slots
+    "T10",  # flake8-debugger
+    "T20",  # flake8-print
+    "TCH",  # flake8-type-checking
+    "TID",  # flake8-tidy-imports
+    "UP",  # pyupgrade
+    "W",  # pycodestyle - Warning
+    "YTT",  # flake8-2020
+]
+show-source = true
+show-fixes = true
+
+[tool.ruff.lint.flake8-pytest-style]
+fixture-parentheses = false
+mark-parentheses = false
+parametrize-names-type = "csv"
+
+[tool.ruff.lint.flake8-tidy-imports]
+[tool.ruff.lint.flake8-tidy-imports.banned-api]
+"funcy.cached_property" = {msg = "use `from dvc_data.utils import cached_property` instead."}
+"functools.cached_property" = {msg = "use `from dvc_data.utils import cached_property` instead."}
+
+[tool.ruff.lint.flake8-type-checking]
+strict = true
+
+[tool.ruff.lint.isort]
+known-first-party = ["dvc_data"]
+
+[tool.ruff.lint.pylint]
+max-args = 10
+
+[tool.ruff.per-file-ignores]
+"src/dvc_data/cli.py" = ["T201", "B008"]
diff --git a/src/dvc_data/__main__.py b/src/dvc_data/__main__.py
index cc8cc558..ae21793b 100644
--- a/src/dvc_data/__main__.py
+++ b/src/dvc_data/__main__.py
@@ -5,7 +5,7 @@
 def main():  # type: ignore[misc]
     import sys
 
-    print(
+    print(  # noqa: T201
         "dvc-data could not run because the required "
         "dependencies are not installed.\n"
         "Please install it with: pip install 'dvc-data[cli]'"
diff --git a/src/dvc_data/cli.py b/src/dvc_data/cli.py
index b53ea72d..b0bb8add 100644
--- a/src/dvc_data/cli.py
+++ b/src/dvc_data/cli.py
@@ -13,7 +13,7 @@
 from typing import List, cast
 
 import click
-import typer  # pylint: disable=import-error
+import typer
 from attrs import asdict
 from dvc_objects._tqdm import Tqdm
 from dvc_objects.errors import ObjectFormatError
@@ -34,10 +34,9 @@
 from dvc_data.hashfile.obj import HashFile
 from dvc_data.hashfile.state import State
 from dvc_data.hashfile.transfer import transfer as _transfer
-from dvc_data.hashfile.tree import Tree
+from dvc_data.hashfile.tree import Tree, merge
 from dvc_data.hashfile.tree import du as _du
-from dvc_data.hashfile.tree import merge
-from dvc_data.repo import NotARepo, Repo
+from dvc_data.repo import NotARepoError, Repo
 
 install(show_locals=True, suppress=[typer, click])
 
@@ -224,9 +223,9 @@ def from_shortoid(odb: HashFileDB, oid: str) -> str:
 def get_odb(**config):
     try:
         repo = Repo.discover()
-    except NotARepo as exc:
+    except NotARepoError as exc:
         typer.echo(exc, err=True)
-        raise typer.Abort(1)
+        raise typer.Abort(1)  # noqa: B904
 
     if "state" not in config:
         config.setdefault("state", State(root_dir=repo.root, tmp_dir=repo.tmp_dir))
@@ -324,7 +323,7 @@ def show(oid: str = typer.Argument(..., allow_dash=True)):
     obj = load(odb, odb.get(oid).hash_info)
     if isinstance(obj, Tree):
         return _ls_tree(obj)
-    elif isinstance(obj, HashFile):
+    if isinstance(obj, HashFile):
         return _cat_object(odb, obj.oid)
     raise AssertionError(f"unknown object of type {type(obj)}")
 
@@ -429,14 +428,15 @@ def merge_tree(oid1: str, oid2: str, force: bool = False):
 
     oid2 = from_shortoid(odb, oid2)
     obj1 = load(odb, odb.get(oid1).hash_info)
     obj2 = load(odb, odb.get(oid2).hash_info)
-    assert isinstance(obj1, Tree) and isinstance(obj2, Tree), "not a tree obj"
+    assert isinstance(obj1, Tree)
+    assert isinstance(obj2, Tree), "not a tree obj"
     if not force:
         # detect conflicts
         d = _diff(obj1, obj2, odb)
         modified = [
             posixpath.join(*change.old.key)
-            for change in d.modified  # pylint: disable=not-an-iterable
+            for change in d.modified
             if change.old.key != ROOT
         ]
         if modified:
@@ -478,7 +478,6 @@ def apply_op(odb, obj, application):
     op = application["op"]
     path = application["path"]
     keys = tuple(path.split("/"))
-    # pylint: disable=protected-access
    if op in ("add", "modify"):
        new = tuple(application["to"].split("/"))
        if op == "add" and new in obj._dict:
@@ -594,7 +593,7 @@ def checkout(
     path: Path = typer.Argument(..., resolve_path=True),
     relink: bool = False,
     force: bool = False,
-    type: List[LinkEnum] = typer.Option(["copy"]),  # pylint: disable=redefined-builtin
+    type: List[LinkEnum] = typer.Option(["copy"]),  # noqa: A002
 ):
     odb = get_odb(type=[t.value for t in type])
     oid = from_shortoid(odb, oid)
diff --git a/src/dvc_data/fs.py b/src/dvc_data/fs.py
index da7634a1..aec677c8 100644
--- a/src/dvc_data/fs.py
+++ b/src/dvc_data/fs.py
@@ -9,8 +9,6 @@
 from dvc_objects.fs.callbacks import DEFAULT_CALLBACK
 from fsspec import AbstractFileSystem
 
-from dvc_data.hashfile.db import HashFileDB
-
 from .utils import cached_property
 
 if typing.TYPE_CHECKING:
@@ -18,6 +16,8 @@
     from dvc_objects.fs.callbacks import Callback
     from dvc_objects.fs.path import Path
 
+    from dvc_data.hashfile.db import HashFileDB
+
     from .hashfile.hash_info import HashInfo
     from .index import DataIndex, DataIndexEntry, ObjectStorage
 
@@ -33,7 +33,7 @@ class FileInfo(NamedTuple):
     fs_path: "AnyFSPath"
 
 
-class DataFileSystem(AbstractFileSystem):  # pylint:disable=abstract-method
+class DataFileSystem(AbstractFileSystem):
     root_marker = "/"
 
     def __init__(self, index: "DataIndex", **kwargs: Any):
@@ -55,7 +55,7 @@ def _get_key(self, path: str) -> Tuple[str, ...]:
             return ()
 
         key = self.path.relparts(path, self.root_marker)
-        if key == (".",) or key == ("",):
+        if key in ((".",), ("",)):
             key = ()
 
         return key
@@ -108,9 +108,7 @@ def _cache_remote_file(
         odb.add(path, fs, oid)
         return odb.fs, odb.oid_to_path(oid)
 
-    def _open(  # pylint: disable=arguments-differ
-        self, path: "AnyFSPath", **kwargs: Any
-    ) -> "BinaryIO":
+    def _open(self, path: "AnyFSPath", **kwargs: Any) -> "BinaryIO":
         typ, _, cache_storage, hi, fs, fspath = self._get_fs_path(path)
 
         if kwargs.get("cache", False) and typ == "remote" and cache_storage:
@@ -156,7 +154,7 @@ def info(self, path: "AnyFSPath", **kwargs: Any):
         info["name"] = path
         return info
 
-    def get_file(  # pylint: disable=arguments-differ
+    def get_file(
         self,
         rpath: "AnyFSPath",
         lpath: "AnyFSPath",
diff --git a/src/dvc_data/hashfile/_ignore.py b/src/dvc_data/hashfile/_ignore.py
index b0f7f32b..3ea4cee5 100644
--- a/src/dvc_data/hashfile/_ignore.py
+++ b/src/dvc_data/hashfile/_ignore.py
@@ -5,8 +5,6 @@
 if TYPE_CHECKING:
     from dvc_objects.fs.base import AnyFSPath, FileSystem
 
-# pylint: disable=unused-argument
-
 
 class Ignore(Protocol):
     def find(self, fs: "FileSystem", path: "AnyFSPath") -> Iterator["AnyFSPath"]:
diff --git a/src/dvc_data/hashfile/build.py b/src/dvc_data/hashfile/build.py
index 908a711f..d1bd6bcb 100644
--- a/src/dvc_data/hashfile/build.py
+++ b/src/dvc_data/hashfile/build.py
@@ -72,7 +72,8 @@ def _build_file(path, fs, name, odb=None, upload_odb=None, dry_run=False):
     state = odb.state if odb else None
     meta, hash_info = hash_file(path, fs, name, state=state)
     if upload_odb and not dry_run:
-        assert odb and name == "md5"
+        assert odb
+        assert name == "md5"
         return _upload_file(path, fs, odb, upload_odb)
 
     oid = hash_info.value
@@ -197,7 +198,8 @@ def _build_external_tree_info(odb: "HashFileDB", tree: "Tree", name: str) -> "Tr
     # able to validate .dir files right in the workspace (e.g. check s3
     # etag), but could be dropped for manual validation with regular md5,
     # that would be universal for all clouds.
-    assert odb and name != "md5"
+    assert odb
+    assert name != "md5"
     assert tree.fs
     assert tree.path
 
diff --git a/src/dvc_data/hashfile/cache.py b/src/dvc_data/hashfile/cache.py
index 9ac92423..dca5fe43 100644
--- a/src/dvc_data/hashfile/cache.py
+++ b/src/dvc_data/hashfile/cache.py
@@ -1,18 +1,18 @@
 import os
-import pickle  # nosec B403
+import pickle
 from functools import wraps
 from typing import Any, Optional
 
 import diskcache
-from diskcache import Disk as disk
-from diskcache import Index  # noqa: F401, pylint: disable=unused-import
-from diskcache import Timeout  # noqa: F401, pylint: disable=unused-import
-
-# pylint: disable=redefined-builtin
+from diskcache import Disk as _Disk
+from diskcache import (
+    Index,  # noqa: F401
+    Timeout,  # noqa: F401
+)
 
 
 class DiskError(Exception):
-    def __init__(self, directory: str, type: str) -> None:
+    def __init__(self, directory: str, type: str) -> None:  # noqa: A002
         self.directory = directory
         self.type = type
         super().__init__(f"Could not open disk '{type}' in {directory}")
@@ -26,22 +26,22 @@ def wrapped(self, *args, **kwargs):
         except (pickle.PickleError, ValueError) as e:
             if isinstance(e, ValueError) and "pickle protocol" not in str(e):
                 raise
-            # pylint: disable=protected-access
+
             raise DiskError(self._directory, type=self._type) from e
 
     return wrapped
 
 
-class Disk(disk):
+class Disk(_Disk):
     """Reraise pickle-related errors as DiskError."""
 
     # we need type to differentiate cache for better error messages
     _type: str
 
-    put = translate_pickle_error(disk.put)
-    get = translate_pickle_error(disk.get)
-    store = translate_pickle_error(disk.store)
-    fetch = translate_pickle_error(disk.fetch)
+    put = translate_pickle_error(_Disk.put)
+    get = translate_pickle_error(_Disk.get)
+    store = translate_pickle_error(_Disk.store)
+    fetch = translate_pickle_error(_Disk.fetch)
 
 
 class Cache(diskcache.Cache):
@@ -51,8 +51,8 @@ def __init__(
         self,
         directory: Optional[str] = None,
         timeout: int = 60,
-        disk: disk = Disk,  # pylint: disable=redefined-outer-name
-        type: Optional[str] = None,
+        disk: _Disk = Disk,
+        type: Optional[str] = None,  # noqa: A002
         **settings: Any,
     ) -> None:
         settings.setdefault("disk_pickle_protocol", 4)
diff --git a/src/dvc_data/hashfile/checkout.py b/src/dvc_data/hashfile/checkout.py
index 7452a8b5..dcdcd8f4 100644
--- a/src/dvc_data/hashfile/checkout.py
+++ b/src/dvc_data/hashfile/checkout.py
@@ -143,7 +143,7 @@ def _diff(
     if relink:
         diff.modified.extend(diff.unchanged)
     else:
-        for change in diff.unchanged:  # pylint: disable=not-an-iterable
+        for change in diff.unchanged:
             if not change.new.in_cache and not (
                 change.new.oid and change.new.oid.isdir
             ):
@@ -232,7 +232,7 @@ def _checkout(
     raise CheckoutError(failed)
 
 
-def checkout(
+def checkout(  # noqa: PLR0913
     path,
     fs,
     obj,
diff --git a/src/dvc_data/hashfile/db/__init__.py b/src/dvc_data/hashfile/db/__init__.py
index b936447b..6f7aebe3 100644
--- a/src/dvc_data/hashfile/db/__init__.py
+++ b/src/dvc_data/hashfile/db/__init__.py
@@ -3,20 +3,21 @@
 import os
 from contextlib import suppress
 from copy import copy
-from typing import TYPE_CHECKING, Callable, List, Optional, Union
+from typing import TYPE_CHECKING, Callable, ClassVar, List, Optional, Union
 
 from dvc_objects.db import ObjectDB
 from dvc_objects.errors import ObjectFormatError
 from dvc_objects.fs.callbacks import DEFAULT_CALLBACK
 
-from ..hash_info import HashInfo
-from ..obj import HashFile
+from dvc_data.hashfile.hash_info import HashInfo
+from dvc_data.hashfile.obj import HashFile
 
 if TYPE_CHECKING:
     from dvc_objects.fs.base import AnyFSPath, FileSystem
     from dvc_objects.fs.callbacks import Callback
 
-    from ..tree import Tree
+    from dvc_data.hashfile.tree import Tree
+
     from .index import ObjectDBIndexBase
 
 
@@ -48,11 +49,11 @@ def get_index(odb) -> "ObjectDBIndexBase":
 
 class HashFileDB(ObjectDB):
     DEFAULT_VERIFY = False
-    DEFAULT_CACHE_TYPES = ["copy"]
+    DEFAULT_CACHE_TYPES: ClassVar[List[str]] = ["copy"]
     CACHE_MODE: Optional[int] = None
 
     def __init__(self, fs: "FileSystem", path: str, **config):
-        from ..state import StateNoop
+        from dvc_data.hashfile.state import StateNoop
 
         super().__init__(fs, path)
         self.state = config.get("state", StateNoop())
@@ -133,16 +134,16 @@ def add(
             pass
         return transferred
 
-    def protect(self, path):  # pylint: disable=unused-argument
+    def protect(self, path):
         pass
 
-    def is_protected(self, path):  # pylint: disable=unused-argument
+    def is_protected(self, path):
         return False
 
-    def unprotect(self, path):  # pylint: disable=unused-argument
+    def unprotect(self, path):
         pass
 
-    def set_exec(self, path):  # pylint: disable=unused-argument
+    def set_exec(self, path):
         pass
 
     def check(
@@ -163,7 +164,7 @@ def check(
         - Remove the file from cache if it doesn't match the actual hash
         """
 
-        from ..hash import hash_file
+        from dvc_data.hashfile.hash import hash_file
 
         obj = self.get(oid)
         if self.is_protected(obj.path):
diff --git a/src/dvc_data/hashfile/db/index.py b/src/dvc_data/hashfile/db/index.py
index abcb26fe..6b5d9f64 100644
--- a/src/dvc_data/hashfile/db/index.py
+++ b/src/dvc_data/hashfile/db/index.py
@@ -55,7 +55,7 @@ def __init__(
         self,
         tmp_dir: "AnyFSPath",
         name: str,
-    ) -> None:  # pylint: disable=super-init-not-called
+    ) -> None:
         pass
 
     def __iter__(self) -> Iterator[str]:
@@ -87,10 +87,10 @@ def __init__(
         self,
         tmp_dir: "AnyFSPath",
         name: str,
-    ) -> None:  # pylint: disable=super-init-not-called
+    ) -> None:
         from dvc_objects.fs import LocalFileSystem
 
-        from ..cache import Cache, Index
+        from dvc_data.hashfile.cache import Cache, Index
 
         self.index_dir = os.path.join(tmp_dir, self.INDEX_DIR, name)
         self.fs = LocalFileSystem()
@@ -110,7 +110,7 @@ def dir_hashes(self) -> Iterator[str]:
 
     def clear(self) -> None:
         """Clear this index (to force re-indexing later)."""
-        from ..cache import Timeout
+        from dvc_data.hashfile.cache import Timeout
 
         try:
             self.index.clear()
@@ -119,7 +119,7 @@ def clear(self) -> None:
 
     def update(self, dir_hashes: Iterable[str], file_hashes: Iterable[str]) -> None:
         """Update this index, adding the specified hashes."""
-        from ..cache import Timeout
+        from dvc_data.hashfile.cache import Timeout
 
         try:
             with self.index.transact():
diff --git a/src/dvc_data/hashfile/db/local.py b/src/dvc_data/hashfile/db/local.py
index 1aaf6049..0338f18e 100644
--- a/src/dvc_data/hashfile/db/local.py
+++ b/src/dvc_data/hashfile/db/local.py
@@ -2,6 +2,7 @@
 import os
 import stat
 from functools import partial
+from typing import ClassVar, List
 
 from dvc_objects.db import noop, wrap_iter
 from dvc_objects.errors import ObjectDBError, ObjectFormatError
@@ -17,7 +18,7 @@
 
 
 class LocalHashFileDB(HashFileDB):
-    DEFAULT_CACHE_TYPES = ["reflink", "copy"]
+    DEFAULT_CACHE_TYPES: ClassVar[List[str]] = ["reflink", "copy"]
     CACHE_MODE = 0o444
     UNPACKED_DIR_SUFFIX = ".unpacked"
 
@@ -47,7 +48,7 @@ def oid_to_path(self, oid):
         # being ~5.5 times faster.
return f"{self.path}{os.sep}{oid[0:2]}{os.sep}{oid[2:]}" - def oids_exist(self, oids, jobs=None, progress=noop): # pylint: disable=unused-argument + def oids_exist(self, oids, jobs=None, progress=noop): ret = [] progress = partial(progress, "querying", len(oids)) diff --git a/src/dvc_data/hashfile/db/migrate.py b/src/dvc_data/hashfile/db/migrate.py index 50aa1a41..d7c1b07d 100644 --- a/src/dvc_data/hashfile/db/migrate.py +++ b/src/dvc_data/hashfile/db/migrate.py @@ -39,7 +39,6 @@ def prepare( Objects from src will be rehashed for addition to dest. """ - # pylint: disable-next=protected-access src_paths = [src.oid_to_path(oid) for oid in src._list_oids()] callback.set_size(len(src_paths)) with ThreadPoolExecutor( diff --git a/src/dvc_data/hashfile/db/reference.py b/src/dvc_data/hashfile/db/reference.py index 7d3ff89e..4d6181e5 100644 --- a/src/dvc_data/hashfile/db/reference.py +++ b/src/dvc_data/hashfile/db/reference.py @@ -1,7 +1,8 @@ import logging from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Union -from ..obj import HashFile +from dvc_data.hashfile.obj import HashFile + from . import HashFileDB, HashInfo if TYPE_CHECKING: diff --git a/src/dvc_data/hashfile/gc.py b/src/dvc_data/hashfile/gc.py index 2f0ca02c..293b5a6a 100644 --- a/src/dvc_data/hashfile/gc.py +++ b/src/dvc_data/hashfile/gc.py @@ -5,7 +5,7 @@ from .hash_info import HashInfo -def gc( +def gc( # noqa: C901 odb: "HashFileDB", used: Iterable["HashInfo"], jobs: Optional[int] = None, @@ -46,7 +46,6 @@ def _is_dir_hash(_hash): path = odb.oid_to_path(hash_) if _is_dir_hash(hash_): # backward compatibility - # pylint: disable=protected-access odb._remove_unpacked_dir(hash_) dir_paths.append(path) else: diff --git a/src/dvc_data/hashfile/hash.py b/src/dvc_data/hashfile/hash.py index 97bffd83..4f369980 100644 --- a/src/dvc_data/hashfile/hash.py +++ b/src/dvc_data/hashfile/hash.py @@ -170,8 +170,8 @@ def call(self, hook_name=None, **kwargs): if self.size and self.size > self.LARGE_FILE_SIZE: if not self._logged: logger.info( - f"Computing md5 for a large file '{self.fname}'. " - "This is only done once." + "Computing md5 for a large file %r. This is only done once.", + self.fname, ) self._logged = True super().call() diff --git a/src/dvc_data/hashfile/state.py b/src/dvc_data/hashfile/state.py index 13b1aaf6..557680ea 100644 --- a/src/dvc_data/hashfile/state.py +++ b/src/dvc_data/hashfile/state.py @@ -52,7 +52,7 @@ def close(self): def save(self, path, fs, hash_info, info=None): pass - def get(self, path, fs, info=None): # pylint: disable=unused-argument + def get(self, path, fs, info=None): return None, None def save_link(self, path, fs): @@ -71,7 +71,7 @@ def _checksum(info): return str(int(tokenize([info["ino"], info["mtime"], info["size"]]), 16)) -class State(StateBase): # pylint: disable=too-many-instance-attributes +class State(StateBase): HASH_VERSION = 1 def __init__(self, root_dir=None, tmp_dir=None, ignore: Optional["Ignore"] = None): @@ -116,7 +116,7 @@ def save(self, path, fs, hash_info, info=None): self.hashes[path] = json.dumps(entry) - def get(self, path, fs, info=None): + def get(self, path, fs, info=None): # noqa: PLR0911 """Gets the hash for the specified path info. Hash will be retrieved from the state database if available. 
diff --git a/src/dvc_data/hashfile/status.py b/src/dvc_data/hashfile/status.py
index ed835185..59492170 100644
--- a/src/dvc_data/hashfile/status.py
+++ b/src/dvc_data/hashfile/status.py
@@ -1,13 +1,14 @@
 import logging
 from typing import TYPE_CHECKING, Dict, Iterable, NamedTuple, Optional, Set
 
-from dvc_objects.db import ObjectDB
 from dvc_objects.fs import Schemes
 
 from .hash_info import HashInfo
 from .tree import Tree
 
 if TYPE_CHECKING:
+    from dvc_objects.db import ObjectDB
+
     from .db import HashFileDB
     from .db.index import ObjectDBIndexBase
     from .obj import HashFile
@@ -82,7 +83,7 @@ def _indexed_dir_hashes(
             yield tree.hash_info.value
 
 
-def status(
+def status(  # noqa: C901, PLR0912
     odb: "HashFileDB",
     obj_ids: Iterable["HashInfo"],
     name: Optional[str] = None,
@@ -117,7 +118,8 @@ def status(
             else:
                 tree = Tree.load(cache_odb, hash_info)
                 for _, _, oid in tree:
-                    assert oid and oid.value
+                    assert oid
+                    assert oid.value
                     hash_infos[oid.value] = oid
             if index:
                 dir_objs[hash_info.value] = tree
diff --git a/src/dvc_data/hashfile/transfer.py b/src/dvc_data/hashfile/transfer.py
index 567ada5b..31ed5cc3 100644
--- a/src/dvc_data/hashfile/transfer.py
+++ b/src/dvc_data/hashfile/transfer.py
@@ -39,7 +39,6 @@ class TransferResult(NamedTuple):
 def _log_exception(oid: str, exc: BaseException):
     # NOTE: this means we ran out of file descriptors and there is no
     # reason to try to proceed, as we will hit this error anyways.
-    # pylint: disable=no-member
     if isinstance(exc, OSError) and exc.errno == errno.EMFILE:
         raise exc
     logger.error("failed to transfer '%s'", oid, exc_info=exc)
@@ -113,11 +112,10 @@ def _do_transfer(
                 "directory '%s' contains missing files, skipping .dir file upload",
                 dir_hash,
             )
+        elif _add(src, dest, [dir_obj.hash_info], **kwargs):
+            failed_ids.add(dir_obj.hash_info)
         else:
-            if _add(src, dest, [dir_obj.hash_info], **kwargs):
-                failed_ids.add(dir_obj.hash_info)
-            else:
-                succeeded_dir_objs.append(dir_obj)
+            succeeded_dir_objs.append(dir_obj)
 
     # insert the rest
     failed_ids.update(_add(src, dest, all_file_ids, **kwargs))
@@ -135,7 +133,8 @@ def _do_transfer(
                 dir_obj.hash_info,
                 len(file_hashes),
             )
-            assert dir_obj.hash_info and dir_obj.hash_info.value
+            assert dir_obj.hash_info
+            assert dir_obj.hash_info.value
             dest_index.update([dir_obj.hash_info.value], file_hashes)
 
     return set()
@@ -173,7 +172,7 @@ def _error(oid: str, exc: BaseException):
     return failed
 
 
-def transfer(
+def transfer(  # noqa: PLR0913
     src: "HashFileDB",
     dest: "HashFileDB",
     obj_ids: Iterable["HashInfo"],
diff --git a/src/dvc_data/hashfile/tree.py b/src/dvc_data/hashfile/tree.py
index e868dd3f..53ad7acc 100644
--- a/src/dvc_data/hashfile/tree.py
+++ b/src/dvc_data/hashfile/tree.py
@@ -5,17 +5,16 @@
 
 from dvc_objects.errors import ObjectFormatError
 
+from dvc_data.hashfile.hash import DEFAULT_ALGORITHM, hash_file
+from dvc_data.hashfile.meta import Meta
+from dvc_data.hashfile.obj import HashFile
 from dvc_data.utils import cached_property
 
-from ..hashfile.hash import DEFAULT_ALGORITHM, hash_file
-from ..hashfile.meta import Meta
-from ..hashfile.obj import HashFile
-
 if TYPE_CHECKING:
     from pygtrie import Trie
 
-    from ..hashfile.db import HashFileDB
-    from ..hashfile.hash_info import HashInfo
+    from dvc_data.hashfile.db import HashFileDB
+    from dvc_data.hashfile.hash_info import HashInfo
 
 logger = logging.getLogger(__name__)
 
@@ -47,7 +46,7 @@ def _try_load(
 class Tree(HashFile):
     PARAM_RELPATH: Final = "relpath"
 
-    def __init__(self):  # pylint: disable=super-init-not-called
+    def __init__(self):
         # this should really be part of a TreeBuilder.
         # HashFile does not support these properties as none values, so we may be losing
         # type-safety with this.
@@ -148,7 +147,7 @@ def _hi_to_dict(hi: Optional["HashInfo"]) -> Dict[str, Any]:
                 **_hi_to_dict(hi),
                 self.PARAM_RELPATH: posixpath.sep.join(parts),
             }
-            for parts, meta, hi in self  # noqa: B301
+            for parts, meta, hi in self
         ),
         key=itemgetter(self.PARAM_RELPATH),
     )
@@ -163,7 +162,7 @@ def as_bytes(self, with_meta: bool = False):
 
     @classmethod
     def from_list(cls, lst, hash_name: Optional[str] = None):
-        from ..hashfile.hash_info import HashInfo
+        from dvc_data.hashfile.hash_info import HashInfo
 
         tree = cls()
         for _entry in lst:
@@ -212,7 +211,7 @@ def load(cls, odb, hash_info, hash_name: Optional[str] = None) -> "Tree":
 
         return tree
 
-    def filter(self, prefix: Tuple[str]) -> Optional["Tree"]:
+    def filter(self, prefix: Tuple[str]) -> Optional["Tree"]:  # noqa: A003
         """Return a filtered copy of this tree that only contains entries
         inside prefix.
 
@@ -316,7 +315,7 @@ def _merge(ancestor, our, their, allowed=None):
         patch_theirs_first = patch(their_diff + our_diff, ancestor)
     except KeyError as e:
         # todo: fails if both diffs delete the same object
-        raise MergeError(
+        raise MergeError(  # noqa: B904
             f"unable to auto-merge the following paths:\nboth deleted: {e}"
         )
     unmergeable = list(diff(patch_ours_first, patch_theirs_first))
diff --git a/src/dvc_data/hashfile/utils.py b/src/dvc_data/hashfile/utils.py
index a2974c10..670c46cf 100644
--- a/src/dvc_data/hashfile/utils.py
+++ b/src/dvc_data/hashfile/utils.py
@@ -41,7 +41,7 @@ def get_mtime_and_size(
 
     # We track file changes and moves, which cannot be detected with simply
     # max(mtime(f) for f in non_ignored_files)
-    hasher = hashlib.md5()  # nosec B303, B324
+    hasher = hashlib.md5()  # noqa: S324
     hasher.update(json.dumps(files_mtimes, sort_keys=True).encode("utf-8"))
     mtime = hasher.hexdigest()
     return mtime, size
diff --git a/src/dvc_data/index/__init__.py b/src/dvc_data/index/__init__.py
index 0799c099..d7279671 100644
--- a/src/dvc_data/index/__init__.py
+++ b/src/dvc_data/index/__init__.py
@@ -1,13 +1,13 @@
-from .add import add  # noqa: F401, pylint: disable=unused-import
-from .build import build  # noqa: F401, pylint: disable=unused-import
-from .diff import diff  # noqa: F401, pylint: disable=unused-import
-from .index import *  # noqa: F401,F403, pylint: disable=unused-import
-from .save import md5, save  # noqa: F401, pylint: disable=unused-import
-from .serialize import (  # noqa: F401, pylint: disable=unused-import
-    read_db,
-    read_json,
-    write_db,
-    write_json,
+from .add import add  # noqa: F401
+from .build import build  # noqa: F401
+from .diff import diff  # noqa: F401
+from .index import *  # noqa: F403
+from .save import md5, save  # noqa: F401
+from .serialize import (
+    read_db,  # noqa: F401
+    read_json,  # noqa: F401
+    write_db,  # noqa: F401
+    write_json,  # noqa: F401
 )
-from .update import update  # noqa: F401, pylint: disable=unused-import
-from .view import DataIndexView, view  # noqa: F401, pylint: disable=unused-import
+from .update import update  # noqa: F401
+from .view import DataIndexView, view  # noqa: F401
diff --git a/src/dvc_data/index/add.py b/src/dvc_data/index/add.py
index 4065a681..82b295b2 100644
--- a/src/dvc_data/index/add.py
+++ b/src/dvc_data/index/add.py
@@ -6,7 +6,8 @@
 if TYPE_CHECKING:
     from dvc_objects.fs import FileSystem
 
-    from ..hashfile._ignore import Ignore
+    from dvc_data.hashfile._ignore import Ignore
+
     from .index import DataIndex, DataIndexKey
 
 
diff --git a/src/dvc_data/index/build.py b/src/dvc_data/index/build.py
index e9e16064..1e46cb8a 100644
--- a/src/dvc_data/index/build.py
+++ b/src/dvc_data/index/build.py
@@ -1,17 +1,17 @@
 from itertools import chain
 from typing import TYPE_CHECKING, Any, Dict, Iterable, Optional, Tuple
 
-from ..hashfile.hash import DEFAULT_ALGORITHM, hash_file
-from ..hashfile.meta import Meta
+from dvc_data.hashfile.hash import DEFAULT_ALGORITHM, hash_file
+from dvc_data.hashfile.meta import Meta
+
 from .index import DataIndex, DataIndexEntry, FileStorage
 
 if TYPE_CHECKING:
     from dvc_objects.fs.base import FileSystem
 
+    from dvc_data.hashfile._ignore import Ignore
     from dvc_data.hashfile.state import StateBase
 
-    from ..hashfile._ignore import Ignore
-
 
 def build_entry(
     path: str,
diff --git a/src/dvc_data/index/checkout.py b/src/dvc_data/index/checkout.py
index 33e4fc64..c3da00d2 100644
--- a/src/dvc_data/index/checkout.py
+++ b/src/dvc_data/index/checkout.py
@@ -20,7 +20,8 @@
 from dvc_objects.fs.local import LocalFileSystem
 from dvc_objects.fs.utils import exists as batch_exists
 
-from ..hashfile.meta import Meta
+from dvc_data.hashfile.meta import Meta
+
 from .diff import ADD, DELETE, MODIFY, UNCHANGED
 from .diff import diff as idiff
 from .index import FileStorage, ObjectStorage
@@ -28,14 +29,15 @@
 if TYPE_CHECKING:
     from dvc_objects.fs.base import AnyFSPath, FileSystem
 
-    from ..hashfile.state import StateBase
+    from dvc_data.hashfile.state import StateBase
+
     from .diff import Change
     from .index import BaseDataIndex, DataIndexEntry, DataIndexKey, Storage
 
 logger = logging.getLogger(__name__)
 
 
-class VersioningNotSupported(Exception):
+class VersioningNotSupported(Exception):  # noqa: N818
     pass
 
 
@@ -67,7 +69,7 @@ def _delete_files(
     fs.remove([fs.path.join(path, *(entry.key or ())) for entry in entries])
 
 
-def _create_files(  # noqa: C901
+def _create_files(  # noqa: C901, PLR0912, PLR0913
     entries,
     index: Optional["BaseDataIndex"],
     path: str,
@@ -204,7 +206,7 @@ class Diff:
     dirs_failed: list = field(default=Factory(list))
 
 
-def _compare(  # noqa: C901
+def _compare(  # noqa: C901, PLR0912
     old,
     new,
     relink: bool = False,
@@ -288,14 +290,14 @@ def meta_cmp_key(meta):
             else:
                 continue
         else:
-            raise AssertionError()
+            raise AssertionError
 
         ret.changes[change.key] = change
 
     return ret
 
 
-def compare(  # noqa: C901
+def compare(
     old,
     new,
     relink: bool = False,
@@ -335,7 +337,7 @@ def _onerror_noop(*args, **kwargs):
     pass
 
 
-def apply(
+def apply(  # noqa: PLR0913
     diff: "Diff",
     path: str,
     fs: "FileSystem",
diff --git a/src/dvc_data/index/collect.py b/src/dvc_data/index/collect.py
index 8fa59c8c..14e44ef4 100644
--- a/src/dvc_data/index/collect.py
+++ b/src/dvc_data/index/collect.py
@@ -71,7 +71,7 @@ def _collect_from_index(
             cache[(*cache_prefix, *key)] = entry
 
 
-def collect(  # noqa: C901
+def collect(  # noqa: C901, PLR0912
     idxs,
     storage,
     callback: "Callback" = DEFAULT_CALLBACK,
@@ -104,9 +104,8 @@ def collect(  # noqa: C901
 
         key = (fsid, tokenize(data.path))
 
-        if key not in storage_by_fs:
-            if cache_index.has_node((*cache_key, *key)):
-                skip.add(key)
+        if key not in storage_by_fs and cache_index.has_node((*cache_key, *key)):
+            skip.add(key)
 
         if key not in skip:
             _collect_from_index(
diff --git a/src/dvc_data/index/diff.py b/src/dvc_data/index/diff.py
index 411f69cb..ff98ab4c 100644
--- a/src/dvc_data/index/diff.py
+++ b/src/dvc_data/index/diff.py
@@ -5,8 +5,9 @@
 from dvc_objects.fs.callbacks import DEFAULT_CALLBACK, Callback
 
 if TYPE_CHECKING:
-    from dvc_data.hashfile.meta import Meta
     from dvc_data.hashfile.hash_info import HashInfo
+    from dvc_data.hashfile.meta import Meta
+
     from .index import BaseDataIndex, DataIndexKey
     from .index import DataIndexDirError, DataIndexEntry
 
@@ -82,7 +83,7 @@ def _diff_hash_info(
     return UNCHANGED
 
 
-def _diff_entry(
+def _diff_entry(  # noqa: PLR0911
     old: Optional["DataIndexEntry"],
     new: Optional["DataIndexEntry"],
     *,
@@ -156,7 +157,7 @@ def _get_items(
     return items, unknown
 
 
-def _diff(
+def _diff(  # noqa: C901, PLR0912
     old: Optional["BaseDataIndex"],
     new: Optional["BaseDataIndex"],
     *,
@@ -284,7 +285,7 @@ def _get_key(change):
     yield from deleted
 
 
-def diff(
+def diff(  # noqa: PLR0913
     old: Optional["BaseDataIndex"],
     new: Optional["BaseDataIndex"],
     *,
diff --git a/src/dvc_data/index/fetch.py b/src/dvc_data/index/fetch.py
index 4e8c61c6..bffb65a2 100644
--- a/src/dvc_data/index/fetch.py
+++ b/src/dvc_data/index/fetch.py
@@ -8,7 +8,7 @@
 
 from .build import build
 from .checkout import apply, compare
-from .collect import collect  # noqa: F401, pylint: disable=unused-import
+from .collect import collect  # noqa: F401
 from .index import ObjectStorage
 from .save import md5, save
 
@@ -52,9 +52,11 @@ def fetch(
         try:
             # NOTE: make sure there are no auth errors
             data.fs.exists(data.path)
-        except Exception:  # pylint: disable=W0703
+        except Exception:
             failed += len(fs_index)
-            logger.exception(f"failed to connect to {data.fs.protocol} ({data.path})")
+            logger.exception(
+                "failed to connect to %s (%s)", data.fs.protocol, data.path
+            )
             continue
 
         with cb:
diff --git a/src/dvc_data/index/index.py b/src/dvc_data/index/index.py
index c4ae9ab4..d17ade31 100644
--- a/src/dvc_data/index/index.py
+++ b/src/dvc_data/index/index.py
@@ -15,19 +15,22 @@
 )
 
 import attrs
-from sqltrie import ShortKeyError  # noqa: F401, pylint: disable=unused-import
-from sqltrie import JSONTrie, PyGTrie, SQLiteTrie
+from sqltrie import (
+    JSONTrie,
+    PyGTrie,
+    ShortKeyError,
+    SQLiteTrie,
+)
 
+from dvc_data.hashfile.hash_info import HashInfo
+from dvc_data.hashfile.meta import Meta
+from dvc_data.hashfile.tree import Tree
 from dvc_data.utils import cached_property
 
-from ..hashfile.hash_info import HashInfo
-from ..hashfile.meta import Meta
-from ..hashfile.tree import Tree
-
 if TYPE_CHECKING:
     from dvc_objects.fs.base import FileSystem
 
-    from ..hashfile.db import HashFileDB
+    from dvc_data.hashfile.db import HashFileDB
 
 logger = logging.getLogger(__name__)
 
@@ -97,7 +100,7 @@ def _trie(self):
         return SQLiteTrie()
 
     @classmethod
-    def open(cls, path):
+    def open(cls, path):  # noqa: A003
         ret = cls()
         ret._trie = SQLiteTrie.open(path)
         return ret
@@ -186,9 +189,7 @@ def get_key(self, entry: "DataIndexEntry") -> "DataIndexKey":
         if not entry.hash_info or not entry.hash_info.value:
             raise ValueError
 
-        return self.odb._oid_parts(  # pylint: disable=protected-access
-            entry.hash_info.value
-        )
+        return self.odb._oid_parts(entry.hash_info.value)
 
     def get(self, entry: "DataIndexEntry") -> Tuple["FileSystem", str]:
         if not entry.hash_info:
@@ -205,7 +206,7 @@ def exists(self, entry: "DataIndexEntry", refresh: bool = False) -> bool:
         if self.index is None:
             return self.odb.exists(value)
 
-        key = self.odb._oid_parts(value)  # pylint: disable=protected-access
+        key = self.odb._oid_parts(value)
         if not refresh:
             return key in self.index
 
@@ -583,7 +584,7 @@ def _load_from_storage(trie, entry, storage_info):
             else:
                 _load_from_file_storage(trie, entry, storage)
             return True
-        except Exception as exc:  # pylint: disable=W0703
+        except Exception as exc:  # noqa: BLE001
             # NOTE: this might be some random fs exception, e.g. auth error
             last_exc = exc
             logger.debug(
@@ -613,7 +614,7 @@ def _onerror(_, exc):
         self.update(*args, **kwargs)
 
     @classmethod
-    def open(cls, path):
+    def open(cls, path):  # noqa: A003
         ret = cls()
         ret._trie = DataIndexTrie.open(path)
         return ret
@@ -622,7 +623,7 @@ def view(self, key):
         import copy
 
         ret = DataIndex()
-        ret._trie = self._trie.view(key)  # pylint: disable=protected-access
+        ret._trie = self._trie.view(key)
         ret.storage_map = copy.deepcopy(self.storage_map)
         return ret
 
diff --git a/src/dvc_data/index/save.py b/src/dvc_data/index/save.py
index 416f9339..26706d24 100644
--- a/src/dvc_data/index/save.py
+++ b/src/dvc_data/index/save.py
@@ -4,16 +4,17 @@
 
 from dvc_objects.fs.callbacks import DEFAULT_CALLBACK
 
-from ..hashfile.hash import DEFAULT_ALGORITHM, hash_file
-from ..hashfile.meta import Meta
-from ..hashfile.tree import Tree
+from dvc_data.hashfile.hash import DEFAULT_ALGORITHM, hash_file
+from dvc_data.hashfile.meta import Meta
+from dvc_data.hashfile.tree import Tree
 
 if TYPE_CHECKING:
     from dvc_objects.fs.base import FileSystem
     from dvc_objects.fs.callbacks import Callback
 
-    from ..hashfile.db import HashFileDB
-    from ..hashfile.state import StateBase
+    from dvc_data.hashfile.db import HashFileDB
+    from dvc_data.hashfile.state import StateBase
+
     from .index import BaseDataIndex, DataIndexKey
 
@@ -65,7 +66,8 @@ def build_tree(
     name: str = DEFAULT_ALGORITHM,
 ) -> Tuple["Meta", Tree]:
     tree_meta = Meta(size=0, nfiles=0, isdir=True)
-    assert tree_meta.size is not None and tree_meta.nfiles is not None
+    assert tree_meta.size is not None
+    assert tree_meta.nfiles is not None
     tree = Tree()
     for key, entry in index.iteritems(prefix=prefix):
         if key == prefix or entry.meta and entry.meta.isdir:
@@ -83,7 +85,8 @@ def _save_dir_entry(
     key: "DataIndexKey",
     odb: Optional["HashFileDB"] = None,
 ) -> None:
-    from ..hashfile.db import add_update_tree
+    from dvc_data.hashfile.db import add_update_tree
+
     from .index import StorageKeyError
 
     entry = index[key]
@@ -98,7 +101,8 @@ def _save_dir_entry(
     tree = add_update_tree(cache, tree)
     entry.meta = meta
     entry.hash_info = tree.hash_info
-    assert tree.hash_info.name and tree.hash_info.value
+    assert tree.hash_info.name
+    assert tree.hash_info.value
     setattr(entry.meta, tree.hash_info.name, tree.hash_info.value)
diff --git a/src/dvc_data/index/serialize.py b/src/dvc_data/index/serialize.py
index e6b361ce..5cb0267b 100644
--- a/src/dvc_data/index/serialize.py
+++ b/src/dvc_data/index/serialize.py
@@ -1,6 +1,7 @@
 import json
 
-from ..hashfile.cache import Cache
+from dvc_data.hashfile.cache import Cache
+
 from .index import DataIndex, DataIndexEntry
 
diff --git a/src/dvc_data/index/view.py b/src/dvc_data/index/view.py
index e171031c..7c870f41 100644
--- a/src/dvc_data/index/view.py
+++ b/src/dvc_data/index/view.py
@@ -102,7 +102,7 @@ def _load_dir_keys(
             and entry.hash_info.isdir
             and not entry.loaded
         ):
-            self._index._load(prefix, entry)  # pylint: disable=protected-access
+            self._index._load(prefix, entry)
         if not shallow:
             yield from (
                 (key, val)
@@ -126,7 +126,7 @@ def _node_factory(path_conv, key, children, *args):
         return self._index.traverse(_node_factory, **kwargs)
 
     def ls(self, root_key: DataIndexKey, detail=True):
-        self._index._ensure_loaded(root_key)  # pylint: disable=protected-access
+        self._index._ensure_loaded(root_key)
 
         def _filter_fn(entry):
             key = entry[0] if detail else entry
diff --git a/src/dvc_data/repo.py b/src/dvc_data/repo.py
index 407dada5..a4097988 100644
--- a/src/dvc_data/repo.py
+++ b/src/dvc_data/repo.py
@@ -7,7 +7,7 @@
 from .index import DataIndex
 
 
-class NotARepo(Exception):
+class NotARepoError(Exception):
     pass
 
 
@@ -18,7 +18,7 @@ def __init__(self, root: str = "", fs: Optional[FileSystem] = None) -> None:
         control_dir: str = os.getenv("DVC_DIR") or fs.path.join(root, ".dvc")
 
         if not fs.isdir(control_dir):
-            raise NotARepo(f"{root} is not a data repo.")
+            raise NotARepoError(f"{root} is not a data repo.")
 
         self.fs = fs or localfs
         self.root = root
@@ -40,9 +40,9 @@ def discover(
         while remaining:
             try:
                 return cls(path, fs)
-            except NotARepo:
+            except NotARepoError:
                 path, remaining = fs.path.split(path)
 
-        raise NotARepo(f"No data repository was found at {start}")
+        raise NotARepoError(f"No data repository was found at {start}")
 
     @property
     def control_dir(self):
diff --git a/src/dvc_data/utils.py b/src/dvc_data/utils.py
index bfe703c9..6ef6405d 100644
--- a/src/dvc_data/utils.py
+++ b/src/dvc_data/utils.py
@@ -1,7 +1,7 @@
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
-    from functools import cached_property
+    from functools import cached_property  # noqa: TID251
 else:
     from funcy import cached_property  # noqa: TID251
 
diff --git a/tests/benchmarks/test_checkout.py b/tests/benchmarks/test_checkout.py
index 438c724d..0dee511c 100644
--- a/tests/benchmarks/test_checkout.py
+++ b/tests/benchmarks/test_checkout.py
@@ -1,11 +1,11 @@
 import shutil
 from os import fspath
+from pathlib import Path
+from tempfile import TemporaryDirectory
 
 import pytest
 from dvc_objects.fs import localfs
 from dvc_objects.fs.generic import test_links as _test_links
-from pathlib import Path
-from tempfile import TemporaryDirectory
 
 from dvc_data.cli import build, gentree, get_odb
 from dvc_data.hashfile.checkout import checkout
diff --git a/tests/hashfile/test_cache.py b/tests/hashfile/test_cache.py
index d3082d2a..1064fdf8 100644
--- a/tests/hashfile/test_cache.py
+++ b/tests/hashfile/test_cache.py
@@ -44,6 +44,6 @@ def test_pickle_backwards_compat(tmp_path, proto_a, proto_b):
         directory=fspath(tmp_path / "test"),
         disk_pickle_protocol=proto_b,
     ) as cache:
-        assert ("value1", "value2") == cache["key"]
+        assert cache["key"] == ("value1", "value2")
         set_value(cache, "key", ("value3", "value4"))
-        assert ("value3", "value4") == cache["key"]
+        assert cache["key"] == ("value3", "value4")
diff --git a/tests/hashfile/test_db_index.py b/tests/hashfile/test_db_index.py
index f7cb38ae..00e0f016 100644
--- a/tests/hashfile/test_db_index.py
+++ b/tests/hashfile/test_db_index.py
@@ -6,8 +6,7 @@
 
 @pytest.fixture
 def index(tmp_upath):
-    index_ = ObjectDBIndex(tmp_upath, "foo")
-    yield index_
+    return ObjectDBIndex(tmp_upath, "foo")
 
 
 def test_roundtrip(tmp_upath, index):
diff --git a/tests/hashfile/test_diff.py b/tests/hashfile/test_diff.py
index be7a9f29..00550597 100644
--- a/tests/hashfile/test_diff.py
+++ b/tests/hashfile/test_diff.py
@@ -14,7 +14,7 @@ def tree():
         ]
     )
     tree.digest()
-    yield tree
+    return tree
 
 
 @pytest.fixture
diff --git a/tests/hashfile/test_tree.py b/tests/hashfile/test_tree.py
index f5cff11e..0f97ac40 100644
--- a/tests/hashfile/test_tree.py
+++ b/tests/hashfile/test_tree.py
@@ -127,7 +127,7 @@ def test_list_dos2unix(lst, trie_dict):
 )
 def test_nfiles(trie_dict, nfiles):
     tree = Tree()
-    tree._dict = trie_dict  # pylint:disable=protected-access
+    tree._dict = trie_dict
     assert len(tree) == nfiles
 
 
@@ -150,7 +150,7 @@ def test_nfiles(trie_dict, nfiles):
 )
 def test_items(trie_dict):
     tree = Tree()
-    tree._dict = trie_dict  # pylint:disable=protected-access
+    tree._dict = trie_dict
     assert list(tree) == [(key, value[0], value[1]) for key, value in trie_dict.items()]
diff --git a/tests/index/test_index.py b/tests/index/test_index.py
index ac40639d..115c44d8 100644
--- a/tests/index/test_index.py
+++ b/tests/index/test_index.py
@@ -1,6 +1,5 @@
 import pytest
 
-import dvc_data.index.checkout as checkout
 from dvc_data.hashfile.hash_info import HashInfo
 from dvc_data.hashfile.meta import Meta
 from dvc_data.index import (
@@ -9,6 +8,7 @@
     ObjectStorage,
     add,
     build,
+    checkout,
     md5,
     read_db,
     read_json,