Skip to content

Commit

Permalink
cleanup cli and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
skshetry committed Jan 14, 2025
1 parent 5eca69d commit 290e497
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 211 deletions.
7 changes: 3 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,7 @@ Source = "https://github.com/iterative/dvc-data"

[project.optional-dependencies]
cli = [
"typer>=0.6",
"rich>=10.11.0,<14.0.0",
"typer-slim>=0.12",
]
all = [
"dvc-data[cli]",
Expand All @@ -52,12 +51,12 @@ tests = [
"pytest-cov>=4.1.0",
"pytest-mock",
"pytest-benchmark>=4",
"pytest-servers[s3]==0.5.7",
"pytest-servers==0.5.7",
]
dev = [
"dvc-data[all]",
"dvc-data[tests]",
"blake3>=0.3.1; python_version < '3.13'",
"blake3>=0.3.1",
"mypy==1.14.0",
"types-tqdm",
]
Expand Down
174 changes: 1 addition & 173 deletions src/dvc_data/cli.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import enum
import errno
import json
import math
import os
import posixpath
Expand All @@ -18,7 +17,6 @@
from attrs import asdict
from dvc_objects.errors import ObjectFormatError
from dvc_objects.fs import LocalFileSystem, MemoryFileSystem
from rich.traceback import install
from tqdm import tqdm

from dvc_data.callbacks import TqdmCallback
Expand All @@ -35,13 +33,10 @@
from dvc_data.hashfile.obj import HashFile
from dvc_data.hashfile.state import State
from dvc_data.hashfile.transfer import transfer as _transfer
from dvc_data.hashfile.tree import Tree, merge
from dvc_data.hashfile.tree import Tree
from dvc_data.hashfile.tree import du as _du
from dvc_data.repo import NotARepoError, Repo

install(show_locals=True, suppress=[typer, click])


file_type = typer.Argument(
...,
exists=True,
Expand Down Expand Up @@ -418,172 +413,6 @@ def _prepare_info(entry):
print(state, info, sep=": ")


@app.command(help="Merge two trees and optionally write to the database.")
def merge_tree(oid1: str, oid2: str, force: bool = False):
    # Both oids are expanded with ``from_shortoid`` and loaded from the object
    # database returned by ``get_odb()``.  Unless ``force`` is set, the two
    # trees are diffed first and the command exits with status 1 when any
    # non-root entry is reported as modified.  Otherwise the merged tree is
    # digested, printed and added to the odb.
    # NOTE(review): the help text says "optionally write", but the merged tree
    # is always added to the odb below -- confirm which one is intended.
    odb = get_odb()
    oid1 = from_shortoid(odb, oid1)
    oid2 = from_shortoid(odb, oid2)
    obj1 = load(odb, odb.get(oid1).hash_info)
    obj2 = load(odb, odb.get(oid2).hash_info)

    # The oids come straight from the command line, so validate them
    # explicitly: an ``assert`` is stripped under ``python -O`` and otherwise
    # surfaces as a bare AssertionError traceback.
    if not isinstance(obj1, Tree) or not isinstance(obj2, Tree):
        typer.echo("not a tree obj", err=True)
        raise typer.Exit(1)

    if not force:
        # detect conflicts
        d = _diff(obj1, obj2, odb)
        modified = [
            posixpath.join(*change.old.key)
            for change in d.modified
            if change.old.key != ROOT
        ]
        if modified:
            print("Following files in conflicts:", *modified, sep="\n")
            raise typer.Exit(1)

    # Second argument is passed as ``None`` -- presumably the common ancestor
    # of the two trees; verify against ``dvc_data.hashfile.tree.merge``.
    tree = merge(odb, None, obj1.hash_info, obj2.hash_info)
    tree.digest()
    print(tree)
    odb.add(tree.path, tree.fs, tree.oid, hardlink=True)


def process_patch(patch_file, **kwargs):
    """Collect patch operations from an optional JSON file and keyword options.

    Args:
        patch_file: Path-like object pointing at a JSON patch file, or a falsy
            value to skip the file.  The file must hold a JSON array of
            operation objects.  For ``add``/``modify`` entries, ``path`` is
            rewritten to be relative to the directory of the patch file.
        **kwargs: Maps an operation name to an iterable of items.  A tuple
            item is unpacked as ``(path, to)``; any other item is used as the
            ``path`` as-is.

    Returns:
        A list of operation dicts: the entries read from the file first,
        followed by one ``{"op": ..., "path": ...[, "to": ...]}`` entry per
        keyword item, in keyword order.

    Raises:
        ValueError: if the patch file does not contain a JSON array.
    """
    patch = []
    if patch_file:
        # JSON is UTF-8 by specification; do not depend on the locale's
        # default encoding when decoding the patch file.
        with typer.open_file(patch_file, encoding="utf-8") as f:
            patch = json.load(f)
        if not isinstance(patch, list):
            raise ValueError("patch file must contain a JSON array of operations")

        base_dir = patch_file.parent
        for appl in patch:
            path = appl.get("path")
            # Only add/modify reference local files; the other operations
            # address paths inside the tree and must stay untouched.
            if path and appl.get("op") in ("add", "modify"):
                appl["path"] = os.fspath(base_dir.joinpath(path))

    for op, items in kwargs.items():
        for item in items:
            if isinstance(item, tuple):
                path, to = item
                extra = {"path": os.fspath(path), "to": to}
            else:
                extra = {"path": item}
            patch.append({"op": op, **extra})

    return patch


def apply_op(odb, obj, application):
    """Apply one patch operation to the tree object ``obj`` in place.

    Args:
        odb: Object database; only used by ``add``/``modify`` to store the
            newly built object.
        obj: Tree-like object exposing ``_dict``, ``add`` and ``get``.
        application: Mapping with an ``"op"`` and a ``"path"`` key, plus a
            ``"to"`` key for ``add``, ``modify``, ``copy`` and ``move``.
            Paths inside the tree are ``/``-separated.

    Returns:
        Whatever ``obj.add`` returns for ``add``/``modify``; ``None`` for
        every other operation.

    Raises:
        FileExistsError: ``add`` targets a key already present in the tree.
        FileNotFoundError: ``path`` is not in the tree (all operations except
            ``add``/``modify``, where ``path`` is a local file).
        ValueError: ``"op"`` is missing or names an unknown operation.
    """
    if "op" not in application:
        # Explicit check: an ``assert`` would vanish under ``python -O``.
        raise ValueError("patch operation is missing 'op'")
    op = application["op"]
    path = application["path"]
    keys = tuple(path.split("/"))

    if op in ("add", "modify"):
        # Here ``path`` is a local file and ``to`` is the key in the tree.
        new = tuple(application["to"].split("/"))
        if op == "add" and new in obj._dict:
            raise FileExistsError(errno.EEXIST, os.strerror(errno.EEXIST), path)

        fs = LocalFileSystem()
        _, meta, new_obj = _build(odb, path, fs, "md5")
        odb.add(path, fs, new_obj.hash_info.value, hardlink=False)
        return obj.add(new, meta, new_obj.hash_info)

    # Every remaining operation addresses an existing entry of the tree.
    if keys not in obj._dict:
        raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), path)

    if op == "test":
        return None
    if op == "remove":
        obj._dict.pop(keys)
        # ``_dict`` was mutated directly, so drop the cached trie.
        obj.__dict__.pop("_trie", None)
        return None
    if op in ("copy", "move"):
        new = tuple(application["to"].split("/"))
        obj.add(new, *obj.get(keys))
        if op == "move":
            obj._dict.pop(keys)
            # Same direct mutation as in "remove": invalidate the cached
            # trie so it cannot keep referring to the moved-away key.
            obj.__dict__.pop("_trie", None)
        return None
    raise ValueError(f"unknown {op=}")


def multi_value(*opts, **kwargs):
    """Return a ``click.option`` decorator for an optional, repeatable option.

    Positional and keyword arguments are forwarded to ``click.option``
    unchanged; ``multiple=True`` and ``required=False`` are always set.
    """
    return click.option(
        *opts,
        multiple=True,
        required=False,
        **kwargs,
    )


# Click parameter type for a local file argument: the path must exist, be a
# readable file (directories are rejected), and is resolved and handed to the
# command as a ``pathlib.Path``.
cl_path = click.Path(
exists=True,
file_okay=True,
dir_okay=False,
readable=True,
path_type=Path,
resolve_path=True,
)
# Same constraints as ``cl_path`` but with ``allow_dash=True`` -- presumably
# so that a lone "-" is accepted to mean a standard stream; confirm against
# the click documentation for ``click.Path``.
cl_path_dash = click.Path(
exists=True,
file_okay=True,
dir_okay=False,
readable=True,
allow_dash=True,
path_type=Path,
resolve_path=True,
)


@click.command()
@click.argument("oid")
@click.option("--patch-file", type=cl_path_dash)
@multi_value(
    "--add",
    type=(cl_path, str),
    help="Add file from specified local path to a given path in the tree",
)
@multi_value(
    "--modify",
    type=(cl_path, str),
    help="Modify file with specified local path to a given path in the tree",
)
@multi_value("--move", type=(str, str), help="Move a file in the tree")
@multi_value("--copy", type=(str, str), help="Copy path from a tree to another path")
@multi_value("--remove", type=str, help="Remove path from a tree")
@multi_value("--test", type=str, help="Check for the existence of the path")
def update_tree(oid, patch_file, add, modify, move, copy, remove, test):
    """Update tree contents virtually with a patch file in json format.
    Example patch file for reference:
    [\n
    {"op": "remove", "path": "test/0/00004.png"},\n
    {"op": "move", "path": "test/1/00003.png", "to": "test/0/00003.png"},\n
    {"op": "copy", "path": "test/1/00003.png", "to": "test/1/11113.png"},\n
    {"op": "test", "path": "test/1/00003.png"},\n
    {"op": "add", "path": "local/path/to/patch.json", "to": "foo"},\n
    {"op": "modify", "path": "local/path/to/patch.json", "to": "bar"}\n
    ]\n
    Example: ./cli.py update-tree f23d4 --patch-file patch.json
    """
    # The docstring above is the user-facing ``--help`` text, so developer
    # notes live in comments.  Flow: load the tree named by ``oid``, apply the
    # operations from the patch file first and then those from the
    # command-line options, and finally digest, print and store the result.
    odb = get_odb()
    oid = from_shortoid(odb, oid)
    obj = load(odb, odb.get(oid).hash_info)
    # ``oid`` is user input: report a non-tree object as a normal CLI error
    # rather than through an ``assert`` (stripped under ``python -O``).
    if not isinstance(obj, Tree):
        typer.echo(f"{oid}: not a tree obj", err=True)
        raise typer.Exit(1)

    patch = process_patch(
        patch_file,
        add=add,
        remove=remove,
        modify=modify,
        copy=copy,
        move=move,
        test=test,
    )
    for application in patch:
        try:
            apply_op(odb, obj, application)
        except (FileExistsError, FileNotFoundError) as exc:
            # Missing or clashing paths are expected user errors: print the
            # message to stderr and exit non-zero without a traceback.
            typer.echo(exc, err=True)
            raise typer.Exit(1) from exc

    obj.digest()
    print(obj)
    odb.add(obj.path, obj.fs, obj.oid, hardlink=True)


@app.command(help="Check object link")
def check_link( # noqa: C901
path: Path = dir_file_type_no_dash_no_resolve, object_dir: Optional[Path] = None
Expand Down Expand Up @@ -654,7 +483,6 @@ def checkout(
cmd = typer.main.get_command(app)
wrapper = click.version_option()
main = wrapper(cmd)
main.add_command(update_tree, "update-tree") # type: ignore[attr-defined]


if __name__ == "__main__":
Expand Down
15 changes: 6 additions & 9 deletions tests/hashfile/test_istextfile.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import sys

import pytest
from dvc_objects.fs.memory import MemoryFileSystem

from dvc_data.hashfile.istextfile import istextblock, istextfile

Expand All @@ -26,10 +25,8 @@ def test_istextblock(block, expected):
assert istextblock(block) is expected


@pytest.mark.parametrize("tmp_upath", ["local", "s3"], indirect=True)
def test_istextfile(tmp_upath, block, expected):
if sys.version_info >= (3, 13) and "s3" in tmp_upath.fs.protocol:
pytest.skip("universal-pathlib does not support Python 3.13 yet.")
path = tmp_upath / "file"
path.write_bytes(block)
assert istextfile(path.path, path.fs) is expected
def test_istextfile(block, expected):
fs = MemoryFileSystem(global_store=False)
fs.pipe_file("/file", block)

assert istextfile("/file", fs) is expected
30 changes: 10 additions & 20 deletions tests/index/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,17 @@ def _make_odb():


@pytest.fixture
def odb(tmp_upath_factory, make_odb):
def odb(make_odb):
odb = make_odb()

foo = tmp_upath_factory.mktemp() / "foo"
foo.write_bytes(b"foo\n")

data = tmp_upath_factory.mktemp() / "data.dir"
data.write_bytes(
b'[{"md5": "c157a79031e1c40f85931829bc5fc552", "relpath": "bar"}, '
b'{"md5": "258622b1688250cb619f3c9ccaefb7eb", "relpath": "baz"}]'
odb.add_bytes("d3b07384d113edec49eaa6238ad5ff00", b"foo\n")
odb.add_bytes("c157a79031e1c40f85931829bc5fc552", b"bar\n")
odb.add_bytes("258622b1688250cb619f3c9ccaefb7eb", b"baz\n")
odb.add_bytes(
"1f69c66028c35037e8bf67e5bc4ceb6a.dir",
(
b'[{"md5": "c157a79031e1c40f85931829bc5fc552", "relpath": "bar"}, '
b'{"md5": "258622b1688250cb619f3c9ccaefb7eb", "relpath": "baz"}]'
),
)

bar = tmp_upath_factory.mktemp() / "bar"
bar.write_bytes(b"bar\n")

baz = tmp_upath_factory.mktemp() / "baz"
baz.write_bytes(b"baz\n")

odb.add(str(foo), odb.fs, "d3b07384d113edec49eaa6238ad5ff00")
odb.add(str(data), odb.fs, "1f69c66028c35037e8bf67e5bc4ceb6a.dir")
odb.add(str(bar), odb.fs, "c157a79031e1c40f85931829bc5fc552")
odb.add(str(baz), odb.fs, "258622b1688250cb619f3c9ccaefb7eb")

return odb
5 changes: 0 additions & 5 deletions tests/test_dvc_data.py

This file was deleted.

0 comments on commit 290e497

Please sign in to comment.