Skip to content

Commit

Permalink
cache migrate: add --dvc-files flag to migrate .dvc and dvc.lock files
Browse files Browse the repository at this point in the history
  • Loading branch information
pmrowla committed Oct 12, 2023
1 parent 50d6788 commit e5dd2bb
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 4 deletions.
11 changes: 11 additions & 0 deletions dvc/commands/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,11 @@ def run(self):
class CmdCacheMigrate(CmdBase):
    """CLI handler for the ``cache migrate`` sub-command."""

    def run(self):
        """Run the cache migration and, on request, the DVC-file migration.

        ``migrate_2_to_3`` is always invoked. ``commit_2_to_3`` is invoked
        only when the ``--dvc-files`` flag was given. The ``--dry`` flag is
        forwarded unchanged to both calls.

        Returns:
            Always ``0``.
        """
        from dvc.cachemgr import migrate_2_to_3
        from dvc.repo.commit import commit_2_to_3

        options = self.args
        dry_run = options.dry

        migrate_2_to_3(self.repo, dry=dry_run)
        # Rewriting .dvc / dvc.lock entries is opt-in.
        if options.dvc_files:
            commit_2_to_3(self.repo, dry=dry_run)
        return 0


Expand Down Expand Up @@ -102,6 +105,14 @@ def add_parser(subparsers, parent_parser):
help=CACHE_MIGRATE_HELP,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
cache_migrate_parser.add_argument(
"--dvc-files",
help=(
"Migrate entries in all existing DVC files in the repository "
"to the DVC 3.0 format."
),
action="store_true",
)
cache_migrate_parser.add_argument(
"--dry",
help=(
Expand Down
8 changes: 4 additions & 4 deletions dvc/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -721,7 +721,7 @@ def save(self) -> None:
if self.metric:
self.verify_metric()

self._update_legacy_hash_name()
self.update_legacy_hash_name()
if self.use_cache:
_, self.meta, self.obj = self._build(
self.cache,
Expand All @@ -745,8 +745,8 @@ def save(self) -> None:
self.hash_info = self.obj.hash_info
self.files = None

def _update_legacy_hash_name(self):
if self.hash_name == "md5-dos2unix" and self.changed_checksum():
def update_legacy_hash_name(self, force: bool = False):
    """Switch a legacy ``md5-dos2unix`` hash name over to ``md5``.

    Nothing happens unless ``self.hash_name`` is ``"md5-dos2unix"``. When it
    is, the name is replaced with ``"md5"`` if ``force`` is true or if
    ``self.changed_checksum()`` reports a change. The checksum is not
    consulted at all when ``force`` is true.
    """
    if self.hash_name != "md5-dos2unix":
        return
    if force or self.changed_checksum():
        self.hash_name = "md5"

def set_exec(self) -> None:
Expand Down Expand Up @@ -1391,7 +1391,7 @@ def add( # noqa: C901
)

assert self.repo
self._update_legacy_hash_name()
self.update_legacy_hash_name()
cache = self.cache if self.use_cache else self.local_cache
assert isinstance(cache, HashFileDB)

Expand Down
51 changes: 51 additions & 0 deletions dvc/repo/commit.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
from typing import TYPE_CHECKING

from dvc import prompt

from . import locked
from .scm_context import scm_context

if TYPE_CHECKING:
from . import Repo


def _prepare_message(stage, changes):
Expand Down Expand Up @@ -68,3 +74,48 @@ def commit(
)
stage.dump(update_pipeline=False)
return [s.stage for s in stages_info]


@locked
@scm_context
def commit_2_to_3(repo: "Repo", dry: bool = False):
    """Force-commit all legacy outputs to use DVC 3.0 hashes.

    An index view restricted to outputs whose ``hash_name`` is
    ``"md5-dos2unix"`` is built over the whole repo. The DVC files owning
    those outputs are collected first. If there are none, a message is
    printed and nothing else happens.

    Args:
        repo: Repository whose index is scanned for legacy outputs.
        dry: When true, only print the sorted list of DVC files that would
            be migrated and return without modifying anything.

    Returns:
        None.
    """
    from dvc.dvcfile import ProjectFile
    from dvc.ui import ui

    def _is_legacy(candidate):
        return candidate.hash_name == "md5-dos2unix"

    view = repo.index.targets_view(
        targets=None,
        outs_filter=_is_legacy,
        recursive=True,
    )

    # Human-readable labels of every DVC file that owns a legacy output.
    migrated = set()
    for legacy_out in view.outs:
        owner_file = legacy_out.stage.dvcfile
        label = owner_file.relpath
        if isinstance(owner_file, ProjectFile):
            # Pipeline stages keep their hashes in the lockfile, so show it too.
            # pylint: disable-next=protected-access
            label = f"{label} ({owner_file._lockfile.relpath})"
        migrated.add(label)

    if not migrated:
        ui.write("No DVC files in the repo to migrate to the 3.0 format.")
        return

    if dry:
        ui.write("Entries in following DVC files will be migrated to the 3.0 format:")
        listing = sorted(f"\t{name}" for name in migrated)
        ui.write("\n".join(listing))
        return

    for stage, filter_info in view._stage_infos:  # pylint: disable=protected-access
        outs_filter = view._outs_filter  # pylint: disable=protected-access

        # Keep only the outputs of this stage that pass the view's filter.
        legacy_outs = set()
        for candidate in stage.filter_outs(filter_info):
            if outs_filter is not None and outs_filter(candidate):
                legacy_outs.add(candidate)
        if not legacy_outs:
            continue

        for legacy_out in legacy_outs:
            legacy_out.update_legacy_hash_name(force=True)
        stage.save(allow_missing=True)
        stage.commit(allow_missing=True, relink=True)
        if not isinstance(stage.dvcfile, ProjectFile):
            ui.write(f"Updating DVC file '{stage.dvcfile.relpath}'")
        stage.dump(update_pipeline=False)

0 comments on commit e5dd2bb

Please sign in to comment.