Skip to content

Commit

Permalink
import/get: support pre-fetching LFS objects from Git-LFS repos
Browse files Browse the repository at this point in the history
  • Loading branch information
pmrowla committed Nov 8, 2023
1 parent 36fa116 commit 22604f6
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 1 deletion.
5 changes: 4 additions & 1 deletion dvc/fs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from dvc.config import ConfigError as RepoConfigError
from dvc.config_schema import SCHEMA, Invalid
from dvc.scm import lfs_prefetch

# pylint: disable=unused-import
from dvc_objects.fs import ( # noqa: F401
Expand All @@ -29,7 +30,7 @@

from .callbacks import Callback
from .data import DataFileSystem # noqa: F401
from .dvc import DVCFileSystem # noqa: F401
from .dvc import DVCFileSystem
from .git import GitFileSystem # noqa: F401

known_implementations.update(
Expand Down Expand Up @@ -75,6 +76,8 @@ def download(
from_infos = [fs_path]
to_infos = [to]

if isinstance(fs, DVCFileSystem):
lfs_prefetch(fs, from_infos)
cb.set_size(len(from_infos))
jobs = jobs or fs.jobs
generic.copy(
Expand Down
29 changes: 29 additions & 0 deletions dvc/scm.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
if TYPE_CHECKING:
from scmrepo.progress import GitProgressEvent

from dvc.fs import FileSystem


class SCMError(DvcException):
"""Base class for source control management errors."""
Expand Down Expand Up @@ -264,3 +266,30 @@ def _time_filter(rev):

rev_resolver = partial(resolve_rev, scm)
return group_by(rev_resolver, results)


def lfs_prefetch(fs: "FileSystem", paths: List[str]):
    """Pre-fetch Git-LFS objects for ``paths`` when ``fs`` is Git-backed.

    This is a no-op unless ``fs`` is a ``DVCFileSystem`` whose ``repo.fs`` is
    a ``GitFileSystem`` and the ``.gitattributes`` file opened through that
    filesystem contains the text ``filter=lfs``.

    Args:
        fs: Filesystem the paths belong to.
        paths: Paths to fetch. Each one is passed to ``fetch`` as an include
            pattern, prefixed with ``/`` if it does not already start with one.
    """
    from scmrepo.git.lfs import fetch

    from dvc.fs.dvc import DVCFileSystem
    from dvc.fs.git import GitFileSystem

    # Only repos that are read directly from a Git revision are handled here.
    if not (isinstance(fs, DVCFileSystem) and isinstance(fs.repo.fs, GitFileSystem)):
        return

    git_fs = fs.repo.fs
    scm = fs.repo.scm
    assert isinstance(scm, Git)

    try:
        # Use a context manager so the file object is closed deterministically
        # instead of being left for the garbage collector.
        with git_fs.open(".gitattributes") as fobj:
            gitattributes = fobj.read()
    except OSError:
        # Best effort: a missing or unreadable .gitattributes is treated the
        # same as "no LFS filter configured".
        return

    if "filter=lfs" not in gitattributes:
        return

    # NOTE(review): the leading "/" presumably anchors each include pattern at
    # the repository root -- confirm against scmrepo's LFS fetch semantics.
    include = [(path if path.startswith("/") else f"/{path}") for path in paths]
    with TqdmGit(desc="Checking for Git-LFS objects") as pbar:
        fetch(
            scm,
            [git_fs.rev],
            include=include,
            progress=pbar.update_git,
        )

0 comments on commit 22604f6

Please sign in to comment.