Skip to content

Commit

Permalink
hashfile/checkout: use save_many to save state
Browse files Browse the repository at this point in the history
For the MNIST dataset, this drops the total runtime of `dvc add` from 24s
to 12s for me.
  • Loading branch information
skshetry committed Aug 7, 2024
1 parent 3b53bd2 commit 0b72266
Showing 1 changed file with 7 additions and 4 deletions.
11 changes: 7 additions & 4 deletions src/dvc_data/hashfile/checkout.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from fsspec import Callback

from ._ignore import Ignore
from .hash_info import HashInfo

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -110,10 +111,6 @@ def _checkout_file(
else:
link(cache, cache_path, fs, path)
modified = True

if state:
state.save(path, fs, change.new.oid)

return modified


Expand Down Expand Up @@ -203,6 +200,7 @@ def _checkout(
_remove(entry_path, fs, change.old.in_cache, force=force, prompt=prompt)

failed = []
hashes_to_update: list[tuple[str, HashInfo, None]] = []
for change in chain(diff.added, diff.modified):
entry_path = fs.join(path, *change.new.key) if change.new.key != ROOT else path
if change.new.oid.isdir:
Expand All @@ -223,6 +221,11 @@ def _checkout(
)
except CheckoutError as exc:
failed.extend(exc.paths)
else:
hashes_to_update.append((entry_path, change.new.oid, fs.info(entry_path)))

if state is not None:
state.save_many(hashes_to_update, fs)

if failed:
raise CheckoutError(failed)
Expand Down

0 comments on commit 0b72266

Please sign in to comment.