Skip to content

Commit

Permalink
debug
Browse files (browse the repository at this point in the history)
  • Loading branch information
efiop committed Jan 16, 2024
1 parent 62e8156 commit ce57c6c
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 15 deletions.
4 changes: 2 additions & 2 deletions src/dvc_data/index/fetch.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -125,7 +125,7 @@ def fetch(
fetched += len(result.transferred)
failed += len(result.failed)
elif isinstance(cache, ObjectStorage):
md5(fs_index, check_meta=True)
updated = md5(fs_index, check_meta=True)

def _on_error(failed, oid, exc):
failed += 1
Expand All @@ -136,7 +136,7 @@ def _on_error(failed, oid, exc):
)

fetched += save(
fs_index,
updated,
jobs=jobs,
callback=cb,
on_error=partial(_on_error, failed),
Expand Down
41 changes: 28 additions & 13 deletions src/dvc_data/index/save.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -14,7 +14,7 @@
from dvc_data.hashfile.db import HashFileDB
from dvc_data.hashfile.state import StateBase

from .index import BaseDataIndex, DataIndexKey
from .index import BaseDataIndex, DataIndex, DataIndexKey


def is_unchanged(old_meta: Meta, new_meta: Meta, name: str):
Expand All @@ -30,16 +30,22 @@ def md5(
storage: str = "data",
name: str = DEFAULT_ALGORITHM,
check_meta: bool = True,
) -> None:
from .index import DataIndexEntry
) -> "DataIndex":
from .index import DataIndex, DataIndexEntry

entries = {}
ret = DataIndex()

for key, entry in index.iteritems():
for _, entry in index.iteritems():
if entry.meta and entry.meta.isdir:
ret.add(entry)
continue

hash_info = None
if entry.hash_info and entry.hash_info.name in ("md5", "md5-dos2unix"):
hash_info = entry.hash_info

if hash_info and not check_meta:
ret.add(entry)
continue

try:
Expand All @@ -55,22 +61,31 @@ def md5(
continue

meta = Meta.from_info(info, fs.protocol)
if not entry.meta or not is_unchanged(entry.meta, meta, fs.PARAM_CHECKSUM):
old = getattr(entry.meta, fs.PARAM_CHECKSUM, None) if entry.meta else None
new = getattr(meta, fs.PARAM_CHECKSUM, None)
if old and new:
if old == new:
ret.add(entry)
continue

try:
_, hash_info = hash_file(path, fs, name, state=state, info=info)
_, hi = hash_file(path, fs, name, state=state, info=info)
except FileNotFoundError:
continue

entries[key] = DataIndexEntry(
key=entry.key,
meta=entry.meta,
hash_info=hash_info,
if hash_info and hi != hash_info:
continue

ret.add(
DataIndexEntry(
key=entry.key,
meta=entry.meta,
hash_info=hash_info,
)
)

for key, entry in entries.items():
index[key] = entry
ret.storage_map = index.storage_map
return ret


def build_tree(
Expand Down

0 comments on commit ce57c6c

Please sign in to comment.