Skip to content

Commit

Permalink
datafs: implement du
Browse files Browse the repository at this point in the history
  • Loading branch information
efiop committed Nov 2, 2023
1 parent 960bca6 commit 9db4a3e
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 1 deletion.
26 changes: 26 additions & 0 deletions src/dvc_data/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import logging
import os
import typing
from collections import deque
from typing import Any, BinaryIO, NamedTuple, Optional, Tuple

from dvc_objects.fs.callbacks import DEFAULT_CALLBACK
Expand Down Expand Up @@ -204,3 +205,28 @@ def checksum(self, path: str) -> str:
assert isinstance(md5, str)
return md5
raise NotImplementedError

def du(self, path, total=True, maxdepth=None, withdirs=False, **kwargs):
    """Compute the space used by ``path`` and everything found below it.

    The tree is walked breadth-first, starting from ``self.info(path)``.
    Every visited entry, directories included, is recorded under its
    ``info["name"]``; a falsy size (e.g. ``None``) is counted as 0.

    Args:
        path: Path to measure.
        total: If true, return the sum of all recorded sizes; otherwise
            return the per-path mapping.
        maxdepth: Depth limit. Not supported: must be ``None``.
        withdirs: Not consulted by this implementation; directories are
            always present in the result.
        **kwargs: Not consulted by this implementation.

    Returns:
        An ``int`` when ``total`` is true, else a ``dict`` mapping the name
        of each visited entry to its size.

    Raises:
        NotImplementedError: If ``maxdepth`` is not ``None``.
    """
    if maxdepth is not None:
        raise NotImplementedError("du() does not support maxdepth")

    sizes = {}
    pending = deque([self.info(path)])
    while pending:
        info = pending.popleft()
        sizes[info["name"]] = info["size"] or 0

        if info["type"] != "directory":
            continue

        # A directory whose entry reports a size is not listed any further.
        # NOTE(review): presumably that size already covers its contents,
        # so descending would count them twice -- confirm.
        entry = info.get("entry")
        if entry is not None and entry.size is not None:
            continue

        pending.extend(self.ls(info["name"], detail=True))

    return sum(sizes.values()) if total else sizes
13 changes: 12 additions & 1 deletion src/dvc_data/index/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,13 @@ def to_dict(self) -> Dict[str, Any]:

return ret

@property
def size(self) -> Optional[int]:
    """Size recorded in this entry's ``meta``, or ``None`` without ``meta``."""
    meta = self.meta
    return None if meta is None else meta.size


class DataIndexTrie(JSONTrie):
def __init__(self, *args, **kwargs):
Expand Down Expand Up @@ -664,8 +671,12 @@ def _load(self, key, entry):
if not entry.meta or not entry.meta.isdir:
return

storage_info = self.storage_map.get(key)
if storage_info is None:
return

try:
_load_from_storage(self._trie, entry, self.storage_map[key])
_load_from_storage(self._trie, entry, storage_info)
except DataIndexDirError as exc:
self.onerror(entry, exc)
return
Expand Down
62 changes: 62 additions & 0 deletions tests/index/test_fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,3 +156,65 @@ def onerror(_entry, _exc):
fs.index.onerror = onerror
assert fs.ls("/broken", detail=False) == []
assert fs.ls("/broken", detail=True) == []


def test_fs_du(tmp_upath, odb, as_filesystem):
    """Check ``DataFileSystem.du`` totals and per-path sizes.

    The index mixes files with and without a recorded size, and nested
    directories of which only the innermost one carries a size.
    """
    index = DataIndex(
        {
            ("file_no_meta",): DataIndexEntry(
                key=("file_no_meta",),
            ),
            ("file_meta_size",): DataIndexEntry(
                key=("file_meta_size",),
                meta=Meta(size=4),
            ),
            ("file_meta_no_size",): DataIndexEntry(
                key=("file_meta_no_size",),
                meta=Meta(),
            ),
            ("prefix",): DataIndexEntry(
                key=("prefix",),
                meta=Meta(isdir=True),
            ),
            ("prefix", "dir"): DataIndexEntry(
                key=("prefix", "dir"),
                meta=Meta(isdir=True),
            ),
            ("prefix", "dir", "dir_size"): DataIndexEntry(
                key=("prefix", "dir", "dir_size"),
                meta=Meta(isdir=True, size=123),
            ),
        }
    )

    fs = DataFileSystem(index)

    # Entries without a recorded size are expected to count as 0.
    assert fs.du("file_no_meta") == 0
    assert fs.du("file_meta_size") == 4
    assert fs.du("file_meta_no_size") == 0

    # Only "dir_size" carries a size, so each ancestor totals 123.
    assert fs.du("prefix/dir/dir_size") == 123
    assert fs.du("prefix/dir") == 123
    assert fs.du("prefix") == 123

    # Root total: 4 (file_meta_size) + 123 (dir_size).
    assert fs.du("/") == 127

    # With total=False every visited path is reported, directories included.
    assert fs.du("file_meta_size", total=False) == {
        "file_meta_size": 4,
    }
    assert fs.du("prefix", total=False) == {
        "prefix": 0,
        "prefix/dir": 0,
        "prefix/dir/dir_size": 123,
    }
    assert fs.du("prefix/dir", total=False) == {
        "prefix/dir": 0,
        "prefix/dir/dir_size": 123,
    }
    assert fs.du("/", total=False) == {
        "/": 0,
        "/file_meta_no_size": 0,
        "/file_meta_size": 4,
        "/file_no_meta": 0,
        "/prefix": 0,
        "/prefix/dir": 0,
        "/prefix/dir/dir_size": 123,
    }

0 comments on commit 9db4a3e

Please sign in to comment.