From 9db4a3e29f653ccebe0a1e6ccc11429b9a479a31 Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Thu, 2 Nov 2023 04:20:28 +0200 Subject: [PATCH] datafs: implement du --- src/dvc_data/fs.py | 26 ++++++++++++++++ src/dvc_data/index/index.py | 13 +++++++- tests/index/test_fs.py | 62 +++++++++++++++++++++++++++++++++++++ 3 files changed, 100 insertions(+), 1 deletion(-) diff --git a/src/dvc_data/fs.py b/src/dvc_data/fs.py index 9f6f7e13..da7634a1 100644 --- a/src/dvc_data/fs.py +++ b/src/dvc_data/fs.py @@ -3,6 +3,7 @@ import logging import os import typing +from collections import deque from typing import Any, BinaryIO, NamedTuple, Optional, Tuple from dvc_objects.fs.callbacks import DEFAULT_CALLBACK @@ -204,3 +205,28 @@ def checksum(self, path: str) -> str: assert isinstance(md5, str) return md5 raise NotImplementedError + + def du(self, path, total=True, maxdepth=None, withdirs=False, **kwargs): + if maxdepth is not None: + raise NotImplementedError + + sizes = {} + todo = deque([self.info(path)]) + while todo: + info = todo.popleft() + + sizes[info["name"]] = info["size"] or 0 + + if info["type"] != "directory": + continue + + entry = info.get("entry") + if entry is not None and entry.size is not None: + continue + + todo.extend(self.ls(info["name"], detail=True)) + + if total: + return sum(sizes.values()) + + return sizes diff --git a/src/dvc_data/index/index.py b/src/dvc_data/index/index.py index 4e9910ed..aaa20ad1 100644 --- a/src/dvc_data/index/index.py +++ b/src/dvc_data/index/index.py @@ -79,6 +79,13 @@ def to_dict(self) -> Dict[str, Any]: return ret + @property + def size(self) -> Optional[int]: + if self.meta is None: + return None + + return self.meta.size + class DataIndexTrie(JSONTrie): def __init__(self, *args, **kwargs): @@ -664,8 +671,12 @@ def _load(self, key, entry): if not entry.meta or not entry.meta.isdir: return + storage_info = self.storage_map.get(key) + if storage_info is None: + return + try: - _load_from_storage(self._trie, entry, self.storage_map[key]) + _load_from_storage(self._trie, entry, storage_info) except DataIndexDirError as exc: self.onerror(entry, exc) return diff --git a/tests/index/test_fs.py b/tests/index/test_fs.py index cc1b496b..71e37390 100644 --- a/tests/index/test_fs.py +++ b/tests/index/test_fs.py @@ -156,3 +156,65 @@ def onerror(_entry, _exc): fs.index.onerror = onerror assert fs.ls("/broken", detail=False) == [] assert fs.ls("/broken", detail=True) == [] + + +def test_fs_du(tmp_upath, odb, as_filesystem): + index = DataIndex( + { + ("file_no_meta",): DataIndexEntry( + key=("file_no_meta",), + ), + ("file_meta_size",): DataIndexEntry( + key=("file_meta_size",), + meta=Meta(size=4), + ), + ("file_meta_no_size",): DataIndexEntry( + key=("file_meta_no_size",), + meta=Meta(), + ), + ("prefix",): DataIndexEntry( + key=("prefix",), + meta=Meta(isdir=True), + ), + ("prefix", "dir"): DataIndexEntry( + key=("prefix", "dir"), + meta=Meta(isdir=True), + ), + ("prefix", "dir", "dir_size"): DataIndexEntry( + key=("prefix", "dir", "dir_size"), + meta=Meta(isdir=True, size=123), + ), + } + ) + + fs = DataFileSystem(index) + + assert fs.du("file_no_meta") == 0 + assert fs.du("file_meta_size") == 4 + assert fs.du("file_meta_no_size") == 0 + assert fs.du("prefix/dir/dir_size") == 123 + assert fs.du("prefix/dir") == 123 + assert fs.du("prefix") == 123 + assert fs.du("/") == 127 + + assert fs.du("file_meta_size", total=False) == { + "file_meta_size": 4, + } + assert fs.du("prefix", total=False) == { + "prefix": 0, + "prefix/dir": 0, + "prefix/dir/dir_size": 123, + } + assert fs.du("prefix/dir", total=False) == { + "prefix/dir": 0, + "prefix/dir/dir_size": 123, + } + assert fs.du("/", total=False) == { + "/": 0, + "/file_meta_no_size": 0, + "/file_meta_size": 4, + "/file_no_meta": 0, + "/prefix": 0, + "/prefix/dir": 0, + "/prefix/dir/dir_size": 123, + }