From 8fdf69e65b20b6c3c0f6ec82f0892e70066d4e6c Mon Sep 17 00:00:00 2001
From: pooya-mohammadi
Date: Sat, 26 Oct 2024 22:21:42 +0200
Subject: [PATCH] Add AsyncDownload

Add AsyncDownloadUtils, an aiohttp-based asynchronous file downloader, and
register it in the lazy-import table, the TYPE_CHECKING imports and the
dummy objects. Bump the version to 1.3.55 and tidy the imports and line
wrapping of download_utils.py.

---
Reviewer notes (ignored by git-am, not part of the commit message):

- async_download_utils.py was edited by hand after the patch was generated:
  the download is streamed to "<local_filepath>.part" and moved into place
  with os.replace, and the demo under __main__ reads the URL from argv
  instead of embedding a pre-signed (credential-bearing) link. The blob id
  in that file's "index" line was NOT recomputed.
- This patch was re-flowed from a whitespace-collapsed copy. Blank context
  lines and the indentation of context lines (notably in download_utils.py)
  were inferred and should be checked against the tree before applying.
- dummies.py references Backends.AIOHTTP, which this patch does not define;
  confirm it already exists in deep_utils/utils/constants.py.

 deep_utils/__init__.py                        |  5 +-
 deep_utils/dummy_objects/dummies.py           |  6 ++
 .../download_utils/async_download_utils.py    | 56 +++++++++++++++++++
 .../utils/download_utils/download_utils.py    |  9 ++-
 setup.py                                      |  2 +-
 5 files changed, 72 insertions(+), 6 deletions(-)
 create mode 100644 deep_utils/utils/download_utils/async_download_utils.py

diff --git a/deep_utils/__init__.py b/deep_utils/__init__.py
index 8f2e27d..ac38f8d 100644
--- a/deep_utils/__init__.py
+++ b/deep_utils/__init__.py
@@ -4,7 +4,7 @@
 from .utils.lib_utils.integeration_utils import import_lazy_module
 
 # Deep Utils version number
-__version__ = "1.3.54"
+__version__ = "1.3.55"
 
 from .utils.constants import DUMMY_PATH, Backends
 
@@ -82,7 +82,7 @@
 import_lazy_module("TikTokenUtils", "utils.tiktoken_utils.tiktoken_utils")
 import_lazy_module("MemoryUtilsTorch", "utils.memory_utils.torch_memory_utils")
 import_lazy_module("MinIOUtils", "utils.minio_lib.main")
-
+import_lazy_module("AsyncDownloadUtils", "utils.download_utils.async_download_utils")
 
 if TYPE_CHECKING:
     from utils.numpy_utils.numpy_utils import NumpyUtils
@@ -142,6 +142,7 @@
     from .utils.tiktoken_utils.tiktoken_utils import TikTokenUtils
     from .utils.memory_utils.torch_memory_utils import MemoryUtilsTorch
     from .utils.minio_lib.main import MinIOUtils
+    from .utils.download_utils.async_download_utils import AsyncDownloadUtils
 else:
     import sys
 
diff --git a/deep_utils/dummy_objects/dummies.py b/deep_utils/dummy_objects/dummies.py
index 9a1a08e..4c6d0c0 100644
--- a/deep_utils/dummy_objects/dummies.py
+++ b/deep_utils/dummy_objects/dummies.py
@@ -161,5 +161,11 @@ class TikTokenUtils(metaclass=DummyObject):
 
 class MemoryUtilsTorch(metaclass=DummyObject):
     _backend = [Backends.TORCH]
+
+
 class MinIOUtils(metaclass=DummyObject):
     _backend = [Backends.MINIO]
+
+
+class AsyncDownloadUtils(metaclass=DummyObject):
+    _backend = [Backends.AIOHTTP]
diff --git a/deep_utils/utils/download_utils/async_download_utils.py b/deep_utils/utils/download_utils/async_download_utils.py
new file mode 100644
index 0000000..a397a5f
--- /dev/null
+++ b/deep_utils/utils/download_utils/async_download_utils.py
@@ -0,0 +1,56 @@
+import os.path
+
+import aiohttp
+
+
+class AsyncDownloadUtils:
+
+    @staticmethod
+    async def download(url: str, local_filepath: str, chunk_size: int = 69 * 1024, exists_ok=True) -> str:
+        """
+        Download a file from a URL in an asynchronous manner into the input local filepath.
+        If url is a local file, return url.
+        The data is first written to ``local_filepath + ".part"`` and moved to ``local_filepath`` only
+        after the whole body has been received, so a failed or cancelled download does not leave a
+        truncated file that a later call with ``exists_ok=True`` would return as complete.
+        :param url: address to download from, or a path that already exists on the local disk.
+        :param local_filepath: path the downloaded content is written to.
+        :param chunk_size: maximum number of bytes requested from the response stream per read.
+        :param exists_ok: if True, do not download if the file already exists.
+        :return: url if it is an existing local path, otherwise local_filepath.
+        :raises aiohttp.ClientResponseError: if the response status is 400 or higher.
+        """
+        if os.path.exists(url):
+            return url
+        if os.path.exists(local_filepath) and exists_ok:
+            return local_filepath
+        temp_filepath = local_filepath + ".part"
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.get(url) as response:
+                    response.raise_for_status()
+                    with open(temp_filepath, 'wb') as file:
+                        while True:
+                            chunk = await response.content.read(chunk_size)
+                            if not chunk:
+                                break
+                            file.write(chunk)
+            os.replace(temp_filepath, local_filepath)
+        finally:
+            # After a successful os.replace the temp file is gone; it only remains on failure or cancellation.
+            if os.path.exists(temp_filepath):
+                os.remove(temp_filepath)
+        return local_filepath
+
+
+if __name__ == '__main__':
+    import asyncio
+    import sys
+
+    # The URL comes from the command line: pre-signed links embed credentials and expire,
+    # so they must not be committed to the repository.
+    if len(sys.argv) < 2:
+        sys.exit("usage: python async_download_utils.py <url> [local_filepath]")
+    dl = sys.argv[1]
+    out = sys.argv[2] if len(sys.argv) > 2 else "sample.mp4"
+    asyncio.run(AsyncDownloadUtils.download(dl, out))
diff --git a/deep_utils/utils/download_utils/download_utils.py b/deep_utils/utils/download_utils/download_utils.py
index 18edac2..095f3ca 100644
--- a/deep_utils/utils/download_utils/download_utils.py
+++ b/deep_utils/utils/download_utils/download_utils.py
@@ -1,7 +1,9 @@
-import sys
-import requests
 import os
 import shutil
+import sys
+
+import requests
+
 
 class DownloadUtils:
     @staticmethod
@@ -74,7 +76,8 @@ def download_file(
                         f.write(data)
                         done = int(50 * downloaded / total)
                         sys.stdout.write("\rDownloading {}: {}% [{}{}]"
-                                         .format(file_name, round((downloaded*100/total), 2), "█" * done, "." * (50 - done)))
+                                         .format(file_name, round((downloaded * 100 / total), 2), "█" * done,
+                                                 "." * (50 - done)))
                         sys.stdout.flush()
             sys.stdout.write("\n")
             shutil.move(temp_download_des, download_des)
diff --git a/setup.py b/setup.py
index bc57d69..cdf06b9 100644
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,6 @@
 import setuptools
 
-VERSION = "1.3.54"
+VERSION = "1.3.55"
 
 long_description = open("Readme.md", mode="r", encoding="utf-8").read()
 