From 0c6f2751d72d55fed7e787c910b71b29b041b98e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Saugat=20Pachhai=20=28=E0=A4=B8=E0=A5=8C=E0=A4=97=E0=A4=BE?= =?UTF-8?q?=E0=A4=A4=29?= Date: Tue, 9 Jan 2024 12:20:14 +0545 Subject: [PATCH] use token_urlsafe instead of shortuuid to generate temp file Python>=3.6 has secrets.token_urlsafe that can create cryptographically safe strings/numbers. So we don't need to use shortuuid. The generated text string is of the same size as `shortuuid.uuid()`, but can contain underscore and hyphen characters (not just alpha-numeric values), which might make it less human-readable. But I am not sure if it's worth it to use a third-party library (which can be gradually removed). --- pyproject.toml | 1 - src/dvc_objects/fs/utils.py | 11 ++++------- tests/fs/test_utils.py | 2 +- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 552bfc7..417a079 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,6 @@ classifiers = [ requires-python = ">=3.8" dynamic = ["version"] dependencies = [ - "shortuuid>=0.5.0", "funcy>=1.14", "fsspec>=2022.10.0", ] diff --git a/src/dvc_objects/fs/utils.py b/src/dvc_objects/fs/utils.py index 7e001b9..bbd1f46 100644 --- a/src/dvc_objects/fs/utils.py +++ b/src/dvc_objects/fs/utils.py @@ -7,6 +7,7 @@ import threading from concurrent import futures from contextlib import contextmanager, suppress +from secrets import token_urlsafe from typing import TYPE_CHECKING, Any, Collection, Dict, Iterator, Optional, Set, Union from dvc_objects.executors import ThreadPoolExecutor @@ -60,10 +61,8 @@ def move(src: "AnyFSPath", dst: "AnyFSPath") -> None: case src and dst are on different filesystems and actual physical copying of data is happening. """ - from shortuuid import uuid - dst = os.path.abspath(dst) - tmp = f"{dst}.{uuid()}" + tmp = tmp_fname(dst) if os.path.islink(src): shutil.copy(src, tmp) @@ -173,11 +172,9 @@ def copyfile( shutil.copyfileobj(fsrc, wrapped, length=LOCAL_CHUNK_SIZE) -def tmp_fname(fname: "AnyFSPath" = "") -> "AnyFSPath": +def tmp_fname(prefix: str = "") -> str: """Temporary name for a partial download""" - from shortuuid import uuid - - return os.fspath(fname) + "." + uuid() + ".tmp" + return f"{prefix}.{token_urlsafe(16)}.tmp" @contextmanager diff --git a/tests/fs/test_utils.py b/tests/fs/test_utils.py index 5849cf2..81c0dad 100644 --- a/tests/fs/test_utils.py +++ b/tests/fs/test_utils.py @@ -10,7 +10,7 @@ def test_tmp_fname(): file = os.path.join("path", "to", "file") def pattern(path): - return r"^" + re.escape(path) + r"\.[a-z0-9]{22}\.tmp$" + return r"^" + re.escape(path) + r"\.[a-z0-9_-]{22}\.tmp$" assert re.search(pattern(file), utils.tmp_fname(file), re.IGNORECASE) assert re.search(