Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[bug fix] Fix check_synapse_cache_size function; allow file size to be float #1389

Merged
merged 11 commits into from
Apr 5, 2024
7 changes: 2 additions & 5 deletions schematic/store/synapse.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,9 @@
from schematic.utils.general import (
entity_type_mapping,
get_dir_size,
convert_gb_to_bytes,
create_temp_folder,
check_synapse_cache_size,
clear_synapse_cache,
profile,
calculate_datetime,
)

from schematic.utils.schema_utils import get_class_label_from_display_name
Expand Down Expand Up @@ -234,8 +231,8 @@ def _purge_synapse_cache(self, maximum_storage_allowed_cache_gb=1):
# try clearing the cache
# scan a directory and check size of files
if os.path.exists(self.root_synapse_cache):
maximum_storage_allowed_cache_bytes = convert_gb_to_bytes(
maximum_storage_allowed_cache_gb
maximum_storage_allowed_cache_bytes = maximum_storage_allowed_cache_gb * (
1024**3
)
nbytes = get_dir_size(self.root_synapse_cache)
dir_size_bytes = check_synapse_cache_size(directory=self.root_synapse_cache)
Expand Down
17 changes: 4 additions & 13 deletions schematic/utils/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,14 +129,14 @@ def calculate_datetime(

def check_synapse_cache_size(
directory: str = "/root/.synapseCache",
) -> Union[float, int]:
) -> float:
"""use du --sh command to calculate size of .synapseCache.

Args:
directory (str, optional): .synapseCache directory. Defaults to '/root/.synapseCache'

Returns:
float or integer: returns size of .synapsecache directory in bytes
float: returns size of .synapsecache directory in bytes
"""
# Note: this command might fail for Windows users.
# But since this command is primarily for running on AWS, it is fine.
Expand All @@ -154,8 +154,8 @@ def check_synapse_cache_size(
size_in_mb = float(size.rstrip("M"))
byte_size = size_in_mb * 1000000
elif "G" in size:
size_in_gb = int(size.rstrip("G"))
byte_size = convert_gb_to_bytes(size_in_gb)
size_in_gb = float(size.rstrip("G"))
byte_size = size_in_gb * (1024**3)
elif "B" in size:
byte_size = float(size.rstrip("B"))
else:
Expand All @@ -180,15 +180,6 @@ def clear_synapse_cache(synapse_cache: cache.Cache, minutes: int) -> int:
return num_of_deleted_files


def convert_gb_to_bytes(g_bytes: float) -> float:
    """Convert a size expressed in GB (1 GB = 1024**3 bytes) to bytes.

    Args:
        g_bytes: size in GB. Whole and fractional values are both accepted
            (per PEP 484, a ``float`` annotation also admits ``int``).

    Returns:
        The equivalent number of bytes: an ``int`` when ``g_bytes`` is an
        ``int``, a ``float`` when it is a ``float``.
    """
    # Multiplying by a power of two is exact for floats, so this matches the
    # former ``g_bytes * 1024 * 1024 * 1024`` bit-for-bit.
    return g_bytes * 1024**3


def entity_type_mapping(syn: Synapse, entity_id: str) -> str:
"""Return the entity type of manifest

Expand Down
60 changes: 35 additions & 25 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,22 +159,38 @@
"bio_things": {"class": "BioThings", "property": "bioThings"},
}

# Parametrization cases for test_check_synapse_cache_size.
# Each tuple is (create_temp_query_file, local_disk_size, gh_disk_size):
#   1. number of zero bytes the create_temp_query_file fixture writes into
#      the mock table query csv,
#   2. value check_synapse_cache_size is expected to return on a local run,
#   3. value expected when IN_GITHUB_ACTIONS is true.
test_disk_storage = [
(2, 4000, 16000),
(1000, 4000, 16000),
(2000000, 1900000, 2000000),
(1073741825, 1073741824, 1181116006.4),
]

@pytest.fixture()
def create_temp_query_file(tmp_path, request):
    """Build a mock synapse cache tree containing one zero-filled csv file.

    The layout created under ``tmp_path`` is
    ``.synapseCache/123/456/mock_synapse_table_query.csv``. The csv holds
    ``request.param`` zero bytes, so the fixture is meant to be used with
    indirect parametrization where the parameter is the file size in bytes.

    Yields:
        tuple: (cache directory, table query folder, csv file path)
    """
    # location of the mock synapse cache and its nested folders
    cache_root = tmp_path / ".synapseCache/"
    sub_folder = cache_root / "123"
    query_folder = sub_folder / "456"
    for folder in (cache_root, sub_folder, query_folder):
        folder.mkdir()

    # mock table query csv filled with the requested number of zero bytes
    query_csv = query_folder / "mock_synapse_table_query.csv"
    query_csv.write_bytes(b'\0' * request.param)

    yield cache_root, query_folder, query_csv


class TestGeneral:
def test_clear_synapse_cache(self, tmp_path):
@pytest.mark.parametrize("create_temp_query_file", [3, 1000], indirect=True)
def test_clear_synapse_cache(self, create_temp_query_file):
# define location of mock synapse cache
mock_synapse_cache_dir = tmp_path / ".synapseCache/"
mock_synapse_cache_dir.mkdir()
mock_sub_folder = mock_synapse_cache_dir / "123"
mock_sub_folder.mkdir()
mock_table_query_folder = mock_sub_folder / "456"
mock_table_query_folder.mkdir()

# create mock table query csv and a mock cache map
mock_synapse_table_query_csv = (
mock_table_query_folder / "mock_synapse_table_query.csv"
)
mock_synapse_table_query_csv.write_text("mock table query content")
mock_synapse_cache_dir, mock_table_query_folder, mock_synapse_table_query_csv = create_temp_query_file
# create a mock cache map
mock_cache_map = mock_table_query_folder / ".cacheMap"
mock_cache_map.write_text(
f"{mock_synapse_table_query_csv}: '2022-06-13T19:24:27.000Z'"
Expand Down Expand Up @@ -222,22 +238,16 @@ def test_calculate_datetime_raise_error(self):

# this test might fail on Windows machines
@pytest.mark.not_windows
def test_check_synapse_cache_size(self, tmp_path):
mock_synapse_cache_dir = tmp_path / ".synapseCache"
mock_synapse_cache_dir.mkdir()

mock_synapse_table_query_csv = (
mock_synapse_cache_dir / "mock_synapse_table_query.csv"
)
mock_synapse_table_query_csv.write_text("example file for calculating cache")

file_size = check_synapse_cache_size(mock_synapse_cache_dir)
@pytest.mark.parametrize("create_temp_query_file,local_disk_size,gh_disk_size",test_disk_storage,indirect=["create_temp_query_file"])
def test_check_synapse_cache_size(self,create_temp_query_file,local_disk_size,gh_disk_size):
mock_synapse_cache_dir, mock_table_query_folder, mock_synapse_table_query_csv = create_temp_query_file
disk_size = check_synapse_cache_size(mock_synapse_cache_dir)

# For some reason, the reported size differs when running in GitHub Actions.
if IN_GITHUB_ACTIONS:
assert file_size == 8000
assert disk_size == gh_disk_size
else:
assert file_size == 4000
assert disk_size == local_disk_size

def test_find_duplicates(self):
mock_list = ["foo", "bar", "foo"]
Expand Down
Loading