Skip to content

Commit

Permalink
add a cache stats tool
Browse files Browse the repository at this point in the history
  • Loading branch information
jstzwj committed Aug 14, 2024
1 parent 6ec1dcf commit 33e7cf1
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 8 deletions.
Empty file added olah/cache/__init__.py
Empty file.
2 changes: 1 addition & 1 deletion olah/utils/bitset.py → olah/cache/bitset.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,4 +76,4 @@ def __str__(self):
Returns:
str: A string representation of the Bitset object, showing the binary representation of each byte.
"""
return "".join(bin(byte)[2:].zfill(8) for byte in self.bits)
return "".join(bin(byte)[2:].zfill(8)[::-1] for byte in self.bits)
17 changes: 12 additions & 5 deletions olah/utils/olah_cache.py → olah/cache/olah_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def block_number(self) -> int:
return self._block_number

@property
def block_mask(self) -> int:
def block_mask(self) -> Bitset:
return self._block_mask

def get_header_size(self):
Expand All @@ -76,11 +76,18 @@ def _valid_header(self):
@staticmethod
def read(stream) -> "OlahCacheHeader":
obj = OlahCacheHeader()
magic, version, block_size, file_size, block_mask_size = struct.unpack(
"<4sQQQQ", stream.read(OlahCacheHeader.HEADER_FIX_SIZE)
try:
magic = struct.unpack(
"<4s", stream.read(4)
)
except struct.error:
raise Exception("File is not a Olah cache file.")
if magic[0] != OlahCacheHeader.MAGIC_NUMBER:
raise Exception("File is not a Olah cache file.")

version, block_size, file_size, block_mask_size = struct.unpack(
"<QQQQ", stream.read(OlahCacheHeader.HEADER_FIX_SIZE - 4)
)
if magic != OlahCacheHeader.MAGIC_NUMBER:
raise Exception("The file is not a valid olah cache file.")
obj._version = version
obj._block_size = block_size
obj._file_size = file_size
Expand Down
54 changes: 54 additions & 0 deletions olah/cache/stat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import argparse


import os
import sys
from olah.cache.olah_cache import OlahCache

def get_size_human(size: int) -> str:
    """Format a byte count as a human-readable string with four decimals.

    The largest binary unit (1024-based) that does not exceed ``size`` is
    chosen among B, KB, MB and GB.

    Args:
        size: Number of bytes.

    Returns:
        The scaled value followed by its unit suffix, e.g. ``"1.5000KB"``.
    """
    kib = 1024
    mib = kib * 1024
    gib = mib * 1024
    # The scaled value is kept as a float: truncating it to an int before
    # formatting would discard the fractional part that ".4f" is meant to show.
    # ">=" makes an exact multiple (e.g. 1024 bytes) report in the larger unit.
    if size >= gib:
        return f"{size / gib:.4f}GB"
    if size >= mib:
        return f"{size / mib:.4f}MB"
    if size >= kib:
        return f"{size / kib:.4f}KB"
    return f"{size:.4f}B"

def insert_newlines(input_str, every=10):
    """Break ``input_str`` into lines of at most ``every`` characters.

    Args:
        input_str: The string to wrap.
        every: Maximum number of characters per line.

    Returns:
        The consecutive chunks of ``input_str`` joined by newline characters.
    """
    pieces = []
    for start in range(0, len(input_str), every):
        stop = start + every
        pieces.append(input_str[start:stop])
    return "\n".join(pieces)

if __name__ == "__main__":
    # Command-line entry point: print the header statistics of an Olah cache
    # file and, on request, export its payload once every block is cached.

    # Imported here because only the export path of this script needs it.
    import shutil

    parser = argparse.ArgumentParser(description="Olah Cache Visualization Tool.")
    parser.add_argument("--file", "-f", type=str, required=True, help="The path of Olah cache file")
    parser.add_argument("--export", "-e", type=str, default="", help="Export the cached file if all blocks are cached")
    args = parser.parse_args()
    # NOTE(review): echoes the parsed arguments; looks like a debugging aid.
    print(args)

    # Size of the cache file on disk (header included), measured by seeking
    # to the end of the file.
    with open(args.file, "rb") as f:
        f.seek(0, os.SEEK_END)
        bin_size = f.tell()

    try:
        cache = OlahCache(args.file)
    except Exception as e:
        # Any failure to open the cache is reported and ends the run with a
        # non-zero exit status.
        print(e)
        sys.exit(1)

    print(f"File: {args.file}")
    print(f"Olah Cache Version: {cache.header.version}")
    print(f"File Size: {get_size_human(cache.header.file_size)}")
    print(f"Cache Total Size: {get_size_human(bin_size)}")
    print(f"Block Size: {cache.header.block_size}")
    print(f"Block Number: {cache.header.block_number}")
    print("Cache Status: ")
    # The string form of the mask is cut to the number of blocks, read
    # through the same public ``block_number`` attribute printed above
    # instead of the private ``_block_number`` field.
    cache_status = str(cache.header.block_mask)[:cache.header.block_number]
    print(insert_newlines(cache_status, every=50))

    if args.export != "":
        if all(flag == "1" for flag in cache_status):
            with open(args.file, "rb") as f:
                # Skip the header so that only the payload is exported.
                f.seek(cache._get_header_size(), os.SEEK_SET)
                with open(args.export, "wb") as fout:
                    # Stream the payload in chunks rather than loading the
                    # whole remainder of the file into memory at once.
                    # NOTE(review): everything after the header is copied;
                    # confirm the payload carries no trailing bytes beyond
                    # header.file_size.
                    shutil.copyfileobj(f, fout)
        else:
            print("Some blocks are not cached, so the export is skipped.")
2 changes: 1 addition & 1 deletion olah/proxy/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
HUGGINGFACE_HEADER_X_LINKED_SIZE,
ORIGINAL_LOC,
)
from olah.utils.olah_cache import OlahCache
from olah.cache.olah_cache import OlahCache
from olah.utils.url_utils import (
RemoteInfo,
add_query_param,
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "olah"
version = "0.2.0"
version = "0.2.1"
description = "Self-hosted lightweight huggingface mirror."
readme = "README.md"
requires-python = ">=3.8"
Expand Down

0 comments on commit 33e7cf1

Please sign in to comment.