From 19b8540d9cff5c5b38e62be519bcbab103c922e6 Mon Sep 17 00:00:00 2001
From: Gabriele Oliaro
Date: Mon, 24 Feb 2025 01:31:31 -0500
Subject: [PATCH] fixes

---
Review notes (this section is ignored when the patch is applied):
 * docker/run.sh: the tilde must stay unquoted. Inside double quotes bash
   does not expand it, so the `[ -f ... ]` checks would test a literal
   "~/.ssh/id_rsa" path and the SSH key volume would never be mounted.
 * serve.py: os.cpu_count() may return None; fall back to a single worker
   instead of raising TypeError on the comparison/subtraction.
 * Line breaks and context-line indentation were reconstructed from the
   hunk line counts; verify them against the target tree before applying.
   The post-image blob ids on the `index` lines of docker/run.sh and
   serve.py predate the two fixes above.

 conda/flexflow.yml                     | 1 +
 docker/flexflow-environment/Dockerfile | 2 +-
 docker/run.sh                          | 2 +-
 python/flexflow/serve/serve.py         | 3 ++-
 4 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/conda/flexflow.yml b/conda/flexflow.yml
index 7568ed648..3f6cd99a4 100644
--- a/conda/flexflow.yml
+++ b/conda/flexflow.yml
@@ -27,3 +27,4 @@ dependencies:
     - loralib
     - triton
     - peft
+    - pytest
diff --git a/docker/flexflow-environment/Dockerfile b/docker/flexflow-environment/Dockerfile
index 92423adf2..88b360074 100644
--- a/docker/flexflow-environment/Dockerfile
+++ b/docker/flexflow-environment/Dockerfile
@@ -113,7 +113,7 @@ RUN rm /usr/local/bin/install_pytorch.sh
 RUN pip3 install transformers>=4.47.1 sentencepiece einops
 RUN pip3 install tensorflow notebook
 # PEFT-related
-RUN pip3 install scipy bitsandbytes datasets accelerate loralib triton peft
+RUN pip3 install scipy bitsandbytes datasets accelerate loralib triton peft pytest
 RUN pip3 install streamlit
 
 # Install Rust
diff --git a/docker/run.sh b/docker/run.sh
index 72bc708db..9feb4522a 100755
--- a/docker/run.sh
+++ b/docker/run.sh
@@ -131,7 +131,7 @@ if [ -f "$hf_token_path" ]; then
 fi
 
 ssh_key_volume=""
-ssh_key_path="$HOME/.ssh/id_rsa"
+ssh_key_path=~/.ssh/id_rsa
 if [ -f "$ssh_key_path" ] && [ -f "$ssh_key_path.pub" ]; then
   ssh_key_volume="-v $ssh_key_path:/root/.ssh/id_rsa -v $ssh_key_path.pub:/root/.ssh/id_rsa.pub"
 fi
diff --git a/python/flexflow/serve/serve.py b/python/flexflow/serve/serve.py
index 394869426..6db415aea 100644
--- a/python/flexflow/serve/serve.py
+++ b/python/flexflow/serve/serve.py
@@ -301,8 +301,9 @@ def download_hf_weights_if_needed(self) -> None:
         If not, or if the refresh_cache parameter is set to True, download new weights and convert them.
         """
 
-        # TODO: edit this to download the weights using snapshot_download and convert them to FlexFlow format without loading them to GPU
         def download_and_convert_llm_weights(model_name):
+            num_cores = max(1, (os.cpu_count() or 1) - 1)
+            snapshot_download(repo_id=model_name, allow_patterns="*.safetensors", max_workers=min(30, num_cores))
             hf_model = AutoModelForCausalLM.from_pretrained(
                 model_name,
                 trust_remote_code=True,