From 19b8540d9cff5c5b38e62be519bcbab103c922e6 Mon Sep 17 00:00:00 2001
From: Gabriele Oliaro <goliaro@cs.cmu.edu>
Date: Mon, 24 Feb 2025 01:31:31 -0500
Subject: [PATCH] Add pytest dependency, update SSH key path, prefetch HF safetensors weights

---
 conda/flexflow.yml                     | 1 +
 docker/flexflow-environment/Dockerfile | 2 +-
 docker/run.sh                          | 2 +-
 python/flexflow/serve/serve.py         | 3 ++-
 4 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/conda/flexflow.yml b/conda/flexflow.yml
index 7568ed648..3f6cd99a4 100644
--- a/conda/flexflow.yml
+++ b/conda/flexflow.yml
@@ -27,3 +27,4 @@ dependencies:
     - loralib
     - triton
     - peft
+    - pytest
diff --git a/docker/flexflow-environment/Dockerfile b/docker/flexflow-environment/Dockerfile
index 92423adf2..88b360074 100644
--- a/docker/flexflow-environment/Dockerfile
+++ b/docker/flexflow-environment/Dockerfile
@@ -113,7 +113,7 @@ RUN rm /usr/local/bin/install_pytorch.sh
 RUN pip3 install transformers>=4.47.1 sentencepiece einops
 RUN pip3 install tensorflow notebook
 # PEFT-related
-RUN pip3 install scipy bitsandbytes datasets accelerate loralib triton peft
+RUN pip3 install scipy bitsandbytes datasets accelerate loralib triton peft pytest
 RUN pip3 install streamlit
 
 # Install Rust
diff --git a/docker/run.sh b/docker/run.sh
index 72bc708db..9feb4522a 100755
--- a/docker/run.sh
+++ b/docker/run.sh
@@ -131,7 +131,7 @@ if [ -f "$hf_token_path" ]; then
 fi
 
 ssh_key_volume=""
-ssh_key_path="$HOME/.ssh/id_rsa"
+ssh_key_path="${HOME}/.ssh/id_rsa"
 if [ -f "$ssh_key_path" ] && [ -f "$ssh_key_path.pub" ]; then
   ssh_key_volume="-v $ssh_key_path:/root/.ssh/id_rsa -v $ssh_key_path.pub:/root/.ssh/id_rsa.pub"
 fi
diff --git a/python/flexflow/serve/serve.py b/python/flexflow/serve/serve.py
index 394869426..6db415aea 100644
--- a/python/flexflow/serve/serve.py
+++ b/python/flexflow/serve/serve.py
@@ -301,8 +301,9 @@ def download_hf_weights_if_needed(self) -> None:
         If not, or if the refresh_cache parameter is set to True, download new weights and convert them.
         """
 
-        # TODO: edit this to download the weights using snapshot_download and convert them to FlexFlow format without loading them to GPU
         def download_and_convert_llm_weights(model_name):
+            num_cores = max((os.cpu_count() or 1) - 1, 1)
+            snapshot_download(repo_id=model_name, allow_patterns="*.safetensors", max_workers=min(30, num_cores))
             hf_model = AutoModelForCausalLM.from_pretrained(
                 model_name,
                 trust_remote_code=True,