diff --git a/finetune.py b/finetune.py
index d696e5535..c76edb508 100644
--- a/finetune.py
+++ b/finetune.py
@@ -226,8 +226,6 @@ def train(
             NOTE: for current pytorch 2.0, flash attention requires installing cuda 11.7 via https://developer.nvidia.com/cuda-11-7-0-download-archive?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=20.04&target_type=runfile_local and then when running, to avoid installing driver, docs, samples, just install toolkit. Then when pip installing flash attention do:
             CUDA_HOME=/usr/local/cuda-11.7 pip install flash-attn""")
-        from llama_flash_attn_monkey_patch import replace_llama_attn_with_flash_attn
-        replace_llama_attn_with_flash_attn()
     assert (
         base_model
     ), "Please specify a --base_model, e.g. --base_model='decapoda-research/llama-7b-hf'"
diff --git a/requirements.txt b/requirements.txt
index 9f9f1e643..8c943d601 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,13 +1,13 @@
 # for generate (gradio server) and finetune
-datasets==2.11.0
+datasets==2.12.0
 sentencepiece==0.1.97
 accelerate==0.18.0
 gradio==3.27.0
-huggingface_hub==0.13.4
+huggingface_hub==0.14.1
 appdirs==1.4.4
 fire==0.5.0
 docutils==0.19
-torch==2.0.0
+torch==2.0.1
 evaluate==0.4.0
 rouge_score==0.1.2
 sacrebleu==2.3.1