
merge into rubra-9b #2

Merged
merged 119 commits on Apr 10, 2024
Commits
8e04794
fix packages
hiyouga Mar 17, 2024
bea31b9
Update wechat.jpg
hiyouga Mar 18, 2024
df9b4fb
Updated README with new information
Mar 20, 2024
0531dac
Updated README with new information
khazic Mar 20, 2024
8d10fa7
Updated README with new information
khazic Mar 20, 2024
a773035
Merge pull request #2903 from khazic/main
hiyouga Mar 20, 2024
e165965
Follow HF_ENDPOINT environment variable
SirlyDreamer Mar 20, 2024
7b8f502
fix #2346
hiyouga Mar 20, 2024
9bec3c9
fix #2777 #2895
hiyouga Mar 20, 2024
b2dfbd7
Merge pull request #2905 from SirlyDreamer/main
hiyouga Mar 20, 2024
3271af2
add orca_dpo_pairs dataset
hiyouga Mar 20, 2024
8408225
support fsdp + qlora
hiyouga Mar 20, 2024
bfe7a91
update readme
hiyouga Mar 20, 2024
c3b80d6
update tool extractor
sanjay920 Mar 20, 2024
9cd6ef8
Merge branch 'hiyouga:main' into main
sanjay920 Mar 21, 2024
0581bfd
paper release
hiyouga Mar 21, 2024
5eaa50f
add citation
hiyouga Mar 21, 2024
9670262
move file
hiyouga Mar 21, 2024
675ba41
Update README.md, fix the release date of the paper
0xez Mar 21, 2024
be03603
Update README_zh.md, fix the release date of the paper
0xez Mar 22, 2024
ce261fd
Merge pull request #2919 from 0xez/main
hiyouga Mar 22, 2024
564d57a
Update wechat.jpg
hiyouga Mar 22, 2024
a1c8c98
fix #2941
hiyouga Mar 23, 2024
7afbc85
fix #2928
hiyouga Mar 23, 2024
140ad4a
fix #2936
hiyouga Mar 23, 2024
c1e2c4e
fix Llama lora merge crash
marko1616 Mar 23, 2024
51349ea
fix Llama lora merge crash
marko1616 Mar 23, 2024
6f080fd
fix Llama lora merge crash
marko1616 Mar 23, 2024
c8f0d99
pass ruff check
marko1616 Mar 24, 2024
49f9dbb
Merge pull request #2945 from marko1616/bugfix/lora-model-merge
hiyouga Mar 25, 2024
558a538
tiny fix
hiyouga Mar 25, 2024
b02899b
Update requirements.txt
rkinas Mar 25, 2024
f33a3df
Merge pull request #2963 from rkinas/patch-1
hiyouga Mar 25, 2024
bb4ca16
Update README_zh.md
Tsumugii24 Mar 25, 2024
6f2b563
release v0.6.0
hiyouga Mar 25, 2024
1484f76
add arg check
hiyouga Mar 25, 2024
7aa77a3
Update README_zh.md
Tsumugii24 Mar 25, 2024
1704599
Update README.md
Tsumugii24 Mar 25, 2024
f633ac6
Merge pull request #2967 from Tsumugii24/main
hiyouga Mar 25, 2024
7b3d818
update readme
hiyouga Mar 25, 2024
98a42cb
tiny fix
hiyouga Mar 25, 2024
ba70aca
release v0.6.0 (real)
hiyouga Mar 25, 2024
7ea1a1f
Update wechat.jpg
hiyouga Mar 26, 2024
511f675
fix #2961
hiyouga Mar 26, 2024
3164b4f
fix bug
hiyouga Mar 26, 2024
b29d556
fix #2981
hiyouga Mar 26, 2024
3bcd41b
fix ds optimizer
hiyouga Mar 26, 2024
8c77b10
update trainers
hiyouga Mar 28, 2024
b19c148
fix #3010
hiyouga Mar 28, 2024
6c94305
update readme
hiyouga Mar 28, 2024
8d603f8
fix #2982
hiyouga Mar 28, 2024
1e43319
add project
hiyouga Mar 28, 2024
c1fe6ce
update readme
hiyouga Mar 28, 2024
ca79302
release v0.6.1
hiyouga Mar 29, 2024
a0333bb
Update wechat.jpg
hiyouga Mar 29, 2024
9c2ef9c
fix pile dataset hf hub url
yunhaoli24 Mar 30, 2024
831c532
upgrade gradio to 4.21.0
hiyouga Mar 30, 2024
257f643
Merge pull request #3053 from lealaxy/main
hiyouga Mar 30, 2024
7a086ed
support save args in webui #2807 #3046
hiyouga Mar 30, 2024
eb178ea
Fix Llama model save for full param train
marko1616 Mar 30, 2024
d9a5134
fix blank line contains whitespace
marko1616 Mar 30, 2024
de3564f
Merge pull request #3057 from marko1616/bugfix/lora-model-merge
hiyouga Mar 30, 2024
27776c3
tiny fix
hiyouga Mar 30, 2024
17bf8a2
support ORPO
hiyouga Mar 31, 2024
5195add
support orpo in webui
hiyouga Mar 31, 2024
a81d88b
Merge pull request #3066 from hiyouga/orpo
hiyouga Mar 31, 2024
099db6a
update readme
hiyouga Mar 31, 2024
68aaa49
use log1p in orpo loss
hiyouga Mar 31, 2024
5907216
fix plots
hiyouga Mar 31, 2024
5b9b404
fix IPO and ORPO loss
hiyouga Apr 1, 2024
816d714
fix ORPO loss
hiyouga Apr 1, 2024
d0842f6
update webui
hiyouga Apr 1, 2024
eb259cc
support infer 4bit model on GPUs #3023
hiyouga Apr 1, 2024
aee634c
fix #3077
hiyouga Apr 1, 2024
54b7d34
add qwen1.5 moe
hiyouga Apr 1, 2024
4a6ca62
fix #3083
hiyouga Apr 1, 2024
dd73a0c
set dev version
hiyouga Apr 1, 2024
a86ae17
Update SECURITY.md
hiyouga Apr 1, 2024
9ddbe28
fix #3022
hiyouga Apr 2, 2024
b267aeb
add moe aux loss control #3085
hiyouga Apr 2, 2024
92dab8a
simplify readme
hiyouga Apr 2, 2024
949e5fe
update readme
hiyouga Apr 2, 2024
11a6c1b
update readme
hiyouga Apr 2, 2024
31ffbde
update examples
hiyouga Apr 2, 2024
f22eaeb
update examples
hiyouga Apr 2, 2024
7765f33
add zh readme
hiyouga Apr 2, 2024
fc7f1cc
update examples
hiyouga Apr 2, 2024
66b0fe4
update readme
hiyouga Apr 2, 2024
49a2dfa
update vllm example
hiyouga Apr 2, 2024
f0a9245
Update wechat.jpg
hiyouga Apr 3, 2024
ce77d98
fix #3116
hiyouga Apr 3, 2024
148bda3
fix resize vocab at inference #3022
hiyouga Apr 3, 2024
7f6e412
fix requires for windows
hiyouga Apr 3, 2024
5ddcecd
fix bug in latest gradio
hiyouga Apr 3, 2024
4b920f2
back to gradio 4.21 and fix chat
hiyouga Apr 3, 2024
a6d9438
tiny fix
hiyouga Apr 3, 2024
b87f8f1
update examples
hiyouga Apr 4, 2024
a88fe8c
update readme
hiyouga Apr 6, 2024
77044d9
support Qwen1.5-32B
sliderSun Apr 7, 2024
21650d4
support Qwen1.5-32B
sliderSun Apr 7, 2024
1d117b7
fix spell error
sliderSun Apr 7, 2024
2565a32
support https://github.com/hiyouga/LLaMA-Factory/issues/3152
codemayq Apr 7, 2024
5a780e9
rename template to breeze
codemayq Apr 7, 2024
ff4d313
Merge pull request #3160 from sliderSun/main
hiyouga Apr 7, 2024
34bdcba
rename template to breeze
codemayq Apr 7, 2024
7b76b4c
add empty line
codemayq Apr 7, 2024
e79e153
Update wechat.jpg
hiyouga Apr 8, 2024
98ad2cc
Merge branch 'main' of https://github.com/hiyouga/LLaMA-Factory
hiyouga Apr 8, 2024
4c6c4a0
Merge pull request #3161 from hiyouga/feature/add-mediatek-model
hiyouga Apr 8, 2024
9a99fbc
tiny fix
hiyouga Apr 8, 2024
7f6c248
fix quant infer and qwen2moe
hiyouga Apr 9, 2024
b5eefe5
Pass additional_target to unsloth
kno10 Apr 9, 2024
2111b58
Update adapter.py
hiyouga Apr 9, 2024
98bc97d
Update adapter.py
hiyouga Apr 9, 2024
caf8373
Merge pull request #3201 from kno10/patch-1 and fix #3200
hiyouga Apr 9, 2024
a99f5ed
fix #3225
hiyouga Apr 10, 2024
51d0a1a
Merge branch 'main' of https://github.com/hiyouga/LLaMA-Factory
hiyouga Apr 10, 2024
45778e2
Merge branch 'main' into main
tybalex Apr 10, 2024
27c9dd1
Merge branch 'rubra-9b' into tempmain
tybalex Apr 10, 2024
2 changes: 1 addition & 1 deletion .github/SECURITY.md
@@ -1,6 +1,6 @@
# Reporting Security Issues

To report a security issue, please use the GitHub Security Advisory ["Report a Vulnerability"](https://github.com/electron/electron/security/advisories/new) tab.
To report a security issue, please use the GitHub Security Advisory ["Report a Vulnerability"](https://github.com/hiyouga/LLaMA-Factory/security/advisories/new) tab.

We will send a response indicating the next steps in handling your report. After the initial reply to your report, the security team will keep you informed of the progress towards a fix and full announcement, and may ask for additional information or guidance.

37 changes: 37 additions & 0 deletions CITATION.cff
@@ -0,0 +1,37 @@
cff-version: 1.2.0
date-released: 2024-03
message: "If you use this software, please cite it as below."
authors:
- family-names: "Zheng"
given-names: "Yaowei"
- family-names: "Zhang"
given-names: "Richong"
- family-names: "Zhang"
given-names: "Junhao"
- family-names: "Ye"
given-names: "Yanhan"
- family-names: "Luo"
given-names: "Zheyan"
- family-names: "Ma"
given-names: "Yongqiang"
title: "LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models"
url: "https://arxiv.org/abs/2403.13372"
preferred-citation:
type: article
authors:
- family-names: "Zheng"
given-names: "Yaowei"
- family-names: "Zhang"
given-names: "Richong"
- family-names: "Zhang"
given-names: "Junhao"
- family-names: "Ye"
given-names: "Yanhan"
- family-names: "Luo"
given-names: "Zheyan"
- family-names: "Ma"
given-names: "Yongqiang"
journal: "arXiv preprint arXiv:2403.13372"
title: "LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models"
url: "https://arxiv.org/abs/2403.13372"
year: 2024
465 changes: 91 additions & 374 deletions README.md

Large diffs are not rendered by default.

438 changes: 91 additions & 347 deletions README_zh.md

Large diffs are not rendered by default.

Binary file modified assets/wechat.jpg
6 changes: 6 additions & 0 deletions data/README.md
@@ -34,6 +34,8 @@ If you are using a custom dataset, please provide your dataset definition in the

Given the above, you can use the custom dataset by specifying `--dataset dataset_name`.

----

Currently we support datasets in the **alpaca** or **sharegpt** format. A dataset in the alpaca format should be organized as follows:

```json
@@ -84,6 +86,10 @@ For the preference datasets, the `response` column should be a string list whose
}
```

Remember to set `"ranking": true` for the preference datasets.

----

A dataset in the sharegpt format should be organized as follows:

```json
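The JSON blocks above are truncated in this diff view, but the alpaca layout they describe can be sketched as a single record (field names follow the alpaca convention; the values are illustrative, not from the repo's data):

```python
# A minimal alpaca-format record; "output" holds the target response.
record = {
    "instruction": "Summarize the text.",
    "input": "LLaMA-Factory fine-tunes 100+ language models.",
    "output": "A unified fine-tuning framework.",
}

# Preference datasets additionally set "ranking": true in dataset_info.json
# and use a two-element response list: [chosen, rejected].
preference_response = ["Good answer.", "Bad answer."]
```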
6 changes: 6 additions & 0 deletions data/README_zh.md
@@ -34,6 +34,8 @@

添加后可通过指定 `--dataset 数据集名称` 参数使用自定义数据集。

----

该项目目前支持两种格式的数据集:**alpaca** 和 **sharegpt**,其中 alpaca 格式的数据集按照以下方式组织:

```json
@@ -84,6 +86,10 @@
}
```

添加偏好数据集需要额外指定 `"ranking": true`。

----

而 sharegpt 格式的数据集按照以下方式组织:

```json
7 changes: 5 additions & 2 deletions data/belle_multiturn/belle_multiturn.py
@@ -1,7 +1,10 @@
import os
import json
import datasets


_HF_ENDPOINT = os.getenv("HF_ENDPOINT", "https://huggingface.co")

_DESCRIPTION = "BELLE multiturn chat dataset."

_CITATION = """\
@@ -13,9 +16,9 @@
}
"""

_HOMEPAGE = "https://huggingface.co/datasets/BelleGroup/multiturn_chat_0.8M"
_HOMEPAGE = "{}/datasets/BelleGroup/multiturn_chat_0.8M".format(_HF_ENDPOINT)
_LICENSE = "gpl-3.0"
_URL = "https://huggingface.co/datasets/BelleGroup/multiturn_chat_0.8M/resolve/main/multiturn_chat_0.8M.json"
_URL = "{}/datasets/BelleGroup/multiturn_chat_0.8M/resolve/main/multiturn_chat_0.8M.json".format(_HF_ENDPOINT)


class BelleMultiturn(datasets.GeneratorBasedBuilder):
23 changes: 20 additions & 3 deletions data/dataset_info.json
@@ -461,6 +461,14 @@
"ms_hub_url": "AI-ModelScope/WizardLM_evol_instruct_V2_196k",
"formatting": "sharegpt"
},
"glaive_toolcall_100k": {
"hf_hub_url": "hiyouga/glaive-function-calling-v2-sharegpt",
"formatting": "sharegpt",
"columns": {
"messages": "conversations",
"tools": "tools"
}
},
"cosmopedia": {
"hf_hub_url": "HuggingFaceTB/cosmopedia",
"columns": {
@@ -536,6 +544,16 @@
"file_sha1": "515b18ed497199131ddcc1af950345c11dc5c7fd",
"ranking": true
},
"orca_rlhf": {
"file_name": "orca_rlhf.json",
"file_sha1": "acc8f74d16fd1fc4f68e7d86eaa781c2c3f5ba8e",
"ranking": true,
"columns": {
"prompt": "question",
"response": "answer",
"system": "system"
}
},
"nectar_rm": {
"hf_hub_url": "mlinmg/RLAIF-Nectar",
"ms_hub_url": "AI-ModelScope/RLAIF-Nectar",
@@ -587,12 +605,11 @@
}
},
"pile": {
"hf_hub_url": "EleutherAI/pile",
"hf_hub_url": "monology/pile-uncopyrighted",
"ms_hub_url": "AI-ModelScope/pile",
"columns": {
"prompt": "text"
},
"subset": "all"
}
},
"skypile": {
"hf_hub_url": "Skywork/SkyPile-150B",
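The new `orca_rlhf` entry maps dataset-specific field names onto the loader's schema through its `columns` block. A simplified sketch of how such a mapping is applied (the `remap` helper is illustrative, not the repo's actual loader):

```python
dataset_info = {
    "orca_rlhf": {
        "file_name": "orca_rlhf.json",
        "ranking": True,  # marks a preference dataset
        "columns": {"prompt": "question", "response": "answer", "system": "system"},
    }
}

def remap(raw: dict, columns: dict) -> dict:
    # Translate source field names (values) to loader field names (keys).
    return {dst: raw[src] for dst, src in columns.items()}

example = remap(
    {"question": "2+2?", "answer": ["4", "5"], "system": "Be concise."},
    dataset_info["orca_rlhf"]["columns"],
)
```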
7 changes: 4 additions & 3 deletions data/hh_rlhf_en/hh_rlhf_en.py
@@ -1,13 +1,14 @@
import os
import json
import datasets
from typing import List


_HF_ENDPOINT = os.getenv("HF_ENDPOINT", "https://huggingface.co")
_DESCRIPTION = "Human preference data about helpfulness and harmlessness."
_CITATION = ""
_HOMEPAGE = "https://huggingface.co/datasets/Anthropic/hh-rlhf"
_HOMEPAGE = "{}/datasets/Anthropic/hh-rlhf".format(_HF_ENDPOINT)
_LICENSE = "mit"
_URL = "https://huggingface.co/datasets/Anthropic/hh-rlhf/resolve/main/"
_URL = "{}/datasets/Anthropic/hh-rlhf/resolve/main/".format(_HF_ENDPOINT)
_URLS = {
"train": [
_URL + "harmless-base/train.jsonl.gz",
102,874 changes: 102,874 additions & 0 deletions data/orca_rlhf.json

Large diffs are not rendered by default.

6 changes: 4 additions & 2 deletions data/ultra_chat/ultra_chat.py
@@ -1,7 +1,9 @@
import os
import json
import datasets
from typing import List

_HF_ENDPOINT = os.getenv("HF_ENDPOINT", "https://huggingface.co")

_DESCRIPTION = "UltraChat: Large-scale, Informative, and Diverse Multi-round Dialogue Data."

@@ -16,9 +18,9 @@
}
"""

_HOMEPAGE = "https://huggingface.co/datasets/stingning/ultrachat"
_HOMEPAGE = "{}/datasets/stingning/ultrachat".format(_HF_ENDPOINT)
_LICENSE = "cc-by-nc-4.0"
_BASE_DATA_URL = "https://huggingface.co/datasets/stingning/ultrachat/resolve/main/train_{idx}.jsonl"
_BASE_DATA_URL = "{}/datasets/stingning/ultrachat/resolve/main/train_{{idx}}.jsonl".format(_HF_ENDPOINT)


class UltraChat(datasets.GeneratorBasedBuilder):
2 changes: 2 additions & 0 deletions docker-compose.yml
@@ -10,6 +10,8 @@ services:
- ./hf_cache:/root/.cache/huggingface/
- ./data:/app/data
- ./output:/app/output
environment:
- CUDA_VISIBLE_DEVICES=0
ports:
- "7860:7860"
ipc: host
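The compose change pins the container to GPU 0 through `CUDA_VISIBLE_DEVICES`. A sketch of how a process might read that restriction (illustrative, not repo code):

```python
import os

# Mirror the compose file's setting; a comma-separated list selects GPUs.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

visible = os.environ["CUDA_VISIBLE_DEVICES"]
gpu_ids = [int(i) for i in visible.split(",") if i]
```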
43 changes: 43 additions & 0 deletions examples/README.md
@@ -0,0 +1,43 @@
We provide diverse examples of fine-tuning LLMs.

```
examples/
├── lora_single_gpu/
│ ├── pretrain.sh: Do pre-training
│ ├── sft.sh: Do supervised fine-tuning
│ ├── reward.sh: Do reward modeling
│ ├── ppo.sh: Do PPO training
│ ├── dpo.sh: Do DPO training
│ ├── orpo.sh: Do ORPO training
│ ├── prepare.sh: Save tokenized dataset
│ └── predict.sh: Do batch predict
├── qlora_single_gpu/
│ ├── bitsandbytes.sh: Fine-tune 4/8-bit BNB models
│ ├── gptq.sh: Fine-tune 4/8-bit GPTQ models
│ ├── awq.sh: Fine-tune 4-bit AWQ models
│ └── aqlm.sh: Fine-tune 2-bit AQLM models
├── lora_multi_gpu/
│ ├── single_node.sh: Fine-tune model with Accelerate on single node
│ └── multi_node.sh: Fine-tune model with Accelerate on multiple nodes
├── full_multi_gpu/
│ ├── single_node.sh: Fine-tune model with DeepSpeed on single node
│ └── multi_node.sh: Fine-tune model with DeepSpeed on multiple nodes
├── merge_lora/
│ ├── merge.sh: Merge LoRA weights into the pre-trained models
│ └── quantize.sh: Quantize fine-tuned model with AutoGPTQ
├── inference/
│ ├── cli_demo.sh: Launch a command line interface
│ ├── api_demo.sh: Launch an OpenAI-style API
│ ├── web_demo.sh: Launch a web interface
│ └── evaluate.sh: Evaluate model on the MMLU benchmark
└── extras/
├── galore/
│ └── sft.sh: Fine-tune model with GaLore
├── loraplus/
│ └── sft.sh: Fine-tune model with LoRA+
├── llama_pro/
│ ├── expand.sh: Expand layers in the model
│ └── sft.sh: Fine-tune expanded model
└── fsdp_qlora/
└── sft.sh: Fine-tune quantized model with FSDP
```
43 changes: 43 additions & 0 deletions examples/README_zh.md
@@ -0,0 +1,43 @@
我们提供了多样化的示例脚本。

```
examples/
├── lora_single_gpu/
│ ├── pretrain.sh: 进行预训练
│ ├── sft.sh: 进行指令监督微调
│ ├── reward.sh: 进行奖励模型训练
│ ├── ppo.sh: 进行 PPO 训练
│ ├── dpo.sh: 进行 DPO 训练
│ ├── orpo.sh: 进行 ORPO 训练
│ ├── prepare.sh: 保存预处理后的数据集
│ └── predict.sh: 进行批量预测
├── qlora_single_gpu/
│ ├── bitsandbytes.sh: 微调 4/8 比特 BNB 模型
│ ├── gptq.sh: 微调 4/8 比特 GPTQ 模型
│ ├── awq.sh: 微调 4 比特 AWQ 模型
│ └── aqlm.sh: 微调 2 比特 AQLM 模型
├── lora_multi_gpu/
│ ├── single_node.sh: 使用 Accelerate 进行单节点训练
│ └── multi_node.sh: 使用 Accelerate 进行多节点训练
├── full_multi_gpu/
│ ├── single_node.sh: 使用 DeepSpeed 进行单节点训练
│ └── multi_node.sh: 使用 DeepSpeed 进行多节点训练
├── merge_lora/
│ ├── merge.sh: 将 LoRA 权重合并到预训练模型中
│ └── quantize.sh: 使用 AutoGPTQ 量化模型
├── inference/
│ ├── cli_demo.sh: 启动命令行推理接口
│ ├── api_demo.sh: 启动 OpenAI 风格 API
│ ├── web_demo.sh: 启动浏览器推理接口
│ └── evaluate.sh: 在 MMLU 数据集上评测模型
└── extras/
├── galore/
│ └── sft.sh: 使用 GaLore 训练模型
├── loraplus/
│ └── sft.sh: 使用 LoRA+ 训练模型
├── llama_pro/
│ ├── expand.sh: 扩展模型中的层
│ └── sft.sh: 训练扩展后的模型
└── fsdp_qlora/
└── sft.sh: 使用 FSDP 微调量化模型
```
25 changes: 25 additions & 0 deletions examples/accelerate/fsdp_config.yaml
@@ -0,0 +1,25 @@
compute_environment: LOCAL_MACHINE
debug: false
distributed_type: FSDP
downcast_bf16: 'no'
fsdp_config:
fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
fsdp_backward_prefetch: BACKWARD_PRE
fsdp_cpu_ram_efficient_loading: true
fsdp_forward_prefetch: false
fsdp_offload_params: true
fsdp_sharding_strategy: FULL_SHARD
fsdp_state_dict_type: FULL_STATE_DICT
fsdp_sync_module_states: true
fsdp_use_orig_params: false
machine_rank: 0
main_training_function: main
mixed_precision: fp16
num_machines: 1 # the number of nodes
num_processes: 2 # the number of GPUs in all nodes
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false
@@ -8,8 +8,8 @@ main_process_ip: 192.168.0.1
main_process_port: 29555
main_training_function: main
mixed_precision: fp16
num_machines: 2
num_processes: 16
num_machines: 2 # the number of nodes
num_processes: 16 # the number of GPUs in all nodes
rdzv_backend: static
same_network: true
tpu_env: []
@@ -6,8 +6,8 @@ gpu_ids: all
machine_rank: 0
main_training_function: main
mixed_precision: fp16
num_machines: 1
num_processes: 4
num_machines: 1 # the number of nodes
num_processes: 4 # the number of GPUs in all nodes
rdzv_backend: static
same_network: true
tpu_env: []
@@ -8,8 +8,8 @@ main_process_ip: 192.168.0.1
main_process_port: 29555
main_training_function: main
mixed_precision: fp16
num_machines: 2
num_processes: 16
num_machines: 2 # the number of nodes
num_processes: 16 # the number of GPUs in all nodes
rdzv_backend: static
same_network: true
tpu_env: []
@@ -1,22 +1,28 @@
#!/bin/bash

CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \
pip install "transformers>=4.39.1"
pip install "accelerate>=0.28.0"
pip install "bitsandbytes>=0.43.0"

CUDA_VISIBLE_DEVICES=0,1 accelerate launch \
--config_file ../../accelerate/fsdp_config.yaml \
../../../src/train_bash.py \
--stage sft \
--do_train \
--model_name_or_path meta-llama/Llama-2-7b-hf \
--model_name_or_path meta-llama/Llama-2-70b-hf \
--dataset alpaca_gpt4_en,glaive_toolcall \
--dataset_dir ../../../data \
--template default \
--finetuning_type full \
--optim adamw_8bit \
--output_dir ../../../saves/LLaMA2-7B/galore/sft \
--finetuning_type lora \
--lora_target q_proj,v_proj \
--output_dir ../../../saves/LLaMA2-70B/lora/sft \
--overwrite_cache \
--overwrite_output_dir \
--cutoff_len 1024 \
--preprocessing_num_workers 16 \
--per_device_train_batch_size 1 \
--per_device_eval_batch_size 1 \
--gradient_accumulation_steps 1 \
--gradient_accumulation_steps 4 \
--lr_scheduler_type cosine \
--logging_steps 10 \
--warmup_steps 20 \
@@ -28,5 +34,7 @@ CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \
--num_train_epochs 3.0 \
--max_samples 3000 \
--val_size 0.1 \
--ddp_timeout 180000000 \
--quantization_bit 4 \
--plot_loss \
--pure_bf16
--fp16
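The reworked script installs minimum package versions (transformers>=4.39.1, accelerate>=0.28.0, bitsandbytes>=0.43.0) before launching FSDP + QLoRA. A small helper of the kind one might use to verify such pins (illustrative; not part of the repo):

```python
def meets_minimum(installed: str, required: str) -> bool:
    # Compare dotted version strings numerically, component by component,
    # so that "4.39.1" >= "4.39.1" and "4.38.2" < "4.39.1".
    as_tuple = lambda v: tuple(int(part) for part in v.split("."))
    return as_tuple(installed) >= as_tuple(required)
```

For example, `meets_minimum("0.43.0", "0.28.0")` holds, while a stale `bitsandbytes` 0.27.x would fail the check.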