Compare commits
341 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b2949b88e9 | ||
|
|
538c79fd8f | ||
|
|
437cc20be6 | ||
|
|
2ac972d6e7 | ||
|
|
4d7f0fbb7a | ||
|
|
40e3d3fbdd | ||
|
|
096677b989 | ||
|
|
7940b968ae | ||
|
|
36a4224bf5 | ||
|
|
d4d36e157c | ||
|
|
c4f5e49d0d | ||
|
|
8e518d6c62 | ||
|
|
79165100e5 | ||
|
|
fc82acbbd8 | ||
|
|
aead3ca8e5 | ||
|
|
b12679ad59 | ||
|
|
8061cb5671 | ||
|
|
0a7e5f2f57 | ||
|
|
812d2c25a7 | ||
|
|
51795e8db1 | ||
|
|
2c011060b1 | ||
|
|
a8c7531250 | ||
|
|
88c34d26a8 | ||
|
|
12d666a63c | ||
|
|
304a2efec8 | ||
|
|
322331df51 | ||
|
|
ba0da83031 | ||
|
|
0a82e15e7c | ||
|
|
6670b36c49 | ||
|
|
7a1d13aae2 | ||
|
|
86a048128b | ||
|
|
fe1a3b1367 | ||
|
|
84ff56c3a0 | ||
|
|
483ed64b43 | ||
|
|
dd4619e9f3 | ||
|
|
905815d878 | ||
|
|
ba72e08901 | ||
|
|
e4972c8fc4 | ||
|
|
5f5f948806 | ||
|
|
2892e5d42a | ||
|
|
542a5d15ef | ||
|
|
b1c791fb0d | ||
|
|
7589123465 | ||
|
|
f94b54b776 | ||
|
|
1e1b8899f5 | ||
|
|
7b02c83399 | ||
|
|
8f1ba07b30 | ||
|
|
1ce400bddf | ||
|
|
6bc0ec63c7 | ||
|
|
25d316b1a0 | ||
|
|
2bcd5b2b73 | ||
|
|
436afcba57 | ||
|
|
db47c53486 | ||
|
|
4efe56fd68 | ||
|
|
d54313fcf9 | ||
|
|
382f096475 | ||
|
|
0ccc76392e | ||
|
|
e2cfcb0a5f | ||
|
|
b530a798c1 | ||
|
|
fdf38b70a0 | ||
|
|
1a78b675be | ||
|
|
9b1008912c | ||
|
|
18241f4ed8 | ||
|
|
223bbd9930 | ||
|
|
9dadff90bb | ||
|
|
827a929f1d | ||
|
|
e508519e0a | ||
|
|
47892418ad | ||
|
|
2aeae4b88b | ||
|
|
c213f2a9a9 | ||
|
|
333f4a69bb | ||
|
|
172600d432 | ||
|
|
4ce4172c87 | ||
|
|
400ae144a4 | ||
|
|
0a1b6ca5a7 | ||
|
|
05ef89cfcc | ||
|
|
6d9d8b92ca | ||
|
|
3f7f1daa33 | ||
|
|
8061e92d07 | ||
|
|
0c811a7653 | ||
|
|
f6ac3796ca | ||
|
|
c1394e7dfc | ||
|
|
ebab655683 | ||
|
|
3d74f21738 | ||
|
|
8493753fab | ||
|
|
0f626a2145 | ||
|
|
5100c290c4 | ||
|
|
4bde37e7c8 | ||
|
|
e3b3a722de | ||
|
|
b9e167e6ca | ||
|
|
1ebd1e50e7 | ||
|
|
14316f6583 | ||
|
|
8e4ab2f7d0 | ||
|
|
196068fa19 | ||
|
|
da2295f8c8 | ||
|
|
ab0741b5a6 | ||
|
|
6aec446940 | ||
|
|
50c71dd29f | ||
|
|
5c9da798b5 | ||
|
|
3d1b0e1864 | ||
|
|
45becd2a45 | ||
|
|
8f1197de7e | ||
|
|
25de4ce56a | ||
|
|
d0597897bf | ||
|
|
4674f3baa7 | ||
|
|
2f5f6722cf | ||
|
|
7ef3788ff4 | ||
|
|
f9aa74715a | ||
|
|
9b187b274c | ||
|
|
68ed89f351 | ||
|
|
342d7da8d7 | ||
|
|
6eda42eb7c | ||
|
|
e9fe8815be | ||
|
|
9381fecca7 | ||
|
|
efa9140577 | ||
|
|
b1b18b2c5a | ||
|
|
37bcbf72b4 | ||
|
|
99125c8825 | ||
|
|
182b974786 | ||
|
|
7a4a6a5522 | ||
|
|
2383e5440c | ||
|
|
1fea91736a | ||
|
|
09d9fb28f9 | ||
|
|
57c6eabf83 | ||
|
|
33d440b577 | ||
|
|
ce8200ad98 | ||
|
|
2cedb59bee | ||
|
|
dd0b85580e | ||
|
|
cd4dad846b | ||
|
|
a11a04a24f | ||
|
|
eb99999ca8 | ||
|
|
ea58cf111e | ||
|
|
2d95127c33 | ||
|
|
57fcdca336 | ||
|
|
3d88589c0f | ||
|
|
dfd153cc81 | ||
|
|
7641a214d8 | ||
|
|
3cef844079 | ||
|
|
4dcd47100d | ||
|
|
a412b4ed4a | ||
|
|
544a6259b6 | ||
|
|
c501f377dd | ||
|
|
cb8b8f40cd | ||
|
|
70bed8ad8f | ||
|
|
51f776ae2a | ||
|
|
697bc20941 | ||
|
|
1480e3a88f | ||
|
|
19029d5b0f | ||
|
|
7773ac0ead | ||
|
|
23b881bff1 | ||
|
|
10a6c395bb | ||
|
|
f9a7732a1f | ||
|
|
c37582af02 | ||
|
|
ece67f8c7f | ||
|
|
e1838e76fe | ||
|
|
2eede9ffd6 | ||
|
|
a6f6b406b3 | ||
|
|
279439abbe | ||
|
|
13117b69d7 | ||
|
|
5d03ac642d | ||
|
|
5062ee547e | ||
|
|
59817c27e3 | ||
|
|
759bee48d2 | ||
|
|
514ffafc12 | ||
|
|
8b2a735c14 | ||
|
|
10d59e9e4a | ||
|
|
058ed5e607 | ||
|
|
110c2ce2a5 | ||
|
|
c425436676 | ||
|
|
266fe908e3 | ||
|
|
dbd905438b | ||
|
|
d64c87f928 | ||
|
|
29eebef696 | ||
|
|
7bfbcb1fe3 | ||
|
|
9b210cf4b3 | ||
|
|
f74e640565 | ||
|
|
d1d08d066a | ||
|
|
6be321b5da | ||
|
|
3c792174db | ||
|
|
9aeb88c426 | ||
|
|
00e2a272ef | ||
|
|
5142349661 | ||
|
|
0e3cc52327 | ||
|
|
6c1db2d012 | ||
|
|
12c51655ce | ||
|
|
36be12a3b7 | ||
|
|
21fac4c98c | ||
|
|
83404c4fa9 | ||
|
|
12f852b8d4 | ||
|
|
a88873116a | ||
|
|
7cfcd69c64 | ||
|
|
a5eabbe933 | ||
|
|
aa25716a5d | ||
|
|
94c8219575 | ||
|
|
ad24a2a0c9 | ||
|
|
c05027d14a | ||
|
|
5420905a2e | ||
|
|
03f2e3284a | ||
|
|
d2bb1b3a6b | ||
|
|
35c4a2c212 | ||
|
|
1e4010a1fb | ||
|
|
1451297c78 | ||
|
|
0b99b13786 | ||
|
|
f5edbf2b49 | ||
|
|
ab6dc0ea30 | ||
|
|
79d34ce0f3 | ||
|
|
1d2e372a8e | ||
|
|
f6a53d83c8 | ||
|
|
4ec56dd958 | ||
|
|
ba06eb65ca | ||
|
|
be716972fe | ||
|
|
719585a128 | ||
|
|
348f29aa50 | ||
|
|
c8fe3f544b | ||
|
|
0f1ad7140f | ||
|
|
233e167f68 | ||
|
|
1d341dcd83 | ||
|
|
d16561e7a4 | ||
|
|
f8e219dc81 | ||
|
|
3365cc8cf0 | ||
|
|
3a5e68b7d9 | ||
|
|
0cb596fee1 | ||
|
|
b3b5b530d1 | ||
|
|
9225c15c88 | ||
|
|
abd9fed445 | ||
|
|
44cda2eece | ||
|
|
8397808d1d | ||
|
|
9e1bd6420d | ||
|
|
619264c854 | ||
|
|
1ebac62e3d | ||
|
|
ce9bdb3509 | ||
|
|
0c8d6369ac | ||
|
|
bee796f6b5 | ||
|
|
9f6349a333 | ||
|
|
171a029c5e | ||
|
|
eaefaa0fe0 | ||
|
|
d301f0a64b | ||
|
|
0a1578e4e3 | ||
|
|
a4167fd925 | ||
|
|
42084e08ae | ||
|
|
9d23f5dc89 | ||
|
|
5978427ae0 | ||
|
|
c7c216069c | ||
|
|
cde9d1b917 | ||
|
|
96213f04b0 | ||
|
|
7ecea08b9b | ||
|
|
191971865d | ||
|
|
ff4f587dd9 | ||
|
|
de728d0371 | ||
|
|
d08e09642d | ||
|
|
351493b183 | ||
|
|
86ab47e121 | ||
|
|
6dd6b3e396 | ||
|
|
5f1418a68b | ||
|
|
7b97a79efc | ||
|
|
ce4f653121 | ||
|
|
b053c6454e | ||
|
|
ebf0f4a77c | ||
|
|
efa808069a | ||
|
|
b5c5283dd6 | ||
|
|
b638c65519 | ||
|
|
d4d471450f | ||
|
|
3144bdec2c | ||
|
|
c6d6c4c209 | ||
|
|
f5f1589662 | ||
|
|
276f2cb24e | ||
|
|
952b785bb3 | ||
|
|
72dd676208 | ||
|
|
dfaa31e991 | ||
|
|
86556b1c74 | ||
|
|
0c80751e87 | ||
|
|
9338f878a3 | ||
|
|
fde3d91242 | ||
|
|
19adfb88a9 | ||
|
|
daaafa900a | ||
|
|
0dcc9e0bca | ||
|
|
aeec78b35c | ||
|
|
c991654cb4 | ||
|
|
f328413646 | ||
|
|
106a0104da | ||
|
|
5486ea09e3 | ||
|
|
31bbbb6d13 | ||
|
|
1a77de82fa | ||
|
|
7468f2535c | ||
|
|
38e4f22605 | ||
|
|
2bc2fe7b5e | ||
|
|
6d0140d8a0 | ||
|
|
7856f98965 | ||
|
|
e25ddef08c | ||
|
|
95a4589bbf | ||
|
|
566d71b7a9 | ||
|
|
6030a4a720 | ||
|
|
5dc0cb94d4 | ||
|
|
325dafcbb0 | ||
|
|
1a8a8b8651 | ||
|
|
61a495cb1e | ||
|
|
75866aa020 | ||
|
|
9e4fda326d | ||
|
|
1131ddfaff | ||
|
|
9f437b5c43 | ||
|
|
0cc03d3f05 | ||
|
|
04fc2f78bf | ||
|
|
3ac333fc6a | ||
|
|
a246ac1914 | ||
|
|
48ceac845c | ||
|
|
b1986a06b9 | ||
|
|
43d134ba29 | ||
|
|
1348f7d860 | ||
|
|
f6530222f7 | ||
|
|
a74a7585e0 | ||
|
|
5bf0cca2b8 | ||
|
|
755b6511ff | ||
|
|
35621c6089 | ||
|
|
38b59664e6 | ||
|
|
933a084999 | ||
|
|
c1510d19c7 | ||
|
|
2074cf99fb | ||
|
|
b12176d818 | ||
|
|
117b67ea30 | ||
|
|
03e20bb5c6 | ||
|
|
0c4a1381a4 | ||
|
|
9e14501edb | ||
|
|
1dc963caa6 | ||
|
|
85726c91ce | ||
|
|
40211db275 | ||
|
|
e7f13098c6 | ||
|
|
61eb3a3d46 | ||
|
|
be0a807e8c | ||
|
|
52d402e2a9 | ||
|
|
c5a46f9113 | ||
|
|
00e17a377c | ||
|
|
9abd83adb1 | ||
|
|
f0d2afcf90 | ||
|
|
1aba442bcd | ||
|
|
d764cd8736 | ||
|
|
526111a303 | ||
|
|
b8364046df | ||
|
|
1f617c6e08 | ||
|
|
a6858a36c0 | ||
|
|
6198121923 | ||
|
|
b0efebf853 |
2
.github/SECURITY.md
vendored
2
.github/SECURITY.md
vendored
@@ -1,6 +1,6 @@
|
|||||||
# Reporting Security Issues
|
# Reporting Security Issues
|
||||||
|
|
||||||
To report a security issue, please use the GitHub Security Advisory ["Report a Vulnerability"](https://github.com/electron/electron/security/advisories/new) tab.
|
To report a security issue, please use the GitHub Security Advisory ["Report a Vulnerability"](https://github.com/hiyouga/LLaMA-Factory/security/advisories/new) tab.
|
||||||
|
|
||||||
We will send a response indicating the next steps in handling your report. After the initial reply to your report, the security team will keep you informed of the progress towards a fix and full announcement, and may ask for additional information or guidance.
|
We will send a response indicating the next steps in handling your report. After the initial reply to your report, the security team will keep you informed of the progress towards a fix and full announcement, and may ask for additional information or guidance.
|
||||||
|
|
||||||
|
|||||||
@@ -11,4 +11,4 @@ RUN pip install -e .[deepspeed,metrics,bitsandbytes,qwen]
|
|||||||
VOLUME [ "/root/.cache/huggingface/", "/app/data", "/app/output" ]
|
VOLUME [ "/root/.cache/huggingface/", "/app/data", "/app/output" ]
|
||||||
EXPOSE 7860
|
EXPOSE 7860
|
||||||
|
|
||||||
CMD [ "python", "src/train_web.py" ]
|
CMD [ "llamafactory-cli", "webui" ]
|
||||||
|
|||||||
619
README.md
619
README.md
@@ -5,7 +5,7 @@
|
|||||||
[](https://github.com/hiyouga/LLaMA-Factory/commits/main)
|
[](https://github.com/hiyouga/LLaMA-Factory/commits/main)
|
||||||
[](https://pypi.org/project/llmtuner/)
|
[](https://pypi.org/project/llmtuner/)
|
||||||
[](https://pypi.org/project/llmtuner/)
|
[](https://pypi.org/project/llmtuner/)
|
||||||
[](#projects-using-llama-factory)
|
[](#projects-using-llama-factory)
|
||||||
[](https://github.com/hiyouga/LLaMA-Factory/pulls)
|
[](https://github.com/hiyouga/LLaMA-Factory/pulls)
|
||||||
[](https://discord.gg/rKfvV9r9FK)
|
[](https://discord.gg/rKfvV9r9FK)
|
||||||
[](https://twitter.com/llamafactory_ai)
|
[](https://twitter.com/llamafactory_ai)
|
||||||
@@ -13,6 +13,8 @@
|
|||||||
[](https://modelscope.cn/studios/hiyouga/LLaMA-Board)
|
[](https://modelscope.cn/studios/hiyouga/LLaMA-Board)
|
||||||
[](https://colab.research.google.com/drive/1eRTPn37ltBbYsISy9Aw2NuI2Aq5CQrD9?usp=sharing)
|
[](https://colab.research.google.com/drive/1eRTPn37ltBbYsISy9Aw2NuI2Aq5CQrD9?usp=sharing)
|
||||||
|
|
||||||
|
[](https://trendshift.io/repositories/4535)
|
||||||
|
|
||||||
👋 Join our [WeChat](assets/wechat.jpg).
|
👋 Join our [WeChat](assets/wechat.jpg).
|
||||||
|
|
||||||
\[ English | [中文](README_zh.md) \]
|
\[ English | [中文](README_zh.md) \]
|
||||||
@@ -43,17 +45,17 @@ Choose your path:
|
|||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
- **Various models**: LLaMA, Mistral, Mixtral-MoE, Qwen, Yi, Gemma, Baichuan, ChatGLM, Phi, etc.
|
- **Various models**: LLaMA, LLaVA, Mistral, Mixtral-MoE, Qwen, Yi, Gemma, Baichuan, ChatGLM, Phi, etc.
|
||||||
- **Integrated methods**: (Continuous) pre-training, supervised fine-tuning, reward modeling, PPO and DPO.
|
- **Integrated methods**: (Continuous) pre-training, (multimodal) supervised fine-tuning, reward modeling, PPO, DPO and ORPO.
|
||||||
- **Scalable resources**: 32-bit full-tuning, 16-bit freeze-tuning, 16-bit LoRA and 2/4/8-bit QLoRA via AQLM/AWQ/GPTQ/LLM.int8.
|
- **Scalable resources**: 32-bit full-tuning, 16-bit freeze-tuning, 16-bit LoRA and 2/4/8-bit QLoRA via AQLM/AWQ/GPTQ/LLM.int8.
|
||||||
- **Advanced algorithms**: GaLore, DoRA, LongLoRA, LLaMA Pro, LoRA+, LoftQ and Agent tuning.
|
- **Advanced algorithms**: GaLore, BAdam, DoRA, LongLoRA, LLaMA Pro, Mixture-of-Depths, LoRA+, LoftQ and Agent tuning.
|
||||||
- **Practical tricks**: FlashAttention-2, Unsloth, RoPE scaling, NEFTune and rsLoRA.
|
- **Practical tricks**: FlashAttention-2, Unsloth, RoPE scaling, NEFTune and rsLoRA.
|
||||||
- **Experiment monitors**: LlamaBoard, TensorBoard, Wandb, MLflow, etc.
|
- **Experiment monitors**: LlamaBoard, TensorBoard, Wandb, MLflow, etc.
|
||||||
- **Faster inference**: OpenAI-style API, Gradio UI and CLI with vLLM worker.
|
- **Faster inference**: OpenAI-style API, Gradio UI and CLI with vLLM worker.
|
||||||
|
|
||||||
## Benchmark
|
## Benchmark
|
||||||
|
|
||||||
Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ptuning), LLaMA-Factory's LoRA tuning offers up to **3.7 times faster** training speed with a better Rouge score on the advertising text generation task. By leveraging 4-bit quantization technique, LLaMA-Factory's QLoRA further improves the efficiency regarding the GPU memory.
|
Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ptuning), LLaMA Factory's LoRA tuning offers up to **3.7 times faster** training speed with a better Rouge score on the advertising text generation task. By leveraging 4-bit quantization technique, LLaMA Factory's QLoRA further improves the efficiency regarding the GPU memory.
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
@@ -62,51 +64,67 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
|
|||||||
- **Training Speed**: the number of training samples processed per second during the training. (bs=4, cutoff_len=1024)
|
- **Training Speed**: the number of training samples processed per second during the training. (bs=4, cutoff_len=1024)
|
||||||
- **Rouge Score**: Rouge-2 score on the development set of the [advertising text generation](https://aclanthology.org/D19-1321.pdf) task. (bs=4, cutoff_len=1024)
|
- **Rouge Score**: Rouge-2 score on the development set of the [advertising text generation](https://aclanthology.org/D19-1321.pdf) task. (bs=4, cutoff_len=1024)
|
||||||
- **GPU Memory**: Peak GPU memory usage in 4-bit quantized training. (bs=1, cutoff_len=1024)
|
- **GPU Memory**: Peak GPU memory usage in 4-bit quantized training. (bs=1, cutoff_len=1024)
|
||||||
- We adopt `pre_seq_len=128` for ChatGLM's P-Tuning and `lora_rank=32` for LLaMA-Factory's LoRA tuning.
|
- We adopt `pre_seq_len=128` for ChatGLM's P-Tuning and `lora_rank=32` for LLaMA Factory's LoRA tuning.
|
||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
## Changelog
|
## Changelog
|
||||||
|
|
||||||
[24/03/21] Our paper "[LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models](https://arxiv.org/abs/2403.13372)" is available at arXiv!
|
[24/05/14] We supported training and inference on the Ascend NPU devices. Check [installation](#installation) section for details.
|
||||||
|
|
||||||
[24/03/20] We supported **FSDP+QLoRA** that fine-tunes a 70B model on 2x24GB GPUs. See `examples/fsdp_qlora` for usage.
|
[24/05/13] We supported fine-tuning the **Yi-1.5** series models.
|
||||||
|
|
||||||
[24/03/13] We supported **[LoRA+](https://arxiv.org/abs/2402.12354)**. See `examples/extras/loraplus` for usage.
|
[24/04/26] We supported fine-tuning the **LLaVA-1.5** multimodal LLMs. See [examples](examples/README.md) for usage.
|
||||||
|
|
||||||
[24/03/07] We supported gradient low-rank projection (**[GaLore](https://arxiv.org/abs/2403.03507)**) algorithm. See `examples/extras/galore` for usage.
|
|
||||||
|
|
||||||
<details><summary>Full Changelog</summary>
|
<details><summary>Full Changelog</summary>
|
||||||
|
|
||||||
[24/03/07] We integrated **[vLLM](https://github.com/vllm-project/vllm)** for faster and concurrent inference. Try `--infer_backend vllm` to enjoy **270%** inference speed. (LoRA is not yet supported, merge it first.)
|
[24/04/22] We provided a **[Colab notebook](https://colab.research.google.com/drive/1eRTPn37ltBbYsISy9Aw2NuI2Aq5CQrD9?usp=sharing)** for fine-tuning the Llama-3 model on a free T4 GPU. Two Llama-3-derived models fine-tuned using LLaMA Factory are available at Hugging Face, check [Llama3-8B-Chinese-Chat](https://huggingface.co/shenzhi-wang/Llama3-8B-Chinese-Chat) and [Llama3-Chinese](https://huggingface.co/zhichen/Llama3-Chinese) for details.
|
||||||
|
|
||||||
[24/02/28] We supported weight-decomposed LoRA (**[DoRA](https://arxiv.org/abs/2402.09353)**). Try `--use_dora` to activate DoRA training.
|
[24/04/21] We supported **[Mixture-of-Depths](https://arxiv.org/abs/2404.02258)** according to [AstraMindAI's implementation](https://github.com/astramind-ai/Mixture-of-depths). See [examples](examples/README.md) for usage.
|
||||||
|
|
||||||
[24/02/15] We supported **block expansion** proposed by [LLaMA Pro](https://github.com/TencentARC/LLaMA-Pro). See `examples/extras/llama_pro` for usage.
|
[24/04/16] We supported **[BAdam](https://arxiv.org/abs/2404.02827)**. See [examples](examples/README.md) for usage.
|
||||||
|
|
||||||
|
[24/04/16] We supported **[unsloth](https://github.com/unslothai/unsloth)**'s long-sequence training (Llama-2-7B-56k within 24GB). It achieves **117%** speed and **50%** memory compared with FlashAttention-2. More benchmarks can be found on [this page](https://github.com/hiyouga/LLaMA-Factory/wiki/Performance-comparison).
|
||||||
|
|
||||||
|
[24/03/31] We supported **[ORPO](https://arxiv.org/abs/2403.07691)**. See [examples](examples/README.md) for usage.
|
||||||
|
|
||||||
|
[24/03/21] Our paper "[LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models](https://arxiv.org/abs/2403.13372)" is available at arXiv!
|
||||||
|
|
||||||
|
[24/03/20] We supported **FSDP+QLoRA** that fine-tunes a 70B model on 2x24GB GPUs. See [examples](examples/README.md) for usage.
|
||||||
|
|
||||||
|
[24/03/13] We supported **[LoRA+](https://arxiv.org/abs/2402.12354)**. See [examples](examples/README.md) for usage.
|
||||||
|
|
||||||
|
[24/03/07] We supported gradient low-rank projection (**[GaLore](https://arxiv.org/abs/2403.03507)**) algorithm. See [examples](examples/README.md) for usage.
|
||||||
|
|
||||||
|
[24/03/07] We integrated **[vLLM](https://github.com/vllm-project/vllm)** for faster and concurrent inference. Try `infer_backend: vllm` to enjoy **270%** inference speed.
|
||||||
|
|
||||||
|
[24/02/28] We supported weight-decomposed LoRA (**[DoRA](https://arxiv.org/abs/2402.09353)**). Try `use_dora: true` to activate DoRA training.
|
||||||
|
|
||||||
|
[24/02/15] We supported **block expansion** proposed by [LLaMA Pro](https://github.com/TencentARC/LLaMA-Pro). See [examples](examples/README.md) for usage.
|
||||||
|
|
||||||
[24/02/05] Qwen1.5 (Qwen2 beta version) series models are supported in LLaMA-Factory. Check this [blog post](https://qwenlm.github.io/blog/qwen1.5/) for details.
|
[24/02/05] Qwen1.5 (Qwen2 beta version) series models are supported in LLaMA-Factory. Check this [blog post](https://qwenlm.github.io/blog/qwen1.5/) for details.
|
||||||
|
|
||||||
[24/01/18] We supported **agent tuning** for most models, equipping model with tool using abilities by fine-tuning with `--dataset glaive_toolcall`.
|
[24/01/18] We supported **agent tuning** for most models, equipping model with tool using abilities by fine-tuning with `dataset: glaive_toolcall`.
|
||||||
|
|
||||||
[23/12/23] We supported **[unsloth](https://github.com/unslothai/unsloth)**'s implementation to boost LoRA tuning for the LLaMA, Mistral and Yi models. Try `--use_unsloth` argument to activate unsloth patch. It achieves **170%** speed in our benchmark, check [this page](https://github.com/hiyouga/LLaMA-Factory/wiki/Performance-comparison) for details.
|
[23/12/23] We supported **[unsloth](https://github.com/unslothai/unsloth)**'s implementation to boost LoRA tuning for the LLaMA, Mistral and Yi models. Try `use_unsloth: true` argument to activate unsloth patch. It achieves **170%** speed in our benchmark, check [this page](https://github.com/hiyouga/LLaMA-Factory/wiki/Performance-comparison) for details.
|
||||||
|
|
||||||
[23/12/12] We supported fine-tuning the latest MoE model **[Mixtral 8x7B](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1)** in our framework. See hardware requirement [here](#hardware-requirement).
|
[23/12/12] We supported fine-tuning the latest MoE model **[Mixtral 8x7B](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1)** in our framework. See hardware requirement [here](#hardware-requirement).
|
||||||
|
|
||||||
[23/12/01] We supported downloading pre-trained models and datasets from the **[ModelScope Hub](https://modelscope.cn/models)** for Chinese mainland users. See [this tutorial](#use-modelscope-hub-optional) for usage.
|
[23/12/01] We supported downloading pre-trained models and datasets from the **[ModelScope Hub](https://modelscope.cn/models)** for Chinese mainland users. See [this tutorial](#download-from-modelscope-hub) for usage.
|
||||||
|
|
||||||
[23/10/21] We supported **[NEFTune](https://arxiv.org/abs/2310.05914)** trick for fine-tuning. Try `--neftune_noise_alpha` argument to activate NEFTune, e.g., `--neftune_noise_alpha 5`.
|
[23/10/21] We supported **[NEFTune](https://arxiv.org/abs/2310.05914)** trick for fine-tuning. Try `neftune_noise_alpha: 5` argument to activate NEFTune.
|
||||||
|
|
||||||
[23/09/27] We supported **$S^2$-Attn** proposed by [LongLoRA](https://github.com/dvlab-research/LongLoRA) for the LLaMA models. Try `--shift_attn` argument to enable shift short attention.
|
[23/09/27] We supported **$S^2$-Attn** proposed by [LongLoRA](https://github.com/dvlab-research/LongLoRA) for the LLaMA models. Try `shift_attn: true` argument to enable shift short attention.
|
||||||
|
|
||||||
[23/09/23] We integrated MMLU, C-Eval and CMMLU benchmarks in this repo. See [this example](#evaluation) to evaluate your models.
|
[23/09/23] We integrated MMLU, C-Eval and CMMLU benchmarks in this repo. See [examples](examples/README.md) for usage.
|
||||||
|
|
||||||
[23/09/10] We supported **[FlashAttention-2](https://github.com/Dao-AILab/flash-attention)**. Try `--flash_attn` argument to enable FlashAttention-2 if you are using RTX4090, A100 or H100 GPUs.
|
[23/09/10] We supported **[FlashAttention-2](https://github.com/Dao-AILab/flash-attention)**. Try `flash_attn: fa2` argument to enable FlashAttention-2 if you are using RTX4090, A100 or H100 GPUs.
|
||||||
|
|
||||||
[23/08/12] We supported **RoPE scaling** to extend the context length of the LLaMA models. Try `--rope_scaling linear` argument in training and `--rope_scaling dynamic` argument at inference to extrapolate the position embeddings.
|
[23/08/12] We supported **RoPE scaling** to extend the context length of the LLaMA models. Try `rope_scaling: linear` argument in training and `rope_scaling: dynamic` argument at inference to extrapolate the position embeddings.
|
||||||
|
|
||||||
[23/08/11] We supported **[DPO training](https://arxiv.org/abs/2305.18290)** for instruction-tuned models. See [this example](#dpo-training) to train your models.
|
[23/08/11] We supported **[DPO training](https://arxiv.org/abs/2305.18290)** for instruction-tuned models. See [examples](examples/README.md) for usage.
|
||||||
|
|
||||||
[23/07/31] We supported **dataset streaming**. Try `--streaming` and `--max_steps 10000` arguments to load your dataset in streaming mode.
|
[23/07/31] We supported **dataset streaming**. Try `streaming: true` and `max_steps: 10000` arguments to load your dataset in streaming mode.
|
||||||
|
|
||||||
[23/07/29] We released two instruction-tuned 13B models at Hugging Face. See these Hugging Face Repos ([LLaMA-2](https://huggingface.co/hiyouga/Llama-2-Chinese-13b-chat) / [Baichuan](https://huggingface.co/hiyouga/Baichuan-13B-sft)) for details.
|
[23/07/29] We released two instruction-tuned 13B models at Hugging Face. See these Hugging Face Repos ([LLaMA-2](https://huggingface.co/hiyouga/Llama-2-Chinese-13b-chat) / [Baichuan](https://huggingface.co/hiyouga/Baichuan-13B-sft)) for details.
|
||||||
|
|
||||||
@@ -118,39 +136,45 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
|
|||||||
|
|
||||||
[23/06/22] We aligned the [demo API](src/api_demo.py) with the [OpenAI's](https://platform.openai.com/docs/api-reference/chat) format where you can insert the fine-tuned model in **arbitrary ChatGPT-based applications**.
|
[23/06/22] We aligned the [demo API](src/api_demo.py) with the [OpenAI's](https://platform.openai.com/docs/api-reference/chat) format where you can insert the fine-tuned model in **arbitrary ChatGPT-based applications**.
|
||||||
|
|
||||||
[23/06/03] We supported quantized training and inference (aka **[QLoRA](https://github.com/artidoro/qlora)**). Try `--quantization_bit 4/8` argument to work with quantized models.
|
[23/06/03] We supported quantized training and inference (aka **[QLoRA](https://github.com/artidoro/qlora)**). See [examples](examples/README.md) for usage.
|
||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
## Supported Models
|
## Supported Models
|
||||||
|
|
||||||
| Model | Model size | Default module | Template |
|
| Model | Model size | Default module | Template |
|
||||||
| -------------------------------------------------------- | --------------------------- | ----------------- | --------- |
|
| -------------------------------------------------------- | -------------------------------- | ----------------- | --------- |
|
||||||
| [Baichuan2](https://huggingface.co/baichuan-inc) | 7B/13B | W_pack | baichuan2 |
|
| [Baichuan2](https://huggingface.co/baichuan-inc) | 7B/13B | W_pack | baichuan2 |
|
||||||
| [BLOOM](https://huggingface.co/bigscience/bloom) | 560M/1.1B/1.7B/3B/7.1B/176B | query_key_value | - |
|
| [BLOOM](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | query_key_value | - |
|
||||||
| [BLOOMZ](https://huggingface.co/bigscience/bloomz) | 560M/1.1B/1.7B/3B/7.1B/176B | query_key_value | - |
|
| [BLOOMZ](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | query_key_value | - |
|
||||||
| [ChatGLM3](https://huggingface.co/THUDM/chatglm3-6b) | 6B | query_key_value | chatglm3 |
|
| [ChatGLM3](https://huggingface.co/THUDM) | 6B | query_key_value | chatglm3 |
|
||||||
| [DeepSeek (MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B | q_proj,v_proj | deepseek |
|
| [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | q_proj,v_proj | cohere |
|
||||||
| [Falcon](https://huggingface.co/tiiuae) | 7B/40B/180B | query_key_value | falcon |
|
| [DeepSeek (MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | q_proj,v_proj | deepseek |
|
||||||
| [Gemma](https://huggingface.co/google) | 2B/7B | q_proj,v_proj | gemma |
|
| [Falcon](https://huggingface.co/tiiuae) | 7B/40B/180B | query_key_value | falcon |
|
||||||
| [InternLM2](https://huggingface.co/internlm) | 7B/20B | wqkv | intern2 |
|
| [Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | q_proj,v_proj | gemma |
|
||||||
| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | q_proj,v_proj | - |
|
| [InternLM2](https://huggingface.co/internlm) | 7B/20B | wqkv | intern2 |
|
||||||
| [LLaMA-2](https://huggingface.co/meta-llama) | 7B/13B/70B | q_proj,v_proj | llama2 |
|
| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | q_proj,v_proj | - |
|
||||||
| [Mistral](https://huggingface.co/mistralai) | 7B | q_proj,v_proj | mistral |
|
| [LLaMA-2](https://huggingface.co/meta-llama) | 7B/13B/70B | q_proj,v_proj | llama2 |
|
||||||
| [Mixtral](https://huggingface.co/mistralai) | 8x7B | q_proj,v_proj | mistral |
|
| [LLaMA-3](https://huggingface.co/meta-llama) | 8B/70B | q_proj,v_proj | llama3 |
|
||||||
| [OLMo](https://huggingface.co/allenai) | 1B/7B | att_proj | olmo |
|
| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | q_proj,v_proj | vicuna |
|
||||||
| [Phi-1.5/2](https://huggingface.co/microsoft) | 1.3B/2.7B | q_proj,v_proj | - |
|
| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | q_proj,v_proj | mistral |
|
||||||
| [Qwen](https://huggingface.co/Qwen) | 1.8B/7B/14B/72B | c_attn | qwen |
|
| [OLMo](https://huggingface.co/allenai) | 1B/7B | q_proj,v_proj | - |
|
||||||
| [Qwen1.5](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/72B | q_proj,v_proj | qwen |
|
| [Phi-1.5/2](https://huggingface.co/microsoft) | 1.3B/2.7B | q_proj,v_proj | - |
|
||||||
| [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | q_proj,v_proj | - |
|
| [Phi-3](https://huggingface.co/microsoft) | 3.8B | qkv_proj | phi |
|
||||||
| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | q_proj,v_proj | xverse |
|
| [Qwen](https://huggingface.co/Qwen) | 1.8B/7B/14B/72B | c_attn | qwen |
|
||||||
| [Yi](https://huggingface.co/01-ai) | 6B/9B/34B | q_proj,v_proj | yi |
|
| [Qwen1.5 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B/110B | q_proj,v_proj | qwen |
|
||||||
| [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | q_proj,v_proj | yuan |
|
| [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | q_proj,v_proj | - |
|
||||||
|
| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | q_proj,v_proj | xverse |
|
||||||
|
| [Yi (1/1.5)](https://huggingface.co/01-ai) | 6B/9B/34B | q_proj,v_proj | yi |
|
||||||
|
| [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | q_proj,v_proj | yi_vl |
|
||||||
|
| [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | q_proj,v_proj | yuan |
|
||||||
|
|
||||||
> [!NOTE]
|
> [!NOTE]
|
||||||
> **Default module** is used for the `--lora_target` argument; you can use `--lora_target all` to specify all the available modules.
|
> **Default module** is used for the `--lora_target` argument; you can use `--lora_target all` to specify all the available modules for better convergence.
|
||||||
>
|
>
|
||||||
> For the "base" models, the `--template` argument can be chosen from `default`, `alpaca`, `vicuna` etc. But make sure to use the **corresponding template** for the "chat" models.
|
> For the "base" models, the `--template` argument can be chosen from `default`, `alpaca`, `vicuna` etc. But make sure to use the **corresponding template** for the "instruct/chat" models.
|
||||||
|
>
|
||||||
|
> Remember to use the **SAME** template in training and inference.
|
||||||
|
|
||||||
Please refer to [constants.py](src/llmtuner/extras/constants.py) for a full list of models we support.
|
Please refer to [constants.py](src/llmtuner/extras/constants.py) for a full list of models we support.
|
||||||
|
|
||||||
@@ -165,9 +189,7 @@ You also can add a custom chat template to [template.py](src/llmtuner/data/templ
|
|||||||
| Reward Modeling | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|
| Reward Modeling | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|
||||||
| PPO Training | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|
| PPO Training | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|
||||||
| DPO Training | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|
| DPO Training | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|
||||||
|
| ORPO Training | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|
||||||
> [!NOTE]
|
|
||||||
> Use `--quantization_bit 4` argument to enable QLoRA.
|
|
||||||
|
|
||||||
## Provided Datasets
|
## Provided Datasets
|
||||||
|
|
||||||
@@ -190,8 +212,8 @@ You also can add a custom chat template to [template.py](src/llmtuner/data/templ
|
|||||||
- [Stanford Alpaca (en)](https://github.com/tatsu-lab/stanford_alpaca)
|
- [Stanford Alpaca (en)](https://github.com/tatsu-lab/stanford_alpaca)
|
||||||
- [Stanford Alpaca (zh)](https://github.com/ymcui/Chinese-LLaMA-Alpaca)
|
- [Stanford Alpaca (zh)](https://github.com/ymcui/Chinese-LLaMA-Alpaca)
|
||||||
- [Alpaca GPT4 (en&zh)](https://github.com/Instruction-Tuning-with-GPT-4/GPT-4-LLM)
|
- [Alpaca GPT4 (en&zh)](https://github.com/Instruction-Tuning-with-GPT-4/GPT-4-LLM)
|
||||||
- [Self Cognition (zh)](data/self_cognition.json)
|
- [Identity (en&zh)](data/identity.json)
|
||||||
- [Open Assistant (multilingual)](https://huggingface.co/datasets/OpenAssistant/oasst1)
|
- [Open Assistant (zh)](https://huggingface.co/datasets/OpenAssistant/oasst1)
|
||||||
- [ShareGPT (zh)](https://huggingface.co/datasets/QingyiSi/Alpaca-CoT/tree/main/Chinese-instruction-collection)
|
- [ShareGPT (zh)](https://huggingface.co/datasets/QingyiSi/Alpaca-CoT/tree/main/Chinese-instruction-collection)
|
||||||
- [Guanaco Dataset (multilingual)](https://huggingface.co/datasets/JosephusCheung/GuanacoDataset)
|
- [Guanaco Dataset (multilingual)](https://huggingface.co/datasets/JosephusCheung/GuanacoDataset)
|
||||||
- [BELLE 2M (zh)](https://huggingface.co/datasets/BelleGroup/train_2M_CN)
|
- [BELLE 2M (zh)](https://huggingface.co/datasets/BelleGroup/train_2M_CN)
|
||||||
@@ -223,6 +245,7 @@ You also can add a custom chat template to [template.py](src/llmtuner/data/templ
|
|||||||
- [Evol Instruct V2 (en)](https://huggingface.co/datasets/WizardLM/WizardLM_evol_instruct_V2_196k)
|
- [Evol Instruct V2 (en)](https://huggingface.co/datasets/WizardLM/WizardLM_evol_instruct_V2_196k)
|
||||||
- [Glaive Function Calling V2 (en)](https://huggingface.co/datasets/glaiveai/glaive-function-calling-v2)
|
- [Glaive Function Calling V2 (en)](https://huggingface.co/datasets/glaiveai/glaive-function-calling-v2)
|
||||||
- [Cosmopedia (en)](https://huggingface.co/datasets/HuggingFaceTB/cosmopedia)
|
- [Cosmopedia (en)](https://huggingface.co/datasets/HuggingFaceTB/cosmopedia)
|
||||||
|
- [LLaVA mixed (en&zh)](https://huggingface.co/datasets/BUAADreamer/llava-en-zh-300k)
|
||||||
- [Open Assistant (de)](https://huggingface.co/datasets/mayflowergmbh/oasst_de)
|
- [Open Assistant (de)](https://huggingface.co/datasets/mayflowergmbh/oasst_de)
|
||||||
- [Dolly 15k (de)](https://huggingface.co/datasets/mayflowergmbh/dolly-15k_de)
|
- [Dolly 15k (de)](https://huggingface.co/datasets/mayflowergmbh/dolly-15k_de)
|
||||||
- [Alpaca GPT4 (de)](https://huggingface.co/datasets/mayflowergmbh/alpaca-gpt4_de)
|
- [Alpaca GPT4 (de)](https://huggingface.co/datasets/mayflowergmbh/alpaca-gpt4_de)
|
||||||
@@ -238,16 +261,15 @@ You also can add a custom chat template to [template.py](src/llmtuner/data/templ
|
|||||||
<details><summary>Preference datasets</summary>
|
<details><summary>Preference datasets</summary>
|
||||||
|
|
||||||
- [HH-RLHF (en)](https://huggingface.co/datasets/Anthropic/hh-rlhf)
|
- [HH-RLHF (en)](https://huggingface.co/datasets/Anthropic/hh-rlhf)
|
||||||
- [Open Assistant (multilingual)](https://huggingface.co/datasets/OpenAssistant/oasst1)
|
|
||||||
- [GPT-4 Generated Data (en&zh)](https://github.com/Instruction-Tuning-with-GPT-4/GPT-4-LLM)
|
- [GPT-4 Generated Data (en&zh)](https://github.com/Instruction-Tuning-with-GPT-4/GPT-4-LLM)
|
||||||
- [Orca DPO (en)](https://huggingface.co/datasets/Intel/orca_dpo_pairs)
|
- [Orca DPO (en)](https://huggingface.co/datasets/Intel/orca_dpo_pairs)
|
||||||
- [Nectar (en)](https://huggingface.co/datasets/berkeley-nest/Nectar)
|
- [Nectar (en)](https://huggingface.co/datasets/berkeley-nest/Nectar)
|
||||||
|
- [DPO mixed (en&zh)](https://huggingface.co/datasets/hiyouga/DPO-En-Zh-20k)
|
||||||
|
- [Open Assistant (zh)](https://huggingface.co/datasets/OpenAssistant/oasst1)
|
||||||
- [Orca DPO (de)](https://huggingface.co/datasets/mayflowergmbh/intel_orca_dpo_pairs_de)
|
- [Orca DPO (de)](https://huggingface.co/datasets/mayflowergmbh/intel_orca_dpo_pairs_de)
|
||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
Please refer to [data/README.md](data/README.md) for details.
|
|
||||||
|
|
||||||
Some datasets require confirmation before using them, so we recommend logging in with your Hugging Face account using these commands.
|
Some datasets require confirmation before using them, so we recommend logging in with your Hugging Face account using these commands.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@@ -261,54 +283,55 @@ huggingface-cli login
|
|||||||
| ------------ | ------- | --------- |
|
| ------------ | ------- | --------- |
|
||||||
| python | 3.8 | 3.10 |
|
| python | 3.8 | 3.10 |
|
||||||
| torch | 1.13.1 | 2.2.0 |
|
| torch | 1.13.1 | 2.2.0 |
|
||||||
| transformers | 4.37.2 | 4.39.1 |
|
| transformers | 4.37.2 | 4.40.1 |
|
||||||
| datasets | 2.14.3 | 2.17.1 |
|
| datasets | 2.14.3 | 2.19.1 |
|
||||||
| accelerate | 0.27.2 | 0.28.0 |
|
| accelerate | 0.27.2 | 0.30.0 |
|
||||||
| peft | 0.9.0 | 0.10.0 |
|
| peft | 0.9.0 | 0.10.0 |
|
||||||
| trl | 0.8.1 | 0.8.1 |
|
| trl | 0.8.1 | 0.8.6 |
|
||||||
|
|
||||||
| Optional | Minimum | Recommend |
|
| Optional | Minimum | Recommend |
|
||||||
| ------------ | ------- | --------- |
|
| ------------ | ------- | --------- |
|
||||||
| CUDA | 11.6 | 12.2 |
|
| CUDA | 11.6 | 12.2 |
|
||||||
| deepspeed | 0.10.0 | 0.14.0 |
|
| deepspeed | 0.10.0 | 0.14.0 |
|
||||||
| bitsandbytes | 0.39.0 | 0.43.0 |
|
| bitsandbytes | 0.39.0 | 0.43.1 |
|
||||||
| flash-attn | 2.3.0 | 2.5.6 |
|
| vllm | 0.4.0 | 0.4.2 |
|
||||||
|
| flash-attn | 2.3.0 | 2.5.8 |
|
||||||
|
|
||||||
### Hardware Requirement
|
### Hardware Requirement
|
||||||
|
|
||||||
\* *estimated*
|
\* *estimated*
|
||||||
|
|
||||||
| Method | Bits | 7B | 13B | 30B | 70B | 8x7B |
|
| Method | Bits | 7B | 13B | 30B | 70B | 110B | 8x7B | 8x22B |
|
||||||
| ------ | ---- | ----- | ----- | ----- | ------ | ------ |
|
| ----------------- | ---- | ----- | ----- | ----- | ------ | ------ | ----- | ------ |
|
||||||
| Full | AMP | 120GB | 240GB | 600GB | 1200GB | 900GB |
|
| Full | AMP | 120GB | 240GB | 600GB | 1200GB | 2000GB | 900GB | 2400GB |
|
||||||
| Full | 16 | 60GB | 120GB | 300GB | 600GB | 400GB |
|
| Full | 16 | 60GB | 120GB | 300GB | 600GB | 900GB | 400GB | 1200GB |
|
||||||
| GaLore | 16 | 16GB | 32GB | 64GB | 160GB | 120GB |
|
| Freeze | 16 | 20GB | 40GB | 80GB | 200GB | 360GB | 160GB | 400GB |
|
||||||
| Freeze | 16 | 20GB | 40GB | 80GB | 200GB | 160GB |
|
| LoRA/GaLore/BAdam | 16 | 16GB | 32GB | 64GB | 160GB | 240GB | 120GB | 320GB |
|
||||||
| LoRA | 16 | 16GB | 32GB | 64GB | 160GB | 120GB |
|
| QLoRA | 8 | 10GB | 20GB | 40GB | 80GB | 140GB | 60GB | 160GB |
|
||||||
| QLoRA | 8 | 10GB | 20GB | 40GB | 80GB | 60GB |
|
| QLoRA | 4 | 6GB | 12GB | 24GB | 48GB | 72GB | 30GB | 96GB |
|
||||||
| QLoRA | 4 | 6GB | 12GB | 24GB | 48GB | 30GB |
|
| QLoRA | 2 | 4GB | 8GB | 16GB | 24GB | 48GB | 18GB | 48GB |
|
||||||
| QLoRA | 2 | 4GB | 8GB | 16GB | 24GB | 18GB |
|
|
||||||
|
|
||||||
## Getting Started
|
## Getting Started
|
||||||
|
|
||||||
### Data Preparation (optional)
|
### Installation
|
||||||
|
|
||||||
Please refer to [data/README.md](data/README.md) for checking the details about the format of dataset files. You can either use a single `.json` file or a [dataset loading script](https://huggingface.co/docs/datasets/dataset_script) with multiple files to create a custom dataset.
|
> [!IMPORTANT]
|
||||||
|
> Installation is mandatory.
|
||||||
> [!NOTE]
|
|
||||||
> Please update `data/dataset_info.json` to use your custom dataset. About the format of this file, please refer to `data/README.md`.
|
|
||||||
|
|
||||||
### Dependence Installation (optional)
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
git clone https://github.com/hiyouga/LLaMA-Factory.git
|
git clone https://github.com/hiyouga/LLaMA-Factory.git
|
||||||
conda create -n llama_factory python=3.10
|
|
||||||
conda activate llama_factory
|
|
||||||
cd LLaMA-Factory
|
cd LLaMA-Factory
|
||||||
pip install -r requirements.txt
|
pip install -e .[torch,metrics]
|
||||||
```
|
```
|
||||||
|
|
||||||
If you want to enable the quantized LoRA (QLoRA) on the Windows platform, you will be required to install a pre-built version of `bitsandbytes` library, which supports CUDA 11.1 to 12.2, please select the appropriate [release version](https://github.com/jllllll/bitsandbytes-windows-webui/releases/tag/wheels) based on your CUDA version.
|
Extra dependencies available: torch, metrics, deepspeed, bitsandbytes, vllm, galore, badam, gptq, awq, aqlm, qwen, modelscope, quality
|
||||||
|
|
||||||
|
> [!TIP]
|
||||||
|
> Use `pip install --no-deps -e .` to resolve package conflicts.
|
||||||
|
|
||||||
|
<details><summary>For Windows users</summary>
|
||||||
|
|
||||||
|
If you want to enable the quantized LoRA (QLoRA) on the Windows platform, you need to install a pre-built version of the `bitsandbytes` library, which supports CUDA 11.1 to 12.2. Please select the appropriate [release version](https://github.com/jllllll/bitsandbytes-windows-webui/releases/tag/wheels) based on your CUDA version.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.2.post2-py3-none-win_amd64.whl
|
pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.2.post2-py3-none-win_amd64.whl
|
||||||
@@ -316,357 +339,83 @@ pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/downl
|
|||||||
|
|
||||||
To enable FlashAttention-2 on the Windows platform, you need to install the precompiled `flash-attn` library, which supports CUDA 12.1 to 12.2. Please download the corresponding version from [flash-attention](https://github.com/bdashore3/flash-attention/releases) based on your requirements.
|
To enable FlashAttention-2 on the Windows platform, you need to install the precompiled `flash-attn` library, which supports CUDA 12.1 to 12.2. Please download the corresponding version from [flash-attention](https://github.com/bdashore3/flash-attention/releases) based on your requirements.
|
||||||
|
|
||||||
### Use ModelScope Hub (optional)
|
</details>
|
||||||
|
|
||||||
If you have trouble with downloading models and datasets from Hugging Face, you can use LLaMA-Factory together with ModelScope in the following manner.
|
<details><summary>For Ascend NPU users</summary>
|
||||||
|
|
||||||
|
To utilize Ascend NPU devices for (distributed) training and inference, you need to install the **[torch-npu](https://gitee.com/ascend/pytorch)** library and the **[Ascend CANN Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**.
|
||||||
|
|
||||||
|
| Requirement | Minimum | Recommend |
|
||||||
|
| ------------ | ------- | --------- |
|
||||||
|
| CANN | 8.0.RC1 | 8.0.RC1 |
|
||||||
|
| torch | 2.2.0 | 2.2.0 |
|
||||||
|
| torch-npu | 2.2.0 | 2.2.0 |
|
||||||
|
| deepspeed | 0.13.2 | 0.13.2 |
|
||||||
|
|
||||||
|
Docker image:
|
||||||
|
|
||||||
|
- 32GB: [Download page](http://mirrors.cn-central-221.ovaijisuan.com/detail/130.html)
|
||||||
|
- 64GB: Coming soon
|
||||||
|
|
||||||
|
Remember to use `ASCEND_RT_VISIBLE_DEVICES` instead of `CUDA_VISIBLE_DEVICES` to specify the device to use.
|
||||||
|
|
||||||
|
If you cannot run model inference on NPU devices, try setting `do_sample: false` in the configurations.
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
### Data Preparation
|
||||||
|
|
||||||
|
Please refer to [data/README.md](data/README.md) for details about the format of dataset files. You can either use datasets from the Hugging Face / ModelScope hub or load a dataset from local disk.
|
||||||
|
|
||||||
|
> [!NOTE]
|
||||||
|
> Please update `data/dataset_info.json` to use your custom dataset.
|
||||||
|
|
||||||
|
### Quickstart
|
||||||
|
|
||||||
|
Use the following 3 commands to run LoRA **fine-tuning**, **inference** and **merging** of the Llama3-8B-Instruct model, respectively.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
export USE_MODELSCOPE_HUB=1 # `set USE_MODELSCOPE_HUB=1` for Windows
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
Then you can train the corresponding model by specifying a model ID of the ModelScope Hub. (find a full list of model IDs at [ModelScope Hub](https://modelscope.cn/models))
|
See [examples/README.md](examples/README.md) for advanced usage (including distributed training).
|
||||||
|
|
||||||
```bash
|
> [!TIP]
|
||||||
CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
|
> Use `llamafactory-cli help` to show help information.
|
||||||
--model_name_or_path modelscope/Llama-2-7b-ms \
|
|
||||||
... # arguments (same as below)
|
|
||||||
```
|
|
||||||
|
|
||||||
LLaMA Board also supports using the models and datasets on the ModelScope Hub.
|
### Fine-Tuning with LLaMA Board GUI (powered by [Gradio](https://github.com/gradio-app/gradio))
|
||||||
|
|
||||||
```bash
|
|
||||||
CUDA_VISIBLE_DEVICES=0 USE_MODELSCOPE_HUB=1 python src/train_web.py
|
|
||||||
```
|
|
||||||
|
|
||||||
### Train on a single GPU
|
|
||||||
|
|
||||||
> [!IMPORTANT]
|
> [!IMPORTANT]
|
||||||
> If you want to train models on multiple GPUs, please refer to [Distributed Training](#distributed-training).
|
> LLaMA Board GUI only supports training on a single GPU.
|
||||||
|
|
||||||
|
#### Use local environment
|
||||||
#### LLaMA Board GUI
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
CUDA_VISIBLE_DEVICES=0 python src/train_web.py
|
CUDA_VISIBLE_DEVICES=0 GRADIO_SHARE=1 llamafactory-cli webui
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Pre-Training
|
<details><summary>For Alibaba Cloud PAI or AutoDL users</summary>
|
||||||
|
|
||||||
|
If you encounter display problems in LLaMA Board on Alibaba Cloud PAI, try using the following command to set environment variables before starting LLaMA Board:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
|
export GRADIO_SERVER_PORT=7860 GRADIO_ROOT_PATH=/${JUPYTER_NAME}/proxy/7860/
|
||||||
--stage pt \
|
|
||||||
--do_train \
|
|
||||||
--model_name_or_path path_to_llama_model \
|
|
||||||
--dataset wiki_demo \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--lora_target q_proj,v_proj \
|
|
||||||
--output_dir path_to_pt_checkpoint \
|
|
||||||
--overwrite_cache \
|
|
||||||
--per_device_train_batch_size 4 \
|
|
||||||
--gradient_accumulation_steps 4 \
|
|
||||||
--lr_scheduler_type cosine \
|
|
||||||
--logging_steps 10 \
|
|
||||||
--save_steps 1000 \
|
|
||||||
--learning_rate 5e-5 \
|
|
||||||
--num_train_epochs 3.0 \
|
|
||||||
--plot_loss \
|
|
||||||
--fp16
|
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Supervised Fine-Tuning
|
If you are using AutoDL, please install a specific version of Gradio:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
|
pip install gradio==4.10.0
|
||||||
--stage sft \
|
|
||||||
--do_train \
|
|
||||||
--model_name_or_path path_to_llama_model \
|
|
||||||
--dataset alpaca_gpt4_en \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--lora_target q_proj,v_proj \
|
|
||||||
--output_dir path_to_sft_checkpoint \
|
|
||||||
--overwrite_cache \
|
|
||||||
--per_device_train_batch_size 4 \
|
|
||||||
--gradient_accumulation_steps 4 \
|
|
||||||
--lr_scheduler_type cosine \
|
|
||||||
--logging_steps 10 \
|
|
||||||
--save_steps 1000 \
|
|
||||||
--learning_rate 5e-5 \
|
|
||||||
--num_train_epochs 3.0 \
|
|
||||||
--plot_loss \
|
|
||||||
--fp16
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Reward Modeling
|
|
||||||
|
|
||||||
```bash
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
|
|
||||||
--stage rm \
|
|
||||||
--do_train \
|
|
||||||
--model_name_or_path path_to_llama_model \
|
|
||||||
--adapter_name_or_path path_to_sft_checkpoint \
|
|
||||||
--create_new_adapter \
|
|
||||||
--dataset comparison_gpt4_en \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--lora_target q_proj,v_proj \
|
|
||||||
--output_dir path_to_rm_checkpoint \
|
|
||||||
--per_device_train_batch_size 2 \
|
|
||||||
--gradient_accumulation_steps 4 \
|
|
||||||
--lr_scheduler_type cosine \
|
|
||||||
--logging_steps 10 \
|
|
||||||
--save_steps 1000 \
|
|
||||||
--learning_rate 1e-5 \
|
|
||||||
--num_train_epochs 1.0 \
|
|
||||||
--plot_loss \
|
|
||||||
--fp16
|
|
||||||
```
|
|
||||||
|
|
||||||
#### PPO Training
|
|
||||||
|
|
||||||
```bash
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
|
|
||||||
--stage ppo \
|
|
||||||
--do_train \
|
|
||||||
--model_name_or_path path_to_llama_model \
|
|
||||||
--adapter_name_or_path path_to_sft_checkpoint \
|
|
||||||
--create_new_adapter \
|
|
||||||
--dataset alpaca_gpt4_en \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--lora_target q_proj,v_proj \
|
|
||||||
--reward_model path_to_rm_checkpoint \
|
|
||||||
--output_dir path_to_ppo_checkpoint \
|
|
||||||
--per_device_train_batch_size 2 \
|
|
||||||
--gradient_accumulation_steps 4 \
|
|
||||||
--lr_scheduler_type cosine \
|
|
||||||
--top_k 0 \
|
|
||||||
--top_p 0.9 \
|
|
||||||
--logging_steps 10 \
|
|
||||||
--save_steps 1000 \
|
|
||||||
--learning_rate 1e-5 \
|
|
||||||
--num_train_epochs 1.0 \
|
|
||||||
--plot_loss \
|
|
||||||
--fp16
|
|
||||||
```
|
|
||||||
|
|
||||||
> [!TIP]
|
|
||||||
> Use `--adapter_name_or_path path_to_sft_checkpoint,path_to_ppo_checkpoint` to infer the fine-tuned model if `--create_new_adapter` was enabled.
|
|
||||||
|
|
||||||
> [!WARNING]
|
|
||||||
> Use `--per_device_train_batch_size=1` for LLaMA-2 models in fp16 PPO training.
|
|
||||||
|
|
||||||
#### DPO Training
|
|
||||||
|
|
||||||
```bash
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
|
|
||||||
--stage dpo \
|
|
||||||
--do_train \
|
|
||||||
--model_name_or_path path_to_llama_model \
|
|
||||||
--adapter_name_or_path path_to_sft_checkpoint \
|
|
||||||
--create_new_adapter \
|
|
||||||
--dataset comparison_gpt4_en \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--lora_target q_proj,v_proj \
|
|
||||||
--output_dir path_to_dpo_checkpoint \
|
|
||||||
--per_device_train_batch_size 2 \
|
|
||||||
--gradient_accumulation_steps 4 \
|
|
||||||
--lr_scheduler_type cosine \
|
|
||||||
--logging_steps 10 \
|
|
||||||
--save_steps 1000 \
|
|
||||||
--learning_rate 1e-5 \
|
|
||||||
--num_train_epochs 1.0 \
|
|
||||||
--plot_loss \
|
|
||||||
--fp16
|
|
||||||
```
|
|
||||||
|
|
||||||
> [!TIP]
|
|
||||||
> Use `--adapter_name_or_path path_to_sft_checkpoint,path_to_dpo_checkpoint` to infer the fine-tuned model if `--create_new_adapter` was enabled.
|
|
||||||
|
|
||||||
### Distributed Training
|
|
||||||
|
|
||||||
#### Use Huggingface Accelerate
|
|
||||||
|
|
||||||
```bash
|
|
||||||
accelerate launch --config_file config.yaml src/train_bash.py \
|
|
||||||
--ddp_timeout 180000000 \
|
|
||||||
... # arguments (same as above)
|
|
||||||
```
|
|
||||||
|
|
||||||
<details><summary>Example config.yaml for LoRA training</summary>
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
compute_environment: LOCAL_MACHINE
|
|
||||||
debug: false
|
|
||||||
distributed_type: MULTI_GPU
|
|
||||||
downcast_bf16: 'no'
|
|
||||||
gpu_ids: all
|
|
||||||
machine_rank: 0
|
|
||||||
main_training_function: main
|
|
||||||
mixed_precision: fp16
|
|
||||||
num_machines: 1
|
|
||||||
num_processes: 4
|
|
||||||
rdzv_backend: static
|
|
||||||
same_network: true
|
|
||||||
tpu_env: []
|
|
||||||
tpu_use_cluster: false
|
|
||||||
tpu_use_sudo: false
|
|
||||||
use_cpu: false
|
|
||||||
```
|
```
|
||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
> [!TIP]
|
|
||||||
> We recommend using Accelerate for LoRA tuning.
|
|
||||||
|
|
||||||
#### Use DeepSpeed
|
|
||||||
|
|
||||||
```bash
|
|
||||||
deepspeed --num_gpus 8 src/train_bash.py \
|
|
||||||
--deepspeed ds_config.json \
|
|
||||||
--ddp_timeout 180000000 \
|
|
||||||
... # arguments (same as above)
|
|
||||||
```
|
|
||||||
|
|
||||||
<details><summary>Example ds_config.json for full-parameter training with DeepSpeed ZeRO-2</summary>
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"train_batch_size": "auto",
|
|
||||||
"train_micro_batch_size_per_gpu": "auto",
|
|
||||||
"gradient_accumulation_steps": "auto",
|
|
||||||
"gradient_clipping": "auto",
|
|
||||||
"zero_allow_untested_optimizer": true,
|
|
||||||
"fp16": {
|
|
||||||
"enabled": "auto",
|
|
||||||
"loss_scale": 0,
|
|
||||||
"loss_scale_window": 1000,
|
|
||||||
"initial_scale_power": 16,
|
|
||||||
"hysteresis": 2,
|
|
||||||
"min_loss_scale": 1
|
|
||||||
},
|
|
||||||
"bf16": {
|
|
||||||
"enabled": "auto"
|
|
||||||
},
|
|
||||||
"zero_optimization": {
|
|
||||||
"stage": 2,
|
|
||||||
"allgather_partitions": true,
|
|
||||||
"allgather_bucket_size": 5e8,
|
|
||||||
"overlap_comm": true,
|
|
||||||
"reduce_scatter": true,
|
|
||||||
"reduce_bucket_size": 5e8,
|
|
||||||
"contiguous_gradients": true,
|
|
||||||
"round_robin_gradients": true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
</details>
|
|
||||||
|
|
||||||
> [!TIP]
|
|
||||||
> Refer to [examples](examples) for more training scripts.
|
|
||||||
|
|
||||||
### Merge LoRA weights and export model
|
|
||||||
|
|
||||||
```bash
|
|
||||||
CUDA_VISIBLE_DEVICES= python src/export_model.py \
|
|
||||||
--model_name_or_path path_to_llama_model \
|
|
||||||
--adapter_name_or_path path_to_checkpoint \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--export_dir path_to_export \
|
|
||||||
--export_size 2 \
|
|
||||||
--export_legacy_format False
|
|
||||||
```
|
|
||||||
|
|
||||||
> [!WARNING]
|
|
||||||
> Merging LoRA weights into a quantized model is not supported.
|
|
||||||
|
|
||||||
> [!TIP]
|
|
||||||
> Use `--model_name_or_path path_to_export` solely to use the exported model.
|
|
||||||
>
|
|
||||||
> Use `CUDA_VISIBLE_DEVICES=0`, `--export_quantization_bit 4` and `--export_quantization_dataset data/c4_demo.json` to quantize the model with AutoGPTQ after merging the LoRA weights.
|
|
||||||
|
|
||||||
### Inference with OpenAI-style API
|
|
||||||
|
|
||||||
```bash
|
|
||||||
CUDA_VISIBLE_DEVICES=0 API_PORT=8000 python src/api_demo.py \
|
|
||||||
--model_name_or_path path_to_llama_model \
|
|
||||||
--adapter_name_or_path path_to_checkpoint \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora
|
|
||||||
```
|
|
||||||
|
|
||||||
> [!TIP]
|
|
||||||
> Visit `http://localhost:8000/docs` for API documentation.
|
|
||||||
|
|
||||||
### Inference with command line
|
|
||||||
|
|
||||||
```bash
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python src/cli_demo.py \
|
|
||||||
--model_name_or_path path_to_llama_model \
|
|
||||||
--adapter_name_or_path path_to_checkpoint \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora
|
|
||||||
```
|
|
||||||
|
|
||||||
### Inference with web browser
|
|
||||||
|
|
||||||
```bash
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python src/web_demo.py \
|
|
||||||
--model_name_or_path path_to_llama_model \
|
|
||||||
--adapter_name_or_path path_to_checkpoint \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora
|
|
||||||
```
|
|
||||||
|
|
||||||
### Evaluation
|
|
||||||
|
|
||||||
```bash
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python src/evaluate.py \
|
|
||||||
--model_name_or_path path_to_llama_model \
|
|
||||||
--adapter_name_or_path path_to_checkpoint \
|
|
||||||
--template vanilla \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--task mmlu \
|
|
||||||
--split test \
|
|
||||||
--lang en \
|
|
||||||
--n_shot 5 \
|
|
||||||
--batch_size 4
|
|
||||||
```
|
|
||||||
|
|
||||||
### Predict
|
|
||||||
|
|
||||||
```bash
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
|
|
||||||
--stage sft \
|
|
||||||
--do_predict \
|
|
||||||
--model_name_or_path path_to_llama_model \
|
|
||||||
--adapter_name_or_path path_to_checkpoint \
|
|
||||||
--dataset alpaca_gpt4_en \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--output_dir path_to_predict_result \
|
|
||||||
--per_device_eval_batch_size 1 \
|
|
||||||
--max_samples 100 \
|
|
||||||
--predict_with_generate \
|
|
||||||
--fp16
|
|
||||||
```
|
|
||||||
|
|
||||||
> [!WARNING]
|
|
||||||
> Use `--per_device_train_batch_size=1` for LLaMA-2 models in fp16 predict.
|
|
||||||
|
|
||||||
> [!TIP]
|
|
||||||
> We recommend using `--per_device_eval_batch_size=1` and `--max_target_length 128` at 4/8-bit predict.
|
|
||||||
|
|
||||||
### Dockerize Training
|
|
||||||
|
|
||||||
#### Use Docker
|
#### Use Docker
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker build -f ./Dockerfile -t llama-factory:latest .
|
docker build -f ./Dockerfile -t llama-factory:latest .
|
||||||
|
|
||||||
docker run --gpus=all \
|
docker run --gpus=all \
|
||||||
-v ./hf_cache:/root/.cache/huggingface/ \
|
-v ./hf_cache:/root/.cache/huggingface/ \
|
||||||
-v ./data:/app/data \
|
-v ./data:/app/data \
|
||||||
@@ -684,14 +433,36 @@ docker run --gpus=all \
|
|||||||
docker compose -f ./docker-compose.yml up -d
|
docker compose -f ./docker-compose.yml up -d
|
||||||
```
|
```
|
||||||
|
|
||||||
> [!TIP]
|
<details><summary>Details about volume</summary>
|
||||||
> Details about volume:
|
|
||||||
> * hf_cache: Utilize Hugging Face cache on the host machine. Reassignable if a cache already exists in a different directory.
|
- hf_cache: Utilize Hugging Face cache on the host machine. Reassignable if a cache already exists in a different directory.
|
||||||
> * data: Place datasets on this dir of the host machine so that they can be selected on LLaMA Board GUI.
|
- data: Place datasets on this dir of the host machine so that they can be selected on LLaMA Board GUI.
|
||||||
> * output: Set export dir to this location so that the merged result can be accessed directly on the host machine.
|
- output: Set export dir to this location so that the merged result can be accessed directly on the host machine.
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
### Deploy with OpenAI-style API and vLLM
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0,1 API_PORT=8000 llamafactory-cli api examples/inference/llama3_vllm.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
### Download from ModelScope Hub
|
||||||
|
|
||||||
|
If you have trouble with downloading models and datasets from Hugging Face, you can use ModelScope.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export USE_MODELSCOPE_HUB=1 # `set USE_MODELSCOPE_HUB=1` for Windows
|
||||||
|
```
|
||||||
|
|
||||||
|
Train the model by specifying a model ID of the ModelScope Hub as the `--model_name_or_path`. You can find a full list of model IDs at [ModelScope Hub](https://modelscope.cn/models), e.g., `LLM-Research/Meta-Llama-3-8B-Instruct`.
|
||||||
|
|
||||||
## Projects using LLaMA Factory
|
## Projects using LLaMA Factory
|
||||||
|
|
||||||
|
If you have a project that should be incorporated, please contact us via email or create a pull request.
|
||||||
|
|
||||||
|
<details><summary>Click to show</summary>
|
||||||
|
|
||||||
1. Wang et al. ESRL: Efficient Sampling-based Reinforcement Learning for Sequence Generation. 2023. [[arxiv]](https://arxiv.org/abs/2308.02223)
|
1. Wang et al. ESRL: Efficient Sampling-based Reinforcement Learning for Sequence Generation. 2023. [[arxiv]](https://arxiv.org/abs/2308.02223)
|
||||||
1. Yu et al. Open, Closed, or Small Language Models for Text Classification? 2023. [[arxiv]](https://arxiv.org/abs/2308.10092)
|
1. Yu et al. Open, Closed, or Small Language Models for Text Classification? 2023. [[arxiv]](https://arxiv.org/abs/2308.10092)
|
||||||
1. Wang et al. UbiPhysio: Support Daily Functioning, Fitness, and Rehabilitation with Action Understanding and Feedback in Natural Language. 2023. [[arxiv]](https://arxiv.org/abs/2308.10526)
|
1. Wang et al. UbiPhysio: Support Daily Functioning, Fitness, and Rehabilitation with Action Understanding and Feedback in Natural Language. 2023. [[arxiv]](https://arxiv.org/abs/2308.10526)
|
||||||
@@ -713,21 +484,37 @@ docker compose -f ./docker-compose.yml up -d
|
|||||||
1. Huang et al. Key-Point-Driven Data Synthesis with its Enhancement on Mathematical Reasoning. 2024. [[arxiv]](https://arxiv.org/abs/2403.02333)
|
1. Huang et al. Key-Point-Driven Data Synthesis with its Enhancement on Mathematical Reasoning. 2024. [[arxiv]](https://arxiv.org/abs/2403.02333)
|
||||||
1. Duan et al. Negating Negatives: Alignment without Human Positive Samples via Distributional Dispreference Optimization. 2024. [[arxiv]](https://arxiv.org/abs/2403.03419)
|
1. Duan et al. Negating Negatives: Alignment without Human Positive Samples via Distributional Dispreference Optimization. 2024. [[arxiv]](https://arxiv.org/abs/2403.03419)
|
||||||
1. Xie and Schwertfeger. Empowering Robotics with Large Language Models: osmAG Map Comprehension with LLMs. 2024. [[arxiv]](https://arxiv.org/abs/2403.08228)
|
1. Xie and Schwertfeger. Empowering Robotics with Large Language Models: osmAG Map Comprehension with LLMs. 2024. [[arxiv]](https://arxiv.org/abs/2403.08228)
|
||||||
|
1. Wu et al. Large Language Models are Parallel Multilingual Learners. 2024. [[arxiv]](https://arxiv.org/abs/2403.09073)
|
||||||
|
1. Zhang et al. EDT: Improving Large Language Models' Generation by Entropy-based Dynamic Temperature Sampling. 2024. [[arxiv]](https://arxiv.org/abs/2403.14541)
|
||||||
|
1. Weller et al. FollowIR: Evaluating and Teaching Information Retrieval Models to Follow Instructions. 2024. [[arxiv]](https://arxiv.org/abs/2403.15246)
|
||||||
1. Hongbin Na. CBT-LLM: A Chinese Large Language Model for Cognitive Behavioral Therapy-based Mental Health Question Answering. 2024. [[arxiv]](https://arxiv.org/abs/2403.16008)
|
1. Hongbin Na. CBT-LLM: A Chinese Large Language Model for Cognitive Behavioral Therapy-based Mental Health Question Answering. 2024. [[arxiv]](https://arxiv.org/abs/2403.16008)
|
||||||
|
1. Zan et al. CodeS: Natural Language to Code Repository via Multi-Layer Sketch. 2024. [[arxiv]](https://arxiv.org/abs/2403.16443)
|
||||||
|
1. Liu et al. Extensive Self-Contrast Enables Feedback-Free Language Model Alignment. 2024. [[arxiv]](https://arxiv.org/abs/2404.00604)
|
||||||
|
1. Luo et al. BAdam: A Memory Efficient Full Parameter Training Method for Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.02827)
|
||||||
|
1. Du et al. Chinese Tiny LLM: Pretraining a Chinese-Centric Large Language Model. 2024. [[arxiv]](https://arxiv.org/abs/2404.04167)
|
||||||
|
1. Ma et al. Parameter Efficient Quasi-Orthogonal Fine-Tuning via Givens Rotation. 2024. [[arxiv]](https://arxiv.org/abs/2404.04316)
|
||||||
|
1. Liu et al. Dynamic Generation of Personalities with Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.07084)
|
||||||
|
1. Shang et al. How Far Have We Gone in Stripped Binary Code Understanding Using Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.09836)
|
||||||
|
1. Huang et al. LLMTune: Accelerate Database Knob Tuning with Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.11581)
|
||||||
|
1. Deng et al. Text-Tuple-Table: Towards Information Integration in Text-to-Table Generation via Global Tuple Extraction. 2024. [[arxiv]](https://arxiv.org/abs/2404.14215)
|
||||||
|
1. Acikgoz et al. Hippocrates: An Open-Source Framework for Advancing Large Language Models in Healthcare. 2024. [[arxiv]](https://arxiv.org/abs/2404.16621)
|
||||||
|
1. Zhang et al. Small Language Models Need Strong Verifiers to Self-Correct Reasoning. 2024. [[arxiv]](https://arxiv.org/abs/2404.17140)
|
||||||
|
1. Zhou et al. FREB-TQA: A Fine-Grained Robustness Evaluation Benchmark for Table Question Answering. 2024. [[arxiv]](https://arxiv.org/abs/2404.18585)
|
||||||
1. **[StarWhisper](https://github.com/Yu-Yang-Li/StarWhisper)**: A large language model for Astronomy, based on ChatGLM2-6B and Qwen-14B.
|
1. **[StarWhisper](https://github.com/Yu-Yang-Li/StarWhisper)**: A large language model for Astronomy, based on ChatGLM2-6B and Qwen-14B.
|
||||||
1. **[DISC-LawLLM](https://github.com/FudanDISC/DISC-LawLLM)**: A large language model specialized in Chinese legal domain, based on Baichuan-13B, is capable of retrieving and reasoning on legal knowledge.
|
1. **[DISC-LawLLM](https://github.com/FudanDISC/DISC-LawLLM)**: A large language model specialized in Chinese legal domain, based on Baichuan-13B, is capable of retrieving and reasoning on legal knowledge.
|
||||||
1. **[Sunsimiao](https://github.com/thomas-yanxin/Sunsimiao)**: A large language model specialized in Chinese medical domain, based on Baichuan-7B and ChatGLM-6B.
|
1. **[Sunsimiao](https://github.com/thomas-yanxin/Sunsimiao)**: A large language model specialized in Chinese medical domain, based on Baichuan-7B and ChatGLM-6B.
|
||||||
1. **[CareGPT](https://github.com/WangRongsheng/CareGPT)**: A series of large language models for Chinese medical domain, based on LLaMA2-7B and Baichuan-13B.
|
1. **[CareGPT](https://github.com/WangRongsheng/CareGPT)**: A series of large language models for Chinese medical domain, based on LLaMA2-7B and Baichuan-13B.
|
||||||
1. **[MachineMindset](https://github.com/PKU-YuanGroup/Machine-Mindset/)**: A series of MBTI Personality large language models, capable of giving any LLM 16 different personality types based on different datasets and training methods.
|
1. **[MachineMindset](https://github.com/PKU-YuanGroup/Machine-Mindset/)**: A series of MBTI Personality large language models, capable of giving any LLM 16 different personality types based on different datasets and training methods.
|
||||||
|
1. **[Luminia-13B-v3](https://huggingface.co/Nekochu/Luminia-13B-v3)**: A large language model specialized in generating metadata for Stable Diffusion. [[🤗Demo]](https://huggingface.co/spaces/Nekochu/Luminia-13B_SD_Prompt)
|
||||||
|
1. **[Chinese-LLaVA-Med](https://github.com/BUAADreamer/Chinese-LLaVA-Med)**: A multimodal large language model specialized in Chinese medical domain, based on LLaVA-1.5-7B.
|
||||||
|
|
||||||
> [!TIP]
|
</details>
|
||||||
> If you have a project that should be incorporated, please contact via email or create a pull request.
|
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
This repository is licensed under the [Apache-2.0 License](LICENSE).
|
This repository is licensed under the [Apache-2.0 License](LICENSE).
|
||||||
|
|
||||||
Please follow the model licenses to use the corresponding model weights: [Baichuan2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [InternLM2](https://github.com/InternLM/InternLM#license) / [LLaMA](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [LLaMA-2](https://ai.meta.com/llama/license/) / [Mistral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [StarCoder2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yuan](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan)
|
Please follow the model licenses to use the corresponding model weights: [Baichuan2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command-R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [InternLM2](https://github.com/InternLM/InternLM#license) / [LLaMA](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [LLaMA-2 (LLaVA-1.5)](https://ai.meta.com/llama/license/) / [LLaMA-3](https://llama.meta.com/llama3/license/) / [Mistral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [StarCoder2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan)
|
||||||
|
|
||||||
## Citation
|
## Citation
|
||||||
|
|
||||||
@@ -745,7 +532,7 @@ If this work is helpful, please kindly cite as:
|
|||||||
|
|
||||||
## Acknowledgement
|
## Acknowledgement
|
||||||
|
|
||||||
This repo benefits from [PEFT](https://github.com/huggingface/peft), [QLoRA](https://github.com/artidoro/qlora) and [FastChat](https://github.com/lm-sys/FastChat). Thanks for their wonderful work.
|
This repo benefits from [PEFT](https://github.com/huggingface/peft), [TRL](https://github.com/huggingface/trl), [QLoRA](https://github.com/artidoro/qlora) and [FastChat](https://github.com/lm-sys/FastChat). Thanks for their wonderful work.
|
||||||
|
|
||||||
## Star History
|
## Star History
|
||||||
|
|
||||||
|
|||||||
620
README_zh.md
620
README_zh.md
@@ -5,13 +5,15 @@
|
|||||||
[](https://github.com/hiyouga/LLaMA-Factory/commits/main)
|
[](https://github.com/hiyouga/LLaMA-Factory/commits/main)
|
||||||
[](https://pypi.org/project/llmtuner/)
|
[](https://pypi.org/project/llmtuner/)
|
||||||
[](https://pypi.org/project/llmtuner/)
|
[](https://pypi.org/project/llmtuner/)
|
||||||
[](#使用了-llama-factory-的项目)
|
[](#使用了-llama-factory-的项目)
|
||||||
[](https://github.com/hiyouga/LLaMA-Factory/pulls)
|
[](https://github.com/hiyouga/LLaMA-Factory/pulls)
|
||||||
[](https://discord.gg/rKfvV9r9FK)
|
[](https://discord.gg/rKfvV9r9FK)
|
||||||
[](https://twitter.com/llamafactory_ai)
|
[](https://twitter.com/llamafactory_ai)
|
||||||
[](https://huggingface.co/spaces/hiyouga/LLaMA-Board)
|
[](https://huggingface.co/spaces/hiyouga/LLaMA-Board)
|
||||||
[](https://modelscope.cn/studios/hiyouga/LLaMA-Board)
|
[](https://modelscope.cn/studios/hiyouga/LLaMA-Board)
|
||||||
[](https://colab.research.google.com/drive/1eRTPn37ltBbYsISy9Aw2NuI2Aq5CQrD9?usp=sharing)
|
[](https://colab.research.google.com/drive/1d5KQtbemerlSDSxZIfAaWXhKr30QypiK?usp=sharing)
|
||||||
|
|
||||||
|
[](https://trendshift.io/repositories/4535)
|
||||||
|
|
||||||
👋 加入我们的[微信群](assets/wechat.jpg)。
|
👋 加入我们的[微信群](assets/wechat.jpg)。
|
||||||
|
|
||||||
@@ -23,7 +25,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd
|
|||||||
|
|
||||||
选择你的打开方式:
|
选择你的打开方式:
|
||||||
|
|
||||||
- **Colab**:https://colab.research.google.com/drive/1eRTPn37ltBbYsISy9Aw2NuI2Aq5CQrD9?usp=sharing
|
- **Colab**:https://colab.research.google.com/drive/1d5KQtbemerlSDSxZIfAaWXhKr30QypiK?usp=sharing
|
||||||
- **本地机器**:请见[如何使用](#如何使用)
|
- **本地机器**:请见[如何使用](#如何使用)
|
||||||
|
|
||||||
## 目录
|
## 目录
|
||||||
@@ -43,17 +45,17 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd
|
|||||||
|
|
||||||
## 项目特色
|
## 项目特色
|
||||||
|
|
||||||
- **多种模型**:LLaMA、Mistral、Mixtral-MoE、Qwen、Yi、Gemma、Baichuan、ChatGLM、Phi 等等。
|
- **多种模型**:LLaMA、LLaVA、Mistral、Mixtral-MoE、Qwen、Yi、Gemma、Baichuan、ChatGLM、Phi 等等。
|
||||||
- **集成方法**:(增量)预训练、指令监督微调、奖励模型训练、PPO 训练和 DPO 训练。
|
- **集成方法**:(增量)预训练、(多模态)指令监督微调、奖励模型训练、PPO 训练、DPO 训练和 ORPO 训练。
|
||||||
- **多种精度**:32 比特全参数微调、16 比特冻结微调、16 比特 LoRA 微调和基于 AQLM/AWQ/GPTQ/LLM.int8 的 2/4/8 比特 QLoRA 微调。
|
- **多种精度**:32 比特全参数微调、16 比特冻结微调、16 比特 LoRA 微调和基于 AQLM/AWQ/GPTQ/LLM.int8 的 2/4/8 比特 QLoRA 微调。
|
||||||
- **先进算法**:GaLore、DoRA、LongLoRA、LLaMA Pro、LoRA+、LoftQ 和 Agent 微调。
|
- **先进算法**:GaLore、BAdam、DoRA、LongLoRA、LLaMA Pro、Mixture-of-Depths、LoRA+、LoftQ 和 Agent 微调。
|
||||||
- **实用技巧**:FlashAttention-2、Unsloth、RoPE scaling、NEFTune 和 rsLoRA。
|
- **实用技巧**:FlashAttention-2、Unsloth、RoPE scaling、NEFTune 和 rsLoRA。
|
||||||
- **实验监控**:LlamaBoard、TensorBoard、Wandb、MLflow 等等。
|
- **实验监控**:LlamaBoard、TensorBoard、Wandb、MLflow 等等。
|
||||||
- **极速推理**:基于 vLLM 的 OpenAI 风格 API、浏览器界面和命令行接口。
|
- **极速推理**:基于 vLLM 的 OpenAI 风格 API、浏览器界面和命令行接口。
|
||||||
|
|
||||||
## 性能指标
|
## 性能指标
|
||||||
|
|
||||||
与 ChatGLM 官方的 [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ptuning) 微调相比,LLaMA-Factory 的 LoRA 微调提供了 **3.7 倍**的加速比,同时在广告文案生成任务上取得了更高的 Rouge 分数。结合 4 比特量化技术,LLaMA-Factory 的 QLoRA 微调进一步降低了 GPU 显存消耗。
|
与 ChatGLM 官方的 [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ptuning) 微调相比,LLaMA Factory 的 LoRA 微调提供了 **3.7 倍**的加速比,同时在广告文案生成任务上取得了更高的 Rouge 分数。结合 4 比特量化技术,LLaMA Factory 的 QLoRA 微调进一步降低了 GPU 显存消耗。
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
@@ -62,51 +64,67 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd
|
|||||||
- **Training Speed**: 训练阶段每秒处理的样本数量。(批处理大小=4,截断长度=1024)
|
- **Training Speed**: 训练阶段每秒处理的样本数量。(批处理大小=4,截断长度=1024)
|
||||||
- **Rouge Score**: [广告文案生成](https://aclanthology.org/D19-1321.pdf)任务验证集上的 Rouge-2 分数。(批处理大小=4,截断长度=1024)
|
- **Rouge Score**: [广告文案生成](https://aclanthology.org/D19-1321.pdf)任务验证集上的 Rouge-2 分数。(批处理大小=4,截断长度=1024)
|
||||||
- **GPU Memory**: 4 比特量化训练的 GPU 显存峰值。(批处理大小=1,截断长度=1024)
|
- **GPU Memory**: 4 比特量化训练的 GPU 显存峰值。(批处理大小=1,截断长度=1024)
|
||||||
- 我们在 ChatGLM 的 P-Tuning 中采用 `pre_seq_len=128`,在 LLaMA-Factory 的 LoRA 微调中采用 `lora_rank=32`。
|
- 我们在 ChatGLM 的 P-Tuning 中采用 `pre_seq_len=128`,在 LLaMA Factory 的 LoRA 微调中采用 `lora_rank=32`。
|
||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
## 更新日志
|
## 更新日志
|
||||||
|
|
||||||
[24/03/21] 我们的论文 "[LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models](https://arxiv.org/abs/2403.13372)" 可在 arXiv 上查看!
|
[24/05/14] 我们支持了昇腾 NPU 设备的训练和推理。详情请查阅[安装](#安装-llama-factory)部分。
|
||||||
|
|
||||||
[24/03/20] 我们支持了能在 2x24GB GPU 上微调 70B 模型的 **FSDP+QLoRA**。详细用法请参照 `examples/fsdp_qlora`。
|
[24/05/13] 我们支持了 Yi-1.5 系列模型的微调。
|
||||||
|
|
||||||
[24/03/13] 我们支持了 **[LoRA+](https://arxiv.org/abs/2402.12354)**。详细用法请参照 `examples/extras/loraplus`。
|
[24/04/26] 我们支持了多模态模型 **LLaVA-1.5** 的微调。详细用法请参照 [examples](examples/README_zh.md)。
|
||||||
|
|
||||||
[24/03/07] 我们支持了梯度低秩投影(**[GaLore](https://arxiv.org/abs/2403.03507)**)算法。详细用法请参照 `examples/extras/galore`。
|
|
||||||
|
|
||||||
<details><summary>展开日志</summary>
|
<details><summary>展开日志</summary>
|
||||||
|
|
||||||
[24/03/07] 我们集成了 **[vLLM](https://github.com/vllm-project/vllm)** 以实现极速并发推理。请使用 `--infer_backend vllm` 来获得 **270%** 的推理速度。(尚不支持 LoRA,请先合并权重。)
|
[24/04/22] 我们提供了在免费 T4 GPU 上微调 Llama-3 模型的 **[Colab 笔记本](https://colab.research.google.com/drive/1d5KQtbemerlSDSxZIfAaWXhKr30QypiK?usp=sharing)**。Hugging Face 社区公开了两个利用 LLaMA Factory 微调的 Llama-3 模型,详情请见 [Llama3-8B-Chinese-Chat](https://huggingface.co/shenzhi-wang/Llama3-8B-Chinese-Chat) 和 [Llama3-Chinese](https://huggingface.co/zhichen/Llama3-Chinese)。
|
||||||
|
|
||||||
[24/02/28] 我们支持了 **[DoRA](https://arxiv.org/abs/2402.09353)** 微调。请使用 `--use_dora` 参数进行 DoRA 微调。
|
[24/04/21] 我们基于 [AstraMindAI 的仓库](https://github.com/astramind-ai/Mixture-of-depths)支持了 **[混合深度训练](https://arxiv.org/abs/2404.02258)**。详细用法请参照 [examples](examples/README_zh.md)。
|
||||||
|
|
||||||
[24/02/15] 我们支持了 [LLaMA Pro](https://github.com/TencentARC/LLaMA-Pro) 提出的**块扩展**方法。详细用法请参照 `examples/extras/llama_pro`。
|
[24/04/16] 我们支持了 **[BAdam](https://arxiv.org/abs/2404.02827)**。详细用法请参照 [examples](examples/README_zh.md)。
|
||||||
|
|
||||||
|
[24/04/16] 我们支持了 **[unsloth](https://github.com/unslothai/unsloth)** 的长序列训练(24GB 可训练 Llama-2-7B-56k)。该方法相比 FlashAttention-2 提供了 **117%** 的训练速度和 **50%** 的显存节约。更多数据请见[此页面](https://github.com/hiyouga/LLaMA-Factory/wiki/Performance-comparison)。
|
||||||
|
|
||||||
|
[24/03/31] 我们支持了 **[ORPO](https://arxiv.org/abs/2403.07691)**。详细用法请参照 [examples](examples/README_zh.md)。
|
||||||
|
|
||||||
|
[24/03/21] 我们的论文 "[LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models](https://arxiv.org/abs/2403.13372)" 可在 arXiv 上查看!
|
||||||
|
|
||||||
|
[24/03/20] 我们支持了能在 2x24GB GPU 上微调 70B 模型的 **FSDP+QLoRA**。详细用法请参照 [examples](examples/README_zh.md)。
|
||||||
|
|
||||||
|
[24/03/13] 我们支持了 **[LoRA+](https://arxiv.org/abs/2402.12354)**。详细用法请参照 [examples](examples/README_zh.md)。
|
||||||
|
|
||||||
|
[24/03/07] 我们支持了梯度低秩投影(**[GaLore](https://arxiv.org/abs/2403.03507)**)算法。详细用法请参照 [examples](examples/README_zh.md)。
|
||||||
|
|
||||||
|
[24/03/07] 我们集成了 **[vLLM](https://github.com/vllm-project/vllm)** 以实现极速并发推理。请使用 `infer_backend: vllm` 来获得 **270%** 的推理速度。
|
||||||
|
|
||||||
|
[24/02/28] 我们支持了 **[DoRA](https://arxiv.org/abs/2402.09353)** 微调。请使用 `use_dora: true` 参数进行 DoRA 微调。
|
||||||
|
|
||||||
|
[24/02/15] 我们支持了 [LLaMA Pro](https://github.com/TencentARC/LLaMA-Pro) 提出的**块扩展**方法。详细用法请参照 [examples](examples/README_zh.md)。
|
||||||
|
|
||||||
[24/02/05] Qwen1.5(Qwen2 测试版)系列模型已在 LLaMA-Factory 中实现微调支持。详情请查阅该[博客页面](https://qwenlm.github.io/zh/blog/qwen1.5/)。
|
[24/02/05] Qwen1.5(Qwen2 测试版)系列模型已在 LLaMA Factory 中实现微调支持。详情请查阅该[博客页面](https://qwenlm.github.io/zh/blog/qwen1.5/)。
|
||||||
|
|
||||||
[24/01/18] 我们针对绝大多数模型实现了 **Agent 微调**,微调时指定 `--dataset glaive_toolcall` 即可使模型获得工具调用能力。
|
[24/01/18] 我们针对绝大多数模型实现了 **Agent 微调**,微调时指定 `dataset: glaive_toolcall` 即可使模型获得工具调用能力。
|
||||||
|
|
||||||
[23/12/23] 我们针对 LLaMA, Mistral 和 Yi 模型支持了 **[unsloth](https://github.com/unslothai/unsloth)** 的 LoRA 训练加速。请使用 `--use_unsloth` 参数启用 unsloth 优化。该方法可提供 **170%** 的训练速度,详情请查阅[此页面](https://github.com/hiyouga/LLaMA-Factory/wiki/Performance-comparison)。
|
[23/12/23] 我们针对 LLaMA, Mistral 和 Yi 模型支持了 **[unsloth](https://github.com/unslothai/unsloth)** 的 LoRA 训练加速。请使用 `use_unsloth: true` 参数启用 unsloth 优化。该方法可提供 **170%** 的训练速度,详情请查阅[此页面](https://github.com/hiyouga/LLaMA-Factory/wiki/Performance-comparison)。
|
||||||
|
|
||||||
[23/12/12] 我们支持了微调最新的混合专家模型 **[Mixtral 8x7B](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1)**。硬件需求请查阅[此处](#硬件依赖)。
|
[23/12/12] 我们支持了微调最新的混合专家模型 **[Mixtral 8x7B](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1)**。硬件需求请查阅[此处](#硬件依赖)。
|
||||||
|
|
||||||
[23/12/01] 我们支持了从 **[魔搭社区](https://modelscope.cn/models)** 下载预训练模型和数据集。详细用法请参照 [此教程](#使用魔搭社区可跳过)。
|
[23/12/01] 我们支持了从 **[魔搭社区](https://modelscope.cn/models)** 下载预训练模型和数据集。详细用法请参照 [此教程](#从魔搭社区下载)。
|
||||||
|
|
||||||
[23/10/21] 我们支持了 **[NEFTune](https://arxiv.org/abs/2310.05914)** 训练技巧。请使用 `--neftune_noise_alpha` 参数启用 NEFTune,例如 `--neftune_noise_alpha 5`。
|
[23/10/21] 我们支持了 **[NEFTune](https://arxiv.org/abs/2310.05914)** 训练技巧。请使用 `neftune_noise_alpha: 5` 参数启用 NEFTune。
|
||||||
|
|
||||||
[23/09/27] 我们针对 LLaMA 模型支持了 [LongLoRA](https://github.com/dvlab-research/LongLoRA) 提出的 **$S^2$-Attn**。请使用 `--shift_attn` 参数以启用该功能。
|
[23/09/27] 我们针对 LLaMA 模型支持了 [LongLoRA](https://github.com/dvlab-research/LongLoRA) 提出的 **$S^2$-Attn**。请使用 `shift_attn: true` 参数以启用该功能。
|
||||||
|
|
||||||
[23/09/23] 我们在项目中集成了 MMLU、C-Eval 和 CMMLU 评估集。使用方法请参阅[此示例](#模型评估)。
|
[23/09/23] 我们在项目中集成了 MMLU、C-Eval 和 CMMLU 评估集。详细用法请参照 [examples](examples/README_zh.md)。
|
||||||
|
|
||||||
[23/09/10] 我们支持了 **[FlashAttention-2](https://github.com/Dao-AILab/flash-attention)**。如果您使用的是 RTX4090、A100 或 H100 GPU,请使用 `--flash_attn` 参数以启用 FlashAttention-2。
|
[23/09/10] 我们支持了 **[FlashAttention-2](https://github.com/Dao-AILab/flash-attention)**。如果您使用的是 RTX4090、A100 或 H100 GPU,请使用 `flash_attn: fa2` 参数以启用 FlashAttention-2。
|
||||||
|
|
||||||
[23/08/12] 我们支持了 **RoPE 插值**来扩展 LLaMA 模型的上下文长度。请使用 `--rope_scaling linear` 参数训练模型或使用 `--rope_scaling dynamic` 参数评估模型。
|
[23/08/12] 我们支持了 **RoPE 插值**来扩展 LLaMA 模型的上下文长度。请使用 `rope_scaling: linear` 参数训练模型或使用 `rope_scaling: dynamic` 参数评估模型。
|
||||||
|
|
||||||
[23/08/11] 我们支持了指令模型的 **[DPO 训练](https://arxiv.org/abs/2305.18290)**。使用方法请参阅[此示例](#dpo-训练)。
|
[23/08/11] 我们支持了指令模型的 **[DPO 训练](https://arxiv.org/abs/2305.18290)**。详细用法请参照 [examples](examples/README_zh.md)。
|
||||||
|
|
||||||
[23/07/31] 我们支持了**数据流式加载**。请使用 `--streaming` 和 `--max_steps 10000` 参数来流式加载数据集。
|
[23/07/31] 我们支持了**数据流式加载**。请使用 `streaming: true` 和 `max_steps: 10000` 参数来流式加载数据集。
|
||||||
|
|
||||||
[23/07/29] 我们在 Hugging Face 发布了两个 13B 指令微调模型。详细内容请查阅我们的 Hugging Face 项目([LLaMA-2](https://huggingface.co/hiyouga/Llama-2-Chinese-13b-chat) / [Baichuan](https://huggingface.co/hiyouga/Baichuan-13B-sft))。
|
[23/07/29] 我们在 Hugging Face 发布了两个 13B 指令微调模型。详细内容请查阅我们的 Hugging Face 项目([LLaMA-2](https://huggingface.co/hiyouga/Llama-2-Chinese-13b-chat) / [Baichuan](https://huggingface.co/hiyouga/Baichuan-13B-sft))。
|
||||||
|
|
||||||
@@ -118,39 +136,45 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd
|
|||||||
|
|
||||||
[23/06/22] 我们对齐了[示例 API](src/api_demo.py) 与 [OpenAI API](https://platform.openai.com/docs/api-reference/chat) 的格式,您可以将微调模型接入**任意基于 ChatGPT 的应用**中。
|
[23/06/22] 我们对齐了[示例 API](src/api_demo.py) 与 [OpenAI API](https://platform.openai.com/docs/api-reference/chat) 的格式,您可以将微调模型接入**任意基于 ChatGPT 的应用**中。
|
||||||
|
|
||||||
[23/06/03] 我们实现了 4 比特的 LoRA 训练(也称 **[QLoRA](https://github.com/artidoro/qlora)**)。请使用 `--quantization_bit 4` 参数进行 4 比特量化微调。
|
[23/06/03] 我们实现了 4 比特的 LoRA 训练(也称 **[QLoRA](https://github.com/artidoro/qlora)**)。详细用法请参照 [examples](examples/README_zh.md)。
|
||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
## 模型
|
## 模型
|
||||||
|
|
||||||
| 模型名 | 模型大小 | 默认模块 | Template |
|
| 模型名 | 模型大小 | 默认模块 | Template |
|
||||||
| -------------------------------------------------------- | --------------------------- | ----------------- | --------- |
|
| -------------------------------------------------------- | -------------------------------- | ----------------- | --------- |
|
||||||
| [Baichuan2](https://huggingface.co/baichuan-inc) | 7B/13B | W_pack | baichuan2 |
|
| [Baichuan2](https://huggingface.co/baichuan-inc) | 7B/13B | W_pack | baichuan2 |
|
||||||
| [BLOOM](https://huggingface.co/bigscience/bloom) | 560M/1.1B/1.7B/3B/7.1B/176B | query_key_value | - |
|
| [BLOOM](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | query_key_value | - |
|
||||||
| [BLOOMZ](https://huggingface.co/bigscience/bloomz) | 560M/1.1B/1.7B/3B/7.1B/176B | query_key_value | - |
|
| [BLOOMZ](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | query_key_value | - |
|
||||||
| [ChatGLM3](https://huggingface.co/THUDM/chatglm3-6b) | 6B | query_key_value | chatglm3 |
|
| [ChatGLM3](https://huggingface.co/THUDM) | 6B | query_key_value | chatglm3 |
|
||||||
| [DeepSeek (MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B | q_proj,v_proj | deepseek |
|
| [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | q_proj,v_proj | cohere |
|
||||||
| [Falcon](https://huggingface.co/tiiuae) | 7B/40B/180B | query_key_value | falcon |
|
| [DeepSeek (MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | q_proj,v_proj | deepseek |
|
||||||
| [Gemma](https://huggingface.co/google) | 2B/7B | q_proj,v_proj | gemma |
|
| [Falcon](https://huggingface.co/tiiuae) | 7B/40B/180B | query_key_value | falcon |
|
||||||
| [InternLM2](https://huggingface.co/internlm) | 7B/20B | wqkv | intern2 |
|
| [Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | q_proj,v_proj | gemma |
|
||||||
| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | q_proj,v_proj | - |
|
| [InternLM2](https://huggingface.co/internlm) | 7B/20B | wqkv | intern2 |
|
||||||
| [LLaMA-2](https://huggingface.co/meta-llama) | 7B/13B/70B | q_proj,v_proj | llama2 |
|
| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | q_proj,v_proj | - |
|
||||||
| [Mistral](https://huggingface.co/mistralai) | 7B | q_proj,v_proj | mistral |
|
| [LLaMA-2](https://huggingface.co/meta-llama) | 7B/13B/70B | q_proj,v_proj | llama2 |
|
||||||
| [Mixtral](https://huggingface.co/mistralai) | 8x7B | q_proj,v_proj | mistral |
|
| [LLaMA-3](https://huggingface.co/meta-llama) | 8B/70B | q_proj,v_proj | llama3 |
|
||||||
| [OLMo](https://huggingface.co/allenai) | 1B/7B | att_proj | olmo |
|
| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | q_proj,v_proj | vicuna |
|
||||||
| [Phi-1.5/2](https://huggingface.co/microsoft) | 1.3B/2.7B | q_proj,v_proj | - |
|
| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | q_proj,v_proj | mistral |
|
||||||
| [Qwen](https://huggingface.co/Qwen) | 1.8B/7B/14B/72B | c_attn | qwen |
|
| [OLMo](https://huggingface.co/allenai) | 1B/7B | q_proj,v_proj | - |
|
||||||
| [Qwen1.5](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/72B | q_proj,v_proj | qwen |
|
| [Phi-1.5/2](https://huggingface.co/microsoft) | 1.3B/2.7B | q_proj,v_proj | - |
|
||||||
| [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | q_proj,v_proj | - |
|
| [Phi-3](https://huggingface.co/microsoft) | 3.8B | qkv_proj | phi |
|
||||||
| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | q_proj,v_proj | xverse |
|
| [Qwen](https://huggingface.co/Qwen) | 1.8B/7B/14B/72B | c_attn | qwen |
|
||||||
| [Yi](https://huggingface.co/01-ai) | 6B/9B/34B | q_proj,v_proj | yi |
|
| [Qwen1.5 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B/110B | q_proj,v_proj | qwen |
|
||||||
| [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | q_proj,v_proj | yuan |
|
| [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | q_proj,v_proj | - |
|
||||||
|
| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | q_proj,v_proj | xverse |
|
||||||
|
| [Yi (1/1.5)](https://huggingface.co/01-ai) | 6B/9B/34B | q_proj,v_proj | yi |
|
||||||
|
| [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | q_proj,v_proj | yi_vl |
|
||||||
|
| [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | q_proj,v_proj | yuan |
|
||||||
|
|
||||||
> [!NOTE]
|
> [!NOTE]
|
||||||
> **默认模块**应作为 `--lora_target` 参数的默认值,可使用 `--lora_target all` 参数指定全部模块。
|
> **默认模块**应作为 `--lora_target` 参数的默认值,可使用 `--lora_target all` 参数指定全部模块以取得更好的效果。
|
||||||
>
|
>
|
||||||
> 对于所有“基座”(Base)模型,`--template` 参数可以是 `default`, `alpaca`, `vicuna` 等任意值。但“对话”(Chat)模型请务必使用**对应的模板**。
|
> 对于所有“基座”(Base)模型,`--template` 参数可以是 `default`, `alpaca`, `vicuna` 等任意值。但“对话”(Instruct/Chat)模型请务必使用**对应的模板**。
|
||||||
|
>
|
||||||
|
> 请务必在训练和推理时使用**完全一致**的模板。
|
||||||
|
|
||||||
项目所支持模型的完整列表请参阅 [constants.py](src/llmtuner/extras/constants.py)。
|
项目所支持模型的完整列表请参阅 [constants.py](src/llmtuner/extras/constants.py)。
|
||||||
|
|
||||||
@@ -165,9 +189,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd
|
|||||||
| 奖励模型训练 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|
| 奖励模型训练 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|
||||||
| PPO 训练 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|
| PPO 训练 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|
||||||
| DPO 训练 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|
| DPO 训练 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|
||||||
|
| ORPO 训练 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|
||||||
> [!NOTE]
|
|
||||||
> 请使用 `--quantization_bit 4` 参数来启用 QLoRA 训练。
|
|
||||||
|
|
||||||
## 数据集
|
## 数据集
|
||||||
|
|
||||||
@@ -190,8 +212,8 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd
|
|||||||
- [Stanford Alpaca (en)](https://github.com/tatsu-lab/stanford_alpaca)
|
- [Stanford Alpaca (en)](https://github.com/tatsu-lab/stanford_alpaca)
|
||||||
- [Stanford Alpaca (zh)](https://github.com/ymcui/Chinese-LLaMA-Alpaca)
|
- [Stanford Alpaca (zh)](https://github.com/ymcui/Chinese-LLaMA-Alpaca)
|
||||||
- [Alpaca GPT4 (en&zh)](https://github.com/Instruction-Tuning-with-GPT-4/GPT-4-LLM)
|
- [Alpaca GPT4 (en&zh)](https://github.com/Instruction-Tuning-with-GPT-4/GPT-4-LLM)
|
||||||
- [Self Cognition (zh)](data/self_cognition.json)
|
- [Identity (en&zh)](data/identity.json)
|
||||||
- [Open Assistant (multilingual)](https://huggingface.co/datasets/OpenAssistant/oasst1)
|
- [Open Assistant (zh)](https://huggingface.co/datasets/OpenAssistant/oasst1)
|
||||||
- [ShareGPT (zh)](https://huggingface.co/datasets/QingyiSi/Alpaca-CoT/tree/main/Chinese-instruction-collection)
|
- [ShareGPT (zh)](https://huggingface.co/datasets/QingyiSi/Alpaca-CoT/tree/main/Chinese-instruction-collection)
|
||||||
- [Guanaco Dataset (multilingual)](https://huggingface.co/datasets/JosephusCheung/GuanacoDataset)
|
- [Guanaco Dataset (multilingual)](https://huggingface.co/datasets/JosephusCheung/GuanacoDataset)
|
||||||
- [BELLE 2M (zh)](https://huggingface.co/datasets/BelleGroup/train_2M_CN)
|
- [BELLE 2M (zh)](https://huggingface.co/datasets/BelleGroup/train_2M_CN)
|
||||||
@@ -223,6 +245,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd
|
|||||||
- [Evol Instruct V2 (en)](https://huggingface.co/datasets/WizardLM/WizardLM_evol_instruct_V2_196k)
|
- [Evol Instruct V2 (en)](https://huggingface.co/datasets/WizardLM/WizardLM_evol_instruct_V2_196k)
|
||||||
- [Glaive Function Calling V2 (en)](https://huggingface.co/datasets/glaiveai/glaive-function-calling-v2)
|
- [Glaive Function Calling V2 (en)](https://huggingface.co/datasets/glaiveai/glaive-function-calling-v2)
|
||||||
- [Cosmopedia (en)](https://huggingface.co/datasets/HuggingFaceTB/cosmopedia)
|
- [Cosmopedia (en)](https://huggingface.co/datasets/HuggingFaceTB/cosmopedia)
|
||||||
|
- [LLaVA mixed (en&zh)](https://huggingface.co/datasets/BUAADreamer/llava-en-zh-300k)
|
||||||
- [Open Assistant (de)](https://huggingface.co/datasets/mayflowergmbh/oasst_de)
|
- [Open Assistant (de)](https://huggingface.co/datasets/mayflowergmbh/oasst_de)
|
||||||
- [Dolly 15k (de)](https://huggingface.co/datasets/mayflowergmbh/dolly-15k_de)
|
- [Dolly 15k (de)](https://huggingface.co/datasets/mayflowergmbh/dolly-15k_de)
|
||||||
- [Alpaca GPT4 (de)](https://huggingface.co/datasets/mayflowergmbh/alpaca-gpt4_de)
|
- [Alpaca GPT4 (de)](https://huggingface.co/datasets/mayflowergmbh/alpaca-gpt4_de)
|
||||||
@@ -238,16 +261,15 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd
|
|||||||
<details><summary>偏好数据集</summary>
|
<details><summary>偏好数据集</summary>
|
||||||
|
|
||||||
- [HH-RLHF (en)](https://huggingface.co/datasets/Anthropic/hh-rlhf)
|
- [HH-RLHF (en)](https://huggingface.co/datasets/Anthropic/hh-rlhf)
|
||||||
- [Open Assistant (multilingual)](https://huggingface.co/datasets/OpenAssistant/oasst1)
|
|
||||||
- [GPT-4 Generated Data (en&zh)](https://github.com/Instruction-Tuning-with-GPT-4/GPT-4-LLM)
|
- [GPT-4 Generated Data (en&zh)](https://github.com/Instruction-Tuning-with-GPT-4/GPT-4-LLM)
|
||||||
- [Orca DPO (en)](https://huggingface.co/datasets/Intel/orca_dpo_pairs)
|
- [Orca DPO (en)](https://huggingface.co/datasets/Intel/orca_dpo_pairs)
|
||||||
- [Nectar (en)](https://huggingface.co/datasets/berkeley-nest/Nectar)
|
- [Nectar (en)](https://huggingface.co/datasets/berkeley-nest/Nectar)
|
||||||
|
- [DPO mixed (en&zh)](https://huggingface.co/datasets/hiyouga/DPO-En-Zh-20k)
|
||||||
|
- [Open Assistant (zh)](https://huggingface.co/datasets/OpenAssistant/oasst1)
|
||||||
- [Orca DPO (de)](https://huggingface.co/datasets/mayflowergmbh/intel_orca_dpo_pairs_de)
|
- [Orca DPO (de)](https://huggingface.co/datasets/mayflowergmbh/intel_orca_dpo_pairs_de)
|
||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
使用方法请参考 [data/README_zh.md](data/README_zh.md) 文件。
|
|
||||||
|
|
||||||
部分数据集的使用需要确认,我们推荐使用下述命令登录您的 Hugging Face 账户。
|
部分数据集的使用需要确认,我们推荐使用下述命令登录您的 Hugging Face 账户。
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@@ -261,53 +283,54 @@ huggingface-cli login
|
|||||||
| ------------ | ------- | --------- |
|
| ------------ | ------- | --------- |
|
||||||
| python | 3.8 | 3.10 |
|
| python | 3.8 | 3.10 |
|
||||||
| torch | 1.13.1 | 2.2.0 |
|
| torch | 1.13.1 | 2.2.0 |
|
||||||
| transformers | 4.37.2 | 4.39.1 |
|
| transformers | 4.37.2 | 4.40.1 |
|
||||||
| datasets | 2.14.3 | 2.17.1 |
|
| datasets | 2.14.3 | 2.19.1 |
|
||||||
| accelerate | 0.27.2 | 0.28.0 |
|
| accelerate | 0.27.2 | 0.30.0 |
|
||||||
| peft | 0.9.0 | 0.10.0 |
|
| peft | 0.9.0 | 0.10.0 |
|
||||||
| trl | 0.8.1 | 0.8.1 |
|
| trl | 0.8.1 | 0.8.6 |
|
||||||
|
|
||||||
| 可选项 | 至少 | 推荐 |
|
| 可选项 | 至少 | 推荐 |
|
||||||
| ------------ | ------- | --------- |
|
| ------------ | ------- | --------- |
|
||||||
| CUDA | 11.6 | 12.2 |
|
| CUDA | 11.6 | 12.2 |
|
||||||
| deepspeed | 0.10.0 | 0.14.0 |
|
| deepspeed | 0.10.0 | 0.14.0 |
|
||||||
| bitsandbytes | 0.39.0 | 0.43.0 |
|
| bitsandbytes | 0.39.0 | 0.43.1 |
|
||||||
| flash-attn | 2.3.0 | 2.5.6 |
|
| vllm | 0.4.0 | 0.4.2 |
|
||||||
|
| flash-attn | 2.3.0 | 2.5.8 |
|
||||||
|
|
||||||
### 硬件依赖
|
### 硬件依赖
|
||||||
|
|
||||||
\* *估算值*
|
\* *估算值*
|
||||||
|
|
||||||
| 训练方法 | 精度 | 7B | 13B | 30B | 70B | 8x7B |
|
| 方法 | 精度 | 7B | 13B | 30B | 70B | 110B | 8x7B | 8x22B |
|
||||||
| ------- | ---- | ----- | ----- | ----- | ------ | ------ |
|
| ----------------- | ---- | ----- | ----- | ----- | ------ | ------ | ----- | ------ |
|
||||||
| 全参数 | AMP | 120GB | 240GB | 600GB | 1200GB | 900GB |
|
| Full | AMP | 120GB | 240GB | 600GB | 1200GB | 2000GB | 900GB | 2400GB |
|
||||||
| 全参数 | 16 | 60GB | 120GB | 300GB | 600GB | 400GB |
|
| Full | 16 | 60GB | 120GB | 300GB | 600GB | 900GB | 400GB | 1200GB |
|
||||||
| GaLore | 16 | 16GB | 32GB | 64GB | 160GB | 120GB |
|
| Freeze | 16 | 20GB | 40GB | 80GB | 200GB | 360GB | 160GB | 400GB |
|
||||||
| 部分参数 | 16 | 20GB | 40GB | 80GB | 200GB | 160GB |
|
| LoRA/GaLore/BAdam | 16 | 16GB | 32GB | 64GB | 160GB | 240GB | 120GB | 320GB |
|
||||||
| LoRA | 16 | 16GB | 32GB | 64GB | 160GB | 120GB |
|
| QLoRA | 8 | 10GB | 20GB | 40GB | 80GB | 140GB | 60GB | 160GB |
|
||||||
| QLoRA | 8 | 10GB | 20GB | 40GB | 80GB | 60GB |
|
| QLoRA | 4 | 6GB | 12GB | 24GB | 48GB | 72GB | 30GB | 96GB |
|
||||||
| QLoRA | 4 | 6GB | 12GB | 24GB | 48GB | 30GB |
|
| QLoRA | 2 | 4GB | 8GB | 16GB | 24GB | 48GB | 18GB | 48GB |
|
||||||
| QLoRA | 2 | 4GB | 8GB | 16GB | 24GB | 18GB |
|
|
||||||
|
|
||||||
## 如何使用
|
## 如何使用
|
||||||
|
|
||||||
### 数据准备(可跳过)
|
### 安装 LLaMA Factory
|
||||||
|
|
||||||
关于数据集文件的格式,请参考 [data/README_zh.md](data/README_zh.md) 的内容。构建自定义数据集时,既可以使用单个 `.json` 文件,也可以使用一个[数据加载脚本](https://huggingface.co/docs/datasets/dataset_script)和多个文件。
|
> [!IMPORTANT]
|
||||||
|
> 此步骤为必需。
|
||||||
> [!NOTE]
|
|
||||||
> 使用自定义数据集时,请更新 `data/dataset_info.json` 文件,该文件的格式请参考 `data/README_zh.md`。
|
|
||||||
|
|
||||||
### 环境搭建(可跳过)
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
git clone https://github.com/hiyouga/LLaMA-Factory.git
|
git clone https://github.com/hiyouga/LLaMA-Factory.git
|
||||||
conda create -n llama_factory python=3.10
|
|
||||||
conda activate llama_factory
|
|
||||||
cd LLaMA-Factory
|
cd LLaMA-Factory
|
||||||
pip install -r requirements.txt
|
pip install -e .[torch,metrics]
|
||||||
```
|
```
|
||||||
|
|
||||||
|
可选的额外依赖项:torch、metrics、deepspeed、bitsandbytes、vllm、galore、badam、gptq、awq、aqlm、qwen、modelscope、quality
|
||||||
|
|
||||||
|
> [!TIP]
|
||||||
|
> 遇到包冲突时,可使用 `pip install --no-deps -e .` 解决。
|
||||||
|
|
||||||
|
<details><summary>Windows 用户指南</summary>
|
||||||
|
|
||||||
如果要在 Windows 平台上开启量化 LoRA(QLoRA),需要安装预编译的 `bitsandbytes` 库,支持 CUDA 11.1 到 12.2,请根据您的 CUDA 版本情况选择适合的[发布版本](https://github.com/jllllll/bitsandbytes-windows-webui/releases/tag/wheels)。
|
如果要在 Windows 平台上开启量化 LoRA(QLoRA),需要安装预编译的 `bitsandbytes` 库,支持 CUDA 11.1 到 12.2,请根据您的 CUDA 版本情况选择适合的[发布版本](https://github.com/jllllll/bitsandbytes-windows-webui/releases/tag/wheels)。
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@@ -316,356 +339,83 @@ pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/downl
|
|||||||
|
|
||||||
如果要在 Windows 平台上开启 FlashAttention-2,需要安装预编译的 `flash-attn` 库,支持 CUDA 12.1 到 12.2,请根据需求到 [flash-attention](https://github.com/bdashore3/flash-attention/releases) 下载对应版本安装。
|
如果要在 Windows 平台上开启 FlashAttention-2,需要安装预编译的 `flash-attn` 库,支持 CUDA 12.1 到 12.2,请根据需求到 [flash-attention](https://github.com/bdashore3/flash-attention/releases) 下载对应版本安装。
|
||||||
|
|
||||||
### 使用魔搭社区(可跳过)
|
</details>
|
||||||
|
|
||||||
如果您在 Hugging Face 模型和数据集的下载中遇到了问题,可以通过下述方法使用魔搭社区。
|
<details><summary>昇腾 NPU 用户指南</summary>
|
||||||
|
|
||||||
|
如果使用昇腾 NPU 设备进行(分布式)训练或推理,需要安装 **[torch-npu](https://gitee.com/ascend/pytorch)** 库和 **[Ascend CANN Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**。
|
||||||
|
|
||||||
|
| 依赖项 | 至少 | 推荐 |
|
||||||
|
| ------------ | ------- | --------- |
|
||||||
|
| CANN | 8.0.RC1 | 8.0.RC1 |
|
||||||
|
| torch | 2.2.0 | 2.2.0 |
|
||||||
|
| torch-npu | 2.2.0 | 2.2.0 |
|
||||||
|
| deepspeed | 0.13.2 | 0.13.2 |
|
||||||
|
|
||||||
|
Docker 镜像:
|
||||||
|
|
||||||
|
- 32GB:[下载地址](http://mirrors.cn-central-221.ovaijisuan.com/detail/130.html)
|
||||||
|
- 64GB:敬请期待
|
||||||
|
|
||||||
|
请记得使用 `ASCEND_RT_VISIBLE_DEVICES` 而非 `CUDA_VISIBLE_DEVICES` 来指定您使用的设备。
|
||||||
|
|
||||||
|
如果遇到无法正常推理的情况,请尝试设置 `do_sample: false`。
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
### 数据准备
|
||||||
|
|
||||||
|
关于数据集文件的格式,请参考 [data/README_zh.md](data/README_zh.md) 的内容。你可以使用 HuggingFace / ModelScope 上的数据集或加载本地数据集。
|
||||||
|
|
||||||
|
> [!NOTE]
|
||||||
|
> 使用自定义数据集时,请更新 `data/dataset_info.json` 文件。
|
||||||
|
|
||||||
|
### 快速开始
|
||||||
|
|
||||||
|
下面三行命令分别对 Llama3-8B-Instruct 模型进行 LoRA **微调**、**推理**和**合并**。
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
export USE_MODELSCOPE_HUB=1 # Windows 使用 `set USE_MODELSCOPE_HUB=1`
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
接着即可通过指定模型名称来训练对应的模型。(在[魔搭社区](https://modelscope.cn/models)查看所有可用的模型)
|
高级用法请参考 [examples/README_zh.md](examples/README_zh.md)(包括多 GPU 微调)。
|
||||||
|
|
||||||
```bash
|
> [!TIP]
|
||||||
CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
|
> 使用 `llamafactory-cli help` 显示帮助信息。
|
||||||
--model_name_or_path modelscope/Llama-2-7b-ms \
|
|
||||||
... # 参数同下
|
|
||||||
```
|
|
||||||
|
|
||||||
LLaMA Board 同样支持魔搭社区的模型和数据集下载。
|
### LLaMA Board 可视化微调(由 [Gradio](https://github.com/gradio-app/gradio) 驱动)
|
||||||
|
|
||||||
```bash
|
|
||||||
CUDA_VISIBLE_DEVICES=0 USE_MODELSCOPE_HUB=1 python src/train_web.py
|
|
||||||
```
|
|
||||||
|
|
||||||
### 单 GPU 训练
|
|
||||||
|
|
||||||
> [!IMPORTANT]
|
> [!IMPORTANT]
|
||||||
> 如果您使用多张 GPU 训练模型,请移步[多 GPU 分布式训练](#多-gpu-分布式训练)部分。
|
> LLaMA Board 可视化界面目前仅支持单 GPU 训练。
|
||||||
|
|
||||||
#### LLaMA Board GUI
|
#### 使用本地环境
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
CUDA_VISIBLE_DEVICES=0 python src/train_web.py
|
CUDA_VISIBLE_DEVICES=0 GRADIO_SHARE=1 llamafactory-cli webui
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 预训练
|
<details><summary>阿里云 PAI 和 AutoDL 用户指南</summary>
|
||||||
|
|
||||||
|
如果您在阿里云 PAI 上使用 LLaMA Board 时遇到显示问题,请尝试在启动前使用以下命令设置环境变量:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
|
export GRADIO_SERVER_PORT=7860 GRADIO_ROOT_PATH=/${JUPYTER_NAME}/proxy/7860/
|
||||||
--stage pt \
|
|
||||||
--do_train \
|
|
||||||
--model_name_or_path path_to_llama_model \
|
|
||||||
--dataset wiki_demo \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--lora_target q_proj,v_proj \
|
|
||||||
--output_dir path_to_pt_checkpoint \
|
|
||||||
--overwrite_cache \
|
|
||||||
--per_device_train_batch_size 4 \
|
|
||||||
--gradient_accumulation_steps 4 \
|
|
||||||
--lr_scheduler_type cosine \
|
|
||||||
--logging_steps 10 \
|
|
||||||
--save_steps 1000 \
|
|
||||||
--learning_rate 5e-5 \
|
|
||||||
--num_train_epochs 3.0 \
|
|
||||||
--plot_loss \
|
|
||||||
--fp16
|
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 指令监督微调
|
如果您正在使用 AutoDL,请安装下述 Gradio 版本:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
|
pip install gradio==4.10.0
|
||||||
--stage sft \
|
|
||||||
--do_train \
|
|
||||||
--model_name_or_path path_to_llama_model \
|
|
||||||
--dataset alpaca_gpt4_zh \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--lora_target q_proj,v_proj \
|
|
||||||
--output_dir path_to_sft_checkpoint \
|
|
||||||
--overwrite_cache \
|
|
||||||
--per_device_train_batch_size 4 \
|
|
||||||
--gradient_accumulation_steps 4 \
|
|
||||||
--lr_scheduler_type cosine \
|
|
||||||
--logging_steps 10 \
|
|
||||||
--save_steps 1000 \
|
|
||||||
--learning_rate 5e-5 \
|
|
||||||
--num_train_epochs 3.0 \
|
|
||||||
--plot_loss \
|
|
||||||
--fp16
|
|
||||||
```
|
|
||||||
|
|
||||||
#### 奖励模型训练
|
|
||||||
|
|
||||||
```bash
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
|
|
||||||
--stage rm \
|
|
||||||
--do_train \
|
|
||||||
--model_name_or_path path_to_llama_model \
|
|
||||||
--adapter_name_or_path path_to_sft_checkpoint \
|
|
||||||
--create_new_adapter \
|
|
||||||
--dataset comparison_gpt4_zh \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--lora_target q_proj,v_proj \
|
|
||||||
--output_dir path_to_rm_checkpoint \
|
|
||||||
--per_device_train_batch_size 2 \
|
|
||||||
--gradient_accumulation_steps 4 \
|
|
||||||
--lr_scheduler_type cosine \
|
|
||||||
--logging_steps 10 \
|
|
||||||
--save_steps 1000 \
|
|
||||||
--learning_rate 1e-5 \
|
|
||||||
--num_train_epochs 1.0 \
|
|
||||||
--plot_loss \
|
|
||||||
--fp16
|
|
||||||
```
|
|
||||||
|
|
||||||
#### PPO 训练
|
|
||||||
|
|
||||||
```bash
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
|
|
||||||
--stage ppo \
|
|
||||||
--do_train \
|
|
||||||
--model_name_or_path path_to_llama_model \
|
|
||||||
--adapter_name_or_path path_to_sft_checkpoint \
|
|
||||||
--create_new_adapter \
|
|
||||||
--dataset alpaca_gpt4_zh \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--lora_target q_proj,v_proj \
|
|
||||||
--reward_model path_to_rm_checkpoint \
|
|
||||||
--output_dir path_to_ppo_checkpoint \
|
|
||||||
--per_device_train_batch_size 2 \
|
|
||||||
--gradient_accumulation_steps 4 \
|
|
||||||
--lr_scheduler_type cosine \
|
|
||||||
--top_k 0 \
|
|
||||||
--top_p 0.9 \
|
|
||||||
--logging_steps 10 \
|
|
||||||
--save_steps 1000 \
|
|
||||||
--learning_rate 1e-5 \
|
|
||||||
--num_train_epochs 1.0 \
|
|
||||||
--plot_loss \
|
|
||||||
--fp16
|
|
||||||
```
|
|
||||||
|
|
||||||
> [!TIP]
|
|
||||||
> 如果开启了 `--create_new_adapter`,则使用 `--adapter_name_or_path path_to_sft_checkpoint,path_to_ppo_checkpoint` 来进行微调模型的推理。
|
|
||||||
|
|
||||||
> [!WARNING]
|
|
||||||
> 如果使用 fp16 精度进行 LLaMA-2 模型的 PPO 训练,请使用 `--per_device_train_batch_size=1`。
|
|
||||||
|
|
||||||
#### DPO 训练
|
|
||||||
|
|
||||||
```bash
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
|
|
||||||
--stage dpo \
|
|
||||||
--do_train \
|
|
||||||
--model_name_or_path path_to_llama_model \
|
|
||||||
--adapter_name_or_path path_to_sft_checkpoint \
|
|
||||||
--create_new_adapter \
|
|
||||||
--dataset comparison_gpt4_zh \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--lora_target q_proj,v_proj \
|
|
||||||
--output_dir path_to_dpo_checkpoint \
|
|
||||||
--per_device_train_batch_size 2 \
|
|
||||||
--gradient_accumulation_steps 4 \
|
|
||||||
--lr_scheduler_type cosine \
|
|
||||||
--logging_steps 10 \
|
|
||||||
--save_steps 1000 \
|
|
||||||
--learning_rate 1e-5 \
|
|
||||||
--num_train_epochs 1.0 \
|
|
||||||
--plot_loss \
|
|
||||||
--fp16
|
|
||||||
```
|
|
||||||
|
|
||||||
> [!TIP]
|
|
||||||
> 如果开启了 `--create_new_adapter`,则使用 `--adapter_name_or_path path_to_sft_checkpoint,path_to_dpo_checkpoint` 来进行微调模型的推理。
|
|
||||||
|
|
||||||
### 多 GPU 分布式训练
|
|
||||||
|
|
||||||
#### 使用 Huggingface Accelerate
|
|
||||||
|
|
||||||
```bash
|
|
||||||
accelerate launch --config_file config.yaml src/train_bash.py \
|
|
||||||
--ddp_timeout 180000000 \
|
|
||||||
... # 参数同上
|
|
||||||
```
|
|
||||||
|
|
||||||
<details><summary>使用 Accelerate 进行 LoRA 训练的 config.yaml 示例</summary>
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
compute_environment: LOCAL_MACHINE
|
|
||||||
debug: false
|
|
||||||
distributed_type: MULTI_GPU
|
|
||||||
downcast_bf16: 'no'
|
|
||||||
gpu_ids: all
|
|
||||||
machine_rank: 0
|
|
||||||
main_training_function: main
|
|
||||||
mixed_precision: fp16
|
|
||||||
num_machines: 1
|
|
||||||
num_processes: 4
|
|
||||||
rdzv_backend: static
|
|
||||||
same_network: true
|
|
||||||
tpu_env: []
|
|
||||||
tpu_use_cluster: false
|
|
||||||
tpu_use_sudo: false
|
|
||||||
use_cpu: false
|
|
||||||
```
|
```
|
||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
> [!TIP]
|
|
||||||
> 我们推荐使用 Accelerate 进行 LoRA 训练。
|
|
||||||
|
|
||||||
#### 使用 DeepSpeed
|
|
||||||
|
|
||||||
```bash
|
|
||||||
deepspeed --num_gpus 8 src/train_bash.py \
|
|
||||||
--deepspeed ds_config.json \
|
|
||||||
--ddp_timeout 180000000 \
|
|
||||||
... # 参数同上
|
|
||||||
```
|
|
||||||
|
|
||||||
<details><summary>使用 DeepSpeed ZeRO-2 进行全参数训练的 ds_config.json 示例</summary>
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"train_batch_size": "auto",
|
|
||||||
"train_micro_batch_size_per_gpu": "auto",
|
|
||||||
"gradient_accumulation_steps": "auto",
|
|
||||||
"gradient_clipping": "auto",
|
|
||||||
"zero_allow_untested_optimizer": true,
|
|
||||||
"fp16": {
|
|
||||||
"enabled": "auto",
|
|
||||||
"loss_scale": 0,
|
|
||||||
"loss_scale_window": 1000,
|
|
||||||
"initial_scale_power": 16,
|
|
||||||
"hysteresis": 2,
|
|
||||||
"min_loss_scale": 1
|
|
||||||
},
|
|
||||||
"bf16": {
|
|
||||||
"enabled": "auto"
|
|
||||||
},
|
|
||||||
"zero_optimization": {
|
|
||||||
"stage": 2,
|
|
||||||
"allgather_partitions": true,
|
|
||||||
"allgather_bucket_size": 5e8,
|
|
||||||
"overlap_comm": true,
|
|
||||||
"reduce_scatter": true,
|
|
||||||
"reduce_bucket_size": 5e8,
|
|
||||||
"contiguous_gradients": true,
|
|
||||||
"round_robin_gradients": true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
</details>
|
|
||||||
|
|
||||||
> [!TIP]
|
|
||||||
> 更多训练脚本请查看 [examples](examples)。
|
|
||||||
|
|
||||||
### 合并 LoRA 权重并导出模型
|
|
||||||
|
|
||||||
```bash
|
|
||||||
CUDA_VISIBLE_DEVICES= python src/export_model.py \
|
|
||||||
--model_name_or_path path_to_llama_model \
|
|
||||||
--adapter_name_or_path path_to_checkpoint \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--export_dir path_to_export \
|
|
||||||
--export_size 2 \
|
|
||||||
--export_legacy_format False
|
|
||||||
```
|
|
||||||
|
|
||||||
> [!WARNING]
|
|
||||||
> 尚不支持量化模型的 LoRA 权重合并及导出。
|
|
||||||
|
|
||||||
> [!TIP]
|
|
||||||
> 仅使用 `--model_name_or_path path_to_export` 来加载导出后的模型。
|
|
||||||
>
|
|
||||||
> 合并 LoRA 权重之后可再次使用 `CUDA_VISIBLE_DEVICES=0`、`--export_quantization_bit 4` 和 `--export_quantization_dataset data/c4_demo.json` 基于 AutoGPTQ 量化模型。
|
|
||||||
|
|
||||||
### 使用 OpenAI 风格 API 推理
|
|
||||||
|
|
||||||
```bash
|
|
||||||
CUDA_VISIBLE_DEVICES=0 API_PORT=8000 python src/api_demo.py \
|
|
||||||
--model_name_or_path path_to_llama_model \
|
|
||||||
--adapter_name_or_path path_to_checkpoint \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora
|
|
||||||
```
|
|
||||||
|
|
||||||
> [!TIP]
|
|
||||||
> 关于 API 文档请见 `http://localhost:8000/docs`。
|
|
||||||
|
|
||||||
### 使用命令行推理
|
|
||||||
|
|
||||||
```bash
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python src/cli_demo.py \
|
|
||||||
--model_name_or_path path_to_llama_model \
|
|
||||||
--adapter_name_or_path path_to_checkpoint \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora
|
|
||||||
```
|
|
||||||
|
|
||||||
### 使用浏览器推理
|
|
||||||
|
|
||||||
```bash
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python src/web_demo.py \
|
|
||||||
--model_name_or_path path_to_llama_model \
|
|
||||||
--adapter_name_or_path path_to_checkpoint \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora
|
|
||||||
```
|
|
||||||
|
|
||||||
### 模型评估
|
|
||||||
|
|
||||||
```bash
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python src/evaluate.py \
|
|
||||||
--model_name_or_path path_to_llama_model \
|
|
||||||
--adapter_name_or_path path_to_checkpoint \
|
|
||||||
--template vanilla \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--task ceval \
|
|
||||||
--split validation \
|
|
||||||
--lang zh \
|
|
||||||
--n_shot 5 \
|
|
||||||
--batch_size 4
|
|
||||||
```
|
|
||||||
|
|
||||||
### 模型预测
|
|
||||||
|
|
||||||
```bash
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
|
|
||||||
--stage sft \
|
|
||||||
--do_predict \
|
|
||||||
--model_name_or_path path_to_llama_model \
|
|
||||||
--adapter_name_or_path path_to_checkpoint \
|
|
||||||
--dataset alpaca_gpt4_zh \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--output_dir path_to_predict_result \
|
|
||||||
--per_device_eval_batch_size 1 \
|
|
||||||
--max_samples 100 \
|
|
||||||
--predict_with_generate \
|
|
||||||
--fp16
|
|
||||||
```
|
|
||||||
|
|
||||||
> [!WARNING]
|
|
||||||
> 如果使用 fp16 精度进行 LLaMA-2 模型的预测,请使用 `--per_device_eval_batch_size=1`。
|
|
||||||
|
|
||||||
> [!TIP]
|
|
||||||
> 我们建议在量化模型的预测中使用 `--per_device_eval_batch_size=1` 和 `--max_target_length 128`。
|
|
||||||
|
|
||||||
### 使用容器
|
|
||||||
|
|
||||||
#### 使用 Docker
|
#### 使用 Docker
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker build -f ./Dockerfile -t llama-factory:latest .
|
docker build -f ./Dockerfile -t llama-factory:latest .
|
||||||
|
|
||||||
docker run --gpus=all \
|
docker run --gpus=all \
|
||||||
-v ./hf_cache:/root/.cache/huggingface/ \
|
-v ./hf_cache:/root/.cache/huggingface/ \
|
||||||
-v ./data:/app/data \
|
-v ./data:/app/data \
|
||||||
@@ -683,14 +433,36 @@ docker run --gpus=all \
|
|||||||
docker compose -f ./docker-compose.yml up -d
|
docker compose -f ./docker-compose.yml up -d
|
||||||
```
|
```
|
||||||
|
|
||||||
> [!TIP]
|
<details><summary>数据卷详情</summary>
|
||||||
> 数据卷详情:
|
|
||||||
> * hf_cache:使用宿主机的 Hugging Face 缓存文件夹,允许更改为新的目录。
|
- hf_cache:使用宿主机的 Hugging Face 缓存文件夹,允许更改为新的目录。
|
||||||
> * data:宿主机中存放数据集的文件夹路径。
|
- data:宿主机中存放数据集的文件夹路径。
|
||||||
> * output:将导出目录设置为该路径后,即可在宿主机中访问导出后的模型。
|
- output:将导出目录设置为该路径后,即可在宿主机中访问导出后的模型。
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
### 利用 vLLM 部署 OpenAI API
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0,1 API_PORT=8000 llamafactory-cli api examples/inference/llama3_vllm.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
### 从魔搭社区下载
|
||||||
|
|
||||||
|
如果您在 Hugging Face 模型和数据集的下载中遇到了问题,可以通过下述方法使用魔搭社区。
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export USE_MODELSCOPE_HUB=1 # Windows 使用 `set USE_MODELSCOPE_HUB=1`
|
||||||
|
```
|
||||||
|
|
||||||
|
将 `--model_name_or_path` 设置为模型 ID 来加载对应的模型。在[魔搭社区](https://modelscope.cn/models)查看所有可用的模型,例如 `LLM-Research/Meta-Llama-3-8B-Instruct`。
|
||||||
|
|
||||||
## 使用了 LLaMA Factory 的项目
|
## 使用了 LLaMA Factory 的项目
|
||||||
|
|
||||||
|
如果您有项目希望添加至下述列表,请通过邮件联系或者创建一个 PR。
|
||||||
|
|
||||||
|
<details><summary>点击显示</summary>
|
||||||
|
|
||||||
1. Wang et al. ESRL: Efficient Sampling-based Reinforcement Learning for Sequence Generation. 2023. [[arxiv]](https://arxiv.org/abs/2308.02223)
|
1. Wang et al. ESRL: Efficient Sampling-based Reinforcement Learning for Sequence Generation. 2023. [[arxiv]](https://arxiv.org/abs/2308.02223)
|
||||||
1. Yu et al. Open, Closed, or Small Language Models for Text Classification? 2023. [[arxiv]](https://arxiv.org/abs/2308.10092)
|
1. Yu et al. Open, Closed, or Small Language Models for Text Classification? 2023. [[arxiv]](https://arxiv.org/abs/2308.10092)
|
||||||
1. Wang et al. UbiPhysio: Support Daily Functioning, Fitness, and Rehabilitation with Action Understanding and Feedback in Natural Language. 2023. [[arxiv]](https://arxiv.org/abs/2308.10526)
|
1. Wang et al. UbiPhysio: Support Daily Functioning, Fitness, and Rehabilitation with Action Understanding and Feedback in Natural Language. 2023. [[arxiv]](https://arxiv.org/abs/2308.10526)
|
||||||
@@ -712,21 +484,37 @@ docker compose -f ./docker-compose.yml up -d
|
|||||||
1. Huang et al. Key-Point-Driven Data Synthesis with its Enhancement on Mathematical Reasoning. 2024. [[arxiv]](https://arxiv.org/abs/2403.02333)
|
1. Huang et al. Key-Point-Driven Data Synthesis with its Enhancement on Mathematical Reasoning. 2024. [[arxiv]](https://arxiv.org/abs/2403.02333)
|
||||||
1. Duan et al. Negating Negatives: Alignment without Human Positive Samples via Distributional Dispreference Optimization. 2024. [[arxiv]](https://arxiv.org/abs/2403.03419)
|
1. Duan et al. Negating Negatives: Alignment without Human Positive Samples via Distributional Dispreference Optimization. 2024. [[arxiv]](https://arxiv.org/abs/2403.03419)
|
||||||
1. Xie and Schwertfeger. Empowering Robotics with Large Language Models: osmAG Map Comprehension with LLMs. 2024. [[arxiv]](https://arxiv.org/abs/2403.08228)
|
1. Xie and Schwertfeger. Empowering Robotics with Large Language Models: osmAG Map Comprehension with LLMs. 2024. [[arxiv]](https://arxiv.org/abs/2403.08228)
|
||||||
|
1. Wu et al. Large Language Models are Parallel Multilingual Learners. 2024. [[arxiv]](https://arxiv.org/abs/2403.09073)
|
||||||
|
1. Zhang et al. EDT: Improving Large Language Models' Generation by Entropy-based Dynamic Temperature Sampling. 2024. [[arxiv]](https://arxiv.org/abs/2403.14541)
|
||||||
|
1. Weller et al. FollowIR: Evaluating and Teaching Information Retrieval Models to Follow Instructions. 2024. [[arxiv]](https://arxiv.org/abs/2403.15246)
|
||||||
1. Hongbin Na. CBT-LLM: A Chinese Large Language Model for Cognitive Behavioral Therapy-based Mental Health Question Answering. 2024. [[arxiv]](https://arxiv.org/abs/2403.16008)
|
1. Hongbin Na. CBT-LLM: A Chinese Large Language Model for Cognitive Behavioral Therapy-based Mental Health Question Answering. 2024. [[arxiv]](https://arxiv.org/abs/2403.16008)
|
||||||
|
1. Zan et al. CodeS: Natural Language to Code Repository via Multi-Layer Sketch. 2024. [[arxiv]](https://arxiv.org/abs/2403.16443)
|
||||||
|
1. Liu et al. Extensive Self-Contrast Enables Feedback-Free Language Model Alignment. 2024. [[arxiv]](https://arxiv.org/abs/2404.00604)
|
||||||
|
1. Luo et al. BAdam: A Memory Efficient Full Parameter Training Method for Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.02827)
|
||||||
|
1. Du et al. Chinese Tiny LLM: Pretraining a Chinese-Centric Large Language Model. 2024. [[arxiv]](https://arxiv.org/abs/2404.04167)
|
||||||
|
1. Ma et al. Parameter Efficient Quasi-Orthogonal Fine-Tuning via Givens Rotation. 2024. [[arxiv]](https://arxiv.org/abs/2404.04316)
|
||||||
|
1. Liu et al. Dynamic Generation of Personalities with Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.07084)
|
||||||
|
1. Shang et al. How Far Have We Gone in Stripped Binary Code Understanding Using Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.09836)
|
||||||
|
1. Huang et al. LLMTune: Accelerate Database Knob Tuning with Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.11581)
|
||||||
|
1. Deng et al. Text-Tuple-Table: Towards Information Integration in Text-to-Table Generation via Global Tuple Extraction. 2024. [[arxiv]](https://arxiv.org/abs/2404.14215)
|
||||||
|
1. Acikgoz et al. Hippocrates: An Open-Source Framework for Advancing Large Language Models in Healthcare. 2024. [[arxiv]](https://arxiv.org/abs/2404.16621)
|
||||||
|
1. Zhang et al. Small Language Models Need Strong Verifiers to Self-Correct Reasoning. 2024. [[arxiv]](https://arxiv.org/abs/2404.17140)
|
||||||
|
1. Zhou et al. FREB-TQA: A Fine-Grained Robustness Evaluation Benchmark for Table Question Answering. 2024. [[arxiv]](https://arxiv.org/abs/2404.18585)
|
||||||
1. **[StarWhisper](https://github.com/Yu-Yang-Li/StarWhisper)**: 天文大模型 StarWhisper,基于 ChatGLM2-6B 和 Qwen-14B 在天文数据上微调而得。
|
1. **[StarWhisper](https://github.com/Yu-Yang-Li/StarWhisper)**: 天文大模型 StarWhisper,基于 ChatGLM2-6B 和 Qwen-14B 在天文数据上微调而得。
|
||||||
1. **[DISC-LawLLM](https://github.com/FudanDISC/DISC-LawLLM)**: 中文法律领域大模型 DISC-LawLLM,基于 Baichuan-13B 微调而得,具有法律推理和知识检索能力。
|
1. **[DISC-LawLLM](https://github.com/FudanDISC/DISC-LawLLM)**: 中文法律领域大模型 DISC-LawLLM,基于 Baichuan-13B 微调而得,具有法律推理和知识检索能力。
|
||||||
1. **[Sunsimiao](https://github.com/thomas-yanxin/Sunsimiao)**: 孙思邈中文医疗大模型 Sunsimiao,基于 Baichuan-7B 和 ChatGLM-6B 在中文医疗数据上微调而得。
|
1. **[Sunsimiao](https://github.com/thomas-yanxin/Sunsimiao)**: 孙思邈中文医疗大模型 Sunsimiao,基于 Baichuan-7B 和 ChatGLM-6B 在中文医疗数据上微调而得。
|
||||||
1. **[CareGPT](https://github.com/WangRongsheng/CareGPT)**: 医疗大模型项目 CareGPT,基于 LLaMA2-7B 和 Baichuan-13B 在中文医疗数据上微调而得。
|
1. **[CareGPT](https://github.com/WangRongsheng/CareGPT)**: 医疗大模型项目 CareGPT,基于 LLaMA2-7B 和 Baichuan-13B 在中文医疗数据上微调而得。
|
||||||
1. **[MachineMindset](https://github.com/PKU-YuanGroup/Machine-Mindset/)**:MBTI性格大模型项目,根据数据集与训练方式让任意 LLM 拥有 16 个不同的性格类型。
|
1. **[MachineMindset](https://github.com/PKU-YuanGroup/Machine-Mindset/)**:MBTI性格大模型项目,根据数据集与训练方式让任意 LLM 拥有 16 个不同的性格类型。
|
||||||
|
1. **[Luminia-13B-v3](https://huggingface.co/Nekochu/Luminia-13B-v3)**:一个用于生成 Stable Diffusion 提示词的大型语言模型。[[🤗Demo]](https://huggingface.co/spaces/Nekochu/Luminia-13B_SD_Prompt)
|
||||||
|
1. **[Chinese-LLaVA-Med](https://github.com/BUAADreamer/Chinese-LLaVA-Med)**:中文多模态医学大模型,基于 LLaVA-1.5-7B 在中文多模态医疗数据上微调而得。
|
||||||
|
|
||||||
> [!TIP]
|
</details>
|
||||||
> 如果您有项目希望添加至上述列表,请通过邮件联系或者创建一个 PR。
|
|
||||||
|
|
||||||
## 协议
|
## 协议
|
||||||
|
|
||||||
本仓库的代码依照 [Apache-2.0](LICENSE) 协议开源。
|
本仓库的代码依照 [Apache-2.0](LICENSE) 协议开源。
|
||||||
|
|
||||||
使用模型权重时,请遵循对应的模型协议:[Baichuan2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [InternLM2](https://github.com/InternLM/InternLM#license) / [LLaMA](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [LLaMA-2](https://ai.meta.com/llama/license/) / [Mistral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [StarCoder2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yuan](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan)
|
使用模型权重时,请遵循对应的模型协议:[Baichuan2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command-R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [InternLM2](https://github.com/InternLM/InternLM#license) / [LLaMA](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [LLaMA-2 (LLaVA-1.5)](https://ai.meta.com/llama/license/) / [LLaMA-3](https://llama.meta.com/llama3/license/) / [Mistral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [StarCoder2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan)
|
||||||
|
|
||||||
## 引用
|
## 引用
|
||||||
|
|
||||||
@@ -744,7 +532,7 @@ docker compose -f ./docker-compose.yml up -d
|
|||||||
|
|
||||||
## 致谢
|
## 致谢
|
||||||
|
|
||||||
本项目受益于 [PEFT](https://github.com/huggingface/peft)、[QLoRA](https://github.com/artidoro/qlora) 和 [FastChat](https://github.com/lm-sys/FastChat),感谢以上诸位作者的付出。
|
本项目受益于 [PEFT](https://github.com/huggingface/peft)、[TRL](https://github.com/huggingface/trl)、[QLoRA](https://github.com/artidoro/qlora) 和 [FastChat](https://github.com/lm-sys/FastChat),感谢以上诸位作者的付出。
|
||||||
|
|
||||||
## Star History
|
## Star History
|
||||||
|
|
||||||
|
|||||||
120
data/README.md
120
data/README.md
@@ -1,4 +1,4 @@
|
|||||||
If you are using a custom dataset, please provide your dataset definition in the following format in `dataset_info.json`.
|
If you are using a custom dataset, please add your **dataset description** to `dataset_info.json` according to the following format. We also provide several examples in the next section.
|
||||||
|
|
||||||
```json
|
```json
|
||||||
"dataset_name": {
|
"dataset_name": {
|
||||||
@@ -18,7 +18,8 @@ If you are using a custom dataset, please provide your dataset definition in the
|
|||||||
"history": "the column name in the dataset containing the histories. (default: None)",
|
"history": "the column name in the dataset containing the histories. (default: None)",
|
||||||
"messages": "the column name in the dataset containing the messages. (default: conversations)",
|
"messages": "the column name in the dataset containing the messages. (default: conversations)",
|
||||||
"system": "the column name in the dataset containing the system prompts. (default: None)",
|
"system": "the column name in the dataset containing the system prompts. (default: None)",
|
||||||
"tools": "the column name in the dataset containing the tool description. (default: None)"
|
"tools": "the column name in the dataset containing the tool description. (default: None)",
|
||||||
|
"images": "the column name in the dataset containing the image inputs. (default: None)"
|
||||||
},
|
},
|
||||||
"tags (optional, used for the sharegpt format)": {
|
"tags (optional, used for the sharegpt format)": {
|
||||||
"role_tag": "the key in the message represents the identity. (default: from)",
|
"role_tag": "the key in the message represents the identity. (default: from)",
|
||||||
@@ -32,7 +33,9 @@ If you are using a custom dataset, please provide your dataset definition in the
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Given above, you can use the custom dataset via specifying `--dataset dataset_name`.
|
After that, you can load the custom dataset by specifying `--dataset dataset_name`.
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
Currently we support datasets in the **alpaca** or **sharegpt** format. A dataset in the alpaca format should follow the format below:
|
Currently we support datasets in the **alpaca** or **sharegpt** format. A dataset in the alpaca format should follow the format below:
|
||||||
|
|
||||||
@@ -51,10 +54,11 @@ Currently we support dataset in **alpaca** or **sharegpt** format, the dataset i
|
|||||||
]
|
]
|
||||||
```
|
```
|
||||||
|
|
||||||
Regarding the above dataset, the `columns` in `dataset_info.json` should be:
|
Regarding the above dataset, the description in `dataset_info.json` should be:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
"dataset_name": {
|
"dataset_name": {
|
||||||
|
"file_name": "data.json",
|
||||||
"columns": {
|
"columns": {
|
||||||
"prompt": "instruction",
|
"prompt": "instruction",
|
||||||
"query": "input",
|
"query": "input",
|
||||||
@@ -67,24 +71,60 @@ Regarding the above dataset, the `columns` in `dataset_info.json` should be:
|
|||||||
|
|
||||||
The `query` column will be concatenated with the `prompt` column and used as the user prompt, so the user prompt will be `prompt\nquery`. The `response` column represents the model response.
|
The `query` column will be concatenated with the `prompt` column and used as the user prompt, so the user prompt will be `prompt\nquery`. The `response` column represents the model response.
|
||||||
|
|
||||||
The `system` column will be used as the system prompt. The `history` column is a list consisting string tuples representing prompt-response pairs in the history. Note that the responses in the history **will also be used for training**.
|
The `system` column will be used as the system prompt. The `history` column is a list consisting of string tuples representing prompt-response pairs in the history. Note that the responses in the history **will also be used for training** in supervised fine-tuning.
|
||||||
|
|
||||||
For the pre-training datasets, only the `prompt` column will be used for training.
|
For the **pre-training datasets**, only the `prompt` column will be used for training, for example:
|
||||||
|
|
||||||
For the preference datasets, the `response` column should be a string list whose length is 2, with the preferred answers appearing first, for example:
|
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
[
|
||||||
"instruction": "user instruction",
|
{"text": "document"},
|
||||||
"input": "user input",
|
{"text": "document"}
|
||||||
"output": [
|
]
|
||||||
"chosen answer",
|
```
|
||||||
"rejected answer"
|
|
||||||
]
|
Regarding the above dataset, the description in `dataset_info.json` should be:
|
||||||
|
|
||||||
|
```json
|
||||||
|
"dataset_name": {
|
||||||
|
"file_name": "data.json",
|
||||||
|
"columns": {
|
||||||
|
"prompt": "text"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
The dataset in sharegpt format should follow the below format:
|
For the **preference datasets**, the `response` column should be a string list whose length is 2, with the preferred answers appearing first, for example:
|
||||||
|
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"instruction": "user instruction",
|
||||||
|
"input": "user input",
|
||||||
|
"output": [
|
||||||
|
"chosen answer",
|
||||||
|
"rejected answer"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
Regarding the above dataset, the description in `dataset_info.json` should be:
|
||||||
|
|
||||||
|
```json
|
||||||
|
"dataset_name": {
|
||||||
|
"file_name": "data.json",
|
||||||
|
"ranking": true,
|
||||||
|
"columns": {
|
||||||
|
"prompt": "instruction",
|
||||||
|
"query": "input",
|
||||||
|
"response": "output"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
|
The dataset in **sharegpt** format should follow the below format:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
[
|
[
|
||||||
@@ -105,10 +145,12 @@ The dataset in sharegpt format should follow the below format:
|
|||||||
]
|
]
|
||||||
```
|
```
|
||||||
|
|
||||||
Regarding the above dataset, the `columns` in `dataset_info.json` should be:
|
Regarding the above dataset, the description in `dataset_info.json` should be:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
"dataset_name": {
|
"dataset_name": {
|
||||||
|
"file_name": "data.json",
|
||||||
|
"formatting": "sharegpt",
|
||||||
"columns": {
|
"columns": {
|
||||||
"messages": "conversations",
|
"messages": "conversations",
|
||||||
"system": "system",
|
"system": "system",
|
||||||
@@ -125,4 +167,46 @@ Regarding the above dataset, the `columns` in `dataset_info.json` should be:
|
|||||||
|
|
||||||
where the `messages` column should be a list following the `u/a/u/a/u/a` order.
|
where the `messages` column should be a list following the `u/a/u/a/u/a` order.
|
||||||
|
|
||||||
Pre-training datasets and preference datasets are incompatible with the sharegpt format yet.
|
We also support datasets in the **openai** format:
|
||||||
|
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": "system prompt (optional)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "user instruction"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "assistant",
|
||||||
|
"content": "model response"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
Regarding the above dataset, the description in `dataset_info.json` should be:
|
||||||
|
|
||||||
|
```json
|
||||||
|
"dataset_name": {
|
||||||
|
"file_name": "data.json",
|
||||||
|
"formatting": "sharegpt",
|
||||||
|
"columns": {
|
||||||
|
"messages": "messages"
|
||||||
|
},
|
||||||
|
"tags": {
|
||||||
|
"role_tag": "role",
|
||||||
|
"content_tag": "content",
|
||||||
|
"user_tag": "user",
|
||||||
|
"assistant_tag": "assistant",
|
||||||
|
"system_tag": "system"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Pre-training datasets and preference datasets are **not yet compatible** with the sharegpt format.
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
如果您使用自定义数据集,请务必在 `dataset_info.json` 文件中按照以下格式提供数据集定义。
|
如果您使用自定义数据集,请务必按照以下格式在 `dataset_info.json` 文件中添加**数据集描述**。我们在下面也提供了一些例子。
|
||||||
|
|
||||||
```json
|
```json
|
||||||
"数据集名称": {
|
"数据集名称": {
|
||||||
@@ -18,7 +18,8 @@
|
|||||||
"history": "数据集代表历史对话的表头名称(默认:None)",
|
"history": "数据集代表历史对话的表头名称(默认:None)",
|
||||||
"messages": "数据集代表消息列表的表头名称(默认:conversations)",
|
"messages": "数据集代表消息列表的表头名称(默认:conversations)",
|
||||||
"system": "数据集代表系统提示的表头名称(默认:None)",
|
"system": "数据集代表系统提示的表头名称(默认:None)",
|
||||||
"tools": "数据集代表工具描述的表头名称(默认:None)"
|
"tools": "数据集代表工具描述的表头名称(默认:None)",
|
||||||
|
"images": "数据集代表图像输入的表头名称(默认:None)"
|
||||||
},
|
},
|
||||||
"tags(可选,用于 sharegpt 格式)": {
|
"tags(可选,用于 sharegpt 格式)": {
|
||||||
"role_tag": "消息中代表发送者身份的键名(默认:from)",
|
"role_tag": "消息中代表发送者身份的键名(默认:from)",
|
||||||
@@ -32,7 +33,9 @@
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
添加后可通过指定 `--dataset 数据集名称` 参数使用自定义数据集。
|
然后,可通过使用 `--dataset 数据集名称` 参数加载自定义数据集。
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
该项目目前支持两种格式的数据集:**alpaca** 和 **sharegpt**,其中 alpaca 格式的数据集按照以下方式组织:
|
该项目目前支持两种格式的数据集:**alpaca** 和 **sharegpt**,其中 alpaca 格式的数据集按照以下方式组织:
|
||||||
|
|
||||||
@@ -51,10 +54,11 @@
|
|||||||
]
|
]
|
||||||
```
|
```
|
||||||
|
|
||||||
对于上述格式的数据,`dataset_info.json` 中的 `columns` 应为:
|
对于上述格式的数据,`dataset_info.json` 中的描述应为:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
"数据集名称": {
|
"数据集名称": {
|
||||||
|
"file_name": "data.json",
|
||||||
"columns": {
|
"columns": {
|
||||||
"prompt": "instruction",
|
"prompt": "instruction",
|
||||||
"query": "input",
|
"query": "input",
|
||||||
@@ -67,24 +71,60 @@
|
|||||||
|
|
||||||
其中 `query` 列对应的内容会与 `prompt` 列对应的内容拼接后作为用户指令,即用户指令为 `prompt\nquery`。`response` 列对应的内容为模型回答。
|
其中 `query` 列对应的内容会与 `prompt` 列对应的内容拼接后作为用户指令,即用户指令为 `prompt\nquery`。`response` 列对应的内容为模型回答。
|
||||||
|
|
||||||
`system` 列对应的内容将被作为系统提示词。`history` 列是由多个字符串二元组构成的列表,分别代表历史消息中每轮的指令和回答。注意历史消息中的回答**也会被用于训练**。
|
`system` 列对应的内容将被作为系统提示词。`history` 列是由多个字符串二元组构成的列表,分别代表历史消息中每轮的指令和回答。注意在指令监督学习时,历史消息中的回答**也会被用于训练**。
|
||||||
|
|
||||||
对于预训练数据集,仅 `prompt` 列中的内容会用于模型训练。
|
对于**预训练数据集**,仅 `prompt` 列中的内容会用于模型训练,例如:
|
||||||
|
|
||||||
对于偏好数据集,`response` 列应当是一个长度为 2 的字符串列表,排在前面的代表更优的回答,例如:
|
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
[
|
||||||
"instruction": "用户指令",
|
{"text": "document"},
|
||||||
"input": "用户输入",
|
{"text": "document"}
|
||||||
"output": [
|
]
|
||||||
"优质回答",
|
```
|
||||||
"劣质回答"
|
|
||||||
]
|
对于上述格式的数据,`dataset_info.json` 中的描述应为:
|
||||||
|
|
||||||
|
```json
|
||||||
|
"数据集名称": {
|
||||||
|
"file_name": "data.json",
|
||||||
|
"columns": {
|
||||||
|
"prompt": "text"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
而 sharegpt 格式的数据集按照以下方式组织:
|
对于**偏好数据集**,`response` 列应当是一个长度为 2 的字符串列表,排在前面的代表更优的回答,例如:
|
||||||
|
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"instruction": "用户指令",
|
||||||
|
"input": "用户输入",
|
||||||
|
"output": [
|
||||||
|
"优质回答",
|
||||||
|
"劣质回答"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
对于上述格式的数据,`dataset_info.json` 中的描述应为:
|
||||||
|
|
||||||
|
```json
|
||||||
|
"数据集名称": {
|
||||||
|
"file_name": "data.json",
|
||||||
|
"ranking": true,
|
||||||
|
"columns": {
|
||||||
|
"prompt": "instruction",
|
||||||
|
"query": "input",
|
||||||
|
"response": "output"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
|
而 **sharegpt** 格式的数据集按照以下方式组织:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
[
|
[
|
||||||
@@ -105,10 +145,12 @@
|
|||||||
]
|
]
|
||||||
```
|
```
|
||||||
|
|
||||||
对于上述格式的数据,`dataset_info.json` 中的 `columns` 应为:
|
对于上述格式的数据,`dataset_info.json` 中的描述应为:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
"数据集名称": {
|
"数据集名称": {
|
||||||
|
"file_name": "data.json",
|
||||||
|
"formatting": "sharegpt",
|
||||||
"columns": {
|
"columns": {
|
||||||
"messages": "conversations",
|
"messages": "conversations",
|
||||||
"system": "system",
|
"system": "system",
|
||||||
@@ -125,4 +167,46 @@
|
|||||||
|
|
||||||
其中 `messages` 列应当是一个列表,且符合 `用户/模型/用户/模型/用户/模型` 的顺序。
|
其中 `messages` 列应当是一个列表,且符合 `用户/模型/用户/模型/用户/模型` 的顺序。
|
||||||
|
|
||||||
预训练数据集和偏好数据集尚不支持 sharegpt 格式。
|
我们同样支持 **openai** 格式的数据集:
|
||||||
|
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": "系统提示词(选填)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "用户指令"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "assistant",
|
||||||
|
"content": "模型回答"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
对于上述格式的数据,`dataset_info.json` 中的描述应为:
|
||||||
|
|
||||||
|
```json
|
||||||
|
"数据集名称": {
|
||||||
|
"file_name": "data.json",
|
||||||
|
"formatting": "sharegpt",
|
||||||
|
"columns": {
|
||||||
|
"messages": "messages"
|
||||||
|
},
|
||||||
|
"tags": {
|
||||||
|
"role_tag": "role",
|
||||||
|
"content_tag": "content",
|
||||||
|
"user_tag": "user",
|
||||||
|
"assistant_tag": "assistant",
|
||||||
|
"system_tag": "system"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
预训练数据集和偏好数据集**尚不支持** sharegpt 格式。
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
34c723573fbc2d7601f6d9c882ccf5aa4f9bcc4b
|
a97cf9475291591843976554878568e046d8a46d
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
import os
|
|
||||||
import json
|
import json
|
||||||
|
import os
|
||||||
|
|
||||||
import datasets
|
import datasets
|
||||||
|
|
||||||
|
|
||||||
@@ -22,31 +23,19 @@ _URL = "{}/datasets/BelleGroup/multiturn_chat_0.8M/resolve/main/multiturn_chat_0
|
|||||||
|
|
||||||
|
|
||||||
class BelleMultiturn(datasets.GeneratorBasedBuilder):
|
class BelleMultiturn(datasets.GeneratorBasedBuilder):
|
||||||
|
|
||||||
VERSION = datasets.Version("0.0.0")
|
VERSION = datasets.Version("0.0.0")
|
||||||
|
|
||||||
def _info(self):
|
def _info(self):
|
||||||
features = datasets.Features({
|
features = datasets.Features(
|
||||||
"conversations": [{"from": datasets.Value("string"), "value": datasets.Value("string")}]
|
{"conversations": [{"from": datasets.Value("string"), "value": datasets.Value("string")}]}
|
||||||
})
|
)
|
||||||
return datasets.DatasetInfo(
|
return datasets.DatasetInfo(
|
||||||
description=_DESCRIPTION,
|
description=_DESCRIPTION, features=features, homepage=_HOMEPAGE, license=_LICENSE, citation=_CITATION
|
||||||
features=features,
|
|
||||||
homepage=_HOMEPAGE,
|
|
||||||
license=_LICENSE,
|
|
||||||
citation=_CITATION
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def _split_generators(self, dl_manager: datasets.DownloadManager):
|
def _split_generators(self, dl_manager: datasets.DownloadManager):
|
||||||
file_path = dl_manager.download(_URL)
|
file_path = dl_manager.download(_URL)
|
||||||
return [
|
return [datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepath": file_path})]
|
||||||
datasets.SplitGenerator(
|
|
||||||
name=datasets.Split.TRAIN,
|
|
||||||
gen_kwargs={
|
|
||||||
"filepath": file_path
|
|
||||||
}
|
|
||||||
)
|
|
||||||
]
|
|
||||||
|
|
||||||
def _generate_examples(self, filepath: str):
|
def _generate_examples(self, filepath: str):
|
||||||
with open(filepath, "r", encoding="utf-8") as f:
|
with open(filepath, "r", encoding="utf-8") as f:
|
||||||
@@ -58,7 +47,7 @@ class BelleMultiturn(datasets.GeneratorBasedBuilder):
|
|||||||
|
|
||||||
assist_idx = prompt.rfind("Assistant:")
|
assist_idx = prompt.rfind("Assistant:")
|
||||||
human_idx = prompt.rfind("Human:")
|
human_idx = prompt.rfind("Human:")
|
||||||
query = prompt[human_idx+6:assist_idx].strip()
|
query = prompt[human_idx + 6 : assist_idx].strip()
|
||||||
prompt = prompt[:human_idx].strip()
|
prompt = prompt[:human_idx].strip()
|
||||||
conversations.insert(0, {"from": "gpt", "value": response})
|
conversations.insert(0, {"from": "gpt", "value": response})
|
||||||
conversations.insert(0, {"from": "human", "value": query})
|
conversations.insert(0, {"from": "human", "value": query})
|
||||||
@@ -67,8 +56,8 @@ class BelleMultiturn(datasets.GeneratorBasedBuilder):
|
|||||||
assist_idx = prompt.rfind("Assistant:")
|
assist_idx = prompt.rfind("Assistant:")
|
||||||
human_idx = prompt.rfind("Human:")
|
human_idx = prompt.rfind("Human:")
|
||||||
if human_idx != -1:
|
if human_idx != -1:
|
||||||
old_query = prompt[human_idx+6:assist_idx].strip()
|
old_query = prompt[human_idx + 6 : assist_idx].strip()
|
||||||
old_resp = prompt[assist_idx+10:].strip()
|
old_resp = prompt[assist_idx + 10 :].strip()
|
||||||
conversations.insert(0, {"from": "gpt", "value": old_resp})
|
conversations.insert(0, {"from": "gpt", "value": old_resp})
|
||||||
conversations.insert(0, {"from": "human", "value": old_query})
|
conversations.insert(0, {"from": "human", "value": old_query})
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -1,7 +1,8 @@
|
|||||||
import json
|
import json
|
||||||
import datasets
|
|
||||||
from typing import Any, Dict, Generator, List, Tuple
|
from typing import Any, Dict, Generator, List, Tuple
|
||||||
|
|
||||||
|
import datasets
|
||||||
|
|
||||||
|
|
||||||
_DESCRIPTION = "An example of dataset."
|
_DESCRIPTION = "An example of dataset."
|
||||||
_CITATION = ""
|
_CITATION = ""
|
||||||
@@ -11,34 +12,24 @@ _URL = "examples.json"
|
|||||||
|
|
||||||
|
|
||||||
class ExampleDataset(datasets.GeneratorBasedBuilder):
|
class ExampleDataset(datasets.GeneratorBasedBuilder):
|
||||||
|
|
||||||
VERSION = datasets.Version("0.0.0")
|
VERSION = datasets.Version("0.0.0")
|
||||||
|
|
||||||
def _info(self) -> datasets.DatasetInfo:
|
def _info(self) -> datasets.DatasetInfo:
|
||||||
features = datasets.Features({
|
features = datasets.Features(
|
||||||
"instruction": datasets.Value("string"),
|
{
|
||||||
"input": datasets.Value("string"),
|
"instruction": datasets.Value("string"),
|
||||||
"output": datasets.Value("string"),
|
"input": datasets.Value("string"),
|
||||||
"history": datasets.Sequence(datasets.Sequence(datasets.Value("string")))
|
"output": datasets.Value("string"),
|
||||||
})
|
"history": datasets.Sequence(datasets.Sequence(datasets.Value("string"))),
|
||||||
|
}
|
||||||
|
)
|
||||||
return datasets.DatasetInfo(
|
return datasets.DatasetInfo(
|
||||||
description=_DESCRIPTION,
|
description=_DESCRIPTION, features=features, homepage=_HOMEPAGE, license=_LICENSE, citation=_CITATION
|
||||||
features=features,
|
|
||||||
homepage=_HOMEPAGE,
|
|
||||||
license=_LICENSE,
|
|
||||||
citation=_CITATION
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def _split_generators(self, dl_manager: datasets.DownloadManager) -> List[datasets.SplitGenerator]:
|
def _split_generators(self, dl_manager: datasets.DownloadManager) -> List[datasets.SplitGenerator]:
|
||||||
file_path = dl_manager.download(_URL)
|
file_path = dl_manager.download(_URL)
|
||||||
return [
|
return [datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepath": file_path})]
|
||||||
datasets.SplitGenerator(
|
|
||||||
name=datasets.Split.TRAIN,
|
|
||||||
gen_kwargs={
|
|
||||||
"filepath": file_path
|
|
||||||
}
|
|
||||||
)
|
|
||||||
]
|
|
||||||
|
|
||||||
def _generate_examples(self, filepath: str) -> Generator[Tuple[int, Dict[str, Any]], None, None]:
|
def _generate_examples(self, filepath: str) -> Generator[Tuple[int, Dict[str, Any]], None, None]:
|
||||||
example_dataset = json.load(open(filepath, "r", encoding="utf-8"))
|
example_dataset = json.load(open(filepath, "r", encoding="utf-8"))
|
||||||
|
|||||||
@@ -1,8 +1,10 @@
|
|||||||
import os
|
|
||||||
import json
|
import json
|
||||||
import datasets
|
import os
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
|
import datasets
|
||||||
|
|
||||||
|
|
||||||
_HF_ENDPOINT = os.getenv("HF_ENDPOINT", "https://huggingface.co")
|
_HF_ENDPOINT = os.getenv("HF_ENDPOINT", "https://huggingface.co")
|
||||||
_DESCRIPTION = "Human preference data about helpfulness and harmlessness."
|
_DESCRIPTION = "Human preference data about helpfulness and harmlessness."
|
||||||
_CITATION = ""
|
_CITATION = ""
|
||||||
@@ -14,50 +16,37 @@ _URLS = {
|
|||||||
_URL + "harmless-base/train.jsonl.gz",
|
_URL + "harmless-base/train.jsonl.gz",
|
||||||
_URL + "helpful-base/train.jsonl.gz",
|
_URL + "helpful-base/train.jsonl.gz",
|
||||||
_URL + "helpful-online/train.jsonl.gz",
|
_URL + "helpful-online/train.jsonl.gz",
|
||||||
_URL + "helpful-rejection-sampled/train.jsonl.gz"
|
_URL + "helpful-rejection-sampled/train.jsonl.gz",
|
||||||
],
|
],
|
||||||
"test": [
|
"test": [
|
||||||
_URL + "harmless-base/test.jsonl.gz",
|
_URL + "harmless-base/test.jsonl.gz",
|
||||||
_URL + "helpful-base/test.jsonl.gz",
|
_URL + "helpful-base/test.jsonl.gz",
|
||||||
_URL + "helpful-online/test.jsonl.gz",
|
_URL + "helpful-online/test.jsonl.gz",
|
||||||
_URL + "helpful-rejection-sampled/test.jsonl.gz"
|
_URL + "helpful-rejection-sampled/test.jsonl.gz",
|
||||||
]
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class HhRlhfEn(datasets.GeneratorBasedBuilder):
|
class HhRlhfEn(datasets.GeneratorBasedBuilder):
|
||||||
|
|
||||||
VERSION = datasets.Version("0.0.0")
|
VERSION = datasets.Version("0.0.0")
|
||||||
|
|
||||||
def _info(self) -> datasets.DatasetInfo:
|
def _info(self) -> datasets.DatasetInfo:
|
||||||
features = datasets.Features({
|
features = datasets.Features(
|
||||||
"instruction": datasets.Value("string"),
|
{
|
||||||
"output": datasets.Sequence(datasets.Value("string")),
|
"instruction": datasets.Value("string"),
|
||||||
"history": datasets.Sequence(datasets.Sequence(datasets.Value("string")))
|
"output": datasets.Sequence(datasets.Value("string")),
|
||||||
})
|
"history": datasets.Sequence(datasets.Sequence(datasets.Value("string"))),
|
||||||
|
}
|
||||||
|
)
|
||||||
return datasets.DatasetInfo(
|
return datasets.DatasetInfo(
|
||||||
description=_DESCRIPTION,
|
description=_DESCRIPTION, features=features, homepage=_HOMEPAGE, license=_LICENSE, citation=_CITATION
|
||||||
features=features,
|
|
||||||
homepage=_HOMEPAGE,
|
|
||||||
license=_LICENSE,
|
|
||||||
citation=_CITATION
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def _split_generators(self, dl_manager: datasets.DownloadManager):
|
def _split_generators(self, dl_manager: datasets.DownloadManager):
|
||||||
file_path = dl_manager.download_and_extract(_URLS)
|
file_path = dl_manager.download_and_extract(_URLS)
|
||||||
return [
|
return [
|
||||||
datasets.SplitGenerator(
|
datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepaths": file_path["train"]}),
|
||||||
name=datasets.Split.TRAIN,
|
datasets.SplitGenerator(name=datasets.Split.TEST, gen_kwargs={"filepaths": file_path["test"]}),
|
||||||
gen_kwargs={
|
|
||||||
"filepaths": file_path["train"]
|
|
||||||
}
|
|
||||||
),
|
|
||||||
datasets.SplitGenerator(
|
|
||||||
name=datasets.Split.TEST,
|
|
||||||
gen_kwargs={
|
|
||||||
"filepaths": file_path["test"]
|
|
||||||
}
|
|
||||||
)
|
|
||||||
]
|
]
|
||||||
|
|
||||||
def _generate_examples(self, filepaths: List[str]):
|
def _generate_examples(self, filepaths: List[str]):
|
||||||
@@ -70,12 +59,12 @@ class HhRlhfEn(datasets.GeneratorBasedBuilder):
|
|||||||
rejected = data["rejected"]
|
rejected = data["rejected"]
|
||||||
|
|
||||||
assist_idx = rejected.rfind("\n\nAssistant: ")
|
assist_idx = rejected.rfind("\n\nAssistant: ")
|
||||||
r_reject = rejected[assist_idx+13:].strip()
|
r_reject = rejected[assist_idx + 13 :].strip()
|
||||||
assist_idx = chosen.rfind("\n\nAssistant: ")
|
assist_idx = chosen.rfind("\n\nAssistant: ")
|
||||||
r_accept = chosen[assist_idx+13:].strip()
|
r_accept = chosen[assist_idx + 13 :].strip()
|
||||||
|
|
||||||
human_idx = chosen.rfind("\n\nHuman: ")
|
human_idx = chosen.rfind("\n\nHuman: ")
|
||||||
query = chosen[human_idx+9:assist_idx].strip()
|
query = chosen[human_idx + 9 : assist_idx].strip()
|
||||||
prompt = chosen[:human_idx]
|
prompt = chosen[:human_idx]
|
||||||
history = []
|
history = []
|
||||||
|
|
||||||
@@ -83,16 +72,12 @@ class HhRlhfEn(datasets.GeneratorBasedBuilder):
|
|||||||
assist_idx = prompt.rfind("\n\nAssistant: ")
|
assist_idx = prompt.rfind("\n\nAssistant: ")
|
||||||
human_idx = prompt.rfind("\n\nHuman: ")
|
human_idx = prompt.rfind("\n\nHuman: ")
|
||||||
if human_idx != -1:
|
if human_idx != -1:
|
||||||
old_query = prompt[human_idx+9:assist_idx].strip()
|
old_query = prompt[human_idx + 9 : assist_idx].strip()
|
||||||
old_resp = prompt[assist_idx+13:].strip()
|
old_resp = prompt[assist_idx + 13 :].strip()
|
||||||
history.insert(0, (old_query, old_resp))
|
history.insert(0, (old_query, old_resp))
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
prompt = prompt[:human_idx]
|
prompt = prompt[:human_idx]
|
||||||
|
|
||||||
yield key, {
|
yield key, {"instruction": query, "output": [r_accept, r_reject], "history": history}
|
||||||
"instruction": query,
|
|
||||||
"output": [r_accept, r_reject],
|
|
||||||
"history": history
|
|
||||||
}
|
|
||||||
key += 1
|
key += 1
|
||||||
|
|||||||
@@ -1 +0,0 @@
|
|||||||
274079ea921762be356de85b18f13fa60b7ba8cb
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
57fd080be5bffe4153fe3ee26a175e3d56da30f3
|
|
||||||
@@ -1,8 +1,10 @@
|
|||||||
import os
|
|
||||||
import json
|
import json
|
||||||
import datasets
|
import os
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
|
import datasets
|
||||||
|
|
||||||
|
|
||||||
_HF_ENDPOINT = os.getenv("HF_ENDPOINT", "https://huggingface.co")
|
_HF_ENDPOINT = os.getenv("HF_ENDPOINT", "https://huggingface.co")
|
||||||
|
|
||||||
_DESCRIPTION = "UltraChat: Large-scale, Informative, and Diverse Multi-round Dialogue Data."
|
_DESCRIPTION = "UltraChat: Large-scale, Informative, and Diverse Multi-round Dialogue Data."
|
||||||
@@ -24,31 +26,19 @@ _BASE_DATA_URL = "{}/datasets/stingning/ultrachat/resolve/main/train_{{idx}}.jso
|
|||||||
|
|
||||||
|
|
||||||
class UltraChat(datasets.GeneratorBasedBuilder):
|
class UltraChat(datasets.GeneratorBasedBuilder):
|
||||||
|
|
||||||
VERSION = datasets.Version("0.0.0")
|
VERSION = datasets.Version("0.0.0")
|
||||||
|
|
||||||
def _info(self):
|
def _info(self):
|
||||||
features = datasets.Features({
|
features = datasets.Features(
|
||||||
"conversations": [{"from": datasets.Value("string"), "value": datasets.Value("string")}]
|
{"conversations": [{"from": datasets.Value("string"), "value": datasets.Value("string")}]}
|
||||||
})
|
)
|
||||||
return datasets.DatasetInfo(
|
return datasets.DatasetInfo(
|
||||||
description=_DESCRIPTION,
|
description=_DESCRIPTION, features=features, homepage=_HOMEPAGE, license=_LICENSE, citation=_CITATION
|
||||||
features=features,
|
|
||||||
homepage=_HOMEPAGE,
|
|
||||||
license=_LICENSE,
|
|
||||||
citation=_CITATION
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def _split_generators(self, dl_manager: datasets.DownloadManager):
|
def _split_generators(self, dl_manager: datasets.DownloadManager):
|
||||||
file_paths = [dl_manager.download(_BASE_DATA_URL.format(idx=idx)) for idx in range(10)] # multiple shards
|
file_paths = [dl_manager.download(_BASE_DATA_URL.format(idx=idx)) for idx in range(10)] # multiple shards
|
||||||
return [
|
return [datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepaths": file_paths})]
|
||||||
datasets.SplitGenerator(
|
|
||||||
name=datasets.Split.TRAIN,
|
|
||||||
gen_kwargs={
|
|
||||||
"filepaths": file_paths
|
|
||||||
}
|
|
||||||
)
|
|
||||||
]
|
|
||||||
|
|
||||||
def _generate_examples(self, filepaths: List[str]):
|
def _generate_examples(self, filepaths: List[str]):
|
||||||
for filepath in filepaths:
|
for filepath in filepaths:
|
||||||
@@ -56,7 +46,7 @@ class UltraChat(datasets.GeneratorBasedBuilder):
|
|||||||
for row in f:
|
for row in f:
|
||||||
try:
|
try:
|
||||||
data = json.loads(row)
|
data = json.loads(row)
|
||||||
except:
|
except Exception:
|
||||||
continue
|
continue
|
||||||
key: int = data["id"]
|
key: int = data["id"]
|
||||||
content: List[str] = data["data"]
|
content: List[str] = data["data"]
|
||||||
@@ -64,8 +54,7 @@ class UltraChat(datasets.GeneratorBasedBuilder):
|
|||||||
content.pop(-1)
|
content.pop(-1)
|
||||||
if len(content) < 2:
|
if len(content) < 2:
|
||||||
continue
|
continue
|
||||||
conversations = [{
|
conversations = [
|
||||||
"from": "human" if i % 2 == 0 else "gpt",
|
{"from": "human" if i % 2 == 0 else "gpt", "value": content[i]} for i in range(len(content))
|
||||||
"value": content[i]
|
]
|
||||||
} for i in range(len(content))]
|
|
||||||
yield key, {"conversations": conversations}
|
yield key, {"conversations": conversations}
|
||||||
|
|||||||
@@ -133,25 +133,19 @@ class Ceval(datasets.GeneratorBasedBuilder):
|
|||||||
datasets.SplitGenerator(
|
datasets.SplitGenerator(
|
||||||
name=datasets.Split.TEST,
|
name=datasets.Split.TEST,
|
||||||
gen_kwargs={
|
gen_kwargs={
|
||||||
"filepath": os.path.join(
|
"filepath": os.path.join(data_dir, "test", f"{task_name}_test.csv"),
|
||||||
data_dir, "test", f"{task_name}_test.csv"
|
|
||||||
),
|
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
datasets.SplitGenerator(
|
datasets.SplitGenerator(
|
||||||
name=datasets.Split.VALIDATION,
|
name=datasets.Split.VALIDATION,
|
||||||
gen_kwargs={
|
gen_kwargs={
|
||||||
"filepath": os.path.join(
|
"filepath": os.path.join(data_dir, "val", f"{task_name}_val.csv"),
|
||||||
data_dir, "val", f"{task_name}_val.csv"
|
|
||||||
),
|
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
datasets.SplitGenerator(
|
datasets.SplitGenerator(
|
||||||
name=datasets.Split.TRAIN,
|
name=datasets.Split.TRAIN,
|
||||||
gen_kwargs={
|
gen_kwargs={
|
||||||
"filepath": os.path.join(
|
"filepath": os.path.join(data_dir, "dev", f"{task_name}_dev.csv"),
|
||||||
data_dir, "dev", f"{task_name}_dev.csv"
|
|
||||||
),
|
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -37,73 +37,73 @@ _LICENSE = "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 Internatio
|
|||||||
_URL = "cmmlu.zip"
|
_URL = "cmmlu.zip"
|
||||||
|
|
||||||
task_list = [
|
task_list = [
|
||||||
'agronomy',
|
"agronomy",
|
||||||
'anatomy',
|
"anatomy",
|
||||||
'ancient_chinese',
|
"ancient_chinese",
|
||||||
'arts',
|
"arts",
|
||||||
'astronomy',
|
"astronomy",
|
||||||
'business_ethics',
|
"business_ethics",
|
||||||
'chinese_civil_service_exam',
|
"chinese_civil_service_exam",
|
||||||
'chinese_driving_rule',
|
"chinese_driving_rule",
|
||||||
'chinese_food_culture',
|
"chinese_food_culture",
|
||||||
'chinese_foreign_policy',
|
"chinese_foreign_policy",
|
||||||
'chinese_history',
|
"chinese_history",
|
||||||
'chinese_literature',
|
"chinese_literature",
|
||||||
'chinese_teacher_qualification',
|
"chinese_teacher_qualification",
|
||||||
'clinical_knowledge',
|
"clinical_knowledge",
|
||||||
'college_actuarial_science',
|
"college_actuarial_science",
|
||||||
'college_education',
|
"college_education",
|
||||||
'college_engineering_hydrology',
|
"college_engineering_hydrology",
|
||||||
'college_law',
|
"college_law",
|
||||||
'college_mathematics',
|
"college_mathematics",
|
||||||
'college_medical_statistics',
|
"college_medical_statistics",
|
||||||
'college_medicine',
|
"college_medicine",
|
||||||
'computer_science',
|
"computer_science",
|
||||||
'computer_security',
|
"computer_security",
|
||||||
'conceptual_physics',
|
"conceptual_physics",
|
||||||
'construction_project_management',
|
"construction_project_management",
|
||||||
'economics',
|
"economics",
|
||||||
'education',
|
"education",
|
||||||
'electrical_engineering',
|
"electrical_engineering",
|
||||||
'elementary_chinese',
|
"elementary_chinese",
|
||||||
'elementary_commonsense',
|
"elementary_commonsense",
|
||||||
'elementary_information_and_technology',
|
"elementary_information_and_technology",
|
||||||
'elementary_mathematics',
|
"elementary_mathematics",
|
||||||
'ethnology',
|
"ethnology",
|
||||||
'food_science',
|
"food_science",
|
||||||
'genetics',
|
"genetics",
|
||||||
'global_facts',
|
"global_facts",
|
||||||
'high_school_biology',
|
"high_school_biology",
|
||||||
'high_school_chemistry',
|
"high_school_chemistry",
|
||||||
'high_school_geography',
|
"high_school_geography",
|
||||||
'high_school_mathematics',
|
"high_school_mathematics",
|
||||||
'high_school_physics',
|
"high_school_physics",
|
||||||
'high_school_politics',
|
"high_school_politics",
|
||||||
'human_sexuality',
|
"human_sexuality",
|
||||||
'international_law',
|
"international_law",
|
||||||
'journalism',
|
"journalism",
|
||||||
'jurisprudence',
|
"jurisprudence",
|
||||||
'legal_and_moral_basis',
|
"legal_and_moral_basis",
|
||||||
'logical',
|
"logical",
|
||||||
'machine_learning',
|
"machine_learning",
|
||||||
'management',
|
"management",
|
||||||
'marketing',
|
"marketing",
|
||||||
'marxist_theory',
|
"marxist_theory",
|
||||||
'modern_chinese',
|
"modern_chinese",
|
||||||
'nutrition',
|
"nutrition",
|
||||||
'philosophy',
|
"philosophy",
|
||||||
'professional_accounting',
|
"professional_accounting",
|
||||||
'professional_law',
|
"professional_law",
|
||||||
'professional_medicine',
|
"professional_medicine",
|
||||||
'professional_psychology',
|
"professional_psychology",
|
||||||
'public_relations',
|
"public_relations",
|
||||||
'security_study',
|
"security_study",
|
||||||
'sociology',
|
"sociology",
|
||||||
'sports_science',
|
"sports_science",
|
||||||
'traditional_chinese_medicine',
|
"traditional_chinese_medicine",
|
||||||
'virology',
|
"virology",
|
||||||
'world_history',
|
"world_history",
|
||||||
'world_religions',
|
"world_religions",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -136,25 +136,19 @@ class MMLU(datasets.GeneratorBasedBuilder):
|
|||||||
datasets.SplitGenerator(
|
datasets.SplitGenerator(
|
||||||
name=datasets.Split.TEST,
|
name=datasets.Split.TEST,
|
||||||
gen_kwargs={
|
gen_kwargs={
|
||||||
"filepath": os.path.join(
|
"filepath": os.path.join(data_dir, "data", "test", f"{task_name}_test.csv"),
|
||||||
data_dir, "data", "test", f"{task_name}_test.csv"
|
|
||||||
),
|
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
datasets.SplitGenerator(
|
datasets.SplitGenerator(
|
||||||
name=datasets.Split.VALIDATION,
|
name=datasets.Split.VALIDATION,
|
||||||
gen_kwargs={
|
gen_kwargs={
|
||||||
"filepath": os.path.join(
|
"filepath": os.path.join(data_dir, "data", "val", f"{task_name}_val.csv"),
|
||||||
data_dir, "data", "val", f"{task_name}_val.csv"
|
|
||||||
),
|
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
datasets.SplitGenerator(
|
datasets.SplitGenerator(
|
||||||
name=datasets.Split.TRAIN,
|
name=datasets.Split.TRAIN,
|
||||||
gen_kwargs={
|
gen_kwargs={
|
||||||
"filepath": os.path.join(
|
"filepath": os.path.join(data_dir, "data", "dev", f"{task_name}_dev.csv"),
|
||||||
data_dir, "data", "dev", f"{task_name}_dev.csv"
|
|
||||||
),
|
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|||||||
229
examples/README.md
Normal file
229
examples/README.md
Normal file
@@ -0,0 +1,229 @@
|
|||||||
|
We provide a diverse set of examples for fine-tuning LLMs.
|
||||||
|
|
||||||
|
Make sure to execute these commands in the `LLaMA-Factory` directory.
|
||||||
|
|
||||||
|
## Table of Contents
|
||||||
|
|
||||||
|
- [LoRA Fine-Tuning on a Single GPU](#lora-fine-tuning-on-a-single-gpu)
|
||||||
|
- [QLoRA Fine-Tuning on a Single GPU](#qlora-fine-tuning-on-a-single-gpu)
|
||||||
|
- [LoRA Fine-Tuning on Multiple GPUs](#lora-fine-tuning-on-multiple-gpus)
|
||||||
|
- [LoRA Fine-Tuning on Multiple NPUs](#lora-fine-tuning-on-multiple-npus)
|
||||||
|
- [Full-Parameter Fine-Tuning on Multiple GPUs](#full-parameter-fine-tuning-on-multiple-gpus)
|
||||||
|
- [Merging LoRA Adapters and Quantization](#merging-lora-adapters-and-quantization)
|
||||||
|
- [Inferring LoRA Fine-Tuned Models](#inferring-lora-fine-tuned-models)
|
||||||
|
- [Extras](#extras)
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
### LoRA Fine-Tuning on a Single GPU
|
||||||
|
|
||||||
|
#### (Continuous) Pre-Training
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_pretrain.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Supervised Fine-Tuning
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Multimodal Supervised Fine-Tuning
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llava1_5_lora_sft.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Reward Modeling
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_reward.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### PPO Training
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_ppo.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### DPO Training
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_dpo.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### ORPO Training
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_orpo.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Preprocess Dataset
|
||||||
|
|
||||||
|
This is useful for large datasets: set `tokenized_path` in the config to save the preprocessed dataset and load it in later runs.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_preprocess.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Evaluating on MMLU/CMMLU/C-Eval Benchmarks
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli eval examples/lora_single_gpu/llama3_lora_eval.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Batch Predicting and Computing BLEU and ROUGE Scores
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_predict.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
### QLoRA Fine-Tuning on a Single GPU
|
||||||
|
|
||||||
|
#### Supervised Fine-Tuning with 4/8-bit Bitsandbytes Quantization (Recommended)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Supervised Fine-Tuning with 4/8-bit GPTQ Quantization
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Supervised Fine-Tuning with 4-bit AWQ Quantization
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_awq.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Supervised Fine-Tuning with 2-bit AQLM Quantization
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
### LoRA Fine-Tuning on Multiple GPUs
|
||||||
|
|
||||||
|
#### Supervised Fine-Tuning with Accelerate on Single Node
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash examples/lora_multi_gpu/single_node.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Supervised Fine-Tuning with Accelerate on Multiple Nodes
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash examples/lora_multi_gpu/multi_node.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Supervised Fine-Tuning with DeepSpeed ZeRO-3 (Weight Sharding)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash examples/lora_multi_gpu/ds_zero3.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
### LoRA Fine-Tuning on Multiple NPUs
|
||||||
|
|
||||||
|
#### Supervised Fine-Tuning with DeepSpeed ZeRO-0
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash examples/lora_multi_npu/ds_zero0.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
### Full-Parameter Fine-Tuning on Multiple GPUs
|
||||||
|
|
||||||
|
#### Supervised Fine-Tuning with DeepSpeed on Single Node
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash examples/full_multi_gpu/single_node.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Supervised Fine-Tuning with DeepSpeed on Multiple Nodes
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash examples/full_multi_gpu/multi_node.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Batch Predicting and Computing BLEU and ROUGE Scores
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash examples/full_multi_gpu/predict.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
### Merging LoRA Adapters and Quantization
|
||||||
|
|
||||||
|
#### Merge LoRA Adapters
|
||||||
|
|
||||||
|
Note: DO NOT use a quantized model or the `quantization_bit` option when merging LoRA adapters.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Quantizing Model using AutoGPTQ
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_gptq.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
### Inferring LoRA Fine-Tuned Models
|
||||||
|
|
||||||
|
#### Use CLI
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/merge_lora/llama3_lora_sft.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Use Web UI
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli webchat examples/merge_lora/llama3_lora_sft.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Launch OpenAI-style API
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli api examples/merge_lora/llama3_lora_sft.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
### Extras
|
||||||
|
|
||||||
|
#### Full-Parameter Fine-Tuning using GaLore
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/galore/llama3_full_sft.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Full-Parameter Fine-Tuning using BAdam
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/badam/llama3_full_sft.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### LoRA+ Fine-Tuning
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/loraplus/llama3_lora_sft.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Mixture-of-Depths Fine-Tuning
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/mod/llama3_full_sft.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### LLaMA-Pro Fine-Tuning
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash examples/extras/llama_pro/expand.sh
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### FSDP+QLoRA Fine-Tuning
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash examples/extras/fsdp_qlora/single_node.sh
|
||||||
|
```
|
||||||
229
examples/README_zh.md
Normal file
229
examples/README_zh.md
Normal file
@@ -0,0 +1,229 @@
|
|||||||
|
我们提供了多样化的大模型微调示例脚本。
|
||||||
|
|
||||||
|
请确保在 `LLaMA-Factory` 目录下执行下述命令。
|
||||||
|
|
||||||
|
## 目录
|
||||||
|
|
||||||
|
- [单 GPU LoRA 微调](#单-gpu-lora-微调)
|
||||||
|
- [单 GPU QLoRA 微调](#单-gpu-qlora-微调)
|
||||||
|
- [多 GPU LoRA 微调](#多-gpu-lora-微调)
|
||||||
|
- [多 NPU LoRA 微调](#多-npu-lora-微调)
|
||||||
|
- [多 GPU 全参数微调](#多-gpu-全参数微调)
|
||||||
|
- [合并 LoRA 适配器与模型量化](#合并-lora-适配器与模型量化)
|
||||||
|
- [推理 LoRA 模型](#推理-lora-模型)
|
||||||
|
- [杂项](#杂项)
|
||||||
|
|
||||||
|
## 示例
|
||||||
|
|
||||||
|
### 单 GPU LoRA 微调
|
||||||
|
|
||||||
|
#### (增量)预训练
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_pretrain.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 指令监督微调
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 多模态指令监督微调
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llava1_5_lora_sft.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 奖励模型训练
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_reward.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### PPO 训练
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_ppo.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### DPO 训练
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_dpo.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### ORPO 训练
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_orpo.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 预处理数据集
|
||||||
|
|
||||||
|
对于大数据集有帮助,在配置中使用 `tokenized_path` 以加载预处理后的数据集。
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_preprocess.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 在 MMLU/CMMLU/C-Eval 上评估
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli eval examples/lora_single_gpu/llama3_lora_eval.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 批量预测并计算 BLEU 和 ROUGE 分数
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_predict.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
### 单 GPU QLoRA 微调
|
||||||
|
|
||||||
|
#### 基于 4/8 比特 Bitsandbytes 量化进行指令监督微调(推荐)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 基于 4/8 比特 GPTQ 量化进行指令监督微调
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 基于 4 比特 AWQ 量化进行指令监督微调
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_awq.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 基于 2 比特 AQLM 量化进行指令监督微调
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
### 多 GPU LoRA 微调
|
||||||
|
|
||||||
|
#### 使用 Accelerate 进行单节点训练
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash examples/lora_multi_gpu/single_node.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 使用 Accelerate 进行多节点训练
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash examples/lora_multi_gpu/multi_node.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 使用 DeepSpeed ZeRO-3 平均分配显存
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash examples/lora_multi_gpu/ds_zero3.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
### 多 NPU LoRA 微调
|
||||||
|
|
||||||
|
#### 使用 DeepSpeed ZeRO-0 训练
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash examples/lora_multi_npu/ds_zero0.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
### 多 GPU 全参数微调
|
||||||
|
|
||||||
|
#### 使用 DeepSpeed 进行单节点训练
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash examples/full_multi_gpu/single_node.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 使用 DeepSpeed 进行多节点训练
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash examples/full_multi_gpu/multi_node.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 批量预测并计算 BLEU 和 ROUGE 分数
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash examples/full_multi_gpu/predict.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
### 合并 LoRA 适配器与模型量化
|
||||||
|
|
||||||
|
#### 合并 LoRA 适配器
|
||||||
|
|
||||||
|
注:请勿使用量化后的模型或 `quantization_bit` 参数来合并 LoRA 适配器。
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 使用 AutoGPTQ 量化模型
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_gptq.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
### 推理 LoRA 模型
|
||||||
|
|
||||||
|
#### 使用命令行接口
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/merge_lora/llama3_lora_sft.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 使用浏览器界面
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli webchat examples/merge_lora/llama3_lora_sft.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 启动 OpenAI 风格 API
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli api examples/merge_lora/llama3_lora_sft.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
### 杂项
|
||||||
|
|
||||||
|
#### 使用 GaLore 进行全参数训练
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/galore/llama3_full_sft.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 使用 BAdam 进行全参数训练
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/badam/llama3_full_sft.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### LoRA+ 微调
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/loraplus/llama3_lora_sft.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 深度混合微调
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/mod/llama3_full_sft.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### LLaMA-Pro 微调
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash examples/extras/llama_pro/expand.sh
|
||||||
|
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### FSDP+QLoRA 微调
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash examples/extras/fsdp_qlora/single_node.sh
|
||||||
|
```
|
||||||
@@ -15,8 +15,8 @@ fsdp_config:
|
|||||||
machine_rank: 0
|
machine_rank: 0
|
||||||
main_training_function: main
|
main_training_function: main
|
||||||
mixed_precision: fp16
|
mixed_precision: fp16
|
||||||
num_machines: 1
|
num_machines: 1 # the number of nodes
|
||||||
num_processes: 2
|
num_processes: 2 # the number of GPUs in all nodes
|
||||||
rdzv_backend: static
|
rdzv_backend: static
|
||||||
same_network: true
|
same_network: true
|
||||||
tpu_env: []
|
tpu_env: []
|
||||||
|
|||||||
@@ -8,8 +8,8 @@ main_process_ip: 192.168.0.1
|
|||||||
main_process_port: 29555
|
main_process_port: 29555
|
||||||
main_training_function: main
|
main_training_function: main
|
||||||
mixed_precision: fp16
|
mixed_precision: fp16
|
||||||
num_machines: 2
|
num_machines: 2 # the number of nodes
|
||||||
num_processes: 16
|
num_processes: 8 # the number of GPUs in all nodes
|
||||||
rdzv_backend: static
|
rdzv_backend: static
|
||||||
same_network: true
|
same_network: true
|
||||||
tpu_env: []
|
tpu_env: []
|
||||||
|
|||||||
@@ -6,8 +6,8 @@ gpu_ids: all
|
|||||||
machine_rank: 0
|
machine_rank: 0
|
||||||
main_training_function: main
|
main_training_function: main
|
||||||
mixed_precision: fp16
|
mixed_precision: fp16
|
||||||
num_machines: 1
|
num_machines: 1 # the number of nodes
|
||||||
num_processes: 4
|
num_processes: 4 # the number of GPUs in all nodes
|
||||||
rdzv_backend: static
|
rdzv_backend: static
|
||||||
same_network: true
|
same_network: true
|
||||||
tpu_env: []
|
tpu_env: []
|
||||||
|
|||||||
@@ -8,8 +8,8 @@ main_process_ip: 192.168.0.1
|
|||||||
main_process_port: 29555
|
main_process_port: 29555
|
||||||
main_training_function: main
|
main_training_function: main
|
||||||
mixed_precision: fp16
|
mixed_precision: fp16
|
||||||
num_machines: 2
|
num_machines: 2 # the number of nodes
|
||||||
num_processes: 16
|
num_processes: 8 # the number of GPUs in all nodes
|
||||||
rdzv_backend: static
|
rdzv_backend: static
|
||||||
same_network: true
|
same_network: true
|
||||||
tpu_env: []
|
tpu_env: []
|
||||||
|
|||||||
41
examples/extras/badam/llama3_lora_sft.yaml
Normal file
41
examples/extras/badam/llama3_lora_sft.yaml
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
# model
|
||||||
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
|
||||||
|
# method
|
||||||
|
stage: sft
|
||||||
|
do_train: true
|
||||||
|
finetuning_type: full
|
||||||
|
use_badam: true
|
||||||
|
badam_switch_mode: descending
|
||||||
|
badam_switch_interval: 50
|
||||||
|
badam_verbose: 2
|
||||||
|
|
||||||
|
# dataset
|
||||||
|
dataset: identity,alpaca_gpt4_en
|
||||||
|
template: llama3
|
||||||
|
cutoff_len: 1024
|
||||||
|
max_samples: 1000
|
||||||
|
overwrite_cache: true
|
||||||
|
preprocessing_num_workers: 16
|
||||||
|
|
||||||
|
# output
|
||||||
|
output_dir: saves/llama3-8b/full/sft
|
||||||
|
logging_steps: 10
|
||||||
|
save_steps: 500
|
||||||
|
plot_loss: true
|
||||||
|
overwrite_output_dir: true
|
||||||
|
|
||||||
|
# train
|
||||||
|
per_device_train_batch_size: 1
|
||||||
|
gradient_accumulation_steps: 8
|
||||||
|
learning_rate: 0.0001
|
||||||
|
num_train_epochs: 3.0
|
||||||
|
lr_scheduler_type: cosine
|
||||||
|
warmup_steps: 0.1
|
||||||
|
pure_bf16: true
|
||||||
|
|
||||||
|
# eval
|
||||||
|
val_size: 0.1
|
||||||
|
per_device_eval_batch_size: 1
|
||||||
|
evaluation_strategy: steps
|
||||||
|
eval_steps: 500
|
||||||
42
examples/extras/fsdp_qlora/llama3_lora_sft.yaml
Normal file
42
examples/extras/fsdp_qlora/llama3_lora_sft.yaml
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
# model
|
||||||
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
quantization_bit: 4
|
||||||
|
|
||||||
|
# method
|
||||||
|
stage: sft
|
||||||
|
do_train: true
|
||||||
|
finetuning_type: lora
|
||||||
|
lora_target: q_proj,v_proj
|
||||||
|
|
||||||
|
# ddp
|
||||||
|
ddp_timeout: 180000000
|
||||||
|
|
||||||
|
# dataset
|
||||||
|
dataset: identity,alpaca_gpt4_en
|
||||||
|
template: llama3
|
||||||
|
cutoff_len: 1024
|
||||||
|
max_samples: 1000
|
||||||
|
overwrite_cache: true
|
||||||
|
preprocessing_num_workers: 16
|
||||||
|
|
||||||
|
# output
|
||||||
|
output_dir: saves/llama3-8b/lora/sft
|
||||||
|
logging_steps: 10
|
||||||
|
save_steps: 500
|
||||||
|
plot_loss: true
|
||||||
|
overwrite_output_dir: true
|
||||||
|
|
||||||
|
# train
|
||||||
|
per_device_train_batch_size: 1
|
||||||
|
gradient_accumulation_steps: 8
|
||||||
|
learning_rate: 0.0001
|
||||||
|
num_train_epochs: 3.0
|
||||||
|
lr_scheduler_type: cosine
|
||||||
|
warmup_steps: 0.1
|
||||||
|
fp16: true
|
||||||
|
|
||||||
|
# eval
|
||||||
|
val_size: 0.1
|
||||||
|
per_device_eval_batch_size: 1
|
||||||
|
evaluation_strategy: steps
|
||||||
|
eval_steps: 500
|
||||||
10
examples/extras/fsdp_qlora/single_node.sh
Normal file
10
examples/extras/fsdp_qlora/single_node.sh
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# DO NOT use GPTQ/AWQ quantized models with FSDP+QLoRA
|
||||||
|
|
||||||
|
pip install "transformers>=4.39.1"
|
||||||
|
pip install "accelerate>=0.28.0"
|
||||||
|
pip install "bitsandbytes>=0.43.0"
|
||||||
|
|
||||||
|
CUDA_VISIBLE_DEVICES=0,1 accelerate launch \
|
||||||
|
--config_file examples/accelerate/fsdp_config.yaml \
|
||||||
|
src/train.py examples/extras/fsdp_qlora/llama3_lora_sft.yaml
|
||||||
42
examples/extras/galore/llama3_full_sft.yaml
Normal file
42
examples/extras/galore/llama3_full_sft.yaml
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
# model
|
||||||
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
|
||||||
|
# method
|
||||||
|
stage: sft
|
||||||
|
do_train: true
|
||||||
|
finetuning_type: full
|
||||||
|
use_galore: true
|
||||||
|
galore_layerwise: true
|
||||||
|
galore_target: mlp,self_attn
|
||||||
|
galore_rank: 128
|
||||||
|
galore_scale: 2.0
|
||||||
|
|
||||||
|
# dataset
|
||||||
|
dataset: identity,alpaca_gpt4_en
|
||||||
|
template: llama3
|
||||||
|
cutoff_len: 1024
|
||||||
|
max_samples: 1000
|
||||||
|
overwrite_cache: true
|
||||||
|
preprocessing_num_workers: 16
|
||||||
|
|
||||||
|
# output
|
||||||
|
output_dir: saves/llama3-8b/full/sft
|
||||||
|
logging_steps: 10
|
||||||
|
save_steps: 500
|
||||||
|
plot_loss: true
|
||||||
|
overwrite_output_dir: true
|
||||||
|
|
||||||
|
# train
|
||||||
|
per_device_train_batch_size: 1
|
||||||
|
gradient_accumulation_steps: 1
|
||||||
|
learning_rate: 0.0001
|
||||||
|
num_train_epochs: 3.0
|
||||||
|
lr_scheduler_type: cosine
|
||||||
|
warmup_steps: 0.1
|
||||||
|
pure_bf16: true
|
||||||
|
|
||||||
|
# eval
|
||||||
|
val_size: 0.1
|
||||||
|
per_device_eval_batch_size: 1
|
||||||
|
evaluation_strategy: steps
|
||||||
|
eval_steps: 500
|
||||||
@@ -1,35 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \
|
|
||||||
--stage sft \
|
|
||||||
--do_train \
|
|
||||||
--model_name_or_path meta-llama/Llama-2-7b-hf \
|
|
||||||
--dataset alpaca_gpt4_en,glaive_toolcall \
|
|
||||||
--dataset_dir ../../../data \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type full \
|
|
||||||
--use_galore \
|
|
||||||
--galore_layerwise \
|
|
||||||
--galore_target mlp,self_attn \
|
|
||||||
--galore_rank 128 \
|
|
||||||
--output_dir ../../../saves/LLaMA2-7B/galore/sft \
|
|
||||||
--overwrite_cache \
|
|
||||||
--overwrite_output_dir \
|
|
||||||
--cutoff_len 1024 \
|
|
||||||
--preprocessing_num_workers 16 \
|
|
||||||
--per_device_train_batch_size 1 \
|
|
||||||
--per_device_eval_batch_size 1 \
|
|
||||||
--gradient_accumulation_steps 1 \
|
|
||||||
--lr_scheduler_type cosine \
|
|
||||||
--logging_steps 10 \
|
|
||||||
--warmup_steps 20 \
|
|
||||||
--save_steps 100 \
|
|
||||||
--eval_steps 100 \
|
|
||||||
--evaluation_strategy steps \
|
|
||||||
--load_best_model_at_end \
|
|
||||||
--learning_rate 5e-5 \
|
|
||||||
--num_train_epochs 3.0 \
|
|
||||||
--max_samples 3000 \
|
|
||||||
--val_size 0.1 \
|
|
||||||
--plot_loss \
|
|
||||||
--pure_bf16
|
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
python ../../../scripts/llama_pro.py \
|
python scripts/llama_pro.py \
|
||||||
--model_name_or_path meta-llama/Llama-2-7b-hf \
|
--model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct \
|
||||||
--output_dir ../../../models/llama2-7b-pro \
|
--output_dir models/llama3-8b-instruct-pro \
|
||||||
--num_expand 8
|
--num_expand 8
|
||||||
|
|||||||
40
examples/extras/llama_pro/llama3_freeze_sft.yaml
Normal file
40
examples/extras/llama_pro/llama3_freeze_sft.yaml
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
# model
|
||||||
|
model_name_or_path: models/llama3-8b-instruct-pro
|
||||||
|
|
||||||
|
# method
|
||||||
|
stage: sft
|
||||||
|
do_train: true
|
||||||
|
finetuning_type: freeze
|
||||||
|
freeze_trainable_layers: 8
|
||||||
|
freeze_trainable_modules: all
|
||||||
|
use_llama_pro: true
|
||||||
|
|
||||||
|
# dataset
|
||||||
|
dataset: identity,alpaca_gpt4_en
|
||||||
|
template: llama3
|
||||||
|
cutoff_len: 1024
|
||||||
|
max_samples: 1000
|
||||||
|
overwrite_cache: true
|
||||||
|
preprocessing_num_workers: 16
|
||||||
|
|
||||||
|
# output
|
||||||
|
output_dir: saves/llama3-8b-instruct-pro/freeze/sft
|
||||||
|
logging_steps: 10
|
||||||
|
save_steps: 500
|
||||||
|
plot_loss: true
|
||||||
|
overwrite_output_dir: true
|
||||||
|
|
||||||
|
# train
|
||||||
|
per_device_train_batch_size: 1
|
||||||
|
gradient_accumulation_steps: 8
|
||||||
|
learning_rate: 0.0001
|
||||||
|
num_train_epochs: 3.0
|
||||||
|
lr_scheduler_type: cosine
|
||||||
|
warmup_steps: 0.1
|
||||||
|
fp16: true
|
||||||
|
|
||||||
|
# eval
|
||||||
|
val_size: 0.1
|
||||||
|
per_device_eval_batch_size: 1
|
||||||
|
evaluation_strategy: steps
|
||||||
|
eval_steps: 500
|
||||||
@@ -1,34 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \
|
|
||||||
--stage sft \
|
|
||||||
--do_train \
|
|
||||||
--model_name_or_path ../../../models/llama2-7b-pro \
|
|
||||||
--dataset alpaca_gpt4_en,glaive_toolcall \
|
|
||||||
--dataset_dir ../../../data \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type freeze \
|
|
||||||
--name_module_trainable all \
|
|
||||||
--num_layer_trainable 8 \
|
|
||||||
--use_llama_pro \
|
|
||||||
--output_dir ../../../saves/LLaMA2-7B-Pro/lora/sft \
|
|
||||||
--overwrite_cache \
|
|
||||||
--overwrite_output_dir \
|
|
||||||
--cutoff_len 1024 \
|
|
||||||
--preprocessing_num_workers 16 \
|
|
||||||
--per_device_train_batch_size 1 \
|
|
||||||
--per_device_eval_batch_size 1 \
|
|
||||||
--gradient_accumulation_steps 8 \
|
|
||||||
--lr_scheduler_type cosine \
|
|
||||||
--logging_steps 10 \
|
|
||||||
--warmup_steps 20 \
|
|
||||||
--save_steps 100 \
|
|
||||||
--eval_steps 100 \
|
|
||||||
--evaluation_strategy steps \
|
|
||||||
--load_best_model_at_end \
|
|
||||||
--learning_rate 5e-5 \
|
|
||||||
--num_train_epochs 3.0 \
|
|
||||||
--max_samples 3000 \
|
|
||||||
--val_size 0.1 \
|
|
||||||
--plot_loss \
|
|
||||||
--fp16
|
|
||||||
39
examples/extras/loraplus/llama3_lora_sft.yaml
Normal file
39
examples/extras/loraplus/llama3_lora_sft.yaml
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
# model
|
||||||
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
|
||||||
|
# method
|
||||||
|
stage: sft
|
||||||
|
do_train: true
|
||||||
|
finetuning_type: lora
|
||||||
|
lora_target: q_proj,v_proj
|
||||||
|
loraplus_lr_ratio: 16.0
|
||||||
|
|
||||||
|
# dataset
|
||||||
|
dataset: identity,alpaca_gpt4_en
|
||||||
|
template: llama3
|
||||||
|
cutoff_len: 1024
|
||||||
|
max_samples: 1000
|
||||||
|
overwrite_cache: true
|
||||||
|
preprocessing_num_workers: 16
|
||||||
|
|
||||||
|
# output
|
||||||
|
output_dir: saves/llama3-8b/lora/sft
|
||||||
|
logging_steps: 10
|
||||||
|
save_steps: 500
|
||||||
|
plot_loss: true
|
||||||
|
overwrite_output_dir: true
|
||||||
|
|
||||||
|
# train
|
||||||
|
per_device_train_batch_size: 1
|
||||||
|
gradient_accumulation_steps: 8
|
||||||
|
learning_rate: 0.0001
|
||||||
|
num_train_epochs: 3.0
|
||||||
|
lr_scheduler_type: cosine
|
||||||
|
warmup_steps: 0.1
|
||||||
|
fp16: true
|
||||||
|
|
||||||
|
# eval
|
||||||
|
val_size: 0.1
|
||||||
|
per_device_eval_batch_size: 1
|
||||||
|
evaluation_strategy: steps
|
||||||
|
eval_steps: 500
|
||||||
@@ -1,33 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
|
|
||||||
--stage sft \
|
|
||||||
--do_train \
|
|
||||||
--model_name_or_path meta-llama/Llama-2-7b-hf \
|
|
||||||
--dataset alpaca_gpt4_en,glaive_toolcall \
|
|
||||||
--dataset_dir ../../data \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--lora_target q_proj,v_proj \
|
|
||||||
--output_dir ../../saves/LLaMA2-7B/loraplus/sft \
|
|
||||||
--overwrite_cache \
|
|
||||||
--overwrite_output_dir \
|
|
||||||
--cutoff_len 1024 \
|
|
||||||
--preprocessing_num_workers 16 \
|
|
||||||
--per_device_train_batch_size 1 \
|
|
||||||
--per_device_eval_batch_size 1 \
|
|
||||||
--gradient_accumulation_steps 8 \
|
|
||||||
--lr_scheduler_type cosine \
|
|
||||||
--logging_steps 10 \
|
|
||||||
--warmup_steps 20 \
|
|
||||||
--save_steps 100 \
|
|
||||||
--eval_steps 100 \
|
|
||||||
--evaluation_strategy steps \
|
|
||||||
--load_best_model_at_end \
|
|
||||||
--learning_rate 5e-5 \
|
|
||||||
--num_train_epochs 3.0 \
|
|
||||||
--max_samples 3000 \
|
|
||||||
--val_size 0.1 \
|
|
||||||
--plot_loss \
|
|
||||||
--fp16 \
|
|
||||||
--loraplus_lr_ratio 16.0
|
|
||||||
39
examples/extras/mod/llama3_full_sft.yaml
Normal file
39
examples/extras/mod/llama3_full_sft.yaml
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
# model
|
||||||
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
|
||||||
|
# method
|
||||||
|
stage: sft
|
||||||
|
do_train: true
|
||||||
|
finetuning_type: full
|
||||||
|
mixture_of_depths: convert
|
||||||
|
|
||||||
|
# dataset
|
||||||
|
dataset: identity,alpaca_gpt4_en
|
||||||
|
template: llama3
|
||||||
|
cutoff_len: 1024
|
||||||
|
max_samples: 1000
|
||||||
|
overwrite_cache: true
|
||||||
|
preprocessing_num_workers: 16
|
||||||
|
|
||||||
|
# output
|
||||||
|
output_dir: saves/llama3-8b-mod/full/sft
|
||||||
|
logging_steps: 10
|
||||||
|
save_steps: 500
|
||||||
|
plot_loss: true
|
||||||
|
overwrite_output_dir: true
|
||||||
|
|
||||||
|
# train
|
||||||
|
per_device_train_batch_size: 1
|
||||||
|
gradient_accumulation_steps: 8
|
||||||
|
optim: paged_adamw_8bit
|
||||||
|
learning_rate: 0.0001
|
||||||
|
num_train_epochs: 3.0
|
||||||
|
lr_scheduler_type: cosine
|
||||||
|
warmup_steps: 0.1
|
||||||
|
pure_bf16: true
|
||||||
|
|
||||||
|
# eval
|
||||||
|
val_size: 0.1
|
||||||
|
per_device_eval_batch_size: 1
|
||||||
|
evaluation_strategy: steps
|
||||||
|
eval_steps: 500
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
```bash
|
|
||||||
pip install "transformers>=4.39.1"
|
|
||||||
pip install "accelerate>=0.28.0"
|
|
||||||
pip install "bitsandbytes>=0.43.0"
|
|
||||||
```
|
|
||||||
@@ -1,33 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
CUDA_VISIBLE_DEVICES=0,1 accelerate launch \
|
|
||||||
--config_file ../accelerate/fsdp_config.yaml \
|
|
||||||
../../src/train_bash.py \
|
|
||||||
--stage sft \
|
|
||||||
--do_train \
|
|
||||||
--model_name_or_path meta-llama/Llama-2-70b-hf \
|
|
||||||
--dataset alpaca_gpt4_en,glaive_toolcall \
|
|
||||||
--dataset_dir ../../data \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--lora_target q_proj,v_proj \
|
|
||||||
--output_dir ../../saves/LLaMA2-70B/lora/sft \
|
|
||||||
--overwrite_cache \
|
|
||||||
--overwrite_output_dir \
|
|
||||||
--cutoff_len 1024 \
|
|
||||||
--per_device_train_batch_size 1 \
|
|
||||||
--per_device_eval_batch_size 1 \
|
|
||||||
--gradient_accumulation_steps 8 \
|
|
||||||
--lr_scheduler_type cosine \
|
|
||||||
--logging_steps 10 \
|
|
||||||
--save_steps 100 \
|
|
||||||
--eval_steps 100 \
|
|
||||||
--evaluation_strategy steps \
|
|
||||||
--load_best_model_at_end \
|
|
||||||
--learning_rate 5e-5 \
|
|
||||||
--num_train_epochs 3.0 \
|
|
||||||
--max_samples 3000 \
|
|
||||||
--val_size 0.1 \
|
|
||||||
--quantization_bit 4 \
|
|
||||||
--plot_loss \
|
|
||||||
--fp16
|
|
||||||
23
examples/full_multi_gpu/llama3_full_predict.yaml
Normal file
23
examples/full_multi_gpu/llama3_full_predict.yaml
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
# model
|
||||||
|
model_name_or_path: saves/llama3-8b/full/sft
|
||||||
|
|
||||||
|
# method
|
||||||
|
stage: sft
|
||||||
|
do_predict: true
|
||||||
|
finetuning_type: full
|
||||||
|
|
||||||
|
# dataset
|
||||||
|
dataset: identity,alpaca_gpt4_en
|
||||||
|
template: llama3
|
||||||
|
cutoff_len: 1024
|
||||||
|
max_samples: 50
|
||||||
|
overwrite_cache: true
|
||||||
|
preprocessing_num_workers: 16
|
||||||
|
|
||||||
|
# output
|
||||||
|
output_dir: saves/llama3-8b/full/predict
|
||||||
|
overwrite_output_dir: true
|
||||||
|
|
||||||
|
# eval
|
||||||
|
per_device_eval_batch_size: 1
|
||||||
|
predict_with_generate: true
|
||||||
41
examples/full_multi_gpu/llama3_full_sft.yaml
Normal file
41
examples/full_multi_gpu/llama3_full_sft.yaml
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
# model
|
||||||
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
|
||||||
|
# method
|
||||||
|
stage: sft
|
||||||
|
do_train: true
|
||||||
|
finetuning_type: full
|
||||||
|
|
||||||
|
# ddp
|
||||||
|
ddp_timeout: 180000000
|
||||||
|
deepspeed: examples/deepspeed/ds_z3_config.json
|
||||||
|
|
||||||
|
# dataset
|
||||||
|
dataset: identity,alpaca_gpt4_en
|
||||||
|
template: llama3
|
||||||
|
cutoff_len: 1024
|
||||||
|
max_samples: 1000
|
||||||
|
overwrite_cache: true
|
||||||
|
preprocessing_num_workers: 16
|
||||||
|
|
||||||
|
# output
|
||||||
|
output_dir: saves/llama3-8b/full/sft
|
||||||
|
logging_steps: 10
|
||||||
|
save_steps: 500
|
||||||
|
plot_loss: true
|
||||||
|
overwrite_output_dir: true
|
||||||
|
|
||||||
|
# train
|
||||||
|
per_device_train_batch_size: 1
|
||||||
|
gradient_accumulation_steps: 2
|
||||||
|
learning_rate: 0.0001
|
||||||
|
num_train_epochs: 3.0
|
||||||
|
lr_scheduler_type: cosine
|
||||||
|
warmup_steps: 0.1
|
||||||
|
fp16: true
|
||||||
|
|
||||||
|
# eval
|
||||||
|
val_size: 0.1
|
||||||
|
per_device_eval_batch_size: 1
|
||||||
|
evaluation_strategy: steps
|
||||||
|
eval_steps: 500
|
||||||
@@ -1,38 +1,15 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
python -m torch.distributed.run \
|
NPROC_PER_NODE=4
|
||||||
|
NNODES=2
|
||||||
|
RANK=0
|
||||||
|
MASTER_ADDR=192.168.0.1
|
||||||
|
MASTER_PORT=29500
|
||||||
|
|
||||||
|
CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun \
|
||||||
--nproc_per_node $NPROC_PER_NODE \
|
--nproc_per_node $NPROC_PER_NODE \
|
||||||
--nnodes $NNODES \
|
--nnodes $NNODES \
|
||||||
--node_rank $RANK \
|
--node_rank $RANK \
|
||||||
--master_addr $MASTER_ADDR \
|
--master_addr $MASTER_ADDR \
|
||||||
--master_port $MASTER_PORT \
|
--master_port $MASTER_PORT \
|
||||||
../../src/train_bash.py \
|
src/train.py examples/full_multi_gpu/llama3_full_sft.yaml
|
||||||
--deepspeed ../deepspeed/ds_z3_config.json \
|
|
||||||
--stage sft \
|
|
||||||
--do_train \
|
|
||||||
--model_name_or_path meta-llama/Llama-2-7b-hf \
|
|
||||||
--dataset alpaca_gpt4_en,glaive_toolcall \
|
|
||||||
--dataset_dir ../../data \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type full \
|
|
||||||
--output_dir ../../saves/LLaMA2-7B/full/sft \
|
|
||||||
--overwrite_cache \
|
|
||||||
--overwrite_output_dir \
|
|
||||||
--cutoff_len 1024 \
|
|
||||||
--preprocessing_num_workers 16 \
|
|
||||||
--per_device_train_batch_size 1 \
|
|
||||||
--per_device_eval_batch_size 1 \
|
|
||||||
--gradient_accumulation_steps 2 \
|
|
||||||
--lr_scheduler_type cosine \
|
|
||||||
--logging_steps 10 \
|
|
||||||
--warmup_steps 20 \
|
|
||||||
--save_steps 100 \
|
|
||||||
--eval_steps 100 \
|
|
||||||
--evaluation_strategy steps \
|
|
||||||
--learning_rate 5e-5 \
|
|
||||||
--num_train_epochs 3.0 \
|
|
||||||
--max_samples 3000 \
|
|
||||||
--val_size 0.1 \
|
|
||||||
--ddp_timeout 1800000 \
|
|
||||||
--plot_loss \
|
|
||||||
--fp16
|
|
||||||
|
|||||||
5
examples/full_multi_gpu/predict.sh
Normal file
5
examples/full_multi_gpu/predict.sh
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \
|
||||||
|
--config_file examples/accelerate/single_config.yaml \
|
||||||
|
src/train.py examples/full_multi_gpu/llama3_full_predict.yaml
|
||||||
@@ -1,32 +1,15 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
deepspeed --num_gpus 4 ../../src/train_bash.py \
|
NPROC_PER_NODE=4
|
||||||
--deepspeed ../deepspeed/ds_z3_config.json \
|
NNODES=1
|
||||||
--stage sft \
|
RANK=0
|
||||||
--do_train \
|
MASTER_ADDR=127.0.0.1
|
||||||
--model_name_or_path meta-llama/Llama-2-7b-hf \
|
MASTER_PORT=29500
|
||||||
--dataset alpaca_gpt4_en,glaive_toolcall \
|
|
||||||
--dataset_dir ../../data \
|
CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun \
|
||||||
--template default \
|
--nproc_per_node $NPROC_PER_NODE \
|
||||||
--finetuning_type full \
|
--nnodes $NNODES \
|
||||||
--output_dir ../../saves/LLaMA2-7B/full/sft \
|
--node_rank $RANK \
|
||||||
--overwrite_cache \
|
--master_addr $MASTER_ADDR \
|
||||||
--overwrite_output_dir \
|
--master_port $MASTER_PORT \
|
||||||
--cutoff_len 1024 \
|
src/train.py examples/full_multi_gpu/llama3_full_sft.yaml
|
||||||
--preprocessing_num_workers 16 \
|
|
||||||
--per_device_train_batch_size 1 \
|
|
||||||
--per_device_eval_batch_size 1 \
|
|
||||||
--gradient_accumulation_steps 2 \
|
|
||||||
--lr_scheduler_type cosine \
|
|
||||||
--logging_steps 10 \
|
|
||||||
--warmup_steps 20 \
|
|
||||||
--save_steps 100 \
|
|
||||||
--eval_steps 100 \
|
|
||||||
--evaluation_strategy steps \
|
|
||||||
--learning_rate 5e-5 \
|
|
||||||
--num_train_epochs 3.0 \
|
|
||||||
--max_samples 3000 \
|
|
||||||
--val_size 0.1 \
|
|
||||||
--ddp_timeout 1800000 \
|
|
||||||
--plot_loss \
|
|
||||||
--fp16
|
|
||||||
|
|||||||
2
examples/inference/llama3.yaml
Normal file
2
examples/inference/llama3.yaml
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
template: llama3
|
||||||
4
examples/inference/llama3_lora_sft.yaml
Normal file
4
examples/inference/llama3_lora_sft.yaml
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
adapter_name_or_path: saves/llama3-8b/lora/sft
|
||||||
|
template: llama3
|
||||||
|
finetuning_type: lora
|
||||||
4
examples/inference/llama3_vllm.yaml
Normal file
4
examples/inference/llama3_vllm.yaml
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
template: llama3
|
||||||
|
infer_backend: vllm
|
||||||
|
vllm_enforce_eager: true
|
||||||
15
examples/lora_multi_gpu/ds_zero3.sh
Normal file
15
examples/lora_multi_gpu/ds_zero3.sh
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
NPROC_PER_NODE=4
|
||||||
|
NNODES=1
|
||||||
|
RANK=0
|
||||||
|
MASTER_ADDR=127.0.0.1
|
||||||
|
MASTER_PORT=29500
|
||||||
|
|
||||||
|
CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun \
|
||||||
|
--nproc_per_node $NPROC_PER_NODE \
|
||||||
|
--nnodes $NNODES \
|
||||||
|
--node_rank $RANK \
|
||||||
|
--master_addr $MASTER_ADDR \
|
||||||
|
--master_port $MASTER_PORT \
|
||||||
|
src/train.py examples/lora_multi_gpu/llama3_lora_sft_ds.yaml
|
||||||
41
examples/lora_multi_gpu/llama3_lora_sft.yaml
Normal file
41
examples/lora_multi_gpu/llama3_lora_sft.yaml
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
# model
|
||||||
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
|
||||||
|
# method
|
||||||
|
stage: sft
|
||||||
|
do_train: true
|
||||||
|
finetuning_type: lora
|
||||||
|
lora_target: q_proj,v_proj
|
||||||
|
|
||||||
|
# ddp
|
||||||
|
ddp_timeout: 180000000
|
||||||
|
|
||||||
|
# dataset
|
||||||
|
dataset: identity,alpaca_gpt4_en
|
||||||
|
template: llama3
|
||||||
|
cutoff_len: 1024
|
||||||
|
max_samples: 1000
|
||||||
|
overwrite_cache: true
|
||||||
|
preprocessing_num_workers: 16
|
||||||
|
|
||||||
|
# output
|
||||||
|
output_dir: saves/llama3-8b/lora/sft
|
||||||
|
logging_steps: 10
|
||||||
|
save_steps: 500
|
||||||
|
plot_loss: true
|
||||||
|
overwrite_output_dir: true
|
||||||
|
|
||||||
|
# train
|
||||||
|
per_device_train_batch_size: 1
|
||||||
|
gradient_accumulation_steps: 2
|
||||||
|
learning_rate: 0.0001
|
||||||
|
num_train_epochs: 3.0
|
||||||
|
lr_scheduler_type: cosine
|
||||||
|
warmup_steps: 0.1
|
||||||
|
fp16: true
|
||||||
|
|
||||||
|
# eval
|
||||||
|
val_size: 0.1
|
||||||
|
per_device_eval_batch_size: 1
|
||||||
|
evaluation_strategy: steps
|
||||||
|
eval_steps: 500
|
||||||
42
examples/lora_multi_gpu/llama3_lora_sft_ds.yaml
Normal file
42
examples/lora_multi_gpu/llama3_lora_sft_ds.yaml
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
# model
|
||||||
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
|
||||||
|
# method
|
||||||
|
stage: sft
|
||||||
|
do_train: true
|
||||||
|
finetuning_type: lora
|
||||||
|
lora_target: q_proj,v_proj
|
||||||
|
|
||||||
|
# ddp
|
||||||
|
ddp_timeout: 180000000
|
||||||
|
deepspeed: examples/deepspeed/ds_z3_config.json
|
||||||
|
|
||||||
|
# dataset
|
||||||
|
dataset: identity,alpaca_gpt4_en
|
||||||
|
template: llama3
|
||||||
|
cutoff_len: 1024
|
||||||
|
max_samples: 1000
|
||||||
|
overwrite_cache: true
|
||||||
|
preprocessing_num_workers: 16
|
||||||
|
|
||||||
|
# output
|
||||||
|
output_dir: saves/llama3-8b/lora/sft
|
||||||
|
logging_steps: 10
|
||||||
|
save_steps: 500
|
||||||
|
plot_loss: true
|
||||||
|
overwrite_output_dir: true
|
||||||
|
|
||||||
|
# train
|
||||||
|
per_device_train_batch_size: 1
|
||||||
|
gradient_accumulation_steps: 2
|
||||||
|
learning_rate: 0.0001
|
||||||
|
num_train_epochs: 3.0
|
||||||
|
lr_scheduler_type: cosine
|
||||||
|
warmup_steps: 0.1
|
||||||
|
fp16: true
|
||||||
|
|
||||||
|
# eval
|
||||||
|
val_size: 0.1
|
||||||
|
per_device_eval_batch_size: 1
|
||||||
|
evaluation_strategy: steps
|
||||||
|
eval_steps: 500
|
||||||
@@ -1,35 +1,6 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
# also launch it on the slave machine using slave_config.yaml
|
||||||
|
|
||||||
CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \
|
CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \
|
||||||
--config_file ../accelerate/master_config.yaml \
|
--config_file examples/accelerate/master_config.yaml \
|
||||||
../../src/train_bash.py \
|
src/train.py examples/lora_multi_gpu/llama3_lora_sft.yaml
|
||||||
--stage sft \
|
|
||||||
--do_train \
|
|
||||||
--model_name_or_path meta-llama/Llama-2-7b-hf \
|
|
||||||
--dataset alpaca_gpt4_en,glaive_toolcall \
|
|
||||||
--dataset_dir ../../data \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--lora_target q_proj,v_proj \
|
|
||||||
--output_dir ../../saves/LLaMA2-7B/lora/sft \
|
|
||||||
--overwrite_cache \
|
|
||||||
--overwrite_output_dir \
|
|
||||||
--cutoff_len 1024 \
|
|
||||||
--preprocessing_num_workers 16 \
|
|
||||||
--per_device_train_batch_size 1 \
|
|
||||||
--per_device_eval_batch_size 1 \
|
|
||||||
--gradient_accumulation_steps 2 \
|
|
||||||
--lr_scheduler_type cosine \
|
|
||||||
--logging_steps 10 \
|
|
||||||
--warmup_steps 20 \
|
|
||||||
--save_steps 100 \
|
|
||||||
--eval_steps 100 \
|
|
||||||
--evaluation_strategy steps \
|
|
||||||
--load_best_model_at_end \
|
|
||||||
--learning_rate 5e-5 \
|
|
||||||
--num_train_epochs 3.0 \
|
|
||||||
--max_samples 3000 \
|
|
||||||
--val_size 0.1 \
|
|
||||||
--ddp_timeout 1800000 \
|
|
||||||
--plot_loss \
|
|
||||||
--fp16
|
|
||||||
|
|||||||
@@ -1,35 +1,5 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 accelerate launch \
|
CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \
|
||||||
--config_file ../accelerate/single_config.yaml \
|
--config_file examples/accelerate/single_config.yaml \
|
||||||
../../src/train_bash.py \
|
src/train.py examples/lora_multi_gpu/llama3_lora_sft.yaml
|
||||||
--stage sft \
|
|
||||||
--do_train \
|
|
||||||
--model_name_or_path meta-llama/Llama-2-7b-hf \
|
|
||||||
--dataset alpaca_gpt4_en,glaive_toolcall \
|
|
||||||
--dataset_dir ../../data \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--lora_target q_proj,v_proj \
|
|
||||||
--output_dir ../../saves/LLaMA2-7B/lora/sft \
|
|
||||||
--overwrite_cache \
|
|
||||||
--overwrite_output_dir \
|
|
||||||
--cutoff_len 1024 \
|
|
||||||
--preprocessing_num_workers 16 \
|
|
||||||
--per_device_train_batch_size 1 \
|
|
||||||
--per_device_eval_batch_size 1 \
|
|
||||||
--gradient_accumulation_steps 2 \
|
|
||||||
--lr_scheduler_type cosine \
|
|
||||||
--logging_steps 10 \
|
|
||||||
--warmup_steps 20 \
|
|
||||||
--save_steps 100 \
|
|
||||||
--eval_steps 100 \
|
|
||||||
--evaluation_strategy steps \
|
|
||||||
--load_best_model_at_end \
|
|
||||||
--learning_rate 5e-5 \
|
|
||||||
--num_train_epochs 3.0 \
|
|
||||||
--max_samples 3000 \
|
|
||||||
--val_size 0.1 \
|
|
||||||
--ddp_timeout 1800000 \
|
|
||||||
--plot_loss \
|
|
||||||
--fp16
|
|
||||||
|
|||||||
15
examples/lora_multi_npu/ds_zero0.sh
Normal file
15
examples/lora_multi_npu/ds_zero0.sh
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
NPROC_PER_NODE=4
|
||||||
|
NNODES=1
|
||||||
|
RANK=0
|
||||||
|
MASTER_ADDR=127.0.0.1
|
||||||
|
MASTER_PORT=29500
|
||||||
|
|
||||||
|
ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 torchrun \
|
||||||
|
--nproc_per_node $NPROC_PER_NODE \
|
||||||
|
--nnodes $NNODES \
|
||||||
|
--node_rank $RANK \
|
||||||
|
--master_addr $MASTER_ADDR \
|
||||||
|
--master_port $MASTER_PORT \
|
||||||
|
src/train.py examples/lora_multi_npu/llama3_lora_sft_ds.yaml
|
||||||
42
examples/lora_multi_npu/llama3_lora_sft_ds.yaml
Normal file
42
examples/lora_multi_npu/llama3_lora_sft_ds.yaml
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
# model
|
||||||
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
|
||||||
|
# method
|
||||||
|
stage: sft
|
||||||
|
do_train: true
|
||||||
|
finetuning_type: lora
|
||||||
|
lora_target: q_proj,v_proj
|
||||||
|
|
||||||
|
# ddp
|
||||||
|
ddp_timeout: 180000000
|
||||||
|
deepspeed: examples/deepspeed/ds_z0_config.json
|
||||||
|
|
||||||
|
# dataset
|
||||||
|
dataset: identity,alpaca_gpt4_en
|
||||||
|
template: llama3
|
||||||
|
cutoff_len: 1024
|
||||||
|
max_samples: 1000
|
||||||
|
overwrite_cache: true
|
||||||
|
preprocessing_num_workers: 16
|
||||||
|
|
||||||
|
# output
|
||||||
|
output_dir: saves/llama3-8b/lora/sft
|
||||||
|
logging_steps: 10
|
||||||
|
save_steps: 500
|
||||||
|
plot_loss: true
|
||||||
|
overwrite_output_dir: true
|
||||||
|
|
||||||
|
# train
|
||||||
|
per_device_train_batch_size: 1
|
||||||
|
gradient_accumulation_steps: 2
|
||||||
|
learning_rate: 0.0001
|
||||||
|
num_train_epochs: 3.0
|
||||||
|
lr_scheduler_type: cosine
|
||||||
|
warmup_steps: 0.1
|
||||||
|
fp16: true
|
||||||
|
|
||||||
|
# eval
|
||||||
|
val_size: 0.1
|
||||||
|
per_device_eval_batch_size: 1
|
||||||
|
evaluation_strategy: steps
|
||||||
|
eval_steps: 500
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
Usage:
|
|
||||||
|
|
||||||
- `pretrain.sh`: run pre-training (optional)
|
|
||||||
- `sft.sh`: run supervised fine-tuning
|
|
||||||
- `reward.sh`: run reward modeling (must be run after sft.sh)
|
|
||||||
- `ppo.sh`: run PPO training (must be run after sft.sh and reward.sh)
|
|
||||||
- `dpo.sh`: run DPO training (must be run after sft.sh)
|
|
||||||
- `predict.sh`: run prediction (must be run after sft.sh and dpo.sh)
|
|
||||||
@@ -1,35 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
|
|
||||||
--stage dpo \
|
|
||||||
--do_train \
|
|
||||||
--model_name_or_path meta-llama/Llama-2-7b-hf \
|
|
||||||
--adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \
|
|
||||||
--create_new_adapter \
|
|
||||||
--dataset comparison_gpt4_en \
|
|
||||||
--dataset_dir ../../data \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--lora_target q_proj,v_proj \
|
|
||||||
--output_dir ../../saves/LLaMA2-7B/lora/dpo \
|
|
||||||
--overwrite_cache \
|
|
||||||
--overwrite_output_dir \
|
|
||||||
--cutoff_len 1024 \
|
|
||||||
--preprocessing_num_workers 16 \
|
|
||||||
--per_device_train_batch_size 1 \
|
|
||||||
--per_device_eval_batch_size 1 \
|
|
||||||
--gradient_accumulation_steps 8 \
|
|
||||||
--lr_scheduler_type cosine \
|
|
||||||
--logging_steps 10 \
|
|
||||||
--warmup_steps 20 \
|
|
||||||
--save_steps 100 \
|
|
||||||
--eval_steps 100 \
|
|
||||||
--evaluation_strategy steps \
|
|
||||||
--load_best_model_at_end \
|
|
||||||
--learning_rate 1e-5 \
|
|
||||||
--num_train_epochs 1.0 \
|
|
||||||
--max_samples 1000 \
|
|
||||||
--val_size 0.1 \
|
|
||||||
--dpo_ftx 1.0 \
|
|
||||||
--plot_loss \
|
|
||||||
--fp16
|
|
||||||
39
examples/lora_single_gpu/llama3_lora_dpo.yaml
Normal file
39
examples/lora_single_gpu/llama3_lora_dpo.yaml
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
# model
|
||||||
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
|
||||||
|
# method
|
||||||
|
stage: dpo
|
||||||
|
do_train: true
|
||||||
|
finetuning_type: lora
|
||||||
|
lora_target: q_proj,v_proj
|
||||||
|
dpo_ftx: 1.0
|
||||||
|
|
||||||
|
# dataset
|
||||||
|
dataset: orca_rlhf
|
||||||
|
template: llama3
|
||||||
|
cutoff_len: 1024
|
||||||
|
max_samples: 1000
|
||||||
|
overwrite_cache: true
|
||||||
|
preprocessing_num_workers: 16
|
||||||
|
|
||||||
|
# output
|
||||||
|
output_dir: saves/llama3-8b/lora/dpo
|
||||||
|
logging_steps: 10
|
||||||
|
save_steps: 500
|
||||||
|
plot_loss: true
|
||||||
|
overwrite_output_dir: true
|
||||||
|
|
||||||
|
# train
|
||||||
|
per_device_train_batch_size: 1
|
||||||
|
gradient_accumulation_steps: 8
|
||||||
|
learning_rate: 0.00001
|
||||||
|
num_train_epochs: 3.0
|
||||||
|
lr_scheduler_type: cosine
|
||||||
|
warmup_steps: 0.1
|
||||||
|
fp16: true
|
||||||
|
|
||||||
|
# eval
|
||||||
|
val_size: 0.1
|
||||||
|
per_device_eval_batch_size: 1
|
||||||
|
evaluation_strategy: steps
|
||||||
|
eval_steps: 500
|
||||||
19
examples/lora_single_gpu/llama3_lora_eval.yaml
Normal file
19
examples/lora_single_gpu/llama3_lora_eval.yaml
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
# model
|
||||||
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
adapter_name_or_path: saves/llama3-8b/lora/sft
|
||||||
|
|
||||||
|
# method
|
||||||
|
finetuning_type: lora
|
||||||
|
|
||||||
|
# dataset
|
||||||
|
task: mmlu
|
||||||
|
split: test
|
||||||
|
template: fewshot
|
||||||
|
lang: en
|
||||||
|
n_shot: 5
|
||||||
|
|
||||||
|
# output
|
||||||
|
save_dir: saves/llama3-8b/lora/eval
|
||||||
|
|
||||||
|
# eval
|
||||||
|
batch_size: 4
|
||||||
38
examples/lora_single_gpu/llama3_lora_orpo.yaml
Normal file
38
examples/lora_single_gpu/llama3_lora_orpo.yaml
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
# model
|
||||||
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
|
||||||
|
# method
|
||||||
|
stage: orpo
|
||||||
|
do_train: true
|
||||||
|
finetuning_type: lora
|
||||||
|
lora_target: q_proj,v_proj
|
||||||
|
|
||||||
|
# dataset
|
||||||
|
dataset: orca_rlhf
|
||||||
|
template: llama3
|
||||||
|
cutoff_len: 1024
|
||||||
|
max_samples: 1000
|
||||||
|
overwrite_cache: true
|
||||||
|
preprocessing_num_workers: 16
|
||||||
|
|
||||||
|
# output
|
||||||
|
output_dir: saves/llama3-8b/lora/orpo
|
||||||
|
logging_steps: 10
|
||||||
|
save_steps: 500
|
||||||
|
plot_loss: true
|
||||||
|
overwrite_output_dir: true
|
||||||
|
|
||||||
|
# train
|
||||||
|
per_device_train_batch_size: 1
|
||||||
|
gradient_accumulation_steps: 8
|
||||||
|
learning_rate: 0.00001
|
||||||
|
num_train_epochs: 3.0
|
||||||
|
lr_scheduler_type: cosine
|
||||||
|
warmup_steps: 0.1
|
||||||
|
fp16: true
|
||||||
|
|
||||||
|
# eval
|
||||||
|
val_size: 0.1
|
||||||
|
per_device_eval_batch_size: 1
|
||||||
|
evaluation_strategy: steps
|
||||||
|
eval_steps: 500
|
||||||
38
examples/lora_single_gpu/llama3_lora_ppo.yaml
Normal file
38
examples/lora_single_gpu/llama3_lora_ppo.yaml
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
# model
|
||||||
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
reward_model: saves/llama3-8b/lora/reward
|
||||||
|
|
||||||
|
# method
|
||||||
|
stage: ppo
|
||||||
|
do_train: true
|
||||||
|
finetuning_type: lora
|
||||||
|
lora_target: q_proj,v_proj
|
||||||
|
|
||||||
|
# dataset
|
||||||
|
dataset: identity,alpaca_gpt4_en
|
||||||
|
template: llama3
|
||||||
|
cutoff_len: 1024
|
||||||
|
max_samples: 1000
|
||||||
|
overwrite_cache: true
|
||||||
|
preprocessing_num_workers: 16
|
||||||
|
|
||||||
|
# output
|
||||||
|
output_dir: saves/llama3-8b/lora/ppo
|
||||||
|
logging_steps: 10
|
||||||
|
save_steps: 500
|
||||||
|
plot_loss: true
|
||||||
|
overwrite_output_dir: true
|
||||||
|
|
||||||
|
# train
|
||||||
|
per_device_train_batch_size: 1
|
||||||
|
gradient_accumulation_steps: 8
|
||||||
|
learning_rate: 0.00001
|
||||||
|
num_train_epochs: 3.0
|
||||||
|
lr_scheduler_type: cosine
|
||||||
|
warmup_steps: 0.1
|
||||||
|
fp16: true
|
||||||
|
|
||||||
|
# generate
|
||||||
|
max_new_tokens: 512
|
||||||
|
top_k: 0
|
||||||
|
top_p: 0.9
|
||||||
24
examples/lora_single_gpu/llama3_lora_predict.yaml
Normal file
24
examples/lora_single_gpu/llama3_lora_predict.yaml
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
# model
|
||||||
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
adapter_name_or_path: saves/llama3-8b/lora/sft
|
||||||
|
|
||||||
|
# method
|
||||||
|
stage: sft
|
||||||
|
do_predict: true
|
||||||
|
finetuning_type: lora
|
||||||
|
|
||||||
|
# dataset
|
||||||
|
dataset: identity,alpaca_gpt4_en
|
||||||
|
template: llama3
|
||||||
|
cutoff_len: 1024
|
||||||
|
max_samples: 50
|
||||||
|
overwrite_cache: true
|
||||||
|
preprocessing_num_workers: 16
|
||||||
|
|
||||||
|
# output
|
||||||
|
output_dir: saves/llama3-8b/lora/predict
|
||||||
|
overwrite_output_dir: true
|
||||||
|
|
||||||
|
# eval
|
||||||
|
per_device_eval_batch_size: 1
|
||||||
|
predict_with_generate: true
|
||||||
37
examples/lora_single_gpu/llama3_lora_pretrain.yaml
Normal file
37
examples/lora_single_gpu/llama3_lora_pretrain.yaml
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
# model
|
||||||
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
|
||||||
|
# method
|
||||||
|
stage: pt
|
||||||
|
do_train: true
|
||||||
|
finetuning_type: lora
|
||||||
|
lora_target: q_proj,v_proj
|
||||||
|
|
||||||
|
# dataset
|
||||||
|
dataset: c4_demo
|
||||||
|
cutoff_len: 1024
|
||||||
|
max_samples: 1000
|
||||||
|
overwrite_cache: true
|
||||||
|
preprocessing_num_workers: 16
|
||||||
|
|
||||||
|
# output
|
||||||
|
output_dir: saves/llama3-8b/lora/sft
|
||||||
|
logging_steps: 10
|
||||||
|
save_steps: 500
|
||||||
|
plot_loss: true
|
||||||
|
overwrite_output_dir: true
|
||||||
|
|
||||||
|
# train
|
||||||
|
per_device_train_batch_size: 1
|
||||||
|
gradient_accumulation_steps: 8
|
||||||
|
learning_rate: 0.0001
|
||||||
|
num_train_epochs: 3.0
|
||||||
|
lr_scheduler_type: cosine
|
||||||
|
warmup_steps: 0.1
|
||||||
|
fp16: true
|
||||||
|
|
||||||
|
# eval
|
||||||
|
val_size: 0.1
|
||||||
|
per_device_eval_batch_size: 1
|
||||||
|
evaluation_strategy: steps
|
||||||
|
eval_steps: 500
|
||||||
38
examples/lora_single_gpu/llama3_lora_reward.yaml
Normal file
38
examples/lora_single_gpu/llama3_lora_reward.yaml
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
# model
|
||||||
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
|
||||||
|
# method
|
||||||
|
stage: rm
|
||||||
|
do_train: true
|
||||||
|
finetuning_type: lora
|
||||||
|
lora_target: q_proj,v_proj
|
||||||
|
|
||||||
|
# dataset
|
||||||
|
dataset: orca_rlhf
|
||||||
|
template: llama3
|
||||||
|
cutoff_len: 1024
|
||||||
|
max_samples: 1000
|
||||||
|
overwrite_cache: true
|
||||||
|
preprocessing_num_workers: 16
|
||||||
|
|
||||||
|
# output
|
||||||
|
output_dir: saves/llama3-8b/lora/reward
|
||||||
|
logging_steps: 10
|
||||||
|
save_steps: 500
|
||||||
|
plot_loss: true
|
||||||
|
overwrite_output_dir: true
|
||||||
|
|
||||||
|
# train
|
||||||
|
per_device_train_batch_size: 1
|
||||||
|
gradient_accumulation_steps: 8
|
||||||
|
learning_rate: 0.00001
|
||||||
|
num_train_epochs: 3.0
|
||||||
|
lr_scheduler_type: cosine
|
||||||
|
warmup_steps: 0.1
|
||||||
|
fp16: true
|
||||||
|
|
||||||
|
# eval
|
||||||
|
val_size: 0.1
|
||||||
|
per_device_eval_batch_size: 1
|
||||||
|
evaluation_strategy: steps
|
||||||
|
eval_steps: 500
|
||||||
38
examples/lora_single_gpu/llama3_lora_sft.yaml
Normal file
38
examples/lora_single_gpu/llama3_lora_sft.yaml
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
# model
|
||||||
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
|
||||||
|
# method
|
||||||
|
stage: sft
|
||||||
|
do_train: true
|
||||||
|
finetuning_type: lora
|
||||||
|
lora_target: q_proj,v_proj
|
||||||
|
|
||||||
|
# dataset
|
||||||
|
dataset: identity,alpaca_gpt4_en
|
||||||
|
template: llama3
|
||||||
|
cutoff_len: 1024
|
||||||
|
max_samples: 1000
|
||||||
|
overwrite_cache: true
|
||||||
|
preprocessing_num_workers: 16
|
||||||
|
|
||||||
|
# output
|
||||||
|
output_dir: saves/llama3-8b/lora/sft
|
||||||
|
logging_steps: 10
|
||||||
|
save_steps: 500
|
||||||
|
plot_loss: true
|
||||||
|
overwrite_output_dir: true
|
||||||
|
|
||||||
|
# train
|
||||||
|
per_device_train_batch_size: 1
|
||||||
|
gradient_accumulation_steps: 8
|
||||||
|
learning_rate: 0.0001
|
||||||
|
num_train_epochs: 3.0
|
||||||
|
lr_scheduler_type: cosine
|
||||||
|
warmup_steps: 0.1
|
||||||
|
fp16: true
|
||||||
|
|
||||||
|
# eval
|
||||||
|
val_size: 0.1
|
||||||
|
per_device_eval_batch_size: 1
|
||||||
|
evaluation_strategy: steps
|
||||||
|
eval_steps: 500
|
||||||
21
examples/lora_single_gpu/llama3_preprocess.yaml
Normal file
21
examples/lora_single_gpu/llama3_preprocess.yaml
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
# model
|
||||||
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
|
||||||
|
# method
|
||||||
|
stage: sft
|
||||||
|
do_train: true
|
||||||
|
finetuning_type: lora
|
||||||
|
lora_target: q_proj,v_proj
|
||||||
|
|
||||||
|
# dataset
|
||||||
|
dataset: identity,alpaca_gpt4_en
|
||||||
|
template: llama3
|
||||||
|
cutoff_len: 1024
|
||||||
|
max_samples: 1000
|
||||||
|
overwrite_cache: true
|
||||||
|
preprocessing_num_workers: 16
|
||||||
|
tokenized_path: saves/llama3-8b/dataset/sft
|
||||||
|
|
||||||
|
# output
|
||||||
|
output_dir: saves/llama3-8b/lora/sft
|
||||||
|
overwrite_output_dir: true
|
||||||
39
examples/lora_single_gpu/llava1_5_lora_sft.yaml
Normal file
39
examples/lora_single_gpu/llava1_5_lora_sft.yaml
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
# model
|
||||||
|
model_name_or_path: llava-hf/llava-1.5-7b-hf
|
||||||
|
visual_inputs: true
|
||||||
|
|
||||||
|
# method
|
||||||
|
stage: sft
|
||||||
|
do_train: true
|
||||||
|
finetuning_type: lora
|
||||||
|
lora_target: q_proj,v_proj
|
||||||
|
|
||||||
|
# dataset
|
||||||
|
dataset: mllm_demo
|
||||||
|
template: vicuna
|
||||||
|
cutoff_len: 1024
|
||||||
|
max_samples: 1000
|
||||||
|
overwrite_cache: true
|
||||||
|
preprocessing_num_workers: 16
|
||||||
|
|
||||||
|
# output
|
||||||
|
output_dir: saves/llava1_5-7b/lora/sft
|
||||||
|
logging_steps: 10
|
||||||
|
save_steps: 500
|
||||||
|
plot_loss: true
|
||||||
|
overwrite_output_dir: true
|
||||||
|
|
||||||
|
# train
|
||||||
|
per_device_train_batch_size: 1
|
||||||
|
gradient_accumulation_steps: 8
|
||||||
|
learning_rate: 0.0001
|
||||||
|
num_train_epochs: 3.0
|
||||||
|
lr_scheduler_type: cosine
|
||||||
|
warmup_steps: 0.1
|
||||||
|
fp16: true
|
||||||
|
|
||||||
|
# eval
|
||||||
|
val_size: 0.1
|
||||||
|
per_device_eval_batch_size: 1
|
||||||
|
evaluation_strategy: steps
|
||||||
|
eval_steps: 500
|
||||||
@@ -1,32 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
|
|
||||||
--stage ppo \
|
|
||||||
--do_train \
|
|
||||||
--model_name_or_path meta-llama/Llama-2-7b-hf \
|
|
||||||
--adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \
|
|
||||||
--create_new_adapter \
|
|
||||||
--dataset alpaca_gpt4_en \
|
|
||||||
--dataset_dir ../../data \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--lora_target q_proj,v_proj \
|
|
||||||
--reward_model ../../saves/LLaMA2-7B/lora/reward \
|
|
||||||
--output_dir ../../saves/LLaMA2-7B/lora/ppo \
|
|
||||||
--overwrite_cache \
|
|
||||||
--overwrite_output_dir \
|
|
||||||
--cutoff_len 512 \
|
|
||||||
--preprocessing_num_workers 16 \
|
|
||||||
--per_device_train_batch_size 1 \
|
|
||||||
--gradient_accumulation_steps 8 \
|
|
||||||
--lr_scheduler_type cosine \
|
|
||||||
--logging_steps 10 \
|
|
||||||
--save_steps 100 \
|
|
||||||
--learning_rate 1e-5 \
|
|
||||||
--num_train_epochs 1.0 \
|
|
||||||
--max_samples 1000 \
|
|
||||||
--top_k 0 \
|
|
||||||
--top_p 0.9 \
|
|
||||||
--max_new_tokens 256 \
|
|
||||||
--plot_loss \
|
|
||||||
--fp16
|
|
||||||
@@ -1,19 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
|
|
||||||
--stage sft \
|
|
||||||
--do_predict \
|
|
||||||
--model_name_or_path meta-llama/Llama-2-7b-hf \
|
|
||||||
--adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft,../../saves/LLaMA2-7B/lora/dpo \
|
|
||||||
--dataset alpaca_gpt4_en,glaive_toolcall \
|
|
||||||
--dataset_dir ../../data \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--output_dir ../../saves/LLaMA2-7B/lora/predict \
|
|
||||||
--overwrite_cache \
|
|
||||||
--overwrite_output_dir \
|
|
||||||
--cutoff_len 1024 \
|
|
||||||
--preprocessing_num_workers 16 \
|
|
||||||
--per_device_eval_batch_size 1 \
|
|
||||||
--max_samples 20 \
|
|
||||||
--predict_with_generate
|
|
||||||
@@ -1,31 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
|
|
||||||
--stage pt \
|
|
||||||
--do_train \
|
|
||||||
--model_name_or_path meta-llama/Llama-2-7b-hf \
|
|
||||||
--dataset c4_demo \
|
|
||||||
--dataset_dir ../../data \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--lora_target q_proj,v_proj \
|
|
||||||
--output_dir ../../saves/LLaMA2-7B/lora/pretrain \
|
|
||||||
--overwrite_cache \
|
|
||||||
--overwrite_output_dir \
|
|
||||||
--cutoff_len 1024 \
|
|
||||||
--preprocessing_num_workers 16 \
|
|
||||||
--per_device_train_batch_size 1 \
|
|
||||||
--per_device_eval_batch_size 1 \
|
|
||||||
--gradient_accumulation_steps 8 \
|
|
||||||
--lr_scheduler_type cosine \
|
|
||||||
--logging_steps 10 \
|
|
||||||
--warmup_steps 20 \
|
|
||||||
--save_steps 100 \
|
|
||||||
--eval_steps 100 \
|
|
||||||
--evaluation_strategy steps \
|
|
||||||
--load_best_model_at_end \
|
|
||||||
--learning_rate 5e-5 \
|
|
||||||
--num_train_epochs 3.0 \
|
|
||||||
--max_samples 10000 \
|
|
||||||
--val_size 0.1 \
|
|
||||||
--plot_loss \
|
|
||||||
--fp16
|
|
||||||
@@ -1,33 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
|
|
||||||
--stage rm \
|
|
||||||
--do_train \
|
|
||||||
--model_name_or_path meta-llama/Llama-2-7b-hf \
|
|
||||||
--adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \
|
|
||||||
--create_new_adapter \
|
|
||||||
--dataset comparison_gpt4_en \
|
|
||||||
--dataset_dir ../../data \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--lora_target q_proj,v_proj \
|
|
||||||
--output_dir ../../saves/LLaMA2-7B/lora/reward \
|
|
||||||
--overwrite_cache \
|
|
||||||
--overwrite_output_dir \
|
|
||||||
--cutoff_len 1024 \
|
|
||||||
--preprocessing_num_workers 16 \
|
|
||||||
--per_device_train_batch_size 1 \
|
|
||||||
--per_device_eval_batch_size 1 \
|
|
||||||
--gradient_accumulation_steps 8 \
|
|
||||||
--lr_scheduler_type cosine \
|
|
||||||
--logging_steps 10 \
|
|
||||||
--warmup_steps 20 \
|
|
||||||
--save_steps 100 \
|
|
||||||
--eval_steps 100 \
|
|
||||||
--evaluation_strategy steps \
|
|
||||||
--learning_rate 1e-5 \
|
|
||||||
--num_train_epochs 1.0 \
|
|
||||||
--max_samples 5000 \
|
|
||||||
--val_size 0.1 \
|
|
||||||
--plot_loss \
|
|
||||||
--fp16
|
|
||||||
@@ -1,32 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
|
|
||||||
--stage sft \
|
|
||||||
--do_train \
|
|
||||||
--model_name_or_path meta-llama/Llama-2-7b-hf \
|
|
||||||
--dataset alpaca_gpt4_en,glaive_toolcall \
|
|
||||||
--dataset_dir ../../data \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--lora_target q_proj,v_proj \
|
|
||||||
--output_dir ../../saves/LLaMA2-7B/lora/sft \
|
|
||||||
--overwrite_cache \
|
|
||||||
--overwrite_output_dir \
|
|
||||||
--cutoff_len 1024 \
|
|
||||||
--preprocessing_num_workers 16 \
|
|
||||||
--per_device_train_batch_size 1 \
|
|
||||||
--per_device_eval_batch_size 1 \
|
|
||||||
--gradient_accumulation_steps 8 \
|
|
||||||
--lr_scheduler_type cosine \
|
|
||||||
--logging_steps 10 \
|
|
||||||
--warmup_steps 20 \
|
|
||||||
--save_steps 100 \
|
|
||||||
--eval_steps 100 \
|
|
||||||
--evaluation_strategy steps \
|
|
||||||
--load_best_model_at_end \
|
|
||||||
--learning_rate 5e-5 \
|
|
||||||
--num_train_epochs 3.0 \
|
|
||||||
--max_samples 3000 \
|
|
||||||
--val_size 0.1 \
|
|
||||||
--plot_loss \
|
|
||||||
--fp16
|
|
||||||
@@ -1,4 +0,0 @@
|
|||||||
Usage:
|
|
||||||
|
|
||||||
- `merge.sh`: merge the lora weights
|
|
||||||
- `quantize.sh`: quantize the model with AutoGPTQ (optional; must be run after `merge.sh`)
|
|
||||||
11
examples/merge_lora/llama3_gptq.yaml
Normal file
11
examples/merge_lora/llama3_gptq.yaml
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
# model
|
||||||
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
template: llama3
|
||||||
|
|
||||||
|
# export
|
||||||
|
export_dir: models/llama3_gptq
|
||||||
|
export_quantization_bit: 4
|
||||||
|
export_quantization_dataset: data/c4_demo.json
|
||||||
|
export_size: 2
|
||||||
|
export_device: cpu
|
||||||
|
export_legacy_format: false
|
||||||
13
examples/merge_lora/llama3_lora_sft.yaml
Normal file
13
examples/merge_lora/llama3_lora_sft.yaml
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
# Note: DO NOT use quantized model or quantization_bit when merging lora adapters
|
||||||
|
|
||||||
|
# model
|
||||||
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
adapter_name_or_path: saves/llama3-8b/lora/sft
|
||||||
|
template: llama3
|
||||||
|
finetuning_type: lora
|
||||||
|
|
||||||
|
# export
|
||||||
|
export_dir: models/llama3_lora_sft
|
||||||
|
export_size: 2
|
||||||
|
export_device: cpu
|
||||||
|
export_legacy_format: false
|
||||||
@@ -1,10 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python ../../src/export_model.py \
|
|
||||||
--model_name_or_path meta-llama/Llama-2-7b-hf \
|
|
||||||
--adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--export_dir ../../models/llama2-7b-sft \
|
|
||||||
--export_size 2 \
|
|
||||||
--export_legacy_format False
|
|
||||||
@@ -1,10 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python ../../src/export_model.py \
|
|
||||||
--model_name_or_path ../../models/llama2-7b-sft \
|
|
||||||
--template default \
|
|
||||||
--export_dir ../../models/llama2-7b-sft-int4 \
|
|
||||||
--export_quantization_bit 4 \
|
|
||||||
--export_quantization_dataset ../../data/c4_demo.json \
|
|
||||||
--export_size 2 \
|
|
||||||
--export_legacy_format False
|
|
||||||
@@ -1,30 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
|
|
||||||
--stage sft \
|
|
||||||
--do_train \
|
|
||||||
--model_name_or_path BlackSamorez/Llama-2-7b-AQLM-2Bit-1x16-hf \
|
|
||||||
--dataset alpaca_gpt4_en,glaive_toolcall \
|
|
||||||
--dataset_dir ../../data \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--lora_target q_proj,v_proj \
|
|
||||||
--output_dir ../../saves/LLaMA2-7B/lora/sft \
|
|
||||||
--overwrite_cache \
|
|
||||||
--overwrite_output_dir \
|
|
||||||
--cutoff_len 1024 \
|
|
||||||
--per_device_train_batch_size 1 \
|
|
||||||
--per_device_eval_batch_size 1 \
|
|
||||||
--gradient_accumulation_steps 8 \
|
|
||||||
--lr_scheduler_type cosine \
|
|
||||||
--logging_steps 10 \
|
|
||||||
--save_steps 100 \
|
|
||||||
--eval_steps 100 \
|
|
||||||
--evaluation_strategy steps \
|
|
||||||
--load_best_model_at_end \
|
|
||||||
--learning_rate 5e-5 \
|
|
||||||
--num_train_epochs 3.0 \
|
|
||||||
--max_samples 3000 \
|
|
||||||
--val_size 0.1 \
|
|
||||||
--plot_loss \
|
|
||||||
--fp16
|
|
||||||
@@ -1,30 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
|
|
||||||
--stage sft \
|
|
||||||
--do_train \
|
|
||||||
--model_name_or_path TheBloke/Llama-2-7B-AWQ \
|
|
||||||
--dataset alpaca_gpt4_en,glaive_toolcall \
|
|
||||||
--dataset_dir ../../data \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--lora_target q_proj,v_proj \
|
|
||||||
--output_dir ../../saves/LLaMA2-7B/lora/sft \
|
|
||||||
--overwrite_cache \
|
|
||||||
--overwrite_output_dir \
|
|
||||||
--cutoff_len 1024 \
|
|
||||||
--per_device_train_batch_size 1 \
|
|
||||||
--per_device_eval_batch_size 1 \
|
|
||||||
--gradient_accumulation_steps 8 \
|
|
||||||
--lr_scheduler_type cosine \
|
|
||||||
--logging_steps 10 \
|
|
||||||
--save_steps 100 \
|
|
||||||
--eval_steps 100 \
|
|
||||||
--evaluation_strategy steps \
|
|
||||||
--load_best_model_at_end \
|
|
||||||
--learning_rate 5e-5 \
|
|
||||||
--num_train_epochs 3.0 \
|
|
||||||
--max_samples 3000 \
|
|
||||||
--val_size 0.1 \
|
|
||||||
--plot_loss \
|
|
||||||
--fp16
|
|
||||||
@@ -1,31 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
|
|
||||||
--stage sft \
|
|
||||||
--do_train \
|
|
||||||
--model_name_or_path meta-llama/Llama-2-7b-hf \
|
|
||||||
--dataset alpaca_gpt4_en,glaive_toolcall \
|
|
||||||
--dataset_dir ../../data \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--lora_target q_proj,v_proj \
|
|
||||||
--output_dir ../../saves/LLaMA2-7B/lora/sft \
|
|
||||||
--overwrite_cache \
|
|
||||||
--overwrite_output_dir \
|
|
||||||
--cutoff_len 1024 \
|
|
||||||
--per_device_train_batch_size 1 \
|
|
||||||
--per_device_eval_batch_size 1 \
|
|
||||||
--gradient_accumulation_steps 8 \
|
|
||||||
--lr_scheduler_type cosine \
|
|
||||||
--logging_steps 10 \
|
|
||||||
--save_steps 100 \
|
|
||||||
--eval_steps 100 \
|
|
||||||
--evaluation_strategy steps \
|
|
||||||
--load_best_model_at_end \
|
|
||||||
--learning_rate 5e-5 \
|
|
||||||
--num_train_epochs 3.0 \
|
|
||||||
--max_samples 3000 \
|
|
||||||
--val_size 0.1 \
|
|
||||||
--quantization_bit 4 \
|
|
||||||
--plot_loss \
|
|
||||||
--fp16
|
|
||||||
@@ -1,30 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
|
|
||||||
--stage sft \
|
|
||||||
--do_train \
|
|
||||||
--model_name_or_path TheBloke/Llama-2-7B-GPTQ \
|
|
||||||
--dataset alpaca_gpt4_en,glaive_toolcall \
|
|
||||||
--dataset_dir ../../data \
|
|
||||||
--template default \
|
|
||||||
--finetuning_type lora \
|
|
||||||
--lora_target q_proj,v_proj \
|
|
||||||
--output_dir ../../saves/LLaMA2-7B/lora/sft \
|
|
||||||
--overwrite_cache \
|
|
||||||
--overwrite_output_dir \
|
|
||||||
--cutoff_len 1024 \
|
|
||||||
--per_device_train_batch_size 1 \
|
|
||||||
--per_device_eval_batch_size 1 \
|
|
||||||
--gradient_accumulation_steps 8 \
|
|
||||||
--lr_scheduler_type cosine \
|
|
||||||
--logging_steps 10 \
|
|
||||||
--save_steps 100 \
|
|
||||||
--eval_steps 100 \
|
|
||||||
--evaluation_strategy steps \
|
|
||||||
--load_best_model_at_end \
|
|
||||||
--learning_rate 5e-5 \
|
|
||||||
--num_train_epochs 3.0 \
|
|
||||||
--max_samples 3000 \
|
|
||||||
--val_size 0.1 \
|
|
||||||
--plot_loss \
|
|
||||||
--fp16
|
|
||||||
38
examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml
Normal file
38
examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
# model
|
||||||
|
model_name_or_path: ISTA-DASLab/Meta-Llama-3-8B-Instruct-AQLM-2Bit-1x16
|
||||||
|
|
||||||
|
# method
|
||||||
|
stage: sft
|
||||||
|
do_train: true
|
||||||
|
finetuning_type: lora
|
||||||
|
lora_target: q_proj,v_proj
|
||||||
|
|
||||||
|
# dataset
|
||||||
|
dataset: identity,alpaca_gpt4_en
|
||||||
|
template: llama3
|
||||||
|
cutoff_len: 1024
|
||||||
|
max_samples: 1000
|
||||||
|
overwrite_cache: true
|
||||||
|
preprocessing_num_workers: 16
|
||||||
|
|
||||||
|
# output
|
||||||
|
output_dir: saves/llama3-8b/lora/sft
|
||||||
|
logging_steps: 10
|
||||||
|
save_steps: 500
|
||||||
|
plot_loss: true
|
||||||
|
overwrite_output_dir: true
|
||||||
|
|
||||||
|
# train
|
||||||
|
per_device_train_batch_size: 1
|
||||||
|
gradient_accumulation_steps: 8
|
||||||
|
learning_rate: 0.0001
|
||||||
|
num_train_epochs: 3.0
|
||||||
|
lr_scheduler_type: cosine
|
||||||
|
warmup_ratio: 0.1
|
||||||
|
fp16: true
|
||||||
|
|
||||||
|
# eval
|
||||||
|
val_size: 0.1
|
||||||
|
per_device_eval_batch_size: 1
|
||||||
|
evaluation_strategy: steps
|
||||||
|
eval_steps: 500
|
||||||
38
examples/qlora_single_gpu/llama3_lora_sft_awq.yaml
Normal file
38
examples/qlora_single_gpu/llama3_lora_sft_awq.yaml
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
# model
|
||||||
|
model_name_or_path: TechxGenus/Meta-Llama-3-8B-Instruct-AWQ
|
||||||
|
|
||||||
|
# method
|
||||||
|
stage: sft
|
||||||
|
do_train: true
|
||||||
|
finetuning_type: lora
|
||||||
|
lora_target: q_proj,v_proj
|
||||||
|
|
||||||
|
# dataset
|
||||||
|
dataset: identity,alpaca_gpt4_en
|
||||||
|
template: llama3
|
||||||
|
cutoff_len: 1024
|
||||||
|
max_samples: 1000
|
||||||
|
overwrite_cache: true
|
||||||
|
preprocessing_num_workers: 16
|
||||||
|
|
||||||
|
# output
|
||||||
|
output_dir: saves/llama3-8b/lora/sft
|
||||||
|
logging_steps: 10
|
||||||
|
save_steps: 500
|
||||||
|
plot_loss: true
|
||||||
|
overwrite_output_dir: true
|
||||||
|
|
||||||
|
# train
|
||||||
|
per_device_train_batch_size: 1
|
||||||
|
gradient_accumulation_steps: 8
|
||||||
|
learning_rate: 0.0001
|
||||||
|
num_train_epochs: 3.0
|
||||||
|
lr_scheduler_type: cosine
|
||||||
|
warmup_ratio: 0.1
|
||||||
|
fp16: true
|
||||||
|
|
||||||
|
# eval
|
||||||
|
val_size: 0.1
|
||||||
|
per_device_eval_batch_size: 1
|
||||||
|
evaluation_strategy: steps
|
||||||
|
eval_steps: 500
|
||||||
39
examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml
Normal file
39
examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
# model
|
||||||
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
quantization_bit: 4
|
||||||
|
|
||||||
|
# method
|
||||||
|
stage: sft
|
||||||
|
do_train: true
|
||||||
|
finetuning_type: lora
|
||||||
|
lora_target: q_proj,v_proj
|
||||||
|
|
||||||
|
# dataset
|
||||||
|
dataset: identity,alpaca_gpt4_en
|
||||||
|
template: llama3
|
||||||
|
cutoff_len: 1024
|
||||||
|
max_samples: 1000
|
||||||
|
overwrite_cache: true
|
||||||
|
preprocessing_num_workers: 16
|
||||||
|
|
||||||
|
# output
|
||||||
|
output_dir: saves/llama3-8b/lora/sft
|
||||||
|
logging_steps: 10
|
||||||
|
save_steps: 500
|
||||||
|
plot_loss: true
|
||||||
|
overwrite_output_dir: true
|
||||||
|
|
||||||
|
# train
|
||||||
|
per_device_train_batch_size: 1
|
||||||
|
gradient_accumulation_steps: 8
|
||||||
|
learning_rate: 0.0001
|
||||||
|
num_train_epochs: 3.0
|
||||||
|
lr_scheduler_type: cosine
|
||||||
|
warmup_ratio: 0.1
|
||||||
|
fp16: true
|
||||||
|
|
||||||
|
# eval
|
||||||
|
val_size: 0.1
|
||||||
|
per_device_eval_batch_size: 1
|
||||||
|
evaluation_strategy: steps
|
||||||
|
eval_steps: 500
|
||||||
38
examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml
Normal file
38
examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
# model
|
||||||
|
model_name_or_path: TechxGenus/Meta-Llama-3-8B-Instruct-GPTQ
|
||||||
|
|
||||||
|
# method
|
||||||
|
stage: sft
|
||||||
|
do_train: true
|
||||||
|
finetuning_type: lora
|
||||||
|
lora_target: q_proj,v_proj
|
||||||
|
|
||||||
|
# dataset
|
||||||
|
dataset: identity,alpaca_gpt4_en
|
||||||
|
template: llama3
|
||||||
|
cutoff_len: 1024
|
||||||
|
max_samples: 1000
|
||||||
|
overwrite_cache: true
|
||||||
|
preprocessing_num_workers: 16
|
||||||
|
|
||||||
|
# output
|
||||||
|
output_dir: saves/llama3-8b/lora/sft
|
||||||
|
logging_steps: 10
|
||||||
|
save_steps: 500
|
||||||
|
plot_loss: true
|
||||||
|
overwrite_output_dir: true
|
||||||
|
|
||||||
|
# train
|
||||||
|
per_device_train_batch_size: 1
|
||||||
|
gradient_accumulation_steps: 8
|
||||||
|
learning_rate: 0.0001
|
||||||
|
num_train_epochs: 3.0
|
||||||
|
lr_scheduler_type: cosine
|
||||||
|
warmup_ratio: 0.1
|
||||||
|
fp16: true
|
||||||
|
|
||||||
|
# eval
|
||||||
|
val_size: 0.1
|
||||||
|
per_device_eval_batch_size: 1
|
||||||
|
evaluation_strategy: steps
|
||||||
|
eval_steps: 500
|
||||||
@@ -1,10 +1,9 @@
|
|||||||
torch>=1.13.1
|
|
||||||
transformers>=4.37.2
|
transformers>=4.37.2
|
||||||
datasets>=2.14.3
|
datasets>=2.14.3
|
||||||
accelerate>=0.27.2
|
accelerate>=0.27.2
|
||||||
peft>=0.10.0
|
peft>=0.10.0
|
||||||
trl>=0.8.1
|
trl>=0.8.1
|
||||||
gradio>=3.38.0,<4.0.0
|
gradio>=4.0.0
|
||||||
scipy
|
scipy
|
||||||
einops
|
einops
|
||||||
sentencepiece
|
sentencepiece
|
||||||
@@ -13,6 +12,7 @@ uvicorn
|
|||||||
pydantic
|
pydantic
|
||||||
fastapi
|
fastapi
|
||||||
sse-starlette
|
sse-starlette
|
||||||
matplotlib
|
matplotlib>=3.7.0
|
||||||
fire
|
fire
|
||||||
galore-torch
|
packaging
|
||||||
|
pyyaml
|
||||||
|
|||||||
@@ -3,24 +3,22 @@
|
|||||||
# Usage: python cal_flops.py --model_name_or_path path_to_model --batch_size 1 --seq_length 512
|
# Usage: python cal_flops.py --model_name_or_path path_to_model --batch_size 1 --seq_length 512
|
||||||
# Inspired by: https://www.deepspeed.ai/tutorials/flops-profiler/
|
# Inspired by: https://www.deepspeed.ai/tutorials/flops-profiler/
|
||||||
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
import fire
|
import fire
|
||||||
import torch
|
import torch
|
||||||
from deepspeed.accelerator import get_accelerator # type: ignore
|
from deepspeed.accelerator import get_accelerator # type: ignore
|
||||||
from deepspeed.profiling.flops_profiler import get_model_profile # type: ignore
|
from deepspeed.profiling.flops_profiler import get_model_profile # type: ignore
|
||||||
|
|
||||||
from llmtuner import ChatModel
|
from llmtuner.chat import ChatModel
|
||||||
|
|
||||||
|
|
||||||
def calculate_flops(
|
def calculate_flops(
|
||||||
model_name_or_path: str,
|
model_name_or_path: str,
|
||||||
batch_size: Optional[int] = 1,
|
batch_size: int = 1,
|
||||||
seq_length: Optional[int] = 256,
|
seq_length: int = 256,
|
||||||
flash_attn: Optional[bool] = False,
|
flash_attn: str = "auto",
|
||||||
):
|
):
|
||||||
with get_accelerator().device(0):
|
with get_accelerator().device(0):
|
||||||
chat_model = ChatModel(dict(model_name_or_path=model_name_or_path, template="vanilla", flash_attn=flash_attn))
|
chat_model = ChatModel(dict(model_name_or_path=model_name_or_path, template="empty", flash_attn=flash_attn))
|
||||||
fake_input = torch.ones((batch_size, seq_length), dtype=torch.long, device=chat_model.model.device)
|
fake_input = torch.ones((batch_size, seq_length), dtype=torch.long, device=chat_model.model.device)
|
||||||
input_dict = {"input_ids": fake_input, "labels": fake_input.clone()}
|
input_dict = {"input_ids": fake_input, "labels": fake_input.clone()}
|
||||||
flops, macs, params = get_model_profile(chat_model.model, kwargs=input_dict, print_profile=True, detailed=True)
|
flops, macs, params = get_model_profile(chat_model.model, kwargs=input_dict, print_profile=True, detailed=True)
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
# Inspired by: https://github.com/imoneoi/openchat/blob/master/ochat/training_deepspeed/train.py
|
# Inspired by: https://github.com/imoneoi/openchat/blob/master/ochat/training_deepspeed/train.py
|
||||||
|
|
||||||
import math
|
import math
|
||||||
from typing import Optional
|
from typing import Literal
|
||||||
|
|
||||||
import fire
|
import fire
|
||||||
import torch
|
import torch
|
||||||
@@ -15,7 +15,7 @@ from transformers import DataCollatorForLanguageModeling, DataCollatorForSeq2Seq
|
|||||||
from llmtuner.data import get_dataset
|
from llmtuner.data import get_dataset
|
||||||
from llmtuner.extras.constants import IGNORE_INDEX
|
from llmtuner.extras.constants import IGNORE_INDEX
|
||||||
from llmtuner.hparams import get_train_args
|
from llmtuner.hparams import get_train_args
|
||||||
from llmtuner.model import load_model_and_tokenizer
|
from llmtuner.model import load_tokenizer
|
||||||
|
|
||||||
|
|
||||||
BASE_LR = 3e-4 # 1.5e-4 for 30B-70B models
|
BASE_LR = 3e-4 # 1.5e-4 for 30B-70B models
|
||||||
@@ -25,14 +25,14 @@ BASE_BS = 4_000_000 # from llama paper
|
|||||||
def calculate_lr(
|
def calculate_lr(
|
||||||
model_name_or_path: str,
|
model_name_or_path: str,
|
||||||
batch_size: int, # total batch size, namely (batch size * gradient accumulation * world size)
|
batch_size: int, # total batch size, namely (batch size * gradient accumulation * world size)
|
||||||
stage: Optional[str] = "sft",
|
stage: Literal["pt", "sft"] = "sft",
|
||||||
dataset: Optional[str] = "alpaca_en",
|
dataset: str = "alpaca_en",
|
||||||
dataset_dir: Optional[str] = "data",
|
dataset_dir: str = "data",
|
||||||
template: Optional[str] = "default",
|
template: str = "default",
|
||||||
cutoff_len: Optional[int] = 1024, # i.e. maximum input length during training
|
cutoff_len: int = 1024, # i.e. maximum input length during training
|
||||||
is_mistral: Optional[bool] = False, # mistral model uses a smaller learning rate,
|
is_mistral: bool = False, # mistral model uses a smaller learning rate,
|
||||||
):
|
):
|
||||||
model_args, data_args, training_args, finetuning_args, _ = get_train_args(
|
model_args, data_args, training_args, _, _ = get_train_args(
|
||||||
dict(
|
dict(
|
||||||
stage=stage,
|
stage=stage,
|
||||||
model_name_or_path=model_name_or_path,
|
model_name_or_path=model_name_or_path,
|
||||||
@@ -44,8 +44,9 @@ def calculate_lr(
|
|||||||
overwrite_cache=True,
|
overwrite_cache=True,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
_, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, is_trainable=False, add_valuehead=False)
|
tokenizer_module = load_tokenizer(model_args)
|
||||||
trainset = get_dataset(tokenizer, model_args, data_args, training_args, stage=stage)
|
tokenizer = tokenizer_module["tokenizer"]
|
||||||
|
trainset = get_dataset(model_args, data_args, training_args, stage, **tokenizer_module)
|
||||||
if stage == "pt":
|
if stage == "pt":
|
||||||
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
|
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
|
||||||
elif stage == "sft":
|
elif stage == "sft":
|
||||||
@@ -53,9 +54,7 @@ def calculate_lr(
|
|||||||
else:
|
else:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
dataloader = DataLoader(
|
dataloader = DataLoader(trainset, batch_size, shuffle=False, collate_fn=data_collator, pin_memory=True)
|
||||||
dataset=trainset, batch_size=batch_size, shuffle=True, collate_fn=data_collator, pin_memory=True
|
|
||||||
)
|
|
||||||
valid_tokens, total_tokens = 0, 0
|
valid_tokens, total_tokens = 0, 0
|
||||||
for batch in tqdm(dataloader):
|
for batch in tqdm(dataloader):
|
||||||
valid_tokens += torch.sum(batch["labels"] != IGNORE_INDEX).item()
|
valid_tokens += torch.sum(batch["labels"] != IGNORE_INDEX).item()
|
||||||
|
|||||||
116
scripts/cal_ppl.py
Normal file
116
scripts/cal_ppl.py
Normal file
@@ -0,0 +1,116 @@
|
|||||||
|
# coding=utf-8
|
||||||
|
# Calculates the ppl on the dataset of the pre-trained models.
|
||||||
|
# Usage: python cal_ppl.py --model_name_or_path path_to_model --save_name ppl.json
|
||||||
|
|
||||||
|
import json
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Any, Dict, Literal, Optional, Sequence
|
||||||
|
|
||||||
|
import fire
|
||||||
|
import torch
|
||||||
|
from torch.utils.data import DataLoader
|
||||||
|
from tqdm import tqdm
|
||||||
|
from transformers import DataCollatorForLanguageModeling, DataCollatorForSeq2Seq
|
||||||
|
|
||||||
|
from llmtuner.data import get_dataset
|
||||||
|
from llmtuner.extras.constants import IGNORE_INDEX
|
||||||
|
from llmtuner.hparams import get_train_args
|
||||||
|
from llmtuner.model import load_model, load_tokenizer
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class PairwiseDataCollatorWithPadding(DataCollatorForSeq2Seq):
    r"""
    Collator that turns pairwise samples into padded language-modeling batches.

    Only the prompt and the chosen response of each sample are used.
    """

    # When True, prompt tokens are kept as labels instead of being masked out.
    train_on_prompt: bool = False

    def __call__(self, features: Sequence[Dict[str, Any]]) -> Dict[str, torch.Tensor]:
        r"""
        Builds one example per feature and pads the batch with the parent collator.

        Each example is the prompt followed by the chosen response. Prompt positions
        are labelled with IGNORE_INDEX unless `train_on_prompt` is enabled.
        """

        def build_example(feature: Dict[str, Any]) -> Dict[str, Any]:
            prompt_ids, chosen_ids = feature["prompt_ids"], feature["chosen_ids"]
            input_ids = prompt_ids + chosen_ids
            if self.train_on_prompt:
                labels = input_ids
            else:
                labels = [IGNORE_INDEX] * len(prompt_ids) + chosen_ids

            return {
                "input_ids": input_ids,
                "attention_mask": [1] * (len(prompt_ids) + len(chosen_ids)),
                "labels": labels,
            }

        return super().__call__([build_example(feature) for feature in features])
|
||||||
|
|
||||||
|
|
||||||
|
def cal_ppl(
    model_name_or_path: str,
    save_name: str,
    batch_size: int = 4,
    stage: Literal["pt", "sft", "rm"] = "sft",
    dataset: str = "alpaca_en",
    dataset_dir: str = "data",
    template: str = "default",
    cutoff_len: int = 1024,
    max_samples: Optional[int] = None,
    train_on_prompt: bool = False,
):
    r"""
    Computes the per-sample perplexity of a model on a dataset and saves it as JSON.

    Args:
        model_name_or_path: model identifier or path forwarded to the loaders.
        save_name: path of the JSON file receiving the list of per-sample perplexities.
        batch_size: number of samples per forward pass.
        stage: selects the data collator ("pt", "sft" or "rm").
        dataset, dataset_dir, template, cutoff_len, max_samples, train_on_prompt:
            forwarded unchanged to `get_train_args`.

    Raises:
        NotImplementedError: if `stage` is not one of the supported values.
    """
    model_args, data_args, training_args, finetuning_args, _ = get_train_args(
        dict(
            stage=stage,
            model_name_or_path=model_name_or_path,
            dataset=dataset,
            dataset_dir=dataset_dir,
            template=template,
            cutoff_len=cutoff_len,
            max_samples=max_samples,
            train_on_prompt=train_on_prompt,
            output_dir="dummy_dir",
            overwrite_cache=True,
        )
    )
    tokenizer_module = load_tokenizer(model_args)
    tokenizer = tokenizer_module["tokenizer"]
    trainset = get_dataset(model_args, data_args, training_args, stage, **tokenizer_module)
    model = load_model(tokenizer, model_args, finetuning_args, is_trainable=False)
    if stage == "pt":
        data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
    elif stage == "sft":
        data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, label_pad_token_id=IGNORE_INDEX)
    elif stage == "rm":
        data_collator = PairwiseDataCollatorWithPadding(
            tokenizer=tokenizer, label_pad_token_id=IGNORE_INDEX, train_on_prompt=train_on_prompt
        )
    else:
        raise NotImplementedError

    dataloader = DataLoader(trainset, batch_size, shuffle=False, collate_fn=data_collator, pin_memory=True)
    criterion = torch.nn.CrossEntropyLoss(reduction="none")
    total_ppl = 0
    perplexities = []
    batch: Dict[str, "torch.Tensor"]
    with torch.no_grad():
        for batch in tqdm(dataloader):
            batch = batch.to(model.device)
            outputs = model(**batch)
            # Position t predicts token t + 1, so drop the last logit and the first label.
            shift_logits: "torch.Tensor" = outputs["logits"][..., :-1, :]
            shift_labels: "torch.Tensor" = batch["labels"][..., 1:]
            loss_mask = shift_labels != IGNORE_INDEX
            flatten_logits = shift_logits.contiguous().view(shift_labels.size(0) * shift_labels.size(1), -1)
            flatten_labels = shift_labels.contiguous().view(-1)
            # Cross entropy yields the negative log-likelihood of every target token.
            token_nll: "torch.Tensor" = criterion(flatten_logits, flatten_labels)
            token_nll = token_nll.contiguous().view(shift_logits.size(0), -1)
            # NOTE(review): a sample without any unmasked label divides by zero and yields NaN.
            sentence_nll = (token_nll * loss_mask).sum(-1) / loss_mask.sum(-1)
            sentence_ppl = sentence_nll.exp()  # computed once, reused for the sum and the list
            total_ppl += sentence_ppl.sum().item()
            perplexities.extend(sentence_ppl.tolist())

    with open(save_name, "w", encoding="utf-8") as f:
        json.dump(perplexities, f, indent=2)

    if perplexities:
        print("Average perplexity is {:.2f}".format(total_ppl / len(perplexities)))
    else:
        # An empty dataset used to crash here with ZeroDivisionError.
        print("No samples were evaluated, average perplexity is undefined.")

    print("Perplexities have been saved at {}.".format(save_name))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
fire.Fire(cal_ppl)
|
||||||
@@ -3,24 +3,23 @@
|
|||||||
# Usage: python length_cdf.py --model_name_or_path path_to_model --dataset alpaca_en --template default
|
# Usage: python length_cdf.py --model_name_or_path path_to_model --dataset alpaca_en --template default
|
||||||
|
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
import fire
|
import fire
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
from llmtuner.data import get_dataset
|
from llmtuner.data import get_dataset
|
||||||
from llmtuner.hparams import get_train_args
|
from llmtuner.hparams import get_train_args
|
||||||
from llmtuner.model import load_model_and_tokenizer
|
from llmtuner.model import load_tokenizer
|
||||||
|
|
||||||
|
|
||||||
def length_cdf(
|
def length_cdf(
|
||||||
model_name_or_path: str,
|
model_name_or_path: str,
|
||||||
dataset: Optional[str] = "alpaca_en",
|
dataset: str = "alpaca_en",
|
||||||
dataset_dir: Optional[str] = "data",
|
dataset_dir: str = "data",
|
||||||
template: Optional[str] = "default",
|
template: str = "default",
|
||||||
interval: Optional[int] = 1000,
|
interval: int = 1000,
|
||||||
):
|
):
|
||||||
model_args, data_args, training_args, finetuning_args, _ = get_train_args(
|
model_args, data_args, training_args, _, _ = get_train_args(
|
||||||
dict(
|
dict(
|
||||||
stage="sft",
|
stage="sft",
|
||||||
model_name_or_path=model_name_or_path,
|
model_name_or_path=model_name_or_path,
|
||||||
@@ -32,8 +31,8 @@ def length_cdf(
|
|||||||
overwrite_cache=True,
|
overwrite_cache=True,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
_, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, is_trainable=False, add_valuehead=False)
|
tokenizer_module = load_tokenizer(model_args)
|
||||||
trainset = get_dataset(tokenizer, model_args, data_args, training_args, stage="sft")
|
trainset = get_dataset(model_args, data_args, training_args, stage="sft", **tokenizer_module)
|
||||||
total_num = len(trainset)
|
total_num = len(trainset)
|
||||||
length_dict = defaultdict(int)
|
length_dict = defaultdict(int)
|
||||||
for sample in tqdm(trainset["input_ids"]):
|
for sample in tqdm(trainset["input_ids"]):
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
# coding=utf-8
|
# coding=utf-8
|
||||||
# Performs block expansion for LLaMA, Mistral or Qwen1.5 models.
|
# Performs block expansion for LLaMA, Mistral, Qwen1.5 or Yi models.
|
||||||
# Usage: python llama_pro.py --model_name_or_path meta-llama/Llama-2-7b-hf --output_dir llama2_pro --num_expand 8
|
# Usage: python llama_pro.py --model_name_or_path meta-llama/Llama-2-7b-hf --output_dir llama2_pro --num_expand 8
|
||||||
# Inspired by: https://github.com/TencentARC/LLaMA-Pro/blob/main/scripts/block_expansion.py
|
# Inspired by: https://github.com/TencentARC/LLaMA-Pro/blob/main/scripts/block_expansion.py
|
||||||
|
|
||||||
@@ -106,8 +106,7 @@ def block_expansion(
|
|||||||
print("Fine-tune this model with:")
|
print("Fine-tune this model with:")
|
||||||
print(" --model_name_or_path {} \\".format(output_dir))
|
print(" --model_name_or_path {} \\".format(output_dir))
|
||||||
print(" --finetuning_type freeze \\")
|
print(" --finetuning_type freeze \\")
|
||||||
print(" --name_module_trainable all \\")
|
print(" --freeze_trainable_layers {} \\".format(num_expand))
|
||||||
print(" --num_layer_trainable {} \\".format(num_expand))
|
|
||||||
print(" --use_llama_pro")
|
print(" --use_llama_pro")
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,114 +0,0 @@
|
|||||||
# coding=utf-8
|
|
||||||
# Converts the InternLM2 model in the same format as LLaMA2.
|
|
||||||
# Usage: python llamafy_internlm2.py --input_dir input --output_dir output
|
|
||||||
# Warning: We have found that the converted model cannot infer correctly. It will be fixed later.
|
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
from collections import OrderedDict
|
|
||||||
from typing import Any, Dict, Optional
|
|
||||||
|
|
||||||
import fire
|
|
||||||
import torch
|
|
||||||
from safetensors.torch import save_file
|
|
||||||
from tqdm import tqdm
|
|
||||||
from transformers.modeling_utils import (
|
|
||||||
SAFE_WEIGHTS_INDEX_NAME,
|
|
||||||
SAFE_WEIGHTS_NAME,
|
|
||||||
WEIGHTS_INDEX_NAME,
|
|
||||||
WEIGHTS_NAME,
|
|
||||||
shard_checkpoint,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
CONFIG_NAME = "config.json"
|
|
||||||
|
|
||||||
|
|
||||||
def _split_wqkv(wqkv: torch.Tensor, num_q_heads: int, num_kv_heads: int):
    r"""
    Splits a fused InternLM2 `wqkv` tensor into separate query, key and value tensors.

    The fused tensor is grouped by key/value head: each group holds the query heads
    sharing that key/value head, followed by one key head and one value head. It is
    therefore not a plain `[Q; K; V]` concatenation along the first dimension.
    """
    queries_per_group = num_q_heads // num_kv_heads
    head_dim = wqkv.size(0) // (num_q_heads + 2 * num_kv_heads)
    trailing_shape = tuple(wqkv.shape[1:])  # empty for 1-D tensors such as biases
    grouped = wqkv.reshape(num_kv_heads, queries_per_group + 2, head_dim, *trailing_shape)
    # clone() gives every projection its own storage before it is written out.
    q_proj = grouped[:, :queries_per_group].reshape(num_q_heads * head_dim, *trailing_shape).clone()
    k_proj = grouped[:, -2].reshape(num_kv_heads * head_dim, *trailing_shape).clone()
    v_proj = grouped[:, -1].reshape(num_kv_heads * head_dim, *trailing_shape).clone()
    return q_proj, k_proj, v_proj


def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetensors: bool):
    r"""
    Loads the InternLM2 `.bin` shards from `input_dir`, renames the weights to the
    LLaMA2 naming scheme and writes them to `output_dir` as sharded checkpoints.

    Args:
        input_dir: directory holding `config.json` and the `.bin` weight files.
        output_dir: existing directory receiving the converted shards and index file.
        shard_size: maximum shard size passed to `shard_checkpoint`.
        save_safetensors: write safetensors files instead of `torch.save` files.
    """
    with open(os.path.join(input_dir, CONFIG_NAME), "r", encoding="utf-8") as f:
        internlm2_config_dict: Dict[str, Any] = json.load(f)

    internlm2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
    for filepath in tqdm(os.listdir(input_dir), desc="Load weights"):
        if os.path.isfile(os.path.join(input_dir, filepath)) and filepath.endswith(".bin"):
            shard_weight = torch.load(os.path.join(input_dir, filepath), map_location="cpu")
            internlm2_state_dict.update(shard_weight)

    llama2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
    for key, value in tqdm(internlm2_state_dict.items(), desc="Convert format"):
        if "output" in key:
            llama2_state_dict[key.replace("output", "lm_head")] = value
        elif "tok_embeddings" in key:
            llama2_state_dict[key.replace("tok_embeddings", "embed_tokens")] = value
        elif "wqkv" in key:
            num_q_heads = internlm2_config_dict["num_attention_heads"]
            num_kv_heads = internlm2_config_dict["num_key_value_heads"]
            q_proj, k_proj, v_proj = _split_wqkv(value, num_q_heads, num_kv_heads)
            llama2_state_dict[key.replace("attention.wqkv", "self_attn.q_proj")] = q_proj
            llama2_state_dict[key.replace("attention.wqkv", "self_attn.k_proj")] = k_proj
            llama2_state_dict[key.replace("attention.wqkv", "self_attn.v_proj")] = v_proj
        elif "wo" in key:
            llama2_state_dict[key.replace("attention.wo", "self_attn.o_proj")] = value
        elif "attention_norm" in key:
            llama2_state_dict[key.replace("attention_norm", "input_layernorm")] = value
        elif "ffn_norm" in key:
            llama2_state_dict[key.replace("ffn_norm", "post_attention_layernorm")] = value
        elif "w1" in key:
            llama2_state_dict[key.replace("feed_forward.w1", "mlp.gate_proj")] = value
        elif "w2" in key:
            llama2_state_dict[key.replace("feed_forward.w2", "mlp.down_proj")] = value
        elif "w3" in key:
            llama2_state_dict[key.replace("feed_forward.w3", "mlp.up_proj")] = value
        else:
            llama2_state_dict[key] = value

    weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
    shards, index = shard_checkpoint(llama2_state_dict, max_shard_size=shard_size, weights_name=weights_name)

    for shard_file, shard in tqdm(shards.items(), desc="Save weights"):
        if save_safetensors:
            save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
        else:
            torch.save(shard, os.path.join(output_dir, shard_file))

    if index is None:
        # Report the file that was actually written (safetensors or bin).
        print("Model weights saved in {}".format(os.path.join(output_dir, weights_name)))
    else:
        index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
        with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
            json.dump(index, f, indent=2, sort_keys=True)
        print("Model weights saved in {}".format(output_dir))
|
|
||||||
|
|
||||||
|
|
||||||
def save_config(input_dir: str, output_dir: str):
    """Copy the model config from ``input_dir`` to ``output_dir`` in LLaMA form.

    The JSON file named ``CONFIG_NAME`` is loaded, its ``architectures`` and
    ``model_type`` entries are overwritten with the LLaMA values, and the
    ``auto_map``, ``bias`` and ``rope_scaling`` entries are dropped when present.

    Args:
        input_dir: Directory holding the original config file.
        output_dir: Directory that receives the rewritten config file.
    """
    source_path = os.path.join(input_dir, CONFIG_NAME)
    target_path = os.path.join(output_dir, CONFIG_NAME)

    with open(source_path, "r", encoding="utf-8") as reader:
        config: Dict[str, Any] = json.load(reader)

    config["architectures"] = ["LlamaForCausalLM"]
    # Entries removed from the converted config; a missing key is not an error.
    for obsolete_key in ("auto_map", "bias", "rope_scaling"):
        config.pop(obsolete_key, None)
    config["model_type"] = "llama"

    with open(target_path, "w", encoding="utf-8") as writer:
        json.dump(config, writer, indent=2)

    print("Model config saved in {}".format(target_path))
|
|
||||||
|
|
||||||
|
|
||||||
def llamafy_internlm2(
    input_dir: str, output_dir: str, shard_size: Optional[str] = "2GB", save_safetensors: Optional[bool] = False
):
    """Convert the checkpoint in ``input_dir`` and write the result to ``output_dir``.

    The output directory is created first and must not exist yet; the weights
    are then written by ``save_weight`` and the config by ``save_config``.

    Args:
        input_dir: Directory containing the source checkpoint and config.
        output_dir: Directory to create for the converted files.
        shard_size: Maximum shard size, forwarded to ``save_weight``.
        save_safetensors: Forwarded to ``save_weight`` to select the weight format.

    Raises:
        FileExistsError: If ``output_dir`` already exists.
    """
    try:
        os.makedirs(output_dir, exist_ok=False)
    except FileExistsError as e:
        # The original `raise print(...)` raised None, which surfaced as an
        # unrelated TypeError; raise a real exception carrying the message.
        # Other OS errors (e.g. permissions) propagate with their own message.
        raise FileExistsError("Output dir already exists: {}".format(output_dir)) from e

    save_weight(input_dir, output_dir, shard_size, save_safetensors)
    save_config(input_dir, output_dir)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
fire.Fire(llamafy_internlm2)
|
|
||||||
14
setup.py
14
setup.py
@@ -5,9 +5,9 @@ from setuptools import find_packages, setup
|
|||||||
|
|
||||||
|
|
||||||
def get_version():
|
def get_version():
|
||||||
with open(os.path.join("src", "llmtuner", "__init__.py"), "r", encoding="utf-8") as f:
|
with open(os.path.join("src", "llmtuner", "cli.py"), "r", encoding="utf-8") as f:
|
||||||
file_content = f.read()
|
file_content = f.read()
|
||||||
pattern = r"{0}\W*=\W*\"([^\"]+)\"".format("__version__")
|
pattern = r"{}\W*=\W*\"([^\"]+)\"".format("VERSION")
|
||||||
(version,) = re.findall(pattern, file_content)
|
(version,) = re.findall(pattern, file_content)
|
||||||
return version
|
return version
|
||||||
|
|
||||||
@@ -20,15 +20,18 @@ def get_requires():
|
|||||||
|
|
||||||
|
|
||||||
extra_require = {
|
extra_require = {
|
||||||
"deepspeed": ["deepspeed"],
|
"torch": ["torch>=1.13.1"],
|
||||||
"metrics": ["nltk", "jieba", "rouge-chinese"],
|
"metrics": ["nltk", "jieba", "rouge-chinese"],
|
||||||
"unsloth": ["torch==2.2.0", "unsloth[cu121-ampere-torch220]"],
|
"deepspeed": ["deepspeed>=0.10.0,<=0.14.0"],
|
||||||
"vllm": ["vllm>=0.3.3"],
|
|
||||||
"bitsandbytes": ["bitsandbytes>=0.39.0"],
|
"bitsandbytes": ["bitsandbytes>=0.39.0"],
|
||||||
|
"vllm": ["vllm>=0.4.0"],
|
||||||
|
"galore": ["galore-torch"],
|
||||||
|
"badam": ["badam"],
|
||||||
"gptq": ["optimum>=1.16.0", "auto-gptq>=0.5.0"],
|
"gptq": ["optimum>=1.16.0", "auto-gptq>=0.5.0"],
|
||||||
"awq": ["autoawq"],
|
"awq": ["autoawq"],
|
||||||
"aqlm": ["aqlm[gpu]>=1.1.0"],
|
"aqlm": ["aqlm[gpu]>=1.1.0"],
|
||||||
"qwen": ["tiktoken", "transformers_stream_generator"],
|
"qwen": ["tiktoken", "transformers_stream_generator"],
|
||||||
|
"modelscope": ["modelscope"],
|
||||||
"quality": ["ruff"],
|
"quality": ["ruff"],
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -50,6 +53,7 @@ def main():
|
|||||||
python_requires=">=3.8.0",
|
python_requires=">=3.8.0",
|
||||||
install_requires=get_requires(),
|
install_requires=get_requires(),
|
||||||
extras_require=extra_require,
|
extras_require=extra_require,
|
||||||
|
entry_points={"console_scripts": ["llamafactory-cli = llmtuner.cli:main"]},
|
||||||
classifiers=[
|
classifiers=[
|
||||||
"Development Status :: 4 - Beta",
|
"Development Status :: 4 - Beta",
|
||||||
"Intended Audience :: Developers",
|
"Intended Audience :: Developers",
|
||||||
|
|||||||
19
src/api.py
Normal file
19
src/api.py
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
import uvicorn
|
||||||
|
|
||||||
|
from llmtuner.api.app import create_app
|
||||||
|
from llmtuner.chat import ChatModel
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Start the HTTP API for a freshly created ``ChatModel`` using uvicorn.

    The bind address comes from ``API_HOST`` (default ``0.0.0.0``) and the
    port from ``API_PORT`` (default ``8000``).
    """
    application = create_app(ChatModel())
    host = os.environ.get("API_HOST", "0.0.0.0")
    port = int(os.environ.get("API_PORT", "8000"))
    print("Visit http://localhost:{}/docs for API document.".format(port))
    uvicorn.run(application, host=host, port=port)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -1,16 +0,0 @@
|
|||||||
import os
|
|
||||||
|
|
||||||
import uvicorn
|
|
||||||
|
|
||||||
from llmtuner import ChatModel, create_app
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Serve a freshly created ``ChatModel`` over HTTP with uvicorn.

    The server binds to ``0.0.0.0`` on the port given by ``API_PORT``
    (default 8000) and uses a single worker.
    """
    application = create_app(ChatModel())
    port = os.environ.get("API_PORT", 8000)
    print("Visit http://localhost:{}/docs for API document.".format(port))
    uvicorn.run(application, host="0.0.0.0", port=int(port), workers=1)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,49 +0,0 @@
|
|||||||
from llmtuner import ChatModel
|
|
||||||
from llmtuner.extras.misc import torch_gc
|
|
||||||
|
|
||||||
|
|
||||||
try:
|
|
||||||
import platform
|
|
||||||
|
|
||||||
if platform.system() != "Windows":
|
|
||||||
import readline # noqa: F401
|
|
||||||
except ImportError:
|
|
||||||
print("Install `readline` for a better experience.")
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Run an interactive chat loop on the terminal.

    Typing ``exit`` leaves the loop and ``clear`` resets the conversation
    history. Any other input is sent to the model and the streamed reply is
    echoed as it arrives, then stored in the history.
    """
    chat_model = ChatModel()
    history = []
    print("Welcome to the CLI application, use `clear` to remove the history, use `exit` to exit the application.")

    while True:
        try:
            query = input("\nUser: ")
        except UnicodeDecodeError:
            print("Detected decoding error at the inputs, please set the terminal encoding to utf-8.")
            continue

        command = query.strip()
        if command == "exit":
            break

        if command == "clear":
            history = []
            torch_gc()
            print("History has been removed.")
            continue

        history.append({"role": "user", "content": query})
        print("Assistant: ", end="", flush=True)

        pieces = []
        for piece in chat_model.stream_chat(history):
            print(piece, end="", flush=True)
            pieces.append(piece)
        print()

        history.append({"role": "assistant", "content": "".join(pieces)})
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,10 +0,0 @@
|
|||||||
from llmtuner import Evaluator
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Create an ``Evaluator`` and run its ``eval`` method."""
    Evaluator().eval()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
from llmtuner import export_model
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Script entry point: call ``export_model`` imported from ``llmtuner``."""
    export_model()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,11 +1,6 @@
|
|||||||
# Level: api, webui > chat, eval, train > data, model > extras, hparams
|
# Level: api, webui > chat, eval, train > data, model > extras, hparams
|
||||||
|
|
||||||
from .api import create_app
|
from .cli import VERSION
|
||||||
from .chat import ChatModel
|
|
||||||
from .eval import Evaluator
|
|
||||||
from .train import export_model, run_exp
|
|
||||||
from .webui import create_ui, create_web_demo
|
|
||||||
|
|
||||||
|
|
||||||
__version__ = "0.6.1"
|
__version__ = VERSION
|
||||||
__all__ = ["create_app", "ChatModel", "Evaluator", "export_model", "run_exp", "create_ui", "create_web_demo"]
|
|
||||||
|
|||||||
@@ -1,4 +0,0 @@
|
|||||||
from .app import create_app
|
|
||||||
|
|
||||||
|
|
||||||
__all__ = ["create_app"]
|
|
||||||
|
|||||||
@@ -1,36 +1,31 @@
|
|||||||
import json
|
|
||||||
import os
|
import os
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
from typing import Any, Dict, Sequence
|
from typing import Optional
|
||||||
|
|
||||||
from pydantic import BaseModel
|
from typing_extensions import Annotated
|
||||||
|
|
||||||
from ..chat import ChatModel
|
from ..chat import ChatModel
|
||||||
from ..data import Role as DataRole
|
|
||||||
from ..extras.misc import torch_gc
|
from ..extras.misc import torch_gc
|
||||||
from ..extras.packages import is_fastapi_availble, is_starlette_available, is_uvicorn_available
|
from ..extras.packages import is_fastapi_available, is_starlette_available, is_uvicorn_available
|
||||||
|
from .chat import (
|
||||||
|
create_chat_completion_response,
|
||||||
|
create_score_evaluation_response,
|
||||||
|
create_stream_chat_completion_response,
|
||||||
|
)
|
||||||
from .protocol import (
|
from .protocol import (
|
||||||
ChatCompletionMessage,
|
|
||||||
ChatCompletionRequest,
|
ChatCompletionRequest,
|
||||||
ChatCompletionResponse,
|
ChatCompletionResponse,
|
||||||
ChatCompletionResponseChoice,
|
|
||||||
ChatCompletionResponseStreamChoice,
|
|
||||||
ChatCompletionResponseUsage,
|
|
||||||
ChatCompletionStreamResponse,
|
|
||||||
Finish,
|
|
||||||
Function,
|
|
||||||
FunctionCall,
|
|
||||||
ModelCard,
|
ModelCard,
|
||||||
ModelList,
|
ModelList,
|
||||||
Role,
|
|
||||||
ScoreEvaluationRequest,
|
ScoreEvaluationRequest,
|
||||||
ScoreEvaluationResponse,
|
ScoreEvaluationResponse,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
if is_fastapi_availble():
|
if is_fastapi_available():
|
||||||
from fastapi import FastAPI, HTTPException, status
|
from fastapi import Depends, FastAPI, HTTPException, status
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
from fastapi.security.http import HTTPAuthorizationCredentials, HTTPBearer
|
||||||
|
|
||||||
|
|
||||||
if is_starlette_available():
|
if is_starlette_available():
|
||||||
@@ -47,23 +42,8 @@ async def lifespan(app: "FastAPI"): # collects GPU memory
|
|||||||
torch_gc()
|
torch_gc()
|
||||||
|
|
||||||
|
|
||||||
def dictify(data: "BaseModel") -> Dict[str, Any]:
|
|
||||||
try: # pydantic v2
|
|
||||||
return data.model_dump(exclude_unset=True)
|
|
||||||
except AttributeError: # pydantic v1
|
|
||||||
return data.dict(exclude_unset=True)
|
|
||||||
|
|
||||||
|
|
||||||
def jsonify(data: "BaseModel") -> str:
|
|
||||||
try: # pydantic v2
|
|
||||||
return json.dumps(data.model_dump(exclude_unset=True), ensure_ascii=False)
|
|
||||||
except AttributeError: # pydantic v1
|
|
||||||
return data.json(exclude_unset=True, ensure_ascii=False)
|
|
||||||
|
|
||||||
|
|
||||||
def create_app(chat_model: "ChatModel") -> "FastAPI":
|
def create_app(chat_model: "ChatModel") -> "FastAPI":
|
||||||
app = FastAPI(lifespan=lifespan)
|
app = FastAPI(lifespan=lifespan)
|
||||||
|
|
||||||
app.add_middleware(
|
app.add_middleware(
|
||||||
CORSMiddleware,
|
CORSMiddleware,
|
||||||
allow_origins=["*"],
|
allow_origins=["*"],
|
||||||
@@ -71,154 +51,58 @@ def create_app(chat_model: "ChatModel") -> "FastAPI":
|
|||||||
allow_methods=["*"],
|
allow_methods=["*"],
|
||||||
allow_headers=["*"],
|
allow_headers=["*"],
|
||||||
)
|
)
|
||||||
|
api_key = os.environ.get("API_KEY")
|
||||||
|
security = HTTPBearer(auto_error=False)
|
||||||
|
|
||||||
role_mapping = {
|
async def verify_api_key(auth: Annotated[Optional[HTTPAuthorizationCredentials], Depends(security)]):
|
||||||
Role.USER: DataRole.USER.value,
|
if api_key and (auth is None or auth.credentials != api_key):
|
||||||
Role.ASSISTANT: DataRole.ASSISTANT.value,
|
raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid API key.")
|
||||||
Role.SYSTEM: DataRole.SYSTEM.value,
|
|
||||||
Role.FUNCTION: DataRole.FUNCTION.value,
|
|
||||||
Role.TOOL: DataRole.OBSERVATION.value,
|
|
||||||
}
|
|
||||||
|
|
||||||
@app.get("/v1/models", response_model=ModelList)
|
@app.get(
|
||||||
|
"/v1/models",
|
||||||
|
response_model=ModelList,
|
||||||
|
status_code=status.HTTP_200_OK,
|
||||||
|
dependencies=[Depends(verify_api_key)],
|
||||||
|
)
|
||||||
async def list_models():
|
async def list_models():
|
||||||
model_card = ModelCard(id="gpt-3.5-turbo")
|
model_card = ModelCard(id="gpt-3.5-turbo")
|
||||||
return ModelList(data=[model_card])
|
return ModelList(data=[model_card])
|
||||||
|
|
||||||
@app.post("/v1/chat/completions", response_model=ChatCompletionResponse, status_code=status.HTTP_200_OK)
|
@app.post(
|
||||||
|
"/v1/chat/completions",
|
||||||
|
response_model=ChatCompletionResponse,
|
||||||
|
status_code=status.HTTP_200_OK,
|
||||||
|
dependencies=[Depends(verify_api_key)],
|
||||||
|
)
|
||||||
async def create_chat_completion(request: ChatCompletionRequest):
|
async def create_chat_completion(request: ChatCompletionRequest):
|
||||||
if not chat_model.engine.can_generate:
|
if not chat_model.engine.can_generate:
|
||||||
raise HTTPException(status_code=status.HTTP_405_METHOD_NOT_ALLOWED, detail="Not allowed")
|
raise HTTPException(status_code=status.HTTP_405_METHOD_NOT_ALLOWED, detail="Not allowed")
|
||||||
|
|
||||||
if len(request.messages) == 0:
|
|
||||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid length")
|
|
||||||
|
|
||||||
if request.messages[0].role == Role.SYSTEM:
|
|
||||||
system = request.messages.pop(0).content
|
|
||||||
else:
|
|
||||||
system = ""
|
|
||||||
|
|
||||||
if len(request.messages) % 2 == 0:
|
|
||||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Only supports u/a/u/a/u...")
|
|
||||||
|
|
||||||
input_messages = []
|
|
||||||
for i, message in enumerate(request.messages):
|
|
||||||
if i % 2 == 0 and message.role not in [Role.USER, Role.TOOL]:
|
|
||||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid role")
|
|
||||||
elif i % 2 == 1 and message.role not in [Role.ASSISTANT, Role.FUNCTION]:
|
|
||||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid role")
|
|
||||||
|
|
||||||
input_messages.append({"role": role_mapping[message.role], "content": message.content})
|
|
||||||
|
|
||||||
tool_list = request.tools
|
|
||||||
if isinstance(tool_list, list) and len(tool_list):
|
|
||||||
try:
|
|
||||||
tools = json.dumps([tool["function"] for tool in tool_list], ensure_ascii=False)
|
|
||||||
except Exception:
|
|
||||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid tools")
|
|
||||||
else:
|
|
||||||
tools = ""
|
|
||||||
|
|
||||||
if request.stream:
|
if request.stream:
|
||||||
if tools:
|
generate = create_stream_chat_completion_response(request, chat_model)
|
||||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Cannot stream function calls.")
|
|
||||||
|
|
||||||
generate = stream_chat_completion(input_messages, system, tools, request)
|
|
||||||
return EventSourceResponse(generate, media_type="text/event-stream")
|
return EventSourceResponse(generate, media_type="text/event-stream")
|
||||||
|
else:
|
||||||
|
return await create_chat_completion_response(request, chat_model)
|
||||||
|
|
||||||
responses = await chat_model.achat(
|
@app.post(
|
||||||
input_messages,
|
"/v1/score/evaluation",
|
||||||
system,
|
response_model=ScoreEvaluationResponse,
|
||||||
tools,
|
status_code=status.HTTP_200_OK,
|
||||||
do_sample=request.do_sample,
|
dependencies=[Depends(verify_api_key)],
|
||||||
temperature=request.temperature,
|
)
|
||||||
top_p=request.top_p,
|
|
||||||
max_new_tokens=request.max_tokens,
|
|
||||||
num_return_sequences=request.n,
|
|
||||||
)
|
|
||||||
|
|
||||||
prompt_length, response_length = 0, 0
|
|
||||||
choices = []
|
|
||||||
for i, response in enumerate(responses):
|
|
||||||
if tools:
|
|
||||||
result = chat_model.engine.template.format_tools.extract(response.response_text)
|
|
||||||
else:
|
|
||||||
result = response.response_text
|
|
||||||
|
|
||||||
if isinstance(result, tuple):
|
|
||||||
name, arguments = result
|
|
||||||
function = Function(name=name, arguments=arguments)
|
|
||||||
response_message = ChatCompletionMessage(
|
|
||||||
role=Role.ASSISTANT, tool_calls=[FunctionCall(function=function)]
|
|
||||||
)
|
|
||||||
finish_reason = Finish.TOOL
|
|
||||||
else:
|
|
||||||
response_message = ChatCompletionMessage(role=Role.ASSISTANT, content=result)
|
|
||||||
finish_reason = Finish.STOP if response.finish_reason == "stop" else Finish.LENGTH
|
|
||||||
|
|
||||||
choices.append(
|
|
||||||
ChatCompletionResponseChoice(index=i, message=response_message, finish_reason=finish_reason)
|
|
||||||
)
|
|
||||||
prompt_length = response.prompt_length
|
|
||||||
response_length += response.response_length
|
|
||||||
|
|
||||||
usage = ChatCompletionResponseUsage(
|
|
||||||
prompt_tokens=prompt_length,
|
|
||||||
completion_tokens=response_length,
|
|
||||||
total_tokens=prompt_length + response_length,
|
|
||||||
)
|
|
||||||
|
|
||||||
return ChatCompletionResponse(model=request.model, choices=choices, usage=usage)
|
|
||||||
|
|
||||||
async def stream_chat_completion(
|
|
||||||
messages: Sequence[Dict[str, str]], system: str, tools: str, request: ChatCompletionRequest
|
|
||||||
):
|
|
||||||
choice_data = ChatCompletionResponseStreamChoice(
|
|
||||||
index=0, delta=ChatCompletionMessage(role=Role.ASSISTANT, content=""), finish_reason=None
|
|
||||||
)
|
|
||||||
chunk = ChatCompletionStreamResponse(model=request.model, choices=[choice_data])
|
|
||||||
yield jsonify(chunk)
|
|
||||||
|
|
||||||
async for new_token in chat_model.astream_chat(
|
|
||||||
messages,
|
|
||||||
system,
|
|
||||||
tools,
|
|
||||||
do_sample=request.do_sample,
|
|
||||||
temperature=request.temperature,
|
|
||||||
top_p=request.top_p,
|
|
||||||
max_new_tokens=request.max_tokens,
|
|
||||||
):
|
|
||||||
if len(new_token) == 0:
|
|
||||||
continue
|
|
||||||
|
|
||||||
choice_data = ChatCompletionResponseStreamChoice(
|
|
||||||
index=0, delta=ChatCompletionMessage(content=new_token), finish_reason=None
|
|
||||||
)
|
|
||||||
chunk = ChatCompletionStreamResponse(model=request.model, choices=[choice_data])
|
|
||||||
yield jsonify(chunk)
|
|
||||||
|
|
||||||
choice_data = ChatCompletionResponseStreamChoice(
|
|
||||||
index=0, delta=ChatCompletionMessage(), finish_reason=Finish.STOP
|
|
||||||
)
|
|
||||||
chunk = ChatCompletionStreamResponse(model=request.model, choices=[choice_data])
|
|
||||||
yield jsonify(chunk)
|
|
||||||
yield "[DONE]"
|
|
||||||
|
|
||||||
@app.post("/v1/score/evaluation", response_model=ScoreEvaluationResponse, status_code=status.HTTP_200_OK)
|
|
||||||
async def create_score_evaluation(request: ScoreEvaluationRequest):
|
async def create_score_evaluation(request: ScoreEvaluationRequest):
|
||||||
if chat_model.engine.can_generate:
|
if chat_model.engine.can_generate:
|
||||||
raise HTTPException(status_code=status.HTTP_405_METHOD_NOT_ALLOWED, detail="Not allowed")
|
raise HTTPException(status_code=status.HTTP_405_METHOD_NOT_ALLOWED, detail="Not allowed")
|
||||||
|
|
||||||
if len(request.messages) == 0:
|
return await create_score_evaluation_response(request, chat_model)
|
||||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid request")
|
|
||||||
|
|
||||||
scores = await chat_model.aget_scores(request.messages, max_length=request.max_length)
|
|
||||||
return ScoreEvaluationResponse(model=request.model, scores=scores)
|
|
||||||
|
|
||||||
return app
|
return app
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
def run_api() -> None:
|
||||||
chat_model = ChatModel()
|
chat_model = ChatModel()
|
||||||
app = create_app(chat_model)
|
app = create_app(chat_model)
|
||||||
uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("API_PORT", 8000)), workers=1)
|
api_host = os.environ.get("API_HOST", "0.0.0.0")
|
||||||
|
api_port = int(os.environ.get("API_PORT", "8000"))
|
||||||
|
print("Visit http://localhost:{}/docs for API document.".format(api_port))
|
||||||
|
uvicorn.run(app, host=api_host, port=api_port)
|
||||||
|
|||||||
186
src/llmtuner/api/chat.py
Normal file
186
src/llmtuner/api/chat.py
Normal file
@@ -0,0 +1,186 @@
|
|||||||
|
import json
|
||||||
|
import uuid
|
||||||
|
from typing import TYPE_CHECKING, AsyncGenerator, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
from ..data import Role as DataRole
|
||||||
|
from ..extras.logging import get_logger
|
||||||
|
from ..extras.packages import is_fastapi_available
|
||||||
|
from .common import dictify, jsonify
|
||||||
|
from .protocol import (
|
||||||
|
ChatCompletionMessage,
|
||||||
|
ChatCompletionResponse,
|
||||||
|
ChatCompletionResponseChoice,
|
||||||
|
ChatCompletionResponseUsage,
|
||||||
|
ChatCompletionStreamResponse,
|
||||||
|
ChatCompletionStreamResponseChoice,
|
||||||
|
Finish,
|
||||||
|
Function,
|
||||||
|
FunctionCall,
|
||||||
|
Role,
|
||||||
|
ScoreEvaluationResponse,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
if is_fastapi_available():
|
||||||
|
from fastapi import HTTPException, status
|
||||||
|
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from ..chat import ChatModel
|
||||||
|
from .protocol import ChatCompletionRequest, ScoreEvaluationRequest
|
||||||
|
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
ROLE_MAPPING = {
|
||||||
|
Role.USER: DataRole.USER.value,
|
||||||
|
Role.ASSISTANT: DataRole.ASSISTANT.value,
|
||||||
|
Role.SYSTEM: DataRole.SYSTEM.value,
|
||||||
|
Role.FUNCTION: DataRole.FUNCTION.value,
|
||||||
|
Role.TOOL: DataRole.OBSERVATION.value,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _process_request(request: "ChatCompletionRequest") -> Tuple[List[Dict[str, str]], str, str]:
    """Validate a chat completion request and convert it to model inputs.

    A leading system message is removed from ``request.messages`` and returned
    separately. The remaining messages must have odd length and alternate
    between user/tool and assistant/function roles.

    Returns:
        A tuple ``(input_messages, system, tools)`` where ``input_messages`` is
        a list of ``{"role", "content"}`` dicts, ``system`` is the system prompt
        (empty string if absent) and ``tools`` is a JSON string of the function
        definitions (empty string if none were supplied).

    Raises:
        HTTPException: With status 400 when the messages are empty, have the
            wrong length or role order, or the tools cannot be serialized.
    """
    logger.info("==== request ====\n{}".format(json.dumps(dictify(request), indent=2, ensure_ascii=False)))

    if not request.messages:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid length")

    system = ""
    if request.messages[0].role == Role.SYSTEM:
        system = request.messages.pop(0).content

    if len(request.messages) % 2 == 0:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Only supports u/a/u/a/u...")

    even_roles = (Role.USER, Role.TOOL)
    odd_roles = (Role.ASSISTANT, Role.FUNCTION)

    input_messages = []
    for position, message in enumerate(request.messages):
        allowed_roles = even_roles if position % 2 == 0 else odd_roles
        if message.role not in allowed_roles:
            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid role")

        has_tool_call = (
            message.role == Role.ASSISTANT and isinstance(message.tool_calls, list) and len(message.tool_calls)
        )
        if has_tool_call:
            # Only the first tool call is forwarded, encoded as a function message.
            first_call = message.tool_calls[0].function
            content = json.dumps({"name": first_call.name, "argument": first_call.arguments}, ensure_ascii=False)
            input_messages.append({"role": ROLE_MAPPING[Role.FUNCTION], "content": content})
        else:
            input_messages.append({"role": ROLE_MAPPING[message.role], "content": message.content})

    tools = ""
    declared_tools = request.tools
    if isinstance(declared_tools, list) and len(declared_tools):
        try:
            definitions = [dictify(entry.function) for entry in declared_tools]
            tools = json.dumps(definitions, ensure_ascii=False)
        except Exception:
            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid tools")

    return input_messages, system, tools
|
||||||
|
|
||||||
|
|
||||||
|
def _create_stream_chat_completion_chunk(
    completion_id: str,
    model: str,
    delta: "ChatCompletionMessage",
    index: Optional[int] = 0,
    finish_reason: Optional["Finish"] = None,
) -> str:
    """Serialize one streaming chunk holding a single choice.

    Args:
        completion_id: Identifier placed on the chunk.
        model: Model name placed on the chunk.
        delta: Message delta carried by the choice.
        index: Index of the choice.
        finish_reason: Finish reason of the choice, if any.

    Returns:
        The chunk as produced by ``jsonify``.
    """
    return jsonify(
        ChatCompletionStreamResponse(
            id=completion_id,
            model=model,
            choices=[ChatCompletionStreamResponseChoice(index=index, delta=delta, finish_reason=finish_reason)],
        )
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def create_chat_completion_response(
    request: "ChatCompletionRequest", chat_model: "ChatModel"
) -> "ChatCompletionResponse":
    """Generate a non-streaming chat completion for ``request``.

    The request is validated by ``_process_request`` and passed to
    ``chat_model.achat``. When tools were supplied, each reply is run through
    the template's tool extractor; a tuple result becomes a tool-call message,
    anything else a plain text message.

    Returns:
        A ``ChatCompletionResponse`` with one choice per generated reply and
        token usage (prompt length of the last reply, summed response lengths).
    """
    completion_id = "chatcmpl-{}".format(uuid.uuid4().hex)
    input_messages, system, tools = _process_request(request)

    generation_kwargs = {
        "do_sample": request.do_sample,
        "temperature": request.temperature,
        "top_p": request.top_p,
        "max_new_tokens": request.max_tokens,
        "num_return_sequences": request.n,
        "stop": request.stop,
    }
    responses = await chat_model.achat(input_messages, system, tools, **generation_kwargs)

    choices = []
    prompt_length = 0
    response_length = 0
    for position, output in enumerate(responses):
        parsed = (
            chat_model.engine.template.format_tools.extract(output.response_text) if tools else output.response_text
        )

        if isinstance(parsed, tuple):
            tool_name, tool_arguments = parsed
            function = Function(name=tool_name, arguments=tool_arguments)
            call = FunctionCall(id="call_{}".format(uuid.uuid4().hex), function=function)
            message = ChatCompletionMessage(role=Role.ASSISTANT, tool_calls=[call])
            reason = Finish.TOOL
        else:
            message = ChatCompletionMessage(role=Role.ASSISTANT, content=parsed)
            reason = Finish.STOP if output.finish_reason == "stop" else Finish.LENGTH

        choices.append(ChatCompletionResponseChoice(index=position, message=message, finish_reason=reason))
        prompt_length = output.prompt_length
        response_length += output.response_length

    usage = ChatCompletionResponseUsage(
        prompt_tokens=prompt_length,
        completion_tokens=response_length,
        total_tokens=prompt_length + response_length,
    )
    return ChatCompletionResponse(id=completion_id, model=request.model, choices=choices, usage=usage)
|
||||||
|
|
||||||
|
|
||||||
|
async def create_stream_chat_completion_response(
    request: "ChatCompletionRequest", chat_model: "ChatModel"
) -> AsyncGenerator[str, None]:
    """Stream a chat completion for ``request`` as serialized chunks.

    Yields an opening chunk with the assistant role, one chunk per non-empty
    token from ``chat_model.astream_chat``, a closing chunk with the stop
    finish reason, and finally the literal ``"[DONE]"``.

    Raises:
        HTTPException: With status 400 when tools are supplied or more than
            one response is requested (neither can be streamed).
    """
    completion_id = "chatcmpl-{}".format(uuid.uuid4().hex)
    input_messages, system, tools = _process_request(request)
    if tools:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Cannot stream function calls.")

    if request.n > 1:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Cannot stream multiple responses.")

    def make_chunk(delta: "ChatCompletionMessage", finish_reason: Optional["Finish"] = None) -> str:
        # Every chunk of this stream shares the same completion id and model.
        return _create_stream_chat_completion_chunk(
            completion_id=completion_id, model=request.model, delta=delta, finish_reason=finish_reason
        )

    yield make_chunk(ChatCompletionMessage(role=Role.ASSISTANT, content=""))

    token_stream = chat_model.astream_chat(
        input_messages,
        system,
        tools,
        do_sample=request.do_sample,
        temperature=request.temperature,
        top_p=request.top_p,
        max_new_tokens=request.max_tokens,
        stop=request.stop,
    )
    async for new_token in token_stream:
        if len(new_token) == 0:
            continue

        yield make_chunk(ChatCompletionMessage(content=new_token))

    yield make_chunk(ChatCompletionMessage(), finish_reason=Finish.STOP)
    yield "[DONE]"
|
||||||
|
|
||||||
|
|
||||||
|
async def create_score_evaluation_response(
    request: "ScoreEvaluationRequest", chat_model: "ChatModel"
) -> "ScoreEvaluationResponse":
    """Score the messages in ``request`` with ``chat_model.aget_scores``.

    Returns:
        A ``ScoreEvaluationResponse`` carrying a freshly generated id, the
        requested model name and the scores.

    Raises:
        HTTPException: With status 400 when the request has no messages.
    """
    if len(request.messages) == 0:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid request")

    # ``ScoreEvaluationResponse.id`` is a required string with no default, so
    # an id must be supplied here; omitting it fails model validation.
    score_id = "scoreeval-{}".format(uuid.uuid4().hex)
    scores = await chat_model.aget_scores(request.messages, max_length=request.max_length)
    return ScoreEvaluationResponse(id=score_id, model=request.model, scores=scores)
|
||||||
20
src/llmtuner/api/common.py
Normal file
20
src/llmtuner/api/common.py
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
import json
|
||||||
|
from typing import TYPE_CHECKING, Any, Dict
|
||||||
|
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
|
||||||
|
def dictify(data: "BaseModel") -> Dict[str, Any]:
    """Convert a pydantic model to a dict, leaving out fields that were never set.

    Tries the pydantic v2 method ``model_dump`` first and falls back to the
    v1 method ``dict`` when an ``AttributeError`` is raised.
    """
    try:
        dumped = data.model_dump(exclude_unset=True)  # pydantic v2
    except AttributeError:
        dumped = data.dict(exclude_unset=True)  # pydantic v1
    return dumped
|
||||||
|
|
||||||
|
|
||||||
|
def jsonify(data: "BaseModel") -> str:
    """Serialize a pydantic model to JSON text, leaving out fields that were never set.

    Non-ASCII characters are kept as-is. Tries the pydantic v2 method
    ``model_dump`` first and falls back to the v1 method ``json`` when an
    ``AttributeError`` is raised.
    """
    try:  # pydantic v2
        payload = data.model_dump(exclude_unset=True)
        return json.dumps(payload, ensure_ascii=False)
    except AttributeError:  # pydantic v1
        return data.json(exclude_unset=True, ensure_ascii=False)
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
import time
|
import time
|
||||||
from enum import Enum, unique
|
from enum import Enum, unique
|
||||||
from typing import List, Optional
|
from typing import Any, Dict, List, Optional, Union
|
||||||
|
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
from typing_extensions import Literal
|
from typing_extensions import Literal
|
||||||
@@ -39,15 +39,27 @@ class Function(BaseModel):
|
|||||||
arguments: str
|
arguments: str
|
||||||
|
|
||||||
|
|
||||||
|
class FunctionDefinition(BaseModel):
|
||||||
|
name: str
|
||||||
|
description: str
|
||||||
|
parameters: Dict[str, Any]
|
||||||
|
|
||||||
|
|
||||||
|
class FunctionAvailable(BaseModel):
|
||||||
|
type: Literal["function", "code_interpreter"] = "function"
|
||||||
|
function: Optional[FunctionDefinition] = None
|
||||||
|
|
||||||
|
|
||||||
class FunctionCall(BaseModel):
|
class FunctionCall(BaseModel):
|
||||||
id: Literal["call_default"] = "call_default"
|
id: str
|
||||||
type: Literal["function"] = "function"
|
type: Literal["function"] = "function"
|
||||||
function: Function
|
function: Function
|
||||||
|
|
||||||
|
|
||||||
class ChatMessage(BaseModel):
|
class ChatMessage(BaseModel):
|
||||||
role: Role
|
role: Role
|
||||||
content: str
|
content: Optional[str] = None
|
||||||
|
tool_calls: Optional[List[FunctionCall]] = None
|
||||||
|
|
||||||
|
|
||||||
class ChatCompletionMessage(BaseModel):
|
class ChatCompletionMessage(BaseModel):
|
||||||
@@ -59,12 +71,13 @@ class ChatCompletionMessage(BaseModel):
|
|||||||
class ChatCompletionRequest(BaseModel):
|
class ChatCompletionRequest(BaseModel):
|
||||||
model: str
|
model: str
|
||||||
messages: List[ChatMessage]
|
messages: List[ChatMessage]
|
||||||
tools: list = []
|
tools: Optional[List[FunctionAvailable]] = None
|
||||||
do_sample: bool = True
|
do_sample: bool = True
|
||||||
temperature: Optional[float] = None
|
temperature: Optional[float] = None
|
||||||
top_p: Optional[float] = None
|
top_p: Optional[float] = None
|
||||||
n: int = 1
|
n: int = 1
|
||||||
max_tokens: Optional[int] = None
|
max_tokens: Optional[int] = None
|
||||||
|
stop: Optional[Union[str, List[str]]] = None
|
||||||
stream: bool = False
|
stream: bool = False
|
||||||
|
|
||||||
|
|
||||||
@@ -74,7 +87,7 @@ class ChatCompletionResponseChoice(BaseModel):
|
|||||||
finish_reason: Finish
|
finish_reason: Finish
|
||||||
|
|
||||||
|
|
||||||
class ChatCompletionResponseStreamChoice(BaseModel):
|
class ChatCompletionStreamResponseChoice(BaseModel):
|
||||||
index: int
|
index: int
|
||||||
delta: ChatCompletionMessage
|
delta: ChatCompletionMessage
|
||||||
finish_reason: Optional[Finish] = None
|
finish_reason: Optional[Finish] = None
|
||||||
@@ -87,7 +100,7 @@ class ChatCompletionResponseUsage(BaseModel):
|
|||||||
|
|
||||||
|
|
||||||
class ChatCompletionResponse(BaseModel):
|
class ChatCompletionResponse(BaseModel):
|
||||||
id: Literal["chatcmpl-default"] = "chatcmpl-default"
|
id: str
|
||||||
object: Literal["chat.completion"] = "chat.completion"
|
object: Literal["chat.completion"] = "chat.completion"
|
||||||
created: int = Field(default_factory=lambda: int(time.time()))
|
created: int = Field(default_factory=lambda: int(time.time()))
|
||||||
model: str
|
model: str
|
||||||
@@ -96,11 +109,11 @@ class ChatCompletionResponse(BaseModel):
|
|||||||
|
|
||||||
|
|
||||||
class ChatCompletionStreamResponse(BaseModel):
|
class ChatCompletionStreamResponse(BaseModel):
|
||||||
id: Literal["chatcmpl-default"] = "chatcmpl-default"
|
id: str
|
||||||
object: Literal["chat.completion.chunk"] = "chat.completion.chunk"
|
object: Literal["chat.completion.chunk"] = "chat.completion.chunk"
|
||||||
created: int = Field(default_factory=lambda: int(time.time()))
|
created: int = Field(default_factory=lambda: int(time.time()))
|
||||||
model: str
|
model: str
|
||||||
choices: List[ChatCompletionResponseStreamChoice]
|
choices: List[ChatCompletionStreamResponseChoice]
|
||||||
|
|
||||||
|
|
||||||
class ScoreEvaluationRequest(BaseModel):
|
class ScoreEvaluationRequest(BaseModel):
|
||||||
@@ -110,7 +123,7 @@ class ScoreEvaluationRequest(BaseModel):
|
|||||||
|
|
||||||
|
|
||||||
class ScoreEvaluationResponse(BaseModel):
|
class ScoreEvaluationResponse(BaseModel):
|
||||||
id: Literal["scoreeval-default"] = "scoreeval-default"
|
id: str
|
||||||
object: Literal["score.evaluation"] = "score.evaluation"
|
object: Literal["score.evaluation"] = "score.evaluation"
|
||||||
model: str
|
model: str
|
||||||
scores: List[float]
|
scores: List[float]
|
||||||
|
|||||||
@@ -4,15 +4,13 @@ from typing import TYPE_CHECKING, Any, AsyncGenerator, Dict, List, Literal, Opti
|
|||||||
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
|
from numpy.typing import NDArray
|
||||||
from transformers import PreTrainedModel, PreTrainedTokenizer
|
from transformers import PreTrainedModel, PreTrainedTokenizer
|
||||||
|
from vllm import AsyncLLMEngine
|
||||||
|
|
||||||
from ..data import Template
|
from ..data import Template
|
||||||
from ..extras.packages import is_vllm_available
|
|
||||||
from ..hparams import DataArguments, FinetuningArguments, GeneratingArguments, ModelArguments
|
from ..hparams import DataArguments, FinetuningArguments, GeneratingArguments, ModelArguments
|
||||||
|
|
||||||
if is_vllm_available():
|
|
||||||
from vllm import AsyncLLMEngine
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Response:
|
class Response:
|
||||||
@@ -49,6 +47,7 @@ class BaseEngine(ABC):
|
|||||||
messages: Sequence[Dict[str, str]],
|
messages: Sequence[Dict[str, str]],
|
||||||
system: Optional[str] = None,
|
system: Optional[str] = None,
|
||||||
tools: Optional[str] = None,
|
tools: Optional[str] = None,
|
||||||
|
image: Optional["NDArray"] = None,
|
||||||
**input_kwargs,
|
**input_kwargs,
|
||||||
) -> List["Response"]: ...
|
) -> List["Response"]: ...
|
||||||
|
|
||||||
@@ -58,6 +57,7 @@ class BaseEngine(ABC):
|
|||||||
messages: Sequence[Dict[str, str]],
|
messages: Sequence[Dict[str, str]],
|
||||||
system: Optional[str] = None,
|
system: Optional[str] = None,
|
||||||
tools: Optional[str] = None,
|
tools: Optional[str] = None,
|
||||||
|
image: Optional["NDArray"] = None,
|
||||||
**input_kwargs,
|
**input_kwargs,
|
||||||
) -> AsyncGenerator[str, None]: ...
|
) -> AsyncGenerator[str, None]: ...
|
||||||
|
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user