Compare commits

11 commits:

- fbd0584391
- 50224b09cc
- 32dcc5a491
- 9408366a36
- f0e564beaa
- 14b75a0b93
- 59e6ebf039
- dc540dfaa8
- 587e65e442
- a916688723
- 3336422760
README.md (39 changed lines)

@@ -5,7 +5,7 @@
 [](https://github.com/hiyouga/LLaMA-Factory/commits/main)
 [](https://pypi.org/project/llmtuner/)
 [](https://pypi.org/project/llmtuner/)
 [](#projects-using-llama-factory)
 [](https://github.com/hiyouga/LLaMA-Factory/pulls)
 [](https://discord.gg/rKfvV9r9FK)
 [](https://twitter.com/llamafactory_ai)
@@ -72,14 +72,14 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/

 [24/03/20] We supported **FSDP+QLoRA** that fine-tunes a 70B model on 2x24GB GPUs. See `examples/fsdp_qlora` for usage.

-[24/03/13] We supported **[LoRA+](https://arxiv.org/abs/2402.12354)**. Try `loraplus_lr_ratio=16.0` to enable LoRA+ algorithm.
+[24/03/13] We supported **[LoRA+](https://arxiv.org/abs/2402.12354)**. See `examples/extras/loraplus` for usage.

-[24/03/07] We supported gradient low-rank projection (**[GaLore](https://arxiv.org/abs/2403.03507)**) algorithm. Try `--use_galore` to use the memory-efficient optimizer.
+[24/03/07] We supported gradient low-rank projection (**[GaLore](https://arxiv.org/abs/2403.03507)**) algorithm. See `examples/extras/galore` for usage.

-[24/03/07] We integrated **[vLLM](https://github.com/vllm-project/vllm)** for faster and concurrent inference. Try `--infer_backend vllm` to enjoy **270%** inference speed. (LoRA is not yet supported, merge it first.)

 <details><summary>Full Changelog</summary>

+[24/03/07] We integrated **[vLLM](https://github.com/vllm-project/vllm)** for faster and concurrent inference. Try `--infer_backend vllm` to enjoy **270%** inference speed. (LoRA is not yet supported, merge it first.)

 [24/02/28] We supported weight-decomposed LoRA (**[DoRA](https://arxiv.org/abs/2402.09353)**). Try `--use_dora` to activate DoRA training.

 [24/02/15] We supported **block expansion** proposed by [LLaMA Pro](https://github.com/TencentARC/LLaMA-Pro). See `examples/extras/llama_pro` for usage.
@@ -451,7 +451,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
 ```

 > [!TIP]
-> Use `--adapter_name_or_path path_to_sft_checkpoint,path_to_ppo_checkpoint` to infer the fine-tuned model.
+> Use `--adapter_name_or_path path_to_sft_checkpoint,path_to_ppo_checkpoint` to infer the fine-tuned model if `--create_new_adapter` was enabled.

 > [!WARNING]
 > Use `--per_device_train_batch_size=1` for LLaMA-2 models in fp16 PPO training.
@@ -482,7 +482,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
 ```

 > [!TIP]
-> Use `--adapter_name_or_path path_to_sft_checkpoint,path_to_dpo_checkpoint` to infer the fine-tuned model.
+> Use `--adapter_name_or_path path_to_sft_checkpoint,path_to_dpo_checkpoint` to infer the fine-tuned model if `--create_new_adapter` was enabled.

 ### Distributed Training

@@ -570,7 +570,7 @@ deepspeed --num_gpus 8 src/train_bash.py \
 ### Merge LoRA weights and export model

 ```bash
-CUDA_VISIBLE_DEVICES=0 python src/export_model.py \
+CUDA_VISIBLE_DEVICES= python src/export_model.py \
     --model_name_or_path path_to_llama_model \
     --adapter_name_or_path path_to_checkpoint \
     --template default \
@@ -586,7 +586,7 @@ CUDA_VISIBLE_DEVICES=0 python src/export_model.py \
 > [!TIP]
 > Use `--model_name_or_path path_to_export` solely to use the exported model.
 >
-> Use `--export_quantization_bit 4` and `--export_quantization_dataset data/c4_demo.json` to quantize the model with AutoGPTQ after merging the LoRA weights.
+> Use `CUDA_VISIBLE_DEVICES=0`, `--export_quantization_bit 4` and `--export_quantization_dataset data/c4_demo.json` to quantize the model with AutoGPTQ after merging the LoRA weights.

 ### Inference with OpenAI-style API

@@ -662,19 +662,23 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \

 ### Dockerize Training

-#### Get ready
+#### Use Docker

-Necessary dockerized environment is needed, such as Docker or Docker Compose.

-#### Docker support

 ```bash
 docker build -f ./Dockerfile -t llama-factory:latest .

-docker run --gpus=all -v ./hf_cache:/root/.cache/huggingface/ -v ./data:/app/data -v ./output:/app/output -p 7860:7860 --shm-size 16G --name llama_factory -d llama-factory:latest
+docker run --gpus=all \
+    -v ./hf_cache:/root/.cache/huggingface/ \
+    -v ./data:/app/data \
+    -v ./output:/app/output \
+    -e CUDA_VISIBLE_DEVICES=0 \
+    -p 7860:7860 \
+    --shm-size 16G \
+    --name llama_factory \
+    -d llama-factory:latest
 ```

-#### Docker Compose support
+#### Use Docker Compose

 ```bash
 docker compose -f ./docker-compose.yml up -d
@@ -682,7 +686,7 @@ docker compose -f ./docker-compose.yml up -d

 > [!TIP]
 > Details about volume:
-> * hf_cache: Utilize Huggingface cache on the host machine. Reassignable if a cache already exists in a different directory.
+> * hf_cache: Utilize Hugging Face cache on the host machine. Reassignable if a cache already exists in a different directory.
 > * data: Place datasets on this dir of the host machine so that they can be selected on LLaMA Board GUI.
 > * output: Set export dir to this location so that the merged result can be accessed directly on the host machine.

@@ -709,6 +713,7 @@ docker compose -f ./docker-compose.yml up -d
 1. Huang et al. Key-Point-Driven Data Synthesis with its Enhancement on Mathematical Reasoning. 2024. [[arxiv]](https://arxiv.org/abs/2403.02333)
 1. Duan et al. Negating Negatives: Alignment without Human Positive Samples via Distributional Dispreference Optimization. 2024. [[arxiv]](https://arxiv.org/abs/2403.03419)
 1. Xie and Schwertfeger. Empowering Robotics with Large Language Models: osmAG Map Comprehension with LLMs. 2024. [[arxiv]](https://arxiv.org/abs/2403.08228)
+1. Hongbin Na. CBT-LLM: A Chinese Large Language Model for Cognitive Behavioral Therapy-based Mental Health Question Answering. 2024. [[arxiv]](https://arxiv.org/abs/2403.16008)
 1. **[StarWhisper](https://github.com/Yu-Yang-Li/StarWhisper)**: A large language model for Astronomy, based on ChatGLM2-6B and Qwen-14B.
 1. **[DISC-LawLLM](https://github.com/FudanDISC/DISC-LawLLM)**: A large language model specialized in Chinese legal domain, based on Baichuan-13B, is capable of retrieving and reasoning on legal knowledge.
 1. **[Sunsimiao](https://github.com/thomas-yanxin/Sunsimiao)**: A large language model specialized in Chinese medical domain, based on Baichuan-7B and ChatGLM-6B.
README_zh.md (49 changed lines)

@@ -5,7 +5,7 @@
 [](https://github.com/hiyouga/LLaMA-Factory/commits/main)
 [](https://pypi.org/project/llmtuner/)
 [](https://pypi.org/project/llmtuner/)
 [](#使用了-llama-factory-的项目)
 [](https://github.com/hiyouga/LLaMA-Factory/pulls)
 [](https://discord.gg/rKfvV9r9FK)
 [](https://twitter.com/llamafactory_ai)
@@ -72,14 +72,14 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd

 [24/03/20] 我们支持了能在 2x24GB GPU 上微调 70B 模型的 **FSDP+QLoRA**。详细用法请参照 `examples/fsdp_qlora`。

-[24/03/13] 我们支持了 **[LoRA+](https://arxiv.org/abs/2402.12354)**。请使用 `loraplus_lr_ratio=16.0` 参数开启 LoRA+ 方法。
+[24/03/13] 我们支持了 **[LoRA+](https://arxiv.org/abs/2402.12354)**。详细用法请参照 `examples/extras/loraplus`。

-[24/03/07] 我们支持了梯度低秩投影(**[GaLore](https://arxiv.org/abs/2403.03507)**)算法。请使用 `--use_galore` 参数切换显存高效的优化器。
+[24/03/07] 我们支持了梯度低秩投影(**[GaLore](https://arxiv.org/abs/2403.03507)**)算法。详细用法请参照 `examples/extras/galore`。

-[24/03/07] 我们集成了 **[vLLM](https://github.com/vllm-project/vllm)** 以实现极速并发推理。请使用 `--infer_backend vllm` 来获得 **270%** 的推理速度。(尚不支持 LoRA,请先合并权重。)

 <details><summary>展开日志</summary>

+[24/03/07] 我们集成了 **[vLLM](https://github.com/vllm-project/vllm)** 以实现极速并发推理。请使用 `--infer_backend vllm` 来获得 **270%** 的推理速度。(尚不支持 LoRA,请先合并权重。)

 [24/02/28] 我们支持了 **[DoRA](https://arxiv.org/abs/2402.09353)** 微调。请使用 `--use_dora` 参数进行 DoRA 微调。

 [24/02/15] 我们支持了 [LLaMA Pro](https://github.com/TencentARC/LLaMA-Pro) 提出的**块扩展**方法。详细用法请参照 `examples/extras/llama_pro`。
@@ -450,7 +450,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
 ```

 > [!TIP]
-> 使用 `--adapter_name_or_path path_to_sft_checkpoint,path_to_ppo_checkpoint` 来进行微调模型的推理。
+> 如果开启了 `--create_new_adapter`,则使用 `--adapter_name_or_path path_to_sft_checkpoint,path_to_ppo_checkpoint` 来进行微调模型的推理。

 > [!WARNING]
 > 如果使用 fp16 精度进行 LLaMA-2 模型的 PPO 训练,请使用 `--per_device_train_batch_size=1`。
@@ -481,7 +481,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
 ```

 > [!TIP]
-> 使用 `--adapter_name_or_path path_to_sft_checkpoint,path_to_dpo_checkpoint` 来进行微调模型的推理。
+> 如果开启了 `--create_new_adapter`,则使用 `--adapter_name_or_path path_to_sft_checkpoint,path_to_dpo_checkpoint` 来进行微调模型的推理。

 ### 多 GPU 分布式训练

@@ -569,7 +569,7 @@ deepspeed --num_gpus 8 src/train_bash.py \
 ### 合并 LoRA 权重并导出模型

 ```bash
-CUDA_VISIBLE_DEVICES=0 python src/export_model.py \
+CUDA_VISIBLE_DEVICES= python src/export_model.py \
     --model_name_or_path path_to_llama_model \
     --adapter_name_or_path path_to_checkpoint \
     --template default \
@@ -585,7 +585,7 @@ CUDA_VISIBLE_DEVICES=0 python src/export_model.py \
 > [!TIP]
 > 仅使用 `--model_name_or_path path_to_export` 来加载导出后的模型。
 >
-> 合并 LoRA 权重之后可再次使用 `--export_quantization_bit 4` 和 `--export_quantization_dataset data/c4_demo.json` 基于 AutoGPTQ 量化模型。
+> 合并 LoRA 权重之后可再次使用 `CUDA_VISIBLE_DEVICES=0`、`--export_quantization_bit 4` 和 `--export_quantization_dataset data/c4_demo.json` 基于 AutoGPTQ 量化模型。

 ### 使用 OpenAI 风格 API 推理

@@ -659,6 +659,36 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
 > [!TIP]
 > 我们建议在量化模型的预测中使用 `--per_device_eval_batch_size=1` 和 `--max_target_length 128`。

+### 使用容器
+
+#### 使用 Docker
+
+```bash
+docker build -f ./Dockerfile -t llama-factory:latest .
+
+docker run --gpus=all \
+    -v ./hf_cache:/root/.cache/huggingface/ \
+    -v ./data:/app/data \
+    -v ./output:/app/output \
+    -e CUDA_VISIBLE_DEVICES=0 \
+    -p 7860:7860 \
+    --shm-size 16G \
+    --name llama_factory \
+    -d llama-factory:latest
+```
+
+#### 使用 Docker Compose
+
+```bash
+docker compose -f ./docker-compose.yml up -d
+```
+
+> [!TIP]
+> 数据卷详情:
+> * hf_cache:使用宿主机的 Hugging Face 缓存文件夹,允许更改为新的目录。
+> * data:宿主机中存放数据集的文件夹路径。
+> * output:将导出目录设置为该路径后,即可在宿主机中访问导出后的模型。

 ## 使用了 LLaMA Factory 的项目

 1. Wang et al. ESRL: Efficient Sampling-based Reinforcement Learning for Sequence Generation. 2023. [[arxiv]](https://arxiv.org/abs/2308.02223)
@@ -682,6 +712,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
 1. Huang et al. Key-Point-Driven Data Synthesis with its Enhancement on Mathematical Reasoning. 2024. [[arxiv]](https://arxiv.org/abs/2403.02333)
 1. Duan et al. Negating Negatives: Alignment without Human Positive Samples via Distributional Dispreference Optimization. 2024. [[arxiv]](https://arxiv.org/abs/2403.03419)
 1. Xie and Schwertfeger. Empowering Robotics with Large Language Models: osmAG Map Comprehension with LLMs. 2024. [[arxiv]](https://arxiv.org/abs/2403.08228)
+1. Hongbin Na. CBT-LLM: A Chinese Large Language Model for Cognitive Behavioral Therapy-based Mental Health Question Answering. 2024. [[arxiv]](https://arxiv.org/abs/2403.16008)
 1. **[StarWhisper](https://github.com/Yu-Yang-Li/StarWhisper)**: 天文大模型 StarWhisper,基于 ChatGLM2-6B 和 Qwen-14B 在天文数据上微调而得。
 1. **[DISC-LawLLM](https://github.com/FudanDISC/DISC-LawLLM)**: 中文法律领域大模型 DISC-LawLLM,基于 Baichuan-13B 微调而得,具有法律推理和知识检索能力。
 1. **[Sunsimiao](https://github.com/thomas-yanxin/Sunsimiao)**: 孙思邈中文医疗大模型 Sumsimiao,基于 Baichuan-7B 和 ChatGLM-6B 在中文医疗数据上微调而得。
docker-compose.yml

@@ -10,6 +10,8 @@ services:
       - ./hf_cache:/root/.cache/huggingface/
       - ./data:/app/data
       - ./output:/app/output
+    environment:
+      - CUDA_VISIBLE_DEVICES=0
     ports:
       - "7860:7860"
     ipc: host
@@ -9,7 +9,7 @@ fsdp_config:
   fsdp_forward_prefetch: false
   fsdp_offload_params: true
   fsdp_sharding_strategy: FULL_SHARD
-  fsdp_state_dict_type: SHARDED_STATE_DICT
+  fsdp_state_dict_type: FULL_STATE_DICT
   fsdp_sync_module_states: true
   fsdp_use_orig_params: false
 machine_rank: 0
@@ -1,31 +0,0 @@
-#!/bin/bash
-
-CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \
-    --stage sft \
-    --do_train \
-    --model_name_or_path meta-llama/Llama-2-7b-hf \
-    --dataset alpaca_gpt4_en,glaive_toolcall \
-    --dataset_dir ../../../data \
-    --template default \
-    --finetuning_type full \
-    --output_dir ../../../saves/LLaMA2-7B/galore/sft \
-    --overwrite_cache \
-    --overwrite_output_dir \
-    --cutoff_len 1024 \
-    --preprocessing_num_workers 16 \
-    --per_device_train_batch_size 1 \
-    --per_device_eval_batch_size 1 \
-    --gradient_accumulation_steps 1 \
-    --lr_scheduler_type cosine \
-    --logging_steps 10 \
-    --warmup_steps 20 \
-    --save_steps 100 \
-    --eval_steps 100 \
-    --evaluation_strategy steps \
-    --load_best_model_at_end \
-    --learning_rate 5e-5 \
-    --num_train_epochs 3.0 \
-    --max_samples 3000 \
-    --val_size 0.1 \
-    --plot_loss \
-    --fp16
@@ -1,32 +0,0 @@
-#!/bin/bash
-
-CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \
-    --stage sft \
-    --do_train \
-    --model_name_or_path meta-llama/Llama-2-7b-hf \
-    --dataset alpaca_gpt4_en,glaive_toolcall \
-    --dataset_dir ../../../data \
-    --template default \
-    --finetuning_type full \
-    --optim adamw_8bit \
-    --output_dir ../../../saves/LLaMA2-7B/galore/sft \
-    --overwrite_cache \
-    --overwrite_output_dir \
-    --cutoff_len 1024 \
-    --preprocessing_num_workers 16 \
-    --per_device_train_batch_size 1 \
-    --per_device_eval_batch_size 1 \
-    --gradient_accumulation_steps 1 \
-    --lr_scheduler_type cosine \
-    --logging_steps 10 \
-    --warmup_steps 20 \
-    --save_steps 100 \
-    --eval_steps 100 \
-    --evaluation_strategy steps \
-    --load_best_model_at_end \
-    --learning_rate 5e-5 \
-    --num_train_epochs 3.0 \
-    --max_samples 3000 \
-    --val_size 0.1 \
-    --plot_loss \
-    --pure_bf16
@@ -1,36 +0,0 @@
-#!/bin/bash
-
-CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \
-    --stage sft \
-    --do_train \
-    --model_name_or_path meta-llama/Llama-2-7b-hf \
-    --dataset alpaca_gpt4_en,glaive_toolcall \
-    --dataset_dir ../../../data \
-    --template default \
-    --finetuning_type full \
-    --optim adamw_8bit \
-    --use_galore \
-    --galore_layerwise \
-    --galore_target mlp,self_attn \
-    --galore_rank 128 \
-    --output_dir ../../../saves/LLaMA2-7B/galore/sft \
-    --overwrite_cache \
-    --overwrite_output_dir \
-    --cutoff_len 1024 \
-    --preprocessing_num_workers 16 \
-    --per_device_train_batch_size 1 \
-    --per_device_eval_batch_size 1 \
-    --gradient_accumulation_steps 1 \
-    --lr_scheduler_type cosine \
-    --logging_steps 10 \
-    --warmup_steps 20 \
-    --save_steps 100 \
-    --eval_steps 100 \
-    --evaluation_strategy steps \
-    --load_best_model_at_end \
-    --learning_rate 5e-5 \
-    --num_train_epochs 3.0 \
-    --max_samples 3000 \
-    --val_size 0.1 \
-    --plot_loss \
-    --pure_bf16
@@ -32,4 +32,4 @@ CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \
     --max_samples 3000 \
     --val_size 0.1 \
     --plot_loss \
-    --fp16
+    --pure_bf16
@@ -1,5 +1,5 @@
 ```bash
-pip install git+https://github.com/huggingface/transformers.git
+pip install "transformers>=4.39.1"
 pip install "accelerate>=0.28.0"
 pip install "bitsandbytes>=0.43.0"
 ```
@@ -2,7 +2,7 @@ torch>=1.13.1
 transformers>=4.37.2
 datasets>=2.14.3
 accelerate>=0.27.2
-peft>=0.9.0
+peft>=0.10.0
 trl>=0.8.1
 gradio>=3.38.0,<4.0.0
 scipy
@@ -7,5 +7,5 @@ from .train import export_model, run_exp
 from .webui import create_ui, create_web_demo


-__version__ = "0.6.0"
+__version__ = "0.6.1"
 __all__ = ["create_app", "ChatModel", "Evaluator", "export_model", "run_exp", "create_ui", "create_web_demo"]
@@ -689,6 +689,8 @@ _register_template(

 _register_template(
     name="vanilla",
+    format_separator=EmptyFormatter(slots=["\n"]),
+    efficient_eos=True,
 )
@@ -1,14 +1,10 @@
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Dict, List, Tuple
+from typing import Dict, List, Sequence, Tuple

 from ..data import Role
 from ..extras.constants import CHOICES


-if TYPE_CHECKING:
-    from datasets import Dataset
-

 @dataclass
 class EvalTemplate:
     system: str
@@ -16,22 +12,29 @@ class EvalTemplate:
     answer: str
     prefix: str

-    def parse_example(self, example: Dict[str, str]) -> Tuple[str, str]:
+    def _parse_example(self, example: Dict[str, str]) -> Tuple[str, str]:
+        r"""
+        input: a dict with keys {"question", "A", "B", "C", "D", "answer"}
+        output: a tuple of (prompt, response)
+        """
         candidates = [self.choice.format(choice=ch, content=example[ch]) for ch in CHOICES if ch in example]
         return "".join([example["question"]] + candidates + [self.answer]), example["answer"]

     def format_example(
-        self, target_data: Dict[str, str], support_set: "Dataset", subject_name: str
+        self, target_data: Dict[str, str], support_set: Sequence[Dict[str, str]], subject_name: str
     ) -> List[Dict[str, str]]:
+        r"""
+        Converts dataset examples to messages.
+        """
         messages = []
         for k in range(len(support_set)):
-            prompt, response = self.parse_example(support_set[k])
-            messages.append({"role": Role.USER, "content": prompt})
-            messages.append({"role": Role.ASSISTANT, "content": response})
+            prompt, response = self._parse_example(support_set[k])
+            messages.append({"role": Role.USER.value, "content": prompt})
+            messages.append({"role": Role.ASSISTANT.value, "content": response})

-        prompt, response = self.parse_example(target_data)
-        messages.append({"role": Role.USER, "content": prompt})
-        messages.append({"role": Role.ASSISTANT, "content": response})
+        prompt, response = self._parse_example(target_data)
+        messages.append({"role": Role.USER.value, "content": prompt})
+        messages.append({"role": Role.ASSISTANT.value, "content": response})
         messages[0]["content"] = self.system.format(subject=subject_name) + messages[0]["content"]
         return messages

@@ -39,7 +42,7 @@ class EvalTemplate:
 eval_templates: Dict[str, "EvalTemplate"] = {}


-def register_eval_template(name: str, system: str, choice: str, answer: str, prefix: str) -> None:
+def _register_eval_template(name: str, system: str, choice: str, answer: str, prefix: str) -> None:
     eval_templates[name] = EvalTemplate(system=system, choice=choice, answer=answer, prefix=prefix)


@@ -49,7 +52,7 @@ def get_eval_template(name: str) -> "EvalTemplate":
     return eval_template


-register_eval_template(
+_register_eval_template(
     name="en",
     system="The following are multiple choice questions (with answers) about {subject}.\n\n",
     choice="\n{choice}. {content}",
@@ -58,10 +61,10 @@ register_eval_template(
 )


-register_eval_template(
+_register_eval_template(
     name="zh",
     system="以下是中国关于{subject}考试的单项选择题,请选出其中的正确答案。\n\n",
     choice="\n{choice}. {content}",
     answer="\n答案:",
-    prefix="\n",
+    prefix=" ",
 )
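For reference, a minimal standalone sketch of the few-shot prompt assembly that `format_example` performs after this change. The `CHOICES` list, the example dicts, and the `"\nAnswer:"` suffix below are illustrative stand-ins rather than the package's actual constants; roles are emitted as plain strings, matching the switch to `Role.USER.value` / `Role.ASSISTANT.value`.

```python
from typing import Dict, List, Sequence, Tuple

CHOICES = ["A", "B", "C", "D"]  # stand-in for llmtuner's CHOICES constant


def parse_example(example: Dict[str, str]) -> Tuple[str, str]:
    # input: a dict with keys {"question", "A", "B", "C", "D", "answer"}
    # output: a (prompt, response) pair, mirroring EvalTemplate._parse_example
    candidates = ["\n{}. {}".format(ch, example[ch]) for ch in CHOICES if ch in example]
    return "".join([example["question"]] + candidates + ["\nAnswer:"]), example["answer"]


def format_example(
    target: Dict[str, str], support_set: Sequence[Dict[str, str]], subject: str
) -> List[Dict[str, str]]:
    # support_set is now a plain sequence of dicts (no datasets.Dataset dependency),
    # and the system prompt is prepended to the first user turn
    messages = []
    for shot in list(support_set) + [target]:
        prompt, response = parse_example(shot)
        messages.append({"role": "user", "content": prompt})
        messages.append({"role": "assistant", "content": response})
    system = "The following are multiple choice questions (with answers) about {}.\n\n".format(subject)
    messages[0]["content"] = system + messages[0]["content"]
    return messages


support = [{"question": "1 + 1 = ?", "A": "1", "B": "2", "C": "3", "D": "4", "answer": "B"}]
target = {"question": "2 + 2 = ?", "A": "2", "B": "3", "C": "4", "D": "5", "answer": "C"}
for msg in format_example(target, support, "elementary mathematics"):
    print(msg["role"], ":", msg["content"][:60])
```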
@@ -58,9 +58,17 @@ class LogCallback(TrainerCallback):
         self.in_training = True
         self.start_time = time.time()
         self.max_steps = state.max_steps
-        if os.path.exists(os.path.join(args.output_dir, LOG_FILE_NAME)) and args.overwrite_output_dir:
-            logger.warning("Previous log file in this folder will be deleted.")
-            os.remove(os.path.join(args.output_dir, LOG_FILE_NAME))
+        if args.save_on_each_node:
+            if not state.is_local_process_zero:
+                return
+        else:
+            if not state.is_world_process_zero:
+                return

+        if os.path.exists(os.path.join(args.output_dir, LOG_FILE_NAME)) and args.overwrite_output_dir:
+            logger.warning("Previous log file in this folder will be deleted.")
+            os.remove(os.path.join(args.output_dir, LOG_FILE_NAME))

     def on_train_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
         r"""
@@ -112,8 +120,12 @@ class LogCallback(TrainerCallback):
         r"""
         Event called after logging the last logs.
         """
-        if not state.is_local_process_zero:
-            return
+        if args.save_on_each_node:
+            if not state.is_local_process_zero:
+                return
+        else:
+            if not state.is_world_process_zero:
+                return

         logs = dict(
             current_steps=self.cur_steps,
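Both hunks apply the same process-rank gating rule; a tiny sketch of the rule in isolation (the function name is mine, not the project's):

```python
def should_log_on_this_process(save_on_each_node: bool, is_local_zero: bool, is_world_zero: bool) -> bool:
    # Mirrors the new gating in LogCallback: with save_on_each_node, each node's
    # local rank 0 writes its own log file; otherwise only the global rank 0 does.
    if save_on_each_node:
        return is_local_zero
    return is_world_zero


# e.g. a 2-node run, viewed from node 1's local rank 0 process:
assert should_log_on_this_process(True, is_local_zero=True, is_world_zero=False) is True
assert should_log_on_this_process(False, is_local_zero=True, is_world_zero=False) is False
```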
@@ -64,7 +64,7 @@ def check_dependencies() -> None:
     require_version("transformers>=4.37.2", "To fix: pip install transformers>=4.37.2")
     require_version("datasets>=2.14.3", "To fix: pip install datasets>=2.14.3")
     require_version("accelerate>=0.27.2", "To fix: pip install accelerate>=0.27.2")
-    require_version("peft>=0.9.0", "To fix: pip install peft>=0.9.0")
+    require_version("peft>=0.10.0", "To fix: pip install peft>=0.10.0")
     require_version("trl>=0.8.1", "To fix: pip install trl>=0.8.1")
@@ -102,6 +102,10 @@ class RLHFArguments:
         default="sigmoid",
         metadata={"help": "The type of DPO loss to use."},
     )
+    dpo_label_smoothing: float = field(
+        default=0.0,
+        metadata={"help": "The robust DPO label smoothing parameter in cDPO that should be between 0 and 0.5."},
+    )
     dpo_ftx: float = field(
         default=0.0,
         metadata={"help": "The supervised fine-tuning loss coefficient in DPO training."},
@@ -114,10 +118,6 @@ class RLHFArguments:
         default=4,
         metadata={"help": "The number of epochs to perform in a PPO optimization step."},
     )
-    ppo_logger: Optional[str] = field(
-        default=None,
-        metadata={"help": 'Log with either "wandb" or "tensorboard" in PPO training.'},
-    )
     ppo_score_norm: bool = field(
         default=False,
         metadata={"help": "Use score normalization in PPO training."},
@@ -248,6 +248,9 @@ class FinetuningArguments(FreezeArguments, LoraArguments, RLHFArguments, GaloreA
         if self.stage == "ppo" and self.reward_model_type == "lora" and self.finetuning_type != "lora":
             raise ValueError("`reward_model_type` cannot be lora for Freeze/Full PPO training.")

+        if self.stage == "dpo" and self.dpo_loss != "sigmoid" and self.dpo_label_smoothing > 1e-6:
+            raise ValueError("`dpo_label_smoothing` is only valid for sigmoid loss function.")
+
         if self.use_llama_pro and self.finetuning_type == "full":
             raise ValueError("`use_llama_pro` is only valid for the Freeze or LoRA method.")
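For orientation, a hedged sketch of what the new `dpo_label_smoothing` knob does: in the robust/conservative DPO ("cDPO") formulation, preference labels are assumed flipped with probability epsilon, which mixes the sigmoid loss on the reward margin with its mirror image. The helper below is illustrative only, not the project's (or TRL's) exact implementation; the new `__post_init__` check above simply rejects nonzero smoothing for non-sigmoid losses, where this mixing does not apply.

```python
import math


def cdpo_sigmoid_loss(beta: float, margin: float, label_smoothing: float) -> float:
    # margin = (policy_chosen_logp - policy_rejected_logp) - (ref_chosen_logp - ref_rejected_logp)
    # With label_smoothing = 0 this reduces to the standard DPO sigmoid loss.
    def logsigmoid(x: float) -> float:
        # numerically stable log(sigmoid(x))
        return -math.log1p(math.exp(-x)) if x >= 0 else x - math.log1p(math.exp(x))

    return -(1.0 - label_smoothing) * logsigmoid(beta * margin) - label_smoothing * logsigmoid(-beta * margin)


print(cdpo_sigmoid_loss(beta=0.1, margin=2.0, label_smoothing=0.0))  # plain DPO
print(cdpo_sigmoid_loss(beta=0.1, margin=2.0, label_smoothing=0.1))  # cDPO with eps = 0.1
```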
@@ -8,7 +8,6 @@ import transformers
 from transformers import HfArgumentParser, Seq2SeqTrainingArguments
 from transformers.trainer_utils import get_last_checkpoint
 from transformers.utils import is_torch_bf16_gpu_available
-from transformers.utils.versions import require_version

 from ..extras.logging import get_logger
 from ..extras.misc import check_dependencies
@@ -119,6 +118,13 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
     if finetuning_args.stage == "ppo" and finetuning_args.reward_model_type == "lora" and model_args.use_unsloth:
         raise ValueError("Unsloth does not support lora reward model.")

+    if (
+        finetuning_args.stage == "ppo"
+        and training_args.report_to is not None
+        and training_args.report_to[0] not in ["wandb", "tensorboard"]
+    ):
+        raise ValueError("PPO only accepts wandb or tensorboard logger.")
+
     if training_args.max_steps == -1 and data_args.streaming:
         raise ValueError("Please specify `max_steps` in streaming mode.")

@@ -128,12 +134,8 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
     if training_args.do_train and model_args.use_unsloth and not is_unsloth_available():
         raise ValueError("Unsloth was not installed: https://github.com/unslothai/unsloth")

-    if finetuning_args.use_dora:
-        if model_args.quantization_bit is not None:
-            require_version("peft>=0.10.0", "To fix: pip install peft>=0.10.0")
-
-        if model_args.use_unsloth:
-            raise ValueError("Unsloth does not support DoRA.")
+    if finetuning_args.use_dora and model_args.use_unsloth:
+        raise ValueError("Unsloth does not support DoRA.")

     if finetuning_args.pure_bf16:
         if not is_torch_bf16_gpu_available():
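Since `ppo_logger` is removed elsewhere in this change set, the PPO logging backend is now taken from the standard `report_to` training argument and validated here; a sketch of the new check in isolation (hypothetical helper name):

```python
def validate_ppo_logger(stage: str, report_to) -> None:
    # Mirrors the new get_train_args() check: for PPO, only wandb or tensorboard
    # are accepted as the first reporting backend.
    if stage == "ppo" and report_to is not None and report_to[0] not in ["wandb", "tensorboard"]:
        raise ValueError("PPO only accepts wandb or tensorboard logger.")


validate_ppo_logger("ppo", ["wandb"])    # ok
validate_ppo_logger("sft", ["mlflow"])   # ok, only enforced for PPO
# validate_ppo_logger("ppo", ["mlflow"]) would raise ValueError
```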
@@ -173,7 +173,7 @@ def _configure_quantization(
     """
     if getattr(config, "quantization_config", None):  # ptq
         if is_deepspeed_zero3_enabled():
-            raise ValueError("DeepSpeed ZeRO-3 is incompatible with quantization.")
+            raise ValueError("DeepSpeed ZeRO-3 is incompatible with quantized models.")

         init_kwargs["device_map"] = {"": get_current_device()}
         quantization_config: Dict[str, Any] = getattr(config, "quantization_config", None)
@@ -47,6 +47,8 @@ def find_all_linear_modules(model: "PreTrainedModel") -> List[str]:
     output_layer_names = ["lm_head"]
     if model.config.model_type == "chatglm":
         output_layer_names.append("output_layer")
+    elif model.config.model_type == "internlm2":
+        output_layer_names.append("output")

     module_names = set()
     for name, module in model.named_modules():
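A simplified stand-in for `find_all_linear_modules`, illustrating the skip-list logic this hunk extends (output heads such as `lm_head`, chatglm's `output_layer`, and now internlm2's `output` are excluded from LoRA targeting). The body below is an assumption for illustration; the real function also handles quantized linear layer classes.

```python
from typing import List, Sequence

import torch


def find_linear_module_names(model: torch.nn.Module, output_layer_names: Sequence[str]) -> List[str]:
    # Collect the short (attribute) names of Linear layers, skipping output heads.
    module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Linear) and not any(out in name for out in output_layer_names):
            module_names.add(name.split(".")[-1])
    return list(module_names)


# e.g. on a toy model (layer names here are just the Sequential indices)
toy = torch.nn.Sequential(torch.nn.Linear(4, 4), torch.nn.Linear(4, 2))
print(find_linear_module_names(toy, ["lm_head", "output_layer", "output"]))
```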
@@ -8,7 +8,7 @@ from trl import DPOTrainer
 from trl.trainer.utils import disable_dropout_in_model

 from ...extras.constants import IGNORE_INDEX
-from ..utils import create_custom_optimzer
+from ..utils import create_custom_optimzer, create_custom_scheduler


 if TYPE_CHECKING:
@@ -20,12 +20,9 @@ if TYPE_CHECKING:
 class CustomDPOTrainer(DPOTrainer):
     def __init__(
         self,
-        beta: float,
-        loss_type: Literal["sigmoid", "hinge", "ipo", "kto_pair"],
-        ftx_gamma: float,
         model: Union["PreTrainedModel", torch.nn.Module],
+        ref_model: Optional[Union["PreTrainedModel", torch.nn.Module]],
         finetuning_args: "FinetuningArguments",
-        ref_model: Optional[Union["PreTrainedModel", torch.nn.Module]] = None,
         disable_dropout: bool = True,
         **kwargs,
     ):
@@ -47,10 +44,10 @@ class CustomDPOTrainer(DPOTrainer):
         self._peft_has_been_casted_to_bf16 = False

         self.ref_model = ref_model
-        self.beta = beta
-        self.label_smoothing = 0
-        self.loss_type = loss_type
-        self.ftx_gamma = ftx_gamma
+        self.beta = finetuning_args.dpo_beta
+        self.label_smoothing = finetuning_args.dpo_label_smoothing
+        self.loss_type = finetuning_args.dpo_loss
+        self.ftx_gamma = finetuning_args.dpo_ftx
         self._stored_metrics = defaultdict(lambda: defaultdict(list))

         Trainer.__init__(self, model=model, **kwargs)
@@ -66,12 +63,16 @@ class CustomDPOTrainer(DPOTrainer):
         else:
             self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True)

-    def create_optimizer_and_scheduler(self, num_training_steps: int) -> None:
-        self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args, num_training_steps)
+    def create_optimizer(self) -> "torch.optim.Optimizer":
         if self.optimizer is None:
-            self.create_optimizer()
-
-        self.create_scheduler(num_training_steps=num_training_steps, optimizer=self.optimizer)
+            self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args)
+        return super().create_optimizer()
+
+    def create_scheduler(
+        self, num_training_steps: int, optimizer: Optional["torch.optim.Optimizer"] = None
+    ) -> "torch.optim.lr_scheduler.LRScheduler":
+        create_custom_scheduler(self.args, num_training_steps, optimizer)
+        return super().create_scheduler(num_training_steps, optimizer)

     def sft_loss(self, chosen_logits: torch.FloatTensor, chosen_labels: torch.LongTensor) -> torch.Tensor:
         r"""
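The same `create_optimizer`/`create_scheduler` override pattern recurs in the PT, RM, and SFT trainers later in this compare; a condensed sketch of the pattern with no-op stand-ins for the llmtuner helpers (the helper bodies below are placeholders, and the class is only a demonstration, not the project's trainer):

```python
from typing import Optional

import torch
from transformers import Trainer


def create_custom_optimzer(model, args, finetuning_args) -> Optional[torch.optim.Optimizer]:
    # placeholder: the real helper returns a custom optimizer (e.g. GaLore) or None
    return None


def create_custom_scheduler(args, num_training_steps: int, optimizer=None) -> None:
    # placeholder: the real helper may adjust layer-wise schedules before the default one is built
    pass


class PatternTrainer(Trainer):
    finetuning_args = None  # set by the real subclasses

    def create_optimizer(self) -> "torch.optim.Optimizer":
        # Build the custom optimizer first if one is configured; otherwise
        # super().create_optimizer() keeps the stock behaviour.
        if self.optimizer is None:
            self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args)
        return super().create_optimizer()

    def create_scheduler(
        self, num_training_steps: int, optimizer: Optional["torch.optim.Optimizer"] = None
    ) -> "torch.optim.lr_scheduler.LRScheduler":
        create_custom_scheduler(self.args, num_training_steps, optimizer)
        return super().create_scheduler(num_training_steps, optimizer)
```

Compared with the old `create_optimizer_and_scheduler` override, this hooks into the two methods the Hugging Face `Trainer` already calls, so the default optimizer and scheduler logic is reused whenever no custom one is requested.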
@@ -28,6 +28,7 @@ def run_dpo(
     tokenizer = load_tokenizer(model_args)
     dataset = get_dataset(tokenizer, model_args, data_args, training_args, stage="rm")
     model = load_model(tokenizer, model_args, finetuning_args, training_args.do_train)
+
     data_collator = DPODataCollatorWithPadding(
         tokenizer=tokenizer,
         pad_to_multiple_of=8,
@@ -45,13 +46,10 @@ def run_dpo(

     # Initialize our Trainer
     trainer = CustomDPOTrainer(
-        beta=finetuning_args.dpo_beta,
-        loss_type=finetuning_args.dpo_loss,
-        ftx_gamma=finetuning_args.dpo_ftx,
-        finetuning_args=finetuning_args,
         model=model,
         ref_model=ref_model,
         args=training_args,
+        finetuning_args=finetuning_args,
         tokenizer=tokenizer,
         data_collator=data_collator,
         callbacks=callbacks,
@@ -6,20 +6,23 @@ from typing import TYPE_CHECKING, Dict, List, Optional, Tuple
 import torch
 from tqdm import tqdm
 from transformers import GenerationConfig, Trainer, TrainerControl, TrainerState
+from transformers.optimization import get_scheduler
 from transformers.trainer_pt_utils import remove_dummy_checkpoint
 from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
 from transformers.utils import SAFE_WEIGHTS_NAME, WEIGHTS_NAME
-from trl import PPOTrainer
+from trl import PPOConfig, PPOTrainer
 from trl.core import PPODecorators, logprobs_from_logits

 from ...extras.callbacks import FixValueHeadModelCallback, LogCallback
 from ...extras.logging import get_logger
 from ...extras.misc import AverageMeter, count_parameters, get_current_device, get_logits_processor
+from ..utils import create_custom_optimzer, create_custom_scheduler
 from .utils import dump_layernorm, get_rewards_from_server, replace_model, restore_layernorm


 if TYPE_CHECKING:
-    from transformers import Seq2SeqTrainingArguments, TrainerCallback
+    from datasets import Dataset
+    from transformers import DataCollatorWithPadding, PreTrainedTokenizer, Seq2SeqTrainingArguments, TrainerCallback
     from trl import AutoModelForCausalLMWithValueHead

     from ...hparams import FinetuningArguments, GeneratingArguments, ModelArguments
@@ -40,10 +43,53 @@ class CustomPPOTrainer(PPOTrainer, Trainer):
         finetuning_args: "FinetuningArguments",
         generating_args: "GeneratingArguments",
         callbacks: List["TrainerCallback"],
-        reward_model: "AutoModelForCausalLMWithValueHead",
-        **kwargs,
+        model: "AutoModelForCausalLMWithValueHead",
+        reward_model: Optional["AutoModelForCausalLMWithValueHead"],
+        ref_model: Optional["AutoModelForCausalLMWithValueHead"],
+        tokenizer: "PreTrainedTokenizer",
+        dataset: "Dataset",
+        data_collator: "DataCollatorWithPadding",
     ):
-        PPOTrainer.__init__(self, **kwargs)
+        backward_batch_size = training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps
+        ppo_config = PPOConfig(
+            model_name=model_args.model_name_or_path,
+            learning_rate=training_args.learning_rate,
+            mini_batch_size=training_args.per_device_train_batch_size,
+            batch_size=backward_batch_size * finetuning_args.ppo_buffer_size,
+            gradient_accumulation_steps=training_args.gradient_accumulation_steps,
+            ppo_epochs=finetuning_args.ppo_epochs,
+            max_grad_norm=training_args.max_grad_norm,
+            seed=training_args.seed,
+            optimize_device_cache=True,
+            target=finetuning_args.ppo_target,
+            use_score_scaling=finetuning_args.ppo_score_norm,
+            use_score_norm=finetuning_args.ppo_score_norm,
+            whiten_rewards=finetuning_args.ppo_whiten_rewards,
+            accelerator_kwargs={"step_scheduler_with_optimizer": False},
+            log_with=training_args.report_to[0] if training_args.report_to is not None else None,
+            project_kwargs={"logging_dir": training_args.logging_dir},
+        )
+
+        # Create optimizer and scheduler
+        if training_args.max_steps > 0:
+            num_training_steps = training_args.max_steps
+        else:
+            total_train_batch_size = backward_batch_size * finetuning_args.ppo_buffer_size * training_args.world_size
+            num_training_steps = training_args.num_train_epochs * math.ceil(len(dataset) / total_train_batch_size)
+
+        optimizer = self.create_optimizer(model, training_args, finetuning_args)
+        scheduler = self.create_scheduler(training_args, num_training_steps, optimizer)
+
+        PPOTrainer.__init__(
+            self,
+            config=ppo_config,
+            model=model,
+            ref_model=ref_model,
+            tokenizer=tokenizer,
+            dataset=dataset,
+            data_collator=data_collator,
+            lr_scheduler=scheduler,
+        )

         self.args = training_args
         self.model_args = model_args
@@ -205,6 +251,44 @@ class CustomPPOTrainer(PPOTrainer, Trainer):
             self.args, self.state, self.control, model=self.accelerator.unwrap_model(self.model)
         )

+    def create_optimizer(
+        self,
+        model: "AutoModelForCausalLMWithValueHead",
+        training_args: "Seq2SeqTrainingArguments",
+        finetuning_args: "FinetuningArguments",
+    ) -> "torch.optim.Optimizer":
+        optimizer = create_custom_optimzer(model, training_args, finetuning_args)
+        if optimizer is None:
+            decay_params, nodecay_params = [], []
+            decay_param_names = self.get_decay_parameter_names(model)
+            for name, param in model.named_parameters():
+                if param.requires_grad:
+                    if name in decay_param_names:
+                        decay_params.append(param)
+                    else:
+                        nodecay_params.append(param)
+
+            optim_class, optim_kwargs = Trainer.get_optimizer_cls_and_kwargs(training_args)
+            param_groups = [
+                dict(params=nodecay_params),
+                dict(params=decay_params, weight_decay=training_args.weight_decay),
+            ]
+            optimizer = optim_class(param_groups, **optim_kwargs)
+
+        return optimizer
+
+    def create_scheduler(
+        self, training_args: "Seq2SeqTrainingArguments", num_training_steps: int, optimizer: "torch.optim.Optimizer"
+    ) -> "torch.optim.lr_scheduler.LRScheduler":
+        create_custom_scheduler(training_args, num_training_steps, optimizer)
+        lr_scheduler = get_scheduler(
+            training_args.lr_scheduler_type,
+            optimizer=optimizer,
+            num_warmup_steps=training_args.get_warmup_steps(num_training_steps),
+            num_training_steps=num_training_steps,
+        )
+        return lr_scheduler
+
     @torch.no_grad()
     def get_inputs(self, batch: Dict[str, torch.Tensor]) -> Tuple[List[torch.Tensor], List[torch.Tensor]]:
         r"""
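To make the batch bookkeeping that moved into `CustomPPOTrainer.__init__` concrete, here is a worked example with made-up hyperparameters (all values are illustrative):

```python
import math

# Hypothetical run: 2 processes, 1 sample per device, 8 accumulation steps.
per_device_train_batch_size = 1
gradient_accumulation_steps = 8
ppo_buffer_size = 1          # finetuning_args.ppo_buffer_size
world_size = 2               # number of training processes
num_train_epochs = 3.0
dataset_len = 4800           # len(dataset)

backward_batch_size = per_device_train_batch_size * gradient_accumulation_steps        # 8
ppo_batch_size = backward_batch_size * ppo_buffer_size                                  # PPOConfig.batch_size
total_train_batch_size = backward_batch_size * ppo_buffer_size * world_size             # samples per optimizer step
num_training_steps = num_train_epochs * math.ceil(dataset_len / total_train_batch_size)  # steps for the scheduler

print(backward_batch_size, ppo_batch_size, total_train_batch_size, num_training_steps)
```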
@@ -1,19 +1,15 @@
 # Inspired by: https://github.com/lvwerra/trl/blob/main/examples/research_projects/stack_llama/scripts/rl_training.py

-import math
 from typing import TYPE_CHECKING, List, Optional

-from torch.optim import AdamW
 from transformers import DataCollatorWithPadding
-from transformers.optimization import get_scheduler
-from trl import PPOConfig

 from ...data import get_dataset
 from ...extras.callbacks import FixValueHeadModelCallback
 from ...extras.misc import fix_valuehead_checkpoint
 from ...extras.ploting import plot_loss
 from ...model import load_model, load_tokenizer
-from ..utils import create_custom_optimzer, create_ref_model, create_reward_model
+from ..utils import create_ref_model, create_reward_model
 from .trainer import CustomPPOTrainer


@@ -42,45 +38,6 @@ def run_ppo(
     ref_model = create_ref_model(model_args, finetuning_args, add_valuehead=True)
     reward_model = create_reward_model(model, model_args, finetuning_args)

-    # Create ppo config
-    backward_batch_size = training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps
-    ppo_config = PPOConfig(
-        model_name=model_args.model_name_or_path,
-        learning_rate=training_args.learning_rate,
-        mini_batch_size=training_args.per_device_train_batch_size,
-        batch_size=backward_batch_size * finetuning_args.ppo_buffer_size,
-        gradient_accumulation_steps=training_args.gradient_accumulation_steps,
-        ppo_epochs=finetuning_args.ppo_epochs,
-        max_grad_norm=training_args.max_grad_norm,
-        seed=training_args.seed,
-        optimize_device_cache=True,
-        target=finetuning_args.ppo_target,
-        log_with=finetuning_args.ppo_logger,
-        use_score_scaling=finetuning_args.ppo_score_norm,
-        use_score_norm=finetuning_args.ppo_score_norm,
-        whiten_rewards=finetuning_args.ppo_whiten_rewards,
-        accelerator_kwargs={"step_scheduler_with_optimizer": False},
-        project_kwargs={"logging_dir": training_args.logging_dir},
-    )
-
-    # Create optimizer and scheduler
-    if training_args.max_steps > 0:
-        num_training_steps = training_args.max_steps
-    else:
-        total_train_batch_size = backward_batch_size * finetuning_args.ppo_buffer_size * training_args.world_size
-        num_training_steps = training_args.num_train_epochs * math.ceil(len(dataset) / total_train_batch_size)
-
-    optimizer = create_custom_optimzer(model, training_args, finetuning_args, num_training_steps)
-    if optimizer is None:
-        optimizer = AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=training_args.learning_rate)
-
-    lr_scheduler = get_scheduler(
-        training_args.lr_scheduler_type,
-        optimizer=optimizer,
-        num_warmup_steps=training_args.get_warmup_steps(num_training_steps),
-        num_training_steps=num_training_steps,
-    )
-
     # Initialize our Trainer
     ppo_trainer = CustomPPOTrainer(
         model_args=model_args,
@@ -88,15 +45,12 @@ def run_ppo(
         finetuning_args=finetuning_args,
         generating_args=generating_args,
         callbacks=callbacks + [FixValueHeadModelCallback()],
-        reward_model=reward_model,
-        config=ppo_config,
         model=model,
+        reward_model=reward_model,
         ref_model=ref_model,
         tokenizer=tokenizer,
         dataset=dataset,
        data_collator=data_collator,
-        optimizer=optimizer,
-        lr_scheduler=lr_scheduler,
     )

     # Training
```diff
@@ -1,12 +1,14 @@
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Optional
 
 from transformers import Trainer
 
 from ...extras.logging import get_logger
-from ..utils import create_custom_optimzer
+from ..utils import create_custom_optimzer, create_custom_scheduler
 
 
 if TYPE_CHECKING:
+    import torch
+
     from ...hparams import FinetuningArguments
 
 
@@ -22,9 +24,13 @@ class CustomTrainer(Trainer):
         super().__init__(**kwargs)
         self.finetuning_args = finetuning_args
 
-    def create_optimizer_and_scheduler(self, num_training_steps: int) -> None:
-        self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args, num_training_steps)
+    def create_optimizer(self) -> "torch.optim.Optimizer":
         if self.optimizer is None:
-            self.create_optimizer()
+            self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args)
+        return super().create_optimizer()
 
-        self.create_scheduler(num_training_steps=num_training_steps, optimizer=self.optimizer)
+    def create_scheduler(
+        self, num_training_steps: int, optimizer: Optional["torch.optim.Optimizer"] = None
+    ) -> "torch.optim.lr_scheduler.LRScheduler":
+        create_custom_scheduler(self.args, num_training_steps, optimizer)
+        return super().create_scheduler(num_training_steps, optimizer)
```
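The change above swaps the monolithic `create_optimizer_and_scheduler` override for the two finer-grained hooks that `transformers.Trainer` exposes. A minimal sketch of the same pattern, with a hypothetical AdamW-over-trainable-parameters policy standing in for `create_custom_optimzer`:

```python
from typing import Optional

import torch
from transformers import Trainer


class LazyOptimTrainer(Trainer):
    # Same hook points as the trainers in this change: build the optimizer lazily,
    # then always defer to the parent so the Trainer's own bookkeeping still runs.

    def create_optimizer(self) -> "torch.optim.Optimizer":
        if self.optimizer is None:
            # Hypothetical policy: plain AdamW over trainable parameters only.
            trainable = [p for p in self.model.parameters() if p.requires_grad]
            self.optimizer = torch.optim.AdamW(trainable, lr=self.args.learning_rate)
        return super().create_optimizer()

    def create_scheduler(
        self, num_training_steps: int, optimizer: Optional["torch.optim.Optimizer"] = None
    ) -> "torch.optim.lr_scheduler.LRScheduler":
        # Any side effects (e.g. attaching per-parameter schedulers) would go here.
        return super().create_scheduler(num_training_steps, optimizer)
```

Because `super().create_optimizer()` only builds the default optimizer when `self.optimizer` is still `None`, leaving it unset falls through to the stock behavior, which is how the trainers above handle the case where `create_custom_optimzer` returns `None`.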
```diff
@@ -1,12 +1,12 @@
 import json
 import os
-from typing import TYPE_CHECKING, Dict, List, Tuple, Union
+from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
 
 import torch
 from transformers import Trainer
 
 from ...extras.logging import get_logger
-from ..utils import create_custom_optimzer
+from ..utils import create_custom_optimzer, create_custom_scheduler
 
 
 if TYPE_CHECKING:
@@ -29,12 +29,16 @@ class PairwiseTrainer(Trainer):
         self.finetuning_args = finetuning_args
         self.can_return_loss = True  # override property to return eval_loss
 
-    def create_optimizer_and_scheduler(self, num_training_steps: int) -> None:
-        self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args, num_training_steps)
+    def create_optimizer(self) -> "torch.optim.Optimizer":
         if self.optimizer is None:
-            self.create_optimizer()
+            self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args)
+        return super().create_optimizer()
 
-        self.create_scheduler(num_training_steps=num_training_steps, optimizer=self.optimizer)
+    def create_scheduler(
+        self, num_training_steps: int, optimizer: Optional["torch.optim.Optimizer"] = None
+    ) -> "torch.optim.lr_scheduler.LRScheduler":
+        create_custom_scheduler(self.args, num_training_steps, optimizer)
+        return super().create_scheduler(num_training_steps, optimizer)
 
     def compute_loss(
         self, model: "PreTrainedModel", inputs: Dict[str, torch.Tensor], return_outputs: bool = False
```
```diff
@@ -8,7 +8,7 @@ from transformers import Seq2SeqTrainer
 
 from ...extras.constants import IGNORE_INDEX
 from ...extras.logging import get_logger
-from ..utils import create_custom_optimzer
+from ..utils import create_custom_optimzer, create_custom_scheduler
 
 
 if TYPE_CHECKING:
@@ -29,12 +29,16 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer):
         super().__init__(**kwargs)
         self.finetuning_args = finetuning_args
 
-    def create_optimizer_and_scheduler(self, num_training_steps: int) -> None:
-        self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args, num_training_steps)
+    def create_optimizer(self) -> "torch.optim.Optimizer":
         if self.optimizer is None:
-            self.create_optimizer()
+            self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args)
+        return super().create_optimizer()
 
-        self.create_scheduler(num_training_steps=num_training_steps, optimizer=self.optimizer)
+    def create_scheduler(
+        self, num_training_steps: int, optimizer: Optional["torch.optim.Optimizer"] = None
+    ) -> "torch.optim.lr_scheduler.LRScheduler":
+        create_custom_scheduler(self.args, num_training_steps, optimizer)
+        return super().create_scheduler(num_training_steps, optimizer)
 
     def prediction_step(
         self,
```
```diff
@@ -29,7 +29,13 @@ logger = get_logger(__name__)
 
 
 class DummyOptimizer(torch.optim.Optimizer):
-    def __init__(self, lr: float = 1e-3, optimizer_dict: Optional[dict] = None, *args, **kwargs) -> None:
+    r"""
+    A dummy optimizer used for the GaLore algorithm.
+    """
+
+    def __init__(
+        self, lr: float = 1e-3, optimizer_dict: Optional[Dict["torch.nn.Parameter", "torch.optim.Optimizer"]] = None
+    ) -> None:
         dummy_tensor = torch.randn(1, 1)
         self.optimizer_dict = optimizer_dict
         super().__init__([dummy_tensor], {"lr": lr})
@@ -64,7 +70,7 @@ def create_modelcard_and_push(
 
 def create_ref_model(
     model_args: "ModelArguments", finetuning_args: "FinetuningArguments", add_valuehead: bool = False
-) -> Union["PreTrainedModel", "AutoModelForCausalLMWithValueHead"]:
+) -> Optional[Union["PreTrainedModel", "AutoModelForCausalLMWithValueHead"]]:
     r"""
     Creates reference model for PPO/DPO training. Evaluation mode is not supported.
 
@@ -99,7 +105,7 @@ def create_ref_model(
 
 def create_reward_model(
     model: "AutoModelForCausalLMWithValueHead", model_args: "ModelArguments", finetuning_args: "FinetuningArguments"
-) -> "AutoModelForCausalLMWithValueHead":
+) -> Optional["AutoModelForCausalLMWithValueHead"]:
     r"""
     Creates reward model for PPO training.
     """
```
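To make the role of `DummyOptimizer` concrete, here is a small stand-alone sketch. The class is restated under a different name so the example is self-contained, and the toy `torch.nn.Linear` model and learning rates are hypothetical: the wrapper satisfies the `torch.optim.Optimizer` interface a Trainer expects to hold, while the real per-parameter optimizers ride along in `optimizer_dict`.

```python
from typing import Dict, Optional

import torch


class PlaceholderOptimizer(torch.optim.Optimizer):
    # Mirrors DummyOptimizer above: a one-tensor optimizer the Trainer can hold,
    # carrying the real per-parameter optimizers for gradient hooks to use.
    def __init__(
        self, lr: float = 1e-3, optimizer_dict: Optional[Dict[torch.nn.Parameter, torch.optim.Optimizer]] = None
    ) -> None:
        dummy_tensor = torch.randn(1, 1)
        self.optimizer_dict = optimizer_dict
        super().__init__([dummy_tensor], {"lr": lr})


model = torch.nn.Linear(4, 2)  # toy model for illustration
per_param = {p: torch.optim.AdamW([p], lr=1e-4) for p in model.parameters() if p.requires_grad}
wrapper = PlaceholderOptimizer(lr=1e-4, optimizer_dict=per_param)
print(len(wrapper.optimizer_dict))  # 2: one optimizer for the weight, one for the bias
```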
```diff
@@ -156,8 +162,9 @@ def _create_galore_optimizer(
     model: "PreTrainedModel",
     training_args: "Seq2SeqTrainingArguments",
     finetuning_args: "FinetuningArguments",
-    max_steps: int,
 ) -> "torch.optim.Optimizer":
+    require_version("galore_torch", "To fix: pip install galore_torch")
+
     if len(finetuning_args.galore_target) == 1 and finetuning_args.galore_target[0] == "all":
         galore_targets = find_all_linear_modules(model)
     else:
@@ -212,29 +219,19 @@ def _create_galore_optimizer(
         for param in decay_params:
             param_groups = [dict(params=[param], weight_decay=training_args.weight_decay)]
             optimizer_dict[param] = optim_class(param_groups, **optim_kwargs)
-        for param in galore_params:
+        for param in galore_params:  # galore params have weight decay
            param_groups = [dict(params=[param], weight_decay=training_args.weight_decay, **galore_kwargs)]
             optimizer_dict[param] = optim_class(param_groups, **optim_kwargs)
 
-        scheduler_dict: Dict["torch.Tensor", "torch.optim.lr_scheduler.LRScheduler"] = {}
-        for param in trainable_params:
-            scheduler_dict[param] = get_scheduler(
-                training_args.lr_scheduler_type,
-                optimizer=optimizer_dict[param],
-                num_warmup_steps=training_args.get_warmup_steps(max_steps) * 2,
-                num_training_steps=max_steps * 2,
-            )
-
-        def optimizer_hook(param: "torch.Tensor"):
+        def optimizer_hook(param: "torch.nn.Parameter"):
            if param.grad is not None:
                 optimizer_dict[param].step()
                 optimizer_dict[param].zero_grad()
-                scheduler_dict[param].step()
 
         for param in trainable_params:
             param.register_post_accumulate_grad_hook(optimizer_hook)
 
-        optimizer = DummyOptimizer(lr=training_args.learning_rate)  # display scheduler result
+        optimizer = DummyOptimizer(lr=training_args.learning_rate, optimizer_dict=optimizer_dict)
     else:
         param_groups = [
             dict(params=nodecay_params),
```
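The layer-wise branch above replaces one global `optimizer.step()` with per-parameter optimizers fired from gradient hooks. A minimal stand-alone sketch of that mechanism on a toy model (hypothetical sizes and learning rate; plain AdamW stands in for the GaLore optimizers, and PyTorch >= 2.1 is assumed for `register_post_accumulate_grad_hook`):

```python
import torch

model = torch.nn.Linear(8, 2)  # toy model
optimizer_dict = {p: torch.optim.AdamW([p], lr=1e-3) for p in model.parameters() if p.requires_grad}


def optimizer_hook(param: torch.nn.Parameter) -> None:
    # Runs right after a parameter's gradient has been fully accumulated.
    if param.grad is not None:
        optimizer_dict[param].step()
        optimizer_dict[param].zero_grad()


for param in optimizer_dict:
    param.register_post_accumulate_grad_hook(optimizer_hook)

# A single backward pass now updates each parameter as soon as its gradient is
# ready, freeing that gradient immediately instead of holding the full gradient
# set until an end-of-step optimizer.step().
loss = model(torch.randn(4, 8)).sum()
loss.backward()
```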
```diff
@@ -293,10 +290,34 @@ def create_custom_optimzer(
     model: "PreTrainedModel",
     training_args: "Seq2SeqTrainingArguments",
     finetuning_args: "FinetuningArguments",
-    max_steps: int,
 ) -> Optional["torch.optim.Optimizer"]:
     if finetuning_args.use_galore:
-        return _create_galore_optimizer(model, training_args, finetuning_args, max_steps)
+        return _create_galore_optimizer(model, training_args, finetuning_args)
 
     if finetuning_args.loraplus_lr_ratio is not None:
         return _create_loraplus_optimizer(model, training_args, finetuning_args)
+
+
+def create_custom_scheduler(
+    training_args: "Seq2SeqTrainingArguments",
+    num_training_steps: int,
+    optimizer: Optional["torch.optim.Optimizer"] = None,
+) -> None:
+    if optimizer is not None and isinstance(optimizer, DummyOptimizer):
+        optimizer_dict = optimizer.optimizer_dict
+        scheduler_dict: Dict["torch.nn.Parameter", "torch.optim.lr_scheduler.LRScheduler"] = {}
+
+        for param in optimizer_dict.keys():
+            scheduler_dict[param] = get_scheduler(
+                training_args.lr_scheduler_type,
+                optimizer=optimizer_dict[param],
+                num_warmup_steps=training_args.get_warmup_steps(num_training_steps) * 2,
+                num_training_steps=num_training_steps * 2,
+            )
+
+        def scheduler_hook(param: "torch.nn.Parameter"):
+            if param.grad is not None:
+                scheduler_dict[param].step()
+
+        for param in optimizer_dict.keys():
+            param.register_post_accumulate_grad_hook(scheduler_hook)
```
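Since the layer-wise optimizers are stepped from hooks, their learning-rate schedules have to be stepped the same way, which is what `create_custom_scheduler` wires up when it sees a `DummyOptimizer`. A minimal stand-alone sketch of that scheduler side (toy model, a hypothetical `"cosine"` schedule and step count, and plain AdamW in place of the GaLore optimizers):

```python
import torch
from transformers import get_scheduler

model = torch.nn.Linear(8, 2)  # toy model
optimizer_dict = {p: torch.optim.AdamW([p], lr=1e-3) for p in model.parameters() if p.requires_grad}

# One scheduler per parameter, each driving that parameter's own optimizer.
scheduler_dict = {
    param: get_scheduler(
        "cosine",
        optimizer=optimizer_dict[param],
        num_warmup_steps=0,
        num_training_steps=100,
    )
    for param in optimizer_dict
}


def scheduler_hook(param: torch.nn.Parameter) -> None:
    # Advance this parameter's schedule each time its gradient is accumulated.
    if param.grad is not None:
        scheduler_dict[param].step()


for param in optimizer_dict:
    param.register_post_accumulate_grad_hook(scheduler_hook)
```

In the trainers above this runs from the `create_scheduler` override, so the scheduler returned by `super().create_scheduler(...)` only drives the placeholder optimizer while the per-parameter schedulers do the real work.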