Former-commit-id: 3d1b818cb6a77b7603724fbeb756b468aa74e7ea
This commit is contained in:
hiyouga
2024-05-16 00:35:28 +08:00
parent 437cc20be6
commit 538c79fd8f
7 changed files with 133 additions and 77 deletions

View File

@@ -1,5 +1,5 @@
# coding=utf-8
-# Performs block expansion for LLaMA, Mistral or Qwen1.5 models.
+# Performs block expansion for LLaMA, Mistral, Qwen1.5 or Yi models.
# Usage: python llama_pro.py --model_name_or_path meta-llama/Llama-2-7b-hf --output_dir llama2_pro --num_expand 8
# Inspired by: https://github.com/TencentARC/LLaMA-Pro/blob/main/scripts/block_expansion.py
@@ -106,8 +106,7 @@ def block_expansion(
print("Fine-tune this model with:")
print(" --model_name_or_path {} \\".format(output_dir))
print(" --finetuning_type freeze \\")
-print(" --name_module_trainable all \\")
-print(" --num_layer_trainable {} \\".format(num_expand))
+print(" --freeze_trainable_layers {} \\".format(num_expand))
print(" --use_llama_pro")