289 Commits

Author SHA1 Message Date
hiyouga
0e88c5754f update parser
Former-commit-id: 5262c8702382ff8bc36a172387bc4c8949f326ea
2024-07-19 01:36:39 +08:00
hiyouga
3fff875f99 release v0.8.3
Former-commit-id: 7180a3b99c3c218dfb0dc607ad5e87219269a678
2024-07-19 01:21:18 +08:00
hiyouga
e2d9ab3591 fix test
Former-commit-id: e86f20134b782c8f5c39ead292f8f7582038eb9e
2024-07-19 01:17:37 +08:00
hiyouga
3db5cf44ea fix unittest
Former-commit-id: 73b56ba30b17a32db694d485135d493315293001
2024-07-19 01:10:30 +08:00
hiyouga
994b9089e9 add unittest
Former-commit-id: 8a1f0c5f922989e08a19c65de0b2c4afd2a5771f
2024-07-19 01:06:27 +08:00
hiyouga
4c1513a845 follow #4878 fix #4684
Former-commit-id: 4715e5c5b8040b21e5f401f7e969b9fd2757d520
2024-07-18 22:06:12 +08:00
hoshi-hiyouga
86e009b504 Merge pull request #4878 from ly863/main
Train the last turing conversation.

Former-commit-id: 1fd39b234e23f762021212c6dfde9701f94e7afa
2024-07-18 22:03:41 +08:00
Shiyu Zhang
c1e1918db1 仅仅训练最后一轮对话
Former-commit-id: ab6198e4c099edeb1a400f58729cd617e8cd8e50
2024-07-18 15:30:25 +08:00
hiyouga
341225a405 fix metrics #4786
Former-commit-id: 7d0c4bd394fc3cba197db1719f1164b9dd66ac21
2024-07-17 00:47:00 +08:00
hiyouga
8c93921952 support batch_eval_metrics, fix #4826
Former-commit-id: 3fe1df17188825f8a32fbe6a1294b4b532ce0c85
2024-07-17 00:33:00 +08:00
hiyouga
45367105fc tiny fix
Former-commit-id: 952807b16cd85fa193a05a83b1a735a6b06abc82
2024-07-15 23:09:50 +08:00
hoshi-hiyouga
df71359069 Merge pull request #4822 from codemayq/test-ci
add github action check to ignore some test cases

Former-commit-id: cf698aa7ab4a35b84f429014c4a5a6cb78b565a6
2024-07-15 23:07:55 +08:00
hoshi-hiyouga
a03d14a9a6 Update test_template.py
Former-commit-id: 470dd92f954a06939d83557fed1201632b0c966b
2024-07-15 23:04:39 +08:00
hoshi-hiyouga
41d7ca395e Update test_template.py
Former-commit-id: 7da56ea6d4c08d555e179d419c245b27e5611b97
2024-07-15 23:00:27 +08:00
hoshi-hiyouga
757573bec1 Merge pull request #4821 from codemayq/feature-eval-split
add "split" as suffix in eval task name

Former-commit-id: 5b6033eef3c2cfd5b47bb67e0d803d8de68f3ff0
2024-07-15 22:59:44 +08:00
hoshi-hiyouga
16d655b119 Update llama3_lora_eval.yaml
Former-commit-id: 946836f9a3f3385c8d3bc6ab82df6edf13ee571c
2024-07-15 22:55:12 +08:00
hoshi-hiyouga
f6483de197 Update test_template.py
Former-commit-id: 352afce20adf26f5e616e5aa4e6c7295a865fb1a
2024-07-15 22:55:05 +08:00
hoshi-hiyouga
da34411bf2 Update test_template.py
Former-commit-id: 0ada82c60ed3df637acc624e8a382765d4c5f743
2024-07-15 22:52:25 +08:00
hiyouga
1891b64072 fix #4820
Former-commit-id: 8c0f8357e1eebee32010fe715554f1136b68b4ba
2024-07-15 22:32:07 +08:00
codingma
a14069acf8 add IN_GITHUB_ACTIONS
Former-commit-id: 3681966a3fe37a1c3d2dd60e54047ced1b2925e5
2024-07-15 10:28:07 +08:00
codingma
0ea708c226 1. change the task name format
2. delete split param in data_args.py


Former-commit-id: 309d30efe24785912ff751fc573677875fc5819e
2024-07-15 09:55:33 +08:00
hiyouga
cb474c7b11 allow computing rouge in training
Former-commit-id: ac67d50673989e8137965f5f718fec67c184f55b
2024-07-15 01:16:26 +08:00
hiyouga
e4d11a117b fix up
Former-commit-id: 43a56cb331fae899ca35b0c312730d4ab79d0c42
2024-07-15 01:04:56 +08:00
hoshi-hiyouga
68365045b4 Merge pull request #4691 from codemayq/feature-suppot-eval-dataset
add eval dataset support

Former-commit-id: 51eb379b44fad0336fc96c329ec98dc4528b9c2c
2024-07-15 01:00:34 +08:00
hoshi-hiyouga
502555b65d Update data_args.py
Former-commit-id: c3cee10294d56a1bc226871819b3a725b09aa67e
2024-07-15 00:56:03 +08:00
hoshi-hiyouga
0bc52c0aae Update preprocess.py
Former-commit-id: da92f4a1b9c12a8e2489b964baba5e2c8e739ef1
2024-07-15 00:55:36 +08:00
hoshi-hiyouga
6bf2663b8e Update parser.py
Former-commit-id: 145687997c86b8785e37dd60fbb9f3a5986730a6
2024-07-15 00:55:21 +08:00
hoshi-hiyouga
d337de668e Update data_utils.py
Former-commit-id: 5c2a0e3b1d1afd2a9219d935d3421fffffc3a2c9
2024-07-15 00:54:34 +08:00
hoshi-hiyouga
ec372f91e9 Update loader.py
Former-commit-id: 860e3eb374947b72dcae88cab0a93ef561e3bfb3
2024-07-15 00:50:06 +08:00
hiyouga
20b1bd8c54 update test template
Former-commit-id: 3cbd0739e8b889ef58a7841959a15b6cd1cb6332
2024-07-15 00:49:34 +08:00
hoshi-hiyouga
ee17741591 Update parser.py
Former-commit-id: b9760df588e64270a140d9111241c62c1cefe781
2024-07-14 23:04:34 +08:00
hoshi-hiyouga
93a6925ec5 Update README.md
Former-commit-id: d9aa6a9437994ac29f3e7a0789ec286f091847d6
2024-07-14 21:27:04 +08:00
hiyouga
47405a8e8a add gemma test
Former-commit-id: f29d9f8665021e506d6237f5337d2b1ac8ede6a8
2024-07-14 18:01:45 +08:00
hiyouga
54ba30c47f fix test
Former-commit-id: 4899309b7cac00573215f6530bfc97d7d87d70b2
2024-07-14 15:44:30 +08:00
hiyouga
b92214f78b fix #4699
slow tokenizer for yi models


Former-commit-id: 4d23a0bcda0c15a903a62eec72d14c584ce020dd
2024-07-14 15:34:22 +08:00
hiyouga
71e4404c0d tiny fix
Former-commit-id: 220d7c1ce15e8013a900e59fe0c7937e38b5c3b5
2024-07-14 10:56:45 +08:00
hiyouga
5ab997d484 fix gemma2 attention
Former-commit-id: aeafc68e169ae0ea5939cc81cb0cf89f0ca044b6
2024-07-13 23:33:45 +08:00
hiyouga
6e7048831b update workflows
Former-commit-id: 47c806da4def1694fb30c1c4cf87ae67903eb9f1
2024-07-13 22:31:15 +08:00
hoshi-hiyouga
97cd932c19 Merge pull request #4781 from hzhaoy/fix-dockerfile-cuda
Fix cuda Dockerfile

Former-commit-id: 56696f6c112f82d514dc3bf93182707297642639
2024-07-13 22:25:32 +08:00
hiyouga
dfc7a7d5cd fix #4792
Former-commit-id: d7547d6b9e4c660897e3ce0f4022e08686c172d5
2024-07-13 22:07:58 +08:00
hoshi-hiyouga
27e13a8371 Merge pull request #4804 from codemayq/fix-examples
tiny fix of examples

Former-commit-id: 1e45486a57a4c559e7deedf077acc0b5b79d631f
2024-07-13 20:49:13 +08:00
hoshi-hiyouga
bf6ad1fbed Update llava1_5.yaml
Former-commit-id: 68c9670be5a6f9d9ec589f13b43c45aa0ed90033
2024-07-13 20:30:06 +08:00
codingma
bc71380b59 1. fix output_dir in llama3_lora_pretrain.yaml
2. add llava1_5.yaml for inference


Former-commit-id: 560928ecf04b7aa351812568d317fcde58bc64d6
2024-07-13 13:16:22 +08:00
hzhaoy
137c87ff60 tiny fix
Former-commit-id: 48be67c41eb394d276b41ca22b28e1ef10af4920
2024-07-12 00:28:44 +08:00
hzhaoy
485b8dc18b fix #4780
Former-commit-id: 15f73c41d556c5f8d989697d774725a88d36f1b4
2024-07-12 00:25:48 +08:00
hzhaoy
875f9078d1 fix #4779
Former-commit-id: 0c8cbf9ea57292de5e222618f86e1fc5379fe008
2024-07-12 00:15:15 +08:00
hoshi-hiyouga
d3bfcbd3af Merge pull request #4700 from marko1616/patch-1
Fix Windows command preview

Former-commit-id: bc49af1e8bde9c396ca4b1e608b7fad02b016ce6
2024-07-10 13:51:50 +08:00
hoshi-hiyouga
e36db692e7 Merge pull request #4746 from yzoaim/fix
fix src/llamafactory/train/callbacks.py

Former-commit-id: 79530736bf6d711ed9366386d43d3fdc84d5b6fc
2024-07-10 13:32:49 +08:00
hoshi-hiyouga
460a40756c Update callbacks.py
Former-commit-id: 526376967deaad73b7ca11063a2e3f0c9a0add98
2024-07-10 13:32:20 +08:00
-.-
18057e14ef fix src/llamafactory/train/callbacks.py
Former-commit-id: c79a21aeaa5462770790887a6826d335e1ded5a2
2024-07-10 12:05:51 +08:00
hiyouga
025c8fe302 fix #4731
Former-commit-id: 99e016ee552a551b52b6fcf3616cb57a5b927715
2024-07-10 11:32:36 +08:00
hiyouga
446129ca7a fix ppo trainer
Former-commit-id: a03b2e5ef0d5d6b1b27753438745385d290cb211
2024-07-10 11:05:45 +08:00
hiyouga
834c4e8ad9 fix #4742
Former-commit-id: ae9cf84347878fcc462f35db941c14e1df104276
2024-07-09 23:24:24 +08:00
hoshi-hiyouga
11d961cf3c Merge pull request #4706 from T-Atlas/main
chore: Update vllm_engine.py to support vllm version >= 0.5.1
Former-commit-id: db17d0c801b78ad9d9f38fcf31df8d7e9c7a0994
2024-07-07 15:50:38 +08:00
hoshi-hiyouga
00b93d8b2f Update packages.py
Former-commit-id: c61ee780f3aed51c31a81e912f25fbfd11dc7edd
2024-07-07 15:48:29 +08:00
Lian Junhong
281fd5bb89 chore: Update vllm_engine.py to support vllm version >= 0.5.1
Former-commit-id: b73c23a88cef237db626a16ab2a30261afd36564
2024-07-07 15:08:12 +08:00
hiyouga
cb10050cb9 fix #4705
Former-commit-id: cfd25c6463bcc263c8672d1de365dd81a028b66a
2024-07-07 13:10:06 +08:00
marko1616
2935c4cddb Update utils.py
In windows mutiline command should like
command --arg1 xxx `
--arg2 xxx `

Former-commit-id: b189750520af1fccd0485052792eda269692df89
2024-07-06 20:40:13 +08:00
hiyouga
0d6ec70c6f add codegeex4, internlm2.5
Former-commit-id: 349a5fbc934ac289cad44b4e3eb16f458b94710c
2024-07-06 16:16:47 +08:00
hiyouga
74777b4ded update pissa example
Former-commit-id: d01bae6af5f3a619c50247efc8fd83d9f521c6ed
2024-07-06 15:47:32 +08:00
codingma
5f2bd04799 1. add custom eval dataset support
2. merge load dataset and split dataset function


Former-commit-id: 963d97ba07e7efa3a4544c4d077283d9e112b3ad
2024-07-05 15:52:10 +08:00
hiyouga
9a1a5f9778 fix processors
Former-commit-id: 7215f3a8612b570cd322802d14db532927900117
2024-07-05 08:33:22 +08:00
hiyouga
edc8aefa59 fix #4683
Former-commit-id: cbff0ea0db6971f8ced503a2f0cb6bc43e7037ac
2024-07-05 00:58:05 +08:00
hiyouga
ee1c786a12 fix #4674
Former-commit-id: c4f35627b4f0aeb6d4337c3d0e58318c46449f65
2024-07-05 00:41:03 +08:00
hiyouga
a3e4f2b716 Merge branch 'main' of https://github.com/hiyouga/LLaMA-Factory
Former-commit-id: f0b54254b43e93063232f633cdcf1e31d1419bfe
2024-07-04 14:23:37 +08:00
hiyouga
6685f1fb9e fix #4677
Former-commit-id: d4b6715cab2e475dee2ff9f75c637f7611549ec7
2024-07-04 14:22:07 +08:00
hoshi-hiyouga
c89ff328f6 Merge pull request #4673 from hzhaoy/main
tiny fix

Former-commit-id: e0ef32fc3a5469cdd854288c4bb9eb78bb7e27f1
2024-07-04 10:40:41 +08:00
hzhaoy
c6f1bc65c0 tiny fix
Former-commit-id: 8f43ad988a4fd518a708fba53a173596ce2c59dd
2024-07-04 10:20:28 +08:00
hiyouga
0f43c61229 update tests
Former-commit-id: 8c479a4f7fc97dedc9ca9ceea9e0dd3c4d573253
2024-07-04 04:00:12 +08:00
hiyouga
8567dab167 tiny fix
Former-commit-id: 9b211861eba19ae9fc360bc96eeb8ad67ba40c49
2024-07-04 03:47:05 +08:00
hiyouga
0517d7bee5 tiny fix
Former-commit-id: 935703b46d2871ce1014832da067dfe4a50c0610
2024-07-04 03:02:23 +08:00
hiyouga
5bc0b9b31c fix data map for packing
Former-commit-id: ee6f8f926f084a195b2dbbd074e041e6c62c6ef4
2024-07-04 03:01:31 +08:00
hiyouga
3d219b91b9 fix packing for eager/sdpa attn
Former-commit-id: 735a033ceb7f2da6da71d138ea091d8a665411a9
2024-07-04 01:52:43 +08:00
hoshi-hiyouga
a90c6306f8 Merge pull request #4224 from chuan298/main
Implement efficient packing without cross-contamination attention

Former-commit-id: ac382cc9fe4ec483658fd54f07f9a123788ce1b1
2024-07-04 01:18:54 +08:00
hiyouga
60558388ec update packing
Former-commit-id: f3d9c31efa0e64317bdd5b4ed6f78653cf3b5ba4
2024-07-04 01:10:55 +08:00
hoshi-hiyouga
b29a7f8cd6 Update packing.py
Former-commit-id: 3cc11aa88839c5b99cfd83d9225770a33d0eb6fd
2024-07-03 23:36:01 +08:00
hiyouga
a1501591e8 update func name
Former-commit-id: ed93ac0829fa656194fd32e1ac063843f475746f
2024-07-03 23:29:33 +08:00
hiyouga
1408aa078d update arg name
Former-commit-id: 1509ed550b2060f946ce20e3c5a9e5c49e86e3ab
2024-07-03 23:23:24 +08:00
hiyouga
5acaa476d6 update hparams
Former-commit-id: 1c4feac44192b1f540208837f5a530b0d3f5fb37
2024-07-03 23:18:58 +08:00
hiyouga
8ac4f87c91 update ui
Former-commit-id: b1522a3c0951e2e57f873dc6c758aaed33ca374e
2024-07-03 23:13:49 +08:00
hiyouga
14d3001824 test
Former-commit-id: 610eea0c0a0069fdc9148620b15ffffcfef731ea
2024-07-03 23:05:39 +08:00
hiyouga
1ac9389ddc update scripts
Former-commit-id: 6dd6bae598d4d0b7b7d80341e88e313e49a49c00
2024-07-03 20:07:44 +08:00
hiyouga
0b0e27c2f1 fix #4609
unwrap_model_for_generation(reward_model) is necessary for zero3 training


Former-commit-id: c8d5b21700577cae8d6ca03359bcf1762c8b7cb8
2024-07-03 19:45:51 +08:00
hiyouga
fd1199cce4 update readme
Former-commit-id: 4b5f05b791fce9fdc4678598d7be8dc954f9ff73
2024-07-03 19:39:05 +08:00
hoshi-hiyouga
3c9eda8265 Merge pull request #4662 from wzh1994/wzh/readme
Add `LazyLLM` to `Projects using LLaMA Factory` in `README.md`

Former-commit-id: 5ac6334cc40cefda91f5344f60ec0d4757d76df4
2024-07-03 15:51:02 +08:00
wangzhihong
6622cdb43f Update README_zh.md
Former-commit-id: d4036add433989ad88d54895b6f5af90b393c009
2024-07-03 14:59:09 +08:00
wangzhihong
49c28a7dab add LazyLLM to Projects using LLaMA Factory in README.md
Former-commit-id: e1d8587ea120ad356df35431f84af92193fcbaf3
2024-07-03 11:12:20 +08:00
hiyouga
a42671c2d7 tiny fix
Former-commit-id: d944020257f363f38e62de6279b337e399b7c65e
2024-07-03 02:31:50 +08:00
hiyouga
f17ab6ad92 tiny fix
Former-commit-id: 98c4a0af6b3e27ae393d2847f48a01d23d9c8780
2024-07-02 23:06:13 +08:00
hiyouga
ca548af2a2 remove rlhf support for chatglm2&3
Former-commit-id: bcbb5b71961b89719bffb0d202c431c82e6067cc
2024-07-02 23:03:17 +08:00
hiyouga
579997688f upcast logits
Former-commit-id: df61660351c8af30591471807a20869a45bb055a
2024-07-02 22:32:05 +08:00
hiyouga
e6ba7ef3e6 improve rlhf
Former-commit-id: e441780e3db256ca09a442ea9254e7ce16898a07
2024-07-02 22:23:08 +08:00
ancv
20fdf177e8 move efficient_packing from data_args to model_args
Former-commit-id: 7b61659c707480bcf8c802c73e10d12ad5b9b965
2024-07-02 18:37:55 +07:00
hiyouga
f0b01803ea Update bug-report.yml
Former-commit-id: b92636feff19f144850d7741d8f3fa9fcfdb0580
2024-07-02 19:18:56 +08:00
hiyouga
f5c4841ff2 Update bug-report.yml
Former-commit-id: dc04e33b17dfb798eaee137eef08879a0b7114c7
2024-07-02 19:16:12 +08:00
hoshi-hiyouga
1e01283d81 Merge pull request #4651 from hzhaoy/add-telechat-1b
Add TeleChat-1B

Former-commit-id: 2da64665d3da9dc0084bb782c65e88bac21f45a1
2024-07-02 17:56:43 +08:00
hzhaoy
2196448c21 add TeleChat-1B
Former-commit-id: 1b81b43fc483a21e0c2985b98459ecf5137aa4c4
2024-07-02 17:49:04 +08:00
hiyouga
96a81ce89d fix ppo callbacks
Former-commit-id: 54f1c67c2a802b1d8368a6d1837d4c9a729f2695
2024-07-02 17:34:56 +08:00
hoshi-hiyouga
a715490c2a Merge branch 'main' into main
Former-commit-id: 7be442f37d53a0c6324728fa1fa8e2c84d7f0fa5
2024-07-01 21:01:09 +08:00
hiyouga
973cf8e980 tiny fix
Former-commit-id: 5dd2e5c3323f56420b5845a5ed28bcd9d4da5e41
2024-07-01 05:43:17 +08:00
hiyouga
4357e42391 tiny fix
Former-commit-id: 19e43c3a9ed771e991cb273d394ab28fb923f868
2024-07-01 03:55:20 +08:00
hiyouga
884b49e662 add eval acc
Former-commit-id: 7ffde76fbfb6192e3aac31ccc098f31ce89181ae
2024-07-01 03:51:20 +08:00
hiyouga
38c94d2e9c Update label_issue.yml
Former-commit-id: fffa3defdda02ad579cb703c0704f94bad94f21a
2024-07-01 01:29:09 +08:00
hiyouga
67d2eb6b2a fix #4402 #4617
Deprecate reserved_label_len arg


Former-commit-id: 4b6568984c0be4b31e7aa91b7c0d52b7f7b12b0b
2024-07-01 01:19:27 +08:00
hiyouga
b670fb57db update readme
Former-commit-id: 7998d969bf942c91cf41a189e3941f6e04c81c6f
2024-07-01 00:22:52 +08:00
hiyouga
188b4be64d fix #4398 #4592
Former-commit-id: 8c92d268903c00392c8bd75a731daa1f107d6202
2024-06-30 21:28:51 +08:00
hiyouga
889c042ecd update npu docker
Former-commit-id: 2f4d5174205605b8821d4fb626283e07694ecf80
2024-06-30 21:05:31 +08:00
hiyouga
3c4f8eaa55 loose gemma2 attention
Former-commit-id: a0b645017a2de3d58b6cbc71bd91ec96fc7a818b
2024-06-29 01:42:14 +08:00
hiyouga
6a75d57060 update readme
Former-commit-id: 9f809c311af373508cb51b204ae54b047729a9dc
2024-06-28 06:55:19 +08:00
hiyouga
fda2cf677b bf16 by default, gemma2 attns
Gemma2 finetuning cannot work until merging https://github.com/huggingface/transformers/pull/31674


Former-commit-id: da66c32c7be0adc28d2185b23e9f62d56acb961c
2024-06-28 06:00:26 +08:00
hiyouga
cfdf5a5a78 increase pissa_iter for stability
Former-commit-id: 03f8d9b0fb10ae58e7f68508197330d616957899
2024-06-28 03:18:54 +08:00
hiyouga
a1437c15f7 fix docker flashattn
Former-commit-id: 0966f5d4616a3877a6b921976dc39e8799831d36
2024-06-28 01:28:59 +08:00
hiyouga
42e7489713 add Gemma2 models
Former-commit-id: 8fc5a248ecfd6861cb90dac6c14fe89cdeaf8921
2024-06-28 01:26:50 +08:00
hiyouga
024760f866 update examples
Former-commit-id: 66f248b90cfa2b29c73060459b2337b78154c47b
2024-06-28 01:17:07 +08:00
hiyouga
46f0189e88 refactor pissa, improve llamaboard
Former-commit-id: 619556e46c19718f702c97df5d570a2a4c5fb13a
2024-06-28 01:04:24 +08:00
hoshi-hiyouga
edc7498111 Merge pull request #4580 from hzhaoy/bugfix-deepspeed-pissa
Fix bug when using pissa method with deepspeed

Former-commit-id: f260d458f91d6d2b4ed141f64844cded11d5aaad
2024-06-28 00:46:51 +08:00
hiyouga
9103fdf866 fix #4549
Former-commit-id: c9fdef10de737d1f433209812ef73e29cb60490a
2024-06-28 00:41:58 +08:00
hiyouga
95bf795de4 fix docker file
Former-commit-id: 688f02decb1185deb74b26444f7643cab7d355c1
2024-06-27 20:29:16 +08:00
hiyouga
bf99223a80 tiny fix
Former-commit-id: c1a78a3a9f8ab9d57577cee37f9c457d60863ba2
2024-06-27 20:14:48 +08:00
hoshi-hiyouga
9caf9b6f91 Merge pull request #4590 from injet-zhou/main
Exit the process with the subprocess's return code when utilizing the CLI

Former-commit-id: c6a8a7f239d7aa7c74ba09d55a24d4416181cc02
2024-06-27 20:09:36 +08:00
hoshi-hiyouga
727c7b0dc6 Merge pull request #4461 from hzhaoy/feature/support-flash-attn
support flash-attn in Dockerfile

Former-commit-id: e30a47ab5bda9303c8a2eb814caf0dd40c01b125
2024-06-27 20:05:26 +08:00
hoshi-hiyouga
13d184b280 Merge pull request #4561 from hashstone/fix-docker-npu
fix torch-npu dependency

Former-commit-id: 14867c5cf8be3a5e8a91a6533a615d32d298fd67
2024-06-27 19:58:16 +08:00
hoshi-hiyouga
12a91774b0 Update Dockerfile
Former-commit-id: a239f535a64378b74ef34799cd8e2e4a78f00f4c
2024-06-27 19:57:40 +08:00
hoshi-hiyouga
88018000ac Update Dockerfile
Former-commit-id: 7dea6840256472f8aa2c642f11d9e30bfa0fb96f
2024-06-27 19:51:25 +08:00
hoshi-hiyouga
f6eda1c35d Update setup.py
Former-commit-id: 544e1844fb237eed3eb621f4e6e355eac2ff7b85
2024-06-27 19:38:15 +08:00
hoshi-hiyouga
a2ebdbc112 Update README_zh.md
Former-commit-id: 62f2e27f4355aa35c26e1146dbe90fac3b380118
2024-06-27 19:17:52 +08:00
hoshi-hiyouga
e930a42083 Update README.md
Former-commit-id: 01869ccbb5af2704c9d5bfdd4f2ff30978fb466d
2024-06-27 19:17:35 +08:00
hoshi-hiyouga
4b123f49cb Update setup.py
Former-commit-id: 42293ab26f7fd7ffb77b308655ccd47b7c2ffa84
2024-06-27 19:16:46 +08:00
faddddeout
556eca918d Exit the process with the subprocess's return code when utilizing the CLI
Former-commit-id: ab42a4e2501a80fba1704a506bd1209a441570fa
2024-06-27 09:58:00 +00:00
fanjunliang
31fcd03f3c support docker-npu-[amd64|arm64] build
Former-commit-id: 25f16f5e299c94175e62bac9f0da5b47a2bb31b7
2024-06-27 15:25:12 +08:00
hzhaoy
89d9dd5aa5 fix #4579
Former-commit-id: 0fa298ff6a4febea36ea9f11c7594277a77e6e9b
2024-06-27 13:49:57 +08:00
hiyouga
d1aad72826 add quant checks
Former-commit-id: 15bb053e3549739b1a2134640a659b0f35df7de7
2024-06-27 01:12:25 +08:00
hiyouga
8e5b4bddf4 update examples
Former-commit-id: cce238f7d07919b79237bc9ab39265766c20f020
2024-06-27 00:53:33 +08:00
hiyouga
5a7cb9af4e tiny fix
Former-commit-id: c6747a39dbbdda8decaa104499918bc7ac5f02e4
2024-06-27 00:46:41 +08:00
hiyouga
d1cda4ec68 tiny fix
Former-commit-id: 69dac21ed9f07977b4540eb838a0ef93f3d3abc4
2024-06-27 00:36:04 +08:00
hiyouga
8aaf1185a5 support HQQ/EETQ #4113
Former-commit-id: b7cb51ddb394f04fe4646b2c297fc8d918c9979e
2024-06-27 00:29:42 +08:00
hzhaoy
b46bd07119 add flash-attn installation flag in Dockerfile
Former-commit-id: 2535044e95f6df628bd1f01e0eecb02407105d79
2024-06-27 00:13:30 +08:00
hiyouga
08fa707085 improve autogptq integration
Former-commit-id: d68408c7b123b8ff92014db35cac0b24b414a6f4
2024-06-26 22:11:44 +08:00
hiyouga
72ba29d81a fix #4458
Former-commit-id: aab14b15268dbe74ded22549dbd3677474868cbb
2024-06-26 19:52:35 +08:00
hiyouga
cf2dc4c444 fix #4556
Former-commit-id: 81faa9a985c14e83e38f42aedd228edb676b0695
2024-06-26 19:43:16 +08:00
fanjunliang
d82d86e16d fix torch-npu dependency
Former-commit-id: 7c8a8061d0cda6342f6c883748fb6bc6650df9f9
2024-06-26 18:21:42 +08:00
hoshi-hiyouga
bde31d8600 Merge pull request #4544 from MengqingCao/npu
fix docker-compose path

Former-commit-id: a3389661d2f6eb6ff7f67204a6d11b758e08d9c8
2024-06-26 10:19:24 +08:00
MengqingCao
e115d55585 fix docker-compose path
Former-commit-id: 9de3c24aa2a8268be06c8fef8e47f4fb6715c7ec
2024-06-26 02:15:00 +00:00
hzhaoy
daea86e047 support flash-attn in Dockerfile
Former-commit-id: 0dba000aa178f915cea7d75bf0c9d47e671a21d2
2024-06-25 15:13:07 +08:00
hiyouga
a4f69d8914 fix #4456
Former-commit-id: 920f4fa4ca9e08bcf0d16450e085ee0fa8b4e1c5
2024-06-25 14:34:13 +08:00
hiyouga
98f382fda3 lint
Former-commit-id: c9e424d2198b5872ce118a6ab4c109bf73be2bee
2024-06-25 02:55:50 +08:00
hiyouga
cd899734f3 fix test case
Former-commit-id: 6663057cfbdc96385d901a5dfba22cfcd7a61b23
2024-06-25 02:51:49 +08:00
hiyouga
f51b435bcf fix #4432
Former-commit-id: 972a3b469c600bc6528aef3a49b6fdec63d65803
2024-06-25 02:34:04 +08:00
hiyouga
0f82a55305 fix #4379
Former-commit-id: 96bedb4b6445a04ff8b97fb2aadace50b2f882df
2024-06-25 02:31:44 +08:00
hiyouga
9fd7a410bb tiny fix about badam
Former-commit-id: 03f49267c7406e36aee35639f86e6e0383897090
2024-06-25 01:54:53 +08:00
hiyouga
98fb3d015a fix #4419
Former-commit-id: 15069c3ca814d5ac9beec77d914b71cde7ea0f47
2024-06-25 01:51:29 +08:00
hoshi-hiyouga
bfb2ad7c79 Merge pull request #4352 from Ledzy/main
[Enhancement] Support ZeRO-3 when using BAdam

Former-commit-id: 0dc75275efa7d7540b472783a52ea6aeaa503c0b
2024-06-25 01:49:13 +08:00
hiyouga
135bfbf7c1 tiny fix
Former-commit-id: bb57478366a70a0871af30ab31c890f471e27ff4
2024-06-25 01:15:19 +08:00
hoshi-hiyouga
c6b17ebc20 Merge pull request #4355 from MengqingCao/npu
Add docker-npu

Former-commit-id: 2a59806352713764b1e4b7a54942466f972f5fdc
2024-06-25 01:07:43 +08:00
hoshi-hiyouga
b55eb30474 Update README_zh.md
Former-commit-id: f0c95160fea48b8c6291f42beb79ac089177fbb2
2024-06-25 01:06:59 +08:00
hoshi-hiyouga
cec2f1fc00 Update README.md
Former-commit-id: abe7aca5e133960da9200e3a036d9a550f474171
2024-06-25 01:03:38 +08:00
hoshi-hiyouga
8367ec03a7 Update docker-compose.yml
Former-commit-id: e038daf8dfa5d948b70c18469cb5a0be9aec464a
2024-06-25 00:54:28 +08:00
hoshi-hiyouga
37013f8068 Update Dockerfile
Former-commit-id: cdcd9455c19311394e148476a28ca75849c845b2
2024-06-25 00:50:34 +08:00
hoshi-hiyouga
8360544d65 Update docker-compose.yml
Former-commit-id: 56af208074e6af5465183af85367e7edd89d5aa6
2024-06-25 00:46:47 +08:00
hoshi-hiyouga
b5cdef43a1 Update Dockerfile
Former-commit-id: c897a70501707c0f4c432bb8e9a9beeb4e8953a3
2024-06-25 00:46:08 +08:00
hoshi-hiyouga
2e5d521ed8 Update Dockerfile
Former-commit-id: 632681d8ece0eaac59bb364d971435a3bc6665a9
2024-06-24 23:41:35 +08:00
hoshi-hiyouga
dbe35d52d1 Merge pull request #4409 from kno10/patch-2
Print help if no arguments given

Former-commit-id: 94ff749773d9f30ee0c98872ace6b7b542fadeda
2024-06-24 23:21:31 +08:00
hoshi-hiyouga
8bcdb6f52c Update cli.py
Former-commit-id: 9db6126496ec9e834541823715f700f92b3968c7
2024-06-24 23:21:10 +08:00
hoshi-hiyouga
5cfcb8262e Merge pull request #4417 from mMrBun/main
Add tool_format parameter to rewrite templates for different function call formats.

Former-commit-id: 8d1460cad5bff5e4626fdd675046021e0a3d1947
2024-06-24 23:17:55 +08:00
hoshi-hiyouga
0b331a318b Update test_formatter.py
Former-commit-id: d13ef043441734189b05e739dbbebb16077a6f0b
2024-06-24 23:14:36 +08:00
hoshi-hiyouga
5d6cf55208 Update template.py
Former-commit-id: d53517bff6f8734221d7df9982f3bdd4d2eb2cab
2024-06-24 23:12:59 +08:00
hoshi-hiyouga
9a1ec19845 Update loader.py
Former-commit-id: afa59d61844595e6b615227e6bfdc0b16c8015dd
2024-06-24 23:06:18 +08:00
hiyouga
a79e93f335 fix #4410
Former-commit-id: f49adc4ab5eade21d7a9e029212f17688ee9b0cf
2024-06-24 22:34:31 +08:00
hoshi-hiyouga
abcb94a738 Merge pull request #4445 from MengqingCao/label
auto-label npu issue

Former-commit-id: 87f4779e224c4d81c410a287369285b86e992c1f
2024-06-24 22:02:05 +08:00
hoshi-hiyouga
a4f2d5aa6f Update label_issue.yml
Former-commit-id: dc2f7998b4ae9d7223c7c16732d835cea2a28713
2024-06-24 22:01:23 +08:00
hoshi-hiyouga
6b738d1c89 Update label_issue.yml
Former-commit-id: 90785a69c6210c3a02babb12c56fb7900095247c
2024-06-24 21:59:39 +08:00
hoshi-hiyouga
f4c518b370 Merge pull request #4446 from stceum/bug-fix
Bug Fix: `off` is parsed as `False` in yaml file

Former-commit-id: 243478a3d6c08f5677ee57871862694561617f64
2024-06-24 21:41:28 +08:00
hoshi-hiyouga
d475dd3809 Update parser.py
Former-commit-id: 60e605cd9d399bd04432864ede9c84302890eac8
2024-06-24 21:37:42 +08:00
hoshi-hiyouga
5675c47a01 Update test_attention.py
Former-commit-id: c2cc7a0f152aa14fc03ae413f4a9dc06742a29d7
2024-06-24 21:35:34 +08:00
stceum
16e950454e Bug Fix: off is parsed as False in yaml file, changed to disabled to avoid this.
Former-commit-id: 171289d8e4c111fdca2b100282b64c74a04a4726
2024-06-24 20:39:31 +08:00
MengqingCao
2926265a14 auto-label npu issue
Former-commit-id: d19c9eac783377151e58731723fb7cbb2dab3323
2024-06-24 12:27:00 +00:00
MengqingCao
af2607de1a update docker files
1. add docker-npu (Dockerfile and docker-compose.yml)
  2. move cuda docker to docker-cuda and tiny changes to adapt to the new path


Former-commit-id: 5431c1f18aadb072208efe7fd8e36fdcfbf807c2
2024-06-24 10:57:36 +00:00
hiyouga
826d7808b4 update readme
Former-commit-id: 0775d56ee3cfde34e28a48cbf4a583f4530def19
2024-06-24 18:29:04 +08:00
hiyouga
4c89aca243 update readme
Former-commit-id: a1477208471039d3578980f929f1ca8c2a07aa96
2024-06-24 18:22:12 +08:00
mMrBun
43a065bb07 Add tool_format to overwrite tool formatter template
Former-commit-id: af08971ca50443fd5597e5e4412a3aa17214502f
2024-06-22 02:13:23 +08:00
hiyouga
4513a2cc75 remove dup template
Former-commit-id: 5fec12203b24608af4d4993f44a657eb5a0348e5
2024-06-22 01:31:32 +08:00
hiyouga
f29c1ac6e5 fix api
Former-commit-id: dcbd6d86dfc49f12529b02ec331e3e5c05740061
2024-06-22 00:00:38 +08:00
Erich Schubert
05abe47c8b Print help if no arguments given
Former-commit-id: 08dfb7ec636fd5bfbb30dac9d5fba6e32bfc6728
2024-06-21 09:14:21 +02:00
ancv
6c185a2c57 move configure_packing to llamafactory.model.patcher and fix constants
Former-commit-id: 9c5e972c9c81957f2e9e30bf284ef1c076de9fd0
2024-06-21 00:45:06 +07:00
hiyouga
af2cb33bb2 tiny fix
Former-commit-id: 2d8d47f6126d68db1701ed18fc31310c6f14dd49
2024-06-20 22:56:05 +08:00
hoshi-hiyouga
f16a4a8264 Merge pull request #4382 from MengqingCao/bugfix
upper bound numpy version to <2.0

Former-commit-id: 07a0182cd470132fafe07b8ea1951c9672d0eb87
2024-06-20 10:19:37 +08:00
MengqingCao
b232552d42 update dependencies
Former-commit-id: 25164273d1ca7a8f6f99b41279e342906f6bc4d5
2024-06-20 02:09:47 +00:00
hiyouga
0edccc11a5 improve llamaboard
Former-commit-id: e606ab35c0eced667dde7137c2d72848f264c96c
2024-06-19 23:46:03 +08:00
hiyouga
b2f5c0e0db fix llamaboard abort
Former-commit-id: 9ef609a2c0185040e531dea3829a6f481539cdea
2024-06-19 23:22:28 +08:00
hiyouga
5f5d4c1923 update patcher
Former-commit-id: afb365e515d615dd62f791622450debab60ce5cc
2024-06-19 21:27:00 +08:00
hiyouga
a7d7f79855 set dev version
Former-commit-id: 221665345d97f839ce4ba8d54643da30c71b6083
2024-06-19 21:08:16 +08:00
hiyouga
f0bff18324 Update publish.yml
Former-commit-id: 60b0633e29c9e701aa3813bd1fdc0282bd07f7c8
2024-06-19 20:46:33 +08:00
hiyouga
b631bdc5b7 release v0.8.2
Former-commit-id: 3050bbe51d46acd8473275d2713fc28932e4a3d3
2024-06-19 20:42:09 +08:00
hiyouga
c65f7e9bd5 fix jinja template
Former-commit-id: 0ebf2e2ee23918d28b0cbb20ba456732d6eedfbb
2024-06-19 20:03:50 +08:00
hiyouga
3e0fa4a8da fix templates
Former-commit-id: 6f357d59b73309c5955683008632e7f320e7dcb1
2024-06-19 17:44:05 +08:00
Jonery
fa3150548e Cleaner integration.
Former-commit-id: 26d4b05d424bd71f570195dd433258caf6465d92
2024-06-19 12:29:40 +08:00
hiyouga
235ed85b0f fix bug
Former-commit-id: 412139eaa2fde98ba19e1257d21144382a59f0d6
2024-06-19 03:49:23 +08:00
hiyouga
1ca639a777 use prefix to replace force system
Former-commit-id: 731d9a964f1c3dbfb83825524d697831e691fb9d
2024-06-19 03:39:52 +08:00
hiyouga
e36a994fe6 fix tool formatter, allow parallel function #4362
Former-commit-id: b8f16c976db4ecec1cc8558851c8cbfb6a5b7e9c
2024-06-19 03:23:51 +08:00
hoshi-hiyouga
19ffcfea76 Merge pull request #4173 from mMrBun/main
Implemented the tool_formatter and tool_extractor for glm4 and Qwen2 tool_format

Former-commit-id: 36b02ceed40198ecd5d559ee4ebef9205442ded2
2024-06-19 03:18:55 +08:00
hiyouga
85f3a09c83 tiny fix
Former-commit-id: bb750fa3dde03ec024ae75596ecd4b884cb126c6
2024-06-18 23:32:18 +08:00
hoshi-hiyouga
60b9a9c1fa Merge pull request #4314 from EliMCosta/patch-2
Fix Dockerfile

Former-commit-id: a123a42d98f5c49446762c1d4cfc674d2e4f61b1
2024-06-18 23:30:59 +08:00
hoshi-hiyouga
984e38575c Merge pull request #4309 from EliMCosta/patch-1
Add Magpie and Webinstruct dataset samples

Former-commit-id: 70966de5d4df51a41fef1da5a919dd622aa9c86c
2024-06-18 23:30:19 +08:00
hiyouga
665df5d733 add deepseek coder v2 #4346
Former-commit-id: d83d3846d8e3bf5c40d4b90c24e2c5909ec61864
2024-06-18 22:53:54 +08:00
hiyouga
4bc0bea0e9 fix #4357
Former-commit-id: a6741bba8cebd16a6a3f97a2dc81057d0e27eb39
2024-06-18 22:42:45 +08:00
hoshi-hiyouga
5cfa342f01 Merge pull request #4334 from zzxzz12345/bugfix/add-pandas-versions
Update requirements.txt

Former-commit-id: 219eb5b346bce7e13c2c3511c1638f9dde595787
2024-06-18 22:30:35 +08:00
hoshi-hiyouga
c106cc24e4 Update requirements.txt
Former-commit-id: da8684f9f0b0103d4fa81279343a48ecd0fcc0cd
2024-06-18 22:27:24 +08:00
hiyouga
372da52d4a fix #4335
Former-commit-id: 2ab449adbb160f339a0586edeb846fa311ad8382
2024-06-18 22:08:56 +08:00
Jonery
c7479751e8 add example
Former-commit-id: 75603db09b085e3f703286b87abe041af020e615
2024-06-18 13:50:26 +08:00
Jonery
870a54ac84 fix typo
Former-commit-id: d4bee3716dbf8a84564d5bcc2059172604819f3e
2024-06-18 12:39:26 +08:00
Jonery
12fcfc2b72 Support distributed BAdam.
Former-commit-id: bdcb986e37975911c190a74d3e60bb77aa2033bd
2024-06-18 12:27:47 +08:00
hiyouga
875270b851 lint
Former-commit-id: a19a7ac99af62b6715c96274f6350b124a784331
2024-06-17 22:35:56 +08:00
hiyouga
43fab306b6 update chat engine #4335
Former-commit-id: b163df7de48777e4319c9ccc736b0acdd5f473ed
2024-06-17 19:07:17 +08:00
hiyouga
77242f4169 update readme
Former-commit-id: 07c629f77c3978f339402e578cde1aede3f37699
2024-06-17 18:47:24 +08:00
Jonery
95ae30f678 Merge remote-tracking branch 'upstream/main'
Former-commit-id: 37834a7e79473ccf50ad7f67745b97c274c326d9
2024-06-17 18:44:51 +08:00
Jonery
7408e778ca update gitigore
Former-commit-id: 0068648aee07840cd2a08071e093436aee3f5cb6
2024-06-17 18:29:36 +08:00
Jonery
ba303fd1aa adapt for badam with ds zero3
Former-commit-id: fff2a020ec8713022bd8145f4a7168168ea07ca4
2024-06-17 18:18:10 +08:00
hiyouga
60d9896a70 fix #4326
Former-commit-id: 3c2c45812a720d92f7f5b15b9f03370fe6bf069e
2024-06-17 18:17:48 +08:00
hiyouga
485a80d294 tiny fix
Former-commit-id: 2289436567a7860d25d9da0afb39e4a3e5e83839
2024-06-17 17:47:25 +08:00
胡翀
63bfe9967e Update requirements.txt
add pandas version requirements

Former-commit-id: ed1cf559aa2d02588aacf55a17b439473651f626
2024-06-17 16:45:57 +08:00
Eli Costa
a720b82e63 Fix Dockerfile
Adds the commands to correctly execute LLama-Factory servers

Former-commit-id: 22af40f0895a6f88709a495febeca8507d41d989
2024-06-16 19:16:23 -03:00
Eli Costa
d3b0048d8c Update README_zh.md
Fix details tag in datasets menus

Former-commit-id: d79c1bd4806e9ea13115fabebf9da2d19b0a52be
2024-06-16 11:34:31 -03:00
Eli Costa
9a0aca42a5 Update README_zh.md
Add Magpie and WebInstruct to README

Former-commit-id: 6cf5323959fe9500ba06ab28980fcc8f62e1373f
2024-06-16 11:22:06 -03:00
Eli Costa
5e802b0645 Update README.md
Add Magpie and Webinstruct to README

Former-commit-id: 2b32b9263f12605e48e11dce9b5fbb746d790745
2024-06-16 11:19:25 -03:00
ancv
dd7a1dbfae update packing with sdpa and eager attention mode
Former-commit-id: 285636ba3a57a1038b2f2fd4cf909a1ca07708d4
2024-06-16 02:25:47 +07:00
hoshi-hiyouga
ca67b7a568 Update parser.py
Former-commit-id: d10c97193d08bd368aca1a72f0d1d8a96c76765d
2024-06-16 02:57:00 +08:00
hiyouga
76cd879c84 update pr template
Former-commit-id: 0b7c29674fda10c0ac87e0a0c75990feabb5a3de
2024-06-16 01:43:43 +08:00
hoshi-hiyouga
e0c049e590 Merge pull request #4307 from hiyouga/pissa
Support pissa

Former-commit-id: e7c0eefe96540c106162f5d252476b10b97ae696
2024-06-16 01:41:50 +08:00
hiyouga
727943f078 fix tol
Former-commit-id: bdb54bcb477126687db789bd89f2df84e424a2a3
2024-06-16 01:38:44 +08:00
hiyouga
8393b08666 Update tests.yml
Former-commit-id: 82e83615a706293abbf266d11c57caedafdd4c5b
2024-06-16 01:22:23 +08:00
hiyouga
9049f72d2f increase tol
Former-commit-id: c29071445e34aed23123fdf883a4d877744a1b0e
2024-06-16 01:21:06 +08:00
hiyouga
32f45c9e91 support pissa
Former-commit-id: ef8e45f2eaf466c54e9a671512a2974575677b08
2024-06-16 01:08:12 +08:00
hiyouga
05f3a3c944 tiny fix
Former-commit-id: f7f440986b0ae3b38ea9f2da80789629d4f79ea1
2024-06-16 01:06:41 +08:00
ancv
f91fe10985 remove some unused params
Former-commit-id: fef8132c50505a5fb6a246bd024491bd31798a3c
2024-06-15 23:00:55 +07:00
hiyouga
14f7bfc545 use fixture
Former-commit-id: 10761985691b9f934f7689c1f82aa6dd68febcca
2024-06-15 20:06:17 +08:00
hiyouga
7f90b0cd20 add tests
Former-commit-id: 484634ee9c982e82e919ff67d507e0210345182d
2024-06-15 19:51:20 +08:00
hiyouga
308abfec6c add minicpm #4227
Former-commit-id: e1bb18ce60be9a1b203989def30f1b9194286325
2024-06-15 17:58:52 +08:00
hiyouga
bb88536166 add license
Former-commit-id: 69cfc98d7c81756a5ab6bf962240e393e449fef0
2024-06-15 17:54:33 +08:00
hiyouga
d2df3f2d6e update readme
Former-commit-id: a43d302aa79cbfb9b0606e855b4c1af6865d8e68
2024-06-15 05:13:16 +08:00
hiyouga
2abfad9c1f fix #4271
Former-commit-id: 03707e78d29bfcf5d395a64bb38632bdb3ff47ce
2024-06-15 05:11:33 +08:00
hiyouga
2af932d969 disable DP
Former-commit-id: c18fd609d268389f3e65274992045a6c9f8e6c1f
2024-06-15 04:57:19 +08:00
hiyouga
c29fa61a9c fix #4292
Former-commit-id: 4cd4c179d24eab0fcaec2b29b9dd71970f877fe8
2024-06-15 04:47:13 +08:00
hiyouga
a30931fe0f fix #4295
Former-commit-id: 08f657868f9d605b837c5d8c2946a25cc05c8735
2024-06-15 04:34:55 +08:00
hiyouga
3ff9b87012 add test cases
Former-commit-id: 731176ff34cdf0cbf6b41c40c69f4ceb54c2daf6
2024-06-15 04:05:54 +08:00
hiyouga
f4f315fd11 Update README.md
Former-commit-id: f8d701cd3ce2e56f95b4f5439b8b48d5b62e0d2b
2024-06-13 16:02:21 +08:00
hiyouga
530165d9a5 update examples
Former-commit-id: d6bf6231290d79eb3a63e711f18fa711ef18a4f6
2024-06-13 03:26:10 +08:00
hiyouga
dbd1458adf add quant check in webui export tab
Former-commit-id: 6455ca07061ae9858cd7bc996b28be1fde697a3d
2024-06-13 03:19:18 +08:00
hiyouga
dedefecd2b Update llama3_full_sft_ds3.yaml
Former-commit-id: e715af62d521112d9c155cfa91fbb42fa0e77710
2024-06-13 03:16:20 +08:00
hiyouga
46f441dd37 update examples
Former-commit-id: 19681f93db399d695aa8e35f8ec2a9e720875baa
2024-06-13 03:15:06 +08:00
hiyouga
49b58fd6af fix #4221
Former-commit-id: 05a3be4853b941909e7d193c31e8d62c8c5f879b
2024-06-13 02:48:21 +08:00
hiyouga
103a507b39 fix #4209
DeepSpeed ZeRO3 has inflight param error when calling model.eval()


Former-commit-id: 4be013f18ea6a35b5a11db98db5f0670ffb41619
2024-06-13 02:25:50 +08:00
hiyouga
0a75224f62 clean code
Former-commit-id: f54cafd5c7f0383370d1a2f357834a61a97397ce
2024-06-13 01:58:16 +08:00
hoshi-hiyouga
04d7629abf Merge pull request #4246 from hzhaoy/adapt-vllm-v0.5.0
adapt vllm==0.5.0

Former-commit-id: 1068e25fc8b89f11cc79b164ee4aef9ce137ad4c
2024-06-13 01:54:02 +08:00
hiyouga
1b6786a21f add neo-sft dataset
Former-commit-id: 34863fa7cb641ceca92e3a2eec914126db537b62
2024-06-13 01:00:56 +08:00
hiyouga
5080f2314c fix lint
Former-commit-id: b170165679317af2b3f03633afac27661b3deb06
2024-06-13 00:48:44 +08:00
hiyouga
41beb7f0a3 fix docker compose usage
Former-commit-id: 59a5bd5d5c8d2a44e2dad26b74e77a45e109c8d6
2024-06-13 00:07:48 +08:00
hzhaoy
799873aa14 adapt vllm==0.5.0
Former-commit-id: 02afd9ff64f23e6707ac739ae1269f41bd70c340
2024-06-12 18:29:03 +08:00
hiyouga
fe2c7eaa93 update readme
Former-commit-id: a436aaa83f0cf12c8f404459e5486f9369d538ec
2024-06-12 17:39:12 +08:00
hiyouga
6392d45ea7 fix #4242
Former-commit-id: cf260e7af03f49aa5e3d6daf3b27738ff9b9bcb8
2024-06-12 16:50:11 +08:00
hoshi-hiyouga
c60ea675d7 Merge pull request #4234 from kimdwkimdw/patch-1
Support vllm==0.5.0

Former-commit-id: 0a9da057c9e7ef11cd709b20263c3d2e4c2d72ed
2024-06-12 16:39:09 +08:00
Arthur Kim
16c7c92396 Support vllm==0.5.0
Former-commit-id: e7a8ffd7af21bc3759f055033ba2209fa7a1be0e
2024-06-12 16:49:12 +09:00
ancv
c7ab302c69 implement efficient packing without cross-contamination attention
Former-commit-id: a64a5305c0da5ef092d4cc26faf829bb44de65d1
2024-06-12 11:56:01 +07:00
hoshi-hiyouga
7598b37543 Merge pull request #4204 from dignfei/main
fixbug:llama3在增量预训练时应该使用<|end_of_text|>标识文本的结束

Former-commit-id: e566342636faf0031a0ba5d5dd4fcff8401a2b76
2024-06-11 17:06:10 +08:00
hoshi-hiyouga
cc9717e2f2 Update pretrain.py
Former-commit-id: e2317b2a84149e39fddfd6366be3de23dfb71f82
2024-06-11 17:02:14 +08:00
hiyouga
08f2f99f4b fix deepspeed version
Former-commit-id: 938a69bb07d4de7d82928ff01c582032162c1480
2024-06-11 16:52:36 +08:00
d
77bf3d66c7 经过大量的增量预训练,进行对比试验,发现这个bug:llama3在预训练时使用的tokenizer.eos_toke是'<|end_of_text|>' ,这里在每条数据后面也得用这个,而不是'<|eot_id|>',否则很容易导致严重的性能下降
Former-commit-id: ef470561f742b16eaa0f99c4cadecd7c84ce6bd2
2024-06-11 16:23:40 +08:00
hiyouga
f14f67f803 Update bug-report.yml
Former-commit-id: bb022cd867ebf2593e40fc6ba43b768603b129a3
2024-06-11 15:40:21 +08:00
hiyouga
820b6e7b32 fix #4198
Former-commit-id: 945d2c6cc73542adf9272ebd9aa332ea2c1c7361
2024-06-11 15:38:38 +08:00
hiyouga
27aece94cf tiny fix
Former-commit-id: c4b2e263d9cefbad0fbc5de72422e4ef8edbcb54
2024-06-11 12:48:53 +08:00
hoshi-hiyouga
3f2508be92 Merge pull request #4191 from iamthebot/al--add_manifest_for_reqs
Add MANIFEST.in so requirements.txt is present in sdist

Former-commit-id: fd6d1c3fce855d1ef7396cf33af9f12eadc5a878
2024-06-11 10:41:15 +08:00
Alfredo Luque
fce11bb386 add manifest so requirements.txt in sdist
Former-commit-id: b501a3c56c51786c3006a2aca15a145641a4556c
2024-06-11 00:07:06 +00:00
hiyouga
2723438531 tiny fix
Former-commit-id: b5e9711ef375cc323fc083e742cccfc974550416
2024-06-11 01:04:16 +08:00
hiyouga
f330b73682 set dev version
Former-commit-id: 16c47cc15226119e33e46ba0f2f6ccb37072257f
2024-06-11 00:50:53 +08:00
hiyouga
0f1e592326 release v0.8.1
Former-commit-id: 875a34f492701d1c644facbe9ede411af2931513
2024-06-11 00:44:26 +08:00
hiyouga
4d7dd0330d fix #4160
The split heads should be concatenated in dim=2


Former-commit-id: 4b3f247f270d44df9fe226cfe0dabfb7fcd2deda
2024-06-11 00:37:17 +08:00
hiyouga
ea2ca2777f fix #4145
Fix the docker image


Former-commit-id: a9838281156fe870bfcde5d1f7afc15264fd4aad
2024-06-11 00:19:17 +08:00
hiyouga
4b2b92fd9a update evaluator
Former-commit-id: bb8661e62481ff7027b8969f3d8a6a17290c9da3
2024-06-10 23:56:00 +08:00
hiyouga
784088db3f fix #2666
Former-commit-id: f121d5c4f94af9f165132c4309cb9bdc8217d985
2024-06-10 21:24:15 +08:00
hoshi-hiyouga
0ecf0d51e3 Merge pull request #4167 from yzoaim/branch
fix README

Former-commit-id: 1a877b0fbf54478dbf905fb3e84bd079a55bb725
2024-06-10 16:24:33 +08:00
mMrBun
bc04ca464a Optimize the handling of QWEN2 in scenarios involving multiple tool calls.
Former-commit-id: 48f870edc96ada40360f7e6e67cbf58805295b33
2024-06-10 02:00:14 +08:00
mMrBun
44829df762 Removed unnecessary comments.
Former-commit-id: 2b81252aa693871098931cd7873ef83ef4922ba5
2024-06-09 18:25:22 +08:00
mMrBun
94ddfa66c0 Merge branch 'hiyouga:main' into main
Former-commit-id: c25734d874a36222e0a540a2c994bbda73008b27
2024-06-09 18:17:24 +08:00
mMrBun
8db8ed5a41 Implemented the tool_formatter and tool_extractor for glm4 tool_format
Former-commit-id: db7fa4490ea7f6966418d2879c895cbc1763b16d
2024-06-09 18:16:15 +08:00
-.-
041ecd0de1 fix README
Former-commit-id: fa30028c0b83c38610b596209493a748b8ca0928
2024-06-08 23:51:56 +08:00
hiyouga
d812249db7 add pr ci
Former-commit-id: 9b05bb8540b946d0c74bf804bcafc4a785d22c47
2024-06-08 21:25:35 +08:00
hiyouga
88528f1a87 Update tests.yml
Former-commit-id: e90f0cc30d6bb819246ccc08935c39e714c179a1
2024-06-08 21:15:36 +08:00
hiyouga
82533114a7 update git workflows
Former-commit-id: 5a3f26bc53433caa98b2a66294becaf156280a4c
2024-06-08 21:11:32 +08:00
hiyouga
6d9fbb3fa9 fix llamafactory-cli env
Former-commit-id: b0515e5f42831b67d1f4d049999ecb68756e66db
2024-06-08 07:15:45 +08:00
hiyouga
9953ae3d03 set dev version
Former-commit-id: 08b7fe1c452cc99264ff0312e310b579590c6a45
2024-06-08 06:46:09 +08:00
187 changed files with 6184 additions and 1711 deletions

View File

@@ -4,10 +4,10 @@
.venv
cache
data
docker
saves
hf_cache
output
examples
.dockerignore
.gitattributes
.gitignore
Dockerfile

View File

@@ -1,13 +1,19 @@
name: "\U0001F41B Bug / Help"
description: Create a report to help us improve the LLaMA Factory
body:
- type: markdown
attributes:
value: |
Issues included in **FAQs** or those with **insufficient** information may be closed without a response.
包含在**常见问题**内或提供信息**不完整**的 issues 可能不会被回复。
- type: checkboxes
id: reminder
attributes:
label: Reminder
description: |
Please ensure you have read the README carefully and searched the existing issues.
请确保您已经认真阅读了 README 并且搜索过现有的 Issue。
Please ensure you have read the README carefully and searched the existing issues (including FAQs).
请确保您已经认真阅读了 README 并且搜索过现有的 issues包括常见问题
options:
- label: I have read the README and searched the existing issues.
@@ -38,7 +44,9 @@ body:
请合理使用 Markdown 标签来格式化您的文本。
placeholder: |
```bash
llamafactory-cli train ...
```
- type: textarea
id: expected-behavior

View File

@@ -5,3 +5,4 @@ Fixes # (issue)
## Before submitting
- [ ] Did you read the [contributor guideline](https://github.com/hiyouga/LLaMA-Factory/blob/main/.github/CONTRIBUTING.md)?
- [ ] Did you write any new necessary tests?

30
.github/workflows/label_issue.yml vendored Normal file
View File

@@ -0,0 +1,30 @@
name: label_issue
on:
issues:
types:
- opened
jobs:
label_issue:
runs-on: ubuntu-latest
permissions:
issues: write
steps:
- env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
ISSUE_URL: ${{ github.event.issue.html_url }}
ISSUE_TITLE: ${{ github.event.issue.title }}
run: |
LABEL=pending
NPU_KEYWORDS=(npu huawei ascend 华为 昇腾)
ISSUE_TITLE_LOWER=$(echo $ISSUE_TITLE | tr '[:upper:]' '[:lower:]')
for KEYWORD in ${NPU_KEYWORDS[@]}; do
if [[ $ISSUE_TITLE_LOWER == *$KEYWORD* ]] && [[ $ISSUE_TITLE_LOWER != *input* ]]; then
LABEL=pending,npu
break
fi
done
gh issue edit $ISSUE_URL --add-label $LABEL

40
.github/workflows/publish.yml vendored Normal file
View File

@@ -0,0 +1,40 @@
name: publish
on:
release:
types:
- published
jobs:
publish:
name: Upload release to PyPI
runs-on: ubuntu-latest
environment:
name: release
url: https://pypi.org/p/llamafactory
permissions:
id-token: write
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.8"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install build
- name: Build package
run: |
python -m build
- name: Publish package
uses: pypa/gh-action-pypi-publish@release/v1

View File

@@ -19,21 +19,33 @@ on:
jobs:
tests:
runs-on: ubuntu-latest
environment:
name: tests
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
steps:
- uses: actions/checkout@v4
- name: Checkout
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.8"
cache: "pip"
cache-dependency-path: "setup.py"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install .[torch,dev]
python -m pip install ".[torch,dev]"
- name: Check quality
run: |
make style && make quality
- name: Test with pytest
run: |
make test

6
.gitignore vendored
View File

@@ -160,6 +160,8 @@ cython_debug/
.idea/
# custom .gitignore
user.config
saves/
cache/
config/
saves/
output/
wandb/

View File

@@ -12,12 +12,16 @@ authors:
given-names: "Yanhan"
- family-names: "Luo"
given-names: "Zheyan"
- family-names: "Feng"
given-names: "Zhangchi"
- family-names: "Ma"
given-names: "Yongqiang"
title: "LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models"
url: "https://arxiv.org/abs/2403.13372"
preferred-citation:
type: article
type: conference-paper
conference:
name: "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)"
authors:
- family-names: "Zheng"
given-names: "Yaowei"
@@ -29,9 +33,12 @@ preferred-citation:
given-names: "Yanhan"
- family-names: "Luo"
given-names: "Zheyan"
- family-names: "Feng"
given-names: "Zhangchi"
- family-names: "Ma"
given-names: "Yongqiang"
journal: "arXiv preprint arXiv:2403.13372"
title: "LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models"
url: "https://arxiv.org/abs/2403.13372"
year: 2024
publisher: "Association for Computational Linguistics"
address: "Bangkok, Thailand"

View File

@@ -1,14 +0,0 @@
FROM nvcr.io/nvidia/pytorch:24.01-py3
WORKDIR /app
COPY requirements.txt /app/
RUN pip install -r requirements.txt
COPY . /app/
RUN pip install -e .[metrics,bitsandbytes,qwen]
VOLUME [ "/root/.cache/huggingface/", "/app/data", "/app/output" ]
EXPOSE 7860
CMD [ "llamafactory-cli", "webui" ]

1
MANIFEST.in Normal file
View File

@@ -0,0 +1 @@
include LICENSE requirements.txt

View File

@@ -11,4 +11,4 @@ style:
ruff format $(check_dirs)
test:
pytest tests/
CUDA_VISIBLE_DEVICES= pytest tests/

228
README.md
View File

@@ -4,7 +4,7 @@
[![GitHub Code License](https://img.shields.io/github/license/hiyouga/LLaMA-Factory)](LICENSE)
[![GitHub last commit](https://img.shields.io/github/last-commit/hiyouga/LLaMA-Factory)](https://github.com/hiyouga/LLaMA-Factory/commits/main)
[![PyPI](https://img.shields.io/pypi/v/llamafactory)](https://pypi.org/project/llamafactory/)
[![Citation](https://img.shields.io/badge/citation-44-green)](#projects-using-llama-factory)
[![Citation](https://img.shields.io/badge/citation-72-green)](#projects-using-llama-factory)
[![GitHub pull request](https://img.shields.io/badge/PRs-welcome-blue)](https://github.com/hiyouga/LLaMA-Factory/pulls)
[![Discord](https://dcbadge.vercel.app/api/server/rKfvV9r9FK?compact=true&style=flat)](https://discord.gg/rKfvV9r9FK)
[![Twitter](https://img.shields.io/twitter/follow/llamafactory_ai)](https://twitter.com/llamafactory_ai)
@@ -15,7 +15,7 @@
[![GitHub Tread](https://trendshift.io/api/badge/repositories/4535)](https://trendshift.io/repositories/4535)
👋 Join our [WeChat](assets/wechat.jpg).
👋 Join our [WeChat](assets/wechat.jpg) or [NPU user group](assets/wechat_npu.jpg).
\[ English | [中文](README_zh.md) \]
@@ -48,8 +48,8 @@ Choose your path:
- **Various models**: LLaMA, LLaVA, Mistral, Mixtral-MoE, Qwen, Yi, Gemma, Baichuan, ChatGLM, Phi, etc.
- **Integrated methods**: (Continuous) pre-training, (multimodal) supervised fine-tuning, reward modeling, PPO, DPO, KTO, ORPO, etc.
- **Scalable resources**: 32-bit full-tuning, 16-bit freeze-tuning, 16-bit LoRA and 2/4/8-bit QLoRA via AQLM/AWQ/GPTQ/LLM.int8.
- **Advanced algorithms**: GaLore, BAdam, DoRA, LongLoRA, LLaMA Pro, Mixture-of-Depths, LoRA+, LoftQ and Agent tuning.
- **Scalable resources**: 16-bit full-tuning, freeze-tuning, LoRA and 2/3/4/5/6/8-bit QLoRA via AQLM/AWQ/GPTQ/LLM.int8/HQQ/EETQ.
- **Advanced algorithms**: GaLore, BAdam, DoRA, LongLoRA, LLaMA Pro, Mixture-of-Depths, LoRA+, LoftQ, PiSSA and Agent tuning.
- **Practical tricks**: FlashAttention-2, Unsloth, RoPE scaling, NEFTune and rsLoRA.
- **Experiment monitors**: LlamaBoard, TensorBoard, Wandb, MLflow, etc.
- **Faster inference**: OpenAI-style API, Gradio UI and CLI with vLLM worker.
@@ -71,9 +71,9 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
## Changelog
[24/06/07] We supported fine-tuning the **[Qwen-2](https://qwenlm.github.io/blog/qwen2/)** series models.
[24/06/16] We support **[PiSSA](https://arxiv.org/abs/2404.02948)** algorithm. See [examples](examples/README.md) for usage.
[24/06/05] We supported fine-tuning the **[GLM-4-9B/GLM-4-9B-Chat](https://github.com/THUDM/GLM-4)** models.
[24/06/07] We supported fine-tuning the **[Qwen2](https://qwenlm.github.io/blog/qwen2/)** and **[GLM-4](https://github.com/THUDM/GLM-4)** models.
[24/05/26] We supported **[SimPO](https://arxiv.org/abs/2405.14734)** algorithm for preference learning. See [examples](examples/README.md) for usage.
@@ -151,35 +151,32 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
## Supported Models
| Model | Model size | Template |
| -------------------------------------------------------- | -------------------------------- | --------- |
| [Baichuan2](https://huggingface.co/baichuan-inc) | 7B/13B | baichuan2 |
| [BLOOM](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - |
| [BLOOMZ](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - |
| [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 |
| [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | cohere |
| [DeepSeek (MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
| [Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | gemma |
| [GLM4](https://huggingface.co/THUDM) | 9B | glm4 |
| [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 |
| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
| [LLaMA-2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 |
| [LLaMA-3](https://huggingface.co/meta-llama) | 8B/70B | llama3 |
| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | vicuna |
| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
| [OLMo](https://huggingface.co/allenai) | 1B/7B | - |
| [PaliGemma](https://huggingface.co/google) | 3B | gemma |
| [Phi-1.5/2](https://huggingface.co/microsoft) | 1.3B/2.7B | - |
| [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | phi |
| [Qwen](https://huggingface.co/Qwen) | 1.8B/7B/14B/72B | qwen |
| [Qwen1.5 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B/110B | qwen |
| [Qwen2 (MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/7B/57B/72B | qwen |
| [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse |
| [Yi (1/1.5)](https://huggingface.co/01-ai) | 6B/9B/34B | yi |
| [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | yi_vl |
| [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | yuan |
| Model | Model size | Template |
| ------------------------------------------------------------ | -------------------------------- | --------- |
| [Baichuan 2](https://huggingface.co/baichuan-inc) | 7B/13B | baichuan2 |
| [BLOOM/BLOOMZ](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - |
| [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 |
| [Command R](https://huggingface.co/CohereForAI) | 35B/104B | cohere |
| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
| [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
| [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 |
| [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 |
| [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
| [Llama 2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 |
| [Llama 3](https://huggingface.co/meta-llama) | 8B/70B | llama3 |
| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | vicuna |
| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
| [OLMo](https://huggingface.co/allenai) | 1B/7B | - |
| [PaliGemma](https://huggingface.co/google) | 3B | gemma |
| [Phi-1.5/Phi-2](https://huggingface.co/microsoft) | 1.3B/2.7B | - |
| [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | phi |
| [Qwen/Qwen1.5/Qwen2 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/4B/7B/14B/32B/72B/110B | qwen |
| [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse |
| [Yi/Yi-1.5](https://huggingface.co/01-ai) | 6B/9B/34B | yi |
| [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | yi_vl |
| [Yuan 2](https://huggingface.co/IEITYuan) | 2B/51B/102B | yuan |
> [!NOTE]
> For the "base" models, the `template` argument can be chosen from `default`, `alpaca`, `vicuna` etc. But make sure to use the **corresponding template** for the "instruct/chat" models.
@@ -259,6 +256,9 @@ You also can add a custom chat template to [template.py](src/llamafactory/data/t
- [Cosmopedia (en)](https://huggingface.co/datasets/HuggingFaceTB/cosmopedia)
- [STEM (zh)](https://huggingface.co/datasets/hfl/stem_zh_instruction)
- [Ruozhiba (zh)](https://huggingface.co/datasets/hfl/ruozhiba_gpt4_turbo)
- [Neo-sft (zh)](https://huggingface.co/datasets/m-a-p/neo_sft_phase2)
- [WebInstructSub (en)](https://huggingface.co/datasets/TIGER-Lab/WebInstructSub)
- [Magpie-Pro-300K-Filtered (en)](https://huggingface.co/datasets/Magpie-Align/Magpie-Pro-300K-Filtered)
- [LLaVA mixed (en&zh)](https://huggingface.co/datasets/BUAADreamer/llava-en-zh-300k)
- [Open Assistant (de)](https://huggingface.co/datasets/mayflowergmbh/oasst_de)
- [Dolly 15k (de)](https://huggingface.co/datasets/mayflowergmbh/dolly-15k_de)
@@ -335,10 +335,10 @@ huggingface-cli login
```bash
git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git
cd LLaMA-Factory
pip install -e '.[torch,metrics]'
pip install -e ".[torch,metrics]"
```
Extra dependencies available: torch, torch_npu, metrics, deepspeed, bitsandbytes, vllm, galore, badam, gptq, awq, aqlm, qwen, modelscope, quality
Extra dependencies available: torch, torch-npu, metrics, deepspeed, bitsandbytes, hqq, eetq, gptq, awq, aqlm, vllm, galore, badam, qwen, modelscope, quality
> [!TIP]
> Use `pip install --no-deps -e .` to resolve package conflicts.
@@ -357,9 +357,7 @@ To enable FlashAttention-2 on the Windows platform, you need to install the prec
<details><summary>For Ascend NPU users</summary>
Join [NPU user group](assets/wechat_npu.jpg).
To install LLaMA Factory on Ascend NPU devices, please specify extra dependencies: `pip install -e '.[torch-npu,metrics]'`. Additionally, you need to install the **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**. Please follow the [installation tutorial](https://www.hiascend.com/document/detail/en/CANNCommunityEdition/600alphaX/softwareinstall/instg/atlasdeploy_03_0031.html) or use the following commands:
To install LLaMA Factory on Ascend NPU devices, please specify extra dependencies: `pip install -e ".[torch-npu,metrics]"`. Additionally, you need to install the **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**. Please follow the [installation tutorial](https://www.hiascend.com/document/detail/en/CANNCommunityEdition/600alphaX/softwareinstall/instg/atlasdeploy_03_0031.html) or use the following commands:
```bash
# replace the url according to your CANN version and devices
@@ -382,15 +380,12 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh
| torch-npu | 2.1.0 | 2.1.0.post3 |
| deepspeed | 0.13.2 | 0.13.2 |
Docker image:
- 32GB: [Download page](http://mirrors.cn-central-221.ovaijisuan.com/detail/130.html)
- 64GB: [Download page](http://mirrors.cn-central-221.ovaijisuan.com/detail/131.html)
Remember to use `ASCEND_RT_VISIBLE_DEVICES` instead of `CUDA_VISIBLE_DEVICES` to specify the device to use.
If you cannot infer model on NPU devices, try setting `do_sample: false` in the configurations.
Download the pre-built Docker images: [32GB](http://mirrors.cn-central-221.ovaijisuan.com/detail/130.html) | [64GB](http://mirrors.cn-central-221.ovaijisuan.com/detail/131.html)
</details>
### Data Preparation
@@ -405,9 +400,9 @@ Please refer to [data/README.md](data/README.md) for checking the details about
Use the following 3 commands to run LoRA **fine-tuning**, **inference** and **merging** of the Llama3-8B-Instruct model, respectively.
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
```
See [examples/README.md](examples/README.md) for advanced usage (including distributed training).
@@ -417,34 +412,89 @@ See [examples/README.md](examples/README.md) for advanced usage (including distr
### Fine-Tuning with LLaMA Board GUI (powered by [Gradio](https://github.com/gradio-app/gradio))
#### Use local environment
```bash
CUDA_VISIBLE_DEVICES=0 GRADIO_SHARE=1 llamafactory-cli webui
llamafactory-cli webui
```
</details>
### Build Docker
#### Use Docker
For CUDA users:
```bash
docker build -f ./Dockerfile -t llama-factory:latest .
docker run --gpus=all \
-v ./hf_cache:/root/.cache/huggingface/ \
cd docker/docker-cuda/
docker-compose up -d
docker-compose exec llamafactory bash
```
For Ascend NPU users:
```bash
cd docker/docker-npu/
docker-compose up -d
docker-compose exec llamafactory bash
```
<details><summary>Build without Docker Compose</summary>
For CUDA users:
```bash
docker build -f ./docker/docker-cuda/Dockerfile \
--build-arg INSTALL_BNB=false \
--build-arg INSTALL_VLLM=false \
--build-arg INSTALL_DEEPSPEED=false \
--build-arg INSTALL_FLASHATTN=false \
--build-arg PIP_INDEX=https://pypi.org/simple \
-t llamafactory:latest .
docker run -dit --gpus=all \
-v ./hf_cache:/root/.cache/huggingface \
-v ./ms_cache:/root/.cache/modelscope \
-v ./data:/app/data \
-v ./output:/app/output \
-p 7860:7860 \
-p 8000:8000 \
--shm-size 16G \
--name llama_factory \
-d llama-factory:latest
--name llamafactory \
llamafactory:latest
docker exec -it llamafactory bash
```
#### Use Docker Compose
For Ascend NPU users:
```bash
docker compose -f ./docker-compose.yml up -d
# Choose docker image upon your environment
docker build -f ./docker/docker-npu/Dockerfile \
--build-arg INSTALL_DEEPSPEED=false \
--build-arg PIP_INDEX=https://pypi.org/simple \
-t llamafactory:latest .
# Change `device` upon your resources
docker run -dit \
-v ./hf_cache:/root/.cache/huggingface \
-v ./ms_cache:/root/.cache/modelscope \
-v ./data:/app/data \
-v ./output:/app/output \
-v /usr/local/dcmi:/usr/local/dcmi \
-v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
-v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
-v /etc/ascend_install.info:/etc/ascend_install.info \
-p 7860:7860 \
-p 8000:8000 \
--device /dev/davinci0 \
--device /dev/davinci_manager \
--device /dev/devmm_svm \
--device /dev/hisi_hdc \
--shm-size 16G \
--name llamafactory \
llamafactory:latest
docker exec -it llamafactory bash
```
</details>
<details><summary>Details about volume</summary>
- hf_cache: Utilize Hugging Face cache on the host machine. Reassignable if a cache already exists in a different directory.
@@ -456,7 +506,7 @@ docker compose -f ./docker-compose.yml up -d
### Deploy with OpenAI-style API and vLLM
```bash
CUDA_VISIBLE_DEVICES=0,1 API_PORT=8000 llamafactory-cli api examples/inference/llama3_vllm.yaml
API_PORT=8000 llamafactory-cli api examples/inference/llama3_vllm.yaml
```
> [!TIP]
@@ -474,7 +524,7 @@ Train the model by specifying a model ID of the ModelScope Hub as the `model_nam
### Use W&B Logger
To use [Weights & Biases](https://wandb.ai) for logging experimental results, you need to add the following arguments.
To use [Weights & Biases](https://wandb.ai) for logging experimental results, you need to add the following arguments to yaml files.
```yaml
report_to: wandb
@@ -494,38 +544,63 @@ If you have a project that should be incorporated, please contact via email or c
1. Wang et al. UbiPhysio: Support Daily Functioning, Fitness, and Rehabilitation with Action Understanding and Feedback in Natural Language. 2023. [[arxiv]](https://arxiv.org/abs/2308.10526)
1. Luceri et al. Leveraging Large Language Models to Detect Influence Campaigns in Social Media. 2023. [[arxiv]](https://arxiv.org/abs/2311.07816)
1. Zhang et al. Alleviating Hallucinations of Large Language Models through Induced Hallucinations. 2023. [[arxiv]](https://arxiv.org/abs/2312.15710)
1. Wang et al. Know Your Needs Better: Towards Structured Understanding of Marketer Demands with Analogical Reasoning Augmented LLMs. 2024. [[arxiv]](https://arxiv.org/abs/2401.04319)
1. Wang et al. CANDLE: Iterative Conceptualization and Instantiation Distillation from Large Language Models for Commonsense Reasoning. 2024. [[arxiv]](https://arxiv.org/abs/2401.07286)
1. Wang et al. Know Your Needs Better: Towards Structured Understanding of Marketer Demands with Analogical Reasoning Augmented LLMs. KDD 2024. [[arxiv]](https://arxiv.org/abs/2401.04319)
1. Wang et al. CANDLE: Iterative Conceptualization and Instantiation Distillation from Large Language Models for Commonsense Reasoning. ACL 2024. [[arxiv]](https://arxiv.org/abs/2401.07286)
1. Choi et al. FACT-GPT: Fact-Checking Augmentation via Claim Matching with LLMs. 2024. [[arxiv]](https://arxiv.org/abs/2402.05904)
1. Zhang et al. AutoMathText: Autonomous Data Selection with Language Models for Mathematical Texts. 2024. [[arxiv]](https://arxiv.org/abs/2402.07625)
1. Lyu et al. KnowTuning: Knowledge-aware Fine-tuning for Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.11176)
1. Yang et al. LaCo: Large Language Model Pruning via Layer Collaps. 2024. [[arxiv]](https://arxiv.org/abs/2402.11187)
1. Bhardwaj et al. Language Models are Homer Simpson! Safety Re-Alignment of Fine-tuned Language Models through Task Arithmetic. 2024. [[arxiv]](https://arxiv.org/abs/2402.11746)
1. Yang et al. Enhancing Empathetic Response Generation by Augmenting LLMs with Small-scale Empathetic Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.11801)
1. Yi et al. Generation Meets Verification: Accelerating Large Language Model Inference with Smart Parallel Auto-Correct Decoding. 2024. [[arxiv]](https://arxiv.org/abs/2402.11809)
1. Yi et al. Generation Meets Verification: Accelerating Large Language Model Inference with Smart Parallel Auto-Correct Decoding. ACL 2024 Findings. [[arxiv]](https://arxiv.org/abs/2402.11809)
1. Cao et al. Head-wise Shareable Attention for Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.11819)
1. Zhang et al. Enhancing Multilingual Capabilities of Large Language Models through Self-Distillation from Resource-Rich Languages. 2024. [[arxiv]](https://arxiv.org/abs/2402.12204)
1. Kim et al. Efficient and Effective Vocabulary Expansion Towards Multilingual Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.14714)
1. Yu et al. KIEval: A Knowledge-grounded Interactive Evaluation Framework for Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.15043)
1. Yu et al. KIEval: A Knowledge-grounded Interactive Evaluation Framework for Large Language Models. ACL 2024. [[arxiv]](https://arxiv.org/abs/2402.15043)
1. Huang et al. Key-Point-Driven Data Synthesis with its Enhancement on Mathematical Reasoning. 2024. [[arxiv]](https://arxiv.org/abs/2403.02333)
1. Duan et al. Negating Negatives: Alignment without Human Positive Samples via Distributional Dispreference Optimization. 2024. [[arxiv]](https://arxiv.org/abs/2403.03419)
1. Xie and Schwertfeger. Empowering Robotics with Large Language Models: osmAG Map Comprehension with LLMs. 2024. [[arxiv]](https://arxiv.org/abs/2403.08228)
1. Wu et al. Large Language Models are Parallel Multilingual Learners. 2024. [[arxiv]](https://arxiv.org/abs/2403.09073)
1. Zhang et al. EDT: Improving Large Language Models' Generation by Entropy-based Dynamic Temperature Sampling. 2024. [[arxiv]](https://arxiv.org/abs/2403.14541)
1. Weller et al. FollowIR: Evaluating and Teaching Information Retrieval Models to Follow Instructions. 2024. [[arxiv]](https://arxiv.org/abs/2403.15246)
1. Hongbin Na. CBT-LLM: A Chinese Large Language Model for Cognitive Behavioral Therapy-based Mental Health Question Answering. 2024. [[arxiv]](https://arxiv.org/abs/2403.16008)
1. Hongbin Na. CBT-LLM: A Chinese Large Language Model for Cognitive Behavioral Therapy-based Mental Health Question Answering. COLING 2024. [[arxiv]](https://arxiv.org/abs/2403.16008)
1. Zan et al. CodeS: Natural Language to Code Repository via Multi-Layer Sketch. 2024. [[arxiv]](https://arxiv.org/abs/2403.16443)
1. Liu et al. Extensive Self-Contrast Enables Feedback-Free Language Model Alignment. 2024. [[arxiv]](https://arxiv.org/abs/2404.00604)
1. Luo et al. BAdam: A Memory Efficient Full Parameter Training Method for Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.02827)
1. Du et al. Chinese Tiny LLM: Pretraining a Chinese-Centric Large Language Model. 2024. [[arxiv]](https://arxiv.org/abs/2404.04167)
1. Ma et al. Parameter Efficient Quasi-Orthogonal Fine-Tuning via Givens Rotation. 2024. [[arxiv]](https://arxiv.org/abs/2404.04316)
1. Ma et al. Parameter Efficient Quasi-Orthogonal Fine-Tuning via Givens Rotation. ICML 2024. [[arxiv]](https://arxiv.org/abs/2404.04316)
1. Liu et al. Dynamic Generation of Personalities with Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.07084)
1. Shang et al. How Far Have We Gone in Stripped Binary Code Understanding Using Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.09836)
1. Huang et al. LLMTune: Accelerate Database Knob Tuning with Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.11581)
1. Deng et al. Text-Tuple-Table: Towards Information Integration in Text-to-Table Generation via Global Tuple Extraction. 2024. [[arxiv]](https://arxiv.org/abs/2404.14215)
1. Acikgoz et al. Hippocrates: An Open-Source Framework for Advancing Large Language Models in Healthcare. 2024. [[arxiv]](https://arxiv.org/abs/2404.16621)
1. Zhang et al. Small Language Models Need Strong Verifiers to Self-Correct Reasoning. 2024. [[arxiv]](https://arxiv.org/abs/2404.17140)
1. Zhou et al. FREB-TQA: A Fine-Grained Robustness Evaluation Benchmark for Table Question Answering. 2024. [[arxiv]](https://arxiv.org/abs/2404.18585)
1. Zhang et al. Small Language Models Need Strong Verifiers to Self-Correct Reasoning. ACL 2024 Findings. [[arxiv]](https://arxiv.org/abs/2404.17140)
1. Zhou et al. FREB-TQA: A Fine-Grained Robustness Evaluation Benchmark for Table Question Answering. NAACL 2024. [[arxiv]](https://arxiv.org/abs/2404.18585)
1. Xu et al. Large Language Models for Cyber Security: A Systematic Literature Review. 2024. [[arxiv]](https://arxiv.org/abs/2405.04760)
1. Dammu et al. "They are uncultured": Unveiling Covert Harms and Social Threats in LLM Generated Conversations. 2024. [[arxiv]](https://arxiv.org/abs/2405.05378)
1. Yi et al. A safety realignment framework via subspace-oriented model fusion for large language models. 2024. [[arxiv]](https://arxiv.org/abs/2405.09055)
1. Lou et al. SPO: Multi-Dimensional Preference Sequential Alignment With Implicit Reward Modeling. 2024. [[arxiv]](https://arxiv.org/abs/2405.12739)
1. Zhang et al. Getting More from Less: Large Language Models are Good Spontaneous Multilingual Learners. 2024. [[arxiv]](https://arxiv.org/abs/2405.13816)
1. Zhang et al. TS-Align: A Teacher-Student Collaborative Framework for Scalable Iterative Finetuning of Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2405.20215)
1. Zihong Chen. Sentence Segmentation and Sentence Punctuation Based on XunziALLM. 2024. [[paper]](https://aclanthology.org/2024.lt4hala-1.30)
1. Gao et al. The Best of Both Worlds: Toward an Honest and Helpful Large Language Model. 2024. [[arxiv]](https://arxiv.org/abs/2406.00380)
1. Wang and Song. MARS: Benchmarking the Metaphysical Reasoning Abilities of Language Models with a Multi-task Evaluation Dataset. 2024. [[arxiv]](https://arxiv.org/abs/2406.02106)
1. Hu et al. Computational Limits of Low-Rank Adaptation (LoRA) for Transformer-Based Models. 2024. [[arxiv]](https://arxiv.org/abs/2406.03136)
1. Ge et al. Time Sensitive Knowledge Editing through Efficient Finetuning. ACL 2024. [[arxiv]](https://arxiv.org/abs/2406.04496)
1. Tan et al. Peer Review as A Multi-Turn and Long-Context Dialogue with Role-Based Interactions. 2024. [[arxiv]](https://arxiv.org/abs/2406.05688)
1. Song et al. Turbo Sparse: Achieving LLM SOTA Performance with Minimal Activated Parameters. 2024. [[arxiv]](https://arxiv.org/abs/2406.05955)
1. Gu et al. RWKV-CLIP: A Robust Vision-Language Representation Learner. 2024. [[arxiv]](https://arxiv.org/abs/2406.06973)
1. Chen et al. Advancing Tool-Augmented Large Language Models: Integrating Insights from Errors in Inference Trees. 2024. [[arxiv]](https://arxiv.org/abs/2406.07115)
1. Zhu et al. Are Large Language Models Good Statisticians?. 2024. [[arxiv]](https://arxiv.org/abs/2406.07815)
1. Li et al. Know the Unknown: An Uncertainty-Sensitive Method for LLM Instruction Tuning. 2024. [[arxiv]](https://arxiv.org/abs/2406.10099)
1. Ding et al. IntentionQA: A Benchmark for Evaluating Purchase Intention Comprehension Abilities of Language Models in E-commerce. 2024. [[arxiv]](https://arxiv.org/abs/2406.10173)
1. He et al. COMMUNITY-CROSS-INSTRUCT: Unsupervised Instruction Generation for Aligning Large Language Models to Online Communities. 2024. [[arxiv]](https://arxiv.org/abs/2406.12074)
1. Lin et al. FVEL: Interactive Formal Verification Environment with Large Language Models via Theorem Proving. 2024. [[arxiv]](https://arxiv.org/abs/2406.14408)
1. Treutlein et al. Connecting the Dots: LLMs can Infer and Verbalize Latent Structure from Disparate Training Data. 2024. [[arxiv]](https://arxiv.org/abs/2406.14546)
1. Feng et al. SS-Bench: A Benchmark for Social Story Generation and Evaluation. 2024. [[arxiv]](https://arxiv.org/abs/2406.15695)
1. Feng et al. Self-Constructed Context Decompilation with Fined-grained Alignment Enhancement. 2024. [[arxiv]](https://arxiv.org/abs/2406.17233)
1. Liu et al. Large Language Models for Cuffless Blood Pressure Measurement From Wearable Biosignals. 2024. [[arxiv]](https://arxiv.org/abs/2406.18069)
1. Iyer et al. Exploring Very Low-Resource Translation with LLMs: The University of Edinburghs Submission to AmericasNLP 2024 Translation Task. AmericasNLP 2024. [[paper]](https://aclanthology.org/2024.americasnlp-1.25)
1. **[StarWhisper](https://github.com/Yu-Yang-Li/StarWhisper)**: A large language model for Astronomy, based on ChatGLM2-6B and Qwen-14B.
1. **[DISC-LawLLM](https://github.com/FudanDISC/DISC-LawLLM)**: A large language model specialized in Chinese legal domain, based on Baichuan-13B, is capable of retrieving and reasoning on legal knowledge.
1. **[Sunsimiao](https://github.com/X-D-Lab/Sunsimiao)**: A large language model specialized in Chinese medical domain, based on Baichuan-7B and ChatGLM-6B.
@@ -533,6 +608,9 @@ If you have a project that should be incorporated, please contact via email or c
1. **[MachineMindset](https://github.com/PKU-YuanGroup/Machine-Mindset/)**: A series of MBTI Personality large language models, capable of giving any LLM 16 different personality types based on different datasets and training methods.
1. **[Luminia-13B-v3](https://huggingface.co/Nekochu/Luminia-13B-v3)**: A large language model specialized in generate metadata for stable diffusion. [[🤗Demo]](https://huggingface.co/spaces/Nekochu/Luminia-13B_SD_Prompt)
1. **[Chinese-LLaVA-Med](https://github.com/BUAADreamer/Chinese-LLaVA-Med)**: A multimodal large language model specialized in Chinese medical domain, based on LLaVA-1.5-7B.
1. **[AutoRE](https://github.com/THUDM/AutoRE)**: A document-level relation extraction system based on large language models.
1. **[NVIDIA RTX AI Toolkit](https://github.com/NVIDIA/RTX-AI-Toolkit)**: SDKs for fine-tuning LLMs on Windows PC for NVIDIA RTX.
1. **[LazyLLM](https://github.com/LazyAGI/LazyLLM)**: An easy and lazy way for building multi-agent LLMs applications and supports model fine-tuning via LLaMA Factory.
</details>
@@ -540,17 +618,19 @@ If you have a project that should be incorporated, please contact via email or c
This repository is licensed under the [Apache-2.0 License](LICENSE).
Please follow the model licenses to use the corresponding model weights: [Baichuan2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command-R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [GLM4](https://huggingface.co/THUDM/glm-4-9b/blob/main/LICENSE) / [InternLM2](https://github.com/InternLM/InternLM#license) / [LLaMA](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [LLaMA-2 (LLaVA-1.5)](https://ai.meta.com/llama/license/) / [LLaMA-3](https://llama.meta.com/llama3/license/) / [Mistral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [StarCoder2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan)
Please follow the model licenses to use the corresponding model weights: [Baichuan 2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [GLM-4](https://huggingface.co/THUDM/glm-4-9b/blob/main/LICENSE) / [InternLM2](https://github.com/InternLM/InternLM#license) / [Llama](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [Llama 2 (LLaVA-1.5)](https://ai.meta.com/llama/license/) / [Llama 3](https://llama.meta.com/llama3/license/) / [Mistral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/Phi-2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [StarCoder 2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan 2](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan)
## Citation
If this work is helpful, please kindly cite as:
```bibtex
@article{zheng2024llamafactory,
@inproceedings{zheng2024llamafactory,
title={LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models},
author={Yaowei Zheng and Richong Zhang and Junhao Zhang and Yanhan Ye and Zheyan Luo and Yongqiang Ma},
journal={arXiv preprint arXiv:2403.13372},
author={Yaowei Zheng and Richong Zhang and Junhao Zhang and Yanhan Ye and Zheyan Luo and Zhangchi Feng and Yongqiang Ma},
booktitle={Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)},
address={Bangkok, Thailand},
publisher={Association for Computational Linguistics},
year={2024},
url={http://arxiv.org/abs/2403.13372}
}

View File

@@ -4,7 +4,7 @@
[![GitHub Code License](https://img.shields.io/github/license/hiyouga/LLaMA-Factory)](LICENSE)
[![GitHub last commit](https://img.shields.io/github/last-commit/hiyouga/LLaMA-Factory)](https://github.com/hiyouga/LLaMA-Factory/commits/main)
[![PyPI](https://img.shields.io/pypi/v/llamafactory)](https://pypi.org/project/llamafactory/)
[![Citation](https://img.shields.io/badge/citation-44-green)](#使用了-llama-factory-的项目)
[![Citation](https://img.shields.io/badge/citation-72-green)](#使用了-llama-factory-的项目)
[![GitHub pull request](https://img.shields.io/badge/PRs-welcome-blue)](https://github.com/hiyouga/LLaMA-Factory/pulls)
[![Discord](https://dcbadge.vercel.app/api/server/rKfvV9r9FK?compact=true&style=flat)](https://discord.gg/rKfvV9r9FK)
[![Twitter](https://img.shields.io/twitter/follow/llamafactory_ai)](https://twitter.com/llamafactory_ai)
@@ -15,7 +15,7 @@
[![GitHub Tread](https://trendshift.io/api/badge/repositories/4535)](https://trendshift.io/repositories/4535)
👋 加入我们的[微信群](assets/wechat.jpg)。
👋 加入我们的[微信群](assets/wechat.jpg)或 [NPU 用户群](assets/wechat_npu.jpg)
\[ [English](README.md) | 中文 \]
@@ -48,8 +48,8 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd
- **多种模型**LLaMA、LLaVA、Mistral、Mixtral-MoE、Qwen、Yi、Gemma、Baichuan、ChatGLM、Phi 等等。
- **集成方法**增量预训练、多模态指令监督微调、奖励模型训练、PPO 训练、DPO 训练、KTO 训练、ORPO 训练等等。
- **多种精度**32 比特全参数微调、16 比特冻结微调、16 比特 LoRA 微调和基于 AQLM/AWQ/GPTQ/LLM.int8 的 2/4/8 比特 QLoRA 微调。
- **先进算法**GaLore、BAdam、DoRA、LongLoRA、LLaMA Pro、Mixture-of-Depths、LoRA+、LoftQ 和 Agent 微调。
- **多种精度**16 比特全参数微调、冻结微调、LoRA 微调和基于 AQLM/AWQ/GPTQ/LLM.int8/HQQ/EETQ 的 2/3/4/5/6/8 比特 QLoRA 微调。
- **先进算法**GaLore、BAdam、DoRA、LongLoRA、LLaMA Pro、Mixture-of-Depths、LoRA+、LoftQ、PiSSA 和 Agent 微调。
- **实用技巧**FlashAttention-2、Unsloth、RoPE scaling、NEFTune 和 rsLoRA。
- **实验监控**LlamaBoard、TensorBoard、Wandb、MLflow 等等。
- **极速推理**:基于 vLLM 的 OpenAI 风格 API、浏览器界面和命令行接口。
@@ -71,9 +71,9 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd
## 更新日志
[24/06/07] 我们支持了 **[Qwen-2](https://qwenlm.github.io/blog/qwen2/)** 系列模型的微调
[24/06/16] 我们支持了 **[PiSSA](https://arxiv.org/abs/2404.02948)** 算法。详细用法请参照 [examples](examples/README_zh.md)
[24/06/05] 我们支持了 **[GLM-4-9B/GLM-4-9B-Chat](https://github.com/THUDM/GLM-4)** 模型的微调。
[24/06/07] 我们支持了 **[Qwen2](https://qwenlm.github.io/blog/qwen2/)** 和 **[GLM-4](https://github.com/THUDM/GLM-4)** 模型的微调。
[24/05/26] 我们支持了 **[SimPO](https://arxiv.org/abs/2405.14734)** 偏好对齐算法。详细用法请参照 [examples](examples/README_zh.md)。
@@ -151,35 +151,32 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd
## 模型
| 模型名 | 模型大小 | Template |
| -------------------------------------------------------- | -------------------------------- | --------- |
| [Baichuan2](https://huggingface.co/baichuan-inc) | 7B/13B | baichuan2 |
| [BLOOM](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - |
| [BLOOMZ](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - |
| [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 |
| [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | cohere |
| [DeepSeek (MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
| [Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | gemma |
| [GLM4](https://huggingface.co/THUDM) | 9B | glm4 |
| [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 |
| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
| [LLaMA-2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 |
| [LLaMA-3](https://huggingface.co/meta-llama) | 8B/70B | llama3 |
| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | vicuna |
| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
| [OLMo](https://huggingface.co/allenai) | 1B/7B | - |
| [PaliGemma](https://huggingface.co/google) | 3B | gemma |
| [Phi-1.5/2](https://huggingface.co/microsoft) | 1.3B/2.7B | - |
| [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | phi |
| [Qwen](https://huggingface.co/Qwen) | 1.8B/7B/14B/72B | qwen |
| [Qwen1.5 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B/110B | qwen |
| [Qwen2 (MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/7B/57B/72B | qwen |
| [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse |
| [Yi (1/1.5)](https://huggingface.co/01-ai) | 6B/9B/34B | yi |
| [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | yi_vl |
| [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | yuan |
| 模型名 | 模型大小 | Template |
| ------------------------------------------------------------ | -------------------------------- | --------- |
| [Baichuan 2](https://huggingface.co/baichuan-inc) | 7B/13B | baichuan2 |
| [BLOOM/BLOOMZ](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - |
| [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 |
| [Command R](https://huggingface.co/CohereForAI) | 35B/104B | cohere |
| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
| [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
| [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 |
| [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 |
| [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
| [Llama 2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 |
| [Llama 3](https://huggingface.co/meta-llama) | 8B/70B | llama3 |
| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | vicuna |
| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
| [OLMo](https://huggingface.co/allenai) | 1B/7B | - |
| [PaliGemma](https://huggingface.co/google) | 3B | gemma |
| [Phi-1.5/Phi-2](https://huggingface.co/microsoft) | 1.3B/2.7B | - |
| [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | phi |
| [Qwen/Qwen1.5/Qwen2 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/4B/7B/14B/32B/72B/110B | qwen |
| [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse |
| [Yi/Yi-1.5](https://huggingface.co/01-ai) | 6B/9B/34B | yi |
| [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | yi_vl |
| [Yuan 2](https://huggingface.co/IEITYuan) | 2B/51B/102B | yuan |
> [!NOTE]
> 对于所有“基座”Base模型`template` 参数可以是 `default`, `alpaca`, `vicuna` 等任意值。但“对话”Instruct/Chat模型请务必使用**对应的模板**。
@@ -259,6 +256,9 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd
- [Cosmopedia (en)](https://huggingface.co/datasets/HuggingFaceTB/cosmopedia)
- [STEM (zh)](https://huggingface.co/datasets/hfl/stem_zh_instruction)
- [Ruozhiba (zh)](https://huggingface.co/datasets/hfl/ruozhiba_gpt4_turbo)
- [Neo-sft (zh)](https://huggingface.co/datasets/m-a-p/neo_sft_phase2)
- [WebInstructSub (en)](https://huggingface.co/datasets/TIGER-Lab/WebInstructSub)
- [Magpie-Pro-300K-Filtered (en)](https://huggingface.co/datasets/Magpie-Align/Magpie-Pro-300K-Filtered)
- [LLaVA mixed (en&zh)](https://huggingface.co/datasets/BUAADreamer/llava-en-zh-300k)
- [Open Assistant (de)](https://huggingface.co/datasets/mayflowergmbh/oasst_de)
- [Dolly 15k (de)](https://huggingface.co/datasets/mayflowergmbh/dolly-15k_de)
@@ -335,10 +335,10 @@ huggingface-cli login
```bash
git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git
cd LLaMA-Factory
pip install -e '.[torch,metrics]'
pip install -e ".[torch,metrics]"
```
可选的额外依赖项torch、torch_npu、metrics、deepspeed、bitsandbytes、vllm、galore、badam、gptq、awq、aqlm、qwen、modelscope、quality
可选的额外依赖项torch、torch-npu、metrics、deepspeed、bitsandbytes、hqq、eetq、gptq、awq、aqlm、vllm、galore、badam、qwen、modelscope、quality
> [!TIP]
> 遇到包冲突时,可使用 `pip install --no-deps -e .` 解决。
@@ -357,9 +357,7 @@ pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/downl
<details><summary>昇腾 NPU 用户指南</summary>
加入 [NPU 用户群](assets/wechat_npu.jpg)。
在昇腾 NPU 设备上安装 LLaMA Factory 时,需要指定额外依赖项,使用 `pip install -e '.[torch-npu,metrics]'` 命令安装。此外,还需要安装 **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**,安装方法请参考[安装教程](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC2alpha002/quickstart/quickstart/quickstart_18_0004.html)或使用以下命令:
在昇腾 NPU 设备上安装 LLaMA Factory 时,需要指定额外依赖项,使用 `pip install -e ".[torch-npu,metrics]"` 命令安装。此外,还需要安装 **[Ascend CANN Toolkit 与 Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**,安装方法请参考[安装教程](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC2alpha002/quickstart/quickstart/quickstart_18_0004.html)或使用以下命令:
```bash
# 请替换 URL 为 CANN 版本和设备型号对应的 URL
@@ -382,15 +380,12 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh
| torch-npu | 2.1.0 | 2.1.0.post3 |
| deepspeed | 0.13.2 | 0.13.2 |
Docker 镜像:
- 32GB[下载地址](http://mirrors.cn-central-221.ovaijisuan.com/detail/130.html)
- 64GB[下载地址](http://mirrors.cn-central-221.ovaijisuan.com/detail/131.html)
请使用 `ASCEND_RT_VISIBLE_DEVICES` 而非 `CUDA_VISIBLE_DEVICES` 来指定运算设备。
如果遇到无法正常推理的情况,请尝试设置 `do_sample: false`
下载预构建 Docker 镜像:[32GB](http://mirrors.cn-central-221.ovaijisuan.com/detail/130.html) | [64GB](http://mirrors.cn-central-221.ovaijisuan.com/detail/131.html)
</details>
### 数据准备
@@ -405,9 +400,9 @@ Docker 镜像:
下面三行命令分别对 Llama3-8B-Instruct 模型进行 LoRA **微调**、**推理**和**合并**。
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
```
高级用法请参考 [examples/README_zh.md](examples/README_zh.md)(包括多 GPU 微调)。
@@ -417,32 +412,89 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_s
### LLaMA Board 可视化微调(由 [Gradio](https://github.com/gradio-app/gradio) 驱动)
#### 使用本地环境
```bash
CUDA_VISIBLE_DEVICES=0 GRADIO_SHARE=1 llamafactory-cli webui
llamafactory-cli webui
```
#### 使用 Docker
### 构建 Docker
CUDA 用户:
```bash
docker build -f ./Dockerfile -t llama-factory:latest .
docker run --gpus=all \
-v ./hf_cache:/root/.cache/huggingface/ \
cd docker/docker-cuda/
docker-compose up -d
docker-compose exec llamafactory bash
```
昇腾 NPU 用户:
```bash
cd docker/docker-npu/
docker-compose up -d
docker-compose exec llamafactory bash
```
<details><summary>不使用 Docker Compose 构建</summary>
CUDA 用户:
```bash
docker build -f ./docker/docker-cuda/Dockerfile \
--build-arg INSTALL_BNB=false \
--build-arg INSTALL_VLLM=false \
--build-arg INSTALL_DEEPSPEED=false \
--build-arg INSTALL_FLASHATTN=false \
--build-arg PIP_INDEX=https://pypi.org/simple \
-t llamafactory:latest .
docker run -dit --gpus=all \
-v ./hf_cache:/root/.cache/huggingface \
-v ./ms_cache:/root/.cache/modelscope \
-v ./data:/app/data \
-v ./output:/app/output \
-p 7860:7860 \
-p 8000:8000 \
--shm-size 16G \
--name llama_factory \
-d llama-factory:latest
--name llamafactory \
llamafactory:latest
docker exec -it llamafactory bash
```
#### 使用 Docker Compose
昇腾 NPU 用户:
```bash
docker compose -f ./docker-compose.yml up -d
# 根据您的环境选择镜像
docker build -f ./docker/docker-npu/Dockerfile \
--build-arg INSTALL_DEEPSPEED=false \
--build-arg PIP_INDEX=https://pypi.org/simple \
-t llamafactory:latest .
# 根据您的资源更改 `device`
docker run -dit \
-v ./hf_cache:/root/.cache/huggingface \
-v ./ms_cache:/root/.cache/modelscope \
-v ./data:/app/data \
-v ./output:/app/output \
-v /usr/local/dcmi:/usr/local/dcmi \
-v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
-v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
-v /etc/ascend_install.info:/etc/ascend_install.info \
-p 7860:7860 \
-p 8000:8000 \
--device /dev/davinci0 \
--device /dev/davinci_manager \
--device /dev/devmm_svm \
--device /dev/hisi_hdc \
--shm-size 16G \
--name llamafactory \
llamafactory:latest
docker exec -it llamafactory bash
```
</details>
<details><summary>数据卷详情</summary>
- hf_cache使用宿主机的 Hugging Face 缓存文件夹,允许更改为新的目录。
@@ -454,7 +506,7 @@ docker compose -f ./docker-compose.yml up -d
### 利用 vLLM 部署 OpenAI API
```bash
CUDA_VISIBLE_DEVICES=0,1 API_PORT=8000 llamafactory-cli api examples/inference/llama3_vllm.yaml
API_PORT=8000 llamafactory-cli api examples/inference/llama3_vllm.yaml
```
> [!TIP]
@@ -472,7 +524,7 @@ export USE_MODELSCOPE_HUB=1 # Windows 使用 `set USE_MODELSCOPE_HUB=1`
### 使用 W&B 面板
若要使用 [Weights & Biases](https://wandb.ai) 记录实验数据,请添加下面的参数。
若要使用 [Weights & Biases](https://wandb.ai) 记录实验数据,请在 yaml 文件中添加下面的参数。
```yaml
report_to: wandb
@@ -492,38 +544,63 @@ run_name: test_run # 可选
1. Wang et al. UbiPhysio: Support Daily Functioning, Fitness, and Rehabilitation with Action Understanding and Feedback in Natural Language. 2023. [[arxiv]](https://arxiv.org/abs/2308.10526)
1. Luceri et al. Leveraging Large Language Models to Detect Influence Campaigns in Social Media. 2023. [[arxiv]](https://arxiv.org/abs/2311.07816)
1. Zhang et al. Alleviating Hallucinations of Large Language Models through Induced Hallucinations. 2023. [[arxiv]](https://arxiv.org/abs/2312.15710)
1. Wang et al. Know Your Needs Better: Towards Structured Understanding of Marketer Demands with Analogical Reasoning Augmented LLMs. 2024. [[arxiv]](https://arxiv.org/abs/2401.04319)
1. Wang et al. CANDLE: Iterative Conceptualization and Instantiation Distillation from Large Language Models for Commonsense Reasoning. 2024. [[arxiv]](https://arxiv.org/abs/2401.07286)
1. Wang et al. Know Your Needs Better: Towards Structured Understanding of Marketer Demands with Analogical Reasoning Augmented LLMs. KDD 2024. [[arxiv]](https://arxiv.org/abs/2401.04319)
1. Wang et al. CANDLE: Iterative Conceptualization and Instantiation Distillation from Large Language Models for Commonsense Reasoning. ACL 2024. [[arxiv]](https://arxiv.org/abs/2401.07286)
1. Choi et al. FACT-GPT: Fact-Checking Augmentation via Claim Matching with LLMs. 2024. [[arxiv]](https://arxiv.org/abs/2402.05904)
1. Zhang et al. AutoMathText: Autonomous Data Selection with Language Models for Mathematical Texts. 2024. [[arxiv]](https://arxiv.org/abs/2402.07625)
1. Lyu et al. KnowTuning: Knowledge-aware Fine-tuning for Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.11176)
1. Yang et al. LaCo: Large Language Model Pruning via Layer Collaps. 2024. [[arxiv]](https://arxiv.org/abs/2402.11187)
1. Bhardwaj et al. Language Models are Homer Simpson! Safety Re-Alignment of Fine-tuned Language Models through Task Arithmetic. 2024. [[arxiv]](https://arxiv.org/abs/2402.11746)
1. Yang et al. Enhancing Empathetic Response Generation by Augmenting LLMs with Small-scale Empathetic Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.11801)
1. Yi et al. Generation Meets Verification: Accelerating Large Language Model Inference with Smart Parallel Auto-Correct Decoding. 2024. [[arxiv]](https://arxiv.org/abs/2402.11809)
1. Yi et al. Generation Meets Verification: Accelerating Large Language Model Inference with Smart Parallel Auto-Correct Decoding. ACL 2024 Findings. [[arxiv]](https://arxiv.org/abs/2402.11809)
1. Cao et al. Head-wise Shareable Attention for Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.11819)
1. Zhang et al. Enhancing Multilingual Capabilities of Large Language Models through Self-Distillation from Resource-Rich Languages. 2024. [[arxiv]](https://arxiv.org/abs/2402.12204)
1. Kim et al. Efficient and Effective Vocabulary Expansion Towards Multilingual Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.14714)
1. Yu et al. KIEval: A Knowledge-grounded Interactive Evaluation Framework for Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.15043)
1. Yu et al. KIEval: A Knowledge-grounded Interactive Evaluation Framework for Large Language Models. ACL 2024. [[arxiv]](https://arxiv.org/abs/2402.15043)
1. Huang et al. Key-Point-Driven Data Synthesis with its Enhancement on Mathematical Reasoning. 2024. [[arxiv]](https://arxiv.org/abs/2403.02333)
1. Duan et al. Negating Negatives: Alignment without Human Positive Samples via Distributional Dispreference Optimization. 2024. [[arxiv]](https://arxiv.org/abs/2403.03419)
1. Xie and Schwertfeger. Empowering Robotics with Large Language Models: osmAG Map Comprehension with LLMs. 2024. [[arxiv]](https://arxiv.org/abs/2403.08228)
1. Wu et al. Large Language Models are Parallel Multilingual Learners. 2024. [[arxiv]](https://arxiv.org/abs/2403.09073)
1. Zhang et al. EDT: Improving Large Language Models' Generation by Entropy-based Dynamic Temperature Sampling. 2024. [[arxiv]](https://arxiv.org/abs/2403.14541)
1. Weller et al. FollowIR: Evaluating and Teaching Information Retrieval Models to Follow Instructions. 2024. [[arxiv]](https://arxiv.org/abs/2403.15246)
1. Hongbin Na. CBT-LLM: A Chinese Large Language Model for Cognitive Behavioral Therapy-based Mental Health Question Answering. 2024. [[arxiv]](https://arxiv.org/abs/2403.16008)
1. Hongbin Na. CBT-LLM: A Chinese Large Language Model for Cognitive Behavioral Therapy-based Mental Health Question Answering. COLING 2024. [[arxiv]](https://arxiv.org/abs/2403.16008)
1. Zan et al. CodeS: Natural Language to Code Repository via Multi-Layer Sketch. 2024. [[arxiv]](https://arxiv.org/abs/2403.16443)
1. Liu et al. Extensive Self-Contrast Enables Feedback-Free Language Model Alignment. 2024. [[arxiv]](https://arxiv.org/abs/2404.00604)
1. Luo et al. BAdam: A Memory Efficient Full Parameter Training Method for Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.02827)
1. Du et al. Chinese Tiny LLM: Pretraining a Chinese-Centric Large Language Model. 2024. [[arxiv]](https://arxiv.org/abs/2404.04167)
1. Ma et al. Parameter Efficient Quasi-Orthogonal Fine-Tuning via Givens Rotation. 2024. [[arxiv]](https://arxiv.org/abs/2404.04316)
1. Ma et al. Parameter Efficient Quasi-Orthogonal Fine-Tuning via Givens Rotation. ICML 2024. [[arxiv]](https://arxiv.org/abs/2404.04316)
1. Liu et al. Dynamic Generation of Personalities with Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.07084)
1. Shang et al. How Far Have We Gone in Stripped Binary Code Understanding Using Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.09836)
1. Huang et al. LLMTune: Accelerate Database Knob Tuning with Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.11581)
1. Deng et al. Text-Tuple-Table: Towards Information Integration in Text-to-Table Generation via Global Tuple Extraction. 2024. [[arxiv]](https://arxiv.org/abs/2404.14215)
1. Acikgoz et al. Hippocrates: An Open-Source Framework for Advancing Large Language Models in Healthcare. 2024. [[arxiv]](https://arxiv.org/abs/2404.16621)
1. Zhang et al. Small Language Models Need Strong Verifiers to Self-Correct Reasoning. 2024. [[arxiv]](https://arxiv.org/abs/2404.17140)
1. Zhou et al. FREB-TQA: A Fine-Grained Robustness Evaluation Benchmark for Table Question Answering. 2024. [[arxiv]](https://arxiv.org/abs/2404.18585)
1. Zhang et al. Small Language Models Need Strong Verifiers to Self-Correct Reasoning. ACL 2024 Findings. [[arxiv]](https://arxiv.org/abs/2404.17140)
1. Zhou et al. FREB-TQA: A Fine-Grained Robustness Evaluation Benchmark for Table Question Answering. NAACL 2024. [[arxiv]](https://arxiv.org/abs/2404.18585)
1. Xu et al. Large Language Models for Cyber Security: A Systematic Literature Review. 2024. [[arxiv]](https://arxiv.org/abs/2405.04760)
1. Dammu et al. "They are uncultured": Unveiling Covert Harms and Social Threats in LLM Generated Conversations. 2024. [[arxiv]](https://arxiv.org/abs/2405.05378)
1. Yi et al. A safety realignment framework via subspace-oriented model fusion for large language models. 2024. [[arxiv]](https://arxiv.org/abs/2405.09055)
1. Lou et al. SPO: Multi-Dimensional Preference Sequential Alignment With Implicit Reward Modeling. 2024. [[arxiv]](https://arxiv.org/abs/2405.12739)
1. Zhang et al. Getting More from Less: Large Language Models are Good Spontaneous Multilingual Learners. 2024. [[arxiv]](https://arxiv.org/abs/2405.13816)
1. Zhang et al. TS-Align: A Teacher-Student Collaborative Framework for Scalable Iterative Finetuning of Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2405.20215)
1. Zihong Chen. Sentence Segmentation and Sentence Punctuation Based on XunziALLM. 2024. [[paper]](https://aclanthology.org/2024.lt4hala-1.30)
1. Gao et al. The Best of Both Worlds: Toward an Honest and Helpful Large Language Model. 2024. [[arxiv]](https://arxiv.org/abs/2406.00380)
1. Wang and Song. MARS: Benchmarking the Metaphysical Reasoning Abilities of Language Models with a Multi-task Evaluation Dataset. 2024. [[arxiv]](https://arxiv.org/abs/2406.02106)
1. Hu et al. Computational Limits of Low-Rank Adaptation (LoRA) for Transformer-Based Models. 2024. [[arxiv]](https://arxiv.org/abs/2406.03136)
1. Ge et al. Time Sensitive Knowledge Editing through Efficient Finetuning. ACL 2024. [[arxiv]](https://arxiv.org/abs/2406.04496)
1. Tan et al. Peer Review as A Multi-Turn and Long-Context Dialogue with Role-Based Interactions. 2024. [[arxiv]](https://arxiv.org/abs/2406.05688)
1. Song et al. Turbo Sparse: Achieving LLM SOTA Performance with Minimal Activated Parameters. 2024. [[arxiv]](https://arxiv.org/abs/2406.05955)
1. Gu et al. RWKV-CLIP: A Robust Vision-Language Representation Learner. 2024. [[arxiv]](https://arxiv.org/abs/2406.06973)
1. Chen et al. Advancing Tool-Augmented Large Language Models: Integrating Insights from Errors in Inference Trees. 2024. [[arxiv]](https://arxiv.org/abs/2406.07115)
1. Zhu et al. Are Large Language Models Good Statisticians?. 2024. [[arxiv]](https://arxiv.org/abs/2406.07815)
1. Li et al. Know the Unknown: An Uncertainty-Sensitive Method for LLM Instruction Tuning. 2024. [[arxiv]](https://arxiv.org/abs/2406.10099)
1. Ding et al. IntentionQA: A Benchmark for Evaluating Purchase Intention Comprehension Abilities of Language Models in E-commerce. 2024. [[arxiv]](https://arxiv.org/abs/2406.10173)
1. He et al. COMMUNITY-CROSS-INSTRUCT: Unsupervised Instruction Generation for Aligning Large Language Models to Online Communities. 2024. [[arxiv]](https://arxiv.org/abs/2406.12074)
1. Lin et al. FVEL: Interactive Formal Verification Environment with Large Language Models via Theorem Proving. 2024. [[arxiv]](https://arxiv.org/abs/2406.14408)
1. Treutlein et al. Connecting the Dots: LLMs can Infer and Verbalize Latent Structure from Disparate Training Data. 2024. [[arxiv]](https://arxiv.org/abs/2406.14546)
1. Feng et al. SS-Bench: A Benchmark for Social Story Generation and Evaluation. 2024. [[arxiv]](https://arxiv.org/abs/2406.15695)
1. Feng et al. Self-Constructed Context Decompilation with Fined-grained Alignment Enhancement. 2024. [[arxiv]](https://arxiv.org/abs/2406.17233)
1. Liu et al. Large Language Models for Cuffless Blood Pressure Measurement From Wearable Biosignals. 2024. [[arxiv]](https://arxiv.org/abs/2406.18069)
1. Iyer et al. Exploring Very Low-Resource Translation with LLMs: The University of Edinburghs Submission to AmericasNLP 2024 Translation Task. AmericasNLP 2024. [[paper]](https://aclanthology.org/2024.americasnlp-1.25)
1. **[StarWhisper](https://github.com/Yu-Yang-Li/StarWhisper)**: 天文大模型 StarWhisper基于 ChatGLM2-6B 和 Qwen-14B 在天文数据上微调而得。
1. **[DISC-LawLLM](https://github.com/FudanDISC/DISC-LawLLM)**: 中文法律领域大模型 DISC-LawLLM基于 Baichuan-13B 微调而得,具有法律推理和知识检索能力。
1. **[Sunsimiao](https://github.com/X-D-Lab/Sunsimiao)**: 孙思邈中文医疗大模型 Sumsimiao基于 Baichuan-7B 和 ChatGLM-6B 在中文医疗数据上微调而得。
@@ -531,6 +608,9 @@ run_name: test_run # 可选
1. **[MachineMindset](https://github.com/PKU-YuanGroup/Machine-Mindset/)**MBTI性格大模型项目根据数据集与训练方式让任意 LLM 拥有 16 个不同的性格类型。
1. **[Luminia-13B-v3](https://huggingface.co/Nekochu/Luminia-13B-v3)**:一个用于生成 Stable Diffusion 提示词的大型语言模型。[[🤗Demo]](https://huggingface.co/spaces/Nekochu/Luminia-13B_SD_Prompt)
1. **[Chinese-LLaVA-Med](https://github.com/BUAADreamer/Chinese-LLaVA-Med)**:中文多模态医学大模型,基于 LLaVA-1.5-7B 在中文多模态医疗数据上微调而得。
1. **[AutoRE](https://github.com/THUDM/AutoRE)**:基于大语言模型的文档级关系抽取系统。
1. **[NVIDIA RTX AI Toolkit](https://github.com/NVIDIA/RTX-AI-Toolkit)**:在 Windows 主机上利用英伟达 RTX 设备进行大型语言模型微调的开发包。
1. **[LazyLLM](https://github.com/LazyAGI/LazyLLM)**:一个低代码构建多 Agent 大模型应用的开发工具,支持基于 LLaMA Factory 的模型微调.
</details>
@@ -538,17 +618,19 @@ run_name: test_run # 可选
本仓库的代码依照 [Apache-2.0](LICENSE) 协议开源。
使用模型权重时,请遵循对应的模型协议:[Baichuan2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command-R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [GLM4](https://huggingface.co/THUDM/glm-4-9b/blob/main/LICENSE) / [InternLM2](https://github.com/InternLM/InternLM#license) / [LLaMA](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [LLaMA-2 (LLaVA-1.5)](https://ai.meta.com/llama/license/) / [LLaMA-3](https://llama.meta.com/llama3/license/) / [Mistral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [StarCoder2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan)
使用模型权重时,请遵循对应的模型协议:[Baichuan 2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [GLM-4](https://huggingface.co/THUDM/glm-4-9b/blob/main/LICENSE) / [InternLM2](https://github.com/InternLM/InternLM#license) / [Llama](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [Llama 2 (LLaVA-1.5)](https://ai.meta.com/llama/license/) / [Llama 3](https://llama.meta.com/llama3/license/) / [Mistral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/Phi-2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [StarCoder 2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan 2](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan)
## 引用
如果您觉得此项目有帮助,请考虑以下列格式引用
```bibtex
@article{zheng2024llamafactory,
title={LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models},
author={Yaowei Zheng and Richong Zhang and Junhao Zhang and Yanhan Ye and Zheyan Luo and Yongqiang Ma},
journal={arXiv preprint arXiv:2403.13372},
@inproceedings{zheng2024llamafactory,
title={LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models},
author={Yaowei Zheng and Richong Zhang and Junhao Zhang and Yanhan Ye and Zheyan Luo and Zhangchi Feng and Yongqiang Ma},
booktitle={Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)},
address={Bangkok, Thailand},
publisher={Association for Computational Linguistics},
year={2024},
url={http://arxiv.org/abs/2403.13372}
}

View File

@@ -11,8 +11,9 @@ Currently we support datasets in **alpaca** and **sharegpt** format.
"formatting": "the format of the dataset. (optional, default: alpaca, can be chosen from {alpaca, sharegpt})",
"ranking": "whether the dataset is a preference dataset or not. (default: False)",
"subset": "the name of the subset. (optional, default: None)",
"split": "the name of dataset split to be used. (optional, default: train)",
"folder": "the name of the folder of the dataset repository on the Hugging Face hub. (optional, default: None)",
"num_samples": "the number of samples in the dataset used for training. (optional, default: None)",
"num_samples": "the number of samples in the dataset to be used. (optional, default: None)",
"columns (optional)": {
"prompt": "the column name in the dataset containing the prompts. (default: instruction)",
"query": "the column name in the dataset containing the queries. (default: input)",

View File

@@ -11,8 +11,9 @@
"formatting": "数据集格式可选默认alpaca可以为 alpaca 或 sharegpt",
"ranking": "是否为偏好数据集可选默认False",
"subset": "数据集子集的名称可选默认None",
"split": "所使用的数据集切分可选默认train",
"folder": "Hugging Face 仓库的文件夹名称可选默认None",
"num_samples": "该数据集中用于训练的样本数量。可选默认None",
"num_samples": "该数据集所使用的样本数量。可选默认None",
"columns可选": {
"prompt": "数据集代表提示词的表头名称默认instruction",
"query": "数据集代表请求的表头名称默认input",

View File

@@ -1,23 +0,0 @@
version: '3.8'
services:
llama-factory:
build:
dockerfile: Dockerfile
context: .
container_name: llama_factory
volumes:
- ./hf_cache:/root/.cache/huggingface/
- ./data:/app/data
- ./output:/app/output
ports:
- "7860:7860"
ipc: host
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: "all"
capabilities: [gpu]
restart: unless-stopped

View File

@@ -0,0 +1,59 @@
# Use the NVIDIA official image with PyTorch 2.3.0
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-02.html
FROM nvcr.io/nvidia/pytorch:24.02-py3
# Define environments
ENV MAX_JOBS=4
ENV FLASH_ATTENTION_FORCE_BUILD=TRUE
ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
# Define installation arguments
ARG INSTALL_BNB=false
ARG INSTALL_VLLM=false
ARG INSTALL_DEEPSPEED=false
ARG INSTALL_FLASHATTN=false
ARG PIP_INDEX=https://pypi.org/simple
# Set the working directory
WORKDIR /app
# Install the requirements
COPY requirements.txt /app
RUN pip config set global.index-url "$PIP_INDEX" && \
pip config set global.extra-index-url "$PIP_INDEX" && \
python -m pip install --upgrade pip && \
python -m pip install -r requirements.txt
# Copy the rest of the application into the image
COPY . /app
# Install the LLaMA Factory
RUN EXTRA_PACKAGES="metrics"; \
if [ "$INSTALL_BNB" == "true" ]; then \
EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \
fi; \
if [ "$INSTALL_VLLM" == "true" ]; then \
EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \
fi; \
if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
fi; \
pip install -e ".[$EXTRA_PACKAGES]"
# Rebuild flash attention
RUN pip uninstall -y transformer-engine flash-attn && \
if [ "$INSTALL_FLASHATTN" == "true" ]; then \
pip uninstall -y ninja && pip install ninja && \
pip install --no-cache-dir flash-attn --no-build-isolation; \
fi
# Set up volumes
VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ]
# Expose port 7860 for the LLaMA Board
ENV GRADIO_SERVER_PORT 7860
EXPOSE 7860
# Expose port 8000 for the API service
ENV API_PORT 8000
EXPOSE 8000

View File

@@ -0,0 +1,32 @@
services:
llamafactory:
build:
dockerfile: ./docker/docker-cuda/Dockerfile
context: ../..
args:
INSTALL_BNB: false
INSTALL_VLLM: false
INSTALL_DEEPSPEED: false
INSTALL_FLASHATTN: false
PIP_INDEX: https://pypi.org/simple
container_name: llamafactory
volumes:
- ../../hf_cache:/root/.cache/huggingface
- ../../ms_cache:/root/.cache/modelscope
- ../../data:/app/data
- ../../output:/app/output
ports:
- "7860:7860"
- "8000:8000"
ipc: host
tty: true
stdin_open: true
command: bash
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: "all"
capabilities: [gpu]
restart: unless-stopped

View File

@@ -0,0 +1,45 @@
# Use the Ubuntu 22.04 image with CANN 8.0.rc1
# More versions can be found at https://hub.docker.com/r/cosdt/cann/tags
# FROM cosdt/cann:8.0.rc1-910-ubuntu22.04
FROM cosdt/cann:8.0.rc1-910b-ubuntu22.04
# FROM cosdt/cann:8.0.rc1-910-openeuler22.03
# FROM cosdt/cann:8.0.rc1-910b-openeuler22.03
# Define environments
ENV DEBIAN_FRONTEND=noninteractive
# Define installation arguments
ARG INSTALL_DEEPSPEED=false
ARG PIP_INDEX=https://pypi.org/simple
ARG TORCH_INDEX=https://download.pytorch.org/whl/cpu
# Set the working directory
WORKDIR /app
# Install the requirements
COPY requirements.txt /app
RUN pip config set global.index-url "$PIP_INDEX" && \
pip config set global.extra-index-url "$TORCH_INDEX" && \
python -m pip install --upgrade pip && \
python -m pip install -r requirements.txt
# Copy the rest of the application into the image
COPY . /app
# Install the LLaMA Factory
RUN EXTRA_PACKAGES="torch-npu,metrics"; \
if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
fi; \
pip install -e ".[$EXTRA_PACKAGES]"
# Set up volumes
VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ]
# Expose port 7860 for the LLaMA Board
ENV GRADIO_SERVER_PORT 7860
EXPOSE 7860
# Expose port 8000 for the API service
ENV API_PORT 8000
EXPOSE 8000

View File

@@ -0,0 +1,31 @@
services:
llamafactory:
build:
dockerfile: ./docker/docker-npu/Dockerfile
context: ../..
args:
INSTALL_DEEPSPEED: false
PIP_INDEX: https://pypi.org/simple
container_name: llamafactory
volumes:
- ../../hf_cache:/root/.cache/huggingface
- ../../ms_cache:/root/.cache/modelscope
- ../../data:/app/data
- ../../output:/app/output
- /usr/local/dcmi:/usr/local/dcmi
- /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
- /usr/local/Ascend/driver:/usr/local/Ascend/driver
- /etc/ascend_install.info:/etc/ascend_install.info
ports:
- "7860:7860"
- "8000:8000"
ipc: host
tty: true
stdin_open: true
command: bash
devices:
- /dev/davinci0
- /dev/davinci_manager
- /dev/devmm_svm
- /dev/hisi_hdc
restart: unless-stopped

View File

@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import datasets

View File

@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import datasets

View File

@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import datasets

View File

@@ -4,59 +4,59 @@ Make sure to execute these commands in the `LLaMA-Factory` directory.
## Table of Contents
- [LoRA Fine-Tuning on A Single GPU](#lora-fine-tuning-on-a-single-gpu)
- [QLoRA Fine-Tuning on a Single GPU](#qlora-fine-tuning-on-a-single-gpu)
- [LoRA Fine-Tuning on Multiple GPUs](#lora-fine-tuning-on-multiple-gpus)
- [LoRA Fine-Tuning on Multiple NPUs](#lora-fine-tuning-on-multiple-npus)
- [Full-Parameter Fine-Tuning on Multiple GPUs](#full-parameter-fine-tuning-on-multiple-gpus)
- [LoRA Fine-Tuning](#lora-fine-tuning)
- [QLoRA Fine-Tuning](#qlora-fine-tuning)
- [Full-Parameter Fine-Tuning](#full-parameter-fine-tuning)
- [Merging LoRA Adapters and Quantization](#merging-lora-adapters-and-quantization)
- [Inferring LoRA Fine-Tuned Models](#inferring-lora-fine-tuned-models)
- [Extras](#extras)
Use `CUDA_VISIBLE_DEVICES` (GPU) or `ASCEND_RT_VISIBLE_DEVICES` (NPU) to choose computing devices.
## Examples
### LoRA Fine-Tuning on A Single GPU
### LoRA Fine-Tuning
#### (Continuous) Pre-Training
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_pretrain.yaml
llamafactory-cli train examples/train_lora/llama3_lora_pretrain.yaml
```
#### Supervised Fine-Tuning
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
```
#### Multimodal Supervised Fine-Tuning
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llava1_5_lora_sft.yaml
llamafactory-cli train examples/train_lora/llava1_5_lora_sft.yaml
```
#### Reward Modeling
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_reward.yaml
llamafactory-cli train examples/train_lora/llama3_lora_reward.yaml
```
#### PPO Training
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_ppo.yaml
llamafactory-cli train examples/train_lora/llama3_lora_ppo.yaml
```
#### DPO/ORPO/SimPO Training
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_dpo.yaml
llamafactory-cli train examples/train_lora/llama3_lora_dpo.yaml
```
#### KTO Training
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_kto.yaml
llamafactory-cli train examples/train_lora/llama3_lora_kto.yaml
```
#### Preprocess Dataset
@@ -64,95 +64,79 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lo
It is useful for large dataset, use `tokenized_path` in config to load the preprocessed dataset.
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_preprocess.yaml
llamafactory-cli train examples/train_lora/llama3_preprocess.yaml
```
#### Evaluating on MMLU/CMMLU/C-Eval Benchmarks
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli eval examples/lora_single_gpu/llama3_lora_eval.yaml
llamafactory-cli eval examples/train_lora/llama3_lora_eval.yaml
```
#### Batch Predicting and Computing BLEU and ROUGE Scores
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_predict.yaml
```
### QLoRA Fine-Tuning on a Single GPU
#### Supervised Fine-Tuning with 4/8-bit Bitsandbytes Quantization (Recommended)
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml
```
#### Supervised Fine-Tuning with 4/8-bit GPTQ Quantization
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml
```
#### Supervised Fine-Tuning with 4-bit AWQ Quantization
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_awq.yaml
```
#### Supervised Fine-Tuning with 2-bit AQLM Quantization
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml
```
### LoRA Fine-Tuning on Multiple GPUs
#### Supervised Fine-Tuning on Single Node
```bash
CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml
llamafactory-cli train examples/train_lora/llama3_lora_predict.yaml
```
#### Supervised Fine-Tuning on Multiple Nodes
```bash
CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml
CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml
FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
```
#### Supervised Fine-Tuning with DeepSpeed ZeRO-3 (Weight Sharding)
```bash
CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft_ds.yaml
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds3.yaml
```
### LoRA Fine-Tuning on Multiple NPUs
### QLoRA Fine-Tuning
#### Supervised Fine-Tuning with DeepSpeed ZeRO-0
#### Supervised Fine-Tuning with 4/8-bit Bitsandbytes/HQQ/EETQ Quantization (Recommended)
```bash
ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_npu/llama3_lora_sft_ds.yaml
llamafactory-cli train examples/train_qlora/llama3_lora_sft_otfq.yaml
```
### Full-Parameter Fine-Tuning on Multiple GPUs
#### Supervised Fine-Tuning with 4/8-bit GPTQ Quantization
```bash
llamafactory-cli train examples/train_qlora/llama3_lora_sft_gptq.yaml
```
#### Supervised Fine-Tuning with 4-bit AWQ Quantization
```bash
llamafactory-cli train examples/train_qlora/llama3_lora_sft_awq.yaml
```
#### Supervised Fine-Tuning with 2-bit AQLM Quantization
```bash
llamafactory-cli train examples/train_qlora/llama3_lora_sft_aqlm.yaml
```
### Full-Parameter Fine-Tuning
#### Supervised Fine-Tuning on Single Node
```bash
CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml
```
#### Supervised Fine-Tuning on Multiple Nodes
```bash
CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml
CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml
FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml
FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml
```
#### Batch Predicting and Computing BLEU and ROUGE Scores
```bash
CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llama3_full_predict.yaml
llamafactory-cli train examples/train_full/llama3_full_predict.yaml
```
### Merging LoRA Adapters and Quantization
@@ -162,35 +146,33 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llam
Note: DO NOT use quantized model or `quantization_bit` when merging LoRA adapters.
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
```
#### Quantizing Model using AutoGPTQ
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_gptq.yaml
llamafactory-cli export examples/merge_lora/llama3_gptq.yaml
```
### Inferring LoRA Fine-Tuned Models
Use `CUDA_VISIBLE_DEVICES=0,1` to infer models on multiple devices.
#### Use CLI
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
```
#### Use Web UI
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli webchat examples/inference/llama3_lora_sft.yaml
llamafactory-cli webchat examples/inference/llama3_lora_sft.yaml
```
#### Launch OpenAI-style API
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli api examples/inference/llama3_lora_sft.yaml
llamafactory-cli api examples/inference/llama3_lora_sft.yaml
```
### Extras
@@ -198,36 +180,42 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli api examples/inference/llama3_lora_sft.y
#### Full-Parameter Fine-Tuning using GaLore
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/galore/llama3_full_sft.yaml
llamafactory-cli train examples/extras/galore/llama3_full_sft.yaml
```
#### Full-Parameter Fine-Tuning using BAdam
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/badam/llama3_full_sft.yaml
llamafactory-cli train examples/extras/badam/llama3_full_sft.yaml
```
#### LoRA+ Fine-Tuning
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/loraplus/llama3_lora_sft.yaml
llamafactory-cli train examples/extras/loraplus/llama3_lora_sft.yaml
```
#### PiSSA Fine-Tuning
```bash
llamafactory-cli train examples/extras/pissa/llama3_lora_sft.yaml
```
#### Mixture-of-Depths Fine-Tuning
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/mod/llama3_full_sft.yaml
llamafactory-cli train examples/extras/mod/llama3_full_sft.yaml
```
#### LLaMA-Pro Fine-Tuning
```bash
bash examples/extras/llama_pro/expand.sh
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml
llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml
```
#### FSDP+QLoRA Fine-Tuning
```bash
bash examples/extras/fsdp_qlora/single_node.sh
bash examples/extras/fsdp_qlora/train.sh
```

View File

@@ -4,59 +4,59 @@
## 目录
- [单 GPU LoRA 微调](#单-gpu-lora-微调)
- [单 GPU QLoRA 微调](#单-gpu-qlora-微调)
- [多 GPU LoRA 微调](#多-gpu-lora-微调)
- [多 NPU LoRA 微调](#多-npu-lora-微调)
- [多 GPU 全参数微调](#多-gpu-全参数微调)
- [LoRA 微调](#lora-微调)
- [QLoRA 微调](#qlora-微调)
- [全参数微调](#全参数微调)
- [合并 LoRA 适配器与模型量化](#合并-lora-适配器与模型量化)
- [推理 LoRA 模型](#推理-lora-模型)
- [杂项](#杂项)
使用 `CUDA_VISIBLE_DEVICES`GPU`ASCEND_RT_VISIBLE_DEVICES`NPU选择计算设备。
## 示例
### 单 GPU LoRA 微调
### LoRA 微调
#### (增量)预训练
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_pretrain.yaml
llamafactory-cli train examples/train_lora/llama3_lora_pretrain.yaml
```
#### 指令监督微调
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
```
#### 多模态指令监督微调
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llava1_5_lora_sft.yaml
llamafactory-cli train examples/train_lora/llava1_5_lora_sft.yaml
```
#### 奖励模型训练
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_reward.yaml
llamafactory-cli train examples/train_lora/llama3_lora_reward.yaml
```
#### PPO 训练
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_ppo.yaml
llamafactory-cli train examples/train_lora/llama3_lora_ppo.yaml
```
#### DPO/ORPO/SimPO 训练
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_dpo.yaml
llamafactory-cli train examples/train_lora/llama3_lora_dpo.yaml
```
#### KTO 训练
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_kto.yaml
llamafactory-cli train examples/train_lora/llama3_lora_kto.yaml
```
#### 预处理数据集
@@ -64,95 +64,79 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lo
对于大数据集有帮助,在配置中使用 `tokenized_path` 以加载预处理后的数据集。
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_preprocess.yaml
llamafactory-cli train examples/train_lora/llama3_preprocess.yaml
```
#### 在 MMLU/CMMLU/C-Eval 上评估
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli eval examples/lora_single_gpu/llama3_lora_eval.yaml
llamafactory-cli eval examples/train_lora/llama3_lora_eval.yaml
```
#### 批量预测并计算 BLEU 和 ROUGE 分数
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_predict.yaml
llamafactory-cli train examples/train_lora/llama3_lora_predict.yaml
```
### 单 GPU QLoRA 微调
#### 基于 4/8 比特 Bitsandbytes 量化进行指令监督微调(推荐)
#### 多机指令监督微调
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml
```
#### 基于 4/8 比特 GPTQ 量化进行指令监督微调
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml
```
#### 基于 4 比特 AWQ 量化进行指令监督微调
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_awq.yaml
```
#### 基于 2 比特 AQLM 量化进行指令监督微调
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml
```
### 多 GPU LoRA 微调
#### 在单机上进行指令监督微调
```bash
CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml
```
#### 在多机上进行指令监督微调
```bash
CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml
CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml
FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
```
#### 使用 DeepSpeed ZeRO-3 平均分配显存
```bash
CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft_ds.yaml
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds3.yaml
```
### 多 NPU LoRA 微调
### QLoRA 微调
#### 使用 DeepSpeed ZeRO-0 进行指令监督微调
#### 基于 4/8 比特 Bitsandbytes/HQQ/EETQ 量化进行指令监督微调(推荐)
```bash
ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_npu/llama3_lora_sft_ds.yaml
llamafactory-cli train examples/train_qlora/llama3_lora_sft_otfq.yaml
```
### 多 GPU 全参数微调
#### 基于 4/8 比特 GPTQ 量化进行指令监督微调
```bash
llamafactory-cli train examples/train_qlora/llama3_lora_sft_gptq.yaml
```
#### 基于 4 比特 AWQ 量化进行指令监督微调
```bash
llamafactory-cli train examples/train_qlora/llama3_lora_sft_awq.yaml
```
#### 基于 2 比特 AQLM 量化进行指令监督微调
```bash
llamafactory-cli train examples/train_qlora/llama3_lora_sft_aqlm.yaml
```
### 全参数微调
#### 在单机上进行指令监督微调
```bash
CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml
```
#### 在多机上进行指令监督微调
```bash
CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml
CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml
FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml
FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml
```
#### 批量预测并计算 BLEU 和 ROUGE 分数
```bash
CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llama3_full_predict.yaml
llamafactory-cli train examples/train_full/llama3_full_predict.yaml
```
### 合并 LoRA 适配器与模型量化
@@ -162,35 +146,33 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llam
注:请勿使用量化后的模型或 `quantization_bit` 参数来合并 LoRA 适配器。
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
```
#### 使用 AutoGPTQ 量化模型
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_gptq.yaml
llamafactory-cli export examples/merge_lora/llama3_gptq.yaml
```
### 推理 LoRA 模型
使用 `CUDA_VISIBLE_DEVICES=0,1` 进行多卡推理。
#### 使用命令行接口
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
```
#### 使用浏览器界面
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli webchat examples/inference/llama3_lora_sft.yaml
llamafactory-cli webchat examples/inference/llama3_lora_sft.yaml
```
#### 启动 OpenAI 风格 API
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli api examples/inference/llama3_lora_sft.yaml
llamafactory-cli api examples/inference/llama3_lora_sft.yaml
```
### 杂项
@@ -198,36 +180,42 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli api examples/inference/llama3_lora_sft.y
#### 使用 GaLore 进行全参数训练
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/galore/llama3_full_sft.yaml
llamafactory-cli train examples/extras/galore/llama3_full_sft.yaml
```
#### 使用 BAdam 进行全参数训练
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/badam/llama3_full_sft.yaml
llamafactory-cli train examples/extras/badam/llama3_full_sft.yaml
```
#### LoRA+ 微调
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/loraplus/llama3_lora_sft.yaml
llamafactory-cli train examples/extras/loraplus/llama3_lora_sft.yaml
```
#### PiSSA 微调
```bash
llamafactory-cli train examples/extras/pissa/llama3_lora_sft.yaml
```
#### 深度混合微调
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/mod/llama3_full_sft.yaml
llamafactory-cli train examples/extras/mod/llama3_full_sft.yaml
```
#### LLaMA-Pro 微调
```bash
bash examples/extras/llama_pro/expand.sh
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml
llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml
```
#### FSDP+QLoRA 微调
```bash
bash examples/extras/fsdp_qlora/single_node.sh
bash examples/extras/fsdp_qlora/train.sh
```

View File

@@ -5,10 +5,11 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
stage: sft
do_train: true
finetuning_type: full
### ddp
ddp_timeout: 180000000
deepspeed: examples/deepspeed/ds_z3_config.json
use_badam: true
badam_mode: layer
badam_switch_mode: ascending
badam_switch_interval: 50
badam_verbose: 2
### dataset
dataset: identity,alpaca_en_demo
@@ -27,12 +28,11 @@ overwrite_output_dir: true
### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 2
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
### eval
val_size: 0.1

View File

@@ -6,9 +6,11 @@ stage: sft
do_train: true
finetuning_type: full
use_badam: true
badam_mode: layer
badam_switch_mode: ascending
badam_switch_interval: 50
badam_verbose: 2
deepspeed: examples/deepspeed/ds_z3_config.json
### dataset
dataset: identity,alpaca_en_demo
@@ -32,7 +34,6 @@ learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
pure_bf16: true
### eval
val_size: 0.1

View File

@@ -8,9 +8,6 @@ do_train: true
finetuning_type: lora
lora_target: all
### ddp
ddp_timeout: 180000000
### dataset
dataset: identity,alpaca_en_demo
template: llama3
@@ -33,7 +30,8 @@ learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.1

View File

@@ -31,7 +31,8 @@ learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.1

View File

@@ -30,7 +30,8 @@ learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.1

View File

@@ -31,6 +31,7 @@ num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
pure_bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.1

View File

@@ -1,12 +1,14 @@
### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
quantization_bit: 4
### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
pissa_init: true
pissa_iter: 16
pissa_convert: true
### dataset
dataset: identity,alpaca_en_demo
@@ -30,7 +32,8 @@ learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.1

View File

@@ -0,0 +1,3 @@
model_name_or_path: llava-hf/llava-1.5-7b-hf
template: vicuna
visual_inputs: true

View File

@@ -4,11 +4,8 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
### ddp
ddp_timeout: 180000000
finetuning_type: full
deepspeed: examples/deepspeed/ds_z3_config.json
### dataset
dataset: identity,alpaca_en_demo
@@ -19,7 +16,7 @@ overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: saves/llama3-8b/lora/sft
output_dir: saves/llama3-8b/full/sft
logging_steps: 10
save_steps: 500
plot_loss: true
@@ -32,7 +29,8 @@ learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.1

View File

@@ -7,7 +7,7 @@ do_train: true
finetuning_type: lora
lora_target: all
pref_beta: 0.1
pref_loss: sigmoid # [sigmoid (dpo), orpo, simpo]
pref_loss: sigmoid # choices: [sigmoid (dpo), orpo, simpo]
### dataset
dataset: dpo_en_demo
@@ -31,7 +31,8 @@ learning_rate: 5.0e-6
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.1

View File

@@ -6,8 +6,7 @@ adapter_name_or_path: saves/llama3-8b/lora/sft
finetuning_type: lora
### dataset
task: mmlu
split: test
task: mmlu_test # choices: [mmlu_test, ceval_validation, cmmlu_test]
template: fewshot
lang: en
n_shot: 5

View File

@@ -6,6 +6,7 @@ stage: kto
do_train: true
finetuning_type: lora
lora_target: all
pref_beta: 0.1
### dataset
dataset: kto_en_demo
@@ -29,7 +30,8 @@ learning_rate: 5.0e-6
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.1

View File

@@ -30,7 +30,8 @@ learning_rate: 1.0e-5
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
bf16: true
ddp_timeout: 180000000
### generate
max_new_tokens: 512

View File

@@ -8,7 +8,7 @@ do_predict: true
finetuning_type: lora
### dataset
dataset: identity,alpaca_en_demo
eval_dataset: identity,alpaca_en_demo
template: llama3
cutoff_len: 1024
max_samples: 50
@@ -22,3 +22,4 @@ overwrite_output_dir: true
### eval
per_device_eval_batch_size: 1
predict_with_generate: true
ddp_timeout: 180000000

View File

@@ -15,7 +15,7 @@ overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: saves/llama3-8b/lora/sft
output_dir: saves/llama3-8b/lora/pretrain
logging_steps: 10
save_steps: 500
plot_loss: true
@@ -28,7 +28,8 @@ learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.1

View File

@@ -25,11 +25,12 @@ overwrite_output_dir: true
### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-5
learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.1

View File

@@ -29,7 +29,8 @@ learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.1

View File

@@ -6,9 +6,6 @@ stage: sft
do_train: true
finetuning_type: lora
lora_target: all
### ddp
ddp_timeout: 180000000
deepspeed: examples/deepspeed/ds_z0_config.json
### dataset
@@ -33,7 +30,8 @@ learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.1

View File

@@ -6,9 +6,6 @@ stage: sft
do_train: true
finetuning_type: lora
lora_target: all
### ddp
ddp_timeout: 180000000
deepspeed: examples/deepspeed/ds_z3_config.json
### dataset
@@ -33,7 +30,8 @@ learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.1

View File

@@ -30,7 +30,8 @@ learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.1

View File

@@ -29,7 +29,8 @@ learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.1

View File

@@ -29,7 +29,8 @@ learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.1

View File

@@ -29,7 +29,8 @@ learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.1

View File

@@ -0,0 +1,41 @@
### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
quantization_bit: 4
quantization_method: bitsandbytes # choices: [bitsandbytes (4/8), hqq (2/3/4/5/6/8), eetq (8)]
### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
### dataset
dataset: identity,alpaca_en_demo
template: llama3
cutoff_len: 1024
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: saves/llama3-8b/lora/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500

View File

@@ -4,6 +4,7 @@ accelerate>=0.30.1
peft>=0.11.1
trl>=0.8.6
gradio>=4.0.0
pandas>=2.0.0
scipy
einops
sentencepiece
@@ -17,3 +18,4 @@ matplotlib>=3.7.0
fire
packaging
pyyaml
numpy<2.0.0

View File

@@ -1,7 +1,20 @@
# coding=utf-8
# Calculates the flops of pre-trained models.
# Usage: python cal_flops.py --model_name_or_path path_to_model --batch_size 1 --seq_length 512
# Inspired by: https://www.deepspeed.ai/tutorials/flops-profiler/
# Copyright 2024 Microsoft Corporation and the LlamaFactory team.
#
# This code is inspired by the Microsoft's DeepSpeed library.
# https://www.deepspeed.ai/tutorials/flops-profiler/
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import fire
import torch
@@ -17,6 +30,10 @@ def calculate_flops(
seq_length: int = 256,
flash_attn: str = "auto",
):
r"""
Calculates the flops of pre-trained models.
Usage: python cal_flops.py --model_name_or_path path_to_model --batch_size 1 --seq_length 512
"""
with get_accelerator().device(0):
chat_model = ChatModel(dict(model_name_or_path=model_name_or_path, template="empty", flash_attn=flash_attn))
fake_input = torch.ones((batch_size, seq_length), dtype=torch.long, device=chat_model.model.device)

View File

@@ -1,7 +1,20 @@
# coding=utf-8
# Calculates the optimal learning rate for 7B/13B models using LLaMA's hyper-parameters.
# Usage: python cal_lr.py --model_name_or_path path_to_model --dataset alpaca_en --cutoff_len 1024 --batch_size 16
# Inspired by: https://github.com/imoneoi/openchat/blob/master/ochat/training_deepspeed/train.py
# Copyright 2024 imoneoi and the LlamaFactory team.
#
# This code is inspired by the imoneoi's OpenChat library.
# https://github.com/imoneoi/openchat/blob/3.6.0/ochat/training_deepspeed/train.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
from typing import Literal
@@ -31,7 +44,12 @@ def calculate_lr(
template: str = "default",
cutoff_len: int = 1024, # i.e. maximum input length during training
is_mistral: bool = False, # mistral model uses a smaller learning rate,
packing: bool = False,
):
r"""
Calculates the optimal learning rate for 7B/13B models using LLaMA's hyper-parameters.
Usage: python cal_lr.py --model_name_or_path path_to_model --dataset alpaca_en --cutoff_len 1024 --batch_size 16
"""
model_args, data_args, training_args, _, _ = get_train_args(
dict(
stage=stage,
@@ -40,19 +58,21 @@ def calculate_lr(
dataset_dir=dataset_dir,
template=template,
cutoff_len=cutoff_len,
packing=packing,
output_dir="dummy_dir",
overwrite_cache=True,
do_train=True,
)
)
tokenizer_module = load_tokenizer(model_args)
tokenizer = tokenizer_module["tokenizer"]
trainset = get_dataset(model_args, data_args, training_args, stage, **tokenizer_module)
trainset = get_dataset(model_args, data_args, training_args, stage, **tokenizer_module)["train_dataset"]
if stage == "pt":
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
elif stage == "sft":
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, label_pad_token_id=IGNORE_INDEX)
else:
raise NotImplementedError
raise NotImplementedError("Stage does not supported: {}.".format(stage))
dataloader = DataLoader(trainset, batch_size, shuffle=False, collate_fn=data_collator, pin_memory=True)
valid_tokens, total_tokens = 0, 0

View File

@@ -1,6 +1,17 @@
# coding=utf-8
# Calculates the ppl on the dataset of the pre-trained models.
# Usage: python cal_ppl.py --model_name_or_path path_to_model --save_name ppl.json
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
from dataclasses import dataclass
@@ -56,6 +67,10 @@ def cal_ppl(
max_samples: Optional[int] = None,
train_on_prompt: bool = False,
):
r"""
Calculates the ppl on the dataset of the pre-trained models.
Usage: python cal_ppl.py --model_name_or_path path_to_model --save_name ppl.json
"""
model_args, data_args, training_args, finetuning_args, _ = get_train_args(
dict(
stage=stage,
@@ -68,11 +83,12 @@ def cal_ppl(
train_on_prompt=train_on_prompt,
output_dir="dummy_dir",
overwrite_cache=True,
do_train=True,
)
)
tokenizer_module = load_tokenizer(model_args)
tokenizer = tokenizer_module["tokenizer"]
trainset = get_dataset(model_args, data_args, training_args, stage, **tokenizer_module)
trainset = get_dataset(model_args, data_args, training_args, stage, **tokenizer_module)["train_dataset"]
model = load_model(tokenizer, model_args, finetuning_args, is_trainable=False)
if stage == "pt":
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
@@ -83,7 +99,7 @@ def cal_ppl(
tokenizer=tokenizer, label_pad_token_id=IGNORE_INDEX, train_on_prompt=train_on_prompt
)
else:
raise NotImplementedError
raise NotImplementedError("Stage does not supported: {}.".format(stage))
dataloader = DataLoader(trainset, batch_size, shuffle=False, collate_fn=data_collator, pin_memory=True)
criterion = torch.nn.CrossEntropyLoss(reduction="none")

View File

@@ -1,6 +1,17 @@
# coding=utf-8
# Calculates the distribution of the input lengths in the dataset.
# Usage: python length_cdf.py --model_name_or_path path_to_model --dataset alpaca_en --template default
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import defaultdict
@@ -19,6 +30,10 @@ def length_cdf(
template: str = "default",
interval: int = 1000,
):
r"""
Calculates the distribution of the input lengths in the dataset.
Usage: python length_cdf.py --model_name_or_path path_to_model --dataset alpaca_en --template default
"""
model_args, data_args, training_args, _, _ = get_train_args(
dict(
stage="sft",
@@ -29,10 +44,11 @@ def length_cdf(
cutoff_len=1_000_000,
output_dir="dummy_dir",
overwrite_cache=True,
do_train=True,
)
)
tokenizer_module = load_tokenizer(model_args)
trainset = get_dataset(model_args, data_args, training_args, stage="sft", **tokenizer_module)
trainset = get_dataset(model_args, data_args, training_args, stage="sft", **tokenizer_module)["train_dataset"]
total_num = len(trainset)
length_dict = defaultdict(int)
for sample in tqdm(trainset["input_ids"]):

View File

@@ -1,7 +1,20 @@
# coding=utf-8
# Performs block expansion for LLaMA, Mistral, Qwen1.5 or Yi models.
# Usage: python llama_pro.py --model_name_or_path meta-llama/Llama-2-7b-hf --output_dir llama2_pro --num_expand 8
# Inspired by: https://github.com/TencentARC/LLaMA-Pro/blob/main/scripts/block_expansion.py
# Copyright 2024 Tencent Inc. and the LlamaFactory team.
#
# This code is inspired by the Tencent's LLaMA-Pro library.
# https://github.com/TencentARC/LLaMA-Pro/blob/main/scripts/block_expansion.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
@@ -37,6 +50,10 @@ def block_expansion(
shard_size: Optional[str] = "2GB",
save_safetensors: Optional[bool] = False,
):
r"""
Performs block expansion for LLaMA, Mistral, Qwen1.5 or Yi models.
Usage: python llama_pro.py --model_name_or_path meta-llama/Llama-2-7b-hf --output_dir llama2_pro --num_expand 8
"""
config: "PretrainedConfig" = AutoConfig.from_pretrained(model_name_or_path)
num_layers = getattr(config, "num_hidden_layers")
setattr(config, "num_hidden_layers", num_layers + num_expand)
@@ -103,7 +120,7 @@ def block_expansion(
json.dump(index, f, indent=2, sort_keys=True)
print("Model weights saved in {}".format(output_dir))
print("Fine-tune this model with:")
print("- Fine-tune this model with:")
print("model_name_or_path: {}".format(output_dir))
print("finetuning_type: freeze")
print("freeze_trainable_layers: {}".format(num_expand))

View File

@@ -1,8 +1,17 @@
# coding=utf-8
# Converts the Baichuan2-7B model in the same format as LLaMA2-7B.
# Usage: python llamafy_baichuan2.py --input_dir input --output_dir output
# Inspired by: https://huggingface.co/fireballoon/baichuan-llama-7b/blob/main/convert_baichuan_to_llama.py
# Converted model: https://huggingface.co/hiyouga/Baichuan2-7B-Base-LLaMAfied
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
@@ -79,6 +88,11 @@ def save_config(input_dir: str, output_dir: str):
def llamafy_baichuan2(
input_dir: str, output_dir: str, shard_size: Optional[str] = "2GB", save_safetensors: Optional[bool] = False
):
r"""
Converts the Baichuan2-7B model in the same format as LLaMA2-7B.
Usage: python llamafy_baichuan2.py --input_dir input --output_dir output
Converted model: https://huggingface.co/hiyouga/Baichuan2-7B-Base-LLaMAfied
"""
try:
os.makedirs(output_dir, exist_ok=False)
except Exception as e:

View File

@@ -1,7 +1,17 @@
# coding=utf-8
# Converts the Qwen models in the same format as LLaMA2.
# Usage: python llamafy_qwen.py --input_dir input --output_dir output
# Converted model: https://huggingface.co/hiyouga/Qwen-14B-Chat-LLaMAfied
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
@@ -131,6 +141,11 @@ def save_config(input_dir: str, output_dir: str, torch_dtype: str):
def llamafy_qwen(
input_dir: str, output_dir: str, shard_size: Optional[str] = "2GB", save_safetensors: Optional[bool] = False
):
r"""
Converts the Qwen models in the same format as LLaMA2.
Usage: python llamafy_qwen.py --input_dir input --output_dir output
Converted model: https://huggingface.co/hiyouga/Qwen-14B-Chat-LLaMAfied
"""
try:
os.makedirs(output_dir, exist_ok=False)
except Exception as e:

View File

@@ -1,14 +1,25 @@
# coding=utf-8
# Initializes LoRA weights with LoRA-fine-tuning-aware Quantization (LoftQ)
# Usage: python loftq_init.py --model_name_or_path path_to_model --save_dir output_dir
# Inspired by: https://github.com/huggingface/peft/blob/main/examples/loftq_finetuning/quantize_save_load.py
# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
# This code is based on the HuggingFace's PEFT library.
# https://github.com/huggingface/peft/blob/v0.10.0/examples/loftq_finetuning/quantize_save_load.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from typing import TYPE_CHECKING, Optional
from typing import TYPE_CHECKING
import fire
import torch
import torch.nn as nn
from peft import LoftQConfig, LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer
@@ -17,65 +28,61 @@ if TYPE_CHECKING:
from transformers import PreTrainedModel
class Shell(nn.Module):
def __init__(self, weight: torch.Tensor, bias: Optional[torch.Tensor] = None):
super().__init__()
self.weight = nn.Parameter(weight, requires_grad=False)
if bias is not None:
self.bias = nn.Parameter(bias, requires_grad=False)
def unwrap_model(model: nn.Module, pattern=".base_layer") -> None:
for name in {k.split(pattern)[0] for k, _ in model.named_modules() if pattern in k}:
parent_name = ".".join(name.split(".")[:-1])
child_name = name.split(".")[-1]
parent_module = model.get_submodule(parent_name)
child_module = getattr(parent_module, child_name)
base_layer = getattr(child_module, "base_layer")
weight = getattr(base_layer, "weight", None)
bias = getattr(base_layer, "bias", None)
setattr(parent_module, child_name, Shell(weight, bias))
print("Model unwrapped.")
def quantize_loftq(
model_name_or_path: str,
save_dir: str,
loftq_bits: Optional[int] = 4,
loftq_iter: Optional[int] = 1,
lora_alpha: Optional[int] = None,
lora_rank: Optional[int] = 16,
lora_target: Optional[str] = "q_proj,v_proj",
save_safetensors: Optional[bool] = False,
output_dir: str,
loftq_bits: int = 4,
loftq_iter: int = 4,
lora_alpha: int = None,
lora_rank: int = 16,
lora_dropout: float = 0,
lora_target: tuple = ("q_proj", "v_proj"),
save_safetensors: bool = True,
):
r"""
Initializes LoRA weights with LoRA-fine-tuning-aware Quantization (LoftQ)
Usage: python loftq_init.py --model_name_or_path path_to_model --output_dir output_dir
"""
if isinstance(lora_target, str):
lora_target = [name.strip() for name in lora_target.split(",")]
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype="auto")
loftq_config = LoftQConfig(loftq_bits=loftq_bits, loftq_iter=loftq_iter)
lora_config = LoraConfig(
task_type=TaskType.CAUSAL_LM,
inference_mode=True,
r=lora_rank,
lora_alpha=lora_alpha if lora_alpha is not None else lora_rank * 2,
lora_dropout=0.1,
target_modules=[name.strip() for name in lora_target.split(",")],
lora_dropout=lora_dropout,
target_modules=lora_target,
init_lora_weights="loftq",
loftq_config=loftq_config,
)
# Init LoftQ model
lora_model = get_peft_model(model, lora_config)
base_model: "PreTrainedModel" = lora_model.get_base_model()
print("Initializing LoftQ weights, it may be take several minutes, wait patiently.")
peft_model = get_peft_model(model, lora_config)
loftq_dir = os.path.join(output_dir, "loftq_init")
# Save LoftQ model
setattr(lora_model.base_model.peft_config["default"], "base_model_name_or_path", save_dir)
setattr(lora_model.base_model.peft_config["default"], "init_lora_weights", True)
lora_model.save_pretrained(os.path.join(save_dir, "adapters"), safe_serialization=save_safetensors)
setattr(peft_model.peft_config["default"], "base_model_name_or_path", output_dir)
setattr(peft_model.peft_config["default"], "init_lora_weights", True) # don't apply loftq again
peft_model.save_pretrained(loftq_dir, safe_serialization=save_safetensors)
print("Adapter weights saved in {}".format(loftq_dir))
# Save base model
unwrap_model(base_model)
base_model.save_pretrained(save_dir, safe_serialization=save_safetensors)
tokenizer.save_pretrained(save_dir)
base_model: "PreTrainedModel" = peft_model.unload()
base_model.save_pretrained(output_dir, safe_serialization=save_safetensors)
tokenizer.save_pretrained(output_dir)
print("Model weights saved in {}".format(output_dir))
print("- Fine-tune this model with:")
print("model_name_or_path: {}".format(output_dir))
print("adapter_name_or_path: {}".format(loftq_dir))
print("finetuning_type: lora")
print("quantization_bit: {}".format(loftq_bits))
if __name__ == "__main__":

86
scripts/pissa_init.py Normal file
View File

@@ -0,0 +1,86 @@
# coding=utf-8
# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
# This code is based on the HuggingFace's PEFT library.
# https://github.com/huggingface/peft/blob/v0.11.0/examples/pissa_finetuning/preprocess.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from typing import TYPE_CHECKING
import fire
from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer
if TYPE_CHECKING:
from transformers import PreTrainedModel
def quantize_pissa(
model_name_or_path: str,
output_dir: str,
pissa_iter: int = 4,
lora_alpha: int = None,
lora_rank: int = 16,
lora_dropout: float = 0,
lora_target: tuple = ("q_proj", "v_proj"),
save_safetensors: bool = True,
):
r"""
Initializes LoRA weights with Principal Singular values and Singular vectors Adaptation (PiSSA)
Usage: python pissa_init.py --model_name_or_path path_to_model --output_dir output_dir
"""
if isinstance(lora_target, str):
lora_target = [name.strip() for name in lora_target.split(",")]
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype="auto")
lora_config = LoraConfig(
task_type=TaskType.CAUSAL_LM,
r=lora_rank,
lora_alpha=lora_alpha if lora_alpha is not None else lora_rank * 2,
lora_dropout=lora_dropout,
target_modules=lora_target,
init_lora_weights="pissa" if pissa_iter == -1 else "pissa_niter_{}".format(pissa_iter),
)
# Init PiSSA model
peft_model = get_peft_model(model, lora_config)
pissa_dir = os.path.join(output_dir, "pissa_init")
# Save PiSSA model
setattr(peft_model.peft_config["default"], "init_lora_weights", True) # don't apply pissa again
peft_model.save_pretrained(pissa_dir, safe_serialization=save_safetensors)
print("Adapter weights saved in {}".format(pissa_dir))
# Save base model
base_model: "PreTrainedModel" = peft_model.unload()
base_model.save_pretrained(output_dir, safe_serialization=save_safetensors)
tokenizer.save_pretrained(output_dir)
print("Model weights saved in {}".format(output_dir))
print("- Fine-tune this model with:")
print("model_name_or_path: {}".format(output_dir))
print("adapter_name_or_path: {}".format(pissa_dir))
print("finetuning_type: lora")
print("pissa_init: false")
print("pissa_convert: true")
print("- and optionally with:")
print("quantization_bit: 4")
if __name__ == "__main__":
fire.Fire(quantize_pissa)

View File

@@ -1,3 +1,18 @@
# coding=utf-8
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
from typing import Sequence

View File

@@ -1,3 +1,17 @@
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import re
@@ -23,14 +37,16 @@ extra_require = {
"torch": ["torch>=1.13.1"],
"torch-npu": ["torch==2.1.0", "torch-npu==2.1.0.post3", "decorator"],
"metrics": ["nltk", "jieba", "rouge-chinese"],
"deepspeed": ["deepspeed>=0.10.0,<=0.14.0"],
"deepspeed": ["deepspeed>=0.10.0"],
"bitsandbytes": ["bitsandbytes>=0.39.0"],
"vllm": ["vllm>=0.4.3"],
"galore": ["galore-torch"],
"badam": ["badam"],
"gptq": ["optimum>=1.16.0", "auto-gptq>=0.5.0"],
"hqq": ["hqq"],
"eetq": ["eetq"],
"gptq": ["optimum>=1.17.0", "auto-gptq>=0.5.0"],
"awq": ["autoawq"],
"aqlm": ["aqlm[gpu]>=1.1.0"],
"vllm": ["vllm>=0.4.3"],
"galore": ["galore-torch"],
"badam": ["badam>=1.2.1"],
"qwen": ["transformers_stream_generator"],
"modelscope": ["modelscope"],
"dev": ["ruff", "pytest"],

View File

@@ -1,3 +1,17 @@
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import uvicorn

View File

@@ -1,4 +1,39 @@
# Level: api, webui > chat, eval, train > data, model > hparams > extras
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
r"""
Efficient fine-tuning of large language models.
Level:
api, webui > chat, eval, train > data, model > hparams > extras
Dependency graph:
main:
transformers>=4.41.2
datasets>=2.16.0
accelerate>=0.30.1
peft>=0.11.1
trl>=0.8.6
attention:
transformers>=4.42.4 (gemma+fa2)
longlora:
transformers>=4.41.2,<=4.42.4
packing:
transformers>=4.41.2,<=4.42.4
patcher:
transformers==4.41.2 (chatglm)
"""
from .cli import VERSION

View File

@@ -1,3 +1,17 @@
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from contextlib import asynccontextmanager
from typing import Optional

View File

@@ -1,3 +1,17 @@
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import base64
import io
import json
@@ -78,9 +92,11 @@ def _process_request(
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid role")
if message.role == Role.ASSISTANT and isinstance(message.tool_calls, list) and len(message.tool_calls):
name = message.tool_calls[0].function.name
arguments = message.tool_calls[0].function.arguments
content = json.dumps({"name": name, "argument": arguments}, ensure_ascii=False)
tool_calls = [
{"name": tool_call.function.name, "arguments": tool_call.function.arguments}
for tool_call in message.tool_calls
]
content = json.dumps(tool_calls, ensure_ascii=False)
input_messages.append({"role": ROLE_MAPPING[Role.FUNCTION], "content": content})
elif isinstance(message.content, list):
for input_item in message.content:
@@ -104,7 +120,7 @@ def _process_request(
if isinstance(tool_list, list) and len(tool_list):
try:
tools = json.dumps([dictify(tool.function) for tool in tool_list], ensure_ascii=False)
except Exception:
except json.JSONDecodeError:
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid tools")
else:
tools = None
@@ -146,15 +162,17 @@ async def create_chat_completion_response(
choices = []
for i, response in enumerate(responses):
if tools:
result = chat_model.engine.template.format_tools.extract(response.response_text)
result = chat_model.engine.template.extract_tool(response.response_text)
else:
result = response.response_text
if isinstance(result, tuple):
name, arguments = result
function = Function(name=name, arguments=arguments)
tool_call = FunctionCall(id="call_{}".format(uuid.uuid4().hex), function=function)
response_message = ChatCompletionMessage(role=Role.ASSISTANT, tool_calls=[tool_call])
if isinstance(result, list):
tool_calls = []
for tool in result:
function = Function(name=tool[0], arguments=tool[1])
tool_calls.append(FunctionCall(id="call_{}".format(uuid.uuid4().hex), function=function))
response_message = ChatCompletionMessage(role=Role.ASSISTANT, tool_calls=tool_calls)
finish_reason = Finish.TOOL
else:
response_message = ChatCompletionMessage(role=Role.ASSISTANT, content=result)

View File

@@ -1,3 +1,17 @@
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
from typing import TYPE_CHECKING, Any, Dict

View File

@@ -1,3 +1,17 @@
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time
from enum import Enum, unique
from typing import Any, Dict, List, Optional, Union
@@ -82,7 +96,7 @@ class ChatCompletionRequest(BaseModel):
model: str
messages: List[ChatMessage]
tools: Optional[List[FunctionAvailable]] = None
do_sample: bool = True
do_sample: Optional[bool] = None
temperature: Optional[float] = None
top_p: Optional[float] = None
n: int = 1

View File

@@ -1,3 +1,17 @@
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .base_engine import BaseEngine
from .chat_model import ChatModel

View File

@@ -1,3 +1,17 @@
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, AsyncGenerator, Dict, List, Literal, Optional, Sequence, Union
@@ -36,11 +50,6 @@ class BaseEngine(ABC):
generating_args: "GeneratingArguments",
) -> None: ...
@abstractmethod
async def start(
self,
) -> None: ...
@abstractmethod
async def chat(
self,

View File

@@ -1,4 +1,22 @@
# Copyright 2024 THUDM and the LlamaFactory team.
#
# This code is inspired by the THUDM's ChatGLM implementation.
# https://github.com/THUDM/ChatGLM-6B/blob/main/cli_demo.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import asyncio
import os
from threading import Thread
from typing import TYPE_CHECKING, Any, AsyncGenerator, Dict, Generator, List, Optional, Sequence
@@ -14,7 +32,7 @@ if TYPE_CHECKING:
from .base_engine import BaseEngine, Response
def _start_background_loop(loop: asyncio.AbstractEventLoop) -> None:
def _start_background_loop(loop: "asyncio.AbstractEventLoop") -> None:
asyncio.set_event_loop(loop)
loop.run_forever()
@@ -32,7 +50,6 @@ class ChatModel:
self._loop = asyncio.new_event_loop()
self._thread = Thread(target=_start_background_loop, args=(self._loop,), daemon=True)
self._thread.start()
asyncio.run_coroutine_threadsafe(self.engine.start(), self._loop)
def chat(
self,
@@ -99,13 +116,11 @@ class ChatModel:
def run_chat() -> None:
try:
import platform
if platform.system() != "Windows":
if os.name != "nt":
try:
import readline # noqa: F401
except ImportError:
print("Install `readline` for a better experience.")
except ImportError:
print("Install `readline` for a better experience.")
chat_model = ChatModel()
messages = []

View File

@@ -1,3 +1,17 @@
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import asyncio
import concurrent.futures
import os
@@ -40,11 +54,19 @@ class HuggingfaceEngine(BaseEngine):
self.tokenizer = tokenizer_module["tokenizer"]
self.processor = tokenizer_module["processor"]
self.tokenizer.padding_side = "left" if self.can_generate else "right"
self.template = get_template_and_fix_tokenizer(self.tokenizer, data_args.template)
self.template = get_template_and_fix_tokenizer(self.tokenizer, data_args.template, data_args.tool_format)
self.model = load_model(
self.tokenizer, model_args, finetuning_args, is_trainable=False, add_valuehead=(not self.can_generate)
) # must after fixing tokenizer to resize vocab
self.generating_args = generating_args.to_dict()
try:
asyncio.get_event_loop()
except RuntimeError:
logger.warning("There is no current event loop, creating a new one.")
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
self.semaphore = asyncio.Semaphore(int(os.environ.get("MAX_CONCURRENT", "1")))
@staticmethod
def _process_args(
@@ -97,7 +119,7 @@ class HuggingfaceEngine(BaseEngine):
stop: Optional[Union[str, List[str]]] = input_kwargs.pop("stop", None)
if stop is not None:
logger.warning("Stop parameter is not supported in Huggingface engine yet.")
logger.warning("Stop parameter is not supported by the huggingface engine yet.")
generating_args = generating_args.copy()
generating_args.update(
@@ -245,9 +267,6 @@ class HuggingfaceEngine(BaseEngine):
return scores
async def start(self) -> None:
self._semaphore = asyncio.Semaphore(int(os.environ.get("MAX_CONCURRENT", 1)))
async def chat(
self,
messages: Sequence[Dict[str, str]],
@@ -272,7 +291,7 @@ class HuggingfaceEngine(BaseEngine):
image,
input_kwargs,
)
async with self._semaphore:
async with self.semaphore:
with concurrent.futures.ThreadPoolExecutor() as pool:
return await loop.run_in_executor(pool, self._chat, *input_args)
@@ -300,7 +319,7 @@ class HuggingfaceEngine(BaseEngine):
image,
input_kwargs,
)
async with self._semaphore:
async with self.semaphore:
with concurrent.futures.ThreadPoolExecutor() as pool:
stream = self._stream_chat(*input_args)
while True:
@@ -319,6 +338,6 @@ class HuggingfaceEngine(BaseEngine):
loop = asyncio.get_running_loop()
input_args = (self.model, self.tokenizer, batch_input, input_kwargs)
async with self._semaphore:
async with self.semaphore:
with concurrent.futures.ThreadPoolExecutor() as pool:
return await loop.run_in_executor(pool, self._get_scores, *input_args)

View File

@@ -1,11 +1,26 @@
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import uuid
from typing import TYPE_CHECKING, AsyncGenerator, AsyncIterator, Dict, List, Optional, Sequence, Union
from typing import TYPE_CHECKING, Any, AsyncGenerator, AsyncIterator, Dict, List, Optional, Sequence, Union
from ..data import get_template_and_fix_tokenizer
from ..extras.logging import get_logger
from ..extras.misc import get_device_count
from ..extras.packages import is_vllm_available
from ..extras.packages import is_vllm_available, is_vllm_version_greater_than_0_5, is_vllm_version_greater_than_0_5_1
from ..model import load_config, load_tokenizer
from ..model.model_utils.quantization import QuantizationMethod
from ..model.model_utils.visual import LlavaMultiModalProjectorForYiVLForVLLM
from .base_engine import BaseEngine, Response
@@ -13,7 +28,13 @@ from .base_engine import BaseEngine, Response
if is_vllm_available():
from vllm import AsyncEngineArgs, AsyncLLMEngine, RequestOutput, SamplingParams
from vllm.lora.request import LoRARequest
from vllm.sequence import MultiModalData
if is_vllm_version_greater_than_0_5_1():
pass
elif is_vllm_version_greater_than_0_5():
from vllm.multimodal.image import ImagePixelData
else:
from vllm.sequence import MultiModalData
if TYPE_CHECKING:
@@ -35,20 +56,25 @@ class VllmEngine(BaseEngine):
generating_args: "GeneratingArguments",
) -> None:
config = load_config(model_args) # may download model from ms hub
if getattr(config, "quantization_config", None): # gptq models should use float16
quantization_config: Dict[str, Any] = getattr(config, "quantization_config", None)
quant_method = quantization_config.get("quant_method", "")
if quant_method == QuantizationMethod.GPTQ and model_args.infer_dtype == "auto":
model_args.infer_dtype = "float16"
self.can_generate = finetuning_args.stage == "sft"
tokenizer_module = load_tokenizer(model_args)
self.tokenizer = tokenizer_module["tokenizer"]
self.processor = tokenizer_module["processor"]
self.tokenizer.padding_side = "left"
self.template = get_template_and_fix_tokenizer(self.tokenizer, data_args.template)
self.template = get_template_and_fix_tokenizer(self.tokenizer, data_args.template, data_args.tool_format)
self.generating_args = generating_args.to_dict()
engine_args = {
"model": model_args.model_name_or_path,
"trust_remote_code": True,
"download_dir": model_args.cache_dir,
"dtype": model_args.vllm_dtype,
"dtype": model_args.infer_dtype,
"max_model_len": model_args.vllm_maxlen,
"tensor_parallel_size": get_device_count() or 1,
"gpu_memory_utilization": model_args.vllm_gpu_util,
@@ -106,7 +132,12 @@ class VllmEngine(BaseEngine):
if self.processor is not None and image is not None: # add image features
image_processor: "BaseImageProcessor" = getattr(self.processor, "image_processor")
pixel_values = image_processor(image, return_tensors="pt")["pixel_values"]
multi_modal_data = MultiModalData(type=MultiModalData.Type.IMAGE, data=pixel_values)
if is_vllm_version_greater_than_0_5_1():
multi_modal_data = {"image": pixel_values}
elif is_vllm_version_greater_than_0_5():
multi_modal_data = ImagePixelData(image=pixel_values)
else: # TODO: remove vllm 0.4.3 support
multi_modal_data = MultiModalData(type=MultiModalData.Type.IMAGE, data=pixel_values)
else:
multi_modal_data = None
@@ -162,9 +193,6 @@ class VllmEngine(BaseEngine):
)
return result_generator
async def start(self) -> None:
pass
async def chat(
self,
messages: Sequence[Dict[str, str]],

View File

@@ -1,3 +1,17 @@
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import random
import subprocess
@@ -60,7 +74,7 @@ class Command(str, Enum):
def main():
command = sys.argv.pop(1)
command = sys.argv.pop(1) if len(sys.argv) != 1 else Command.HELP
if command == Command.API:
run_api()
elif command == Command.CHAT:
@@ -77,7 +91,7 @@ def main():
master_addr = os.environ.get("MASTER_ADDR", "127.0.0.1")
master_port = os.environ.get("MASTER_PORT", str(random.randint(20001, 29999)))
logger.info("Initializing distributed tasks at: {}:{}".format(master_addr, master_port))
subprocess.run(
process = subprocess.run(
(
"torchrun --nnodes {nnodes} --node_rank {node_rank} --nproc_per_node {nproc_per_node} "
"--master_addr {master_addr} --master_port {master_port} {file_name} {args}"
@@ -92,6 +106,7 @@ def main():
),
shell=True,
)
sys.exit(process.returncode)
else:
run_exp()
elif command == Command.WEBDEMO:

View File

@@ -1,4 +1,18 @@
from .collator import KTODataCollatorWithPadding, PairwiseDataCollatorWithPadding
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .collator import KTODataCollatorWithPadding, PairwiseDataCollatorWithPadding, SFTDataCollatorWith4DAttentionMask
from .data_utils import Role, split_dataset
from .loader import get_dataset
from .template import TEMPLATES, Template, get_template_and_fix_tokenizer
@@ -7,6 +21,7 @@ from .template import TEMPLATES, Template, get_template_and_fix_tokenizer
__all__ = [
"KTODataCollatorWithPadding",
"PairwiseDataCollatorWithPadding",
"SFTDataCollatorWith4DAttentionMask",
"Role",
"split_dataset",
"get_dataset",

View File

@@ -1,3 +1,17 @@
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from functools import partial
from typing import TYPE_CHECKING, Any, Dict, List, Union
@@ -10,6 +24,7 @@ from .data_utils import Role
if TYPE_CHECKING:
from datasets import Dataset, IterableDataset
from transformers import Seq2SeqTrainingArguments
from ..hparams import DataArguments
from .parser import DatasetAttr
@@ -175,7 +190,10 @@ def convert_sharegpt(
def align_dataset(
dataset: Union["Dataset", "IterableDataset"], dataset_attr: "DatasetAttr", data_args: "DataArguments"
dataset: Union["Dataset", "IterableDataset"],
dataset_attr: "DatasetAttr",
data_args: "DataArguments",
training_args: "Seq2SeqTrainingArguments",
) -> Union["Dataset", "IterableDataset"]:
r"""
Aligned dataset:
@@ -208,7 +226,7 @@ def align_dataset(
if not data_args.streaming:
kwargs = dict(
num_proc=data_args.preprocessing_num_workers,
load_from_cache_file=(not data_args.overwrite_cache),
load_from_cache_file=(not data_args.overwrite_cache) or (training_args.local_process_index != 0),
desc="Converting format of dataset",
)

View File

@@ -1,17 +1,91 @@
# Copyright 2024 OpenAccess AI Collective and the LlamaFactory team.
#
# This code is inspired by the OpenAccess AI Collective's axolotl library.
# https://github.com/OpenAccess-AI-Collective/axolotl/blob/main/src/axolotl/monkeypatch/utils.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from dataclasses import dataclass
from typing import Any, Dict, Sequence
from typing import Any, Dict, Literal, Sequence
import torch
from transformers import DataCollatorForSeq2Seq
def prepare_4d_attention_mask(attention_mask_with_indices: "torch.Tensor", dtype: "torch.dtype") -> "torch.Tensor":
r"""
Expands the attention mask with indices from (batch_size, seq_len) to (batch_size, 1, seq_len, seq_len),
while handles packed sequences and transforms the mask to lower triangular form to prevent future peeking.
e.g.
```python
# input
[[1, 1, 2, 2, 2, 0]]
# output
[
[
[
[o, x, x, x, x, x],
[o, o, x, x, x, x],
[x, x, o, x, x, x],
[x, x, o, o, x, x],
[x, x, o, o, o, x],
[x, x, x, x, x, x],
]
]
]
```
where `o` equals to `0.0`, `x` equals to `min_dtype`.
"""
bsz, seq_len = attention_mask_with_indices.size()
min_dtype = torch.finfo(dtype).min
expanded_mask = attention_mask_with_indices[:, None, None, :].expand(bsz, 1, seq_len, seq_len)
# Create a binary mask from the original mask where zeros remain zeros and all other values are set to one
padding_mask = torch.where(expanded_mask != 0, 1, 0)
# Create a block-diagonal mask.
attention_mask_4d = torch.eq(expanded_mask, expanded_mask.transpose(-1, -2)).int() * padding_mask
# Use the lower triangular mask to zero out the upper triangular part
attention_mask_4d *= torch.tril(torch.ones((seq_len, seq_len), dtype=torch.long))
# Invert the attention mask.
attention_mask_4d = torch.where(attention_mask_4d != 0, torch.tensor(0, dtype=dtype), min_dtype)
return attention_mask_4d
@dataclass
class SFTDataCollatorWith4DAttentionMask(DataCollatorForSeq2Seq):
r"""
Data collator for 4d attention mask.
"""
block_diag_attn: bool = False
attn_implementation: Literal["eager", "sdpa", "flash_attention_2"] = "eager"
compute_dtype: "torch.dtype" = torch.float32
def __call__(self, features: Sequence[Dict[str, Any]]) -> Dict[str, "torch.Tensor"]:
features = super().__call__(features)
if self.block_diag_attn and self.attn_implementation != "flash_attention_2":
features["attention_mask"] = prepare_4d_attention_mask(features["attention_mask"], self.compute_dtype)
return features
@dataclass
class PairwiseDataCollatorWithPadding(DataCollatorForSeq2Seq):
r"""
Data collator for pairwise data.
"""
def __call__(self, features: Sequence[Dict[str, Any]]) -> Dict[str, torch.Tensor]:
def __call__(self, features: Sequence[Dict[str, Any]]) -> Dict[str, "torch.Tensor"]:
r"""
Pads batched data to the longest sequence in the batch.
@@ -43,7 +117,7 @@ class KTODataCollatorWithPadding(DataCollatorForSeq2Seq):
Data collator for KTO data.
"""
def __call__(self, features: Sequence[Dict[str, Any]]) -> Dict[str, torch.Tensor]:
def __call__(self, features: Sequence[Dict[str, Any]]) -> Dict[str, "torch.Tensor"]:
target_features = []
kl_features = []
kto_tags = []

View File

@@ -1,14 +1,27 @@
from enum import Enum, unique
from typing import TYPE_CHECKING, Dict, List, Tuple, Union
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from datasets import concatenate_datasets, interleave_datasets
from enum import Enum, unique
from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Set, TypedDict, Union
from datasets import DatasetDict, concatenate_datasets, interleave_datasets
from ..extras.logging import get_logger
if TYPE_CHECKING:
from datasets import Dataset, IterableDataset
from transformers import Seq2SeqTrainingArguments
from ..hparams import DataArguments
@@ -16,6 +29,9 @@ if TYPE_CHECKING:
logger = get_logger(__name__)
SLOTS = Sequence[Union[str, Set[str], Dict[str, str]]]
@unique
class Role(str, Enum):
USER = "user"
@@ -25,31 +41,29 @@ class Role(str, Enum):
OBSERVATION = "observation"
def infer_max_len(source_len: int, target_len: int, max_len: int, reserved_label_len: int) -> Tuple[int, int]:
max_target_len = int(max_len * (target_len / (source_len + target_len)))
max_target_len = max(max_target_len, reserved_label_len)
max_source_len = max_len - min(max_target_len, target_len)
return max_source_len, max_target_len
class DatasetModule(TypedDict):
train_dataset: Optional[Union["Dataset", "IterableDataset"]]
eval_dataset: Optional[Union["Dataset", "IterableDataset"]]
def merge_dataset(
all_datasets: List[Union["Dataset", "IterableDataset"]],
data_args: "DataArguments",
training_args: "Seq2SeqTrainingArguments",
all_datasets: List[Union["Dataset", "IterableDataset"]], data_args: "DataArguments", seed: int
) -> Union["Dataset", "IterableDataset"]:
if len(all_datasets) == 1:
return all_datasets[0]
elif data_args.mix_strategy == "concat":
if data_args.streaming:
logger.warning("The samples between different datasets will not be mixed in streaming mode.")
return concatenate_datasets(all_datasets)
elif data_args.mix_strategy.startswith("interleave"):
if not data_args.streaming:
logger.warning("We recommend using `mix_strategy=concat` in non-streaming mode.")
return interleave_datasets(
datasets=all_datasets,
probabilities=data_args.interleave_probs,
seed=training_args.seed,
seed=seed,
stopping_strategy="first_exhausted" if data_args.mix_strategy.endswith("under") else "all_exhausted",
)
else:
@@ -57,22 +71,17 @@ def merge_dataset(
def split_dataset(
dataset: Union["Dataset", "IterableDataset"], data_args: "DataArguments", training_args: "Seq2SeqTrainingArguments"
) -> Dict[str, "Dataset"]:
if training_args.do_train:
if data_args.val_size > 1e-6: # Split the dataset
if data_args.streaming:
dataset = dataset.shuffle(buffer_size=data_args.buffer_size, seed=training_args.seed)
val_set = dataset.take(int(data_args.val_size))
train_set = dataset.skip(int(data_args.val_size))
return {"train_dataset": train_set, "eval_dataset": val_set}
else:
val_size = int(data_args.val_size) if data_args.val_size > 1 else data_args.val_size
dataset = dataset.train_test_split(test_size=val_size, seed=training_args.seed)
return {"train_dataset": dataset["train"], "eval_dataset": dataset["test"]}
else:
if data_args.streaming:
dataset = dataset.shuffle(buffer_size=data_args.buffer_size, seed=training_args.seed)
return {"train_dataset": dataset}
else: # do_eval or do_predict
return {"eval_dataset": dataset}
dataset: Union["Dataset", "IterableDataset"], data_args: "DataArguments", seed: int
) -> "DatasetDict":
r"""
Splits the dataset and returns a dataset dict containing train set (required) and validation set (optional).
"""
if data_args.streaming:
dataset = dataset.shuffle(buffer_size=data_args.buffer_size, seed=seed)
val_set = dataset.take(int(data_args.val_size))
train_set = dataset.skip(int(data_args.val_size))
return DatasetDict({"train": train_set, "validation": val_set})
else:
val_size = int(data_args.val_size) if data_args.val_size > 1 else data_args.val_size
dataset = dataset.train_test_split(test_size=val_size, seed=seed)
return DatasetDict({"train": dataset["train"], "validation": dataset["test"]})

View File

@@ -1,83 +1,36 @@
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import re
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any, Dict, List, Literal, Optional, Sequence, Set, Tuple, Union
from typing import List, Literal, Optional, Tuple, Union
SLOTS = Sequence[Union[str, Set[str], Dict[str, str]]]
JSON_FORMAT_PROMPT = (
""", in a JSON format representing the kwargs (e.g. ```{"input": "hello world", "num_beams": 5}```)"""
)
TOOL_SYSTEM_PROMPT = (
"You have access to the following tools:\n{tool_text}"
"Use the following format if using a tool:\n"
"```\n"
"Action: tool name (one of [{tool_names}]).\n"
"Action Input: the input to the tool{format_prompt}.\n"
"```\n"
)
def default_tool_formatter(tools: List[Dict[str, Any]]) -> str:
tool_text = ""
tool_names = []
for tool in tools:
param_text = ""
for name, param in tool["parameters"]["properties"].items():
required = ", required" if name in tool["parameters"].get("required", []) else ""
enum = ", should be one of [{}]".format(", ".join(param["enum"])) if param.get("enum", None) else ""
items = (
", where each item should be {}".format(param["items"].get("type", "")) if param.get("items") else ""
)
param_text += " - {name} ({type}{required}): {desc}{enum}{items}\n".format(
name=name,
type=param.get("type", ""),
required=required,
desc=param.get("description", ""),
enum=enum,
items=items,
)
tool_text += "> Tool Name: {name}\nTool Description: {desc}\nTool Args:\n{args}\n".format(
name=tool["name"], desc=tool.get("description", ""), args=param_text
)
tool_names.append(tool["name"])
return TOOL_SYSTEM_PROMPT.format(
tool_text=tool_text, tool_names=", ".join(tool_names), format_prompt=JSON_FORMAT_PROMPT
)
def default_tool_extractor(content: str) -> Union[str, Tuple[str, str]]:
regex = re.compile(r"Action:\s*([a-zA-Z0-9_]+).*?Action Input:\s*(.*)", re.DOTALL)
action_match = re.search(regex, content)
if not action_match:
return content
tool_name = action_match.group(1).strip()
tool_input = action_match.group(2).strip().strip('"').strip("```")
try:
arguments = json.loads(tool_input)
except json.JSONDecodeError:
return content
return tool_name, json.dumps(arguments, ensure_ascii=False)
from .data_utils import SLOTS
from .tool_utils import DefaultToolUtils, GLM4ToolUtils
@dataclass
class Formatter(ABC):
slots: SLOTS = field(default_factory=list)
tool_format: Optional[Literal["default"]] = None
tool_format: Optional[Literal["default", "glm4"]] = None
@abstractmethod
def apply(self, **kwargs) -> SLOTS: ...
def extract(self, content: str) -> Union[str, Tuple[str, str]]:
def extract(self, content: str) -> Union[str, List[Tuple[str, str]]]:
raise NotImplementedError
@@ -128,34 +81,37 @@ class StringFormatter(Formatter):
@dataclass
class FunctionFormatter(Formatter):
def __post_init__(self):
has_name, has_args = False, False
for slot in filter(lambda s: isinstance(s, str), self.slots):
if "{{name}}" in slot:
has_name = True
if "{{arguments}}" in slot:
has_args = True
if not has_name or not has_args:
raise ValueError("Name and arguments placeholders are required in the function formatter.")
if self.tool_format == "default":
self.slots = DefaultToolUtils.get_function_slots() + self.slots
elif self.tool_format == "glm4":
self.slots = GLM4ToolUtils.get_function_slots() + self.slots
else:
raise NotImplementedError("Tool format {} was not found.".format(self.tool_format))
def apply(self, **kwargs) -> SLOTS:
content = kwargs.pop("content")
functions: List[Tuple[str, str]] = []
try:
function = json.loads(content)
name = function["name"]
arguments = json.dumps(function["arguments"], ensure_ascii=False)
except Exception:
name, arguments = "", ""
tool_calls = json.loads(content)
if not isinstance(tool_calls, list): # parallel function call
tool_calls = [tool_calls]
for tool_call in tool_calls:
functions.append((tool_call["name"], json.dumps(tool_call["arguments"], ensure_ascii=False)))
except json.JSONDecodeError:
functions = []
elements = []
for slot in self.slots:
if isinstance(slot, str):
slot = slot.replace("{{name}}", name).replace("{{arguments}}", arguments)
elements.append(slot)
elif isinstance(slot, (dict, set)):
elements.append(slot)
else:
raise RuntimeError("Input must be string, set[str] or dict[str, str], got {}".format(type(slot)))
for name, arguments in functions:
for slot in self.slots:
if isinstance(slot, str):
slot = slot.replace("{{name}}", name).replace("{{arguments}}", arguments)
elements.append(slot)
elif isinstance(slot, (dict, set)):
elements.append(slot)
else:
raise RuntimeError("Input must be string, set[str] or dict[str, str], got {}".format(type(slot)))
return elements
@@ -163,25 +119,22 @@ class FunctionFormatter(Formatter):
@dataclass
class ToolFormatter(Formatter):
def __post_init__(self):
if self.tool_format is None:
raise ValueError("Tool format was not found.")
if self.tool_format == "default":
self._tool_formatter = DefaultToolUtils.tool_formatter
self._tool_extractor = DefaultToolUtils.tool_extractor
elif self.tool_format == "glm4":
self._tool_formatter = GLM4ToolUtils.tool_formatter
self._tool_extractor = GLM4ToolUtils.tool_extractor
else:
raise NotImplementedError("Tool format {} was not found.".format(self.tool_format))
def apply(self, **kwargs) -> SLOTS:
content = kwargs.pop("content")
try:
tools = json.loads(content)
if not len(tools):
return [""]
if self.tool_format == "default":
return [default_tool_formatter(tools)]
else:
raise NotImplementedError
except Exception:
return [self._tool_formatter(tools) if len(tools) != 0 else ""]
except json.JSONDecodeError:
return [""]
def extract(self, content: str) -> Union[str, Tuple[str, str]]:
if self.tool_format == "default":
return default_tool_extractor(content)
else:
raise NotImplementedError
def extract(self, content: str) -> Union[str, List[Tuple[str, str]]]:
return self._tool_extractor(content)

View File

@@ -1,16 +1,30 @@
import inspect
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
from typing import TYPE_CHECKING, Literal, Optional, Union
from typing import TYPE_CHECKING, Dict, Literal, Optional, Sequence, Union
import numpy as np
from datasets import load_dataset, load_from_disk
from datasets import DatasetDict, load_dataset, load_from_disk
from transformers.utils.versions import require_version
from ..extras.constants import FILEEXT2TYPE
from ..extras.logging import get_logger
from ..extras.misc import has_tokenized_data
from .aligner import align_dataset
from .data_utils import merge_dataset
from .data_utils import merge_dataset, split_dataset
from .parser import get_dataset_list
from .preprocess import get_preprocess_and_print_func
from .template import get_template_and_fix_tokenizer
@@ -18,20 +32,22 @@ from .template import get_template_and_fix_tokenizer
if TYPE_CHECKING:
from datasets import Dataset, IterableDataset
from transformers import ProcessorMixin, Seq2SeqTrainingArguments
from transformers.tokenization_utils import PreTrainedTokenizer
from transformers import PreTrainedTokenizer, ProcessorMixin, Seq2SeqTrainingArguments
from ..hparams import DataArguments, ModelArguments
from .data_utils import DatasetModule
from .parser import DatasetAttr
from .template import Template
logger = get_logger(__name__)
def load_single_dataset(
def _load_single_dataset(
dataset_attr: "DatasetAttr",
model_args: "ModelArguments",
data_args: "DataArguments",
training_args: "Seq2SeqTrainingArguments",
) -> Union["Dataset", "IterableDataset"]:
logger.info("Loading dataset {}...".format(dataset_attr))
data_path, data_name, data_dir, data_files = None, None, None, None
@@ -67,41 +83,34 @@ def load_single_dataset(
raise NotImplementedError("Unknown load type: {}.".format(dataset_attr.load_from))
if dataset_attr.load_from == "ms_hub":
try:
from modelscope import MsDataset
from modelscope.utils.config_ds import MS_DATASETS_CACHE
require_version("modelscope>=1.11.0", "To fix: pip install modelscope>=1.11.0")
from modelscope import MsDataset
from modelscope.utils.config_ds import MS_DATASETS_CACHE
cache_dir = model_args.cache_dir or MS_DATASETS_CACHE
dataset = MsDataset.load(
dataset_name=data_path,
subset_name=data_name,
data_dir=data_dir,
data_files=data_files,
split=data_args.split,
cache_dir=cache_dir,
token=model_args.ms_hub_token,
use_streaming=(data_args.streaming and (dataset_attr.load_from != "file")),
)
if isinstance(dataset, MsDataset):
dataset = dataset.to_hf_dataset()
except ImportError:
raise ImportError("Please install modelscope via `pip install modelscope -U`")
cache_dir = model_args.cache_dir or MS_DATASETS_CACHE
dataset = MsDataset.load(
dataset_name=data_path,
subset_name=data_name,
data_dir=data_dir,
data_files=data_files,
split=dataset_attr.split,
cache_dir=cache_dir,
token=model_args.ms_hub_token,
use_streaming=(data_args.streaming and (dataset_attr.load_from != "file")),
)
if isinstance(dataset, MsDataset):
dataset = dataset.to_hf_dataset()
else:
if "trust_remote_code" in inspect.signature(load_dataset).parameters: # for datasets==2.16.0
kwargs = {"trust_remote_code": True}
else:
kwargs = {}
dataset = load_dataset(
path=data_path,
name=data_name,
data_dir=data_dir,
data_files=data_files,
split=data_args.split,
split=dataset_attr.split,
cache_dir=model_args.cache_dir,
token=model_args.hf_hub_token,
streaming=(data_args.streaming and (dataset_attr.load_from != "file")),
**kwargs,
trust_remote_code=True,
)
if data_args.streaming and (dataset_attr.load_from == "file"): # faster than specifying streaming=True
@@ -123,7 +132,67 @@ def load_single_dataset(
max_samples = min(data_args.max_samples, len(dataset))
dataset = dataset.select(range(max_samples))
return align_dataset(dataset, dataset_attr, data_args)
return align_dataset(dataset, dataset_attr, data_args, training_args)
def _get_merged_dataset(
dataset_names: Optional[Sequence[str]],
model_args: "ModelArguments",
data_args: "DataArguments",
training_args: "Seq2SeqTrainingArguments",
stage: Literal["pt", "sft", "rm", "ppo", "kto"],
) -> Optional[Union["Dataset", "IterableDataset"]]:
if dataset_names is None:
return None
datasets = []
for dataset_attr in get_dataset_list(dataset_names, data_args.dataset_dir):
if (stage == "rm" and dataset_attr.ranking is False) or (stage != "rm" and dataset_attr.ranking is True):
raise ValueError("The dataset is not applicable in the current training stage.")
datasets.append(_load_single_dataset(dataset_attr, model_args, data_args, training_args))
return merge_dataset(datasets, data_args, seed=training_args.seed)
def _get_preprocessed_dataset(
dataset: Optional[Union["Dataset", "IterableDataset"]],
data_args: "DataArguments",
training_args: "Seq2SeqTrainingArguments",
stage: Literal["pt", "sft", "rm", "ppo", "kto"],
template: "Template",
tokenizer: "PreTrainedTokenizer",
processor: Optional["ProcessorMixin"] = None,
is_eval: bool = False,
) -> Optional[Union["Dataset", "IterableDataset"]]:
if dataset is None:
return None
preprocess_func, print_function = get_preprocess_and_print_func(
data_args, stage, template, tokenizer, processor, do_generate=(training_args.predict_with_generate and is_eval)
)
column_names = list(next(iter(dataset)).keys())
kwargs = {}
if not data_args.streaming:
kwargs = dict(
num_proc=data_args.preprocessing_num_workers,
load_from_cache_file=(not data_args.overwrite_cache) or (training_args.local_process_index != 0),
desc="Running tokenizer on dataset",
)
dataset = dataset.map(preprocess_func, batched=True, remove_columns=column_names, **kwargs)
if training_args.should_log:
try:
print("eval example:" if is_eval else "training example:")
print_function(next(iter(dataset)))
except StopIteration:
if stage == "pt":
raise RuntimeError("Cannot find sufficient samples, consider increasing dataset size.")
else:
raise RuntimeError("Cannot find valid samples, check `data/README.md` for the data format.")
return dataset
def get_dataset(
@@ -133,8 +202,8 @@ def get_dataset(
stage: Literal["pt", "sft", "rm", "ppo", "kto"],
tokenizer: "PreTrainedTokenizer",
processor: Optional["ProcessorMixin"] = None,
) -> Union["Dataset", "IterableDataset"]:
template = get_template_and_fix_tokenizer(tokenizer, data_args.template)
) -> "DatasetModule":
template = get_template_and_fix_tokenizer(tokenizer, data_args.template, data_args.tool_format)
if data_args.train_on_prompt and template.efficient_eos:
raise ValueError("Current template does not support `train_on_prompt`.")
@@ -142,54 +211,66 @@ def get_dataset(
if data_args.tokenized_path is not None:
if has_tokenized_data(data_args.tokenized_path):
logger.warning("Loading dataset from disk will ignore other data arguments.")
dataset = load_from_disk(data_args.tokenized_path)
dataset_dict: "DatasetDict" = load_from_disk(data_args.tokenized_path)
logger.info("Loaded tokenized dataset from {}.".format(data_args.tokenized_path))
dataset_module: Dict[str, "Dataset"] = {}
if "train" in dataset_dict:
dataset_module["train_dataset"] = dataset_dict["train"]
if "validation" in dataset_dict:
dataset_module["eval_dataset"] = dataset_dict["validation"]
if data_args.streaming:
dataset = dataset.to_iterable_dataset()
return dataset
dataset_module = {k: v.to_iterable_dataset() for k, v in dataset_module.items()}
return dataset_module
if data_args.streaming:
raise ValueError("Turn off `streaming` when saving dataset to disk.")
# Load and preprocess dataset
with training_args.main_process_first(desc="load dataset"):
all_datasets = []
for dataset_attr in get_dataset_list(data_args):
if (stage == "rm" and dataset_attr.ranking is False) or (stage != "rm" and dataset_attr.ranking is True):
raise ValueError("The dataset is not applicable in the current training stage.")
all_datasets.append(load_single_dataset(dataset_attr, model_args, data_args))
dataset = merge_dataset(all_datasets, data_args, training_args)
dataset = _get_merged_dataset(data_args.dataset, model_args, data_args, training_args, stage)
eval_dataset = _get_merged_dataset(data_args.eval_dataset, model_args, data_args, training_args, stage)
with training_args.main_process_first(desc="pre-process dataset"):
preprocess_func, print_function = get_preprocess_and_print_func(
data_args, training_args, stage, template, tokenizer, processor
dataset = _get_preprocessed_dataset(
dataset, data_args, training_args, stage, template, tokenizer, processor, is_eval=False
)
eval_dataset = _get_preprocessed_dataset(
eval_dataset, data_args, training_args, stage, template, tokenizer, processor, is_eval=True
)
column_names = list(next(iter(dataset)).keys())
kwargs = {}
if not data_args.streaming:
kwargs = dict(
num_proc=data_args.preprocessing_num_workers,
load_from_cache_file=(not data_args.overwrite_cache),
desc="Running tokenizer on dataset",
)
dataset = dataset.map(preprocess_func, batched=True, remove_columns=column_names, **kwargs)
if data_args.val_size > 1e-6:
dataset_dict = split_dataset(dataset, data_args, seed=training_args.seed)
else:
dataset_dict = {}
if dataset is not None:
if data_args.streaming:
dataset = dataset.shuffle(buffer_size=data_args.buffer_size, seed=training_args.seed)
dataset_dict["train"] = dataset
if eval_dataset is not None:
if data_args.streaming:
eval_dataset = eval_dataset.shuffle(buffer_size=data_args.buffer_size, seed=training_args.seed)
dataset_dict["validation"] = eval_dataset
dataset_dict = DatasetDict(dataset_dict)
if data_args.tokenized_path is not None:
if training_args.should_save:
dataset.save_to_disk(data_args.tokenized_path)
dataset_dict.save_to_disk(data_args.tokenized_path)
logger.info("Tokenized dataset saved at {}.".format(data_args.tokenized_path))
logger.info("Please restart the training with `tokenized_path: {}`.".format(data_args.tokenized_path))
sys.exit(0)
if training_args.should_log:
try:
print_function(next(iter(dataset)))
except StopIteration:
if stage == "pt":
raise RuntimeError("Cannot find sufficient samples, consider increasing dataset size.")
else:
raise RuntimeError("Cannot find valid samples, check `data/README.md` for the data format.")
dataset_module = {}
if "train" in dataset_dict:
dataset_module["train_dataset"] = dataset_dict["train"]
if "validation" in dataset_dict:
dataset_module["eval_dataset"] = dataset_dict["validation"]
return dataset
return dataset_module

View File

@@ -1,47 +1,60 @@
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional
from typing import Any, Dict, List, Literal, Optional, Sequence
from transformers.utils import cached_file
from ..extras.constants import DATA_CONFIG
from ..extras.misc import use_modelscope
if TYPE_CHECKING:
from ..hparams import DataArguments
@dataclass
class DatasetAttr:
r"""
Dataset attributes.
"""
""" basic configs """
# basic configs
load_from: Literal["hf_hub", "ms_hub", "script", "file"]
dataset_name: str
formatting: Literal["alpaca", "sharegpt"] = "alpaca"
ranking: bool = False
""" extra configs """
# extra configs
subset: Optional[str] = None
split: str = "train"
folder: Optional[str] = None
num_samples: Optional[int] = None
""" common columns """
# common columns
system: Optional[str] = None
tools: Optional[str] = None
images: Optional[str] = None
""" rlhf columns """
# rlhf columns
chosen: Optional[str] = None
rejected: Optional[str] = None
kto_tag: Optional[str] = None
""" alpaca columns """
# alpaca columns
prompt: Optional[str] = "instruction"
query: Optional[str] = "input"
response: Optional[str] = "output"
history: Optional[str] = None
""" sharegpt columns """
# sharegpt columns
messages: Optional[str] = "conversations"
""" sharegpt tags """
# sharegpt tags
role_tag: Optional[str] = "from"
content_tag: Optional[str] = "value"
user_tag: Optional[str] = "human"
@@ -57,31 +70,33 @@ class DatasetAttr:
setattr(self, key, obj.get(key, default))
def get_dataset_list(data_args: "DataArguments") -> List["DatasetAttr"]:
if data_args.dataset is not None:
dataset_names = [ds.strip() for ds in data_args.dataset.split(",")]
else:
def get_dataset_list(dataset_names: Optional[Sequence[str]], dataset_dir: str) -> List["DatasetAttr"]:
r"""
Gets the attributes of the datasets.
"""
if dataset_names is None:
dataset_names = []
if data_args.dataset_dir == "ONLINE":
if dataset_dir == "ONLINE":
dataset_info = None
else:
if dataset_dir.startswith("REMOTE:"):
config_path = cached_file(path_or_repo_id=dataset_dir[7:], filename=DATA_CONFIG, repo_type="dataset")
else:
config_path = os.path.join(dataset_dir, DATA_CONFIG)
try:
with open(os.path.join(data_args.dataset_dir, DATA_CONFIG), "r") as f:
with open(config_path, "r") as f:
dataset_info = json.load(f)
except Exception as err:
if len(dataset_names) != 0:
raise ValueError(
"Cannot open {} due to {}.".format(os.path.join(data_args.dataset_dir, DATA_CONFIG), str(err))
)
raise ValueError("Cannot open {} due to {}.".format(config_path, str(err)))
dataset_info = None
if data_args.interleave_probs is not None:
data_args.interleave_probs = [float(prob.strip()) for prob in data_args.interleave_probs.split(",")]
dataset_list: List[DatasetAttr] = []
dataset_list: List["DatasetAttr"] = []
for name in dataset_names:
if dataset_info is None:
if dataset_info is None: # dataset_dir is ONLINE
load_from = "ms_hub" if use_modelscope() else "hf_hub"
dataset_attr = DatasetAttr(load_from, dataset_name=name)
dataset_list.append(dataset_attr)
@@ -106,6 +121,7 @@ def get_dataset_list(data_args: "DataArguments") -> List["DatasetAttr"]:
dataset_attr.set_attr("formatting", dataset_info[name], default="alpaca")
dataset_attr.set_attr("ranking", dataset_info[name], default=False)
dataset_attr.set_attr("subset", dataset_info[name])
dataset_attr.set_attr("split", dataset_info[name], default="train")
dataset_attr.set_attr("folder", dataset_info[name])
dataset_attr.set_attr("num_samples", dataset_info[name])

View File

@@ -1,3 +1,17 @@
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from functools import partial
from typing import TYPE_CHECKING, Callable, Literal, Optional, Tuple
@@ -13,8 +27,7 @@ from .processors.unsupervised import preprocess_unsupervised_dataset, print_unsu
if TYPE_CHECKING:
from transformers import ProcessorMixin, Seq2SeqTrainingArguments
from transformers.tokenization_utils import PreTrainedTokenizer
from transformers import PreTrainedTokenizer, ProcessorMixin
from ..hparams import DataArguments
from .template import Template
@@ -22,11 +35,11 @@ if TYPE_CHECKING:
def get_preprocess_and_print_func(
data_args: "DataArguments",
training_args: "Seq2SeqTrainingArguments",
stage: Literal["pt", "sft", "rm", "ppo", "kto"],
template: "Template",
tokenizer: "PreTrainedTokenizer",
processor: Optional["ProcessorMixin"],
do_generate: bool = False,
) -> Tuple[Callable, Callable]:
if stage == "pt":
preprocess_func = partial(
@@ -35,8 +48,21 @@ def get_preprocess_and_print_func(
data_args=data_args,
)
print_function = partial(print_unsupervised_dataset_example, tokenizer=tokenizer)
elif stage == "sft" and not training_args.predict_with_generate:
elif stage == "sft" and not do_generate:
if data_args.packing:
if data_args.neat_packing:
from datasets.arrow_writer import OptimizedTypedSequence, TypedSequence
def __init__(self, data, **kwargs):
return TypedSequence.__init__(
self,
data,
type=kwargs.pop("type", None),
try_type=kwargs.pop("try_type", None),
optimized_int_type=kwargs.pop("optimized_int_type", None),
)
OptimizedTypedSequence.__init__ = __init__
preprocess_func = partial(
preprocess_packed_supervised_dataset,
template=template,

View File

@@ -1,13 +1,26 @@
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple
from ...extras.constants import IGNORE_INDEX
from ...extras.logging import get_logger
from .processor_utils import get_paligemma_token_type_ids, get_pixel_values
from .processor_utils import get_paligemma_token_type_ids, get_pixel_values, infer_seqlen
if TYPE_CHECKING:
from transformers import ProcessorMixin
from transformers.tokenization_utils import PreTrainedTokenizer
from transformers import PreTrainedTokenizer, ProcessorMixin
from ...hparams import DataArguments
from ..template import Template
@@ -42,12 +55,8 @@ def _encode_feedback_example(
else:
kl_messages = prompt + [kl_response[1]]
prompt_ids, response_ids = template.encode_oneturn(
tokenizer, messages, system, tools, data_args.cutoff_len, data_args.reserved_label_len
)
_, kl_response_ids = template.encode_oneturn(
tokenizer, kl_messages, system, tools, data_args.cutoff_len, data_args.reserved_label_len
)
prompt_ids, response_ids = template.encode_oneturn(tokenizer, messages, system, tools)
kl_prompt_ids, kl_response_ids = template.encode_oneturn(tokenizer, kl_messages, system, tools)
if template.efficient_eos:
response_ids += [tokenizer.eos_token_id]
@@ -56,11 +65,19 @@ def _encode_feedback_example(
if processor is not None and hasattr(processor, "image_seq_length"): # paligemma models
image_token_id = tokenizer.convert_tokens_to_ids(template.image_token)
prompt_ids = [image_token_id] * getattr(processor, "image_seq_length") + prompt_ids
kl_prompt_ids = [image_token_id] * getattr(processor, "image_seq_length") + kl_prompt_ids
source_len, target_len = infer_seqlen(len(prompt_ids), len(response_ids), data_args.cutoff_len)
prompt_ids = prompt_ids[:source_len]
response_ids = response_ids[:target_len]
kl_source_len, kl_target_len = infer_seqlen(len(kl_prompt_ids), len(kl_response_ids), data_args.cutoff_len)
kl_prompt_ids = kl_prompt_ids[:kl_source_len]
kl_response_ids = kl_response_ids[:kl_target_len]
input_ids = prompt_ids + response_ids
labels = [IGNORE_INDEX] * len(prompt_ids) + response_ids
kl_input_ids = prompt_ids + kl_response_ids
kl_labels = [IGNORE_INDEX] * len(prompt_ids) + kl_response_ids
labels = [IGNORE_INDEX] * source_len + response_ids
kl_input_ids = kl_prompt_ids + kl_response_ids
kl_labels = [IGNORE_INDEX] * kl_source_len + kl_response_ids
return input_ids, labels, kl_input_ids, kl_labels, kto_tag

View File

@@ -1,13 +1,26 @@
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple
from ...extras.constants import IGNORE_INDEX
from ...extras.logging import get_logger
from .processor_utils import get_paligemma_token_type_ids, get_pixel_values
from .processor_utils import get_paligemma_token_type_ids, get_pixel_values, infer_seqlen
if TYPE_CHECKING:
from transformers import ProcessorMixin
from transformers.tokenization_utils import PreTrainedTokenizer
from transformers import PreTrainedTokenizer, ProcessorMixin
from ...hparams import DataArguments
from ..template import Template
@@ -31,12 +44,8 @@ def _encode_pairwise_example(
chosen_messages = prompt + [response[0]]
rejected_messages = prompt + [response[1]]
prompt_ids, chosen_ids = template.encode_oneturn(
tokenizer, chosen_messages, system, tools, data_args.cutoff_len, data_args.reserved_label_len
)
_, rejected_ids = template.encode_oneturn(
tokenizer, rejected_messages, system, tools, data_args.cutoff_len, data_args.reserved_label_len
)
prompt_ids, chosen_ids = template.encode_oneturn(tokenizer, chosen_messages, system, tools)
_, rejected_ids = template.encode_oneturn(tokenizer, rejected_messages, system, tools)
if template.efficient_eos:
chosen_ids += [tokenizer.eos_token_id]
@@ -46,10 +55,17 @@ def _encode_pairwise_example(
image_token_id = tokenizer.convert_tokens_to_ids(template.image_token)
prompt_ids = [image_token_id] * getattr(processor, "image_seq_length") + prompt_ids
source_len, target_len = infer_seqlen(
len(prompt_ids), max(len(chosen_ids), len(rejected_ids)), data_args.cutoff_len
) # consider the response is more important
prompt_ids = prompt_ids[:source_len]
chosen_ids = chosen_ids[:target_len]
rejected_ids = rejected_ids[:target_len]
chosen_input_ids = prompt_ids + chosen_ids
chosen_labels = [IGNORE_INDEX] * len(prompt_ids) + chosen_ids
chosen_labels = [IGNORE_INDEX] * source_len + chosen_ids
rejected_input_ids = prompt_ids + rejected_ids
rejected_labels = [IGNORE_INDEX] * len(prompt_ids) + rejected_ids
rejected_labels = [IGNORE_INDEX] * source_len + rejected_ids
return chosen_input_ids, chosen_labels, rejected_input_ids, rejected_labels

View File

@@ -1,9 +1,26 @@
# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
# This code is inspired by the HuggingFace's transformers library.
# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from itertools import chain
from typing import TYPE_CHECKING, Any, Dict, List
if TYPE_CHECKING:
from transformers.tokenization_utils import PreTrainedTokenizer
from transformers import PreTrainedTokenizer
from ...hparams import DataArguments
@@ -12,7 +29,8 @@ def preprocess_pretrain_dataset(
examples: Dict[str, List[Any]], tokenizer: "PreTrainedTokenizer", data_args: "DataArguments"
) -> Dict[str, List[List[int]]]:
# build grouped texts with format `X1 X2 X3 ...` if packing is enabled
text_examples = [messages[0]["content"] + tokenizer.eos_token for messages in examples["prompt"]]
eos_token = "<|end_of_text|>" if data_args.template == "llama3" else tokenizer.eos_token
text_examples = [messages[0]["content"] + eos_token for messages in examples["prompt"]]
if not data_args.packing:
if data_args.template == "gemma":

View File

@@ -1,5 +1,19 @@
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import bisect
from typing import TYPE_CHECKING, List, Sequence
from typing import TYPE_CHECKING, List, Sequence, Tuple
from ...extras.packages import is_pillow_available
@@ -62,3 +76,20 @@ def get_paligemma_token_type_ids(input_len: int, processor: "ProcessorMixin") ->
"""
image_seq_length = getattr(processor, "image_seq_length")
return [0] * image_seq_length + [1] * (input_len - image_seq_length)
def infer_seqlen(source_len: int, target_len: int, cutoff_len: int) -> Tuple[int, int]:
r"""
Computes the real sequence length after truncation by the cutoff_len.
"""
if target_len * 2 < cutoff_len: # truncate source
max_target_len = cutoff_len
elif source_len * 2 < cutoff_len: # truncate target
max_target_len = cutoff_len - source_len
else: # truncate both
max_target_len = int(cutoff_len * (target_len / (source_len + target_len)))
new_target_len = min(max_target_len, target_len)
max_source_len = max(cutoff_len - new_target_len, 0)
new_source_len = min(max_source_len, source_len)
return new_source_len, new_target_len

View File

@@ -1,14 +1,27 @@
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import defaultdict
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple
from ...extras.constants import IGNORE_INDEX
from ...extras.logging import get_logger
from .processor_utils import get_paligemma_token_type_ids, get_pixel_values, greedy_knapsack
from .processor_utils import get_paligemma_token_type_ids, get_pixel_values, greedy_knapsack, infer_seqlen
if TYPE_CHECKING:
from transformers import ProcessorMixin
from transformers.tokenization_utils import PreTrainedTokenizer
from transformers import PreTrainedTokenizer, ProcessorMixin
from ...hparams import DataArguments
from ..template import Template
@@ -38,19 +51,31 @@ def _encode_supervised_example(
input_ids += [image_token_id] * getattr(processor, "image_seq_length")
labels += [IGNORE_INDEX] * getattr(processor, "image_seq_length")
encoded_pairs = template.encode_multiturn(
tokenizer, messages, system, tools, data_args.cutoff_len, data_args.reserved_label_len
)
encoded_pairs = template.encode_multiturn(tokenizer, messages, system, tools)
total_length = 1 if template.efficient_eos else 0
for turn_idx, (source_ids, target_ids) in enumerate(encoded_pairs):
if total_length >= data_args.cutoff_len:
break
source_len, target_len = infer_seqlen(len(source_ids), len(target_ids), data_args.cutoff_len - total_length)
source_ids = source_ids[:source_len]
target_ids = target_ids[:target_len]
total_length += source_len + target_len
if data_args.train_on_prompt:
source_mask = source_ids
source_label = source_ids
elif turn_idx != 0 and template.efficient_eos:
source_mask = [tokenizer.eos_token_id] + [IGNORE_INDEX] * (len(source_ids) - 1)
source_label = [tokenizer.eos_token_id] + [IGNORE_INDEX] * (source_len - 1)
else:
source_mask = [IGNORE_INDEX] * len(source_ids)
source_label = [IGNORE_INDEX] * source_len
if data_args.mask_history and turn_idx != len(encoded_pairs) - 1:
target_label = [IGNORE_INDEX] * target_len
else:
target_label = target_ids
input_ids += source_ids + target_ids
labels += source_mask + target_ids
labels += source_label + target_label
if template.efficient_eos:
input_ids += [tokenizer.eos_token_id]
@@ -140,22 +165,30 @@ def preprocess_packed_supervised_dataset(
model_inputs = {"input_ids": [], "attention_mask": [], "labels": []}
knapsacks = greedy_knapsack(lengths, data_args.cutoff_len)
for knapsack in knapsacks:
packed_input_ids, packed_labels = [], []
for length in knapsack:
packed_input_ids, packed_attention_masks, packed_labels = [], [], []
for i, length in enumerate(knapsack):
index = length2indexes[length].pop()
packed_input_ids += batch_input_ids[index]
packed_labels += batch_labels[index]
if data_args.neat_packing:
packed_attention_masks += [i + 1] * len(batch_input_ids[index]) # start from 1
else:
packed_attention_masks += [1] * len(batch_input_ids[index])
if len(packed_input_ids) < data_args.cutoff_len:
pad_length = data_args.cutoff_len - len(packed_input_ids)
packed_input_ids += [tokenizer.pad_token_id] * pad_length
packed_labels += [IGNORE_INDEX] * pad_length
if data_args.neat_packing:
packed_attention_masks += [0] * pad_length
else:
packed_attention_masks += [1] * pad_length # more efficient flash_attn
if len(packed_input_ids) != data_args.cutoff_len:
raise ValueError("The length of packed example should be identical to the cutoff length.")
model_inputs["input_ids"].append(packed_input_ids)
model_inputs["attention_mask"].append([1] * data_args.cutoff_len)
model_inputs["attention_mask"].append(packed_attention_masks)
model_inputs["labels"].append(packed_labels)
return model_inputs

View File

@@ -1,13 +1,26 @@
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple
from ...extras.logging import get_logger
from ..data_utils import Role
from .processor_utils import get_paligemma_token_type_ids, get_pixel_values
from .processor_utils import get_paligemma_token_type_ids, get_pixel_values, infer_seqlen
if TYPE_CHECKING:
from transformers import ProcessorMixin
from transformers.tokenization_utils import PreTrainedTokenizer
from transformers import PreTrainedTokenizer, ProcessorMixin
from ...hparams import DataArguments
from ..template import Template
@@ -34,9 +47,7 @@ def _encode_unsupervised_example(
else:
messages = prompt + [{"role": Role.ASSISTANT.value, "content": ""}]
input_ids, labels = template.encode_oneturn(
tokenizer, messages, system, tools, data_args.cutoff_len, data_args.reserved_label_len
)
input_ids, labels = template.encode_oneturn(tokenizer, messages, system, tools)
if template.efficient_eos:
labels += [tokenizer.eos_token_id]
@@ -44,6 +55,9 @@ def _encode_unsupervised_example(
image_token_id = tokenizer.convert_tokens_to_ids(template.image_token)
input_ids = [image_token_id] * getattr(processor, "image_seq_length") + input_ids
source_len, target_len = infer_seqlen(len(input_ids), len(labels), data_args.cutoff_len)
input_ids = input_ids[:source_len]
labels = labels[:target_len]
return input_ids, labels

View File

@@ -1,8 +1,22 @@
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from dataclasses import dataclass
from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple, Union
from ..extras.logging import get_logger
from .data_utils import Role, infer_max_len
from .data_utils import Role
from .formatter import EmptyFormatter, FunctionFormatter, StringFormatter, ToolFormatter
@@ -24,69 +38,74 @@ class Template:
format_observation: "Formatter"
format_tools: "Formatter"
format_separator: "Formatter"
format_prefix: "Formatter"
default_system: str
stop_words: List[str]
image_token: str
efficient_eos: bool
replace_eos: bool
force_system: bool
def encode_oneturn(
self,
tokenizer: "PreTrainedTokenizer",
messages: List[Dict[str, str]],
messages: Sequence[Dict[str, str]],
system: Optional[str] = None,
tools: Optional[str] = None,
cutoff_len: int = 1_000_000,
reserved_label_len: int = 1,
) -> Tuple[List[int], List[int]]:
r"""
Returns a single pair of token ids representing prompt and response respectively.
"""
encoded_pairs = self._encode(tokenizer, messages, system, tools, cutoff_len, reserved_label_len)
encoded_messages = self._encode(tokenizer, messages, system, tools)
prompt_ids = []
for query_ids, resp_ids in encoded_pairs[:-1]:
prompt_ids += query_ids + resp_ids
prompt_ids = prompt_ids + encoded_pairs[-1][0]
answer_ids = encoded_pairs[-1][1]
for encoded_ids in encoded_messages[:-1]:
prompt_ids += encoded_ids
answer_ids = encoded_messages[-1]
return prompt_ids, answer_ids
def encode_multiturn(
self,
tokenizer: "PreTrainedTokenizer",
messages: List[Dict[str, str]],
messages: Sequence[Dict[str, str]],
system: Optional[str] = None,
tools: Optional[str] = None,
cutoff_len: int = 1_000_000,
reserved_label_len: int = 1,
) -> Sequence[Tuple[List[int], List[int]]]:
) -> List[Tuple[List[int], List[int]]]:
r"""
Returns multiple pairs of token ids representing prompts and responses respectively.
"""
return self._encode(tokenizer, messages, system, tools, cutoff_len, reserved_label_len)
encoded_messages = self._encode(tokenizer, messages, system, tools)
return [(encoded_messages[i], encoded_messages[i + 1]) for i in range(0, len(encoded_messages), 2)]
def extract_tool(self, content: str) -> Union[str, List[Tuple[str, str]]]:
r"""
Extracts tool message.
"""
return self.format_tools.extract(content)
def _encode(
self,
tokenizer: "PreTrainedTokenizer",
messages: List[Dict[str, str]],
messages: Sequence[Dict[str, str]],
system: Optional[str],
tools: Optional[str],
cutoff_len: int,
reserved_label_len: int,
) -> Sequence[Tuple[List[int], List[int]]]:
) -> List[List[int]]:
r"""
Encodes formatted inputs to pairs of token ids.
Turn 0: system + query resp
Turn t: sep + query resp
Turn 0: prefix + system + query resp
Turn t: sep + query resp
"""
system = system or self.default_system
encoded_messages = []
for i, message in enumerate(messages):
elements = []
if i == 0 and (system or tools or self.force_system):
tool_text = self.format_tools.apply(content=tools)[0] if tools else ""
elements += self.format_system.apply(content=(system + tool_text))
elif i > 0 and i % 2 == 0:
if i == 0:
elements += self.format_prefix.apply()
if system or tools:
tool_text = self.format_tools.apply(content=tools)[0] if tools else ""
elements += self.format_system.apply(content=(system + tool_text))
if i > 0 and i % 2 == 0:
elements += self.format_separator.apply()
if message["role"] == Role.USER.value:
@@ -102,11 +121,9 @@ class Template:
encoded_messages.append(self._convert_elements_to_ids(tokenizer, elements))
return self._make_pairs(encoded_messages, cutoff_len, reserved_label_len)
return encoded_messages
def _convert_elements_to_ids(
self, tokenizer: "PreTrainedTokenizer", elements: List[Union[str, Dict[str, str]]]
) -> List[int]:
def _convert_elements_to_ids(self, tokenizer: "PreTrainedTokenizer", elements: "SLOTS") -> List[int]:
r"""
Converts elements to token ids.
"""
@@ -127,57 +144,34 @@ class Template:
return token_ids
def _make_pairs(
self,
encoded_messages: Sequence[List[int]],
cutoff_len: int,
reserved_label_len: int,
) -> Sequence[Tuple[List[int], List[int]]]:
encoded_pairs = []
total_length = 0
for i in range(0, len(encoded_messages), 2):
if total_length >= cutoff_len:
break
max_source_len, max_target_len = infer_max_len(
source_len=len(encoded_messages[i]),
target_len=len(encoded_messages[i + 1]),
max_len=(cutoff_len - total_length),
reserved_label_len=reserved_label_len,
)
source_ids = encoded_messages[i][:max_source_len]
target_ids = encoded_messages[i + 1][:max_target_len]
total_length += len(source_ids) + len(target_ids)
encoded_pairs.append((source_ids, target_ids))
return encoded_pairs
@dataclass
class Llama2Template(Template):
def _encode(
self,
tokenizer: "PreTrainedTokenizer",
messages: List[Dict[str, str]],
messages: Sequence[Dict[str, str]],
system: str,
tools: str,
cutoff_len: int,
reserved_label_len: int,
) -> Sequence[Tuple[List[int], List[int]]]:
) -> List[List[int]]:
r"""
Encodes formatted inputs to pairs of token ids.
Turn 0: system + query resp
Turn t: sep + query resp
Turn 0: prefix + system + query resp
Turn t: sep + query resp
"""
system = system or self.default_system
encoded_messages = []
for i, message in enumerate(messages):
elements = []
system_text = ""
if i == 0 and (system or tools or self.force_system):
tool_text = self.format_tools.apply(content=tools)[0] if tools else ""
system_text = self.format_system.apply(content=(system + tool_text))[0]
elif i > 0 and i % 2 == 0:
if i == 0:
elements += self.format_prefix.apply()
if system or tools:
tool_text = self.format_tools.apply(content=tools)[0] if tools else ""
system_text = self.format_system.apply(content=(system + tool_text))[0]
if i > 0 and i % 2 == 0:
elements += self.format_separator.apply()
if message["role"] == Role.USER.value:
@@ -193,7 +187,7 @@ class Llama2Template(Template):
encoded_messages.append(self._convert_elements_to_ids(tokenizer, elements))
return self._make_pairs(encoded_messages, cutoff_len, reserved_label_len)
return encoded_messages
TEMPLATES: Dict[str, Template] = {}
@@ -208,12 +202,12 @@ def _register_template(
format_observation: Optional["Formatter"] = None,
format_tools: Optional["Formatter"] = None,
format_separator: Optional["Formatter"] = None,
format_prefix: Optional["Formatter"] = None,
default_system: str = "",
stop_words: List[str] = [],
stop_words: Sequence[str] = [],
image_token: str = "<image>",
efficient_eos: bool = False,
replace_eos: bool = False,
force_system: bool = False,
) -> None:
r"""
Registers a chat template.
@@ -245,9 +239,10 @@ def _register_template(
template_class = Llama2Template if name.startswith("llama2") else Template
default_user_formatter = StringFormatter(slots=["{{content}}"])
default_assistant_formatter = StringFormatter(slots=["{{content}}"] + eos_slots)
default_function_formatter = FunctionFormatter(slots=["Action: {{name}}\nAction Input: {{arguments}}"] + eos_slots)
default_function_formatter = FunctionFormatter(slots=eos_slots, tool_format="default")
default_tool_formatter = ToolFormatter(tool_format="default")
default_separator_formatter = EmptyFormatter()
default_prefix_formatter = EmptyFormatter()
TEMPLATES[name] = template_class(
format_user=format_user or default_user_formatter,
format_assistant=format_assistant or default_assistant_formatter,
@@ -256,12 +251,12 @@ def _register_template(
format_observation=format_observation or format_user or default_user_formatter,
format_tools=format_tools or default_tool_formatter,
format_separator=format_separator or default_separator_formatter,
format_prefix=format_prefix or default_prefix_formatter,
default_system=default_system,
stop_words=stop_words,
image_token=image_token,
efficient_eos=efficient_eos,
replace_eos=replace_eos,
force_system=force_system,
)
@@ -307,6 +302,10 @@ def _convert_slots_to_jinja(slots: "SLOTS", tokenizer: "PreTrainedTokenizer", pl
def _get_jinja_template(template: "Template", tokenizer: "PreTrainedTokenizer") -> str:
jinja_template = ""
prefix = _convert_slots_to_jinja(template.format_prefix.apply(), tokenizer)
if prefix:
jinja_template += "{{ " + prefix + " }}"
if template.default_system:
jinja_template += "{% set system_message = '" + _jinja_escape(template.default_system) + "' %}"
@@ -315,11 +314,7 @@ def _get_jinja_template(template: "Template", tokenizer: "PreTrainedTokenizer")
)
system_message = _convert_slots_to_jinja(template.format_system.apply(), tokenizer, placeholder="system_message")
if isinstance(template, Llama2Template):
pass
elif template.force_system:
jinja_template += "{{ " + system_message + " }}"
else:
if not isinstance(template, Llama2Template):
jinja_template += "{% if system_message is defined %}{{ " + system_message + " }}{% endif %}"
jinja_template += "{% for message in messages %}"
@@ -346,6 +341,7 @@ def _get_jinja_template(template: "Template", tokenizer: "PreTrainedTokenizer")
def get_template_and_fix_tokenizer(
tokenizer: "PreTrainedTokenizer",
name: Optional[str] = None,
tool_format: Optional[str] = None,
) -> Template:
if name is None:
template = TEMPLATES["empty"] # placeholder
@@ -354,6 +350,12 @@ def get_template_and_fix_tokenizer(
if template is None:
raise ValueError("Template {} does not exist.".format(name))
if tool_format is not None:
logger.info("Using tool format: {}.".format(tool_format))
eos_slots = [] if template.efficient_eos else [{"eos_token"}]
template.format_tools = ToolFormatter(tool_format=tool_format)
template.format_function = FunctionFormatter(slots=eos_slots, tool_format=tool_format)
stop_words = template.stop_words
if template.replace_eos:
if not stop_words:
@@ -435,9 +437,8 @@ _register_template(
_register_template(
name="belle",
format_user=StringFormatter(slots=["Human: {{content}}\n\nBelle: "]),
format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]),
format_separator=EmptyFormatter(slots=["\n\n"]),
force_system=True,
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
)
@@ -450,11 +451,7 @@ _register_template(
_register_template(
name="breeze",
format_user=StringFormatter(slots=["[INST] {{content}} [/INST] "]),
format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]),
default_system=(
"You are a helpful AI assistant built by MediaTek Research. "
"The user you are helping speaks Traditional Chinese and comes from Taiwan."
),
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
efficient_eos=True,
)
@@ -462,10 +459,9 @@ _register_template(
_register_template(
name="chatglm2",
format_user=StringFormatter(slots=["[Round {{idx}}]\n\n问:{{content}}\n\n答:"]),
format_system=StringFormatter(slots=[{"token": "[gMASK]"}, {"token": "sop"}, "{{content}}"]),
format_separator=EmptyFormatter(slots=["\n\n"]),
format_prefix=EmptyFormatter(slots=[{"token": "[gMASK]"}, {"token": "sop"}]),
efficient_eos=True,
force_system=True,
)
@@ -473,32 +469,13 @@ _register_template(
name="chatglm3",
format_user=StringFormatter(slots=[{"token": "<|user|>"}, "\n", "{{content}}", {"token": "<|assistant|>"}]),
format_assistant=StringFormatter(slots=["\n", "{{content}}"]),
format_system=StringFormatter(slots=[{"token": "[gMASK]"}, {"token": "sop"}, "{{content}}"]),
format_function=FunctionFormatter(slots=["{{name}}\n{{arguments}}"]),
format_system=StringFormatter(slots=[{"token": "<|system|>"}, "\n", "{{content}}"]),
format_function=FunctionFormatter(slots=[], tool_format="glm4"),
format_observation=StringFormatter(
slots=[{"token": "<|observation|>"}, "\n", "{{content}}", {"token": "<|assistant|>"}]
),
stop_words=["<|user|>", "<|observation|>"],
efficient_eos=True,
force_system=True,
)
_register_template(
name="chatglm3_system",
format_user=StringFormatter(slots=[{"token": "<|user|>"}, "\n", "{{content}}", {"token": "<|assistant|>"}]),
format_assistant=StringFormatter(slots=["\n", "{{content}}"]),
format_system=StringFormatter(
slots=[{"token": "[gMASK]"}, {"token": "sop"}, {"token": "<|system|>"}, "\n", "{{content}}"]
),
format_function=FunctionFormatter(slots=["{{name}}\n{{arguments}}"]),
format_observation=StringFormatter(
slots=[{"token": "<|observation|>"}, "\n", "{{content}}", {"token": "<|assistant|>"}]
),
default_system=(
"You are ChatGLM3, a large language model trained by Zhipu.AI. "
"Follow the user's instructions carefully. Respond using markdown."
),
format_tools=ToolFormatter(tool_format="glm4"),
format_prefix=EmptyFormatter(slots=[{"token": "[gMASK]"}, {"token": "sop"}]),
stop_words=["<|user|>", "<|observation|>"],
efficient_eos=True,
)
@@ -529,8 +506,24 @@ _register_template(
_register_template(
name="codegeex2",
format_system=StringFormatter(slots=[{"token": "[gMASK]"}, {"token": "sop"}, "{{content}}"]),
force_system=True,
format_prefix=EmptyFormatter(slots=[{"token": "[gMASK]"}, {"token": "sop"}]),
)
_register_template(
name="codegeex4",
format_user=StringFormatter(slots=["<|user|>\n{{content}}<|assistant|>\n"]),
format_system=StringFormatter(slots=["<|system|>\n{{content}}"]),
format_function=FunctionFormatter(slots=[], tool_format="glm4"),
format_observation=StringFormatter(slots=["<|observation|>\n{{content}}<|assistant|>\n"]),
format_tools=ToolFormatter(tool_format="glm4"),
format_prefix=EmptyFormatter(slots=["[gMASK]<sop>"]),
default_system=(
"你是一位智能编程助手你叫CodeGeeX。你会为用户回答关于编程、代码、计算机方面的任何问题"
"并提供格式规范、可以执行、准确安全的代码,并在必要时提供详细的解释。"
),
stop_words=["<|user|>", "<|observation|>"],
efficient_eos=True,
)
@@ -544,21 +537,15 @@ _register_template(
)
]
),
format_system=StringFormatter(
slots=[{"bos_token"}, "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{content}}<|END_OF_TURN_TOKEN|>"]
),
default_system=(
"You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users "
"by providing thorough responses. You are trained by Cohere."
),
format_system=StringFormatter(slots=["<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{content}}<|END_OF_TURN_TOKEN|>"]),
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
)
_register_template(
name="cpm",
format_user=StringFormatter(slots=["<用户>{{content}}<AI>"]),
format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]),
force_system=True,
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
)
@@ -591,30 +578,28 @@ _register_template(
_register_template(
name="deepseek",
format_user=StringFormatter(slots=["User: {{content}}\n\nAssistant:"]),
format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]),
force_system=True,
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
)
_register_template(
name="deepseekcoder",
format_user=StringFormatter(slots=["### Instruction:\n{{content}}\n### Response:"]),
format_assistant=StringFormatter(slots=["\n", "{{content}}"]),
format_separator=EmptyFormatter(slots=["\n<|EOT|>\n"]),
format_assistant=StringFormatter(slots=["\n{{content}}\n"]),
format_separator=EmptyFormatter(slots=["\n"]),
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
default_system=(
"You are an AI programming assistant, utilizing the Deepseek Coder model, "
"developed by Deepseek Company, and you only answer questions related to computer science. "
"For politically sensitive questions, security and privacy issues, "
"and other non-computer science questions, you will refuse to answer\n"
),
stop_words=["<|EOT|>"],
efficient_eos=True,
)
_register_template(
name="default",
format_user=StringFormatter(slots=["Human: {{content}}\nAssistant: "]),
format_user=StringFormatter(slots=["Human: {{content}}\nAssistant:"]),
format_system=StringFormatter(slots=["{{content}}\n"]),
format_separator=EmptyFormatter(slots=["\n"]),
)
@@ -622,11 +607,7 @@ _register_template(
_register_template(
name="empty",
format_user=StringFormatter(slots=["{{content}}"]),
format_assistant=StringFormatter(slots=["{{content}}"]),
format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]),
efficient_eos=True,
force_system=True,
)
@@ -648,13 +629,12 @@ _register_template(
_register_template(
name="gemma",
format_user=StringFormatter(slots=["<start_of_turn>user\n{{content}}<end_of_turn>\n<start_of_turn>model\n"]),
format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]),
format_observation=StringFormatter(
slots=["<start_of_turn>tool\n{{content}}<end_of_turn>\n<start_of_turn>model\n"]
),
format_separator=EmptyFormatter(slots=["<end_of_turn>\n"]),
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
efficient_eos=True,
force_system=True,
)
@@ -662,36 +642,33 @@ _register_template(
name="glm4",
format_user=StringFormatter(slots=["<|user|>\n{{content}}<|assistant|>"]),
format_assistant=StringFormatter(slots=["\n{{content}}"]),
format_system=StringFormatter(slots=["[gMASK]<sop>{{content}}"]),
format_function=FunctionFormatter(slots=["{{name}}\n{{arguments}}"]),
format_system=StringFormatter(slots=["<|system|>\n{{content}}"]),
format_function=FunctionFormatter(slots=[], tool_format="glm4"),
format_observation=StringFormatter(slots=["<|observation|>\n{{content}}<|assistant|>"]),
format_tools=ToolFormatter(tool_format="glm4"),
format_prefix=EmptyFormatter(slots=["[gMASK]<sop>"]),
stop_words=["<|user|>", "<|observation|>"],
efficient_eos=True,
force_system=True,
)
_register_template(
name="intern",
format_user=StringFormatter(slots=["<|User|>:{{content}}", {"token": "<eoh>"}, "\n<|Bot|>:"]),
format_separator=EmptyFormatter(slots=[{"token": "<eoa>"}, "\n"]),
format_user=StringFormatter(slots=["<|User|>:{{content}}\n<|Bot|>:"]),
format_system=StringFormatter(slots=["<|System|>:{{content}}\n"]),
format_separator=EmptyFormatter(slots=["<eoa>\n"]),
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
stop_words=["<eoa>"],
efficient_eos=True,
efficient_eos=True, # internlm tokenizer cannot set eos_token_id
)
_register_template(
name="intern2",
format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
format_system=StringFormatter(slots=[{"bos_token"}, "<|im_start|>system\n{{content}}<|im_end|>\n"]),
format_separator=EmptyFormatter(slots=["\n"]),
default_system=(
"You are an AI assistant whose name is InternLM (书生·浦语).\n"
"- InternLM (书生·浦语) is a conversational language model that is developed "
"by Shanghai AI Laboratory (上海人工智能实验室). It is designed to be helpful, honest, and harmless.\n"
"- InternLM (书生·浦语) can understand and communicate fluently in the language chosen "
"by the user such as English and 中文."
),
format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
format_separator=EmptyFormatter(slots=["<|im_end|>\n"]),
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
stop_words=["<|im_end|>"],
efficient_eos=True, # internlm2 tokenizer cannot set eos_token_id
)
@@ -700,7 +677,6 @@ _register_template(
_register_template(
name="llama2",
format_user=StringFormatter(slots=[{"bos_token"}, "[INST] {{content}} [/INST]"]),
format_assistant=StringFormatter(slots=[" {{content}} ", {"eos_token"}]),
format_system=StringFormatter(slots=["<<SYS>>\n{{content}}\n<</SYS>>\n\n"]),
)
@@ -723,9 +699,7 @@ _register_template(
)
]
),
format_system=StringFormatter(
slots=[{"bos_token"}, "<|start_header_id|>system<|end_header_id|>\n\n{{content}}<|eot_id|>"]
),
format_system=StringFormatter(slots=["<|start_header_id|>system<|end_header_id|>\n\n{{content}}<|eot_id|>"]),
format_observation=StringFormatter(
slots=[
(
@@ -734,7 +708,7 @@ _register_template(
)
]
),
default_system="You are a helpful assistant.",
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
stop_words=["<|eot_id|>"],
replace_eos=True,
)
@@ -743,24 +717,21 @@ _register_template(
_register_template(
name="mistral",
format_user=StringFormatter(slots=["[INST] {{content}} [/INST]"]),
format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]),
force_system=True,
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
)
_register_template(
name="olmo",
format_user=StringFormatter(slots=["<|user|>\n{{content}}<|assistant|>\n"]),
format_system=StringFormatter(slots=[{"eos_token"}, "{{content}}"]),
force_system=True,
format_prefix=EmptyFormatter(slots=[{"eos_token"}]),
)
_register_template(
name="openchat",
format_user=StringFormatter(slots=["GPT4 Correct User: {{content}}", {"eos_token"}, "GPT4 Correct Assistant:"]),
format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]),
force_system=True,
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
)
@@ -774,27 +745,25 @@ _register_template(
)
]
),
format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]),
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
stop_words=["<|eot_id|>"],
replace_eos=True,
force_system=True,
)
_register_template(
name="orion",
format_user=StringFormatter(slots=["Human: {{content}}\n\nAssistant: ", {"eos_token"}]),
format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]),
force_system=True,
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
)
_register_template(
name="phi",
format_user=StringFormatter(slots=["<|user|>\n{{content}}<|end|>\n<|assistant|>\n"]),
format_system=StringFormatter(slots=[{"bos_token"}, "<|system|>\n{{content}}<|end|>\n"]),
format_system=StringFormatter(slots=["<|system|>\n{{content}}<|end|>\n"]),
format_separator=EmptyFormatter(slots=["\n"]),
default_system="You are a helpful AI assistant.",
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
stop_words=["<|end|>"],
replace_eos=True,
)
@@ -827,7 +796,6 @@ _register_template(
format_separator=EmptyFormatter(slots=["\n"]),
stop_words=["<|end|>"],
replace_eos=True,
force_system=True,
)
@@ -924,8 +892,7 @@ _register_template(
_register_template(
name="zephyr",
format_user=StringFormatter(slots=["<|user|>\n{{content}}", {"eos_token"}, "<|assistant|>"]),
format_assistant=StringFormatter(slots=["\n{{content}}", {"eos_token"}]),
format_user=StringFormatter(slots=["<|user|>\n{{content}}", {"eos_token"}, "<|assistant|>\n"]),
format_system=StringFormatter(slots=["<|system|>\n{{content}}", {"eos_token"}]),
default_system="You are Zephyr, a helpful assistant.",
)

View File

@@ -0,0 +1,140 @@
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import re
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Dict, List, Tuple, Union
from .data_utils import SLOTS
DEFAULT_TOOL_PROMPT = (
"You have access to the following tools:\n{tool_text}"
"Use the following format if using a tool:\n"
"```\n"
"Action: tool name (one of [{tool_names}])\n"
"Action Input: the input to the tool, in a JSON format representing the kwargs "
"""(e.g. ```{{"input": "hello world", "num_beams": 5}}```)\n"""
"```\n"
)
GLM4_TOOL_PROMPT = (
"你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,"
"你的任务是针对用户的问题和要求提供适当的答复和支持。# 可用工具{tool_text}"
)
@dataclass
class ToolUtils(ABC):
@staticmethod
@abstractmethod
def get_function_slots() -> SLOTS: ...
@staticmethod
@abstractmethod
def tool_formatter(tools: List[Dict[str, Any]]) -> str: ...
@staticmethod
@abstractmethod
def tool_extractor(content: str) -> Union[str, List[Tuple[str, str]]]: ...
class DefaultToolUtils(ToolUtils):
@staticmethod
def get_function_slots() -> SLOTS:
return ["Action: {{name}}\nAction Input: {{arguments}}\n"]
@staticmethod
def tool_formatter(tools: List[Dict[str, Any]]) -> str:
tool_text = ""
tool_names = []
for tool in tools:
param_text = ""
for name, param in tool["parameters"]["properties"].items():
required, enum, items = "", "", ""
if name in tool["parameters"].get("required", []):
required = ", required"
if param.get("enum", None):
enum = ", should be one of [{}]".format(", ".join(param["enum"]))
if param.get("items", None):
items = ", where each item should be {}".format(param["items"].get("type", ""))
param_text += " - {name} ({type}{required}): {desc}{enum}{items}\n".format(
name=name,
type=param.get("type", ""),
required=required,
desc=param.get("description", ""),
enum=enum,
items=items,
)
tool_text += "> Tool Name: {name}\nTool Description: {desc}\nTool Args:\n{args}\n".format(
name=tool["name"], desc=tool.get("description", ""), args=param_text
)
tool_names.append(tool["name"])
return DEFAULT_TOOL_PROMPT.format(tool_text=tool_text, tool_names=", ".join(tool_names))
@staticmethod
def tool_extractor(content: str) -> Union[str, List[Tuple[str, str]]]:
regex = re.compile(r"Action:\s*([a-zA-Z0-9_]+)\s*Action Input:\s*(.+?)(?=\s*Action:|\s*$)", re.DOTALL)
action_match: List[Tuple[str, str]] = re.findall(regex, content)
if not action_match:
return content
results = []
for match in action_match:
tool_name = match[0].strip()
tool_input = match[1].strip().strip('"').strip("```")
try:
arguments = json.loads(tool_input)
results.append((tool_name, json.dumps(arguments, ensure_ascii=False)))
except json.JSONDecodeError:
return content
return results
class GLM4ToolUtils(ToolUtils):
@staticmethod
def get_function_slots() -> SLOTS:
return ["{{name}}\n{{arguments}}"]
@staticmethod
def tool_formatter(tools: List[Dict[str, Any]]) -> str:
tool_text = ""
for tool in tools:
tool_text += "\n\n## {name}\n\n{body}\n在调用上述函数时,请使用 Json 格式表示调用的参数。".format(
name=tool["name"], body=json.dumps(tool, indent=4, ensure_ascii=False)
)
return GLM4_TOOL_PROMPT.format(tool_text=tool_text)
@staticmethod
def tool_extractor(content: str) -> Union[str, List[Tuple[str, str]]]:
if "\n" not in content:
return content
tool_name, tool_input = content.split("\n", maxsplit=1)
try:
arguments = json.loads(tool_input)
except json.JSONDecodeError:
return content
return [(tool_name, json.dumps(arguments, ensure_ascii=False))]

View File

@@ -1,6 +1,42 @@
# Inspired by: https://github.com/hendrycks/test/blob/master/evaluate_flan.py
# Copyright 2024 the LlamaFactory team.
#
# This code is inspired by the Dan's test library.
# https://github.com/hendrycks/test/blob/master/evaluate_flan.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# MIT License
#
# Copyright (c) 2020 Dan Hendrycks
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import inspect
import json
import os
from typing import Any, Dict, List, Optional
@@ -26,9 +62,7 @@ class Evaluator:
self.template = get_template_and_fix_tokenizer(self.tokenizer, self.data_args.template)
self.model = load_model(self.tokenizer, self.model_args, finetuning_args)
self.eval_template = get_eval_template(self.eval_args.lang)
self.choice_inputs = [
self.tokenizer.encode(self.eval_template.prefix + ch, add_special_tokens=False)[-1] for ch in CHOICES
]
self.choice_inputs = [self.tokenizer.encode(ch, add_special_tokens=False)[-1] for ch in CHOICES]
@torch.inference_mode()
def batch_inference(self, batch_input: Dict[str, torch.Tensor]) -> List[str]:
@@ -39,8 +73,11 @@ class Evaluator:
return [chr(ord("A") + offset.item()) for offset in torch.argmax(choice_probs, dim=-1)]
def eval(self) -> None:
eval_task = self.eval_args.task.split("_")[0]
eval_split = self.eval_args.task.split("_")[1]
mapping = cached_file(
path_or_repo_id=os.path.join(self.eval_args.task_dir, self.eval_args.task),
path_or_repo_id=os.path.join(self.eval_args.task_dir, eval_task),
filename="mapping.json",
cache_dir=self.model_args.cache_dir,
token=self.model_args.hf_hub_token,
@@ -53,27 +90,22 @@ class Evaluator:
pbar = tqdm(categorys.keys(), desc="Processing subjects", position=0)
results = {}
for subject in pbar:
if "trust_remote_code" in inspect.signature(load_dataset).parameters: # for datasets==2.16.0
kwargs = {"trust_remote_code": True}
else:
kwargs = {}
dataset = load_dataset(
path=os.path.join(self.eval_args.task_dir, self.eval_args.task),
path=os.path.join(self.eval_args.task_dir, eval_task),
name=subject,
cache_dir=self.model_args.cache_dir,
download_mode=self.eval_args.download_mode,
token=self.model_args.hf_hub_token,
**kwargs,
trust_remote_code=True,
)
pbar.set_postfix_str(categorys[subject]["name"])
inputs, outputs, labels = [], [], []
for i in trange(len(dataset[self.data_args.split]), desc="Formatting batches", position=1, leave=False):
for i in trange(len(dataset[eval_split]), desc="Formatting batches", position=1, leave=False):
support_set = (
dataset["train"].shuffle().select(range(min(self.eval_args.n_shot, len(dataset["train"]))))
)
messages = self.eval_template.format_example(
target_data=dataset[self.data_args.split][i],
target_data=dataset[eval_split][i],
support_set=support_set,
subject_name=categorys[subject]["name"],
)

View File

@@ -1,3 +1,17 @@
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from dataclasses import dataclass
from typing import Dict, List, Sequence, Tuple
@@ -10,7 +24,6 @@ class EvalTemplate:
system: str
choice: str
answer: str
prefix: str
def _parse_example(self, example: Dict[str, str]) -> Tuple[str, str]:
r"""
@@ -42,8 +55,8 @@ class EvalTemplate:
eval_templates: Dict[str, "EvalTemplate"] = {}
def _register_eval_template(name: str, system: str, choice: str, answer: str, prefix: str) -> None:
eval_templates[name] = EvalTemplate(system=system, choice=choice, answer=answer, prefix=prefix)
def _register_eval_template(name: str, system: str, choice: str, answer: str) -> None:
eval_templates[name] = EvalTemplate(system=system, choice=choice, answer=answer)
def get_eval_template(name: str) -> "EvalTemplate":
@@ -56,8 +69,7 @@ _register_eval_template(
name="en",
system="The following are multiple choice questions (with answers) about {subject}.\n\n",
choice="\n{choice}. {content}",
answer="\nAnswer: ",
prefix=" ",
answer="\nAnswer:",
)
@@ -66,5 +78,4 @@ _register_eval_template(
system="以下是中国关于{subject}考试的单项选择题,请选出其中的正确答案。\n\n",
choice="\n{choice}. {content}",
answer="\n答案:",
prefix=" ",
)

View File

@@ -1,3 +1,17 @@
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import OrderedDict, defaultdict
from enum import Enum
from typing import Dict, Optional
@@ -64,6 +78,19 @@ TRAINING_STAGES = {
STAGES_USE_PAIR_DATA = {"rm", "dpo"}
SUPPORTED_CLASS_FOR_BLOCK_DIAG_ATTN = {
"cohere",
"falcon",
"gemma",
"gemma2",
"llama",
"mistral",
"phi",
"phi3",
"qwen2",
"starcoder2",
}
SUPPORTED_CLASS_FOR_S2ATTN = {"llama"}
V_HEAD_WEIGHTS_NAME = "value_head.bin"
@@ -272,6 +299,17 @@ register_model_group(
)
register_model_group(
models={
"CodeGeeX4-9B-Chat": {
DownloadSource.DEFAULT: "THUDM/codegeex4-all-9b",
DownloadSource.MODELSCOPE: "ZhipuAI/codegeex4-all-9b",
},
},
template="codegeex4",
)
register_model_group(
models={
"CodeGemma-7B": {
@@ -389,6 +427,18 @@ register_model_group(
DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-V2-Chat",
DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-V2-Chat",
},
"DeepSeek-MoE-Coder-16B-Base": {
DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-Coder-V2-Lite-Base",
},
"DeepSeek-MoE-Coder-236B-Base": {
DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-Coder-V2-Base",
},
"DeepSeek-MoE-Coder-16B-Chat": {
DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
},
"DeepSeek-MoE-Coder-236B-Chat": {
DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-Coder-V2-Instruct",
},
},
template="deepseek",
)
@@ -481,6 +531,22 @@ register_model_group(
"Gemma-1.1-7B-Chat": {
DownloadSource.DEFAULT: "google/gemma-1.1-7b-it",
},
"Gemma-2-9B": {
DownloadSource.DEFAULT: "google/gemma-2-9b",
DownloadSource.MODELSCOPE: "LLM-Research/gemma-2-9b",
},
"Gemma-2-27B": {
DownloadSource.DEFAULT: "google/gemma-2-27b",
DownloadSource.MODELSCOPE: "LLM-Research/gemma-2-27b",
},
"Gemma-2-9B-Chat": {
DownloadSource.DEFAULT: "google/gemma-2-9b-it",
DownloadSource.MODELSCOPE: "LLM-Research/gemma-2-9b-it",
},
"Gemma-2-27B-Chat": {
DownloadSource.DEFAULT: "google/gemma-2-27b-it",
DownloadSource.MODELSCOPE: "LLM-Research/gemma-2-27b-it",
},
},
template="gemma",
)
@@ -553,7 +619,26 @@ register_model_group(
register_model_group(
models={
"Jambda-v0.1": {
"InternLM2.5-7B": {
DownloadSource.DEFAULT: "internlm/internlm2_5-7b",
DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2_5-7b",
},
"InternLM2.5-7B-Chat": {
DownloadSource.DEFAULT: "internlm/internlm2_5-7b-chat",
DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2_5-7b-chat",
},
"InternLM2.5-7B-1M-Chat": {
DownloadSource.DEFAULT: "internlm/internlm2_5-7b-chat-1m",
DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2_5-7b-chat-1m",
},
},
template="intern2",
)
register_model_group(
models={
"Jamba-v0.1": {
DownloadSource.DEFAULT: "ai21labs/Jamba-v0.1",
DownloadSource.MODELSCOPE: "AI-ModelScope/Jamba-v0.1",
}
@@ -668,6 +753,21 @@ register_model_group(
)
register_model_group(
models={
"MiniCPM-2B-SFT-Chat": {
DownloadSource.DEFAULT: "openbmb/MiniCPM-2B-sft-bf16",
DownloadSource.MODELSCOPE: "OpenBMB/miniCPM-bf16",
},
"MiniCPM-2B-DPO-Chat": {
DownloadSource.DEFAULT: "openbmb/MiniCPM-2B-dpo-bf16",
DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM-2B-dpo-bf16",
},
},
template="cpm",
)
register_model_group(
models={
"Mistral-7B-v0.1": {
@@ -1207,6 +1307,10 @@ register_model_group(
register_model_group(
models={
"TeleChat-1B-Chat": {
DownloadSource.DEFAULT: "Tele-AI/TeleChat-1B",
DownloadSource.MODELSCOPE: "TeleAI/TeleChat-1B",
},
"TeleChat-7B-Chat": {
DownloadSource.DEFAULT: "Tele-AI/telechat-7B",
DownloadSource.MODELSCOPE: "TeleAI/telechat-7B",

View File

@@ -1,3 +1,20 @@
# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
# This code is inspired by the HuggingFace's transformers library.
# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/commands/env.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import platform
import accelerate
@@ -6,13 +23,10 @@ import peft
import torch
import transformers
import trl
from transformers.integrations import is_deepspeed_available
from transformers.utils import is_bitsandbytes_available, is_torch_cuda_available, is_torch_npu_available
from .packages import is_vllm_available
from transformers.utils import is_torch_cuda_available, is_torch_npu_available
VERSION = "0.8.0"
VERSION = "0.8.3"
def print_env() -> None:
@@ -37,19 +51,25 @@ def print_env() -> None:
info["NPU type"] = torch.npu.get_device_name()
info["CANN version"] = torch.version.cann
if is_deepspeed_available():
try:
import deepspeed # type: ignore
info["DeepSpeed version"] = deepspeed.__version__
except Exception:
pass
if is_bitsandbytes_available():
try:
import bitsandbytes
info["Bitsandbytes version"] = bitsandbytes.__version__
except Exception:
pass
if is_vllm_available():
try:
import vllm
info["vLLM version"] = vllm.__version__
except Exception:
pass
print("\n" + "\n".join(["- {}: {}".format(key, value) for key, value in info.items()]) + "\n")

View File

@@ -1,3 +1,17 @@
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os
import sys

View File

@@ -1,13 +1,29 @@
# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
# This code is inspired by the HuggingFace's PEFT library.
# https://github.com/huggingface/peft/blob/v0.10.0/src/peft/peft_model.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import gc
import os
from typing import TYPE_CHECKING, Dict, Tuple
from typing import TYPE_CHECKING, Tuple, Union
import torch
from peft import PeftModel
from transformers import InfNanRemoveLogitsProcessor, LogitsProcessorList, PreTrainedModel
import transformers.dynamic_module_utils
from transformers import InfNanRemoveLogitsProcessor, LogitsProcessorList
from transformers.dynamic_module_utils import get_relative_imports
from transformers.utils import (
SAFE_WEIGHTS_NAME,
WEIGHTS_NAME,
is_torch_bf16_gpu_available,
is_torch_cuda_available,
is_torch_mps_available,
@@ -16,7 +32,6 @@ from transformers.utils import (
)
from transformers.utils.versions import require_version
from .constants import V_HEAD_SAFE_WEIGHTS_NAME, V_HEAD_WEIGHTS_NAME
from .logging import get_logger
@@ -28,7 +43,7 @@ except Exception:
if TYPE_CHECKING:
from trl import AutoModelForCausalLMWithValueHead
from numpy.typing import NDArray
from ..hparams import ModelArguments
@@ -58,6 +73,9 @@ class AverageMeter:
def check_dependencies() -> None:
r"""
Checks the version of the required packages.
"""
if os.environ.get("DISABLE_VERSION_CHECK", "0").lower() in ["true", "1"]:
logger.warning("Version checking has been disabled, may lead to unexpected behaviors.")
else:
@@ -68,7 +86,7 @@ def check_dependencies() -> None:
require_version("trl>=0.8.6", "To fix: pip install trl>=0.8.6")
def count_parameters(model: torch.nn.Module) -> Tuple[int, int]:
def count_parameters(model: "torch.nn.Module") -> Tuple[int, int]:
r"""
Returns the number of trainable parameters and number of all parameters in the model.
"""
@@ -79,7 +97,7 @@ def count_parameters(model: torch.nn.Module) -> Tuple[int, int]:
if num_params == 0 and hasattr(param, "ds_numel"):
num_params = param.ds_numel
# Due to the design of 4bit linear layers from bitsandbytes, multiply the number of parameters by 2
# Due to the design of 4bit linear layers from bitsandbytes, multiply the number of parameters by itemsize
if param.__class__.__name__ == "Params4bit":
if hasattr(param, "quant_storage") and hasattr(param.quant_storage, "itemsize"):
num_bytes = param.quant_storage.itemsize
@@ -97,55 +115,7 @@ def count_parameters(model: torch.nn.Module) -> Tuple[int, int]:
return trainable_params, all_param
def fix_valuehead_checkpoint(
model: "AutoModelForCausalLMWithValueHead", output_dir: str, safe_serialization: bool
) -> None:
r"""
The model is already unwrapped.
There are three cases:
1. full tuning without ds_zero3: state_dict = {"model.layers.*": ..., "v_head.summary.*": ...}
2. lora tuning without ds_zero3: state_dict = {"v_head.summary.*": ...}
3. under deepspeed zero3: state_dict = {"pretrained_model.model.layers.*": ..., "v_head.summary.*": ...}
We assume `stage3_gather_16bit_weights_on_model_save=true`.
"""
if not isinstance(model.pretrained_model, (PreTrainedModel, PeftModel)):
return
if safe_serialization:
from safetensors import safe_open
from safetensors.torch import save_file
path_to_checkpoint = os.path.join(output_dir, SAFE_WEIGHTS_NAME)
with safe_open(path_to_checkpoint, framework="pt", device="cpu") as f:
state_dict: Dict[str, torch.Tensor] = {key: f.get_tensor(key) for key in f.keys()}
else:
path_to_checkpoint = os.path.join(output_dir, WEIGHTS_NAME)
state_dict: Dict[str, torch.Tensor] = torch.load(path_to_checkpoint, map_location="cpu")
decoder_state_dict = {}
v_head_state_dict = {}
for name, param in state_dict.items():
if name.startswith("v_head."):
v_head_state_dict[name] = param
else:
decoder_state_dict[name.replace("pretrained_model.", "")] = param
os.remove(path_to_checkpoint)
model.pretrained_model.save_pretrained(
output_dir, state_dict=decoder_state_dict or None, safe_serialization=safe_serialization
)
if safe_serialization:
save_file(v_head_state_dict, os.path.join(output_dir, V_HEAD_SAFE_WEIGHTS_NAME), metadata={"format": "pt"})
else:
torch.save(v_head_state_dict, os.path.join(output_dir, V_HEAD_WEIGHTS_NAME))
logger.info("Value head model saved at: {}".format(output_dir))
def get_current_device() -> torch.device:
def get_current_device() -> "torch.device":
r"""
Gets the current available device.
"""
@@ -184,7 +154,14 @@ def get_logits_processor() -> "LogitsProcessorList":
return logits_processor
def infer_optim_dtype(model_dtype: torch.dtype) -> torch.dtype:
def has_tokenized_data(path: "os.PathLike") -> bool:
r"""
Checks if the path has a tokenized dataset.
"""
return os.path.isdir(path) and len(os.listdir(path)) > 0
def infer_optim_dtype(model_dtype: "torch.dtype") -> "torch.dtype":
r"""
Infers the optimal dtype according to the model_dtype and device compatibility.
"""
@@ -203,11 +180,20 @@ def is_gpu_or_npu_available() -> bool:
return is_torch_npu_available() or is_torch_cuda_available()
def has_tokenized_data(path: os.PathLike) -> bool:
r"""
Checks if the path has a tokenized dataset.
"""
return os.path.isdir(path) and len(os.listdir(path)) > 0
def numpify(inputs: Union["NDArray", "torch.Tensor"]) -> "NDArray":
if isinstance(inputs, torch.Tensor):
inputs = inputs.cpu()
if inputs.dtype == torch.bfloat16: # numpy does not support bfloat16 until 1.21.4
inputs = inputs.to(torch.float32)
inputs = inputs.numpy()
return inputs
def skip_check_imports() -> None:
if os.environ.get("FORCE_CHECK_IMPORTS", "0").lower() not in ["true", "1"]:
transformers.dynamic_module_utils.check_imports = get_relative_imports
def torch_gc() -> None:

View File

@@ -1,5 +1,23 @@
# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
# This code is inspired by the HuggingFace's transformers library.
# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/utils/import_utils.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import importlib.metadata
import importlib.util
from functools import lru_cache
from typing import TYPE_CHECKING
from packaging import version
@@ -24,10 +42,6 @@ def is_fastapi_available():
return _is_package_available("fastapi")
def is_flash_attn2_available():
return _is_package_available("flash_attn") and _get_package_version("flash_attn") > version.parse("2.0.0")
def is_galore_available():
return _is_package_available("galore_torch")
@@ -36,18 +50,10 @@ def is_gradio_available():
return _is_package_available("gradio")
def is_jieba_available():
return _is_package_available("jieba")
def is_matplotlib_available():
return _is_package_available("matplotlib")
def is_nltk_available():
return _is_package_available("nltk")
def is_pillow_available():
return _is_package_available("PIL")
@@ -60,10 +66,6 @@ def is_rouge_available():
return _is_package_available("rouge_chinese")
def is_sdpa_available():
return _get_package_version("torch") > version.parse("2.1.1")
def is_starlette_available():
return _is_package_available("sse_starlette")
@@ -74,3 +76,13 @@ def is_uvicorn_available():
def is_vllm_available():
return _is_package_available("vllm")
@lru_cache
def is_vllm_version_greater_than_0_5():
return _get_package_version("vllm") >= version.parse("0.5.0")
@lru_cache
def is_vllm_version_greater_than_0_5_1():
return _get_package_version("vllm") >= version.parse("0.5.1")

View File

@@ -1,3 +1,17 @@
# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import math
import os

Some files were not shown because too many files have changed in this diff Show More