Merge pull request #4224 from chuan298/main

Implement efficient packing without cross-contamination attention

Former-commit-id: ac382cc9fe4ec483658fd54f07f9a123788ce1b1
This commit is contained in:
hoshi-hiyouga
2024-07-04 01:18:54 +08:00
committed by GitHub
13 changed files with 358 additions and 39 deletions

View File

@@ -78,6 +78,18 @@ TRAINING_STAGES = {
# Stage names that consume paired data.
# NOTE(review): presumably chosen/rejected pairs — confirm against the data loaders.
STAGES_USE_PAIR_DATA = {"rm", "dpo"}

# Names listed as supported for block-diagonal attention.
# NOTE(review): these look like model-type identifiers; verify against the
# code that consults this set before adding or removing entries.
SUPPORTED_CLASS_FOR_BLOCK_DIAG_ATTN = {
    "falcon",
    "gemma",
    "gemma2",
    "llama",
    "mistral",
    "phi",
    "phi3",
    "qwen2",
    "starcoder2",
}

# Names listed as supported for S2ATTN; only "llama" is listed here.
SUPPORTED_CLASS_FOR_S2ATTN = {"llama"}

# File name used for value-head weights.
V_HEAD_WEIGHTS_NAME = "value_head.bin"