Merge pull request #4224 from chuan298/main
Implement efficient packing without cross-contamination attention

Former-commit-id: ac382cc9fe4ec483658fd54f07f9a123788ce1b1
This commit is contained in:
@@ -78,6 +78,18 @@ TRAINING_STAGES = {
|
||||
|
||||
# Stage names "rm" and "dpo".
# NOTE(review): presumably the training stages that consume paired examples — confirm against the data loading code.
STAGES_USE_PAIR_DATA = {"rm", "dpo"}
|
||||
|
||||
# Names for which block-diagonal attention (packing sequences without
# cross-contamination between them) is supported.
# NOTE(review): presumably compared against the model config's `model_type`
# — confirm against the code that reads this set.
SUPPORTED_CLASS_FOR_BLOCK_DIAG_ATTN = {
    "falcon",
    "gemma",
    "gemma2",
    "llama",
    "mistral",
    "phi",
    "phi3",
    "qwen2",
    "starcoder2",
}
|
||||
|
||||
# Names for which S2-Attn is supported; only "llama" is listed.
# NOTE(review): presumably matched against the model type — confirm against the code that reads this set.
SUPPORTED_CLASS_FOR_S2ATTN = {"llama"}
|
||||
|
||||
# File name constant "value_head.bin".
# NOTE(review): presumably the file holding value-head weights — confirm against the save/load code.
V_HEAD_WEIGHTS_NAME = "value_head.bin"
|
||||
|
||||
Reference in New Issue
Block a user