commit e4d11a117b
parent 68365045b4
Author: hiyouga
Date: 2024-07-15 01:04:56 +08:00

Former-commit-id: 43a56cb331fae899ca35b0c312730d4ab79d0c42

18 changed files with 46 additions and 41 deletions


@@ -71,8 +71,6 @@ def llama_attention_forward(
     cos, sin = self.rotary_emb(value_states, position_ids)
     query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)
 
-    past_key_value = getattr(self, "past_key_value", past_key_value)
-
     if past_key_value is not None:
         cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position}
         key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs)
@@ -156,8 +154,6 @@ def llama_flash_attention_2_forward(
     cos, sin = self.rotary_emb(value_states, position_ids)
     query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)
 
-    past_key_value = getattr(self, "past_key_value", past_key_value)
-
     if past_key_value is not None:
         cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position}
         key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs)
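
Both hunks keep the call into the transformers Cache API (past_key_value.update) right after the rotary embeddings are applied. Below is a minimal, standalone sketch of that update pattern, assuming transformers >= 4.36 with DynamicCache and illustrative tensor shapes; it is not part of this commit.

# Minimal sketch of the KV-cache update pattern used in both hunks.
# Shapes and values are illustrative assumptions, not taken from the commit.
import torch
from transformers import DynamicCache

batch, num_heads, head_dim, layer_idx = 1, 8, 64, 0
past_key_value = DynamicCache()

for step, seq_len in enumerate((4, 1)):  # prefill with 4 tokens, then decode 1 token
    key_states = torch.randn(batch, num_heads, seq_len, head_dim)
    value_states = torch.randn(batch, num_heads, seq_len, head_dim)
    # DynamicCache does not consume sin/cos/cache_position; the dict mirrors the diff's API shape.
    cache_kwargs = {"sin": None, "cos": None, "cache_position": None}
    # update() appends the new key/value states for this layer and returns the accumulated cache.
    key_states, value_states = past_key_value.update(key_states, value_states, layer_idx, cache_kwargs)
    print(step, key_states.shape)  # (1, 8, 4, 64) after prefill, then (1, 8, 5, 64) after decode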