
Commit

Update attention.py
SolenoidWGT authored Aug 13, 2024
1 parent bd28606 commit bc41ee8
Showing 1 changed file with 0 additions and 2 deletions.
internlm/model/ops/attention.py (2 changes: 0 additions & 2 deletions)
@@ -376,7 +376,6 @@ def _npu_varlen_kvpacked_attn(
 ):
     # TODO: support npu native varlen flash attention
     k, v = kv.unbind(dim=2)
-    # k, v = k.squeeze(dim=2), v.squeeze(dim=2)
     return _npu_varlen_qkvsplited_attn(
         q,
         k,
@@ -393,7 +392,6 @@ def _npu_varlen_kvpacked_attn(

 def _npu_fixedlen_kvpacked_attn(q: torch.Tensor, kv: torch.Tensor, dropout_p: float, softmax_scale=None, causal=False):
     k, v = kv.unbind(dim=2)
-    # k, v = k.squeeze(dim=2), v.squeeze(dim=2)
     return _npu_fixedlen_qkvsplited_attn(q, k, v, dropout_p, softmax_scale, causal)
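The two deleted lines in both functions were commented-out squeeze calls left over from an earlier way of unpacking the KV tensor. A minimal sketch of why they are redundant, assuming the usual kvpacked layout of (batch, seqlen, 2, num_heads, head_dim) (the layout is an assumption, not stated in this diff): torch.unbind(dim=2) already removes the size-2 packing axis, so there is nothing left to squeeze.

# Minimal sketch, not part of the commit. Assumes kv is packed as
# (batch, seqlen, 2, num_heads, head_dim); the exact layout is an assumption.
import torch

batch, seqlen, num_heads, head_dim = 2, 8, 4, 16
kv = torch.randn(batch, seqlen, 2, num_heads, head_dim)

k, v = kv.unbind(dim=2)  # unbind drops the size-2 packing axis entirely
assert k.shape == (batch, seqlen, num_heads, head_dim)
assert v.shape == (batch, seqlen, num_heads, head_dim)
# Hence the commented-out k.squeeze(dim=2) / v.squeeze(dim=2) calls were dead
# code: after unbind there is no singleton dimension left to squeeze.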

