from typing import Optional

import torch
from torch import nn


def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
    """
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
    """
    batch, num_key_value_heads, slen, head_dim = hidden_states.shape
    if n_rep == 1:
        return hidden_states
    # Insert a size-1 axis after the KV-head axis, broadcast it to n_rep copies,
    # then fold it back into the head axis.
    hidden_states = hidden_states[:, :, None, :, :].expand(batch, num_key_value_heads, n_rep, slen, head_dim)
    return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim)

def eager_paged_attention_forward(
    module: nn.Module,
    query: torch.Tensor,
    key: torch.Tensor,
    value: torch.Tensor,
    attention_mask: Optional[torch.Tensor],
    scaling: float,
    dropout: float = 0.0,
    **kwargs,
):
    # If a paged cache is supplied via kwargs, fold the new key/value states into
    # it and attend over everything cached so far. (`dropout` is accepted for
    # signature parity with the other attention backends; this eager path does
    # not apply it.)
    cache = kwargs.pop("cache", None)
    if cache is not None:
        key, value = cache.update(key, value, module.layer_idx, **kwargs)
    key_states = repeat_kv(key, module.num_key_value_groups)
    value_states = repeat_kv(value, module.num_key_value_groups)

    attn_weights = torch.matmul(query, key_states.transpose(2, 3)) * scaling
    if attention_mask is not None:
        # Trim the mask to the current key length before adding it to the logits.
        causal_mask = attention_mask[:, :, :, : key_states.shape[-2]]
        attn_weights = attn_weights + causal_mask

    # Softmax in float32 for numerical stability, then cast back to the query dtype.
    attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query.dtype)
    attn_output = torch.matmul(attn_weights, value_states)
    # (batch, num_heads, q_len, head_dim) -> (batch, q_len, num_heads, head_dim)
    attn_output = attn_output.transpose(1, 2).contiguous()

    return attn_output, attn_weights
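
# Hedged sketch of the cache interface the forward pass above expects. This
# stand-in class is hypothetical (not the transformers paged-cache class):
# anything exposing an `update(key, value, layer_idx, **kwargs)` method that
# returns the full key/value tensors to attend over will work. Here it simply
# concatenates along the sequence axis (dim=-2).
class _ConcatCache:
    def __init__(self):
        self.keys = {}
        self.values = {}

    def update(self, key, value, layer_idx, **kwargs):
        if layer_idx in self.keys:
            self.keys[layer_idx] = torch.cat([self.keys[layer_idx], key], dim=-2)
            self.values[layer_idx] = torch.cat([self.values[layer_idx], value], dim=-2)
        else:
            self.keys[layer_idx], self.values[layer_idx] = key, value
        return self.keys[layer_idx], self.values[layer_idx]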
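
# A minimal smoke test for the forward pass (illustrative: the dummy module
# and tensor sizes below are stand-ins, not transformers classes or defaults).
# Guarded so importing this module stays side-effect free.
if __name__ == "__main__":
    class _DummyAttention(nn.Module):
        layer_idx = 0
        num_key_value_groups = 4  # 8 query heads / 2 KV heads

    q = torch.randn(1, 8, 10, 64)  # (batch, num_heads, q_len, head_dim)
    k = torch.randn(1, 2, 10, 64)  # (batch, num_kv_heads, kv_len, head_dim)
    v = torch.randn(1, 2, 10, 64)
    out, weights = eager_paged_attention_forward(
        _DummyAttention(), q, k, v, attention_mask=None, scaling=64**-0.5, cache=_ConcatCache()
    )
    print(out.shape)      # torch.Size([1, 10, 8, 64])
    print(weights.shape)  # torch.Size([1, 8, 10, 10])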