
    h                         d dl mZ d dlZd dlmZ dej                  dedej                  fdZdej                  d	ej                  d
ej                  dej                  deej                     defdZ	y)    )OptionalN)nnhidden_statesn_repreturnc                     | j                   \  }}}}|dk(  r| S | dddddddddf   j                  |||||      } | j                  |||z  ||      S )z
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
       N)shapeexpandreshape)r   r   batchnum_key_value_headsslenhead_dims         d/var/www/html/aiagenthome/venv/lib/python3.12/site-packages/transformers/integrations/eager_paged.py	repeat_kvr      so    
 2?1D1D.Ehz!!Qa"23::5BUW\^bdlmM  (;e(CT8TT    modulequerykeyvalueattention_maskscalingc                    |j                  dd       }|d |j                  ||| j                  fi |\  }}|j                  dd      j	                  d      }|j                  dd      j	                  d      }t        | d      r,t        || j                        }t        || j                        }t        |t              rt        | dd      }|dk(  s|dnd}	||	   }
n|}
t        j                  ||j                  dd	            |z  }|
||
z   }t        | d
      r| j                  j                  dddd      j                  |j                   d   d|j                   d   d      }t        j"                  ||gd      }||j%                  dd      j&                  z
  }t(        j*                  j-                  |dt        j.                        j1                  |j2                        }|dd df   }nIt(        j*                  j-                  |dt        j.                        j1                  |j2                        }t        j                  ||      }|j                  dd      j5                         }||fS )Ncacher   r	   num_key_value_groupssliding_windowfull_attentionsliding_attention      sinks)dimT)r%   keepdim)r%   dtype.)popupdate	layer_idx	transpose	unsqueezehasattrr   r   
isinstancedictgetattrtorchmatmulr"   r   r   r
   catmaxvaluesr   
functionalsoftmaxfloat32tor'   
contiguous)r   r   r   r   r   r   kwargsr   r   
layer_typecausal_maskattn_weightsr"   attn_outputs                 r   eager_paged_attention_forwardr@      s;    JJw%E!U\\#uf.>.>I&I
UmmAq!++A.1%//2 v-.V889%!<!<= .$' )91=)71)<@V%\o
$Z0$<<s}}Q':;gEL#k1 vw$$QAq188QU[[Y[_^`ayy,!6B?#l&6&62t&6&L&S&SS}},,\r,WZZ[`[f[fg#C"H-}},,\r,WZZ[`[f[fg,,|U3K''1-88:K$$r   )
typingr   r1   r   Tensorintr   Modulefloatr@    r   r   <module>rG      s      	UU\\ 	U# 	U%,, 	U2%II2%<<2% 
2% <<	2%
 U\\*2% 2%r   