
from typing import Optional

import torch

from ..modeling_flash_attention_utils import _flash_attention_forward, flash_attn_supports_top_left_mask
from ..utils import logging


logger = logging.get_logger(__name__)

# Whether the installed flash-attn build expects the top-left causal mask convention.
_use_top_left_mask = flash_attn_supports_top_left_mask()


def flash_attention_forward(
    module: torch.nn.Module,
    query: torch.Tensor,
    key: torch.Tensor,
    value: torch.Tensor,
    attention_mask: Optional[torch.Tensor],
    dropout: float = 0.0,
    scaling: Optional[float] = None,
    sliding_window: Optional[int] = None,
    softcap: Optional[float] = None,
    **kwargs,
) -> tuple[torch.Tensor, None]:
    if kwargs.get("output_attentions", False) or kwargs.get("head_mask") is not None:
        logger.warning_once(
            "`flash_attention_2` does not support `output_attentions=True` or `head_mask`."
            " Please set your attention to `eager` if you want any of these features."
        )

    # Read the sequence length before the transpose below (inputs arrive as batch, heads, seq, dim).
    seq_len = query.shape[2]

    if any(dim == 0 for dim in query.shape):
        raise ValueError(
            f"Tensor query has shape {query.shape} with a zero dimension.\n"
            "FlashAttention does not support inputs with dim=0.\n"
            "Please check your input shapes or use SDPA instead."
        )

    # FA2 expects (batch, seq, heads, dim) inputs.
    query = query.transpose(1, 2)
    key = key.transpose(1, 2)
    value = value.transpose(1, 2)

    # If the hidden states were silently upcast to float32 (e.g. by layer norms kept in fp32 for
    # training stability), determine which half-precision dtype the kernel should cast back to.
    target_dtype = None
    if query.dtype == torch.float32:
        if torch.is_autocast_enabled():
            target_dtype = torch.get_autocast_gpu_dtype()
        elif hasattr(module.config, "_pre_quantization_dtype"):
            target_dtype = module.config._pre_quantization_dtype
        else:
            target_dtype = next(
                layer for layer in module.modules() if isinstance(layer, torch.nn.Linear)
            ).weight.dtype

    # Causality always comes from the module itself; drop a duplicate kwarg to avoid passing it twice.
    kwargs.pop("is_causal", None)

    attn_output = _flash_attention_forward(
        query,
        key,
        value,
        attention_mask,
        query_length=seq_len,
        is_causal=module.is_causal,
        dropout=dropout,
        softmax_scale=scaling,
        sliding_window=sliding_window,
        softcap=softcap,
        use_top_left_mask=_use_top_left_mask,
        target_dtype=target_dtype,
        attn_implementation=module.config._attn_implementation,
        layer_idx=module.layer_idx if hasattr(module, "layer_idx") else None,
        **kwargs,
    )

    return attn_output, None