from typing import Optional

import torch

from ..modeling_flash_attention_utils import _flash_attention_forward, flash_attn_supports_top_left_mask
from ..utils import logging


logger = logging.get_logger(__name__)

# FlashAttention releases differ in whether the causal mask is anchored top-left or bottom-right;
# probe the installed version once at import time.
_use_top_left_mask = flash_attn_supports_top_left_mask()


def flash_attention_forward(
    module: torch.nn.Module,
    query: torch.Tensor,
    key: torch.Tensor,
    value: torch.Tensor,
    attention_mask: Optional[torch.Tensor],
    dropout: float = 0.0,
    scaling: Optional[float] = None,
    sliding_window: Optional[int] = None,
    softcap: Optional[float] = None,
    **kwargs,
) -> tuple[torch.Tensor, None]:
    if kwargs.get("output_attentions", False) or kwargs.get("head_mask") is not None:
        logger.warning_once(
            "`flash_attention_2` does not support `output_attentions=True` or `head_mask`."
            " Please set your attention to `eager` if you want any of these features."
        )

    # Read the sequence length before the transpose below: inputs arrive as
    # [batch, num_heads, seq_len, head_dim].
    seq_len = query.shape[2]

    if any(dim == 0 for dim in query.shape):
        raise ValueError(
            f"Tensor query has shape {query.shape} with a zero dimension.\n"
            "FlashAttention does not support inputs with dim=0.\n"
            "Please check your input shapes or use SDPA instead."
        )

    # FlashAttention expects [batch, seq_len, num_heads, head_dim] inputs.
    query = query.transpose(1, 2)
    key = key.transpose(1, 2)
    value = value.transpose(1, 2)

    # If the inputs were silently upcast to float32 (e.g. by autocast or by layer norms kept in fp32),
    # figure out the dtype they should be cast back to before calling the FlashAttention kernel.
    target_dtype = None
    if query.dtype == torch.float32:
        if torch.is_autocast_enabled():
            target_dtype = torch.get_autocast_gpu_dtype()
        # Handle the case where the model is quantized
        elif hasattr(module.config, "_pre_quantization_dtype"):
            target_dtype = module.config._pre_quantization_dtype
        else:
            target_dtype = next(
                layer for layer in module.modules() if isinstance(layer, torch.nn.Linear)
            ).weight.dtype

    # An explicit `is_causal` passed through kwargs takes precedence; otherwise fall back to the
    # value configured on the attention module itself.
    is_causal = kwargs.pop("is_causal", None)
    if is_causal is None:
        is_causal = module.is_causal

    attn_output = _flash_attention_forward(
        query,
        key,
        value,
        attention_mask,
        query_length=seq_len,
        is_causal=is_causal,
        dropout=dropout,
        softmax_scale=scaling,
        sliding_window=sliding_window,
        softcap=softcap,
        use_top_left_mask=_use_top_left_mask,
        target_dtype=target_dtype,
        attn_implementation=module.config._attn_implementation,
        layer_idx=module.layer_idx if hasattr(module, "layer_idx") else None,
        **kwargs,
    )

    return attn_output, None
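

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the module above): how an attention
# layer might dispatch to `flash_attention_forward`. The toy `DummyAttention`
# module, its config namespace, and the tensor shapes are assumptions made for
# this example; running it requires a CUDA device with the flash-attn package
# installed, which is why it is only a sketch behind a __main__ guard.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    from types import SimpleNamespace

    class DummyAttention(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.is_causal = True
            self.layer_idx = 0
            # The forward reads `config._attn_implementation`; a plain namespace is enough here.
            self.config = SimpleNamespace(_attn_implementation="flash_attention_2")
            # Gives the dtype probe a Linear layer to inspect if inputs ever arrive in float32.
            self.q_proj = torch.nn.Linear(64, 64)

    module = DummyAttention().to("cuda", dtype=torch.float16)

    # Inputs follow the layout the function expects on entry: [batch, num_heads, seq_len, head_dim].
    q = torch.randn(1, 8, 128, 64, device="cuda", dtype=torch.float16)
    k = torch.randn(1, 8, 128, 64, device="cuda", dtype=torch.float16)
    v = torch.randn(1, 8, 128, 64, device="cuda", dtype=torch.float16)

    out, _ = flash_attention_forward(module, q, k, v, attention_mask=None, dropout=0.0, scaling=64**-0.5)
    # The FA2 kernel returns [batch, seq_len, num_heads, head_dim].
    print(out.shape)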