o
    
sh*-                     @   s   d dl mZ d dlmZ d dlZd dlmZ ddlmZ ddl	m
Z
mZmZmZ ddlmZ ddlmZ dd	lmZmZmZmZ eeZG d
d dejZeG dd deZeG dd deZeG dd deZdS )    )partial)OptionalN   )Cache)BaseModelOutputWithPastQuestionAnsweringModelOutput SequenceClassifierOutputWithPastTokenClassifierOutput)	AutoModel)Unpack)TransformersKwargsauto_docstringcan_return_tupleloggingc                       s$   e Zd ZdZdZ fddZ  ZS )GradientCheckpointingLayera  Base class for layers with gradient checkpointing.

    This class enables gradient checkpointing functionality for a layer. By default, gradient checkpointing is disabled
    (`gradient_checkpointing = False`). When `model.set_gradient_checkpointing()` is called, gradient checkpointing is
    enabled by setting `gradient_checkpointing = True` and assigning a checkpointing function to `_gradient_checkpointing_func`.

    Important:

        When using gradient checkpointing with `use_reentrant=True`, inputs that require gradients (e.g. hidden states)
        must be passed as positional arguments (`*args`) rather than keyword arguments to properly propagate gradients.

        Example:

            ```python
            >>> # Correct - hidden_states passed as positional arg
            >>> out = self.layer(hidden_states, attention_mask=attention_mask)

            >>> # Incorrect - hidden_states passed as keyword arg
            >>> out = self.layer(hidden_states=hidden_states, attention_mask=attention_mask)
            ```
    Fc                    s  | j r| jrd}| jj}d| d}d|v r$|d r$d|d< |d7 }d}d|v r8|d d ur8d |d< |d7 }d}d	|v rL|d	 d urLd |d	< |d
7 }d}d|v r`|d d ur`d |d< |d7 }d}|rn|dd }t| | jtt	 j
fi |g|R  S t	 j
|i |S )NFz7Caching is incompatible with gradient checkpointing in z	. Setting	use_cachez `use_cache=False`,Tpast_key_valuez `past_key_value=None`,past_key_valuesz `past_key_values=None`,
layer_pastz `layer_past=None`,,.)gradient_checkpointingtraining	__class____name__rstriploggerwarning_once_gradient_checkpointing_funcr   super__call__)selfargskwargsdo_warn
layer_namemessager    Z/var/www/html/alpaca_bot/venv/lib/python3.10/site-packages/transformers/modeling_layers.pyr    <   s2   
"z#GradientCheckpointingLayer.__call__)r   
__module____qualname____doc__r   r    __classcell__r(   r(   r'   r)   r   #   s    r   c                       s   e Zd ZdZ fddZee							ddeej	 deej
 deej	 dee d	eej d
eej	 dee dee defddZ  ZS ) GenericForSequenceClassificationmodelc                    sJ   t  | |j| _t| | jt| tj|j	| jdd| _
|   d S )NF)bias)r   __init__
num_labelssetattrbase_model_prefixr
   from_confignnLinearhidden_sizescore	post_initr!   configr'   r(   r)   r1   e   s
   z)GenericForSequenceClassification.__init__N	input_idsattention_maskposition_idsr   inputs_embedslabelsr   r#   returnc                 K   s2  t | | j|f|||||d|}	|	j}
| |
}|d ur$|jd }n|jd }| jjd u r7|dkr7td| jjd u r@d}n1|d ure|| jjk|j	t
j}t
j|jd |j	t
jd}|| d}nd}t| jj d |t
j||j	d|f }d }|d ur| j|||| jd	}t|||	j|	j|	jd
S )Nr>   r?   r   r@   r   r   r   z=Cannot handle batch sizes > 1 if no padding token is defined.)devicedtypez will not detect padding tokens in `inputs_embeds`. Results may be unexpected if using padding tokens in conjunction with `inputs_embeds.`)rE   )logitsrA   pooled_logitsr<   )lossrG   r   hidden_states
attentions)getattrr4   last_hidden_stater9   shaper<   pad_token_id
ValueErrortorE   torchint32arangeargmaxr   r   r   r   loss_functionr   r   rJ   rK   )r!   r=   r>   r?   r   r@   rA   r   r#   transformer_outputsrJ   rG   
batch_sizelast_non_pad_tokennon_pad_masktoken_indicesrH   rI   r(   r(   r)   forwardo   sN   
	

z(GenericForSequenceClassification.forwardNNNNNNN)r   r*   r+   r4   r1   r   r   r   rR   
LongTensorTensorr   FloatTensorboolr   r   r   r\   r-   r(   r(   r'   r)   r.   a   s>    
	
r.   c                       s   e Zd ZdZ fddZdd Zdd Zee							dd	e	e
j d
e	e
j de	e
j de	e de	e
j de	e
j de	e
j dee defddZ  ZS )GenericForQuestionAnsweringr/   c                    s<   t  | t| | jt| t|jd| _	| 
  d S )N   )r   r1   r3   r4   r
   r5   r6   r7   r8   
qa_outputsr:   r;   r'   r(   r)   r1      s   z$GenericForQuestionAnswering.__init__c                 C   s   t | | jjS NrL   r4   embed_tokens)r!   r(   r(   r)   get_input_embeddings   s   z0GenericForQuestionAnswering.get_input_embeddingsc                 C   s   |t | | j_d S re   rf   )r!   valuer(   r(   r)   set_input_embeddings   s   z0GenericForQuestionAnswering.set_input_embeddingsNr=   r>   r?   r   r@   start_positionsend_positionsr#   rB   c                 K   s   t | | j|f||||d|}	|	j}
| |
}|jddd\}}|d }|d }d }|d urF|d urF| j||||fi |}t||||	j	|	j
dS )N)r>   r?   r   r@   r   rD   )dim)rI   start_logits
end_logitsrJ   rK   )rL   r4   rM   rd   splitsqueeze
contiguousrV   r   rJ   rK   )r!   r=   r>   r?   r   r@   rk   rl   r#   outputssequence_outputrG   rn   ro   rI   r(   r(   r)   r\      s2   
	
z#GenericForQuestionAnswering.forwardr]   )r   r*   r+   r4   r1   rh   rj   r   r   r   rR   r^   r_   r   r`   r   r   r   r\   r-   r(   r(   r'   r)   rb      sB    		
rb   c                       s   e Zd ZdZ fddZee							ddeej	 deej
 deej	 dee d	eej d
eej	 dee defddZ  ZS )GenericForTokenClassificationr/   c                    s   t  | |j| _t| | jt| t|dd d ur |j}nt|dd d ur,|j	}nd}t
|| _t
|j|j| _|   d S )Nclassifier_dropouthidden_dropoutg?)r   r1   r2   r3   r4   r
   r5   rL   rv   rw   r6   Dropoutdropoutr7   r8   r9   r:   )r!   r<   rv   r'   r(   r)   r1      s   z&GenericForTokenClassification.__init__Nr=   r>   r?   r   r@   rA   r   rB   c                 K   sn   t | | j|f|||||d|}	|	j}
| |
}
| |
}d }|d ur-| ||| j}t|||	j|	j	dS )NrC   )rI   rG   rJ   rK   )
rL   r4   rM   ry   r9   rV   r<   r	   rJ   rK   )r!   r=   r>   r?   r   r@   rA   r   r#   rs   rt   rG   rI   r(   r(   r)   r\      s.   
	

z%GenericForTokenClassification.forwardr]   )r   r*   r+   r4   r1   r   r   r   rR   r^   r_   r   r`   ra   r	   r\   r-   r(   r(   r'   r)   ru      s:    
ru   ) 	functoolsr   typingr   rR   torch.nnr6   cache_utilsr   modeling_outputsr   r   r   r	   models.autor
   processing_utilsr   utilsr   r   r   r   
get_loggerr   r   Moduler   objectr.   rb   ru   r(   r(   r(   r)   <module>   s"   
>J<