
    <h,                     >   S SK Jr  S SKJr  S SKJr  S SKrS SKJr  SSK	J
r
  SSKJrJrJrJr  SSKJr  SS	KJr  SS
KJrJrJrJr  \R2                  " \5      r " S S\R8                  5      r\ " S S\5      5       r\ " S S\5      5       r\ " S S\5      5       r g)    )ABC)partial)OptionalN   )Cache)BaseModelOutputWithPastQuestionAnsweringModelOutput SequenceClassifierOutputWithPastTokenClassifierOutput)	AutoModel)Unpack)TransformersKwargsauto_docstringcan_return_tupleloggingc                   0   ^  \ rS rSrSrSrU 4S jrSrU =r$ )GradientCheckpointingLayer$   a  Base class for layers with gradient checkpointing.

This class enables gradient checkpointing functionality for a layer. By default, gradient checkpointing is disabled
(`gradient_checkpointing = False`). When `model.set_gradient_checkpointing()` is called, gradient checkpointing is
enabled by setting `gradient_checkpointing = True` and assigning a checkpointing function to `_gradient_checkpointing_func`.

Important:

    When using gradient checkpointing with `use_reentrant=True`, inputs that require gradients (e.g. hidden states)
    must be passed as positional arguments (`*args`) rather than keyword arguments to properly propagate gradients.

    Example:

        ```python
        >>> # Correct - hidden_states passed as positional arg
        >>> out = self.layer(hidden_states, attention_mask=attention_mask)

        >>> # Incorrect - hidden_states passed as keyword arg
        >>> out = self.layer(hidden_states=hidden_states, attention_mask=attention_mask)
        ```
Fc                   > U R                   (       a  U R                  (       a  SnU R                  R                  nSU S3nSU;   a  US   (       a  SUS'   US-  nSnSU;   a  US   b  S US'   US-  nSnS	U;   a  US	   b  S US	'   US
-  nSnSU;   a  US   b  S US'   US-  nSnU(       a)  UR	                  S5      S-   n[
        R                  U5        U R                  " [        [        TU ](  40 UD6/UQ76 $ [        TU ](  " U0 UD6$ )NFz7Caching is incompatible with gradient checkpointing in z	. Setting	use_cachez `use_cache=False`,Tpast_key_valuez `past_key_value=None`,past_key_valuesz `past_key_values=None`,
layer_pastz `layer_past=None`,,.)gradient_checkpointingtraining	__class____name__rstriploggerwarning_gradient_checkpointing_funcr   super__call__)selfargskwargsdo_warn
layer_namemessager   s         T/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/modeling_layers.pyr%   #GradientCheckpointingLayer.__call__=   sI   &&4==G00JOPZ|[deGf$)<&+{#00  6)f5E.F.R+/'(44 F*v6G/H/T,0()55v%&*>*J'+|$00 !..-3w'44WUW=M5XQW5X`[_``w000     )	r   
__module____qualname____firstlineno____doc__r   r%   __static_attributes____classcell__r   s   @r,   r   r   $   s    , #!1 !1r.   r   c                     ^  \ rS rSrSrU 4S jr\\       SS\\	R                     S\\	R                     S\\	R                     S\\   S\\	R                     S	\\	R                     S
\\   S\\   S\4S jj5       5       rSrU =r$ ) GenericForSequenceClassificationa   modelc                   > [         TU ]  U5        UR                  U l        [        X R                  [
        R                  " U5      5        [        R                  " UR                  U R                  SS9U l
        U R                  5         g )NF)bias)r$   __init__
num_labelssetattrbase_model_prefixr   from_confignnLinearhidden_sizescore	post_initr&   configr   s     r,   r=   )GenericForSequenceClassification.__init__e   sb      ++,,i.C.CF.KLYYv114??O
 	r.   	input_idsattention_maskposition_idsr   inputs_embedslabelsr   r(   returnc           	         [        X R                  5      " U4UUUUUS.UD6n	U	R                  n
U R                  U
5      nUb  UR                  S   nOUR                  S   nU R
                  R                  c  US:w  a  [        S5      eU R
                  R                  c  SnOUb  XR
                  R                  :g  R                  UR                  [        R                  5      n[        R                  " UR                  S   UR                  [        R                  S9nX-  R                  S5      nO.Sn[        R                  U R                   R"                   S35        U[        R                  " XR                  S9U4   nS nUb  U R%                  XUU R
                  S	9n['        UUU	R(                  U	R*                  U	R,                  S
9$ )NrK   rL   r   rM   r   r   r   z=Cannot handle batch sizes > 1 if no padding token is defined.)devicedtypez will not detect padding tokens in `inputs_embeds`. Results may be unexpected if using padding tokens in conjunction with `inputs_embeds.`)rS   )logitsrN   pooled_logitsrH   )lossrU   r   hidden_states
attentions)getattrr@   last_hidden_staterE   shaperH   pad_token_id
ValueErrortorS   torchint32arangeargmaxr!   warning_oncer   r   loss_functionr
   r   rX   rY   )r&   rJ   rK   rL   r   rM   rN   r   r(   transformer_outputsrX   rU   
batch_sizelast_non_pad_tokennon_pad_masktoken_indicesrV   rW   s                     r,   forward(GenericForSequenceClassification.forwardo   s    8?tE[E[7\8
)%+'8
 8
 ,==M* "+J&,,Q/J;;##+
a\]];;##+!#"%)A)AAEEfmmUZU`U`aL!LL)<V]]Z_ZeZefM"/">!F!Fr!J!#>>**+ ,Z Z
 u||J}}MOaab%%VR_hlhshs%tD/ /??-;;*55
 	
r.   )r>   rE   NNNNNNN)r   r0   r1   r2   r@   r=   r   r   r   r`   
LongTensorTensorr   FloatTensorboolr   r   r
   rk   r4   r5   r6   s   @r,   r8   r8   a   s      151537+/59-1$(8
E,,-8
 !.8
 u//0	8

 "%8
   1 128
 ))*8
 D>8
 +,8
 
*8
  8
r.   r8   c                   4  ^  \ rS rSrSrU 4S jrS rS r\\	       SS\
\R                     S\
\R                     S\
\R                     S	\
\   S
\
\R                     S\
\R                     S\
\R                     S\\   S\4S jj5       5       rSrU =r$ )GenericForQuestionAnswering   r:   c                    > [         TU ]  U5        [        X R                  [        R
                  " U5      5        [        R                  " UR                  S5      U l	        U R                  5         g )N   )r$   r=   r?   r@   r   rA   rB   rC   rD   
qa_outputsrF   rG   s     r,   r=   $GenericForQuestionAnswering.__init__   sO     ,,i.C.CF.KL))F$6$6: 	r.   c                 @    [        X R                  5      R                  $ NrZ   r@   embed_tokens)r&   s    r,   get_input_embeddings0GenericForQuestionAnswering.get_input_embeddings   s    t334AAAr.   c                 8    U[        X R                  5      l        g rz   r{   )r&   values     r,   set_input_embeddings0GenericForQuestionAnswering.set_input_embeddings   s    =B,,-:r.   rJ   rK   rL   r   rM   start_positionsend_positionsr(   rO   c                    [        X R                  5      " U4UUUUS.UD6n	U	R                  n
U R                  U
5      nUR	                  SSS9u  pUR                  S5      R                  5       nUR                  S5      R                  5       nS nUb  Ub  U R                  " XXg40 UD6n[        UUUU	R                  U	R                  S9$ )N)rK   rL   r   rM   r   rR   )dim)rW   start_logits
end_logitsrX   rY   )rZ   r@   r[   rw   splitsqueeze
contiguousre   r	   rX   rY   )r&   rJ   rK   rL   r   rM   r   r   r(   outputssequence_outputrU   r   r   rW   s                  r,   rk   #GenericForQuestionAnswering.forward   s     ,349O9O+P,
)%+',
 ,
 "331#)<<r<#: #++B/::<''+668
&=+D%%libhiD+%!!//))
 	
r.   )rw   rm   )r   r0   r1   r2   r@   r=   r}   r   r   r   r   r`   rn   ro   r   rp   r   r   r	   rk   r4   r5   r6   s   @r,   rs   rs      s    BC  151537+/596:48%
E,,-%
 !.%
 u//0	%

 "%%
   1 12%
 "%"2"23%
   0 01%
 +,%
 
&%
  %
r.   rs   c                   
  ^  \ rS rSrSrU 4S jr\\       SS\\	R                     S\\	R                     S\\	R                     S\\   S\\	R                     S	\\	R                     S
\\   S\4S jj5       5       rSrU =r$ )GenericForTokenClassification   r:   c                   > [         TU ]  U5        UR                  U l        [        X R                  [
        R                  " U5      5        [        USS 5      b  UR                  nO[        USS 5      b  UR                  nOSn[        R                  " U5      U l        [        R                  " UR                  UR                  5      U l        U R!                  5         g )Nclassifier_dropouthidden_dropoutg?)r$   r=   r>   r?   r@   r   rA   rZ   r   r   rB   DropoutdropoutrC   rD   rE   rF   )r&   rH   r   r   s      r,   r=   &GenericForTokenClassification.__init__   s      ++,,i.C.CF.KL6/6B!'!:!:V-t4@!'!6!6!$zz"45YYv1163D3DE
 	r.   rJ   rK   rL   r   rM   rN   r   rO   c           	      $   [        X R                  5      " U4UUUUUS.UD6n	U	R                  n
U R                  U
5      n
U R	                  U
5      nS nUb  U R                  XU R                  5      n[        UUU	R                  U	R                  S9$ )NrQ   )rW   rU   rX   rY   )
rZ   r@   r[   r   rE   re   rH   r   rX   rY   )r&   rJ   rK   rL   r   rM   rN   r   r(   r   r   rU   rW   s                r,   rk   %GenericForTokenClassification.forward   s     ,349O9O+P,
)%+',
 ,
 "33,,7O,%%fdkkBD$!//))	
 	
r.   )r   r>   rE   rm   )r   r0   r1   r2   r@   r=   r   r   r   r`   rn   ro   r   rp   rq   r   rk   r4   r5   r6   s   @r,   r   r      s    "  151537+/59-1$(!
E,,-!
 !.!
 u//0	!

 "%!
   1 12!
 ))*!
 D>!
 
!
  !
r.   r   )!abcr   	functoolsr   typingr   r`   torch.nnrB   cache_utilsr   modeling_outputsr   r	   r
   r   models.autor   processing_utilsr   utilsr   r   r   r   
get_loggerr   r!   Moduler   r8   rs   r   r/   r.   r,   <module>r      s           # $ P P 
		H	%:1 :1z G
s G
 G
T 9
# 9
 9
x 7
C 7
 7
r.   