
    <hF                        S r SSKrSSKJrJr  SSKrSSKrSSKJr  SSKJ	r	J
r
Jr  SSKJr  SSKJrJrJrJrJrJrJr  SS	KJr  SS
KJrJr  SSKJrJr  SSKJr  SSK J!r!J"r"J#r#J$r$J%r%J&r&  \RN                  " \(5      r) " S S\RT                  5      r+ " S S\RT                  5      r, " S S\RT                  5      r- " S S\RT                  5      r. " S S\RT                  5      r/ " S S\RT                  5      r0 " S S\RT                  5      r1 " S S\RT                  5      r2 " S S \RT                  5      r3\ " S! S"\5      5       r4\ " S# S$\45      5       r5\ " S% S&\45      5       r6 " S' S(\RT                  5      r7\" S)S*9 " S+ S,\45      5       r8\ " S- S.\45      5       r9\ " S/ S0\45      5       r: " S1 S2\RT                  5      r;\ " S3 S4\45      5       r<S7S5 jr=/ S6Qr>g)8zPyTorch I-BERT model.    N)OptionalUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )gelu))BaseModelOutputWithPastAndCrossAttentions,BaseModelOutputWithPoolingAndCrossAttentionsMaskedLMOutputMultipleChoiceModelOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel) find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringlogging   )IBertConfig)IntGELUIntLayerNorm
IntSoftmaxQuantActQuantEmbeddingQuantLinearc                   >   ^  \ rS rSrSrU 4S jr SS jrS rSrU =r	$ )IBertEmbeddings0   zN
Same as BertEmbeddings with a tiny tweak for positional embeddings indexing.
c                 r  > [         TU ]  5         UR                  U l        SU l        SU l        SU l        SU l        SU l        [        UR                  UR                  UR                  U R                  U R                  S9U l        [        UR                  UR                  U R                  U R                  S9U l        U R                  S[         R"                  " UR$                  5      R'                  S5      S	S
9  [)        USS5      U l        UR                  U l        [        UR$                  UR                  U R,                  U R                  U R                  S9U l        [1        U R                  U R                  S9U l        [1        U R                  U R                  S9U l        [7        UR                  UR8                  U R                  U R                  UR:                  S9U l        [1        U R
                  U R                  S9U l        [@        RB                  " URD                  5      U l#        g )N             )padding_idx
weight_bit
quant_mode)r(   r)   position_ids)r   F)
persistentposition_embedding_typeabsoluter)   eps
output_bitr)   force_dequant)$super__init__r)   embedding_bitembedding_act_bitact_bitln_input_bitln_output_bitr   
vocab_sizehidden_sizepad_token_idword_embeddingstype_vocab_sizetoken_type_embeddingsregister_buffertorcharangemax_position_embeddingsexpandgetattrr-   r'   position_embeddingsr   embeddings_act1embeddings_act2r   layer_norm_epsr3   	LayerNormoutput_activationr   Dropouthidden_dropout_probdropoutselfconfig	__class__s     `/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/ibert/modeling_ibert.pyr5   IBertEmbeddings.__init__5   s    ++!#-++)) 
 &4""F$6$64CUCUbfbqbq&
"
 	ELL)G)GHOOPWXej 	 	
 (/v7PR\']$ "..#1**(())$
   ((>(>4??['(>(>4??[ &%%)) ..
 "*$,,4??!Szz&"<"<=    c                    UcD  Ub0  [        XR                  U5      R                  UR                  5      nOU R	                  U5      nUb  UR                  5       nOUR                  5       S S nUc8  [        R                  " U[        R                  U R                  R                  S9nUc  U R                  U5      u  pGOS nU R                  U5      u  pU R                  UUUU	S9u  pU R                  S:X  a'  U R                  U5      u  pU R                  U
UUUS9u  pU R                  X5      u  pU R!                  U
5      n
U R#                  X5      u  pX4$ )Nr+   dtypedeviceidentityidentity_scaling_factorr.   )"create_position_ids_from_input_idsr'   torZ   &create_position_ids_from_inputs_embedssizerB   zeroslongr*   r>   r@   rH   r-   rG   rK   rO   rL   )rQ   	input_idstoken_type_idsr*   inputs_embedspast_key_values_lengthinput_shapeinputs_embeds_scaling_factorr@   $token_type_embeddings_scaling_factor
embeddingsembeddings_scaling_factorrG   "position_embeddings_scaling_factors                 rT   forwardIBertEmbeddings.forwardi   s}    $A//1G "Y%%&   $JJ=Y #..*K',,.s3K!"[[EJJtO`O`OgOghN :>:N:Ny:Y7M7+/(FJF`F`aoFpC040D0D(*$H	 1E 1
-
 '':5FJF^F^_kFlC484H4H),(J	 5I 51J 15z0e-
\\*-
040F0Fz0m-
44rV   c                    UR                  5       SS nUS   n[        R                  " U R                  S-   X0R                  -   S-   [        R                  UR
                  S9nUR                  S5      R                  U5      $ )z
We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

Args:
    inputs_embeds: torch.Tensor

Returns: torch.Tensor
Nr+   r   rX   r   )ra   rB   rC   r'   rc   rZ   	unsqueezerE   )rQ   rf   rh   sequence_lengthr*   s        rT   r`   6IBertEmbeddings.create_position_ids_from_inputs_embeds   s~     $((*3B/%a.||q /4D4D"Dq"HPUPZPZcpcwcw
 %%a(//<<rV   )rK   r8   rO   r7   r6   rH   rI   r9   r:   rL   r'   r-   rG   r)   r@   r>   )NNNNr   )
__name__
__module____qualname____firstlineno____doc__r5   rn   r`   __static_attributes____classcell__rS   s   @rT   r    r    0   s%    2>j rs-5^= =rV   r    c                   8   ^  \ rS rSrU 4S jr   SS jrSrU =r$ )IBertSelfAttention   c           
      ~  > [         TU ]  5         UR                  UR                  -  S:w  a7  [	        US5      (       d&  [        SUR                   SUR                   S35      eUR                  U l        SU l        SU l        SU l	        UR                  U l        [        UR                  UR                  -  5      U l        U R                  U R                  -  U l        [        UR                  U R                  SU R                  U R                  U R                  SS	9U l        [        UR                  U R                  SU R                  U R                  U R                  SS	9U l        [        UR                  U R                  SU R                  U R                  U R                  SS	9U l        [#        U R                  U R                  S
9U l        [#        U R                  U R                  S
9U l        [#        U R                  U R                  S
9U l        [#        U R                  U R                  S
9U l        [,        R.                  " UR0                  5      U l        [5        USS5      U l        U R6                  S:w  a  [        S5      e[9        U R                  U R                  UR:                  S9U l        g )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()r#   r&   Tbiasr(   bias_bitr)   per_channelr/   r-   r.   zDI-BERT only supports 'absolute' for `config.position_embedding_type`r)   r3   )r4   r5   r<   num_attention_headshasattr
ValueErrorr)   r(   r   r8   intattention_head_sizeall_head_sizer   querykeyvaluer   query_activationkey_activationvalue_activationrL   r   rM   attention_probs_dropout_probrO   rF   r-   r   r3   softmaxrP   s     rT   r5   IBertSelfAttention.__init__   sB    : ::a?PVXhHiHi#F$6$6#7 8 445Q8  !++#)#=#= #&v'9'9F<V<V'V#W !558P8PP !]]

 ]]
 !]]

 !)$// R&t||P ($// R!)$,,4??!Szz&"E"EF'.v7PR\']$'':5cdd!$,,4??Z`ZnZnorV   c                    U R                  X5      u  pgU R                  X5      u  pU R                  X5      u  pU R                  Xg5      u  pU R	                  X5      u  pU R                  X5      u  nnUR                  u  nnnUR                  USU R                  U R                  5      R                  SS5      nUR                  USU R                  U R                  5      R                  SS5      nUR                  USU R                  U R                  5      R                  SS5      n[        R                  " XR                  SS5      5      n[        R                  " U R                  5      nUU-  nU R                  (       a  X-  U-  nOS nUb  UU-   nU R!                  UU5      u  nnU R#                  U5      nUb  UU-  n[        R                  " UU5      nUb  UU-  nOS nUR%                  SSSS5      R'                  5       nUR)                  5       S S U R*                  4-   nUR                  " U6 nU R-                  UU5      u  nnU(       a  UU4OU4nU(       a  UU4OU4nUU4$ )Nr+   r      r   r	   )r   r   r   r   r   r   shapeviewr   r   	transposerB   matmulmathsqrtr)   r   rO   permute
contiguousra   r   rL   )rQ   hidden_stateshidden_states_scaling_factorattention_mask	head_maskoutput_attentionsmixed_query_layer mixed_query_layer_scaling_factormixed_key_layermixed_key_layer_scaling_factormixed_value_layer mixed_value_layer_scaling_factorquery_layerquery_layer_scaling_factor	key_layerkey_layer_scaling_factorvalue_layervalue_layer_scaling_factor
batch_size
seq_length_attention_scoresscaleattention_scores_scaling_factorattention_probsattention_probs_scaling_factorcontext_layercontext_layer_scaling_factornew_context_layer_shapeoutputsoutput_scaling_factors                                  rT   rn   IBertSelfAttention.forward   s    ?Cjj>u;:>((=:o7>Bjj>u; 372G2G3
/ /3.A.A/.r+	262G2G3
//
 %2$7$7!
J!&&z2t7O7OQUQiQijttq
 NN:r43K3KTMeMefppqrtuv	!&&z2t7O7OQUQiQijttq

 !<<5H5HR5PQ		$223+e3??.H.cfk.k+.2+%/.@ ;?,,=;
77 ,,7  -	9O_kB)5+ILf+f(+/(%--aAq9DDF"/"4"4"6s";t?Q?Q>S"S%**,CD 7;6L6L77
33 7H=/2mM] ! *+IJ.0 	 ---rV   )r8   r   r   r   rO   r   r   r   rL   r-   r)   r   r   r   r   r   r(   NNFrt   ru   rv   rw   r5   rn   ry   rz   r{   s   @rT   r}   r}      s!    8p| P. P.rV   r}   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )IBertSelfOutputi8  c           
      t  > [         TU ]  5         UR                  U l        SU l        SU l        SU l        SU l        SU l        [        UR                  UR                  SU R                  U R
                  U R                  SS9U l
        [        U R                  U R                  S9U l        [        UR                  UR                  U R                  U R                  UR                  S9U l        [        U R                  U R                  S9U l        [$        R&                  " UR(                  5      U l        g Nr#   r&   r%   Tr   r/   r0   )r4   r5   r)   r8   r(   r   r9   r:   r   r<   denser   ln_input_actr   rJ   r3   rK   rL   r   rM   rN   rO   rP   s     rT   r5   IBertSelfOutput.__init__9  s     ++ ]]

 %T%6%64??S%%%)) ..
 "*$,,4??!Szz&"<"<=rV   c                     U R                  X5      u  pU R                  U5      nU R                  UUUUS9u  pU R                  X5      u  pU R	                  X5      u  pX4$ Nr[   r   rO   r   rK   rL   rQ   r   r   input_tensorinput_tensor_scaling_factors        rT   rn   IBertSelfOutput.forwardV  z    6:jj6m3]36:6G6G(!$?	 7H 7
3 7;nn]6q36:6L6L7
3 ::rV   rK   r8   r   r   rO   r   r9   r:   rL   r)   r(   r   r{   s   @rT   r   r   8      >:; ;rV   r   c                   >   ^  \ rS rSrU 4S jrS r   SS jrSrU =r$ )IBertAttentionig  c                    > [         TU ]  5         UR                  U l        [        U5      U l        [        U5      U l        [        5       U l        g N)	r4   r5   r)   r}   rQ   r   outputsetpruned_headsrP   s     rT   r5   IBertAttention.__init__h  s=     ++&v.	%f-ErV   c                 6   [        U5      S:X  a  g [        XR                  R                  U R                  R                  U R
                  5      u  p[        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l	        [        U R                  R                  USS9U R                  l        U R                  R                  [        U5      -
  U R                  l        U R                  R                  U R                  R                  -  U R                  l        U R
                  R                  U5      U l        g )Nr   r   dim)lenr   rQ   r   r   r   r   r   r   r   r   r   r   union)rQ   headsindexs      rT   prune_headsIBertAttention.prune_headso  s   u:?79900$))2O2OQUQbQb

 -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:rV   c                     U R                  UUUUU5      u  pgU R                  US   US   X5      u  pU4USS  -   n
U	4USS  -   nX4$ )Nr   r   )rQ   r   )rQ   r   r   r   r   r   self_outputsself_outputs_scaling_factorattention_outputattention_output_scaling_factorr   outputs_scaling_factors               rT   rn   IBertAttention.forward  s     59II(5
1 =AKKO8;]=
9 $%QR(88"A!CFabcbdFe!e..rV   )r   r   r)   rQ   r   )	rt   ru   rv   rw   r5   r   rn   ry   rz   r{   s   @rT   r   r   g  s"    ";, / /rV   r   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )IBertIntermediatei  c           
        > [         TU ]  5         UR                  U l        SU l        SU l        SU l        [        UR                  UR                  SU R                  U R
                  U R                  SS9U l	        UR                  S:w  a  [        S5      e[        U R                  UR                  S9U l        [        U R                  U R                  S9U l        g )	Nr#   r&   Tr   r
   z3I-BERT only supports 'gelu' for `config.hidden_act`r   r/   )r4   r5   r)   r8   r(   r   r   r<   intermediate_sizer   
hidden_actr   r   r3   intermediate_act_fnr   rL   rP   s     rT   r5   IBertIntermediate.__init__  s     ++ $$]]

 &RSS#*dooU[UiUi#j !)$,,4??!SrV   c                 z    U R                  X5      u  pU R                  X5      u  pU R                  X5      u  pX4$ r   )r   r   rL   )rQ   r   r   s      rT   rn   IBertIntermediate.forward  sL    6:jj6m36:6N6N7
3
 7;6L6L7
3 ::rV   )r8   r   r   r   rL   r)   r(   r   r{   s   @rT   r   r     s    T(
; 
;rV   r   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )IBertOutputi  c           
      t  > [         TU ]  5         UR                  U l        SU l        SU l        SU l        SU l        SU l        [        UR                  UR                  SU R                  U R
                  U R                  SS9U l        [        U R                  U R                  S9U l        [        UR                  UR                  U R                  U R                  UR                   S9U l        [        U R                  U R                  S9U l        [&        R(                  " UR*                  5      U l        g r   )r4   r5   r)   r8   r(   r   r9   r:   r   r   r<   r   r   r   r   rJ   r3   rK   rL   r   rM   rN   rO   rP   s     rT   r5   IBertOutput.__init__  s     ++ $$]]

 %T%6%64??S%%%)) ..
 "*$,,4??!Szz&"<"<=rV   c                     U R                  X5      u  pU R                  U5      nU R                  UUUUS9u  pU R                  X5      u  pU R	                  X5      u  pX4$ r   r   r   s        rT   rn   IBertOutput.forward  r   rV   r   r   r{   s   @rT   r   r     r   rV   r   c                   >   ^  \ rS rSrU 4S jr   SS jrS rSrU =r$ )
IBertLayeri  c                 L  > [         TU ]  5         UR                  U l        SU l        SU l        [        U5      U l        [        U5      U l        [        U5      U l
        [        U R                  U R                  S9U l        [        U R                  U R                  S9U l        g )Nr#   r   r/   )r4   r5   r)   r8   seq_len_dimr   	attentionr   intermediater   r   r   pre_intermediate_actpre_output_actrP   s     rT   r5   IBertLayer.__init__  s}     ++'/-f5!&)$,T\\doo$V!&t||PrV   c                     U R                  UUUUUS9u  pgUS   nUS   n	USS  n
U R                  X5      u  pU4U
-   n
U
$ )N)r   r   r   )r   feed_forward_chunk)rQ   r   r   r   r   r   self_attention_outputs%self_attention_outputs_scaling_factorr   r   r   layer_outputlayer_output_scaling_factors                rT   rn   IBertLayer.forward  s}     IM(/ IW I
E 2!4*OPQ*R'(,484K4K5
1  /G+rV   c                     U R                  X5      u  pU R                  X5      u  p4U R                  X45      u  p4U R                  X4X5      u  pVXV4$ r   )r   r   r  r   )rQ   r   r   intermediate_output"intermediate_output_scaling_factorr  r  s          rT   r  IBertLayer.feed_forward_chunk  su    <@<U<U=
9 CGBSBSC
? CGBUBUC
? 59KKEU5
1 88rV   )r8   r   r   r   r   r  r)   r   r   )	rt   ru   rv   rw   r5   rn   r  ry   rz   r{   s   @rT   r   r     s#    Q" 69 9rV   r   c                   <   ^  \ rS rSrU 4S jr     SS jrSrU =r$ )IBertEncoderi#  c                    > [         TU ]  5         Xl        UR                  U l        [        R
                  " [        UR                  5       Vs/ sH  n[        U5      PM     sn5      U l	        g s  snf r   )
r4   r5   rR   r)   r   
ModuleListrangenum_hidden_layersr   layer)rQ   rR   r   rS   s      rT   r5   IBertEncoder.__init__$  sT     ++]]fF^F^@_#`@_1Jv$6@_#`a
#`s   A/c                 F   U(       a  SOS nU(       a  SOS n	S n
[        U R                  5       H<  u  pU(       a  X4-   nUb  XK   OS nU" UUUUU5      nUS   nU(       d  M4  XS   4-   n	M>     U(       a  X4-   nU(       d  [        S UUU	U
4 5       5      $ [        UUU	U
S9$ )N r   r   c              3   .   #    U H  nUc  M  Uv   M     g 7fr   r  ).0vs     rT   	<genexpr>'IBertEncoder.forward.<locals>.<genexpr>N  s"      	A  s   	)last_hidden_stater   
attentionscross_attentions)	enumerater  tupler   )rQ   r   r   r   r   r   output_hidden_statesreturn_dictall_hidden_statesall_self_attentionsall_cross_attentionsilayer_modulelayer_head_masklayer_outputss                  rT   rn   IBertEncoder.forward*  s     #7BD$5b4#(4OA#$58H$H!.7.CilO(,!M *!,M  &91=M<O&O#!  5$   14D D 	 "%'(		 	 	 9++*1	
 	
rV   )rR   r  r)   )NNFFTr   r{   s   @rT   r  r  #  s$    b "3
 3
rV   r  c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )IBertPooleri`  c                    > [         TU ]  5         UR                  U l        [        R                  " UR
                  UR
                  5      U l        [        R                  " 5       U l        g r   )	r4   r5   r)   r   Linearr<   r   Tanh
activationrP   s     rT   r5   IBertPooler.__init__a  sF     ++YYv1163E3EF
'')rV   c                 \    US S 2S4   nU R                  U5      nU R                  U5      nU$ Nr   )r   r1  )rQ   r   first_token_tensorpooled_outputs       rT   rn   IBertPooler.forwardg  s6     +1a40

#566rV   )r1  r   r)   r   r{   s   @rT   r-  r-  `  s    $ rV   r-  c                   4    \ rS rSr% \\S'   SrS rSS jrSr	g)	IBertPreTrainedModelip  rR   ibertc                    [        U[        [        R                  45      (       ak  UR                  R
                  R                  SU R                  R                  S9  UR                  b%  UR                  R
                  R                  5         gg[        U[        [        R                  45      (       ax  UR                  R
                  R                  SU R                  R                  S9  UR                  b2  UR                  R
                  UR                     R                  5         gg[        U[        [        R                  45      (       aJ  UR                  R
                  R                  5         UR                  R
                  R!                  S5        g[        U["        5      (       a%  UR                  R
                  R                  5         gg)zInitialize the weightsg        )meanstdNg      ?)
isinstancer   r   r/  weightdatanormal_rR   initializer_ranger   zero_r   	Embeddingr'   r   rK   fill_IBertLMHead)rQ   modules     rT   _init_weights"IBertPreTrainedModel._init_weightsu  sA   f{BII677 MM&&CT[[5R5R&S{{&  &&( ' >??MM&&CT[[5R5R&S!!-""6#5#56<<> .r|| <==KK""$MM$$S),,KK""$ -rV   Nc                     [        S5      e)Nz6`resize_token_embeddings` is not supported for I-BERT.)NotImplementedError)rQ   new_num_tokenss     rT   resize_token_embeddings,IBertPreTrainedModel.resize_token_embeddings  s    !"Z[[rV   r  r   )
rt   ru   rv   rw   r   __annotations__base_model_prefixrH  rM  ry   r  rV   rT   r9  r9  p  s    %$\rV   r9  c                   f  ^  \ rS rSrSrSU 4S jjrS rS rS r\	         SS\
\R                     S\
\R                     S	\
\R                     S
\
\R                     S\
\R                     S\
\R                     S\
\   S\
\   S\
\   S\\\\R                     4   4S jj5       rSrU =r$ )
IBertModeli  a  

The model can behave as an encoder (with only self-attention) as well as a decoder, in which case a layer of
cross-attention is added between the self-attention layers, following the architecture described in [Attention is
all you need](https://huggingface.co/papers/1706.03762) by Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit,
Llion Jones, Aidan N. Gomez, Lukasz Kaiser and Illia Polosukhin.

c                    > [         TU ]  U5        Xl        UR                  U l        [	        U5      U l        [        U5      U l        U(       a  [        U5      OSU l	        U R                  5         g)z^
add_pooling_layer (bool, *optional*, defaults to `True`):
    Whether to add a pooling layer
N)r4   r5   rR   r)   r    rk   r  encoderr-  pooler	post_init)rQ   rR   add_pooling_layerrS   s      rT   r5   IBertModel.__init__  sX    
 	  ++)&1#F+->k&)D 	rV   c                 .    U R                   R                  $ r   rk   r>   rQ   s    rT   get_input_embeddingsIBertModel.get_input_embeddings  s    ...rV   c                 $    XR                   l        g r   rZ  )rQ   r   s     rT   set_input_embeddingsIBertModel.set_input_embeddings  s    */'rV   c                     UR                  5        H7  u  p#U R                  R                  U   R                  R	                  U5        M9     g)z
Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
class PreTrainedModel
N)itemsrT  r  r   r   )rQ   heads_to_pruner  r   s       rT   _prune_headsIBertModel._prune_heads  s<    
 +002LELLu%//;;EB 3rV   rd   r   re   r*   r   rf   r   r"  r#  returnc
           
         Ub  UOU R                   R                  nUb  UOU R                   R                  nU	b  U	OU R                   R                  n	Ub  Ub  [	        S5      eUb"  U R                  X5        UR                  5       n
O"Ub  UR                  5       S S n
O[	        S5      eU
u  pUb  UR                  OUR                  nUc  [        R                  " X4US9nUc$  [        R                  " U
[        R                  US9nU R                  X*5      nU R                  XPR                   R                  5      nU R                  UUUUS9u  nnU R!                  UUUUUUU	S9nUS   nU R"                  b  U R#                  U5      OS nU	(       d
  UU4US	S  -   $ [%        UUUR&                  UR(                  UR*                  S
9$ )NzDYou cannot specify both input_ids and inputs_embeds at the same timer+   z5You have to specify either input_ids or inputs_embeds)rZ   rX   )rd   r*   re   rf   )r   r   r   r"  r#  r   r   )r  pooler_outputr   r  r  )rR   r   r"  use_return_dictr   %warn_if_padding_and_no_attention_maskra   rZ   rB   onesrb   rc   get_extended_attention_maskget_head_maskr  rk   rT  rU  r   r   r  r  )rQ   rd   r   re   r*   r   rf   r   r"  r#  rh   r   r   rZ   extended_attention_maskembedding_outputembedding_output_scaling_factorencoder_outputssequence_outputr6  s                       rT   rn   IBertModel.forward  s    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"66yQ#..*K&',,.s3KTUU!,
%.%:!!@T@T!"ZZ*)A6RN!"[[EJJvVN 150P0PQ_0m &&y++2O2OP	<@OO%)'	 =L =
99 ,,+2/!5# ' 
 *!,8<8OO4UY#]3oab6III;-')77&11,==
 	
rV   )rR   rk   rT  rU  r)   )T)	NNNNNNNNN)rt   ru   rv   rw   rx   r5   r\  r_  rd  r   r   rB   
LongTensorFloatTensorboolr   r   r!  rn   ry   rz   r{   s   @rT   rR  rR    s    "/0C  156:59371559,0/3&*J
E,,-J
 !!2!23J
 !!1!12	J

 u//0J
 E--.J
   1 12J
 $D>J
 'tnJ
 d^J
 
;U5CTCT=UU	VJ
 J
rV   rR  c                     ^  \ rS rSrSS/rU 4S jrS rS r\          SS\	\
R                     S\	\
R                     S	\	\
R                     S
\	\
R                     S\	\
R                     S\	\
R                     S\	\
R                     S\	\   S\	\   S\	\   S\\\\
R                     4   4S jj5       rSrU =r$ )IBertForMaskedLMi  zlm_head.decoder.biaszlm_head.decoder.weightc                    > [         TU ]  U5        [        USS9U l        [	        U5      U l        U R                  5         g NF)rW  )r4   r5   rR  r:  rF  lm_headrV  rP   s     rT   r5   IBertForMaskedLM.__init__  s6     %@
"6* 	rV   c                 .    U R                   R                  $ r   )r{  decoderr[  s    rT   get_output_embeddings&IBertForMaskedLM.get_output_embeddings  s    ||###rV   c                 Z    XR                   l        UR                  U R                   l        g r   )r{  r~  r   )rQ   new_embeddingss     rT   set_output_embeddings&IBertForMaskedLM.set_output_embeddings  s    -*//rV   rd   r   re   r*   r   rf   labelsr   r"  r#  rf  c                    U
b  U
OU R                   R                  n
U R                  UUUUUUUU	U
S9	nUS   nU R                  U5      nSnUbF  [	        5       nU" UR                  SU R                   R                  5      UR                  S5      5      nU
(       d  U4USS -   nUb  U4U-   $ U$ [        UUUR                  UR                  S9$ )az  
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
    config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
    loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
Nr   re   r*   r   rf   r   r"  r#  r   r+   r   losslogitsr   r  )
rR   ri  r:  r{  r   r   r;   r   r   r  )rQ   rd   r   re   r*   r   rf   r  r   r"  r#  r   rr  prediction_scoresmasked_lm_lossloss_fctr   s                    rT   rn   IBertForMaskedLM.forward  s    ( &1%<k$++B]B]**))%'/!5#  

 "!* LL9')H%&7&<&<RAWAW&XZ`ZeZefhZijN')GABK7F3A3M^%.YSYY$!//))	
 	
rV   )r:  r{  
NNNNNNNNNN)rt   ru   rv   rw   _tied_weights_keysr5   r  r  r   r   rB   rt  ru  rv  r   r   r!  rn   ry   rz   r{   s   @rT   rx  rx    s,   02JK$0  156:59371559-1,0/3&*1
E,,-1
 !!2!231
 !!1!12	1

 u//01
 E--.1
   1 121
 ))*1
 $D>1
 'tn1
 d^1
 
~uU%6%677	81
 1
rV   rx  c                   <   ^  \ rS rSrSrU 4S jrS rSS jrSrU =r	$ )rF  iL  z)I-BERT Head for masked language modeling.c                   > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  UR                  S9U l        [        R                  " UR                  UR                  5      U l
        [        R                  " [        R                  " UR                  5      5      U l        U R                  U R                  l        g )N)r1   )r4   r5   r   r/  r<   r   rK   rJ   
layer_normr;   r~  	ParameterrB   rb   r   rP   s     rT   r5   IBertLMHead.__init__O  s    YYv1163E3EF
,,v'9'9v?T?TUyy!3!3V5F5FGLLV->->!?@	 IIrV   c                     U R                  U5      n[        U5      nU R                  U5      nU R                  U5      nU$ r   )r   r
   r  r~  )rQ   featureskwargsxs       rT   rn   IBertLMHead.forwardX  s;    JJx GOOA LLOrV   c                     U R                   R                  R                  R                  S:X  a  U R                  U R                   l        g U R                   R                  U l        g )Nmeta)r~  r   rZ   typer[  s    rT   _tie_weightsIBertLMHead._tie_weightsb  sC    <<##((F2 $		DLL ))DIrV   )r   r~  r   r  )rf  N)
rt   ru   rv   rw   rx   r5   rn   r  ry   rz   r{   s   @rT   rF  rF  L  s    3&* *rV   rF  z
    I-BERT Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled
    output) e.g. for GLUE tasks.
    )custom_introc                   l  ^  \ rS rSrU 4S jr\          SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\
   S\\
   S\\
   S\\\\R                     4   4S jj5       rSrU =r$ )IBertForSequenceClassificationik  c                    > [         TU ]  U5        UR                  U l        [        USS9U l        [        U5      U l        U R                  5         g rz  )r4   r5   
num_labelsrR  r:  IBertClassificationHead
classifierrV  rP   s     rT   r5   'IBertForSequenceClassification.__init__r  sC      ++%@
1&9 	rV   rd   r   re   r*   r   rf   r  r   r"  r#  rf  c                 0   U
b  U
OU R                   R                  n
U R                  UUUUUUUU	U
S9	nUS   nU R                  U5      nSnUGb  U R                   R                  c  U R
                  S:X  a  SU R                   l        OoU R
                  S:  aN  UR                  [        R                  :X  d  UR                  [        R                  :X  a  SU R                   l        OSU R                   l        U R                   R                  S:X  aI  [        5       nU R
                  S:X  a&  U" UR                  5       UR                  5       5      nOU" X5      nOU R                   R                  S:X  a=  [        5       nU" UR                  SU R
                  5      UR                  S5      5      nO,U R                   R                  S:X  a  [        5       nU" X5      nU
(       d  U4US	S -   nUb  U4U-   $ U$ [        UUUR                   UR"                  S
9$ )ae  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
Nr  r   r   
regressionsingle_label_classificationmulti_label_classificationr+   r   r  )rR   ri  r:  r  problem_typer  rY   rB   rc   r   r   squeezer   r   r   r   r   r  rQ   rd   r   re   r*   r   rf   r  r   r"  r#  r   rr  r  r  r  r   s                    rT   rn   &IBertForSequenceClassification.forward|  s   ( &1%<k$++B]B]**))%'/!5#  

 "!*1{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#F3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE'!//))	
 	
rV   )r  r:  r  r  )rt   ru   rv   rw   r5   r   r   rB   rt  ru  rv  r   r   r!  rn   ry   rz   r{   s   @rT   r  r  k  s$     156:59371559-1,0/3&*B
E,,-B
 !!2!23B
 !!1!12	B

 u//0B
 E--.B
   1 12B
 ))*B
 $D>B
 'tnB
 d^B
 
'u/@/@)AA	BB
 B
rV   r  c                   l  ^  \ rS rSrU 4S jr\          SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\
   S\\
   S\\
   S\\\\R                     4   4S jj5       rSrU =r$ )IBertForMultipleChoicei  c                    > [         TU ]  U5        [        U5      U l        [        R
                  " UR                  5      U l        [        R                  " UR                  S5      U l
        U R                  5         g )Nr   )r4   r5   rR  r:  r   rM   rN   rO   r/  r<   r  rV  rP   s     rT   r5   IBertForMultipleChoice.__init__  sV     '
zz&"<"<=))F$6$6: 	rV   rd   re   r   r  r*   r   rf   r   r"  r#  rf  c                 \   U
b  U
OU R                   R                  n
Ub  UR                  S   OUR                  S   nUb!  UR                  SUR	                  S5      5      OSnUb!  UR                  SUR	                  S5      5      OSnUb!  UR                  SUR	                  S5      5      OSnUb!  UR                  SUR	                  S5      5      OSnUb1  UR                  SUR	                  S5      UR	                  S5      5      OSnU R                  UUUUUUUU	U
S9	nUS   nU R                  U5      nU R                  U5      nUR                  SU5      nSnUb  [        5       nU" UU5      nU
(       d  U4USS -   nUb  U4U-   $ U$ [        UUUR                  UR                  S9$ )a  
input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
    Indices of input sequence tokens in the vocabulary.

    Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
    [`PreTrainedTokenizer.__call__`] for details.

    [What are input IDs?](../glossary#input-ids)
token_type_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
    Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
    1]`:

    - 0 corresponds to a *sentence A* token,
    - 1 corresponds to a *sentence B* token.

    [What are token type IDs?](../glossary#token-type-ids)
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
    num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
    `input_ids` above)
position_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
    Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
    config.max_position_embeddings - 1]`.

    [What are position IDs?](../glossary#position-ids)
inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_choices, sequence_length, hidden_size)`, *optional*):
    Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
    is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
    model's internal embedding lookup matrix.
Nr   r+   r   )r*   re   r   r   rf   r   r"  r#  r   r  )rR   ri  r   r   ra   r:  rO   r  r   r   r   r  )rQ   rd   re   r   r  r*   r   rf   r   r"  r#  num_choicesflat_input_idsflat_position_idsflat_token_type_idsflat_attention_maskflat_inputs_embedsr   r6  r  reshaped_logitsr  r  r   s                           rT   rn   IBertForMultipleChoice.forward  s   X &1%<k$++B]B],5,Aiooa(}GZGZ[\G]CLCXINN2,>?^bLXLdL--b,2C2CB2GHjnR`Rln11"n6I6I"6MNrvR`Rln11"n6I6I"6MNrv ( r=#5#5b#9=;M;Mb;QR 	 ***..,/!5#  

  
]3/ ++b+6')HOV4D%''!"+5F)-)9TGf$EvE("!//))	
 	
rV   )r  rO   r:  r  )rt   ru   rv   rw   r5   r   r   rB   rt  ru  rv  r   r   r!  rn   ry   rz   r{   s   @rT   r  r    s$     15596:-1371559,0/3&*W
E,,-W
 !!1!12W
 !!2!23	W

 ))*W
 u//0W
 E--.W
   1 12W
 $D>W
 'tnW
 d^W
 
(%0A0A*BB	CW
 W
rV   r  c                   l  ^  \ rS rSrU 4S jr\          SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\
   S\\
   S\\
   S\\\\R                     4   4S jj5       rSrU =r$ )IBertForTokenClassificationi)  c                 .  > [         TU ]  U5        UR                  U l        [        USS9U l        [
        R                  " UR                  5      U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g rz  )r4   r5   r  rR  r:  r   rM   rN   rO   r/  r<   r  rV  rP   s     rT   r5   $IBertForTokenClassification.__init__+  sk      ++%@
zz&"<"<=))F$6$68I8IJ 	rV   rd   r   re   r*   r   rf   r  r   r"  r#  rf  c                    U
b  U
OU R                   R                  n
U R                  UUUUUUUU	U
S9	nUS   nU R                  U5      nU R	                  U5      nSnUb<  [        5       nU" UR                  SU R                  5      UR                  S5      5      nU
(       d  U4USS -   nUb  U4U-   $ U$ [        UUUR                  UR                  S9$ )z
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
Nr  r   r+   r   r  )rR   ri  r:  rO   r  r   r   r  r   r   r  r  s                    rT   rn   #IBertForTokenClassification.forward6  s    $ &1%<k$++B]B]**))%'/!5#  

 "!*,,71')HFKKDOO<fkk"oNDY,F)-)9TGf$EvE$!//))	
 	
rV   )r  rO   r:  r  r  )rt   ru   rv   rw   r5   r   r   rB   rt  ru  rv  r   r   r!  rn   ry   rz   r{   s   @rT   r  r  )  s   	  156:59371559-1,0/3&*2
E,,-2
 !!2!232
 !!1!12	2

 u//02
 E--.2
   1 122
 ))*2
 $D>2
 'tn2
 d^2
 
$eE,=,=&>>	?2
 2
rV   r  c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )r  il  z-Head for sentence-level classification tasks.c                 ,  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  5      U l        [        R                  " UR                  UR                  5      U l
        g r   )r4   r5   r   r/  r<   r   rM   rN   rO   r  out_projrP   s     rT   r5    IBertClassificationHead.__init__o  s`    YYv1163E3EF
zz&"<"<=		&"4"4f6G6GHrV   c                     US S 2SS S 24   nU R                  U5      nU R                  U5      n[        R                  " U5      nU R                  U5      nU R	                  U5      nU$ r4  )rO   r   rB   tanhr  )rQ   r  r  r   s       rT   rn   IBertClassificationHead.forwardu  s^     Aq)]3

=1

=1]3m4rV   )r   rO   r  )	rt   ru   rv   rw   rx   r5   rn   ry   rz   r{   s   @rT   r  r  l  s    7I rV   r  c                     ^  \ rS rSrU 4S jr\           SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\
   S\\
   S\\
   S\\\\R                     4   4S jj5       rSrU =r$ )IBertForQuestionAnsweringi  c                    > [         TU ]  U5        UR                  U l        [        USS9U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g rz  )
r4   r5   r  rR  r:  r   r/  r<   
qa_outputsrV  rP   s     rT   r5   "IBertForQuestionAnswering.__init__  sU      ++%@
))F$6$68I8IJ 	rV   rd   r   re   r*   r   rf   start_positionsend_positionsr   r"  r#  rf  c                 $   Ub  UOU R                   R                  nU R                  UUUUUUU	U
US9	nUS   nU R                  U5      nUR	                  SSS9u  nnUR                  S5      R                  5       nUR                  S5      R                  5       nS nUb  Ub  [        UR                  5       5      S:  a  UR                  S5      n[        UR                  5       5      S:  a  UR                  S5      nUR                  S5      nUR                  SU5      nUR                  SU5      n[        US9nU" X5      nU" UU5      nUU-   S-  nU(       d  UU4USS  -   nUb  U4U-   $ U$ [        UUUUR                  UR                  S9$ )	Nr  r   r   r+   r   )ignore_indexr   )r  start_logits
end_logitsr   r  )rR   ri  r:  r  splitr  r   r   ra   clampr   r   r   r  )rQ   rd   r   re   r*   r   rf   r  r  r   r"  r#  r   rr  r  r  r  
total_lossignored_indexr  
start_lossend_lossr   s                          rT   rn   !IBertForQuestionAnswering.forward  s    &1%<k$++B]B]**))%'/!5#  

 "!*1#)<<r<#: j#++B/::<''+668

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M-33A}EO)//=AM']CH!,@J
M:H$x/14J"J/'!"+=F/9/EZMF*Q6Q+%!!//))
 	
rV   )r:  r  r  )NNNNNNNNNNN)rt   ru   rv   rw   r5   r   r   rB   rt  ru  rv  r   r   r!  rn   ry   rz   r{   s   @rT   r  r    s/     156:593715596:48,0/3&*>
E,,->
 !!2!23>
 !!1!12	>

 u//0>
 E--.>
   1 12>
 "%"2"23>
   0 01>
 $D>>
 'tn>
 d^>
 
+U53D3D-EE	F>
 >
rV   r  c                     U R                  U5      R                  5       n[        R                  " USS9R	                  U5      U-   U-  nUR                  5       U-   $ )a1  
Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
are ignored. This is modified from fairseq's *utils.make_positions*.

Args:
input_ids (`torch.LongTensor`):
       Indices of input sequence tokens in the vocabulary.

Returns: torch.Tensor
r   r   )ner   rB   cumsumtype_asrc   )rd   r'   rg   maskincremental_indicess        rT   r^   r^     sW     <<$((*D <<!4<<TBE[[_cc##%33rV   )rx  r  r  r  r  rR  r9  )r   )?rx   r   typingr   r   rB   torch.utils.checkpointr   torch.nnr   r   r   activationsr
   modeling_outputsr   r   r   r   r   r   r   modeling_utilsr   pytorch_utilsr   r   utilsr   r   configuration_ibertr   quant_modulesr   r   r   r   r   r   
get_loggerrt   loggerModuler    r}   r   r   r   r   r   r  r-  r9  rR  rx  rF  r  r  r  r  r  r^   __all__r  rV   rT   <module>r     s  $   "    A A    . Q , , c c 
		H	%w=bii w=tK. K.\,;bii ,;^./RYY ./b;		 ;D,;")) ,;^79 79t:
299 :
z"))   \? \ \4 t
% t
 t
n E
+ E
 E
P*")) *> N
%9 N
N
b c
1 c
 c
L ?
"6 ?
 ?
Dbii & J
 4 J
 J
Z4"rV   