
    <h                     r   S r SSKrSSKJrJrJr  SSKrSSKrSSKJr  SSK	J
r
JrJr  SSKJr  SSKJr  SS	KJrJrJrJrJrJr  SS
KJrJrJrJrJr  SSKJrJ r J!r!  SSK"J#r#  \!RH                  " \%5      r& " S S\RN                  5      r(S@S jr) " S S\RN                  5      r* " S S\RN                  5      r+ " S S\RN                  5      r, " S S\RN                  5      r- " S S\RN                  5      r. " S S\RN                  5      r/ " S S\RN                  5      r0 " S S \RN                  5      r1  SAS!\RN                  S"\Rd                  S#\Rd                  S$\Rd                  S%\\Rd                     S&\3S'\3S(\\Rd                     4S) jjr4 " S* S+\RN                  5      r5 " S, S-\RN                  5      r6 " S. S/\5      r7 " S0 S1\RN                  5      r8\ " S2 S3\5      5       r9\ " S4 S5\95      5       r:\ " S6 S7\95      5       r;\" S8S99 " S: S;\95      5       r<\" S<S99 " S= S>\95      5       r=/ S?Qr>g)BzPyTorch MarkupLM model.    N)CallableOptionalUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)GradientCheckpointingLayer)BaseModelOutputBaseModelOutputWithPoolingMaskedLMOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModelapply_chunking_to_forward find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringcan_return_tuplelogging   )MarkupLMConfigc                   6   ^  \ rS rSrSrU 4S jrSS jrSrU =r$ )XPathEmbeddings1   zConstruct the embeddings from xpath tags and subscripts.

We drop tree-id in this version, as its info can be covered by xpath.
c           	        > [         TU ]  5         UR                  U l        [        R                  " UR
                  U R                  -  UR                  5      U l        [        R                  " UR                  5      U l
        [        R                  " 5       U l        [        R                  " UR
                  U R                  -  SUR                  -  5      U l        [        R                  " SUR                  -  UR                  5      U l        [        R                  " [!        U R                  5       Vs/ sH.  n[        R"                  " UR$                  UR
                  5      PM0     sn5      U l        [        R                  " [!        U R                  5       Vs/ sH.  n[        R"                  " UR(                  UR
                  5      PM0     sn5      U l        g s  snf s  snf )N   )super__init__	max_depthr   Linearxpath_unit_hidden_sizehidden_sizexpath_unitseq2_embeddingsDropouthidden_dropout_probdropoutReLU
activationxpath_unitseq2_inner	inner2emb
ModuleListrange	Embeddingmax_xpath_tag_unit_embeddingsxpath_tag_sub_embeddingsmax_xpath_subs_unit_embeddingsxpath_subs_sub_embeddings)selfconfig_	__class__s      f/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/markuplm/modeling_markuplm.pyr#   XPathEmbeddings.__init__7   sd   )))+63P3PSWSaSa3acicucu)v&zz&"<"<='')$&IIf.K.Kdnn.\^_bhbtbt^t$u!1v'9'9#96;M;MN(* t~~..A VAA6C`C`a.)
% *, t~~..A VBBFDaDab.*
&s   74G4G$c           	         / n/ n[        U R                  5       H_  nUR                  U R                  U   " US S 2S S 2U4   5      5        UR                  U R                  U   " US S 2S S 2U4   5      5        Ma     [
        R                  " USS9n[
        R                  " USS9nX4-   nU R                  U R                  U R                  U R                  U5      5      5      5      nU$ )Ndim)r1   r$   appendr4   r6   torchcatr/   r+   r-   r.   )r7   xpath_tags_seqxpath_subs_seqxpath_tags_embeddingsxpath_subs_embeddingsixpath_embeddingss          r;   forwardXPathEmbeddings.forwardQ   s     " "t~~&A!(()F)Fq)I.YZ\]_`Y`Ja)bc!(()G)G)J>Z[]^`aZaKb)cd ' !&		*?R H %		*?R H0H>>$,,ttG`G`aqGr7s*tu    )r-   r+   r/   r$   r6   r4   r(   r.   )NN)	__name__
__module____qualname____firstlineno____doc__r#   rJ   __static_attributes____classcell__r:   s   @r;   r   r   1   s    

4   rL   r   c                     U R                  U5      R                  5       n[        R                  " USS9R	                  U5      U-   U-  nUR                  5       U-   $ )z
Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
are ignored. This is modified from fairseq's `utils.make_positions`.

Args:
    x: torch.Tensor x:

Returns: torch.Tensor
r   r?   )neintrB   cumsumtype_aslong)	input_idspadding_idxpast_key_values_lengthmaskincremental_indicess        r;   "create_position_ids_from_input_idsr`   d   sW     <<$((*D <<!4<<TBE[[_cc##%33rL   c                   J   ^  \ rS rSrSrU 4S jrS r       SS jrSrU =r	$ )MarkupLMEmbeddingst   zGConstruct the embeddings from word, position and token_type embeddings.c                 X  > [         TU ]  5         Xl        [        R                  " UR
                  UR                  UR                  S9U l        [        R                  " UR                  UR                  5      U l
        UR                  U l        [        U5      U l        [        R                  " UR                  UR                  5      U l        [        R                   " UR                  UR"                  S9U l        [        R$                  " UR&                  5      U l        U R+                  S[,        R.                  " UR                  5      R1                  S5      SS9  UR                  U l        [        R                  " UR                  UR                  U R2                  S9U l
        g )N)r\   epsposition_ids)r   r>   F)
persistent)r"   r#   r8   r   r2   
vocab_sizer'   pad_token_idword_embeddingsmax_position_embeddingsposition_embeddingsr$   r   rI   type_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_epsr)   r*   r+   register_bufferrB   arangeexpandr\   r7   r8   r:   s     r;   r#   MarkupLMEmbeddings.__init__w   s8   !||F,=,=v?Q?Q_e_r_rs#%<<0N0NPVPbPb#c )) / 7%'\\&2H2H&J\J\%]"f&8&8f>S>STzz&"<"<=ELL)G)GHOOPWXej 	 	
 "..#%<<**F,>,>DL\L\$
 rL   c                    UR                  5       SS nUS   n[        R                  " U R                  S-   X0R                  -   S-   [        R                  UR
                  S9nUR                  S5      R                  U5      $ )z
We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

Args:
    inputs_embeds: torch.Tensor

Returns: torch.Tensor
Nr>   r   dtypedevicer   )sizerB   rs   r\   rZ   rz   	unsqueezert   )r7   inputs_embedsinput_shapesequence_lengthrg   s        r;   &create_position_ids_from_inputs_embeds9MarkupLMEmbeddings.create_position_ids_from_inputs_embeds   s~     $((*3B/%a.||q /4D4D"Dq"HPUPZPZcpcwcw
 %%a(//<<rL   c                    Ub  UR                  5       nOUR                  5       S S nUb  UR                  OUR                  n	Uc+  Ub  [        XR                  U5      nOU R	                  U5      nUc$  [
        R                  " U[
        R                  U	S9nUc  U R                  U5      nUc[  U R                  R                  [
        R                  " [        [        U5      U R                  /-   5      [
        R                  U	S9-  nUc[  U R                  R                  [
        R                  " [        [        U5      U R                  /-   5      [
        R                  U	S9-  nUn
U R!                  U5      nU R#                  U5      nU R%                  X#5      nX-   U-   U-   nU R'                  U5      nU R)                  U5      nU$ )Nr>   rx   )r{   rz   r`   r\   r   rB   zerosrZ   rk   r8   
tag_pad_idonestuplelistr$   subs_pad_idrm   ro   rI   rp   r+   )r7   r[   rD   rE   token_type_idsrg   r}   r]   r~   rz   words_embeddingsrm   ro   rI   
embeddingss                  r;   rJ   MarkupLMEmbeddings.forward   s     #..*K',,.s3K%.%:!!@T@T$A)M]M]_uv#JJ=Y!"[[EJJvVN  00;M !![[33ejjd;'4>>*::;5::V\7 N !![[44uzzd;'4>>*::;5::V\8 N )"66|D $ : :> J00P%;>SSVff
^^J/
\\*-
rL   )	rp   r8   r+   r$   r\   rm   ro   rk   rI   )NNNNNNr   )
rM   rN   rO   rP   rQ   r#   r   rJ   rR   rS   rT   s   @r;   rb   rb   t   s1    Q
2=&  2 2rL   rb   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )MarkupLMSelfOutput   c                 (  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  UR                  S9U l        [        R                  " UR                  5      U l
        g Nre   )r"   r#   r   r%   r'   denserp   rq   r)   r*   r+   ru   s     r;   r#   MarkupLMSelfOutput.__init__   s`    YYv1163E3EF
f&8&8f>S>STzz&"<"<=rL   hidden_statesinput_tensorreturnc                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ Nr   r+   rp   r7   r   r   s      r;   rJ   MarkupLMSelfOutput.forward   5    

=1]3}'CDrL   rp   r   r+   
rM   rN   rO   rP   r#   rB   TensorrJ   rR   rS   rT   s   @r;   r   r      6    >U\\  RWR^R^  rL   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )MarkupLMIntermediate   c                   > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                  [        5      (       a  [        UR                     U l        g UR                  U l        g r   )r"   r#   r   r%   r'   intermediate_sizer   
isinstance
hidden_actstrr   intermediate_act_fnru   s     r;   r#   MarkupLMIntermediate.__init__   s`    YYv1163K3KL
f''--'-f.?.?'@D$'-'8'8D$rL   r   r   c                 J    U R                  U5      nU R                  U5      nU$ r   r   r   r7   r   s     r;   rJ   MarkupLMIntermediate.forward   s&    

=100?rL   r   r   rT   s   @r;   r   r      s(    9U\\ ell  rL   r   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )MarkupLMOutput   c                 (  > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        R                  " UR
                  UR                  S9U l        [        R                  " UR                  5      U l        g r   )r"   r#   r   r%   r   r'   r   rp   rq   r)   r*   r+   ru   s     r;   r#   MarkupLMOutput.__init__   s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=rL   r   r   r   c                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ r   r   r   s      r;   rJ   MarkupLMOutput.forward   r   rL   r   r   rT   s   @r;   r   r      r   rL   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )MarkupLMPooleri  c                    > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " 5       U l        g r   )r"   r#   r   r%   r'   r   Tanhr-   ru   s     r;   r#   MarkupLMPooler.__init__  s9    YYv1163E3EF
'')rL   r   r   c                 \    US S 2S4   nU R                  U5      nU R                  U5      nU$ )Nr   )r   r-   )r7   r   first_token_tensorpooled_outputs       r;   rJ   MarkupLMPooler.forward  s6     +1a40

#566rL   )r-   r   r   rT   s   @r;   r   r     s(    $
U\\ ell  rL   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )MarkupLMPredictionHeadTransformi  c                 p  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        UR                  [        5      (       a  [        UR                     U l
        OUR                  U l
        [        R                  " UR                  UR                  S9U l        g r   )r"   r#   r   r%   r'   r   r   r   r   r   transform_act_fnrp   rq   ru   s     r;   r#   (MarkupLMPredictionHeadTransform.__init__  s~    YYv1163E3EF
f''--$*6+<+<$=D!$*$5$5D!f&8&8f>S>STrL   r   r   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r   )r   r   rp   r   s     r;   rJ   'MarkupLMPredictionHeadTransform.forward  s4    

=1--m<}5rL   )rp   r   r   r   rT   s   @r;   r   r     s)    UU\\ ell  rL   r   c                   4   ^  \ rS rSrU 4S jrS rS rSrU =r$ )MarkupLMLMPredictionHeadi'  c                 H  > [         TU ]  5         [        U5      U l        [        R
                  " UR                  UR                  SS9U l        [        R                  " [        R                  " UR                  5      5      U l        U R                  U R                  l        g )NF)bias)r"   r#   r   	transformr   r%   r'   ri   decoder	ParameterrB   r   r   ru   s     r;   r#   !MarkupLMLMPredictionHead.__init__(  sm    8@ yy!3!3V5F5FUSLLV->->!?@	 !IIrL   c                 :    U R                   U R                  l         g r   )r   r   r7   s    r;   _tie_weights%MarkupLMLMPredictionHead._tie_weights5  s     IIrL   c                 J    U R                  U5      nU R                  U5      nU$ r   )r   r   r   s     r;   rJ    MarkupLMLMPredictionHead.forward8  s$    }5]3rL   )r   r   r   )	rM   rN   rO   rP   r#   r   rJ   rR   rS   rT   s   @r;   r   r   '  s    && rL   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )MarkupLMOnlyMLMHeadi?  c                 B   > [         TU ]  5         [        U5      U l        g r   )r"   r#   r   predictionsru   s     r;   r#   MarkupLMOnlyMLMHead.__init__@  s    3F;rL   sequence_outputr   c                 (    U R                  U5      nU$ r   r   )r7   r   prediction_scoress      r;   rJ   MarkupLMOnlyMLMHead.forwardD  s     ,,_=  rL   r   r   rT   s   @r;   r   r   ?  s(    <!u|| ! ! !rL   r   modulequerykeyvalueattention_maskscalingr+   	head_maskc                    [         R                  " XR                  SS5      5      U-  n	Ub"  US S 2S S 2S S 2S UR                  S   24   n
X-   n	[        R
                  R                  U	S[         R                  S9R                  UR                  5      n	[        R
                  R                  XU R                  S9n	Ub  XR                  SSSS5      -  n	[         R                  " X5      nUR                  SS5      R                  5       nX4$ )N   r
   r>   )r@   ry   )ptrainingr   )rB   matmul	transposeshaper   
functionalsoftmaxfloat32tory   r+   r   view
contiguous)r   r   r   r   r   r   r+   r   kwargsattn_weightscausal_maskattn_outputs               r;   eager_attention_forwardr   J  s     <<}}Q':;gEL!$Q1o		"o%=>#1==((2U]](SVVW\WbWbcL==((6??([L#nnQAq&AA,,|3K''1-88:K$$rL   c                      ^  \ rS rSrU 4S jr   S
S\R                  S\\R                     S\\R                     S\\	   S\
\R                     4
S jjrS	rU =r$ )MarkupLMSelfAttentionif  c                 6  > [         TU ]  5         UR                  UR                  -  S:w  a7  [	        US5      (       d&  [        SUR                   SUR                   S35      eXl        UR                  U l        [        UR                  UR                  -  5      U l        U R                  U R                  -  U l	        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                   5      U l        UR                   U l        U R                  S-  U l        g )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()g      )r"   r#   r'   num_attention_headshasattr
ValueErrorr8   rW   attention_head_sizeall_head_sizer   r%   r   r   r   r)   attention_probs_dropout_probr+   attention_dropoutr   ru   s     r;   r#   MarkupLMSelfAttention.__init__g  sD    : ::a?PVXhHiHi#F$6$6#7 8 445Q8 
 #)#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
zz&"E"EF!'!D!D//5rL   r   r   r   output_attentionsr   c                    UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      n	U R                  U5      R                  U5      R	                  SS5      n
[        nU R                  R                  S:w  a  [        U R                  R                     nU" U UU	U
U4U R                  (       d  SOU R                  U R                  US.UD6u  pUR                  " / UQSP76 R                  5       nU(       a  X4nU$ U4nU$ )Nr>   r   r   eager        )r+   r   r   )r   r   r   r   r   r   r   r   r8   _attn_implementationr   r   r  r   reshaper   )r7   r   r   r   r  r   r~   hidden_shapequery_states
key_statesvalue_statesattention_interfacer   r   outputss                  r;   rJ   MarkupLMSelfAttention.forward|  s[    $))#2.CCbC$*B*BCzz-055lCMMaQRSXXm,11,?II!QO
zz-055lCMMaQRS(?;;++w6"9$++:Z:Z"[$7
%
  $}}C$2H2HLL
%
 
%
! "));;;;FFH1B;- JUrL   )
r  r  r   r8   r+   r   r   r   r   r   NNF)rM   rN   rO   rP   r#   rB   r   r   FloatTensorboolr   rJ   rR   rS   rT   s   @r;   r   r   f  st    60 7;15,1!||! !!2!23! E--.	!
 $D>! 
u||	! !rL   r   c                      ^  \ rS rSrU 4S jrS r   SS\R                  S\\R                     S\\R                     S\\
   S\\R                     4
S	 jjrS
rU =r$ )MarkupLMAttentioni  c                    > [         TU ]  5         [        U5      U l        [	        U5      U l        [        5       U l        g r   )r"   r#   r   r7   r   outputsetpruned_headsru   s     r;   r#   MarkupLMAttention.__init__  s0    )&1	(0ErL   c                 6   [        U5      S:X  a  g [        XR                  R                  U R                  R                  U R
                  5      u  p[        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l	        [        U R                  R                  USS9U R                  l        U R                  R                  [        U5      -
  U R                  l        U R                  R                  U R                  R                  -  U R                  l        U R
                  R                  U5      U l        g )Nr   r   r?   )lenr   r7   r   r   r  r   r   r   r   r  r   r  union)r7   headsindexs      r;   prune_headsMarkupLMAttention.prune_heads  s   u:?79900$))2O2OQUQbQb

 -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:rL   r   r   r   r  r   c                 p    U R                   " U4UUUS.UD6nU R                  US   U5      nU4USS  -   nU$ N)r   r   r  r   r   )r7   r  )	r7   r   r   r   r  r   self_outputsattention_outputr  s	            r;   rJ   MarkupLMAttention.forward  s]     yy
)/	

 
  ;;|AF#%QR(88rL   )r  r  r7   r  )rM   rN   rO   rP   r#   r!  rB   r   r   r  r  r   rJ   rR   rS   rT   s   @r;   r  r    sy    ";* 7;15,1|| !!2!23 E--.	
 $D> 
u||	 rL   r  c                      ^  \ rS rSrU 4S jr   SS\R                  S\\R                     S\\R                     S\\	   S\
\R                     4
S jjrS	 rS
rU =r$ )MarkupLMLayeri  c                    > [         TU ]  5         UR                  U l        SU l        [	        U5      U l        [        U5      U l        [        U5      U l	        g )Nr   )
r"   r#   chunk_size_feed_forwardseq_len_dimr  	attentionr   intermediater   r  ru   s     r;   r#   MarkupLMLayer.__init__  sI    '-'E'E$*6208$V,rL   r   r   r   r  r   c                     U R                   " U4UUUS.UD6nUS   nUSS  n[        U R                  U R                  U R                  U5      n	U	4U-   nU$ r$  )r-  r   feed_forward_chunkr+  r,  )
r7   r   r   r   r  r   self_attention_outputsr&  r  layer_outputs
             r;   rJ   MarkupLMLayer.forward  s     "&"
)/	"

 "
 2!4(,0##T%A%A4CSCSUe
  /G+rL   c                 J    U R                  U5      nU R                  X!5      nU$ r   )r.  r  )r7   r&  intermediate_outputr3  s       r;   r1   MarkupLMLayer.feed_forward_chunk  s)    "//0@A{{#6IrL   )r-  r+  r.  r  r,  r  )rM   rN   rO   rP   r#   rB   r   r   r  r  r   rJ   r1  rR   rS   rT   s   @r;   r)  r)    sy    - 7;15,1|| !!2!23 E--.	
 $D> 
u||	2 rL   r)  c                      ^  \ rS rSrU 4S jr\     SS\R                  S\\R                     S\\R                     S\\
   S\\
   S\\
   S	\\\R                     \4   4S
 jj5       rSrU =r$ )MarkupLMEncoderi  c                    > [         TU ]  5         Xl        [        R                  " [        UR                  5       Vs/ sH  n[        U5      PM     sn5      U l        SU l	        g s  snf )NF)
r"   r#   r8   r   r0   r1   num_hidden_layersr)  layergradient_checkpointing)r7   r8   rH   r:   s      r;   r#   MarkupLMEncoder.__init__  sR    ]]5IaIaCb#cCbaM&$9Cb#cd
&+# $ds   A%r   r   r   r  output_hidden_statesreturn_dictr   c           	         U(       a  SOS nU(       a  SOS n	[        U R                  5       H=  u  pU(       a  X4-   nUb  X:   OS nU" SUUUUS.UD6nUS   nU(       d  M5  XS   4-   n	M?     U(       a  X4-   n[        UUU	S9$ )N )r   r   r   r  r   r   )last_hidden_stater   
attentions)	enumerater<  r   )r7   r   r   r   r  r?  r@  r   all_hidden_statesall_self_attentionsrH   layer_modulelayer_head_masklayer_outputss                 r;   rJ   MarkupLMEncoder.forward  s     #7BD$5b4(4OA#$58H$H!.7.CilO( +-)"3	
 M *!,M  &91=M<O&O#!  5$   14D D++*
 	
rL   )r8   r=  r<  )NNFFT)rM   rN   rO   rP   r#   r   rB   r   r   r  r  r   r   r   rJ   rR   rS   rT   s   @r;   r9  r9    s    ,  7;15,1/4&*&
||&
 !!2!23&
 E--.	&

 $D>&
 'tn&
 d^&
 
uU\\"O3	4&
 &
rL   r9  c                   t   ^  \ rS rSr% \\S'   SrS r\S\	\
\\R                  4      4U 4S jj5       rSrU =r$ )MarkupLMPreTrainedModeli)  r8   markuplmc                    [        U[        R                  5      (       ak  UR                  R                  R                  SU R                  R                  S9  UR                  b%  UR                  R                  R                  5         gg[        U[        R                  5      (       ax  UR                  R                  R                  SU R                  R                  S9  UR                  b2  UR                  R                  UR                     R                  5         gg[        U[        R                  5      (       aJ  UR                  R                  R                  5         UR                  R                  R                  S5        g[        U[        5      (       a%  UR                  R                  R                  5         gg)zInitialize the weightsr  )meanstdN      ?)r   r   r%   weightdatanormal_r8   initializer_ranger   zero_r2   r\   rp   fill_r   )r7   r   s     r;   _init_weights%MarkupLMPreTrainedModel._init_weights/  s3   fbii(( MM&&CT[[5R5R&S{{&  &&( '--MM&&CT[[5R5R&S!!-""6#5#56<<> .--KK""$MM$$S) 899KK""$ :rL   pretrained_model_name_or_pathc                 ,   > [         TU ]  " U/UQ70 UD6$ r   )r"   from_pretrained)clsr[  
model_argsr   r:   s       r;   r]  'MarkupLMPreTrainedModel.from_pretrainedA  s    w&'D\z\U[\\rL   rB  )rM   rN   rO   rP   r   __annotations__base_model_prefixrY  classmethodr   r   r   osPathLiker]  rR   rS   rT   s   @r;   rM  rM  )  sH    "%$ ]HU3PRP[P[K[E\<] ] ]rL   rM  c                     ^  \ rS rSrSU 4S jjrS rS rS r\\	           SS\
\R                     S\
\R                     S\
\R                     S	\
\R                     S
\
\R                     S\
\R                     S\
\R                     S\
\R                     S\
\   S\
\   S\
\   S\\\4   4S jj5       5       rSrU =r$ )MarkupLMModeliF  c                    > [         TU ]  U5        Xl        [        U5      U l        [        U5      U l        U(       a  [        U5      OSU l        U R                  5         g)z^
add_pooling_layer (bool, *optional*, defaults to `True`):
    Whether to add a pooling layer
N)
r"   r#   r8   rb   r   r9  encoderr   pooler	post_init)r7   r8   add_pooling_layerr:   s      r;   r#   MarkupLMModel.__init__I  sK    
 	 ,V4&v.0AnV,t 	rL   c                 .    U R                   R                  $ r   r   rk   r   s    r;   get_input_embeddings"MarkupLMModel.get_input_embeddingsY  s    ...rL   c                 $    XR                   l        g r   ro  )r7   r   s     r;   set_input_embeddings"MarkupLMModel.set_input_embeddings\  s    */'rL   c                     UR                  5        H7  u  p#U R                  R                  U   R                  R	                  U5        M9     g)z
Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
class PreTrainedModel
N)itemsri  r<  r-  r!  )r7   heads_to_pruner<  r  s       r;   _prune_headsMarkupLMModel._prune_heads_  s<    
 +002LELLu%//;;EB 3rL   r[   rD   rE   r   r   rg   r   r}   r  r?  r@  r   c           	      |   U	b  U	OU R                   R                  n	U
b  U
OU R                   R                  n
Ub  UOU R                   R                  nUb  Ub  [	        S5      eUb"  U R                  X5        UR                  5       nO"Ub  UR                  5       SS nO[	        S5      eUb  UR                  OUR                  nUc  [        R                  " XS9nUc$  [        R                  " U[        R                  US9nUR                  S5      R                  S5      nUR                  U R                  S	9nS
U-
  S-  nUb  UR                  5       S:X  ah  UR                  S5      R                  S5      R                  S5      R                  S5      nUR!                  U R                   R"                  SSSS5      nOCUR                  5       S:X  a/  UR                  S5      R                  S5      R                  S5      nUR                  [%        U R'                  5       5      R                  S	9nOS/U R                   R"                  -  nU R)                  UUUUUUS9nU R+                  UUUU	U
SS9nUS   nU R,                  b  U R-                  U5      OSn[/        UUUR0                  UR2                  S9$ )ao  
xpath_tags_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
    Tag IDs for each token in the input sequence, padded up to config.max_depth.
xpath_subs_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
    Subscript IDs for each token in the input sequence, padded up to config.max_depth.

Examples:

```python
>>> from transformers import AutoProcessor, MarkupLMModel

>>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base")
>>> model = MarkupLMModel.from_pretrained("microsoft/markuplm-base")

>>> html_string = "<html> <head> <title>Page Title</title> </head> </html>"

>>> encoding = processor(html_string, return_tensors="pt")

>>> outputs = model(**encoding)
>>> last_hidden_states = outputs.last_hidden_state
>>> list(last_hidden_states.shape)
[1, 4, 768]
```NzDYou cannot specify both input_ids and inputs_embeds at the same timer>   z5You have to specify either input_ids or inputs_embeds)rz   rx   r   r   )ry   rR  g     r   )r[   rD   rE   rg   r   r}   T)r   r  r?  r@  )rC  pooler_outputr   rD  )r8   r  r?  use_return_dictr   %warn_if_padding_and_no_attention_maskr{   rz   rB   r   r   rZ   r|   r   ry   r@   rt   r;  next
parametersr   ri  rj  r   r   rD  )r7   r[   rD   rE   r   r   rg   r   r}   r  r?  r@  r~   rz   extended_attention_maskembedding_outputencoder_outputsr   r   s                      r;   rJ   MarkupLMModel.forwardg  s   N 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"66yQ#..*K&',,.s3KTUU%.%:!!@T@T!"ZZCN!"[[EJJvVN"0":":1"="G"G"J"9"<"<4::"<"N#&)@#@H"L }}!#%//2<<Q?II"MWWXZ[	%,,T[[-J-JBPRTVXZ[	A%%//2<<R@JJ2N	!40A+B+H+HII!>!>>I??))%)' + 
 ,,#/!5 ' 
 *!,8<8OO4UY)-')77&11	
 	
rL   )r8   r   ri  rj  )T)NNNNNNNNNNN)rM   rN   rO   rP   r#   rp  rs  rx  r   r   r   rB   
LongTensorr  r  r   r   r   rJ   rR   rS   rT   s   @r;   rg  rg  F  sJ    /0C  1559596:59371559,0/3&*c
E,,-c
 !!1!12c
 !!1!12	c

 !!2!23c
 !!1!12c
 u//0c
 E--.c
   1 12c
 $D>c
 'tnc
 d^c
 
u00	1c
  c
rL   rg  c            !         ^  \ rS rSrU 4S jr\\             SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\R                     S\\R                     S\\
   S\\
   S\\
   S\\\R                     \4   4S jj5       5       rSrU =r$ )MarkupLMForQuestionAnsweringi  c                    > [         TU ]  U5        UR                  U l        [        USS9U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g NF)rl  )
r"   r#   
num_labelsrg  rN  r   r%   r'   
qa_outputsrk  ru   s     r;   r#   %MarkupLMForQuestionAnswering.__init__  sU      ++%fF))F$6$68I8IJ 	rL   r[   rD   rE   r   r   rg   r   r}   start_positionsend_positionsr  r?  r@  r   c                    Ub  UOU R                   R                  nU R                  UUUUUUUUUUSS9nUS   nU R                  U5      nUR	                  SSS9u  nnUR                  S5      R                  5       nUR                  S5      R                  5       nSnU	b  U
b  [        U	R                  5       5      S:  a  U	R                  S5      n	[        U
R                  5       5      S:  a  U
R                  S5      n
UR                  S5      nU	R                  SU5        U
R                  SU5        [        US9nU" UU	5      nU" UU
5      nUU-   S	-  n[        UUUUR                  UR                  S
9$ )a  
xpath_tags_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
    Tag IDs for each token in the input sequence, padded up to config.max_depth.
xpath_subs_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
    Subscript IDs for each token in the input sequence, padded up to config.max_depth.

Examples:

```python
>>> from transformers import AutoProcessor, MarkupLMForQuestionAnswering
>>> import torch

>>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base-finetuned-websrc")
>>> model = MarkupLMForQuestionAnswering.from_pretrained("microsoft/markuplm-base-finetuned-websrc")

>>> html_string = "<html> <head> <title>My name is Niels</title> </head> </html>"
>>> question = "What's his name?"

>>> encoding = processor(html_string, questions=question, return_tensors="pt")

>>> with torch.no_grad():
...     outputs = model(**encoding)

>>> answer_start_index = outputs.start_logits.argmax()
>>> answer_end_index = outputs.end_logits.argmax()

>>> predict_answer_tokens = encoding.input_ids[0, answer_start_index : answer_end_index + 1]
>>> processor.decode(predict_answer_tokens).strip()
'Niels'
```NT
rD   rE   r   r   rg   r   r}   r  r?  r@  r   r   r>   r?   )ignore_indexr   )lossstart_logits
end_logitsr   rD  )r8   r|  rN  r  splitsqueezer   r  r{   clamp_r   r   r   rD  )r7   r[   rD   rE   r   r   rg   r   r}   r  r  r  r?  r@  r  r   logitsr  r  
total_lossignored_indexloss_fct
start_lossend_losss                           r;   rJ   $MarkupLMForQuestionAnswering.forward  s   ` &1%<k$++B]B]--))))%'/!5   
 "!*1#)<<r<#: j#++B/::<''+668

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M""1m4  M2']CH!,@J
M:H$x/14J+%!!//))
 	
rL   )rN  r  r  )NNNNNNNNNNNNN)rM   rN   rO   rP   r#   r   r   r   rB   r   r  r   r   r   rJ   rR   rS   rT   s   @r;   r  r    sa     -115151515/3,0042604,0/3&*\
ELL)\
 !.\
 !.	\

 !.\
 !.\
 u||,\
 ELL)\
  -\
 "%,,/\
  -\
 $D>\
 'tn\
 d^\
 
uU\\"$@@	A\
  \
rL   r  zC
    MarkupLM Model with a `token_classification` head on top.
    )custom_introc                     ^  \ rS rSrU 4S jr\\            SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\R                     S\\
   S\\
   S\\
   S\\\R                     \4   4S jj5       5       rSrU =r$ )MarkupLMForTokenClassificationi=  c                 d  > [         TU ]  U5        UR                  U l        [        USS9U l        UR
                  b  UR
                  OUR                  n[        R                  " U5      U l	        [        R                  " UR                  UR                  5      U l        U R                  5         g r  )r"   r#   r  rg  rN  classifier_dropoutr*   r   r)   r+   r%   r'   
classifierrk  r7   r8   r  r:   s      r;   r#   'MarkupLMForTokenClassification.__init__D  s      ++%fF)/)B)B)NF%%TZTnTn 	 zz"45))F$6$68I8IJ 	rL   r[   rD   rE   r   r   rg   r   r}   labelsr  r?  r@  r   c                 l   Ub  UOU R                   R                  nU R                  UUUUUUUUU
USS9nUS   nU R                  U5      nSnU	bF  [	        5       nU" UR                  SU R                   R                  5      U	R                  S5      5      n[        UUUR                  UR                  S9$ )a  
xpath_tags_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
    Tag IDs for each token in the input sequence, padded up to config.max_depth.
xpath_subs_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
    Subscript IDs for each token in the input sequence, padded up to config.max_depth.
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.

Examples:

```python
>>> from transformers import AutoProcessor, AutoModelForTokenClassification
>>> import torch

>>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base")
>>> processor.parse_html = False
>>> model = AutoModelForTokenClassification.from_pretrained("microsoft/markuplm-base", num_labels=7)

>>> nodes = ["hello", "world"]
>>> xpaths = ["/html/body/div/li[1]/div/span", "/html/body/div/li[1]/div/span"]
>>> node_labels = [1, 2]
>>> encoding = processor(nodes=nodes, xpaths=xpaths, node_labels=node_labels, return_tensors="pt")

>>> with torch.no_grad():
...     outputs = model(**encoding)

>>> loss = outputs.loss
>>> logits = outputs.logits
```NTr  r   r>   r  r  r   rD  )
r8   r|  rN  r  r   r   r  r   r   rD  )r7   r[   rD   rE   r   r   rg   r   r}   r  r  r?  r@  r  r   r   r  r  s                     r;   rJ   &MarkupLMForTokenClassification.forwardR  s    \ &1%<k$++B]B]--))))%'/!5   
 "!* OOO<')H!&&r4;;+A+ABBD
 %$!//))	
 	
rL   )r  r+   rN  r  NNNNNNNNNNNN)rM   rN   rO   rP   r#   r   r   r   rB   r   r  r   r   r   rJ   rR   rS   rT   s   @r;   r  r  =  sI     -115151515/3,004)-,0/3&*L
ELL)L
 !.L
 !.	L

 !.L
 !.L
 u||,L
 ELL)L
  -L
 &L
 $D>L
 'tnL
 d^L
 
uU\\"N2	3L
  L
rL   r  z
    MarkupLM Model transformer with a sequence classification/regression head on top (a linear layer on top of the
    pooled output) e.g. for GLUE tasks.
    c                     ^  \ rS rSrU 4S jr\\            SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\R                     S\\
   S\\
   S\\
   S\\\R                     \4   4S jj5       5       rSrU =r$ )!MarkupLMForSequenceClassificationi  c                 r  > [         TU ]  U5        UR                  U l        Xl        [	        U5      U l        UR                  b  UR                  OUR                  n[        R                  " U5      U l
        [        R                  " UR                  UR                  5      U l        U R                  5         g r   )r"   r#   r  r8   rg  rN  r  r*   r   r)   r+   r%   r'   r  rk  r  s      r;   r#   *MarkupLMForSequenceClassification.__init__  s      ++%f-)/)B)B)NF%%TZTnTn 	 zz"45))F$6$68I8IJ 	rL   r[   rD   rE   r   r   rg   r   r}   r  r  r?  r@  r   c                     Ub  UOU R                   R                  nU R                  UUUUUUUUU
USS9nUS   nU R                  U5      nU R	                  U5      nSnU	Gb  U R                   R
                  c  U R                  S:X  a  SU R                   l        OoU R                  S:  aN  U	R                  [        R                  :X  d  U	R                  [        R                  :X  a  SU R                   l        OSU R                   l        U R                   R
                  S:X  aI  [        5       nU R                  S:X  a&  U" UR                  5       U	R                  5       5      nOU" X5      nOU R                   R
                  S:X  a=  [        5       nU" UR                  SU R                  5      U	R                  S5      5      nO,U R                   R
                  S:X  a  [        5       nU" X5      n[!        UUUR"                  UR$                  S	9$ )
a  
xpath_tags_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
    Tag IDs for each token in the input sequence, padded up to config.max_depth.
xpath_subs_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
    Subscript IDs for each token in the input sequence, padded up to config.max_depth.
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

Examples:

```python
>>> from transformers import AutoProcessor, AutoModelForSequenceClassification
>>> import torch

>>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base")
>>> model = AutoModelForSequenceClassification.from_pretrained("microsoft/markuplm-base", num_labels=7)

>>> html_string = "<html> <head> <title>Page Title</title> </head> </html>"
>>> encoding = processor(html_string, return_tensors="pt")

>>> with torch.no_grad():
...     outputs = model(**encoding)

>>> loss = outputs.loss
>>> logits = outputs.logits
```NTr  r   
regressionsingle_label_classificationmulti_label_classificationr>   r  )r8   r|  rN  r+   r  problem_typer  ry   rB   rZ   rW   r	   r  r   r   r   r   r   rD  )r7   r[   rD   rE   r   r   rg   r   r}   r  r  r?  r@  r  r   r  r  r  s                     r;   rJ   )MarkupLMForSequenceClassification.forward  s   Z &1%<k$++B]B]--))))%'/!5   
  
]3/{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#F3D))-JJ+-B @&++b/R))-II,./'!//))	
 	
rL   )r  r8   r+   rN  r  r  )rM   rN   rO   rP   r#   r   r   r   rB   r   r  r   r   r   rJ   rR   rS   rT   s   @r;   r  r    sJ     -115151515/3,004)-,0/3&*\
ELL)\
 !.\
 !.	\

 !.\
 !.\
 u||,\
 ELL)\
  -\
 &\
 $D>\
 'tn\
 d^\
 
uU\\"$<<	=\
  \
rL   r  )r  r  r  rg  rM  )r   )r  N)?rQ   rd  typingr   r   r   rB   torch.utils.checkpointr   torch.nnr   r   r	   activationsr   modeling_layersr   modeling_outputsr   r   r   r   r   r   modeling_utilsr   r   r   r   r   utilsr   r   r   configuration_markuplmr   
get_loggerrM   loggerModuler   r`   rb   r   r   r   r   r   r   r   r   floatr   r   r  r)  r9  rM  rg  r  r  r  __all__rB  rL   r;   <module>r     sg    	 , ,    A A ! 9   ? > 2 
		H	%/ bii / f4 _ _F 299  RYY RYY  bii $ryy 0!")) !$ (,%II%<<% 
% <<	%
 U\\*% % % %%87BII 7v*		 *\%. %R.
bii .
b ]o ] ]8 E
+ E
 E
P j
#: j
 j
Z 
^
%< ^

^
B o
(? o
o
drL   