
    <h                     v   S r SSKJrJrJr  SSKrSSKrSSKJr  SSKJ	r	J
r
Jr  SSKJr  SSKJr  SS	KJrJrJrJrJrJr  SS
KJrJr  SSKJrJrJr  SSKJrJ r J!r!  SSK"J#r#  \!RH                  " \%5      r&\RN                  r( " S S\RR                  5      r*  S@S\RR                  S\RV                  S\RV                  S\RV                  S\\RV                     S\,S\,S\\RV                     4S jjr- " S S\RR                  5      r. " S S\RR                  5      r/ " S S\RR                  5      r0 " S  S!\RR                  5      r1 " S" S#\RR                  5      r2 " S$ S%\5      r3 " S& S'\RR                  5      r4 " S( S)\RR                  5      r5 " S* S+\RR                  5      r6 " S, S-\RR                  5      r7 " S. S/\RR                  5      r8\ " S0 S1\5      5       r9\ " S2 S3\95      5       r:\ " S4 S5\95      5       r;\" S6S79 " S8 S9\95      5       r<\" S:S79 " S; S<\95      5       r=\ " S= S>\95      5       r>/ S?Qr?g)AzPyTorch LayoutLM model.    )CallableOptionalUnionN)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)GradientCheckpointingLayer)BaseModelOutputBaseModelOutputWithPoolingMaskedLMOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModel)apply_chunking_to_forward find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringcan_return_tuplelogging   )LayoutLMConfigc                   @   ^  \ rS rSrSrU 4S jr     SS jrSrU =r$ )LayoutLMEmbeddings.   zGConstruct the embeddings from word, position and token_type embeddings.c                   > [         TU ]  5         [        R                  " UR                  UR
                  UR                  S9U l        [        R                  " UR                  UR
                  5      U l	        [        R                  " UR                  UR
                  5      U l        [        R                  " UR                  UR
                  5      U l        [        R                  " UR                  UR
                  5      U l        [        R                  " UR                  UR
                  5      U l        [        R                  " UR                  UR
                  5      U l        [#        UR
                  UR$                  S9U l        [        R(                  " UR*                  5      U l        U R/                  S[0        R2                  " UR                  5      R5                  S5      SS9  g )N)padding_idxepsposition_ids)r   F)
persistent)super__init__r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingsmax_2d_position_embeddingsx_position_embeddingsy_position_embeddingsh_position_embeddingsw_position_embeddingstype_vocab_sizetoken_type_embeddingsLayoutLMLayerNormlayer_norm_eps	LayerNormDropouthidden_dropout_probdropoutregister_buffertorcharangeexpandselfconfig	__class__s     f/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/layoutlm/modeling_layoutlm.pyr(   LayoutLMEmbeddings.__init__1   s[   !||F,=,=v?Q?Q_e_r_rs#%<<0N0NPVPbPb#c %'\\&2S2SU[UgUg%h"%'\\&2S2SU[UgUg%h"%'\\&2S2SU[UgUg%h"%'\\&2S2SU[UgUg%h"%'\\&2H2H&J\J\%]"*6+=+=6CXCXYzz&"<"<=ELL)G)GHOOPWXej 	 	
    c                    Ub  UR                  5       nOUR                  5       S S nUS   nUb  UR                  OUR                  nUc  U R                  S S 2S U24   nUc$  [        R                  " U[        R
                  US9nUc  U R                  U5      nUn	U R                  U5      n
 U R                  US S 2S S 2S4   5      nU R                  US S 2S S 2S4   5      nU R                  US S 2S S 2S4   5      nU R                  US S 2S S 2S4   5      nU R                  US S 2S S 2S4   US S 2S S 2S4   -
  5      nU R                  US S 2S S 2S4   US S 2S S 2S4   -
  5      nU R                  U5      nU	U
-   U-   U-   U-   U-   U-   U-   U-   nU R                  U5      nU R                  U5      nU$ ! [         a  n[        S5      UeS nAff = f)Nr%   r   dtypedevicer      r
   z:The `bbox`coordinate values should be within 0-1000 range.)sizerK   r$   r>   zeroslongr-   r/   r1   r2   
IndexErrorr3   r4   r6   r9   r<   )rB   	input_idsbboxtoken_type_idsr$   inputs_embedsinput_shape
seq_lengthrK   words_embeddingsr/   left_position_embeddingsupper_position_embeddingsright_position_embeddingslower_position_embeddingser3   r4   r6   
embeddingss                       rE   forwardLayoutLMEmbeddings.forwardB   s1     #..*K',,.s3K ^
%.%:!!@T@T,,Q^<L!"[[EJJvVN  00;M("66|D	b'+'A'A$q!Qw-'P$(,(B(B41a=(Q%(,(B(B41a=(Q%(,(B(B41a=(Q% !% : :41a=4PQSTVWPW=;X Y $ : :41a=4PQSTVWPW=;X Y $ : :> J !"&' (( (	(
 (( $$ $$ $$ 	 ^^J/
\\*-
)  	bYZ`aa	bs   /A,F: :
GGG)	r9   r<   r3   r/   r6   r4   r-   r1   r2   )NNNNN)	__name__
__module____qualname____firstlineno____doc__r(   r^   __static_attributes____classcell__rD   s   @rE   r   r   .   s&    Q
& 5 5rG   r   modulequerykeyvalueattention_maskscalingr<   	head_maskc                    [         R                  " XR                  SS5      5      U-  n	Ub"  US S 2S S 2S S 2S UR                  S   24   n
X-   n	[        R
                  R                  U	S[         R                  S9R                  UR                  5      n	[        R
                  R                  XU R                  S9n	Ub  XR                  SSSS5      -  n	[         R                  " X5      nUR                  SS5      R                  5       nX4$ )NrL   r
   r%   )dimrJ   )ptrainingr   )r>   matmul	transposeshaper   
functionalsoftmaxfloat32torJ   r<   rs   view
contiguous)rh   ri   rj   rk   rl   rm   r<   rn   kwargsattn_weightscausal_maskattn_outputs               rE   eager_attention_forwardr   {   s     <<}}Q':;gEL!$Q1o		"o%=>#1==((2U]](SVVW\WbWbcL==((6??([L#nnQAq&AA,,|3K''1-88:K$$rG   c                      ^  \ rS rSrU 4S jr   S
S\R                  S\\R                     S\\R                     S\\	   S\
\R                     4
S jjrS	rU =r$ )LayoutLMSelfAttention   c                 6  > [         TU ]  5         UR                  UR                  -  S:w  a7  [	        US5      (       d&  [        SUR                   SUR                   S35      eXl        UR                  U l        [        UR                  UR                  -  5      U l        U R                  U R                  -  U l	        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                   5      U l        UR                   U l        U R                  S-  U l        g )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()g      )r'   r(   r+   num_attention_headshasattr
ValueErrorrC   intattention_head_sizeall_head_sizer   Linearri   rj   rk   r:   attention_probs_dropout_probr<   attention_dropoutrm   rA   s     rE   r(   LayoutLMSelfAttention.__init__   sD    : ::a?PVXhHiHi#F$6$6#7 8 445Q8 
 #)#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
zz&"E"EF!'!D!D//5rG   hidden_statesrl   rn   output_attentionsreturnc                    UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      n	U R                  U5      R                  U5      R	                  SS5      n
[        nU R                  R                  S:w  a  [        U R                  R                     nU" U UU	U
U4U R                  (       d  SOU R                  U R                  US.UD6u  pUR                  " / UQSP76 R                  5       nU(       a  X4nU$ U4nU$ )Nr%   r   rL   eager        )r<   rm   rn   )rv   r   ri   r{   ru   rj   rk   r   rC   _attn_implementationr   rs   r   rm   reshaper|   )rB   r   rl   rn   r   r}   rU   hidden_shapequery_states
key_statesvalue_statesattention_interfacer   r~   outputss                  rE   r^   LayoutLMSelfAttention.forward   s[    $))#2.CCbC$*B*BCzz-055lCMMaQRSXXm,11,?II!QO
zz-055lCMMaQRS(?;;++w6"9$++:Z:Z"[$7
%
  $}}C$2H2HLL
%
 
%
! "));;;;FFH1B;- JUrG   )
r   r   r   rC   r<   rj   r   ri   rm   rk   NNF)r`   ra   rb   rc   r(   r>   Tensorr   FloatTensorbooltupler^   re   rf   rg   s   @rE   r   r      st    60 7;15,1!||! !!2!23! E--.	!
 $D>! 
u||	! !rG   r   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )LayoutLMSelfOutput   c                 (  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  UR                  S9U l        [        R                  " UR                  5      U l
        g Nr"   )r'   r(   r   r   r+   denser9   r8   r:   r;   r<   rA   s     rE   r(   LayoutLMSelfOutput.__init__   s`    YYv1163E3EF
f&8&8f>S>STzz&"<"<=rG   r   input_tensorr   c                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ Nr   r<   r9   rB   r   r   s      rE   r^   LayoutLMSelfOutput.forward   5    

=1]3}'CDrG   r9   r   r<   
r`   ra   rb   rc   r(   r>   r   r^   re   rf   rg   s   @rE   r   r      6    >U\\  RWR^R^  rG   r   c                      ^  \ rS rSrU 4S jrS r   SS\R                  S\\R                     S\\R                     S\\
   S\\R                     4
S	 jjrS
rU =r$ )LayoutLMAttention   c                    > [         TU ]  5         [        U5      U l        [	        U5      U l        [        5       U l        g r   )r'   r(   r   rB   r   outputsetpruned_headsrA   s     rE   r(   LayoutLMAttention.__init__   s0    )&1	(0ErG   c                 6   [        U5      S:X  a  g [        XR                  R                  U R                  R                  U R
                  5      u  p[        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l	        [        U R                  R                  USS9U R                  l        U R                  R                  [        U5      -
  U R                  l        U R                  R                  U R                  R                  -  U R                  l        U R
                  R                  U5      U l        g )Nr   r   rq   )lenr   rB   r   r   r   r   ri   rj   rk   r   r   r   union)rB   headsindexs      rE   prune_headsLayoutLMAttention.prune_heads   s   u:?79900$))2O2OQUQbQb

 -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:rG   r   rl   rn   r   r   c                 p    U R                   " U4UUUS.UD6nU R                  US   U5      nU4USS  -   nU$ N)rl   rn   r   r   r   )rB   r   )	rB   r   rl   rn   r   r}   self_outputsattention_outputr   s	            rE   r^   LayoutLMAttention.forward   s]     yy
)/	

 
  ;;|AF#%QR(88rG   )r   r   rB   r   )r`   ra   rb   rc   r(   r   r>   r   r   r   r   r   r^   re   rf   rg   s   @rE   r   r      sy    ";* 7;15,1|| !!2!23 E--.	
 $D> 
u||	 rG   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )LayoutLMIntermediatei  c                   > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                  [        5      (       a  [        UR                     U l        g UR                  U l        g r   )r'   r(   r   r   r+   intermediate_sizer   
isinstance
hidden_actstrr   intermediate_act_fnrA   s     rE   r(   LayoutLMIntermediate.__init__  s`    YYv1163K3KL
f''--'-f.?.?'@D$'-'8'8D$rG   r   r   c                 J    U R                  U5      nU R                  U5      nU$ r   r   r   rB   r   s     rE   r^   LayoutLMIntermediate.forward  s&    

=100?rG   r   r   rg   s   @rE   r   r     s(    9U\\ ell  rG   r   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )LayoutLMOutputi  c                 (  > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        R                  " UR
                  UR                  S9U l        [        R                  " UR                  5      U l        g r   )r'   r(   r   r   r   r+   r   r9   r8   r:   r;   r<   rA   s     rE   r(   LayoutLMOutput.__init__   s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=rG   r   r   r   c                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ r   r   r   s      rE   r^   LayoutLMOutput.forward&  r   rG   r   r   rg   s   @rE   r   r     r   rG   r   c                      ^  \ rS rSrU 4S jr   SS\R                  S\\R                     S\\R                     S\\	   S\
\R                     4
S jjrS	 rS
rU =r$ )LayoutLMLayeri.  c                    > [         TU ]  5         UR                  U l        SU l        [	        U5      U l        [        U5      U l        [        U5      U l	        g )Nr   )
r'   r(   chunk_size_feed_forwardseq_len_dimr   	attentionr   intermediater   r   rA   s     rE   r(   LayoutLMLayer.__init__/  sI    '-'E'E$*6208$V,rG   r   rl   rn   r   r   c                     U R                   " U4UUUS.UD6nUS   nUSS  n[        U R                  U R                  U R                  U5      n	U	4U-   nU$ r   )r   r   feed_forward_chunkr   r   )
rB   r   rl   rn   r   r}   self_attention_outputsr   r   layer_outputs
             rE   r^   LayoutLMLayer.forward7  s     "&"
)/	"

 "
 2!4(,0##T%A%A4CSCSUe
  /G+rG   c                 J    U R                  U5      nU R                  X!5      nU$ r   )r   r   )rB   r   intermediate_outputr   s       rE   r    LayoutLMLayer.feed_forward_chunkP  s)    "//0@A{{#6IrG   )r   r   r   r   r   r   )r`   ra   rb   rc   r(   r>   r   r   r   r   r   r^   r   re   rf   rg   s   @rE   r   r   .  sy    - 7;15,1|| !!2!23 E--.	
 $D> 
u||	2 rG   r   c                      ^  \ rS rSrU 4S jr\     SS\R                  S\\R                     S\\R                     S\\
   S\\
   S\\
   S	\\\R                     \4   4S
 jj5       rSrU =r$ )LayoutLMEncoderiW  c                    > [         TU ]  5         Xl        [        R                  " [        UR                  5       Vs/ sH  n[        U5      PM     sn5      U l        SU l	        g s  snf )NF)
r'   r(   rC   r   
ModuleListrangenum_hidden_layersr   layergradient_checkpointing)rB   rC   irD   s      rE   r(   LayoutLMEncoder.__init__X  sR    ]]5IaIaCb#cCbaM&$9Cb#cd
&+# $ds   A%r   rl   rn   r   output_hidden_statesreturn_dictr   c           	         U(       a  SOS nU(       a  SOS n	[        U R                  5       H=  u  pU(       a  X4-   nUb  X:   OS nU" SUUUUS.UD6nUS   nU(       d  M5  XS   4-   n	M?     U(       a  X4-   n[        UUU	S9$ )N )r   rl   rn   r   r   r   )last_hidden_stater   
attentions)	enumerater   r   )rB   r   rl   rn   r   r   r   r}   all_hidden_statesall_self_attentionsr   layer_modulelayer_head_masklayer_outputss                 rE   r^   LayoutLMEncoder.forward^  s     #7BD$5b4(4OA#$58H$H!.7.CilO( +-)"3	
 M *!,M  &91=M<O&O#!  5$   14D D++*
 	
rG   )rC   r   r   )NNFFT)r`   ra   rb   rc   r(   r   r>   r   r   r   r   r   r   r   r^   re   rf   rg   s   @rE   r   r   W  s    ,  7;15,1/4&*&
||&
 !!2!23&
 E--.	&

 $D>&
 'tn&
 d^&
 
uU\\"O3	4&
 &
rG   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )LayoutLMPooleri  c                    > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " 5       U l        g r   )r'   r(   r   r   r+   r   Tanh
activationrA   s     rE   r(   LayoutLMPooler.__init__  s9    YYv1163E3EF
'')rG   r   r   c                 \    US S 2S4   nU R                  U5      nU R                  U5      nU$ )Nr   )r   r
  )rB   r   first_token_tensorpooled_outputs       rE   r^   LayoutLMPooler.forward  s6     +1a40

#566rG   )r
  r   r   rg   s   @rE   r  r    s(    $
U\\ ell  rG   r  c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )LayoutLMPredictionHeadTransformi  c                 p  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        UR                  [        5      (       a  [        UR                     U l
        OUR                  U l
        [        R                  " UR                  UR                  S9U l        g r   )r'   r(   r   r   r+   r   r   r   r   r   transform_act_fnr9   r8   rA   s     rE   r(   (LayoutLMPredictionHeadTransform.__init__  s~    YYv1163E3EF
f''--$*6+<+<$=D!$*$5$5D!f&8&8f>S>STrG   r   r   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r   )r   r  r9   r   s     rE   r^   'LayoutLMPredictionHeadTransform.forward  s4    

=1--m<}5rG   )r9   r   r  r   rg   s   @rE   r  r    s)    UU\\ ell  rG   r  c                   4   ^  \ rS rSrU 4S jrS rS rSrU =r$ )LayoutLMLMPredictionHeadi  c                 H  > [         TU ]  5         [        U5      U l        [        R
                  " UR                  UR                  SS9U l        [        R                  " [        R                  " UR                  5      5      U l        U R                  U R                  l        g )NF)bias)r'   r(   r  	transformr   r   r+   r*   decoder	Parameterr>   rN   r  rA   s     rE   r(   !LayoutLMLMPredictionHead.__init__  sm    8@ yy!3!3V5F5FUSLLV->->!?@	 !IIrG   c                 :    U R                   U R                  l         g r   )r  r  rB   s    rE   _tie_weights%LayoutLMLMPredictionHead._tie_weights  s     IIrG   c                 J    U R                  U5      nU R                  U5      nU$ r   )r  r  r   s     rE   r^    LayoutLMLMPredictionHead.forward  s$    }5]3rG   )r  r  r  )	r`   ra   rb   rc   r(   r!  r^   re   rf   rg   s   @rE   r  r    s    && rG   r  c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )LayoutLMOnlyMLMHeadi  c                 B   > [         TU ]  5         [        U5      U l        g r   )r'   r(   r  predictionsrA   s     rE   r(   LayoutLMOnlyMLMHead.__init__  s    3F;rG   sequence_outputr   c                 (    U R                  U5      nU$ r   r(  )rB   r*  prediction_scoress      rE   r^   LayoutLMOnlyMLMHead.forward  s     ,,_=  rG   r,  r   rg   s   @rE   r&  r&    s(    <!u|| ! ! !rG   r&  c                   .    \ rS rSr% \\S'   SrSrS rSr	g)LayoutLMPreTrainedModeli  rC   layoutlmTc                 x   [        U[        R                  5      (       ak  UR                  R                  R                  SU R                  R                  S9  UR                  b%  UR                  R                  R                  5         gg[        U[        R                  5      (       ax  UR                  R                  R                  SU R                  R                  S9  UR                  b2  UR                  R                  UR                     R                  5         gg[        U[        5      (       aJ  UR                  R                  R                  5         UR                  R                  R                  S5        g[        U[        5      (       a%  UR                  R                  R                  5         gg)zInitialize the weightsr   )meanstdN      ?)r   r   r   weightdatanormal_rC   initializer_ranger  zero_r)   r!   r7   fill_r  )rB   rh   s     rE   _init_weights%LayoutLMPreTrainedModel._init_weights  s0   fbii(( MM&&CT[[5R5R&S{{&  &&( '--MM&&CT[[5R5R&S!!-""6#5#56<<> . 122KK""$MM$$S) 899KK""$ :rG   r   N)
r`   ra   rb   rc   r   __annotations__base_model_prefixsupports_gradient_checkpointingr<  re   r   rG   rE   r0  r0    s    "&*#%rG   r0  c                   n  ^  \ rS rSrU 4S jrS rS rS r\\	          SS\
\R                     S\
\R                     S\
\R                     S	\
\R                     S
\
\R                     S\
\R                     S\
\R                     S\
\   S\
\   S\
\   S\\\4   4S jj5       5       rSrU =r$ )LayoutLMModeli  c                    > [         TU ]  U5        Xl        [        U5      U l        [        U5      U l        [        U5      U l        U R                  5         g r   )
r'   r(   rC   r   r]   r   encoderr  pooler	post_initrA   s     rE   r(   LayoutLMModel.__init__  sE     ,V4&v.$V, 	rG   c                 .    U R                   R                  $ r   r]   r-   r   s    rE   get_input_embeddings"LayoutLMModel.get_input_embeddings  s    ...rG   c                 $    XR                   l        g r   rI  )rB   rk   s     rE   set_input_embeddings"LayoutLMModel.set_input_embeddings  s    */'rG   c                     UR                  5        H7  u  p#U R                  R                  U   R                  R	                  U5        M9     g)z
Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
class PreTrainedModel
N)itemsrD  r   r   r   )rB   heads_to_pruner   r   s       rE   _prune_headsLayoutLMModel._prune_heads  s<    
 +002LELLu%//;;EB 3rG   rQ   rR   rl   rS   r$   rn   rT   r   r   r   r   c           	          Ub  UOU R                   R                  nU	b  U	OU R                   R                  n	U
b  U
OU R                   R                  n
Ub  Ub  [	        S5      eUb"  U R                  X5        UR                  5       nO"Ub  UR                  5       SS nO[	        S5      eUb  UR                  OUR                  nUc  [        R                  " XS9nUc$  [        R                  " U[        R                  US9nUc'  [        R                  " US-   [        R                  US9nUR                  S5      R                  S	5      nUR                  U R                  S
9nSU-
  [        R                  " U R                  5      R                   -  nUb  UR#                  5       S:X  ah  UR                  S5      R                  S5      R                  S5      R                  S5      nUR%                  U R                   R&                  SSSS5      nOCUR#                  5       S	:X  a/  UR                  S5      R                  S5      R                  S5      nUR                  [)        U R+                  5       5      R                  S
9nOS/U R                   R&                  -  nU R-                  UUUUUS9nU R/                  UUUUU	SS9nUS   nU R1                  U5      n[3        UUUR4                  UR6                  S9$ )a  
bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
    Bounding boxes of each input sequence tokens. Selected in the range `[0,
    config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
    format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
    y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.

Examples:

```python
>>> from transformers import AutoTokenizer, LayoutLMModel
>>> import torch

>>> tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
>>> model = LayoutLMModel.from_pretrained("microsoft/layoutlm-base-uncased")

>>> words = ["Hello", "world"]
>>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]

>>> token_boxes = []
>>> for word, box in zip(words, normalized_word_boxes):
...     word_tokens = tokenizer.tokenize(word)
...     token_boxes.extend([box] * len(word_tokens))
>>> # add bounding boxes of cls + sep tokens
>>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]

>>> encoding = tokenizer(" ".join(words), return_tensors="pt")
>>> input_ids = encoding["input_ids"]
>>> attention_mask = encoding["attention_mask"]
>>> token_type_ids = encoding["token_type_ids"]
>>> bbox = torch.tensor([token_boxes])

>>> outputs = model(
...     input_ids=input_ids, bbox=bbox, attention_mask=attention_mask, token_type_ids=token_type_ids
... )

>>> last_hidden_states = outputs.last_hidden_state
```NzDYou cannot specify both input_ids and inputs_embeds at the same timer%   z5You have to specify either input_ids or inputs_embeds)rK   rI   )   r   rL   )rJ   r5  r   )rQ   rR   r$   rS   rT   T)rn   r   r   r   )r   pooler_outputr   r   )rC   r   r   use_return_dictr   %warn_if_padding_and_no_attention_maskrM   rK   r>   onesrN   rO   	unsqueezerz   rJ   finfominrq   r@   r   next
parametersr]   rD  rE  r   r   r   )rB   rQ   rR   rl   rS   r$   rn   rT   r   r   r   rU   rK   extended_attention_maskembedding_outputencoder_outputsr*  r  s                     rE   r^   LayoutLMModel.forward  s   j 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"66yQ#..*K&',,.s3KTUU%.%:!!@T@T!"ZZCN!"[[EJJvVN<;;{T1FSD"0":":1"="G"G"J"9"<"<4::"<"N#&)@#@EKKPTPZPZD[D_D_"_ }}!#%//2<<Q?II"MWWXZ[	%,,T[[-J-JBPRTVXZ[	A%%//2<<R@JJ2N	!40A+B+H+HII!>!>>I??%)' + 
 ,,#/!5 ' 
 *!,O4)-')77&11	
 	
rG   )rC   r]   rD  rE  )
NNNNNNNNNN)r`   ra   rb   rc   r(   rJ  rM  rR  r   r   r   r>   
LongTensorr   r   r   r   r   r^   re   rf   rg   s   @rE   rB  rB    s1   	/0C  15+/6:59371559,0/3&*s
E,,-s
 u''(s
 !!2!23	s

 !!1!12s
 u//0s
 E--.s
   1 12s
 $D>s
 'tns
 d^s
 
u00	1s
  s
rG   rB  c                     ^  \ rS rSrSS/rU 4S jrS rS rS r\	\
           SS\\R                     S	\\R                     S
\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\   S\\   S\\   S\\\4   4S jj5       5       rSrU =r$ )LayoutLMForMaskedLMiy  zcls.predictions.decoder.biaszcls.predictions.decoder.weightc                    > [         TU ]  U5        [        U5      U l        [	        U5      U l        U R                  5         g r   )r'   r(   rB  r1  r&  clsrF  rA   s     rE   r(   LayoutLMForMaskedLM.__init__}  s4     %f-&v. 	rG   c                 B    U R                   R                  R                  $ r   r1  r]   r-   r   s    rE   rJ  (LayoutLMForMaskedLM.get_input_embeddings      }}''777rG   c                 B    U R                   R                  R                  $ r   )rg  r(  r  r   s    rE   get_output_embeddings)LayoutLMForMaskedLM.get_output_embeddings  s    xx##+++rG   c                     XR                   R                  l        UR                  U R                   R                  l        g r   )rg  r(  r  r  )rB   new_embeddingss     rE   set_output_embeddings)LayoutLMForMaskedLM.set_output_embeddings  s*    '5$$2$7$7!rG   rQ   rR   rl   rS   r$   rn   rT   labelsr   r   r   r   c                 j   Ub  UOU R                   R                  nU R                  UUUUUUUU	U
SS9
nUS   nU R                  U5      nSnUbF  [	        5       nU" UR                  SU R                   R                  5      UR                  S5      5      n[        UUUR                  UR                  S9$ )a  
bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
    Bounding boxes of each input sequence tokens. Selected in the range `[0,
    config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
    format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
    y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
    config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
    loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`

Examples:

```python
>>> from transformers import AutoTokenizer, LayoutLMForMaskedLM
>>> import torch

>>> tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
>>> model = LayoutLMForMaskedLM.from_pretrained("microsoft/layoutlm-base-uncased")

>>> words = ["Hello", "[MASK]"]
>>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]

>>> token_boxes = []
>>> for word, box in zip(words, normalized_word_boxes):
...     word_tokens = tokenizer.tokenize(word)
...     token_boxes.extend([box] * len(word_tokens))
>>> # add bounding boxes of cls + sep tokens
>>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]

>>> encoding = tokenizer(" ".join(words), return_tensors="pt")
>>> input_ids = encoding["input_ids"]
>>> attention_mask = encoding["attention_mask"]
>>> token_type_ids = encoding["token_type_ids"]
>>> bbox = torch.tensor([token_boxes])

>>> labels = tokenizer("Hello world", return_tensors="pt")["input_ids"]

>>> outputs = model(
...     input_ids=input_ids,
...     bbox=bbox,
...     attention_mask=attention_mask,
...     token_type_ids=token_type_ids,
...     labels=labels,
... )

>>> loss = outputs.loss
```NT)rl   rS   r$   rn   rT   r   r   r   r   r%   losslogitsr   r   )
rC   rW  r1  rg  r   r{   r*   r   r   r   )rB   rQ   rR   rl   rS   r$   rn   rT   rt  r   r   r   r   r*  r-  masked_lm_lossloss_fcts                    rE   r^   LayoutLMForMaskedLM.forward  s    @ &1%<k$++B]B]--))%'/!5   
 "!* HH_5')H%!&&r4;;+A+ABBN
 $!//))	
 	
rG   )rg  r1  NNNNNNNNNNN)r`   ra   rb   rc   _tied_weights_keysr(   rJ  rn  rr  r   r   r   r>   rc  r   r   r   r   r   r^   re   rf   rg   s   @rE   re  re  y  sT   8:Z[8,8  15+/6:59371559-1,0/3&*]
E,,-]
 u''(]
 !!2!23	]

 !!1!12]
 u//0]
 E--.]
   1 12]
 ))*]
 $D>]
 'tn]
 d^]
 
un$	%]
  ]
rG   re  z
    LayoutLM Model with a sequence classification head on top (a linear layer on top of the pooled output) e.g. for
    document image classification tasks such as the [RVL-CDIP](https://www.cs.cmu.edu/~aharley/rvl-cdip/) dataset.
    )custom_introc                     ^  \ rS rSrU 4S jrS r\\           SS\\	R                     S\\	R                     S\\	R                     S\\	R                     S\\	R                     S	\\	R                     S
\\	R                     S\\	R                     S\\   S\\   S\\   S\\\4   4S jj5       5       rSrU =r$ )!LayoutLMForSequenceClassificationi  c                 0  > [         TU ]  U5        UR                  U l        [        U5      U l        [
        R                  " UR                  5      U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g r   r'   r(   
num_labelsrB  r1  r   r:   r;   r<   r   r+   
classifierrF  rA   s     rE   r(   *LayoutLMForSequenceClassification.__init__  i      ++%f-zz&"<"<=))F$6$68I8IJ 	rG   c                 B    U R                   R                  R                  $ r   rj  r   s    rE   rJ  6LayoutLMForSequenceClassification.get_input_embeddings  rl  rG   rQ   rR   rl   rS   r$   rn   rT   rt  r   r   r   r   c                    Ub  UOU R                   R                  nU R                  UUUUUUUU	U
SS9
nUS   nU R                  U5      nU R	                  U5      nSnUGb  U R                   R
                  c  U R                  S:X  a  SU R                   l        OoU R                  S:  aN  UR                  [        R                  :X  d  UR                  [        R                  :X  a  SU R                   l        OSU R                   l        U R                   R
                  S:X  aI  [        5       nU R                  S:X  a&  U" UR                  5       UR                  5       5      nOU" X5      nOU R                   R
                  S:X  a=  [        5       nU" UR                  SU R                  5      UR                  S5      5      nO,U R                   R
                  S:X  a  [        5       nU" X5      n[!        UUUR"                  UR$                  S	9$ )
a
  
bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
    Bounding boxes of each input sequence tokens. Selected in the range `[0,
    config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
    format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
    y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

Examples:

```python
>>> from transformers import AutoTokenizer, LayoutLMForSequenceClassification
>>> import torch

>>> tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
>>> model = LayoutLMForSequenceClassification.from_pretrained("microsoft/layoutlm-base-uncased")

>>> words = ["Hello", "world"]
>>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]

>>> token_boxes = []
>>> for word, box in zip(words, normalized_word_boxes):
...     word_tokens = tokenizer.tokenize(word)
...     token_boxes.extend([box] * len(word_tokens))
>>> # add bounding boxes of cls + sep tokens
>>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]

>>> encoding = tokenizer(" ".join(words), return_tensors="pt")
>>> input_ids = encoding["input_ids"]
>>> attention_mask = encoding["attention_mask"]
>>> token_type_ids = encoding["token_type_ids"]
>>> bbox = torch.tensor([token_boxes])
>>> sequence_label = torch.tensor([1])

>>> outputs = model(
...     input_ids=input_ids,
...     bbox=bbox,
...     attention_mask=attention_mask,
...     token_type_ids=token_type_ids,
...     labels=sequence_label,
... )

>>> loss = outputs.loss
>>> logits = outputs.logits
```NT
rQ   rR   rl   rS   r$   rn   rT   r   r   r   r   
regressionsingle_label_classificationmulti_label_classificationr%   rv  )rC   rW  r1  r<   r  problem_typer  rJ   r>   rO   r   r	   squeezer   r{   r   r   r   r   )rB   rQ   rR   rl   rS   r$   rn   rT   rt  r   r   r   r   r  rx  rw  rz  s                    rE   r^   )LayoutLMForSequenceClassification.forward  s   @ &1%<k$++B]B]--))%'/!5   
  
]3/{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#F3D))-JJ+-B @&++b/R))-II,./'!//))	
 	
rG   r  r<   r1  r  r|  )r`   ra   rb   rc   r(   rJ  r   r   r   r>   rc  r   r   r   r   r   r^   re   rf   rg   s   @rE   r  r    s?   8  15+/6:59371559-1,0/3&*n
E,,-n
 u''(n
 !!2!23	n

 !!1!12n
 u//0n
 E--.n
   1 12n
 ))*n
 $D>n
 'tnn
 d^n
 
u..	/n
  n
rG   r  a3  
    LayoutLM Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for
    sequence labeling (information extraction) tasks such as the [FUNSD](https://guillaumejaume.github.io/FUNSD/)
    dataset and the [SROIE](https://rrc.cvc.uab.es/?ch=13) dataset.
    c                     ^  \ rS rSrU 4S jrS r\\           SS\\	R                     S\\	R                     S\\	R                     S\\	R                     S\\	R                     S	\\	R                     S
\\	R                     S\\	R                     S\\   S\\   S\\   S\\\4   4S jj5       5       rSrU =r$ )LayoutLMForTokenClassificationiy  c                 0  > [         TU ]  U5        UR                  U l        [        U5      U l        [
        R                  " UR                  5      U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g r   r  rA   s     rE   r(   'LayoutLMForTokenClassification.__init__  r  rG   c                 B    U R                   R                  R                  $ r   rj  r   s    rE   rJ  3LayoutLMForTokenClassification.get_input_embeddings  rl  rG   rQ   rR   rl   rS   r$   rn   rT   rt  r   r   r   r   c                 x   Ub  UOU R                   R                  nU R                  UUUUUUUU	U
SS9
nUS   nU R                  U5      nU R	                  U5      nSnUb<  [        5       nU" UR                  SU R                  5      UR                  S5      5      n[        UUUR                  UR                  S9$ )a  
bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
    Bounding boxes of each input sequence tokens. Selected in the range `[0,
    config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
    format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
    y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.

Examples:

```python
>>> from transformers import AutoTokenizer, LayoutLMForTokenClassification
>>> import torch

>>> tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
>>> model = LayoutLMForTokenClassification.from_pretrained("microsoft/layoutlm-base-uncased")

>>> words = ["Hello", "world"]
>>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]

>>> token_boxes = []
>>> for word, box in zip(words, normalized_word_boxes):
...     word_tokens = tokenizer.tokenize(word)
...     token_boxes.extend([box] * len(word_tokens))
>>> # add bounding boxes of cls + sep tokens
>>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]

>>> encoding = tokenizer(" ".join(words), return_tensors="pt")
>>> input_ids = encoding["input_ids"]
>>> attention_mask = encoding["attention_mask"]
>>> token_type_ids = encoding["token_type_ids"]
>>> bbox = torch.tensor([token_boxes])
>>> token_labels = torch.tensor([1, 1, 0, 0]).unsqueeze(0)  # batch size of 1

>>> outputs = model(
...     input_ids=input_ids,
...     bbox=bbox,
...     attention_mask=attention_mask,
...     token_type_ids=token_type_ids,
...     labels=token_labels,
... )

>>> loss = outputs.loss
>>> logits = outputs.logits
```NTr  r   r%   rv  )rC   rW  r1  r<   r  r   r{   r  r   r   r   )rB   rQ   rR   rl   rS   r$   rn   rT   rt  r   r   r   r   r*  rx  rw  rz  s                    rE   r^   &LayoutLMForTokenClassification.forward  s    | &1%<k$++B]B]--))%'/!5   
 "!*,,71')HFKKDOO<fkk"oND$!//))	
 	
rG   r  r|  )r`   ra   rb   rc   r(   rJ  r   r   r   r>   rc  r   r   r   r   r   r^   re   rf   rg   s   @rE   r  r  y  s?   8  15+/6:59371559-1,0/3&*Z
E,,-Z
 u''(Z
 !!2!23	Z

 !!1!12Z
 u//0Z
 E--.Z
   1 12Z
 ))*Z
 $D>Z
 'tnZ
 d^Z
 
u++	,Z
  Z
rG   r  c                     ^  \ rS rSrSU 4S jjrS r\\            SS\\	R                     S\\	R                     S\\	R                     S\\	R                     S\\	R                     S	\\	R                     S
\\	R                     S\\	R                     S\\	R                     S\\   S\\   S\\   S\\\4   4S jj5       5       rSrU =r$ )LayoutLMForQuestionAnsweringi  c                    > [         TU ]  U5        UR                  U l        [        U5      U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g)z}
has_visual_segment_embedding (`bool`, *optional*, defaults to `True`):
    Whether or not to add visual segment embeddings.
N)
r'   r(   r  rB  r1  r   r   r+   
qa_outputsrF  )rB   rC   has_visual_segment_embeddingrD   s      rE   r(   %LayoutLMForQuestionAnswering.__init__  sU    
 	  ++%f-))F$6$68I8IJ 	rG   c                 B    U R                   R                  R                  $ r   rj  r   s    rE   rJ  1LayoutLMForQuestionAnswering.get_input_embeddings  rl  rG   rQ   rR   rl   rS   r$   rn   rT   start_positionsend_positionsr   r   r   r   c                    Ub  UOU R                   R                  nU R                  UUUUUUUU
USS9
nUS   nU R                  U5      nUR	                  SSS9u  nnUR                  S5      R                  5       nUR                  S5      R                  5       nSnUb  U	b  [        UR                  5       5      S:  a  UR                  S5      n[        U	R                  5       5      S:  a  U	R                  S5      n	UR                  S5      nUR                  SU5      nU	R                  SU5      n	[        US9nU" UU5      nU" UU	5      nUU-   S	-  n[        UUUUR                  UR                  S
9$ )a  
bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
    Bounding boxes of each input sequence tokens. Selected in the range `[0,
    config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
    format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
    y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.

Example:

In the example below, we prepare a question + context pair for the LayoutLM model. It will give us a prediction
of what it thinks the answer is (the span of the answer within the texts parsed from the image).

```python
>>> from transformers import AutoTokenizer, LayoutLMForQuestionAnswering
>>> from datasets import load_dataset
>>> import torch

>>> tokenizer = AutoTokenizer.from_pretrained("impira/layoutlm-document-qa", add_prefix_space=True)
>>> model = LayoutLMForQuestionAnswering.from_pretrained("impira/layoutlm-document-qa", revision="1e3ebac")

>>> dataset = load_dataset("nielsr/funsd", split="train")
>>> example = dataset[0]
>>> question = "what's his name?"
>>> words = example["words"]
>>> boxes = example["bboxes"]

>>> encoding = tokenizer(
...     question.split(), words, is_split_into_words=True, return_token_type_ids=True, return_tensors="pt"
... )
>>> bbox = []
>>> for i, s, w in zip(encoding.input_ids[0], encoding.sequence_ids(0), encoding.word_ids(0)):
...     if s == 1:
...         bbox.append(boxes[w])
...     elif i == tokenizer.sep_token_id:
...         bbox.append([1000] * 4)
...     else:
...         bbox.append([0] * 4)
>>> encoding["bbox"] = torch.tensor([bbox])

>>> word_ids = encoding.word_ids(0)
>>> outputs = model(**encoding)
>>> loss = outputs.loss
>>> start_scores = outputs.start_logits
>>> end_scores = outputs.end_logits
>>> start, end = word_ids[start_scores.argmax(-1)], word_ids[end_scores.argmax(-1)]
>>> print(" ".join(words[start : end + 1]))
M. Hamann P. Harper, P. Martinez
```NTr  r   r   r%   r   )ignore_indexrL   )rw  start_logits
end_logitsr   r   )rC   rW  r1  r  splitr  r|   r   rM   clampr   r   r   r   )rB   rQ   rR   rl   rS   r$   rn   rT   r  r  r   r   r   r   r*  rx  r  r  
total_lossignored_indexrz  
start_lossend_losss                          rE   r^   $LayoutLMForQuestionAnswering.forward   s   D &1%<k$++B]B]--))%'/!5   
 "!*1#)<<r<#: j#++B/::<''+668

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M-33A}EO)//=AM']CH!,@J
M:H$x/14J+%!!//))
 	
rG   )r1  r  r  )T)NNNNNNNNNNNN)r`   ra   rb   rc   r(   rJ  r   r   r   r>   rc  r   r   r   r   r   r^   re   rf   rg   s   @rE   r  r    sX   8  15+/6:593715596:48,0/3&*m
E,,-m
 u''(m
 !!2!23	m

 !!1!12m
 u//0m
 E--.m
   1 12m
 "%"2"23m
   0 01m
 $D>m
 'tnm
 d^m
 
u22	3m
  m
rG   r  )re  r  r  r  rB  r0  )r   N)@rd   typingr   r   r   r>   torch.utils.checkpointr   torch.nnr   r   r	   activationsr   modeling_layersr   modeling_outputsr   r   r   r   r   r   modeling_utilsr   r   pytorch_utilsr   r   r   utilsr   r   r   configuration_layoutlmr   
get_loggerr`   loggerr9   r7   Moduler   r   floatr   r   r   r   r   r   r   r   r  r  r  r&  r0  rB  re  r  r  r  __all__r   rG   rE   <module>r     sw    , ,    A A ! 9  G l l > > 2 
		H	% LL I Ih (,%II%<<% 
% <<	%
 U\\*% % % %%87BII 7v *		 *\299  RYY %. %R.
bii .
dRYY  bii $ryy 0!")) ! %o % %0 O
+ O
 O
d u
1 u
 u
p ~
(? ~
~
B j
%< j
j
Z A
#: A
 A
HrG   