
    <h{                        S r SSKJr  SSKJrJrJr  SSKrSSKrSSKJ	r	  SSK
Jr  SSKJr  SS	KJr  SS
KJrJrJr  SSKJrJr  SSKJrJrJr  SSKJrJrJr  SSKJ r   \RB                  " \"5      r# " S S\	RH                  5      r%  S;S\	RH                  S\RL                  S\RL                  S\RL                  S\\RL                     S\'S\'S\\RL                     4S jjr( " S S\	RH                  5      r) " S S\	RH                  5      r* " S S \	RH                  5      r+ " S! S"\	RH                  5      r, " S# S$\	RH                  5      r- " S% S&\5      r. " S' S(\	RH                  5      r/\ " S) S*\5      5       r0\ " S+ S,\05      5       r1 " S- S.\	RH                  5      r2 " S/ S0\	RH                  5      r3\ " S1 S2\05      5       r4\\" S3S49 " S5 S6\5      5       5       r5\" S7S49 " S8 S9\05      5       r6/ S:Qr7g)<zPyTorch Splinter model.    )	dataclass)CallableOptionalUnionN)nn)CrossEntropyLoss   )ACT2FN)GradientCheckpointingLayer)BaseModelOutputModelOutputQuestionAnsweringModelOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModel)apply_chunking_to_forward find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringcan_return_tuplelogging   )SplinterConfigc                      ^  \ rS rSrSrU 4S jr    SS\\R                     S\\R                     S\\R                     S\\R                     S\
4
S	 jjrS
rU =r$ )SplinterEmbeddings)   zGConstruct the embeddings from word, position and token_type embeddings.c                   > [         TU ]  5         [        R                  " UR                  UR
                  UR                  S9U l        [        R                  " UR                  UR
                  5      U l	        [        R                  " UR                  UR
                  5      U l        [        R                  " UR
                  UR                  S9U l        [        R                  " UR                  5      U l        U R#                  S[$        R&                  " UR                  5      R)                  S5      SS9  [+        USS5      U l        g )	N)padding_idxepsposition_ids)r   F)
persistentposition_embedding_typeabsolute)super__init__r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_epsDropouthidden_dropout_probdropoutregister_buffertorcharangeexpandgetattrr#   selfconfig	__class__s     f/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/splinter/modeling_splinter.pyr&   SplinterEmbeddings.__init__,   s    !||F,=,=v?Q?Q_e_r_rs#%<<0N0NPVPbPb#c %'\\&2H2H&J\J\%]" f&8&8f>S>STzz&"<"<= 	ELL)G)GHOOPWXej 	 	
 (/v7PR\']$    	input_idstoken_type_idsr    inputs_embedsreturnc                    Ub  UR                  5       nOUR                  5       S S nUS   nUc  U R                  S S 2S U24   nUc8  [        R                  " U[        R                  U R                  R
                  S9nUc  U R                  U5      nU R                  U5      nXG-   nU R                  S:X  a  U R                  U5      n	X-  nU R                  U5      nU R                  U5      nU$ )Nr!   r   dtypedevicer$   )sizer    r6   zeroslongrH   r+   r/   r#   r-   r0   r4   )
r;   rA   rB   r    rC   input_shape
seq_lengthr/   
embeddingsr-   s
             r>   forwardSplinterEmbeddings.forward=   s      #..*K',,.s3K ^
,,Q^<L!"[[EJJtO`O`OgOghN  00;M $ : :> J":
'':5"&":":<"H-J^^J/
\\*-
r@   )r0   r4   r#   r-   r/   r+   )NNNN)__name__
__module____qualname____firstlineno____doc__r&   r   r6   
LongTensorFloatTensortuplerO   __static_attributes____classcell__r=   s   @r>   r   r   )   s    Q^& 15593759E,,- !!1!12 u//0	
   1 12 
 r@   r   modulequerykeyvalueattention_maskscalingr4   	head_maskc                    [         R                  " XR                  SS5      5      U-  n	Ub"  US S 2S S 2S S 2S UR                  S   24   n
X-   n	[        R
                  R                  U	S[         R                  S9R                  UR                  5      n	[        R
                  R                  XU R                  S9n	Ub  XR                  SSSS5      -  n	[         R                  " X5      nUR                  SS5      R                  5       nX4$ )N   r	   r!   )dimrG   )ptrainingr   )r6   matmul	transposeshaper   
functionalsoftmaxfloat32torG   r4   rh   view
contiguous)r\   r]   r^   r_   r`   ra   r4   rb   kwargsattn_weightscausal_maskattn_outputs               r>   eager_attention_forwardrv   _   s     <<}}Q':;gEL!$Q1o		"o%=>#1==((2U]](SVVW\WbWbcL==((6??([L#nnQAq&AA,,|3K''1-88:K$$r@   c                      ^  \ rS rSrU 4S jr   S
S\R                  S\\R                     S\\R                     S\\	   S\
\R                     4
S jjrS	rU =r$ )SplinterSelfAttention{   c                 6  > [         TU ]  5         UR                  UR                  -  S:w  a7  [	        US5      (       d&  [        SUR                   SUR                   S35      eXl        UR                  U l        [        UR                  UR                  -  5      U l        U R                  U R                  -  U l	        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                   5      U l        UR                   U l        U R                  S-  U l        g )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()g      )r%   r&   r)   num_attention_headshasattr
ValueErrorr<   intattention_head_sizeall_head_sizer   Linearr]   r^   r_   r2   attention_probs_dropout_probr4   attention_dropoutra   r:   s     r>   r&   SplinterSelfAttention.__init__|   sD    : ::a?PVXhHiHi#F$6$6#7 8 445Q8 
 #)#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
zz&"E"EF!'!D!D//5r@   hidden_statesr`   rb   output_attentionsrD   c                    UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      n	U R                  U5      R                  U5      R	                  SS5      n
[        nU R                  R                  S:w  a  [        U R                  R                     nU" U UU	U
U4U R                  (       d  SOU R                  U R                  US.UD6u  pUR                  " / UQSP76 R                  5       nU(       a  X4nU$ U4nU$ )Nr!   r   rd   eager        )r4   ra   rb   )rk   r   r]   rp   rj   r^   r_   rv   r<   _attn_implementationr   rh   r   ra   reshaperq   )r;   r   r`   rb   r   rr   rL   hidden_shapequery_states
key_statesvalue_statesattention_interfaceru   rs   outputss                  r>   rO   SplinterSelfAttention.forward   s[    $))#2.CCbC$*B*BCzz-055lCMMaQRSXXm,11,?II!QO
zz-055lCMMaQRS(?;;++w6"9$++:Z:Z"[$7
%
  $}}C$2H2HLL
%
 
%
! "));;;;FFH1B;- JUr@   )
r   r   r   r<   r4   r^   r}   r]   ra   r_   NNF)rQ   rR   rS   rT   r&   r6   Tensorr   rW   boolrX   rO   rY   rZ   r[   s   @r>   rx   rx   {   st    60 7;15,1!||! !!2!23! E--.	!
 $D>! 
u||	! !r@   rx   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )SplinterSelfOutput   c                 (  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  UR                  S9U l        [        R                  " UR                  5      U l
        g Nr   )r%   r&   r   r   r)   denser0   r1   r2   r3   r4   r:   s     r>   r&   SplinterSelfOutput.__init__   s`    YYv1163E3EF
f&8&8f>S>STzz&"<"<=r@   r   input_tensorrD   c                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ Nr   r4   r0   r;   r   r   s      r>   rO   SplinterSelfOutput.forward   5    

=1]3}'CDr@   r0   r   r4   
rQ   rR   rS   rT   r&   r6   r   rO   rY   rZ   r[   s   @r>   r   r      6    >U\\  RWR^R^  r@   r   c                      ^  \ rS rSrU 4S jrS r   SS\R                  S\\R                     S\\R                     S\\
   S\\R                     4
S	 jjrS
rU =r$ )SplinterAttention   c                    > [         TU ]  5         [        U5      U l        [	        U5      U l        [        5       U l        g r   )r%   r&   rx   r;   r   outputsetpruned_headsr:   s     r>   r&   SplinterAttention.__init__   s0    )&1	(0Er@   c                 6   [        U5      S:X  a  g [        XR                  R                  U R                  R                  U R
                  5      u  p[        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l	        [        U R                  R                  USS9U R                  l        U R                  R                  [        U5      -
  U R                  l        U R                  R                  U R                  R                  -  U R                  l        U R
                  R                  U5      U l        g )Nr   r   rf   )lenr   r;   r}   r   r   r   r]   r^   r_   r   r   r   union)r;   headsindexs      r>   prune_headsSplinterAttention.prune_heads   s   u:?79900$))2O2OQUQbQb

 -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:r@   r   r`   rb   r   rD   c                 p    U R                   " U4UUUS.UD6nU R                  US   U5      nU4USS  -   nU$ N)r`   rb   r   r   r   )r;   r   )	r;   r   r`   rb   r   rr   self_outputsattention_outputr   s	            r>   rO   SplinterAttention.forward   s]     yy
)/	

 
  ;;|AF#%QR(88r@   )r   r   r;   r   )rQ   rR   rS   rT   r&   r   r6   r   r   rW   r   rX   rO   rY   rZ   r[   s   @r>   r   r      sy    ";* 7;15,1|| !!2!23 E--.	
 $D> 
u||	 r@   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )SplinterIntermediate   c                   > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                  [        5      (       a  [        UR                     U l        g UR                  U l        g r   )r%   r&   r   r   r)   intermediate_sizer   
isinstance
hidden_actstrr
   intermediate_act_fnr:   s     r>   r&   SplinterIntermediate.__init__   s`    YYv1163K3KL
f''--'-f.?.?'@D$'-'8'8D$r@   r   rD   c                 J    U R                  U5      nU R                  U5      nU$ r   r   r   )r;   r   s     r>   rO   SplinterIntermediate.forward   s&    

=100?r@   r   r   r[   s   @r>   r   r      s(    9U\\ ell  r@   r   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )SplinterOutputi  c                 (  > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        R                  " UR
                  UR                  S9U l        [        R                  " UR                  5      U l        g r   )r%   r&   r   r   r   r)   r   r0   r1   r2   r3   r4   r:   s     r>   r&   SplinterOutput.__init__  s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=r@   r   r   rD   c                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ r   r   r   s      r>   rO   SplinterOutput.forward
  r   r@   r   r   r[   s   @r>   r   r     r   r@   r   c                      ^  \ rS rSrU 4S jr   SS\R                  S\\R                     S\\R                     S\\	   S\
\R                     4
S jjrS	 rS
rU =r$ )SplinterLayeri  c                    > [         TU ]  5         UR                  U l        SU l        [	        U5      U l        [        U5      U l        [        U5      U l	        g )Nr   )
r%   r&   chunk_size_feed_forwardseq_len_dimr   	attentionr   intermediater   r   r:   s     r>   r&   SplinterLayer.__init__  sI    '-'E'E$*6208$V,r@   r   r`   rb   r   rD   c                     U R                   " U4UUUS.UD6nUS   nUSS  n[        U R                  U R                  U R                  U5      n	U	4U-   nU$ r   )r   r   feed_forward_chunkr   r   )
r;   r   r`   rb   r   rr   self_attention_outputsr   r   layer_outputs
             r>   rO   SplinterLayer.forward  s     "&"
)/	"

 "
 2!4(,0##T%A%A4CSCSUe
  /G+r@   c                 J    U R                  U5      nU R                  X!5      nU$ r   )r   r   )r;   r   intermediate_outputr   s       r>   r    SplinterLayer.feed_forward_chunk4  s)    "//0@A{{#6Ir@   )r   r   r   r   r   r   )rQ   rR   rS   rT   r&   r6   r   r   rW   r   rX   rO   r   rY   rZ   r[   s   @r>   r   r     sy    - 7;15,1|| !!2!23 E--.	
 $D> 
u||	2 r@   r   c                      ^  \ rS rSrU 4S jr\     SS\R                  S\\R                     S\\R                     S\\
   S\\
   S\\
   S	\\\R                     \4   4S
 jj5       rSrU =r$ )SplinterEncoderi;  c                    > [         TU ]  5         Xl        [        R                  " [        UR                  5       Vs/ sH  n[        U5      PM     sn5      U l        SU l	        g s  snf )NF)
r%   r&   r<   r   
ModuleListrangenum_hidden_layersr   layergradient_checkpointing)r;   r<   ir=   s      r>   r&   SplinterEncoder.__init__<  sR    ]]5IaIaCb#cCbaM&$9Cb#cd
&+# $ds   A%r   r`   rb   r   output_hidden_statesreturn_dictrD   c           	         U(       a  SOS nU(       a  SOS n	[        U R                  5       H=  u  pU(       a  X4-   nUb  X:   OS nU" SUUUUS.UD6nUS   nU(       d  M5  XS   4-   n	M?     U(       a  X4-   n[        UUU	S9$ )N )r   r`   rb   r   r   r   last_hidden_stater   
attentions)	enumerater   r   )r;   r   r`   rb   r   r   r   rr   all_hidden_statesall_self_attentionsr   layer_modulelayer_head_masklayer_outputss                 r>   rO   SplinterEncoder.forwardB  s     #7BD$5b4(4OA#$58H$H!.7.CilO( +-)"3	
 M *!,M  &91=M<O&O#!  5$   14D D++*
 	
r@   )r<   r   r   )NNFFT)rQ   rR   rS   rT   r&   r   r6   r   r   rW   r   r   rX   r   rO   rY   rZ   r[   s   @r>   r   r   ;  s    ,  7;15,1/4&*&
||&
 !!2!23&
 E--.	&

 $D>&
 'tn&
 d^&
 
uU\\"O3	4&
 &
r@   r   c                   .    \ rS rSr% \\S'   SrSrS rSr	g)SplinterPreTrainedModelil  r<   splinterTc                    [        U[        R                  5      (       ak  UR                  R                  R                  SU R                  R                  S9  UR                  b%  UR                  R                  R                  5         gg[        U[        R                  5      (       ax  UR                  R                  R                  SU R                  R                  S9  UR                  b2  UR                  R                  UR                     R                  5         gg[        U[        R                  5      (       aJ  UR                  R                  R                  5         UR                  R                  R                  S5        gg)zInitialize the weightsr   )meanstdNg      ?)r   r   r   weightdatanormal_r<   initializer_rangebiaszero_r'   r   r0   fill_)r;   r\   s     r>   _init_weights%SplinterPreTrainedModel._init_weightsr  s   fbii(( MM&&CT[[5R5R&S{{&  &&( '--MM&&CT[[5R5R&S!!-""6#5#56<<> .--KK""$MM$$S) .r@   r   N)
rQ   rR   rS   rT   r   __annotations__base_model_prefixsupports_gradient_checkpointingr  rY   r   r@   r>   r   r   l  s    "&*#*r@   r   c                   R  ^  \ rS rSrSrU 4S jrS rS rS r\	\
         SS\\R                     S\\R                     S	\\R                     S
\\R                     S\\R                     S\\R                     S\\   S\\   S\\   S\\\4   4S jj5       5       rSrU =r$ )SplinterModeli  a"  
The model is an encoder (with only self-attention) following the architecture described in [Attention is all you
need](https://huggingface.co/papers/1706.03762) by Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones,
Aidan N. Gomez, Lukasz Kaiser and Illia Polosukhin.
c                    > [         TU ]  U5        Xl        [        U5      U l        [        U5      U l        U R                  5         g r   )r%   r&   r<   r   rN   r   encoder	post_initr:   s     r>   r&   SplinterModel.__init__  s9     ,V4&v. 	r@   c                 .    U R                   R                  $ r   rN   r+   )r;   s    r>   get_input_embeddings"SplinterModel.get_input_embeddings  s    ...r@   c                 $    XR                   l        g r   r  )r;   r_   s     r>   set_input_embeddings"SplinterModel.set_input_embeddings  s    */'r@   c                     UR                  5        H7  u  p#U R                  R                  U   R                  R	                  U5        M9     g)z
Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
class PreTrainedModel
N)itemsr  r   r   r   )r;   heads_to_pruner   r   s       r>   _prune_headsSplinterModel._prune_heads  s<    
 +002LELLu%//;;EB 3r@   rA   r`   rB   r    rb   rC   r   r   r   rD   c
           	         Ub  UOU R                   R                  nUb  UOU R                   R                  nU	b  U	OU R                   R                  n	Ub  Ub  [	        S5      eUb"  U R                  X5        UR                  5       n
O"Ub  UR                  5       SS n
O[	        S5      eU
u  pUb  UR                  OUR                  nUc  [        R                  " X4US9nUc$  [        R                  " U
[        R                  US9nU R                  X*5      nU R                  XPR                   R                  5      nU R                  UUUUS9nU R!                  UUUUUSS	9nUS
   n[#        UUR$                  UR&                  S9$ )a  
token_type_ids (`torch.LongTensor` of shape `batch_size, sequence_length`, *optional*):
    Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
    1]`:

    - 0 corresponds to a *sentence A* token,
    - 1 corresponds to a *sentence B* token.

    [What are token type IDs?](../glossary#token-type-ids)
position_ids (`torch.LongTensor` of shape `batch_size, sequence_length`, *optional*):
    Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
    config.max_position_embeddings - 1]`.

    [What are position IDs?](../glossary#position-ids)
NzDYou cannot specify both input_ids and inputs_embeds at the same timer!   z5You have to specify either input_ids or inputs_embeds)rH   rF   )rA   r    rB   rC   T)r`   rb   r   r   r   r   r   )r<   r   r   use_return_dictr   %warn_if_padding_and_no_attention_maskrI   rH   r6   onesrJ   rK   get_extended_attention_maskget_head_maskr   rN   r  r   r   r   )r;   rA   r`   rB   r    rb   rC   r   r   r   rL   
batch_sizerM   rH   extended_attention_maskembedding_outputencoder_outputssequence_outputs                     r>   rO   SplinterModel.forward  s   : 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"66yQ#..*K&',,.s3KTUU!,
%.%:!!@T@T!"ZZ*)A6RN!"[[EJJvVN 150P0PQ_0m &&y++2O2OP	??%)'	 + 
 ,,2/!5 ' 
 *!,-)77&11
 	
r@   )r<   rN   r  )	NNNNNNNNN)rQ   rR   rS   rT   rU   r&   r  r  r  r   r   r   r6   r   r   r   rX   r   rO   rY   rZ   r[   s   @r>   r  r    s   /0C  -11515/3,004,0/3&*R
ELL)R
 !.R
 !.	R

 u||,R
 ELL)R
  -R
 $D>R
 'tnR
 d^R
 
uo%	&R
  R
r@   r  c                   f   ^  \ rS rSrSU 4S jjrS\R                  S\R                  4S jrSrU =r	$ )SplinterFullyConnectedLayeri  c                    > [         TU ]  5         Xl        X l        [        R
                  " U R                  U R                  5      U l        [        U   U l        [        R                  " U R                  5      U l	        g r   )
r%   r&   	input_dim
output_dimr   r   r   r
   act_fnr0   )r;   r,  r-  r   r=   s       r>   r&   $SplinterFullyConnectedLayer.__init__  sR    "$YYt~~t?
Z(doo6r@   inputsrD   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r   )r   r.  r0   )r;   r0  r   s      r>   rO   #SplinterFullyConnectedLayer.forward  s2    

6*M2}5r@   )r0   r.  r   r,  r-  )gelur   r[   s   @r>   r*  r*    s(    7ell u||  r@   r*  c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )QuestionAwareSpanSelectionHeadi  z^
Implementation of Question-Aware Span Selection (QASS) head, described in Splinter's paper:

c                   > [         TU ]  5         [        UR                  UR                  5      U l        [        UR                  UR                  5      U l        [        UR                  UR                  5      U l        [        UR                  UR                  5      U l        [        R                  " UR                  UR                  SS9U l
        [        R                  " UR                  UR                  SS9U l        g )NF)r  )r%   r&   r*  r)   query_start_transformquery_end_transformstart_transformend_transformr   r   start_classifierend_classifierr:   s     r>   r&   'QuestionAwareSpanSelectionHead.__init__  s    %@ASASU[UgUg%h"#>v?Q?QSYSeSe#f :6;M;MvOaOab89K9KVM_M_` "		&*<*<f>P>PW\ ] ii(:(:F<N<NUZ[r@   c                    UR                  5       u    p4UR                  S5      R                  SSU5      n[        R                  " USUS9nU R                  U5      nU R                  U5      nU R                  U5      n	U R                  U5      n
U R                  U5      nU	R                  SSS5      n	[        R                  " X5      nU R                  U5      nU
R                  SSS5      n
[        R                  " X5      nX4$ )Nr!   r   )rf   r   r   rd   )rI   	unsqueezerepeatr6   gatherr7  r8  r9  r:  r;  permuteri   r<  )r;   r0  	positions_rf   r   gathered_repsquery_start_repsquery_end_reps
start_repsend_repsr   start_logits
end_logitss                 r>   rO   &QuestionAwareSpanSelectionHead.forward  s    KKM	1##B'..q!S9V%@55mD11-@))&1
%%f---.>?''1a0
||M>++N;##Aq!,\\-:
''r@   )r<  r:  r8  r7  r;  r9  )	rQ   rR   rS   rT   rU   r&   rO   rY   rZ   r[   s   @r>   r5  r5    s    
	\( (r@   r5  c                     ^  \ rS rSrU 4S jr\            SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\
   S\\
   S\\
   S\\R                     S\\\4   4S jj5       rSrU =r$ )SplinterForQuestionAnsweringi2  c                    > [         TU ]  U5        [        U5      U l        [	        U5      U l        UR                  U l        U R                  5         g r   r%   r&   r  r   r5  splinter_qassquestion_token_idr  r:   s     r>   r&   %SplinterForQuestionAnswering.__init__4  C     %f-;FC!'!9!9 	r@   rA   r`   rB   r    rb   rC   start_positionsend_positionsr   r   r   question_positionsrD   c                    Ub  UOU R                   R                  nSnUc  UbB  [        R                  " [        R                  " XR
                  5      R                  5       SS9nOH[        R                  " UR                  S5      [        R                  UR                  UR                  S9nUR                  S5      nSnU R                  UUUUUUU	U
US9	nUS   nU R                  UU5      u  nnU(       a"  UR                  S	5      UR                  S	5      nnUbf  US	U-
  [        R                   " UR"                  5      R$                  -  -   nUS	U-
  [        R                   " UR"                  5      R$                  -  -   nSnUb  Ub  ['        UR                  5       5      S	:  a  UR                  S5      n['        UR                  5       5      S	:  a  UR                  S5      nUR                  S	5      nUR)                  SU5        UR)                  SU5        [+        US
9nU" UU5      nU" UU5      nUU-   S-  nU(       d  UU4US	S -   nUb  U4U-   $ U$ [-        UUUUR.                  UR0                  S9$ )aI  
token_type_ids (`torch.LongTensor` of shape `batch_size, sequence_length`, *optional*):
    Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
    1]`:

    - 0 corresponds to a *sentence A* token,
    - 1 corresponds to a *sentence B* token.

    [What are token type IDs?](../glossary#token-type-ids)
position_ids (`torch.LongTensor` of shape `batch_size, sequence_length`, *optional*):
    Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
    config.max_position_embeddings - 1]`.

    [What are position IDs?](../glossary#position-ids)
question_positions (`torch.LongTensor` of shape `(batch_size, num_questions)`, *optional*):
    The positions of all question tokens. If given, start_logits and end_logits will be of shape `(batch_size,
    num_questions, sequence_length)`. If None, the first question token in each sequence in the batch will be
    the only one for which start_logits and end_logits are calculated and they will be of shape `(batch_size,
    sequence_length)`.
NFr!   r   r   )rG   layoutrH   Tr`   rB   r    rb   rC   r   r   r   r   ignore_indexrd   lossrJ  rK  r   r   )r<   r  r6   argmaxeqrR  r   rJ   rI   rK   rY  rH   r?  r   rQ  squeezefinforG   minr   clamp_r   r   r   r   )r;   rA   r`   rB   r    rb   rC   rU  rV  r   r   r   rW  question_positions_were_none"question_position_for_each_exampler   r'  rJ  rK  
total_lossignored_indexloss_fct
start_lossend_lossr   s                            r>   rO   $SplinterForQuestionAnswering.forward>  s   H &1%<k$++B]B]',$%$5:\\XXi)?)?@EEGR62 6;[[!&&q)MDXDXanauau62 "D!M!Mb!Q+/(--))%'/!5#   

 "!*#'#5#5oGY#Z j''3';';A'>
@R@RST@U*L%'1~+=\M_M_A`AdAd*ddL#q>'9U[[IYIY=Z=^=^&^^J
&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M""1m4  M2']CH!,@J
M:H$x/14J"J/'!"+=F/9/EZMF*Q6Q+%!!//))
 	
r@   rR  r   rQ  NNNNNNNNNNNN)rQ   rR   rS   rT   r&   r   r   r6   r   rV   r   r   rX   r   rO   rY   rZ   r[   s   @r>   rN  rN  2  s?     -11515/3,0046:48,0/3&*9=c
ELL)c
 !.c
 !.	c

 u||,c
 ELL)c
  -c
 "%"2"23c
   0 01c
 $D>c
 'tnc
 d^c
 %U%5%56c
 
u22	3c
 c
r@   rN  zB
    Class for outputs of Splinter as a span selection model.
    )custom_introc                       \ rS rSr% SrSr\\R                     \	S'   Sr
\\R                     \	S'   Sr\\R                     \	S'   Sr\\\R                        \	S'   Sr\\\R                        \	S'   S	rg)
SplinterForPreTrainingOutputi  a  
loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when start and end positions are provided):
    Total span extraction loss is the sum of a Cross-Entropy for the start and end positions.
start_logits (`torch.FloatTensor` of shape `(batch_size, num_questions, sequence_length)`):
    Span-start scores (before SoftMax).
end_logits (`torch.FloatTensor` of shape `(batch_size, num_questions, sequence_length)`):
    Span-end scores (before SoftMax).
Nr^  rJ  rK  r   r   r   )rQ   rR   rS   rT   rU   r^  r   r6   rW   r  rJ  rK  r   rX   r   rY   r   r@   r>   rq  rq    s|     )-D(5$$
%,04L(5,,-4.2J**+28<M8E%"3"345<59Ju00129r@   rq  z
    Splinter Model for the recurring span selection task as done during the pretraining. The difference to the QA task
    is that we do not have a question, but multiple question tokens that replace the occurrences of recurring spans
    instead.
    c                     ^  \ rS rSrU 4S jr\            SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\
   S\\
   S\\
   S\\R                     S\\\4   4S jj5       rS\R                  S\R                  4S jrSrU =r$ )SplinterForPreTrainingi  c                    > [         TU ]  U5        [        U5      U l        [	        U5      U l        UR                  U l        U R                  5         g r   rP  r:   s     r>   r&   SplinterForPreTraining.__init__  rT  r@   rA   r`   rB   r    rb   rC   rU  rV  r   r   r   rW  rD   c                 r   Ub  UOU R                   R                  nUc  Ub  Ub  [        S5      eUc  Uc  [        S5      eUc  U R                  U5      nU R	                  UUUUUUU	U
US9	nUS   nUR                  5       u  nnnU R                  X5      u  nnUR                  S5      nUb  UR                  S5      R                  UUU5      nUSU-
  [        R                  " UR                  5      R                  -  -   nUSU-
  [        R                  " UR                  5      R                  -  -   nSnUb  Ub  UR                  S[        SUS-
  5      5        UR                  S[        SUS-
  5      5        [        U R                   R                   S9nU" UR#                  UU-  U5      UR#                  UU-  5      5      nU" UR#                  UU-  U5      UR#                  UU-  5      5      nUU-   S-  nU(       d  UU4USS -   nUb  U4U-   $ U$ [%        UUUUR&                  UR(                  S	9$ )
a
  
input_ids (`torch.LongTensor` of shape `(batch_size, num_questions, sequence_length)`):
    Indices of input sequence tokens in the vocabulary.

    Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
    [`PreTrainedTokenizer.__call__`] for details.

    [What are input IDs?](../glossary#input-ids)
token_type_ids (`torch.LongTensor` of shape `batch_size, num_questions, sequence_length`, *optional*):
    Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
    1]`:

    - 0 corresponds to a *sentence A* token,
    - 1 corresponds to a *sentence B* token.

    [What are token type IDs?](../glossary#token-type-ids)
position_ids (`torch.LongTensor` of shape `batch_size, num_questions, sequence_length`, *optional*):
    Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
    config.max_position_embeddings - 1]`.

    [What are position IDs?](../glossary#position-ids)
inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_questions, sequence_length, hidden_size)`, *optional*):
    Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
    is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
    model's internal embedding lookup matrix.
start_positions (`torch.LongTensor` of shape `(batch_size, num_questions)`, *optional*):
    Labels for position (index) of the start of the labelled span for computing the token classification loss.
    Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
    are not taken into account for computing the loss.
end_positions (`torch.LongTensor` of shape `(batch_size, num_questions)`, *optional*):
    Labels for position (index) of the end of the labelled span for computing the token classification loss.
    Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
    are not taken into account for computing the loss.
question_positions (`torch.LongTensor` of shape `(batch_size, num_questions)`, *optional*):
    The positions of all question tokens. If given, start_logits and end_logits will be of shape `(batch_size,
    num_questions, sequence_length)`. If None, the first question token in each sequence in the batch will be
    the only one for which start_logits and end_logits are calculated and they will be of shape `(batch_size,
    sequence_length)`.
NzCquestion_positions must be specified in order to calculate the lossz>question_positions must be specified when input_embeds is usedrZ  r   r   r[  rd   r]  )r<   r  	TypeError_prepare_question_positionsr   rI   rQ  r?  r8   r6   rb  rG   rc  rd  maxr   r*   rp   rq  r   r   )r;   rA   r`   rB   r    rb   rC   rU  rV  r   r   r   rW  r   r'  r#  sequence_lengthrf   rJ  rK  num_questions attention_mask_for_each_questionrg  ri  rj  rk  r   s                              r>   rO   SplinterForPreTraining.forward  s   n &1%<k$++B]B]%/*E-Jcabb'I,=\]]'!%!A!A)!L--))%'/!5#   

 "!*+:+?+?+A(
OS#'#5#5o#Z j*//2%/=/G/G/J/Q/QM?0, (1/O+OSXS^S^_k_q_qSrSvSv*vvL#q+K'Ku{{[e[k[kOlOpOp&ppJ
&=+D""1c!_q-@&AB  C?Q+>$?@ (T[[5M5MNH!!!*}"<oN$$Z-%?@J  
] :OL"":#=>H %x/14J"J/'!"+=F/9/EZMF*Q6Q+%!!//))
 	
r@   c                    [         R                  " XR                  R                  :H  5      u  p#[         R                  " U5      n[         R
                  " UR                  S5      UR                  5       4U R                  R                  [         R                  UR                  S9n[         R                  " U Vs/ sH  n[         R                  " U5      PM     sn5      nX5X'4'   U$ s  snf )Nr   rF   )r6   wherer<   rR  bincountfullrI   ry  r*   rK   rH   catr7   )r;   rA   rowsflat_positionsr{  rC  ncolss           r>   rx  2SplinterForPreTraining._prepare_question_positionsK  s    ${{98U8U+UVt,JJ^^A 1 1 34KK$$**##	
	 yy=A=a%,,q/=AB .$* Bs   ;C(rm  rn  )rQ   rR   rS   rT   r&   r   r   r6   r   rV   r   r   rX   rq  rO   rx  rY   rZ   r[   s   @r>   rs  rs    s`     -11515/3,0046:48,0/3&*9=z
ELL)z
 !.z
 !.	z

 u||,z
 ELL)z
  -z
 "%"2"23z
   0 01z
 $D>z
 'tnz
 d^z
 %U%5%56z
 
u22	3z
 z
xU\\ ell  r@   rs  )rN  rs  r   r  r   )r   N)8rU   dataclassesr   typingr   r   r   r6   torch.utils.checkpointr   torch.nnr   activationsr
   modeling_layersr   modeling_outputsr   r   r   modeling_utilsr   r   pytorch_utilsr   r   r   utilsr   r   r   configuration_splinterr   
get_loggerrQ   loggerModuler   r   floatrv   rx   r   r   r   r   r   r   r   r  r*  r5  rN  rq  rs  __all__r   r@   r>   <module>r     s,    ! , ,    % ! 9 Z Z F l l 
 3 
		H	%2 2z (,%II%<<% 
% <<	%
 U\\*% % % %%87BII 7v *		 *\299  RYY %. %R.
bii .
b *o * *, s
+ s
 s
l")) $#(RYY #(L o
#: o
 o
d 
:; : :" S4 SSlr@   