
    <h                     `   S r SSKrSSKJr  SSKJrJr  SSKrSSK	r	SSK	J
r
  SSKJrJrJr  SSKJr  SS	KJrJrJrJrJrJr  SS
KJr  SSKJrJrJr  SSKJr  \R@                  " \!5      r"Sr#S r$ " S S\
RJ                  5      r& " S S\
RJ                  5      r'S\	RP                  S\)S\)S\	RP                  4S jr* " S S\
RJ                  5      r+ " S S\
RJ                  5      r, " S S\
RJ                  5      r- " S S \
RJ                  5      r. SGS!\	RP                  S"\)S#\)S$\/S%\/S\	RP                  4S& jjr0 " S' S(\
RJ                  5      r1 " S) S*\
RJ                  5      r2\ " S+ S,\5      5       r3 " S- S.\
RJ                  5      r4\\" S/S09 " S1 S2\5      5       5       r5\" S3S09 " S4 S5\35      5       r6\ " S6 S7\35      5       r7\" S8S09 " S9 S:\35      5       r8\ " S; S<\35      5       r9\" S=S09 " S> S?\35      5       r:\ " S@ SA\35      5       r;\ " SB SC\35      5       r<\ " SD SE\35      5       r=/ SFQr>g)Hz!PyTorch Funnel Transformer model.    N)	dataclass)OptionalUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)BaseModelOutputMaskedLMOutputMultipleChoiceModelOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel)ModelOutputauto_docstringlogging   )FunnelConfigg    .Ac                     SSK nSSKnSSKn[        R                  R                  U5      n[        R                  SU 35        UR                  R                  U5      n/ n/ n	U H]  u  p[        R                  SU
 SU 35        UR                  R                  Xj5      nUR                  U
5        U	R                  U5        M_     SSS	S
SSSSSSSSSSS.n[        X5       GH"  u  pU
R                  S5      n
[!        S U
 5       5      (       a)  [        R                  SSR#                  U
5       35        MW  U
S   S:X  a  Mb  U nSnU
SS  GH.  n[%        U[&        5      (       d  UR)                  SU5      (       a  [+        UR-                  SU5      R/                  5       S   5      nUUR0                  :  aU  SnUUR2                  U   :  a,  UUR2                  U   -  nUS-  nUUR2                  U   :  a  M,  UR4                  U   U   nM  UUR0                  -  nUR6                  U   nM  US:X  a#  [%        U[8        5      (       a  UR:                  n  O)UU;   a  [=        XU   5      nGM!   [=        UU5      nGM1     U(       a  GM  [E        URB                  5      [E        URB                  5      :w  a  URG                  URB                  5      nWS:X  a  URH                  " U5      n[J        RL                  " U5      Ul'        GM%     U $ ! [         a    [        R                  S5        e f = f! [>         a/    [A        SSR#                  U
5       3URB                  5        Sn   M  f = f)z'Load tf checkpoints in a pytorch model.r   NzLoading a TensorFlow model in PyTorch, requires TensorFlow to be installed. Please see https://www.tensorflow.org/install/ for installation instructions.z&Converting TensorFlow checkpoint from zLoading TF weight z with shape k_headq_headv_head	post_projlinear_1linear_2	attentionffnweightbiasword_embeddings
embeddings)kqvolayer_1layer_2rel_attnffkernelgammabetalookup_tableword_embeddinginput/c              3   *   #    U H
  nUS ;   v   M     g7f))adam_vadam_mAdamWeightDecayOptimizerAdamWeightDecayOptimizer_1global_stepN ).0ns     b/var/www/html/shao/venv/lib/python3.13/site-packages/transformers/models/funnel/modeling_funnel.py	<genexpr>,load_tf_weights_in_funnel.<locals>.<genexpr>\   s      
 nns   z	Skipping 	generatorFr   z	layer_\d+zlayer_(\d+)rTr-   )(renumpy
tensorflowImportErrorloggererrorospathabspathinfotrainlist_variablesload_variableappendzipsplitanyjoin
isinstanceFunnelPositionwiseFFN	fullmatchintsearchgroupsnum_hidden_layersblock_sizesblockslayersFunnelRelMultiheadAttentionr_kernelgetattrAttributeErrorprintshapelenreshape	transposetorch
from_numpydata)modelconfigtf_checkpoint_pathrB   nptftf_path	init_varsnamesarraysnamerc   array
_layer_mappointerskippedm_namelayer_index	block_idxs                      r=   load_tf_weights_in_funnelr{   .   s>   
 ggoo01G
KK8	BC''0IEF (l5'BC&&w5Te	 !  +J" 5)zz#  

 
 
 KK)CHHTN#3457k!12hFg'<==",,|]cBdBd!"))NF"C"J"J"LQ"OP!9!99 !I%););I)FF#v'9'9)'DD!Q	 &););I)FF &nnY7DG6#;#;;K%nn[9G3:g7R#S#S!**:%!'f+=>%gv6G' 0 w7==!S%55gmm4!U+ ++E2GLW *Z La  Q	
 	J & Ichhtn%56D"Gs   L 0L0!L-04M)(M)c                      ^  \ rS rSrS\SS4U 4S jjr S
S\\R                     S\\R                     S\R                  4S jjr	S	r
U =r$ )FunnelEmbeddings   rk   returnNc                 :  > [         TU ]  5         [        R                  " UR                  UR
                  UR                  S9U l        [        R                  " UR                  UR                  S9U l        [        R                  " UR                  5      U l        g )N)padding_idxeps)super__init__r   	Embedding
vocab_sizehidden_sizepad_token_idr#   	LayerNormd_modellayer_norm_eps
layer_normDropouthidden_dropoutdropoutselfrk   	__class__s     r=   r   FunnelEmbeddings.__init__   sh    !||F,=,=v?Q?Q_e_r_rs,,v~~6;P;PQzz&"7"78    	input_idsinputs_embedsc                 r    Uc  U R                  U5      nU R                  U5      nU R                  U5      nU$ N)r#   r   r   )r   r   r   r$   s       r=   forwardFunnelEmbeddings.forward   s<       00;M__]3
\\*-
r   )r   r   r#   NN)__name__
__module____qualname____firstlineno__r   r   r   rg   Tensorr   __static_attributes____classcell__r   s   @r=   r}   r}      sX    9| 9 9 ae!%,,/GOPUP\P\G]	 r   r}   c                     ^  \ rS rSr% SrSr\\S'   S\SS4U 4S jjr	  S"S	\
R                  S
\\
R                     S\\
R                     S\\
R                     4S jjrS\
R                  S\
R                  4S jrS\S\
R                   S\
R"                  S\\\
R                     \\\
R                        4   4S jrS\
R                  S\4S jrS#S\
R                  S\S\S\
R                  4S jjrS\\
R                  \\
R                     \\
R                     4   S\\\\   \\   4   S\
R                  4S jr S$S\\
R                  \\
R                     \\
R                     4   S\S\S\
R                  4S jjrS\\
R                     S\\
R                  \\
R                     4   4S jrS\\
R                     S\\
R                     4S  jrS!rU =r$ )%FunnelAttentionStructure   z6
Contains helpers for `FunnelRelMultiheadAttention `.
   cls_token_type_idrk   r   Nc                    > [         TU ]  5         Xl        [        R                  " UR
                  5      U l        [        R                  " UR
                  5      U l        S U l        g r   )	r   r   rk   r   r   r   sin_dropoutcos_dropoutpooling_multr   s     r=   r   !FunnelAttentionStructure.__init__   sK    ::f&;&;<::f&;&;< !r   r   attention_masktoken_type_idsc                 h   SU l         UR                  S5      =U l        nU R                  XAR                  UR
                  5      nUb  U R                  U5      OSnU R                  R                  (       a7  [        R                  R                  UR                  US-
  US-
  /5      S5      OSnXVX'4$ )zCReturns the attention inputs associated to the inputs of the model.r   N)r   r   r   r   )r   sizeseq_lenget_position_embedsdtypedevicetoken_type_ids_to_matrk   separate_clsr   
functionalpadnew_ones)r   r   r   r   r   position_embedstoken_type_matcls_masks           r=   init_attention_inputs.FunnelAttentionStructure.init_attention_inputs   s     !.!3!3A!66w227<O<OQ^QeQefGUGa33NCgk {{'' MMm44gk7Q;5OPR^_ 	
  JJr   c                 ~    USS2SS2S4   USS2S4   :H  nXR                   :H  nUSS2SS2S4   USS2S4   -  nXB-  $ )z-Convert `token_type_ids` to `token_type_mat`.N)r   )r   r   r   cls_idscls_mats        r=   r   .FunnelAttentionStructure.token_type_ids_to_mat   sU    '1d
3~ag7NN $:$::!Q*%4(88''r   r   r   r   c                 R   U R                   R                  nU R                   R                  S:X  Ga6  [        R                  " SUS[        R
                  US9R                  U5      n[        R                  " SUS-  S[        R
                  US9R                  U5      nSSXdS-  -  -  -  nUSS2S4   US   -  n[        R                  " U5      n	U R                  U	5      n
[        R                  " U5      nU R                  U5      n[        R                  " X/S	S
9n[        R                  " X/S	S
9n[        R                  " X/S	S
9n[        R                  " U	* U/S	S
9nXUU4$ [        R                  " SUS-  S[        R
                  US9R                  U5      nSSXdS-  -  -  -  n[        R                  " U* S-  US-  S[        R
                  US9R                  U5      nUS-  nUSS2S4   US   -  nU R                  [        R                  " U5      5      n	U R                  [        R                  " U5      5      n[        R                  " X/S	S
9n[        R                  " SU[        R
                  US9R                  U5      nUn/ n[        SU R                   R                  5       H  nUS:X  a  SnOqU R                  UU5      nSUS-
  -  nU R                  UUUSS9nUSS2S4   U-   nUR!                  UR#                  S5      U5      n[        R$                  " USU5      nUnSU-  nU R                  UU5      nUSS2S4   U-   nUR!                  UR#                  S5      U5      n[        R$                  " USU5      nUR'                  UU/5        M     U$ )a  
Create and cache inputs related to relative position encoding. Those are very different depending on whether we
are using the factorized or the relative shift attention:

For the factorized attention, it returns the matrices (phi, pi, psi, omega) used in the paper, appendix A.2.2,
final formula.

For the relative shift attention, it returns all possible vectors R used in the paper, appendix A.2.1, final
formula.

Paper link: https://huggingface.co/papers/2006.03236

factorizedr         ?r   r   r   r   i'  Ndim)shift)rk   r   attention_typerg   arangeint64tosinr   cosr   catrange
num_blocksstride_pool_posrelative_posexpandr   gatherrO   )r   r   r   r   r   pos_seqfreq_seqinv_freqsinusoid	sin_embedsin_embed_d	cos_embedcos_embed_dphipsipiomega
rel_pos_idzero_offset	pos_embedpos
pooled_posposition_embeds_listblock_indexposition_embeds_poolingstriderel_posposition_embeds_no_poolings                               r=   r   ,FunnelAttentionStructure.get_position_embeds   sh    ++%%;;%%5 ll1gs%++fUXXY^_G||Aw!|STZ[^^_deHEhQ,&?@AHq$w'(4.8H		(+I**95K		(+I**95K))[6B?C))Y2;CK52>BII	z952>ES%(( ||Aw!|STZ[^^_deHEhQ,&?@AHwhlGaKEKK`fgjjkpqJ!A+K!!T'*Xd^;H((8)<=I((8)<=I		9"8bAI,,q'VLOOPUVCJ#% $Q(>(>? !#.2+!%!5!5c;!GJ ;?3F"//VZq/QG%ag.<G%nnW\\!_gFG.3ll9a.Q+ !K++C8!!T'*[8!..a'B-2\\)Q-P*$++-GI`,ab9  @: ('r   pos_idr   c                     U R                   R                  (       a\  UR                  SU-  * S-   /5      nU R                   R                  (       a  USS OUSS n[        R
                  " X4SSS2   /S5      $ USSS2   $ )zU
Pool `pos_id` while keeping the cls token separate (if `config.separate_cls=True`).
r   r   r   Nr   )rk   r   
new_tensortruncate_seqrg   r   )r   r   r   cls_pospooled_pos_ids        r=   r   (FunnelAttentionStructure.stride_pool_pos  s~     ;;##
 ''1k>):Q)>(?@G,0KK,D,DF1RL&QRQS*M99gSqS'9:A>>#A#;r   r   r   r   c                     Uc  UnUS   US   -
  nU[        U5      -  nXVU-  -   nUS   US   -
  n[        R                  " XxS-
  U* [        R                  UR                  S9$ )zF
Build the relative positional vector between `pos` and `pooled_pos`.
r   r   r   r   )rd   rg   r   longr   )	r   r   r   r   r   	ref_point
num_removemax_distmin_dists	            r=   r   %FunnelAttentionStructure.relative_pos$  st     JqMCF*	S_,
F22a=3r7*||HlVG5::VYV`V`aar   tensoraxisc                 x  ^ ^ Uc  g[        T[        [        45      (       a  T H  nT R                  X5      nM     U$ [        U[        [        45      (       a  [	        U5      " UU 4S jU 5       5      $ TUR
                  -  mT R                  R                  (       a(  T R                  R                  (       a  [        SSS5      O[        SSS5      n[        S5      /T-  U/-   nT R                  R                  (       a6  [        S5      /T-  [        SS5      /-   n[        R                  " X   U/TS9nX   $ )zD
Perform pooling by stride slicing the tensor along the given axis.
Nc              3   F   >#    U H  nTR                  UT5      v   M     g 7fr   )stride_pool)r;   xr  r   s     r=   r>   7FunnelAttentionStructure.stride_pool.<locals>.<genexpr>E  s!     J6a 0 0D 9 96s   !r   r   r   )r  )rT   listtupler  typendimrk   r   r   slicerg   r   )r   r   r  ax
axis_slice	enc_slice	cls_slices   ` `    r=   r  $FunnelAttentionStructure.stride_pool2  s    > dT5M**))&5 M fudm,,<J6JJJ 	 #'++":":t{{?W?WE$A]bcgimop]q 	 4[MD(J<7	;;##t,dA/??IYY 16:FF  r   modec                   ^ ^^^ Tc  g[        T[        [        45      (       a  [        T5      " UU UU4S jT 5       5      $ T R                  R
                  (       aH  T R                  R                  (       a  TSS2SS24   OTn[        R                  " TSS2SS24   U/SS9mTR                  nUS:X  a  TSS2SSS2S4   mOUS:X  a  TSS2SSS2SS24   mTS4mTS:X  a!  [        R                  R                  TTTS	S
9mO[TS:X  a!  [        R                  R                  TTTS	S
9mO4TS:X  a#  [        R                  R                  T* TTS	S
9* mO[        S5      eUS:X  a  TSS2SSS2S4   $ US:X  a	  TSS2S4   $ T$ )z3Apply 1D pooling to a tensor of size [B x T (x H)].Nc              3   D   >#    U H  nTR                  TTTS 9v   M     g7f))r  r   N)pool_tensor)r;   r  r  r   r   r   s     r=   r>   7FunnelAttentionStructure.pool_tensor.<locals>.<genexpr>\  s&     c\bWX 0 0d6 0 R\bs    r   r   r   r   r
   meanT)r   	ceil_modemaxminz0The supported modes are 'mean', 'max' and 'min'.r   )rT   r  r  r	  rk   r   r   rg   r   r
  r   r   
avg_pool2d
max_pool2dNotImplementedError)r   r   r  r   suffixr
  s   ````  r=   r  $FunnelAttentionStructure.pool_tensorS  s|    > fudm,,<c\bccc;;##'+{{'?'?VAssF^VFYYq"1"uv6A>F{{19AtQ,-FQYAtQM*F!6>]]--ffVW[-\FU]]]--ffVW[-\FU]mm..wvY].^^F%&XYY19!Q1*%%QY!Q$<r   attention_inputsc                    Uu  p4pVU R                   R                  (       a}  U R                   R                  S:X  a  U R                  USS S5      USS -   nU R                  US5      nU R                  US5      nU R	                  XR                   R
                  S9nOU =R                  S-  sl        U R                   R                  S:X  a  U R                  US5      nU R                  USS/5      nU R                  USS/5      nU R	                  USS9nU R	                  XR                   R
                  S9nX4XV4nX4$ )zTPool `output` and the proper parts of `attention_inputs` before the attention layer.r   Nr   r   r   r  r  )rk   pool_q_onlyr   r  r  pooling_typer   )r   outputr  r   r   r   r   s          r=   pre_attention_pooling.FunnelAttentionStructure.pre_attention_poolingy  s>    EUA;;""{{))\9"&"2"2?2A3F"J_]^]_M`"`!--na@N''!4H%%f;;3K3K%LF"{{))\9"&"2"2?A"F!--nq!fEN''1a&9H!--n5-IN%%f;;3K3K%LF+^V''r   c                 J   Uu  p#pEU R                   R                  (       a~  U =R                  S-  sl        U R                   R                  S:X  a  USS U R	                  USS S5      -   nU R	                  US5      nU R	                  US5      nU R                  USS9nX#XE4nU$ )zFPool the proper parts of `attention_inputs` after the attention layer.r   r   Nr   r   r  r!  )rk   r"  r   r   r  r  )r   r  r   r   r   r   s         r=   post_attention_pooling/FunnelAttentionStructure.post_attention_pooling  s    DTA;;"""{{))\9"1"1"58H8HYZY[I\^_8`"`!--na@N''!4H!--n5-IN+^Vr   )rk   r   r   r   r   r   Nr   )r  r   )r   r   r   r   __doc__r   rW   __annotations__r   r   rg   r   r   r  r   r   r   r   r   r  r   r   r   r  strr  r%  r(  r   r   r   s   @r=   r   r      sH    s!| ! ! 2615	K||K !.K !.	K
 
u||	K((ELL (U\\ (N(N(#(;;N(8=N(	uU\\"Dell);$<<	=N(`ell  b bc bSV b_d_k_k b!ellE%,,$7ell9KKL! CsT#Y./! 
	!D wx$ELL%*=tELL?QQR$Z]$ps$	$L((-ell(;(	u||U5<<00	1(, uU\\7J  uUZUaUaOb    r   r   positional_attncontext_lenr   r   c                     U R                   u  p4pV[        R                  " XXFU/5      n U S S 2S S 2US 2S S 24   n [        R                  " XXEXb-
  /5      n U SS U24   n U $ )N.)rc   rg   re   )r.  r/  r   
batch_sizen_headr   max_rel_lens          r=   _relative_shift_gatherr4    sp    />/D/D,J mmO&W^5_`O%aEFAo6OmmO&S^Sf5ghO%c<K<&78Or   c                      ^  \ rS rSrS\S\SS4U 4S jjrSS jrSS jr SS	\	R                  S
\	R                  S\	R                  S\\	R                     S\S\\	R                  S4   4S jjrSrU =r$ )r^   i  rk   r   r   Nc                 F  > [         TU ]  5         Xl        X l        UR                  UR
                  UR                  pTn[        R                  " UR                  5      U l	        [        R                  " UR                  5      U l
        [        R                  " X4U-  SS9U l        [        R                  " X4U-  5      U l        [        R                  " X4U-  5      U l        [        R                  " [         R"                  " XE/5      5      U l        [        R                  " [         R"                  " XE/5      5      U l        [        R                  " [         R"                  " X4U/5      5      U l        [        R                  " [         R"                  " XE/5      5      U l        [        R                  " [         R"                  " SXE/5      5      U l        [        R                  " XE-  U5      U l        [        R0                  " X1R2                  S9U l        SUS-  -  U l        g )NF)r"   r   r   r   g      ?)r   r   rk   r   r   r2  d_headr   r   r   attention_dropoutLinearr   r   r   	Parameterrg   zerosr_w_biasr_r_biasr_   r_s_bias	seg_embedr   r   r   r   scale)r   rk   r   r   r2  r7  r   s         r=   r   $FunnelRelMultiheadAttention.__init__  sf   &"(..&-- jj)>)>?!#F,D,D!Eii&uEii&9ii&9U[[&1A%BCU[[&1A%BCU[['61J%KLU[[&1A%BCekk1f2E&FG6?G<,,w4I4IJFCK(
r   c                 z   U R                   R                  S:X  a  Uu  pVpxU R                  U R                  -  n	U R                  n
[
        R                  " SX)-   U
5      nXSS2S4   -  nXSS2S4   -  n[
        R                  " SX5      [
        R                  " SX5      -   nOUR                  S   U:w  a  SOSnXR                     US-
     nU R                  U R                  -  nU R                  n
[
        R                  " SUU
5      n[
        R                  " SUU-   U5      n[        XU5      nUb  X-  nU$ )	z5Relative attention score for the positional encodingsr   zbinh,dnh->bindNzbind,jd->bnijr   r   ztd,dnh->tnhzbinh,tnh->bnit)
rk   r   r=  r@  r_   rg   einsumrc   r   r4  )r   r   r   r/  r   r   r   r   r   uw_rq_r_attentionq_r_attention_1q_r_attention_2r.  r   rA   r'   r_heads                      r=   relative_positional_attention9FunnelRelMultiheadAttention.relative_positional_attention  s7    ;;%%5 #2CS

*A--C "LL)96:sKM+!T'l:O+Dk9O $ll?OQTYT`T`U O  aK7AQE   0 01%!)<A

*A--C \\-C8F#ll+;VaZPO4_SXYO'Or   c                    Uc  gUR                   u  pEnU R                  U R                  -  n[        R                  " SX'-   U R
                  5      nUSS2S4   R                  XBR                   S   XV/5      n[        R                  " USSS9u  p[        R                  " XR                  UR                   5      U	R                  UR                   5      5      nUb  X-  nU$ )z/Relative attention score for the token_type_idsNr   zbind,snd->bnisr   r   r   r   )	rc   r>  r@  rg   rC  r?  r   rQ   where)r   r   r   r   r1  r   r/  r>  token_type_biasdiff_token_typesame_token_typetoken_type_attns               r=   relative_token_type_attention9FunnelRelMultiheadAttention.relative_token_type_attention  s    !+9+?+?(
[ ==4::-  ,,'79JDNN['4077\\RS_V]8kl+0;;r+R(++22>3G3GH/J`J`aoauauJv
 'Or   querykeyvaluer  output_attentions.c                    Uu  pgpUR                   u  pnUR                   S   nU R                  R                  U R                  R                  pU R	                  U5      R                  XX5      nU R                  U5      R                  XX5      nU R                  U5      R                  XX5      nUU R                  -  nU R                  U R                  -  n[        R                  " SUU-   U5      nU R                  UUX5      nU R                  UUU	5      nUU-   U-   nUR                  nUR                  5       nUb%  U[         SUS S 2S S 4   R                  5       -
  -  -
  n[        R"                  " USUS9nU R%                  U5      n[        R                  " SUU5      nU R'                  UR)                  XX-  5      5      nU R+                  U5      nU R-                  UU-   5      nU(       a  UU4$ U4$ )Nr   zbind,bjnd->bnijr   )r   r   zbnij,bjnd->bind)rc   rk   r2  r7  r   viewr   r   r@  r<  rg   rC  rJ  rR  r   floatINFsoftmaxr8  r   re   r   r   )r   rT  rU  rV  r  rW  r   r   r   r   r1  r   _r/  r2  r7  r   r   r   r<  content_scorer.  rQ  
attn_scorer   	attn_probattn_vecattn_outr$  s                                r=   r   #FunnelRelMultiheadAttention.forward  s    EUA!&
Qiil++T[[-?-? U#((fMS!&&zOU#((&Q$**$==4::-%68I6R<<_fVal<<^VU]^ #_4F
   %%'
%#cQ41N1T1T1V-V&WWJMM*"EB	**95	 << 19fE >>("2"2:"XY&&x0!12&7	"FfYFr   )r8  r   rk   r   r   r   r   r   r_   r=  r>  r<  r@  r?  r   r   F)r   r   r   r   r   rW   r   rJ  rR  rg   r   r  boolr   r   r   r   s   @r=   r^   r^     s    )| )# )$ ).(T< #(3G||3G \\3G ||	3G
  -3G  3G 
u||S 	!3G 3Gr   r^   c                   n   ^  \ rS rSrS\SS4U 4S jjrS\R                  S\R                  4S jrSr	U =r
$ )	rU   i=  rk   r   Nc                   > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                     U l	        [        R                  " UR                  5      U l        [        R                  " UR
                  UR                  5      U l        [        R                  " UR                  5      U l        [        R                  " UR                  UR                   5      U l        g r   )r   r   r   r9  r   d_innerr   r   
hidden_actactivation_functionr   activation_dropoutr   r   r   r   r   r   r   s     r=   r   FunnelPositionwiseFFN.__init__>  s    		&..&..A#)&*;*;#< "$**V-F-F"G		&..&..Azz&"7"78,,v~~v7L7LMr   hiddenc                     U R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU R	                  U5      nU R                  X-   5      $ r   )r   rj  rk  r   r   r   )r   rm  hs      r=   r   FunnelPositionwiseFFN.forwardG  s\    MM&!$$Q'##A&MM!LLOvz**r   )rk  rj  r   r   r   r   )r   r   r   r   r   r   rg   r   r   r   r   r   s   @r=   rU   rU   =  s9    N| N N+ell +u|| + +r   rU   c                      ^  \ rS rSrS\S\SS4U 4S jjr SS\R                  S\R                  S	\R                  S
\	S\
4
S jjrSrU =r$ )FunnelLayeriP  rk   r   r   Nc                 b   > [         TU ]  5         [        X5      U l        [	        U5      U l        g r   )r   r   r^   r   rU   r    )r   rk   r   r   s      r=   r   FunnelLayer.__init__Q  s&    4VI(0r   rT  rU  rV  rW  c                 l    U R                  XX4US9nU R                  US   5      nU(       a  XvS   4$ U4$ )NrW  r   r   r   r    )r   rT  rU  rV  r  rW  attnr$  s           r=   r   FunnelLayer.forwardV  sA     ~~e%Uf~g$q'"$5Q DF9Dr   rw  rd  )r   r   r   r   r   rW   r   rg   r   re  r  r   r   r   r   s   @r=   rr  rr  P  so    1| 1# 1$ 1 #(
E||
E \\
E ||	
E  
E 

E 
Er   rr  c                      ^  \ rS rSrS\SS4U 4S jjr     SS\R                  S\\R                     S\\R                     S	\	S
\	S\	S\
\\4   4S jjrSrU =r$ )FunnelEncoderic  rk   r   Nc                 Z  > [         TU ]  5         Xl        [        U5      U l        [
        R                  " [        UR                  5       VVVs/ sH=  u  p#[
        R                  " [        U5       Vs/ sH  n[        X5      PM     sn5      PM?     snnn5      U l        g s  snf s  snnnf r   )r   r   rk   r   attention_structurer   
ModuleList	enumerater[   r   rr  r\   )r   rk   r   
block_sizer]  r   s        r=   r   FunnelEncoder.__init__d  s    #;F#C mm 099K9K/L/L+K zIZ[IZA{6?IZ[\/L
[s   $B&3B!B&!B&r   r   r   rW  output_hidden_statesreturn_dictc                    UR                  U5      nU R                  R                  UUUS9nUnU(       a  U4OS n	U(       a  SOS n
[        U R                  5       GH?  u  pUR                  S5      U R                  R                  (       a  SOS:  nU=(       a    US:  nU(       a  U R                  R                  X5      u  p[        U5       H  u  nn[        U R                  R                  U   5       H  nUS:H  =(       a    US:H  =(       a    UnU(       a$  WnU R                  R                  (       a  UOU=nnOU=n=nnU" UUUXtS9nUS   nU(       a  U R                  R                  U5      nU(       a  U
USS  -   n
U(       d  M  X4-   n	M     M     GMB     U(       d  [        S XU
4 5       5      $ [        XU
S9$ )	Nr   r   r:   r   r   r   rv  c              3   ,   #    U H  oc  M  Uv   M     g 7fr   r:   r;   r'   s     r=   r>   (FunnelEncoder.forward.<locals>.<genexpr>       a$Oq$O   	last_hidden_statehidden_states
attentions)type_asr}  r   r  r\   r   rk   r   r%  r   block_repeatsr"  r(  r  r   )r   r   r   r   rW  r  r  r  rm  all_hidden_statesall_attentionsr   blockpooling_flagpooled_hiddenry   layerrepeat_index
do_poolingrT  rU  rV  layer_outputs                          r=   r   FunnelEncoder.forwardo  s    (//>33II)) J 

 0D],$0d"+DKK"8K!;;q>$++2J2JQPQRL';K!OL262J2J2`2`3/ '0&6"U$)$++*C*CK*P$QL".!"3!\+:J!\P\J! -040G0Gf]Ze.444e#(U<L#rL)!_F!+/+C+C+Z+Z[k+l(()7,qr:J)J++,=	,I) %R '7 #92 aV$Oaaaesttr   )r}  r\   rk   NNFFTr   r   r   r   r   r   rg   r   r   re  r   r  r   r   r   r   r   s   @r=   r{  r{  c  s    	
| 	
 	
 2615"'%* 0u||0u !.0u !.	0u
  0u #0u 0u 
uo%	&0u 0ur   r{  r  r   
target_lenr   r   c           	      L   US:X  a  U $ U(       a  U SS2SS24   nU SS2SS24   n [         R                  " XSS9nU(       aW  U(       a)  [        R                  R	                  USSSUS-
  SS45      nUSS2SUS-
  24   n[         R
                  " WU/SS9nU$ USS2SU24   nU$ )zs
Upsample tensor `x` to match `target_len` by repeating the tokens `stride` time on the sequence length dimension.
r   N)repeatsr   r   r   )rg   repeat_interleaver   r   r   r   )r  r   r  r   r   clsr$  s          r=   upsampler    s     {2A2haeH$$QA>F]]&&v1a!Q/JKF+Z!^++,C=a0 M ;J;'Mr   c                      ^  \ rS rSrS\SS4U 4S jjr     SS\R                  S\R                  S\\R                     S	\\R                     S
\	S\	S\	S\
\\4   4S jjrSrU =r$ )FunnelDecoderi  rk   r   Nc           	         > [         TU ]  5         Xl        [        U5      U l        [
        R                  " [        UR                  5       Vs/ sH  n[        US5      PM     sn5      U l
        g s  snf )Nr   )r   r   rk   r   r}  r   r~  r   num_decoder_layersrr  r]   )r   rk   r]  r   s      r=   r   FunnelDecoder.__init__  sV    #;F#C mmU6KdKdEe$fEe[%;Ee$fg$fs   A/final_hiddenfirst_block_hiddenr   r   rW  r  r  c           	         [        US[        U R                  R                  5      S-
  -  UR                  S   U R                  R
                  U R                  R                  S9nX-   n	U(       a  U	4OS n
U(       a  SOS nU R                  R                  U	UUS9nU R                   H,  nU" XXUS9nUS   n	U(       a  XSS  -   nU(       d  M'  X4-   n
M.     U(       d  [        S XU4 5       5      $ [        XUS	9$ )
Nr   r   )r   r  r   r   r:   r  rv  r   c              3   ,   #    U H  oc  M  Uv   M     g 7fr   r:   r  s     r=   r>   (FunnelDecoder.forward.<locals>.<genexpr>  r  r  r  )r  rd   rk   r[   rc   r   r   r}  r   r]   r  r   )r   r  r  r   r   rW  r  r  upsampled_hiddenrm  r  r  r  r  r  s                  r=   r   FunnelDecoder.forward  s    $T[[4459:)//21111
 "6)=VI40d33II)) J 
 [[E ]noL!!_F !/qr2B!B##$5	$A! ! aV$Oaaaesttr   )r}  rk   r]   r  r  r   s   @r=   r  r    s    h| h h 2615"'%* 'ull'u "LL'u !.	'u
 !.'u  'u #'u 'u 
uo%	&'u 'ur   r  c                   r   ^  \ rS rSrSrS\SS4U 4S jjrS\R                  S\R                  4S jr	S	r
U =r$ )
FunnelDiscriminatorPredictionsi  zEPrediction module for the discriminator, made up of two dense layers.rk   r   Nc                    > [         TU ]  5         Xl        [        R                  " UR
                  UR
                  5      U l        [        R                  " UR
                  S5      U l        g r*  )r   r   rk   r   r9  r   densedense_predictionr   s     r=   r   'FunnelDiscriminatorPredictions.__init__  sD    YYv~~v~~>
 "		&..! <r   discriminator_hidden_statesc                     U R                  U5      n[        U R                  R                     " U5      nU R	                  U5      R                  S5      nU$ )Nr   )r  r   rk   ri  r  squeeze)r   r  r  logitss       r=   r   &FunnelDiscriminatorPredictions.forward  sJ    

#>?t{{556}E&&}5==bAr   )rk   r  r  )r   r   r   r   r+  r   r   rg   r   r   r   r   r   s   @r=   r  r    s9    O=| = =5<< ELL  r   r  c                   .    \ rS rSr% \\S'   \rSrS r	Sr
g)FunnelPreTrainedModeli  rk   funnelc                     UR                   R                  nUR                  S5      S:w  a  [        USS 5      b  U R                  R
                  c=  UR                  R                  u  p4[        R                  " S[        XC-   5      -  5      nOU R                  R
                  n[        R                  R                  UR                  US9  [        USS 5      b+  [        R                  R                  UR                  S5        g g US:X  Ga-  [        R                  R!                  UR"                  U R                  R$                  S	9  [        R                  R!                  UR&                  U R                  R$                  S	9  [        R                  R!                  UR(                  U R                  R$                  S	9  [        R                  R!                  UR*                  U R                  R$                  S	9  [        R                  R!                  UR,                  U R                  R$                  S	9  g US
:X  a  U R                  R
                  c  SOU R                  R
                  n[        R                  R                  UR.                  R                  US9  UR.                  R0                  bF  UR.                  R                  R2                  UR.                  R0                     R5                  5         g g g )Nr9  r   r!   r   )stdr"   g        r^   )br}   )r   r   findr`   rk   initializer_stdr!   rc   rm   sqrtrZ  r   initnormal_	constant_r"   uniform_r<  initializer_ranger=  r_   r>  r?  r#   r   ri   zero_)r   module	classnamefan_outfan_inr  s         r=   _init_weights#FunnelPreTrainedModel._init_weights  s   $$--	>>(#r)vx.:;;..6&,mm&9&9OG''#f.>(?"?@C++55C37vvt,8!!&++s3 977GGV__0M0MNGGV__0M0MNGGV__0M0MNGGV__0M0MNGGV--1N1NO,,44<#$++B]B]CGGOOF2299sOC%%11=&&--2263I3I3U3UV\\^ > -r   r:   N)r   r   r   r   r   r,  r{   load_tf_weightsbase_model_prefixr  r   r:   r   r=   r  r    s    /O _r   r  c                   r   ^  \ rS rSrS\S\SS4U 4S jjrS\R                  S\R                  4S jr	S	r
U =r$ )
FunnelClassificationHeadi  rk   n_labelsr   Nc                   > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  5      U l        [        R                  " UR                  U5      U l	        g r   )
r   r   r   r9  r   linear_hiddenr   r   r   
linear_out)r   rk   r  r   s      r=   r   !FunnelClassificationHead.__init__  sU    YYv~~v~~Fzz&"7"78))FNNH=r   rm  c                     U R                  U5      n[        R                  " U5      nU R                  U5      nU R	                  U5      $ r   )r  rg   tanhr   r  )r   rm  s     r=   r    FunnelClassificationHead.forward  s=    ##F+F#f%v&&r   )r   r  r  )r   r   r   r   r   rW   r   rg   r   r   r   r   r   s   @r=   r  r    s=    >| >s >t >'ell 'u|| ' 'r   r  z2
    Output type of [`FunnelForPreTraining`].
    )custom_introc                       \ rS rSr% SrSr\\R                     \	S'   Sr
\\R                     \	S'   Sr\\\R                        \	S'   Sr\\\R                        \	S'   Srg)	FunnelForPreTrainingOutputi&  a  
loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
    Total loss of the ELECTRA-style objective.
logits (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):
    Prediction scores of the head (scores for each token before SoftMax).
Nlossr  r  r  r:   )r   r   r   r   r+  r  r   rg   FloatTensorr,  r  r  r  r  r   r:   r   r=   r  r  &  sg     )-D(5$$
%,*.FHU&&'.8<M8E%"3"345<59Ju00129r   r  z
    The base Funnel Transformer Model transformer outputting raw hidden-states without upsampling head (also called
    decoder) or any task-specific head on top.
    c                     ^  \ rS rSrS\SS4U 4S jjrS\R                  4S jrS\R                  SS4S jr	\
         SS	\\R                     S
\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\   S\\   S\\   S\\\4   4S jj5       rSrU =r$ )FunnelBaseModeli:  rk   r   Nc                    > [         TU ]  U5        [        U5      U l        [	        U5      U l        U R                  5         g r   )r   r   r}   r$   r{  encoder	post_initr   s     r=   r   FunnelBaseModel.__init__A  s4     *62$V, 	r   c                 .    U R                   R                  $ r   r$   r#   r   s    r=   get_input_embeddings$FunnelBaseModel.get_input_embeddingsJ      ...r   new_embeddingsc                 $    XR                   l        g r   r  r   r  s     r=   set_input_embeddings$FunnelBaseModel.set_input_embeddingsM      *8'r   r   r   r   position_ids	head_maskr   rW  r  r  c
           	      P   Ub  UOU R                   R                  nUb  UOU R                   R                  nU	b  U	OU R                   R                  n	Ub  Ub  [	        S5      eUb"  U R                  X5        UR                  5       n
O"Ub  UR                  5       S S n
O[	        S5      eUb  UR                  OUR                  nUc  [        R                  " XS9nUc$  [        R                  " U
[        R                  US9nU R                  XS9nU R                  UUUUUU	S9nU$ )NDYou cannot specify both input_ids and inputs_embeds at the same timer   5You have to specify either input_ids or inputs_embedsr   r   r   r   r   rW  r  r  )rk   rW  r  use_return_dict
ValueError%warn_if_padding_and_no_attention_maskr   r   rg   onesr;  r   r$   r  )r   r   r   r   r  r  r   rW  r  r  input_shaper   encoder_outputss                r=   r   FunnelBaseModel.forwardP  s7    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"66yQ#..*K&',,.s3KTUU%.%:!!@T@T!"ZZCN!"[[EJJvVN 	O,,))/!5# ' 
 r   )r$   r  	NNNNNNNNNr   r   r   r   r   r   r   r   r  r  r   r   rg   r   re  r   r  r   r   r   r   r   s   @r=   r  r  :  s   |  /bll /92<< 9D 9  -11515/3,004,0/3&*/ELL)/ !./ !.	/
 u||,/ ELL)/  -/ $D>/ 'tn/ d^/ 
uo%	&/ /r   r  c                   F  ^  \ rS rSrS\SS4U 4S jjrS\R                  4S jrS\R                  SS4S jr	\
       SS	\\R                     S
\\R                     S\\R                     S\\R                     S\\   S\\   S\\   S\\\4   4S jj5       rSrU =r$ )FunnelModeli  rk   r   Nc                    > [         TU ]  U5        Xl        [        U5      U l        [        U5      U l        [        U5      U l        U R                  5         g r   )
r   r   rk   r}   r$   r{  r  r  decoderr  r   s     r=   r   FunnelModel.__init__  sE     *62$V,$V, 	r   c                 .    U R                   R                  $ r   r  r  s    r=   r   FunnelModel.get_input_embeddings  r  r   r  c                 $    XR                   l        g r   r  r  s     r=   r   FunnelModel.set_input_embeddings  r  r   r   r   r   r   rW  r  r  c           
         Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nUb  Ub  [	        S5      eUb"  U R                  X5        UR                  5       nO"Ub  UR                  5       S S nO[	        S5      eUb  UR                  OUR                  n	Uc  [        R                  " XS9nUc$  [        R                  " U[        R                  U	S9nU R                  XS9nU R                  UUUUSUS9n
U R                  U
S	   U
S
   U R                   R                  S	      UUUUUS9nU(       d<  S	nUS	   4nU(       a  US
-  nXS
   X   -   4-   nU(       a  US
-  nXS   X   -   4-   nU$ [!        US	   U(       a  U
R"                  UR"                  -   OS U(       a  U
R$                  UR$                  -   S9$ S S9$ )Nr  r   r  r  r   r  Tr  r   r   )r  r  r   r   rW  r  r  r   r  )rk   rW  r  r  r  r  r   r   rg   r  r;  r   r$   r  r  r[   r   r  r  )r   r   r   r   r   rW  r  r  r  r   r  decoder_outputsidxoutputss                 r=   r   FunnelModel.forward  s3    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"66yQ#..*K&',,.s3KTUU%.%:!!@T@T!"ZZCN!"[[EJJvVN 	O,,))/!%# ' 
 ,,(+.q1$++2I2I!2LM))/!5# ' 
 C&q)+G#q!Q%7/:N%N$PP q!Q%7/:N%N$PPN-a0# +88?;X;XXTe22_5O5OO
 	

 lp
 	
r   )rk   r  r$   r  )NNNNNNNr  r   s   @r=   r  r    s    |  /bll /92<< 9D 9  -1151504,0/3&*H
ELL)H
 !.H
 !.	H

  -H
 $D>H
 'tnH
 d^H
 
uo%	&H
 H
r   r  z
    Funnel Transformer model with a binary classification head on top as used during pretraining for identifying
    generated tokens.
    c                     ^  \ rS rSrS\SS4U 4S jjr\        SS\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\
   S\\
   S\\
   S\\\4   4S jj5       rSrU =r$ )FunnelForPreTrainingi  rk   r   Nc                    > [         TU ]  U5        [        U5      U l        [	        U5      U l        U R                  5         g r   )r   r   r  r  r  discriminator_predictionsr  r   s     r=   r   FunnelForPreTraining.__init__  s3     !&))G)O&r   r   r   r   r   labelsrW  r  r  c	           
      f   Ub  UOU R                   R                  nU R                  UUUUUUUS9n	U	S   n
U R                  U
5      nSnUb  [        R
                  " 5       nUb`  UR                  SU
R                  S   5      S:H  nUR                  SU
R                  S   5      U   nX^   nU" UUR                  5       5      nO4U" UR                  SU
R                  S   5      UR                  5       5      nU(       d  U4U	SS -   nUb  U4U-   $ U$ [        UUU	R                  U	R                  S9$ )a  
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the ELECTRA-style loss. Input should be a sequence of tokens (see `input_ids`
    docstring) Indices should be in `[0, 1]`:

    - 0 indicates the token is an original token,
    - 1 indicates the token was replaced.

Examples:

```python
>>> from transformers import AutoTokenizer, FunnelForPreTraining
>>> import torch

>>> tokenizer = AutoTokenizer.from_pretrained("funnel-transformer/small")
>>> model = FunnelForPreTraining.from_pretrained("funnel-transformer/small")

>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> logits = model(**inputs).logits
```Nr   r   r   rW  r  r  r   r   r   r  r  r  r  )rk   r  r  r  r   r   rY  rc   rZ  r  r  r  )r   r   r   r   r   r  rW  r  r  r  discriminator_sequence_outputr  r  loss_fctactive_lossactive_logitsactive_labelsr$  s                     r=   r   FunnelForPreTraining.forward  sh   @ &1%<k$++B]B]&*kk))'/!5# '2 '
# )DA(F%//0MN++-H),11"6S6Y6YZ[6\]abb &B0M0S0STU0V WXc d & 3}/B/B/DEB0M0S0STU0V WY_YeYeYghY!<QR!@@F)-)9TGf$EvE)5CC2==	
 	
r   )r  r  NNNNNNNN)r   r   r   r   r   r   r   r   rg   r   re  r   r  r  r   r   r   r   s   @r=   r  r    s    |    -1151504)-,0/3&*B
ELL)B
 !.B
 !.	B

  -B
 &B
 $D>B
 'tnB
 d^B
 
u00	1B
 B
r   r  c                   l  ^  \ rS rSrS/rS\SS4U 4S jjrS\R                  4S jr	S\R                  SS4S	 jr\        SS
\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\   S\\   S\\   S\\\4   4S jj5       rSrU =r$ )FunnelForMaskedLMi6  zlm_head.weightrk   r   Nc                    > [         TU ]  U5        [        U5      U l        [        R
                  " UR                  UR                  5      U l        U R                  5         g r   )
r   r   r  r  r   r9  r   r   lm_headr  r   s     r=   r   FunnelForMaskedLM.__init__:  sD     !&)yy1B1BC 	r   c                     U R                   $ r   r$  r  s    r=   get_output_embeddings'FunnelForMaskedLM.get_output_embeddingsC  s    ||r   r  c                     Xl         g r   r'  r  s     r=   set_output_embeddings'FunnelForMaskedLM.set_output_embeddingsF  s    %r   r   r   r   r   r  rW  r  r  c	           
         Ub  UOU R                   R                  nU R                  UUUUUUUS9n	U	S   n
U R                  U
5      nSnUbF  [	        5       nU" UR                  SU R                   R                  5      UR                  S5      5      nU(       d  U4U	SS -   nUb  U4U-   $ U$ [        UUU	R                  U	R                  S9$ )az  
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
    config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
    loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
Nr  r   r   r   r  )
rk   r  r  r$  r   rY  r   r   r  r  )r   r   r   r   r   r  rW  r  r  r  r  prediction_logitsmasked_lm_lossr  r$  s                  r=   r   FunnelForMaskedLM.forwardI  s    $ &1%<k$++B]B]++))'/!5#  
 $AJ LL):;')H%&7&<&<RAWAW&XZ`ZeZefhZijN')GABK7F3A3M^%.YSYY$!//))	
 	
r   )r  r$  r   )r   r   r   r   _tied_weights_keysr   r   r   r9  r(  r   r+  r   r   rg   r   re  r   r  r   r   r   r   r   s   @r=   r"  r"  6  s   *+|  ryy &BLL &T &  -1151504)-,0/3&*.
ELL).
 !..
 !.	.

  -.
 &.
 $D>.
 'tn.
 d^.
 
un$	%.
 .
r   r"  z
    Funnel Transformer Model with a sequence classification/regression head on top (two linear layer on top of the
    first timestep of the last hidden state) e.g. for GLUE tasks.
    c                     ^  \ rS rSrS\SS4U 4S jjr\        SS\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\
   S\\
   S\\
   S\\\4   4S jj5       rSrU =r$ )FunnelForSequenceClassificationi{  rk   r   Nc                    > [         TU ]  U5        UR                  U l        Xl        [	        U5      U l        [        XR                  5      U l        U R                  5         g r   )	r   r   
num_labelsrk   r  r  r  
classifierr  r   s     r=   r   (FunnelForSequenceClassification.__init__  sJ      ++%f-26;L;LMr   r   r   r   r   r  rW  r  r  c	           
      >   Ub  UOU R                   R                  nU R                  UUUUUUUS9n	U	S   n
U
SS2S4   nU R                  U5      nSnUGb  U R                   R                  c  U R
                  S:X  a  SU R                   l        OoU R
                  S:  aN  UR                  [        R                  :X  d  UR                  [        R                  :X  a  SU R                   l        OSU R                   l        U R                   R                  S:X  aI  [        5       nU R
                  S:X  a&  U" UR                  5       UR                  5       5      nOU" X5      nOU R                   R                  S:X  a=  [        5       nU" UR                  SU R
                  5      UR                  S5      5      nO,U R                   R                  S:X  a  [        5       nU" X5      nU(       d  U4U	SS -   nUb  U4U-   $ U$ [        UUU	R                   U	R"                  S	9$ )
ae  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
Nr  r   r   
regressionsingle_label_classificationmulti_label_classificationr   r  )rk   r  r  r6  problem_typer5  r   rg   r   rW   r	   r  r   rY  r   r   r  r  )r   r   r   r   r   r  rW  r  r  r  r  pooled_outputr  r  r  r$  s                   r=   r   'FunnelForSequenceClassification.forward  s   $ &1%<k$++B]B]++))'/!5#  
 $AJ)!Q$//{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#F3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE'!//))	
 	
r   )r6  rk   r  r5  r   )r   r   r   r   r   r   r   r   rg   r   re  r   r  r   r   r   r   r   s   @r=   r3  r3  {  s    |    -1151504)-,0/3&*A
ELL)A
 !.A
 !.	A

  -A
 &A
 $D>A
 'tnA
 d^A
 
u..	/A
 A
r   r3  c                     ^  \ rS rSrS\SS4U 4S jjr\        SS\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\
   S\\
   S\\
   S\\\4   4S jj5       rSrU =r$ )FunnelForMultipleChoicei  rk   r   Nc                    > [         TU ]  U5        [        U5      U l        [	        US5      U l        U R                  5         g r*  )r   r   r  r  r  r6  r  r   s     r=   r    FunnelForMultipleChoice.__init__  s4     %f-261=r   r   r   r   r   r  rW  r  r  c	           
         Ub  UOU R                   R                  nUb  UR                  S   OUR                  S   n	Ub!  UR                  SUR	                  S5      5      OSnUb!  UR                  SUR	                  S5      5      OSnUb!  UR                  SUR	                  S5      5      OSnUb1  UR                  SUR	                  S5      UR	                  S5      5      OSnU R                  UUUUUUUS9n
U
S   nUSS2S4   nU R                  U5      nUR                  SU	5      nSnUb  [        5       nU" X5      nU(       d  U4U
SS -   nUb  U4U-   $ U$ [        UUU
R                  U
R                  S9$ )a"  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
    num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
    `input_ids` above)
Nr   r   r  r   r  )rk   r  rc   rY  r   r  r6  r   r   r  r  )r   r   r   r   r   r  rW  r  r  num_choicesr  r  r=  r  reshaped_logitsr  r  r$  s                     r=   r   FunnelForMultipleChoice.forward  s   $ &1%<k$++B]B],5,Aiooa(}GZGZ[\G]>G>SINN2y~~b'9:Y]	M[Mg,,R1D1DR1HImqM[Mg,,R1D1DR1HImq ( r=#5#5b#9=;M;Mb;QR 	 ++))'/!5#  
 $AJ)!Q$// ++b+6')HO4D%''!"+5F)-)9TGf$EvE("!//))	
 	
r   )r6  r  r   )r   r   r   r   r   r   r   r   rg   r   re  r   r  r   r   r   r   r   s   @r=   r@  r@    s    |    -1151504)-,0/3&*:
ELL):
 !.:
 !.	:

  -:
 &:
 $D>:
 'tn:
 d^:
 
u//	0:
 :
r   r@  c                     ^  \ rS rSrS\SS4U 4S jjr\        SS\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\
   S\\
   S\\
   S\\\4   4S jj5       rSrU =r$ )FunnelForTokenClassificationi  rk   r   Nc                 0  > [         TU ]  U5        UR                  U l        [        U5      U l        [
        R                  " UR                  5      U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g r   )r   r   r5  r  r  r   r   r   r   r9  r   r6  r  r   s     r=   r   %FunnelForTokenClassification.__init__  si      ++!&)zz&"7"78))F$6$68I8IJ 	r   r   r   r   r   r  rW  r  r  c	           
         Ub  UOU R                   R                  nU R                  UUUUUUUS9n	U	S   n
U R                  U
5      n
U R	                  U
5      nSnUb<  [        5       nU" UR                  SU R                  5      UR                  S5      5      nU(       d  U4U	SS -   nUb  U4U-   $ U$ [        UUU	R                  U	R                  S9$ )z
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
Nr  r   r   r   r  )rk   r  r  r   r6  r   rY  r5  r   r  r  )r   r   r   r   r   r  rW  r  r  r  r  r  r  r  r$  s                  r=   r   $FunnelForTokenClassification.forward&  s      &1%<k$++B]B]++))'/!5#  
 $AJ LL):;!23')HFKKDOO<fkk"oNDY,F)-)9TGf$EvE$!//))	
 	
r   )r6  r   r  r5  r   )r   r   r   r   r   r   r   r   rg   r   re  r   r  r   r   r   r   r   s   @r=   rI  rI    s    	| 	 	  -1151504)-,0/3&*-
ELL)-
 !.-
 !.	-

  --
 &-
 $D>-
 'tn-
 d^-
 
u++	,-
 -
r   rI  c                   >  ^  \ rS rSrS\SS4U 4S jjr\         SS\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\R                     S\\
   S\\
   S\\
   S\\\4   4S jj5       rSrU =r$ )FunnelForQuestionAnsweringiW  rk   r   Nc                    > [         TU ]  U5        UR                  U l        [        U5      U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g r   )
r   r   r5  r  r  r   r9  r   
qa_outputsr  r   s     r=   r   #FunnelForQuestionAnswering.__init__Y  sS      ++!&)))F$6$68I8IJ 	r   r   r   r   r   start_positionsend_positionsrW  r  r  c
           
         U	b  U	OU R                   R                  n	U R                  UUUUUUU	S9n
U
S   nU R                  U5      nUR	                  SSS9u  pUR                  S5      R                  5       nUR                  S5      R                  5       nS nUb  Ub  [        UR                  5       5      S:  a  UR                  S5      n[        UR                  5       5      S:  a  UR                  S5      nUR                  S5      nUR                  SU5      nUR                  SU5      n[        US9nU" X5      nU" X5      nUU-   S-  nU	(       d  X4U
SS  -   nUb  U4U-   $ U$ [        UUUU
R                  U
R                  S9$ )	Nr  r   r   r   r   )ignore_indexr   )r  start_logits
end_logitsr  r  )rk   r  r  rQ  rQ   r  
contiguousrd   r   squezeclampr   r   r  r  )r   r   r   r   r   rS  rT  rW  r  r  r  r  r  rW  rX  
total_lossignored_indexr  
start_lossend_lossr$  s                        r=   r   "FunnelForQuestionAnswering.forwardc  s    &1%<k$++B]B]++))'/!5#  
 $AJ!23#)<<r<#: #++B/::<''+668

&=+D?'')*Q."1"8"8"<=%%'(1, - 5 5b 9(--a0M-33A}EO)//=AM']CH!,@J
:H$x/14J"/'!"+=F/9/EZMF*Q6Q+%!!//))
 	
r   )r  r5  rQ  r  )r   r   r   r   r   r   r   r   rg   r   re  r   r  r   r   r   r   r   s   @r=   rO  rO  W  s    |    -11515042604,0/3&*:
ELL):
 !.:
 !.	:

  -:
 "%,,/:
  -:
 $D>:
 'tn:
 d^:
 
u22	3:
 :
r   rO  )
r  r"  r@  r  rO  r3  rI  r  r  r{   )TF)?r+  rH   dataclassesr   typingr   r   rC   rm   rg   r   torch.nnr   r   r	   activationsr   modeling_outputsr   r   r   r   r   r   modeling_utilsr   utilsr   r   r   configuration_funnelr   
get_loggerr   rF   r[  r{   Moduler}   r   r   rW   r4  r^   rU   rr  r{  re  r  r  r  r  r  r  r  r  r  r"  r3  r@  rI  rO  __all__r:   r   r=   <module>rl     s   ( 	 ! "    A A !  . 9 9 . 
		H	% 
Wtryy "A ryy A HELL s SV [`[g[g  MG")) MG`+BII +&E")) E&<uBII <u@ di|| .1AE\`
\\,.uBII .ubRYY   _O _ _<'ryy ' 
: : : @+ @@F Z
' Z
 Z
z L
0 L
L
^ A
- A
 A
H M
&; M
M
` D
3 D
 D
N :
#8 :
 :
z F
!6 F
 F
Rr   